From 033548ee6e969f7d534fe92774eecac27ba3b755 Mon Sep 17 00:00:00 2001
From: mdbarnesUCSD <mdbarnes@ucsd.edu>
Date: Thu, 11 Sep 2025 15:49:42 -0700
Subject: [PATCH 1/4] Support decompose_fit in higher contexts so that they are
 not collapsed to 96

---
 SigProfilerAssignment/decompose_subroutines.py | 5 +++--
 SigProfilerAssignment/decomposition.py         | 1 +
 SigProfilerAssignment/single_sample.py         | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py
index 7697df7..8ed00bd 100644
--- a/SigProfilerAssignment/decompose_subroutines.py
+++ b/SigProfilerAssignment/decompose_subroutines.py
@@ -301,6 +301,7 @@ def signature_decomposition(
     m_for_subgroups="SBS",
     volume=None,
     add_background_signatures=True,
+    collapse_to_SBS96=True,
 ):
     originalProcessAvg = originalProcessAvg.reset_index()
     if not os.path.exists(directory + "/Solution_Stats"):
@@ -332,12 +333,12 @@ def signature_decomposition(
             sigDatabase = pd.read_csv(signature_database, sep="\t", index_col=0)
             # indx = sigDatabase.index()
             if (
-                sigDatabase.shape[0] == 1536
+                sigDatabase.shape[0] == 1536 and collapse_to_SBS96
             ):  # collapse the 1596 context into 96 only for the deocmposition
                 sigDatabase = sigDatabase.groupby(sigDatabase.index.str[1:8]).sum()
 
             elif (
-                sigDatabase.shape[0] == 288
+                sigDatabase.shape[0] == 288 and collapse_to_SBS96
             ):  # collapse the 288 context into 96 only for the deocmposition
                 # sigDatabase = pd.DataFrame(processAvg, index=index)
                 sigDatabase = sigDatabase.groupby(sigDatabase.index.str[2:9]).sum()
diff --git a/SigProfilerAssignment/decomposition.py b/SigProfilerAssignment/decomposition.py
index 8a99119..22788ae 100644
--- a/SigProfilerAssignment/decomposition.py
+++ b/SigProfilerAssignment/decomposition.py
@@ -860,6 +860,7 @@ def spa_analyze(
             m_for_subgroups=m_for_subgroups,
             volume=volume,
             add_background_signatures=add_background_signatures,
+            collapse_to_SBS96=collapse_to_SBS96,
         )
         # final_signatures = sub.signature_decomposition(processAvg, m, layer_directory2, genome_build=genome_build)
         # extract the global signatures and new signatures from the final_signatures dictionary
diff --git a/SigProfilerAssignment/single_sample.py b/SigProfilerAssignment/single_sample.py
index c62e34c..190ae0a 100644
--- a/SigProfilerAssignment/single_sample.py
+++ b/SigProfilerAssignment/single_sample.py
@@ -379,6 +379,7 @@ def add_signatures(
                 # compute the estimated genome
                 est_genome = np.dot(W1, np.array(newExposure))
             if solver == "nnls":
+                # import pdb; breakpoint();
                 reg = nnls(W1, genome[:, 0])
                 weights = reg[0]
                 est_genome = np.dot(W1, weights)

From 9e51375c8a40ef21bad5526d62e3afa4f7862951 Mon Sep 17 00:00:00 2001
From: mdbarnesUCSD <mdbarnes@ucsd.edu>
Date: Thu, 11 Sep 2025 16:40:22 -0700
Subject: [PATCH 2/4] Fix variable reusage error in nested for loop

---
 .../decompose_subroutines.py                  | 26 +++++++++++++++----
 SigProfilerAssignment/single_sample.py        |  1 -
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py
index 8ed00bd..110b078 100644
--- a/SigProfilerAssignment/decompose_subroutines.py
+++ b/SigProfilerAssignment/decompose_subroutines.py
@@ -519,12 +519,12 @@ def signature_decomposition(
         decomposed_signatures = []
         contribution_percentages = []
 
-        for j in np.nonzero(exposures)[0]:
-            listofinformation[count * 3] = signames[j]
+        for sig_idx in np.nonzero(exposures)[0]:
+            listofinformation[count * 3] = signames[sig_idx]
             listofinformation[count * 3 + 1] = round(exposure_percentages[count], 2)
             contribution_percentages.append(round(exposure_percentages[count], 2))
             listofinformation[count * 3 + 2] = "%"
-            decomposed_signatures.append(signames[j])
+            decomposed_signatures.append(signames[sig_idx])
             count += 1
         ListToTumple = tuple(
             [mtype, letters[i]]
@@ -540,7 +540,12 @@ def signature_decomposition(
         weights = []
         basis_names = []
         nonzero_exposures = exposures[np.nonzero(exposures)]
-        denovo_name = mutation_context + letters[i]
+        if signature_database is not None:
+            # A custom database was provided, so use the original column names from the de novo file.
+            denovo_name = originalProcessAvg.columns[i+1]
+        else:
+            # No custom database was provided, so use the default naming convention (e.g., SBS96A, SBS96B, etc.).
+            denovo_name = mutation_context + letters[i]
         for info in range(0, len(listofinformation), 3):
             # print(info)
             sigName = listofinformation[info]
@@ -570,8 +575,19 @@ def signature_decomposition(
             mtype_par = "32"
         else:
             mtype_par = "none"
+
+        can_plot_decomposition = make_decomposition_plots
+        # Check for unsupported, non-collapsed contexts before attempting to plot
+        if mtype_par in ["288", "1536"] and not collapse_to_SBS96 and can_plot_decomposition:
+            print(f"\nWarning: Decomposition plots for the {mtype_par} context are not currently supported in this workflow when collapse_to_SBS96 is False. "
+                  f"Skipping plot generation for signature {j}.")
+            can_plot_decomposition = False # Temporarily disable plotting for this specific signature
+
         # only create decomposition plots for COSMIC signatures
-        if mtype_par != "none" and make_decomposition_plots == True:
+        if (
+            mtype_par != "none"
+            and can_plot_decomposition == True
+        ):
             # reformat the first column of cosmic signature dataframe
             cosmic_sigs_DF = sigDatabases_DF.copy(deep=True)
             cosmic_sigs_DF.columns = ["MutationType"] + cosmic_sigs_DF.columns[
diff --git a/SigProfilerAssignment/single_sample.py b/SigProfilerAssignment/single_sample.py
index 190ae0a..c62e34c 100644
--- a/SigProfilerAssignment/single_sample.py
+++ b/SigProfilerAssignment/single_sample.py
@@ -379,7 +379,6 @@ def add_signatures(
                 # compute the estimated genome
                 est_genome = np.dot(W1, np.array(newExposure))
             if solver == "nnls":
-                # import pdb; breakpoint();
                 reg = nnls(W1, genome[:, 0])
                 weights = reg[0]
                 est_genome = np.dot(W1, weights)

From 39e5a777e18287b55e51ce736b6e144ca7dbd8b1 Mon Sep 17 00:00:00 2001
From: Mark Barnes <mdbarnes@ucsd.edu>
Date: Fri, 14 Nov 2025 16:41:56 -0800
Subject: [PATCH 3/4] Support custom signature names

---
 .../decompose_subroutines.py                  | 37 +++++++++++++------
 SigProfilerAssignment/decomposition.py        | 37 ++++++++++++++-----
 2 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py
index 110b078..d151dc1 100644
--- a/SigProfilerAssignment/decompose_subroutines.py
+++ b/SigProfilerAssignment/decompose_subroutines.py
@@ -408,11 +408,23 @@ def signature_decomposition(
     denovo_signature_names = make_letter_ids(
         signatures.shape[1], mtype=mutation_context
     )
+    # Get original signature names if available (preserve original names regardless of database type)
+    # This will be used throughout to preserve original names in logs and output files
+    if originalProcessAvg is not None and hasattr(originalProcessAvg, 'columns') and len(originalProcessAvg.columns) > 1:
+        # After reset_index(), the first column is mutation types, so signature names start at index 1
+        original_signature_names = originalProcessAvg.columns[1:].tolist()
+    else:
+        original_signature_names = None
     # lognote.write("\n********** Starting Signature Decomposition **********\n\n")
     activity_percentages = []
     merger = PdfWriter()
 
     for i, j in zip(range(signatures.shape[1]), denovo_signature_names):
+        # Determine the signature name to use (original if available, otherwise default)
+        if original_signature_names is not None and i < len(original_signature_names):
+            current_signature_name = original_signature_names[i]
+        else:
+            current_signature_name = mutation_context + letters[i]
         # Only for context SBS96
         if signatures.shape[0] == 96:
             lognote = open(
@@ -424,7 +436,7 @@ def signature_decomposition(
             )
             lognote.write(
                 "\n\n\n\n######################## Decomposing "
-                + j
+                + current_signature_name
                 + " ########################\n"
             )
             lognote.close()
@@ -474,7 +486,7 @@ def signature_decomposition(
             )
             lognote.write(
                 "\n\n\n\n######################## Decomposing "
-                + j
+                + current_signature_name
                 + " ########################\n"
             )
             lognote.close()
@@ -527,7 +539,7 @@ def signature_decomposition(
             decomposed_signatures.append(signames[sig_idx])
             count += 1
         ListToTumple = tuple(
-            [mtype, letters[i]]
+            [mtype, current_signature_name]
             + listofinformation
             + [L1dist * 100]
             + [L2dist * 100]
@@ -540,11 +552,12 @@ def signature_decomposition(
         weights = []
         basis_names = []
         nonzero_exposures = exposures[np.nonzero(exposures)]
-        if signature_database is not None:
-            # A custom database was provided, so use the original column names from the de novo file.
-            denovo_name = originalProcessAvg.columns[i+1]
+        # Use original signature names if available (preserve original names regardless of database type)
+        if original_signature_names is not None and i < len(original_signature_names):
+            # Original signature names are available, use them
+            denovo_name = original_signature_names[i]
         else:
-            # No custom database was provided, so use the default naming convention (e.g., SBS96A, SBS96B, etc.).
+            # Fallback to default naming convention (e.g., SBS96A, SBS96B, etc.)
             denovo_name = mutation_context + letters[i]
         for info in range(0, len(listofinformation), 3):
             # print(info)
@@ -648,25 +661,25 @@ def signature_decomposition(
             fh.close()
 
             dictionary.update(
-                {"{}".format(mutation_context + letters[i]): decomposed_signatures}
+                {"{}".format(current_signature_name): decomposed_signatures}
             )
 
         else:
-            newsig.append(mutation_context + letters[i])
+            newsig.append(current_signature_name)
             newsigmatrixidx.append(i)
             fh = open(
                 directory + "/De_Novo_map_to_COSMIC_" + mutation_context + ".csv", "a"
             )
             fh.write(
                 "Signature {}-{}, Signature {}-{}, {}, {}, {}, {}, {}\n".format(
-                    mtype, letters[i], mtype, letters[i], 0, 0, 0, 1, 1
+                    mtype, current_signature_name, mtype, current_signature_name, 0, 0, 0, 1, 1
                 )
             )
             fh.close()
             dictionary.update(
                 {
-                    "{}".format(mutation_context + letters[i]): [
-                        "{}".format(mutation_context + letters[i])
+                    "{}".format(current_signature_name): [
+                        "{}".format(current_signature_name)
                     ]
                 }
             )
diff --git a/SigProfilerAssignment/decomposition.py b/SigProfilerAssignment/decomposition.py
index 22788ae..87f7010 100644
--- a/SigProfilerAssignment/decomposition.py
+++ b/SigProfilerAssignment/decomposition.py
@@ -786,10 +786,29 @@ def spa_analyze(
             allsigids = processAvg.columns.to_list()
         except:
             allsigids = list(listOfSignatures)
+        
+        # Preserve originalProcessAvg with original column names BEFORE converting to numpy array
+        # This is needed to maintain original de novo signature names in decomposition plots
+        if isinstance(processAvg, pd.DataFrame):
+            originalProcessAvg = processAvg.copy()
+        else:
+            originalProcessAvg = pd.DataFrame(
+                processAvg, index=index, columns=listOfSignatures
+            )
+        
         processAvg = np.array(processAvg)
         signature_names = sub.make_letter_ids(
             idlenth=processAvg.shape[1], mtype=mutation_context
         )
+        
+        # Use original signature names if available (preserve original names regardless of database type)
+        # This ensures exposureAvg column names match attribution dictionary keys
+        if originalProcessAvg is not None and hasattr(originalProcessAvg, 'columns') and len(originalProcessAvg.columns) > 0:
+            # Use original column names from the signatures file
+            exposure_signature_names = list(listOfSignatures)
+        else:
+            # Fallback to default naming convention
+            exposure_signature_names = signature_names
 
         exposureAvg_dummy = (
             pd.DataFrame(
@@ -801,16 +820,10 @@ def spa_analyze(
             .rename_axis("Samples")
         )
         exposureAvg = exposureAvg_dummy
-        exposureAvg.columns = signature_names
+        exposureAvg.columns = exposure_signature_names
 
         #############################
         # layer_directory2 = output+"/Decompose_Solution"
-        if isinstance(processAvg, pd.DataFrame):
-            pass
-        else:
-            originalProcessAvg = pd.DataFrame(
-                processAvg, index=index, columns=listOfSignatures
-            )
         try:
             if not os.path.exists(layer_directory2):
                 os.makedirs(layer_directory2)
@@ -819,21 +832,27 @@ def spa_analyze(
 
         if (
             processAvg.shape[0] == 1536 and collapse_to_SBS96 == True
-        ):  # collapse the 1596 context into 96 only for the deocmposition
+        ):  # collapse the 1536 context into 96 only for the decomposition
             processAvg = pd.DataFrame(processAvg, index=index)
             processAvg = processAvg.groupby(processAvg.index.str[1:8]).sum()
             genomes = genomes.groupby(genomes.index.str[1:8]).sum()
             index = genomes.index
+            # Also collapse originalProcessAvg to match the collapsed context
+            # originalProcessAvg has mutation types in the index, so group by index directly
+            originalProcessAvg = originalProcessAvg.groupby(originalProcessAvg.index.str[1:8]).sum()
             processAvg = np.array(processAvg)
 
         if (
             processAvg.shape[0] == 288 and collapse_to_SBS96 == True
-        ):  # collapse the 288 context into 96 only for the deocmposition
+        ):  # collapse the 288 context into 96 only for the decomposition
             processAvg = pd.DataFrame(processAvg, index=index)
             processAvg = processAvg.groupby(processAvg.index.str[2:9]).sum()
             genomes = pd.DataFrame(genomes, index=index)
             genomes = genomes.groupby(genomes.index.str[2:9]).sum()
             index = genomes.index
+            # Also collapse originalProcessAvg to match the collapsed context
+            # originalProcessAvg has mutation types in the index, so group by index directly
+            originalProcessAvg = originalProcessAvg.groupby(originalProcessAvg.index.str[2:9]).sum()
             processAvg = np.array(processAvg)
 
         print("\n Decomposing De Novo Signatures  .....")

From 6ed837d5953ca15d4fd8d05c255cde279fad098a Mon Sep 17 00:00:00 2001
From: Mark Barnes <mdbarnes@ucsd.edu>
Date: Fri, 14 Nov 2025 17:04:00 -0800
Subject: [PATCH 4/4] Pass plotting the correct context type

---
 SigProfilerAssignment/decompose_subroutines.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py
index d151dc1..b5daf36 100644
--- a/SigProfilerAssignment/decompose_subroutines.py
+++ b/SigProfilerAssignment/decompose_subroutines.py
@@ -589,6 +589,10 @@ def signature_decomposition(
         else:
             mtype_par = "none"
 
+        # Update mtype_par to reflect the correct dimension for plotting
+        if collapse_to_SBS96 and signatures.shape[0] == 96 and mtype_par in ["288", "1536"]:
+            mtype_par = "96"
+
         can_plot_decomposition = make_decomposition_plots
         # Check for unsupported, non-collapsed contexts before attempting to plot
         if mtype_par in ["288", "1536"] and not collapse_to_SBS96 and can_plot_decomposition: