Skip to content

Commit a86bec7

Browse files
authored
Merge pull request #13 from AlexandrovLab/fixingbugs_spelinking
fixing bugs extractor linking for different context types
2 parents b16c8fd + fb8c9f9 commit a86bec7

File tree

2 files changed

+77
-11
lines changed

2 files changed

+77
-11
lines changed

SigProfilerAssignment/decompose_sub_routines.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
import os
1111
#from matplot,pdblib.backends.backend_pdf import PdfPages
1212
import pandas as pd
13+
import matplotlib.pyplot as plt
14+
plt.switch_backend('agg')
15+
from matplotlib.backends.backend_pdf import PdfPages
1316
#from sklearn import metrics
1417
#import time
1518
#import multiprocessing
@@ -415,7 +418,7 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
415418
signature_total_mutations= " ", signature_stats = "none", cosmic_sigs=False, attribution= 0, denovo_exposureAvg = "none", add_penalty=0.05, \
416419
remove_penalty=0.01, initial_remove_penalty=0.05, de_novo_fit_penalty=0.02, background_sigs=0, genome_build="GRCh37", sequence="genome", export_probabilities=True, \
417420
refit_denovo_signatures=True, collapse_to_SBS96=True, connected_sigs=True, pcawg_rule=False, verbose=False):
418-
421+
419422
# Get the type of solution from the last part of the layer_directory name
420423
solution_type = layer_directory.split("/")[-1]
421424
solution_prefix = solution_type.split("_")
@@ -588,7 +591,8 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
588591

589592
else:
590593
# when refitting de_novo_signatures
591-
if refit_denovo_signatures==True:
594+
refit_denovo_signatures_old = False
595+
if refit_denovo_signatures_old==True:
592596
exposureAvg=denovo_exposureAvg
593597
for g in range(allgenomes.shape[1]):
594598
print("Analyzing Sample => " , str(g+1))
@@ -617,7 +621,7 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
617621
# when use the exposures from the initial NMF
618622
else:
619623
exposureAvg=denovo_exposureAvg
620-
624+
621625
processAvg= pd.DataFrame(processAvg.astype(float))
622626
processes = processAvg.set_index(index)
623627
processes.columns = allsigids
@@ -658,7 +662,8 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
658662
else:
659663
all_similarities.to_csv(layer_directory+"/Solution_Stats/"+solution_prefix+"_Samples_Stats.txt", sep="\t")
660664

661-
if cosmic_sigs==False:
665+
#if cosmic_sigs==False:
666+
if refit_denovo_signatures ==True:
662667
try:
663668
process_std_error= pd.DataFrame(process_std_error)
664669
processSTE = process_std_error.set_index(index)
@@ -667,7 +672,8 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
667672
processSTE.to_csv(layer_directory+"/Signatures"+"/"+solution_prefix+"_"+"Signatures_SEM_Error.txt", "\t", float_format='%.2E', index_label=[processes.columns.name])
668673
except:
669674
pass
670-
if cosmic_sigs==False:
675+
#if cosmic_sigs==False:
676+
if refit_denovo_signatures ==True:
671677
try:
672678
signature_stats = signature_stats.set_index(allsigids)
673679
signature_stats = signature_stats.rename_axis("Signatures", axis="columns")
@@ -832,3 +838,47 @@ def probabilities(W, H, index, allsigids, allcolnames):
832838

833839

834840
return result
841+
842+
def custom_signatures_plot(signatures, output):
    """Save one bar chart per signature column into a multi-page PDF.

    The PDF is written to ``output + '/Custom_Signature_Plots.pdf'``. Each
    page shows the values of one column of ``signatures`` as bars at the
    positions 1..number_of_rows, titled ``Custom Signature <k>`` where
    ``k`` is the 1-based column number.

    Parameters
    ----------
    signatures :
        Table accessed through ``.iloc`` and ``.shape``
        (presumably a pandas DataFrame with one signature per column --
        confirm against the caller).
    output : str
        Directory path; the PDF file name is appended to it.
    """

    def _draw_signature(column):
        # Build the current figure for a single column; the caller is
        # responsible for saving and closing it.
        plt.figure(figsize=(10, 3))
        values = signatures.iloc[:, column]
        positions = list(range(1, 1 + len(values)))
        plt.bar(positions, values)
        plt.title('Custom Signature {}'.format(column + 1))
        plt.xticks([])
        plt.xlabel("Mutation Types")
        plt.ylabel("Probabilities")

    with PdfPages(output + '/Custom_Signature_Plots.pdf') as pdf:
        # The first column is drawn with whatever text settings are
        # currently active and is saved without an attached note.
        _draw_signature(0)
        pdf.savefig()
        plt.close()

        # Every later column turns LaTeX text rendering off before drawing
        # and attaches a note to its page before saving.
        for column in range(1, signatures.shape[1]):
            plt.rc('text', usetex=False)
            _draw_signature(column)
            pdf.attach_note("signature plots")
            pdf.savefig()
            plt.close()
864+
865+
def merge_pdf(input_folder, output_file):
    """Concatenate every ``.pdf`` file found in ``input_folder`` into one PDF.

    Files are appended in sorted file-name order, page by page, and the
    result is written to ``output_file + '.pdf'``.

    Parameters
    ----------
    input_folder : str
        Directory whose entries ending in ``.pdf`` are merged.
    output_file : str
        Destination path WITHOUT the ``.pdf`` extension; it is appended here.

    Notes
    -----
    All file handles (inputs and output) are now closed when the function
    returns or raises; previously the input files were never closed.
    """
    # Function-scope import so this block does not rely on a file-level one.
    from contextlib import ExitStack

    pdf2merge = sorted(
        filename
        for filename in os.listdir(input_folder)
        if filename.endswith('.pdf')
    )

    pdfWriter = PyPDF2.PdfFileWriter()

    # Keep every input stream open until after the merged file has been
    # written (as the original code did), then close them all together,
    # even if reading or writing fails part-way through.
    with ExitStack() as open_inputs:
        for filename in pdf2merge:
            pdfFileObj = open_inputs.enter_context(
                open(input_folder + "/" + filename, 'rb')
            )
            pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
            for pageNum in range(pdfReader.numPages):
                pdfWriter.addPage(pdfReader.getPage(pageNum))

        # Write the merged PDF; the handle is closed on success and on error.
        with open(output_file + '.pdf', 'wb') as pdfOutput:
            pdfWriter.write(pdfOutput)

SigProfilerAssignment/decomposition.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from SigProfilerAssignment import decompose_sub_routines as sub
1212
import numpy as np
1313
import pandas as pd
14+
1415
#import SigProfilerExtractor as cosmic
1516
import os,sys
1617

@@ -169,12 +170,27 @@ def spa_analyze( samples, output, signatures=None, signature_database=None,dec
169170
else:
170171
background_sigs = []
171172
exposureAvg_dummy = pd.DataFrame(np.random.rand(processAvg.shape[1],genomes.shape[1]),index=allsigids,columns=colnames.to_list()).transpose().rename_axis('Samples')
172-
exposureAvg = sub.make_final_solution(processAvg, genomes, allsigids, layer_directory1, mutation_type, index, colnames,
173-
cosmic_sigs=True, attribution = attribution, denovo_exposureAvg = exposureAvg_dummy ,
174-
background_sigs=background_sigs, verbose=verbose, genome_build=genome_build,
175-
add_penalty=nnls_add_penalty, remove_penalty=nnls_remove_penalty,
176-
initial_remove_penalty=init_rem_denovo,connected_sigs=connected_sigs,
177-
collapse_to_SBS96=collapse_to_SBS96,refit_denovo_signatures=False)
173+
174+
if devopts == None:
175+
exposureAvg = sub.make_final_solution(processAvg, genomes, allsigids, layer_directory1, mutation_type, index, colnames,
176+
cosmic_sigs=True, attribution = attribution, denovo_exposureAvg = exposureAvg_dummy ,
177+
background_sigs=background_sigs, verbose=verbose, genome_build=genome_build,
178+
add_penalty=nnls_add_penalty, remove_penalty=nnls_remove_penalty,
179+
initial_remove_penalty=init_rem_denovo,connected_sigs=connected_sigs,refit_denovo_signatures=False)
180+
181+
else:
182+
signature_stabilities=devopts['signature_stabilities']
183+
signature_total_mutations=devopts['signature_total_mutations']
184+
signature_stats = devopts['signature_stats']
185+
sequence=devopts['sequence']
186+
processSTE=devopts['processSTE']
187+
sequence =devopts['sequence']
188+
189+
exposureAvg = sub.make_final_solution(processAvg, genomes, allsigids, layer_directory1, mutation_type, index, colnames,
190+
cosmic_sigs=True, attribution = attribution, denovo_exposureAvg = exposureAvg_dummy , sequence=sequence,
191+
background_sigs=background_sigs, verbose=verbose, genome_build=genome_build, signature_total_mutations = signature_total_mutations,
192+
add_penalty=nnls_add_penalty, remove_penalty=nnls_remove_penalty, process_std_error = processSTE, signature_stabilities = signature_stabilities,
193+
initial_remove_penalty=init_rem_denovo,connected_sigs=connected_sigs,refit_denovo_signatures=True)
178194
#################
179195
if decompose_fit_option ==True:
180196
#layer_directory2 = output+"/Decompose_Solution"

0 commit comments

Comments
 (0)