Skip to content

Commit d39c68c

Browse files
authored
Merge branch 'main' into development
2 parents e02a457 + 653e1af commit d39c68c

File tree

5 files changed

+117
-21
lines changed

5 files changed

+117
-21
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,15 @@ Analyze.cosmic_fit( samples,
127127
| **samples** | String | Path to input file for `input_type`:<ul><li>"matrix"</li><li>"seg:TYPE"</li></ul> Path to input folder for `input_type`:<ul><li>"vcf"</li></ul>|
128128
| **output** | String | Path to the output folder. |
129129
| **input_type** | String | The type of input:<br><ul><li>"matrix": used for table format inputs using a tab-separated file where the rows are mutation types and the columns are sample IDs.</li><li>"vcf": used for mutation calling file inputs (VCFs, MAFs or simple text files).</li><li>"seg:TYPE": used for a multi-sample segmentation file for copy number analysis. The accepted callers for TYPE are the following {"ASCAT", "ASCAT_NGS", "SEQUENZA", "ABSOLUTE", "BATTENBERG", "FACETS", "PURPLE", "TCGA"}. For example, when using segmentation file from BATTENBERG then set input_type to "seg:BATTENBERG".</li></ul> The default value is "matrix".|
130-
| **context_type**| String| Required context type if `input_type` is "vcf". `context_type` takes which context type of the input data is considered for assignment. Valid options include "96", "288", "1536", "DINUC", and "INDEL". The default value is "96".|
130+
| **context_type**| String| Required context type if `input_type` is "vcf". `context_type` specifies which context type of the input data is considered for assignment. Valid options include "96", "288", "1536", "DINUC", and "ID". The default value is "96".|
131131
| **signatures** | String | Path to a tab-delimited file that contains the signature table where the rows are mutation types and columns are signature IDs. |
132132
| **genome_build** | String | The reference genome build. List of supported genomes: "GRCh37", "GRCh38", "mm9", "mm10" and "rn6". The default value is "GRCh37". If the selected genome is not in the supported list, the default genome will be used. |
133133
| **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2 and 3.3. Defines the version of the COSMIC reference signatures. The default value is 3.3. |
134134
| **new_signature_thresh_hold**| Float | Parameter in cosine similarity to declare a new signature. Applicable for decompose_fit only. The default value is 0.8. |
135135
| **exclude_signature_subgroups** | List | Removes the signatures corresponding to specific subtypes for better fitting. The usage is given above. The default value is None. |
136136
| **exome** | Boolean | Defines if the exome renormalized signatures will be used. The default value is False. |
137-
| **export_probabilities** | Boolean | Defines if the probability matrix is created. The default value is True. |
137+
| **export_probabilities** | Boolean | Defines if the probability matrix per mutational context for all samples is created. The default value is True. |
138+
| **export_probabilities_per_mutation** | Boolean | Defines if the probability matrices per mutation for all samples are created. Only available when `input_type` is "vcf". The default value is False. |
138139
| **make_plots** | Boolean | Toggle on and off for making and saving all plots. The default value is True. |
139140
| **verbose** | Boolean | Prints statements. The default value is False. |
140141

SigProfilerAssignment/Analyzer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from SigProfilerAssignment import decomposition as decomp
22

3-
def decompose_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,exclude_signature_subgroups=None,exome=False,input_type='matrix',context_type="96",export_probabilities=True):
3+
def decompose_fit(
    samples,
    output,
    signatures=None,
    signature_database=None,
    nnls_add_penalty=0.05,
    nnls_remove_penalty=0.01,
    initial_remove_penalty=0.05,
    genome_build="GRCh37",
    cosmic_version=3.3,
    make_plots=True,
    collapse_to_SBS96=True,
    connected_sigs=True,
    verbose=False,
    devopts=None,
    new_signature_thresh_hold=0.8,
    exclude_signature_subgroups=None,
    exome=False,
    input_type='matrix',
    context_type="96",
    export_probabilities=True,
    export_probabilities_per_mutation=False,
):
    """Run ``decomp.spa_analyze`` in "decompose fit" mode.

    Every argument is forwarded unchanged by keyword; the only thing this
    wrapper adds is the mode selection (``decompose_fit_option=True`` with the
    de novo refit and COSMIC fit options switched off). Nothing is returned.
    """
    # Exactly one of the three analysis modes is enabled per wrapper.
    mode_flags = {
        "decompose_fit_option": True,
        "denovo_refit_option": False,
        "cosmic_fit_option": False,
    }
    decomp.spa_analyze(
        samples=samples,
        output=output,
        signatures=signatures,
        signature_database=signature_database,
        nnls_add_penalty=nnls_add_penalty,
        nnls_remove_penalty=nnls_remove_penalty,
        initial_remove_penalty=initial_remove_penalty,
        genome_build=genome_build,
        cosmic_version=cosmic_version,
        make_plots=make_plots,
        collapse_to_SBS96=collapse_to_SBS96,
        connected_sigs=connected_sigs,
        verbose=verbose,
        devopts=devopts,
        new_signature_thresh_hold=new_signature_thresh_hold,
        exclude_signature_subgroups=exclude_signature_subgroups,
        exome=exome,
        input_type=input_type,
        context_type=context_type,
        export_probabilities=export_probabilities,
        export_probabilities_per_mutation=export_probabilities_per_mutation,
        **mode_flags,
    )
66

7-
def denovo_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05,nnls_remove_penalty=0.01, initial_remove_penalty=0.05, genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,exome=False,input_type='matrix',context_type="96",export_probabilities=True):
8-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, new_signature_thresh_hold=new_signature_thresh_hold, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=True,cosmic_fit_option=False,devopts=devopts,exome=exome,input_type=input_type,context_type=context_type,export_probabilities=export_probabilities)
7+
def denovo_fit(
    samples,
    output,
    signatures=None,
    signature_database=None,
    nnls_add_penalty=0.05,
    nnls_remove_penalty=0.01,
    initial_remove_penalty=0.05,
    genome_build="GRCh37",
    cosmic_version=3.3,
    make_plots=True,
    collapse_to_SBS96=True,
    connected_sigs=True,
    verbose=False,
    devopts=None,
    new_signature_thresh_hold=0.8,
    exome=False,
    input_type='matrix',
    context_type="96",
    export_probabilities=True,
    export_probabilities_per_mutation=False,
):
    """Run ``decomp.spa_analyze`` in "de novo refit" mode.

    Every argument is forwarded unchanged by keyword; the only thing this
    wrapper adds is the mode selection (``denovo_refit_option=True`` with the
    decompose fit and COSMIC fit options switched off). Nothing is returned.
    """
    # Exactly one of the three analysis modes is enabled per wrapper.
    mode_flags = {
        "decompose_fit_option": False,
        "denovo_refit_option": True,
        "cosmic_fit_option": False,
    }
    decomp.spa_analyze(
        samples=samples,
        output=output,
        signatures=signatures,
        signature_database=signature_database,
        nnls_add_penalty=nnls_add_penalty,
        nnls_remove_penalty=nnls_remove_penalty,
        initial_remove_penalty=initial_remove_penalty,
        genome_build=genome_build,
        cosmic_version=cosmic_version,
        new_signature_thresh_hold=new_signature_thresh_hold,
        make_plots=make_plots,
        collapse_to_SBS96=collapse_to_SBS96,
        connected_sigs=connected_sigs,
        verbose=verbose,
        devopts=devopts,
        exome=exome,
        input_type=input_type,
        context_type=context_type,
        export_probabilities=export_probabilities,
        export_probabilities_per_mutation=export_probabilities_per_mutation,
        **mode_flags,
    )
99

10-
def cosmic_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,exclude_signature_subgroups=None,exome=False,input_type='matrix',context_type="96",export_probabilities=True, sample_reconstruction_plots=False):
11-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=False,cosmic_fit_option=True,devopts=devopts,exclude_signature_subgroups=exclude_signature_subgroups,exome=exome,input_type=input_type,context_type=context_type,export_probabilities=export_probabilities, sample_reconstruction_plots=sample_reconstruction_plots)
10+
def cosmic_fit(
    samples,
    output,
    signatures=None,
    signature_database=None,
    nnls_add_penalty=0.05,
    nnls_remove_penalty=0.01,
    initial_remove_penalty=0.05,
    genome_build="GRCh37",
    cosmic_version=3.3,
    make_plots=True,
    collapse_to_SBS96=True,
    connected_sigs=True,
    verbose=False,
    devopts=None,
    exclude_signature_subgroups=None,
    exome=False,
    input_type='matrix',
    context_type="96",
    export_probabilities=True,
    export_probabilities_per_mutation=False,
    sample_reconstruction_plots=False,
):
    """Run ``decomp.spa_analyze`` in "COSMIC fit" mode.

    Every argument is forwarded unchanged by keyword; the only thing this
    wrapper adds is the mode selection (``cosmic_fit_option=True`` with the
    decompose fit and de novo refit options switched off). Nothing is returned.
    """
    # Exactly one of the three analysis modes is enabled per wrapper.
    mode_flags = {
        "decompose_fit_option": False,
        "denovo_refit_option": False,
        "cosmic_fit_option": True,
    }
    decomp.spa_analyze(
        samples=samples,
        output=output,
        signatures=signatures,
        signature_database=signature_database,
        nnls_add_penalty=nnls_add_penalty,
        nnls_remove_penalty=nnls_remove_penalty,
        initial_remove_penalty=initial_remove_penalty,
        genome_build=genome_build,
        cosmic_version=cosmic_version,
        make_plots=make_plots,
        collapse_to_SBS96=collapse_to_SBS96,
        connected_sigs=connected_sigs,
        verbose=verbose,
        devopts=devopts,
        exclude_signature_subgroups=exclude_signature_subgroups,
        exome=exome,
        input_type=input_type,
        context_type=context_type,
        export_probabilities=export_probabilities,
        export_probabilities_per_mutation=export_probabilities_per_mutation,
        sample_reconstruction_plots=sample_reconstruction_plots,
        **mode_flags,
    )

SigProfilerAssignment/decompose_subroutines.py

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,8 @@ def signature_decomposition(signatures, mtype, directory, genome_build="GRCh37",
474474
#############################################################################################################
475475
def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, index, allcolnames, process_std_error = "none", signature_stabilities = " ", \
476476
signature_total_mutations= " ", signature_stats = "none", cosmic_sigs=False, attribution= 0, denovo_exposureAvg = "none", add_penalty=0.05, \
477-
remove_penalty=0.01, initial_remove_penalty=0.05, de_novo_fit_penalty=0.02, background_sigs=0, genome_build="GRCh37", sequence="genome", export_probabilities=True, \
478-
refit_denovo_signatures=True, collapse_to_SBS96=True, connected_sigs=True, pcawg_rule=False, verbose=False,make_plots = True):
477+
remove_penalty=0.01, initial_remove_penalty=0.05, de_novo_fit_penalty=0.02, background_sigs=0, genome_build="GRCh37", sequence="genome", export_probabilities=True, export_probabilities_per_mutation=False, \
478+
refit_denovo_signatures=True, collapse_to_SBS96=True, connected_sigs=True, pcawg_rule=False, verbose=False,make_plots = True, samples='./', input_type='matrix', denovo_refit_option=True):
479479

480480
if processAvg.shape[0]==allgenomes.shape[0] and processAvg.shape[0] != 96:
481481
collapse_to_SBS96=False
@@ -777,15 +777,45 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
777777
probability = probabilities(processAvg, exposureAvg, index, allsigids, allcolnames)
778778
probability=probability.set_index("Sample Names" )
779779

780-
if cosmic_sigs==False:
781-
780+
if denovo_refit_option==True:
782781
if refit_denovo_signatures==True:
783-
probability.to_csv(layer_directory+"/Activities"+"/"+"De_Novo_Mutation_Probabilities_refit.txt", "\t")
782+
probability.to_csv(layer_directory+"/Activities"+"/"+"De_Novo_MutationType_Probabilities_refit.txt", "\t")
783+
else:
784+
probability.to_csv(layer_directory+"/Activities"+"/"+"De_Novo_MutationType_Probabilities.txt", "\t")
785+
if denovo_refit_option==False:
786+
probability.to_csv(layer_directory+"/Activities"+"/"+"Decomposed_MutationType_Probabilities.txt", "\t")
787+
788+
if export_probabilities_per_mutation==True:
789+
if export_probabilities==True:
790+
if input_type=='vcf':
791+
if m=='96' or m=='78' or m=='83':
792+
probability_per_mutation, samples_prob_per_mut = probabilities_per_mutation(probability, samples, m)
793+
794+
if denovo_refit_option==True:
795+
if refit_denovo_signatures==True:
796+
ppm_file_name = "De_Novo_Mutation_Probabilities_refit"
797+
output_path_prob_per_mut = layer_directory+"/Activities"+"/"+ppm_file_name
798+
else:
799+
ppm_file_name = "De_Novo_Mutation_Probabilities"
800+
output_path_prob_per_mut = layer_directory+"/Activities"+"/"+ppm_file_name
801+
else:
802+
ppm_file_name = "Decomposed_Mutation_Probabilities"
803+
output_path_prob_per_mut = layer_directory+"/Activities"+"/"+ppm_file_name
804+
805+
if not os.path.exists(output_path_prob_per_mut):
806+
os.makedirs(output_path_prob_per_mut)
807+
for matrix,sample in zip(probability_per_mutation, samples_prob_per_mut):
808+
matrix=matrix.set_index('Sample Names')
809+
matrix=matrix.sort_values(by=['Chr','Pos'])
810+
matrix.to_csv(layer_directory+"/Activities"+"/"+ ppm_file_name + "/" + ppm_file_name + "_" + sample + ".txt", "\t")
811+
else:
812+
print('Probabilities per mutation are only calculated for SBS96, DBS78 and ID83 mutational contexts.')
784813
else:
785-
probability.to_csv(layer_directory+"/Activities"+"/"+"De_Novo_Mutation_Probabilities.txt", "\t")
786-
if cosmic_sigs==True:
787-
probability.to_csv(layer_directory+"/Activities"+"/"+"Decomposed_Mutation_Probabilities.txt", "\t")
814+
print('Probabilities per mutation are only calculated if input_type is "vcf".')
815+
else:
816+
print('Probabilities per mutation require to calculate probabilities per context type. Please re-run your analysis setting export_probabilites=True.')
788817

818+
# import pdb; pdb.set_trace()
789819

790820
return exposures
791821
################################################################### FUNCTION ONE ###################################################################
@@ -904,6 +934,59 @@ def probabilities(W, H, index, allsigids, allcolnames):
904934

905935
return result
906936

937+
938+
################################################### Generation of the per-process probabilities for each individual mutation ############################################
939+
def probabilities_per_mutation(probability_matrix, samples_path, m):
    """Attach the per-mutation-type probabilities to every individual mutation.

    Every file found in ``<samples_path>/output/vcf_files/<SNV|DBS|ID>`` is
    read as a headerless, tab-separated table. The mutation-type string in the
    fourth column is trimmed, the trailing columns are dropped, and each
    sample's mutations are merged with ``probability_matrix``.

    Parameters
    ----------
    probability_matrix : pandas.DataFrame
        Probabilities per mutation type. Its index is reset here so that the
        index becomes an ordinary column; the merge then uses whichever
        columns it shares with the mutation table (pandas' default).
    samples_path : str
        Folder that contains the ``output/vcf_files`` tree.
    m : str
        Mutational context: ``'96'`` (SNV), ``'78'`` (DBS) or ``'83'`` (ID).

    Returns
    -------
    list
        ``[frames, sample_names]``: one merged DataFrame per sample and the
        matching sample names, in the (sorted) order produced by ``groupby``.
        Both lists are empty when no file contains any mutation.

    Raises
    ------
    ValueError
        If ``m`` is not one of the supported contexts.
    """
    # context -> (sub-folder, slice start, slice stop, trailing columns to drop)
    # NOTE(review): the slices presumably strip the strand prefix and the outer
    # flanking bases from the mutation-type string (e.g. "T:AA[C>A]AA" ->
    # "A[C>A]A" for SNVs) - confirm against the files written upstream.
    layouts = {
        '96': ('SNV', 3, -1, [4]),
        '78': ('DBS', 4, -2, [4]),
        '83': ('ID', 2, 100, [4, 5, 6]),
    }
    if m not in layouts:
        raise ValueError(
            "Probabilities per mutation are only available for the '96', "
            "'78' and '83' contexts, got {!r}.".format(m)
        )
    subfolder, interval_low, interval_high, extra_columns = layouts[m]
    seqinfo_path = os.path.join(samples_path, 'output', 'vcf_files', subfolder)

    probability_matrix = probability_matrix.reset_index()

    # Collect the tables first and concatenate once: concatenating inside the
    # loop copies the accumulated data on every iteration.
    tables = []
    for file_name in sorted(os.listdir(seqinfo_path)):
        try:
            tables.append(
                pd.read_csv(os.path.join(seqinfo_path, file_name), sep='\t', header=None)
            )
        except pd.errors.EmptyDataError:
            # Files without any mutation are expected and simply skipped.
            continue
    if not tables:
        # Nothing to annotate; indexing column 3 of an empty frame would fail.
        return [[], []]

    all_mutations = pd.concat(tables)
    all_mutations[3] = all_mutations[3].str[interval_low:interval_high]
    all_mutations = all_mutations.drop(columns=extra_columns)
    all_mutations.columns = ['Sample Names', 'Chr', 'Pos', 'MutationType']

    prob_per_mut = []
    sample_names = []
    for sample_name, sample_mutations in all_mutations.groupby('Sample Names'):
        prob_per_mut.append(sample_mutations.merge(probability_matrix))
        # Use the group key rather than the first merged row, so a sample
        # whose merge yields no rows does not raise a KeyError.
        sample_names.append(sample_name)

    return [prob_per_mut, sample_names]
988+
989+
907990
def custom_signatures_plot(signatures, output):
908991
with PdfPages(output+'/Custom_Signature_Plots.pdf') as pdf:
909992
plt.figure(figsize=(10, 3))

0 commit comments

Comments
 (0)