Skip to content

Commit 1b09a2c

Browse files
committed
Add filtering for only exome mutations
1 parent 26fee56 commit 1b09a2c

File tree

2 files changed

+19
-8
lines changed

2 files changed

+19
-8
lines changed

SigProfilerAssignment/decompose_subroutines.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -480,7 +480,7 @@ def signature_decomposition(signatures, mtype, directory, genome_build="GRCh37",
480480
def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, index, allcolnames, process_std_error = "none", signature_stabilities = " ", \
481481
signature_total_mutations= " ", signature_stats = "none", cosmic_sigs=False, attribution= 0, denovo_exposureAvg = "none", add_penalty=0.05, \
482482
remove_penalty=0.01, initial_remove_penalty=0.05, de_novo_fit_penalty=0.02, background_sigs=0, genome_build="GRCh37", sequence="genome", export_probabilities=True, export_probabilities_per_mutation=False, \
483-
refit_denovo_signatures=True, collapse_to_SBS96=True, connected_sigs=True, pcawg_rule=False, verbose=False,make_plots = True, samples='./', input_type='matrix', denovo_refit_option=True):
483+
refit_denovo_signatures=True, collapse_to_SBS96=True, connected_sigs=True, pcawg_rule=False, verbose=False,make_plots = True, samples='./', input_type='matrix', denovo_refit_option=True, exome=False):
484484

485485
if processAvg.shape[0]==allgenomes.shape[0] and processAvg.shape[0] != 96:
486486
collapse_to_SBS96=False
@@ -794,7 +794,7 @@ def make_final_solution(processAvg, allgenomes, allsigids, layer_directory, m, i
794794
if export_probabilities==True:
795795
if input_type=='vcf':
796796
if m=='96' or m=='78' or m=='83':
797-
probability_per_mutation, samples_prob_per_mut = probabilities_per_mutation(probability, samples, m)
797+
probability_per_mutation, samples_prob_per_mut = probabilities_per_mutation(probability, samples, m, exome)
798798

799799
if denovo_refit_option==True:
800800
if refit_denovo_signatures==True:
@@ -941,7 +941,7 @@ def probabilities(W, H, index, allsigids, allcolnames):
941941

942942

943943
################################################### Generation of probabilities for each process given a mutation ############################################
944-
def probabilities_per_mutation(probability_matrix, samples_path, m):
944+
def probabilities_per_mutation(probability_matrix, samples_path, m, exome=False):
945945
#
946946
probability_matrix=probability_matrix.reset_index()
947947
#
@@ -963,12 +963,15 @@ def probabilities_per_mutation(probability_matrix, samples_path, m):
963963
#
964964
all_mutations = pd.DataFrame()
965965
for file in seqinfo_files:
966-
if 'exome' not in file:
966+
if 'exome' in file:
967+
exome_df = pd.read_csv(seqinfo_path + file, sep='\t',header=None)
968+
else:
967969
try:
968970
new = pd.read_csv(seqinfo_path + file, sep='\t',header=None)
969971
all_mutations = pd.concat([all_mutations, new])
970972
except (pd.errors.EmptyDataError):
971973
pass
974+
972975
all_mutations[3] = all_mutations[3].str[interval_low:interval_high]
973976
if m=='96' or m=='78':
974977
del all_mutations[4]
@@ -978,6 +981,14 @@ def probabilities_per_mutation(probability_matrix, samples_path, m):
978981
del all_mutations[4]
979982

980983
all_mutations.columns = ['Sample Names', 'Chr', 'Pos', 'MutationType']
984+
if exome==True:
985+
del exome_df[2]
986+
del exome_df[3]
987+
del exome_df[4]
988+
exome_df.columns = ['Chr', 'Pos']
989+
exome_df['Chr'] = [str(x) for x in (exome_df['Chr']).to_list()]
990+
all_mutations['Chr'] = [str(x) for x in (all_mutations['Chr']).to_list()]
991+
all_mutations = pd.merge(all_mutations, exome_df)
981992
#
982993
all_samples_mutations = [y for x, y in all_mutations.groupby('Sample Names')]
983994
#

SigProfilerAssignment/decomposition.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -412,7 +412,7 @@ def spa_analyze(samples, output, input_type='matrix', context_type="96", signatu
412412
add_penalty=nnls_add_penalty, remove_penalty=nnls_remove_penalty,
413413
initial_remove_penalty=init_rem_denovo,connected_sigs=connected_sigs,refit_denovo_signatures=False,
414414
make_plots=make_plots,export_probabilities=export_probabilities, export_probabilities_per_mutation=export_probabilities_per_mutation,
415-
samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option)
415+
samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option, exome=exome)
416416

417417
else:
418418
signature_stabilities=devopts['signature_stabilities']
@@ -426,7 +426,7 @@ def spa_analyze(samples, output, input_type='matrix', context_type="96", signatu
426426
background_sigs=background_sigs, verbose=verbose, genome_build=genome_build, signature_total_mutations = signature_total_mutations,
427427
add_penalty=nnls_add_penalty, remove_penalty=nnls_remove_penalty, process_std_error = processSTE, signature_stabilities = signature_stabilities,
428428
initial_remove_penalty=init_rem_denovo,connected_sigs=connected_sigs,refit_denovo_signatures=True,export_probabilities=export_probabilities,
429-
export_probabilities_per_mutation=export_probabilities_per_mutation, samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option)
429+
export_probabilities_per_mutation=export_probabilities_per_mutation, samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option, exome=exome)
430430

431431
if make_metadata:
432432
with open(os.path.join(output,"JOB_METADATA_SPA.txt"),"a") as sysdata:
@@ -541,7 +541,7 @@ def spa_analyze(samples, output, input_type='matrix', context_type="96", signatu
541541
make_plots=make_plots,
542542
export_probabilities=export_probabilities,
543543
export_probabilities_per_mutation=export_probabilities_per_mutation,
544-
samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option)
544+
samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option, exome=exome)
545545

546546
if make_metadata:
547547
with open(os.path.join(output,"JOB_METADATA_SPA.txt"),"a") as sysdata:
@@ -659,7 +659,7 @@ def spa_analyze(samples, output, input_type='matrix', context_type="96", signatu
659659
make_plots =make_plots,
660660
export_probabilities=export_probabilities,
661661
export_probabilities_per_mutation=export_probabilities_per_mutation,
662-
samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option)
662+
samples=samples, input_type=input_type, denovo_refit_option=denovo_refit_option, exome=exome)
663663
if make_metadata:
664664
with open(os.path.join(output,"JOB_METADATA_SPA.txt"),"a") as sysdata:
665665
current_time_end = datetime.datetime.now()

0 commit comments

Comments
 (0)