Skip to content

Commit 3605d2b

Browse files
committed
Add input_type parameter,Change vcf_opts parameter to context_type,Change signature_subgroups parameter to exclude_signature_subgroups
1 parent 517f428 commit 3605d2b

File tree

4 files changed

+54
-21
lines changed

4 files changed

+54
-21
lines changed

README.md

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ $ pip install .
3030
```
3131
## Signature Subtypes
3232
```python
33-
signature_subgroups = ['remove_MMR_deficiency_signatures',
33+
exclude_signature_subgroups = ['remove_MMR_deficiency_signatures',
3434
'remove_POL_deficiency_signatures',
3535
'remove_HR_deficiency_signatures' ,
3636
'remove_BER_deficiency_signatures',
@@ -83,7 +83,7 @@ Analyze.decompose_fit(samples,
8383
genome_build="GRCh37",
8484
verbose=False,
8585
new_signature_thresh_hold=0.8,
86-
signature_subgroups=signature_subgroups,
86+
exclude_signature_subgroups=exclude_signature_subgroups,
8787
exome=False)
8888
```
8989
### *De Novo* Fit
@@ -114,7 +114,7 @@ Analyze.cosmic_fit( samples,
114114
verbose=False,
115115
collapse_to_SBS96=False,
116116
make_plots=True,
117-
signature_subgroups=signature_subgroups,
117+
exclude_signature_subgroups=exclude_signature_subgroups,
118118
exome=False
119119
)
120120
```
@@ -123,20 +123,21 @@ Analyze.cosmic_fit( samples,
123123
| --------------------- | -------- |-------- |
124124
| **samples** | String | Path to a tab delimited file that contains the samples table where the rows are mutation types and columns are sample IDs, or path to the directory of VCF files if the input files are VCF files. |
125125
| **output** | String | Path to the output folder. |
126+
| **input_type** | String | The type of input:<br><ul><li>"vcf": used for vcf format inputs.</li><li>"matrix": used for table format inputs using a tab separated file.</li></ul> Default value is "matrix".|
126127
| **signatures** | String | Path to a tab delimited file that contains the signature table where the rows are mutation types and columns are signature IDs. |
127128
| **genome_build** | String | The reference genome build. List of supported genomes: "GRCh37", "GRCh38", "mm9", "mm10" and "rn6". The default value is "GRCh37". If the selected genome is not in the supported list, the default genome will be used. |
128129
| **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2 and 3.3. Defines the version of the COSMIC reference signatures. The default value is 3.3. |
129130
| **new_signature_thresh_hold**| Float | Parameter in cosine similarity to declare a new signature. Applicable for decompose_fit only. The default value is 0.8. |
130131
| **make_plots** | Boolean | Toggle on and off for making and saving all plots. Default value is True. |
131-
| **signature_subgroups** | List | Removes the signatures corresponding to specific subtypes for better fitting. The usage is given above. Default value is None. |
132+
| **exclude_signature_subgroups** | List | Removes the signatures corresponding to specific subtypes for better fitting. The usage is given above. Default value is None. |
132133
| **exome** | Boolean | Defines if the exome renormalized signatures will be used. The default value is False. |
133-
| **vcf_opts**|Dict with keys 'project_name' and 'vcf_context'| Reqd options if vcf files are provided as input. 'project_name' key takes a string of the cohort of VCF samples and 'vcf_context' takes what context type of the mutation matrix to be considered for assignment. Valid options include '96', '6', '24', '4608', '288', '18','6144', '384', '1536', 'DINUC'|
134+
| **context_type**| String| Required context type if "input_type" is "vcf". 'context_type' specifies which context type of the mutation matrix is used for assignment. Valid options include '96', '6', '24', '4608', '288', '18','6144', '384', '1536', 'DINUC'. Default value is '96'.|
134135
| **verbose** | Boolean | Prints statements. Default value is False. |
135136

136137

137138
138139

139-
#### SPA analysis Example
140+
#### SPA analysis Example for a matrix
140141

141142

142143
```python
@@ -161,7 +162,39 @@ Analyze.cosmic_fit( samples,
161162
verbose=False,
162163
collapse_to_SBS96=False,
163164
make_plots=True,
164-
signature_subgroups=None,
165+
exclude_signature_subgroups=None,
166+
exome=False)
167+
168+
```
169+
170+
#### SPA analysis Example for input vcf files
171+
172+
173+
```python
174+
#import modules
175+
import SigProfilerAssignment as spa
176+
from SigProfilerAssignment import Analyzer as Analyze
177+
178+
#set directories and paths to signatures and samples
179+
dir_inp = spa.__path__[0]+'/data/Examples/'
180+
samples = spa.__path__[0]+'/data/vcftest/' #directory of vcf files
181+
output = "output_example/"
182+
signatures = dir_inp+"Results_scenario_8/SBS96/All_Solutions/SBS96_3_Signatures/Signatures/SBS96_S3_Signatures.txt"
183+
sigs = "COSMIC_v3_SBS_GRCh37_noSBS84-85.txt" #Custom Signature Database
184+
185+
#Analysis of SP Assignment
186+
Analyze.cosmic_fit( samples,
187+
output,
188+
input_type="vcf",
189+
context_type="96",
190+
signatures=None,
191+
signature_database=sigs,
192+
genome_build="GRCh37",
193+
cosmic_version=3.3,
194+
verbose=False,
195+
collapse_to_SBS96=False,
196+
make_plots=True,
197+
exclude_signature_subgroups=None,
165198
exome=False)
166199

167200
```

SigProfilerAssignment/Analyzer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from SigProfilerAssignment import decomposition as decomp
22

3-
def decompose_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,signature_subgroups=None,exome=False,input_type='matrix',context_type="96"):
3+
def decompose_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,exclude_signature_subgroups=None,exome=False,input_type='matrix',context_type="96"):
44

5-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= True,denovo_refit_option=False,cosmic_fit_option=False,devopts=devopts,new_signature_thresh_hold=new_signature_thresh_hold,signature_subgroups=signature_subgroups,exome=exome,input_type=input_type,context_type=context_type)
5+
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= True,denovo_refit_option=False,cosmic_fit_option=False,devopts=devopts,new_signature_thresh_hold=new_signature_thresh_hold,exclude_signature_subgroups=exclude_signature_subgroups,exome=exome,input_type=input_type,context_type=context_type)
66

77
def denovo_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05,nnls_remove_penalty=0.01, initial_remove_penalty=0.05, genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,input_type='matrix',context_type="96"):
88
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, new_signature_thresh_hold=new_signature_thresh_hold, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=True,cosmic_fit_option=False,devopts=devopts,input_type=input_type,context_type=context_type)
99

10-
def cosmic_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,signature_subgroups=None,exome=False,input_type='matrix',context_type="96"):
11-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=False,cosmic_fit_option=True,devopts=devopts,signature_subgroups=signature_subgroups,exome=exome,input_type=input_type,context_type=context_type)
10+
def cosmic_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,exclude_signature_subgroups=None,exome=False,input_type='matrix',context_type="96"):
11+
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=False,cosmic_fit_option=True,devopts=devopts,exclude_signature_subgroups=exclude_signature_subgroups,exome=exome,input_type=input_type,context_type=context_type)

SigProfilerAssignment/decomposition.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
def spa_analyze( samples, output,input_type='matrix',context_type="96", signatures=None, signature_database=None,decompose_fit_option= True,denovo_refit_option=True,cosmic_fit_option=True, nnls_add_penalty=0.05,
2525
nnls_remove_penalty=0.01, initial_remove_penalty=0.05, de_novo_fit_penalty=0.02,
2626
genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,
27-
signature_subgroups=None, exome=False):
27+
exclude_signature_subgroups=None, exome=False):
2828

2929

3030
"""
@@ -123,19 +123,19 @@ def spa_analyze( samples, output,input_type='matrix',context_type="96", signat
123123

124124

125125
signature_subgroups_dict = default_subgroups_dict.copy()
126-
if signature_subgroups == None:
126+
if exclude_signature_subgroups == None:
127127
pass
128128
else:
129-
if type(signature_subgroups) is not list:
130-
sys.exit("signature_subgroups input should be a list of appropriate flags, please refer to documentation.")
129+
if type(exclude_signature_subgroups) is not list:
130+
sys.exit("exclude_signature_subgroups input should be a list of appropriate flags, please refer to documentation.")
131131
else:
132132

133133
for key in default_subgroups_dict:
134-
if key in signature_subgroups:
134+
if key in exclude_signature_subgroups:
135135
signature_subgroups_dict[key]=True
136136

137137
sig_exclusion_list=[]
138-
if signature_subgroups == None:
138+
if exclude_signature_subgroups == None:
139139
sig_exclusion_list=[]
140140
else:
141141
for key in signature_subgroups_dict:

test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def main():
2020

2121
# vcf_opts={'project_name': 'test_sample','vcf_context': '288' } # Uncomment this If vcf files are provided as input.
2222

23-
# signature_subgroups = ['remove_MMR_deficiency_signatures',
23+
# exclude_signature_subgroups = ['remove_MMR_deficiency_signatures',
2424
# 'remove_POL_deficiency_signatures',
2525
# 'remove_HR_deficiency_signatures' ,
2626
# 'remove_BER_deficiency_signatures',
@@ -33,7 +33,7 @@ def main():
3333
# 'remove_Artifact_signatures',
3434
# 'remove_Lymphoid_signatures']
3535

36-
signature_subgroups = None
36+
exclude_signature_subgroups = None
3737

3838
Analyze.decompose_fit( samples,
3939
output,
@@ -42,7 +42,7 @@ def main():
4242
genome_build="GRCh37",
4343
verbose=False,
4444
new_signature_thresh_hold=0.8,
45-
signature_subgroups=signature_subgroups,
45+
exclude_signature_subgroups=exclude_signature_subgroups,
4646
# vcf_opts=vcf_opts
4747
)
4848

@@ -62,7 +62,7 @@ def main():
6262
genome_build="GRCh37",
6363
verbose=False,
6464
collapse_to_SBS96=True,
65-
signature_subgroups=signature_subgroups,
65+
exclude_signature_subgroups=exclude_signature_subgroups,
6666
# vcf_opts=vcf_opts
6767
)
6868

0 commit comments

Comments
 (0)