Skip to content

Commit d05af11

Browse files
authored
Merge pull request #35 from AlexandrovLab/development
Development
2 parents df1d5bc + d1949bd commit d05af11

File tree

5 files changed

+82
-67
lines changed

5 files changed

+82
-67
lines changed

README.md

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,20 @@ $ pip install .
3030
```
3131
## Signature Subtypes
3232
```python
33-
signature_subgroups = ['remove_MMR_deficiency_signatures',
34-
'remove_POL_deficiency_signatures',
35-
'remove_HR_deficiency_signatures' ,
36-
'remove_BER_deficiency_signatures',
37-
'remove_Chemotherapy_signatures',
38-
'remove_Immunosuppressants_signatures'
39-
'remove_Treatment_signatures'
40-
'remove_APOBEC_signatures',
41-
'remove_Tobacco_signatures',
42-
'remove_UV_signatures',
43-
'remove_AA_signatures',
44-
'remove_Colibactin_signatures',
45-
'remove_Artifact_signatures',
46-
'remove_Lymphoid_signatures']
33+
exclude_signature_subgroups = ['remove_MMR_deficiency_signatures',
34+
'remove_POL_deficiency_signatures',
35+
'remove_HR_deficiency_signatures' ,
36+
'remove_BER_deficiency_signatures',
37+
'remove_Chemotherapy_signatures',
38+
'remove_Immunosuppressants_signatures',
39+
'remove_Treatment_signatures',
40+
'remove_APOBEC_signatures',
41+
'remove_Tobacco_signatures',
42+
'remove_UV_signatures',
43+
'remove_AA_signatures',
44+
'remove_Colibactin_signatures',
45+
'remove_Artifact_signatures',
46+
'remove_Lymphoid_signatures']
4747
```
4848

4949

@@ -83,7 +83,7 @@ Analyze.decompose_fit(samples,
8383
genome_build="GRCh37",
8484
verbose=False,
8585
new_signature_thresh_hold=0.8,
86-
signature_subgroups=signature_subgroups,
86+
exclude_signature_subgroups=exclude_signature_subgroups,
8787
exome=False)
8888
```
8989
### *De Novo* Fit
@@ -114,7 +114,7 @@ Analyze.cosmic_fit( samples,
114114
verbose=False,
115115
collapse_to_SBS96=False,
116116
make_plots=True,
117-
signature_subgroups=signature_subgroups,
117+
exclude_signature_subgroups=exclude_signature_subgroups,
118118
exome=False
119119
)
120120
```
@@ -123,20 +123,21 @@ Analyze.cosmic_fit( samples,
123123
| --------------------- | -------- |-------- |
124124
| **samples** | String | Path to a tab-delimited file that contains the samples table where the rows are mutation types and columns are sample IDs, or path to the directory of VCF files if the input files are VCF files. |
125125
| **output** | String | Path to the output folder. |
126+
| **input_type** | String | The type of input:<br><ul><li>"vcf": used for vcf format inputs.</li><li>"matrix": used for table format inputs using a tab-separated file.</li></ul> Default value is "matrix". |
126127
| **signatures** | String | Path to a tab-delimited file that contains the signature table where the rows are mutation types and columns are signature IDs. |
127128
| **genome_build** | String | The reference genome build. List of supported genomes: "GRCh37", "GRCh38", "mm9", "mm10" and "rn6". The default value is "GRCh37". If the selected genome is not in the supported list, the default genome will be used. |
128129
| **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2 and 3.3. Defines the version of the COSMIC reference signatures. The default value is 3.3. |
129130
| **new_signature_thresh_hold**| Float | Parameter in cosine similarity to declare a new signature. Applicable for decompose_fit only. The default value is 0.8. |
130131
| **make_plots** | Boolean | Toggle on and off for making and saving all plots. Default value is True. |
131-
| **signature_subgroups** | List | Removes the signatures corresponding to specific subtypes for better fitting. The usage is given above. Default value is None. |
132+
| **exclude_signature_subgroups** | List | Removes the signatures corresponding to specific subtypes for better fitting. The usage is given above. Default value is None. |
132133
| **exome** | Boolean | Defines if the exome renormalized signatures will be used. The default value is False. |
133-
| **vcf_opts**|Dict with keys 'project_name' and 'vcf_context'| Reqd options if vcf files are provided as input. 'project_name' key takes a string of the cohort of VCF samples and 'vcf_context' takes what context type of the mutation matrix to be considered for assignment. Valid options include '96', '6', '24', '4608', '288', '18','6144', '384', '1536', 'DINUC'|
134+
| **context_type**| String| Required if "input_type" is "vcf". Specifies which context type of the mutation matrix is used for assignment. Valid options include '96', '6', '24', '4608', '288', '18', '6144', '384', '1536', 'DINUC'. Default value is '96'. |
134135
| **verbose** | Boolean | Prints statements. Default value is False. |
135136

136137

137138
138139

139-
#### SPA analysis Example
140+
#### SPA analysis Example for a matrix
140141

141142

142143
```python
@@ -161,7 +162,39 @@ Analyze.cosmic_fit( samples,
161162
verbose=False,
162163
collapse_to_SBS96=False,
163164
make_plots=True,
164-
signature_subgroups=None,
165+
exclude_signature_subgroups=None,
166+
exome=False)
167+
168+
```
169+
170+
#### SPA analysis Example for input vcf files
171+
172+
173+
```python
174+
#import modules
175+
import SigProfilerAssignment as spa
176+
from SigProfilerAssignment import Analyzer as Analyze
177+
178+
#set directories and paths to signatures and samples
179+
dir_inp = spa.__path__[0]+'/data/Examples/'
180+
samples = spa.__path__[0]+'/data/vcftest/' #directory of vcf files
181+
output = "output_example/"
182+
signatures = dir_inp+"Results_scenario_8/SBS96/All_Solutions/SBS96_3_Signatures/Signatures/SBS96_S3_Signatures.txt"
183+
sigs = "COSMIC_v3_SBS_GRCh37_noSBS84-85.txt" #Custom Signature Database
184+
185+
#Analysis of SP Assignment
186+
Analyze.cosmic_fit( samples,
187+
output,
188+
input_type="vcf",
189+
context_type="96",
190+
signatures=None,
191+
signature_database=sigs,
192+
genome_build="GRCh37",
193+
cosmic_version=3.3,
194+
verbose=False,
195+
collapse_to_SBS96=False,
196+
make_plots=True,
197+
exclude_signature_subgroups=None,
165198
exome=False)
166199

167200
```

SigProfilerAssignment/Analyzer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from SigProfilerAssignment import decomposition as decomp
22

3-
def decompose_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,signature_subgroups=None,exome=False,vcf_opts=None):
3+
def decompose_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,exclude_signature_subgroups=None,exome=False,input_type='matrix',context_type="96"):
44

5-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= True,denovo_refit_option=False,cosmic_fit_option=False,devopts=devopts,new_signature_thresh_hold=new_signature_thresh_hold,signature_subgroups=signature_subgroups,exome=exome,vcf_opts=vcf_opts)
5+
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= True,denovo_refit_option=False,cosmic_fit_option=False,devopts=devopts,new_signature_thresh_hold=new_signature_thresh_hold,exclude_signature_subgroups=exclude_signature_subgroups,exome=exome,input_type=input_type,context_type=context_type)
66

7-
def denovo_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05,nnls_remove_penalty=0.01, initial_remove_penalty=0.05, genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,vcf_opts=None):
8-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, new_signature_thresh_hold=new_signature_thresh_hold, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=True,cosmic_fit_option=False,devopts=devopts,vcf_opts=vcf_opts)
7+
def denovo_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05,nnls_remove_penalty=0.01, initial_remove_penalty=0.05, genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,input_type='matrix',context_type="96"):
8+
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, new_signature_thresh_hold=new_signature_thresh_hold, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=True,cosmic_fit_option=False,devopts=devopts,input_type=input_type,context_type=context_type)
99

10-
def cosmic_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,signature_subgroups=None,exome=False,vcf_opts=None):
11-
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=False,cosmic_fit_option=True,devopts=devopts,signature_subgroups=signature_subgroups,exome=exome,vcf_opts=vcf_opts)
10+
def cosmic_fit(samples, output, signatures=None, signature_database=None,nnls_add_penalty=0.05, nnls_remove_penalty=0.01, initial_remove_penalty=0.05,genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,exclude_signature_subgroups=None,exome=False,input_type='matrix',context_type="96"):
11+
decomp.spa_analyze(samples=samples, output=output, signatures=signatures, signature_database=signature_database,nnls_add_penalty=nnls_add_penalty, nnls_remove_penalty=nnls_remove_penalty, initial_remove_penalty=initial_remove_penalty,genome_build=genome_build, cosmic_version=cosmic_version, make_plots=make_plots, collapse_to_SBS96=collapse_to_SBS96,connected_sigs=connected_sigs, verbose=verbose,decompose_fit_option= False,denovo_refit_option=False,cosmic_fit_option=True,devopts=devopts,exclude_signature_subgroups=exclude_signature_subgroups,exome=exome,input_type=input_type,context_type=context_type)

SigProfilerAssignment/decomposition.py

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
import os,sys
2222

2323

24-
def spa_analyze( samples, output, signatures=None, signature_database=None,decompose_fit_option= True,denovo_refit_option=True,cosmic_fit_option=True, nnls_add_penalty=0.05,
24+
def spa_analyze( samples, output,input_type='matrix',context_type="96", signatures=None, signature_database=None,decompose_fit_option= True,denovo_refit_option=True,cosmic_fit_option=True, nnls_add_penalty=0.05,
2525
nnls_remove_penalty=0.01, initial_remove_penalty=0.05, de_novo_fit_penalty=0.02,
2626
genome_build="GRCh37", cosmic_version=3.3, make_plots=True, collapse_to_SBS96=True,connected_sigs=True, verbose=False,devopts=None,new_signature_thresh_hold=0.8,
27-
signature_subgroups=None, exome=False,vcf_opts=None):
27+
exclude_signature_subgroups=None, exome=False):
2828

2929

3030
"""
@@ -76,39 +76,21 @@ def spa_analyze( samples, output, signatures=None, signature_database=None,dec
7676
if (denovo_refit_option == True or decompose_fit_option ==True) and signatures is None:
7777
raise Exception("If denovo_refit or decompose_fit is True, signatures cannot be empty")
7878

79-
if vcf_opts is not None:
80-
if 'project_name' in vcf_opts:
81-
project_name = vcf_opts['project_name']
82-
else:
83-
project_name = 'Input_vcffiles'
84-
85-
if 'vcf_context' in vcf_opts:
86-
vcf_context = vcf_opts['vcf_context']
87-
else:
88-
vcf_context ='96'
89-
79+
if input_type=="vcf":
80+
project_name = 'Input_vcffiles'
81+
vcf_context = context_type
9082
data = datadump.SigProfilerMatrixGeneratorFunc(project_name, genome_build, samples, exome=exome, bed_file=None, chrom_based=False, plot=False, gs=False)
9183
genomes = data[vcf_context]
92-
else:
84+
85+
elif input_type=="matrix":
9386
try:
9487
genomes = pd.read_csv(samples, sep = "\t", index_col = 0)
9588
except:
9689
genomes = samples
9790
genomes = pd.DataFrame(genomes)
98-
99-
# if signatures is None:
100-
# processAvg = sub.getProcessAvg(genomes, genome_build=genome_build, cosmic_version=cosmic_version)[0]
101-
# processAvg = processAvg.rename_axis('MutationType')
102-
# #processAvg = processAvg.set_index('Type').rename_axis('MutationType')
103-
# else:
104-
# try:
105-
# processAvg = pd.read_csv(signatures,sep='\t', index_col=0)
106-
# except:
107-
# try:
108-
# processAvg=signatures
109-
# except:
110-
# sys.exit("Error in formatting of input signatures, Pass a text file of signatures in the format of COSMIC sig database")
111-
91+
else:
92+
sys.exit("Invalid input_type specified")
93+
11294
default_subgroups_dict= {'remove_MMR_deficiency_signatures' :False,
11395
'remove_POL_deficiency_signatures' :False,
11496
'remove_HR_deficiency_signatures' :False,
@@ -141,19 +123,19 @@ def spa_analyze( samples, output, signatures=None, signature_database=None,dec
141123

142124

143125
signature_subgroups_dict = default_subgroups_dict.copy()
144-
if signature_subgroups == None:
126+
if exclude_signature_subgroups == None:
145127
pass
146128
else:
147-
if type(signature_subgroups) is not list:
148-
sys.exit("signature_subgroups input should be a list of appropriate flags, please refer to documentation.")
129+
if type(exclude_signature_subgroups) is not list:
130+
sys.exit("exclude_signature_subgroups input should be a list of appropriate flags, please refer to documentation.")
149131
else:
150132

151133
for key in default_subgroups_dict:
152-
if key in signature_subgroups:
134+
if key in exclude_signature_subgroups:
153135
signature_subgroups_dict[key]=True
154136

155137
sig_exclusion_list=[]
156-
if signature_subgroups == None:
138+
if exclude_signature_subgroups == None:
157139
sig_exclusion_list=[]
158140
else:
159141
for key in signature_subgroups_dict:

setup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if os.path.exists("dist"):
77
shutil.rmtree("dist")
88

9-
VERSION = '0.0.11'
9+
VERSION = '0.0.12'
1010

1111
with open('README.md') as f:
1212
long_description = f.read()
@@ -17,7 +17,7 @@ def write_version_py(filename='SigProfilerAssignment/version.py'):
1717
# THIS FILE IS GENERATED FROM SigProfilerAssignment SETUP.PY
1818
short_version = '%(version)s'
1919
version = '%(version)s'
20-
Update = 'Integration of VCF files as input'
20+
Update = 'Including input_type and context_type'
2121
2222
"""
2323
fh = open(filename, 'w')
@@ -27,8 +27,8 @@ def write_version_py(filename='SigProfilerAssignment/version.py'):
2727
'scipy>=1.6.3',
2828
'numpy>=1.21.2',
2929
'pandas>=1.2.4',
30-
'SigProfilerExtractor>=1.1.9',
31-
'SigProfilerMatrixGenerator>=1.2.9',
30+
'SigProfilerExtractor>=1.1.11',
31+
'SigProfilerMatrixGenerator>=1.2.12',
3232
'sigProfilerPlotting>=1.2.2',
3333
'pillow',
3434
'statsmodels>=0.9.0',

test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def main():
2020

2121
# vcf_opts={'project_name': 'test_sample','vcf_context': '288' } # Uncomment this If vcf files are provided as input.
2222

23-
# signature_subgroups = ['remove_MMR_deficiency_signatures',
23+
# exclude_signature_subgroups = ['remove_MMR_deficiency_signatures',
2424
# 'remove_POL_deficiency_signatures',
2525
# 'remove_HR_deficiency_signatures' ,
2626
# 'remove_BER_deficiency_signatures',
@@ -33,7 +33,7 @@ def main():
3333
# 'remove_Artifact_signatures',
3434
# 'remove_Lymphoid_signatures']
3535

36-
signature_subgroups = None
36+
exclude_signature_subgroups = None
3737

3838
Analyze.decompose_fit( samples,
3939
output,
@@ -42,7 +42,7 @@ def main():
4242
genome_build="GRCh37",
4343
verbose=False,
4444
new_signature_thresh_hold=0.8,
45-
signature_subgroups=signature_subgroups,
45+
exclude_signature_subgroups=exclude_signature_subgroups,
4646
# vcf_opts=vcf_opts
4747
)
4848

@@ -62,7 +62,7 @@ def main():
6262
genome_build="GRCh37",
6363
verbose=False,
6464
collapse_to_SBS96=True,
65-
signature_subgroups=signature_subgroups,
65+
exclude_signature_subgroups=exclude_signature_subgroups,
6666
# vcf_opts=vcf_opts
6767
)
6868

0 commit comments

Comments
 (0)