Skip to content

Commit 23e9b3d

Browse files
authored
Merge pull request #117 from AlexandrovLab/U57
U57
2 parents 486a8d8 + 14acaa0 commit 23e9b3d

File tree

9 files changed

+822
-67
lines changed

9 files changed

+822
-67
lines changed

SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_CNV48_Signatures.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
MutationType CN1 CN2 CN9 CN20 CNV48B CNV48D
1+
MutationType CN1 CN2 CN9 CN20 CN3 CN4
22
0:homdel:0-100kb 0.00179505 0.00223767 0.00818184 0.00004078 0.00000295 0.00000075
33
0:homdel:100kb-1Mb 0.00380710 0.00719579 0.02099919 0.00017845 0.00039561 0.02703219
44
0:homdel:>1Mb 0.00243675 0.00383711 0.00900922 0.00011722 0.00122668 0.02703635
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
MutationType SV1 SV2 SV3 SV4 SV5 SV6 SV7 SV9
2+
clustered_del_1-10Kb 0.00001830 0.00001750 0.00000831 0.00558463 0.00162643 0.00484161 0.00036457 0.01689226
3+
clustered_del_10-100Kb 0.00000013 0.00000500 0.00000000 0.00883802 0.00395326 0.00397105 0.00493403 0.02027149
4+
clustered_del_100Kb-1Mb 0.00004260 0.00000257 0.00000253 0.01342056 0.00264242 0.01567818 0.00175481 0.04559844
5+
clustered_del_1Mb-10Mb 0.00000003 0.00000557 0.00000000 0.01941683 0.00079517 0.06315566 0.00000135 0.08553749
6+
clustered_del_>10Mb 0.00000003 0.00000224 0.00000009 0.01212996 0.00000205 0.13914616 0.00000121 0.00004090
7+
clustered_tds_1-10Kb 0.00000029 0.00002000 0.00233538 0.00119602 0.00044748 0.00173254 0.00023931 0.00407692
8+
clustered_tds_10-100Kb 0.00080029 0.00003770 0.00137651 0.00406629 0.00092090 0.00411956 0.00031235 0.00799563
9+
clustered_tds_100Kb-1Mb 0.00217807 0.00001180 0.00018608 0.00927806 0.00180766 0.01359699 0.00000078 0.04878886
10+
clustered_tds_1Mb-10Mb 0.00006350 0.00000161 0.00000004 0.01669456 0.00008410 0.06927579 0.00000076 0.07607224
11+
clustered_tds_>10Mb 0.00000003 0.00000314 0.00000010 0.00950079 0.00003530 0.14468700 0.00000187 0.00019873
12+
clustered_inv_1-10Kb 0.00003510 0.00009050 0.00000000 0.01188233 0.01068676 0.00333129 0.00000116 0.00901519
13+
clustered_inv_10-100Kb 0.00000113 0.00002000 0.00000415 0.01283746 0.00421801 0.00778075 0.00000166 0.03440818
14+
clustered_inv_100Kb-1Mb 0.00062736 0.00000829 0.00000000 0.01893295 0.00104651 0.02657075 0.00000052 0.08534106
15+
clustered_inv_1Mb-10Mb 0.00000637 0.00000052 0.00000003 0.03875053 0.00018613 0.13269196 0.00000021 0.15018331
16+
clustered_inv_>10Mb 0.00000001 0.00000229 0.00000003 0.02250855 0.00000130 0.26682614 0.00000158 0.00004010
17+
clustered_trans 0.00005860 0.00000182 0.00000829 0.74672823 0.01500836 0.00000039 0.00000036 0.00000269
18+
non-clustered_del_1-10Kb 0.00016322 0.01077137 0.03108973 0.00000143 0.27051044 0.00049070 0.11080824 0.03271109
19+
non-clustered_del_10-100Kb 0.01062551 0.00189318 0.00026854 0.00000001 0.04236249 0.00047467 0.40908574 0.03762502
20+
non-clustered_del_100Kb-1Mb 0.01031305 0.00009900 0.00051531 0.00110315 0.02494482 0.00056581 0.34862452 0.05284083
21+
non-clustered_del_1Mb-10Mb 0.00810848 0.04772402 0.00323839 0.00114858 0.04036508 0.00908681 0.02776373 0.04842356
22+
non-clustered_del_>10Mb 0.00460793 0.05938062 0.00861972 0.00280099 0.02737192 0.01709695 0.01575099 0.01781587
23+
non-clustered_tds_1-10Kb 0.00000013 0.00004480 0.42763680 0.00000015 0.08730244 0.00000010 0.01078117 0.01195956
24+
non-clustered_tds_10-100Kb 0.04599380 0.00002460 0.32675443 0.00000001 0.12292716 0.00000054 0.00652355 0.01970948
25+
non-clustered_tds_100Kb-1Mb 0.45491853 0.01413625 0.01600693 0.00008590 0.06600995 0.00067638 0.00545841 0.01400244
26+
non-clustered_tds_1Mb-10Mb 0.35062795 0.04029646 0.00096917 0.00013931 0.01815304 0.00881413 0.00239992 0.02583554
27+
non-clustered_tds_>10Mb 0.00580397 0.03967873 0.00649925 0.00298543 0.01284238 0.01668428 0.00007000 0.00744539
28+
non-clustered_inv_1-10Kb 0.00440008 0.00000085 0.00411229 0.00018744 0.04638068 0.00022783 0.00000043 0.01662502
29+
non-clustered_inv_10-100Kb 0.01048762 0.00459802 0.00275734 0.00116401 0.03219888 0.00171438 0.03025930 0.01859228
30+
non-clustered_inv_100Kb-1Mb 0.01700822 0.05558915 0.00038638 0.00198348 0.03887257 0.00243843 0.01706313 0.03888396
31+
non-clustered_inv_1Mb-10Mb 0.01075404 0.09313637 0.00482722 0.00198335 0.03699181 0.01686964 0.00553730 0.04676524
32+
non-clustered_inv_>10Mb 0.01304926 0.09976361 0.01624505 0.01157483 0.04637754 0.02742841 0.00186541 0.02399899
33+
non-clustered_trans 0.04930641 0.53263233 0.14615191 0.02307616 0.04292692 0.00002510 0.00039160 0.00230227
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
MutationType SV32A SV32B SV32C
2+
clustered_del_1-10Kb 0.00662723 0.00177044 0.00000007
3+
clustered_del_10-100Kb 0.00972409 0.00167625 0.00000003
4+
clustered_del_100Kb-1Mb 0.02672959 0.00069852 0.00000004
5+
clustered_del_1Mb-10Mb 0.05108407 0.00033823 0.00000009
6+
clustered_del_>10Mb 0.03969927 0.00000047 0.00000040
7+
clustered_tds_1-10Kb 0.00166860 0.00057069 0.00011002
8+
clustered_tds_10-100Kb 0.00695235 0.00099348 0.00000008
9+
clustered_tds_100Kb-1Mb 0.02396912 0.00136236 0.00000000
10+
clustered_tds_1Mb-10Mb 0.04294237 0.00000963 0.00000009
11+
clustered_tds_>10Mb 0.04211685 0.00000072 0.00000016
12+
clustered_inv_1-10Kb 0.01459632 0.00452079 0.00000000
13+
clustered_inv_10-100Kb 0.01751233 0.00170391 0.00000000
14+
clustered_inv_100Kb-1Mb 0.03681359 0.00037369 0.00000012
15+
clustered_inv_1Mb-10Mb 0.10623513 0.00000047 0.00000013
16+
clustered_inv_>10Mb 0.07525245 0.00108337 0.00000002
17+
clustered_trans 0.23293450 0.00000023 0.00000022
18+
non-clustered_del_1-10Kb 0.00016344 0.11881046 0.04395760
19+
non-clustered_del_10-100Kb 0.00447025 0.09378964 0.00000052
20+
non-clustered_del_100Kb-1Mb 0.01586304 0.03619608 0.00000016
21+
non-clustered_del_1Mb-10Mb 0.02913311 0.02808863 0.00010709
22+
non-clustered_del_>10Mb 0.01433401 0.03214281 0.00690197
23+
non-clustered_tds_1-10Kb 0.00022787 0.00000001 0.34090417
24+
non-clustered_tds_10-100Kb 0.00000028 0.01990271 0.34120828
25+
non-clustered_tds_100Kb-1Mb 0.00938708 0.10950006 0.06107216
26+
non-clustered_tds_1Mb-10Mb 0.02086042 0.02455653 0.02874254
27+
non-clustered_tds_>10Mb 0.01097690 0.02568377 0.01287658
28+
non-clustered_inv_1-10Kb 0.00947122 0.06278640 0.00888667
29+
non-clustered_inv_10-100Kb 0.00786742 0.03894469 0.00344490
30+
non-clustered_inv_100Kb-1Mb 0.01588333 0.02367522 0.00125801
31+
non-clustered_inv_1Mb-10Mb 0.03501238 0.04148272 0.00366291
32+
non-clustered_inv_>10Mb 0.03495364 0.06231916 0.01299224
33+
non-clustered_trans 0.05653771 0.26701783 0.13387279

SigProfilerAssignment/DecompositionPlots/PlotDecomposition.py

Lines changed: 124 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@
2626
import SigProfilerAssignment
2727
import SigProfilerAssignment.DecompositionPlots
2828
from SigProfilerAssignment.DecompositionPlots import SigProfilerPlottingMatrix as mPlt
29-
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_SBS96 as spd_96
30-
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_SBS288 as spd_288
3129
from SigProfilerAssignment.DecompositionPlots import (
30+
PlotDecomposition_SBS96 as spd_96,
31+
PlotDecomposition_SBS288 as spd_288,
3232
PlotDecomposition_SBS1536 as spd_1536,
33+
PlotDecomposition_DBS78 as spd_78,
34+
PlotDecomposition_ID83 as spd_83,
35+
PlotDecomposition_CNV48 as cnv_48,
36+
PlotDecomposition_SV32 as sv_32,
3337
)
34-
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_DBS78 as spd_78
35-
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_ID83 as spd_83
36-
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_CNV48 as cnv_48
3738
from SigProfilerAssignment import decompose_subroutines as sub
3839

3940
# imports for working with plots in memory
@@ -43,11 +44,13 @@
4344
import json
4445
import base64
4546

47+
4648
# Global Variables
4749
SBS_CONTEXTS = ["6", "24", "96", "288", "384", "1536", "6144"]
4850
DBS_CONTEXTS = ["78", "186", "1248", "2976"]
4951
ID_CONTEXTS = ["28", "83", "415"]
5052
CNV_CONTEXTS = ["48"]
53+
SV_CONTEXTS = ["32"]
5154
MTYPE_OPTIONS = [
5255
"6",
5356
"24",
@@ -62,14 +65,14 @@
6265
"186",
6366
"1248",
6467
"2976",
68+
"32",
6569
]
6670
DECOMPOSITION_PATH = SigProfilerAssignment.DecompositionPlots.__path__[0]
6771
REFSIG_PATH = os.path.join(
6872
SigProfilerAssignment.__path__[0], "data/Reference_Signatures"
6973
)
7074
TEMPLATE_PATH = os.path.join(DECOMPOSITION_PATH, "CosmicTemplates")
7175

72-
7376
# Remove templates so that they can be rebuilt
7477
def remove_cosmic_templates():
7578
if not os.path.exists(TEMPLATE_PATH):
@@ -84,10 +87,11 @@ def remove_cosmic_templates():
8487
def install_cosmic_plots(
8588
context_type="96", genome_build="GRCh37", cosmic_version="3.4", exome=False
8689
):
90+
8791
if not os.path.exists(TEMPLATE_PATH):
8892
os.mkdir(TEMPLATE_PATH)
8993

90-
# determine if context is from SBS, ID, DBS, or CNV
94+
# determine if context is from SBS, ID, DBS, CNV or SV
9195
context_type_str = ""
9296
if context_type in SBS_CONTEXTS:
9397
context_type_str = "SBS"
@@ -102,6 +106,9 @@ def install_cosmic_plots(
102106
elif context_type in CNV_CONTEXTS:
103107
context_type_str = "CNV"
104108
cosmic_mtype = "48"
109+
elif context_type in SV_CONTEXTS:
110+
context_type_str = "SV"
111+
cosmic_mtype = "32"
105112
else:
106113
raise ValueError("ERROR: context", context_type, "not in context lists.")
107114

@@ -138,6 +145,20 @@ def install_cosmic_plots(
138145
genome_build = "GRCh37"
139146
exome_str = ""
140147

148+
# CNV signatures exome=False, genome_build=GRCh37
149+
if context_type in CNV_CONTEXTS:
150+
cosmic_file_name = "COSMIC_v" + str(cosmic_version) + "_CN_GRCh37.txt"
151+
json_file_name = "COSMIC_v" + str(cosmic_version) + "_CN_GRCh37.json"
152+
genome_build = "GRCh37"
153+
exome_str = ""
154+
155+
# SV signatures exome=False, genome_build=GRCh38
156+
if context_type in SV_CONTEXTS:
157+
cosmic_file_name = "COSMIC_v" + str(cosmic_version) + "_SV_GRCh38.txt"
158+
json_file_name = "COSMIC_v" + str(cosmic_version) + "_SV_GRCh38.json"
159+
genome_build = "GRCh38"
160+
exome_str = ""
161+
141162
# Load cosmic plots if they exist
142163
filename = os.path.join(TEMPLATE_PATH, json_file_name)
143164
if os.path.exists(filename):
@@ -165,7 +186,6 @@ def install_cosmic_plots(
165186
+ exome_str,
166187
"now...",
167188
)
168-
169189
# Create the respective plots
170190
if context_type_str == "SBS":
171191
cosmic_buff_plots = sigPlt.plotSBS(
@@ -195,6 +215,25 @@ def install_cosmic_plots(
195215
percentage=True,
196216
savefig_format="PIL_Image",
197217
)
218+
elif context_type_str == "CNV":
219+
cosmic_buff_plots = sigPlt.plotCNV(
220+
cosmic_file_path,
221+
"buffer",
222+
"buffer",
223+
percentage=True,
224+
aggregate=False,
225+
read_from_file=False,
226+
savefig_format="PIL_Image",
227+
)
228+
elif context_type_str == "SV":
229+
cosmic_buff_plots = sigPlt.plotSV(
230+
cosmic_file_path,
231+
"buffer",
232+
"buffer",
233+
percentage=True,
234+
aggregate=False,
235+
savefig_format="PIL_Image",
236+
)
198237

199238
# Process the plots to be stored in JSON file
200239
cosmic_img_dict = {}
@@ -413,8 +452,33 @@ def genCNV_pngs(denovo_mtx, basis_mtx, output_path, project, mtype):
413452
return denovo_plots, basis_plots
414453

415454

455+
def genSV_pngs(denovo_mtx, basis_mtx, output_path, project, mtype):
    """
    Plot the SV de novo matrix and, when supplied, the SV basis matrix.

    Parameters:
            denovo_mtx: Matrix passed to sigPlt.plotSV for the de novo plots.
            basis_mtx: Matrix passed to sigPlt.plotSV for the basis plots, or
                    None to skip basis plotting.
            output_path: Forwarded unchanged to sigPlt.plotSV.
            project: Forwarded unchanged to sigPlt.plotSV.
            mtype: Not used here; kept so the signature matches genCNV_pngs.

    Returns:
            (denovo_plots, basis_plots). Each element is whatever
            sigPlt.plotSV returns for savefig_format="PIL_Image"
            (presumably a mapping of sample name to image buffer -- TODO
            confirm against sigProfilerPlotting). basis_plots is an empty
            dict when basis_mtx is None.
    """

    def _plot_sv(matrix):
        # Both matrices are plotted with identical settings.
        return sigPlt.plotSV(
            matrix,
            output_path,
            project,
            percentage=True,
            aggregate=False,
            savefig_format="PIL_Image",
        )

    denovo_plots = _plot_sv(denovo_mtx)
    basis_plots = dict() if basis_mtx is None else _plot_sv(basis_mtx)
    return denovo_plots, basis_plots
477+
478+
416479
# signames, weights
417480
def gen_sub_plots(denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp):
481+
418482
# Make output directory
419483
if not os.path.exists(output_path):
420484
os.makedirs(output_path)
@@ -442,6 +506,11 @@ def gen_sub_plots(denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp)
442506
denovo_mtx, basis_mtx, output_path, project, mtype
443507
)
444508
return denovo_plots, basis_plots
509+
elif mtype in SV_CONTEXTS:
510+
denovo_plots, basis_plots = genSV_pngs(
511+
denovo_mtx, basis_mtx, output_path, project, mtype
512+
)
513+
return denovo_plots, basis_plots
445514

446515
else:
447516
print("ERROR: mtype is " + mtype + " and is not yet supported.")
@@ -510,6 +579,15 @@ def gen_reconstructed_png_percent(
510579
read_from_file=False,
511580
savefig_format="PIL_Image",
512581
)
582+
elif mtype in SV_CONTEXTS:
583+
reconstruction_plot = sigPlt.plotSV(
584+
reconstruction_mtx,
585+
output_path,
586+
"reconstruction_" + project,
587+
percentage=True,
588+
aggregate=False,
589+
savefig_format="PIL_Image",
590+
)
513591
else:
514592
print("ERROR: mtype is " + mtype + " and is not yet supported.")
515593

@@ -591,6 +669,16 @@ def gen_reconstructed_png_numerical(
591669
read_from_file=False,
592670
savefig_format="PIL_Image",
593671
)
672+
elif mtype in SV_CONTEXTS:
673+
reconstruction_plot = sigPlt.plotSV(
674+
reconstruction_mtx,
675+
output_path,
676+
"reconstruction_" + project,
677+
percentage=True,
678+
aggregate=False,
679+
read_from_file=False,
680+
savefig_format="PIL_Image",
681+
)
594682
else:
595683
print("ERROR: mtype is " + mtype + " and is not yet supported.")
596684

@@ -612,6 +700,7 @@ def gen_decomposition(
612700
cosmic_version=None,
613701
custom_text=None,
614702
):
703+
615704
"""
616705
Generate the correct plot based on mtype.
617706
@@ -744,6 +833,22 @@ def gen_decomposition(
744833
custom_text,
745834
)
746835
return byte_plot
836+
elif mtype == "32":
837+
byte_plot = sv_32.gen_decomposition(
838+
denovo_name,
839+
basis_names,
840+
weights,
841+
output_path,
842+
project,
843+
denovo_plots_dict,
844+
basis_plots_dict,
845+
reconstruction_plot_dict,
846+
reconstruction,
847+
statistics,
848+
cosmic_version,
849+
custom_text,
850+
)
851+
return byte_plot
747852

748853

749854
def run_PlotDecomposition(
@@ -773,8 +878,7 @@ def run_PlotDecomposition(
773878
774879
basis_mtx: Pandas DataFrame. This format represents the catalog of mutations separated by tab.
775880
776-
basis_names: List of Strings. The names of the samples in denovo_mtx that
777-
the denovo_name sample from denovo_mtx is decomposed into.
881+
basis_names: List of Strings. The names of the samples in basis_mtx that the denovo_name sample from denovo_mtx is decomposed into.
778882
ie. basis_names=["SBS1", "SBS5", "SBS15", "SBS20"]
779883
780884
weights: List of Strings. The percentile weight corresponding to each basis
@@ -796,17 +900,9 @@ def run_PlotDecomposition(
796900
None.
797901
"""
798902
# Create the denovo plots and load basis plots
799-
if mtype != "48":
800-
denovo_plots_dict = gen_sub_plots(
801-
denovo_mtx, None, output_path, project, mtype, ss_decomp=False
802-
)
803-
denovo_plots_dict = denovo_plots_dict[0]
804-
else:
805-
# cnv basis plots need to be generated and not loaded
806-
denovo_plots_dict, basis_plots_dict = gen_sub_plots(
807-
denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp=False
808-
)
809-
# Create the matrix and plot for the reconstructed matrix
903+
denovo_plots_dict, basis_plots_dict = gen_sub_plots(
904+
denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp=False
905+
)
810906
reconstructed_mtx, reconstruction_plot_dict = gen_reconstructed_png_percent(
811907
denovo_name, basis_mtx, basis_names, weights, output_path, project, mtype
812908
)
@@ -816,14 +912,13 @@ def run_PlotDecomposition(
816912
# Convert dictionary of bytes to dictionary of images
817913
denovo_plots_dict = convert_to_imgReaderDict(denovo_plots_dict)
818914
# Load in the COSMIC plots
819-
if mtype != "48":
820-
basis_plots_dict = install_cosmic_plots(
821-
context_type=mtype,
822-
genome_build=genome_build,
823-
cosmic_version=cosmic_version,
824-
exome=exome,
825-
)
826-
basis_plots_dict = {key: basis_plots_dict[key] for key in basis_names}
915+
basis_plots_dict = install_cosmic_plots(
916+
context_type=mtype,
917+
genome_build=genome_build,
918+
cosmic_version=cosmic_version,
919+
exome=exome,
920+
)
921+
basis_plots_dict = {key: basis_plots_dict[key] for key in basis_names}
827922
basis_plots_dict = convert_to_imgReaderDict(basis_plots_dict)
828923
# Generate the reconstruction plot
829924
reconstruction_plot_dict = convert_to_imgReaderDict(reconstruction_plot_dict)

0 commit comments

Comments
 (0)