Skip to content

Commit b1c1015

Browse files
author
Yuhe Cheng
committed
fix bug
1 parent 45ee532 commit b1c1015

File tree

8 files changed

+39
-2766
lines changed

8 files changed

+39
-2766
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The complete DupCaller pipeline also requires the following tools for data prepr
1616
The tool uses pip for installing scripts and prerequisites. To install DupCaller, simply clone this repository and install via pip:
1717

1818
```bash
19-
git clone https://github.com/AlexandrovLab/DupCaller.git
19+
git clone https://github.com/YuheCheng62/DupCaller.git
2020
cd DupCaller
2121
pip install .
2222
```

hs_err_pid74663.log

Lines changed: 0 additions & 2762 deletions
This file was deleted.

src/DupCaller.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,20 @@
418418
help="contigs to consider for trinucleotide calculation",
419419
default=["chr" + str(_) for _ in range(1, 23, 1)] + ["chrX"],
420420
)
421+
estimate_parser.add_argument(
422+
"-c",
423+
"--clonal",
424+
type=bool,
425+
help="If True, mutations detected in more than one molecule will be considered as clonal mutations",
426+
default=False,
427+
)
428+
estimate_parser.add_argument(
429+
"-d",
430+
"--dilute",
431+
type=bool,
432+
help="Set to true when sample and matched normal are from the same starting DNA material",
433+
default=False,
434+
)
421435

422436
summarize_parser = subparsers.add_parser(
423437
"summarize", help="Summarize results from multiple samples"

src/subcommands/Estimate.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas as pd
77
from matplotlib import pyplot as plt
88
from pysam import VariantFile as VCF
9-
from scipy.stats import chi2
9+
from scipy.stats import chi2, barnard_exact
1010

1111

1212
def calculate_ref_trinuc(args):
@@ -168,10 +168,24 @@ def do_estimate(args):
168168
revcomp = {"A": "T", "T": "A", "C": "G", "G": "C"}
169169
for nn, duplex_no in enumerate(trinuc_by_rf.columns):
170170
duplex_no_dict[duplex_no] = nn
171+
172+
if args.dilute:
173+
vcf_out = VCF(args.prefix + "/" + sample + "_flt.vcf", "w", header=vcf.header)
171174
for rec in vcf.fetch():
172175
F1R2 = rec.info["F1R2"]
173176
F2R1 = rec.info["F2R1"]
174-
duplex_no = str(min(F1R2, F2R1)) + "+" + str(max(F1R2, F2R1))
177+
TAC = rec.samples["TUMOR"]["AC"]
178+
if TAC > 1 and args.dilute:
179+
TDP = rec.samples["TUMOR"]["DP"]
180+
NAC = rec.samples["NORMAL"]["AC"]
181+
NDP = rec.samples["NORMAL"]["DP"]
182+
barnard_p = barnard_exact([[TAC, TDP - TAC], [NAC, NDP - NAC]]).pvalue
183+
if barnard_p <= 0.05:
184+
continue
185+
if args.dilute:
186+
vcf_out.write(rec)
187+
# duplex_no = str(min(F1R2, F2R1)) + "+" + str(max(F1R2, F2R1))
188+
duplex_no = str(F1R2) + "+" + str(F2R1)
175189
ref = rec.ref
176190
if ref == "C" or ref == "T":
177191
trinuc = rec.info["TN"]
316 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
14 Bytes
Binary file not shown.

src/subcommands/funcs/call.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ def callBam(params, processNo):
367367
dmgmat = dmgmat / dmgmat.sum(axis=1, keepdims=True)
368368
dmgmat_min_error = dmgmat.min(axis=1, keepdims=True)
369369
dmgmat = np.concatenate([dmgmat, dmgmat_min_error], axis=1)
370+
# dmgmat += 1e-9
370371

371372
params["dmgmat_top"] = dmgmat
372373
params["trinuc2num_dict"] = trinuc2num
@@ -380,6 +381,7 @@ def callBam(params, processNo):
380381
]
381382
dmgmat_rev_min_error = dmgmat_rev.min(axis=1, keepdims=True)
382383
dmgmat_rev = np.concatenate([dmgmat_rev, dmgmat_rev_min_error], axis=1)
384+
# dmgmat_rev += 1e-9
383385
params["dmgmat_rev_top"] = dmgmat_rev
384386

385387
dmgmat_b = np.vstack((dmgmat[32:64, [1, 0, 3, 2]], dmgmat[:32, [1, 0, 3, 2]]))
@@ -390,6 +392,8 @@ def callBam(params, processNo):
390392
dmgmat_rev_b_min_error = dmgmat_rev_b.min(axis=1, keepdims=True)
391393
dmgmat_b = np.concatenate([dmgmat_b, dmgmat_b_min_error], axis=1)
392394
dmgmat_rev_b = np.concatenate([dmgmat_rev_b, dmgmat_rev_b_min_error], axis=1)
395+
# dmgmat_b += 1e-9
396+
# dmgmat_rev_b += 1e-9
393397
params["dmgmat_bot"] = dmgmat_b
394398
params["dmgmat_rev_bot"] = dmgmat_rev_b
395399

@@ -402,14 +406,15 @@ def callBam(params, processNo):
402406
current_row = dmgmat_indel[nn, :]
403407
current_row[current_row == 0] = dmgmat_indel[nn - 1, :][current_row == 0]
404408
dmgmat_indel[nn, :] = current_row
409+
# dmgmat_indel += 1e-9
405410
params["dmgmat_indel"] = dmgmat_indel
406411
else:
407412
params["dmgmat_indel"] = np.ones([40, 11]) * params["dmgerri"]
408413
dmgmat_indel = params["dmgmat_indel"]
409414

410415
dmgmat_indel_rev = np.fliplr(dmgmat_indel)
416+
# dmgmat_indel_rev += 1e-9
411417
params["dmgmat_indel_rev"] = dmgmat_indel_rev
412-
413418
params["dmgmat_indel_mean"] = np.mean(dmgmat_indel, axis=1)
414419
params["dmgmat_indel_rev_mean"] = np.mean(dmgmat_indel_rev, axis=1)
415420

@@ -936,6 +941,7 @@ def callBam(params, processNo):
936941
# else:
937942
### Calculate genotype probability
938943
if not any(indel_bool) or isLearn:
944+
# if 1:
939945
if isLearn:
940946
(
941947
mismatch_now,
@@ -1429,6 +1435,7 @@ def callBam(params, processNo):
14291435
indel_dict[indel_str] = 1
14301436
# else:
14311437
### Calculate genotype probability
1438+
# if 1:
14321439
if not any(indel_bool) or isLearn: # or len(indels_pass) == 0:
14331440
if isLearn:
14341441
(

0 commit comments

Comments
 (0)