Skip to content

Commit fcafa57

Browse files
committed
Draft VWF to BED7 conversion
1 parent 5f2cfc3 commit fcafa57

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

src/primaschema/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,17 @@ def six_to_seven(bed_path: Path, fasta_path: Path):
137137
print(bed_str)
138138

139139

140+
def vwf_to_bed(vwf_path: Path, chrom: str = "chrom"):
    """
    Print a 7 column primer.bed converted from a Viridian VWF scheme TSV

    :arg vwf_path: path of the Viridian VWF scheme TSV file
    :arg chrom: name of reference chromosome
    """
    # The conversion lives in lib; this entry point only writes it to stdout
    print(lib.convert_vwf_to_primer_bed(vwf_path=vwf_path, chrom=chrom))
149+
150+
140151
def diff(bed1_path: Path, bed2_path: Path, only_positions: bool = False):
141152
"""
142153
Show the symmetric difference of records in two bed files
@@ -215,6 +226,7 @@ def main():
215226
"diff": diff,
216227
"6to7": six_to_seven,
217228
"7to6": seven_to_six,
229+
"vwftobed": vwf_to_bed,
218230
"plot": plot,
219231
"show-intervals": amplicon_intervals,
220232
"show-discordant-primers": discordant_primers,

src/primaschema/lib.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,38 @@ def convert_scheme_bed_to_primer_bed(bed_path: Path, fasta_path: Path) -> str:
170170
return df.to_csv(sep="\t", header=False, index=False)
171171

172172

173+
def convert_vwf_to_primer_bed(vwf_path: Path, chrom: str = "chrom") -> str:
    """
    Convert a Viridian VWF scheme TSV to 7 column primer.bed text

    The TSV is read with a header row and must provide the columns
    Amplicon_name, Primer_name, Left_or_right, Sequence and Position.
    Each input row becomes one BED line:

    * chromStart is the row's Position and chromEnd is Position plus the
      length of Sequence
    * poolName is "1" when the integer after the last "_" in Amplicon_name
      is odd, otherwise "2"
    * strand is "+" when Left_or_right is exactly "left"; any other value
      is written as "-"

    :arg vwf_path: path of the VWF scheme TSV
    :arg chrom: name of reference chromosome, written to the first column
    :returns: tab-delimited BED text without a header row
    :raises KeyError: if a required column is missing
    :raises ValueError: if an amplicon name does not end in "_<integer>"
    """
    vwf_df = pd.read_csv(vwf_path, sep="\t")
    bed_records = []

    # to_dict("records") yields one plain dict per row. The earlier
    # to_records("dict") call only worked because "dict" was taken as a
    # truthy value for that method's `index` parameter.
    for r in vwf_df.to_dict("records"):
        sequence = r["Sequence"]
        start_pos = r["Position"]
        amplicon_number = int(r["Amplicon_name"].split("_")[-1])
        bed_records.append(
            {
                "chrom": chrom,
                "chromStart": start_pos,
                "chromEnd": start_pos + len(sequence),
                "name": r["Primer_name"],
                # Odd-numbered amplicons go to pool 1, even-numbered to pool 2
                "poolName": "1" if amplicon_number % 2 != 0 else "2",
                "strand": "+" if r["Left_or_right"] == "left" else "-",
                "sequence": sequence,
            }
        )

    bed_df = pd.DataFrame(bed_records)
    return bed_df.to_csv(sep="\t", header=False, index=False)
203+
204+
173205
def hash_bed(bed_path: Path) -> str:
174206
bed_type = infer_bed_type(bed_path)
175207
if bed_type == "primer":

0 commit comments

Comments
 (0)