33from enum import Enum
44import re
55import statistics as stat
6+ import sys
67
78from Bio import Phylo
89from Bio import SeqIO
@@ -28,7 +29,11 @@ def collapse_low_support_bipartitions(newtree, support: float):
2829 return newtree
2930
3031
31- def determine_if_dups_are_sister (subtree_tips : list , newtree ):
32+ def determine_if_dups_are_sister (
33+ subtree_tips : list ,
34+ newtree ,
35+ delimiter : str ,
36+ ):
3237 """
3338 determine if dups are sister to one another
3439 """
@@ -41,7 +46,7 @@ def determine_if_dups_are_sister(subtree_tips: list, newtree):
4146 dup_tree = copy .deepcopy (newtree )
4247
4348 dup_tree = dup_tree .common_ancestor (subtree_tips )
44- _ , all_tips = get_all_tips_and_taxa_names (dup_tree )
49+ _ , all_tips = get_all_tips_and_taxa_names (dup_tree , delimiter )
4550 if set (all_tips ) != set (subtree_tips ):
4651 are_sisters = False
4752
@@ -55,7 +60,7 @@ def determine_if_dups_are_sister(subtree_tips: list, newtree):
5560 return are_sisters
5661
5762
58- def get_all_tips_and_taxa_names (tree ):
63+ def get_all_tips_and_taxa_names (tree , delimiter : str ):
5964 """
6065 get all taxa and tip names in a phylogeny
6166
@@ -66,7 +71,11 @@ def get_all_tips_and_taxa_names(tree):
6671
6772 # loop through the tree and collect terminal names
6873 for term in tree .get_terminals ():
69- taxa_name = term .name [: term .name .index ("|" )]
74+ try :
75+ taxa_name = term .name [: term .name .index (delimiter )]
76+ except ValueError :
77+ print ("\n ERROR: Delimiter does not exist in FASTA headers.\n Specify the delimiter using the -d argument." )
78+ sys .exit ()
7079 if taxa_name not in taxa :
7180 taxa .append (taxa_name )
7281 all_tips .append (term .name )
@@ -86,15 +95,15 @@ def check_if_single_copy(taxa: list, all_tips: list):
8695 return False
8796
8897
89- def get_tips_and_taxa_names_and_taxa_counts_from_subtrees (inter ):
98+ def get_tips_and_taxa_names_and_taxa_counts_from_subtrees (inter , delimiter : str ):
9099 """
91100 get taxa, counts of each taxa, and all terminal names
92101 """
93102 taxa_from_terms = []
94103 terms = []
95104 # get taxa and terminal names from subtree
96105 for term in inter .get_terminals ():
97- taxa_from_terms .append (term .name .split ("|" , 1 )[0 ])
106+ taxa_from_terms .append (term .name .split (delimiter , 1 )[0 ])
98107 terms .append (term .name )
99108 # count number of taxa in subtree
100109 counts_of_taxa_from_terms = Counter (taxa_from_terms )
@@ -145,6 +154,7 @@ def handle_multi_copy_subtree(
145154 output_path : str ,
146155 inparalog_handling : dict ,
147156 inparalog_handling_summary : dict ,
157+ delimiter : str ,
148158):
149159 """
150160 handling case where subtree contains all single copy genes
@@ -165,7 +175,9 @@ def handle_multi_copy_subtree(
165175
166176 # check if subtrees are sister to one another
167177 # are_sisters = determine_if_dups_are_sister(subtree_tips)
168- are_sisters = determine_if_dups_are_sister (dups , newtree )
178+ are_sisters = determine_if_dups_are_sister (
179+ dups , newtree , delimiter
180+ )
169181
170182 # if duplicate sequences are sister, get the longest sequence
171183 if are_sisters :
@@ -179,7 +191,7 @@ def handle_multi_copy_subtree(
179191 # if the resulting subtree has only single copy genes
180192 # create a fasta file with sequences from tip labels
181193 _ , _ , _ , counts = \
182- get_tips_and_taxa_names_and_taxa_counts_from_subtrees (newtree )
194+ get_tips_and_taxa_names_and_taxa_counts_from_subtrees (newtree , delimiter )
183195
184196 if set (counts ) == set ([1 ]):
185197 (
0 commit comments