Skip to content

Commit a863f33

Browse files
authored
Merge pull request #11 from JLSteenwyk/new_python_and_biopython_versions
New python and biopython versions
2 parents e70c4d7 + 8199239 commit a863f33

File tree

31 files changed

+1201
-8
lines changed

31 files changed

+1201
-8
lines changed

docs/usage/index.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ To report inparalogs and specify which was kept per SNAP-OG, use the -rih, \-\-r
8787
argument. The resulting file, which will have the suffix ".inparalog_report.txt," will have three columns: |br|
8888
- col 1 is the orthogroup file |br|
8989
- col 2 is the inparalog that was kept |br|
90-
- col 3 is/are the inparalog/s that were trimmed separated by a semi-colon ";" |br|
90+
- col 3 is/are the inparalog/s that were trimmed separated by a semi-colon ";"
9191

9292
To generate this file, use the following command:
9393

@@ -125,3 +125,8 @@ All options
125125
+-------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
126126
*For genome-scale analyses, we recommend changing the -o/\-\-occupancy parameter to be the same for all large gene families so that the minimum SNAP-OG occupancy is the same
127127
for all SNAP-OGs.
128+
129+
130+
.. |br| raw:: html
131+
132+
<br/>

orthosnap/helper.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,6 @@ def write_output_fasta_and_account_for_assigned_tips_single_copy_case(
375375
write_summary_file_with_inparalog_handling(
376376
inparalog_handling, fasta,
377377
output_path, subgroup_counter,
378-
assigned_tips
379378
)
380379
subgroup_counter += 1
381380

@@ -387,7 +386,6 @@ def write_summary_file_with_inparalog_handling(
387386
fasta: str,
388387
output_path: str,
389388
subgroup_count: int,
390-
assigned_tips: list
391389
):
392390
res_arr = []
393391

@@ -406,10 +404,19 @@ def write_summary_file_with_inparalog_handling(
406404
f"{output_path}/{fasta_path_stripped}.orthosnap.{subgroup_count}.fa"
407405
)
408406

409-
if res_arr:
407+
for i in res_arr:
410408
try:
411-
if res_arr[0][1] in open(output_fasta_file_name).read():
409+
if string_exact_match(f">{i[1]}", output_fasta_file_name):
412410
with open(f"{output_path}{inparalog_report_output_name}", "a") as file:
413-
file.writelines('\t'.join(i) + '\n' for i in res_arr)
411+
file.writelines('\t'.join(i) + '\n')
414412
except FileNotFoundError:
415413
1
414+
415+
416+
def string_exact_match(string, filename):
    """Return True if *string* occurs as a whole token on any line of *filename*.

    The text is matched literally: regex metacharacters in *string* (for
    example the ``|`` found in ``species|gene`` sequence identifiers) are
    escaped, so they cannot split the pattern into alternatives. A hit only
    counts when it is not directly preceded or followed by a word character,
    so ``>sp|gene_1`` does not match a line containing ``>sp|gene_10``.

    Args:
        string: Literal text to look for, e.g. a FASTA header such as
            ``">species0|gene0"``.
        filename: Path of the text file to scan line by line.

    Returns:
        True if any line contains *string* as a whole token, else False.
        An empty *string* never matches.

    Raises:
        FileNotFoundError: If *filename* does not exist (propagated from
            ``open``).
    """
    if not string:
        # An empty needle would "match" every position; treat it as no match.
        return False

    # Lookarounds are used instead of ``\b`` because ``\b`` in front of a
    # non-word character such as ">" demands a preceding word character and
    # therefore can never match at the start of a header line.
    pattern = re.compile(r"(?<!\w){}(?!\w)".format(re.escape(string)))

    with open(filename, 'r') as f:
        return any(pattern.search(line) for line in f)

orthosnap/orthosnap.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ def execute(
150150
fasta,
151151
output_path,
152152
subgroup_counter,
153-
assigned_tips,
154153
)
155154

156155
write_output_stats(

orthosnap/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.2.0"
1+
__version__ = "1.3.0"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
fake_orthologous_group_of_genes.faa.orthosnap.0 species0|gene0-duplicate_copy_1 species0|gene0-duplicate_copy_2;species0|gene0-duplicate_copy_0
2+
fake_orthologous_group_of_genes.faa.orthosnap.1 species4|gene2-duplicate_copy_1 species4|gene2-duplicate_copy_0
3+
fake_orthologous_group_of_genes.faa.orthosnap.1 species2|gene2-duplicate_copy_1 species2|gene2-duplicate_copy_0
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
>species0|gene0-duplicate_copy_1
2+
MFGAVAAGSEESPQAPRCISTRSSSFRVYLSAWNFGMSPERVTTEPLHSPDWDNDWLRQL
3+
AGDIVAGSLSATIIAPITTVIDRSVVERLSSNRSILHTLRTHAICSILKPRKFYFSRPFF
4+
IAWSLYAATYATANATDTSLEHLSKVTEKSTTASLVPTFSFLPTYVVNVCLGILKDIRFS
5+
QIYGHPEGRLKQPPPIPRLAYMAFLFRDSITISSSFTLAPQVASLVPDWITADPHTKRTV
6+
TQLALPALVQYVNTPFHMIALDVIARPQVATIAERSVTIRRGDLAEILNSPAYDYGQDVE
7+
KKKNLDDTSPEDEDPFGNEEFAEVKYRTMGWWKTGILMVAENVSIGILSLPSAFATLGFV
8+
PALIILIGISGISWYTAYILCQFKLRYPQVHSMGDAGEIIMGRFGRELLGIGQLLFLIFV
9+
MASHVLTFTVLMNTITEHGTCTIVFGVIALIVSCVGALPRTMDKVYWMSIASFLSIVAAT
10+
MATMIAVGVEYKGHIPLAVTTHLSFNEEFLAVSNLFFAYVGHASFFGFISEMDKPREFTK
11+
SISVLQVIDTSLYIASAVVIYRYVGADVQSPALGSAGPLGKKIAYGLAIPTVLIAGIVNG
12+
HVASKYVYVRVFRGTNHMHERTLLSIGSWVAIGLISWVVAWVIAESIPVFNNLLSLITAL
13+
FGCWFAYGFPAIFWFTLNKGQWFASSRKIFLTLSNTFILAMAITLCGLGLYVSGDAISKD
14+
SGSGVWTCANNAVTTTTTT
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
>species4|gene2-duplicate_copy_1
2+
MAVSRDLEAPAVVNDPTADDAMVEKKEYADGTPANDPFGNEECGEVRYRVMSWWQCGTLM
3+
VAENISLGILSLPSAVATLGIVPAVILLLGLSAISWYTGYIMGQFKLRFPQIHSMGDAGE
4+
LLMGRFGRELFGIGQLLFLIFLMASHILTFTVVFNTITNHGTCTIVFGVVGLVVSFIGAL
5+
PRTMGKVYWMSMASCISIVTATVVTMIAIGVQAPDHVHVDATTEVSFQDAFLAVTNIIFA
6+
YIAHVAFFGFISEMHDPRDFPKSLTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGP
7+
LMKKVAYGLAIPTVVIAGVVFGHVACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALGVWV
8+
VAWVIAESIPVFNELLSLISSLFGSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFLI
9+
LAFACALCGMGLYVSGKSIHDSSSKASWTCKNNATTTT
10+
>species2|gene2-duplicate_copy_1
11+
MMLWLKRRNMLMGRRQMTRLEMKNAERSNIVSCRGVMVAENISLGILSLSSAVATLGIVP
12+
AVILLLGLSAISWYTGYIMGQFKLRFPQIHSMGDAGELLMGRFGRELFGIGQLLFLIFLM
13+
ASHILTFSVVFNTITNHGTCTIVFGVVGLVVSFIGALPRTMGKVYWMSMASCISIVTATV
14+
VTMIAIGVQAPDHVHVNVTTKVSFQDAFLAVTNIIFAYIAHVAFFGFISEMHDPRDFPKS
15+
LTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGPLMKKVAYGLAIPTVVIAGVVFGH
16+
VACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALGVWVVAWVIAESIPVFNELLSLISSLF
17+
GSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFFILAFACALCGMGLYVSGKSIHDSS
18+
SKASWTCKNNAT
19+
>species1|gene2
20+
MAVSRDLEAPAVVNDPTAYDATVEKKEYADGTPANDPFGNEECGEVKYRVMSWWQCGTLM
21+
VAENISLGILSLPSAVATLGIVPAVILLLGLSAISWYTGYIMGQFKLRFPQVHSMGDAGE
22+
LLMGRFGRELFGIGQLLFLIFLMASHILTFTVVFNTITNHGTCTIVFGVVGLVVSFIGAL
23+
PRTMGKVYWMSMASCISIVTATVVTMIAIGVQAPEHVHVDATTEVSFQDAFLAVTNIIFA
24+
YIAHVAFFGFISEMHDPRDFPKSLTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGP
25+
VMKKVAYGLAIPTVVIAGVVFGHVACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALSVWV
26+
VAWVIAESIPVFNELLSLISSLFGSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFLI
27+
LAFACALCGMGLYVSGKSIHDSSSKASWTCKNNAT
28+
>species3|gene7
29+
MAPTTRDLEALTVHHDSDIMADDLAEKKVSANESPPENDPFGNEECGEVKYRVMKWWHCG
30+
ILMIAENISLGILSLPSAVATLGIVPSIFLILGLSGISWYTGYVIGQFKLRYPQVHSMGD
31+
AGEILFGRIGREILFFGQLLFCIFLMSSHILTFTVLFNTITGHGTCTIVFGVVGLVVSFI
32+
GALPRTMGKVYWMSLASCTSITVATIVTMVAIAMQAPDHVQVDITTHPSFSTAFLSVTNI
33+
VFAFIAHVAFFGFASEMEDPRDFPKSLAMLQVTDTTMYIVTAMVIYRYAGPDVASPALSS
34+
AGPLMSKVAYGLAIPTVIIAGVVFGHVASKYIYVRVWRGSPQMHTNSLAAVGSWVAIALG
35+
VWVIAWIIAESIPVFNDLLSLISSLFGSWFSYGLPAMFWLVMNRGQYTASPRKIFLTIVN
36+
LVIFGIACAICGLGLYVSGKAIHDSSSSASWTCANNAST
37+
>species0|gene1
38+
MAPTTRDLEALAVHHDSDIMADDLAEKKVSANESPPENDPFGNEECGEVKYRVMKWWHCG
39+
ILMIAENISLGILSLPSAVATLGIVPSIFLILGLSGISWYTGYVIGQFKLRYPQVHSMGD
40+
AGEILFGRIGREILFFGQLLFCIFLMSSHILTFTVLFNTITGHGTCTIVFGVVGLVVSFI
41+
GALPRTMGKVYWMSLASCTSITVATIVTMVAIAVQAPDHVQVDITTHPSFSTAFLSVTNI
42+
VFAFIAHVAFFGFASEMEDPRDFPKSLAMLQVTDTTMYIVTAMVIYRYAGPDVASPALSS
43+
AGPLMSKVAYGLAIPTVIIAGVVFGHVASKYIYVRVWRGSPQMHTNSLAAVGSWVAIALG
44+
VWVIAWIIAESIPVFNDLLSLISSLFGSWFSYGLPAMFWLVMNRGQYTASPRKIFLTIVN
45+
LVIFGIACAICGLGLYVSGKAIHDSSSSASWTCANNAST
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
>species2|gene5
2+
MSPDTSDLDLETRPAVSLNRGEEYKEQPETPDEEPFGDEDDAEVRYRTLEWWFVSPVMLA
3+
GGTSLGILTLPSAVATLGIVPGVILIVGIAILTVYTGYVMGQFKQRYPHVHSIADGGEVL
4+
FGWVGREILGAGLLLCLVFVMGGHILTFTVMMNTLTDHGTCSVVFGVVGLLISLILSLPR
5+
TFKRMSWLSVISFASIVGAVLVTMIALGVQRPPNVRVEVTRPTSLYRAFLAVTDIVFAYA
6+
AHPAFFGFISEMKTPTDWPKTLCFVEIINTTLYTVTGVVIYRFAGQHVASPALGSTSPLM
7+
AKVAYGTAIPTIVIAGVINGHIACKYIYVRVFRGTEHMHRRSLFAIGTWVVISVVLWTVA
8+
WVIAEAVPEFNNLLSLITSLFCSWFSYGLCGAFWLFINKGLWFSSPRKTFLTIVNFTLLG
9+
MGACLCGLGLYASGRAISEESAGRIFSCASTA
10+
>species4|gene1
11+
MSPDTSDLDLETRPAVSLNRGEGYKEQPETPDEEPFGNEEGAEVRYRTLEWWFVSPGSAE
12+
GRQSRSDVACEQEMRDSHAGWGHIARHPNASLGCGDTGNRPVGLPGVILIVGIAILTVYT
13+
GCVMGQFKQRYPHVHSIADGGEVLFGWIGREVLGAGLLLCLVFVMGGHILTFTVMMNTLT
14+
DHGTCSVVFGVVGLLISLILSLPRTFKRMSWLSVISFASIVAAVLVTMIALGVQRPPNVK
15+
VEVTRPTSLYRAFLAVTDIVFAYAAHPAFFGYISEMKTPTDWPKTLCFVEVINTTLYTVT
16+
GVVIYRFAGQHVASPALGSSSPLMAKVAYGIAIPTIVIAGVINGHIACKYIYVRLFRGTE
17+
RMHQRSLFSIGTWVAISVVLWTIAWVIAEAVPEFNNLLSLITSLFCSWFSYGLCGAFWLF
18+
INQGLWFSSPRKTFLTIVNFTLLGMGACLCGLGLYASGRAISEESAGRSFSCASTA
19+
>species1|gene0
20+
MSPDTSDLDLGTRPAVSLNRGEGYKEQPETPDEEPFGDEEGAEVRYRTLEWWFVSPGSAE
21+
GRQSRSDVACEQEMRDRGVILIVGIAILTVYTGCVMGQFKQRYPHVHSIADGGEVLFGWI
22+
GREVLGTGLLLCLVFVMGGHILTFTVMMNTLTDHGTCSIVFGVVGLLISLILSLPRTFKR
23+
MSWLSVISFASIVAAVLVTMIALGVQRPPNVKVEVTRPTSLYRAFLAVTDIVFAYAAHPA
24+
FFGYISEMKTPTDWPKTLCFVEVINTTLYTVTGVVIYRFAGQHVASPALGSSSPLMAKVA
25+
YGIAIPTIVIAGVINGHIACKYIYVRLFRGTEHMHQRSLFAIGTWVAISVVLWTIAWVIA
26+
EAVPEFNNLLSLVLVFVCVSF
27+
>species3|gene3
28+
MSPPSAINNPGDPLAEQEKPVGARNTTGTEDPFSHDGVGGVKYRTLAWWQCAMIMVAETI
29+
SLGILSLPSAVASLGLVAAVILILGLGALATYTGYTLGQFKLRYPHVHSMGDAGEVLMGR
30+
IGREVLGTAQLLFLIFIMGSHLLTFTVMMNTLTDHGTCSIVFGVIGLAVSFAFTLPRTLK
31+
KVSWFSISSFISIIAAVLITMIAIAIQKPGGGRVDAIVDNSFYKAFLAVTNIVFAYAGHV
32+
AFFGFISEMRTPTDYPKTLYMLQGIDTSMYTISAVVIYRYGGRDVASPALGSTSPLMSKI
33+
AYGIAIPTIVIAGVINGHVACKYIYVRLFRGTDRMHQRGLVSIGTWVMIGLVLWTLAWII
34+
AEAIPVFNDLLSLITALFASWFTYGLSGIFWLFLNWGRYSSSRRKILLTGLNLLVVVVGG
35+
CLCALGLYVSGKSIHDHPRSSSFSCANNA
36+
>species0|gene8
37+
MSPPSAINNPGDPLAEQEKPAGARNTTGTEDPFSHDGVGGVKYRTLAWWQCAMIMVAETI
38+
SLGILSLPSAVASLGLVAAVILIIGLGALATYTGYTLGQFKLRYPHVHSMGDAGEVLMGR
39+
IGREVLGTAQLLFLIFIMGSHLLTFTVMMNTLTDHGTCSIVFGVIGLAVSFAFTLPRTLK
40+
KVSWFSISSFISIIAAVLITMIAIAIQKPGGGRVDAIVDNSFYKAFLAVTNIVFAYAGHV
41+
AFFGFISEMRTPTDYPKTLYMLQGIDTSMYTISAVVIYRYGGRDVASPALGSTSPLMSKI
42+
AYGIAIPTIVIAGVINGHVACKYIYVRLFRGTDRMHQRGLVSIGTWVIIGLVLWTLAWII
43+
AEAIPVFNDLLSLITALFASWFTYGLSGIFWLFLNWGRYSSSRRKILLTGLNLLVVVVGG
44+
CLCALGLYVSGKSIHDHPRSSSFSCANNA
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
>species3|gene4
2+
MSTLDVKDIENGPARRVEEEGGMWENDMEKTPSVERDPFGNEAVGEVHYKTLDWWQSGML
3+
MIAETVSLGVLSLPATVAEVGLIPAIILIVGMGIIATYSGYVIGQFRARYPFIHSMADAG
4+
EVLCGRYGRMFTEFAQLVFFMFASGXHLVTFTVMMNTLTNHGTCSVVFGVVGLVLSFACS
5+
LPRTMKNVSWLAVTSFLSIFTAVLITMIGVAVEHPNPPPMQLTRSTSFVKGFSAVTNIAF
6+
AYCGHPAFFGFIAEMKEPKDFPKSLCMLQGFEIVFYTVASAVIYRYAGQNVTSPALGSAG
7+
IVVRKVAYGIAIPTIVIAGVVLGHVAIKNVYVRLFRGTDVMHKRSALGIGAWIGLAAGYW
8+
IIAWVIAEAIPVFSDLVSLVSALFASWFSFGLPGVFWLYMYWGNYFTSVRKTLLTLANLA
9+
LFGIGATICVCGLWVSGLSISSDSSGSSFSCANNA
10+
>species0|gene7
11+
MSTLDVKDIENGPARRVEEEGGMWENDMEKTPSVERDPFGNEAVGEVHYKTLDWWQSGML
12+
MIAETVSLGVLSLPATVAEVGLIPAIILIVGMGIIATYSGYVIGQFRARYPFIHSMADAG
13+
EVLCGRYGRMFTEFAQLVFFMFASGSHLVTFTVMMNTLTNHGTCSVVFGVVGLVLSFACS
14+
LPRTMKNVSWLAVTSFLSIFTAVLITMIGVAVEHPNPPPMQLTRSTSFVKGFSAVTNIAF
15+
AYCGHPAFFGFIAEMKEPKDFPKSLCMLQGFEIVFYTVASAVIYRYAGQNVTSPALGSAG
16+
IIVRKVAYGIAIPTIVIAGVVLGHVAIKNVYVRLFRGTDVMHKRSALGIGAWIGLAAGYW
17+
IIAWVIAEAIPVFSDLVSLVSALFASWFSFGLPGVFWLYMYWGNYFTSVRKTLLTLANLA
18+
LFGIGATICVCGLWVSGLSISSDSSGSSFSCANNA
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
>species2|gene4
2+
MEAINANPPPYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
3+
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPRIQNLGDA
4+
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTVTEHGTCSIVFSVVGMVISMVL
5+
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPGRIIEATVDTTLYSGFQAVSNIV
6+
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEISLYLTAAVVIYYFVGKDVASPALISA
7+
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHRRSKRSVGIWIGLGLTC
8+
WVVAWIIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGQWFASPRKILLTILNI
9+
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
10+
>species4|gene0
11+
MEAIKANPPAYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
12+
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPKIQNLGDA
13+
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTITEHGTCSIVFSVVGMVISMVL
14+
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPDRIIEATVDTTLYSGFQAVSNIV
15+
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEISLYLTAAVVIYYFVGKDVASPALISA
16+
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHSRSKRSVGIWIGLGLTC
17+
WVVAWVIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGQWFASPRKILLTILNI
18+
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
19+
>species1|gene4
20+
MEAINANPPAYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
21+
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPKIQNLGDA
22+
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTITEHGTCSIVFSVVGMVISMVL
23+
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPHRIIEATVDTTLYSGFQAVSNIV
24+
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEICLYLTAAVVIYYFVGKDVASPALISA
25+
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHSRSKRSVGIWIGLGLTC
26+
WVVAWVIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGHWFASPRKILLTILNI
27+
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA

0 commit comments

Comments
 (0)