Skip to content

Commit 981bb78

Browse files
Merge pull request #13 from Anantha-Rao12/master
Documentation work
2 parents 60989cd + db2935a commit 981bb78

File tree

5 files changed

+589
-9
lines changed

5 files changed

+589
-9
lines changed

Peptides_against_Malaria/1_Preprocessing_and_scoring/RCSB_static_parser.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@
1515

1616
def read_pdb_ids_csv(csv_path):
1717

18+
""" Read a comma-separated file that is essentially one row and output a list
19+
20+
Args :
21+
csv_path (str) : The relative/full path to the .csv file
22+
23+
Returns :
24+
A List of all the values in the .csv file
25+
"""
26+
1827
path = './pdb-ids.csv' ### Path of csv file containing all pdb-ids downloaded from PDB-advanced-search-options
1928
with open(path, 'r') as file: ### reading the file with a context manager
2029
pdb_id_list = file.read().split(',') ### create a list containing pdb-ids
@@ -26,6 +35,16 @@ def get_pdb_details(pdb_id):
2635

2736
'''PDB_ID, Desc, Classification, Exp_system, Method, Lit, Pubmed_id, Pubmed_abs, Org1, Mmol, Org2, Mut, Res is the order of items needed'''
2837

38+
"""
39+
RCSB Web Parser that extracts the above stated information for a single PDB ID
40+
41+
Args :
42+
pdb_id (str) : PDB ID of the molecule obtained from RCSB
43+
44+
Returns :
45+
A list containing all values scraped from the database
46+
47+
"""
2948
pdb_details = []
3049
url = 'https://www.rcsb.org/structure/'+pdb_id
3150

@@ -128,12 +147,23 @@ def get_pdb_details(pdb_id):
128147

129148

130149
def main(csv_path, col_names):
150+
151+
"""
152+
Extract information for all PDB IDs
153+
154+
Args :
155+
csv_path (str) : Full/relative path to the .csv file containing the PDB IDs
156+
157+
col_names (list) : Name of the 13 columns that contain information on each aspect of the PDB file
158+
159+
Returns :
160+
A Dataframe object from pandas where each row corresponds to a PDB ID and each column corresponds to a particular attribute of that PDB ID
161+
"""
162+
131163
pdb_ids_list = read_pdb_ids_csv(csv_path)
132164
m = len(pdb_id) ### length of pdb_id list ie no of pdb_ids
133165

134-
dataframe = []
135-
for i in range(m):
136-
dataframe.append(get_pdb_details(pdb_ids_list[i]))
166+
dataframe = list(map(get_pdb_details, pdb_ids_list))
137167

138168
dataframe = pd.DataFrame(dataframe, columns=col_names)
139169

Peptides_against_Malaria/1_Preprocessing_and_scoring/preprocessing_functions.py

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,42 @@
1010
import subprocess
1111
import time
1212
import pandas as pd
13-
#from chimera import runCommand as rc
13+
#from chimera import runCommand as rc
1414

15+
# The above line can be uncommented when running saturated_mutagenesis() on UCSF Chimera
1516

1617
def saturated_mutagenesis(model_no,chain_name,start_residue,stop_residue,input_path,file_name,output_path):
1718

19+
"""
20+
Perform saturated Mutagenesis (SM) of a given length of peptide submitted in .pdb format and return all mutants each in different files
21+
22+
Args:
23+
model_no (str) : The model number of the peptide of interest in the .pdb file
24+
25+
chain_name (str) : Name of the chain where SM is to be performed. Ex : 'A' , 'B' or ' '
26+
27+
start_residue (int) : Residue number on the given chain and model where SM needs to be performed (started)
28+
29+
stop_residue (int) : Residue number on the given chain where the SM needs to be stopped.
30+
31+
input_path (str) : Path to the directory containing the .pdb file of the peptide that needs to undergo SM
32+
33+
file_name (str) : Name of the .pdb file submitted
34+
35+
output_path (str) : Name of the output directory where the new models are saved.
36+
37+
Returns :
38+
This script is to be run in UCSF Chimera and all models/mutants are returned in .pdb format in the output_directory
39+
40+
41+
Raises :
42+
UCSF Chimera only works with Python 2.x
43+
44+
Notes :
45+
Visit Github.com/Anantha-Rao12/Peptides-against-Malaria for more info
46+
47+
"""
48+
1849
aa_data = 'ala arg asn asp cys glu gln gly his ile leu lys met phe pro ser thr trp tyr val'.split()
1950

2051
for residue_no in range(start_residue,stop_residue+1):
@@ -28,7 +59,26 @@ def saturated_mutagenesis(model_no,chain_name,start_residue,stop_residue,input_p
2859

2960

3061

31-
def AnalyseComplex(foldx_path, file_full_path, output_full_path):
62+
def AnalyseComplex(foldx_path, file_full_path):
63+
64+
"""
65+
Use the subprocess module to execute the --analyseComplexChains=A,B command of FoldX and obtain the Interaction Energy between two chains in a .pdb file
66+
67+
Args:
68+
foldx_path (str) : local full/relative path of the FOLDX executable
69+
70+
file_full_path (str) : local full path to the .pdb file that is to be analysed
71+
72+
73+
Returns :
74+
Prints the time taken to analyse, process and write the output of a single .pdb file
75+
Output is the stdout from the terminal
76+
77+
Notes :
78+
More information can be found here : foldxsuite.crg.eu/command/AnalyseComplex
79+
80+
81+
"""
3282

3383
data=[]
3484
start = time.time()
@@ -43,10 +93,10 @@ def AnalyseComplex(foldx_path, file_full_path, output_full_path):
4393

4494
def make_df_combine(files_path1,files_path2,output_path,csv_file_name):
4595

46-
''' This function visits files_path1 and files_path2 to collect all foldx Summary.fxout files that was created by AnalyseComplex command.
96+
""" Visit files_path1 and files_path2 to collect all foldx Summary.fxout files that were created by the AnalyseComplex command.
4797
With os.listdir each file name is stored in lists called 'foldx_summary_files' via list comprehension. We then open each .fxout Summary file with
4898
a context manager and store the last line of the file which has the required interaction data (tab separated). A list of lists is thus created
49-
and finally grafted into a dataframe with Pandas that is written as a .csv file to the given 'output_path' with 'csv_file_name'. '''
99+
and finally grafted into a dataframe with Pandas that is written as a .csv file to the given 'output_path' with 'csv_file_name'."""
50100

51101
listoflists =[]
52102
paths= [files_path1,files_path2]
@@ -55,8 +105,8 @@ def make_df_combine(files_path1,files_path2,output_path,csv_file_name):
55105
for file in os.listdir(foldx_summary_files):
56106
with open(os.path.join(path,file),'r') as rf:
57107
lines = rf.read().splitlines()
58-
data = lines[-1].split('\t')
59-
header = lines[-2].split('\t')
108+
data = lines[-1].split('\t') #Obtain the last line in the Summary.fxout file
109+
header = lines[-2].split('\t') #Obtain the 2nd last line as header in the Summary.fxout file
60110
listoflists.append(data)
61111
df = pd.DataFrame(listoflists,columns=header)
62112
os.chdir(output_path)

0 commit comments

Comments
 (0)