Skip to content

Commit 76f6d97

Browse files
committed
refactor(gene_info): reorganize gene info functions for clarity
1 parent 5812a0e commit 76f6d97

File tree

1 file changed

+23
-112
lines changed

1 file changed

+23
-112
lines changed

curationTool/reactions/utils/add_to_vmh_utils.py

Lines changed: 23 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -40,116 +40,6 @@ def save_json(data, filepath):
4040
with open(filepath, 'w') as f:
4141
json.dump(data, f)
4242

43-
def parse_gene_info(info):
    """
    Normalise one gene-info string into a bare GPR expression.

    Every " AND " is rewritten as " and ". If the result begins with the
    "GPR: " prefix, that prefix is dropped; otherwise the string is returned
    with only the operator rewrite applied.
    """
    prefix = "GPR: "
    text = info.replace(" AND ", " and ") if " AND " in info else info
    if text.startswith(prefix):
        return text[len(prefix):]
    return text
50-
51-
def merge_gene_infos(gene_infos):
52-
"""
53-
if more than 1 gene info for a reaction, merge them into a single GPR string (add "or" between them)
54-
"""
55-
infos = [g['info'] for g in gene_infos]
56-
gprs = [parse_gene_info(info) for info in infos]
57-
if len(gprs) == 1:
58-
return gprs[0]
59-
if len(gprs) == 0:
60-
return ""
61-
merged_gpr = " or ".join(gprs)
62-
return merged_gpr
63-
64-
def create_gprs(gene_infos):
    """
    Build one merged GPR string per reaction.

    gene_infos is a sequence with one element per reaction; each element is
    the collection of gene-info entries handed to merge_gene_infos. The
    returned list holds the merged GPR strings in the same order, and is
    empty when gene_infos is empty.
    """
    # Debug prints and a bare `raise` used to live here; the bare `raise`
    # (no active exception) aborted every call with RuntimeError before the
    # result could be returned.
    return [merge_gene_infos(gene_info) for gene_info in gene_infos]
71-
def rxn_prepare_json_paths_and_variables(
        reaction_identifiers,
        reaction_names,
        reaction_formulas,
        reaction_directions,
        reaction_subsystems,
        reaction_references,
        reaction_external_links,
        reaction_gene_info,
        reaction_comments,
        reaction_confidence_scores):
    """
    Prepares JSON paths and variables for MATLAB execution, saving them to temporary files.

    Each argument is written with save_json to its own file in the current
    working directory. File names have the form "<name>.json<random float>",
    where one random float is shared by all files of a single call.

    Returns the list of absolute file paths in this order: ids, names,
    formulas, directions, subsystems, references, external links, gene info,
    comments, confidence scores.
    """
    rand_float = random.uniform(0, 10000000)
    # Keep each file stem next to the value it stores so the two cannot
    # drift out of step.
    named_variables = [
        ('reactionIds', reaction_identifiers),
        ('reactionNames', reaction_names),
        ('reactionFormulas', reaction_formulas),
        ('reactionDirections', reaction_directions),
        ('reactionSubsystems', reaction_subsystems),
        ('reactionReferences', reaction_references),
        ('reactionExternalLinks', reaction_external_links),
        ('reactionGeneInfo', reaction_gene_info),
        ('reactionComments', reaction_comments),
        ('reactionConfidenceScores', reaction_confidence_scores),
    ]
    # NOTE: a call that computed merged GPRs and then discarded the result
    # was removed here; it contributed nothing to the files written below.
    json_paths = []
    for stem, variable in named_variables:
        path = os.path.join(os.getcwd(), f'{stem}.json{rand_float}')
        json_paths.append(path)
        save_json(variable, path)
    return json_paths
114-
115-
116-
def met_prepare_json_paths_and_variables(
        met_abbrs,
        met_names,
        met_formulas,
        met_charges,
        met_inchikeys,
        met_smiles,
        met_external_links,
        met_weights):
    """
    Prepares JSON paths and variables for MATLAB execution, saving them to temporary files.

    Each argument is written with save_json to its own file in the current
    working directory, named "<name>.json<random float>" with one random
    float shared by all files of a single call.

    Returns the absolute file paths in this order (which differs from the
    parameter order): abbreviations, names, formulas, InChIKeys, SMILES,
    charges, external links, weights.
    """
    rand_float = random.uniform(0, 10000000)
    named_variables = (
        ('metAbbrs', met_abbrs),
        ('metNames', met_names),
        ('metFormulas', met_formulas),
        ('metInchikeys', met_inchikeys),
        ('metSmiles', met_smiles),
        ('metCharges', met_charges),
        ('metExternalLinks', met_external_links),
        ('metWeights', met_weights),
    )
    json_paths = []
    for stem, variable in named_variables:
        full_path = os.path.join(os.getcwd(), f'{stem}.json{rand_float}')
        json_paths.append(full_path)
        save_json(variable, full_path)
    return json_paths
152-
15343
def update_vmh_from_constructor(json_dir, matlab_session, update_existing=False, dry_run=False):
15444
"""
15545
Execute the unified MATLAB function updateVMHFromConstructor to add/update
@@ -215,6 +105,26 @@ def update_vmh_from_constructor(json_dir, matlab_session, update_existing=False,
215105
'updatedRxns': []
216106
}
217107

108+
def parse_gene_info(info):
    """
    Normalise one gene-info string into a bare GPR expression.

    Every " AND " is rewritten as " and ". If the result begins with the
    "GPR: " prefix, that prefix is dropped; otherwise the string is returned
    with only the operator rewrite applied.
    """
    prefix = "GPR: "
    text = info.replace(" AND ", " and ") if " AND " in info else info
    if text.startswith(prefix):
        return text[len(prefix):]
    return text
115+
116+
def merge_gene_infos(gene_infos):
117+
"""
118+
if more than 1 gene info for a reaction, merge them into a single GPR string (add "or" between them)
119+
"""
120+
infos = [g['info'] for g in gene_infos]
121+
gprs = [parse_gene_info(info) for info in infos]
122+
if len(gprs) == 1:
123+
return gprs[0]
124+
if len(gprs) == 0:
125+
return ""
126+
merged_gpr = " or ".join(gprs)
127+
return merged_gpr
218128

219129
def prepare_vmh_update_json_files(
220130
reaction_identifiers,
@@ -260,11 +170,12 @@ def prepare_vmh_update_json_files(
260170
processed_gene_info = []
261171
for gene_info in reaction_gene_info:
262172
if gene_info:
263-
merged_gpr = merge_gene_infos(gene_info) if isinstance(gene_info, list) else gene_info
173+
merged_gpr = merge_gene_infos(gene_info)
264174
processed_gene_info.append({'info': merged_gpr} if merged_gpr else {})
265175
else:
266176
processed_gene_info.append({})
267-
177+
print(processed_gene_info)
178+
raise
268179
# Map of filename to data
269180
files_data = {
270181
'reactionIds.json': reaction_identifiers,

0 commit comments

Comments
 (0)