Skip to content

Commit e1de4f0

Browse files
committed
fix: GPR extraction
1 parent 76f6d97 commit e1de4f0

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

curationTool/reactions/utils/add_to_vmh_utils.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,31 @@ def update_vmh_from_constructor(json_dir, matlab_session, update_existing=False,
106106
}
107107

108108
def parse_gene_info(info):
    """
    Extract the clean GPR rule from a gene info string.

    The input may carry additional metadata after a semicolon, e.g.
        "GPR: ABL1 AND AOC1; ORGAN(Adipocytes_), SUBCELLULAR([e], [c])"
    for which the result is "ABL1 and AOC1".

    Args:
        info: Raw gene info string; may be empty or None.

    Returns:
        The text before the first semicolon with an optional leading
        "GPR:" label removed, surrounding whitespace trimmed, and
        " AND " / " OR " lowercased. Returns "" when `info` is falsy or
        contains no rule text.
    """
    if not info:
        return ""

    # Everything after the first semicolon is metadata (ORGAN, SUBCELLULAR, ...).
    rule = info.split(';', 1)[0].strip()

    # Match the label without its trailing space: the strip() above has
    # already removed that space when the rule is empty ("GPR: ; ORGAN(...)"),
    # and testing for "GPR: " would then leave the bare label "GPR:" behind.
    label = "GPR:"
    if rule.startswith(label):
        rule = rule[len(label):].strip()

    # Normalize AND/OR to lowercase for MATLAB compatibility.
    return rule.replace(" AND ", " and ").replace(" OR ", " or ")
115134

116135
def merge_gene_infos(gene_infos):
117136
"""
@@ -174,8 +193,7 @@ def prepare_vmh_update_json_files(
174193
processed_gene_info.append({'info': merged_gpr} if merged_gpr else {})
175194
else:
176195
processed_gene_info.append({})
177-
print(processed_gene_info)
178-
raise
196+
179197
# Map of filename to data
180198
files_data = {
181199
'reactionIds.json': reaction_identifiers,

0 commit comments

Comments
 (0)