Skip to content

Commit 7a0c651

Browse files
fixing misc. minor bugs
1 parent a17ed15 commit 7a0c651

File tree

6 files changed

+61
-59
lines changed

6 files changed

+61
-59
lines changed

Common/kgxmodel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class SubGraphSource(GraphSource):
9696
def get_metadata_representation(self):
9797
return {'graph_id': self.id,
9898
'graph_version': self.graph_version,
99-
'merge_strategy:': self.merge_strategy,
99+
'merge_strategy': self.merge_strategy,
100100
'graph_metadata': self.graph_metadata.metadata if self.graph_metadata else None}
101101

102102
def generate_version(self):

Common/load_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def fetch_source(self, source_id: str, source_version: str='latest', retries: in
185185
f"{failed_error.error_message}")
186186
if retries < 2:
187187
logger.error(f"Retrying fetching for {source_id}.. (retry {retries + 1})")
188-
self.fetch_source(source_id=source_id, source_version=source_version, retries=retries+1)
188+
return self.fetch_source(source_id=source_id, source_version=source_version, retries=retries+1)
189189
else:
190190
source_metadata.set_fetch_error(failed_error.error_message)
191191
source_metadata.set_fetch_status(SourceMetadata.FAILED)
@@ -324,8 +324,8 @@ def normalize_source(self,
324324
composite_normalization_version,
325325
normalization_scheme=normalization_scheme,
326326
normalization_status=SourceMetadata.IN_PROGRESS)
327+
current_time = datetime.datetime.now().strftime('%m-%d-%y %H:%M:%S')
327328
try:
328-
current_time = datetime.datetime.now().strftime('%m-%d-%y %H:%M:%S')
329329
nodes_source_file_path = self.get_source_node_file_path(source_id, source_version, parsing_version)
330330
nodes_norm_file_path = self.get_normalized_node_file_path(source_id, source_version, parsing_version, composite_normalization_version)
331331
node_norm_map_file_path = self.get_node_norm_map_file_path(source_id, source_version, parsing_version, composite_normalization_version)

Common/meta_kg.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,8 @@ def analyze_nodes(self, nodes_file_path: str):
6262
try:
6363
leaf_types = self.bl_utils.find_biolink_leaves(frozenset(node[NODE_TYPES]))
6464
except TypeError:
65-
error_message = f'Node types were not a valid list for node: {node}'
66-
leaf_types = {}
67-
if self.logger:
68-
self.logger.error(error_message)
69-
else:
70-
print(error_message)
65+
error_message = f'Node types were not a valid list for node ({node}): {node[NODE_TYPES]}'
66+
raise RuntimeError(error_message)
7167

7268
# store the leaf types for this node id
7369
node_id_to_leaf_types[node['id']] = leaf_types

Common/normalization.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,11 @@ def call_name_resolution(name: str, biolink_type: str, retries=0, logger=None):
602602
print(error_message)
603603
if retries < 2:
604604
time.sleep(5)
605-
logger.info('Retrying name resolution..')
605+
retry_message = 'Retrying name resolution..'
606+
if logger:
607+
logger.info(retry_message)
608+
else:
609+
print(retry_message)
606610
return call_name_resolution(name, biolink_type, retries + 1, logger)
607611

608612
# if retried 2 times already give up and return the last error

Common/utils.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -371,53 +371,6 @@ def get_swiss_prot_id_set(self, data_dir: str, debug_mode=False) -> set:
371371
# return the list
372372
return ret_val
373373

374-
def get_foodb_files(self, full_url: str, data_dir: str, data_file_name: str, file_list: list) -> (int, str, str):
375-
"""
376-
gets the food db files
377-
378-
:param full_url: the URL to the data file
379-
:param data_dir: the directory to place the file temporarily
380-
:param data_file_name: the name of the target file archive
381-
:param file_list: list of files to get
382-
:return:
383-
"""
384-
385-
self.logger.debug('Start of foodb file retrieval')
386-
387-
# init the file counter
388-
file_count: int = 0
389-
390-
# init the extraction directory
391-
foodb_dir: str = ''
392-
393-
# get the tar file that has the foodb data
394-
self.pull_via_http(full_url, data_dir)
395-
396-
# open the tar file
397-
tar = tarfile.open(os.path.join(data_dir, data_file_name), "r")
398-
399-
    # for each member of the tar file
400-
for member in tar.getmembers():
401-
# get the name
402-
name = member.name.split('/')
403-
404-
# if a valid name was found
405-
if len(name) > 1:
406-
# is the name in the target list
407-
if name[1] in file_list:
408-
# save the file
409-
tar.extract(member, data_dir)
410-
411-
# save the extraction directory
412-
foodb_dir = name[0]
413-
414-
# increment the file counter
415-
file_count += 1
416-
417-
self.logger.debug(f'End of foodb file retrieval. {file_count} files retrieved.')
418-
419-
    # return the file count, the extraction directory, and the archive root name
420-
return file_count, foodb_dir, name[0]
421374

422375
@staticmethod
423376
def split_file(archive_file_path: str, output_dir: str, data_file_name: str, lines_per_file: int = 500000) -> list:

parsers/FooDB/src/loadFDB.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
import argparse
2+
import tarfile
33
import re
44
import requests
55

@@ -74,6 +74,55 @@ def get_latest_source_version(self):
7474
# return to the caller
7575
return self.archive_name
7676

77+
# ! this function copied from utils, FooDB was never finished, this retrieval looks broken anyway
78+
def get_foodb_files(self, full_url: str, data_dir: str, data_file_name: str, file_list: list) -> (int, str, str):
79+
"""
80+
gets the food db files
81+
82+
:param full_url: the URL to the data file
83+
:param data_dir: the directory to place the file temporarily
84+
:param data_file_name: the name of the target file archive
85+
:param file_list: list of files to get
86+
:return:
87+
"""
88+
89+
self.logger.debug('Start of foodb file retrieval')
90+
91+
# init the file counter
92+
file_count: int = 0
93+
94+
# init the extraction directory
95+
foodb_dir: str = ''
96+
97+
# get the tar file that has the foodb data
98+
self.pull_via_http(full_url, data_dir)
99+
100+
# open the tar file
101+
tar = tarfile.open(os.path.join(data_dir, data_file_name), "r")
102+
103+
    # for each member of the tar file
104+
for member in tar.getmembers():
105+
# get the name
106+
name = member.name.split('/')
107+
108+
# if a valid name was found
109+
if len(name) > 1:
110+
# is the name in the target list
111+
if name[1] in file_list:
112+
# save the file
113+
tar.extract(member, data_dir)
114+
115+
# save the extraction directory
116+
foodb_dir = name[0]
117+
118+
# increment the file counter
119+
file_count += 1
120+
121+
self.logger.debug(f'End of foodb file retrieval. {file_count} files retrieved.')
122+
123+
    # return the file count, the extraction directory, and the archive root name
124+
return file_count, foodb_dir, name[0]
125+
77126
def get_data(self):
78127
"""
79128
Gets the FooDB data.
@@ -84,7 +133,7 @@ def get_data(self):
84133

85134
if(self.full_url_path==None): self.get_latest_source_version()
86135
# get all the files noted above
87-
file_count, foodb_dir, self.tar_dir_name = gd.get_foodb_files(self.full_url_path, self.data_path, self.archive_name, self.data_files)
136+
file_count, foodb_dir, self.tar_dir_name = self.get_foodb_files(self.full_url_path, self.data_path, self.archive_name, self.data_files)
88137

89138
# abort if we didnt get all the files
90139
if file_count != len(self.data_files):

0 commit comments

Comments
 (0)