Skip to content

Commit 7a0c651

Browse files
fixing misc. minor bugs
1 parent a17ed15 commit 7a0c651

File tree

6 files changed

+61
-59
lines changed

6 files changed

+61
-59
lines changed

Common/kgxmodel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class SubGraphSource(GraphSource):
9696
def get_metadata_representation(self):
9797
return {'graph_id': self.id,
9898
'graph_version': self.graph_version,
99-
'merge_strategy:': self.merge_strategy,
99+
'merge_strategy': self.merge_strategy,
100100
'graph_metadata': self.graph_metadata.metadata if self.graph_metadata else None}
101101

102102
def generate_version(self):

Common/load_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def fetch_source(self, source_id: str, source_version: str='latest', retries: in
185185
f"{failed_error.error_message}")
186186
if retries < 2:
187187
logger.error(f"Retrying fetching for {source_id}.. (retry {retries + 1})")
188-
self.fetch_source(source_id=source_id, source_version=source_version, retries=retries+1)
188+
return self.fetch_source(source_id=source_id, source_version=source_version, retries=retries+1)
189189
else:
190190
source_metadata.set_fetch_error(failed_error.error_message)
191191
source_metadata.set_fetch_status(SourceMetadata.FAILED)
@@ -324,8 +324,8 @@ def normalize_source(self,
324324
composite_normalization_version,
325325
normalization_scheme=normalization_scheme,
326326
normalization_status=SourceMetadata.IN_PROGRESS)
327+
current_time = datetime.datetime.now().strftime('%m-%d-%y %H:%M:%S')
327328
try:
328-
current_time = datetime.datetime.now().strftime('%m-%d-%y %H:%M:%S')
329329
nodes_source_file_path = self.get_source_node_file_path(source_id, source_version, parsing_version)
330330
nodes_norm_file_path = self.get_normalized_node_file_path(source_id, source_version, parsing_version, composite_normalization_version)
331331
node_norm_map_file_path = self.get_node_norm_map_file_path(source_id, source_version, parsing_version, composite_normalization_version)

Common/meta_kg.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,8 @@ def analyze_nodes(self, nodes_file_path: str):
6262
try:
6363
leaf_types = self.bl_utils.find_biolink_leaves(frozenset(node[NODE_TYPES]))
6464
except TypeError:
65-
error_message = f'Node types were not a valid list for node: {node}'
66-
leaf_types = {}
67-
if self.logger:
68-
self.logger.error(error_message)
69-
else:
70-
print(error_message)
65+
error_message = f'Node types were not a valid list for node ({node}): {node[NODE_TYPES]}'
66+
raise RuntimeError(error_message)
7167

7268
# store the leaf types for this node id
7369
node_id_to_leaf_types[node['id']] = leaf_types

Common/normalization.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,11 @@ def call_name_resolution(name: str, biolink_type: str, retries=0, logger=None):
602602
print(error_message)
603603
if retries < 2:
604604
time.sleep(5)
605-
logger.info('Retrying name resolution..')
605+
retry_message = 'Retrying name resolution..'
606+
if logger:
607+
logger.info(retry_message)
608+
else:
609+
print(retry_message)
606610
return call_name_resolution(name, biolink_type, retries + 1, logger)
607611

608612
# if retried 2 times already give up and return the last error

Common/utils.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -371,53 +371,6 @@ def get_swiss_prot_id_set(self, data_dir: str, debug_mode=False) -> set:
371371
# return the list
372372
return ret_val
373373

374-
def get_foodb_files(self, full_url: str, data_dir: str, data_file_name: str, file_list: list) -> (int, str, str):
375-
"""
376-
gets the food db files
377-
378-
:param full_url: the URL to the data file
379-
:param data_dir: the directory to place the file temporarily
380-
:param data_file_name: the name of the target file archive
381-
:param file_list: list of files to get
382-
:return:
383-
"""
384-
385-
self.logger.debug('Start of foodb file retrieval')
386-
387-
# init the file counter
388-
file_count: int = 0
389-
390-
# init the extraction directory
391-
foodb_dir: str = ''
392-
393-
# get the tar file that has the foodb data
394-
self.pull_via_http(full_url, data_dir)
395-
396-
# open the tar file
397-
tar = tarfile.open(os.path.join(data_dir, data_file_name), "r")
398-
399-
    # for each member of the tar file
400-
for member in tar.getmembers():
401-
# get the name
402-
name = member.name.split('/')
403-
404-
# if a valid name was found
405-
if len(name) > 1:
406-
# is the name in the target list
407-
if name[1] in file_list:
408-
# save the file
409-
tar.extract(member, data_dir)
410-
411-
# save the extraction directory
412-
foodb_dir = name[0]
413-
414-
# increment the file counter
415-
file_count += 1
416-
417-
self.logger.debug(f'End of foodb file retrieval. {file_count} files retrieved.')
418-
419-
    # return the file count, the extraction directory, and the archive root name
420-
return file_count, foodb_dir, name[0]
421374

422375
@staticmethod
423376
def split_file(archive_file_path: str, output_dir: str, data_file_name: str, lines_per_file: int = 500000) -> list:

parsers/FooDB/src/loadFDB.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
import argparse
2+
import tarfile
33
import re
44
import requests
55

@@ -74,6 +74,55 @@ def get_latest_source_version(self):
7474
# return to the caller
7575
return self.archive_name
7676

77+
# ! this function copied from utils, FooDB was never finished, this retrieval looks broken anyway
78+
def get_foodb_files(self, full_url: str, data_dir: str, data_file_name: str, file_list: list) -> (int, str, str):
79+
"""
80+
gets the food db files
81+
82+
:param full_url: the URL to the data file
83+
:param data_dir: the directory to place the file temporarily
84+
:param data_file_name: the name of the target file archive
85+
:param file_list: list of files to get
86+
:return:
87+
"""
88+
89+
self.logger.debug('Start of foodb file retrieval')
90+
91+
# init the file counter
92+
file_count: int = 0
93+
94+
# init the extraction directory
95+
foodb_dir: str = ''
96+
97+
# get the tar file that has the foodb data
98+
self.pull_via_http(full_url, data_dir)
99+
100+
# open the tar file
101+
tar = tarfile.open(os.path.join(data_dir, data_file_name), "r")
102+
103+
    # for each member of the tar file
104+
for member in tar.getmembers():
105+
# get the name
106+
name = member.name.split('/')
107+
108+
# if a valid name was found
109+
if len(name) > 1:
110+
# is the name in the target list
111+
if name[1] in file_list:
112+
# save the file
113+
tar.extract(member, data_dir)
114+
115+
# save the extraction directory
116+
foodb_dir = name[0]
117+
118+
# increment the file counter
119+
file_count += 1
120+
121+
self.logger.debug(f'End of foodb file retrieval. {file_count} files retrieved.')
122+
123+
    # return the file count, the extraction directory, and the archive root name
124+
return file_count, foodb_dir, name[0]
125+
77126
def get_data(self):
78127
"""
79128
Gets the FooDB data.
@@ -84,7 +133,7 @@ def get_data(self):
84133

85134
if(self.full_url_path==None): self.get_latest_source_version()
86135
# get all the files noted above
87-
file_count, foodb_dir, self.tar_dir_name = gd.get_foodb_files(self.full_url_path, self.data_path, self.archive_name, self.data_files)
136+
file_count, foodb_dir, self.tar_dir_name = self.get_foodb_files(self.full_url_path, self.data_path, self.archive_name, self.data_files)
88137

89138
# abort if we didnt get all the files
90139
if file_count != len(self.data_files):

0 commit comments

Comments
 (0)