diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..33cc331 Binary files /dev/null and b/.DS_Store differ diff --git a/.github/.DS_Store b/.github/.DS_Store new file mode 100644 index 0000000..81cac08 Binary files /dev/null and b/.github/.DS_Store differ diff --git a/excelutils/.DS_Store b/excelutils/.DS_Store new file mode 100644 index 0000000..b6f68ad Binary files /dev/null and b/excelutils/.DS_Store differ diff --git a/excelutils/excel_sbol_utils/.DS_Store b/excelutils/excel_sbol_utils/.DS_Store new file mode 100644 index 0000000..e50e337 Binary files /dev/null and b/excelutils/excel_sbol_utils/.DS_Store differ diff --git a/excelutils/excel_sbol_utils/library2.py b/excelutils/excel_sbol_utils/library2.py index e320936..470c92c 100644 --- a/excelutils/excel_sbol_utils/library2.py +++ b/excelutils/excel_sbol_utils/library2.py @@ -2,6 +2,13 @@ import excel_sbol_utils.helpers as hf import re import logging +import requests +import urllib.parse +import webbrowser +import excel2sbol.converter as conv +import os +import sys +import json # might be better if some of the ones like data sources were put in a library # which contained both sbol2 and sbol3. Then excel converter could check # if in lib2 or lib_both for version 2 and lib3 or lib_both for version 3 @@ -15,7 +22,60 @@ def objectType(rowobj): def displayId(rowobj): # used to set the object display id in converter function - pass + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + + dict = os.getenv("SBOL_DICTIONARY") + data = json.loads(dict) + url = data["Domain"] + if url.endswith('/'): + url = url[:-1] + collection = data["Library Name"] + # print(url) + # print(collection) + for col in rowobj.col_cell_dict.keys(): + val = rowobj.col_cell_dict[col] + + if col == "Previous Version (URI)": + # print(rowobj.obj) + print(rowobj.col_cell_dict) + display_id = rowobj.col_cell_dict['Part Id'] + previous_id = rowobj.col_cell_dict['Previous Version (URI)'] + rowobj.obj.wasDerivedFrom = previous_id + # print(rowobj.obj.properties) + return + + sbol2.Config.setOption('sbol_typed_uris', True) + doc = sbol2.Document() + + sbol2.setHomespace(url) + + part_shop = sbol2.PartShop(url) + + if username is None or password is None or url is None: + # do not login + print("No login credentials provided. Proceeding without login.") + else: + try: + part_shop.login(username, password) + print("Successfully logged in.") + except Exception as e: + print(f"Login failed: {e}") + exit(1) + collection_parts = collection.split('/')[:-2] + link = '/'.join(collection_parts) + link2 = f"{link}/{val}/1" + print(link2) + part_shop.pull(link2, doc) + + component = doc.get(link2) + if component: + print(f"Component with displayId {component.displayId} already exists at URL {component.identity}") + print("Terminating") + sys.exit(1) + + + def types(rowobj): for col in rowobj.col_cell_dict.keys(): @@ -36,6 +96,7 @@ def addToDescription(rowobj): raise TypeError(f"A multicolumn value was unexpectedly given in addToDescription, {rowobj.col_cell_dict}") setattr(rowobj.obj, 'description', current) + def moduleModuleDefiniton(rowobj): #NOT IMPLEMENTED # module_name_pref = self.obj_uri.split("/")[-1] # module_name_suf = self.cell_val.split("/")[-1] @@ -54,7 +115,7 @@ def additionalFuncComponent(rowobj): #NOT IMPLEMENTED # self.obj.functionalComponents.add(fc1) pass -def definedFunComponent(rowobj): #NOT IMPLEMENTED +def definedFuncComponent(rowobj): #NOT IMPLEMENTED # if isinstance(self.cell_val, list): # # pulling the functional component object # # by the name (hence the split) from the obj_cit @@ -68,26 +129,148 @@ def definedFunComponent(rowobj): #NOT IMPLEMENTED # self.obj.functionalComponents.add(fcobj.copy()) pass +def sequence_authentication(email, password, base_url,uri): + login_data = { + 'email': email, + 'password': password + } + if email is None or password is None or base_url is None: + seq_search = requests.get( + f'https://synbiohub.org/{uri}', + headers={ + 'Accept': 'application/json' + } + ) + if seq_search.status_code == 200: + public_results = seq_search.json() + if public_results: + if len(public_results) > 1: + print("Number of duplicate sequences found: ", len(public_results)) + for result in public_results: + print("The sequence already exists in the public repository. The URI is: ", result['uri']) + return False + else: + # print("Sequence not found in public repository.") + # double check the logic fo public repos and sequence search + return True + else: + login_response = requests.post( + f"{base_url}/login", + headers={ + 'Accept': 'plain/text' + }, + data= login_data + ) + if login_response.status_code == 200: + sequence_search_response = requests.get( + uri, + headers={ + 'Accept': 'application/json', + 'X-authorization': login_response.content + } + ) + if sequence_search_response.status_code == 200: + search_results = sequence_search_response.json() + if search_results: + if len(search_results) > 1: + + print("Number of duplicate sequences found: ", len(search_results)) + for result in search_results: + print("The sequence already exists in the database. The URI is: ", result['uri']) + + return False + else: + print("Sequence does not exist in the database. Adding sequence.") + return True + + + return True + else: + print("Login failed.") + return False + + +def link_validation(email, password, base_url, target_url): + # initial check w/out auth + # print("Checking link: ", target_url) + login_data = { + 'email': email, + 'password': password + } + initial_response = requests.get(target_url, headers={'Accept': 'application/json'}) + # print("Initital response status code: ", initial_response.status_code) + if initial_response.status_code == 200: + # print("Link is accessible without authentication.") + return True + + # the link is not accessible without authentication, try logging in + elif initial_response.status_code in {401, 403, 404}: + if email is None or password is None or base_url is None: + # print("Need login credentials to access the link.") + return False + else: + login_response = requests.post( + f"{base_url}/login", + headers={ + 'Accept': 'plain/text' + }, + data= login_data + ) + # print("Login response status code: ", login_response.status_code) + + + # check if login was successful + if login_response.status_code == 200: + # retry accessing the link after logging in + final_response = requests.get(target_url,headers = {'Accept': 'application/json', 'X-authorization': login_response.content}) + if final_response.status_code == 200: + # print("Link is accessible after authentication.") + return True + else: + # print("Link is not accessible after authentication.") + return False + + print("Link is not accessible.") + return False + def encodesFor(rowobj): module_name_pref = rowobj.obj_uri.split("/")[-1] - + module_name_suf = None + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + url = os.getenv("SBOL_URL") + # print(rowobj.col_cell_dict) + dict = os.getenv("SBOL_DICTIONARY") + data = json.loads(dict) + + print(data["Library Name"]) for col in rowobj.col_cell_dict.keys(): val = rowobj.col_cell_dict[col] + # print("Val: ", val) if isinstance(val, str): - if col == "Encodes for": + if col == "Encodes for": module_name_suf = val.split("/")[-1] protein_comp_uri = val break - - if not module_name_suf: - raise ValueError("No 'Encodes for' value found in rowobj.") + elif col == "Encodes for (URI)": + valid_uri = link_validation(username, password, url, val) + + if not valid_uri: + print(f"URI '{val}' is invalid. Skipping addition for {col}.") + print("Terminating") + sys.exit(1) + # return + module_name_suf = val.split("/")[-2] + protein_comp_uri = val + break + # create a new module definitions module_name = f"{module_name_pref}_{module_name_suf}" module_def = sbol2.ModuleDefinition(module_name) - + #create a fc for the protein if module_name_suf not in [fc.displayId for fc in module_def.functionalComponents]: protein_fc = module_def.functionalComponents.create(module_name_suf) @@ -103,12 +286,14 @@ def encodesFor(rowobj): dna_fc = module_def.functionalComponents.get(module_name_pref) - + # participation_name = f'{module_name_pref}_template' participation = sbol2.Participation(uri = f'{module_name_pref}_template') participation.participant = dna_fc participation.uri = f'{module_name_pref}_template' participation.roles = [sbol2.SBOL_TEMPLATE] + + # participation_name2 = f'{module_name_suf}_product' participation2 = sbol2.Participation(uri= f'{module_name_suf}_product') participation2.participant = protein_fc participation2.uri = f'{module_name_suf}_product' @@ -125,72 +310,112 @@ def encodesFor(rowobj): rowobj.doc.addModuleDefinition(module_def) - def repressor(rowobj): - module_name_pref = rowobj.obj_uri.split("/")[-1] - if not any(isinstance(val, (list, str)) and val for val in rowobj.col_cell_dict.values()): - raise ValueError("No 'Repressors' value found in rowobj.") - - module_name_suf = None - for col in rowobj.col_cell_dict.keys(): - val = rowobj.col_cell_dict[col] - - protein_comp_uris = val - - for protein_comp_uri in protein_comp_uris: - module_name_suf = protein_comp_uri.split("/")[-1] - - module_name = f"{module_name_pref}_{module_name_suf}" - module_def = sbol2.ModuleDefinition(module_name) - - if module_name_suf not in [fc.displayId for fc in module_def.functionalComponents]: - protein_fc = module_def.functionalComponents.create(module_name_suf) - protein_fc.definition = protein_comp_uri - else: - protein_fc = module_def.functionalComponents.get(module_name_suf) - - - if module_name_pref not in [fc.displayId for fc in module_def.functionalComponents]: - dna_fc = module_def.functionalComponents.create(module_name_pref) - dna_fc.definition = rowobj.obj_uri - else: - dna_fc = module_def.functionalComponents.get(module_name_pref) - - - participation = sbol2.Participation(uri = f'{module_name_pref}_inhibited') - participation.participant = dna_fc - participation.uri = f'{module_name_pref}_inhibited' - participation.roles = [sbol2.SBO_INHIBITED] - - - participation2 = sbol2.Participation(uri= f'{module_name_suf}_inhibition') - participation2.participant = protein_fc - participation2.uri = f'{module_name_suf}_inhibitor' - participation2.roles = [sbol2.SBO_INHIBITOR] - - interaction_name = f'{module_name_suf}_repression' - interaction_type = sbol2.SBO_INHIBITION - interaction = sbol2.Interaction(interaction_name, interaction_type) - interaction.participations.add(participation) - interaction.participations.add(participation2) + module_name_pref = rowobj.obj_uri.split("/")[-1] + # print(rowobj.col_cell_dict) + module_name_suf = None + for col in rowobj.col_cell_dict.keys(): + val = rowobj.col_cell_dict[col] + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + url = os.getenv("SBOL_URL") + # print(username, password, url) + if col == "Repressors (URI)" and isinstance(val, str): + protein_comp_uris = val.split(",") + # print("Protein comp uris: ", protein_comp_uris) + + for uri in protein_comp_uris: + valid_uri = link_validation(username, password, url, uri) + if not valid_uri: + print(f"URI '{val}' is invalid. Skipping addition for {col}.") + print("Terminating") + sys.exit(1) + continue + else: + protein_comp_uris = [val] if isinstance(val, str) else val + + for protein_comp_uri in protein_comp_uris: + if col == "Repressors": + module_name_suf = protein_comp_uri.split("/")[-1] + + elif col == "Repressors (URI)": + module_name_suf = protein_comp_uri.split("/")[-2] + + module_name = f"{module_name_pref}_{module_name_suf}" + module_def = sbol2.ModuleDefinition(module_name) + + if module_name_suf not in [fc.displayId for fc in module_def.functionalComponents]: + protein_fc = module_def.functionalComponents.create(module_name_suf) + protein_fc.definition = protein_comp_uri + else: + protein_fc = module_def.functionalComponents.get(module_name_suf) + + # create a dna functional component + if module_name_pref not in [fc.displayId for fc in module_def.functionalComponents]: + dna_fc = module_def.functionalComponents.create(module_name_pref) + dna_fc.definition = rowobj.obj_uri + else: + dna_fc = module_def.functionalComponents.get(module_name_pref) + + # participation_name = f'{module_name_pref}_template' + participation = sbol2.Participation(uri = f'{module_name_pref}_inhibited') + participation.participant = dna_fc + participation.uri = f'{module_name_pref}_inhibited' + participation.roles = [sbol2.SBO_INHIBITED] + + + # participation_name2 = f'{module_name_suf}_product' + participation2 = sbol2.Participation(uri= f'{module_name_suf}_inhibition') + participation2.participant = protein_fc + participation2.uri = f'{module_name_suf}_inhibitor' + participation2.roles = [sbol2.SBO_INHIBITOR] + + interaction_name = f'{module_name_suf}_repression' + interaction_type = sbol2.SBO_INHIBITION + interaction = sbol2.Interaction(interaction_name, interaction_type) + interaction.participations.add(participation) + interaction.participations.add(participation2) + + module_def.interactions.add(interaction) + rowobj.doc.addModuleDefinition(module_def) - module_def.interactions.add(interaction) - rowobj.doc.addModuleDefinition(module_def) def activator(rowobj): module_name_pref = rowobj.obj_uri.split("/")[-1] - if not any(isinstance(val, (list, str)) and val for val in rowobj.col_cell_dict.values()): - raise ValueError("No 'Activators' value found in rowobj.") - module_name_suf = None + # print(rowobj.col_cell_dict) for col in rowobj.col_cell_dict.keys(): val = rowobj.col_cell_dict[col] + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + url = os.getenv("SBOL_URL") - protein_comp_uris = val + if col == "Activators (URI)" and isinstance(val, str): + # print("Protein comp uris: ", val) + protein_comp_uris = val.split(",") + + for uri in protein_comp_uris: + # print(uri) + valid_uri = link_validation(username, password, url, uri) + if not valid_uri: + print(f"URI in '{uri}' is invalid. Skipping addition for {col}.") + print("Terminating") + sys.exit(1) + continue + else: + protein_comp_uris = [val] if isinstance(val, str) else val + # print("Val: ", val) for protein_comp_uri in protein_comp_uris: - module_name_suf = protein_comp_uri.split("/")[-1] + + if col == "Activators": + # print("Protein comp uri: ", protein_comp_uri) + module_name_suf = protein_comp_uri.split("/")[-1] + + elif col == "Activators (URI)": + # print("Protein comp uri: ", protein_comp_uri) + module_name_suf = protein_comp_uri.split("/")[-2] module_name = f"{module_name_pref}_{module_name_suf}" module_def = sbol2.ModuleDefinition(module_name) @@ -227,12 +452,7 @@ def activator(rowobj): interaction.participations.add(participation2) module_def.interactions.add(interaction) - rowobj.doc.addModuleDefinition(module_def) - - - - - + rowobj.doc.addModuleDefinition(module_def) def complexComponent(rowobj): @@ -241,22 +461,61 @@ def complexComponent(rowobj): protein_comp_uri = None molecule_name = None molecule_comp_uri = None + components = [] + # print(rowobj.col_cell_dict) + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + url = os.getenv("SBOL_URL") + # print(rowobj.col_cell_dict) + # if Components column present + for col in rowobj.col_cell_dict.keys(): val = rowobj.col_cell_dict[col] + if col == "Components Ids": + if isinstance(val, list) and len(val) > 0: + module_name_suf = val[0].split("/")[-1] + protein_comp_uri = val[0] + for i in val[1:]: + components.append((i.split("/")[-1], i)) + break + elif isinstance(val, str): + module_name_suf = val.split("/")[-1] + protein_comp_uri = val + break + elif col == "Components (URI)": + if isinstance(val, list) and len(val) > 0: + if isinstance(val, list) and len(val) > 0: + invalid_uris = [] + for uri in val: + valid_uri = link_validation(username, password, url, uri) + if not valid_uri: + print(f"URI '{uri}' is invalid. Skipping addition for {col}.") + invalid_uris.append(uri) + + if invalid_uris: + print(f"Invalid URIs detected: {invalid_uris}. Skipping entire complex formation.") + print("Terminating") + sys.exit(1) + + return + else: + # Process valid URIs + module_name_suf = val[0].split("/")[-2] + protein_comp_uri = val[0] + for i in val[1:]: + components.append((i.split("/")[-2], i)) + + + elif isinstance(val, str): + valid_uri = link_validation(username, password, url, val) + if not valid_uri: + print("URI in '{val}' is invalid. Skipping addition for {col}.") + print("Terminating") + sys.exit(1) + module_name_suf = val.split("/")[-2] + protein_comp_uri = val + break - if isinstance(val, list) and len(val) > 0: - module_name_suf = val[0].split("/")[-1] - protein_comp_uri = val[0] - if len(val) > 1: - molecule_name = val[1].split("/")[-1] - molecule_comp_uri = val[1] - break - elif isinstance(val, str): - module_name_suf = val.split("/")[-1] - protein_comp_uri = val - break - if not module_name_suf: - raise ValueError("No 'Components' value found in rowobj.") module_name = f"{module_name_pref}_complex_formation" # create a new module definition @@ -276,15 +535,16 @@ def complexComponent(rowobj): else: prod_fc = module_def.functionalComponents.get(module_name_pref) - # if exists, create molecule functional component - if molecule_name: - if molecule_name not in [fc.displayId for fc in module_def.functionalComponents]: - molecule_fc = module_def.functionalComponents.create(molecule_name) - molecule_fc.definition = molecule_comp_uri + # if exists, create functional components + components_FC = [] + for name, uri in components: + if name not in [fc.displayId for fc in module_def.functionalComponents]: + elem_fc = module_def.functionalComponents.create(name) + elem_fc.definition = uri + components_FC.append(elem_fc) else: - molecule_fc = module_def.functionalComponents.get(molecule_name) - - + elem_fc = module_def.functionalComponents.get(name) + components_FC.append(elem_fc) # participation for product participation = sbol2.Participation(uri = f'{module_name_pref}_product') @@ -298,28 +558,30 @@ def complexComponent(rowobj): participation2.uri = f'{module_name_suf}_reactant' participation2.roles = [sbol2.SBO_REACTANT] - #if molecule exists, create participation for molecule - if molecule_name: - participation3 = sbol2.Participation(uri= f'{molecule_name}_reactor') - participation3.participant = molecule_fc - participation3.uri = f'{molecule_name}_reactant' + #create participation for each component + components_participants = [] + for elem_fc in components_FC: + participation3 = sbol2.Participation(uri= f'{elem_fc.displayId}_reactor') + participation3.participant = elem_fc + participation3.uri = f'{elem_fc.displayId}_reactor' participation3.roles = [sbol2.SBO_REACTANT] - - + components_participants.append(participation3) + + # define the interaction interaction_name = f'{module_name_pref}_complex_formation' - interaction_type = sbol2.SBO_NONCOVALENT_BINDING interaction = sbol2.Interaction(interaction_name, interaction_type) interaction.participations.add(participation) interaction.participations.add(participation2) - - if molecule_name: - interaction.participations.add(participation3) + for part in components_participants: + interaction.participations.add(part) module_def.interactions.add(interaction) rowobj.doc.addModuleDefinition(module_def) + + def subcomponents(rowobj): sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, True) @@ -370,7 +632,7 @@ def subcomponents(rowobj): template.assemblePrimaryStructure(comp_list) #template.compile(assembly_method=None) - f#or comp in non_var_comps: + #or comp in non_var_comps: rowobj.obj.masterTemplate = template @@ -431,14 +693,28 @@ def dataSource(rowobj): logging.warning('Literal data sources are not yet supported.') def sequence(rowobj): + for col in rowobj.col_cell_dict.keys(): + val = rowobj.col_cell_dict[col] + # print(val) + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + url = os.getenv("SBOL_URL") + if isinstance(val, str): # might need to be careful if the object type is sequence! # THIS MIGHT HAVE BUGS IF MULTIPLE SEQUENCES ARE PROVIDED FOR # ONE OBJECT. E.g overwrite in self.obj.sequences = [val] ? if re.fullmatch(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', val): # if a url + # rowobj.obj.sequences = [val] + + valid_uri = link_validation(username, password, url, val) + if not valid_uri: + print("Terminating") + sys.exit(1) + return rowobj.obj.sequences = [val] elif re.match(r'^[a-zA-Z \s*]+$', val): @@ -448,6 +724,13 @@ def sequence(rowobj): val = "".join(val.split()) val = val.replace(u"\ufeff", "").lower() + uri = f'{url}/search/sequence={val}&' + valid_uri = sequence_authentication(username, password, url,uri) + if not valid_uri: + print("Part name: ", rowobj.obj.identity.split('/')[-2]) + # print("Terminating") + # sys.exit(1) + return # create sequence object sequence = sbol2.Sequence(f"{rowobj.obj.displayId}_sequence", elements=val) @@ -469,11 +752,14 @@ def sequence(rowobj): rowobj.obj.sequences = [val] else: raise TypeError(f"A multicolumn value was unexpectedly given in sequence, {rowobj.col_cell_dict}") - + def proteinSequence(rowobj): for col in rowobj.col_cell_dict.keys(): val = rowobj.col_cell_dict[col] + username = os.getenv("SBOL_USERNAME") + password = os.getenv("SBOL_PASSWORD") + url = os.getenv("SBOL_URL") if isinstance(val, str): # might need to be careful if the object type is sequence! # THIS MIGHT HAVE BUGS IF MULTIPLE SEQUENCES ARE PROVIDED FOR @@ -481,6 +767,13 @@ def proteinSequence(rowobj): if re.fullmatch(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', val): # if a url rowobj.obj.sequences = [val] + valid_uri = link_validation(username, password, url, val) + if not valid_uri: + print(f"URI '{val}' is invalid. Skipping addition for {col}.") + print("Terminating") + sys.exit(1) + return + rowobj.obj.sequences = [val] elif re.match(r'^[ACDEFGHIKLMNPQRSTVWY\s*]+$', val): # if a sequence string @@ -490,8 +783,14 @@ def proteinSequence(rowobj): # removes * val = val.replace('*', '') val = val.replace(u"\ufeff", "").upper() - - + uri = f'{url}/search/sequence={val}&' + valid_uri = sequence_authentication(username, password, url,uri) + if not valid_uri: + print("Part name: ", rowobj.obj.identity.split('/')[-2]) + # print("Terminating") + # sys.exit(1) + return + # create sequence object protein_sequence = sbol2.Sequence(f"{rowobj.obj.displayId}_proteinSequence", elements=val, encoding='http://www.chem.qmul.ac.uk/iupac/AminoAcid/') @@ -536,3 +835,9 @@ def finalProduct(rowobj): def circular(rowobj): # NOT IMPLEMENTED pass + + + + + +