Skip to content

Commit a427bab

Browse files
committed
First stab at a debug output.
1 parent 56bab2c commit a427bab

File tree

2 files changed

+55
-10
lines changed

2 files changed

+55
-10
lines changed

api/server.py

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import logging, warnings
1212
import os
1313
import re
14+
from enum import Enum
1415
from typing import Dict, List, Union, Annotated, Optional
1516

1617
from fastapi import Body, FastAPI, Query
@@ -102,6 +103,14 @@ async def status() -> Dict:
102103

103104
# ENDPOINT /reverse_lookup
104105

106+
class DebugOptions(str, Enum):
    """Values accepted by the `debug` parameter of the lookup endpoints.

    The members mirror the options of the Solr `debug` query parameter, see
    https://solr.apache.org/guide/solr/latest/query-guide/common-query-parameters.html#debug-parameter

    Subclassing `str` makes every member compare equal to its plain-string
    value (e.g. ``DebugOptions.query == "query"``), so callers may pass either
    the member or the bare string.
    """

    # Local sentinel for "no debugging requested"; `lookup` does not forward
    # this value to Solr.
    none = "none"
    # Debug information about the query only.
    query = "query"
    # Debug information about how long the query took to process.
    timing = "timing"
    # Debug information about the scored results (Solr's per-document "explain").
    results = "results"
    # All available debug information about the request.
    all = "all"
113+
105114
class Request(BaseModel):
106115
"""Reverse-lookup request body."""
107116
curies: List[str]
@@ -210,6 +219,8 @@ class LookupResult(BaseModel):
210219
types: List[str]
211220
score: float
212221
clique_identifier_count: int
222+
explain: Optional[str] # Explanation for this specific result
223+
debug: Optional[dict] # The debug information for the entire query
213224

214225

215226
@app.get("/lookup",
@@ -263,12 +274,15 @@ async def lookup_curies_get(
263274
"e.g. `NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955`.",
264275
# We can't use `example` here: the example value would otherwise be pre-filled into this field.
265276
# example="NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955"
266-
)] = None
277+
)] = None,
278+
debug: Annotated[Union[DebugOptions, None], Query(
279+
description="Provide debugging information on the Solr query at https://solr.apache.org/guide/solr/latest/query-guide/common-query-parameters.html#debug-parameter"
280+
)] = 'none'
267281
) -> List[LookupResult]:
268282
"""
269283
Returns cliques with a name or synonym that contains a specified string.
270284
"""
271-
return await lookup(string, autocomplete, highlighting, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa)
285+
return await lookup(string, autocomplete, highlighting, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa, debug)
272286

273287

274288
@app.post("/lookup",
@@ -324,12 +338,15 @@ async def lookup_curies_post(
324338
"e.g. `NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955`.",
325339
# We can't use `example` here: the example value would otherwise be pre-filled into this field.
326340
# example="NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955"
327-
)] = None
341+
)] = None,
342+
debug: Annotated[Union[DebugOptions, None], Query(
343+
description="Provide debugging information on the Solr query at https://solr.apache.org/guide/solr/latest/query-guide/common-query-parameters.html#debug-parameter"
344+
)] = 'none'
328345
) -> List[LookupResult]:
329346
"""
330347
Returns cliques with a name or synonym that contains a specified string.
331348
"""
332-
return await lookup(string, autocomplete, highlighting, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa)
349+
return await lookup(string, autocomplete, highlighting, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa, debug)
333350

334351

335352
async def lookup(string: str,
@@ -340,7 +357,8 @@ async def lookup(string: str,
340357
biolink_types: List[str] = None,
341358
only_prefixes: str = "",
342359
exclude_prefixes: str = "",
343-
only_taxa: str = ""
360+
only_taxa: str = "",
361+
debug: DebugOptions = 'none',
344362
) -> List[LookupResult]:
345363
"""
346364
Returns cliques with a name or synonym that contains a specified string.
@@ -435,6 +453,9 @@ async def lookup(string: str,
435453
# "hl.highlightMultiTerm": "true",
436454
})
437455

456+
if debug and debug != 'none':
457+
inner_params['debug'] = debug
458+
438459
params = {
439460
"query": {
440461
"edismax": {
@@ -461,7 +482,8 @@ async def lookup(string: str,
461482
"fields": "*, score",
462483
"params": inner_params,
463484
}
464-
logging.debug(f"Query: {json.dumps(params, indent=2)}")
485+
486+
print(f"Query: {json.dumps(params, indent=2)}")
465487

466488
query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
467489
async with httpx.AsyncClient(timeout=None) as client:
@@ -470,7 +492,12 @@ async def lookup(string: str,
470492
LOGGER.error("Solr REST error: %s", response.text)
471493
response.raise_for_status()
472494
response = response.json()
473-
logging.debug(f"Solr response: {json.dumps(response, indent=2)}")
495+
print(f"Solr response: {json.dumps(response, indent=2)}")
496+
497+
# Do we have any debug.explain information?
498+
explain_info = {}
499+
if 'debug' in response and 'explain' in response['debug']:
500+
explain_info = response['debug']['explain']
474501

475502
# Associate highlighting information with search results.
476503
highlighting_response = response.get("highlighting", {})
@@ -501,6 +528,17 @@ async def lookup(string: str,
501528
# Solr sometimes returns duplicates or a blank string here?
502529
synonym_matches = list(filter(lambda s: s, set(synonym_matches)))
503530

531+
# Prepare debugging and explain information for this request.
532+
debug_for_this_request = response.get('debug', None)
533+
explain_for_this_doc = None
534+
if debug == 'explain' or debug == 'all':
535+
if doc['id'] in explain_info:
536+
explain_for_this_doc = explain_info[doc['id']]
537+
538+
# If we have explain information, we don't need to also include it in the debugging information.
539+
debug_for_this_request['explain'] = {"_comment": "Removed to avoid data duplication"}
540+
541+
504542
outputs.append(LookupResult(curie=doc.get("curie", ""),
505543
label=doc.get("preferred_name", ""),
506544
highlighting={
@@ -511,7 +549,9 @@ async def lookup(string: str,
511549
score=doc.get("score", ""),
512550
taxa=doc.get("taxa", []),
513551
clique_identifier_count=doc.get("clique_identifier_count", 0),
514-
types=[f"biolink:{d}" for d in doc.get("types", [])]))
552+
types=[f"biolink:{d}" for d in doc.get("types", [])],
553+
explain=explain_for_this_doc,
554+
debug=debug_for_this_request))
515555

516556
return outputs
517557

@@ -572,6 +612,10 @@ class NameResQuery(BaseModel):
572612
# We can't use `example` here: the example value would otherwise be pre-filled into this field.
573613
# example="NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955"
574614
)
615+
debug: Optional[DebugOptions] = Field(
616+
'none',
617+
description="Provide debugging information on the Solr query as per https://solr.apache.org/guide/solr/latest/query-guide/common-query-parameters.html#debug-parameter"
618+
)
575619

576620

577621
@app.post("/bulk-lookup",
@@ -592,7 +636,8 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
592636
query.biolink_types,
593637
query.only_prefixes,
594638
query.exclude_prefixes,
595-
query.only_taxa)
639+
query.only_taxa,
640+
query.debug)
596641
return result
597642

598643

documentation/API.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ for the protein they encode, and that no separate entry will be available for th
1919

2020
### Scoring
2121

22-
Every `/lookup` or `/bulk-lookup` search result returns a search score. This score value is calculated by Apache Solr,
22+
Every `/lookup` or `/bulk-lookup` search result returns a search score. This score value is calculated by Apache Solr
2323
and has no upper bound. This score begins with the [TF-IDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf)
2424
(term frequency-inverse document frequency) score, which is a measure of how relevant a term is to a document in a
2525
collection of documents. The score is then multiplied by the [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) score,

0 commit comments

Comments
 (0)