@@ -425,6 +425,77 @@ mapCell2Accession <- function(
425425}
426426
427427
#' Select candidate Cellosaurus rows for a query
#'
#' Runs a fixed sequence of matching strategies against `responses_dt` and
#' returns the rows produced by the first strategy that yields at least one
#' row. Strategies are evaluated lazily, so later ones are not run once an
#' earlier one succeeds.
#'
#' Strategies, in order:
#' 1. `cellLineName` is exactly equal to `query`;
#' 2. `matchNested(query, responses_dt, keep_duplicates = keep_duplicates)`;
#' 3. `matchNested(name, responses_dt, keep_duplicates = keep_duplicates)`;
#' 4. `cleanCharacterStrings(cellLineName)` is equal to `name` (only the
#'    first matching row is kept);
#' 5. `matchNested()` of `name` against the `synonyms` column after each
#'    element has been passed through `cleanCharacterStrings()`.
#'
#' @param responses_dt A `data.table` with at least the columns
#'   `cellLineName` and `synonyms`. When strategy 1 matches, the table is
#'   keyed on `cellLineName` by reference via `data.table::setkeyv()`, so
#'   the caller's object is re-keyed as a side effect.
#' @param query Value compared against `cellLineName` as-is.
#' @param name Value compared against the cleaned names and synonyms.
#'   NOTE(review): presumably the cleaned form of `query` -- confirm against
#'   the caller.
#' @param keep_duplicates Forwarded unchanged to `matchNested()` in
#'   strategies 2 and 3.
#'
#' @return The matching rows of `responses_dt`, or `NULL` when no strategy
#'   produces any row.
#'
#' @keywords internal
#' @noRd
.match_cellosaurus_candidates <- function(
  responses_dt,
  query,
  name,
  keep_duplicates
) {
  # Shared tail of the matchNested()-based strategies: subset when anything
  # matched, otherwise signal "no match" with NULL.
  rows_or_null <- function(matches) {
    if (length(matches) > 0L) {
      responses_dt[matches]
    } else {
      NULL
    }
  }

  strategies <- list(
    # 1. Exact match on the cell line name.
    function() {
      # na.rm = TRUE: an NA name with no other match would otherwise make
      # any() return NA and the `if` would error instead of falling through.
      if (any(responses_dt$cellLineName == query, na.rm = TRUE)) {
        data.table::setkeyv(responses_dt, "cellLineName")
        responses_dt[query]
      } else {
        NULL
      }
    },
    # 2. Nested match on the raw query.
    function() {
      rows_or_null(
        matchNested(
          query,
          responses_dt,
          keep_duplicates = keep_duplicates
        )
      )
    },
    # 3. Nested match on `name`.
    function() {
      rows_or_null(
        matchNested(
          name,
          responses_dt,
          keep_duplicates = keep_duplicates
        )
      )
    },
    # 4. Match `name` against the cleaned cell line names; keep the first hit.
    function() {
      matches <- cleanCharacterStrings(responses_dt$cellLineName) == name
      if (any(matches, na.rm = TRUE)) {
        # which() drops NA comparisons explicitly before subsetting.
        responses_dt[which(matches)][1L]
      } else {
        NULL
      }
    },
    # 5. Nested match of `name` against the cleaned synonyms.
    function() {
      rows_or_null(
        matchNested(
          name,
          lapply(responses_dt$synonyms, cleanCharacterStrings)
        )
      )
    }
  )

  for (strategy in strategies) {
    candidate <- strategy()
    if (!is.null(candidate) && nrow(candidate) > 0L) {
      return(candidate)
    }
  }

  NULL
}
497+
498+
428499# ' Find Cellosaurus Matches
429500# '
430501# ' This function searches for matches in a data table based on a given name.
@@ -464,49 +535,14 @@ mapCell2Accession <- function(
464535 # the first row is the wrong cellline but the query is in a synonym
465536 # but the second row is the correct cellline
466537 # TODO:: REFACTOR THIS TO NOT REPEAT THE CONDITIONAL
467- if (any(responses_dt $ cellLineName == query )) {
468- data.table :: setkeyv(responses_dt , " cellLineName" )
469- result <- responses_dt [query ]
470- } else if (
471- length(matchNested(
472- query ,
473- responses_dt ,
474- keep_duplicates = keep_duplicates
475- )) >
476- 0
477- ) {
478- matches <- matchNested(
479- query ,
480- responses_dt ,
481- keep_duplicates = keep_duplicates
482- )
483- result <- responses_dt [matches ]
484- } else if (
485- length(matchNested(name , responses_dt , keep_duplicates = keep_duplicates )) >
486- 0
487- ) {
488- matches <- matchNested(
489- name ,
490- responses_dt ,
491- keep_duplicates = keep_duplicates
492- )
493- result <- responses_dt [matches ]
494- } else if (any(cleanCharacterStrings(responses_dt $ cellLineName ) == name )) {
495- matches <- cleanCharacterStrings(responses_dt $ cellLineName ) == name
496- result <- responses_dt [matches ][1 ]
497- } else if (
498- length(matchNested(
499- name ,
500- lapply(responses_dt $ synonyms , cleanCharacterStrings )
501- )) >
502- 0
503- ) {
504- matches <- matchNested(
505- name ,
506- lapply(responses_dt $ synonyms , cleanCharacterStrings )
507- )
508- result <- responses_dt [matches ]
509- } else {
538+ result <- .match_cellosaurus_candidates(
539+ responses_dt = responses_dt ,
540+ query = query ,
541+ name = name ,
542+ keep_duplicates = keep_duplicates
543+ )
544+
545+ if (is.null(result )) {
510546 .warn(paste0(" No results found for " , query ))
511547 # create an empty data.table with the following columns:
512548 # c("cellLineName", "accession", "query")
0 commit comments