Skip to content

Commit 1db38f0

Browse files
committed
refactor: extract reusable cellosaurus match strategies
1 parent 720f9a0 commit 1db38f0

File tree

1 file changed

+79
-43
lines changed

1 file changed

+79
-43
lines changed

R/cellosaurus.R

Lines changed: 79 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,77 @@ mapCell2Accession <- function(
425425
}
426426

427427

428+
#' Pick candidate Cellosaurus rows for a query
#'
#' Runs a fixed sequence of matching strategies and returns the rows selected
#' by the first strategy that yields at least one row:
#'
#' 1. exact equality between `cellLineName` and `query`;
#' 2. `matchNested()` of `query` against `responses_dt`;
#' 3. `matchNested()` of `name` against `responses_dt`;
#' 4. equality between the cleaned `cellLineName` values and `name`
#'    (first matching row only);
#' 5. `matchNested()` of `name` against the cleaned `synonyms` entries.
#'
#' Strategies are closures, so later (more expensive) matchers only run when
#' all earlier ones failed.
#'
#' @param responses_dt A `data.table` with `cellLineName` and `synonyms`
#'   columns. NOTE: when strategy 1 matches, the table is keyed on
#'   `cellLineName` by reference via `data.table::setkeyv()`, so the caller's
#'   object is reordered as a side effect.
#' @param query The query string as supplied by the caller.
#' @param name Alternative form of the query; it is compared against
#'   `cleanCharacterStrings()` output, so presumably the cleaned query
#'   (TODO confirm against the caller).
#' @param keep_duplicates Forwarded unchanged to `matchNested()`.
#'
#' @return The matching rows of `responses_dt`, or `NULL` when no strategy
#'   produced a row.
#'
#' @keywords internal
#' @noRd
.match_cellosaurus_candidates <- function(
  responses_dt,
  query,
  name,
  keep_duplicates
) {
  # Rows selected by a matcher's index vector, or NULL when it found nothing.
  rows_or_null <- function(idx) {
    if (length(idx) > 0L) {
      responses_dt[idx]
    } else {
      NULL
    }
  }

  strategies <- list(
    exact_name = function() {
      # na.rm = TRUE: a missing cellLineName must count as "no match" rather
      # than turn the condition into NA, which would make `if` error out.
      if (!any(responses_dt$cellLineName == query, na.rm = TRUE)) {
        return(NULL)
      }
      # Keyed lookup; setkeyv() sorts responses_dt in place (by reference).
      data.table::setkeyv(responses_dt, "cellLineName")
      responses_dt[query]
    },
    nested_query = function() {
      hits <- matchNested(
        query,
        responses_dt,
        keep_duplicates = keep_duplicates
      )
      rows_or_null(hits)
    },
    nested_name = function() {
      hits <- matchNested(
        name,
        responses_dt,
        keep_duplicates = keep_duplicates
      )
      rows_or_null(hits)
    },
    cleaned_name = function() {
      # which() drops NA comparisons, so missing names are simply skipped.
      hits <- which(cleanCharacterStrings(responses_dt$cellLineName) == name)
      if (length(hits) > 0L) {
        # Only the first cleaned-name match is kept.
        responses_dt[hits][1L]
      } else {
        NULL
      }
    },
    cleaned_synonyms = function() {
      hits <- matchNested(
        name,
        lapply(responses_dt$synonyms, cleanCharacterStrings)
      )
      rows_or_null(hits)
    }
  )

  # First strategy that yields a non-empty result wins.
  for (strategy in strategies) {
    candidate <- strategy()
    if (!is.null(candidate) && nrow(candidate) > 0L) {
      return(candidate)
    }
  }

  NULL
}
497+
498+
428499
#' Find Cellosaurus Matches
429500
#'
430501
#' This function searches for matches in a data table based on a given name.
@@ -464,49 +535,14 @@ mapCell2Accession <- function(
464535
# the first row is the wrong cellline but the query is in a synonym
465536
# but the second row is the correct cellline
466537
# NOTE: the ordered fallback matching is implemented in .match_cellosaurus_candidates()
467-
if (any(responses_dt$cellLineName == query)) {
468-
data.table::setkeyv(responses_dt, "cellLineName")
469-
result <- responses_dt[query]
470-
} else if (
471-
length(matchNested(
472-
query,
473-
responses_dt,
474-
keep_duplicates = keep_duplicates
475-
)) >
476-
0
477-
) {
478-
matches <- matchNested(
479-
query,
480-
responses_dt,
481-
keep_duplicates = keep_duplicates
482-
)
483-
result <- responses_dt[matches]
484-
} else if (
485-
length(matchNested(name, responses_dt, keep_duplicates = keep_duplicates)) >
486-
0
487-
) {
488-
matches <- matchNested(
489-
name,
490-
responses_dt,
491-
keep_duplicates = keep_duplicates
492-
)
493-
result <- responses_dt[matches]
494-
} else if (any(cleanCharacterStrings(responses_dt$cellLineName) == name)) {
495-
matches <- cleanCharacterStrings(responses_dt$cellLineName) == name
496-
result <- responses_dt[matches][1]
497-
} else if (
498-
length(matchNested(
499-
name,
500-
lapply(responses_dt$synonyms, cleanCharacterStrings)
501-
)) >
502-
0
503-
) {
504-
matches <- matchNested(
505-
name,
506-
lapply(responses_dt$synonyms, cleanCharacterStrings)
507-
)
508-
result <- responses_dt[matches]
509-
} else {
538+
result <- .match_cellosaurus_candidates(
539+
responses_dt = responses_dt,
540+
query = query,
541+
name = name,
542+
keep_duplicates = keep_duplicates
543+
)
544+
545+
if (is.null(result)) {
510546
.warn(paste0("No results found for ", query))
511547
# create an empty data.table with the following columns:
512548
# c("cellLineName", "accession", "query")

0 commit comments

Comments
 (0)