diff --git a/DESCRIPTION b/DESCRIPTION index 8f60bb1..f7275f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: R Client for accessing the tranSMART RESTful API Version: 0.3.2 Date: 2016-07-18 -Depends: httr, rjson, plyr, RProtoBuf, hash, reshape +Depends: httr, jsonlite, plyr, RProtoBuf, hash, reshape, XML Author: Tim Dorscheidt, Jan Kanis, Rianne Jansen Maintainer: Description: This package exposes tranSMART's RESTful API as a set of R functions. It uses tranSMART's OAuth authentication to access the data for which the user is authorized, and allows exploring and downloading the data. diff --git a/LICENSE.md b/LICENSE.md index 74ce6cf..a9bf10f 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -21,12 +21,13 @@ apply: | ---------- | -------- | ----------------------------------------------------------- | Protobuf | BSD | | httr | MIT | -| rjson | GPL2 | -| RProtoBuf | GPL2 | +| jsonlite | MIT | +| RProtoBuf | GPL2+ | | plyr | MIT | -| hash | GPL3 | -| bitopts | GPL3 | -| RCpp | GPL2 | +| hash | GPL2+ | +| reshape | MIT | +| bitopts | GPL2+ | +| RCpp | GPL2+ | | R overall | Multiple | This program is free software: you can redistribute it and/or modify it under diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index 5a07e2d..74d0813 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -22,6 +22,10 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . +.message <- function(...) { + if(getOption("verbose")) message(...) +} + connectToTransmart <- function (transmartDomain, use.authentication = TRUE, token = NULL, .access.token = NULL, ...) { if (!exists("transmartClientEnv") || transmartClientEnv$transmartDomain != transmartDomain) { @@ -171,7 +175,7 @@ function (oauthDomain = transmartClientEnv$transmartDomain, prefetched.request.t # Maybe we're talking to an older version of Transmart that uses the version 1 oauth plugin ping <- .transmartServerGetRequest("/oauth/verify", accept.type = "default", onlyContent = F) } - if (getOption("verbose")) { message(paste(ping$content, collapse = ": ")) } + .message(paste(ping$content, collapse = ": ")) if(ping$status == 200) { return(TRUE) } @@ -265,8 +269,14 @@ function (oauthDomain = transmartClientEnv$transmartDomain, prefetched.request.t return('unknown') } +# Wrap this in case we need to change json libraries again +.fromJSON <- function(json) { + fromJSON(json, simplifyDataFrame=F, simplifyMatrix=F) +} + .serverMessageExchange <- -function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, show.progress = (accept.type == 'binary') ) { +function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, post.content.type = 'form', + show.progress = (accept.type == 'binary') ) { if (any(accept.type == c("default", "hal"))) { if (accept.type == "hal") { httpHeaderFields <- c(httpHeaderFields, Accept = "application/hal+json;charset=UTF-8") @@ -283,7 +293,8 @@ function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, s body = post.body, add_headers(httpHeaderFields), authenticate(transmartClientEnv$client_id, transmartClientEnv$client_secret), - encode='form', + encode = if(post.content.type == 'form') 'form' else NULL, + if(post.content.type != 'form') content_type(post.content.type), config(verbose = getOption("verbose"))) if (getOption("verbose")) { message("POST body:\n", .list2string(post.body), "\n") } } @@ -292,13 +303,13 @@ function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, s result$headers <- headers(req) result$status <- req$status_code result$statusMessage <- http_status(req)$message - switch(.contentType(result$headers), + switch(.contentType(result$headers), json = { - result$content <- fromJSON(result$content) + result$content <- .fromJSON(result$content) result$JSON <- TRUE }, hal = { - result$content <- .simplifyHalList(fromJSON(result$content)) + result$content <- .simplifyHalList(.fromJSON(result$content)) result$JSON <- TRUE }) return(result) @@ -318,7 +329,8 @@ function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, s add_headers(httpHeaderFields), authenticate(transmartClientEnv$client_id, transmartClientEnv$client_secret), if(show.progress) progress(), - encode='form', + encode = if(post.content.type == 'form') 'form' else 'raw', + if(post.content.type != 'form') content_type(post.content.type), config(verbose = getOption("verbose"))) } if(show.progress) cat("\nDownload complete.\n") diff --git a/R/createPatientSet.R b/R/createPatientSet.R new file mode 100644 index 0000000..d8053c9 --- /dev/null +++ b/R/createPatientSet.R @@ -0,0 +1,755 @@ +# Copyright 2014 - 2016 The Hyve B.V. +# +# This file is part of tranSMART R Client: R package allowing access to +# tranSMART's data via its RESTful API. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version, along with the following terms: +# +# 1. You may convey a work based on this program in accordance with +# section 5, provided that you retain the above notices. +# 2. You may convey verbatim copies of this program code as you receive +# it, in any medium, provided that you retain the above notices. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . + +# Retrieve patient.set ID from tranSMART database, based on the constraints given by the user. +# Patient.set constraints are provided as an expression in the shape of, for example, +# (c1 | c2) & (c3|c4|c5) & c6 &... where c is either a constraint built up as {concept}{operator}{constraint_value} +# (e.g. "age" < 60) or a reference to a concept (e.g. "age") +createPatientSet <- function(study.name, patientset.constraints, returnXMLquery = F){ + if(missing(study.name)) { stop("Provide study name") } + if(missing(patientset.constraints)) { stop("Provide patientset.constraints") } + .message("\nProcessing input...", "") + + # retrieve the expression that defines the constraints + patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation + # in parsePatientsetConstraints because otherwise things such as + #"age"<65 & "biomarker data" will result in an error (problem is the + # string without operator) if you try e.g. is.call or is.character + # on the input if constraints are supplied as string, try to parse + # the string + patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) + + + # retrieve concept information for the given study, and only keep relevant columns. + # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept + # paths. + studyConcepts <- getConcepts(study.name) + studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] + studyConcepts <- .findEndLeaves(studyConcepts) + + # read the constraints given by the user, and convert this to a XML query definition in the format as expected by REST-API + xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts, study.name) + hrConstraints <- .makeSummaryOfQuery(xmlQuery) + xmlQuery <- saveXML(xmlQuery, prefix = '\n') #convert XML tree to string + .message(xmlQuery) + + # do POST request, and store result + .message("\nCreating patient set...", "") + serverResult <- .transmartGetJSON("/patient_sets", post.body = xmlQuery, + post.content.type ="text/xml;charset=UTF-8", onlyContent = c(201)) + + #return patient.set ID + patientsetID <- serverResult$id + + result <- list(patientsetID = patientsetID, patientsetSize = serverResult$setSize, + input_patientset.constraints = .expressionToText(patientset.constraints), + finalQueryConstraints = hrConstraints) + + .message(paste("\nBased on the input, the following constraints were defined and sent to the server", + " (always includes study concept):\n", result$finalQueryConstraints, sep = ""), "") + if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} + return(result) +} + + +.checkPatientSetConstraints <- function(patientsetConstraints){ + #test if it is an expression and not a string. If string: try to parse + if(!is.character(patientsetConstraints)) { + return(patientsetConstraints) + } + if(length(patientsetConstraints) > 1) { + stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. + The patient set constraints should be supplied in one single expression (or string).")} + + patientsetConstraintsParsed <- NA + result <- try({ patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] }, silent = T) + if(class(result) == "try-error") { + stop(paste("Detected a string as input for patient set constraints. Have tried to parse the", + "constraints out of the string to convert it into an expression, but the attempt to", + "parse the constraints out of the string failed:\n\n", + result[1], "\nPlease check the format of your input.", + "Type ?createPatientSet for more details on the expected format.")) + } + + if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { + #e.g. happens if input string is "\"age\"" + return(patientsetConstraintsParsed) + } + if(length(patientsetConstraintsParsed) > 1) { + .message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", + "such as: \"age\" > 65.", + "\nWill attempt to parse the constraints out of the string, converting it", + "into an expression...")) + return(patientsetConstraintsParsed) + } + + # if the input is already a concept name, e.g. "age" + return(patientsetConstraints) +} + + +# parse the constraints, and turn it into a query in XML format +.buildXMLquery <- function(patientset.constraints, studyConcepts, study.name) { + + ## parse the expression containing the constraints and translate this into a query definition in XML format + parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) + + # parsePatientSetConstraints returns a list with XML trees, these trees all either have items as top XMLnodes or + # panels. If the top nodes of the trees are items, add these items to a panel node and add this new node to a list. + if(xmlName(parsedConstraintsXMLlist[[1]]) == "item") { + parsedConstraintsXMLlist <- .makePanelList(parsedConstraintsXMLlist) + } + + #add one panel with study.name, ensuring that only patients from the specified study are selected + parsedConstraintsXMLlist <- .addStudyPanel(parsedConstraintsXMLlist, study.name, studyConcepts[1, "fullName"]) + + # build XML formatted query + xmlQuery <- xmlNode("qd:query_definition", namespaceDefinitions = c(qd="http://www.i2b2.org/xsd/cell/crc/psm/1.1/")) + for(i in 1:length(parsedConstraintsXMLlist)) { + xmlQuery <- append.XMLNode(xmlQuery, parsedConstraintsXMLlist[[i]]) + } + return(xmlQuery) +} + + +# determine for each concept in the concept table whether a concept is an end leaf of the tree, ie. if it is a data node +# (which can be either a numeric, categorical or highdim node) +.findEndLeaves <- function(conceptListStudy) { + conceptTypes <- unique(conceptListStudy$type) + + if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))) { + warning("Unexpected concept type for one or more concepts in the selected study. + Determination which concepts are end-leaves of the tree might not work correcty in all cases. + This only affects the patient selection query if concepts with undetermined type are included in the query. + In that case this message is followed by an accompanying error. + You can help fix it by contacting us. Type ?transmartRClient for contact details. + \n") + } + + # concepts with type numeric and high_dimensional are end-leaves, + # concepts with type categorical_options are not end-leaves + endLeaf <- NA + conceptListStudy <- cbind(conceptListStudy, endLeaf, stringsAsFactors = F) + conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- T + conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- F + + # find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" + # concepts with 'type' categorical_option are the concept values. Take the concept path of the concept values and + # remove the last part to retrieve a list of concept paths for categorical nodes. + categoricalOptionsPaths <- conceptListStudy$fullName[conceptListStudy$type == "CATEGORICAL_OPTION"] + categoricalNodes <- sub("\\\\[^\\]*\\\\$", "\\\\",categoricalOptionsPaths) # remove last part of concept path + # containing the categorical value, to obtain path to categorical node + categoricalNodes <- unique(categoricalNodes) + + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- T + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- F + conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" + + return(conceptListStudy) +} + + +# parsePatientSetConstraints takes an expression defining the constraints for the patientset and returns +# either a list of item XMLtrees or list of panel XMLtrees +.parsePatientSetConstraints <- function(patientsetConstraints, studyConcepts) { + + relationalOperators <- c("<", ">", "<=",">=", "==", "!=") + logicalOperators <- c("&","&&", "|", "||") + allowedOperators <- c(relationalOperators, logicalOperators) + + # construct a message that's used later on, when an error occurs. This message includes a listing of the different + # elements (sub units) of the constraint expression, if option "verbose" is set. + elementsMsg <- "" + if(getOption("verbose")) { + subUnits <- "" + for(i in 1:length(patientsetConstraints)) { + subUnits <- paste(subUnits, paste("\n\tElement ", i,": ", .expressionToText(patientsetConstraints[[i]]), sep = "")) + } + elementsMsg <- paste("\nElements of the (sub)constraint after parsing", subUnits,sep = "") + } + + errorMsg <- paste("Incorrect (sub)constraint definition, or failure to parse the (sub)constraint definition correctly.", + "Check the format of the constraint. \nFor more details about how to specify patient set constraints,", + "see the help/manual page of this function. \n(Sub)constraint: ", + .expressionToText(patientsetConstraints), elementsMsg) + + # if length(patientsetConstraints) == 3, then the expression contains three elements, so it is either a low-level + # constraint of the form {concept}{constraint_operator}{constraint_value} or it is a concatenation of constraints + # separated by either an AND or OR operator (of form {some constraint(s)}{ &, &&, | or || }{some constraint(s)} ) + # alternatively it is an expression containing the call to substitute() or an object with index, e.g. variable[1], + # data.frame$firstColumn[firstRow],etc + + + if(is.symbol(patientsetConstraints)) { + firstElement_in_allowedOperators <- F + } else { + firstElement <- as.character(patientsetConstraints[[1]]) + firstElement_in_allowedOperators <- firstElement %in% allowedOperators + } + + if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators) { + constraintOperator <- firstElement + + constraint <- list() + + # in case where the (sub)constraint is a concatation of subconstraints, combined by an AND or OR operator + # (e.g. "age" > 12 & "sex" = "Female"): element [[1]] contains the AND or OR operator, element [[2]] the + # subconstraint to the left of the operator, element [[3]] is the subconstraint to the right of the operator + # in case that the (sub)constraint is not a concatenation of subconstraints, but holds a single criterium that a + # concept has to satisfy to: + # [[1]] contains a relational operator, [[2]] the concept, [[3]] the constraint value + is.singleConstraint <- constraintOperator %in% relationalOperators + if(is.singleConstraint) { + itemXMLlist <- list(.parseSingleConstraint(patientsetConstraints, studyConcepts)) + return(itemXMLlist) + } else { + # it's a concatenation of constraints: call function again on the subconstraints. + # right now it only supports the format where the & operators are always the highest level operators and the + # | operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... + + treeBeforeOperator <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) + treeAfterOperator <- .parsePatientSetConstraints(patientsetConstraints[[3]], studyConcepts) + + # if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints + # cannot have an & anymore (this is for forcing the strict format for constraint definition described above) + if(constraintOperator == "|" ) { + if(grepl("&", .expressionToText(patientsetConstraints))) { + stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", + .expressionToText(patientsetConstraints), + "\nRight now the only format supported for defining patientset constraints is one where the & ", + "operator is on the highest level of the constraint definition \nand the | (or) operator on the ", + "lowest (second) level, \nie. the format is 'x1' or (in case of multiple \'&\' operations) ", + "'x1 & x2 & ...', \nwhere x1, x2, etc. can contain one or more subconstraints (called c here) ", + "separated by an | (or) operator, ie. x = c1 or x = (c1 | c2 | ...),", + "\n where c is a single constraint such as \'\"age\" < 60\' or a reference to a concept.", + "\n Examples of valid constraints: c1, c1|c2, c1&c2&c3, (c1|c2)&c3&(c4|c5|c6)", sep = "" + )) + } + itemXMLlist <- c(treeBeforeOperator,treeAfterOperator) + return(itemXMLlist) + } + + # treeBeforeOperator/treeAfterOperator can be either a list of items or a list of panels + # if it contains a list of items: add the items of that list to a panel node + if(constraintOperator == "&") { + if(xmlName(treeBeforeOperator[[1]]) == "item") { + beforePanels <- .makePanelList(treeBeforeOperator) + } + if(xmlName(treeBeforeOperator[[1]]) == "panel") { + beforePanels <- treeBeforeOperator + } + if(xmlName(treeAfterOperator[[1]]) == "item") { + afterPanels <- .makePanelList(treeAfterOperator) + } + if(xmlName(treeAfterOperator[[1]]) == "panel") { + afterPanels <- treeAfterOperator + } + + panelList <- c(beforePanels, afterPanels) + return(panelList) + } + } + } else if(class(patientsetConstraints) == "(") { + # expression is surrounded by brackets: take expression between brackets and call function again + # element [[2]] contains the expression between the brackets, element [[1]] is '(' + xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) + return(xmlTreeList) + } else if(length(patientsetConstraints) == 1 & is.character(patientsetConstraints)) { + # Then the (sub)constraint should consist of only a specification of a concept. + # This will result in selection of all patients that have a value for this concept. + # Concept specification can be a string containing a pattern to match to the concept name or a concept path or link, + #retrieve concept path + conceptPath <- .getConstraintConcept(patientsetConstraints, patientsetConstraints, studyConcepts, + identical.match = F, testIfEndLeave = F)[["conceptPath"]] + + # make itemTree for that concept + itemXMLlist <- list(xmlNode("item", xmlNode("item_key", .makeItemKey(conceptPath)))) + return(itemXMLlist) + } else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call") { + # alternatively it is an expression containing a call to substite or an object (with or without index), + # e.g. variables[1] or variable, data.frame$firstColumn[firstRow],etc - this object can + # contain a string specifying a concept, or a string that in itself is a constraint definition, or an + # expression as created with subsitute for specifying a constraint . + # e.g. when following input is given: concepts<- c("age", "sex"); createPatientSet("Some study", concepts[1]) or + # tmp <- c(substitute("age" <65), substitute("sex"== "female")); createPatientSet("Some study", tmp[[1]]) + # or an object with strings specifying the constraints, and then the strings shoudl be turned into expressions too. + # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); createPatientSet("Some study", tmp[1]) + # try to evaluate the expression and find a matching constraint concept + result <- try(eval(patientsetConstraints, envir = globalenv()), silent = T) + if(class(result) == "try-error") { + + stop(paste(attr(result, "condition")$message, "\n",errorMsg, sep = "")) + } + if(is.list(result)) { + if(length(result) > 1) { + stop(paste("Incorrect input for patient set constraints.\n", + "Evaluation of input", .expressionToText(patientsetConstraints), + "results in a list with more than one element", + "while the function expects only a single string or a single expression", + "(as created with function substitute), not multiple.")) + } + if(length(result) == 1) { + warning(paste("Evaluation of input", .expressionToText(patientsetConstraints), + "results in a list with a single element.", + "Expected is a string or an expression (as created with function substitute).", + "Will try to use this single element in the list")) + result <- result[[1]] + } + } + if(length(result)==1) { + if(is.na(result)) { + stop(paste("Content of \'",.expressionToText(patientsetConstraints), "\' is 'NA'.", + " Cannot use 'NA' as constraint definition/concept specification.", sep = "")) + } + } + result <- .checkPatientSetConstraints(result) #parses constraint definition out of string, if applicable + patientsetConstraints <- result + + xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints, studyConcepts) + return(xmlTreeList) + } else { + stop(errorMsg) + } +} + +# the deparse function converts expressions to strings. However it cuts the strings of at a certain bytelength, +# so a long expression could result in a character vector with several portions of the original expression +# this function makes one string out of the vector again +.expressionToText <- function(expression) { + textExpression <- deparse(expression, width.cutoff = 500) + + if(length(textExpression)>1) { + textExpressionPasted <- gsub("^[[:blank:]]+", "", textExpression) + textExpressionPasted <- paste(textExpressionPasted, collapse = "") + + #warnings are truncated, so it doesn't necessarily print all in case of long textExpression + message(paste("\nWhile trying to convert an expression to text, the deparse function cut an expression in two.", + "\nSeparate parts:\n", paste("\t", textExpression, collapse = "\n AND \n"), + "\nThese are pasted again together. Result:\n ", textExpressionPasted)) + textExpression <- textExpressionPasted + } + return(textExpression) +} + + +.makeSummaryOfQuery <- function(xmlQuery) { + + parsedXML <- "" + panels <- xmlChildren(xmlQuery) + + for(i in 1:length(panels)) { + panel <- panels[[i]] + + if(i == 1) {parsedXML <- paste(parsedXML, "( ", sep = "") } #first panel + else { parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "") } + + invert <- xmlValue(panel[["invert"]]) + if(invert == "1") { parsedXML <- paste(parsedXML, "!( ", sep = "") } + + #add the children + items <- xmlElementsByTagName(panel, "item") + for(j in 1:length(items)) { + item <- items[[j]] + if(j > 1) { parsedXML <- paste(parsedXML, " | ", sep = "") } + + #get concept path + item_key <- xmlValue(item[["item_key"]]) + concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) + concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) + parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") + + #if constraint operator and constraint value are given, get these + childNames <- names(item) + if("constrain_by_value" %in% childNames) { + valueConstraints <- item[["constrain_by_value"]] + valueOperator <- xmlValue(valueConstraints[["value_operator"]]) + parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) + parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") + } + } + + #close brackets for panel + if(invert == "1") { parsedXML <- paste(parsedXML, " ))", sep = "") } + else { parsedXML <- paste(parsedXML, " )", sep = "") } + } + if(parsedXML == "") { warning("Something went wrong with making a human readable version of the XML. + This does not affect the formation of the patient set") } + return(parsedXML) +} + + +#just needs one conceptPath, can be of any of the concepts in the study. It can be any path in column 'fullName' +.addStudyPanel <- function (constraintXMLlist, study.name, conceptPath) { + # retrieve the path for the study concept, by taking only the first part of the supplied concept path up to and + # including the study.name. + # e.g. take "\\Public Studies\\GSE8581\\" from "\\Public Studies\\GSE8581\\Subjects\\Ethnicity\\Afro American\\" + splitPath <- strsplit(conceptPath, "\\\\")[[1]] + nameHit <- grep(study.name, splitPath, ignore.case = T)[1] # take the first, just in case the study.name is repeated + # in later part of path + studyPath <- paste0(c(splitPath[1:nameHit], ""), collapse = "\\") + itemKey <- .makeItemKey(studyPath) + + panel <- xmlNode("panel", + xmlNode("invert", 0), + xmlNode("item", + xmlNode("item_key", itemKey))) + constraintXMLlist <- c(constraintXMLlist, list(panel)) + return(constraintXMLlist) +} + + +# it expects a list of "item" XML trees. It will add all items to a panel XML node, +# and returns that node as part of a list +.makePanelList <- function(itemXMLtreeList) { + panel <- xmlNode("panel", xmlNode("invert", 0)) + for(i in 1:length(itemXMLtreeList)) { + panel<- append.XMLNode(panel, itemXMLtreeList[[i]]) + } + return(list(panel)) +} + +# constraint is of format: {concept definition}{relational operator}{constraint_value}, e.g. "age" < 12. +.parseSingleConstraint <- function(patientsetConstraints, studyConcepts) { + constraint <- list() + + # grab the different elements of the constraint definition + constraint$operator <- as.character(patientsetConstraints[[1]]) + constraint$concept <- patientsetConstraints[[2]] + constraint$value <- patientsetConstraints[[3]] + + if(class(constraint$value) == "name" | class(constraint$value) == "call") { + tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) + if(class(tmpValue) == "try-error") { + try_error <- attr(tmpValue, "condition")$message + stop(paste0(try_error, ". Object was specified in (sub)constraint ", + .expressionToText(patientsetConstraints) , ".\n")) + } + if(length(tmpValue) >1) { + if(getOption("verbose")) { + message("\nInput for constraint_value: ") + print(tmpValue) + } + stop(paste0("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), + ".\nObject length of \'", constraint$value , "\' is larger than 1. ", + "Only a single input value (string/number) is allowed as a constraint_value.")) + } + if(is.na(tmpValue)) { + stop(paste0("Content of \'",.expressionToText(constraint$value), "\' is 'NA'.", + " A constraint value cannot be a missing value.")) + } + constraint$value <- tmpValue + } + + # a concept can be defined by a pattern matching the concept name (1), by concept.link(2), concept.path(3) or + # by giving a variable/object containing a string with one of those three + # find the concept path that corresponds to the concept, and determine the type of + # node (numerical, categorical or high dim) + constraint <- c(constraint, .getConstraintConcept(constraint$concept, patientsetConstraints, studyConcepts, + identical.match = F)) + constraint$value_operator <- NA + constraint$value_type <- NA + + if(constraint$conceptType == "NUMERIC") { + #check if the supplied constraint value is numeric + if(!is.numeric(constraint$value)) { + stop(paste("The supplied constraint value ", deparse(constraint$value)," is not numerical, while concept ", + constraint$conceptPath, " is a numerical concept. (This was the concept selected based on the input: \'", + constraint$concept, "\'). \nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), + sep = "" )) + } + + # Each individual constraint is represented as an "item" in the XML tree that holds the query definition for the + # patient.set + # construct the "item" subtree for the current constraint + constraint$item_key <- .makeItemKey(constraint$conceptPath) + constraint$value_type <- "NUMBER" + #translate relational operator from R to a value operator that can be recognized in the query + constraint$value_operator <- .getValueOperator(constraint$operator, "NUMERIC") + constrain_by_value_tree <- xmlNode("constrain_by_value", + xmlNode("value_operator", constraint$value_operator), + xmlNode("value_constraint", constraint$value), + xmlNode("value_type", constraint$value_type)) + itemXMLtree <- xmlNode("item", + xmlNode("item_key", constraint$item_key), + constrain_by_value_tree) + } + + if(constraint$conceptType == "CATEGORICAL_NODE" ){ + + #check if supplied constraint value is character + if(!is.character(constraint$value)) { + warning(paste("The supplied constraint value ", constraint$value," is not of class \'character\', while concept ", + constraint$conceptPath, " is a categorical concept (ie. containing text).", + "\n(This was the concept selected based on the input: \'", + constraint$concept, "\')", "\nWill convert the value to character, but unless there is actually a ", + "categorical value that matches the constraint value, this will result in an error later on.", + "\nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), + sep = "" )) + constraint$value <- as.character(constraint$value) + } + + #check if the given constraint value exists for the specified categorical concept + constraintValuePath <- .getConstraintConcept(constraint$value, patientsetConstraints, studyConcepts, + identical.match = T, testIfEndLeave = F)[["conceptPath"]] + if(constraintValuePath != paste(constraint$conceptPath, constraint$value, "\\", sep = "")){ + stop(paste0("Incorrect (sub)constraint definition for (sub)constraint:\'", .expressionToText(patientsetConstraints), + "\'.", "\nThe constraint value \'", constraint$value,"\' does not seem to be an existing value ", + "of the categorical concept \'", constraint$concept, "\'.", + "\nConcept path: ", constraint$conceptPath,"\nPath to contstraint value: ", + constraintValuePath)) + } + + #translate relational operator from R to a value operator that can be used in the query + #only EQ and NE are possible for text variables. Only EQ is supported right now + constraint$value_operator <- .getValueOperator(constraint$operator, "CATEGORICAL_NODE") + + # construct the "item" subtree for the current constraint + if(constraint$value_operator == "EQ") { + itemXMLtree <- xmlNode("item", xmlNode("item_key", .makeItemKey(constraintValuePath))) + } + if(constraint$value_operator == "NE") { + stop("For now the '!=' operation is not supported for categorical values") + ##implement later? So that if you specify conceptX != A then it automatically selects all possible categorical + # values in conceptX, except A. (you can't just use invert=1, for example trial_group != control | x < 1) + # or trial_group != control | lung_abnormal == "YES" should work too) + } + } + + if(constraint$conceptType == "HIGH_DIMENSIONAL") { + # you cannot apply relational operations to the high dimensional node + stop(paste0("Incorrect use of a high dimensional data node in (sub)constraint: ", + .expressionToText(patientsetConstraints),".", + "\nHigh dimensional nodes can only be used for defining patient sets by supplying the node name ", + "alone (e.g. \"mRNA day1\"); it is not possible to apply a relational operation (such as \"mRNA day1 < 0\")", + " to the node. \nWhen a high dimensional node name is supplied, ", + "all patients that have data for that high dimensional node will be selected.")) + } + + if(is.na(constraint$value_operator)) { + stop(paste0("Could not determine which value_operator to use in the query definition for the constraint \'", + .expressionToText(patientsetConstraints), "\'. Operator supplied by user: ", constraint$operator)) + } + + return(itemXMLtree) +} + + +#construct item_key from concept path +# expected format item key:\\Dimension\concept_path. Examples: +# \\Public Studies\Public Studies\Cell-line\Demographics\Age\ +# \\Private Studies\Private Studies\Cell-line\Characteristics\Age\ +.makeItemKey <- function(conceptPath) { + dimension <- strsplit(conceptPath, "\\", fixed=T)[[1]][2] #get first part of the concept path, that is either public or private study + + # TODO: is dit nodig? + #if(!dimension %in% c("Public Studies", "Private Studies")){ + # stop("Could not determine the dimension for the item_key, that is used for the XML query")} + item_key <- paste0("\\\\", dimension, conceptPath) + return(item_key) +} + + +#translate relational operators to a text representation as is expected for the query +.getValueOperator <- function(operator, type) { + if(type == "NUMERIC") { + if(operator == "<"){return("LT")} + if(operator == "<="){return("LE")} + if(operator == ">"){return("GT")} + if(operator == ">="){return("GE")} + if(operator == "=="){return("EQ")} + if(operator == "!="){return("NE")} + } + + if(type == "CATEGORICAL_NODE") { + if(operator %in% c("<", "<=", ">", ">=")) { + stop(paste0("The operation \'", operator, "\' is not supported for text variables.")) } + if(operator == "=="){return("EQ")} + if(operator == "!="){return("NE")} + } + + #if the function did not return yet, something went wrong. + stop(paste0("Something went wrong with determining the value_operator to use for the query definition. Operator:", + operator,". Value type: ", type)) +} + + +# find the concept path for a given concept definition. Concept can be specified as pattern matching a +# concept name, or as a partial/full concept path or link +.getConstraintConcept <- function(concept, subconstraint, studyConcepts, identical.match = F, testIfEndLeave = T) { + info <- "Correct way to supply a concept (as part of a (sub)constraint) is: + either directly as a string, containing the concept name or path, + or indirectly as an object (variable) that contains a string with the concept name or path. + Supplying a concept link as found in the column \'api.link.observations.href\' of the data.frame retrieved by + getConcepts() should also work. + Example: if you want to select patients younger than 12, supply \"age\" directly as as string: \"age\" < 12 + or indirectly: concepts[2] < 12, where concepts[2] contains the string \"age\"." + + subconstraint <- .expressionToText(subconstraint) + + #if not string: get the value of the variable/object. Value should be one string. + if(class(concept) == "name" | class(concept) == "call") { + result <- try(eval(concept, envir = globalenv()), silent = T) + if(class(result) == "try-error") { + try_error <- attr(result, "condition")$message + stop(paste0(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info)) + } + if(length(result) >1) { + write(paste("The content of object: \'", .expressionToText(concept), "\' is:", sep = "" ),"") + print(result) + stop(paste0("Incorrect input for concept specification in subconstraint: ", subconstraint, + ".\nObject length of \'", .expressionToText(concept), + "\' is larger than 1. Only a single string is allowed for specifying the concept.", + "The content of this variable is printed above this error message.")) + } + if(is.na(result)) { + stop(paste("Content of \'",.expressionToText(concept), "\' is 'NA'.", + " Cannot use 'NA' as concept specification.", sep = "")) + } + concept <- result + } + #concept should be a string. + if(!is.character(concept)) { + stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, ".\n", info, sep = "")) + } + + orig_concept <- concept + + if(identical.match) { + concept <- paste("^", concept, "$", sep = "") + concept <- gsub("^^", "^", concept, fixed = T) + concept <- gsub("$$", "$", concept, fixed = T) + } + + is.concept.path <- grepl("\\\\", concept) + conceptMatch <- character(0) + if(!is.concept.path) { + #concept paths are in 'fullName' column of getConcepts result + conceptMatch <- grep(concept, studyConcepts$name, ignore.case = !identical.match) + + if(length(conceptMatch) > 1) { + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$name) + } + } + + if(length(conceptMatch) == 0) { + # supplied concept migth be concept path or link. + is.concept.link <- grepl("^/studies/.+/concepts/", concept) | grepl("^studies/.+/concepts/", concept) + + if(is.concept.path & is.concept.link) { + stop(paste0("Something went wrong with detecting whether the provided string \'", concept, + "\' is a concept path or concept link. Please check if the provided string is correct.", + "\nTo check this, you can look at the resulting data.frame of getConcepts(YOUR_STUDY_NAME).", + "\nThe concept paths that can be used for this study can be found in the \'fullName\' column,", + "and the concept links in the \'api.link.self.href\' column", + "If the string does have the correct format, you may have encountered a bug.", + "\nYou can help fix it by contacting us. Type ?transmartRClient for contact details.")) + } + + if(is.concept.path) { + conceptMatch <- grep(concept, studyConcepts$fullName, fixed = T) + if(length(conceptMatch) > 1) { + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, + concept_list = studyConcepts$fullName) + } + } + + if(is.concept.link) { + .message("\nDetecting a concept.link. Will attempt to find matching concept path.") + conceptMatch <- grep(concept, studyConcepts$api.link.self.href) + if(length(conceptMatch) > 1) { + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, + concept_list = studyConcepts$api.link.self.href) + } + } + } + + identicalM <- "" + if(identical.match) { identicalM <- "identical(literal) "} + if(length(conceptMatch) == 0) { + stop(paste0("No ", identicalM, "matching concept or categorical value found in this study for \'", + orig_concept, "\', found in subconstraint: ", subconstraint, + "\nNote: The supplied concept in the constraint definition can be a full or partial ", + "match to the concept name, and can even contain regular expressions (pattern matching will be done as", + " done in the grep function, ignoring case) or it can be a concept.link or a concept.path.", + "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", + " of the possible categorical values for that concept.")) + } + + #test if matches are endLeaves, ie. a data node. + # If constraints are supplied in the form of {concept}{operator}{constraint_value}, the concept should be an end leave + # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a + # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that + # are not end leaves, and high dimensional data nodes - in that case testIfEndLeave should be FALSE. + if(!studyConcepts$endLeaf[[conceptMatch]] & testIfEndLeave) { + stop(paste0("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", + "The supplied concept name/path/link must point to a single numerical or categorical", + " data node (end leaf).")) + } + + matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], + conceptType = studyConcepts$type[conceptMatch]) + .message(paste0("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, + "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n") ) + return(matched_concept) +} + + +#called by .getConstraintConcept if there were initially multiple matches found for the concept, using the 'grep' function +.selectMatch <- function(concept, matching_indices, concept_list) { + #any literal, full length matches? (ignoring case) + literalMatches <- tolower(concept_list[matching_indices]) == tolower(concept) + if(any(literalMatches)) { + matching_indices <- matching_indices[literalMatches] + if(length(matching_indices) > 1) { + stop(paste0("There seem to be more than one concepts with the name \'", concept, "\'.", + "\nPlease use the concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).")) + } + message(paste0("\nMultiple matching concepts found for the string \'", concept, + "\'. One identical match was found (ignoring case): \'", + concept_list[matching_indices], "\'.\nThis match is selected.", + "\nFor more precise matching use full-length concept names, paths, or links,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp.", + "Note: regexp can only be used for specifying concept names or links, not paths")) + } + + #if not literal match take the shortest match + if(!any(literalMatches)) { + paths_tmp<- concept_list[matching_indices] + shortest_match<- matching_indices[which.min(nchar(paths_tmp))] + matching_indices<- shortest_match + message(paste0("\nMultiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", + paste(concept_list[shortest_match], collapse = ","), "\'.", + "\nFor more precise matching use full-length names or paths,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp")) + if(length(matching_indices) > 1) { + stop(paste0("There are multiple shortest matches for \'", concept, "\'. Matches: ", + paste(concept_list[shortest_match], collapse = ", "), ".", + "\nPlease use a more specific/longer string for specifying the concept name or path,", + "or use the (full) concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).")) + } + } + return(matching_indices) +} diff --git a/R/getHighdimData.R b/R/getHighdimData.R index a1607f2..a1c37ad 100644 --- a/R/getHighdimData.R +++ b/R/getHighdimData.R @@ -133,8 +133,9 @@ getHighdimData <- function(study.name, concept.match = NULL, concept.link = NULL # The argument is a single named list .expandConstraints <- function(constraints) { - # The JSON encoder encodes single item vectors as scalars. We need those to be lists as well sometimes. - j <- function(val) if (length(val) == 1) list(val) else val + # Previously used json packages encode length 1 vectors as scalars, we need them as lists. Jsonlite which we are using + # now doesn't do that so this wrapping function is now a no-op. + j <- function(val) val # some deep functional/lazy magic mapply(function(val, con) switch(con, diff --git a/R/getObservations.R b/R/getObservations.R index 29ac5a3..029524d 100644 --- a/R/getObservations.R +++ b/R/getObservations.R @@ -22,7 +22,8 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . -getObservations <- function(study.name, concept.match = NULL, concept.links = NULL, as.data.frame = TRUE) { +getObservations <- function(study.name, concept.match = NULL, concept.links = NULL, patient.set = NULL, + as.data.frame = TRUE) { .ensureTransmartConnection() if (is.null(concept.links)) { @@ -39,7 +40,7 @@ getObservations <- function(study.name, concept.match = NULL, concept.links = NU } } } else { - concept.links <- paste("/studies/", study.name, sep = "") + concept.links <- paste0("/studies/", study.name) } } @@ -50,10 +51,31 @@ getObservations <- function(study.name, concept.match = NULL, concept.links = NU listOfObservations <- list() - for (oneLink in concept.links) { + if(is.null(patient.set)){ + for (oneLink in concept.links) { serverResult <- .transmartGetJSON(paste(oneLink, "/observations", sep = "")) listOfObservations <- c(listOfObservations, serverResult$observations) + } + }else{ + if(length(patient.set) > 1) { stop("Only one patient.set ID allowed as input") } + if(!is.numeric(patient.set)) { stop("Patient.set ID should be a numeric value") } + + if(length(concept.links) == 1 && concept.links[1] == paste0("/studies/", study.name)) { + tmpConceptPath<- studyConcepts$fullName[1] + fullConceptNames <- gsub(paste0(study.name,"\\\\.*"), paste0(study.name, "\\\\"), + tmpConceptPath, ignore.case=T) + } else { + fullConceptNames <- studyConcepts$fullName[match(concept.links, studyConcepts$api.link.self.href)] + } + for (oneName in fullConceptNames) { + serverResult <- .transmartGetJSON( + paste("/observations?patient_sets=", patient.set, + "&concept_paths=", URLencode(oneName), + sep="")) + listOfObservations <- c(listOfObservations, serverResult$observations) + } } + if (as.data.frame) { dataFrameObservations <- .listToDataFrame(listOfObservations) diff --git a/bin/installCommands.R b/bin/installCommands.R index f6a936f..6f01b4a 100644 --- a/bin/installCommands.R +++ b/bin/installCommands.R @@ -25,9 +25,9 @@ # Notes for first time installers: -# The package transmartRClient depends on five packages: httr, rjson, RProtoBuf, plyr, hash, and reshape. +# The package transmartRClient depends on these packages: httr, jsonlite, RProtoBuf, plyr, hash, reshape and XML. # You can install them as follows: -install.packages(pkgs=c("httr", "rjson", "RProtoBuf", "plyr", "hash", "reshape")) +install.packages(pkgs=c("httr", "jsonlite", "RProtoBuf", "plyr", "hash", "reshape", "XML")) # RProtoBuf depends on the system protobuf headers. For Ubuntu you will need to # install the libprotoc-dev and libprotobuf-dev packages. diff --git a/inst/unittests/resources/gse8581concepts.txt b/inst/unittests/resources/gse8581concepts.txt new file mode 100644 index 0000000..8f4a287 --- /dev/null +++ b/inst/unittests/resources/gse8581concepts.txt @@ -0,0 +1,38 @@ +name fullName type api.link.self.href endLeaf +Afro American \Public Studies\GSE8581\Subjects\Ethnicity\Afro American\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Afro%20American FALSE +Age (year) \Public Studies\GSE8581\Subjects\Age (year)\ NUMERIC /studies/gse8581/concepts/Subjects/Age%20%28year%29 TRUE +Biomarker_Data \Public Studies\GSE8581\MRNA\Biomarker_Data\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data FALSE +carcinoid \Public Studies\GSE8581\Endpoints\Diagnosis\carcinoid\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/carcinoid FALSE +Caucasian \Public Studies\GSE8581\Subjects\Ethnicity\Caucasian\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Caucasian FALSE +chronic obstructive pulmonary disease \Public Studies\GSE8581\Subjects\Lung Disease\chronic obstructive pulmonary disease\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/chronic%20obstructive%20pulmonary%20disease FALSE +control \Public Studies\GSE8581\Subjects\Lung Disease\control\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/control FALSE +Diagnosis \Public Studies\GSE8581\Endpoints\Diagnosis\ CATEGORICAL_NODE /studies/gse8581/concepts/Endpoints/Diagnosis TRUE +emphysema \Public Studies\GSE8581\Endpoints\Diagnosis\emphysema\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/emphysema FALSE +Endpoints \Public Studies\GSE8581\Endpoints\ UNKNOWN /studies/gse8581/concepts/Endpoints FALSE +Ethnicity \Public Studies\GSE8581\Subjects\Ethnicity\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Ethnicity TRUE +female \Public Studies\GSE8581\Subjects\Sex\female\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/female FALSE +FEV1 \Public Studies\GSE8581\Endpoints\FEV1\ NUMERIC /studies/gse8581/concepts/Endpoints/FEV1 TRUE +Forced Expiratory Volume Ratio \Public Studies\GSE8581\Endpoints\Forced Expiratory Volume Ratio\ NUMERIC /studies/gse8581/concepts/Endpoints/Forced%20Expiratory%20Volume%20Ratio TRUE +giant bullae \Public Studies\GSE8581\Endpoints\Diagnosis\giant bullae\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/giant%20bullae FALSE +Giant Cell Tumor \Public Studies\GSE8581\Endpoints\Diagnosis\Giant Cell Tumor\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Giant%20Cell%20Tumor FALSE +GPL570_BOGUS \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS FALSE +Height (inch) \Public Studies\GSE8581\Subjects\Height (inch)\ NUMERIC /studies/gse8581/concepts/Subjects/Height%20%28inch%29 TRUE +hematoma \Public Studies\GSE8581\Endpoints\Diagnosis\hematoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/hematoma FALSE +Homo sapiens \Public Studies\GSE8581\Subjects\Organism\Homo sapiens\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Organism/Homo%20sapiens FALSE +inflammation \Public Studies\GSE8581\Endpoints\Diagnosis\inflammation\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/inflammation FALSE +Lung \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\Lung\ HIGH_DIMENSIONAL /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS/Lung TRUE +Lung Disease \Public Studies\GSE8581\Subjects\Lung Disease\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Lung%20Disease TRUE +lymphoma \Public Studies\GSE8581\Endpoints\Diagnosis\lymphoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/lymphoma FALSE +male \Public Studies\GSE8581\Subjects\Sex\male\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/male FALSE +metastatic non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20non-small%20cell%20adenocarcinoma FALSE +metastatic renal cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic renal cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20renal%20cell%20carcinoma FALSE +MRNA \Public Studies\GSE8581\MRNA\ UNKNOWN /studies/gse8581/concepts/MRNA FALSE +no malignancy \Public Studies\GSE8581\Endpoints\Diagnosis\no malignancy\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/no%20malignancy FALSE +non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20adenocarcinoma FALSE +non-small cell squamous cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell squamous cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20squamous%20cell%20carcinoma FALSE +not specified \Public Studies\GSE8581\Subjects\Lung Disease\not specified\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/not%20specified FALSE +NSC-Mixed \Public Studies\GSE8581\Endpoints\Diagnosis\NSC-Mixed\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/NSC-Mixed FALSE +Organism \Public Studies\GSE8581\Subjects\Organism\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Organism TRUE +Sex \Public Studies\GSE8581\Subjects\Sex\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Sex TRUE +Subjects \Public Studies\GSE8581\Subjects\ UNKNOWN /studies/gse8581/concepts/Subjects FALSE +Unknown \Public Studies\GSE8581\Endpoints\Diagnosis\Unknown\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Unknown FALSE diff --git a/inst/unittests/runitCreatePatientSet.R b/inst/unittests/runitCreatePatientSet.R new file mode 100644 index 0000000..fd063d9 --- /dev/null +++ b/inst/unittests/runitCreatePatientSet.R @@ -0,0 +1,300 @@ +# Copyright 2014, 2015, 2016 The Hyve B.V. +# Copyright 2014 Janssen Research & Development, LLC. +# +# This file is part of tranSMART R Client: R package allowing access to +# tranSMART's data via its RESTful API. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version, along with the following terms: +# +# 1. You may convey a work based on this program in accordance with +# section 5, provided that you retain the above notices. +# 2. You may convey verbatim copies of this program code as you receive +# it, in any medium, provided that you retain the above notices. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . + + +# concepts table for GSE8581 +gse8581conceptsLocation <- + if (exists("TRANSMART_RINTERFACE_PKG_ROOT")) { + # Development setting + message(paste0("Loading unit tests from ", TRANSMART_RINTERFACE_PKG_ROOT)) + paste0(TRANSMART_RINTERFACE_PKG_ROOT, "/inst/unittests/resources/gse8581concepts.txt") + } else { + # Assume the package is installed + system.file("unittests/resources/gse8581concepts.txt", package="transmartRClient") + } + +gseconcepts <- read.table(gse8581conceptsLocation, header = T, stringsAsFactors = F, sep = "\t") + +### unit tests for function .checkPatientSetConstraints ### +# this should convert a string to an expression, if the constraints are +# provided as a string. Also checks that it only contains one string +# both "\"age\"" and "age" as input should result in returning "age" # suggest to use variable name without the quotes +# if variable with name age should be used +test.checkPatientSetConstraints.simpleString.1 <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("\"age\"") + checkEquals("age", result) +} + +test.checkPatientSetConstraints.simpleString.2 <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("age") + checkEquals("age", result) +} + +test.checkPatientSetConstraints.object <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("concepts[1]") + expected <- substitute(concepts[1]) + checkEquals(expected, result) +} + +test.checkPatientSetConstraints.constraintDefinitionString <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("\"age\" < 60") + expected <- substitute("age" < 60) + checkEquals(expected, result) +} + +#should only work with a single string +test.checkPatientSetConstraints.constraintDefinitionMultipleStrings <- function() { + input <- c("\"age\" < 60", "Male") + checkException(transmartRClient:::.checkPatientSetConstraints(input)) +} + +test.checkPatientSetConstraints.constraintDefinitionExpression <- function() { + result<- transmartRClient:::.checkPatientSetConstraints(substitute("age" < 60)) + expected <- substitute("age" < 60) + checkEquals(expected, result) +} + + +### unit test for function .buildXMLquery(patientset.constraints, studyConcepts) ### +## supplying a single concept only, without constraint operator and constraint value" + +# supplying a concept that is data node +test.buildXMLquery.datanode <- function() { + result <- transmartRClient:::.buildXMLquery("Age", gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#concept that is not a data node +test.buildXMLquery.nonDataNode <- function() { + result <- transmartRClient:::.buildXMLquery("Subjects", gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# categorical value (end leaf) +test.buildXMLquery.categoricalValue <- function() { + result <- transmartRClient:::.buildXMLquery("control", gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Lung Disease\\control\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# non-existing data node (a string that does nat match any concepts) +test.buildXMLquery.notExistingConcept <- function() { + checkException( transmartRClient:::.buildXMLquery("Nonsense", gseconcepts, "GSE8581")) +} + +#concept link +test.buildXMLquery.conceptLink<- function() { + input<-substitute("/studies/gse8581/concepts/Subjects/Age%20%28year%29") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# concept path. +test.buildXMLquery.conceptPath <- function() { + input<-substitute("\\Public Studies\\GSE8581\\Subjects\\Age (year)\\") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +## constraints consisting of concept with constraint value +# simple constraint, consisting of only one concept plus constraint value +test.buildXMLquery.simpleConstraint<- function() { + input <- substitute("age" < 65) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# non-existing value for categorical concept +test.buildXMLquery.nonExistingCategoricalValue <- function() { + input <- substitute("sex" == "unknown") + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#relational operation shouldn't work with high dim node +test.buildXMLquery.relationalOperationHighDimNode<- function() { + input <- substitute("lung" < 65) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#relational operation shouldn't work with a concept that is not a datanode +test.buildXMLquery.relationalOperationNonDataNode<- function() { + input <- substitute("Subjects" < 65) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#compound constraint | +test.buildXMLquery.compoundConstraintsORed<- function() { + input <- substitute("age" < 65 | "sex" == "female") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Sex\\female\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#compound constraint & +test.buildXMLquery.compoundConstraintsANDed<- function() { + input <- substitute("age" < 65 & "sex" == "female") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Sex\\female\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#compound constraint complex +test.buildXMLquery.compoundConstraintsComplex<- function() { + input <- substitute("age" < 65 & + ("lung disease" == "control" | "lung disease" == "chronic obstructive pulmonary disease") & + "Biomarker_Data") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Lung Disease\\control\\\n \n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Lung Disease\\chronic obstructive pulmonary disease\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\MRNA\\Biomarker_Data\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#compound constraint complex, wrong format +test.buildXMLquery.compoundConstraintsComplexWrongFormat<- function() { + input <- substitute("sex"== "female" | ("age" < 65 & "Biomarker_Data")) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#compound constraint complex, wrong format (no brackets around ORed part) +test.buildXMLquery.compoundConstraintsComplexWrongFormat2<- function() { + input <- substitute("sex"== "female" & "age" < 65 | "Biomarker_Data") + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +# concept and constraint value should also be possible to be stored in an object +test.buildXMLquery.simpleConstraintContainingObjects<- function() { + concepts <- c("age", "sex") + some_value <- 65 + input <- substitute(concepts[1] < some_value) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#constraints with double substitute (in createPatientSet another substitute is performed on the input, so if input is +# substitute("age"<65), then input for buildXMLquery is substitute(substitute("age"<65)) +test.buildXMLquery.doubleSubstitute<- function() { + concepts <- c("age", "sex") + some_value <- 65 + input <- substitute(substitute(concepts[1] < some_value[1])) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#constraints as string in object +test.buildXMLquery.stringInObject<- function() { + concepts <- c("age", "sex") + assign("concepts", concepts, envir = .GlobalEnv) + constraint <- "concepts[1] < 65" + input <- substitute(constraint[1]) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#object name without index (this is handled differently than objects with index as the class differs) +test.buildXMLquery.objectWithoutIndex<- function() { + constraint <- "age" + input <- substitute(constraint) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#NA instead of constraint +test.buildXMLquery.objectWithNA<- function() { + constraints <- "age" + input <- substitute(constraints[2]) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#NA concept +test.buildXMLquery.constraintWithNAConcept<- function() { + constraints <- "age" + input <- substitute(constraints[2] < 65) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +test.buildXMLquery.objectWithSubstitute<- function() { + tmp <- c(substitute("age" <65), substitute("sex"== "female")) + input <- substitute(tmp[1]) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + + + + +#may also be useful for testing (need to connect to a database that has both the clinical and high dim data of gse8581): +# createPatientSet("GSE8581", "Age") #works on transmart-dev. +# createPatientSet("GSE8581", "Subjects") #works on transmart-dev. +# createPatientSet("GSE8581", "control") #works on transmart-dev. +# createPatientSet("GSE8581", "Nonsense") #isn't supposed to work +# createPatientSet("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev +# createPatientSet("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev +# createPatientSet("GSE8581", "age" < 65) #works on transmart-dev. +# createPatientSet("GSE8581", "sex" == "unknown") #isn't supposed to work +# createPatientSet("GSE8581", "lung" < 65) #isn't supposed to work +# createPatientSet("GSE8581", "Subjects" < 65) #isn't supposed to work +# createPatientSet("GSE8581", "age" < 65 | "sex" == "female") #works on transmart-dev. +# createPatientSet("GSE8581", "age" < 65 & "sex" == "female") #works on transmart-dev. +# createPatientSet("GSE8581", "age" < 65 & ("lung disease" == "control" | "lung disease" == "chronic obstructive pulmonary disease") & "Biomarker_Data") +# createPatientSet("GSE8581","sex"== "female" | ("age" < 65 & "Biomarker_Data")) +# createPatientSet("GSE8581","sex"== "female" & "age" < 65 | "Biomarker_Data") +# concepts <- c("age", "sex") +# createPatientSet("GSE8581", concepts[1] < some_value) #works on transmart-dev. +# createPatientSet("GSE8581", substitute(concepts[1] < some_value[1])) #works on transmart-dev. +# constraint <- "concepts[1] < 65" +# createPatientSet("GSE8581",constraint[1]) #works on transmart-dev. +# constraint <- "age" +# createPatientSet("GSE8581",constraint) #works on transmart-dev. +# constraints <- "age" +# createPatientSet("GSE8581",constraints[2]) #shouldn't work +# createPatientSet("GSE8581",constraints[2] < 65) #shouldn't work +# tmp <- c(substitute("age" <65), substitute("sex"== "female")) +# createPatientSet("GSE8581",tmp[1]) #works on transmart-dev +# concepts <- c("age", "sex") +# constraint <- "concepts[1] < 65" +# createPatientSet("gse8581", "concepts[1] < 65") + diff --git a/man/createPatientSet.Rd b/man/createPatientSet.Rd new file mode 100644 index 0000000..d67a7bb --- /dev/null +++ b/man/createPatientSet.Rd @@ -0,0 +1,166 @@ +\name{createPatientSet} +\alias{createPatientSet} + +\title{ +Define a patient set based on a series of constraints and retrieve the patient.set ID +} +\description{ +This function can be used to create a patient.set in tranSMART based on a set of constraints, and it returns the ID of +the created patient.set. This ID can be used in other functions, such as \code{\link{getObservations}} and +\code{\link{getHighDimData}} to retrieve only the data for the patients that belong to that specific patient.set.\cr +The function returns a list with the ID of the newly created patient.set, the size of the patient.set, the original user +input specifying the constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART, +called "finalQueryConstraints"). Optionally, the body of the POST request can be returned as well. This body contains +the query definition in XML format as it is sent to tranSMART. +} +\usage{ +createPatientSet(study.name, patientset.constraints, returnXMLquery = FALSE) +} + +\arguments{ + \item{study.name}{a character string giving the name of a study} + \item{patientset.constraints}{the definition of the patient.set constraints: an expression containing all criteria + (constraints) that the patients in the patient.set have to meet, or an object containing one such an expression as + created with function \code{\link{substitute}}, or a string. This expression can contain one or more constraints, + concatenated by the AND (\code{&}) and OR (\code{|}) operators, and it has to meet a strict format. \cr +A single constraint can either be: +\enumerate{ +\item a string with a reference to a concept. Ie. a pattern/regular expression matching a concept name, or a +partial/full concept path or link, as can be found in the \code{"fullName"} and \code{"api.link.self.href"} column of +the concept table retrieved by \code{getConcepts("STUDY_NAME"))}. The pattern or string is used to match it to a concept +name/path/link in the \code{getConcepts(study.name)} table and to then return the full concept path for the matching +concept. + If only single node is supplied as a constraint, without constraint operator and constraint value, then any study + concept (node) can be used, and even the values of categorical concepts can be used. All patients with a (non-missing) + value for that concept will be selected. If concept paths are used, then strings are taken literally, as in + \code{\link{grep}} with \code{fixed = T}, meaning it is case sensitive and regular expressions cannot be used. +\item or a constraint of the format: +\{string with reference to a concept (see point 1)\}\{relational_operator\}\{constraint_value\}, e.g. "age" < 65. All +patients that meet the criterium are selected. In this case it is not possible to use just any concept: the concept has +to be a numerical or categorical data node (end leaf). Selection on values of a high dimensional node is not supported. + } +References to concepts and constraint_values can also be passed as an object (variable) with one element, being a string +or a string/number respectively. E.g. if \code{concepts = c("age", "sex", "ethnicity")} supplying +\code{concepts[[1]] < 65} will also work. + +Multiple (sub)constraints can be combined into a larger constraint by use of the AND (\code{&}) and OR (\code{|}) +operators. Note: this has to adhere to a strict format: if both "AND" and "OR" operators are used in a single +patient.set constraints definition, the "AND" operator always has to be on the highest(outer) level and the "OR" +operator has to be on the lower(inner) level. E.g. if constraints c1, c2, etc..., are concatenated with both "AND" and +"OR" operators, then the whole constraint has to be of the format \code{X1 & X2 & X3}, with \code{Xi} being a +concatenation of one or more (sub)constraints separated by the "OR" operator (and surrounded by brackets), eg. +\code{Xi = (c1 | c2 | c3)} . A complete constraint, consisting of a concatenation of subconstraints, could for example +be: \code{(c1 | c2) & (c3 | c4 | c5) & c6 & ...}, or \code{c1}, or \code{c1 | c2 | c3}, \code{c1 & c2}. \cr +Example: +"sex"== "female" & ("age" > 65 | "blood_pressure" > 140) & ("diagnosis" == "diabetes" | "diagnosis" == "prediabetic") + + See also \code{\link{substitute}} for creating expressions in case you want to store the expressions in a variable + before calling createPatientSet. E.g. \code{createPatientSet("SOME STUDY", "age" < 65)}, will have the same result as: + \code{my_expression <- substitute("age" < 65) ; createPatientSet("SOME STUDY", my_expression)}. Alternatively, the + constraints can also be given as a string (\code{createPatientSet("SOME STUDY", "\"age\" < 65")} or + \code{age_concept <- "age"; createPatientSet("SOME STUDY", "age_concept < 65") }), but this might not be fully + supported. If the constraints are supplied as a single string, things that should be interpreted as text should be + quoted. This applies for example to concept names/paths/links and the categorical values (e.g. "\"sex\" == \"Male\""). + Else this will be interpreted as a variable name and the function will try to find a variable with that name in the + global environment and use the value stored in that variable.} + \item{returnXMLquery}{If TRUE the body of the POST is request is part of the returned list. This contains the query + definition in XML format.} + +} + +\details{ +For a constraint of the form \{reference to concept\}\{relational_operator\}\{constraint_value\}, the following +operations are possible: +\itemize{ +\item For numerical data nodes the relational_operations "<", ">", "<=",">=", "==" and "!=" can be used. +\item For categorical data nodes, only the "==" operation is possible. +} +some examples: \cr +correct format: \code{( "age" < 65 | "sex" == "Female") & "test" & ("test" == 1 | "test" == 2 | "test" == 3) } -- the +& operators are on the outer levels, and the expressions with the | operators are on the inner levels and between +brackets \cr +wrong format: \code{( "age" < 65 | "sex" == "female") & ("test" == 1 | ("test2" < 2 & "test3" == 3)) } --the & after +'"test2" < 2' is on a lower/more inner level than the | \cr +wrong format: \code{(( "age" < 65 | "sex" == "female") & ("test" == 1)) | "test" == 4 } --the | before the +'"test4" == 4' is on a higher/more outer level than the & +} + +\value{ +A list with the ID of the newly created patient.set, the size of the patient.set, the original user input specifying the +constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART), and optionally the +body of the POST request. +\item{patientsetID}{a numerical value containing the ID of the newly created patient.set. This patient.set ID can be +used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data +for only the patients that belong to that specific patient.set} +\item{patientsetSize}{a numerical value specifying the number of patients in the created patient.set} +\item{input_patientset.constraints}{a character string containing the input by the user} +\item{finalQueryConstraints}{a character string representing the interpretation of the user input, containing the the +constraints that were sent to tranSMART. This part of the output can be used to check if indeed the right concepts were +selected, based on the input. For concepts the full concept path is given and relational operators are represented by +text: "<" is represented by "LT", ">" by "GT", "<=" by "LE",">=" by "GE", "==" by "EQ" and "!=" by "NE". \cr Note: the +query constraints will always include a study concept path as well; this is added to ensure that only patients from the +supplied study are selected. \cr Note 2: if a constraint was supplied for a categorical node in the form of +\{concept\}\{relational_operator\}\{categorical value\}, e.g. "sex" == "female", only the path to the categorical valu +e is represented (e.g. '\\Public Studies\\SOME STUDY\\Subjects\\Sex\\female').} +\item{xmlQuery}{a character string containing the body of the POST request that is sent to the tranSMART instance. +This body contains the query definition in XML format as it is sent to tranSMART. This is only returned if +returnXMLquery = T.} +} +\references{ +} +\author{ +Tim Dorscheidt, Jan Kanis, Rianne Jansen. +Contact: development@thehyve.nl} +\note{To be able to access a transmart database, you need to be connected to the server the database is on. If you +haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} + + +\seealso{ \code{\link{getHighdimData}} and \code{\link{getObservations}} } +\examples{ +\dontrun{ + # obtain a list of all studies in the database + studies <- getStudies() + + # the following call will give all concepts for GSE8581 + concepts <- getConcepts("GSE8581") + + ## create patient.set + + #selecting all patients with a value for concept "age" + createPatientSet("GSE8581", "age") + + #selecting all patients with "age" < 65 + createPatientSet("GSE8581", "age" < 65) + + #or: + + my_concepts <- c("Age", "Sex", "Lung Disease") + constraint_value <- 65 + createPatientSet("GSE8581", my_concepts[1] < constraint_value) + + #multiple constraints can be combined: + createPatientSet("GSE8581", "Age" < 65 & "Sex" == "female" & ("Lung Disease" == "chronic obstructive pulmonary disease" | + "Lung Disease" == "control")) + + + # there are multiple ways the patient.set constraints can be supplied. The following will have the same result: + # 1 as expression + createPatientSet("GSE8581", "age" < 65) + + # 2 as an object (variable) containing a single expression + my_expression <- substitute("age"< 65) + createPatientSet("GSE8581", my_expression) + + #3 as string. + # supplying concept name as string: + createPatientSet("GSE8581", "\"age\" < 65") + # or if concept name is stored in an object (variable): + age_concept<- "age" + createPatientSet("GSE8581", "age_concept < 65") + + } + +} + +\keyword{ database } +\keyword{ transmart } diff --git a/man/getHighdimData.Rd b/man/getHighdimData.Rd index 5890516..e7db047 100644 --- a/man/getHighdimData.Rd +++ b/man/getHighdimData.Rd @@ -48,7 +48,8 @@ The remaining parameters are constraints that limit the amount of data that is r Assay constraints: \item{trial.name}{A single character string with the trial name.} - \item{patient.set}{A number indicating the patient set.} + \item{patient.set}{A number indicating the patient set, as created with \code{\link{createPatientSet}}. + It can be used to retrieve only the data for the patients that belong to that specific patient.set} \item{ontology.term}{A single character string containing the concept path.} \item{assay.ids}{A numeric vector containing the id's of the assays you want to retrieve.} \item{patient.ids}{A character vector with the patient ids that you want to retrieve.} @@ -91,7 +92,7 @@ If no projection is specified this function returns a list of the projections av \author{Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} \note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} -\seealso{\code{\link{hash}, \link{highdimInfo}, \link{getStudies}, \link{getConcepts}}.} +\seealso{\code{\link{hash}, \link{highdimInfo}, \link{getStudies}, \link{getConcepts}}, \link{createPatientSet}.} \examples{ \dontrun{ diff --git a/man/getObservations.Rd b/man/getObservations.Rd index 8deac55..d95264f 100644 --- a/man/getObservations.Rd +++ b/man/getObservations.Rd @@ -8,12 +8,14 @@ study. A subset of observations can be selected by filtering by concept. } \usage{ -getObservations(study.name, concept.match = NULL, concept.links = NULL, as.data.frame = TRUE) +getObservations(study.name, concept.match = NULL, concept.links = NULL, patient.set = NULL, as.data.frame = TRUE) } \arguments{ \item{study.name}{a character string giving the name of a study.} \item{concept.match}{a character string or character vector containing the concept name(s) that should be matched. For each vector element, the \code{getObservations} function will search within the requested study for the first concept which name contains the given character string. It uses the name column of the result from \code{\link{getConcepts}} to perform the matching.} \item{concept.links}{ a character string or a character vector containing the link(s) pointing to the locations of the chosen concepts on the server. Candidate values for this argument can be found in the \code{api.link.self.href} column of the \code{\link{getConcepts}} results. It is the most exact way to refer to a concept, and it overwrites the \code{concept.match} argument.} + \item{patient.set}{ A number indicating the patient set, as created with \code{\link{createPatientSet}}. + It can be used to retrieve only the data for the patients that belong to that specific patient.set} \item{as.data.frame}{logical (default setting is TRUE): should the list containing the observation values be converted to a dataframe?} } \details{ @@ -31,7 +33,7 @@ getObservations(study.name, concept.match = NULL, concept.links = NULL, as.data. \references{} \author{Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} \note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} -\seealso{\code{\link{getStudies}, \link{getConcepts}}} +\seealso{\code{\link{getStudies}, \link{getConcepts}, \link{createPatientSet}}} \examples{ \dontrun{ # The following will retrieve a list with observations for the study "foo" diff --git a/man/getPatientSet.Rd b/man/getPatientSet.Rd index a7f29f0..27ddeb3 100644 --- a/man/getPatientSet.Rd +++ b/man/getPatientSet.Rd @@ -13,7 +13,7 @@ getPatientSet(id) \arguments{ \item{id}{an integral number, the id of the patient set} \details{ - The function will return a named list with properties of the patient set. Patient sets are created in the Transmart web interface or with the \code{\link{getPatientSetId}} call (still to be implemented). Currently there is no support in the legacy Transmart web interface to view the id of a patient set, but the new web app will support that. + The function will return a named list with properties of the patient set. Patient sets are created in the Transmart web interface or with the \code{\link{createPatientSet}} call (still to be implemented). Currently there is no support in the legacy Transmart web interface to view the id of a patient set, but the new web app will support that. } } \value{ diff --git a/man/transmartRClient-package.Rd b/man/transmartRClient-package.Rd index 97dc078..d05550e 100644 --- a/man/transmartRClient-package.Rd +++ b/man/transmartRClient-package.Rd @@ -23,6 +23,7 @@ The following functions are available in the package: \code{\link{getHighdimData}}\cr \code{\link{highdimInfo}}\cr \code{\link{getPatientSet}}\cr + \code{\link{createPatientSet}}\cr } } diff --git a/tests/run-tests.R b/tests/run-tests.R index eb2e9f9..e532a52 100644 --- a/tests/run-tests.R +++ b/tests/run-tests.R @@ -25,9 +25,12 @@ library('RUnit') require("transmartRClient") +# If you want to run these tests in development on a not installed version of this package, do: +# TRANSMART_RINTERFACE_PKG_ROOT <- "/path/to/transmart/RInterface" + unittestsLocation <- system.file("unittests", package="transmartRClient") -test.suite <- defineTestSuite("highdimTests", +test.suite <- defineTestSuite("RInterface tests", dirs = unittestsLocation, testFileRegexp = "^runit.+\\.[rR]$")