Skip to content

Commit 1f91d24

Browse files
authored
Merge pull request #16 from databrickslabs/feature-repeating-fields
Feature repeating fields
2 parents db2fcce + dbd45b7 commit 1f91d24

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

src/main/scala/com/databricks/labs/smolder/functions.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ import org.apache.spark.sql.functions._
2121

2222
object functions {
2323

24+
/**
 * Extracts the value at a specific index in a repeating field.
 *
 * The field text is split on every occurrence of `delim`, and the piece at
 * position `repIndex` is selected from the resulting array.
 *
 * @param col A column containing the repeated field from a message segment
 * @param repIndex The zero-based index of the repeated field value that must be extracted
 * @param delim The delimiter that separates repetitions within the field. It is
 *              matched literally (not as a regular expression). Defaults to "~".
 * @return Yields a new column containing the selected repetition of the field.
 */
def repeating_field(col: Column, repIndex: Int, delim: String = "~"): Column = {
  // Spark's split() interprets its pattern argument as a Java regular expression.
  // HL7 delimiters such as "|", "^", "&" and "\" are regex metacharacters, so the
  // delimiter is quoted to make sure it is always treated as literal text.
  val literalDelim = java.util.regex.Pattern.quote(delim)
  split(col, literalDelim).getItem(repIndex)
}
34+
2435
/**
2536
* Parses a textual, pipe-delimited HL7v2 message.
2637
*

src/test/scala/com/databricks/labs/smolder/functionsSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,19 @@ class functionsSuite extends SmolderBaseTest {
7070
assert(evnType.first().getString(0) === "A03")
7171
}
7272

73+
test("Test repeating segment function"){
  // Importing implicits requires a stable identifier, hence the local val.
  val session = spark
  import session.implicits._

  // A minimal two-segment message whose TST segment carries three "~"-separated repetitions.
  val rawMessage = "MSH\rTST|1234567890^^^HOSPITALONE^MRN~4646464646^^^HOSPITALTWO^MRN~9431675613^^^HOSPITALTHRE^MRN"
  val df = Seq(rawMessage).toDF("text")
  val hl7Df = df.select(parse_hl7_message(df("text")).alias("hl7"))

  // Pull the repeating field out of the TST segment once and reuse it for every check.
  val tstField = hl7Df.select(segment_field("TST", 0, col("hl7.segments")).alias("TST_0"))

  // Expected repetitions, listed in the order they appear in the field.
  val expectedRepetitions = Seq(
    "1234567890^^^HOSPITALONE^MRN",
    "4646464646^^^HOSPITALTWO^MRN",
    "9431675613^^^HOSPITALTHRE^MRN"
  )

  expectedRepetitions.zipWithIndex.foreach { case (expectedValue, repIdx) =>
    val extracted = tstField.select(repeating_field(col("TST_0"), repIdx, "~")).first().getString(0)
    assert( extracted === expectedValue )
  }
}
85+
7386
test("use the segment field and subfield functions to extract the patient's first name") {
7487

7588
val file = testFile("single_record.hl7")

version.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version in ThisBuild := "0.0.1-SNAPSHOT"
1+
version in ThisBuild := "0.0.3-SNAPSHOT"

0 commit comments

Comments
 (0)