Skip to content

Commit e817a1f

Browse files
committed
Add 'room' column to YMHA output if it exists in participants
1 parent 5cfc2f0 commit e817a1f

File tree

3 files changed

+54
-44
lines changed

3 files changed

+54
-44
lines changed

src/mindlogger_data_export/outputs.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ def _participants(self) -> pl.DataFrame:
377377
pl.col("firstName").alias("first_name"),
378378
pl.col("lastName").alias("last_name"),
379379
"site",
380+
cs.matches("^room$"),
380381
)
381382

382383
def _attendance(
@@ -427,8 +428,16 @@ def _completion(
427428
"first_name",
428429
"last_name",
429430
"site",
431+
cs.matches(r"^room$"),
430432
cs.exclude(
431-
["secret_id", "nickname", "first_name", "last_name", "site"]
433+
[
434+
"secret_id",
435+
"nickname",
436+
"first_name",
437+
"last_name",
438+
"site",
439+
cs.matches("^room$"),
440+
]
432441
).fill_null(False), # noqa: FBT003
433442
)
434443
site_completion = all_completion.partition_by("site", as_dict=True)

src/mindlogger_data_export/parsers.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -205,21 +205,21 @@ def datatype(self):
205205
"""Return Polars schema for response types."""
206206
return pl.Struct(
207207
[
208-
pl.Field("type", pl.String),
209-
pl.Field("raw_value", pl.String),
210-
pl.Field("null_value", pl.Boolean),
211-
pl.Field("value", pl.List(pl.String)),
212-
pl.Field("text", pl.String),
213-
pl.Field("file", pl.String),
214-
pl.Field("date", pl.Date),
215-
pl.Field("time", pl.Time),
216-
pl.Field("time_range", pl.Duration),
208+
pl.Field("type", pl.String()),
209+
pl.Field("raw_value", pl.String()),
210+
pl.Field("null_value", pl.Boolean()),
211+
pl.Field("value", pl.List(pl.String())),
212+
pl.Field("text", pl.String()),
213+
pl.Field("file", pl.String()),
214+
pl.Field("date", pl.Date()),
215+
pl.Field("time", pl.Time()),
216+
pl.Field("time_range", pl.Duration()),
217217
pl.Field(
218218
"geo",
219219
pl.Struct(
220220
[
221-
pl.Field("latitude", pl.Float64),
222-
pl.Field("longitude", pl.Float64),
221+
pl.Field("latitude", pl.Float64()),
222+
pl.Field("longitude", pl.Float64()),
223223
]
224224
),
225225
),
@@ -228,13 +228,13 @@ def datatype(self):
228228
pl.List(
229229
pl.Struct(
230230
[
231-
pl.Field("row", pl.String),
232-
pl.Field("value", pl.List(pl.String)),
231+
pl.Field("row", pl.String()),
232+
pl.Field("value", pl.List(pl.String())),
233233
]
234234
)
235235
),
236236
),
237-
pl.Field("optional_text", pl.String),
237+
pl.Field("optional_text", pl.String()),
238238
]
239239
)
240240

src/mindlogger_data_export/processors.py

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ class ResponseStructProcessor(ReportProcessor):
104104
NAME = "ResponseStruct"
105105
PARSER = ResponseParser()
106106
RESPONSE_SCHEMA = {
107-
"status": pl.String,
108-
"value": PARSER.datatype,
109-
"raw_score": pl.String,
107+
"status": pl.String(),
108+
"response": PARSER.datatype,
109+
"raw_score": pl.String(),
110110
}
111111

112112
def _run(self, report: pl.DataFrame) -> pl.DataFrame:
@@ -116,7 +116,8 @@ def _run(self, report: pl.DataFrame) -> pl.DataFrame:
116116
pl.col("rawScore").alias("raw_score"),
117117
pl.col("item_response")
118118
.str.strip_chars()
119-
.map_elements(self.PARSER.parse, self.PARSER.datatype),
119+
.map_elements(self.PARSER.parse, self.PARSER.datatype)
120+
.alias("response"),
120121
)
121122
).drop(
122123
"item_response_status",
@@ -134,12 +135,12 @@ class UserStructProcessor(ReportProcessor):
134135

135136
NAME = "UserStruct"
136137

137-
USER_SCHEMA = {
138-
"id": pl.String,
139-
"secret_id": pl.String,
140-
"nickname": pl.String,
141-
"tag": pl.String,
142-
"relation": pl.String,
138+
USER_SCHEMA: dict[str, pl.DataType] = {
139+
"id": pl.String(),
140+
"secret_id": pl.String(),
141+
"nickname": pl.String(),
142+
"tag": pl.String(),
143+
"relation": pl.String(),
143144
}
144145

145146
def _run(self, report: pl.DataFrame) -> pl.DataFrame:
@@ -180,14 +181,14 @@ class ItemStructProcessor(ReportProcessor):
180181

181182
NAME = "ItemStruct"
182183

183-
ITEM_SCHEMA = {
184-
"id": pl.String,
185-
"name": pl.String,
186-
"prompt": pl.String,
187-
"type": pl.String,
188-
"raw_options": pl.String,
184+
ITEM_SCHEMA: dict[str, pl.DataType] = {
185+
"id": pl.String(),
186+
"name": pl.String(),
187+
"prompt": pl.String(),
188+
"type": pl.String(),
189+
"raw_options": pl.String(),
189190
"response_options": pl.List(
190-
pl.Struct({"name": pl.String, "value": pl.Int64, "score": pl.Int64})
191+
pl.Struct({"name": pl.String(), "value": pl.Int64(), "score": pl.Int64()})
191192
),
192193
}
193194
PARSER = OptionsParser()
@@ -228,10 +229,10 @@ class ActivityFlowStructProcessor(ReportProcessor):
228229

229230
NAME = "ActivityFlowStruct"
230231

231-
ACTIVITY_FLOW_SCHEMA = {
232-
"id": pl.String,
233-
"name": pl.String,
234-
"submission_id": pl.String,
232+
ACTIVITY_FLOW_SCHEMA: dict[str, pl.DataType] = {
233+
"id": pl.String(),
234+
"name": pl.String(),
235+
"submission_id": pl.String(),
235236
}
236237

237238
def _run(self, report: pl.DataFrame) -> pl.DataFrame:
@@ -255,11 +256,11 @@ class ActivityStructProcessor(ReportProcessor):
255256

256257
NAME = "ActivityStruct"
257258

258-
ACTIVITY_SCHEMA = {
259-
"id": pl.String,
260-
"name": pl.String,
261-
"submission_id": pl.String,
262-
"submission_review_id": pl.String,
259+
ACTIVITY_SCHEMA: dict[str, pl.DataType] = {
260+
"id": pl.String(),
261+
"name": pl.String(),
262+
"submission_id": pl.String(),
263+
"submission_review_id": pl.String(),
263264
"start_time": pl.Datetime(time_zone="UTC"),
264265
"end_time": pl.Datetime(time_zone="UTC"),
265266
}
@@ -295,9 +296,9 @@ class ActivityScheduleStructProcessor(ReportProcessor):
295296

296297
NAME = "ActivityScheduleStruct"
297298

298-
ACTIVITY_SCHEDULE_SCHEMA = {
299-
"id": pl.String,
300-
"history_id": pl.String,
299+
ACTIVITY_SCHEDULE_SCHEMA: dict[str, pl.DataType] = {
300+
"id": pl.String(),
301+
"history_id": pl.String(),
301302
"start_time": pl.Datetime(time_zone="UTC"),
302303
}
303304

0 commit comments

Comments
 (0)