Skip to content

Commit beecdd5

Browse files
committed
Add data structure for grouped measurements, pilot with temperature and blood pressure
1 parent: 9d684be · commit: beecdd5

File tree

1 file changed: +79 −39 lines changed

person_story.py

Lines changed: 79 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -12,17 +12,18 @@
1212
SrcStats = dict[str, SrcStatsResult]
1313
from typing import TypedDict, Callable, List, Optional, Dict, Union
1414

15-
class SingularMeasurement(TypedDict):
16-
values: list[float] | list[int]
15+
class MeasurementItem(TypedDict):
16+
unit_concept_id: int
17+
measurement_type_concept_id: int
18+
generator: Callable[[int|float], int|float]
19+
event_datetime: dt.datetime
20+
value: Union[float, int, None]
1721

1822
class GroupedMeasurements(TypedDict):
19-
datetime: dt.datetime
2023
person_id: int
2124
visit_occurrence_id: int
22-
concepts: Dict[int, tuple[int, int]]
23-
values: Dict[int, List[SingularMeasurement]]
24-
generators: Dict[int, Callable[[int|float], int|float]]
25-
25+
measurements: Dict[int, MeasurementItem]
26+
2627

2728
def random_normal(mean: float, std_dev: Optional[float] = None) -> float:
2829
"""Return a normal distributed value with the given mean and standard deviation.
@@ -241,15 +242,19 @@ def populate_group_measurement(
241242

242243
Systolic_blood_pressure_by_Noninvasive = 21492239
243244
Diastolic_blood_pressure_by_Noninvasive = 21492240
244-
measurement_type_concept_id = 32817 # EHR measurement
245+
measurement_type_concept_id_bp = 32817 # EHR measurement
245246
avg_systolic = 114.236842
246247
avg_diastolic = 74.447368
247248
avg_difference = avg_systolic - avg_diastolic
248-
unit_concept_id = 8876 # mmHg
249+
unit_concept_id_bp = 8876 # mmHg
250+
251+
Body_temperature = 3025315
252+
measurement_type_concept_id_temp = 32817 # EHR measurement
253+
unit_concept_id_temp = 9289 # degree Celsius
249254

250-
def get_diastolic_from_systolic(systolic: List[float]) -> float:
255+
def get_diastolic_from_systolic(systolic:float) -> float:
251256
"""Estimate diastolic value from systolic value."""
252-
return [s - avg_difference for s in systolic]
257+
return systolic - avg_difference
253258

254259
def timeseries(length: int) -> float:
255260
"""Estimate diastolic value from systolic value."""
@@ -260,43 +265,78 @@ def timeseries(length: int) -> float:
260265
"diastolic": get_diastolic_from_systolic
261266
}
262267

263-
m: GroupedMeasurements = {
264-
"concepts": {Systolic_blood_pressure_by_Noninvasive: (measurement_type_concept_id, unit_concept_id),
265-
Diastolic_blood_pressure_by_Noninvasive: (measurement_type_concept_id, unit_concept_id)},
266-
"values": {Systolic_blood_pressure_by_Noninvasive: [],
267-
Diastolic_blood_pressure_by_Noninvasive: []},
268-
"generators": {Systolic_blood_pressure_by_Noninvasive: generators["timeseries"],
269-
Diastolic_blood_pressure_by_Noninvasive: generators["diastolic"]},
270-
"datetime": dt.datetime.now(),
271-
"person_id": cast(int, person["person_id"]),
272-
"visit_occurrence_id": cast(int, visit_occurrence["visit_occurrence_id"]),
273-
}
274-
275-
m["values"][Systolic_blood_pressure_by_Noninvasive] = generators["timeseries"](10)
276-
m["values"][Diastolic_blood_pressure_by_Noninvasive] = generators["diastolic"](m["values"][Systolic_blood_pressure_by_Noninvasive])
277-
278-
def populate_values(
279-
event_datetime: dt.datetime,
280-
) -> dict[int, SqlRow]:
268+
def generate_group_values(
269+
m: GroupedMeasurements,
270+
) -> GroupedMeasurements:
271+
for measurement_id in m["measurements"].keys():
272+
if m["measurements"][measurement_id]["generator"] == generators["timeseries"]:
273+
m["values"][measurement_id] = m["measurements"][measurement_id]["generator"](1)
274+
if m["measurements"][measurement_id]["generator"] == generators["diastolic"]:
275+
m["values"][measurement_id] = m["measurements"][measurement_id]["generator"](m["values"][Systolic_blood_pressure_by_Noninvasive])
276+
return m
277+
278+
def toSqlRows(
279+
group: GroupedMeasurements,
280+
) -> List[SqlRow]:
281281

282282
"""Generate two rows for the measurement table."""
283-
r: SqlRow = {
284-
"measurement_concept_id": m.concept_id,
285-
"person_id": m.person_id,
283+
rows:List[SqlRow] = []
284+
visit_occurrence_id = group["visit_occurrence_id"]
285+
person_id = group["person_id"]
286+
event_datetime = group["event_datetime"]
287+
for (concept_id, value) in zip(group["concepts"], group["values"]):
288+
r: SqlRow = {
289+
"measurement_concept_id": concept_id,
290+
"person_id": person_id,
291+
"visit_occurrence_id": visit_occurrence_id,
292+
"measurement_datetime": event_datetime,
293+
"measurement_date": event_datetime.date(),
294+
"measurement_type_concept_id": group["concepts"][concept_id][0],
295+
"unit_concept_id": group["concepts"][concept_id][1],
296+
"value_as_number": value,
297+
}
298+
rows.append(r)
299+
return rows
300+
301+
def populate_group_values_on_date(
302+
person_id: int,
303+
visit_occurrence_id: int,
304+
event_datetime: dt.datetime,
305+
) -> List[SqlRow]:
306+
307+
rows: List[SqlRow] = []
308+
blood_pressure: GroupedMeasurements = {
309+
"person_id": person_id,
286310
"visit_occurrence_id": visit_occurrence_id,
287-
"measurement_datetime": event_datetime,
288-
"measurement_date": event_datetime.date(),
289-
"measurement_type_concept_id": m.type_concept_id,
290-
"unit_concept_id": m.unit_concept_id,
291-
"value_as_number": abs(random_normal(m.properties["average_value"], m.properties["stddev_value"])),
311+
"measurements": {Systolic_blood_pressure_by_Noninvasive: {"measurement_type_concept_id": measurement_type_concept_id_bp,
312+
"unit_concept_id": unit_concept_id_bp,
313+
"event_datetime": event_datetime,
314+
"value": None,
315+
"generator": generators["timeseries"]},
316+
Diastolic_blood_pressure_by_Noninvasive: {"measurement_type_concept_id": measurement_type_concept_id_bp,
317+
"unit_concept_id": unit_concept_id_bp,
318+
"event_datetime": event_datetime,
319+
"value": None,
320+
"generator": generators["diastolic"]}}
321+
}
322+
323+
body_temperature: GroupedMeasurements = {
324+
"person_id": person_id,
325+
"visit_occurrence_id": visit_occurrence_id,
326+
"measurements": {Body_temperature: {"measurement_type_concept_id": measurement_type_concept_id_temp,
327+
"unit_concept_id": unit_concept_id_temp,
328+
"event_datetime": event_datetime,
329+
"value": None,
330+
"generator": generators["timeseries"]}}
292331
}
293332

294-
return r
333+
rows: List[SqlRow] = toSqlRows(generate_group_values(blood_pressure)) + toSqlRows(generate_group_values(body_temperature))
334+
return rows
295335

296336
event_datetimes = random_event_times(10.0, visit_occurrence)
297337
events: list[tuple[str, SqlRow]] = []
298338
for event_datetime in sorted(event_datetimes):
299-
systolic, diastolic = populate_values(cast(int, person["person_id"]),
339+
systolic, diastolic = populate_group_values_on_date(cast(int, person["person_id"]),
300340
cast(int, visit_occurrence["visit_occurrence_id"]),
301341
event_datetime)
302342
events.append(("measurement", systolic))

0 commit comments

Comments (0)