From f1f617fc436e0cd92c8d12b72286395f190c18e8 Mon Sep 17 00:00:00 2001 From: Vishal Vyas Date: Sat, 18 Apr 2026 19:05:17 -0400 Subject: [PATCH 1/3] adding changes for the project --- docs/api/tasks.rst | 1 + ...ealth.tasks.DischargeNoteSummarization.rst | 7 + examples/discharge__summary_samples.ipynb | 3152 +++++++++++++++++ pyhealth/tasks/__init__.py | 1 + .../tasks/discharge_note_summarization.py | 147 + .../discharge/note/discharge.csv.gz | Bin 0 -> 10578 bytes .../core/test_discharge_note_summarization.py | 162 + 7 files changed, 3470 insertions(+) create mode 100644 docs/api/tasks/pyhealth.tasks.DischargeNoteSummarization.rst create mode 100644 examples/discharge__summary_samples.ipynb create mode 100644 pyhealth/tasks/discharge_note_summarization.py create mode 100644 test-resources/discharge/note/discharge.csv.gz create mode 100644 tests/core/test_discharge_note_summarization.py diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst index 399b8f1aa..4a3de79f2 100644 --- a/docs/api/tasks.rst +++ b/docs/api/tasks.rst @@ -229,3 +229,4 @@ Available Tasks Mutation Pathogenicity (COSMIC) Cancer Survival Prediction (TCGA) Cancer Mutation Burden (TCGA) + Discharge Note Summarization (MIMIC-IV) diff --git a/docs/api/tasks/pyhealth.tasks.DischargeNoteSummarization.rst b/docs/api/tasks/pyhealth.tasks.DischargeNoteSummarization.rst new file mode 100644 index 000000000..916a12c9f --- /dev/null +++ b/docs/api/tasks/pyhealth.tasks.DischargeNoteSummarization.rst @@ -0,0 +1,7 @@ +pyhealth.tasks.DischargeNoteSummarization +======================================= + +.. autoclass:: pyhealth.tasks.discharge_note_summarization.DischargeNoteSummarization + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/examples/discharge__summary_samples.ipynb b/examples/discharge__summary_samples.ipynb new file mode 100644 index 000000000..c11f1bc53 --- /dev/null +++ b/examples/discharge__summary_samples.ipynb @@ -0,0 +1,3152 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9QuP-XLwAF3w" + }, + "source": [ + "# Generate cleaned Discharge Summary Samples using DischargeNoteSummarization Task\n", + "\n", + "This notebook demonstrates the usage of MIMIC-IV Note dataset and DischargeNoteSummarizationTask to generate discharge summary samples for LLM training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4Hj9Zi4v2Nis" + }, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eWj28Ms7AEO9" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MUAyQZQnFbbv" + }, + "outputs": [], + "source": [ + "import os\n", + "from pyhealth.datasets import MIMIC4Dataset\n", + "from pyhealth.tasks import BaseTask\n", + "from pyhealth.data import Patient\n", + "from typing import List, Dict, Any\n", + "from pyhealth.processors import TextProcessor\n", + "import argparse\n", + "import random\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "pd.options.mode.chained_assignment = None\n", + "import re\n", + "import pickle\n", + "import nltk\n", + "from collections import Counter\n", + "from tqdm import tqdm\n", + "import string" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "738h8qAMA5Fs" + }, + "source": [ + "# Initialize the MIMI4Dataset using the note data downloaded from Physionet website.\n", + "\n", + "Name of dataset used is discharge.csv.gz from Physionet : https://physionet.org/content/ann-pt-summ/1.0.1/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lRIRtrhQNKS2", + "outputId": "886f4286-e412-4e3a-9f91-bd24c329d397" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Memory usage Starting MIMIC4Dataset init: 882.8 MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Memory usage Starting MIMIC4Dataset init: 882.8 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initializing mimic4 dataset from None|/content/drive/MyDrive/llm_data/|None (dev mode: False)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Initializing mimic4 dataset from None|/content/drive/MyDrive/llm_data/|None (dev mode: False)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No cache_dir provided. Using default cache dir: /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:No cache_dir provided. Using default cache dir: /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initializing MIMIC4NoteDataset with tables: ['discharge'] (dev mode: False)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Initializing MIMIC4NoteDataset with tables: ['discharge'] (dev mode: False)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using default note config: /usr/local/lib/python3.12/dist-packages/pyhealth/datasets/configs/mimic4_note.yaml\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Using default note config: /usr/local/lib/python3.12/dist-packages/pyhealth/datasets/configs/mimic4_note.yaml\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Memory usage Before initializing mimic4_note: 882.9 MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/pyhealth/datasets/mimic4.py:121: UserWarning: Events from discharge table only have date timestamp (no specific time). This may affect temporal ordering of events.\n", + " warnings.warn(\n", + "INFO:pyhealth.datasets.mimic4:Memory usage Before initializing mimic4_note: 882.9 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initializing mimic4_note dataset from /content/drive/MyDrive/llm_data/ (dev mode: False)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Initializing mimic4_note dataset from /content/drive/MyDrive/llm_data/ (dev mode: False)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using provided cache_dir: /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/cf4117bc-6d03-5673-a78c-162795de42ea\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Using provided cache_dir: /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/cf4117bc-6d03-5673-a78c-162795de42ea\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Memory usage After initializing mimic4_note: 883.0 MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Memory usage After initializing mimic4_note: 883.0 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Memory usage After Note dataset initialization: 883.0 MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Memory usage After Note dataset initialization: 883.0 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Memory usage Completed MIMIC4Dataset init: 883.0 MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Memory usage Completed MIMIC4Dataset init: 883.0 MB\n" + ] + } + ], + "source": [ + "full_note_dataset = MIMIC4Dataset(\n", + " note_root='/content/drive/llm_data/',\n", + " note_tables=[\"discharge\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Usgcv8CU4cj2" + }, + "outputs": [], + "source": [ + "full_note_dataset.stats()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hhcWPLJmBgOQ" + }, + "source": [ + "# Print an event using a patient id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aacWVtZdnsoA" + }, + "outputs": [], + "source": [ + "print(full_note_dataset.get_patient('10000032').get_events())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xyqAklNtB_t0" + }, + "source": [ + "# Define the DischargeNoteSummarization Task\n", + "\n", + "Create DischargeNoteSummarization class , initialize the input and output schema.\n", + "Extract specific sections \"Brief Hospital Course\" and \"Discharge Instructions\". Clean the samples to remove extra spaces and new lines to create a paragraph for each sample text.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "31loQfr6nSRf" + }, + "outputs": [], + "source": [ + "\n", + "from typing import Dict, List, Any, Tuple, Union\n", + "\n", + "class DischargeNoteSummarization(BaseTask):\n", + " task_name: str = \"DischargeNoteSummarization\"\n", + "\n", + " input_schema: Dict[str , str] = {\n", + " \"subject_id\" : \"text\",\n", + " \"hadm_id\": \"text\",\n", + " \"text\": \"text\"\n", + " }\n", + "\n", + " output_schema: Dict[str, str] = {\n", + " \"brief_hospital_course\": \"text\",\n", + " \"summary\": \"text\"\n", + " }\n", + "\n", + "\n", + " def __call__(self, patient: Patient) -> List[Dict[str, Any]]:\n", + " samples = []\n", + " subject_id = patient.patient_id\n", + " for dis in patient.get_events(\"discharge\"):\n", + "\n", + " textNote = dis.attr_dict['text']\n", + " hadm_id = dis.attr_dict['hadm_id']\n", + "\n", + " ## Extract the brief_hospital_course\n", + "\n", + " start = textNote.find(\"Brief Hospital Course:\")\n", + " if start < 0:\n", + " #brief_hospital_course = None\n", + " continue\n", + " end = textNote.find(\"Medications on Admission:\")\n", + " if end == -1:\n", + " end = textNote.find(\"Discharge Medications:\")\n", + " if end == -1:\n", + " end = textNote.find(\"Discharge Disposition:\")\n", + " if end == 0 or start >= end:\n", + " continue\n", + " brief_hospital_course = textNote[start: end].replace('\\n', ' ')\n", + " brief_hospital_course = ' '.join(brief_hospital_course.split())\n", + " # Quality check\n", + " num_words = len(textNote.split(' '))\n", + " \n", + " #extract the summary\n", + " start = textNote.find(\"Discharge Instructions:\")\n", + " end = textNote.find(\"Followup Instructions:\")\n", + " if start < 0 or end < 0:\n", + " continue\n", + " summary = textNote[start: end].replace('\\n', ' ')\n", + " summary = ' '.join(summary.split())\n", + " if len(summary) == 0 or len(summary) < 350:\n", + " continue\n", + " summary = summary.strip()\n", + "\n", + "\n", + "\n", + " samples.append({\n", + " \"text\":textNote,\n", + " \"brief_hospital_course\": brief_hospital_course,\n", + " \"summary\" : summary,\n", + " \"subject_id\" : subject_id,\n", + " \"hadm_id\": hadm_id\n", + " })\n", + "\n", + " return samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8DqgPHBSrXdm" + }, + "outputs": [], + "source": [ + "! rm -r /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/tasks/PatientNoteProcessingTask_46bb372d-34eb-5a38-bd99-ca6f30f0f026/samples_cdbbc602-34e2-5a41-8643-4c76b08829f6.ld" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7iZFJlJKDEyW" + }, + "source": [ + "# Run the Discharge Note Summarization Task\n", + "\n", + "Run the DischargeNoteSummarization Task with 4 workers and note dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 905 + }, + "id": "5Lh3QhOUqCWe", + "outputId": "5ddfc211-28af-4c22-dbda-e04f368d7b2e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setting task PatientNoteProcessingTask for mimic4 base dataset...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Setting task PatientNoteProcessingTask for mimic4 base dataset...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Task cache paths: task_df=/root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/tasks/PatientNoteProcessingTask_46bb372d-34eb-5a38-bd99-ca6f30f0f026/task_df.ld, samples=/root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/tasks/PatientNoteProcessingTask_46bb372d-34eb-5a38-bd99-ca6f30f0f026/samples_cdbbc602-34e2-5a41-8643-4c76b08829f6.ld\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Task cache paths: task_df=/root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/tasks/PatientNoteProcessingTask_46bb372d-34eb-5a38-bd99-ca6f30f0f026/task_df.ld, samples=/root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/tasks/PatientNoteProcessingTask_46bb372d-34eb-5a38-bd99-ca6f30f0f026/samples_cdbbc602-34e2-5a41-8643-4c76b08829f6.ld\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Applying task transformations on data with 4 workers...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Applying task transformations on data with 4 workers...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Incomplete parquet cache at /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/global_event_df.parquet (directory exists but contains no parquet files). Removing and rebuilding.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:pyhealth.datasets.base_dataset:Incomplete parquet cache at /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/global_event_df.parquet (directory exists but contains no parquet files). Removing and rebuilding.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No cached event dataframe found. Creating: /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/global_event_df.parquet\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:No cached event dataframe found. Creating: /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/global_event_df.parquet\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Combining data from note dataset\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Combining data from note dataset\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Scanning table: discharge from /content/drive/MyDrive/llm_data/note/discharge.csv.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Scanning table: discharge from /content/drive/MyDrive/llm_data/note/discharge.csv.gz\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating combined dataframe\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.mimic4:Creating combined dataframe\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Caching event dataframe to /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/global_event_df.parquet...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Caching event dataframe to /root/.cache/pyhealth/98de9a11-0af5-5cd9-81f2-2da31802c232/global_event_df.parquet...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Detected Jupyter notebook environment, setting num_workers to 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Detected Jupyter notebook environment, setting num_workers to 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single worker mode, processing sequentially\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Single worker mode, processing sequentially\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Worker 0 started processing 145914 patients. (Polars threads: 2)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyhealth.datasets.base_dataset:Worker 0 started processing 145914 patients. (Polars threads: 2)\n", + " 0%| | 0/145914 [00:00\n" + ] + } + ], + "source": [ + "\n", + "mimic_df = pd.DataFrame(processed_dataset)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iVdEXb_gDdGw" + }, + "source": [ + "# Print the dataframe head" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cu-s0F4rkRHm", + "outputId": "5f78d170-f7f5-4136-b1bd-9b10190332e5" + }, + "outputs": [], + "source": [ + "print(mimic_df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HLrS_fpLkwz8", + "outputId": "791bc340-3f3c-4bef-d733-f499627e8f38" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "740" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(mimic_df.iloc[1]['summary'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IUA4wtrjDtMJ" + }, + "source": [ + "# Perform further processing on the dataframe\n", + "\n", + "Run more data cleaning tasks on the mimic_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "4002f3c605c648409776ec137ccff2bf", + "008bd292f7a9429eb3c7570fbe9fd093", + "46d831dc293444229a86184c910243a0", + "68ae05d25d064e0b8bd39fadb955311e", + "cdd7dd43d43d4bed8d5b401b5e242696", + "20fae152868747c2a175d48936cde9ad", + "a31b0d30e44d469c828cb18393803334", + "66a3ab0b3a484264af941300ac647d6f", + "b9632797c62b454fb347a8fecbb226f7", + "e13239bfb1214cf398f9d1d2b303f105", + "9da0e08960a2462cb9f1ba55656b8116" + ] + }, + "id": "qK-wvZ4D2VLX", + "outputId": "c51d6460-b4d8-4ef9-ab6c-21f828f78b48" + }, + "outputs": [], + "source": [ + "import swifter\n", + "re_service = re.compile(r'^Service: (.*)$', re.IGNORECASE|re.MULTILINE) # Either after Serive:\n", + "re_service_extra = re.compile(r'^Date of Birth:.*Sex:\\s{0,10}\\w\\s{0,10}___: (.*)$', re.IGNORECASE|re.MULTILINE) # Fallback if deidentified\n", + "\n", + "mimic_df['service'] = mimic_df['text'].swifter.apply(lambda s: re_service.search(s).group(1) if re_service.search(s) is not None else None)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "81495e8a1be5469a87d0bb300e82688d", + "db208ab02eb6476aa0d1d3bb4349bd54", + "27653f3de94446a69e8d55de32bb8e6f", + "ee6409daec7b495dae8e08b2f89ffcea", + "280591f53ff74b4cb18851435e5916e2", + "483af853bd7047f4b51ea31f9267276d", + "199dcfce4cf24a8b9153fe2c3c5b4914", + "5dcf36ab5c474cc792718ed10f3d5571", + "52c4ae1316c24e9599e86af27d14b394", + "4e91f191ce6242c9a2fe3f1b1b7aec24", + "3d4731460a9549258137e20e10e2ed75" + ] + }, + "id": "ktnXJRz0a5B4", + "outputId": "16bf4c3b-4aa5-4c89-b837-f7f35dbcffb1" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "81495e8a1be5469a87d0bb300e82688d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Pandas Apply: 0%| | 0/183 [00:00= 3]\n", + "print(f\" Removed {old_len - len(mimic_df)} summaries with less than 3 sentences.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "5f77f53997744569aa6d09f758026afd", + "ad18bfabf116422a878bb74608daeb2d", + "e27e54ac631644678726ee2edb2b6236", + "43f577596fdf47bb9eb46a372e2bfc84", + "e2071364bdeb49dabbac2da5d7beb505", + "d045bdf233574808a63194e900bc1551", + "b6dcb898478c4920bf0b8215dc15f679", + "243d576b442d47d8ba4e2d1b9f140d7f", + "ce70cdf766a14c6cbe982314afebff47", + "47dd58da2e29401db04ad7f5e7dc80af", + "84c8d429e6fc405fb73aae1b342a116c" + ] + }, + "id": "VaFfqcruyBAv", + "outputId": "967b99b6-f2ac-43d5-dcad-cbc0d0b2e191" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Combine all sentences with single whitespaces.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5f77f53997744569aa6d09f758026afd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Pandas Apply: 0%| | 0/251248 [00:00>> from pyhealth.datasets import MIMIC4Dataset + >>> from pyhealth.tasks import MIMIC4Dataset + >>> dataset = MIMIC4Dataset(note_root=NOTE_ROOT,note_tables=["discharge"]) + >>> task = DataforLlmSummaries() + >>> samples = dataset.set_task(task) + """ + + task_name: str = "DischargeNoteSummarization" + input_schema: Dict[str, str] = { + "subject_id": "text", + "hadm_id": "text", + "text": "text" + } + + output_schema: Dict[str, str] = { + "brief_hospital_course": "text", + "summary": "text" + } + + def __call__(self, patient: Patient) -> List[Dict[str, Any]]: + """ + Generates patient brief_hospital_course and summary samples for a single patient. + + Args: + patient (Patient): A patient object containing at least one 'discharge' event. + + Returns: + List[Dict]: A list containing a dictionary for each patient visit with: + - "text": patient clinical notes text, + - "brief_hospital_course": patient brief hospital course, + - "summary": patient discharge summary text, + - "subject_id": patient identifier, + - "hadm_id": Hospital Admission Identifier, + + """ + samples = [] + subject_id = patient.patient_id + + for dis in patient.get_events("discharge"): + textNote = dis.attr_dict["text"] + hadm_id = dis.attr_dict["hadm_id"] + + # Extract Brief Hospital Course , remove new lines and remove whitespaces to create single paragraph + start = textNote.find("Brief Hospital Course:") + if start < 0: + continue + end = textNote.find("Medications on Admission:") + if end == -1: + end = textNote.find("Discharge Medications:") + if end == -1: + end = textNote.find("Discharge Disposition:") + if end == 0 or start >= end: + continue + brief_hospital_course = textNote[start:end].replace("\n", " ") + brief_hospital_course = " ".join(brief_hospital_course.split()) + + # Extract Discharge Instructions (summary) and filter out samples less than MIN_SUMMARY_LENGTH + start = textNote.find("Discharge Instructions:") + end = textNote.find("Followup Instructions:") + if start >= 0 and end >= 0: + summary = textNote[start:end].replace("\n", " ") + summary = " ".join(summary.split()) + + summary = summary.strip() + #Only add to samples if length of summary greater than specified MIN_SUMMARY_LENGTH + if len(summary) >= MIN_SUMMARY_LENGTH: + samples.append({ + "text": textNote, + "brief_hospital_course": brief_hospital_course, + "summary": summary, + "subject_id": subject_id, + "hadm_id": hadm_id, + }) + + return samples \ No newline at end of file diff --git a/test-resources/discharge/note/discharge.csv.gz b/test-resources/discharge/note/discharge.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a24289a56a137991f3b34d703a6f98c622fb88c4 GIT binary patch literal 10578 zcmV-YDXrEYiwFqge&J~X|72-%V`yP=XJsy9b9MmjeOq%P$+hl#{fhKBCal&#iK~#< zdpiOFEwLK`2GleCFRINP3Ys+S?}m?K#!8cUaC*A{CmaSI#H5%J6Va7Fp~T!k*|qWuhBov zMs={LHtH4gwbE#`uiIDcW@WIb;P*!Ts#dMFtF?}()w=w@{I_dSZ+9D)^xF@@nm9Yz z72>ad{i`_j&qLrPViMvFdc_h}Z@u&4*b4(OaFUZP2VP9`f0X78-IBhEa3gNKDA}H9 zxR9^(@<+V7kkPZ}qV4@~FzSycLmJrkW%TIDxQj`L;!_a51S0!UT!`NKSw=BNjnYRk zmyVlYmNB(V5*e($;IXS`(%*V=Bl_WP?>kD=-mv&+KbVE*XRUmvBx_C zt?r$_g^^gK7|(TlfzPAh*@?YpDP~dV%5@sat~GNa$CW`MV=uPQ>dqq;9%$jSNMe*k zjC?AuRu_U`SYDin7b%}?A*@*< zBDoKvBo?vsH&wjshC$*5DYf^p+E=DYy!WCY^g-h=kjzQyAr_YN7*@nex=spC#9Jvg zQu?v*y(cMt_#xTKgII^bpA!5V9UYP_`u*Vt?lajsiLj{64?nC!8Pi)Y4(OGHVxJ~r zhn*nYt=tj$V*#dYWn`oA0!^ra!2S`|CiMOAMNFJ3NRmXJOGH_FGKp<445P?=$rYX= zZM#?vPhYLA6JrSy{X5UJ>@oq5q=PB6+kj*?PsCc|Rdh!*k!!hgDkLe3=?(wIYuj)i zNJqV_-`!le7s17IMXWDeuy%cj)5z1WKud^C7(xs~FAmi#cooLj=4peeB3E0ei;5E<$Q~yvAFI&&ug4=QEKg%`KC5A4*@Xnz$yAY2eyhu_l zJSDrdme_Nj1W5;^AXY6DC9RB<871++b7_)~=}xSCDc4Nct%n(}qBJHkAa;sGN{Fn5 zY3l)5u59j2&qq4(ff{)+8dbzj`Z55OZ{|0%Ij9E#k-Yn`Y2+^4CkOtF3FE7iD9Q@! zM$0JnC5=m*rH!s6;zcU4)7yyn{DP$6l_$0#cwEQBnA^APhe z-j=we$+afs$LqqHd^g;NK7B2{YSEq~f~zUWHFj*8Z~TmV&_`I^qf9obPh*l9iivi5 z8x5g%JlF>h+VI&+Xj8sO?9uBhyCpg`QLkNITsB4hQe1a_ocGXh?5uc~IuHS-TyMO* z=q>R7850x#XB`bTE?1uK)$PlQ`1z(U+Mhs39|pviiRg4HVlnGU(MzB|EDijCN=>bM_+MNhjh3HuJrQE?sYfo@v`?r2}R{ z2otl1NHijd8f_zCK@&fFVH*1fWj$EMCLS>dD+k3AZm#og#LDHC)o zD~9|lNo-OY4V|wJFm5(eiv9{RAoQngvsJ*SONbv4qp-wy7e1jEdat9F1xAIuHb=yG zBz#F?SX>Pmm)OIItn0TtI8-ye>^gx~>;% z*oV^HhMsGt7^*22x3XF)Y^5TYV)72UoqRp!vJ;5=`H1?i*KAR6Az&KzWQ83T-XM)N z;rLYu{EfGVRR-BN8+e)Int%vlw3cub+!Tr{NFbLxC-7iscAk%|PqQE&!X_W5-Y5;{ zhAwoXYHDJDsvY5N#5r_v%H&3S>gR%AS2 z=qL6kZ%3p!sZC@yVxf1m&=w?>Uw=$5C<9BJflOE}kyWw!M?27)fcSBM_jugBp>~FZ- z*bmd0z$YvN?znWHx*|^Zm`4S~6=Uw8|G6vX^Lb@aUo`mV-9>cqNM;}f`?=>8d9K)_ z@JrU%jqoZRmc9L!)gaoupbd@DTdl*L47%F?T9efx0mkM&z7S#7`UF0rjDV zeDwpoqJWqVwdJO9tT=puy+qE5Tt6yG7hF3Kt%#RdOx3ll+1;1LsNWll;V-@W(WJK=O(zJPG<(6^_63aZb^Cpm{tGgbb%D1- z`)A_payg?}&maC$`G4fZNpfOq$Ouj|S+BX$=_(ql*Sd}N>>gq0?f9WTT@0)B+V#13 zm=AkmQf;))MIRrR)tcQrhf_ZoJ=|C8%`5abUG^3W`r3r^)4v-}=c56&Zi^)zjbWf0 z)ka73Cg7I1?aiv!M+3q!zx_C`KE$j5VFZt!!&N$4+|HIC&}r>lz-z+SDSfTC@OAu2 z%i23w*BGS4W-9#!JxmVX~}t>c<1nx7l)8ZPx5- z4*%agwrtDl|1$5Aa&}zoLGt6*@DfrVs@ov}rtm&y&+y@5L3^!Es(h`q_Lk9mT*i*%0oCiI1X$Lb#6e% z;0S!MjUF8`OBQ>#eb^KD_7ES3M`BlY2EQ52^;pkvH)1s9_5q-F!p zc?@8Jp(FNK9o+v!!bC}mAv5Pch23(AG6)c;(NsM`A7R9Sk9!e*BWA|5Z#Xi#3*$Y- z5TYNZP@Mm2$q}A+7P7H@{iEna@dzP; zw?#J1KXCG;=#*3_$PUv-;-R7m7`5?J#E{OjdD^p}CPyMsaTt237XyJ(hX)u^jh_y;Dz8G@!CBJl5i7CIUz( zxuPI$O1VrorXBQW@ex`&mOBsrcdI6Lk770zgWhQTg`X~9KEgc3)GoaN0}GLmnn@fg z$0-O0&4{55gP>mNu;Kvs*U>Z~wpkm(WjA0*N}=3$f4 z7Rp%{v;K?)hPMM=0in}9dRiNYfLHdW4{8#~7jDk|nYa<&GXDYBOr|W|)1DqHr3KF% zk_29xzC@TNCeVxBNGb^*-VRj!qGAuC3}r|;1n0CX?@c3fU2D0eB&p1#P~@*du(1~) zXpW0TQ;t||H5E15q-qP2(kM#`>AbH>3M;^#5AOoui!VmO)`7<(8e;bF@uL{siNCb& zy0iHN!LO9Razbe_z2TsI?)Zs(jGSjLf`FP{Z$<+SxQ9EEa%e0#9kRNFCfD9B)5Z9GJ=$Pi#lBn%o~W$M|+R}V&tVeaRX&VYo5pzI-3k{ zc)pi+YKD|i&ob==%5&o5H>|kvTh`RHzp{<3`7h#pbNsk2cvB>2@dpZZVcJMM9SR$< zp1t_vd9x;#{0e%I8_)OS?s=0q^8LhVf0NdiUuAITn__hZBD*k|aNEQE7OJS7^?+G0 zfmPw4#3x!IR7b*_;tt09=qVdr4?Hxcg$>p$GYjS3`eCrvecfY!;*~|>Bq@30?98i; zgy0nqZEGx_B}d_ChTg+8<|+haMVjmlY-Hqc-oUnc^p}z$rqAAP1&4<{`YOa~Ehz=U zJ(ngm3ZVZ}SIgpn=5Zgh@~K!l1>*?pZ6_w>nm8QAx;hp=gy|o|moUW-JC2r8?W$OD z67C%Ek%`I)+05MLrq&ieb4nTAs8l4a9iPasQ@U$kD9)%@jIws3Znwvro?t!BBu5XC zSV^B;tDn)HGf?m;TqsxS9eRuYD3M0faaI~>o1c3ATFVYbvbNISbCVRNhO``+f|C7! zvjtzz3rb`6Vm6M36#g=U1zSCgtEx1S{Cny}D$Ql>s3t&}lx%cn zck-v$cpsccF=V64i)M@iM+;tt1zkYi28{ue<#S>r(Y@m1uAuqXyq7WTlazb2L`$tP z+-!JgiP0QE)+fd$WXHi~zP}I+a>JC_F1CM^E-acU>WR6PFrT;1vsA>Rv>!H-8}1Lv zOp(3HtfMWal-eRUvqID**>n6C=MXD-@mLOFKS8~e-1VQyg^#!VtI5r;;O6|n`a+}8 z)RvMQCbEMcdGeThc_UDu@k6 zTSVV3LaO9Van?RnopmT%0I`K&=!DLZOa0x&S(W4|MK)QjmVI3C-`UIb&BN~~v3G7qLHH@hFxdu6_x3=iV@x2kp1)|F6R)wNeR?v$N z2;9P5Oo>NVC4f1f>#Rj{taXnmUz{PFfY1H`t>NlnD<3$YsQpEWt-6vJ2pe+Yk@ugu zp%qg_qr8r;`(de)N_j3-OTb|aoacK~s-Bm@VTP&`$KKAPpqfg)kw<>7+00zTisgnh%!sHuT$2+xy7Jx8=-xIWqXdOH?9;#C z#1u?P)8d+@34#)}7Fs{CUx}ugSxGNahASZL4N5}G(e&^OCk$XGQlHc}MXGM(>0B`J z_h@M18jIV_z0tH#rCU(il&sV{G130nvu*>J~t zizO4UH(qjPmzKt%l4TmB>juA4Z(Q@)4E5METwJxdkr7?~#S(Oy=8KRi1ph$XyTxR5P46k4)& zVYJ^8DH?WL{rXzZQ!;OpFs6>HE50Vfs!A{HaIhPZ=^5)t(G=kgaL1>MuK201gJY)RCG}UkkYP%t?M+UCShy7j_{=F>xds+D3zbt&BDs25@Wnpnt6t=#vD*U!4 zY`t3)ep7Aj4em#a1yy@W1v^uzTk1+|`=Z?uooh;LaV56ZvM-;gigi1j4ClRZm)s-l zFIbL%ddFf(Ypbe*LZ^XmG@rnfFIb01A5ZWlVupelQmOrpFzx|0fS1@#(0nNzv%W`HR`ud&9YP&Kz3 z%o8{&Tqxn%1POru$Qq8SxKzCjU~aj*%Z{Pr?|JWH^TD-s0#1ITD_z{>CwlHH=qMO9 z-eENF;V(ZRcfI-TP|Sv^;=WXPsGFI%2Cv)VjB?^!+?b228YoQ;)3kx7T!$ZBOhuzryQsHo zO6NJd--Y4dQy5NGna91Gg@97X)Pss1Yprg*HoF%$H zuS>sXbIycY&$*5Ey|2&t%`5Lev3|=V%eo)+!5dT7ZC;E0PCU#YY+3+*$u&ynL7s#o0&pd>N^zd%GX~G8~ATUVk~9kI??( zc#5rjCjR_iq6rUdJe_+HQz zO6nK!aWotcK2SPmHXVPtpU!7@)5$}B426LGK79Bz?SEMg7q;nRG8N2+>HM}g87=M? zVgX%~J?03XbQx_GiqazvoY=F5+W}V#-i>=ms0JJ$Xil~vHj}kidB#X?c(d!y7{&_+ z?(v{q$-k#iBZx=Yv4OXmWEDIF7?kJ>5?fP3(dra0)go@^)1Q}j`sLoFH=IvzM*Y0O zEJ7Sbu}^gPaMb2^{`_n6hf!3jzLL0tj%jEuMl6HA>n-{tknBj`ynSd{qx;_NXmYEX zBjB&Tj~p?9B%%i$nCY}YRe?x`uatwL1iTwMgcl>@5@=+Jaycn(ptCXr2+WR*-hIil~-#}rU$ZVE#nrbSgM5Y2|SVdIEN(Uh(^f0iD_(ZPLU?nV_ zy5pL4z6wF$3&j!&-Cy#A8)T6xSVR$(NMWQ!f-s%|{CQTBPO7MZo8v4@S8=T_t@u>0 zKEVI*V9?Tip0csBnT!ICr$VYyh0zmU${_{Zca{!fyn$6p8*x4m-os;r5EBxh3WF30 zg}MxKks1&JoeDXX-s0ega$KCQ(ix`{2`5N@znrejlc#1LDxbhm6ROLEYbvucq=WLX zCQn=dcbKF@R-DqoGJLn?XtrQHE``V82A-beaWp&qRF&;iR)d2zg_{-I&|jM%8iVEs zE3kZ5$Y;k-b=*GqX7<+zPbt}Zoam+8r>_n)6{@`qxd=$GR2QE_+ivI@sp($T*$}r? z|14THP9e!>Z!I+v%!(YR)dAR*6=Wp^PJC|swkilnb^r5D`_v^2U zX`Ink#SJ#y3`VV2Q>^1B<(;1>49 zaMB;n?t0@XzTSN)C28)4sy2Ej_o81yACki4DKmbWNEwafMfwv4m^8buY$KJRg)C%pkXd4PyG`J+n~_R+fyQ5EQ#jxA;r*y*>uLo}F%!57 zSB^GR0yc#*l{c$5+rlC>!q9&O$RDdZyCxl|5RuYx_)r}4sZgCNno?aihbr+yXKgF2 z!W(5=$!qDBDyA9)pO%bJ3w3sbN3(%EOgqLjV8k}*C4Ld-(a;+%{j z%=#0iHO3o-YF9}P!5XPM3Q|af()>J!{{PjamoH|my0N$>v!fZZFQU74Vg{U$NwUt* zmvqvU8HvtDah%Hd{M@QCHY4lq$YQL=E zKJ~5ZdS(0gKL|MeDtz+tJ@_O|`g@_0$2it+;!VE+N4)$#IAUd^008r4ufe z*v^zz*z0Y5Ouet8UH^fVH+~a$o8PnA*0)5U{Z+K=<@>bid{sKW!KUvgGO_DB#NxLh zT4xy4i4I+HGFaDcfVF^+5UlyVQjVcQMx{~;rc_?x-xCqbXjqk8Il{w+3RYPF#2U=L zD(inS(AM{mVkL~6#bApZ(VBT%)Qzh37xD_)W6FF+>tyhhD|qfEI<`Z!B5PeVROUVq zoTXm#&8_nwyJV)+w23lDXaZgSFV9yO)U!yDM^`TvoB@5`0bhDyX(OmE zUy77V>twrZH4OXEzV4=NnUy{&ZS)DgYmE5_vN?8?JV!Y+hndiaN)`x#W z+AJz}kfc$dA_=$@aNZ$kfqM{92v%zVE8}5v!B7|)R)ZCdvZ)lVp-}gesB{Y@fg_GZ z40sSC&4|AUK};owz3w!+CXvqWAHu#i3)t7|TIagfY%%t=nc-X!Y`ww0;{uP2oOa5)oZn@)VyQ$jU#ay%ZcMC`S;ZwMjLywYaB(Kerx=7_PI9fVoFrjdIWY}X zf)z13cS}R>puVF+76GHzN66zQ{qiF zF=}8daBji1SLaLp@R9uP*m+S3acRPg#&Ja6~A?N=Lps|2GjWq#v7Fg%&*HZ4( zB6zNrK69rkOe9iqmbs(G8De>@QL{Z2-c>S!TRFo^n=Oa=uALM##|Mxt(HH5E!ju{} z#g|wq6cZ5e{9TsZ{Hzz`S-A)}b=hq|3&r8_KM zwqtP)>#`y~iA%eqfwvuTmceUlcI!ih@wfGMv%-MfMyFj7%Nx)4s9tql5o2#JE-wv? zw@nOE%NTyTHJCsM1IQI^56O0?kMHayyqdLGV(!mU1CAM+*I)8-N(pSjUTO4-&e#}$ zY!*lkmNY-fU?_1=KB}{TPXo#!-&rVc~mum9w9s`->{bV=@{@3)m?#OUmT^64Ees%sa6sHPA7r(=J>OwzCoALhM}20Q7^HE zEj^}jlI7D*gL$iEEN?eY95x*^tCHR777Q0xYzHk>bf%P)`iZO?0xJSPg`@%K*mVL` z$6JTGA%oXdcL{u48ZrO)xsNSy-4&%rbTIXeXETwhd$Lm2&rYmy%AF)rveVifk!c9W& z1yu#Es}wKVDPN}}Rvul&7O8t}ECs96s5IRpC z!x;i%C!_HAzMC9Id5TR{vD}gVCEMgO!_+?RTvuT^Tg__wiUGAy$e?L+42EH%ott|* zzv&2@+M!%0{L<!&8P-5p|+N@M<6cbe@JT;Y&D0p&=Z5Drr#4 z7{9|)iS-pc)$9tM|NcDVO<;1ng-dz(?wzX6ZBQ{KjY;(@zqZ7s6R1opGLX#v=j!(e z(of$ZO6mYuRotrsJo$Mq_E)`VSC+Ao>{wcOc#KG{7#5#j>qG+bm=Y3zZ~3`>s2<%4 zRHcIUH^oig6I3Y)o@FGD?+&Vp-t-lwVNyVIYHjis(3xu#f=PC*|2qCh0f{cV&Fc)f zNZ09H+Sh!WPSd`=Iv013E4ognF7Di<+N$G&Mfd5{Yw8xAYO`&(%zZkPgVFctpk)be zSsDdvN#bg!VRz2O7;*eIrXld-D*|CwQe0T}yE{ z>wzY)2v!k3Loehb?6+DVV z=7KRIiCjsG?VRPrF_~$G-a#FU#G}ejeo)FXh%=sBVLr(=#H6UoYbI3CpeM!BKz&>7 zl2EoF6G|xSZ7q~_x_XJE68x{98?kJh63eS2vFs_?7JE>W|4e4ZpXrdSl>dXtlp_?RDQZp6}63%k7%!u!{7uj zXD(o{R?^@A^c@IWZIaA~5zyW+`UtNfmcHmR2<=OAAlGP{hVC=Algus?a0i%@mj zFJ$*R)8S6J&uRu)eu0hy7w?PWXi`U-d9^^@k*LD_oC#Q7WG;!`8kDNJ#6b~Dg_86F z6V*`Cz(@IjstJsCDrEn4;1<-;RE2zt#x3<4&x_^oT*^OhcS6)sa_#!ApS~ud(3Ec$N}2Q zMcjv&(pR|^_TX%ERNkPcC0Bv7K55}Zntj%8kTb+Q~}G|1N#2*EyyMb^7Ks+8|+BLrlo zaOfx2Fnt1I7G#i2R%uVu-~j~_8qiuO4RbV{(O6w%lN0$3jahDQr~NLc2^lDbC7s>% zXCDZxU%mm%dM7jc!E{eAy%}OjH}~h)(5O(pkYvsx^`fnDu?}y|GR5jd0b+}H+7)oX z;*9er|EFeOeNa~Dm1?~6&or#>WQWY@`WxoY&Hac@*sA|5Kp4a_dR8e!hMPM2%(7KX zG8;%(F%g$!P#eXk!DN0hn2e56uFhj5jmR<(j8>Rul@szx^5>MrR!J0#pKw}?l9~ZQ zzoN*bme{|eESk#B@a66q!;<3onaCD4;qquDT&$)l0$JFrW{4eQXc~*vmA8>{XR-l zlV}Orx2UVa ze_*4;9g;wezsxm>U-`ThrSmG9x(8sCLGYkn04*ZM8@SAG+b zw~URpy!^|Rf(p*~n#T@$qT?T~6O*=@(OPu5v0mG`bnm6X7#h&t zTSQ^Ib`0Ko7uZKwJtqS)#)R=h2iJJVeY_wto11C2j&`05w9)!mV;lde7|d@j<~QFj z`j1exRg^JR*)=lO{;Kp`eudPXQ)0mArxVwGUi}l;a{d1ShG`X&6ILBXO*X>CUx=x)(U^kV@Hd#B_!!QdkU4<<7Uh& zLniSe=aB+~Kcu^foJUn9<#lsdrHZC-V)*hfK6skpMJm{Q6xXo3RX7l79ZgWXcCF$hi7it|8CMseZFQYOjf>pq7*59^igFXh%nz$l}bPM_S8l-M67`2~Gn gy+~Jee{^-JW$rF5fH-+_-+|!#zm5q^GsJQL0G_^u7ytkO literal 0 HcmV?d00001 diff --git a/tests/core/test_discharge_note_summarization.py b/tests/core/test_discharge_note_summarization.py new file mode 100644 index 000000000..5ba326b6d --- /dev/null +++ b/tests/core/test_discharge_note_summarization.py @@ -0,0 +1,162 @@ +""" +Unit tests for DischargeNoteSummarization task in summarization_data_processing.py. + +Tests cover: + - Class attributes (task_name, input_schema, output_schema) + - __call__: happy-path extraction of brief_hospital_course and summary + - __call__: all boundary / filtering conditions that cause samples to be skipped + - Output dictionary structure and field types + +External dependencies (pyhealth) are fully mocked so the tests run without +installing the real library or accessing any dataset. + +Run with: + python -m pytest test_summarization_data_processing.py -v + # or + python -m unittest test_summarization_data_processing.py -v +""" + + +import unittest +from unittest.mock import MagicMock, patch +from pathlib import Path +from pathlib import Path +from pyhealth.datasets import MIMIC4Dataset +from pyhealth.data import Patient +import tempfile +from pyhealth.tasks import DischargeNoteSummarization +from unittest.mock import MagicMock +from pyhealth.data import Patient, Event + + +import logging + +class TestDischargeNoteSummarizationTask(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.test_resources = Path(__file__).parent.parent.parent / "test-resources" / "discharge" + cls.cache_dir = tempfile.TemporaryDirectory() + cls.full_note_dataset = MIMIC4Dataset( + note_root=cls.test_resources, + note_tables=["discharge"]) + cls.task = DischargeNoteSummarization() + cls.sample_notes = cls.full_note_dataset.set_task(cls.task) + cls.MIN_SUMMARY_LENGTH = 350 + + def create_mock_patient(self, note_text, patient_id="p1", hadm_id="h1", subject_id="20000003"): + """Helper to create a mock Patient with a single discharge event.""" + patient = MagicMock(spec=Patient) + patient.patient_id = patient_id + + # Create a mock Event for the discharge note + event = MagicMock(spec=Event) + event.attr_dict = { + "text": note_text, + "hadm_id": hadm_id, + "subject_id": subject_id + } + + # Mock the get_events method to return our discharge event + patient.get_events.side_effect = lambda event_type: [event] if event_type == "discharge" else [] + return patient + + + def test_generated_samples(self): + self.assertEqual(len(self.sample_notes), 2) + print(self.sample_notes[0]["summary"]) + self.assertTrue(self.sample_notes[0]["summary"].startswith("Discharge Instructions:")) + + + + + def test_task_metadata(self): + self.assertEqual(self.task.task_name,"DischargeNoteSummarization") + self.assertIn("text", self.task.input_schema) + self.assertIn("summary", self.task.output_schema) + + def test_filtering_short_summary(self): + + note = ( + "Brief Hospital Course:\n" + "The patient is an elderly individual with a significant past medical history of chronic obstructive " + "pulmonary disease, congestive heart failure with a reduced ejection fraction of thirty-five percent, " + "and Type 2 diabetes mellitus. The patient presented to the emergency department complaining of " + "progressive shortness of breath, productive cough with yellow sputum, and bilateral lower extremity " + "edema increasing over the last five days. Upon arrival, the patient was tachycardic and hypoxic, " + "requiring supplemental oxygen via nasal cannula to maintain saturations above ninety-two percent. " + "A chest X-ray revealed bilateral pulmonary infiltrates and pleural effusions, consistent with a " + "multifocal pneumonia overlaying a congestive heart failure exacerbation. Laboratory results were " + "significant for an elevated pro-BNP and a leukocytosis with an elevated white blood cell count. " + "During the first forty-eight hours of admission, the patient was started on intravenous antibiotics " + "for community-acquired pneumonia. Diuresis was initiated with intravenous medications, resulting in " + "a significant net negative fluid balance over three days. The patient’s respiratory status " + "improved significantly; oxygen was successfully weaned to room air by hospital day four. " + "Endocrinology was consulted for blood glucose management, and the insulin regimen was " + "adjusted to a sliding scale with a long-acting basal dose. By the day of discharge, the " + "patient was stable, ambulating without distress, and lung sounds were markedly clearer on " + "auscultation. Weight had returned to the documented baseline. " + + "Medications on Admission: " + "Metformin, Lisinopril, Furosemide, and an Albuterol inhaler. " + + "Discharge Instructions: " + "You were treated in the hospital for a combination of pneumonia and a flare-up of your heart " + "failure. It is vital that you finish the entire course of oral antibiotics as prescribed, " + "even if you feel better. Please monitor your weight every morning before breakfast. If you " + "notice a weight gain of more than three pounds in a single day or five pounds in a week, " + "contact your primary care doctor immediately as this indicates fluid buildup. Continue to " + "use your salt-restricted diet and limit your total fluid intake to one and a half liters " + "daily to prevent further strain on your heart. Rest is encouraged for the next week; however, " + "try to perform light walking around the house to prevent blood clots. Avoid any heavy lifting " + "or strenuous exercise until cleared by your cardiologist. You should continue your home " + "medications as updated in the attached list. Seek immediate emergency care if you experience " + "chest pain, severe shortness of breath while sitting still, or if you begin coughing up blood. " + "We have adjusted your diuretic medication slightly to help manage your fluid levels more " + "effectively during your recovery. Ensure you have picked up your new prescriptions from the " + "pharmacy before the end of the day. It is also recommended that you receive your flu and " + "pneumonia vaccinations once you have fully recovered from this current illness. Please bring " + "your updated medication list to all upcoming appointments to ensure your medical record is accurate. " + + "Followup Instructions: " + "Follow up with Cardiology next week. Follow up with your Primary Care Provider within seven days " + "for a transition of care visit." + + ) + patient = self.create_mock_patient(note) + samples = self.task(patient) + + self.assertEqual(len(samples), 1, "This summary should not be filtered out as its length more than 350.") + + def test_edge_cases(self): + """Verify that summaries shorter than MIN_SUMMARY_LENGTH (350) are skipped.""" + short_summary = "This summary is too short." # ~26 chars + note = ( + #"Brief Hospital Course:\nStable.\n" + "Medications on Admission:\nNone.\n" + "Discharge Instructions:\n" + short_summary + "\n" + "Followup Instructions:\nNone." + ) + patient = self.create_mock_patient(note) + samples = self.task(patient) + + self.assertEqual(len(samples), 0, "Should filter out samples with short summaries.") + + def test_edge_cases_1(self): + short_summary = "This is a sample generated summary." + note = ( + "Brief Hospital Course:\nStable.\n" + #"Medications on Admission:\nNone.\n" + #"Discharge Instructions:\n" + short_summary + "\n" + #"Followup Instructions:\nNone." + "This is a sample generated short summary that coes not contain all sections." + ) + + patient = self.create_mock_patient(note) + samples = self.task(patient) + + self.assertEqual(len(samples), 0, "Should filter out samples with short summaries.") + + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From b3116f9b78bd2aaff383ab8d38d8d2e9da1b2b36 Mon Sep 17 00:00:00 2001 From: Vishal Vyas Date: Sat, 18 Apr 2026 19:40:57 -0400 Subject: [PATCH 2/3] removed the dependency on static test file --- tests/core/test_discharge_note_summarization.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/core/test_discharge_note_summarization.py b/tests/core/test_discharge_note_summarization.py index 5ba326b6d..a2a8c7a19 100644 --- a/tests/core/test_discharge_note_summarization.py +++ b/tests/core/test_discharge_note_summarization.py @@ -34,13 +34,13 @@ class TestDischargeNoteSummarizationTask(unittest.TestCase): @classmethod def setUpClass(cls): - cls.test_resources = Path(__file__).parent.parent.parent / "test-resources" / "discharge" + #cls.test_resources = Path(__file__).parent.parent.parent / "test-resources" / "discharge" cls.cache_dir = tempfile.TemporaryDirectory() - cls.full_note_dataset = MIMIC4Dataset( - note_root=cls.test_resources, - note_tables=["discharge"]) + #cls.full_note_dataset = MIMIC4Dataset( + # note_root=cls.test_resources, + # note_tables=["discharge"]) cls.task = DischargeNoteSummarization() - cls.sample_notes = cls.full_note_dataset.set_task(cls.task) + #cls.sample_notes = cls.full_note_dataset.set_task(cls.task) cls.MIN_SUMMARY_LENGTH = 350 def create_mock_patient(self, note_text, patient_id="p1", hadm_id="h1", subject_id="20000003"): @@ -61,10 +61,9 @@ def create_mock_patient(self, note_text, patient_id="p1", hadm_id="h1", subject_ return patient - def test_generated_samples(self): - self.assertEqual(len(self.sample_notes), 2) - print(self.sample_notes[0]["summary"]) - self.assertTrue(self.sample_notes[0]["summary"].startswith("Discharge Instructions:")) + #def test_generated_samples(self): + # self.assertEqual(len(self.sample_notes), 2) + # self.assertTrue(self.sample_notes[0]["summary"].startswith("Discharge Instructions:")) From d205128408c47e49a08af1d10456ddf80ec0da56 Mon Sep 17 00:00:00 2001 From: Vishal Vyas Date: Sat, 18 Apr 2026 19:43:58 -0400 Subject: [PATCH 3/3] deleted the discharge.csv from test resources, generating the test data in test class itself --- test-resources/discharge/note/discharge.csv.gz | Bin 10578 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test-resources/discharge/note/discharge.csv.gz diff --git a/test-resources/discharge/note/discharge.csv.gz b/test-resources/discharge/note/discharge.csv.gz deleted file mode 100644 index a24289a56a137991f3b34d703a6f98c622fb88c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10578 zcmV-YDXrEYiwFqge&J~X|72-%V`yP=XJsy9b9MmjeOq%P$+hl#{fhKBCal&#iK~#< zdpiOFEwLK`2GleCFRINP3Ys+S?}m?K#!8cUaC*A{CmaSI#H5%J6Va7Fp~T!k*|qWuhBov zMs={LHtH4gwbE#`uiIDcW@WIb;P*!Ts#dMFtF?}()w=w@{I_dSZ+9D)^xF@@nm9Yz z72>ad{i`_j&qLrPViMvFdc_h}Z@u&4*b4(OaFUZP2VP9`f0X78-IBhEa3gNKDA}H9 zxR9^(@<+V7kkPZ}qV4@~FzSycLmJrkW%TIDxQj`L;!_a51S0!UT!`NKSw=BNjnYRk zmyVlYmNB(V5*e($;IXS`(%*V=Bl_WP?>kD=-mv&+KbVE*XRUmvBx_C zt?r$_g^^gK7|(TlfzPAh*@?YpDP~dV%5@sat~GNa$CW`MV=uPQ>dqq;9%$jSNMe*k zjC?AuRu_U`SYDin7b%}?A*@*< zBDoKvBo?vsH&wjshC$*5DYf^p+E=DYy!WCY^g-h=kjzQyAr_YN7*@nex=spC#9Jvg zQu?v*y(cMt_#xTKgII^bpA!5V9UYP_`u*Vt?lajsiLj{64?nC!8Pi)Y4(OGHVxJ~r zhn*nYt=tj$V*#dYWn`oA0!^ra!2S`|CiMOAMNFJ3NRmXJOGH_FGKp<445P?=$rYX= zZM#?vPhYLA6JrSy{X5UJ>@oq5q=PB6+kj*?PsCc|Rdh!*k!!hgDkLe3=?(wIYuj)i zNJqV_-`!le7s17IMXWDeuy%cj)5z1WKud^C7(xs~FAmi#cooLj=4peeB3E0ei;5E<$Q~yvAFI&&ug4=QEKg%`KC5A4*@Xnz$yAY2eyhu_l zJSDrdme_Nj1W5;^AXY6DC9RB<871++b7_)~=}xSCDc4Nct%n(}qBJHkAa;sGN{Fn5 zY3l)5u59j2&qq4(ff{)+8dbzj`Z55OZ{|0%Ij9E#k-Yn`Y2+^4CkOtF3FE7iD9Q@! zM$0JnC5=m*rH!s6;zcU4)7yyn{DP$6l_$0#cwEQBnA^APhe z-j=we$+afs$LqqHd^g;NK7B2{YSEq~f~zUWHFj*8Z~TmV&_`I^qf9obPh*l9iivi5 z8x5g%JlF>h+VI&+Xj8sO?9uBhyCpg`QLkNITsB4hQe1a_ocGXh?5uc~IuHS-TyMO* z=q>R7850x#XB`bTE?1uK)$PlQ`1z(U+Mhs39|pviiRg4HVlnGU(MzB|EDijCN=>bM_+MNhjh3HuJrQE?sYfo@v`?r2}R{ z2otl1NHijd8f_zCK@&fFVH*1fWj$EMCLS>dD+k3AZm#og#LDHC)o zD~9|lNo-OY4V|wJFm5(eiv9{RAoQngvsJ*SONbv4qp-wy7e1jEdat9F1xAIuHb=yG zBz#F?SX>Pmm)OIItn0TtI8-ye>^gx~>;% z*oV^HhMsGt7^*22x3XF)Y^5TYV)72UoqRp!vJ;5=`H1?i*KAR6Az&KzWQ83T-XM)N z;rLYu{EfGVRR-BN8+e)Int%vlw3cub+!Tr{NFbLxC-7iscAk%|PqQE&!X_W5-Y5;{ zhAwoXYHDJDsvY5N#5r_v%H&3S>gR%AS2 z=qL6kZ%3p!sZC@yVxf1m&=w?>Uw=$5C<9BJflOE}kyWw!M?27)fcSBM_jugBp>~FZ- z*bmd0z$YvN?znWHx*|^Zm`4S~6=Uw8|G6vX^Lb@aUo`mV-9>cqNM;}f`?=>8d9K)_ z@JrU%jqoZRmc9L!)gaoupbd@DTdl*L47%F?T9efx0mkM&z7S#7`UF0rjDV zeDwpoqJWqVwdJO9tT=puy+qE5Tt6yG7hF3Kt%#RdOx3ll+1;1LsNWll;V-@W(WJK=O(zJPG<(6^_63aZb^Cpm{tGgbb%D1- z`)A_payg?}&maC$`G4fZNpfOq$Ouj|S+BX$=_(ql*Sd}N>>gq0?f9WTT@0)B+V#13 zm=AkmQf;))MIRrR)tcQrhf_ZoJ=|C8%`5abUG^3W`r3r^)4v-}=c56&Zi^)zjbWf0 z)ka73Cg7I1?aiv!M+3q!zx_C`KE$j5VFZt!!&N$4+|HIC&}r>lz-z+SDSfTC@OAu2 z%i23w*BGS4W-9#!JxmVX~}t>c<1nx7l)8ZPx5- z4*%agwrtDl|1$5Aa&}zoLGt6*@DfrVs@ov}rtm&y&+y@5L3^!Es(h`q_Lk9mT*i*%0oCiI1X$Lb#6e% z;0S!MjUF8`OBQ>#eb^KD_7ES3M`BlY2EQ52^;pkvH)1s9_5q-F!p zc?@8Jp(FNK9o+v!!bC}mAv5Pch23(AG6)c;(NsM`A7R9Sk9!e*BWA|5Z#Xi#3*$Y- z5TYNZP@Mm2$q}A+7P7H@{iEna@dzP; zw?#J1KXCG;=#*3_$PUv-;-R7m7`5?J#E{OjdD^p}CPyMsaTt237XyJ(hX)u^jh_y;Dz8G@!CBJl5i7CIUz( zxuPI$O1VrorXBQW@ex`&mOBsrcdI6Lk770zgWhQTg`X~9KEgc3)GoaN0}GLmnn@fg z$0-O0&4{55gP>mNu;Kvs*U>Z~wpkm(WjA0*N}=3$f4 z7Rp%{v;K?)hPMM=0in}9dRiNYfLHdW4{8#~7jDk|nYa<&GXDYBOr|W|)1DqHr3KF% zk_29xzC@TNCeVxBNGb^*-VRj!qGAuC3}r|;1n0CX?@c3fU2D0eB&p1#P~@*du(1~) zXpW0TQ;t||H5E15q-qP2(kM#`>AbH>3M;^#5AOoui!VmO)`7<(8e;bF@uL{siNCb& zy0iHN!LO9Razbe_z2TsI?)Zs(jGSjLf`FP{Z$<+SxQ9EEa%e0#9kRNFCfD9B)5Z9GJ=$Pi#lBn%o~W$M|+R}V&tVeaRX&VYo5pzI-3k{ zc)pi+YKD|i&ob==%5&o5H>|kvTh`RHzp{<3`7h#pbNsk2cvB>2@dpZZVcJMM9SR$< zp1t_vd9x;#{0e%I8_)OS?s=0q^8LhVf0NdiUuAITn__hZBD*k|aNEQE7OJS7^?+G0 zfmPw4#3x!IR7b*_;tt09=qVdr4?Hxcg$>p$GYjS3`eCrvecfY!;*~|>Bq@30?98i; zgy0nqZEGx_B}d_ChTg+8<|+haMVjmlY-Hqc-oUnc^p}z$rqAAP1&4<{`YOa~Ehz=U zJ(ngm3ZVZ}SIgpn=5Zgh@~K!l1>*?pZ6_w>nm8QAx;hp=gy|o|moUW-JC2r8?W$OD z67C%Ek%`I)+05MLrq&ieb4nTAs8l4a9iPasQ@U$kD9)%@jIws3Znwvro?t!BBu5XC zSV^B;tDn)HGf?m;TqsxS9eRuYD3M0faaI~>o1c3ATFVYbvbNISbCVRNhO``+f|C7! zvjtzz3rb`6Vm6M36#g=U1zSCgtEx1S{Cny}D$Ql>s3t&}lx%cn zck-v$cpsccF=V64i)M@iM+;tt1zkYi28{ue<#S>r(Y@m1uAuqXyq7WTlazb2L`$tP z+-!JgiP0QE)+fd$WXHi~zP}I+a>JC_F1CM^E-acU>WR6PFrT;1vsA>Rv>!H-8}1Lv zOp(3HtfMWal-eRUvqID**>n6C=MXD-@mLOFKS8~e-1VQyg^#!VtI5r;;O6|n`a+}8 z)RvMQCbEMcdGeThc_UDu@k6 zTSVV3LaO9Van?RnopmT%0I`K&=!DLZOa0x&S(W4|MK)QjmVI3C-`UIb&BN~~v3G7qLHH@hFxdu6_x3=iV@x2kp1)|F6R)wNeR?v$N z2;9P5Oo>NVC4f1f>#Rj{taXnmUz{PFfY1H`t>NlnD<3$YsQpEWt-6vJ2pe+Yk@ugu zp%qg_qr8r;`(de)N_j3-OTb|aoacK~s-Bm@VTP&`$KKAPpqfg)kw<>7+00zTisgnh%!sHuT$2+xy7Jx8=-xIWqXdOH?9;#C z#1u?P)8d+@34#)}7Fs{CUx}ugSxGNahASZL4N5}G(e&^OCk$XGQlHc}MXGM(>0B`J z_h@M18jIV_z0tH#rCU(il&sV{G130nvu*>J~t zizO4UH(qjPmzKt%l4TmB>juA4Z(Q@)4E5METwJxdkr7?~#S(Oy=8KRi1ph$XyTxR5P46k4)& zVYJ^8DH?WL{rXzZQ!;OpFs6>HE50Vfs!A{HaIhPZ=^5)t(G=kgaL1>MuK201gJY)RCG}UkkYP%t?M+UCShy7j_{=F>xds+D3zbt&BDs25@Wnpnt6t=#vD*U!4 zY`t3)ep7Aj4em#a1yy@W1v^uzTk1+|`=Z?uooh;LaV56ZvM-;gigi1j4ClRZm)s-l zFIbL%ddFf(Ypbe*LZ^XmG@rnfFIb01A5ZWlVupelQmOrpFzx|0fS1@#(0nNzv%W`HR`ud&9YP&Kz3 z%o8{&Tqxn%1POru$Qq8SxKzCjU~aj*%Z{Pr?|JWH^TD-s0#1ITD_z{>CwlHH=qMO9 z-eENF;V(ZRcfI-TP|Sv^;=WXPsGFI%2Cv)VjB?^!+?b228YoQ;)3kx7T!$ZBOhuzryQsHo zO6NJd--Y4dQy5NGna91Gg@97X)Pss1Yprg*HoF%$H zuS>sXbIycY&$*5Ey|2&t%`5Lev3|=V%eo)+!5dT7ZC;E0PCU#YY+3+*$u&ynL7s#o0&pd>N^zd%GX~G8~ATUVk~9kI??( zc#5rjCjR_iq6rUdJe_+HQz zO6nK!aWotcK2SPmHXVPtpU!7@)5$}B426LGK79Bz?SEMg7q;nRG8N2+>HM}g87=M? zVgX%~J?03XbQx_GiqazvoY=F5+W}V#-i>=ms0JJ$Xil~vHj}kidB#X?c(d!y7{&_+ z?(v{q$-k#iBZx=Yv4OXmWEDIF7?kJ>5?fP3(dra0)go@^)1Q}j`sLoFH=IvzM*Y0O zEJ7Sbu}^gPaMb2^{`_n6hf!3jzLL0tj%jEuMl6HA>n-{tknBj`ynSd{qx;_NXmYEX zBjB&Tj~p?9B%%i$nCY}YRe?x`uatwL1iTwMgcl>@5@=+Jaycn(ptCXr2+WR*-hIil~-#}rU$ZVE#nrbSgM5Y2|SVdIEN(Uh(^f0iD_(ZPLU?nV_ zy5pL4z6wF$3&j!&-Cy#A8)T6xSVR$(NMWQ!f-s%|{CQTBPO7MZo8v4@S8=T_t@u>0 zKEVI*V9?Tip0csBnT!ICr$VYyh0zmU${_{Zca{!fyn$6p8*x4m-os;r5EBxh3WF30 zg}MxKks1&JoeDXX-s0ega$KCQ(ix`{2`5N@znrejlc#1LDxbhm6ROLEYbvucq=WLX zCQn=dcbKF@R-DqoGJLn?XtrQHE``V82A-beaWp&qRF&;iR)d2zg_{-I&|jM%8iVEs zE3kZ5$Y;k-b=*GqX7<+zPbt}Zoam+8r>_n)6{@`qxd=$GR2QE_+ivI@sp($T*$}r? z|14THP9e!>Z!I+v%!(YR)dAR*6=Wp^PJC|swkilnb^r5D`_v^2U zX`Ink#SJ#y3`VV2Q>^1B<(;1>49 zaMB;n?t0@XzTSN)C28)4sy2Ej_o81yACki4DKmbWNEwafMfwv4m^8buY$KJRg)C%pkXd4PyG`J+n~_R+fyQ5EQ#jxA;r*y*>uLo}F%!57 zSB^GR0yc#*l{c$5+rlC>!q9&O$RDdZyCxl|5RuYx_)r}4sZgCNno?aihbr+yXKgF2 z!W(5=$!qDBDyA9)pO%bJ3w3sbN3(%EOgqLjV8k}*C4Ld-(a;+%{j z%=#0iHO3o-YF9}P!5XPM3Q|af()>J!{{PjamoH|my0N$>v!fZZFQU74Vg{U$NwUt* zmvqvU8HvtDah%Hd{M@QCHY4lq$YQL=E zKJ~5ZdS(0gKL|MeDtz+tJ@_O|`g@_0$2it+;!VE+N4)$#IAUd^008r4ufe z*v^zz*z0Y5Ouet8UH^fVH+~a$o8PnA*0)5U{Z+K=<@>bid{sKW!KUvgGO_DB#NxLh zT4xy4i4I+HGFaDcfVF^+5UlyVQjVcQMx{~;rc_?x-xCqbXjqk8Il{w+3RYPF#2U=L zD(inS(AM{mVkL~6#bApZ(VBT%)Qzh37xD_)W6FF+>tyhhD|qfEI<`Z!B5PeVROUVq zoTXm#&8_nwyJV)+w23lDXaZgSFV9yO)U!yDM^`TvoB@5`0bhDyX(OmE zUy77V>twrZH4OXEzV4=NnUy{&ZS)DgYmE5_vN?8?JV!Y+hndiaN)`x#W z+AJz}kfc$dA_=$@aNZ$kfqM{92v%zVE8}5v!B7|)R)ZCdvZ)lVp-}gesB{Y@fg_GZ z40sSC&4|AUK};owz3w!+CXvqWAHu#i3)t7|TIagfY%%t=nc-X!Y`ww0;{uP2oOa5)oZn@)VyQ$jU#ay%ZcMC`S;ZwMjLywYaB(Kerx=7_PI9fVoFrjdIWY}X zf)z13cS}R>puVF+76GHzN66zQ{qiF zF=}8daBji1SLaLp@R9uP*m+S3acRPg#&Ja6~A?N=Lps|2GjWq#v7Fg%&*HZ4( zB6zNrK69rkOe9iqmbs(G8De>@QL{Z2-c>S!TRFo^n=Oa=uALM##|Mxt(HH5E!ju{} z#g|wq6cZ5e{9TsZ{Hzz`S-A)}b=hq|3&r8_KM zwqtP)>#`y~iA%eqfwvuTmceUlcI!ih@wfGMv%-MfMyFj7%Nx)4s9tql5o2#JE-wv? zw@nOE%NTyTHJCsM1IQI^56O0?kMHayyqdLGV(!mU1CAM+*I)8-N(pSjUTO4-&e#}$ zY!*lkmNY-fU?_1=KB}{TPXo#!-&rVc~mum9w9s`->{bV=@{@3)m?#OUmT^64Ees%sa6sHPA7r(=J>OwzCoALhM}20Q7^HE zEj^}jlI7D*gL$iEEN?eY95x*^tCHR777Q0xYzHk>bf%P)`iZO?0xJSPg`@%K*mVL` z$6JTGA%oXdcL{u48ZrO)xsNSy-4&%rbTIXeXETwhd$Lm2&rYmy%AF)rveVifk!c9W& z1yu#Es}wKVDPN}}Rvul&7O8t}ECs96s5IRpC z!x;i%C!_HAzMC9Id5TR{vD}gVCEMgO!_+?RTvuT^Tg__wiUGAy$e?L+42EH%ott|* zzv&2@+M!%0{L<!&8P-5p|+N@M<6cbe@JT;Y&D0p&=Z5Drr#4 z7{9|)iS-pc)$9tM|NcDVO<;1ng-dz(?wzX6ZBQ{KjY;(@zqZ7s6R1opGLX#v=j!(e z(of$ZO6mYuRotrsJo$Mq_E)`VSC+Ao>{wcOc#KG{7#5#j>qG+bm=Y3zZ~3`>s2<%4 zRHcIUH^oig6I3Y)o@FGD?+&Vp-t-lwVNyVIYHjis(3xu#f=PC*|2qCh0f{cV&Fc)f zNZ09H+Sh!WPSd`=Iv013E4ognF7Di<+N$G&Mfd5{Yw8xAYO`&(%zZkPgVFctpk)be zSsDdvN#bg!VRz2O7;*eIrXld-D*|CwQe0T}yE{ z>wzY)2v!k3Loehb?6+DVV z=7KRIiCjsG?VRPrF_~$G-a#FU#G}ejeo)FXh%=sBVLr(=#H6UoYbI3CpeM!BKz&>7 zl2EoF6G|xSZ7q~_x_XJE68x{98?kJh63eS2vFs_?7JE>W|4e4ZpXrdSl>dXtlp_?RDQZp6}63%k7%!u!{7uj zXD(o{R?^@A^c@IWZIaA~5zyW+`UtNfmcHmR2<=OAAlGP{hVC=Algus?a0i%@mj zFJ$*R)8S6J&uRu)eu0hy7w?PWXi`U-d9^^@k*LD_oC#Q7WG;!`8kDNJ#6b~Dg_86F z6V*`Cz(@IjstJsCDrEn4;1<-;RE2zt#x3<4&x_^oT*^OhcS6)sa_#!ApS~ud(3Ec$N}2Q zMcjv&(pR|^_TX%ERNkPcC0Bv7K55}Zntj%8kTb+Q~}G|1N#2*EyyMb^7Ks+8|+BLrlo zaOfx2Fnt1I7G#i2R%uVu-~j~_8qiuO4RbV{(O6w%lN0$3jahDQr~NLc2^lDbC7s>% zXCDZxU%mm%dM7jc!E{eAy%}OjH}~h)(5O(pkYvsx^`fnDu?}y|GR5jd0b+}H+7)oX z;*9er|EFeOeNa~Dm1?~6&or#>WQWY@`WxoY&Hac@*sA|5Kp4a_dR8e!hMPM2%(7KX zG8;%(F%g$!P#eXk!DN0hn2e56uFhj5jmR<(j8>Rul@szx^5>MrR!J0#pKw}?l9~ZQ zzoN*bme{|eESk#B@a66q!;<3onaCD4;qquDT&$)l0$JFrW{4eQXc~*vmA8>{XR-l zlV}Orx2UVa ze_*4;9g;wezsxm>U-`ThrSmG9x(8sCLGYkn04*ZM8@SAG+b zw~URpy!^|Rf(p*~n#T@$qT?T~6O*=@(OPu5v0mG`bnm6X7#h&t zTSQ^Ib`0Ko7uZKwJtqS)#)R=h2iJJVeY_wto11C2j&`05w9)!mV;lde7|d@j<~QFj z`j1exRg^JR*)=lO{;Kp`eudPXQ)0mArxVwGUi}l;a{d1ShG`X&6ILBXO*X>CUx=x)(U^kV@Hd#B_!!QdkU4<<7Uh& zLniSe=aB+~Kcu^foJUn9<#lsdrHZC-V)*hfK6skpMJm{Q6xXo3RX7l79ZgWXcCF$hi7it|8CMseZFQYOjf>pq7*59^igFXh%nz$l}bPM_S8l-M67`2~Gn gy+~Jee{^-JW$rF5fH-+_-+|!#zm5q^GsJQL0G_^u7ytkO