Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions support_code/behavior_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,18 @@ def aggregate_data_by_bin_size(
grouped = data.groupby("MouseID")
filtered_data = pd.concat([group.iloc[:bin_size] for _, group in grouped])

# Extract the latency values before summing. Using agg with a positional
# lambda preserves NaN (unlike first()/last(), which skip NaN) and returns a
# Series indexed by MouseID, so it aligns correctly with `aggregated`.
latency_first_col = f"{behavior}_latency_to_first_prediction"
latency_last_col = f"{behavior}_latency_to_last_prediction"
latency_first = filtered_data.groupby("MouseID")[latency_first_col].agg(
lambda s: s.iloc[0]
)
latency_last = filtered_data.groupby("MouseID")[latency_last_col].agg(
lambda s: s.iloc[-1]
)

# Aggregate numeric columns by summing them
numeric_cols = filtered_data.select_dtypes(include=["number"]).columns
aggregated = filtered_data.groupby("MouseID")[numeric_cols].sum()
Expand Down Expand Up @@ -181,10 +193,10 @@ def aggregate_data_by_bin_size(
# TODO: var and std need to be aggregated across bins.
# This is non-trivial because of the partial bouts and their associated weights.
aggregated[f"bin_first_{bin_size * 5}.{behavior}_latency_first_prediction"] = (
aggregated[f"{behavior}_latency_to_first_prediction"].head(1)
latency_first
)
aggregated[f"bin_last_{bin_size * 5}.{behavior}_latency_last_prediction"] = (
aggregated[f"{behavior}_latency_to_last_prediction"].tail(1)
latency_last
)

# Reset index to make MouseID a regular column
Expand Down
Empty file added tests/support_code/__init__.py
Empty file.
132 changes: 132 additions & 0 deletions tests/support_code/test_behavior_summaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Unit tests for support_code/behavior_summaries.py."""

import math
import sys
from pathlib import Path

import pandas as pd
import pytest

# behavior_summaries.py lives in support_code/, which is not a package.
# Add it to sys.path so we can import it directly.
sys.path.insert(0, str(Path(__file__).parents[2] / "support_code"))

import behavior_summaries # noqa: E402


# Behavior name interpolated into the behavior-specific column names that the
# fixture builder and the assertions in this module use.
BEHAVIOR = "Jumping"


def _make_filtered_data(
    latency_first_values: list,
    latency_last_values: list,
    mouse_id: str = "mouse_A",
) -> pd.DataFrame:
    """Create a small single-mouse, per-bin table to feed aggregate_data_by_bin_size.

    Args:
        latency_first_values: One latency-to-first-prediction value per bin;
            its length determines the number of rows.
        latency_last_values: One latency-to-last-prediction value per bin;
            should have the same length as ``latency_first_values``.
        mouse_id: Value repeated in the ``MouseID`` column for every row.

    Returns:
        A DataFrame with one row per bin containing ``MouseID``, the two
        latency columns, and fixed placeholder values for the remaining
        ``BEHAVIOR``-prefixed columns.
    """
    bin_count = len(latency_first_values)

    def per_bin(value):
        """Repeat a single constant once for every bin."""
        return [value] * bin_count

    # Key order is preserved, so it also fixes the DataFrame's column order.
    columns = {
        "MouseID": per_bin(mouse_id),
        f"{BEHAVIOR}_latency_to_first_prediction": latency_first_values,
        f"{BEHAVIOR}_latency_to_last_prediction": latency_last_values,
        f"{BEHAVIOR}_time_behavior": per_bin(100.0),
        f"{BEHAVIOR}_time_not_behavior": per_bin(200.0),
        f"{BEHAVIOR}_behavior_dist": per_bin(50.0),
        f"{BEHAVIOR}_behavior_dist_threshold": per_bin(10.0),
        f"{BEHAVIOR}_behavior_dist_seg": per_bin(5.0),
        f"{BEHAVIOR}_bout_behavior": per_bin(2),
        f"{BEHAVIOR}_avg_bout_duration": per_bin(1.5),
        f"{BEHAVIOR}__stats_sample_count": per_bin(2),
        f"{BEHAVIOR}_bout_duration_std": per_bin(0.1),
        f"{BEHAVIOR}_bout_duration_var": per_bin(0.01),
    }
    return pd.DataFrame(columns)


class TestLatencyFirstPrediction:
    """Checks on the ``bin_first_*`` latency-to-first-prediction output column."""

    def test_returns_first_bin_value_when_present(self):
        """The first bin's latency is reported as-is, not accumulated across bins."""
        frame = _make_filtered_data(
            latency_first_values=[2506.0, 9412.0, 18082.0, float("nan")],
            latency_last_values=[4900.0, 11000.0, 19000.0, float("nan")],
        )
        summary = behavior_summaries.aggregate_data_by_bin_size(
            frame, bin_size=4, behavior=BEHAVIOR
        )
        observed = summary[f"bin_first_20.{BEHAVIOR}_latency_first_prediction"].iloc[0]
        assert observed == pytest.approx(2506.0)

    def test_returns_nan_when_first_bin_has_no_behavior(self):
        """A NaN in the first bin must stay NaN instead of falling through to a later bin."""
        frame = _make_filtered_data(
            latency_first_values=[float("nan"), 5000.0, 12000.0, float("nan")],
            latency_last_values=[float("nan"), 8000.0, 15000.0, float("nan")],
        )
        summary = behavior_summaries.aggregate_data_by_bin_size(
            frame, bin_size=4, behavior=BEHAVIOR
        )
        observed = summary[f"bin_first_20.{BEHAVIOR}_latency_first_prediction"].iloc[0]
        assert math.isnan(observed)

    def test_single_bin_returns_that_bins_value(self):
        """With a single bin, that bin's first-prediction latency is the result."""
        frame = _make_filtered_data(
            latency_first_values=[2506.0],
            latency_last_values=[4900.0],
        )
        summary = behavior_summaries.aggregate_data_by_bin_size(
            frame, bin_size=1, behavior=BEHAVIOR
        )
        observed = summary[f"bin_first_5.{BEHAVIOR}_latency_first_prediction"].iloc[0]
        assert observed == pytest.approx(2506.0)


class TestLatencyLastPrediction:
    """Checks on the ``bin_last_*`` latency-to-last-prediction output column."""

    def test_returns_last_bin_value_when_present(self):
        """The last bin's latency is reported as-is, not accumulated across bins."""
        frame = _make_filtered_data(
            latency_first_values=[2506.0, 9412.0, 18082.0, 38222.0],
            latency_last_values=[4900.0, 11000.0, 19000.0, 45000.0],
        )
        summary = behavior_summaries.aggregate_data_by_bin_size(
            frame, bin_size=4, behavior=BEHAVIOR
        )
        observed = summary[f"bin_last_20.{BEHAVIOR}_latency_last_prediction"].iloc[0]
        assert observed == pytest.approx(45000.0)

    def test_returns_nan_when_last_bin_has_no_behavior(self):
        """A NaN in the last bin must stay NaN instead of falling back to an earlier bin."""
        frame = _make_filtered_data(
            latency_first_values=[float("nan"), 5000.0, 12000.0, float("nan")],
            latency_last_values=[float("nan"), 8000.0, 15000.0, float("nan")],
        )
        summary = behavior_summaries.aggregate_data_by_bin_size(
            frame, bin_size=4, behavior=BEHAVIOR
        )
        observed = summary[f"bin_last_20.{BEHAVIOR}_latency_last_prediction"].iloc[0]
        assert math.isnan(observed)

    def test_single_bin_returns_that_bins_value(self):
        """With a single bin, that bin's last-prediction latency is the result."""
        frame = _make_filtered_data(
            latency_first_values=[2506.0],
            latency_last_values=[4900.0],
        )
        summary = behavior_summaries.aggregate_data_by_bin_size(
            frame, bin_size=1, behavior=BEHAVIOR
        )
        observed = summary[f"bin_last_5.{BEHAVIOR}_latency_last_prediction"].iloc[0]
        assert observed == pytest.approx(4900.0)


class TestMultiMouseAlignment:
    """Checks that latency values stay attached to the right MouseID."""

    def test_each_mouse_gets_its_own_first_latency(self):
        """Each mouse receives its own first-bin and last-bin latency, not another mouse's."""
        per_mouse_frames = [
            _make_filtered_data(
                latency_first_values=[2506.0, 9412.0],
                latency_last_values=[4900.0, 11000.0],
                mouse_id="mouse_A",
            ),
            _make_filtered_data(
                latency_first_values=[float("nan"), 5000.0],
                latency_last_values=[float("nan"), 8000.0],
                mouse_id="mouse_B",
            ),
        ]
        combined = pd.concat(per_mouse_frames, ignore_index=True)

        summary = behavior_summaries.aggregate_data_by_bin_size(
            combined, bin_size=2, behavior=BEHAVIOR
        )
        # Index by mouse so every assertion addresses a single (mouse, column) cell.
        by_mouse = summary.set_index("MouseID")

        first_col = f"bin_first_10.{BEHAVIOR}_latency_first_prediction"
        last_col = f"bin_last_10.{BEHAVIOR}_latency_last_prediction"

        first_a = by_mouse.loc["mouse_A", first_col]
        first_b = by_mouse.loc["mouse_B", first_col]
        assert first_a == pytest.approx(2506.0)
        assert math.isnan(first_b)

        last_a = by_mouse.loc["mouse_A", last_col]
        last_b = by_mouse.loc["mouse_B", last_col]
        assert last_a == pytest.approx(11000.0)
        assert last_b == pytest.approx(8000.0)
Loading