diff --git a/.github/workflows/player-perf.yml b/.github/workflows/player-perf.yml
index 937a60113..cee22abec 100644
--- a/.github/workflows/player-perf.yml
+++ b/.github/workflows/player-perf.yml
@@ -42,6 +42,15 @@ jobs:
- shard: load
scenarios: load
runs: "5"
+ - shard: fps
+ scenarios: fps
+ runs: "3"
+ - shard: scrub
+ scenarios: scrub
+ runs: "3"
+ - shard: drift
+ scenarios: drift
+ runs: "3"
steps:
- uses: actions/checkout@v4
diff --git a/packages/player/tests/perf/baseline.json b/packages/player/tests/perf/baseline.json
index 52211e710..dbe0e7466 100644
--- a/packages/player/tests/perf/baseline.json
+++ b/packages/player/tests/perf/baseline.json
@@ -1,7 +1,7 @@
{
"compLoadColdP95Ms": 2000,
"compLoadWarmP95Ms": 1000,
- "fpsMin": 55,
+ "compositionTimeAdvancementRatioMin": 0.95,
"scrubLatencyP95IsolatedMs": 80,
"scrubLatencyP95InlineMs": 33,
"driftMaxMs": 500,
diff --git a/packages/player/tests/perf/fixtures/10-video-grid/index.html b/packages/player/tests/perf/fixtures/10-video-grid/index.html
new file mode 100644
index 000000000..b4ec61add
--- /dev/null
+++ b/packages/player/tests/perf/fixtures/10-video-grid/index.html
@@ -0,0 +1,126 @@
+
+
+
+
+ perf fixture: 10-video-grid
+
+
+
+
+
+
+
+
+
diff --git a/packages/player/tests/perf/fixtures/10-video-grid/sample.mp4 b/packages/player/tests/perf/fixtures/10-video-grid/sample.mp4
new file mode 100644
index 000000000..1cf4df9e1
Binary files /dev/null and b/packages/player/tests/perf/fixtures/10-video-grid/sample.mp4 differ
diff --git a/packages/player/tests/perf/index.ts b/packages/player/tests/perf/index.ts
index c6942ae9d..e95f2b901 100644
--- a/packages/player/tests/perf/index.ts
+++ b/packages/player/tests/perf/index.ts
@@ -29,7 +29,10 @@ import { execFileSync } from "node:child_process";
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
+import { runFps } from "./scenarios/02-fps.ts";
import { runLoad } from "./scenarios/03-load.ts";
+import { runScrub } from "./scenarios/04-scrub.ts";
+import { runDrift } from "./scenarios/05-drift.ts";
import { reportAndGate, type GateMode, type GateResult, type Metric } from "./perf-gate.ts";
import { launchBrowser } from "./runner.ts";
import { startServer } from "./server.ts";
@@ -38,7 +41,42 @@ const HERE = dirname(fileURLToPath(import.meta.url));
const RESULTS_DIR = resolve(HERE, "results");
const RESULTS_FILE = resolve(RESULTS_DIR, "metrics.json");
-type ScenarioId = "load";
+type ScenarioId = "load" | "fps" | "scrub" | "drift";
+
+/**
+ * Per-scenario default `runs` value when the caller didn't pass `--runs`.
+ *
+ * Why `load` gets 5 runs and the others get 3:
+ *
+ * - `load` reports a single p95 over `runs` measurements, so each `run` is
+ * one sample. p95 over n=3 is mostly noise (the 95th percentile of three
+ * numbers is just `max`), so we bump it to 5. We considered 10 — but cold
+ * load is the slowest scenario in the shard (~2s × 5 runs × 2 fixtures =
+ * ~20s with disk cache cleared), and going to 10 would push the load shard
+ * past 30s of pure-measurement wall time per CI invocation.
+ * - `fps` aggregates as `min(ratio)` over runs — 3 runs gives us a worst-
+ * of-three signal, which is what we want for a floor metric. Adding more
+ * runs would only make the ratio strictly smaller (more chances to catch
+ * a stall) and shift the threshold toward false positives from runner
+ * contention rather than real regressions.
+ * - `scrub` and `drift` *pool* their per-run samples (10 seeks/run for
+ * scrub, ~1500 RVFC frames/run for drift) and compute the percentile over
+ * the pooled set. Their effective sample count for the percentile is
+ * `runs × samples_per_run`, not `runs`, so 3 runs already gives 30+ scrub
+ * samples and 4500+ drift samples per shard — well above the n≈30 rule of
+ * thumb for a stable p95.
+ *
+ * TODO(player-perf): revisit `fps: 3` once we have ~2 weeks of CI baseline
+ * data — if `min(ratio)` shows >5% inter-run variance attributable to runner
+ * jitter (not real player regressions), bump to 5 and tighten the
+ * `compositionTimeAdvancementRatioMin` baseline accordingly.
+ */
+const DEFAULT_RUNS: Record<ScenarioId, number> = {
+ load: 5,
+ fps: 3,
+ scrub: 3,
+ drift: 3,
+};
type ResultsFile = {
schemaVersion: 1;
@@ -88,7 +126,7 @@ function parseArgs(argv: string[]): ParsedArgs {
// `mode` is consumed (measure logs regressions but never fails; enforce
// exits non-zero on regression).
mode: (process.env.PLAYER_PERF_MODE as GateMode) === "enforce" ? "enforce" : "measure",
- scenarios: ["load"],
+ scenarios: ["load", "fps", "scrub", "drift"],
runs: null,
fixture: null,
headful: false,
@@ -150,7 +188,31 @@ async function main(): Promise<void> {
const m = await runLoad({
browser,
origin: server.origin,
- runs: args.runs ?? 5,
+ runs: args.runs ?? DEFAULT_RUNS.load,
+ fixture: args.fixture,
+ });
+ metrics.push(...m);
+ } else if (scenario === "fps") {
+ const m = await runFps({
+ browser,
+ origin: server.origin,
+ runs: args.runs ?? DEFAULT_RUNS.fps,
+ fixture: args.fixture,
+ });
+ metrics.push(...m);
+ } else if (scenario === "scrub") {
+ const m = await runScrub({
+ browser,
+ origin: server.origin,
+ runs: args.runs ?? DEFAULT_RUNS.scrub,
+ fixture: args.fixture,
+ });
+ metrics.push(...m);
+ } else if (scenario === "drift") {
+ const m = await runDrift({
+ browser,
+ origin: server.origin,
+ runs: args.runs ?? DEFAULT_RUNS.drift,
fixture: args.fixture,
});
metrics.push(...m);
diff --git a/packages/player/tests/perf/perf-gate.ts b/packages/player/tests/perf/perf-gate.ts
index 60cf7f52e..86cd75ddf 100644
--- a/packages/player/tests/perf/perf-gate.ts
+++ b/packages/player/tests/perf/perf-gate.ts
@@ -31,7 +31,16 @@ export type Metric = {
export type PerfBaseline = {
compLoadColdP95Ms: number;
compLoadWarmP95Ms: number;
- fpsMin: number;
+ /**
+ * Floor on `(compositionTime advanced) / (wallClock elapsed)` over a sustained
+ * playback window — see packages/player/tests/perf/scenarios/02-fps.ts. A
+ * healthy player keeps up with its intended speed and reads ~1.0; values
+ * below 1.0 mean the composition clock fell behind real time, which is the
+ * actual user-visible jank we want to gate against. Refresh-rate independent
+ * by construction, so it does not saturate to display refresh on high-Hz
+ * runners the way the previous `fpsMin` did. Direction: higher-is-better.
+ */
+ compositionTimeAdvancementRatioMin: number;
scrubLatencyP95IsolatedMs: number;
scrubLatencyP95InlineMs: number;
driftMaxMs: number;
diff --git a/packages/player/tests/perf/scenarios/02-fps.ts b/packages/player/tests/perf/scenarios/02-fps.ts
new file mode 100644
index 000000000..ce04595bd
--- /dev/null
+++ b/packages/player/tests/perf/scenarios/02-fps.ts
@@ -0,0 +1,236 @@
+/**
+ * Scenario 02: sustained playback against the composition clock.
+ *
+ * Loads the 10-video-grid fixture, calls `player.play()`, then samples
+ * `__player.getTime()` at fixed wall-clock intervals for ~5 seconds. The
+ * emitted metric is the ratio of composition-time advanced to wall-clock
+ * elapsed:
+ *
+ * composition_time_advancement_ratio = (getTime(end) - getTime(start)) / wallSeconds
+ *
+ * This reads ~1.0 when the runtime is keeping up with its intended playback
+ * speed and falls below 1.0 when the player stalls — a slow video decoder, a
+ * blocked main thread, a GC pause, anything that prevents the composition
+ * clock from advancing at real-time. The metric is independent of the host
+ * display refresh rate by construction: both numerator and denominator are
+ * wall-clock timestamps, neither is a frame count, so a 60Hz, 120Hz, or 240Hz
+ * runner sees the same value for a healthy player.
+ *
+ * Why we replaced the previous rAF-based FPS metric:
+ * The original implementation counted `requestAnimationFrame` ticks per
+ * wall-clock second and asserted `fps >= 55`. On a 120Hz CI runner that
+ * reads ~120 fps regardless of whether the composition is actually
+ * advancing, so the gate passed even when the player was silently stalling.
+ * See PR #400 review (jrusso1020 + miguel-heygen) for the full discussion;
+ * this implementation follows jrusso1020's "first choice" recommendation.
+ *
+ * Per the proposal:
+ * Test 1: Playback frame rate (player-perf-fps)
+ * Load 10-video composition → play 5s → measure how well the player kept
+ * up with the composition clock.
+ *
+ * Methodology details:
+ * - We install the wall-clock sampler before calling `play()` so the very
+ * first post-play tick is captured. We then wait for `__player.isPlaying()`
+ * to flip true (the parent→iframe `play` message is async via postMessage)
+ * and *reset* the sample buffer, so the measurement window only contains
+ * samples taken while the runtime was actively playing the timeline.
+ * - Sampling cadence is 100ms (10 samples/sec). That's fine-grained enough
+ * to spot a half-second stall but coarse enough that the sampler itself
+ * has negligible overhead. With a 5s window we collect ~50 samples; the
+ * ratio is computed from the first and last sample's `getTime()` values.
+ * - We use `setInterval` (not rAF) on purpose: rAF cadence is the metric we
+ * are trying to *avoid* depending on. `setInterval` is wall-clock-driven.
+ *
+ * Outputs one metric:
+ * - composition_time_advancement_ratio_min
+ * (higher-is-better, baseline key compositionTimeAdvancementRatioMin)
+ *
+ * Aggregation: `min(ratio)` across runs because the proposal asserts a floor
+ * — the worst run is the one that gates against regressions.
+ */
+
+import type { Browser, Frame, Page } from "puppeteer-core";
+import { loadHostPage } from "../runner.ts";
+import type { Metric } from "../perf-gate.ts";
+
+export type FpsScenarioOpts = {
+ browser: Browser;
+ origin: string;
+ /** Number of measurement runs. */
+ runs: number;
+ /** If null, runs the default fixture (10-video-grid). */
+ fixture: string | null;
+};
+
+const DEFAULT_FIXTURE = "10-video-grid";
+const PLAYBACK_DURATION_MS = 5_000;
+const SAMPLE_INTERVAL_MS = 100;
+const PLAY_CONFIRM_TIMEOUT_MS = 5_000;
+const FRAME_LOOKUP_TIMEOUT_MS = 5_000;
+
+declare global {
+ interface Window {
+ /** (wallClockMs, compositionTimeSec) pairs collected by the sampler. */
+ __perfPlaySamples?: Array<{ wall: number; comp: number }>;
+ /** setInterval handle used by the sampler; cleared at the end of the window. */
+ __perfPlaySamplerHandle?: number;
+ /** Hyperframes runtime player API exposed inside the composition iframe. */
+ __player?: {
+ play: () => void;
+ pause: () => void;
+ seek: (timeSeconds: number) => void;
+ getTime: () => number;
+ getDuration: () => number;
+ isPlaying: () => boolean;
+ };
+ }
+}
+
+type RunResult = {
+ ratio: number;
+ compElapsedSec: number;
+ wallElapsedSec: number;
+ samples: number;
+};
+
+/**
+ * Find the iframe Puppeteer Frame that hosts the fixture composition. The
+ * `` shell wraps an iframe whose URL is derived from the
+ * player's `src` attribute, so we match by path substring rather than full URL.
+ */
+async function getFixtureFrame(page: Page, fixture: string): Promise<Frame> {
+ const expected = `/fixtures/${fixture}/`;
+ const deadline = Date.now() + FRAME_LOOKUP_TIMEOUT_MS;
+ while (Date.now() < deadline) {
+ const frame = page.frames().find((f) => f.url().includes(expected));
+ if (frame) return frame;
+ await new Promise((r) => setTimeout(r, 50));
+ }
+ throw new Error(`[scenario:fps] fixture frame not found for "${fixture}" within timeout`);
+}
+
+async function runOnce(
+ opts: FpsScenarioOpts,
+ fixture: string,
+ idx: number,
+ total: number,
+): Promise<RunResult> {
+ const ctx = await opts.browser.createBrowserContext();
+ try {
+ const page = await ctx.newPage();
+ const { duration } = await loadHostPage(page, opts.origin, { fixture });
+ const frame = await getFixtureFrame(page, fixture);
+
+ // Install the wall-clock sampler in the iframe context. We use setInterval
+ // because rAF cadence is exactly the host-display-dependent signal we are
+ // trying NOT to depend on; setInterval is driven by the event loop and
+ // gives us samples at fixed wall-clock cadence regardless of refresh rate.
+ await frame.evaluate((sampleIntervalMs: number) => {
+ window.__perfPlaySamples = [];
+ window.__perfPlaySamplerHandle = window.setInterval(() => {
+ const comp = window.__player?.getTime?.();
+ if (typeof comp !== "number" || !Number.isFinite(comp)) return;
+ window.__perfPlaySamples!.push({
+ wall: performance.timeOrigin + performance.now(),
+ comp,
+ });
+ }, sampleIntervalMs);
+ }, SAMPLE_INTERVAL_MS);
+
+ // Issue play from the host page (parent of the iframe). The player's
+ // public `play()` posts a control message into the iframe.
+ await page.evaluate(() => {
+ const el = document.getElementById("player") as (HTMLElement & { play: () => void }) | null;
+ if (!el) throw new Error("[scenario:fps] player element missing on host page");
+ el.play();
+ });
+
+ // Wait for the runtime to actually transition to playing — this is the
+ // signal that the postMessage round trip + timeline.play() finished.
+ await frame.waitForFunction(() => window.__player?.isPlaying?.() === true, {
+ timeout: PLAY_CONFIRM_TIMEOUT_MS,
+ });
+
+ // Reset samples now that playback is confirmed running. Anything captured
+ // before this point belongs to the ramp-up window (composition clock at
+ // 0, wall clock advancing) and would skew the ratio toward 0.
+ await frame.evaluate(() => {
+ window.__perfPlaySamples = [];
+ });
+
+ // Sustain playback for the measurement window.
+ await new Promise((r) => setTimeout(r, PLAYBACK_DURATION_MS));
+
+ // Stop the sampler and harvest the samples before pausing the runtime,
+ // so the pause command can't perturb the tail of the sample window.
+ const samples = (await frame.evaluate(() => {
+ if (window.__perfPlaySamplerHandle !== undefined) {
+ clearInterval(window.__perfPlaySamplerHandle);
+ window.__perfPlaySamplerHandle = undefined;
+ }
+ return window.__perfPlaySamples ?? [];
+ })) as Array<{ wall: number; comp: number }>;
+
+ await page.evaluate(() => {
+ const el = document.getElementById("player") as (HTMLElement & { pause: () => void }) | null;
+ el?.pause();
+ });
+
+ if (samples.length < 2) {
+ throw new Error(
+ `[scenario:fps] run ${idx + 1}/${total}: only ${samples.length} composition-clock samples captured (composition duration ${duration}s)`,
+ );
+ }
+
+ const first = samples[0]!;
+ const last = samples[samples.length - 1]!;
+ const wallElapsedSec = (last.wall - first.wall) / 1000;
+ const compElapsedSec = last.comp - first.comp;
+ const ratio = wallElapsedSec > 0 ? compElapsedSec / wallElapsedSec : 0;
+
+ console.log(
+ `[scenario:fps] run[${idx + 1}/${total}] ratio=${ratio.toFixed(4)} compElapsed=${compElapsedSec.toFixed(3)}s wallElapsed=${wallElapsedSec.toFixed(3)}s samples=${samples.length}`,
+ );
+
+ await page.close();
+ return {
+ ratio,
+ compElapsedSec,
+ wallElapsedSec,
+ samples: samples.length,
+ };
+ } finally {
+ await ctx.close();
+ }
+}
+
+export async function runFps(opts: FpsScenarioOpts): Promise<Metric[]> {
+ const fixture = opts.fixture ?? DEFAULT_FIXTURE;
+ const runs = Math.max(1, opts.runs);
+ console.log(
+ `[scenario:fps] fixture=${fixture} runs=${runs} window=${PLAYBACK_DURATION_MS}ms sampleInterval=${SAMPLE_INTERVAL_MS}ms`,
+ );
+
+ const ratios: number[] = [];
+ for (let i = 0; i < runs; i++) {
+ const result = await runOnce(opts, fixture, i, runs);
+ ratios.push(result.ratio);
+ }
+
+ // Worst run wins: the proposal asserts a floor on this ratio, so a single
+ // bad run (slow decoder, GC pause, host contention) is the one that gates.
+ const ratioMin = Math.min(...ratios);
+ console.log(`[scenario:fps] aggregate min ratio=${ratioMin.toFixed(4)} runs=${runs}`);
+
+ return [
+ {
+ name: "composition_time_advancement_ratio_min",
+ baselineKey: "compositionTimeAdvancementRatioMin",
+ value: ratioMin,
+ unit: "ratio",
+ direction: "higher-is-better",
+ samples: ratios,
+ },
+ ];
+}
diff --git a/packages/player/tests/perf/scenarios/04-scrub.ts b/packages/player/tests/perf/scenarios/04-scrub.ts
new file mode 100644
index 000000000..0c063c26f
--- /dev/null
+++ b/packages/player/tests/perf/scenarios/04-scrub.ts
@@ -0,0 +1,307 @@
+/**
+ * Scenario 04: scrub latency.
+ *
+ * Loads the 10-video-grid fixture, pauses the player, then issues 10 seek
+ * calls in sequence — first through the synchronous "inline" path, then
+ * through the postMessage-driven "isolated" path — and measures the wall-clock
+ * latency from each `seek()` call to the first paint where the iframe's
+ * timeline reports the new time.
+ *
+ * Per the proposal:
+ * Test 2: Scrub latency (player-perf-scrub)
+ * Load composition → seek to 10 positions in sequence → measure time
+ * from seek() call to state update callback
+ * Assert: p95 < 80ms (isolated), p95 < 33ms (inline, Phase 4+)
+ *
+ * Methodology details:
+ * - Both modes are measured in the same page load. Inline runs first so
+ * the isolated mode's monkey-patch (forcing `_trySyncSeek` to return
+ * false) doesn't bleed into the inline samples.
+ * - "Inline" mode is the default behavior of `` when the
+ * iframe is same-origin and exposes `__player.seek()` synchronously.
+ * `seek()` lands the new frame in the same task as the input event.
+ * - "Isolated" mode is forced by replacing the player element's
+ * `_trySyncSeek` method with `() => false`, which sends the player
+ * element through the postMessage bridge — exactly what cross-origin
+ * embeds and Phase 1 (pre-sync) builds did.
+ * - Detection is via a `requestAnimationFrame` watcher inside the iframe
+ * that polls `__player.getTime()` until it is within `MATCH_TOLERANCE_S`
+ * of the requested target. We use a tolerance because the postMessage
+ * bridge converts seconds → frame number → seconds, which can introduce
+ * sub-frame quantization drift even for targets on the canonical fps grid.
+ * - Timing uses `performance.timeOrigin + performance.now()` in both the
+ * host and iframe contexts. `timeOrigin` is consistent across same-process
+ * frames, so the difference is a true wall-clock measurement of latency.
+ * - Seek targets alternate forward/backward across the 10s composition so
+ * no two consecutive seeks land near each other; this avoids the rAF
+ * watcher matching against a stale `getTime()` value before the seek
+ * command is processed.
+ *
+ * Outputs two metrics:
+ * - scrub_latency_p95_inline_ms (lower-is-better, baseline scrubLatencyP95InlineMs)
+ * - scrub_latency_p95_isolated_ms (lower-is-better, baseline scrubLatencyP95IsolatedMs)
+ *
+ * Aggregation: percentile(95) is computed across the pooled per-seek
+ * latencies from every run. With 10 seeks per mode per run × 3 runs we get
+ * 30 samples per mode per CI shard, which is enough for a stable p95.
+ */
+
+import type { Browser, Frame, Page } from "puppeteer-core";
+import { loadHostPage, percentile } from "../runner.ts";
+import type { Metric } from "../perf-gate.ts";
+
+export type ScrubScenarioOpts = {
+ browser: Browser;
+ origin: string;
+ /** Number of measurement runs. */
+ runs: number;
+ /** If null, runs the default fixture (10-video-grid). */
+ fixture: string | null;
+};
+
+const DEFAULT_FIXTURE = "10-video-grid";
+/** Targets are seconds within the composition (10s duration). */
+const SEEK_TARGETS: readonly number[] = [1.0, 7.0, 2.0, 8.0, 3.0, 9.0, 4.0, 6.0, 5.0, 0.5];
+/**
+ * Tolerance window the rAF watcher uses to decide that the iframe's reported
+ * `__player.getTime()` matches the requested seek target. 50ms = 1.5 frames at
+ * 30fps, which absorbs three sources of expected slippage:
+ *
+ * 1. **Frame quantization on the postMessage path.** `_sendControl("seek")`
+ * converts seconds → integer frame number → seconds inside the runtime,
+ * so e.g. a target of 1.0s on a 30fps composition lands at frame 30 →
+ * 1.000s exactly, but a target of 1.005s lands at frame 30 → still
+ * 1.000s, a 5ms quantization error baked into the API itself.
+ * 2. **Sub-frame intra-clip clock advance.** Even with the iframe paused,
+ * between the `seek()` call landing and the next rAF tick, the runtime
+ * may have already nudged time by a fraction of a frame as part of
+ * finalizing the seek; `getTime()` reports the post-finalize value.
+ * 3. **Variable host load + browser jitter on CI.** GitHub runners share
+ * cores, so a noisy neighbor can delay the rAF tick that would otherwise
+ * register the match by tens of ms. Picking a tolerance much tighter
+ * than this would gate against runner contention rather than player
+ * regressions.
+ *
+ * The metric this scenario asserts is *latency to user-visible match*, not
+ * *exact equality of the reported time*, so a 50ms acceptance window is the
+ * intended behavior — but if we ever want to tighten this (e.g. to assert
+ * sub-frame precision on the inline path now that PR #397 documented it),
+ * this is the knob to turn. Configurability is deliberately deferred until
+ * we have a concrete second use case; YAGNI.
+ *
+ * TODO(player-perf): revisit this constant after P0-1b lands and we have ~2
+ * weeks of CI baseline data — if the inline-mode samples consistently cluster
+ * well below 50ms, drop this to e.g. 16ms (1 frame @ 60fps) and split the
+ * tolerance per mode (tighter for inline, current for isolated).
+ */
+const MATCH_TOLERANCE_S = 0.05;
+/** Per-seek timeout; isolated p95 in the proposal is 80ms, so 1s is huge headroom. */
+const SEEK_TIMEOUT_MS = 1_000;
+const PAUSE_CONFIRM_TIMEOUT_MS = 5_000;
+const FRAME_LOOKUP_TIMEOUT_MS = 5_000;
+
+declare global {
+ interface Window {
+ /** Promise resolved by the iframe rAF watcher with the wall-clock t1 of the matching paint. */
+    __perfScrubAwait?: Promise<number>;
+ __player?: {
+ play: () => void;
+ pause: () => void;
+ seek: (timeSeconds: number) => void;
+ getTime: () => number;
+ getDuration: () => number;
+ isPlaying: () => boolean;
+ };
+ }
+}
+
+type Mode = "inline" | "isolated";
+
+type RunResult = {
+ inlineLatencies: number[];
+ isolatedLatencies: number[];
+};
+
+/**
+ * Find the iframe Puppeteer Frame that hosts the fixture composition. Same
+ * helper as 02-fps.ts; duplicated locally so each scenario file is
+ * self-contained.
+ */
+async function getFixtureFrame(page: Page, fixture: string): Promise<Frame> {
+ const expected = `/fixtures/${fixture}/`;
+ const deadline = Date.now() + FRAME_LOOKUP_TIMEOUT_MS;
+ while (Date.now() < deadline) {
+ const frame = page.frames().find((f) => f.url().includes(expected));
+ if (frame) return frame;
+ await new Promise((r) => setTimeout(r, 50));
+ }
+ throw new Error(`[scenario:scrub] fixture frame not found for "${fixture}" within timeout`);
+}
+
+/**
+ * Measure a single seek's latency.
+ *
+ * Sequence:
+ * 1. Install a rAF watcher in the iframe that resolves with the wall-clock
+ * timestamp of the first paint where `__player.getTime()` is within
+ * tolerance of `target`. Promise is stashed on `window.__perfScrubAwait`.
+ * 2. Capture host wall-clock t0 and call `el.seek(target)` in the same task.
+ * 3. Await the iframe's resolved Promise (returns t1).
+ * 4. Latency = t1 - t0 (ms).
+ */
+async function measureSingleSeek(page: Page, frame: Frame, target: number): Promise<number> {
+ await frame.evaluate(
+ (target: number, tolerance: number, timeoutMs: number) => {
+      window.__perfScrubAwait = new Promise<number>((resolve, reject) => {
+ const deadlineWall = performance.timeOrigin + performance.now() + timeoutMs;
+ const tick = () => {
+ const wall = performance.timeOrigin + performance.now();
+ const time = window.__player?.getTime?.() ?? Number.NaN;
+ if (Number.isFinite(time) && Math.abs(time - target) < tolerance) {
+ resolve(wall);
+ return;
+ }
+ if (wall > deadlineWall) {
+ reject(new Error(`[scrub] timeout target=${target} last=${time}`));
+ return;
+ }
+ requestAnimationFrame(tick);
+ };
+ requestAnimationFrame(tick);
+ });
+ },
+ target,
+ MATCH_TOLERANCE_S,
+ SEEK_TIMEOUT_MS,
+ );
+
+ const t0Wall = await page.evaluate((targetSeconds: number) => {
+ const el = document.getElementById("player") as
+ | (HTMLElement & { seek: (t: number) => void })
+ | null;
+ if (!el) throw new Error("[scenario:scrub] player element missing on host page");
+ const wall = performance.timeOrigin + performance.now();
+ el.seek(targetSeconds);
+ return wall;
+ }, target);
+
+ // Puppeteer awaits the Promise we stashed on window and returns its resolved value.
+  const t1Wall = (await frame.evaluate(() => window.__perfScrubAwait as Promise<number>)) as number;
+
+ return t1Wall - t0Wall;
+}
+
+async function runScrubBatch(
+ page: Page,
+ frame: Frame,
+ mode: Mode,
+ idx: number,
+ total: number,
+): Promise<number[]> {
+ const latencies: number[] = [];
+ for (const target of SEEK_TARGETS) {
+ const latency = await measureSingleSeek(page, frame, target);
+ latencies.push(latency);
+ }
+ const p95 = percentile(latencies, 95);
+ console.log(
+ `[scenario:scrub] run[${idx + 1}/${total}] mode=${mode} p95=${p95.toFixed(2)}ms n=${latencies.length}`,
+ );
+ return latencies;
+}
+
+async function runOnce(
+ opts: ScrubScenarioOpts,
+ fixture: string,
+ idx: number,
+ total: number,
+): Promise<RunResult> {
+ const ctx = await opts.browser.createBrowserContext();
+ try {
+ const page = await ctx.newPage();
+ const { duration } = await loadHostPage(page, opts.origin, { fixture });
+ const requiredDuration = Math.max(...SEEK_TARGETS);
+ if (duration < requiredDuration) {
+ throw new Error(
+ `[scenario:scrub] fixture composition is ${duration.toFixed(2)}s but scrub targets require >= ${requiredDuration}s`,
+ );
+ }
+ const frame = await getFixtureFrame(page, fixture);
+
+ // Defensively pause: the host shell doesn't autoplay, but `pause()` also
+ // cancels any pending autoplay-on-ready behavior and guarantees the
+ // timeline isn't ticking under our seek measurements.
+ await page.evaluate(() => {
+ const el = document.getElementById("player") as (HTMLElement & { pause?: () => void }) | null;
+ el?.pause?.();
+ });
+ await frame.waitForFunction(() => window.__player?.isPlaying?.() === false, {
+ timeout: PAUSE_CONFIRM_TIMEOUT_MS,
+ });
+
+ // Inline mode first — the player's default `_trySyncSeek` path lands the
+ // seek synchronously when the iframe is same-origin (which it is here).
+ const inlineLatencies = await runScrubBatch(page, frame, "inline", idx, total);
+
+ // Force isolated mode by shadowing `_trySyncSeek` on the instance with
+ // a function that always reports failure. The fallback in `seek()` then
+ // sends the seek through `_sendControl("seek", { frame })`, which is the
+ // same path a cross-origin embed (or a Phase 1 build without sync seek)
+ // would take.
+ await page.evaluate(() => {
+ const el = document.getElementById("player") as
+ | (HTMLElement & { _trySyncSeek?: (t: number) => boolean })
+ | null;
+ if (!el) throw new Error("[scenario:scrub] player element missing on host page");
+ el._trySyncSeek = () => false;
+ });
+
+ const isolatedLatencies = await runScrubBatch(page, frame, "isolated", idx, total);
+
+ await page.close();
+ return { inlineLatencies, isolatedLatencies };
+ } finally {
+ await ctx.close();
+ }
+}
+
+export async function runScrub(opts: ScrubScenarioOpts): Promise<Metric[]> {
+ const fixture = opts.fixture ?? DEFAULT_FIXTURE;
+ const runs = Math.max(1, opts.runs);
+ console.log(
+ `[scenario:scrub] fixture=${fixture} runs=${runs} seeks_per_mode=${SEEK_TARGETS.length} tolerance=${(MATCH_TOLERANCE_S * 1000).toFixed(0)}ms`,
+ );
+
+ const allInline: number[] = [];
+ const allIsolated: number[] = [];
+ for (let i = 0; i < runs; i++) {
+ const result = await runOnce(opts, fixture, i, runs);
+ allInline.push(...result.inlineLatencies);
+ allIsolated.push(...result.isolatedLatencies);
+ }
+
+ const inlineP95 = percentile(allInline, 95);
+ const isolatedP95 = percentile(allIsolated, 95);
+ console.log(
+ `[scenario:scrub] aggregate inline_p95=${inlineP95.toFixed(2)}ms isolated_p95=${isolatedP95.toFixed(2)}ms (runs=${runs} samples_per_mode=${allInline.length})`,
+ );
+
+ return [
+ {
+ name: "scrub_latency_p95_inline_ms",
+ baselineKey: "scrubLatencyP95InlineMs",
+ value: inlineP95,
+ unit: "ms",
+ direction: "lower-is-better",
+ samples: allInline,
+ },
+ {
+ name: "scrub_latency_p95_isolated_ms",
+ baselineKey: "scrubLatencyP95IsolatedMs",
+ value: isolatedP95,
+ unit: "ms",
+ direction: "lower-is-better",
+ samples: allIsolated,
+ },
+ ];
+}
diff --git a/packages/player/tests/perf/scenarios/05-drift.ts b/packages/player/tests/perf/scenarios/05-drift.ts
new file mode 100644
index 000000000..1fdd64a5c
--- /dev/null
+++ b/packages/player/tests/perf/scenarios/05-drift.ts
@@ -0,0 +1,307 @@
+/**
+ * Scenario 05: media sync drift.
+ *
+ * Loads the 10-video-grid fixture, starts playback, and uses
+ * `requestVideoFrameCallback` on every video element to record
+ * (compositionTime, actualMediaTime) pairs for each decoded frame. Drift is
+ * the absolute difference between the *expected* media time (derived from the
+ * composition time using the runtime's clip transform) and the actual media
+ * time the decoder presented to the compositor.
+ *
+ * Per the proposal:
+ * Test 4: Media sync drift (player-perf-drift)
+ * Load 5-video composition → play for 10 seconds → on each RVFC callback,
+ * record drift between expected and actual media time
+ * Assert: max drift < 500ms, p95 drift < 100ms
+ *
+ * Methodology details:
+ * - We instrument *every* `video[data-start]` element in the fixture. The
+ * proposal called for 5 videos; the 10-video-grid gives us 10 streams in
+ * the same composition, which is a more conservative regression signal.
+ * - The expected media time uses the same transform the runtime applies in
+ * packages/core/src/runtime/media.ts:
+ *
+ * expectedMediaTime = (compositionTime - clip.start) * clip.playbackRate
+ * + clip.mediaStart
+ *
+ * We snapshot `clip.start` / `clip.mediaStart` / `clip.playbackRate` from
+ * each element's dataset + `defaultPlaybackRate` once when the sampler is
+ * installed, so the per-frame work is just a subtract + multiply + abs.
+ * - The runtime's media sync runs on a 50ms `setInterval`. Between syncs the
+ * video element's clock free-runs. The drift we measure here is the
+ * residual after that 50ms loop catches up — i.e. the user-visible glitch
+ * budget. The runtime hard-resyncs when |currentTime - relTime| > 0.5s
+ * (see media.ts), which is exactly the proposal's max-drift ceiling: a
+ * regression past 500ms means the corrective resync kicked in and the
+ * viewer saw a jump.
+ * - We install RVFC *before* calling play(), then reset the sample buffer
+ * once `__player.isPlaying()` flips true. Frames captured during the
+ * postMessage round-trip would compare a non-zero mediaTime against
+ * `getTime() === 0` and inflate drift to several hundred ms — same gotcha
+ * as 02-fps.ts.
+ * - Sustain window is 6s instead of the proposal's 10s because the fixture
+ * composition is exactly 10s long, and we want headroom before the
+ * end-of-timeline pause/clamp behavior. With 10 videos × ~25fps × 6s we
+ * still pool ~1500 samples per run, more than enough for a stable p95.
+ *
+ * Outputs two metrics:
+ * - media_drift_max_ms (lower-is-better, baseline driftMaxMs)
+ * - media_drift_p95_ms (lower-is-better, baseline driftP95Ms)
+ *
+ * Aggregation: max() and percentile(95) across the pooled per-frame drifts
+ * from every video in every run.
+ */
+
+import type { Browser, Frame, Page } from "puppeteer-core";
+import { loadHostPage, percentile } from "../runner.ts";
+import type { Metric } from "../perf-gate.ts";
+
+export type DriftScenarioOpts = {
+  /** Puppeteer browser; each run opens (and closes) its own browser context. */
+  browser: Browser;
+  /** Origin serving the perf host page; passed through to loadHostPage. */
+  origin: string;
+  /** Number of measurement runs. */
+  runs: number;
+  /** If null, runs the default fixture (10-video-grid). */
+  fixture: string | null;
+};
+
+/** Fixture used when opts.fixture is null. */
+const DEFAULT_FIXTURE = "10-video-grid";
+/** Measurement window: how long playback runs while RVFC samples are pooled. */
+const PLAYBACK_DURATION_MS = 6_000;
+/** Max wait for __player.isPlaying() to flip true after play() is issued. */
+const PLAY_CONFIRM_TIMEOUT_MS = 5_000;
+/** Max wait for the fixture iframe to show up in page.frames(). */
+const FRAME_LOOKUP_TIMEOUT_MS = 5_000;
+
+/** One RVFC observation plus the clip-transform snapshot needed to derive drift. */
+type DriftSample = {
+  // Composition time from __player.getTime() at callback time (seconds —
+  // drift math below multiplies by 1000 to get ms).
+  compTime: number;
+  // metadata.mediaTime of the frame the decoder presented (seconds).
+  actualMediaTime: number;
+  // Clip start on the composition timeline, parsed from data-start.
+  clipStart: number;
+  // Media-time offset, from data-playback-start (or data-media-start).
+  clipMediaStart: number;
+  // defaultPlaybackRate snapshot, clamped to [0.1, 5]; 1 when non-finite.
+  clipPlaybackRate: number;
+};
+
+declare global {
+  interface Window {
+    /** RVFC samples collected by the iframe-side observer. */
+    __perfDriftSamples?: DriftSample[];
+    /** Set to false to stop sampling at the end of the measurement window. */
+    __perfDriftActive?: boolean;
+    /** Control/inspection API the fixture runtime exposes inside the iframe. */
+    __player?: {
+      play: () => void;
+      pause: () => void;
+      seek: (timeSeconds: number) => void;
+      getTime: () => number;
+      getDuration: () => number;
+      isPlaying: () => boolean;
+    };
+  }
+}
+
+/** Outcome of a single measurement run. */
+type RunResult = {
+  // Per-frame drift magnitudes in ms, pooled across all instrumented videos.
+  drifts: number[];
+  // Number of videos that accepted an RVFC observer.
+  videoCount: number;
+};
+
+/**
+ * Find the iframe Puppeteer Frame that hosts the fixture composition. Same
+ * helper as the other scenarios; duplicated locally so each scenario file is
+ * self-contained.
+ *
+ * Polls page.frames() every 50ms until one frame's URL contains
+ * `/fixtures/<fixture>/`, throwing once FRAME_LOOKUP_TIMEOUT_MS elapses.
+ */
+async function getFixtureFrame(page: Page, fixture: string): Promise<Frame> {
+  const expected = `/fixtures/${fixture}/`;
+  const deadline = Date.now() + FRAME_LOOKUP_TIMEOUT_MS;
+  while (Date.now() < deadline) {
+    const frame = page.frames().find((f) => f.url().includes(expected));
+    if (frame) return frame;
+    await new Promise((r) => setTimeout(r, 50));
+  }
+  throw new Error(`[scenario:drift] fixture frame not found for "${fixture}" within timeout`);
+}
+
+/**
+ * One measurement run: fresh browser context → load fixture → install an
+ * RVFC observer on every video → play for PLAYBACK_DURATION_MS → convert the
+ * pooled (compTime, mediaTime) samples to per-frame drift in ms. Throws when
+ * the fixture is too short, contains no videos, or yields zero samples.
+ */
+async function runOnce(
+  opts: DriftScenarioOpts,
+  fixture: string,
+  idx: number,
+  total: number,
+): Promise<RunResult> {
+  const ctx = await opts.browser.createBrowserContext();
+  try {
+    const page = await ctx.newPage();
+    const { duration } = await loadHostPage(page, opts.origin, { fixture });
+    const requiredDurationSec = PLAYBACK_DURATION_MS / 1000;
+    if (duration < requiredDurationSec) {
+      throw new Error(
+        `[scenario:drift] fixture composition is ${duration.toFixed(2)}s but drift sample window needs >= ${requiredDurationSec.toFixed(0)}s`,
+      );
+    }
+    const frame = await getFixtureFrame(page, fixture);
+
+    // Install RVFC on every `video[data-start]` element in the iframe. Each
+    // callback records the wall-clock-aligned (compositionTime, mediaTime)
+    // pair plus a snapshot of the clip transform so we can compute drift in
+    // node without re-querying the dataset on every frame.
+    const videoCount = (await frame.evaluate(() => {
+      window.__perfDriftSamples = [];
+      window.__perfDriftActive = true;
+      // The <HTMLVideoElement> type argument is required: dataset and
+      // defaultPlaybackRate below are not available on plain Element.
+      const videos = Array.from(
+        document.querySelectorAll<HTMLVideoElement>("video[data-start]"),
+      );
+      type RvfcMetadata = { mediaTime: number; presentationTime: number };
+      type RvfcVideo = HTMLVideoElement & {
+        requestVideoFrameCallback?: (
+          cb: (now: DOMHighResTimeStamp, metadata: RvfcMetadata) => void,
+        ) => number;
+      };
+      let installed = 0;
+      for (const video of videos) {
+        const rvfcVideo = video as RvfcVideo;
+        const rvfc = rvfcVideo.requestVideoFrameCallback;
+        // Headless Chrome supports RVFC; bail quietly on browsers that don't.
+        if (!rvfc) continue;
+        const clipStart = Number.parseFloat(video.dataset.start ?? "0") || 0;
+        const clipMediaStart =
+          Number.parseFloat(video.dataset.playbackStart ?? video.dataset.mediaStart ?? "0") || 0;
+        const rawRate = video.defaultPlaybackRate;
+        const clipPlaybackRate =
+          Number.isFinite(rawRate) && rawRate > 0 ? Math.max(0.1, Math.min(5, rawRate)) : 1;
+        const tick = (_now: DOMHighResTimeStamp, metadata: RvfcMetadata) => {
+          if (!window.__perfDriftActive) return;
+          const compTime = window.__player?.getTime?.() ?? Number.NaN;
+          if (Number.isFinite(compTime)) {
+            window.__perfDriftSamples!.push({
+              compTime,
+              actualMediaTime: metadata.mediaTime,
+              clipStart,
+              clipMediaStart,
+              clipPlaybackRate,
+            });
+          }
+          rvfc.call(video, tick);
+        };
+        rvfc.call(video, tick);
+        installed++;
+      }
+      return installed;
+    })) as number;
+
+    if (videoCount === 0) {
+      throw new Error(`[scenario:drift] fixture ${fixture} contains no video[data-start] elements`);
+    }
+
+    // Issue play from the host page; the player posts a control message into
+    // the iframe and the runtime starts the 50ms media sync poll.
+    await page.evaluate(() => {
+      const el = document.getElementById("player") as (HTMLElement & { play: () => void }) | null;
+      if (!el) throw new Error("[scenario:drift] player element missing on host page");
+      el.play();
+    });
+
+    // Wait for the runtime to confirm playing before we trust the samples.
+    await frame.waitForFunction(() => window.__player?.isPlaying?.() === true, {
+      timeout: PLAY_CONFIRM_TIMEOUT_MS,
+    });
+
+    // Reset the buffer now that playback is live. Anything captured during
+    // the postMessage round-trip would compare a non-zero mediaTime against
+    // `getTime() === 0` and bias drift up by hundreds of ms.
+    await frame.evaluate(() => {
+      window.__perfDriftSamples = [];
+    });
+
+    await new Promise((r) => setTimeout(r, PLAYBACK_DURATION_MS));
+
+    // Stop sampling first, then pause. Same ordering as 02-fps.ts so the
+    // pause command can't perturb the tail of the measurement window.
+    const samples = (await frame.evaluate(() => {
+      window.__perfDriftActive = false;
+      return window.__perfDriftSamples ?? [];
+    })) as DriftSample[];
+
+    await page.evaluate(() => {
+      const el = document.getElementById("player") as (HTMLElement & { pause: () => void }) | null;
+      el?.pause();
+    });
+
+    if (samples.length === 0) {
+      throw new Error(
+        `[scenario:drift] run ${idx + 1}/${total}: zero RVFC samples captured (videos=${videoCount}, duration=${duration.toFixed(2)}s)`,
+      );
+    }
+
+    // Apply the runtime's transform to derive the expected media time, then
+    // compare against the actual media time the decoder presented. Convert
+    // to ms here so the gate threshold (driftMaxMs / driftP95Ms) compares
+    // apples-to-apples.
+    const drifts: number[] = [];
+    for (const s of samples) {
+      const expectedMediaTime = (s.compTime - s.clipStart) * s.clipPlaybackRate + s.clipMediaStart;
+      const driftMs = Math.abs(s.actualMediaTime - expectedMediaTime) * 1000;
+      drifts.push(driftMs);
+    }
+
+    const max = Math.max(...drifts);
+    const p95 = percentile(drifts, 95);
+    console.log(
+      `[scenario:drift] run[${idx + 1}/${total}] max=${max.toFixed(2)}ms p95=${p95.toFixed(2)}ms videos=${videoCount} samples=${samples.length}`,
+    );
+
+    await page.close();
+    return { drifts, videoCount };
+  } finally {
+    await ctx.close();
+  }
+}
+
+/**
+ * Scenario entry point: pools per-frame drift from every video in every run
+ * and reports the max and p95 as lower-is-better metrics gated against
+ * driftMaxMs / driftP95Ms in baseline.json.
+ */
+export async function runDrift(opts: DriftScenarioOpts): Promise<Metric[]> {
+  const fixture = opts.fixture ?? DEFAULT_FIXTURE;
+  // Guard against a zero/negative runs value from the CLI.
+  const runs = Math.max(1, opts.runs);
+  console.log(`[scenario:drift] fixture=${fixture} runs=${runs} window=${PLAYBACK_DURATION_MS}ms`);
+
+  const allDrifts: number[] = [];
+  let lastVideoCount = 0;
+  for (let i = 0; i < runs; i++) {
+    const result = await runOnce(opts, fixture, i, runs);
+    allDrifts.push(...result.drifts);
+    lastVideoCount = result.videoCount;
+  }
+
+  // Worst case wins for max; p95 is computed across the pooled per-frame
+  // drifts from every video in every run. The proposal asserts max < 500ms
+  // and p95 < 100ms, so a single bad sample legitimately gates the build.
+  const maxDrift = Math.max(...allDrifts);
+  const p95Drift = percentile(allDrifts, 95);
+  // Coefficient of variation (stddev / mean) is logged here as a soft signal
+  // we can eyeball in CI output. We deliberately do NOT gate on it — the
+  // baseline asserts absolute thresholds (max, p95), and the underlying
+  // distribution is heavy-tailed (most frames are sub-50ms, occasional ones
+  // spike during the 50ms media-sync interval). But CV is a useful early
+  // warning: if it climbs significantly across CI runs while max + p95 stay
+  // green, our jitter assumptions about the runtime's resync loop have
+  // shifted (e.g. if media.ts changes its 50ms `setInterval` cadence) and
+  // we should revisit the baselines before they start producing flakes.
+  // TODO(player-perf): once we have ~2 weeks of CI baseline data, decide
+  // whether to publish CV as a tracked-but-ungated metric in baseline.json
+  // alongside max + p95, or wire it into the Slack regression report.
+  const meanDrift = allDrifts.reduce((a, b) => a + b, 0) / allDrifts.length;
+  const variance = allDrifts.reduce((acc, d) => acc + (d - meanDrift) ** 2, 0) / allDrifts.length;
+  const stddev = Math.sqrt(variance);
+  const cv = meanDrift > 0 ? stddev / meanDrift : 0;
+  console.log(
+    `[scenario:drift] aggregate max=${maxDrift.toFixed(2)}ms p95=${p95Drift.toFixed(2)}ms mean=${meanDrift.toFixed(2)}ms cv=${cv.toFixed(3)} videos=${lastVideoCount} samples=${allDrifts.length} runs=${runs}`,
+  );
+
+  return [
+    {
+      name: "media_drift_max_ms",
+      baselineKey: "driftMaxMs",
+      value: maxDrift,
+      unit: "ms",
+      direction: "lower-is-better",
+      samples: allDrifts,
+    },
+    {
+      name: "media_drift_p95_ms",
+      baselineKey: "driftP95Ms",
+      value: p95Drift,
+      unit: "ms",
+      direction: "lower-is-better",
+      samples: allDrifts,
+    },
+  ];
+}