modelcontextprotocol · ochafik · Dec 13, 2025 · Dec 9, 2025 · Dec 9, 2025 · Dec 9, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -30,3 +30,32 @@
       - run: npm test
 
       - run: npm run prettier
+
+  # End-to-end browser tests (Playwright), run via `npm run test:e2e`.
+  #
+  # This job runs on macOS because the only screenshot baselines committed
+  # next to the spec are the `*-chromium-darwin.png` files. Playwright's
+  # default snapshot path includes the platform, so on a Linux runner every
+  # toHaveScreenshot() assertion would have no baseline to compare against.
+  # If Linux baselines are committed later, this can go back to ubuntu-latest.
+  e2e:
+    runs-on: macos-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Bun is needed by the examples scripts the Playwright web server starts.
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - run: npm install
+
+      - name: Install Playwright browsers
+        run: npx playwright install --with-deps chromium
+
+      - name: Run E2E tests
+        run: npm run test:e2e
+
+      # Upload the HTML report even when the tests fail (but not when the
+      # run was cancelled) so failures can be inspected.
+      - name: Upload Playwright report
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: playwright-report
+          path: playwright-report/
+          retention-days: 7
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,8 @@ bun.lockb
 .vscode/
 docs/api/
 tmp/
+intermediate-findings/
+
+# Playwright
+playwright-report/
+test-results/
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -35,6 +35,9 @@
     "prepack": "npm run build",
     "build:all": "npm run build && npm run examples:build",
     "test": "bun test",
+    "test:e2e": "playwright test",
+    "test:e2e:update": "playwright test --update-snapshots",
+    "test:e2e:ui": "playwright test --ui",
     "examples:build": "bun examples/run-all.ts build",
     "examples:start": "NODE_ENV=development npm run build && bun examples/run-all.ts start",
     "examples:dev": "NODE_ENV=development bun examples/run-all.ts dev",
@@ -48,10 +51,11 @@
   },
   "author": "Olivier Chafik",
   "devDependencies": {
+    "@playwright/test": "^1.52.0",
     "@types/bun": "^1.3.2",
-    "bun": "^1.3.2",
     "@types/react": "^19.2.2",
     "@types/react-dom": "^19.2.2",
+    "bun": "^1.3.2",
     "concurrently": "^9.2.1",
     "cors": "^2.8.5",
     "esbuild": "^0.25.12",
@@ -71,8 +75,8 @@
   "optionalDependencies": {
     "@rollup/rollup-darwin-arm64": "^4.53.3",
     "@rollup/rollup-darwin-x64": "^4.53.3",
-    "@rollup/rollup-linux-x64-gnu": "^4.53.3",
     "@rollup/rollup-linux-arm64-gnu": "^4.53.3",
+    "@rollup/rollup-linux-x64-gnu": "^4.53.3",
     "@rollup/rollup-win32-x64-msvc": "^4.53.3"
   }
 }
diff --git a/playwright.config.ts b/playwright.config.ts
@@ -0,0 +1,43 @@
+import { defineConfig, devices } from "@playwright/test";
+
+// Origin of the examples host. Used both as the tests' base URL and as the
+// URL Playwright polls to decide that the web server is up.
+const HOST_URL = "http://localhost:8080";
+
+// True when the CI environment variable is set to a non-empty value.
+const isCI = Boolean(process.env.CI);
+
+// On macOS launch the system Chrome channel (for stability); elsewhere fall
+// back to Playwright's default Chromium.
+const chromiumLaunchOptions =
+  process.platform === "darwin" ? { channel: "chrome" } : {};
+
+/**
+ * Playwright configuration for the end-to-end tests in ./tests/e2e.
+ *
+ * All tests share one examples server, so they run sequentially in a single
+ * worker.
+ */
+export default defineConfig({
+  testDir: "./tests/e2e",
+  // One worker, no intra-file parallelism: the server is shared.
+  fullyParallel: false,
+  workers: 1,
+  // In CI, reject stray `test.only` and retry failed tests twice.
+  forbidOnly: isCI,
+  retries: isCI ? 2 : 0,
+  reporter: "html",
+  use: {
+    baseURL: HOST_URL,
+    trace: "on-first-retry",
+    screenshot: "only-on-failure",
+  },
+  projects: [
+    {
+      name: "chromium",
+      use: {
+        ...devices["Desktop Chrome"],
+        launchOptions: chromiumLaunchOptions,
+      },
+    },
+  ],
+  // Start the examples server before the tests; locally an already running
+  // server is reused, in CI a fresh one is always started.
+  webServer: {
+    command: "npm run examples:start",
+    url: HOST_URL,
+    reuseExistingServer: !isCI,
+    timeout: 120000,
+  },
+  // Defaults for screenshot comparisons.
+  expect: {
+    toHaveScreenshot: {
+      // Tolerate up to 2% differing pixels (dynamic content such as timestamps).
+      maxDiffPixelRatio: 0.02,
+      // Freeze animations so captures are stable.
+      animations: "disabled",
+    },
+  },
+});
diff --git a/tests/e2e/servers.spec.ts b/tests/e2e/servers.spec.ts
@@ -0,0 +1,88 @@
+import { test, expect } from "@playwright/test";
+
+/**
+ * Example servers exercised by the per-server tests in this file.
+ *
+ * Each entry is listed as `[key, name]`; the resulting objects have:
+ *  - `key`:   base name of the golden screenshot (`${key}.png`),
+ *  - `index`: position of the entry in this list, used as the option index
+ *             when selecting the server in the host's first `<select>`,
+ *  - `name`:  title of the test group.
+ */
+const SERVERS = [
+  ["basic-react", "Basic MCP App Server (React-based)"],
+  ["basic-vanillajs", "Basic MCP App Server (Vanilla JS)"],
+  ["budget-allocator", "Budget Allocator Server"],
+  ["cohort-heatmap", "Cohort Heatmap Server"],
+  ["customer-segmentation", "Customer Segmentation Server"],
+  ["scenario-modeler", "SaaS Scenario Modeler"],
+  ["system-monitor", "System Monitor Server"],
+  ["threejs", "Three.js Server"],
+].map(([key, name], index) => ({ key, index, name }));
+
+// The iframe-heavy tests need more headroom: raise the timeout to 90 s.
+test.setTimeout(90_000);
+
+test.describe("Host UI", () => {
+  // Selectors of the controls expected on the freshly loaded host page.
+  const CONTROL_SELECTORS = [
+    "label:has-text('Server')",
+    "label:has-text('Tool')",
+    'button:has-text("Call Tool")',
+  ];
+
+  test("initial state shows controls", async ({ page }) => {
+    await page.goto("/");
+    // Check the controls one after another, in the listed order.
+    for (const selector of CONTROL_SELECTORS) {
+      await expect(page.locator(selector)).toBeVisible();
+    }
+  });
+
+  test("screenshot of initial state", async ({ page }) => {
+    const settleMs = 1000;
+    await page.goto("/");
+    // Fixed pause before capturing, so the page has time to settle.
+    await page.waitForTimeout(settleMs);
+    await expect(page).toHaveScreenshot("host-initial.png");
+  });
+});
+
+// Generate one test group per entry in SERVERS.
+//
+// Every test in a group starts the same way: open the host page, pick the
+// server in the first <select> (by option index), click "Call Tool", and wait
+// for the outer app iframe to be attached. That shared setup lives in a
+// beforeEach hook so the individual tests only contain what they verify.
+for (const server of SERVERS) {
+  test.describe(`${server.name}`, () => {
+    test.beforeEach(async ({ page }) => {
+      await page.goto("/");
+
+      // Select server
+      const serverSelect = page.locator("select").first();
+      await serverSelect.selectOption({ index: server.index });
+
+      // Click Call Tool
+      await page.click('button:has-text("Call Tool")');
+
+      // Wait for outer iframe
+      await page.waitForSelector("iframe", { timeout: 10000 });
+    });
+
+    test(`loads app UI`, async ({ page }) => {
+      // Verify the nested iframe structure exists. The assertion retries on
+      // its own until the inner iframe is visible, so no fixed sleep is
+      // needed first; the 15 s budget matches the former 5 s sleep plus the
+      // 10 s assertion timeout.
+      const outerFrame = page.frameLocator("iframe").first();
+      await expect(outerFrame.locator("iframe")).toBeVisible({
+        timeout: 15000,
+      });
+    });
+
+    test(`screenshot matches golden`, async ({ page }) => {
+      // Give the nested iframe content time to render before capturing.
+      // This pause is kept deliberately: when baselines are (re)generated
+      // there is nothing to compare against, so the capture moment decides
+      // what ends up in the golden image.
+      await page.waitForTimeout(6000);
+
+      // Take screenshot
+      await expect(page).toHaveScreenshot(`${server.key}.png`, {
+        maxDiffPixelRatio: 0.1, // 10% tolerance for dynamic content
+        timeout: 10000,
+      });
+    });
+  });
+}
diff --git a/tests/e2e/servers.spec.ts-snapshots/basic-react-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/basic-react-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/basic-vanillajs-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/basic-vanillajs-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/budget-allocator-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/budget-allocator-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/cohort-heatmap-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/cohort-heatmap-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/customer-segmentation-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/customer-segmentation-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/host-initial-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/host-initial-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/scenario-modeler-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/scenario-modeler-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/system-monitor-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/system-monitor-chromium-darwin.png
diff --git a/tests/e2e/servers.spec.ts-snapshots/threejs-chromium-darwin.png b/tests/e2e/servers.spec.ts-snapshots/threejs-chromium-darwin.png