From 1a2484bc6a6213ecb0c9056991f8d101638a417c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 17:38:55 +0000 Subject: [PATCH 01/10] Refactor AmbientScribe examples to use @corti/sdk with server/client split Replace raw WebSocket implementation with @corti/sdk package. Each example is now split into server.ts (OAuth client creds, interaction creation, scoped stream token) and client.ts (stream connection, audio capture, event handling), with audio utilities in separate audio.ts files and minimal index.html pages. https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/README.md | 298 ++++++++++-------- Web/AmbientScribe/singleMicrophone.ts | 199 ------------ Web/AmbientScribe/singleMicrophone/audio.ts | 16 + Web/AmbientScribe/singleMicrophone/client.ts | 77 +++++ Web/AmbientScribe/singleMicrophone/index.html | 13 + Web/AmbientScribe/singleMicrophone/server.ts | 64 ++++ Web/AmbientScribe/types.ts | 66 ---- Web/AmbientScribe/virtualConsultations.ts | 267 ---------------- .../virtualConsultations/audio.ts | 38 +++ .../virtualConsultations/client.ts | 124 ++++++++ .../virtualConsultations/index.html | 13 + .../virtualConsultations/server.ts | 64 ++++ 12 files changed, 580 insertions(+), 659 deletions(-) delete mode 100644 Web/AmbientScribe/singleMicrophone.ts create mode 100644 Web/AmbientScribe/singleMicrophone/audio.ts create mode 100644 Web/AmbientScribe/singleMicrophone/client.ts create mode 100644 Web/AmbientScribe/singleMicrophone/index.html create mode 100644 Web/AmbientScribe/singleMicrophone/server.ts delete mode 100644 Web/AmbientScribe/types.ts delete mode 100644 Web/AmbientScribe/virtualConsultations.ts create mode 100644 Web/AmbientScribe/virtualConsultations/audio.ts create mode 100644 Web/AmbientScribe/virtualConsultations/client.ts create mode 100644 Web/AmbientScribe/virtualConsultations/index.html create mode 100644 Web/AmbientScribe/virtualConsultations/server.ts diff --git a/Web/AmbientScribe/README.md 
b/Web/AmbientScribe/README.md index 3423131..b581aad 100644 --- a/Web/AmbientScribe/README.md +++ b/Web/AmbientScribe/README.md @@ -1,80 +1,161 @@ -# Corti AI Platform – Live Transcription & Fact-Based Documentation +# Corti AI Platform – Live Transcription & Fact-Based Documentation -This README provides a guide on using the **Corti AI Platform** WebSocket API for **live audio transcription** and **fact-based documentation**. It includes two approaches: -1. **Single audio stream** – Capturing audio from a single microphone. -2. **Dual-channel merged streams** – Combining a **local microphone** and a **WebRTC stream** for doctor-patient scenarios. +This guide covers using the **Corti AI Platform** via the [`@corti/sdk`](https://www.npmjs.com/package/@corti/sdk) for **live audio transcription** and **fact-based documentation**. It includes two approaches: + +1. **Single microphone** – Capturing audio from a single microphone with speaker diarization. +2. **Virtual consultations** – Merging a **local microphone** and a **WebRTC stream** for doctor-patient scenarios. + +Both examples follow a **server/client split**: the server handles authentication and sensitive API calls, while the client handles audio capture and streaming. + +## Installation + +```bash +npm i @corti/sdk +``` --- -## **1. Overview of Configurations** +## Architecture -### **Single Stream (Diarization Mode)** -This setup uses **one audio source** and **speaker diarization** to distinguish multiple speakers in the same channel automatically. +### Server (`server.ts`) + +The server is responsible for: + +1. **Creating a `CortiClient`** using OAuth2 client credentials (these must never be exposed to the browser). +2. **Creating an interaction** via the REST API. +3. **Obtaining a scoped stream token** that only grants access to the streaming WebSocket, which is safe to send to the client. 
```ts -const DEFAULT_CONFIG: Config = { - type: "config", - configuration: { - transcription: { - primaryLanguage: "en", - isDiarization: true, // AI automatically differentiates speakers - isMultichannel: false, - participants: [ - { - channel: 0, - role: "multiple", - }, - ], - }, - mode: { type: "facts", outputLocale: "en" }, +import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; +import { randomUUID } from "crypto"; + +const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", + auth: { + clientId: "YOUR_CLIENT_ID", + clientSecret: "YOUR_CLIENT_SECRET", }, -}; +}); + +const interaction = await client.interactions.create({ + encounter: { + identifier: randomUUID(), + status: "planned", + type: "first_consultation", + }, +}); + +const auth = new CortiAuth({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", +}); + +const streamToken = await auth.getToken({ + clientId: "YOUR_CLIENT_ID", + clientSecret: "YOUR_CLIENT_SECRET", + scopes: ["stream"], +}); + +// Send interaction.id and streamToken.accessToken to the client ``` -### **Dual-Channel (Explicit Roles: Doctor & Patient)** -This setup **merges two separate audio streams** (e.g., a local microphone and a WebRTC stream). Instead of diarization, each stream is assigned a **fixed role** (Doctor or Patient). +### Client (`client.ts`) + +The client receives the scoped token and interaction ID, then: + +1. **Creates a `CortiClient`** with the scoped access token. +2. **Connects to the stream** via `client.stream.connect()`. +3. **Captures and sends audio** using `MediaRecorder`. +4. **Handles transcript and fact events** from the stream. 
```ts -const DEFAULT_CONFIG: Config = { - type: "config", - configuration: { - transcription: { - primaryLanguage: "en", - isDiarization: false, // No automatic speaker detection - isMultichannel: false, - participants: [ - { channel: 0, role: "doctor" }, - { channel: 0, role: "patient" }, - ], - }, - mode: { type: "facts", outputLocale: "en" }, +import { CortiClient, CortiEnvironment } from "@corti/sdk"; + +const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", + auth: { + accessToken: streamToken.accessToken, // Token with "stream" scope }, -}; +}); + +// This will work – stream is within the token's scope +const streamSocket = await client.stream.connect({ id: interactionId }); + +// This would fail – outside the token's scope: +// await client.transcribe.connect({ id: "..." }); // Error +// await client.interactions.list(); // Error ``` + --- -## **2. Capturing Audio Streams** +## 1. Single Microphone + +**Files:** `singleMicrophone/server.ts`, `singleMicrophone/client.ts`, `singleMicrophone/audio.ts`, `singleMicrophone/index.html` + +Uses a single audio source with **speaker diarization** to automatically distinguish multiple speakers. + +### Audio Capture (`audio.ts`) + +Exposes `getMicrophoneStream()` to access the user's microphone: -### **Single Microphone Access** -Retrieves and returns a **MediaStream** from the user's microphone. ```ts const microphoneStream = await getMicrophoneStream(); ``` -### **Merging Two Streams (Microphone + WebRTC)** -For doctor-patient conversations, we merge two separate audio sources. 
+### Streaming & Events (`client.ts`) + ```ts -const { stream, endStream } = mergeMediaStreams([microphoneStream, webRTCStream]); +const streamSocket = await client.stream.connect({ id: interactionId }); + +const microphoneStream = await getMicrophoneStream(); +const mediaRecorder = new MediaRecorder(microphoneStream); +mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0) { + streamSocket.send(event.data); + } +}; +mediaRecorder.start(200); + +streamSocket.on("transcript", (data) => console.log("Transcript:", data)); +streamSocket.on("fact", (data) => console.log("Fact:", data)); ``` -**How Merging Works:** -- **Each stream is treated as a separate channel** -- **WebRTC provides the remote participant's audio** -- **The local microphone captures the speaker on-site** -- **The merged stream is sent to Corti’s API** +### Cleanup ```ts -export const mergeMediaStreams = (mediaStreams: MediaStream[]): { stream: MediaStream; endStream: () => void } => { +mediaRecorder.stop(); +microphoneStream.getAudioTracks().forEach((track) => track.stop()); +streamSocket.close(); +``` + +--- + +## 2. Virtual Consultations + +**Files:** `virtualConsultations/server.ts`, `virtualConsultations/client.ts`, `virtualConsultations/audio.ts`, `virtualConsultations/index.html` + +Merges two separate audio streams — a **local microphone** (doctor) and a **WebRTC stream** (patient) — into a single multi-channel stream. 
+ +### Audio Capture (`audio.ts`) + +Exposes two methods, each returning a `MediaStream`: + +```ts +// Local microphone +const microphoneStream = await getMicrophoneStream(); + +// Remote participant from WebRTC +const remoteStream = getRemoteParticipantStream(peerConnection); +``` + +### Merging Streams (`client.ts`) + +The two streams are merged into a single multi-channel stream where each input maps to a separate channel (channel 0 = doctor, channel 1 = patient): + +```ts +function mergeMediaStreams(mediaStreams: MediaStream[]) { const audioContext = new AudioContext(); const audioDestination = audioContext.createMediaStreamDestination(); const channelMerger = audioContext.createChannelMerger(mediaStreams.length); @@ -83,107 +164,70 @@ export const mergeMediaStreams = (mediaStreams: MediaStream[]): { stream: MediaS const source = audioContext.createMediaStreamSource(stream); source.connect(channelMerger, 0, index); }); - channelMerger.connect(audioDestination); - return { - stream: audioDestination.stream, + return { + stream: audioDestination.stream, endStream: () => { audioDestination.stream.getAudioTracks().forEach((track) => track.stop()); audioContext.close(); - } + }, }; -}; +} ``` ---- - -## **3. Establishing WebSocket Connection** -Once the audio stream is ready, we establish a WebSocket connection to Corti’s API. +### Streaming & Events (`client.ts`) -### **Starting the Audio Flow** ```ts -const { stop } = await startAudioFlow(stream, authCreds, interactionId, handleNewMessage); -``` -- **Sends real-time audio** -- **Receives transcription and facts** -- **Automatically starts when a CONFIG_ACCEPTED message is received** - ---- +const { stream: mergedStream, endStream } = mergeMediaStreams([ + microphoneStream, + remoteStream, +]); -## **4. Handling WebSocket Events (Transcripts & Facts)** -Each incoming WebSocket message is parsed and stored. 
+const streamSocket = await client.stream.connect({ id: interactionId }); -```ts -const transcripts: TranscriptEventData[] = []; -const facts: FactEventData[] = []; - -const handleNewMessage = (msg: MessageEvent) => { - const parsed = JSON.parse(msg.data); - if (parsed.type === "transcript") { - transcripts.push(parsed.data as TranscriptEventData); - } else if (parsed.type === "fact") { - facts.push(parsed.data as FactEventData); +const mediaRecorder = new MediaRecorder(mergedStream); +mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0) { + streamSocket.send(event.data); } }; -``` +mediaRecorder.start(200); ---- +streamSocket.on("transcript", (data) => console.log("Transcript:", data)); +streamSocket.on("fact", (data) => console.log("Fact:", data)); +``` -## **5. Stopping & Cleanup** -Ensure all resources (WebSocket, MediaRecorder, and merged streams) are properly closed. +### Cleanup ```ts -stop(); +mediaRecorder.stop(); +endStream(); microphoneStream.getAudioTracks().forEach((track) => track.stop()); -webRTCStream.getAudioTracks().forEach((track) => track.stop()); -endStream(); // Stops the merged audio -console.log("Call ended and resources cleaned up."); +remoteStream.getAudioTracks().forEach((track) => track.stop()); +streamSocket.close(); ``` --- -## **6. 
Full Flow Example** -### **Single-Stream (Diarization Mode)** -```ts -async function startSingleStreamCall() { - const microphoneStream = await getMicrophoneStream(); - const { stop } = await startAudioFlow(microphoneStream, authCreds, interactionId, handleNewMessage); +## File Structure - return { - endCall: () => { - stop(); - microphoneStream.getAudioTracks().forEach((track) => track.stop()); - }, - }; -} ``` - -### **Dual-Channel (Doctor-Patient Setup)** -```ts -async function startDualChannelCall() { - const microphoneStream = await getMicrophoneStream(); - const webRTCStream = new MediaStream(); // Example WebRTC stream - - const { stream, endStream } = mergeMediaStreams([microphoneStream, webRTCStream]); - const { stop } = await startAudioFlow(stream, authCreds, interactionId, handleNewMessage); - - return { - endCall: () => { - stop(); - endStream(); - microphoneStream.getAudioTracks().forEach((track) => track.stop()); - webRTCStream.getAudioTracks().forEach((track) => track.stop()); - }, - }; -} +AmbientScribe/ + README.md + singleMicrophone/ + server.ts # Auth, interaction creation, scoped token + client.ts # Stream connection, audio send, event handling + audio.ts # getMicrophoneStream() + index.html # Minimal page (output goes to console) + virtualConsultations/ + server.ts # Auth, interaction creation, scoped token + client.ts # Stream connection, merged audio, event handling + audio.ts # getMicrophoneStream(), getRemoteParticipantStream() + index.html # Minimal page (output goes to console) ``` ---- - -## **7. Summary** -🚀 **Two streaming options** – single microphone **(diarization)** or **merged dual-channel streams** (doctor-patient). -✅ **Minimal setup** – simply plug in credentials and select a mode. -📡 **Real-time AI transcription & fact extraction** – powered by **Corti’s API**. +## Resources -For further details, refer to **Corti's API documentation**. 
\ No newline at end of file +- [`@corti/sdk` on npm](https://www.npmjs.com/package/@corti/sdk) +- [Corti API documentation](https://docs.corti.ai) diff --git a/Web/AmbientScribe/singleMicrophone.ts b/Web/AmbientScribe/singleMicrophone.ts deleted file mode 100644 index f35ecf8..0000000 --- a/Web/AmbientScribe/singleMicrophone.ts +++ /dev/null @@ -1,199 +0,0 @@ -import type { AuthCreds, Config, TranscriptEventData, FactEventData } from "./types"; - - const DEFAULT_CONFIG: Config = { - type: "config", - configuration: { - transcription: { - primaryLanguage: "en", - isDiarization: true, - isMultichannel: false, - participants: [ - { - channel: 0, - role: "multiple", - }, - ], - }, - mode: { - type: "facts", - outputLocale: "en", - }, - }, - }; - - - - /** - * Retrieves the user's microphone MediaStream. - * If a device ID is provided, attempts to use that specific microphone, otherwise uses the default. - * - * @param deviceId - Optional ID of the desired audio input device. - * @returns A Promise that resolves with the MediaStream. - * @throws An error if accessing the microphone fails. - */ -const getMicrophoneStream = async (deviceId?: string): Promise => { - if (!navigator.mediaDevices) { - throw new Error("Media Devices API not supported in this browser"); - } - try { - return await navigator.mediaDevices.getUserMedia({ - audio: deviceId ? { deviceId: { exact: deviceId } } : true, - }); - } catch (error) { - console.error("Error accessing microphone:", error); - throw error; - } - }; - - - /** - * Starts an audio flow by connecting a MediaStream to a WebSocket endpoint and sending a config. - * The flow begins once a CONFIG_ACCEPTED message is received, after which audio - * data is sent in 200ms chunks via a MediaRecorder. - * - * @param mediaStream - The audio MediaStream to send. - * @param authCreds - Authentication credentials containing environment, tenant, and token. - * @param interactionId - The interaction identifier used in the WebSocket URL. 
- * @param config - Optional configuration object; falls back to DEFAULT_CONFIG if not provided. - * @returns An object with a: - * - `recorderStarted` boolean indicating whether the MediaRecorder has started - * - `stop` method to end the flow and clean up resources - */ - async function startAudioFlow(mediaStream: MediaStream, authCreds: AuthCreds, interactionId: string, handleEvent: (arg0: MessageEvent) => void, config?: Config): Promise<{ recorderStarted: boolean, stop: () => void }> { - // 2. Set up configuration if not provided - if (!config) { - config = DEFAULT_CONFIG; - } - - // 3. Start WebSocket connection - const wsUrl = `wss://api.${authCreds.environment}.corti.app/audio-bridge/v2/interactions/${interactionId}/streams?tenant-name=${authCreds.tenant}&token=Bearer%20${authCreds.token}`; - const ws = new WebSocket(wsUrl); - let isOpen = false; - let recorderStarted = false; - let mediaRecorder: MediaRecorder; - - ws.onopen = () => { - ws.send(JSON.stringify(config)); - isOpen = true; - }; - - // 4. Wait for CONFIG_ACCEPTED message - ws.onmessage = (msg: MessageEvent) => { - try { - const data = JSON.parse(msg.data); - if (data.type === "CONFIG_ACCEPTED" && !recorderStarted) { - recorderStarted = true; - startMediaRecorder(); - } - handleEvent(msg); - } catch (err) { - console.error("Failed to parse WebSocket message:", err); - } - }; - - ws.onerror = (err: Event) => { - console.error("WebSocket encountered an error:", err); - // Optionally, call stop() to clean up resources - }; - - ws.onclose = (event: Event) => { - console.log("WebSocket closed:", event); - // Ensure cleanup is performed or notify the user - }; - - // 5. Start MediaRecorder with 200ms chunks and send data to WebSocket - function startMediaRecorder() { - mediaRecorder = new MediaRecorder(mediaStream); - mediaRecorder.ondataavailable = (event: BlobEvent) => { - if (isOpen) { - ws.send(event.data); - } - }; - mediaRecorder.start(200); - } - - // 6. 
End the flow - const stop = () => { - if (ws.readyState === WebSocket.OPEN) { - ws.send(JSON.stringify({ type: "end" })); - } - if (mediaRecorder && mediaRecorder.state !== "inactive") { - mediaRecorder.stop(); - } - setTimeout(() => { - ws.close(); - }, 10000); - }; - - return { recorderStarted, stop }; - } - - - - // Usage Example: - // Define authentication credentials and interaction identifier. - const authCreds: AuthCreds = { - environment: "us", - tenant: "your-tenant", - token: "your-token", - }; - const interactionId = "interaction-id"; - - const transcripts: TranscriptEventData[] = []; - const facts: FactEventData[] = []; - - const handleNewMessage = (msg: MessageEvent) => { - try { - const parsed = JSON.parse(msg.data); - - switch (parsed.type) { - case "transcript": - transcripts.push(parsed.data as TranscriptEventData); - break; - case "fact": - facts.push(parsed.data as FactEventData); - break; - default: - console.log("Unhandled WebSocket event type:", parsed.type); - } - } catch (err) { - console.error("Failed to parse WebSocket message:", err); - } - }; - - // Encapsulate the call setup in an async function. - async function startCall() { - try { - // Retrieve the user's microphone stream. - const microphoneStream = await getMicrophoneStream(); - - // Start the audio flow over a WebSocket connection. - // The returned `stop` method is used to end the audio flow gracefully. - const { stop } = await startAudioFlow(microphoneStream, authCreds, interactionId, handleNewMessage); - - // Define a cleanup function to end the call. - const endCall = () => { - // End the audio flow (closes WebSocket and stops MediaRecorder). - stop(); - // Optionally, stop original streams if no longer needed. 
- microphoneStream.getAudioTracks().forEach((track) => track.stop()); - console.log("Call ended and resources cleaned up."); - }; - - return { endCall }; - } catch (error) { - console.error("Error starting call:", error); - throw error; - } - } - - // Example usage: start a call and end it after 10 seconds. - startCall() - .then(({ endCall }) => { - setTimeout(endCall, 10000); - }) - .catch((error) => { - // Handle any errors that occurred during setup. - console.error(error); - }); - \ No newline at end of file diff --git a/Web/AmbientScribe/singleMicrophone/audio.ts b/Web/AmbientScribe/singleMicrophone/audio.ts new file mode 100644 index 0000000..9212678 --- /dev/null +++ b/Web/AmbientScribe/singleMicrophone/audio.ts @@ -0,0 +1,16 @@ +/** + * Retrieves the user's microphone MediaStream. + * If a device ID is provided, attempts to use that specific microphone, + * otherwise uses the default audio input device. + */ +export async function getMicrophoneStream( + deviceId?: string +): Promise { + if (!navigator.mediaDevices) { + throw new Error("Media Devices API not supported in this browser"); + } + + return navigator.mediaDevices.getUserMedia({ + audio: deviceId ? { deviceId: { exact: deviceId } } : true, + }); +} diff --git a/Web/AmbientScribe/singleMicrophone/client.ts b/Web/AmbientScribe/singleMicrophone/client.ts new file mode 100644 index 0000000..f02bce2 --- /dev/null +++ b/Web/AmbientScribe/singleMicrophone/client.ts @@ -0,0 +1,77 @@ +import { CortiClient, CortiEnvironment } from "@corti/sdk"; +import { getMicrophoneStream } from "./audio"; + +// Client-side: receives a scoped token and interaction ID from the server, +// then connects to the Corti streaming API to send audio and receive +// real-time transcripts and facts. + +async function startSession(accessToken: string, interactionId: string) { + // 1. 
Create client with scoped token (stream scope only) + const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", + auth: { + accessToken: accessToken, // Token with "stream" scope + }, + }); + + // Note: with a stream-scoped token, only streaming operations are allowed. + // await client.interactions.list(); // Would fail - outside token scope + // await client.transcribe.connect(); // Would fail - outside token scope + + // 2. Connect to the stream + const streamSocket = await client.stream.connect({ id: interactionId }); + + // 3. Get microphone audio + const microphoneStream = await getMicrophoneStream(); + + // 4. Send audio data via MediaRecorder in 200ms chunks + const mediaRecorder = new MediaRecorder(microphoneStream); + mediaRecorder.ondataavailable = (event: BlobEvent) => { + if (event.data.size > 0) { + streamSocket.send(event.data); + } + }; + mediaRecorder.start(200); + + // 5. Handle incoming events + streamSocket.on("transcript", (data) => { + console.log("Transcript:", data); + }); + + streamSocket.on("fact", (data) => { + console.log("Fact:", data); + }); + + console.log("Streaming started for interaction:", interactionId); + + // 6. 
Return cleanup function + return { + endCall: () => { + if (mediaRecorder.state !== "inactive") { + mediaRecorder.stop(); + } + microphoneStream.getAudioTracks().forEach((track) => track.stop()); + streamSocket.close(); + console.log("Call ended and resources cleaned up."); + }, + }; +} + +// --- Usage --- + +async function main() { + // Fetch session credentials from your server + const response = await fetch("/api/start-session", { method: "POST" }); + const { interactionId, accessToken } = await response.json(); + + const { endCall } = await startSession(accessToken, interactionId); + + // Wire up the end call button + document.getElementById("end-call")?.addEventListener("click", () => { + endCall(); + (document.getElementById("end-call") as HTMLButtonElement).disabled = true; + }); +} + +main().catch(console.error); diff --git a/Web/AmbientScribe/singleMicrophone/index.html b/Web/AmbientScribe/singleMicrophone/index.html new file mode 100644 index 0000000..219a95e --- /dev/null +++ b/Web/AmbientScribe/singleMicrophone/index.html @@ -0,0 +1,13 @@ + + + + + Corti - Single Microphone + + +

Single Microphone - AmbientScribe

+ +

Open the browser console to see transcripts and facts.

+ + + diff --git a/Web/AmbientScribe/singleMicrophone/server.ts b/Web/AmbientScribe/singleMicrophone/server.ts new file mode 100644 index 0000000..8408371 --- /dev/null +++ b/Web/AmbientScribe/singleMicrophone/server.ts @@ -0,0 +1,64 @@ +import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; +import { randomUUID } from "crypto"; + +// Server-side: handles authentication and sensitive API calls. +// Client credentials must NEVER be exposed to the browser. + +const TENANT_NAME = "YOUR_TENANT_NAME"; +const CLIENT_ID = "YOUR_CLIENT_ID"; +const CLIENT_SECRET = "YOUR_CLIENT_SECRET"; + +// 1. Create Corti client with client credentials (OAuth2) +const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: TENANT_NAME, + auth: { + clientId: CLIENT_ID, + clientSecret: CLIENT_SECRET, + }, +}); + +// 2. Create an interaction +async function createInteraction() { + const interaction = await client.interactions.create({ + encounter: { + identifier: randomUUID(), + status: "planned", + type: "first_consultation", + }, + }); + + console.log("Interaction created:", interaction.id); + return interaction; +} + +// 3. Get a scoped token for WebSocket streaming (stream scope only). +// This token can safely be sent to the client since it only grants +// access to the streaming endpoint, not the full API. 
+async function getScopedStreamToken() { + const auth = new CortiAuth({ + environment: CortiEnvironment.Eu, + tenantName: TENANT_NAME, + }); + + const streamToken = await auth.getToken({ + clientId: CLIENT_ID, + clientSecret: CLIENT_SECRET, + scopes: ["stream"], + }); + + return streamToken; +} + +// Example: Express endpoint that provides the client with an interaction ID + scoped token +// +// app.post("/api/start-session", async (req, res) => { +// const interaction = await createInteraction(); +// const streamToken = await getScopedStreamToken(); +// res.json({ +// interactionId: interaction.id, +// accessToken: streamToken.accessToken, +// }); +// }); + +export { createInteraction, getScopedStreamToken }; diff --git a/Web/AmbientScribe/types.ts b/Web/AmbientScribe/types.ts deleted file mode 100644 index 5f95576..0000000 --- a/Web/AmbientScribe/types.ts +++ /dev/null @@ -1,66 +0,0 @@ -export interface AuthCreds { - environment: string; - tenant: string; - token: string; -} - -export interface Config { - type: string; - configuration: { - transcription: { - primaryLanguage: string; - isDiarization: boolean; - isMultichannel: boolean; - participants: Array<{ - channel: number; - role: string; - }>; - }; - mode: { - type: string; - outputLocale: string; - }; - }; -} - -export interface TranscriptEventData { - id: string; - start: number; - duration: number; - transcript: string; - isFinal: boolean; - participant: { - channel: number; - role: string; - }; - time: { - start: number; - end: number; - }; -} - -export interface FactEventData { - id: string; - text: string; - createdAt: string; - createdAtTzOffset: string; - evidence?: Array; - group: string; - groupId: string; - isDiscarded: boolean; - source: "core" | "system" | "user"; - updatedAt: string; - updatedAtTzOffset: string; -} - -export interface TranscriptMessage { - type: "transcript"; - data: TranscriptEventData; -} - -export interface FactMessage { - type: "fact"; - data: FactEventData; -} - -export 
type WSSEvent = TranscriptMessage | FactMessage; diff --git a/Web/AmbientScribe/virtualConsultations.ts b/Web/AmbientScribe/virtualConsultations.ts deleted file mode 100644 index 84cf7fb..0000000 --- a/Web/AmbientScribe/virtualConsultations.ts +++ /dev/null @@ -1,267 +0,0 @@ -import type { AuthCreds, Config, TranscriptEventData, FactEventData } from "./types"; - -const DEFAULT_CONFIG: Config = { - type: "config", - configuration: { - transcription: { - primaryLanguage: "en", - isDiarization: false, - isMultichannel: false, - participants: [ - { - channel: 0, - role: "doctor", - }, - { - channel: 0, - role: "patient", - }, - ], - }, - mode: { - type: "facts", - outputLocale: "en", - }, - }, -}; - -/** - * Merges multiple audio MediaStreams into a single MediaStream and returns both - * the merged MediaStream and a cleanup method. - * The cleanup method stops the merged stream's audio tracks and closes the AudioContext. - * - * @param mediaStreams - Array of MediaStreams to merge. - * @returns An object containing: - * - stream: the merged MediaStream. - * - endStream: A method to end the merged stream and clean up resources. - * @throws Error if no streams are provided or if any stream lacks an audio track. - */ -const mergeMediaStreams = ( - mediaStreams: MediaStream[] -): { stream: MediaStream; endStream: () => void } => { - if (!mediaStreams.length) { - throw new Error("No media streams provided."); - } - - // Validate that each MediaStream has an audio track. - mediaStreams.forEach((stream, index) => { - if (!stream.getAudioTracks().length) { - throw new Error( - `MediaStream at index ${index} does not have an audio track.` - ); - } - }); - - // Each mediastream is added as a new channel in order of the array. 
- const audioContext = new AudioContext(); - const audioDestination = audioContext.createMediaStreamDestination(); - const channelMerger = audioContext.createChannelMerger(mediaStreams.length); - mediaStreams.forEach((stream, index) => { - const source = audioContext.createMediaStreamSource(stream); - source.connect(channelMerger, 0, index); - }); - channelMerger.connect(audioDestination); - - // Close the audio context and stop all tracks when the stream ends. - const endStream = () => { - audioDestination.stream.getAudioTracks().forEach((track) => { - track.stop(); - }); - audioContext.close(); - }; - - // Return the merged stream and the endStream method. - return { stream: audioDestination.stream, endStream }; -}; - -/** - * Retrieves the user's microphone MediaStream. - * If a device ID is provided, attempts to use that specific microphone, otherwise uses the default. - * - * @param deviceId - Optional ID of the desired audio input device. - * @returns A Promise that resolves with the MediaStream. - * @throws An error if accessing the microphone fails. - */ -export const getMicrophoneStream = async ( - deviceId?: string -): Promise => { - if (!navigator.mediaDevices) { - throw new Error("Media Devices API not supported in this browser"); - } - try { - return await navigator.mediaDevices.getUserMedia({ - audio: deviceId ? { deviceId: { exact: deviceId } } : true, - }); - } catch (error) { - console.error("Error accessing microphone:", error); - throw error; - } -}; - -/** - * Starts an audio flow by connecting a MediaStream to a WebSocket endpoint and sending a config. - * The flow begins once a CONFIG_ACCEPTED message is received, after which audio - * data is sent in 200ms chunks via a MediaRecorder. - * - * @param mediaStream - The audio MediaStream to send. - * @param authCreds - Authentication credentials containing environment, tenant, and token. - * @param interactionId - The interaction identifier used in the WebSocket URL. 
- * @param config - Optional configuration object; falls back to DEFAULT_CONFIG if not provided. - * @returns An object with a: - * - `recorderStarted` boolean indicating whether the MediaRecorder has started - * - `stop` method to end the flow and clean up resources - */ -async function startAudioFlow( - mediaStream: MediaStream, - authCreds: AuthCreds, - interactionId: string, - handleEvent: (arg0: MessageEvent) => void, - config?: Config -): Promise<{ recorderStarted: boolean; stop: () => void }> { - // 2. Set up configuration if not provided - if (!config) { - config = DEFAULT_CONFIG; - } - - // 3. Start WebSocket connection - const wsUrl = `wss://api.${authCreds.environment}.corti.app/audio-bridge/v2/interactions/${interactionId}/streams?tenant-name=${authCreds.tenant}&token=Bearer%20${authCreds.token}`; - const ws = new WebSocket(wsUrl); - let isOpen = false; - let recorderStarted = false; - let mediaRecorder: MediaRecorder; - - ws.onopen = () => { - ws.send(JSON.stringify(config)); - isOpen = true; - }; - - // 4. Wait for CONFIG_ACCEPTED message - ws.onmessage = (msg: MessageEvent) => { - try { - const data = JSON.parse(msg.data); - if (data.type === "CONFIG_ACCEPTED" && !recorderStarted) { - recorderStarted = true; - startMediaRecorder(); - } - handleEvent(msg); - } catch (err) { - console.error("Failed to parse WebSocket message:", err); - } - }; - - ws.onerror = (err: Event) => { - console.error("WebSocket encountered an error:", err); - // Optionally, call stop() to clean up resources - }; - - ws.onclose = (event: Event) => { - console.log("WebSocket closed:", event); - // Ensure cleanup is performed or notify the user - }; - - // 5. Start MediaRecorder with 200ms chunks and send data to WebSocket - function startMediaRecorder() { - mediaRecorder = new MediaRecorder(mediaStream); - mediaRecorder.ondataavailable = (event: BlobEvent) => { - if (isOpen) { - ws.send(event.data); - } - }; - mediaRecorder.start(200); - } - - // 6. 
End the flow - const stop = () => { - if (ws.readyState === WebSocket.OPEN) { - ws.send(JSON.stringify({ type: "end" })); - } - if (mediaRecorder && mediaRecorder.state !== "inactive") { - mediaRecorder.stop(); - } - setTimeout(() => { - ws.close(); - }, 10000); - }; - - return { recorderStarted, stop }; -} - - -// Usage Example: -// Define authentication credentials and interaction identifier. -const authCreds: AuthCreds = { - environment: "us", - tenant: "your-tenant", - token: "your-token", -}; -const interactionId = "interaction-id"; - const transcripts: TranscriptEventData[] = []; - const facts: FactEventData[] = []; - - const handleNewMessage = (msg: MessageEvent) => { - try { - const parsed = JSON.parse(msg.data); - - switch (parsed.type) { - case "transcript": - transcripts.push(parsed.data as TranscriptEventData); - break; - case "fact": - facts.push(parsed.data as FactEventData); - break; - default: - console.log("Unhandled WebSocket event type:", parsed.type); - } - } catch (err) { - console.error("Failed to parse WebSocket message:", err); - } - }; - -// Encapsulate the call setup in an async function. -async function startCall() { - try { - // Retrieve the user's microphone stream. - const microphoneStream = await getMicrophoneStream(); - - // Obtain the WebRTC stream (e.g., from a WebRTC connection). - const webRTCStream = new MediaStream(); - - // Merge the microphone and WebRTC streams. - // The order of the streams should match your default configuration. - const { stream, endStream } = mergeMediaStreams([ - microphoneStream, - webRTCStream, - ]); - - // Start the audio flow over a WebSocket connection. - // The returned `stop` method is used to end the audio flow gracefully. - const { stop } = await startAudioFlow(stream, authCreds, interactionId, handleNewMessage); - - // Define a cleanup function to end the call. - const endCall = () => { - // End the audio flow (closes WebSocket and stops MediaRecorder). - stop(); - // Stop the merged stream. 
- endStream(); - // Optionally, stop original streams if no longer needed. - microphoneStream.getAudioTracks().forEach((track) => track.stop()); - webRTCStream.getAudioTracks().forEach((track) => track.stop()); - console.log("Call ended and resources cleaned up."); - }; - - return { endCall }; - } catch (error) { - console.error("Error starting call:", error); - throw error; - } -} - -// Example usage: start a call and end it after 10 seconds. -startCall() - .then(({ endCall }) => { - setTimeout(endCall, 10000); - }) - .catch((error) => { - // Handle any errors that occurred during setup. - console.error(error); - }); diff --git a/Web/AmbientScribe/virtualConsultations/audio.ts b/Web/AmbientScribe/virtualConsultations/audio.ts new file mode 100644 index 0000000..f456687 --- /dev/null +++ b/Web/AmbientScribe/virtualConsultations/audio.ts @@ -0,0 +1,38 @@ +/** + * Retrieves the user's local microphone MediaStream. + * If a device ID is provided, attempts to use that specific microphone, + * otherwise uses the default audio input device. + */ +export async function getMicrophoneStream( + deviceId?: string +): Promise { + if (!navigator.mediaDevices) { + throw new Error("Media Devices API not supported in this browser"); + } + + return navigator.mediaDevices.getUserMedia({ + audio: deviceId ? { deviceId: { exact: deviceId } } : true, + }); +} + +/** + * Extracts the remote participant's audio MediaStream from a WebRTC peer connection. + * Use this to capture the other party's audio in a virtual consultation. 
+ */ +export function getRemoteParticipantStream( + peerConnection: RTCPeerConnection +): MediaStream { + const remoteStream = new MediaStream(); + + for (const receiver of peerConnection.getReceivers()) { + if (receiver.track.kind === "audio") { + remoteStream.addTrack(receiver.track); + } + } + + if (!remoteStream.getAudioTracks().length) { + throw new Error("No remote audio tracks found on the peer connection"); + } + + return remoteStream; +} diff --git a/Web/AmbientScribe/virtualConsultations/client.ts b/Web/AmbientScribe/virtualConsultations/client.ts new file mode 100644 index 0000000..fb54365 --- /dev/null +++ b/Web/AmbientScribe/virtualConsultations/client.ts @@ -0,0 +1,124 @@ +import { CortiClient, CortiEnvironment } from "@corti/sdk"; +import { getMicrophoneStream, getRemoteParticipantStream } from "./audio"; + +// Client-side: receives a scoped token and interaction ID from the server, +// then connects to the Corti streaming API with merged doctor + patient audio. + +/** + * Merges multiple audio MediaStreams into a single multi-channel MediaStream. + * Each input stream is mapped to a separate channel (e.g., channel 0 = doctor, + * channel 1 = patient). 
+ */ +function mergeMediaStreams( + mediaStreams: MediaStream[] +): { stream: MediaStream; endStream: () => void } { + const audioContext = new AudioContext(); + const audioDestination = audioContext.createMediaStreamDestination(); + const channelMerger = audioContext.createChannelMerger(mediaStreams.length); + + mediaStreams.forEach((stream, index) => { + const source = audioContext.createMediaStreamSource(stream); + source.connect(channelMerger, 0, index); + }); + + channelMerger.connect(audioDestination); + + return { + stream: audioDestination.stream, + endStream: () => { + audioDestination.stream.getAudioTracks().forEach((track) => track.stop()); + audioContext.close(); + }, + }; +} + +async function startSession( + accessToken: string, + interactionId: string, + peerConnection: RTCPeerConnection +) { + // 1. Create client with scoped token (stream scope only) + const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", + auth: { + accessToken: accessToken, // Token with "stream" scope + }, + }); + + // Note: with a stream-scoped token, only streaming operations are allowed. + // await client.interactions.list(); // Would fail - outside token scope + // await client.transcribe.connect(); // Would fail - outside token scope + + // 2. Connect to the stream + const streamSocket = await client.stream.connect({ id: interactionId }); + + // 3. Get both audio streams + const microphoneStream = await getMicrophoneStream(); + const remoteStream = getRemoteParticipantStream(peerConnection); + + // 4. Merge streams: channel 0 = doctor (local mic), channel 1 = patient (remote) + const { stream: mergedStream, endStream } = mergeMediaStreams([ + microphoneStream, + remoteStream, + ]); + + // 5. 
Send merged audio data via MediaRecorder in 200ms chunks + const mediaRecorder = new MediaRecorder(mergedStream); + mediaRecorder.ondataavailable = (event: BlobEvent) => { + if (event.data.size > 0) { + streamSocket.send(event.data); + } + }; + mediaRecorder.start(200); + + // 6. Handle incoming events + streamSocket.on("transcript", (data) => { + console.log("Transcript:", data); + }); + + streamSocket.on("fact", (data) => { + console.log("Fact:", data); + }); + + console.log("Streaming started for interaction:", interactionId); + + // 7. Return cleanup function + return { + endCall: () => { + if (mediaRecorder.state !== "inactive") { + mediaRecorder.stop(); + } + endStream(); + microphoneStream.getAudioTracks().forEach((track) => track.stop()); + remoteStream.getAudioTracks().forEach((track) => track.stop()); + streamSocket.close(); + console.log("Call ended and resources cleaned up."); + }, + }; +} + +// --- Usage --- + +async function main() { + // Fetch session credentials from your server + const response = await fetch("/api/start-session", { method: "POST" }); + const { interactionId, accessToken } = await response.json(); + + // In a real app, this would come from your WebRTC setup + const peerConnection = new RTCPeerConnection(); + + const { endCall } = await startSession( + accessToken, + interactionId, + peerConnection + ); + + // Wire up the end call button + document.getElementById("end-call")?.addEventListener("click", () => { + endCall(); + (document.getElementById("end-call") as HTMLButtonElement).disabled = true; + }); +} + +main().catch(console.error); diff --git a/Web/AmbientScribe/virtualConsultations/index.html b/Web/AmbientScribe/virtualConsultations/index.html new file mode 100644 index 0000000..9098443 --- /dev/null +++ b/Web/AmbientScribe/virtualConsultations/index.html @@ -0,0 +1,13 @@ + + + + + Corti - Virtual Consultations + + +

+    <h1>Virtual Consultations - AmbientScribe</h1>
+    <button id="end-call">End Call</button>
+    <p>Open the browser console to see transcripts and facts.</p>

+ + + diff --git a/Web/AmbientScribe/virtualConsultations/server.ts b/Web/AmbientScribe/virtualConsultations/server.ts new file mode 100644 index 0000000..8408371 --- /dev/null +++ b/Web/AmbientScribe/virtualConsultations/server.ts @@ -0,0 +1,64 @@ +import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; +import { randomUUID } from "crypto"; + +// Server-side: handles authentication and sensitive API calls. +// Client credentials must NEVER be exposed to the browser. + +const TENANT_NAME = "YOUR_TENANT_NAME"; +const CLIENT_ID = "YOUR_CLIENT_ID"; +const CLIENT_SECRET = "YOUR_CLIENT_SECRET"; + +// 1. Create Corti client with client credentials (OAuth2) +const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: TENANT_NAME, + auth: { + clientId: CLIENT_ID, + clientSecret: CLIENT_SECRET, + }, +}); + +// 2. Create an interaction +async function createInteraction() { + const interaction = await client.interactions.create({ + encounter: { + identifier: randomUUID(), + status: "planned", + type: "first_consultation", + }, + }); + + console.log("Interaction created:", interaction.id); + return interaction; +} + +// 3. Get a scoped token for WebSocket streaming (stream scope only). +// This token can safely be sent to the client since it only grants +// access to the streaming endpoint, not the full API. 
+async function getScopedStreamToken() { + const auth = new CortiAuth({ + environment: CortiEnvironment.Eu, + tenantName: TENANT_NAME, + }); + + const streamToken = await auth.getToken({ + clientId: CLIENT_ID, + clientSecret: CLIENT_SECRET, + scopes: ["stream"], + }); + + return streamToken; +} + +// Example: Express endpoint that provides the client with an interaction ID + scoped token +// +// app.post("/api/start-session", async (req, res) => { +// const interaction = await createInteraction(); +// const streamToken = await getScopedStreamToken(); +// res.json({ +// interactionId: interaction.id, +// accessToken: streamToken.accessToken, +// }); +// }); + +export { createInteraction, getScopedStreamToken }; From fdb257efe91f44934d3cb27b55d1e870ce266ae0 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:34:39 +0000 Subject: [PATCH 02/10] Combine single mic and virtual consultation into one unified demo Replace separate subdirectories with a single set of files at the AmbientScribe root. index.html has a mode toggle (radio buttons) to switch between single-microphone diarization and virtual consultation (doctor+patient merged streams). audio.ts exposes getMicrophoneStream() and getRemoteParticipantStream() as two separate methods returning a stream, plus mergeMediaStreams(). 
https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/README.md | 218 ++++++------------ Web/AmbientScribe/audio.ts | 122 ++++++++++ Web/AmbientScribe/client.ts | 207 +++++++++++++++++ Web/AmbientScribe/index.html | 37 +++ Web/AmbientScribe/server.ts | 93 ++++++++ Web/AmbientScribe/singleMicrophone/audio.ts | 16 -- Web/AmbientScribe/singleMicrophone/client.ts | 77 ------- Web/AmbientScribe/singleMicrophone/index.html | 13 -- Web/AmbientScribe/singleMicrophone/server.ts | 64 ----- .../virtualConsultations/audio.ts | 38 --- .../virtualConsultations/client.ts | 124 ---------- .../virtualConsultations/index.html | 13 -- .../virtualConsultations/server.ts | 64 ----- 13 files changed, 528 insertions(+), 558 deletions(-) create mode 100644 Web/AmbientScribe/audio.ts create mode 100644 Web/AmbientScribe/client.ts create mode 100644 Web/AmbientScribe/index.html create mode 100644 Web/AmbientScribe/server.ts delete mode 100644 Web/AmbientScribe/singleMicrophone/audio.ts delete mode 100644 Web/AmbientScribe/singleMicrophone/client.ts delete mode 100644 Web/AmbientScribe/singleMicrophone/index.html delete mode 100644 Web/AmbientScribe/singleMicrophone/server.ts delete mode 100644 Web/AmbientScribe/virtualConsultations/audio.ts delete mode 100644 Web/AmbientScribe/virtualConsultations/client.ts delete mode 100644 Web/AmbientScribe/virtualConsultations/index.html delete mode 100644 Web/AmbientScribe/virtualConsultations/server.ts diff --git a/Web/AmbientScribe/README.md b/Web/AmbientScribe/README.md index b581aad..91c3504 100644 --- a/Web/AmbientScribe/README.md +++ b/Web/AmbientScribe/README.md @@ -1,11 +1,11 @@ # Corti AI Platform – Live Transcription & Fact-Based Documentation -This guide covers using the **Corti AI Platform** via the [`@corti/sdk`](https://www.npmjs.com/package/@corti/sdk) for **live audio transcription** and **fact-based documentation**. 
It includes two approaches: +A single demo app using the [`@corti/sdk`](https://www.npmjs.com/package/@corti/sdk) for **live audio transcription** and **fact-based documentation**. Toggle between two modes from the UI: -1. **Single microphone** – Capturing audio from a single microphone with speaker diarization. -2. **Virtual consultations** – Merging a **local microphone** and a **WebRTC stream** for doctor-patient scenarios. +- **Single Microphone** – one audio source with automatic speaker diarization. +- **Virtual Consultation** – local microphone (doctor) + WebRTC stream (patient) merged into a multi-channel stream. -Both examples follow a **server/client split**: the server handles authentication and sensitive API calls, while the client handles audio capture and streaming. +The demo is split into **server** (auth, interaction management) and **client** (audio capture, streaming, event display). ## Installation @@ -15,217 +15,137 @@ npm i @corti/sdk --- -## Architecture +## File Structure + +``` +AmbientScribe/ + server.ts # Server-side: OAuth2 auth, interaction creation, scoped token + client.ts # Client-side: stream connection, audio capture, event handling + audio.ts # Audio utilities: getMicrophoneStream(), getRemoteParticipantStream(), mergeMediaStreams() + index.html # Minimal UI with mode toggle (output goes to console) + README.md +``` + +--- -### Server (`server.ts`) +## Server (`server.ts`) -The server is responsible for: +Runs on your backend. Responsible for: -1. **Creating a `CortiClient`** using OAuth2 client credentials (these must never be exposed to the browser). +1. **Creating a `CortiClient`** with OAuth2 client credentials (never exposed to the browser). 2. **Creating an interaction** via the REST API. -3. **Obtaining a scoped stream token** that only grants access to the streaming WebSocket, which is safe to send to the client. +3. **Minting a scoped stream token** (only grants WebSocket streaming access). 
```ts import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; -import { randomUUID } from "crypto"; +// Full-privilege client — server-side only const client = new CortiClient({ environment: CortiEnvironment.Eu, tenantName: "YOUR_TENANT_NAME", - auth: { - clientId: "YOUR_CLIENT_ID", - clientSecret: "YOUR_CLIENT_SECRET", - }, + auth: { clientId: "YOUR_CLIENT_ID", clientSecret: "YOUR_CLIENT_SECRET" }, }); +// Create an interaction const interaction = await client.interactions.create({ - encounter: { - identifier: randomUUID(), - status: "planned", - type: "first_consultation", - }, -}); - -const auth = new CortiAuth({ - environment: CortiEnvironment.Eu, - tenantName: "YOUR_TENANT_NAME", + encounter: { identifier: randomUUID(), status: "planned", type: "first_consultation" }, }); +// Mint a token scoped to streaming only +const auth = new CortiAuth({ environment: CortiEnvironment.Eu, tenantName: "YOUR_TENANT_NAME" }); const streamToken = await auth.getToken({ clientId: "YOUR_CLIENT_ID", clientSecret: "YOUR_CLIENT_SECRET", scopes: ["stream"], }); -// Send interaction.id and streamToken.accessToken to the client +// Send interaction.id + streamToken.accessToken to the client ``` -### Client (`client.ts`) +--- -The client receives the scoped token and interaction ID, then: +## Audio Utilities (`audio.ts`) -1. **Creates a `CortiClient`** with the scoped access token. -2. **Connects to the stream** via `client.stream.connect()`. -3. **Captures and sends audio** using `MediaRecorder`. -4. **Handles transcript and fact events** from the stream. +Two methods for obtaining audio streams, plus a merge utility: ```ts -import { CortiClient, CortiEnvironment } from "@corti/sdk"; - -const client = new CortiClient({ - environment: CortiEnvironment.Eu, - tenantName: "YOUR_TENANT_NAME", - auth: { - accessToken: streamToken.accessToken, // Token with "stream" scope - }, -}); +// 1. 
Local microphone +const micStream = await getMicrophoneStream(); -// This will work – stream is within the token's scope -const streamSocket = await client.stream.connect({ id: interactionId }); +// 2. Remote participant from a WebRTC peer connection +const remoteStream = getRemoteParticipantStream(peerConnection); -// This would fail – outside the token's scope: -// await client.transcribe.connect({ id: "..." }); // Error -// await client.interactions.list(); // Error +// 3. Merge into a single multi-channel stream (virtual consultation mode) +const { stream, endStream } = mergeMediaStreams([micStream, remoteStream]); ``` --- -## 1. Single Microphone +## Client (`client.ts`) -**Files:** `singleMicrophone/server.ts`, `singleMicrophone/client.ts`, `singleMicrophone/audio.ts`, `singleMicrophone/index.html` +Receives the scoped token + interaction ID from the server, then: -Uses a single audio source with **speaker diarization** to automatically distinguish multiple speakers. +1. Creates a `CortiClient` with the stream-scoped token. +2. Connects via `client.stream.connect()`. +3. Acquires audio — just the mic in single mode, or mic + remote merged in virtual mode. +4. Streams audio in 200 ms chunks via `MediaRecorder`. +5. Logs transcript and fact events to the console. 
-### Audio Capture (`audio.ts`) +```ts +const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", + auth: { accessToken }, // stream scope only +}); -Exposes `getMicrophoneStream()` to access the user's microphone: +const streamSocket = await client.stream.connect({ id: interactionId }); -```ts -const microphoneStream = await getMicrophoneStream(); +// With a stream-scoped token, only streaming works: +// await client.interactions.list(); // Error — outside scope +// await client.transcribe.connect(); // Error — outside scope ``` -### Streaming & Events (`client.ts`) +### Single Microphone Mode ```ts -const streamSocket = await client.stream.connect({ id: interactionId }); - const microphoneStream = await getMicrophoneStream(); const mediaRecorder = new MediaRecorder(microphoneStream); -mediaRecorder.ondataavailable = (event) => { - if (event.data.size > 0) { - streamSocket.send(event.data); - } -}; +mediaRecorder.ondataavailable = (e) => streamSocket.send(e.data); mediaRecorder.start(200); - -streamSocket.on("transcript", (data) => console.log("Transcript:", data)); -streamSocket.on("fact", (data) => console.log("Fact:", data)); -``` - -### Cleanup - -```ts -mediaRecorder.stop(); -microphoneStream.getAudioTracks().forEach((track) => track.stop()); -streamSocket.close(); ``` ---- - -## 2. Virtual Consultations - -**Files:** `virtualConsultations/server.ts`, `virtualConsultations/client.ts`, `virtualConsultations/audio.ts`, `virtualConsultations/index.html` - -Merges two separate audio streams — a **local microphone** (doctor) and a **WebRTC stream** (patient) — into a single multi-channel stream. 
- -### Audio Capture (`audio.ts`) - -Exposes two methods, each returning a `MediaStream`: +### Virtual Consultation Mode ```ts -// Local microphone const microphoneStream = await getMicrophoneStream(); - -// Remote participant from WebRTC const remoteStream = getRemoteParticipantStream(peerConnection); -``` - -### Merging Streams (`client.ts`) -The two streams are merged into a single multi-channel stream where each input maps to a separate channel (channel 0 = doctor, channel 1 = patient): +// channel 0 = doctor, channel 1 = patient +const { stream, endStream } = mergeMediaStreams([microphoneStream, remoteStream]); -```ts -function mergeMediaStreams(mediaStreams: MediaStream[]) { - const audioContext = new AudioContext(); - const audioDestination = audioContext.createMediaStreamDestination(); - const channelMerger = audioContext.createChannelMerger(mediaStreams.length); - - mediaStreams.forEach((stream, index) => { - const source = audioContext.createMediaStreamSource(stream); - source.connect(channelMerger, 0, index); - }); - channelMerger.connect(audioDestination); - - return { - stream: audioDestination.stream, - endStream: () => { - audioDestination.stream.getAudioTracks().forEach((track) => track.stop()); - audioContext.close(); - }, - }; -} +const mediaRecorder = new MediaRecorder(stream); +mediaRecorder.ondataavailable = (e) => streamSocket.send(e.data); +mediaRecorder.start(200); ``` -### Streaming & Events (`client.ts`) +### Event Handling ```ts -const { stream: mergedStream, endStream } = mergeMediaStreams([ - microphoneStream, - remoteStream, -]); - -const streamSocket = await client.stream.connect({ id: interactionId }); - -const mediaRecorder = new MediaRecorder(mergedStream); -mediaRecorder.ondataavailable = (event) => { - if (event.data.size > 0) { - streamSocket.send(event.data); - } -}; -mediaRecorder.start(200); - streamSocket.on("transcript", (data) => console.log("Transcript:", data)); streamSocket.on("fact", (data) => console.log("Fact:", 
data)); ``` -### Cleanup +--- -```ts -mediaRecorder.stop(); -endStream(); -microphoneStream.getAudioTracks().forEach((track) => track.stop()); -remoteStream.getAudioTracks().forEach((track) => track.stop()); -streamSocket.close(); -``` +## UI (`index.html`) ---- +A minimal page with: -## File Structure +- Radio buttons to toggle between **Single Microphone** and **Virtual Consultation** mode. +- **Start Call** / **End Call** buttons. +- All output goes to the browser console. -``` -AmbientScribe/ - README.md - singleMicrophone/ - server.ts # Auth, interaction creation, scoped token - client.ts # Stream connection, audio send, event handling - audio.ts # getMicrophoneStream() - index.html # Minimal page (output goes to console) - virtualConsultations/ - server.ts # Auth, interaction creation, scoped token - client.ts # Stream connection, merged audio, event handling - audio.ts # getMicrophoneStream(), getRemoteParticipantStream() - index.html # Minimal page (output goes to console) -``` +--- ## Resources diff --git a/Web/AmbientScribe/audio.ts b/Web/AmbientScribe/audio.ts new file mode 100644 index 0000000..b5ef052 --- /dev/null +++ b/Web/AmbientScribe/audio.ts @@ -0,0 +1,122 @@ +/** + * audio.ts — Audio stream utilities for AmbientScribe. + * + * Exposes two methods for obtaining audio streams: + * 1. getMicrophoneStream() — local microphone (works in both modes) + * 2. getRemoteParticipantStream() — remote party via WebRTC (virtual consultations) + * + * Also provides mergeMediaStreams() for combining multiple streams into a + * single multi-channel stream before sending to Corti. + */ + +// --------------------------------------------------------------------------- +// 1. Local microphone +// --------------------------------------------------------------------------- + +/** + * Opens the user's microphone and returns the resulting MediaStream. + * + * @param deviceId Optional device ID if a specific microphone is desired. 
+ * When omitted the browser's default audio input is used. + * @returns A MediaStream containing a single audio track from the microphone. + */ +export async function getMicrophoneStream( + deviceId?: string +): Promise { + if (!navigator.mediaDevices) { + throw new Error("Media Devices API not supported in this browser"); + } + + return navigator.mediaDevices.getUserMedia({ + audio: deviceId ? { deviceId: { exact: deviceId } } : true, + }); +} + +// --------------------------------------------------------------------------- +// 2. Remote participant (WebRTC) +// --------------------------------------------------------------------------- + +/** + * Extracts the remote participant's audio from an active WebRTC peer connection. + * + * In a virtual consultation the remote party's audio arrives via WebRTC. + * This helper collects all incoming audio tracks from the connection's + * receivers into a single MediaStream. + * + * @param peerConnection An RTCPeerConnection that already has remote audio tracks. + * @returns A MediaStream containing the remote participant's audio track(s). + * @throws If the peer connection has no remote audio tracks. + */ +export function getRemoteParticipantStream( + peerConnection: RTCPeerConnection +): MediaStream { + const remoteStream = new MediaStream(); + + for (const receiver of peerConnection.getReceivers()) { + if (receiver.track.kind === "audio") { + remoteStream.addTrack(receiver.track); + } + } + + if (!remoteStream.getAudioTracks().length) { + throw new Error("No remote audio tracks found on the peer connection"); + } + + return remoteStream; +} + +// --------------------------------------------------------------------------- +// 3. Stream merging (used in virtual consultation mode) +// --------------------------------------------------------------------------- + +/** + * Merges multiple MediaStreams into a single multi-channel MediaStream. 
+ * + * Each input stream is mapped to its own channel (by array index), so + * channel 0 = first stream, channel 1 = second stream, etc. + * This lets Corti attribute speech to the correct participant without + * relying on diarization. + * + * @param mediaStreams Array of MediaStreams to merge. Each must have at + * least one audio track. + * @returns An object with: + * - `stream` — the merged MediaStream to feed into MediaRecorder + * - `endStream` — cleanup function that stops tracks and closes the AudioContext + */ +export function mergeMediaStreams( + mediaStreams: MediaStream[] +): { stream: MediaStream; endStream: () => void } { + if (!mediaStreams.length) { + throw new Error("No media streams provided"); + } + + // Validate every stream has audio before we start wiring things up. + mediaStreams.forEach((stream, index) => { + if (!stream.getAudioTracks().length) { + throw new Error( + `MediaStream at index ${index} does not have an audio track` + ); + } + }); + + // Create an AudioContext and a ChannelMerger with one input per stream. + const audioContext = new AudioContext(); + const audioDestination = audioContext.createMediaStreamDestination(); + const channelMerger = audioContext.createChannelMerger(mediaStreams.length); + + // Wire each stream's first audio output into its dedicated merger channel. + mediaStreams.forEach((stream, index) => { + const source = audioContext.createMediaStreamSource(stream); + source.connect(channelMerger, 0, index); + }); + + channelMerger.connect(audioDestination); + + return { + stream: audioDestination.stream, + endStream: () => { + audioDestination.stream.getAudioTracks().forEach((track) => track.stop()); + audioContext.close(); + }, + }; +} diff --git a/Web/AmbientScribe/client.ts b/Web/AmbientScribe/client.ts new file mode 100644 index 0000000..0ad0529 --- /dev/null +++ b/Web/AmbientScribe/client.ts @@ -0,0 +1,207 @@ +/** + * client.ts — Browser-side AmbientScribe demo. 
+ * + * Supports two modes toggled from index.html: + * + * "single" — Single microphone with automatic speaker diarization. + * Uses only getMicrophoneStream(). + * + * "virtual" — Virtual consultation (doctor + patient). + * Uses getMicrophoneStream() for the local doctor mic and + * getRemoteParticipantStream() for the patient's WebRTC audio, + * then merges them into a multi-channel stream so Corti can + * attribute speech to each participant without diarization. + * + * All transcript and fact events are logged to the browser console. + */ + +import { CortiClient, CortiEnvironment } from "@corti/sdk"; +import { + getMicrophoneStream, + getRemoteParticipantStream, + mergeMediaStreams, +} from "./audio"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +type Mode = "single" | "virtual"; + +/** Everything we need to tear down a running session. */ +interface ActiveSession { + endCall: () => void; +} + +// --------------------------------------------------------------------------- +// Session lifecycle +// --------------------------------------------------------------------------- + +/** + * Starts a streaming session in the chosen mode. + * + * 1. Creates a CortiClient using the scoped access token from the server. + * 2. Connects to the streaming WebSocket via client.stream.connect(). + * 3. Acquires the appropriate audio stream(s) depending on the mode. + * 4. Pipes audio to Corti in 200 ms chunks via MediaRecorder. + * 5. Listens for transcript / fact events and logs them. + * + * @param accessToken Stream-scoped token received from the server. + * @param interactionId Interaction ID received from the server. + * @param mode "single" for one mic, "virtual" for doctor + patient. + * @param peerConnection Required when mode is "virtual" — the RTCPeerConnection + * carrying the remote participant's audio. 
+ * @returns An object with an `endCall()` method for cleanup. + */ +async function startSession( + accessToken: string, + interactionId: string, + mode: Mode, + peerConnection?: RTCPeerConnection +): Promise<ActiveSession> { + // -- 1. Create a client scoped to streaming only ------------------------- + const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: "YOUR_TENANT_NAME", + auth: { + accessToken, // Token with "stream" scope only + }, + }); + + // With a stream-scoped token these would fail: + // await client.interactions.list(); // outside scope + // await client.transcribe.connect({ id: "..." }); // outside scope + + // -- 2. Connect to the Corti streaming WebSocket ------------------------- + const streamSocket = await client.stream.connect({ id: interactionId }); + + // -- 3. Acquire audio depending on mode ---------------------------------- + // "single" → just the local microphone + // "virtual" → local mic + remote WebRTC audio, merged into one stream + + const microphoneStream = await getMicrophoneStream(); + console.log(`[${mode}] Microphone stream acquired`); + + // audioStream is what we feed into MediaRecorder. + // endMergedStream is only set when we merge (virtual mode). + let audioStream: MediaStream; + let endMergedStream: (() => void) | undefined; + + if (mode === "virtual") { + if (!peerConnection) { + throw new Error("Virtual mode requires an RTCPeerConnection"); + } + + const remoteStream = getRemoteParticipantStream(peerConnection); + console.log("[virtual] Remote participant stream acquired"); + + // Merge: channel 0 = doctor (mic), channel 1 = patient (WebRTC) + const merged = mergeMediaStreams([microphoneStream, remoteStream]); + audioStream = merged.stream; + endMergedStream = merged.endStream; + } else { + audioStream = microphoneStream; + } + + // -- 4.
Stream audio to Corti in 200 ms chunks -------------------------- + const mediaRecorder = new MediaRecorder(audioStream); + + mediaRecorder.ondataavailable = (event: BlobEvent) => { + if (event.data.size > 0) { + streamSocket.send(event.data); + } + }; + + mediaRecorder.start(200); + console.log(`[${mode}] MediaRecorder started — streaming audio to Corti`); + + // -- 5. Handle incoming events ------------------------------------------- + streamSocket.on("transcript", (data) => { + console.log("Transcript:", data); + }); + + streamSocket.on("fact", (data) => { + console.log("Fact:", data); + }); + + // -- 6. Return cleanup function ------------------------------------------ + return { + endCall: () => { + // Stop recording + if (mediaRecorder.state !== "inactive") { + mediaRecorder.stop(); + } + + // Close the stream socket + streamSocket.close(); + + // Release the merged stream (virtual mode only) + endMergedStream?.(); + + // Release the raw microphone track(s) + microphoneStream.getAudioTracks().forEach((track) => track.stop()); + + console.log(`[${mode}] Call ended — all resources cleaned up`); + }, + }; +} + +// --------------------------------------------------------------------------- +// UI wiring (called from index.html) +// --------------------------------------------------------------------------- + +let activeSession: ActiveSession | null = null; + +/** + * Fetches session credentials from the server and starts streaming. + * Reads the selected mode from the radio buttons in index.html. + */ +async function handleStart() { + // Read selected mode from the radio group + const modeInput = document.querySelector( + 'input[name="mode"]:checked' + ); + const mode: Mode = (modeInput?.value as Mode) ?? 
"single"; + + try { + // Fetch interaction ID + scoped token from the server (see server.ts) + const response = await fetch("/api/start-session", { method: "POST" }); + const { interactionId, accessToken } = await response.json(); + + // In virtual mode you would pass a real RTCPeerConnection here. + // For this demo we pass undefined — replace with your WebRTC connection. + const peerConnection = mode === "virtual" ? new RTCPeerConnection() : undefined; + + activeSession = await startSession( + accessToken, + interactionId, + mode, + peerConnection + ); + + // Update button states + setButtonStates(true); + console.log(`Session started in "${mode}" mode`); + } catch (err) { + console.error("Failed to start session:", err); + } +} + +/** Ends the active session and releases all resources. */ +function handleEnd() { + activeSession?.endCall(); + activeSession = null; + setButtonStates(false); +} + +/** Toggle Start / End button enabled states. */ +function setButtonStates(isRunning: boolean) { + const startBtn = document.getElementById("start-call") as HTMLButtonElement; + const endBtn = document.getElementById("end-call") as HTMLButtonElement; + if (startBtn) startBtn.disabled = isRunning; + if (endBtn) endBtn.disabled = !isRunning; +} + +// Attach handlers once the DOM is ready. +document.getElementById("start-call")?.addEventListener("click", handleStart); +document.getElementById("end-call")?.addEventListener("click", handleEnd); diff --git a/Web/AmbientScribe/index.html b/Web/AmbientScribe/index.html new file mode 100644 index 0000000..2617b4b --- /dev/null +++ b/Web/AmbientScribe/index.html @@ -0,0 +1,37 @@ + + + + + Corti AmbientScribe Demo + + + +

AmbientScribe

+ + +
+ Mode + +
+ +
+ + + + + +

Open the browser console to see transcripts and facts.

+ + + + diff --git a/Web/AmbientScribe/server.ts b/Web/AmbientScribe/server.ts new file mode 100644 index 0000000..168e59d --- /dev/null +++ b/Web/AmbientScribe/server.ts @@ -0,0 +1,93 @@ +/** + * server.ts — Server-side setup for AmbientScribe. + * + * Runs on your backend (Node.js / Express / etc.). Responsible for: + * 1. Creating a fully-privileged CortiClient using OAuth2 client credentials. + * 2. Creating an interaction via the REST API. + * 3. Minting a scoped stream token that can be safely sent to the browser. + * + * IMPORTANT: Client credentials (CLIENT_ID / CLIENT_SECRET) must NEVER be + * exposed to the browser. Only the scoped stream token is sent to the client. + */ + +import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; +import { randomUUID } from "crypto"; + +// --------------------------------------------------------------------------- +// Configuration — replace with your own values or load from environment +// --------------------------------------------------------------------------- + +const TENANT_NAME = "YOUR_TENANT_NAME"; +const CLIENT_ID = "YOUR_CLIENT_ID"; +const CLIENT_SECRET = "YOUR_CLIENT_SECRET"; + +// --------------------------------------------------------------------------- +// 1. Create a CortiClient authenticated with client credentials (OAuth2). +// This client has full API access and must only be used server-side. +// --------------------------------------------------------------------------- + +const client = new CortiClient({ + environment: CortiEnvironment.Eu, + tenantName: TENANT_NAME, + auth: { + clientId: CLIENT_ID, + clientSecret: CLIENT_SECRET, + }, +}); + +// --------------------------------------------------------------------------- +// 2. Create an interaction. +// An interaction represents a single clinical encounter / session. 
+// --------------------------------------------------------------------------- + +async function createInteraction() { + const interaction = await client.interactions.create({ + encounter: { + identifier: randomUUID(), + status: "planned", + type: "first_consultation", + }, + }); + + console.log("Interaction created:", interaction.id); + return interaction; +} + +// --------------------------------------------------------------------------- +// 3. Mint a scoped token with only the "stream" scope. +// This token lets the client connect to the streaming WebSocket but +// cannot list interactions, create documents, or call any other REST +// endpoint — keeping the blast radius minimal if it leaks. +// --------------------------------------------------------------------------- + +async function getScopedStreamToken() { + const auth = new CortiAuth({ + environment: CortiEnvironment.Eu, + tenantName: TENANT_NAME, + }); + + const streamToken = await auth.getToken({ + clientId: CLIENT_ID, + clientSecret: CLIENT_SECRET, + scopes: ["stream"], + }); + + return streamToken; +} + +// --------------------------------------------------------------------------- +// Example: Express endpoint that hands the client everything it needs +// --------------------------------------------------------------------------- +// +// app.post("/api/start-session", async (req, res) => { +// const interaction = await createInteraction(); +// const streamToken = await getScopedStreamToken(); +// +// // The client only receives the interaction ID and a limited-scope token. 
+// res.json({ +// interactionId: interaction.id, +// accessToken: streamToken.accessToken, +// }); +// }); + +export { createInteraction, getScopedStreamToken }; diff --git a/Web/AmbientScribe/singleMicrophone/audio.ts b/Web/AmbientScribe/singleMicrophone/audio.ts deleted file mode 100644 index 9212678..0000000 --- a/Web/AmbientScribe/singleMicrophone/audio.ts +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Retrieves the user's microphone MediaStream. - * If a device ID is provided, attempts to use that specific microphone, - * otherwise uses the default audio input device. - */ -export async function getMicrophoneStream( - deviceId?: string -): Promise { - if (!navigator.mediaDevices) { - throw new Error("Media Devices API not supported in this browser"); - } - - return navigator.mediaDevices.getUserMedia({ - audio: deviceId ? { deviceId: { exact: deviceId } } : true, - }); -} diff --git a/Web/AmbientScribe/singleMicrophone/client.ts b/Web/AmbientScribe/singleMicrophone/client.ts deleted file mode 100644 index f02bce2..0000000 --- a/Web/AmbientScribe/singleMicrophone/client.ts +++ /dev/null @@ -1,77 +0,0 @@ -import { CortiClient, CortiEnvironment } from "@corti/sdk"; -import { getMicrophoneStream } from "./audio"; - -// Client-side: receives a scoped token and interaction ID from the server, -// then connects to the Corti streaming API to send audio and receive -// real-time transcripts and facts. - -async function startSession(accessToken: string, interactionId: string) { - // 1. Create client with scoped token (stream scope only) - const client = new CortiClient({ - environment: CortiEnvironment.Eu, - tenantName: "YOUR_TENANT_NAME", - auth: { - accessToken: accessToken, // Token with "stream" scope - }, - }); - - // Note: with a stream-scoped token, only streaming operations are allowed. - // await client.interactions.list(); // Would fail - outside token scope - // await client.transcribe.connect(); // Would fail - outside token scope - - // 2. 
Connect to the stream - const streamSocket = await client.stream.connect({ id: interactionId }); - - // 3. Get microphone audio - const microphoneStream = await getMicrophoneStream(); - - // 4. Send audio data via MediaRecorder in 200ms chunks - const mediaRecorder = new MediaRecorder(microphoneStream); - mediaRecorder.ondataavailable = (event: BlobEvent) => { - if (event.data.size > 0) { - streamSocket.send(event.data); - } - }; - mediaRecorder.start(200); - - // 5. Handle incoming events - streamSocket.on("transcript", (data) => { - console.log("Transcript:", data); - }); - - streamSocket.on("fact", (data) => { - console.log("Fact:", data); - }); - - console.log("Streaming started for interaction:", interactionId); - - // 6. Return cleanup function - return { - endCall: () => { - if (mediaRecorder.state !== "inactive") { - mediaRecorder.stop(); - } - microphoneStream.getAudioTracks().forEach((track) => track.stop()); - streamSocket.close(); - console.log("Call ended and resources cleaned up."); - }, - }; -} - -// --- Usage --- - -async function main() { - // Fetch session credentials from your server - const response = await fetch("/api/start-session", { method: "POST" }); - const { interactionId, accessToken } = await response.json(); - - const { endCall } = await startSession(accessToken, interactionId); - - // Wire up the end call button - document.getElementById("end-call")?.addEventListener("click", () => { - endCall(); - (document.getElementById("end-call") as HTMLButtonElement).disabled = true; - }); -} - -main().catch(console.error); diff --git a/Web/AmbientScribe/singleMicrophone/index.html b/Web/AmbientScribe/singleMicrophone/index.html deleted file mode 100644 index 219a95e..0000000 --- a/Web/AmbientScribe/singleMicrophone/index.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - Corti - Single Microphone - - -

Single Microphone - AmbientScribe

- -

Open the browser console to see transcripts and facts.

- - - diff --git a/Web/AmbientScribe/singleMicrophone/server.ts b/Web/AmbientScribe/singleMicrophone/server.ts deleted file mode 100644 index 8408371..0000000 --- a/Web/AmbientScribe/singleMicrophone/server.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; -import { randomUUID } from "crypto"; - -// Server-side: handles authentication and sensitive API calls. -// Client credentials must NEVER be exposed to the browser. - -const TENANT_NAME = "YOUR_TENANT_NAME"; -const CLIENT_ID = "YOUR_CLIENT_ID"; -const CLIENT_SECRET = "YOUR_CLIENT_SECRET"; - -// 1. Create Corti client with client credentials (OAuth2) -const client = new CortiClient({ - environment: CortiEnvironment.Eu, - tenantName: TENANT_NAME, - auth: { - clientId: CLIENT_ID, - clientSecret: CLIENT_SECRET, - }, -}); - -// 2. Create an interaction -async function createInteraction() { - const interaction = await client.interactions.create({ - encounter: { - identifier: randomUUID(), - status: "planned", - type: "first_consultation", - }, - }); - - console.log("Interaction created:", interaction.id); - return interaction; -} - -// 3. Get a scoped token for WebSocket streaming (stream scope only). -// This token can safely be sent to the client since it only grants -// access to the streaming endpoint, not the full API. 
-async function getScopedStreamToken() { - const auth = new CortiAuth({ - environment: CortiEnvironment.Eu, - tenantName: TENANT_NAME, - }); - - const streamToken = await auth.getToken({ - clientId: CLIENT_ID, - clientSecret: CLIENT_SECRET, - scopes: ["stream"], - }); - - return streamToken; -} - -// Example: Express endpoint that provides the client with an interaction ID + scoped token -// -// app.post("/api/start-session", async (req, res) => { -// const interaction = await createInteraction(); -// const streamToken = await getScopedStreamToken(); -// res.json({ -// interactionId: interaction.id, -// accessToken: streamToken.accessToken, -// }); -// }); - -export { createInteraction, getScopedStreamToken }; diff --git a/Web/AmbientScribe/virtualConsultations/audio.ts b/Web/AmbientScribe/virtualConsultations/audio.ts deleted file mode 100644 index f456687..0000000 --- a/Web/AmbientScribe/virtualConsultations/audio.ts +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Retrieves the user's local microphone MediaStream. - * If a device ID is provided, attempts to use that specific microphone, - * otherwise uses the default audio input device. - */ -export async function getMicrophoneStream( - deviceId?: string -): Promise { - if (!navigator.mediaDevices) { - throw new Error("Media Devices API not supported in this browser"); - } - - return navigator.mediaDevices.getUserMedia({ - audio: deviceId ? { deviceId: { exact: deviceId } } : true, - }); -} - -/** - * Extracts the remote participant's audio MediaStream from a WebRTC peer connection. - * Use this to capture the other party's audio in a virtual consultation. 
- */ -export function getRemoteParticipantStream( - peerConnection: RTCPeerConnection -): MediaStream { - const remoteStream = new MediaStream(); - - for (const receiver of peerConnection.getReceivers()) { - if (receiver.track.kind === "audio") { - remoteStream.addTrack(receiver.track); - } - } - - if (!remoteStream.getAudioTracks().length) { - throw new Error("No remote audio tracks found on the peer connection"); - } - - return remoteStream; -} diff --git a/Web/AmbientScribe/virtualConsultations/client.ts b/Web/AmbientScribe/virtualConsultations/client.ts deleted file mode 100644 index fb54365..0000000 --- a/Web/AmbientScribe/virtualConsultations/client.ts +++ /dev/null @@ -1,124 +0,0 @@ -import { CortiClient, CortiEnvironment } from "@corti/sdk"; -import { getMicrophoneStream, getRemoteParticipantStream } from "./audio"; - -// Client-side: receives a scoped token and interaction ID from the server, -// then connects to the Corti streaming API with merged doctor + patient audio. - -/** - * Merges multiple audio MediaStreams into a single multi-channel MediaStream. - * Each input stream is mapped to a separate channel (e.g., channel 0 = doctor, - * channel 1 = patient). 
- */ -function mergeMediaStreams( - mediaStreams: MediaStream[] -): { stream: MediaStream; endStream: () => void } { - const audioContext = new AudioContext(); - const audioDestination = audioContext.createMediaStreamDestination(); - const channelMerger = audioContext.createChannelMerger(mediaStreams.length); - - mediaStreams.forEach((stream, index) => { - const source = audioContext.createMediaStreamSource(stream); - source.connect(channelMerger, 0, index); - }); - - channelMerger.connect(audioDestination); - - return { - stream: audioDestination.stream, - endStream: () => { - audioDestination.stream.getAudioTracks().forEach((track) => track.stop()); - audioContext.close(); - }, - }; -} - -async function startSession( - accessToken: string, - interactionId: string, - peerConnection: RTCPeerConnection -) { - // 1. Create client with scoped token (stream scope only) - const client = new CortiClient({ - environment: CortiEnvironment.Eu, - tenantName: "YOUR_TENANT_NAME", - auth: { - accessToken: accessToken, // Token with "stream" scope - }, - }); - - // Note: with a stream-scoped token, only streaming operations are allowed. - // await client.interactions.list(); // Would fail - outside token scope - // await client.transcribe.connect(); // Would fail - outside token scope - - // 2. Connect to the stream - const streamSocket = await client.stream.connect({ id: interactionId }); - - // 3. Get both audio streams - const microphoneStream = await getMicrophoneStream(); - const remoteStream = getRemoteParticipantStream(peerConnection); - - // 4. Merge streams: channel 0 = doctor (local mic), channel 1 = patient (remote) - const { stream: mergedStream, endStream } = mergeMediaStreams([ - microphoneStream, - remoteStream, - ]); - - // 5. 
Send merged audio data via MediaRecorder in 200ms chunks - const mediaRecorder = new MediaRecorder(mergedStream); - mediaRecorder.ondataavailable = (event: BlobEvent) => { - if (event.data.size > 0) { - streamSocket.send(event.data); - } - }; - mediaRecorder.start(200); - - // 6. Handle incoming events - streamSocket.on("transcript", (data) => { - console.log("Transcript:", data); - }); - - streamSocket.on("fact", (data) => { - console.log("Fact:", data); - }); - - console.log("Streaming started for interaction:", interactionId); - - // 7. Return cleanup function - return { - endCall: () => { - if (mediaRecorder.state !== "inactive") { - mediaRecorder.stop(); - } - endStream(); - microphoneStream.getAudioTracks().forEach((track) => track.stop()); - remoteStream.getAudioTracks().forEach((track) => track.stop()); - streamSocket.close(); - console.log("Call ended and resources cleaned up."); - }, - }; -} - -// --- Usage --- - -async function main() { - // Fetch session credentials from your server - const response = await fetch("/api/start-session", { method: "POST" }); - const { interactionId, accessToken } = await response.json(); - - // In a real app, this would come from your WebRTC setup - const peerConnection = new RTCPeerConnection(); - - const { endCall } = await startSession( - accessToken, - interactionId, - peerConnection - ); - - // Wire up the end call button - document.getElementById("end-call")?.addEventListener("click", () => { - endCall(); - (document.getElementById("end-call") as HTMLButtonElement).disabled = true; - }); -} - -main().catch(console.error); diff --git a/Web/AmbientScribe/virtualConsultations/index.html b/Web/AmbientScribe/virtualConsultations/index.html deleted file mode 100644 index 9098443..0000000 --- a/Web/AmbientScribe/virtualConsultations/index.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - Corti - Virtual Consultations - - -

Virtual Consultations - AmbientScribe

- -

Open the browser console to see transcripts and facts.

- - - diff --git a/Web/AmbientScribe/virtualConsultations/server.ts b/Web/AmbientScribe/virtualConsultations/server.ts deleted file mode 100644 index 8408371..0000000 --- a/Web/AmbientScribe/virtualConsultations/server.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; -import { randomUUID } from "crypto"; - -// Server-side: handles authentication and sensitive API calls. -// Client credentials must NEVER be exposed to the browser. - -const TENANT_NAME = "YOUR_TENANT_NAME"; -const CLIENT_ID = "YOUR_CLIENT_ID"; -const CLIENT_SECRET = "YOUR_CLIENT_SECRET"; - -// 1. Create Corti client with client credentials (OAuth2) -const client = new CortiClient({ - environment: CortiEnvironment.Eu, - tenantName: TENANT_NAME, - auth: { - clientId: CLIENT_ID, - clientSecret: CLIENT_SECRET, - }, -}); - -// 2. Create an interaction -async function createInteraction() { - const interaction = await client.interactions.create({ - encounter: { - identifier: randomUUID(), - status: "planned", - type: "first_consultation", - }, - }); - - console.log("Interaction created:", interaction.id); - return interaction; -} - -// 3. Get a scoped token for WebSocket streaming (stream scope only). -// This token can safely be sent to the client since it only grants -// access to the streaming endpoint, not the full API. 
-async function getScopedStreamToken() { - const auth = new CortiAuth({ - environment: CortiEnvironment.Eu, - tenantName: TENANT_NAME, - }); - - const streamToken = await auth.getToken({ - clientId: CLIENT_ID, - clientSecret: CLIENT_SECRET, - scopes: ["stream"], - }); - - return streamToken; -} - -// Example: Express endpoint that provides the client with an interaction ID + scoped token -// -// app.post("/api/start-session", async (req, res) => { -// const interaction = await createInteraction(); -// const streamToken = await getScopedStreamToken(); -// res.json({ -// interactionId: interaction.id, -// accessToken: streamToken.accessToken, -// }); -// }); - -export { createInteraction, getScopedStreamToken }; From f64c902298e9d74339470f36c6cf64d6124e1c5b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:41:13 +0000 Subject: [PATCH 03/10] Add getDisplayMediaStream as alternative remote source for virtual mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit audio.ts now exposes three stream methods: - getMicrophoneStream() — local mic - getRemoteParticipantStream() — WebRTC peer connection - getDisplayMediaStream() — screen/tab audio via getDisplayMedia (for when the video-call app runs in another tab) index.html shows a remote-source radio group when virtual mode is selected, letting the user choose between WebRTC and display capture. 
https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/README.md | 20 ++++++++-- Web/AmbientScribe/audio.ts | 59 +++++++++++++++++++++++++++++- Web/AmbientScribe/client.ts | 71 ++++++++++++++++++++++++++++-------- Web/AmbientScribe/index.html | 24 ++++++++++++ 4 files changed, 153 insertions(+), 21 deletions(-) diff --git a/Web/AmbientScribe/README.md b/Web/AmbientScribe/README.md index 91c3504..0ef8575 100644 --- a/Web/AmbientScribe/README.md +++ b/Web/AmbientScribe/README.md @@ -3,7 +3,7 @@ A single demo app using the [`@corti/sdk`](https://www.npmjs.com/package/@corti/sdk) for **live audio transcription** and **fact-based documentation**. Toggle between two modes from the UI: - **Single Microphone** – one audio source with automatic speaker diarization. -- **Virtual Consultation** – local microphone (doctor) + WebRTC stream (patient) merged into a multi-channel stream. +- **Virtual Consultation** – local microphone (doctor) + remote audio (patient) merged into a multi-channel stream. The remote audio can come from either a **WebRTC peer connection** or **screen/tab capture** (`getDisplayMedia`). The demo is split into **server** (auth, interaction management) and **client** (audio capture, streaming, event display). 
@@ -21,7 +21,7 @@ npm i @corti/sdk AmbientScribe/ server.ts # Server-side: OAuth2 auth, interaction creation, scoped token client.ts # Client-side: stream connection, audio capture, event handling - audio.ts # Audio utilities: getMicrophoneStream(), getRemoteParticipantStream(), mergeMediaStreams() + audio.ts # Audio utilities: getMicrophoneStream(), getRemoteParticipantStream(), getDisplayMediaStream(), mergeMediaStreams() index.html # Minimal UI with mode toggle (output goes to console) README.md ``` @@ -66,15 +66,19 @@ const streamToken = await auth.getToken({ ## Audio Utilities (`audio.ts`) -Two methods for obtaining audio streams, plus a merge utility: +Three methods for obtaining audio streams, plus a merge utility: ```ts // 1. Local microphone const micStream = await getMicrophoneStream(); -// 2. Remote participant from a WebRTC peer connection +// 2a. Remote participant from a WebRTC peer connection const remoteStream = getRemoteParticipantStream(peerConnection); +// 2b. OR: screen / tab capture (alternative when you don't control the peer connection, +// e.g. the video-call app runs in another browser tab) +const remoteStream = await getDisplayMediaStream(); + // 3. 
Merge into a single multi-channel stream (virtual consultation mode) const { stream, endStream } = mergeMediaStreams([micStream, remoteStream]); ``` @@ -116,10 +120,17 @@ mediaRecorder.start(200); ### Virtual Consultation Mode +The remote audio source is selected from the UI — either a WebRTC peer connection or screen/tab capture: + ```ts const microphoneStream = await getMicrophoneStream(); + +// Option A: WebRTC const remoteStream = getRemoteParticipantStream(peerConnection); +// Option B: Screen / tab capture (getDisplayMedia) +const remoteStream = await getDisplayMediaStream(); + // channel 0 = doctor, channel 1 = patient const { stream, endStream } = mergeMediaStreams([microphoneStream, remoteStream]); @@ -142,6 +153,7 @@ streamSocket.on("fact", (data) => console.log("Fact:", data)); A minimal page with: - Radio buttons to toggle between **Single Microphone** and **Virtual Consultation** mode. +- When **Virtual Consultation** is selected, a second radio group appears to choose between **WebRTC** and **Screen / tab capture** as the remote audio source. - **Start Call** / **End Call** buttons. - All output goes to the browser console. diff --git a/Web/AmbientScribe/audio.ts b/Web/AmbientScribe/audio.ts index b5ef052..44163e3 100644 --- a/Web/AmbientScribe/audio.ts +++ b/Web/AmbientScribe/audio.ts @@ -1,9 +1,12 @@ /** * audio.ts — Audio stream utilities for AmbientScribe. * - * Exposes two methods for obtaining audio streams: + * Exposes three methods for obtaining audio streams: * 1. getMicrophoneStream() — local microphone (works in both modes) * 2. getRemoteParticipantStream() — remote party via WebRTC (virtual consultations) + * 3. getDisplayMediaStream() — screen/tab/window audio via getDisplayMedia + * (alternative to WebRTC for virtual consultations, + * e.g. capturing audio from a video-call app) * * Also provides mergeMediaStreams() for combining multiple streams into a * single multi-channel stream before sending to Corti. 
@@ -66,7 +69,59 @@ export function getRemoteParticipantStream( } // --------------------------------------------------------------------------- -// 3. Stream merging (used in virtual consultation mode) +// 3. Screen / tab audio capture (getDisplayMedia) +// --------------------------------------------------------------------------- + +/** + * Captures audio from a screen, window, or browser tab using getDisplayMedia. + * + * This is an alternative to getRemoteParticipantStream() for virtual + * consultations where the remote party's audio comes through a video-call + * app running in another tab or window rather than a direct WebRTC + * peer connection you control. + * + * The browser will show a picker dialog asking which screen/tab to share. + * We request both audio and video (some browsers require video to be + * requested for tab audio to work) and then strip the video track so only + * the audio track remains. + * + * @returns A MediaStream containing only the audio track from the selected + * screen / tab / window. + * @throws If the browser doesn't support getDisplayMedia, the user cancels + * the picker, or the selected source has no audio track. + */ +export async function getDisplayMediaStream(): Promise { + if (!navigator.mediaDevices?.getDisplayMedia) { + throw new Error("getDisplayMedia is not supported in this browser"); + } + + // Request both audio and video — some browsers (e.g. Chrome) only expose + // tab audio when video is also requested. + const stream = await navigator.mediaDevices.getDisplayMedia({ + audio: true, + video: true, + }); + + // Remove all video tracks — we only need the audio. + for (const track of stream.getTracks()) { + if (track.kind === "video") { + track.stop(); + stream.removeTrack(track); + } + } + + if (!stream.getAudioTracks().length) { + throw new Error( + "The selected source does not have an audio track. " + + "Make sure to pick a browser tab that is playing audio." 
+ ); + } + + return stream; +} + +// --------------------------------------------------------------------------- +// 4. Stream merging (used in virtual consultation mode) // --------------------------------------------------------------------------- /** diff --git a/Web/AmbientScribe/client.ts b/Web/AmbientScribe/client.ts index 0ad0529..b5a584c 100644 --- a/Web/AmbientScribe/client.ts +++ b/Web/AmbientScribe/client.ts @@ -7,10 +7,17 @@ * Uses only getMicrophoneStream(). * * "virtual" — Virtual consultation (doctor + patient). - * Uses getMicrophoneStream() for the local doctor mic and - * getRemoteParticipantStream() for the patient's WebRTC audio, - * then merges them into a multi-channel stream so Corti can - * attribute speech to each participant without diarization. + * Uses getMicrophoneStream() for the local doctor mic and a + * remote audio source for the patient, then merges them into a + * multi-channel stream so Corti can attribute speech to each + * participant without diarization. + * + * The remote source can come from either: + * - "webrtc" — an RTCPeerConnection (getRemoteParticipantStream) + * - "display" — screen/tab capture (getDisplayMediaStream), + * useful when the video-call app is running in + * another tab and you don't have direct access + * to the peer connection. * * All transcript and fact events are logged to the browser console. */ @@ -19,6 +26,7 @@ import { CortiClient, CortiEnvironment } from "@corti/sdk"; import { getMicrophoneStream, getRemoteParticipantStream, + getDisplayMediaStream, mergeMediaStreams, } from "./audio"; @@ -28,6 +36,9 @@ import { type Mode = "single" | "virtual"; +/** How the remote participant's audio is captured in virtual mode. */ +type RemoteSource = "webrtc" | "display"; + /** Everything we need to tear down a running session. */ interface ActiveSession { endCall: () => void; @@ -49,14 +60,18 @@ interface ActiveSession { * @param accessToken Stream-scoped token received from the server. 
* @param interactionId Interaction ID received from the server. * @param mode "single" for one mic, "virtual" for doctor + patient. - * @param peerConnection Required when mode is "virtual" — the RTCPeerConnection - * carrying the remote participant's audio. + * @param remoteSource How to capture the remote participant's audio in + * virtual mode: "webrtc" or "display". Ignored in + * single mode. + * @param peerConnection Required when remoteSource is "webrtc" — the + * RTCPeerConnection carrying the remote audio. * @returns An object with an `endCall()` method for cleanup. */ async function startSession( accessToken: string, interactionId: string, mode: Mode, + remoteSource: RemoteSource = "webrtc", peerConnection?: RTCPeerConnection ): Promise { // -- 1. Create a client scoped to streaming only ------------------------- @@ -77,7 +92,7 @@ async function startSession( // -- 3. Acquire audio depending on mode ---------------------------------- // "single" → just the local microphone - // "virtual" → local mic + remote WebRTC audio, merged into one stream + // "virtual" → local mic + remote audio (WebRTC or display), merged const microphoneStream = await getMicrophoneStream(); console.log(`[${mode}] Microphone stream acquired`); @@ -85,17 +100,29 @@ async function startSession( // audioStream is what we feed into MediaRecorder. // endMergedStream is only set when we merge (virtual mode). let audioStream: MediaStream; + let remoteStream: MediaStream | undefined; let endMergedStream: (() => void) | undefined; if (mode === "virtual") { - if (!peerConnection) { - throw new Error("Virtual mode requires an RTCPeerConnection"); + // Get the remote participant's audio from the chosen source. + if (remoteSource === "display") { + // Screen / tab capture — the browser will show a picker dialog. + // Useful when the video-call runs in another tab and you don't + // have direct access to the peer connection. 
+ remoteStream = await getDisplayMediaStream(); + console.log("[virtual:display] Display media stream acquired"); + } else { + // WebRTC — pull audio tracks from an existing peer connection. + if (!peerConnection) { + throw new Error( + 'Virtual mode with remoteSource "webrtc" requires an RTCPeerConnection' + ); + } + remoteStream = getRemoteParticipantStream(peerConnection); + console.log("[virtual:webrtc] Remote participant stream acquired"); } - const remoteStream = getRemoteParticipantStream(peerConnection); - console.log("[virtual] Remote participant stream acquired"); - - // Merge: channel 0 = doctor (mic), channel 1 = patient (WebRTC) + // Merge: channel 0 = doctor (mic), channel 1 = patient (remote) const merged = mergeMediaStreams([microphoneStream, remoteStream]); audioStream = merged.stream; endMergedStream = merged.endStream; @@ -138,6 +165,9 @@ async function startSession( // Release the merged stream (virtual mode only) endMergedStream?.(); + // Release the remote stream tracks (virtual mode only) + remoteStream?.getAudioTracks().forEach((track) => track.stop()); + // Release the raw microphone track(s) microphoneStream.getAudioTracks().forEach((track) => track.stop()); @@ -163,19 +193,30 @@ async function handleStart() { ); const mode: Mode = (modeInput?.value as Mode) ?? "single"; + // Read remote source preference (only relevant in virtual mode) + const remoteSourceInput = document.querySelector( + 'input[name="remote-source"]:checked' + ); + const remoteSource: RemoteSource = + (remoteSourceInput?.value as RemoteSource) ?? "webrtc"; + try { // Fetch interaction ID + scoped token from the server (see server.ts) const response = await fetch("/api/start-session", { method: "POST" }); const { interactionId, accessToken } = await response.json(); - // In virtual mode you would pass a real RTCPeerConnection here. + // In virtual/webrtc mode you would pass a real RTCPeerConnection here. 
// For this demo we pass undefined — replace with your WebRTC connection. - const peerConnection = mode === "virtual" ? new RTCPeerConnection() : undefined; + const peerConnection = + mode === "virtual" && remoteSource === "webrtc" + ? new RTCPeerConnection() + : undefined; activeSession = await startSession( accessToken, interactionId, mode, + remoteSource, peerConnection ); diff --git a/Web/AmbientScribe/index.html b/Web/AmbientScribe/index.html index 2617b4b..e85514e 100644 --- a/Web/AmbientScribe/index.html +++ b/Web/AmbientScribe/index.html @@ -26,6 +26,30 @@

AmbientScribe

+ + + + + From 1552ec767f79005b18c7d38cd688c206cac00346 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:42:10 +0000 Subject: [PATCH 04/10] Add Express server with POST /api/start-session endpoint Replace the commented-out example with a working Express app that serves the static front-end and exposes a single endpoint to create an interaction + scoped stream token. Config reads from environment variables with placeholder fallbacks. https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/server.ts | 66 ++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/Web/AmbientScribe/server.ts b/Web/AmbientScribe/server.ts index 168e59d..43d2fbc 100644 --- a/Web/AmbientScribe/server.ts +++ b/Web/AmbientScribe/server.ts @@ -1,15 +1,20 @@ /** - * server.ts — Server-side setup for AmbientScribe. + * server.ts — Express server for AmbientScribe. * - * Runs on your backend (Node.js / Express / etc.). Responsible for: + * Responsible for: * 1. Creating a fully-privileged CortiClient using OAuth2 client credentials. - * 2. Creating an interaction via the REST API. - * 3. Minting a scoped stream token that can be safely sent to the browser. + * 2. Exposing a POST /api/start-session endpoint that: + * a. Creates an interaction via the Corti REST API. + * b. Mints a scoped stream token (WebSocket access only). + * c. Returns both to the client. + * 3. Serving the static front-end files (index.html, client.ts, audio.ts). * * IMPORTANT: Client credentials (CLIENT_ID / CLIENT_SECRET) must NEVER be * exposed to the browser. Only the scoped stream token is sent to the client. 
*/ +import express from "express"; +import path from "path"; import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; import { randomUUID } from "crypto"; @@ -17,9 +22,10 @@ import { randomUUID } from "crypto"; // Configuration — replace with your own values or load from environment // --------------------------------------------------------------------------- -const TENANT_NAME = "YOUR_TENANT_NAME"; -const CLIENT_ID = "YOUR_CLIENT_ID"; -const CLIENT_SECRET = "YOUR_CLIENT_SECRET"; +const TENANT_NAME = process.env.CORTI_TENANT_NAME ?? "YOUR_TENANT_NAME"; +const CLIENT_ID = process.env.CORTI_CLIENT_ID ?? "YOUR_CLIENT_ID"; +const CLIENT_SECRET = process.env.CORTI_CLIENT_SECRET ?? "YOUR_CLIENT_SECRET"; +const PORT = Number(process.env.PORT ?? 3000); // --------------------------------------------------------------------------- // 1. Create a CortiClient authenticated with client credentials (OAuth2). @@ -36,7 +42,7 @@ const client = new CortiClient({ }); // --------------------------------------------------------------------------- -// 2. Create an interaction. +// 2. Helper: create an interaction. // An interaction represents a single clinical encounter / session. // --------------------------------------------------------------------------- @@ -54,7 +60,7 @@ async function createInteraction() { } // --------------------------------------------------------------------------- -// 3. Mint a scoped token with only the "stream" scope. +// 3. Helper: mint a scoped token with only the "stream" scope. // This token lets the client connect to the streaming WebSocket but // cannot list interactions, create documents, or call any other REST // endpoint — keeping the blast radius minimal if it leaks. @@ -76,18 +82,32 @@ async function getScopedStreamToken() { } // --------------------------------------------------------------------------- -// Example: Express endpoint that hands the client everything it needs +// 4. 
Express app // --------------------------------------------------------------------------- -// -// app.post("/api/start-session", async (req, res) => { -// const interaction = await createInteraction(); -// const streamToken = await getScopedStreamToken(); -// -// // The client only receives the interaction ID and a limited-scope token. -// res.json({ -// interactionId: interaction.id, -// accessToken: streamToken.accessToken, -// }); -// }); - -export { createInteraction, getScopedStreamToken }; + +const app = express(); + +// Serve the front-end files (index.html, client.ts, audio.ts) from this directory. +app.use(express.static(path.join(__dirname))); + +// POST /api/start-session +// Creates an interaction + scoped token and returns them to the client. +app.post("/api/start-session", async (_req, res) => { + try { + const interaction = await createInteraction(); + const streamToken = await getScopedStreamToken(); + + // The client only receives the interaction ID and a limited-scope token. 
+ res.json({ + interactionId: interaction.id, + accessToken: streamToken.accessToken, + }); + } catch (err) { + console.error("Failed to start session:", err); + res.status(500).json({ error: "Failed to start session" }); + } +}); + +app.listen(PORT, () => { + console.log(`AmbientScribe server listening on http://localhost:${PORT}`); +}); From ce0e77739328d245b6fa68532c183ee9953f9cf8 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:43:32 +0000 Subject: [PATCH 05/10] Rename "Call" to "Consultation" throughout the UI and client code https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/client.ts | 18 +++++++++--------- Web/AmbientScribe/index.html | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Web/AmbientScribe/client.ts b/Web/AmbientScribe/client.ts index b5a584c..c10ba82 100644 --- a/Web/AmbientScribe/client.ts +++ b/Web/AmbientScribe/client.ts @@ -41,7 +41,7 @@ type RemoteSource = "webrtc" | "display"; /** Everything we need to tear down a running session. */ interface ActiveSession { - endCall: () => void; + endConsultation: () => void; } // --------------------------------------------------------------------------- @@ -65,7 +65,7 @@ interface ActiveSession { * single mode. * @param peerConnection Required when remoteSource is "webrtc" — the * RTCPeerConnection carrying the remote audio. - * @returns An object with an `endCall()` method for cleanup. + * @returns An object with an `endConsultation()` method for cleanup. */ async function startSession( accessToken: string, @@ -153,7 +153,7 @@ async function startSession( // -- 6. 
Return cleanup function ------------------------------------------ return { - endCall: () => { + endConsultation: () => { // Stop recording if (mediaRecorder.state !== "inactive") { mediaRecorder.stop(); @@ -171,7 +171,7 @@ async function startSession( // Release the raw microphone track(s) microphoneStream.getAudioTracks().forEach((track) => track.stop()); - console.log(`[${mode}] Call ended — all resources cleaned up`); + console.log(`[${mode}] Consultation ended — all resources cleaned up`); }, }; } @@ -230,19 +230,19 @@ async function handleStart() { /** Ends the active session and releases all resources. */ function handleEnd() { - activeSession?.endCall(); + activeSession?.endConsultation(); activeSession = null; setButtonStates(false); } /** Toggle Start / End button enabled states. */ function setButtonStates(isRunning: boolean) { - const startBtn = document.getElementById("start-call") as HTMLButtonElement; - const endBtn = document.getElementById("end-call") as HTMLButtonElement; + const startBtn = document.getElementById("start-consultation") as HTMLButtonElement; + const endBtn = document.getElementById("end-consultation") as HTMLButtonElement; if (startBtn) startBtn.disabled = isRunning; if (endBtn) endBtn.disabled = !isRunning; } // Attach handlers once the DOM is ready. -document.getElementById("start-call")?.addEventListener("click", handleStart); -document.getElementById("end-call")?.addEventListener("click", handleEnd); +document.getElementById("start-consultation")?.addEventListener("click", handleStart); +document.getElementById("end-consultation")?.addEventListener("click", handleEnd); diff --git a/Web/AmbientScribe/index.html b/Web/AmbientScribe/index.html index e85514e..82a9f32 100644 --- a/Web/AmbientScribe/index.html +++ b/Web/AmbientScribe/index.html @@ -51,8 +51,8 @@

AmbientScribe

- - + +

Open the browser console to see transcripts and facts.

From c04b59b593554c19c39c0fb625ff187a0b3ac85f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:52:39 +0000 Subject: [PATCH 06/10] Add document creation after consultation ends - Server: new POST /api/create-document endpoint that fetches facts via client.facts.list() then generates a clinical document via client.documents.create() and returns it to the browser - Client: track interactionId across the session lifecycle; after ending a consultation, enable a "Create Document" button that calls the new API and renders the result in the page - HTML: add Create Document button and document output area https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/client.ts | 61 ++++++++++++++++++++++++++++++++---- Web/AmbientScribe/index.html | 4 ++- Web/AmbientScribe/server.ts | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 7 deletions(-) diff --git a/Web/AmbientScribe/client.ts b/Web/AmbientScribe/client.ts index c10ba82..5583b09 100644 --- a/Web/AmbientScribe/client.ts +++ b/Web/AmbientScribe/client.ts @@ -181,6 +181,7 @@ async function startSession( // --------------------------------------------------------------------------- let activeSession: ActiveSession | null = null; +let currentInteractionId: string | null = null; /** * Fetches session credentials from the server and starts streaming. @@ -212,6 +213,8 @@ async function handleStart() { ? 
new RTCPeerConnection() : undefined; + currentInteractionId = interactionId; + activeSession = await startSession( accessToken, interactionId, @@ -221,7 +224,7 @@ async function handleStart() { ); // Update button states - setButtonStates(true); + setButtonStates("running"); console.log(`Session started in "${mode}" mode`); } catch (err) { console.error("Failed to start session:", err); @@ -232,17 +235,63 @@ async function handleStart() { function handleEnd() { activeSession?.endConsultation(); activeSession = null; - setButtonStates(false); + setButtonStates("stopped"); } -/** Toggle Start / End button enabled states. */ -function setButtonStates(isRunning: boolean) { +/** + * Calls the server to fetch facts and generate a clinical document + * from the consultation that just ended. + */ +async function handleCreateDocument() { + if (!currentInteractionId) { + console.error("No interaction ID available — start a consultation first"); + return; + } + + const createBtn = document.getElementById("create-document") as HTMLButtonElement; + const statusMessage = document.getElementById("status-message") as HTMLElement; + const documentOutput = document.getElementById("document-output") as HTMLPreElement; + + createBtn.disabled = true; + statusMessage.innerHTML = "Creating document…"; + documentOutput.style.display = "none"; + + try { + const response = await fetch("/api/create-document", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ interactionId: currentInteractionId }), + }); + + const { document, error } = await response.json(); + + if (error) { + throw new Error(error); + } + + console.log("Document created:", document); + statusMessage.innerHTML = "Document created successfully."; + documentOutput.textContent = JSON.stringify(document, null, 2); + documentOutput.style.display = ""; + } catch (err) { + console.error("Failed to create document:", err); + statusMessage.innerHTML = "Failed to create document — see 
console."; + createBtn.disabled = false; + } +} + +/** Update button states based on the consultation lifecycle. */ +function setButtonStates(state: "idle" | "running" | "stopped") { const startBtn = document.getElementById("start-consultation") as HTMLButtonElement; const endBtn = document.getElementById("end-consultation") as HTMLButtonElement; - if (startBtn) startBtn.disabled = isRunning; - if (endBtn) endBtn.disabled = !isRunning; + const createBtn = document.getElementById("create-document") as HTMLButtonElement; + + if (startBtn) startBtn.disabled = state === "running"; + if (endBtn) endBtn.disabled = state !== "running"; + if (createBtn) createBtn.disabled = state !== "stopped"; } // Attach handlers once the DOM is ready. document.getElementById("start-consultation")?.addEventListener("click", handleStart); document.getElementById("end-consultation")?.addEventListener("click", handleEnd); +document.getElementById("create-document")?.addEventListener("click", handleCreateDocument); diff --git a/Web/AmbientScribe/index.html b/Web/AmbientScribe/index.html index 82a9f32..00d7a88 100644 --- a/Web/AmbientScribe/index.html +++ b/Web/AmbientScribe/index.html @@ -53,8 +53,10 @@

AmbientScribe

+ -

Open the browser console to see transcripts and facts.

+

Open the browser console to see transcripts and facts.

+ diff --git a/Web/AmbientScribe/server.ts b/Web/AmbientScribe/server.ts index 43d2fbc..4e548ef 100644 --- a/Web/AmbientScribe/server.ts +++ b/Web/AmbientScribe/server.ts @@ -89,6 +89,7 @@ const app = express(); // Serve the front-end files (index.html, client.ts, audio.ts) from this directory. app.use(express.static(path.join(__dirname))); +app.use(express.json()); // POST /api/start-session // Creates an interaction + scoped token and returns them to the client. @@ -108,6 +109,61 @@ app.post("/api/start-session", async (_req, res) => { } }); +// --------------------------------------------------------------------------- +// 5. POST /api/create-document +// Fetches the facts collected during the consultation, then generates a +// clinical document from them using the Corti Documents API. +// --------------------------------------------------------------------------- + +app.post("/api/create-document", async (req, res) => { + try { + const { interactionId } = req.body; + + if (!interactionId) { + res.status(400).json({ error: "Missing interactionId" }); + return; + } + + // Step 1: Fetch facts collected during the consultation + const facts = await client.facts.list(interactionId); + console.log(`Fetched ${facts.length} facts for interaction ${interactionId}`); + + // Step 2: Map facts into the format expected by the Documents API + const factsContext = facts.map((fact: { text: string; group: string; source: string }) => ({ + text: fact.text, + group: fact.group, + source: fact.source, + })); + + // Step 3: Create a document using the collected facts + const document = await client.documents.create(interactionId, { + context: [ + { + type: "facts", + data: factsContext, + }, + ], + template: { + sections: [ + { key: "corti-hpi" }, + { key: "corti-allergies" }, + { key: "corti-social-history" }, + { key: "corti-plan" }, + ], + }, + outputLanguage: "en", + name: "Consultation Document", + documentationMode: "routed_parallel", + }); + + console.log("Document 
created:", document); + res.json({ document }); + } catch (err) { + console.error("Failed to create document:", err); + res.status(500).json({ error: "Failed to create document" }); + } +}); + app.listen(PORT, () => { console.log(`AmbientScribe server listening on http://localhost:${PORT}`); }); From 12abf481479493b0935b3cd333ae9afec41c70a8 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 18:54:00 +0000 Subject: [PATCH 07/10] Update README with document generation workflow https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/README.md | 52 ++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/Web/AmbientScribe/README.md b/Web/AmbientScribe/README.md index 0ef8575..6614c2a 100644 --- a/Web/AmbientScribe/README.md +++ b/Web/AmbientScribe/README.md @@ -1,11 +1,13 @@ # Corti AI Platform – Live Transcription & Fact-Based Documentation -A single demo app using the [`@corti/sdk`](https://www.npmjs.com/package/@corti/sdk) for **live audio transcription** and **fact-based documentation**. Toggle between two modes from the UI: +A single demo app using the [`@corti/sdk`](https://www.npmjs.com/package/@corti/sdk) for **live audio transcription**, **fact extraction**, and **clinical document generation**. Toggle between two modes from the UI: - **Single Microphone** – one audio source with automatic speaker diarization. - **Virtual Consultation** – local microphone (doctor) + remote audio (patient) merged into a multi-channel stream. The remote audio can come from either a **WebRTC peer connection** or **screen/tab capture** (`getDisplayMedia`). -The demo is split into **server** (auth, interaction management) and **client** (audio capture, streaming, event display). +After a consultation ends, generate a structured clinical document from the extracted facts with a single click. 
+ +The demo is split into **server** (auth, interaction management, document generation) and **client** (audio capture, streaming, event display, document creation). ## Installation @@ -19,10 +21,10 @@ npm i @corti/sdk ``` AmbientScribe/ - server.ts # Server-side: OAuth2 auth, interaction creation, scoped token - client.ts # Client-side: stream connection, audio capture, event handling + server.ts # Server-side: OAuth2 auth, interaction creation, scoped token, document generation + client.ts # Client-side: stream connection, audio capture, event handling, document creation audio.ts # Audio utilities: getMicrophoneStream(), getRemoteParticipantStream(), getDisplayMediaStream(), mergeMediaStreams() - index.html # Minimal UI with mode toggle (output goes to console) + index.html # Minimal UI with mode toggle, consultation controls, and document output README.md ``` @@ -35,6 +37,7 @@ Runs on your backend. Responsible for: 1. **Creating a `CortiClient`** with OAuth2 client credentials (never exposed to the browser). 2. **Creating an interaction** via the REST API. 3. **Minting a scoped stream token** (only grants WebSocket streaming access). +4. **Generating a clinical document** from the facts collected during a consultation. ```ts import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; @@ -62,6 +65,40 @@ const streamToken = await auth.getToken({ // Send interaction.id + streamToken.accessToken to the client ``` +### Document Generation + +After a consultation ends, the server fetches the extracted facts and generates a structured clinical document: + +```ts +// 1. Fetch facts collected during the consultation +const facts = await client.facts.list(interactionId); + +// 2. 
Create a document from the facts +const document = await client.documents.create(interactionId, { + context: [ + { + type: "facts", + data: facts.map((fact) => ({ + text: fact.text, + group: fact.group, + source: fact.source, + })), + }, + ], + template: { + sections: [ + { key: "corti-hpi" }, + { key: "corti-allergies" }, + { key: "corti-social-history" }, + { key: "corti-plan" }, + ], + }, + outputLanguage: "en", + name: "Consultation Document", + documentationMode: "routed_parallel", +}); +``` + --- ## Audio Utilities (`audio.ts`) @@ -154,8 +191,9 @@ A minimal page with: - Radio buttons to toggle between **Single Microphone** and **Virtual Consultation** mode. - When **Virtual Consultation** is selected, a second radio group appears to choose between **WebRTC** and **Screen / tab capture** as the remote audio source. -- **Start Call** / **End Call** buttons. -- All output goes to the browser console. +- **Start Consultation** / **End Consultation** buttons to control the streaming session. +- **Create Document** button — enabled after a consultation ends. Calls the server to fetch facts and generate a clinical document, then displays the result on the page. +- Transcript and fact events are logged to the browser console. 
--- From ebe1d6fea22a0e6bb556de7dc455ba7f532252d5 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 19:04:57 +0000 Subject: [PATCH 08/10] Add minimal setup for easy local development with ts-node - package.json: npm scripts for dev (ts-node) and build (tsc) - tsconfig.json: ES2020 modules and output for browser + Node - .env.example: template for Corti OAuth2 credentials - .gitignore: exclude node_modules, dist, .env files - README: add Quick Start section with 3-step setup process Users can now run the demo in ~5 minutes: copy .env, npm install, npm run dev https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/.env.example | 4 ++++ Web/AmbientScribe/.gitignore | 6 +++++ Web/AmbientScribe/README.md | 40 +++++++++++++++++++++++++++++++-- Web/AmbientScribe/package.json | 26 +++++++++++++++++++++ Web/AmbientScribe/tsconfig.json | 20 +++++++++++++++++ 5 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 Web/AmbientScribe/.env.example create mode 100644 Web/AmbientScribe/.gitignore create mode 100644 Web/AmbientScribe/package.json create mode 100644 Web/AmbientScribe/tsconfig.json diff --git a/Web/AmbientScribe/.env.example b/Web/AmbientScribe/.env.example new file mode 100644 index 0000000..e4835a7 --- /dev/null +++ b/Web/AmbientScribe/.env.example @@ -0,0 +1,4 @@ +CORTI_TENANT_NAME=your_tenant_name_here +CORTI_CLIENT_ID=your_client_id_here +CORTI_CLIENT_SECRET=your_client_secret_here +PORT=3000 diff --git a/Web/AmbientScribe/.gitignore b/Web/AmbientScribe/.gitignore new file mode 100644 index 0000000..94362eb --- /dev/null +++ b/Web/AmbientScribe/.gitignore @@ -0,0 +1,6 @@ +node_modules/ +dist/ +.env +.env.local +*.log +.DS_Store diff --git a/Web/AmbientScribe/README.md b/Web/AmbientScribe/README.md index 6614c2a..3698c23 100644 --- a/Web/AmbientScribe/README.md +++ b/Web/AmbientScribe/README.md @@ -9,10 +9,33 @@ After a consultation ends, generate a structured clinical document from the extr The demo is split into 
**server** (auth, interaction management, document generation) and **client** (audio capture, streaming, event display, document creation). -## Installation +--- + +## Quick Start + +**Prerequisites:** Node.js 18+ + +**Setup (3 steps):** + +```bash +cp .env.example .env +# Edit .env with your Corti credentials (CORTI_TENANT_NAME, CORTI_CLIENT_ID, CORTI_CLIENT_SECRET) + +npm install +npm run dev +``` + +Open http://localhost:3000 in your browser. Transcript and fact events appear in the browser console. + +--- + +## Installation (Manual) + +If installing the dependencies manually instead of using the provided `package.json`: ```bash -npm i @corti/sdk +npm i @corti/sdk express +npm i -D typescript ts-node @types/express @types/node ``` --- @@ -197,6 +220,19 @@ A minimal page with: --- +--- + +## Production Build + +For production deployment, compile and run the server: + +```bash +npm run build # Compile TypeScript to dist/ +npm start # Run compiled server +``` + +--- + ## Resources - [`@corti/sdk` on npm](https://www.npmjs.com/package/@corti/sdk) diff --git a/Web/AmbientScribe/package.json b/Web/AmbientScribe/package.json new file mode 100644 index 0000000..9812e12 --- /dev/null +++ b/Web/AmbientScribe/package.json @@ -0,0 +1,26 @@ +{ + "name": "corti-ambientscribe-demo", + "version": "1.0.0", + "description": "Live audio transcription and clinical document generation demo using Corti SDK", + "main": "server.ts", + "type": "module", + "scripts": { + "dev": "ts-node --esm server.ts", + "start": "node dist/server.js", + "build": "tsc", + "clean": "rm -rf dist" + }, + "keywords": ["corti", "audio", "transcription", "clinical-documentation"], + "author": "", + "license": "MIT", + "dependencies": { + "@corti/sdk": "^1.0.0", + "express": "^4.18.2" + }, + "devDependencies": { + "@types/express": "^4.17.21", + "@types/node": "^20.10.6", + "typescript": "^5.3.3", + "ts-node": "^10.9.2" + } +} diff --git a/Web/AmbientScribe/tsconfig.json b/Web/AmbientScribe/tsconfig.json new file mode 100644 index 0000000..81a0dfb --- /dev/null +++
b/Web/AmbientScribe/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "ES2020", + "moduleResolution": "node", + "lib": ["ES2020", "DOM"], + "outDir": "./dist", + "rootDir": "./", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true + }, + "include": ["*.ts"], + "exclude": ["node_modules"] +} From a8f0d2777759d5f46feaa25a3a54075f41008f9d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 19:13:04 +0000 Subject: [PATCH 09/10] Fix __dirname not defined in ES module scope Replace Node.js CJS global __dirname with ESM equivalent using import.meta.url + fileURLToPath. Also fix duplicate --- in README. https://claude.ai/code/session_01GVKV3jFef5BgGt4sDuoSwr --- Web/AmbientScribe/README.md | 2 -- Web/AmbientScribe/server.ts | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Web/AmbientScribe/README.md b/Web/AmbientScribe/README.md index 3698c23..cac0b67 100644 --- a/Web/AmbientScribe/README.md +++ b/Web/AmbientScribe/README.md @@ -220,8 +220,6 @@ A minimal page with: --- ---- - ## Production Build For production deployment, compile and run the server: diff --git a/Web/AmbientScribe/server.ts b/Web/AmbientScribe/server.ts index 4e548ef..e32cf1c 100644 --- a/Web/AmbientScribe/server.ts +++ b/Web/AmbientScribe/server.ts @@ -15,9 +15,13 @@ import express from "express"; import path from "path"; +import { fileURLToPath } from "url"; import { CortiClient, CortiAuth, CortiEnvironment } from "@corti/sdk"; import { randomUUID } from "crypto"; +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + // --------------------------------------------------------------------------- // Configuration — replace with your own values or load from environment // 
--------------------------------------------------------------------------- From 34e671b00e96b69cac84dc0a10f575de4bcd1893 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 19:22:01 +0000 Subject: [PATCH 10/10] Refactor client.ts to pure Corti integration, add esbuild bundling - client.ts: Remove all DOM handling. Export a single startSession() function with onTranscript/onFact callbacks. Accept tenantName from server instead of hardcoding it. - index.html: Move all UI wiring (button states, click handlers, fetch calls) into inline + - - - - + function setButtonStates(state) { + startBtn.disabled = state === "running"; + endBtn.disabled = state !== "running"; + createBtn.disabled = state !== "stopped"; + } -

Open the browser console to see transcripts and facts.

- + // -- Start Consultation -------------------------------------------------- + startBtn.addEventListener("click", async function () { + var modeInput = document.querySelector('input[name="mode"]:checked'); + var mode = modeInput ? modeInput.value : "single"; + + var remoteInput = document.querySelector('input[name="remote-source"]:checked'); + var remoteSource = remoteInput ? remoteInput.value : "webrtc"; + + try { + // Fetch interaction ID + scoped token from the server (see server.ts) + var response = await fetch("/api/start-session", { method: "POST" }); + var data = await response.json(); + currentInteractionId = data.interactionId; + + // In virtual/webrtc mode you would pass a real RTCPeerConnection here. + // For this demo we pass undefined — replace with your WebRTC connection. + var peerConnection = + mode === "virtual" && remoteSource === "webrtc" + ? new RTCPeerConnection() + : undefined; + + session = await AmbientScribe.startSession({ + accessToken: data.accessToken, + interactionId: data.interactionId, + tenantName: data.tenantName, + mode: mode, + remoteSource: remoteSource, + peerConnection: peerConnection, + }); + + setButtonStates("running"); + console.log('Session started in "' + mode + '" mode'); + } catch (err) { + console.error("Failed to start session:", err); + } + }); - + // -- End Consultation ---------------------------------------------------- + endBtn.addEventListener("click", function () { + if (session) { + session.endConsultation(); + session = null; + } + setButtonStates("stopped"); + }); + + // -- Create Document ----------------------------------------------------- + createBtn.addEventListener("click", async function () { + if (!currentInteractionId) { + console.error("No interaction ID — start a consultation first"); + return; + } + + createBtn.disabled = true; + statusMessage.innerHTML = "Creating document\u2026"; + documentOutput.style.display = "none"; + + try { + var response = await fetch("/api/create-document", { + 
method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ interactionId: currentInteractionId }), + }); + var result = await response.json(); + + if (result.error) throw new Error(result.error); + + console.log("Document created:", result.document); + statusMessage.innerHTML = "Document created successfully."; + documentOutput.textContent = JSON.stringify(result.document, null, 2); + documentOutput.style.display = ""; + } catch (err) { + console.error("Failed to create document:", err); + statusMessage.innerHTML = "Failed to create document \u2014 see console."; + createBtn.disabled = false; + } + }); + diff --git a/Web/AmbientScribe/package.json b/Web/AmbientScribe/package.json index 9812e12..f09c781 100644 --- a/Web/AmbientScribe/package.json +++ b/Web/AmbientScribe/package.json @@ -5,9 +5,10 @@ "main": "server.ts", "type": "module", "scripts": { - "dev": "ts-node --esm server.ts", + "build:client": "esbuild client.ts --bundle --outfile=dist/client.js --format=iife --global-name=AmbientScribe", + "dev": "npm run build:client && ts-node --esm server.ts", "start": "node dist/server.js", - "build": "tsc", + "build": "npm run build:client && tsc", "clean": "rm -rf dist" }, "keywords": ["corti", "audio", "transcription", "clinical-documentation"], @@ -20,6 +21,7 @@ "devDependencies": { "@types/express": "^4.17.21", "@types/node": "^20.10.6", + "esbuild": "^0.24.0", "typescript": "^5.3.3", "ts-node": "^10.9.2" } diff --git a/Web/AmbientScribe/server.ts b/Web/AmbientScribe/server.ts index e32cf1c..e175908 100644 --- a/Web/AmbientScribe/server.ts +++ b/Web/AmbientScribe/server.ts @@ -91,7 +91,7 @@ async function getScopedStreamToken() { const app = express(); -// Serve the front-end files (index.html, client.ts, audio.ts) from this directory. +// Serve the front-end files (index.html, dist/client.js) from this directory. 
app.use(express.static(path.join(__dirname))); app.use(express.json()); @@ -102,9 +102,10 @@ app.post("/api/start-session", async (_req, res) => { const interaction = await createInteraction(); const streamToken = await getScopedStreamToken(); - // The client only receives the interaction ID and a limited-scope token. + // The client only receives the interaction ID, tenant name, and a limited-scope token. res.json({ interactionId: interaction.id, + tenantName: TENANT_NAME, accessToken: streamToken.accessToken, }); } catch (err) {