livekit-examples · bcherry · Jun 5, 2026 · Jun 3, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,15 +1,11 @@
 .zed
 venv
+.venv
 node_modules
 .next
 python-dotenv
 node_modules
 .env
 .env.local
 __pycache__
-.DS_Store
-
-
-# Ignore agent deployment files
-agent/Dockerfile
-agent/fly.toml
+.DS_Store
diff --git a/README.md b/README.md
@@ -49,8 +49,8 @@ Players have access to a simple API for game control, built on top of [RPC](http
 The agent is responsible for judging each player's drawing. It runs a single loop that wakes up every few seconds. On a judgement loop, the agent will:
 
 1. Convert each player's drawing from a set of line segments to a 512x512 PNG image.
-2. Send each drawing to a GPT-4o chat which is configured to "guess" what the drawing is meant to be. (Note: The actual target "prompt" is not included in this request, to avoid polluting its context.)
-3. Collects all guesses and sends them to a different GPT-4o chat which is configured to return a list of all players whose guesses are "correct" (i.e. similar enough to the target prompt).
+2. Send each drawing to a vision LLM (via [LiveKit Inference](https://docs.livekit.io/agents/models/inference/)) which is configured to "guess" what the drawing is meant to be. (Note: The actual target "prompt" is not included in this request, to avoid polluting its context.)
+3. Collect all guesses and send them to a separate LLM chat which is configured to return a list of all players whose guesses are "correct" (i.e. similar enough to the target prompt).
 4. All guesses are published as data messages to all players, using the topic `host.guess`.
 5. If any winners were found, the agent updates the game state to end the game and list the winners. Otherwise it sleeps for a few seconds and checks again.
 
@@ -78,21 +78,19 @@ You'll need a LiveKit instance to run this project, either from [LiveKit Cloud](
 
 ### Running the Agent
 
-First add `agent/.env` with LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL, and OPENAI_API_KEY.
+First add `agent/.env` with LIVEKIT_API_KEY, LIVEKIT_API_SECRET, and LIVEKIT_URL. The agent uses [LiveKit Inference](https://docs.livekit.io/agents/models/inference/) for its vision and judging LLM calls, so no separate model provider API key is required.
 
-Then run the following commands to install dependencies:
+Dependencies are managed with [uv](https://docs.astral.sh/uv/). Install them with:
 
 ```shell
 cd agent
-python -m venv venv
-source venv/bin/activate
-pip install -r requirements.txt
+uv sync
 ```
 
 Finally, boot the agent:
 
 ```shell
-python main.py dev
+uv run python main.py dev
 ```
 
 ### Running the Site

diff --git a/agent/.dockerignore b/agent/.dockerignore
@@ -1 +1,9 @@
-venv/
+.venv/
+venv/
+.env
+__pycache__/
+*.pyc
+.ruff_cache/
+.git/
+.dockerignore
+Dockerfile
diff --git a/agent/Dockerfile b/agent/Dockerfile
@@ -0,0 +1,68 @@
+# syntax=docker/dockerfile:1
+# Example Dockerfile for deploying a LiveKit Agent.
+#
+# It uses a multi-stage build: dependencies are installed with uv in a "builder"
+# stage, then only the resulting virtualenv and the app source are copied into a
+# slim runtime image. This keeps uv and the build toolchain out of the final
+# image, producing a smaller and more secure container.
+# See https://docs.astral.sh/uv/guides/integration/docker/ for more details.
+
+ARG PYTHON_VERSION=3.12
+
+# ---- Builder stage: resolve and install dependencies into /app/.venv ----
+FROM ghcr.io/astral-sh/uv:python${PYTHON_VERSION}-bookworm-slim AS builder
+
+# Compile bytecode for faster cold starts and copy (rather than symlink) packages
+# so the virtualenv is self-contained and can be copied to the runtime stage.
+ENV UV_COMPILE_BYTECODE=1 \
+    UV_LINK_MODE=copy \
+    UV_PYTHON_DOWNLOADS=0
+
+WORKDIR /app
+
+# Install dependencies first, without the project source, so this layer is cached
+# and only re-run when pyproject.toml or uv.lock change. --no-dev skips dev-only
+# tooling (e.g. ruff); --locked ensures the install matches uv.lock exactly.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --locked --no-dev --no-install-project
+
+# ---- Runtime stage: minimal image with just Python, the venv, and the app ----
+FROM python:${PYTHON_VERSION}-slim-bookworm
+
+# Keep Python from buffering stdout/stderr (so logs surface immediately) and from
+# writing .pyc files at runtime. Put the venv's executables first on PATH so
+# `python` resolves to the project's interpreter.
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PATH="/app/.venv/bin:${PATH}"
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
+ARG UID=10001
+RUN adduser \
+    --disabled-password \
+    --gecos "" \
+    --home "/home/appuser" \
+    --shell "/sbin/nologin" \
+    --uid "${UID}" \
+    appuser
+
+WORKDIR /app
+
+# Copy the prepared virtualenv from the builder, then the application source.
+COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
+COPY --chown=appuser:appuser . .
+
+USER appuser
+
+# Ensure any dependent models (e.g. plugin assets) are downloaded at build time
+# rather than on first connection.
+RUN python -m livekit.agents download-files
+
+# The agent worker exposes a health check on port 8081 (referenced by fly.toml).
+EXPOSE 8081
+
+# Run the application.
+CMD ["python", "main.py", "start"]
diff --git a/agent/game.py b/agent/game.py
@@ -1,6 +1,6 @@
-from typing import Literal, List
 import json
 from collections import OrderedDict
+from typing import List, Literal
 
 DifficultyLevel = Literal["easy", "medium", "hard"]
 

diff --git a/agent/game_host.py b/agent/game_host.py
@@ -5,11 +5,12 @@
 import random
 from typing import List
 
-import openai
 from livekit import agents, api, rtc
+from livekit.agents import inference
+from livekit.agents.llm import ChatContext, ImageContent
 
-from drawings import Line, PlayerDrawing
 import game
+from drawings import Line, PlayerDrawing
 
 
 # The main class for the game host agent. This instance will live for the duration of the Room
@@ -19,7 +20,12 @@ class GameHost:
     def __init__(self, ctx: agents.JobContext):
         self._ctx = ctx
         self._game_state = game.GameState()
-        self._openai_client = openai.AsyncOpenAI()
+        # We use LiveKit Inference for the vision and judging LLM calls.
+        # It's served through LiveKit Cloud and authenticates with the same
+        # LIVEKIT_API_KEY / LIVEKIT_API_SECRET as the rest of the app, so no
+        # separate model provider API key is required.
+        # See https://docs.livekit.io/agents/models/inference/ for more details
+        self._llm = inference.LLM(model="openai/gpt-4o-mini")
         self._lkapi = api.LiveKitAPI()
         self._drawings = {}
         self._guess_cache = game.GuessCache()
@@ -138,7 +144,7 @@ async def _update_difficulty(self, data: rtc.RpcInvocationData):
         await self._publish_game_state()
         return json.dumps({"updated": True})
 
-    # This judging loop runs when a game is in progress, and uses OpenAI to make guesses and check for winners
+    # This judging loop runs when a game is in progress, and uses an LLM (via LiveKit Inference) to make guesses and check for winners
     async def _run_judge_loop(self, sleep_interval: int = 1):
         print("starting judge loop")
 
@@ -325,6 +331,26 @@ async def _publish_guesses(self, guesses: dict):
             topic="host.guess",
         )
 
+    # Runs a single one-off chat completion against LiveKit Inference and returns
+    # the full text response as one string. `chat()` always returns a streamed
+    # response, so we accumulate the content deltas and close the stream when done.
+    # See https://docs.livekit.io/agents/models/llm/ for more details.
+    async def _llm_complete(self, chat_ctx: ChatContext, **extra_kwargs) -> str:
+        content = ""
+        # Default to temperature 0.5 for every call. Any extra kwargs (e.g.
+        # response_format) are merged in afterwards and forwarded with the request,
+        # so a caller-supplied "temperature" overrides the default.
+        stream = self._llm.chat(
+            chat_ctx=chat_ctx, extra_kwargs={"temperature": 0.5, **extra_kwargs}
+        )
+        try:
+            async for chunk in stream:
+                if chunk.delta and chunk.delta.content:
+                    content += chunk.delta.content
+        finally:
+            await stream.aclose()
+        return content
+
     # We use GPT-4o-mini with vision to make guesses based on the current state of a player's drawing.
     # Each drawing is judged independently and context-free (i.e. the LLM has no knowledge of the current prompt nor other players' drawings)
     # to control for context pollution that would degrade its guess quality
@@ -343,38 +369,32 @@ async def _make_guess(self, player_identity: str, drawing: PlayerDrawing) -> str
             drawing.get_image().save(bytes_io, format="PNG")
             encodedImg = base64.b64encode(bytes_io.getvalue()).decode("utf-8")
 
-        # We're using OpenAI's chat completions API via their [official Python SDK](https://github.com/openai/openai-python), as we aren't doing anything particularly complex here
-        # For applications that require realtime audio streaming and conversation, you should use the [LiveKit Agents OpenAI Plugin](https://github.com/livekit/agents/tree/main/livekit-plugins/livekit-plugins-openai) instead
-        response = await self._openai_client.chat.completions.create(
-            temperature=0.5,
-            messages=[
-                {
-                    "role": "system",
-                    "content": (
-                        "You are a guesser in a realtime drawing competition. Players are drawing on a canvas. You will receive their latest drawing as an image, and can make a guess as to what it is."
-                        "The drawing may be incomplete, but you can still make a guess based on what you see so far. However, don't make vague geometric guesses like 'abstract lines' or 'a circle'."
-                        "You will output a single word or phrase indicating your best guess of what the drawing is of, and nothing else."
-                        f"The player is not allowed to draw words to direct your guessing. This would be considered cheating and you should return '{game.CHEATER_CHEATER}' if you see it. However, if they're drawing a logo or something similar with a few letters, that is acceptable."
-                        f"If you don't have a guess at this time, such as if the drawing is empty or extremely incomplete, return '{game.NO_GUESS}'."
-                    ),
-                },
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{encodedImg}",
-                                "detail": "low",
-                            },
-                        },
-                        {"type": "text", "text": "Make your best guess on this image."},
-                    ],
-                },
+        # We build a one-off chat context with the drawing supplied as an image input.
+        # The image is passed as a base64 data URL; "low" inference detail is plenty for
+        # our 512x512 drawing and keeps token usage (and cost) down.
+        # See https://docs.livekit.io/agents/multimodality/vision/images/ for more details
+        chat_ctx = ChatContext.empty()
+        chat_ctx.add_message(
+            role="system",
+            content=(
+                "You are a guesser in a realtime drawing competition. Players are drawing on a canvas. You will receive their latest drawing as an image, and can make a guess as to what it is."
+                "The drawing may be incomplete, but you can still make a guess based on what you see so far. However, don't make vague geometric guesses like 'abstract lines' or 'a circle'."
+                "You will output a single word or phrase indicating your best guess of what the drawing is of, and nothing else."
+                f"The player is not allowed to draw words to direct your guessing. This would be considered cheating and you should return '{game.CHEATER_CHEATER}' if you see it. However, if they're drawing a logo or something similar with a few letters, that is acceptable."
+                f"If you don't have a guess at this time, such as if the drawing is empty or extremely incomplete, return '{game.NO_GUESS}'."
+            ),
+        )
+        chat_ctx.add_message(
+            role="user",
+            content=[
+                "Make your best guess on this image.",
+                ImageContent(
+                    image=f"data:image/png;base64,{encodedImg}",
+                    inference_detail="low",
+                ),
             ],
-            model="gpt-4o-mini",
         )
-        guess = response.choices[0].message.content
+        guess = await self._llm_complete(chat_ctx)
         print("Made new guess (%s) for player %s" % (guess, player_identity))
         self._guess_cache.set(hash, guess)
 
@@ -400,37 +420,32 @@ async def _make_guesses(self) -> List[str]:
     # Winners can be checked in bulk as a single LLM call, and its possible for more than one player to win
     # We use an LLM for this step rather than a string match, because it's more flexible with synonyms and phrasing
     async def _check_winners(self) -> List[str]:
-        # As above, we're using OpenAI's chat completions API via their [official Python SDK](https://github.com/openai/openai-python), as we aren't doing anything particularly complex here
-        # For applications that require realtime audio streaming and conversation, you should use the [LiveKit Agents OpenAI Plugin](https://github.com/livekit/agents/tree/main/livekit-plugins/livekit-plugins-openai) instead
-        response = await self._openai_client.chat.completions.create(
-            temperature=0.5,
-            messages=[
-                {
-                    "role": "system",
-                    "content": (
-                        "You are a judge in a drawing competition. Your role is to review guesses made by all players, and determine if one or more of them has won the game by correctly guessing the drawing prompt."
-                        "You should be reasonably lenient with synonyms. For instance, 'bunny' would count if the prompt was 'rabbit'. And 'ice cream' could be matched with 'ice cream cone' but not with 'ice'."
-                        "Return a JSON object with the key 'winners' containing a list of all winners, or an empty list if no player has won yet."
-                    ),
-                },
-                {
-                    "role": "user",
-                    "content": "\n".join(
-                        [
-                            'Player "%s" guessed "%s"' % (player_identity, guess)
-                            for player_identity, guess in self._last_guesses.items()
-                            if guess != game.NO_GUESS
-                        ]
-                    )
-                    + "\n\n"
-                    + 'The current game prompt is: "%s". Please return only the list of winners.'
-                    % self._game_state.prompt,
-                },
-            ],
-            model="gpt-4o-mini",
-            response_format={"type": "json_object"},
+        # We request a JSON object response so we can reliably parse the winners list.
+        chat_ctx = ChatContext.empty()
+        chat_ctx.add_message(
+            role="system",
+            content=(
+                "You are a judge in a drawing competition. Your role is to review guesses made by all players, and determine if one or more of them has won the game by correctly guessing the drawing prompt."
+                "You should be reasonably lenient with synonyms. For instance, 'bunny' would count if the prompt was 'rabbit'. And 'ice cream' could be matched with 'ice cream cone' but not with 'ice'."
+                "Return a JSON object with the key 'winners' containing a list of all winners, or an empty list if no player has won yet."
+            ),
+        )
+        chat_ctx.add_message(
+            role="user",
+            content="\n".join(
+                [
+                    'Player "%s" guessed "%s"' % (player_identity, guess)
+                    for player_identity, guess in self._last_guesses.items()
+                    if guess != game.NO_GUESS
+                ]
+            )
+            + "\n\n"
+            + 'The current game prompt is: "%s". Please return only the list of winners.'
+            % self._game_state.prompt,
+        )
+        text = await self._llm_complete(
+            chat_ctx, response_format={"type": "json_object"}
         )
-        text = response.choices[0].message.content
         print("text: %s" % text)
         winners = json.loads(text).get("winners", [])
 

diff --git a/agent/livekit.toml b/agent/livekit.toml
@@ -0,0 +1,5 @@
+[project]
+  subdomain = "livepaint-yctql8fh"
+
+[agent]
+  id = "CA_6usVtqLuWMBm"
diff --git a/agent/pyproject.toml b/agent/pyproject.toml
@@ -0,0 +1,36 @@
+[project]
+name = "livepaint-agent"
+version = "1.0.0"
+description = "Realtime drawing game host agent built on LiveKit Agents"
+readme = "../README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "livekit-agents~=1.5",
+    "livekit-api~=1.1",
+    "pillow~=12.0",
+    "python-dotenv~=1.1",
+]
+
+[dependency-groups]
+dev = [
+    "ruff~=0.14",
+]
+
+# This is an application rather than a packaged library, so it is not built or
+# installed into the environment — uv just manages its dependencies.
+[tool.uv]
+package = false
+
+[tool.ruff]
+line-length = 88
+indent-width = 4
+target-version = "py310"
+
+[tool.ruff.lint]
+extend-select = ["I"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "numpy"
+
+[tool.ruff.format]
+docstring-code-format = true
diff --git a/agent/requirements.txt b/agent/requirements.txt