From f7f643d851346d9662e019acc2991cdf3c929f15 Mon Sep 17 00:00:00 2001 From: Calvin Leng Date: Thu, 5 Feb 2026 13:24:06 -0800 Subject: [PATCH 01/20] deploy-model working --- setup.py | 8 + synapse/cli/__main__.py | 2 + synapse/cli/deploy_model.py | 232 ++++++++++++++ synapse/cli/files.py | 18 +- synapse/client/sftp.py | 16 +- synapse/utils/model_converter/__init__.py | 5 + synapse/utils/model_converter/convert.py | 169 +++++++++++ synapse/utils/model_converter/onnx_to_dlc.py | 240 +++++++++++++++ .../utils/model_converter/onnx_transforms.py | 287 ++++++++++++++++++ synapse/utils/model_converter/pt_to_onnx.py | 139 +++++++++ 10 files changed, 1104 insertions(+), 12 deletions(-) create mode 100644 synapse/cli/deploy_model.py create mode 100644 synapse/utils/model_converter/__init__.py create mode 100644 synapse/utils/model_converter/convert.py create mode 100644 synapse/utils/model_converter/onnx_to_dlc.py create mode 100644 synapse/utils/model_converter/onnx_transforms.py create mode 100644 synapse/utils/model_converter/pt_to_onnx.py diff --git a/setup.py b/setup.py index 263df0fb..55e722a9 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,14 @@ "scipy", "h5py", ], + extras_require={ + "model-convert": [ + "onnx>=1.12.0,<1.16.0", + "torch", + "packaging", + "protobuf", + ], + }, entry_points={ "console_scripts": [ "synapsectl = synapse.cli:main", diff --git a/synapse/cli/__main__.py b/synapse/cli/__main__.py index af66f34e..b9fc880d 100755 --- a/synapse/cli/__main__.py +++ b/synapse/cli/__main__.py @@ -10,6 +10,7 @@ from synapse.cli import ( apps, + deploy_model, discover, files, offline_plot, @@ -77,6 +78,7 @@ def main(): taps.add_commands(subparsers) apps.add_commands(subparsers) settings.add_commands(subparsers) + deploy_model.add_commands(subparsers) args = parser.parse_args() # If we need to setup the device URI, do that now diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py new file mode 100644 index 00000000..a1add206 --- /dev/null +++ b/synapse/cli/deploy_model.py @@ -0,0 +1,232 @@ +"""CLI command for deploying models to Synapse devices.""" + +import argparse +import os +from typing import Optional + +import paramiko.ssh_exception +from rich.console import Console +from rich import progress + +import synapse.client.sftp as sftp +from synapse.cli.files import find_password, save_password +from synapse.utils.model_converter import convert_to_dlc + +# Constants +DEVICE_MODEL_DIR = "/models" +DEFAULT_SFTP_USER = "scifi-sftp" +DEFAULT_ENV_FILE = ".scienv" + + +def add_commands(subparsers: argparse._SubParsersAction): + """Add the deploy-model command to the CLI.""" + parser = subparsers.add_parser( + "deploy-model", + help="Deploy a machine learning model to a Synapse device", + ) + + parser.add_argument( + "model_path", + type=str, + help="Path to the model file (.pt, .onnx, or .dlc)", + ) + + parser.add_argument( + "--input-shape", + type=str, + default=None, + help='Input shape for the model (e.g., "1,32,64"). 
Required if model has dynamic dimensions.', + ) + + parser.add_argument( + "--name", + type=str, + default=None, + help="Model name on device (default: filename without extension)", + ) + + parser.add_argument( + "--username", + "-u", + type=str, + default=DEFAULT_SFTP_USER, + help=f"SFTP username (default: {DEFAULT_SFTP_USER})", + ) + + parser.add_argument( + "--env-file", + "-e", + type=str, + default=DEFAULT_ENV_FILE, + help=f"Password env file (default: {DEFAULT_ENV_FILE})", + ) + + parser.add_argument( + "--forget-password", + "-f", + action="store_true", + help="Don't store password locally", + ) + + parser.set_defaults(func=deploy_model) + + +def deploy_model(args): + """Deploy a model to a Synapse device.""" + console = Console() + + # Validate model path + if not os.path.exists(args.model_path): + console.print(f"[bold red]Error:[/bold red] Model file not found: {args.model_path}") + return + + # Parse input shape if provided + input_shape = None + if args.input_shape: + try: + input_shape = tuple(int(x.strip()) for x in args.input_shape.split(",")) + except ValueError: + console.print( + f"[bold red]Error:[/bold red] Invalid input shape format: {args.input_shape}" + ) + console.print('[yellow]Expected format: "dim1,dim2,..." (e.g., "1,32,64")[/yellow]') + return + + # Determine model name + model_name = args.name + if model_name is None: + model_name = os.path.splitext(os.path.basename(args.model_path))[0] + + console.print(f"[bold]Deploying model:[/bold] {model_name}") + console.print(f"[bold]Source:[/bold] {args.model_path}") + console.print(f"[bold]Target:[/bold] {args.uri}:{DEVICE_MODEL_DIR}/{model_name}.dlc") + console.print() + + # Step 1: Convert model to DLC + console.print("[bold cyan]Converting model to DLC format...[/bold cyan]") + + dlc_path = convert_to_dlc( + args.model_path, + input_shape=input_shape, + console=console, + ) + + if dlc_path is None: + console.print("[bold red]Model conversion failed[/bold red]") + return + + console.print() + + # Step 2: Connect to device via SFTP + console.print("[bold cyan]Connecting to device...[/bold cyan]") + + connections = _setup_connection( + args.uri, + args.username, + args.env_file, + args.forget_password, + console, + ) + + if connections is None: + return + + ssh, sftp_conn = connections + + try: + # Step 3: Ensure model directory exists + _ensure_model_dir(sftp_conn, console) + + # Step 4: Upload the DLC file + remote_path = f"{DEVICE_MODEL_DIR}/{model_name}.dlc" + _upload_file(sftp_conn, dlc_path, remote_path, console) + + console.print() + console.print("[bold green]Model deployed successfully![/bold green]") + console.print() + console.print("[dim]To use in your app:[/dim]") + console.print(f'[cyan] auto model = synapse::Model::load("{model_name}");[/cyan]') + + finally: + sftp.close_sftp(ssh, sftp_conn) + + +def _setup_connection( + uri: str, + username: str, + env_file: str, + forget_password: bool, + console: Console, +) -> Optional[tuple]: + """Set up SFTP connection to device.""" + hostname = uri.split(":")[0] if ":" in uri else uri + password = find_password(hostname, env_file) + + if password is None: + console.print(f"[bold red]Didn't find any password for {hostname}[/bold red]") + return None + + console.print(f"[dim]Connecting to {hostname}:22 as {username}...[/dim]") + + try: + ssh, sftp_conn = sftp.connect_sftp(hostname, username, password) + except paramiko.ssh_exception.AuthenticationException: + console.print(f"[bold red]Authentication failed for {hostname}[/bold red]") + 
console.print("[yellow]Incorrect username or password.[/yellow]") + return None + except paramiko.ssh_exception.SSHException as e: + console.print(f"[bold red]SSH connection failed: {e}[/bold red]") + return None + except Exception as e: + console.print(f"[bold red]Connection failed: {e}[/bold red]") + return None + + if ssh is None or sftp_conn is None: + console.print(f"[bold red]Failed to connect to {hostname}[/bold red]") + return None + + if not forget_password: + save_password(password, env_file, hostname) + + console.print(f"[green]Connected to {hostname}[/green]") + return ssh, sftp_conn + + +def _ensure_model_dir(sftp_conn, console: Console): + """Ensure the model directory exists on the device.""" + try: + sftp_conn.stat(DEVICE_MODEL_DIR) + except FileNotFoundError: + console.print(f"[blue]Creating model directory: {DEVICE_MODEL_DIR}[/blue]") + try: + sftp_conn.mkdir(DEVICE_MODEL_DIR) + except Exception as e: + console.print( + f"[yellow]Warning: Could not create model directory: {e}[/yellow]" + ) + + +def _upload_file(sftp_conn, local_path: str, remote_path: str, console: Console): + """Upload a file to the device with progress display.""" + file_size = os.path.getsize(local_path) + + console.print(f"[blue]Uploading to {remote_path}...[/blue]") + + prog = progress.Progress( + progress.SpinnerColumn(), + progress.TextColumn("[progress.description]{task.description}"), + progress.BarColumn(), + progress.DownloadColumn(), + progress.TransferSpeedColumn(), + progress.TimeElapsedColumn(), + ) + + with prog: + task = prog.add_task("Uploading model", total=file_size) + + def update_progress(transferred: int, total: int): + prog.update(task, completed=transferred) + + sftp_conn.put(local_path, remote_path, callback=update_progress) + + console.print(f"[green]Uploaded to {remote_path}[/green]") diff --git a/synapse/cli/files.py b/synapse/cli/files.py index ab3844cc..f2560d63 100644 --- a/synapse/cli/files.py +++ b/synapse/cli/files.py @@ -11,7 +11,6 @@ from rich import progress from rich.prompt import Confirm -from synapse import Device import synapse.client.sftp as sftp from synapse.utils.file import format_mode, format_time, filesize_binary @@ -155,28 +154,29 @@ def setup_connection( forget_password: bool, console: Console, ) -> Optional[tuple[paramiko.SSHClient, paramiko.SFTPClient]]: - dev_name = Device(uri).get_name() + # Strip port if present - SFTP uses port 22, not the gRPC port + hostname = uri.split(":")[0] if ":" in uri else uri password = find_password( - dev_name, env_file + hostname, env_file ) # Check if password is provided or stored in env file if password is None: - console.print(f"[bold red]Didnt find any password for {uri}[/bold red]") + console.print(f"[bold red]Didnt find any password for {hostname}[/bold red]") return # Open SFTP connection with console.status("Connecting to Synapse device...", spinner="bouncingBall"): try: - ssh, sftp_conn = sftp.connect_sftp(uri, username, password) + ssh, sftp_conn = sftp.connect_sftp(hostname, username, password) except paramiko.ssh_exception.AuthenticationException: - console.print(f"[bold red]Authentication failed for {uri}[/bold red]") + console.print(f"[bold red]Authentication failed for {hostname}[/bold red]") console.print("[yellow] Incorrect username or password.") return None if ssh is None or sftp_conn is None: - console.print(f"[bold red]Failed to connect to {uri}[/bold red]") + console.print(f"[bold red]Failed to connect to {hostname}[/bold red]") return # If the connection is successful, we can prompt the user if 
they want to save the password - if not forget_password and dev_name is not None: - save_password(password, env_file, dev_name) + if not forget_password: + save_password(password, env_file, hostname) return ssh, sftp_conn diff --git a/synapse/client/sftp.py b/synapse/client/sftp.py index f8e5a723..cf75a191 100644 --- a/synapse/client/sftp.py +++ b/synapse/client/sftp.py @@ -1,4 +1,5 @@ import logging +import socket import paramiko import paramiko.ssh_exception @@ -27,20 +28,29 @@ def connect_sftp(hostname, username, password=None, pass_filename=None, key_file except Exception as e: logging.error(f"Failed to read password file: {e}") return None, None - try: + try: + logging.debug(f"Connecting to {hostname}:{port} as {username}") ssh.connect( hostname=hostname, port=port, username=username, password=password, key_filename=key_filename, - timeout=5 + timeout=10, + allow_agent=False, + look_for_keys=False, ) sftp = ssh.open_sftp() except TimeoutError as e: logging.error(f"Connection to {hostname} timed out") return None, None - + except socket.error as e: + logging.error(f"Socket error connecting to {hostname}:{port}: {e}") + return None, None + except paramiko.ssh_exception.SSHException as e: + logging.error(f"SSH error connecting to {hostname}:{port}: {e}") + raise # Re-raise to let caller handle it + return ssh, sftp def close_sftp(ssh, sftp): diff --git a/synapse/utils/model_converter/__init__.py b/synapse/utils/model_converter/__init__.py new file mode 100644 index 00000000..ac579f30 --- /dev/null +++ b/synapse/utils/model_converter/__init__.py @@ -0,0 +1,5 @@ +"""Model conversion utilities for deploying ML models to Synapse devices.""" + +from synapse.utils.model_converter.convert import convert_to_dlc + +__all__ = ["convert_to_dlc"] diff --git a/synapse/utils/model_converter/convert.py b/synapse/utils/model_converter/convert.py new file mode 100644 index 00000000..4f85a222 --- /dev/null +++ b/synapse/utils/model_converter/convert.py @@ -0,0 +1,169 @@ +"""Main model conversion pipeline.""" + +import os +import shutil +import tempfile +from typing import Optional + +from rich.console import Console + +from synapse.utils.model_converter.pt_to_onnx import convert_pt_to_onnx +from synapse.utils.model_converter.onnx_transforms import ( + apply_transforms, + has_dynamic_shapes, + get_input_shapes, +) +from synapse.utils.model_converter.onnx_to_dlc import convert_onnx_to_dlc + + +def convert_to_dlc( + model_path: str, + input_shape: Optional[tuple[int, ...]] = None, + output_path: Optional[str] = None, + console: Optional[Console] = None, +) -> Optional[str]: + """ + Convert a model to DLC format for deployment to Synapse devices. 
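+
+    A minimal usage sketch (the file name and shape here are illustrative)::
+
+        dlc_path = convert_to_dlc("model.pt", input_shape=(1, 32, 64))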
+ + Handles .pt (PyTorch), .onnx, and .dlc files: + - .pt → ONNX → DLC + - .onnx → DLC + - .dlc → returns as-is + + Args: + model_path: Path to the model file (.pt, .onnx, or .dlc) + input_shape: Input shape for the model (required if model has dynamic dims) + output_path: Optional output path for the DLC file + console: Rich console for output + + Returns: + Path to the DLC file, or None if conversion failed + """ + if not os.path.exists(model_path): + if console: + console.print(f"[bold red]Error:[/bold red] Model file not found: {model_path}") + return None + + ext = os.path.splitext(model_path)[1].lower() + + if ext == ".dlc": + # Already a DLC, just return or copy + if output_path and output_path != model_path: + shutil.copy2(model_path, output_path) + return output_path + return model_path + + if ext == ".pt": + return _convert_pt_to_dlc(model_path, input_shape, output_path, console) + + if ext == ".onnx": + return _convert_onnx_to_dlc(model_path, input_shape, output_path, console) + + if console: + console.print( + f"[bold red]Error:[/bold red] Unsupported file type: {ext}" + ) + console.print("[yellow]Supported formats: .pt, .onnx, .dlc[/yellow]") + return None + + +def _convert_pt_to_dlc( + pt_path: str, + input_shape: Optional[tuple[int, ...]], + output_path: Optional[str], + console: Optional[Console], +) -> Optional[str]: + """Convert PyTorch model to DLC via ONNX.""" + if console: + console.print("[bold blue]Step 1/3:[/bold blue] Converting PyTorch to ONNX...") + + # Create temp ONNX file + onnx_path = convert_pt_to_onnx( + pt_path, + output_path=None, # Use temp directory + input_shape=input_shape, + console=console, + ) + + if onnx_path is None: + return None + + return _convert_onnx_to_dlc(onnx_path, input_shape, output_path, console, step_offset=1) + + +def _convert_onnx_to_dlc( + onnx_path: str, + input_shape: Optional[tuple[int, ...]], + output_path: Optional[str], + console: Optional[Console], + step_offset: int = 0, +) -> Optional[str]: + """Convert ONNX model to DLC.""" + step1 = step_offset + 1 + step2 = step_offset + 2 + + # Check for dynamic shapes + if has_dynamic_shapes(onnx_path): + if input_shape is None: + if console: + shapes = get_input_shapes(onnx_path) + console.print( + "[bold red]Error:[/bold red] Model has dynamic input shapes." + ) + console.print("[yellow]Current input shapes:[/yellow]") + for name, shape in shapes: + console.print(f" {name}: {shape}") + console.print( + "\n[yellow]Please provide --input-shape with concrete dimensions.[/yellow]" + ) + return None + if console: + console.print( + f"[yellow]Note: Using provided input shape {input_shape} for dynamic model[/yellow]" + ) + + if console: + console.print(f"[bold blue]Step {step1}/{step2 + 1}:[/bold blue] Applying ONNX transformations...") + + # Apply transforms to a temp copy to avoid modifying the original + temp_dir = tempfile.mkdtemp() + temp_onnx = os.path.join(temp_dir, os.path.basename(onnx_path)) + shutil.copy2(onnx_path, temp_onnx) + + try: + apply_transforms(temp_onnx, console=console) + except Exception as e: + if console: + console.print( + f"[yellow]Warning: Could not apply transforms: {e}. 
" + "Proceeding with original model.[/yellow]" + ) + temp_onnx = onnx_path + + if console: + console.print(f"[bold blue]Step {step2}/{step2 + 1}:[/bold blue] Converting to DLC...") + + # Determine input name from ONNX model + input_name = "input" + try: + shapes = get_input_shapes(temp_onnx) + if shapes: + input_name = shapes[0][0] + except Exception: + pass + + dlc_path = convert_onnx_to_dlc( + temp_onnx, + output_path=output_path, + input_shape=input_shape, + input_name=input_name, + console=console, + ) + + # Cleanup temp files + try: + shutil.rmtree(temp_dir) + except Exception: + pass + + return dlc_path diff --git a/synapse/utils/model_converter/onnx_to_dlc.py b/synapse/utils/model_converter/onnx_to_dlc.py new file mode 100644 index 00000000..0c528578 --- /dev/null +++ b/synapse/utils/model_converter/onnx_to_dlc.py @@ -0,0 +1,240 @@ +"""ONNX to DLC conversion using SNPE converter. + +Requirements for ONNX→DLC conversion: +- QAIRT SDK (set SNPE_ROOT or QAIRT_ROOT environment variable) +- Python 3.10 with: numpy, onnx, pyyaml, packaging, protobuf +- System libraries: libc++1 (sudo apt install libc++1) +- LD_LIBRARY_PATH must include path to libpython3.10.so +""" + +import os +import shutil +import subprocess +import tempfile +from typing import Optional + +from rich.console import Console + + +def _get_snpe_root() -> Optional[str]: + """Get SNPE/QAIRT SDK root from environment.""" + return os.environ.get("SNPE_ROOT") or os.environ.get("QAIRT_ROOT") + + +def find_snpe_converter(snpe_root: Optional[str] = None) -> Optional[str]: + """ + Find the snpe-onnx-to-dlc converter binary. + + Args: + snpe_root: Optional path to SNPE/QAIRT SDK root + + Returns: + Path to the converter binary, or None if not found + """ + if snpe_root is None: + snpe_root = _get_snpe_root() + + if snpe_root is None: + return None + + converter_path = os.path.join( + snpe_root, "bin", "x86_64-linux-clang", "snpe-onnx-to-dlc" + ) + + if os.path.exists(converter_path): + return converter_path + + return None + + +def _find_python310() -> Optional[str]: + """Find Python 3.10 executable.""" + # Check common locations + candidates = [ + "/usr/bin/python3.10", + shutil.which("python3.10"), + ] + for path in candidates: + if path and os.path.exists(path): + return path + return None + + +def _setup_converter_env(snpe_root: str) -> dict: + """ + Set up environment variables for the SNPE converter. + + The converter requires: + - SNPE_ROOT pointing to SDK + - PYTHONPATH with SDK's Python libs first, then system packages (for onnx, numpy, etc.) + - LD_LIBRARY_PATH including libpython3.10.so location + """ + env = os.environ.copy() + + # Set SNPE_ROOT + env["SNPE_ROOT"] = snpe_root + + # Set PYTHONPATH - SDK's Python libs must come first, but we also need + # access to installed packages (onnx, numpy, etc.) 
+ python_lib_path = os.path.join(snpe_root, "lib", "python") + if "PYTHONPATH" in env: + env["PYTHONPATH"] = f"{python_lib_path}:{env['PYTHONPATH']}" + else: + env["PYTHONPATH"] = python_lib_path + + # Set LD_LIBRARY_PATH for libpython3.10.so + ld_paths = ["/usr/lib/x86_64-linux-gnu"] + if "LD_LIBRARY_PATH" in env: + ld_paths.append(env["LD_LIBRARY_PATH"]) + env["LD_LIBRARY_PATH"] = ":".join(ld_paths) + + return env + + +def convert_onnx_to_dlc( + onnx_path: str, + output_path: Optional[str] = None, + input_shape: Optional[tuple[int, ...]] = None, + input_name: str = "input", + snpe_root: Optional[str] = None, + console: Optional[Console] = None, +) -> Optional[str]: + """ + Convert an ONNX model to DLC format using SNPE converter. + + Args: + onnx_path: Path to the ONNX model + output_path: Optional output path for the DLC file + input_shape: Input shape to use (required if model has dynamic dims) + input_name: Name of the input tensor (default: "input") + snpe_root: Optional path to SNPE/QAIRT SDK root + console: Rich console for output + + Returns: + Path to the converted DLC file, or None if conversion failed + """ + if snpe_root is None: + snpe_root = _get_snpe_root() + + if snpe_root is None: + if console: + console.print( + "[bold red]Error:[/bold red] SNPE_ROOT or QAIRT_ROOT environment variable not set" + ) + console.print( + "[yellow]Hint: export SNPE_ROOT=/path/to/qairt/x.xx.x.xxxxxx[/yellow]" + ) + return None + + converter_path = find_snpe_converter(snpe_root) + if converter_path is None: + if console: + console.print( + "[bold red]Error:[/bold red] Could not find snpe-onnx-to-dlc converter" + ) + console.print( + f"[yellow]Expected at: {snpe_root}/bin/x86_64-linux-clang/snpe-onnx-to-dlc[/yellow]" + ) + return None + + # Determine output path + if output_path is None: + base_name = os.path.splitext(os.path.basename(onnx_path))[0] + output_path = os.path.join(tempfile.gettempdir(), f"{base_name}.dlc") + + if console: + console.print(f"[blue]Converting ONNX to DLC: {output_path}...[/blue]") + + # Build command - use -d for input dimensions (short form) + cmd = [ + converter_path, + "--input_network", onnx_path, + "--output_path", output_path, + ] + + # Add input shape if provided + if input_shape is not None: + shape_str = ",".join(str(d) for d in input_shape) + cmd.extend(["-d", input_name, shape_str]) + + # Set up environment + env = _setup_converter_env(snpe_root) + + if console: + console.print(f"[dim]Running: {' '.join(cmd)}[/dim]") + + try: + result = subprocess.run( + cmd, + env=env, + capture_output=True, + text=True, + timeout=300, # 5 minute timeout + ) + + if result.returncode != 0: + if console: + console.print("[bold red]DLC conversion failed:[/bold red]") + if result.stderr: + _display_conversion_error(result.stderr, console) + if result.stdout: + console.print(f"[dim]{result.stdout}[/dim]") + return None + + if not os.path.exists(output_path): + if console: + console.print( + "[bold red]Error:[/bold red] Converter ran but DLC file was not created" + ) + return None + + if console: + console.print(f"[green]Successfully converted to {output_path}[/green]") + + return output_path + + except subprocess.TimeoutExpired: + if console: + console.print("[bold red]Error:[/bold red] Conversion timed out after 5 minutes") + return None + except Exception as e: + if console: + console.print(f"[bold red]Error running converter:[/bold red] {e}") + return None + + +def _display_conversion_error(stderr: str, console: Console): + """Display helpful error messages based on converter 
output.""" + lines = stderr.strip().split("\n") + + # Common error patterns and suggestions + error_hints = { + "unsupported op": ( + "The model contains an unsupported operation. " + "Try simplifying the model or using a different export configuration." + ), + "dynamic": ( + "The model has dynamic shapes. " + "Provide a fixed input shape with --input-shape." + ), + "opset": ( + "The ONNX opset version may be too new. " + "Try exporting with an older opset version (e.g., opset 11)." + ), + "memory": ( + "The conversion ran out of memory. " + "Try reducing model size or batch dimension." + ), + } + + # Display first few error lines + for line in lines[-10:]: + if line.strip(): + console.print(f"[red]{line}[/red]") + + # Check for known patterns and provide hints + stderr_lower = stderr.lower() + for pattern, hint in error_hints.items(): + if pattern in stderr_lower: + console.print(f"\n[yellow]Hint: {hint}[/yellow]") + break diff --git a/synapse/utils/model_converter/onnx_transforms.py b/synapse/utils/model_converter/onnx_transforms.py new file mode 100644 index 00000000..324882e4 --- /dev/null +++ b/synapse/utils/model_converter/onnx_transforms.py @@ -0,0 +1,287 @@ +"""ONNX model transformations for SNPE compatibility.""" + +from typing import Optional + +from rich.console import Console + + +def get_input_shapes(onnx_path: str) -> list[tuple[str, list[int | str]]]: + """ + Get input shapes from an ONNX model. + + Args: + onnx_path: Path to the ONNX model + + Returns: + List of (input_name, shape) tuples where shape may contain strings for dynamic dims + """ + import onnx + + model = onnx.load(onnx_path) + inputs = [] + + for inp in model.graph.input: + shape = [] + for dim in inp.type.tensor_type.shape.dim: + if dim.dim_param: + shape.append(dim.dim_param) # Dynamic dimension name + else: + shape.append(dim.dim_value) + inputs.append((inp.name, shape)) + + return inputs + + +def has_dynamic_shapes(onnx_path: str) -> bool: + """ + Check if an ONNX model has dynamic input shapes. + + Args: + onnx_path: Path to the ONNX model + + Returns: + True if any input has dynamic dimensions + """ + inputs = get_input_shapes(onnx_path) + for _, shape in inputs: + for dim in shape: + if isinstance(dim, str) or dim == 0: + return True + return False + + +def fix_gemm_transpose( + onnx_path: str, + output_path: Optional[str] = None, + console: Optional[Console] = None, +) -> str: + """ + Convert GEMM ops with transB=1 to MatMul+Add. + + This is a workaround for SNPE converter issues with certain GEMM configurations. 
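+
+    Sketch of the rewrite (``X`` is the activation, ``W`` the weight
+    initializer, ``B`` the optional bias; the names are illustrative)::
+
+        # before: Y = Gemm(X, W, B, transB=1)   i.e.  Y = X @ W.T + B
+        # after:  W_T = W.T (new initializer),  Y = Add(MatMul(X, W_T), B)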
+ + Args: + onnx_path: Path to the input ONNX model + output_path: Path for the output model (defaults to overwriting input) + console: Rich console for output + + Returns: + Path to the transformed model + """ + import onnx + from onnx import helper, numpy_helper + + model = onnx.load(onnx_path) + graph = model.graph + + if output_path is None: + output_path = onnx_path + + # Build list of (node_index, replacement_nodes) to maintain topological order + replacements = [] # List of (index, [new_nodes]) + initializers_to_add = [] + transforms_applied = 0 + + for idx, node in enumerate(graph.node): + if node.op_type != "Gemm": + continue + + # Check for transB attribute + trans_b = 0 + alpha = 1.0 + beta = 1.0 + trans_a = 0 + + for attr in node.attribute: + if attr.name == "transB": + trans_b = attr.i + elif attr.name == "transA": + trans_a = attr.i + elif attr.name == "alpha": + alpha = attr.f + elif attr.name == "beta": + beta = attr.f + + if trans_b != 1: + continue # No transformation needed + + if trans_a != 0 or alpha != 1.0: + # Complex case, skip for now + if console: + console.print( + f"[yellow]Warning: Skipping complex GEMM node {node.name} " + f"(transA={trans_a}, alpha={alpha})[/yellow]" + ) + continue + + # Get weight tensor and transpose it + weight_name = node.input[1] + weight_initializer = None + for init in graph.initializer: + if init.name == weight_name: + weight_initializer = init + break + + if weight_initializer is None: + if console: + console.print( + f"[yellow]Warning: Could not find initializer for {weight_name}, " + "skipping transformation[/yellow]" + ) + continue + + transforms_applied += 1 + + # Transpose the weight + weight_array = numpy_helper.to_array(weight_initializer) + transposed_weight = weight_array.T + new_weight_name = f"{weight_name}_transposed" + new_weight = numpy_helper.from_array(transposed_weight, name=new_weight_name) + initializers_to_add.append(new_weight) + + replacement_nodes = [] + + # Create MatMul node + matmul_output = f"{node.name}_matmul_out" + matmul_node = helper.make_node( + "MatMul", + inputs=[node.input[0], new_weight_name], + outputs=[matmul_output], + name=f"{node.name}_matmul", + ) + + # If there's a bias (C input), add it + if len(node.input) > 2 and node.input[2]: + bias_name = node.input[2] + + # Handle beta scaling if needed + if beta != 1.0: + # Find bias initializer and scale it + for init in graph.initializer: + if init.name == bias_name: + bias_array = numpy_helper.to_array(init) + scaled_bias = bias_array * beta + new_bias_name = f"{bias_name}_scaled" + new_bias = numpy_helper.from_array(scaled_bias, name=new_bias_name) + initializers_to_add.append(new_bias) + bias_name = new_bias_name + break + + add_node = helper.make_node( + "Add", + inputs=[matmul_output, bias_name], + outputs=node.output, + name=f"{node.name}_add", + ) + replacement_nodes = [matmul_node, add_node] + else: + # No bias, MatMul output is the final output + matmul_node = helper.make_node( + "MatMul", + inputs=[node.input[0], new_weight_name], + outputs=node.output, + name=f"{node.name}_matmul", + ) + replacement_nodes = [matmul_node] + + replacements.append((idx, replacement_nodes)) + + if transforms_applied > 0: + # Apply replacements in reverse order to maintain correct indices + for idx, new_nodes in reversed(replacements): + del graph.node[idx] + for i, new_node in enumerate(new_nodes): + graph.node.insert(idx + i, new_node) + + graph.initializer.extend(initializers_to_add) + + if console: + console.print( + f"[green]Applied GEMM→MatMul+Add 
transformation to {transforms_applied} nodes[/green]" + ) + + onnx.save(model, output_path) + + return output_path + + +def downgrade_opset( + onnx_path: str, + target_opset: int = 11, + output_path: Optional[str] = None, + console: Optional[Console] = None, +) -> str: + """ + Downgrade ONNX opset version for SNPE compatibility. + + Args: + onnx_path: Path to the input ONNX model + target_opset: Target opset version (default 11 for SNPE compatibility) + output_path: Path for the output model (defaults to overwriting input) + console: Rich console for output + + Returns: + Path to the transformed model + """ + import onnx + from onnx import version_converter + + model = onnx.load(onnx_path) + + if output_path is None: + output_path = onnx_path + + current_opset = model.opset_import[0].version + if current_opset <= target_opset: + if console: + console.print( + f"[blue]Model opset {current_opset} is already at or below target {target_opset}[/blue]" + ) + return output_path + + if console: + console.print( + f"[blue]Downgrading opset from {current_opset} to {target_opset}...[/blue]" + ) + + try: + converted_model = version_converter.convert_version(model, target_opset) + onnx.save(converted_model, output_path) + if console: + console.print(f"[green]Successfully downgraded to opset {target_opset}[/green]") + except Exception as e: + if console: + console.print( + f"[yellow]Warning: Could not downgrade opset: {e}. " + "Proceeding with original version.[/yellow]" + ) + + return output_path + + +def apply_transforms( + onnx_path: str, + output_path: Optional[str] = None, + console: Optional[Console] = None, +) -> str: + """ + Apply all ONNX transformations for SNPE compatibility. + + Args: + onnx_path: Path to the input ONNX model + output_path: Path for the output model (defaults to overwriting input) + console: Rich console for output + + Returns: + Path to the transformed model + """ + if output_path is None: + output_path = onnx_path + + # Apply GEMM fix + fix_gemm_transpose(onnx_path, output_path, console) + + # Downgrade opset if needed + downgrade_opset(output_path, target_opset=11, output_path=output_path, console=console) + + return output_path diff --git a/synapse/utils/model_converter/pt_to_onnx.py b/synapse/utils/model_converter/pt_to_onnx.py new file mode 100644 index 00000000..c7340ebe --- /dev/null +++ b/synapse/utils/model_converter/pt_to_onnx.py @@ -0,0 +1,139 @@ +"""PyTorch to ONNX model conversion.""" + +import os +import tempfile +from typing import Optional + +from rich.console import Console + + +def convert_pt_to_onnx( + pt_path: str, + output_path: Optional[str] = None, + input_shape: Optional[tuple[int, ...]] = None, + console: Optional[Console] = None, +) -> Optional[str]: + """ + Convert a PyTorch model to ONNX format. + + Args: + pt_path: Path to the .pt file + output_path: Optional output path for the ONNX file. If None, uses temp directory. 
+ input_shape: Input shape for the model (required for tracing) + console: Rich console for output + + Returns: + Path to the converted ONNX file, or None if conversion failed + """ + try: + import torch + except ImportError: + if console: + console.print("[bold red]Error:[/bold red] torch is required for PT to ONNX conversion") + console.print("[yellow]Install with: pip install torch[/yellow]") + return None + + if console: + console.print(f"[blue]Loading PyTorch model from {pt_path}...[/blue]") + + try: + model = torch.load(pt_path, map_location="cpu", weights_only=False) + except Exception as e: + if console: + console.print(f"[bold red]Failed to load PyTorch model:[/bold red] {e}") + return None + + # Handle case where saved file is a state_dict instead of a full model + if isinstance(model, dict): + if console: + console.print( + "[bold red]Error:[/bold red] The .pt file contains a state_dict, not a full model." + ) + console.print( + "[yellow]Hint: Save the model with torch.save(model, path) instead of " + "torch.save(model.state_dict(), path)[/yellow]" + ) + return None + + model.eval() + + # Determine input shape + if input_shape is None: + # Try to infer input shape from the model + input_shape = _infer_input_shape(model) + if input_shape is None: + if console: + console.print( + "[bold red]Error:[/bold red] Could not infer input shape. " + "Please provide --input-shape" + ) + return None + if console: + console.print(f"[green]Inferred input shape: {input_shape}[/green]") + + # Create dummy input + dummy_input = torch.randn(*input_shape) + + # Determine output path + if output_path is None: + base_name = os.path.splitext(os.path.basename(pt_path))[0] + output_path = os.path.join(tempfile.gettempdir(), f"{base_name}.onnx") + + if console: + console.print(f"[blue]Exporting to ONNX: {output_path}...[/blue]") + + try: + torch.onnx.export( + model, + dummy_input, + output_path, + export_params=True, + opset_version=13, + do_constant_folding=True, + input_names=["input"], + output_names=["output"], + dynamic_axes=None, # Static shapes for device deployment + ) + except Exception as e: + if console: + console.print(f"[bold red]ONNX export failed:[/bold red] {e}") + return None + + if console: + console.print(f"[green]Successfully exported to {output_path}[/green]") + + return output_path + + +def _infer_input_shape(model) -> Optional[tuple[int, ...]]: + """ + Try to infer input shape from model structure. + + Returns: + Inferred input shape tuple, or None if cannot be inferred + """ + try: + import torch.nn as nn + + # Check if it's a Sequential model with a first Linear layer + if isinstance(model, nn.Sequential): + first_layer = list(model.children())[0] + if isinstance(first_layer, nn.Linear): + return (1, first_layer.in_features) + + # Check for common first layer patterns + for name, module in model.named_modules(): + if isinstance(module, nn.Linear) and "." not in name: + return (1, module.in_features) + if isinstance(module, nn.Conv1d) and "." not in name: + # For Conv1d, we need channels and sequence length + # Use a reasonable default sequence length + return (1, module.in_channels, 64) + if isinstance(module, nn.Conv2d) and "." 
not in name: + # For Conv2d, use a reasonable default spatial size + return (1, module.in_channels, 32, 32) + + except Exception: + pass + + return None From a8a50a0b1321c077cc4acf19966f1fb93ac9c1e0 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 17 Mar 2026 19:03:39 -0700 Subject: [PATCH 02/20] Dockerize model conversion for ONNX to DLC Replace host-side SNPE converter invocation with a Docker-based approach. The container (Python 3.10 + pinned deps) eliminates Python version and numpy compatibility issues on the host. - Add model-converter/ with Dockerfile and self-contained convert.py - Rewrite onnx_to_dlc.py to orchestrate Docker (auto-builds image) - Bind-mount SNPE SDK at runtime (Qualcomm license compliant) - Add --snpe-root CLI arg to deploy-model - Remove unused onnx_transforms.py (logic moved into container) - Fix -u shorthand conflict between --username and global --uri --- model-converter/Dockerfile | 17 + model-converter/convert.py | 317 ++++++++++++++++++ synapse/cli/deploy_model.py | 9 +- synapse/utils/model_converter/convert.py | 119 ++----- synapse/utils/model_converter/onnx_to_dlc.py | 316 +++++++++-------- .../utils/model_converter/onnx_transforms.py | 287 ---------------- 6 files changed, 531 insertions(+), 534 deletions(-) create mode 100644 model-converter/Dockerfile create mode 100644 model-converter/convert.py delete mode 100644 synapse/utils/model_converter/onnx_transforms.py diff --git a/model-converter/Dockerfile b/model-converter/Dockerfile new file mode 100644 index 00000000..04a870f7 --- /dev/null +++ b/model-converter/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.10-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + libc++1 \ + libatomic1 \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir \ + "numpy<2" \ + "onnx==1.16.2" \ + pyyaml \ + packaging \ + "protobuf>=3.20,<5" + +COPY convert.py /opt/model-converter/convert.py + +ENTRYPOINT ["python3", "/opt/model-converter/convert.py"] diff --git a/model-converter/convert.py b/model-converter/convert.py new file mode 100644 index 00000000..f52e15d1 --- /dev/null +++ b/model-converter/convert.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +"""ONNX to DLC conversion script. + +Runs inside the synapse-model-converter Docker container. 
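+
+A typical invocation, mirroring the command that
+synapse/utils/model_converter/onnx_to_dlc.py builds on the host
+(all host-side paths are illustrative):
+
+    docker run --rm --user "$(id -u):$(id -g)" \
+        -v /abs/model/dir:/input:ro \
+        -v "$SNPE_ROOT":/snpe:ro \
+        -v /abs/output/dir:/output \
+        synapse-model-converter:latest \
+        --input /input/model.onnx --output /output/model.dlc \
+        --snpe-root /snpe --input-shape 1,32,64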
+Expects: + - SNPE/QAIRT SDK mounted at the path given by --snpe-root + - Input ONNX model accessible at --input + - Output directory writable at --output parent +""" + +import argparse +import os +import shutil +import subprocess +import sys +import tempfile + + +# --------------------------------------------------------------------------- +# ONNX helpers +# --------------------------------------------------------------------------- + +def get_input_shapes(onnx_path): + import onnx + + model = onnx.load(onnx_path) + inputs = [] + for inp in model.graph.input: + shape = [] + for dim in inp.type.tensor_type.shape.dim: + if dim.dim_param: + shape.append(dim.dim_param) + else: + shape.append(dim.dim_value) + inputs.append((inp.name, shape)) + return inputs + + +def has_dynamic_shapes(onnx_path): + for _, shape in get_input_shapes(onnx_path): + for dim in shape: + if isinstance(dim, str) or dim == 0: + return True + return False + + +# --------------------------------------------------------------------------- +# ONNX transforms (SNPE compatibility) +# --------------------------------------------------------------------------- + +def fix_gemm_transpose(onnx_path, output_path=None): + """Convert GEMM ops with transB=1 to MatMul+Add.""" + import onnx + from onnx import helper, numpy_helper + + model = onnx.load(onnx_path) + graph = model.graph + if output_path is None: + output_path = onnx_path + + replacements = [] + initializers_to_add = [] + transforms_applied = 0 + + for idx, node in enumerate(graph.node): + if node.op_type != "Gemm": + continue + + trans_b = 0 + alpha = 1.0 + beta = 1.0 + trans_a = 0 + + for attr in node.attribute: + if attr.name == "transB": + trans_b = attr.i + elif attr.name == "transA": + trans_a = attr.i + elif attr.name == "alpha": + alpha = attr.f + elif attr.name == "beta": + beta = attr.f + + if trans_b != 1: + continue + if trans_a != 0 or alpha != 1.0: + print(f"Warning: Skipping complex GEMM node {node.name}") + continue + + weight_name = node.input[1] + weight_initializer = None + for init in graph.initializer: + if init.name == weight_name: + weight_initializer = init + break + + if weight_initializer is None: + print(f"Warning: Could not find initializer for {weight_name}, skipping") + continue + + transforms_applied += 1 + weight_array = numpy_helper.to_array(weight_initializer) + transposed_weight = weight_array.T + new_weight_name = f"{weight_name}_transposed" + new_weight = numpy_helper.from_array(transposed_weight, name=new_weight_name) + initializers_to_add.append(new_weight) + + matmul_output = f"{node.name}_matmul_out" + matmul_node = helper.make_node( + "MatMul", + inputs=[node.input[0], new_weight_name], + outputs=[matmul_output], + name=f"{node.name}_matmul", + ) + + if len(node.input) > 2 and node.input[2]: + bias_name = node.input[2] + if beta != 1.0: + for init in graph.initializer: + if init.name == bias_name: + bias_array = numpy_helper.to_array(init) + scaled_bias = bias_array * beta + new_bias_name = f"{bias_name}_scaled" + new_bias = numpy_helper.from_array( + scaled_bias, name=new_bias_name + ) + initializers_to_add.append(new_bias) + bias_name = new_bias_name + break + + add_node = helper.make_node( + "Add", + inputs=[matmul_output, bias_name], + outputs=node.output, + name=f"{node.name}_add", + ) + replacement_nodes = [matmul_node, add_node] + else: + matmul_node = helper.make_node( + "MatMul", + inputs=[node.input[0], new_weight_name], + outputs=node.output, + name=f"{node.name}_matmul", + ) + replacement_nodes = [matmul_node] + + 
replacements.append((idx, replacement_nodes)) + + if transforms_applied > 0: + for idx, new_nodes in reversed(replacements): + del graph.node[idx] + for i, new_node in enumerate(new_nodes): + graph.node.insert(idx + i, new_node) + graph.initializer.extend(initializers_to_add) + print(f"Applied GEMM->MatMul+Add transformation to {transforms_applied} nodes") + onnx.save(model, output_path) + + return output_path + + +def downgrade_opset(onnx_path, target_opset=11, output_path=None): + """Downgrade ONNX opset version for SNPE compatibility.""" + import onnx + from onnx import version_converter + + model = onnx.load(onnx_path) + if output_path is None: + output_path = onnx_path + + current_opset = model.opset_import[0].version + if current_opset <= target_opset: + print(f"Model opset {current_opset} already at or below target {target_opset}") + return output_path + + print(f"Downgrading opset from {current_opset} to {target_opset}...") + try: + converted = version_converter.convert_version(model, target_opset) + onnx.save(converted, output_path) + print(f"Successfully downgraded to opset {target_opset}") + except Exception as e: + print(f"Warning: Could not downgrade opset: {e}. Proceeding with original.") + + return output_path + + +# --------------------------------------------------------------------------- +# DLC conversion +# --------------------------------------------------------------------------- + +def find_converter(snpe_root): + path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", "snpe-onnx-to-dlc") + return path if os.path.exists(path) else None + + +def convert(input_path, output_path, snpe_root, input_shape=None, input_name=None): + """Run the full ONNX -> DLC conversion pipeline.""" + if has_dynamic_shapes(input_path): + if input_shape is None: + shapes = get_input_shapes(input_path) + print("ERROR: Model has dynamic input shapes.", file=sys.stderr) + print("Current input shapes:", file=sys.stderr) + for name, shape in shapes: + print(f" {name}: {shape}", file=sys.stderr) + print( + "\nPlease provide --input-shape with concrete dimensions.", + file=sys.stderr, + ) + return False + print(f"Using provided input shape {input_shape} for dynamic model") + + # Determine input name from model + if input_name is None: + shapes = get_input_shapes(input_path) + input_name = shapes[0][0] if shapes else "input" + + # Work on a temp copy so we don't modify the original + temp_dir = tempfile.mkdtemp() + temp_onnx = os.path.join(temp_dir, os.path.basename(input_path)) + shutil.copy2(input_path, temp_onnx) + + # Apply transforms + print("Applying ONNX transformations...") + try: + fix_gemm_transpose(temp_onnx) + except Exception as e: + print(f"Warning: GEMM transform failed: {e}. Proceeding.") + + try: + downgrade_opset(temp_onnx) + except Exception as e: + print(f"Warning: Opset downgrade failed: {e}. 
Proceeding.") + + # Find converter + converter = find_converter(snpe_root) + if converter is None: + print( + f"ERROR: snpe-onnx-to-dlc not found at " + f"{snpe_root}/bin/x86_64-linux-clang/snpe-onnx-to-dlc", + file=sys.stderr, + ) + return False + + # Set up environment for the SNPE converter + env = os.environ.copy() + env["SNPE_ROOT"] = snpe_root + env["PYTHONPATH"] = os.path.join(snpe_root, "lib", "python") + env["LD_LIBRARY_PATH"] = "/usr/local/lib:/usr/lib/x86_64-linux-gnu" + + cmd = [ + sys.executable, + converter, + "--input_network", + temp_onnx, + "--output_path", + output_path, + ] + + if input_shape is not None: + shape_str = ",".join(str(d) for d in input_shape) + cmd.extend(["-d", input_name, shape_str]) + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, env=env, capture_output=True, text=True, timeout=300) + + shutil.rmtree(temp_dir, ignore_errors=True) + + if result.returncode != 0: + print("ERROR: DLC conversion failed:", file=sys.stderr) + if result.stderr: + print(result.stderr, file=sys.stderr) + if result.stdout: + print(result.stdout) + return False + + if not os.path.exists(output_path): + print("ERROR: Converter ran but DLC file was not created", file=sys.stderr) + return False + + print(f"Successfully converted to {output_path}") + return True + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Convert ONNX model to Qualcomm DLC format" + ) + parser.add_argument("--input", required=True, help="Path to input ONNX model") + parser.add_argument("--output", required=True, help="Path for output DLC file") + parser.add_argument( + "--snpe-root", required=True, help="Path to SNPE/QAIRT SDK root" + ) + parser.add_argument( + "--input-shape", default=None, help="Input shape (comma-separated, e.g. 
1,1920)" + ) + parser.add_argument("--input-name", default=None, help="Input tensor name") + args = parser.parse_args() + + input_shape = None + if args.input_shape: + input_shape = tuple(int(x.strip()) for x in args.input_shape.split(",")) + + success = convert( + args.input, + args.output, + args.snpe_root, + input_shape=input_shape, + input_name=args.input_name, + ) + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py index a1add206..7ffa8c9b 100644 --- a/synapse/cli/deploy_model.py +++ b/synapse/cli/deploy_model.py @@ -47,7 +47,6 @@ def add_commands(subparsers: argparse._SubParsersAction): parser.add_argument( "--username", - "-u", type=str, default=DEFAULT_SFTP_USER, help=f"SFTP username (default: {DEFAULT_SFTP_USER})", @@ -68,6 +67,13 @@ def add_commands(subparsers: argparse._SubParsersAction): help="Don't store password locally", ) + parser.add_argument( + "--snpe-root", + type=str, + default=None, + help="Path to SNPE/QAIRT SDK root (or set SNPE_ROOT env var)", + ) + parser.set_defaults(func=deploy_model) @@ -108,6 +114,7 @@ def deploy_model(args): dlc_path = convert_to_dlc( args.model_path, input_shape=input_shape, + snpe_root=args.snpe_root, console=console, ) diff --git a/synapse/utils/model_converter/convert.py b/synapse/utils/model_converter/convert.py index 4f85a222..a40464ad 100644 --- a/synapse/utils/model_converter/convert.py +++ b/synapse/utils/model_converter/convert.py @@ -2,17 +2,11 @@ import os import shutil -import tempfile from typing import Optional from rich.console import Console from synapse.utils.model_converter.pt_to_onnx import convert_pt_to_onnx -from synapse.utils.model_converter.onnx_transforms import ( - apply_transforms, - has_dynamic_shapes, - get_input_shapes, -) from synapse.utils.model_converter.onnx_to_dlc import convert_onnx_to_dlc @@ -20,20 +14,21 @@ def convert_to_dlc( model_path: str, input_shape: Optional[tuple[int, ...]] = None, output_path: Optional[str] = None, + snpe_root: Optional[str] = None, console: Optional[Console] = None, ) -> Optional[str]: - """ - Convert a model to DLC format for deployment to Synapse devices. + """Convert a model to DLC format for deployment to Synapse devices. 
Handles .pt (PyTorch), .onnx, and .dlc files: - - .pt → ONNX → DLC - - .onnx → DLC - - .dlc → returns as-is + - .pt -> ONNX (on host) -> DLC (in Docker) + - .onnx -> DLC (in Docker) + - .dlc -> returns as-is Args: model_path: Path to the model file (.pt, .onnx, or .dlc) input_shape: Input shape for the model (required if model has dynamic dims) output_path: Optional output path for the DLC file + snpe_root: Path to the SNPE/QAIRT SDK console: Rich console for output Returns: @@ -41,28 +36,27 @@ def convert_to_dlc( """ if not os.path.exists(model_path): if console: - console.print(f"[bold red]Error:[/bold red] Model file not found: {model_path}") + console.print( + f"[bold red]Error:[/bold red] Model file not found: {model_path}" + ) return None ext = os.path.splitext(model_path)[1].lower() if ext == ".dlc": - # Already a DLC, just return or copy if output_path and output_path != model_path: shutil.copy2(model_path, output_path) return output_path return model_path if ext == ".pt": - return _convert_pt_to_dlc(model_path, input_shape, output_path, console) + return _convert_pt_to_dlc(model_path, input_shape, output_path, snpe_root, console) if ext == ".onnx": - return _convert_onnx_to_dlc(model_path, input_shape, output_path, console) + return _convert_onnx_to_dlc(model_path, input_shape, output_path, snpe_root, console) if console: - console.print( - f"[bold red]Error:[/bold red] Unsupported file type: {ext}" - ) + console.print(f"[bold red]Error:[/bold red] Unsupported file type: {ext}") console.print("[yellow]Supported formats: .pt, .onnx, .dlc[/yellow]") return None @@ -71,16 +65,16 @@ def _convert_pt_to_dlc( pt_path: str, input_shape: Optional[tuple[int, ...]], output_path: Optional[str], + snpe_root: Optional[str], console: Optional[Console], ) -> Optional[str]: """Convert PyTorch model to DLC via ONNX.""" if console: - console.print("[bold blue]Step 1/3:[/bold blue] Converting PyTorch to ONNX...") + console.print("[bold blue]Step 1/2:[/bold blue] Converting PyTorch to ONNX...") - # Create temp ONNX file onnx_path = convert_pt_to_onnx( pt_path, - output_path=None, # Use temp directory + output_path=None, input_shape=input_shape, console=console, ) @@ -88,82 +82,35 @@ def _convert_pt_to_dlc( if onnx_path is None: return None - return _convert_onnx_to_dlc(onnx_path, input_shape, output_path, console, step_offset=1) + if console: + console.print( + "[bold blue]Step 2/2:[/bold blue] Converting ONNX to DLC (Docker)..." + ) + + return convert_onnx_to_dlc( + onnx_path, + output_path=output_path, + input_shape=input_shape, + snpe_root=snpe_root, + console=console, + ) def _convert_onnx_to_dlc( onnx_path: str, input_shape: Optional[tuple[int, ...]], output_path: Optional[str], + snpe_root: Optional[str], console: Optional[Console], - step_offset: int = 0, ) -> Optional[str]: - """Convert ONNX model to DLC.""" - step1 = step_offset + 1 - step2 = step_offset + 2 - - # Check for dynamic shapes - if has_dynamic_shapes(onnx_path): - if input_shape is None: - if console: - shapes = get_input_shapes(onnx_path) - console.print( - "[bold red]Error:[/bold red] Model has dynamic input shapes." 
- ) - console.print("[yellow]Current input shapes:[/yellow]") - for name, shape in shapes: - console.print(f" {name}: {shape}") - console.print( - "\n[yellow]Please provide --input-shape with concrete dimensions.[/yellow]" - ) - return None - if console: - console.print( - f"[yellow]Note: Using provided input shape {input_shape} for dynamic model[/yellow]" - ) - + """Convert ONNX model to DLC via Docker.""" if console: - console.print(f"[bold blue]Step {step1}/{step2 + 1}:[/bold blue] Applying ONNX transformations...") - - # Apply transforms to a temp copy to avoid modifying the original - temp_dir = tempfile.mkdtemp() - temp_onnx = os.path.join(temp_dir, os.path.basename(onnx_path)) - shutil.copy2(onnx_path, temp_onnx) - - try: - apply_transforms(temp_onnx, console=console) - except Exception as e: - if console: - console.print( - f"[yellow]Warning: Could not apply transforms: {e}. " - "Proceeding with original model.[/yellow]" - ) - temp_onnx = onnx_path + console.print("[bold blue]Converting ONNX to DLC (Docker)...[/bold blue]") - if console: - console.print(f"[bold blue]Step {step2}/{step2 + 1}:[/bold blue] Converting to DLC...") - - # Determine input name from ONNX model - input_name = "input" - try: - shapes = get_input_shapes(temp_onnx) - if shapes: - input_name = shapes[0][0] - except Exception: - pass - - dlc_path = convert_onnx_to_dlc( - temp_onnx, + return convert_onnx_to_dlc( + onnx_path, output_path=output_path, input_shape=input_shape, - input_name=input_name, + snpe_root=snpe_root, console=console, ) - - # Cleanup temp files - try: - shutil.rmtree(temp_dir) - except Exception: - pass - - return dlc_path diff --git a/synapse/utils/model_converter/onnx_to_dlc.py b/synapse/utils/model_converter/onnx_to_dlc.py index 0c528578..53fb1951 100644 --- a/synapse/utils/model_converter/onnx_to_dlc.py +++ b/synapse/utils/model_converter/onnx_to_dlc.py @@ -1,10 +1,9 @@ -"""ONNX to DLC conversion using SNPE converter. +"""ONNX to DLC conversion via Docker container. -Requirements for ONNX→DLC conversion: -- QAIRT SDK (set SNPE_ROOT or QAIRT_ROOT environment variable) -- Python 3.10 with: numpy, onnx, pyyaml, packaging, protobuf -- System libraries: libc++1 (sudo apt install libc++1) -- LD_LIBRARY_PATH must include path to libpython3.10.so +The conversion runs inside a Docker container that has Python 3.10 and +the required dependencies pre-installed. The user's SNPE/QAIRT SDK is +bind-mounted at runtime (not baked into the image) to comply with +Qualcomm's license terms. """ import os @@ -15,80 +14,97 @@ from rich.console import Console +DOCKER_IMAGE = "synapse-model-converter:latest" + + +def _find_model_converter_dir() -> str: + """Locate the model-converter/ directory containing the Dockerfile.""" + # Walk up from this file to the repo root + here = os.path.dirname(os.path.abspath(__file__)) + repo_root = os.path.dirname(os.path.dirname(os.path.dirname(here))) + candidate = os.path.join(repo_root, "model-converter") + if os.path.isdir(candidate) and os.path.isfile( + os.path.join(candidate, "Dockerfile") + ): + return candidate + raise FileNotFoundError( + f"model-converter/ directory not found at {candidate}. " + "Make sure you are running from the synapse-python repository." + ) -def _get_snpe_root() -> Optional[str]: - """Get SNPE/QAIRT SDK root from environment.""" - return os.environ.get("SNPE_ROOT") or os.environ.get("QAIRT_ROOT") - - -def find_snpe_converter(snpe_root: Optional[str] = None) -> Optional[str]: - """ - Find the snpe-onnx-to-dlc converter binary. 
- - Args: - snpe_root: Optional path to SNPE/QAIRT SDK root - - Returns: - Path to the converter binary, or None if not found - """ - if snpe_root is None: - snpe_root = _get_snpe_root() - - if snpe_root is None: - return None - converter_path = os.path.join( - snpe_root, "bin", "x86_64-linux-clang", "snpe-onnx-to-dlc" +def _image_exists() -> bool: + """Check if the Docker image is already built.""" + result = subprocess.run( + ["docker", "image", "inspect", DOCKER_IMAGE], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, ) + return result.returncode == 0 - if os.path.exists(converter_path): - return converter_path - - return None +def _build_image(console: Optional[Console] = None) -> bool: + """Build the model-converter Docker image.""" + try: + build_dir = _find_model_converter_dir() + except FileNotFoundError as e: + if console: + console.print(f"[bold red]Error:[/bold red] {e}") + return False -def _find_python310() -> Optional[str]: - """Find Python 3.10 executable.""" - # Check common locations - candidates = [ - "/usr/bin/python3.10", - shutil.which("python3.10"), - ] - for path in candidates: - if path and os.path.exists(path): - return path - return None + if console: + console.print( + f"[yellow]Building Docker image [bold]{DOCKER_IMAGE}[/bold] " + f"(first time only)...[/yellow]" + ) + try: + subprocess.run( + ["docker", "build", "-t", DOCKER_IMAGE, "."], + cwd=build_dir, + check=True, + ) + except subprocess.CalledProcessError: + if console: + console.print( + "[bold red]Error:[/bold red] Failed to build model-converter Docker image" + ) + return False -def _setup_converter_env(snpe_root: str) -> dict: - """ - Set up environment variables for the SNPE converter. + if console: + console.print(f"[green]Docker image {DOCKER_IMAGE} built successfully[/green]") + return True - The converter requires: - - SNPE_ROOT pointing to SDK - - PYTHONPATH with SDK's Python libs first, then system packages (for onnx, numpy, etc.) - - LD_LIBRARY_PATH including libpython3.10.so location - """ - env = os.environ.copy() - # Set SNPE_ROOT - env["SNPE_ROOT"] = snpe_root +def ensure_docker(console: Optional[Console] = None) -> bool: + """Check that Docker is available and the image is built.""" + if shutil.which("docker") is None: + if console: + console.print( + "[bold red]Error:[/bold red] Docker is required for model conversion " + "but was not found. Please install Docker." + ) + return False - # Set PYTHONPATH - SDK's Python libs must come first, but we also need - # access to installed packages (onnx, numpy, etc.) - python_lib_path = os.path.join(snpe_root, "lib", "python") - if "PYTHONPATH" in env: - env["PYTHONPATH"] = f"{python_lib_path}:{env['PYTHONPATH']}" - else: - env["PYTHONPATH"] = python_lib_path + try: + subprocess.run( + ["docker", "info"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True, + ) + except subprocess.CalledProcessError: + if console: + console.print( + "[bold red]Error:[/bold red] Docker daemon is not running. " + "Please start Docker and try again." 
+ ) + return False - # Set LD_LIBRARY_PATH for libpython3.10.so - ld_paths = ["/usr/lib/x86_64-linux-gnu"] - if "LD_LIBRARY_PATH" in env: - ld_paths.append(env["LD_LIBRARY_PATH"]) - env["LD_LIBRARY_PATH"] = ":".join(ld_paths) + if not _image_exists(): + return _build_image(console) - return env + return True def convert_onnx_to_dlc( @@ -99,142 +115,122 @@ def convert_onnx_to_dlc( snpe_root: Optional[str] = None, console: Optional[Console] = None, ) -> Optional[str]: - """ - Convert an ONNX model to DLC format using SNPE converter. + """Convert an ONNX model to DLC format using the Docker-based converter. Args: onnx_path: Path to the ONNX model output_path: Optional output path for the DLC file - input_shape: Input shape to use (required if model has dynamic dims) - input_name: Name of the input tensor (default: "input") - snpe_root: Optional path to SNPE/QAIRT SDK root + input_shape: Input shape (required if model has dynamic dims) + input_name: Name of the input tensor + snpe_root: Path to the SNPE/QAIRT SDK console: Rich console for output Returns: - Path to the converted DLC file, or None if conversion failed + Path to the converted DLC file, or None on failure """ if snpe_root is None: - snpe_root = _get_snpe_root() + snpe_root = os.environ.get("SNPE_ROOT") or os.environ.get("QAIRT_ROOT") if snpe_root is None: if console: console.print( - "[bold red]Error:[/bold red] SNPE_ROOT or QAIRT_ROOT environment variable not set" - ) - console.print( - "[yellow]Hint: export SNPE_ROOT=/path/to/qairt/x.xx.x.xxxxxx[/yellow]" + "[bold red]Error:[/bold red] --snpe-root is required " + "(or set SNPE_ROOT / QAIRT_ROOT env var)" ) return None - converter_path = find_snpe_converter(snpe_root) - if converter_path is None: + snpe_root = os.path.abspath(snpe_root) + if not os.path.isdir(snpe_root): if console: console.print( - "[bold red]Error:[/bold red] Could not find snpe-onnx-to-dlc converter" - ) - console.print( - f"[yellow]Expected at: {snpe_root}/bin/x86_64-linux-clang/snpe-onnx-to-dlc[/yellow]" + f"[bold red]Error:[/bold red] SNPE root not found: {snpe_root}" ) return None - # Determine output path + if not ensure_docker(console): + return None + + # Resolve paths for Docker mounts + onnx_path = os.path.abspath(onnx_path) + onnx_dir = os.path.dirname(onnx_path) + onnx_filename = os.path.basename(onnx_path) + if output_path is None: - base_name = os.path.splitext(os.path.basename(onnx_path))[0] + base_name = os.path.splitext(onnx_filename)[0] output_path = os.path.join(tempfile.gettempdir(), f"{base_name}.dlc") - if console: - console.print(f"[blue]Converting ONNX to DLC: {output_path}...[/blue]") + output_dir = os.path.abspath(os.path.dirname(output_path)) + output_filename = os.path.basename(output_path) + os.makedirs(output_dir, exist_ok=True) - # Build command - use -d for input dimensions (short form) + # Build docker run command + # Run as the host user so output files have correct ownership cmd = [ - converter_path, - "--input_network", onnx_path, - "--output_path", output_path, + "docker", + "run", + "--rm", + "--user", + f"{os.getuid()}:{os.getgid()}", + "-v", + f"{onnx_dir}:/input:ro", + "-v", + f"{snpe_root}:/snpe:ro", + "-v", + f"{output_dir}:/output", + DOCKER_IMAGE, + "--input", + f"/input/{onnx_filename}", + "--output", + f"/output/{output_filename}", + "--snpe-root", + "/snpe", ] - # Add input shape if provided if input_shape is not None: shape_str = ",".join(str(d) for d in input_shape) - cmd.extend(["-d", input_name, shape_str]) + cmd.extend(["--input-shape", shape_str]) - # Set up 
environment - env = _setup_converter_env(snpe_root) + if input_name != "input": + cmd.extend(["--input-name", input_name]) if console: - console.print(f"[dim]Running: {' '.join(cmd)}[/dim]") + console.print("[dim]Running conversion in Docker container...[/dim]") try: result = subprocess.run( - cmd, - env=env, - capture_output=True, - text=True, - timeout=300, # 5 minute timeout + cmd, capture_output=True, text=True, timeout=600, ) - - if result.returncode != 0: - if console: - console.print("[bold red]DLC conversion failed:[/bold red]") - if result.stderr: - _display_conversion_error(result.stderr, console) - if result.stdout: - console.print(f"[dim]{result.stdout}[/dim]") - return None - - if not os.path.exists(output_path): - if console: - console.print( - "[bold red]Error:[/bold red] Converter ran but DLC file was not created" - ) - return None - + except subprocess.TimeoutExpired: if console: - console.print(f"[green]Successfully converted to {output_path}[/green]") + console.print( + "[bold red]Error:[/bold red] Conversion timed out after 10 minutes" + ) + return None - return output_path + # Display container output + if result.stdout: + for line in result.stdout.strip().split("\n"): + if line.strip(): + if console: + console.print(f" {line}") - except subprocess.TimeoutExpired: + if result.returncode != 0: if console: - console.print("[bold red]Error:[/bold red] Conversion timed out after 5 minutes") + console.print("[bold red]DLC conversion failed:[/bold red]") + if result.stderr: + for line in result.stderr.strip().split("\n")[-15:]: + if line.strip(): + console.print(f"[red] {line}[/red]") return None - except Exception as e: + + if not os.path.exists(output_path): if console: - console.print(f"[bold red]Error running converter:[/bold red] {e}") + console.print( + "[bold red]Error:[/bold red] Container exited OK but DLC file not found" + ) return None + if console: + console.print(f"[green]Successfully converted to {output_path}[/green]") -def _display_conversion_error(stderr: str, console: Console): - """Display helpful error messages based on converter output.""" - lines = stderr.strip().split("\n") - - # Common error patterns and suggestions - error_hints = { - "unsupported op": ( - "The model contains an unsupported operation. " - "Try simplifying the model or using a different export configuration." - ), - "dynamic": ( - "The model has dynamic shapes. " - "Provide a fixed input shape with --input-shape." - ), - "opset": ( - "The ONNX opset version may be too new. " - "Try exporting with an older opset version (e.g., opset 11)." - ), - "memory": ( - "The conversion ran out of memory. " - "Try reducing model size or batch dimension." 
- ), - } - - # Display first few error lines - for line in lines[-10:]: - if line.strip(): - console.print(f"[red]{line}[/red]") - - # Check for known patterns and provide hints - stderr_lower = stderr.lower() - for pattern, hint in error_hints.items(): - if pattern in stderr_lower: - console.print(f"\n[yellow]Hint: {hint}[/yellow]") - break + return output_path diff --git a/synapse/utils/model_converter/onnx_transforms.py b/synapse/utils/model_converter/onnx_transforms.py deleted file mode 100644 index 324882e4..00000000 --- a/synapse/utils/model_converter/onnx_transforms.py +++ /dev/null @@ -1,287 +0,0 @@ -"""ONNX model transformations for SNPE compatibility.""" - -from typing import Optional - -from rich.console import Console - - -def get_input_shapes(onnx_path: str) -> list[tuple[str, list[int | str]]]: - """ - Get input shapes from an ONNX model. - - Args: - onnx_path: Path to the ONNX model - - Returns: - List of (input_name, shape) tuples where shape may contain strings for dynamic dims - """ - import onnx - - model = onnx.load(onnx_path) - inputs = [] - - for inp in model.graph.input: - shape = [] - for dim in inp.type.tensor_type.shape.dim: - if dim.dim_param: - shape.append(dim.dim_param) # Dynamic dimension name - else: - shape.append(dim.dim_value) - inputs.append((inp.name, shape)) - - return inputs - - -def has_dynamic_shapes(onnx_path: str) -> bool: - """ - Check if an ONNX model has dynamic input shapes. - - Args: - onnx_path: Path to the ONNX model - - Returns: - True if any input has dynamic dimensions - """ - inputs = get_input_shapes(onnx_path) - for _, shape in inputs: - for dim in shape: - if isinstance(dim, str) or dim == 0: - return True - return False - - -def fix_gemm_transpose( - onnx_path: str, - output_path: Optional[str] = None, - console: Optional[Console] = None, -) -> str: - """ - Convert GEMM ops with transB=1 to MatMul+Add. - - This is a workaround for SNPE converter issues with certain GEMM configurations. 
- - Args: - onnx_path: Path to the input ONNX model - output_path: Path for the output model (defaults to overwriting input) - console: Rich console for output - - Returns: - Path to the transformed model - """ - import onnx - from onnx import helper, numpy_helper - - model = onnx.load(onnx_path) - graph = model.graph - - if output_path is None: - output_path = onnx_path - - # Build list of (node_index, replacement_nodes) to maintain topological order - replacements = [] # List of (index, [new_nodes]) - initializers_to_add = [] - transforms_applied = 0 - - for idx, node in enumerate(graph.node): - if node.op_type != "Gemm": - continue - - # Check for transB attribute - trans_b = 0 - alpha = 1.0 - beta = 1.0 - trans_a = 0 - - for attr in node.attribute: - if attr.name == "transB": - trans_b = attr.i - elif attr.name == "transA": - trans_a = attr.i - elif attr.name == "alpha": - alpha = attr.f - elif attr.name == "beta": - beta = attr.f - - if trans_b != 1: - continue # No transformation needed - - if trans_a != 0 or alpha != 1.0: - # Complex case, skip for now - if console: - console.print( - f"[yellow]Warning: Skipping complex GEMM node {node.name} " - f"(transA={trans_a}, alpha={alpha})[/yellow]" - ) - continue - - # Get weight tensor and transpose it - weight_name = node.input[1] - weight_initializer = None - for init in graph.initializer: - if init.name == weight_name: - weight_initializer = init - break - - if weight_initializer is None: - if console: - console.print( - f"[yellow]Warning: Could not find initializer for {weight_name}, " - "skipping transformation[/yellow]" - ) - continue - - transforms_applied += 1 - - # Transpose the weight - weight_array = numpy_helper.to_array(weight_initializer) - transposed_weight = weight_array.T - new_weight_name = f"{weight_name}_transposed" - new_weight = numpy_helper.from_array(transposed_weight, name=new_weight_name) - initializers_to_add.append(new_weight) - - replacement_nodes = [] - - # Create MatMul node - matmul_output = f"{node.name}_matmul_out" - matmul_node = helper.make_node( - "MatMul", - inputs=[node.input[0], new_weight_name], - outputs=[matmul_output], - name=f"{node.name}_matmul", - ) - - # If there's a bias (C input), add it - if len(node.input) > 2 and node.input[2]: - bias_name = node.input[2] - - # Handle beta scaling if needed - if beta != 1.0: - # Find bias initializer and scale it - for init in graph.initializer: - if init.name == bias_name: - bias_array = numpy_helper.to_array(init) - scaled_bias = bias_array * beta - new_bias_name = f"{bias_name}_scaled" - new_bias = numpy_helper.from_array(scaled_bias, name=new_bias_name) - initializers_to_add.append(new_bias) - bias_name = new_bias_name - break - - add_node = helper.make_node( - "Add", - inputs=[matmul_output, bias_name], - outputs=node.output, - name=f"{node.name}_add", - ) - replacement_nodes = [matmul_node, add_node] - else: - # No bias, MatMul output is the final output - matmul_node = helper.make_node( - "MatMul", - inputs=[node.input[0], new_weight_name], - outputs=node.output, - name=f"{node.name}_matmul", - ) - replacement_nodes = [matmul_node] - - replacements.append((idx, replacement_nodes)) - - if transforms_applied > 0: - # Apply replacements in reverse order to maintain correct indices - for idx, new_nodes in reversed(replacements): - del graph.node[idx] - for i, new_node in enumerate(new_nodes): - graph.node.insert(idx + i, new_node) - - graph.initializer.extend(initializers_to_add) - - if console: - console.print( - f"[green]Applied GEMM→MatMul+Add 
transformation to {transforms_applied} nodes[/green]" - ) - - onnx.save(model, output_path) - - return output_path - - -def downgrade_opset( - onnx_path: str, - target_opset: int = 11, - output_path: Optional[str] = None, - console: Optional[Console] = None, -) -> str: - """ - Downgrade ONNX opset version for SNPE compatibility. - - Args: - onnx_path: Path to the input ONNX model - target_opset: Target opset version (default 11 for SNPE compatibility) - output_path: Path for the output model (defaults to overwriting input) - console: Rich console for output - - Returns: - Path to the transformed model - """ - import onnx - from onnx import version_converter - - model = onnx.load(onnx_path) - - if output_path is None: - output_path = onnx_path - - current_opset = model.opset_import[0].version - if current_opset <= target_opset: - if console: - console.print( - f"[blue]Model opset {current_opset} is already at or below target {target_opset}[/blue]" - ) - return output_path - - if console: - console.print( - f"[blue]Downgrading opset from {current_opset} to {target_opset}...[/blue]" - ) - - try: - converted_model = version_converter.convert_version(model, target_opset) - onnx.save(converted_model, output_path) - if console: - console.print(f"[green]Successfully downgraded to opset {target_opset}[/green]") - except Exception as e: - if console: - console.print( - f"[yellow]Warning: Could not downgrade opset: {e}. " - "Proceeding with original version.[/yellow]" - ) - - return output_path - - -def apply_transforms( - onnx_path: str, - output_path: Optional[str] = None, - console: Optional[Console] = None, -) -> str: - """ - Apply all ONNX transformations for SNPE compatibility. - - Args: - onnx_path: Path to the input ONNX model - output_path: Path for the output model (defaults to overwriting input) - console: Rich console for output - - Returns: - Path to the transformed model - """ - if output_path is None: - output_path = onnx_path - - # Apply GEMM fix - fix_gemm_transpose(onnx_path, output_path, console) - - # Downgrade opset if needed - downgrade_opset(output_path, target_opset=11, output_path=output_path, console=console) - - return output_path From e7c4ecc1db8f330dbd6eb8b67bdd6832d733a5ba Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Thu, 19 Mar 2026 17:33:33 -0700 Subject: [PATCH 03/20] quantization working --- model-converter/convert.py | 102 +++++++++++++++++++ synapse/cli/deploy_model.py | 38 +++++++ synapse/utils/model_converter/convert.py | 20 +++- synapse/utils/model_converter/onnx_to_dlc.py | 19 +++- 4 files changed, 176 insertions(+), 3 deletions(-) diff --git a/model-converter/convert.py b/model-converter/convert.py index f52e15d1..bac211e5 100644 --- a/model-converter/convert.py +++ b/model-converter/convert.py @@ -280,6 +280,92 @@ def convert(input_path, output_path, snpe_root, input_shape=None, input_name=Non return True +# --------------------------------------------------------------------------- +# Quantization +# --------------------------------------------------------------------------- + +def find_quantizer(snpe_root): + # Use the native binary directly, not the bash wrapper + path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", "snpe-dlc-quant") + if os.path.exists(path): + return path + # Fall back to the wrapper script + path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", "snpe-dlc-quantize") + return path if os.path.exists(path) else None + + +def quantize_dlc(dlc_path, input_list, snpe_root, output_path=None): + """Quantize a DLC model to 
INT8 using representative input data.""" + quantizer = find_quantizer(snpe_root) + if quantizer is None: + print( + f"ERROR: snpe-dlc-quant not found at " + f"{snpe_root}/bin/x86_64-linux-clang/", + file=sys.stderr, + ) + return False + + if output_path is None: + base, ext = os.path.splitext(dlc_path) + output_path = f"{base}_quantized{ext}" + + env = os.environ.copy() + env["SNPE_ROOT"] = snpe_root + env["PYTHONPATH"] = os.path.join(snpe_root, "lib", "python") + bin_dir = os.path.join(snpe_root, "bin", "x86_64-linux-clang") + lib_dir = os.path.join(snpe_root, "lib", "x86_64-linux-clang") + env["PATH"] = f"{bin_dir}:{env.get('PATH', '')}" + env["LD_LIBRARY_PATH"] = f"{lib_dir}:/usr/local/lib:/usr/lib/x86_64-linux-gnu" + + cmd = [ + quantizer, + "--input_dlc", + dlc_path, + "--input_list", + input_list, + "--output_dlc", + output_path, + ] + + # The quantizer resolves raw file paths relative to cwd and also writes + # intermediate output to ./output/ in cwd. We create a temp working + # directory and symlink the raw files there so the data mount can stay + # read-only. + input_list_dir = os.path.dirname(os.path.abspath(input_list)) + work_dir = tempfile.mkdtemp() + + # Symlink every file from the input data directory into the work dir + for name in os.listdir(input_list_dir): + src = os.path.join(input_list_dir, name) + dst = os.path.join(work_dir, name) + os.symlink(src, dst) + + print(f"Quantizing model: {' '.join(cmd)}") + result = subprocess.run( + cmd, env=env, capture_output=True, text=True, timeout=600, + cwd=work_dir, + ) + + shutil.rmtree(work_dir, ignore_errors=True) + + if result.returncode != 0: + print("ERROR: Quantization failed:", file=sys.stderr) + if result.stderr: + print(result.stderr, file=sys.stderr) + if result.stdout: + print(result.stdout) + return False + + if not os.path.exists(output_path): + print("ERROR: Quantizer ran but output file not created", file=sys.stderr) + return False + + # Replace the float DLC with the quantized one + shutil.move(output_path, dlc_path) + print(f"Successfully quantized model to {dlc_path}") + return True + + # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- @@ -297,6 +383,12 @@ def main(): "--input-shape", default=None, help="Input shape (comma-separated, e.g. 1,1920)" ) parser.add_argument("--input-name", default=None, help="Input tensor name") + parser.add_argument( + "--quantize", action="store_true", help="Quantize model to INT8" + ) + parser.add_argument( + "--input-list", default=None, help="Input list file for quantization" + ) args = parser.parse_args() input_shape = None @@ -310,6 +402,16 @@ def main(): input_shape=input_shape, input_name=args.input_name, ) + + if success and args.quantize: + if not args.input_list: + print( + "ERROR: --quantize requires --input-list with representative inputs", + file=sys.stderr, + ) + sys.exit(1) + success = quantize_dlc(args.output, args.input_list, args.snpe_root) + sys.exit(0 if success else 1) diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py index 7ffa8c9b..5c8cb9b3 100644 --- a/synapse/cli/deploy_model.py +++ b/synapse/cli/deploy_model.py @@ -74,6 +74,29 @@ def add_commands(subparsers: argparse._SubParsersAction): help="Path to SNPE/QAIRT SDK root (or set SNPE_ROOT env var)", ) + parser.add_argument( + "--quantize", + action="store_true", + help=( + "Quantize the model to INT8 after conversion. Required for DSP/NPU inference. 
" + "Must be used with --input-list pointing to a file of representative inputs. " + "Each line in the input list should be a path to a raw binary file containing " + "float32 data matching the model's input shape (e.g., numpy: " + 'arr.astype(np.float32).tofile("sample_001.raw")).' + ), + ) + + parser.add_argument( + "--input-list", + type=str, + default=None, + help=( + "Path to a text file listing representative input samples for quantization. " + "Each line is a path to a .raw file (float32 binary). Paths should be " + "relative to the directory containing the input list file." + ), + ) + parser.set_defaults(func=deploy_model) @@ -108,6 +131,19 @@ def deploy_model(args): console.print(f"[bold]Target:[/bold] {args.uri}:{DEVICE_MODEL_DIR}/{model_name}.dlc") console.print() + # Validate quantize + input-list + if args.quantize and not args.input_list: + console.print( + "[bold red]Error:[/bold red] --quantize requires --input-list " + "with representative input samples" + ) + return + + if args.input_list and not args.quantize: + console.print( + "[yellow]Note: --input-list provided without --quantize, ignoring[/yellow]" + ) + # Step 1: Convert model to DLC console.print("[bold cyan]Converting model to DLC format...[/bold cyan]") @@ -115,6 +151,8 @@ def deploy_model(args): args.model_path, input_shape=input_shape, snpe_root=args.snpe_root, + quantize=args.quantize, + input_list=args.input_list, console=console, ) diff --git a/synapse/utils/model_converter/convert.py b/synapse/utils/model_converter/convert.py index a40464ad..1ee51677 100644 --- a/synapse/utils/model_converter/convert.py +++ b/synapse/utils/model_converter/convert.py @@ -15,6 +15,8 @@ def convert_to_dlc( input_shape: Optional[tuple[int, ...]] = None, output_path: Optional[str] = None, snpe_root: Optional[str] = None, + quantize: bool = False, + input_list: Optional[str] = None, console: Optional[Console] = None, ) -> Optional[str]: """Convert a model to DLC format for deployment to Synapse devices. 
@@ -29,6 +31,8 @@ def convert_to_dlc( input_shape: Input shape for the model (required if model has dynamic dims) output_path: Optional output path for the DLC file snpe_root: Path to the SNPE/QAIRT SDK + quantize: Whether to quantize the model to INT8 + input_list: Path to representative input list file (required if quantize=True) console: Rich console for output Returns: @@ -50,10 +54,14 @@ def convert_to_dlc( return model_path if ext == ".pt": - return _convert_pt_to_dlc(model_path, input_shape, output_path, snpe_root, console) + return _convert_pt_to_dlc( + model_path, input_shape, output_path, snpe_root, quantize, input_list, console + ) if ext == ".onnx": - return _convert_onnx_to_dlc(model_path, input_shape, output_path, snpe_root, console) + return _convert_onnx_to_dlc( + model_path, input_shape, output_path, snpe_root, quantize, input_list, console + ) if console: console.print(f"[bold red]Error:[/bold red] Unsupported file type: {ext}") @@ -66,6 +74,8 @@ def _convert_pt_to_dlc( input_shape: Optional[tuple[int, ...]], output_path: Optional[str], snpe_root: Optional[str], + quantize: bool, + input_list: Optional[str], console: Optional[Console], ) -> Optional[str]: """Convert PyTorch model to DLC via ONNX.""" @@ -92,6 +102,8 @@ def _convert_pt_to_dlc( output_path=output_path, input_shape=input_shape, snpe_root=snpe_root, + quantize=quantize, + input_list=input_list, console=console, ) @@ -101,6 +113,8 @@ def _convert_onnx_to_dlc( input_shape: Optional[tuple[int, ...]], output_path: Optional[str], snpe_root: Optional[str], + quantize: bool, + input_list: Optional[str], console: Optional[Console], ) -> Optional[str]: """Convert ONNX model to DLC via Docker.""" @@ -112,5 +126,7 @@ def _convert_onnx_to_dlc( output_path=output_path, input_shape=input_shape, snpe_root=snpe_root, + quantize=quantize, + input_list=input_list, console=console, ) diff --git a/synapse/utils/model_converter/onnx_to_dlc.py b/synapse/utils/model_converter/onnx_to_dlc.py index 53fb1951..b60b285b 100644 --- a/synapse/utils/model_converter/onnx_to_dlc.py +++ b/synapse/utils/model_converter/onnx_to_dlc.py @@ -113,6 +113,8 @@ def convert_onnx_to_dlc( input_shape: Optional[tuple[int, ...]] = None, input_name: str = "input", snpe_root: Optional[str] = None, + quantize: bool = False, + input_list: Optional[str] = None, console: Optional[Console] = None, ) -> Optional[str]: """Convert an ONNX model to DLC format using the Docker-based converter. 
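
With quantize and input_list threaded through to the container, a quantized
conversion can be driven from Python. A hypothetical end-to-end sketch (the
(1, 32, 64) shape, file names, and sample count are placeholders, and it
assumes SNPE_ROOT or QAIRT_ROOT is exported; real representative recordings
should replace the random data to get meaningful quantization ranges):

    import os

    import numpy as np

    from synapse.utils.model_converter import convert_to_dlc

    # Each line of the input list names a raw float32 file, resolved
    # relative to the directory containing the list.
    os.makedirs("calib", exist_ok=True)
    with open("calib/input_list.txt", "w") as f:
        for i in range(20):
            name = f"sample_{i:03d}.raw"
            np.random.randn(1, 32, 64).astype(np.float32).tofile(f"calib/{name}")
            f.write(name + "\n")

    dlc_path = convert_to_dlc(
        "model.onnx",
        input_shape=(1, 32, 64),
        quantize=True,
        input_list="calib/input_list.txt",
    )

The deploy-model CLI drives the same path with --quantize/--input-list and
additionally uploads the result to the device.
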
@@ -123,6 +125,8 @@ def convert_onnx_to_dlc( input_shape: Input shape (required if model has dynamic dims) input_name: Name of the input tensor snpe_root: Path to the SNPE/QAIRT SDK + quantize: Whether to quantize the model to INT8 after conversion + input_list: Path to representative input list file for quantization console: Rich console for output Returns: @@ -177,6 +181,16 @@ def convert_onnx_to_dlc( f"{snpe_root}:/snpe:ro", "-v", f"{output_dir}:/output", + ] + + # Mount input data directory for quantization + if quantize and input_list: + input_list = os.path.abspath(input_list) + input_list_dir = os.path.dirname(input_list) + input_list_filename = os.path.basename(input_list) + cmd.extend(["-v", f"{input_list_dir}:/data:ro"]) + + cmd.extend([ DOCKER_IMAGE, "--input", f"/input/{onnx_filename}", @@ -184,7 +198,7 @@ def convert_onnx_to_dlc( f"/output/{output_filename}", "--snpe-root", "/snpe", - ] + ]) if input_shape is not None: shape_str = ",".join(str(d) for d in input_shape) @@ -193,6 +207,9 @@ def convert_onnx_to_dlc( if input_name != "input": cmd.extend(["--input-name", input_name]) + if quantize and input_list: + cmd.extend(["--quantize", "--input-list", f"/data/{input_list_filename}"]) + if console: console.print("[dim]Running conversion in Docker container...[/dim]") From c7d67cf01922880401a2d8d8653b42987bcc2533 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Mon, 23 Mar 2026 15:57:44 -0700 Subject: [PATCH 04/20] wip --- model-converter/convert.py | 431 ++++++++----------- scripts/setup-target-device.sh | 125 ++++++ synapse/cli/deploy_model.py | 38 +- synapse/utils/model_converter/convert.py | 108 ++--- synapse/utils/model_converter/onnx_to_dlc.py | 18 +- 5 files changed, 382 insertions(+), 338 deletions(-) create mode 100755 scripts/setup-target-device.sh diff --git a/model-converter/convert.py b/model-converter/convert.py index bac211e5..ab64a912 100644 --- a/model-converter/convert.py +++ b/model-converter/convert.py @@ -1,9 +1,11 @@ #!/usr/bin/env python3 -"""ONNX to DLC conversion script. +"""ONNX to QNN context binary conversion script. Runs inside the synapse-model-converter Docker container. 
+Pipeline: ONNX → qairt-converter → DLC → qairt-quantizer → qnn-context-binary-generator → .bin + Expects: - - SNPE/QAIRT SDK mounted at the path given by --snpe-root + - QAIRT SDK mounted at the path given by --snpe-root - Input ONNX model accessible at --input - Output directory writable at --output parent """ @@ -45,155 +47,41 @@ def has_dynamic_shapes(onnx_path): # --------------------------------------------------------------------------- -# ONNX transforms (SNPE compatibility) +# Tool finders # --------------------------------------------------------------------------- -def fix_gemm_transpose(onnx_path, output_path=None): - """Convert GEMM ops with transB=1 to MatMul+Add.""" - import onnx - from onnx import helper, numpy_helper - - model = onnx.load(onnx_path) - graph = model.graph - if output_path is None: - output_path = onnx_path - - replacements = [] - initializers_to_add = [] - transforms_applied = 0 - - for idx, node in enumerate(graph.node): - if node.op_type != "Gemm": - continue - - trans_b = 0 - alpha = 1.0 - beta = 1.0 - trans_a = 0 - - for attr in node.attribute: - if attr.name == "transB": - trans_b = attr.i - elif attr.name == "transA": - trans_a = attr.i - elif attr.name == "alpha": - alpha = attr.f - elif attr.name == "beta": - beta = attr.f - - if trans_b != 1: - continue - if trans_a != 0 or alpha != 1.0: - print(f"Warning: Skipping complex GEMM node {node.name}") - continue - - weight_name = node.input[1] - weight_initializer = None - for init in graph.initializer: - if init.name == weight_name: - weight_initializer = init - break - - if weight_initializer is None: - print(f"Warning: Could not find initializer for {weight_name}, skipping") - continue - - transforms_applied += 1 - weight_array = numpy_helper.to_array(weight_initializer) - transposed_weight = weight_array.T - new_weight_name = f"{weight_name}_transposed" - new_weight = numpy_helper.from_array(transposed_weight, name=new_weight_name) - initializers_to_add.append(new_weight) - - matmul_output = f"{node.name}_matmul_out" - matmul_node = helper.make_node( - "MatMul", - inputs=[node.input[0], new_weight_name], - outputs=[matmul_output], - name=f"{node.name}_matmul", - ) - - if len(node.input) > 2 and node.input[2]: - bias_name = node.input[2] - if beta != 1.0: - for init in graph.initializer: - if init.name == bias_name: - bias_array = numpy_helper.to_array(init) - scaled_bias = bias_array * beta - new_bias_name = f"{bias_name}_scaled" - new_bias = numpy_helper.from_array( - scaled_bias, name=new_bias_name - ) - initializers_to_add.append(new_bias) - bias_name = new_bias_name - break - - add_node = helper.make_node( - "Add", - inputs=[matmul_output, bias_name], - outputs=node.output, - name=f"{node.name}_add", - ) - replacement_nodes = [matmul_node, add_node] - else: - matmul_node = helper.make_node( - "MatMul", - inputs=[node.input[0], new_weight_name], - outputs=node.output, - name=f"{node.name}_matmul", - ) - replacement_nodes = [matmul_node] - - replacements.append((idx, replacement_nodes)) - - if transforms_applied > 0: - for idx, new_nodes in reversed(replacements): - del graph.node[idx] - for i, new_node in enumerate(new_nodes): - graph.node.insert(idx + i, new_node) - graph.initializer.extend(initializers_to_add) - print(f"Applied GEMM->MatMul+Add transformation to {transforms_applied} nodes") - onnx.save(model, output_path) - - return output_path - - -def downgrade_opset(onnx_path, target_opset=11, output_path=None): - """Downgrade ONNX opset version for SNPE compatibility.""" - import onnx - 
from onnx import version_converter +def find_tool(snpe_root, name): + """Find a tool in the SDK bin directory.""" + path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", name) + return path if os.path.exists(path) else None - model = onnx.load(onnx_path) - if output_path is None: - output_path = onnx_path - current_opset = model.opset_import[0].version - if current_opset <= target_opset: - print(f"Model opset {current_opset} already at or below target {target_opset}") - return output_path +def python_env(snpe_root): + """Environment for running Python-based SDK tools.""" + env = os.environ.copy() + env["SNPE_ROOT"] = snpe_root + env["PYTHONPATH"] = os.path.join(snpe_root, "lib", "python") + env["LD_LIBRARY_PATH"] = "/usr/local/lib:/usr/lib/x86_64-linux-gnu" + return env - print(f"Downgrading opset from {current_opset} to {target_opset}...") - try: - converted = version_converter.convert_version(model, target_opset) - onnx.save(converted, output_path) - print(f"Successfully downgraded to opset {target_opset}") - except Exception as e: - print(f"Warning: Could not downgrade opset: {e}. Proceeding with original.") - return output_path +def native_env(snpe_root): + """Environment for running native (C++) SDK tools.""" + env = os.environ.copy() + env["SNPE_ROOT"] = snpe_root + lib_dir = os.path.join(snpe_root, "lib", "x86_64-linux-clang") + env["LD_LIBRARY_PATH"] = f"{lib_dir}:/usr/local/lib:/usr/lib/x86_64-linux-gnu" + bin_dir = os.path.join(snpe_root, "bin", "x86_64-linux-clang") + env["PATH"] = f"{bin_dir}:{env.get('PATH', '')}" + return env # --------------------------------------------------------------------------- -# DLC conversion +# Step 1: ONNX → DLC (qairt-converter) # --------------------------------------------------------------------------- -def find_converter(snpe_root): - path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", "snpe-onnx-to-dlc") - return path if os.path.exists(path) else None - - -def convert(input_path, output_path, snpe_root, input_shape=None, input_name=None): - """Run the full ONNX -> DLC conversion pipeline.""" +def convert_to_dlc(input_path, output_path, snpe_root, input_shape=None, input_name=None): + """Convert ONNX model to DLC using qairt-converter.""" if has_dynamic_shapes(input_path): if input_shape is None: shapes = get_input_shapes(input_path) @@ -208,61 +96,41 @@ def convert(input_path, output_path, snpe_root, input_shape=None, input_name=Non return False print(f"Using provided input shape {input_shape} for dynamic model") - # Determine input name from model if input_name is None: shapes = get_input_shapes(input_path) input_name = shapes[0][0] if shapes else "input" - # Work on a temp copy so we don't modify the original - temp_dir = tempfile.mkdtemp() - temp_onnx = os.path.join(temp_dir, os.path.basename(input_path)) - shutil.copy2(input_path, temp_onnx) - - # Apply transforms - print("Applying ONNX transformations...") - try: - fix_gemm_transpose(temp_onnx) - except Exception as e: - print(f"Warning: GEMM transform failed: {e}. Proceeding.") - - try: - downgrade_opset(temp_onnx) - except Exception as e: - print(f"Warning: Opset downgrade failed: {e}. 
Proceeding.") - - # Find converter - converter = find_converter(snpe_root) - if converter is None: - print( - f"ERROR: snpe-onnx-to-dlc not found at " - f"{snpe_root}/bin/x86_64-linux-clang/snpe-onnx-to-dlc", - file=sys.stderr, - ) - return False - - # Set up environment for the SNPE converter - env = os.environ.copy() - env["SNPE_ROOT"] = snpe_root - env["PYTHONPATH"] = os.path.join(snpe_root, "lib", "python") - env["LD_LIBRARY_PATH"] = "/usr/local/lib:/usr/lib/x86_64-linux-gnu" - - cmd = [ - sys.executable, - converter, - "--input_network", - temp_onnx, - "--output_path", - output_path, - ] - - if input_shape is not None: - shape_str = ",".join(str(d) for d in input_shape) - cmd.extend(["-d", input_name, shape_str]) - - print(f"Running: {' '.join(cmd)}") - result = subprocess.run(cmd, env=env, capture_output=True, text=True, timeout=300) - - shutil.rmtree(temp_dir, ignore_errors=True) + # Try qairt-converter first (unified, preferred), fall back to snpe-onnx-to-dlc + converter = find_tool(snpe_root, "qairt-converter") + if converter: + cmd = [ + sys.executable, + converter, + "-i", input_path, + "-o", output_path, + ] + if input_shape is not None: + shape_str = ",".join(str(d) for d in input_shape) + cmd.extend(["-d", input_name, shape_str]) + else: + converter = find_tool(snpe_root, "snpe-onnx-to-dlc") + if converter is None: + print("ERROR: No converter found (tried qairt-converter, snpe-onnx-to-dlc)", + file=sys.stderr) + return False + cmd = [ + sys.executable, + converter, + "--input_network", input_path, + "--output_path", output_path, + ] + if input_shape is not None: + shape_str = ",".join(str(d) for d in input_shape) + cmd.extend(["-d", input_name, shape_str]) + + print(f"Converting ONNX to DLC: {' '.join(cmd)}") + result = subprocess.run(cmd, env=python_env(snpe_root), + capture_output=True, text=True, timeout=300) if result.returncode != 0: print("ERROR: DLC conversion failed:", file=sys.stderr) @@ -281,70 +149,48 @@ def convert(input_path, output_path, snpe_root, input_shape=None, input_name=Non # --------------------------------------------------------------------------- -# Quantization +# Step 2: Quantize DLC (qairt-quantizer) # --------------------------------------------------------------------------- -def find_quantizer(snpe_root): - # Use the native binary directly, not the bash wrapper - path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", "snpe-dlc-quant") - if os.path.exists(path): - return path - # Fall back to the wrapper script - path = os.path.join(snpe_root, "bin", "x86_64-linux-clang", "snpe-dlc-quantize") - return path if os.path.exists(path) else None - - def quantize_dlc(dlc_path, input_list, snpe_root, output_path=None): """Quantize a DLC model to INT8 using representative input data.""" - quantizer = find_quantizer(snpe_root) + # Try qairt-quantizer first, fall back to snpe-dlc-quant + quantizer = find_tool(snpe_root, "qairt-quantizer") + if quantizer: + is_python = True + else: + quantizer = find_tool(snpe_root, "snpe-dlc-quant") + is_python = False if quantizer is None: - print( - f"ERROR: snpe-dlc-quant not found at " - f"{snpe_root}/bin/x86_64-linux-clang/", - file=sys.stderr, - ) + print("ERROR: No quantizer found (tried qairt-quantizer, snpe-dlc-quant)", + file=sys.stderr) return False if output_path is None: base, ext = os.path.splitext(dlc_path) output_path = f"{base}_quantized{ext}" - env = os.environ.copy() - env["SNPE_ROOT"] = snpe_root - env["PYTHONPATH"] = os.path.join(snpe_root, "lib", "python") - bin_dir = os.path.join(snpe_root, 
"bin", "x86_64-linux-clang") - lib_dir = os.path.join(snpe_root, "lib", "x86_64-linux-clang") - env["PATH"] = f"{bin_dir}:{env.get('PATH', '')}" - env["LD_LIBRARY_PATH"] = f"{lib_dir}:/usr/local/lib:/usr/lib/x86_64-linux-gnu" - - cmd = [ - quantizer, - "--input_dlc", - dlc_path, - "--input_list", - input_list, - "--output_dlc", - output_path, - ] - - # The quantizer resolves raw file paths relative to cwd and also writes - # intermediate output to ./output/ in cwd. We create a temp working - # directory and symlink the raw files there so the data mount can stay - # read-only. + if is_python: + cmd = [sys.executable, quantizer, "-i", dlc_path, "-l", input_list, + "-o", output_path] + env = python_env(snpe_root) + else: + cmd = [quantizer, "--input_dlc", dlc_path, "--input_list", input_list, + "--output_dlc", output_path] + env = native_env(snpe_root) + + # The quantizer resolves raw file paths relative to cwd. Create a temp + # working directory with symlinks so the data mount can stay read-only. input_list_dir = os.path.dirname(os.path.abspath(input_list)) work_dir = tempfile.mkdtemp() - - # Symlink every file from the input data directory into the work dir for name in os.listdir(input_list_dir): src = os.path.join(input_list_dir, name) dst = os.path.join(work_dir, name) os.symlink(src, dst) print(f"Quantizing model: {' '.join(cmd)}") - result = subprocess.run( - cmd, env=env, capture_output=True, text=True, timeout=600, - cwd=work_dir, - ) + result = subprocess.run(cmd, env=env, capture_output=True, text=True, + timeout=600, cwd=work_dir) shutil.rmtree(work_dir, ignore_errors=True) @@ -366,18 +212,93 @@ def quantize_dlc(dlc_path, input_list, snpe_root, output_path=None): return True +# --------------------------------------------------------------------------- +# Step 3: Generate QNN context binary (qnn-context-binary-generator) +# --------------------------------------------------------------------------- + +def generate_context_binary(dlc_path, output_path, snpe_root): + """Generate a pre-compiled QNN context binary for HTP backend.""" + generator = find_tool(snpe_root, "qnn-context-binary-generator") + if generator is None: + print("ERROR: qnn-context-binary-generator not found", file=sys.stderr) + return False + + backend_lib = os.path.join(snpe_root, "lib", "x86_64-linux-clang", "libQnnHtp.so") + if not os.path.exists(backend_lib): + print(f"ERROR: HTP backend not found at {backend_lib}", file=sys.stderr) + return False + + output_dir = os.path.dirname(output_path) + output_name = os.path.splitext(os.path.basename(output_path))[0] + + # Write HTP backend extensions config to limit VTCM usage. + # QCS6490 (Hexagon v68) has limited VTCM; vtcm_mb=0 lets the runtime decide. 
+ import json + + htp_config_path = os.path.join(output_dir or ".", "htp_config.json") + with open(htp_config_path, "w") as f: + json.dump({"graphs": [{"vtcm_mb": 0, "graph_names": ["*"]}]}, f) + + backend_ext_lib = os.path.join( + snpe_root, "lib", "x86_64-linux-clang", "libQnnHtpNetRunExtensions.so" + ) + ext_config_path = os.path.join(output_dir or ".", "backend_extensions_config.json") + with open(ext_config_path, "w") as f: + json.dump({ + "backend_extensions": { + "shared_library_path": backend_ext_lib, + "config_file_path": htp_config_path, + } + }, f) + + cmd = [ + generator, + "--dlc_path", dlc_path, + "--backend", backend_lib, + "--binary_file", output_name, + "--output_dir", output_dir, + "--config_file", ext_config_path, + ] + + print(f"Generating context binary: {' '.join(cmd)}") + result = subprocess.run(cmd, env=native_env(snpe_root), + capture_output=True, text=True, timeout=600) + + if result.stdout: + print(result.stdout) + + if result.returncode != 0: + print("ERROR: Context binary generation failed:", file=sys.stderr) + if result.stderr: + print(result.stderr, file=sys.stderr) + return False + + # qnn-context-binary-generator outputs .bin in output_dir + expected_bin = os.path.join(output_dir, f"{output_name}.bin") + if not os.path.exists(expected_bin): + print(f"ERROR: Expected output not found at {expected_bin}", file=sys.stderr) + return False + + # Move to the requested output path if different + if expected_bin != output_path: + shutil.move(expected_bin, output_path) + + print(f"Successfully generated context binary: {output_path}") + return True + + # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser( - description="Convert ONNX model to Qualcomm DLC format" + description="Convert ONNX model to Qualcomm QNN context binary" ) parser.add_argument("--input", required=True, help="Path to input ONNX model") - parser.add_argument("--output", required=True, help="Path for output DLC file") + parser.add_argument("--output", required=True, help="Path for output file") parser.add_argument( - "--snpe-root", required=True, help="Path to SNPE/QAIRT SDK root" + "--snpe-root", required=True, help="Path to QAIRT SDK root" ) parser.add_argument( "--input-shape", default=None, help="Input shape (comma-separated, e.g. 
1,1920)" @@ -389,28 +310,38 @@ def main(): parser.add_argument( "--input-list", default=None, help="Input list file for quantization" ) + parser.add_argument( + "--compile", action="store_true", + help="Generate QNN context binary (.bin) for HTP backend" + ) args = parser.parse_args() input_shape = None if args.input_shape: input_shape = tuple(int(x.strip()) for x in args.input_shape.split(",")) - success = convert( - args.input, - args.output, - args.snpe_root, - input_shape=input_shape, - input_name=args.input_name, + # Determine intermediate DLC path + if args.compile: + dlc_path = os.path.splitext(args.output)[0] + ".dlc" + else: + dlc_path = args.output + + # Step 1: Convert ONNX → DLC + success = convert_to_dlc( + args.input, dlc_path, args.snpe_root, + input_shape=input_shape, input_name=args.input_name, ) + # Step 2: Quantize (optional, but required for HTP/DSP) if success and args.quantize: if not args.input_list: - print( - "ERROR: --quantize requires --input-list with representative inputs", - file=sys.stderr, - ) + print("ERROR: --quantize requires --input-list", file=sys.stderr) sys.exit(1) - success = quantize_dlc(args.output, args.input_list, args.snpe_root) + success = quantize_dlc(dlc_path, args.input_list, args.snpe_root) + + # Step 3: Generate context binary (optional) + if success and args.compile: + success = generate_context_binary(dlc_path, args.output, args.snpe_root) sys.exit(0 if success else 1) diff --git a/scripts/setup-target-device.sh b/scripts/setup-target-device.sh new file mode 100755 index 00000000..6a0bf078 --- /dev/null +++ b/scripts/setup-target-device.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Idempotent setup script for QCS6490 target device (scifi@10.40.63.143) +# Deploys v2.42 SDK libraries and configures QNN HTP runtime +# +# Usage: ./scripts/setup-target-device.sh [SNPE_ROOT] +# Defaults: SNPE_ROOT=/home/calvinl/v2.42.0.251225/qairt/2.42.0.251225 + +set -euo pipefail + +DEVICE_HOST="${DEVICE_HOST:-scifi@10.40.63.143}" +DEVICE_PASS="${DEVICE_PASS:-synapse}" +ROOT_PASS="${ROOT_PASS:-oelinux123}" +SNPE_ROOT="${1:-/home/calvinl/v2.42.0.251225/qairt/2.42.0.251225}" +BSP_ROOT="${BSP_ROOT:-/home/calvinl/Documents/repos/qcs6490-ubun-1-0_amss_standard_oem}" +SDK_LIB="${SNPE_ROOT}/lib/aarch64-ubuntu-gcc9.4" +SDK_HEX="${SNPE_ROOT}/lib/hexagon-v68/unsigned" +SDK_BIN="${SNPE_ROOT}/bin/aarch64-ubuntu-gcc9.4" +BSP_CDSP="${BSP_ROOT}/cdsp_proc/build/ms/dynamic_modules/kodiak.cdsp.prod" + +# Validate SDK paths exist +for dir in "$SDK_LIB" "$SDK_HEX" "$SDK_BIN"; do + if [ ! -d "$dir" ]; then + echo "ERROR: SDK directory not found: $dir" + exit 1 + fi +done + +if [ ! -d "$BSP_CDSP" ]; then + echo "WARNING: BSP CDSP path not found: $BSP_CDSP" + echo " libc++ for Hexagon DSP will not be deployed." + echo " Set BSP_ROOT to the qcs6490 BSP directory." 
+fi + +echo "=== Staging libraries ===" +STAGING=$(mktemp -d) +trap "rm -rf $STAGING" EXIT + +mkdir -p "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" + +# Core QNN/SNPE libraries for /usr/lib/ +for lib in libQnnCpu.so libQnnGpu.so libQnnHtp.so libQnnHtpPrepare.so \ + libQnnHtpV68Stub.so libQnnHtpV68CalculatorStub.so \ + libQnnSystem.so libSNPE.so libcalculator.so; do + if [ -f "$SDK_LIB/$lib" ]; then + cp "$SDK_LIB/$lib" "$STAGING/usr_lib/" + fi +done + +# Hexagon v68 skel libraries for /usr/lib/rfsa/adsp/ +for f in "$SDK_HEX"/*.so; do + cp "$f" "$STAGING/adsp/" +done + +# Hexagon libc++ from BSP (CRITICAL: must match the device's fastrpc_shell) +# The QAIRT SDK does not ship these; they come from the device BSP +if [ -d "$BSP_CDSP" ]; then + cp "$BSP_CDSP/libc++.so.1" "$STAGING/adsp/" + cp "$BSP_CDSP/libc++abi.so.1" "$STAGING/adsp/" + echo "Staged BSP libc++ for Hexagon DSP" +fi + +# Useful debug binaries +for bin in qnn-net-run qnn-platform-validator; do + if [ -f "$SDK_BIN/$bin" ]; then + cp "$SDK_BIN/$bin" "$STAGING/bin/" + fi +done + +echo "=== Uploading to device ===" +sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "rm -rf /tmp/sdk-staging && mkdir -p /tmp/sdk-staging" +sshpass -p "$DEVICE_PASS" scp -o StrictHostKeyChecking=no -r "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" "$DEVICE_HOST:/tmp/sdk-staging/" + +echo "=== Applying on device as root ===" +sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "echo '$ROOT_PASS' | su -c ' +set -e + +# --- Install QNN/SNPE libraries to /usr/lib/ --- +# Remove stale artifacts from other SDK versions +rm -f /usr/lib/libQnnHtpV73*.so /usr/lib/libQnnHtpV69*.so +rm -f /usr/lib/libSnpeHtpV73*.so /usr/lib/libSnpeHtpV69*.so +rm -f /usr/lib/libSNPE_gcc11.so +rm -f /usr/lib/libQnnDsp.so /usr/lib/libQnnDspV66Stub.so +rm -f /usr/lib/libSnpeHtpPrepare.so /usr/lib/libSnpeHtpV68Stub.so + +cp /tmp/sdk-staging/usr_lib/*.so /usr/lib/ + +# --- Install hexagon-v68 skel libraries --- +cp /tmp/sdk-staging/adsp/*.so /usr/lib/rfsa/adsp/ + +# --- Install debug binaries --- +cp /tmp/sdk-staging/bin/* /usr/local/bin/ 2>/dev/null || true +chmod +x /usr/local/bin/qnn-* 2>/dev/null || true + +# --- Set ADSP_LIBRARY_PATH in /etc/environment --- +if ! grep -q ADSP_LIBRARY_PATH /etc/environment 2>/dev/null; then + echo "ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" >> /etc/environment +fi + +# --- Set ADSP_LIBRARY_PATH in cdsprpcd service --- +if ! 
grep -q ADSP_LIBRARY_PATH /lib/systemd/system/cdsprpcd.service 2>/dev/null; then
+    sed -i "/\[Service\]/a Environment=ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" /lib/systemd/system/cdsprpcd.service
+    systemctl daemon-reload
+    systemctl restart cdsprpcd
+fi
+
+# --- Update linker cache ---
+ldconfig
+
+echo "=== Setup complete ==="
+echo "QNN libs in /usr/lib/:"
+ls /usr/lib/libQnn*.so /usr/lib/libSNPE.so 2>/dev/null | xargs -I{} basename {}
+echo "Skel libs in /usr/lib/rfsa/adsp/:"
+ls /usr/lib/rfsa/adsp/libQnn*.so 2>/dev/null | xargs -I{} basename {}
+echo "ADSP_LIBRARY_PATH in cdsprpcd:"
+CDSP_PID=\$(pgrep cdsprpcd | head -1)
+if [ -n "\$CDSP_PID" ]; then cat /proc/\$CDSP_PID/environ | tr "\\0" "\\n" | grep ADSP; else echo "(not running)"; fi
+' 2>&1"
+
+echo ""
+echo "=== Device setup complete ==="
+echo "To test on device:"
+echo "  export ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp"
+echo "  export LD_LIBRARY_PATH=/usr/lib:/opt/scifi/lib"
+echo "  qnn-platform-validator --backend dsp --testBackend"
+echo "  ./synapse-example-app"
diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py
index 5c8cb9b3..bc74659f 100644
--- a/synapse/cli/deploy_model.py
+++ b/synapse/cli/deploy_model.py
@@ -97,6 +97,16 @@ def add_commands(subparsers: argparse._SubParsersAction):
         ),
     )
 
+    parser.add_argument(
+        "--compile",
+        action="store_true",
+        help=(
+            "Compile the model to a QNN context binary (.bin) for the HTP backend. "
+            "This enables DSP inference by bypassing runtime graph compilation. "
+            "Implies --quantize (HTP requires INT8 models)."
+        ),
+    )
+
     parser.set_defaults(func=deploy_model)
 
 
@@ -126,33 +136,43 @@ def deploy_model(args):
     if model_name is None:
         model_name = os.path.splitext(os.path.basename(args.model_path))[0]
 
+    model_ext = ".bin" if args.compile else ".dlc"
+
     console.print(f"[bold]Deploying model:[/bold] {model_name}")
     console.print(f"[bold]Source:[/bold] {args.model_path}")
-    console.print(f"[bold]Target:[/bold] {args.uri}:{DEVICE_MODEL_DIR}/{model_name}.dlc")
+    console.print(f"[bold]Target:[/bold] {args.uri}:{DEVICE_MODEL_DIR}/{model_name}{model_ext}")
     console.print()
 
+    # --compile implies --quantize (HTP requires INT8)
+    quantize = args.quantize or args.compile
+    compile_context = args.compile
+
     # Validate quantize + input-list
-    if args.quantize and not args.input_list:
+    if quantize and not args.input_list:
         console.print(
-            "[bold red]Error:[/bold red] --quantize requires --input-list "
+            "[bold red]Error:[/bold red] --quantize/--compile requires --input-list "
             "with representative input samples"
         )
         return
 
-    if args.input_list and not args.quantize:
+    if args.input_list and not quantize:
         console.print(
             "[yellow]Note: --input-list provided without --quantize, ignoring[/yellow]"
         )
 
-    # Step 1: Convert model to DLC
-    console.print("[bold cyan]Converting model to DLC format...[/bold cyan]")
+    # Step 1: Convert model
+    if compile_context:
+        console.print("[bold cyan]Converting model to QNN context binary...[/bold cyan]")
+    else:
+        console.print("[bold cyan]Converting model to DLC format...[/bold cyan]")
 
     dlc_path = convert_to_dlc(
         args.model_path,
         input_shape=input_shape,
         snpe_root=args.snpe_root,
-        quantize=args.quantize,
+        quantize=quantize,
         input_list=args.input_list,
+        compile_context=compile_context,
         console=console,
     )
 
@@ -182,8 +202,8 @@ def deploy_model(args):
     # Step 3: Ensure model directory exists
     _ensure_model_dir(sftp_conn, console)
 
-    # Step 4: Upload the DLC file
-    remote_path = f"{DEVICE_MODEL_DIR}/{model_name}.dlc"
+    # Step 4: Upload the model file
+    remote_path = 
f"{DEVICE_MODEL_DIR}/{model_name}{model_ext}" _upload_file(sftp_conn, dlc_path, remote_path, console) console.print() diff --git a/synapse/utils/model_converter/convert.py b/synapse/utils/model_converter/convert.py index 1ee51677..1af1abb4 100644 --- a/synapse/utils/model_converter/convert.py +++ b/synapse/utils/model_converter/convert.py @@ -17,26 +17,31 @@ def convert_to_dlc( snpe_root: Optional[str] = None, quantize: bool = False, input_list: Optional[str] = None, + compile_context: bool = False, console: Optional[Console] = None, ) -> Optional[str]: - """Convert a model to DLC format for deployment to Synapse devices. + """Convert a model for deployment to Synapse devices. Handles .pt (PyTorch), .onnx, and .dlc files: - - .pt -> ONNX (on host) -> DLC (in Docker) - - .onnx -> DLC (in Docker) + - .pt -> ONNX (on host) -> DLC or .bin (in Docker) + - .onnx -> DLC or .bin (in Docker) - .dlc -> returns as-is + When compile_context=True, produces a QNN context binary (.bin) that is + pre-compiled for the HTP backend, enabling DSP inference. + Args: model_path: Path to the model file (.pt, .onnx, or .dlc) input_shape: Input shape for the model (required if model has dynamic dims) - output_path: Optional output path for the DLC file - snpe_root: Path to the SNPE/QAIRT SDK + output_path: Optional output path + snpe_root: Path to the QAIRT SDK quantize: Whether to quantize the model to INT8 input_list: Path to representative input list file (required if quantize=True) + compile_context: Whether to compile a QNN context binary for HTP console: Rich console for output Returns: - Path to the DLC file, or None if conversion failed + Path to the output file, or None if conversion failed """ if not os.path.exists(model_path): if console: @@ -53,80 +58,37 @@ def convert_to_dlc( return output_path return model_path - if ext == ".pt": - return _convert_pt_to_dlc( - model_path, input_shape, output_path, snpe_root, quantize, input_list, console - ) - - if ext == ".onnx": - return _convert_onnx_to_dlc( - model_path, input_shape, output_path, snpe_root, quantize, input_list, console - ) - - if console: - console.print(f"[bold red]Error:[/bold red] Unsupported file type: {ext}") - console.print("[yellow]Supported formats: .pt, .onnx, .dlc[/yellow]") - return None - - -def _convert_pt_to_dlc( - pt_path: str, - input_shape: Optional[tuple[int, ...]], - output_path: Optional[str], - snpe_root: Optional[str], - quantize: bool, - input_list: Optional[str], - console: Optional[Console], -) -> Optional[str]: - """Convert PyTorch model to DLC via ONNX.""" - if console: - console.print("[bold blue]Step 1/2:[/bold blue] Converting PyTorch to ONNX...") - - onnx_path = convert_pt_to_onnx( - pt_path, - output_path=None, + kwargs = dict( input_shape=input_shape, - console=console, - ) - - if onnx_path is None: - return None - - if console: - console.print( - "[bold blue]Step 2/2:[/bold blue] Converting ONNX to DLC (Docker)..." 
- ) - - return convert_onnx_to_dlc( - onnx_path, output_path=output_path, - input_shape=input_shape, snpe_root=snpe_root, quantize=quantize, input_list=input_list, + compile_context=compile_context, console=console, ) + if ext == ".pt": + if console: + console.print("[bold blue]Step 1/2:[/bold blue] Converting PyTorch to ONNX...") -def _convert_onnx_to_dlc( - onnx_path: str, - input_shape: Optional[tuple[int, ...]], - output_path: Optional[str], - snpe_root: Optional[str], - quantize: bool, - input_list: Optional[str], - console: Optional[Console], -) -> Optional[str]: - """Convert ONNX model to DLC via Docker.""" - if console: - console.print("[bold blue]Converting ONNX to DLC (Docker)...[/bold blue]") + onnx_path = convert_pt_to_onnx( + model_path, output_path=None, input_shape=input_shape, console=console, + ) + if onnx_path is None: + return None - return convert_onnx_to_dlc( - onnx_path, - output_path=output_path, - input_shape=input_shape, - snpe_root=snpe_root, - quantize=quantize, - input_list=input_list, - console=console, - ) + if console: + console.print("[bold blue]Step 2/2:[/bold blue] Converting ONNX (Docker)...") + + return convert_onnx_to_dlc(onnx_path, **kwargs) + + if ext == ".onnx": + if console: + console.print("[bold blue]Converting ONNX (Docker)...[/bold blue]") + return convert_onnx_to_dlc(model_path, **kwargs) + + if console: + console.print(f"[bold red]Error:[/bold red] Unsupported file type: {ext}") + console.print("[yellow]Supported formats: .pt, .onnx, .dlc[/yellow]") + return None diff --git a/synapse/utils/model_converter/onnx_to_dlc.py b/synapse/utils/model_converter/onnx_to_dlc.py index b60b285b..3e0d3567 100644 --- a/synapse/utils/model_converter/onnx_to_dlc.py +++ b/synapse/utils/model_converter/onnx_to_dlc.py @@ -115,22 +115,24 @@ def convert_onnx_to_dlc( snpe_root: Optional[str] = None, quantize: bool = False, input_list: Optional[str] = None, + compile_context: bool = False, console: Optional[Console] = None, ) -> Optional[str]: - """Convert an ONNX model to DLC format using the Docker-based converter. + """Convert an ONNX model to DLC or QNN context binary using Docker. 
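+
+    With compile_context=True, the container emits a pre-compiled QNN HTP
+    context binary (.bin) instead of a DLC.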
Args: onnx_path: Path to the ONNX model - output_path: Optional output path for the DLC file + output_path: Optional output path for the output file input_shape: Input shape (required if model has dynamic dims) input_name: Name of the input tensor - snpe_root: Path to the SNPE/QAIRT SDK - quantize: Whether to quantize the model to INT8 after conversion + snpe_root: Path to the QAIRT SDK + quantize: Whether to quantize the model to INT8 input_list: Path to representative input list file for quantization + compile_context: Whether to compile a QNN context binary (.bin) for HTP console: Rich console for output Returns: - Path to the converted DLC file, or None on failure + Path to the output file (.dlc or .bin), or None on failure """ if snpe_root is None: snpe_root = os.environ.get("SNPE_ROOT") or os.environ.get("QAIRT_ROOT") @@ -161,7 +163,8 @@ def convert_onnx_to_dlc( if output_path is None: base_name = os.path.splitext(onnx_filename)[0] - output_path = os.path.join(tempfile.gettempdir(), f"{base_name}.dlc") + ext = ".bin" if compile_context else ".dlc" + output_path = os.path.join(tempfile.gettempdir(), f"{base_name}{ext}") output_dir = os.path.abspath(os.path.dirname(output_path)) output_filename = os.path.basename(output_path) @@ -210,6 +213,9 @@ def convert_onnx_to_dlc( if quantize and input_list: cmd.extend(["--quantize", "--input-list", f"/data/{input_list_filename}"]) + if compile_context: + cmd.append("--compile") + if console: console.print("[dim]Running conversion in Docker container...[/dim]") From 81801f7a5ad3b26fdd316fc66571bf71e084d8a8 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Mon, 23 Mar 2026 17:27:10 -0700 Subject: [PATCH 05/20] works! --- scripts/setup-target-device.sh | 150 +++++++++++++++++++++++---------- 1 file changed, 107 insertions(+), 43 deletions(-) diff --git a/scripts/setup-target-device.sh b/scripts/setup-target-device.sh index 6a0bf078..47953d24 100755 --- a/scripts/setup-target-device.sh +++ b/scripts/setup-target-device.sh @@ -1,16 +1,65 @@ #!/bin/bash # Idempotent setup script for QCS6490 target device (scifi@10.40.63.143) -# Deploys v2.42 SDK libraries and configures QNN HTP runtime +# Deploys QNN SDK libraries, BSP libc++, and configures QNN HTP runtime # -# Usage: ./scripts/setup-target-device.sh [SNPE_ROOT] -# Defaults: SNPE_ROOT=/home/calvinl/v2.42.0.251225/qairt/2.42.0.251225 +# Usage: ./scripts/setup-target-device.sh [--sdk-version v2.34|v2.42] [SNPE_ROOT] +# Defaults: --sdk-version v2.42 +# +# Examples: +# ./scripts/setup-target-device.sh # v2.42 (default) +# ./scripts/setup-target-device.sh --sdk-version v2.34 # full v2.34 stack +# ./scripts/setup-target-device.sh /path/to/custom/sdk # custom SDK path +# +# Prerequisites: +# - sshpass installed on host +# - Device accessible at DEVICE_HOST +# - BSP repo at BSP_ROOT (for Hexagon libc++) set -euo pipefail +# --- Parse arguments --- +SDK_VERSION="v2.42" +SNPE_ROOT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --sdk-version) + SDK_VERSION="$2" + shift 2 + ;; + --sdk-version=*) + SDK_VERSION="${1#*=}" + shift + ;; + *) + SNPE_ROOT="$1" + shift + ;; + esac +done + +# --- Resolve SDK path from version if not explicitly provided --- +if [ -z "$SNPE_ROOT" ]; then + case "$SDK_VERSION" in + v2.42) + SNPE_ROOT="/home/calvinl/v2.42.0.251225/qairt/2.42.0.251225" + ;; + v2.34) + SNPE_ROOT="/opt/qcom/aistack/qairt/2.34.0.250424" + ;; + *) + echo "ERROR: Unknown SDK version '$SDK_VERSION'. 
Supported: v2.34, v2.42" + exit 1 + ;; + esac +fi + +echo "=== Using SDK version: $SDK_VERSION ===" +echo " SNPE_ROOT: $SNPE_ROOT" + DEVICE_HOST="${DEVICE_HOST:-scifi@10.40.63.143}" DEVICE_PASS="${DEVICE_PASS:-synapse}" ROOT_PASS="${ROOT_PASS:-oelinux123}" -SNPE_ROOT="${1:-/home/calvinl/v2.42.0.251225/qairt/2.42.0.251225}" BSP_ROOT="${BSP_ROOT:-/home/calvinl/Documents/repos/qcs6490-ubun-1-0_amss_standard_oem}" SDK_LIB="${SNPE_ROOT}/lib/aarch64-ubuntu-gcc9.4" SDK_HEX="${SNPE_ROOT}/lib/hexagon-v68/unsigned" @@ -26,9 +75,10 @@ for dir in "$SDK_LIB" "$SDK_HEX" "$SDK_BIN"; do done if [ ! -d "$BSP_CDSP" ]; then - echo "WARNING: BSP CDSP path not found: $BSP_CDSP" - echo " libc++ for Hexagon DSP will not be deployed." - echo " Set BSP_ROOT to the qcs6490 BSP directory." + echo "ERROR: BSP CDSP path not found: $BSP_CDSP" + echo " Hexagon libc++ from the BSP is REQUIRED for QNN HTP skel loading." + echo " Set BSP_ROOT to the qcs6490-ubun-1-0_amss_standard_oem directory." + exit 1 fi echo "=== Staging libraries ===" @@ -38,12 +88,9 @@ trap "rm -rf $STAGING" EXIT mkdir -p "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" # Core QNN/SNPE libraries for /usr/lib/ -for lib in libQnnCpu.so libQnnGpu.so libQnnHtp.so libQnnHtpPrepare.so \ - libQnnHtpV68Stub.so libQnnHtpV68CalculatorStub.so \ - libQnnSystem.so libSNPE.so libcalculator.so; do - if [ -f "$SDK_LIB/$lib" ]; then - cp "$SDK_LIB/$lib" "$STAGING/usr_lib/" - fi +# Copy all QNN/SNPE/calculator libs from the SDK — covers both v2.34 and v2.42 +for lib in "$SDK_LIB"/libQnn*.so "$SDK_LIB"/libSnpe*.so "$SDK_LIB"/libSNPE.so "$SDK_LIB"/libcalculator.so; do + [ -f "$lib" ] && cp "$lib" "$STAGING/usr_lib/" done # Hexagon v68 skel libraries for /usr/lib/rfsa/adsp/ @@ -52,12 +99,11 @@ for f in "$SDK_HEX"/*.so; do done # Hexagon libc++ from BSP (CRITICAL: must match the device's fastrpc_shell) -# The QAIRT SDK does not ship these; they come from the device BSP -if [ -d "$BSP_CDSP" ]; then - cp "$BSP_CDSP/libc++.so.1" "$STAGING/adsp/" - cp "$BSP_CDSP/libc++abi.so.1" "$STAGING/adsp/" - echo "Staged BSP libc++ for Hexagon DSP" -fi +# The QAIRT SDK does NOT ship these; they come from the device BSP. +# Without these, the QNN HTP skel fails to load with error 0x80000406. 
+cp "$BSP_CDSP/libc++.so.1" "$STAGING/adsp/" +cp "$BSP_CDSP/libc++abi.so.1" "$STAGING/adsp/" +echo "Staged BSP libc++ for Hexagon DSP" # Useful debug binaries for bin in qnn-net-run qnn-platform-validator; do @@ -66,25 +112,21 @@ for bin in qnn-net-run qnn-platform-validator; do fi done -echo "=== Uploading to device ===" -sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "rm -rf /tmp/sdk-staging && mkdir -p /tmp/sdk-staging" -sshpass -p "$DEVICE_PASS" scp -o StrictHostKeyChecking=no -r "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" "$DEVICE_HOST:/tmp/sdk-staging/" - -echo "=== Applying on device as root ===" -sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "echo '$ROOT_PASS' | su -c ' +# Write device-side setup script (heredoc with single-quoted delimiter prevents local expansion) +cat > "$STAGING/apply.sh" <<'APPLY_EOF' +#!/bin/bash set -e -# --- Install QNN/SNPE libraries to /usr/lib/ --- -# Remove stale artifacts from other SDK versions -rm -f /usr/lib/libQnnHtpV73*.so /usr/lib/libQnnHtpV69*.so -rm -f /usr/lib/libSnpeHtpV73*.so /usr/lib/libSnpeHtpV69*.so -rm -f /usr/lib/libSNPE_gcc11.so -rm -f /usr/lib/libQnnDsp.so /usr/lib/libQnnDspV66Stub.so -rm -f /usr/lib/libSnpeHtpPrepare.so /usr/lib/libSnpeHtpV68Stub.so +# --- Remove ALL existing QNN/SNPE libs to prevent version mixing --- +rm -f /usr/lib/libQnn*.so /usr/lib/libSnpe*.so /usr/lib/libSNPE*.so +rm -f /usr/lib/libcalculator.so +rm -f /usr/lib/rfsa/adsp/libQnn*.so /usr/lib/rfsa/adsp/libSnpe*.so +rm -f /usr/lib/rfsa/adsp/libCalculator_skel.so +# --- Install QNN/SNPE libraries to /usr/lib/ --- cp /tmp/sdk-staging/usr_lib/*.so /usr/lib/ -# --- Install hexagon-v68 skel libraries --- +# --- Install hexagon-v68 skel + BSP libc++ to /usr/lib/rfsa/adsp/ --- cp /tmp/sdk-staging/adsp/*.so /usr/lib/rfsa/adsp/ # --- Install debug binaries --- @@ -100,26 +142,48 @@ fi if ! 
grep -q ADSP_LIBRARY_PATH /lib/systemd/system/cdsprpcd.service 2>/dev/null; then sed -i "/\[Service\]/a Environment=ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" /lib/systemd/system/cdsprpcd.service systemctl daemon-reload - systemctl restart cdsprpcd fi +# Always restart cdsprpcd to pick up env var +systemctl restart cdsprpcd +sleep 1 # --- Update linker cache --- ldconfig -echo "=== Setup complete ===" +# --- Verify --- +echo "" +echo "=== Verification ===" echo "QNN libs in /usr/lib/:" ls /usr/lib/libQnn*.so /usr/lib/libSNPE.so 2>/dev/null | xargs -I{} basename {} -echo "Skel libs in /usr/lib/rfsa/adsp/:" -ls /usr/lib/rfsa/adsp/libQnn*.so 2>/dev/null | xargs -I{} basename {} +echo "" +echo "Skel + libc++ in /usr/lib/rfsa/adsp/:" +ls /usr/lib/rfsa/adsp/libQnn*.so /usr/lib/rfsa/adsp/libc++*.so* 2>/dev/null | xargs -I{} basename {} +echo "" echo "ADSP_LIBRARY_PATH in cdsprpcd:" -CDSP_PID=\$(pgrep cdsprpcd | head -1) -if [ -n "\$CDSP_PID" ]; then cat /proc/\$CDSP_PID/environ | tr "\\0" "\\n" | grep ADSP; else echo "(not running)"; fi -' 2>&1" +CDSP_PID=$(pgrep cdsprpcd | head -1) +if [ -n "$CDSP_PID" ]; then + cat /proc/"$CDSP_PID"/environ | tr "\0" "\n" | grep ADSP || echo "(not set)" +else + echo "(cdsprpcd not running)" +fi +echo "" +echo "Calculator test:" +export ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp +export LD_LIBRARY_PATH=/usr/lib +/usr/local/bin/qnn-platform-validator --backend dsp --testBackend 2>&1 | grep -E "Unit Test|supported" +APPLY_EOF +echo "=== Uploading to device ===" +sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "rm -rf /tmp/sdk-staging && mkdir -p /tmp/sdk-staging" +sshpass -p "$DEVICE_PASS" scp -o StrictHostKeyChecking=no -r "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" "$STAGING/apply.sh" "$DEVICE_HOST:/tmp/sdk-staging/" + +echo "=== Applying on device as root ===" +sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "echo '$ROOT_PASS' | su -c 'bash /tmp/sdk-staging/apply.sh' 2>&1" + +echo "" +echo "=== Device setup complete (SDK: $SDK_VERSION) ===" echo "" -echo "=== Device setup complete ===" -echo "To test on device:" +echo "To run on device:" echo " export ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" echo " export LD_LIBRARY_PATH=/usr/lib:/opt/scifi/lib" -echo " qnn-platform-validator --backend dsp --testBackend" echo " ./synapse-example-app" From abf61b430a7a38407bbc60b09b6c78d6064e35ef Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Mon, 23 Mar 2026 19:01:40 -0700 Subject: [PATCH 06/20] removed redundant options in deploy-model, made it easier for users to use --- README.md | 29 ++++++ model-converter/convert.py | 96 +------------------- scripts/setup-target-device.sh | 5 +- synapse/cli/deploy_model.py | 93 +++++++------------ synapse/utils/model_converter/convert.py | 12 +-- synapse/utils/model_converter/onnx_to_dlc.py | 12 +-- 6 files changed, 78 insertions(+), 169 deletions(-) diff --git a/README.md b/README.md index b38445bf..4c2d3202 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,35 @@ And a toy device `synapse-sim` for local development, For more information on deploy and build, visit [synapse-example-app](https://github.com/sciencecorp/synapse-example-app) +## Model Deployment + +Deploy machine learning models to Synapse devices for DSP inference. 
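+
+The `--input-list` file used in the deploy command below is a plain text file listing `.raw` calibration samples, one path per line. A minimal sketch for generating one (the input shape `(1, 32, 64)` and the file names are placeholders, not fixed by the CLI; match them to your own model):
+
+```python
+import numpy as np
+
+# Write 10 float32 calibration samples plus an input list referencing them.
+# Replace the random data with representative inputs from your application.
+with open("calibration_data.txt", "w") as f:
+    for i in range(10):
+        name = f"sample_{i:03d}.raw"
+        np.random.randn(1, 32, 64).astype(np.float32).tofile(name)
+        f.write(name + "\n")
+```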
+ +**Prerequisites:** +- [QAIRT SDK v2.34](https://softwarecenter.qualcomm.com/) (Qualcomm AI Runtime) +- Docker (for model conversion) + +**Deploy a model:** + +```bash +synapsectl deploy-model model.onnx \ + --name my_model \ + --input-list calibration_data.txt \ + --snpe-root /path/to/qairt/2.34.0.250424 \ + -u +``` + +Models are automatically quantized to INT8 and converted to DLC format for on-device DSP compilation. + +**Use in your C++ app:** + +```cpp +auto model = synapse::create_model("my_model"); +if (model && model->is_ready()) { + auto result = model->infer(input_data); +} +``` + ## A Note on Streaming Synapse devices stream data to and from clients with UDP. To minimize packet loss, it is highly recommended that users increase their OS UDP buffer size. diff --git a/model-converter/convert.py b/model-converter/convert.py index ab64a912..d093395c 100644 --- a/model-converter/convert.py +++ b/model-converter/convert.py @@ -2,7 +2,7 @@ """ONNX to QNN context binary conversion script. Runs inside the synapse-model-converter Docker container. -Pipeline: ONNX → qairt-converter → DLC → qairt-quantizer → qnn-context-binary-generator → .bin +Pipeline: ONNX → qairt-converter → DLC → qairt-quantizer → quantized DLC Expects: - QAIRT SDK mounted at the path given by --snpe-root @@ -212,88 +212,13 @@ def quantize_dlc(dlc_path, input_list, snpe_root, output_path=None): return True -# --------------------------------------------------------------------------- -# Step 3: Generate QNN context binary (qnn-context-binary-generator) -# --------------------------------------------------------------------------- - -def generate_context_binary(dlc_path, output_path, snpe_root): - """Generate a pre-compiled QNN context binary for HTP backend.""" - generator = find_tool(snpe_root, "qnn-context-binary-generator") - if generator is None: - print("ERROR: qnn-context-binary-generator not found", file=sys.stderr) - return False - - backend_lib = os.path.join(snpe_root, "lib", "x86_64-linux-clang", "libQnnHtp.so") - if not os.path.exists(backend_lib): - print(f"ERROR: HTP backend not found at {backend_lib}", file=sys.stderr) - return False - - output_dir = os.path.dirname(output_path) - output_name = os.path.splitext(os.path.basename(output_path))[0] - - # Write HTP backend extensions config to limit VTCM usage. - # QCS6490 (Hexagon v68) has limited VTCM; vtcm_mb=0 lets the runtime decide. 
- import json - - htp_config_path = os.path.join(output_dir or ".", "htp_config.json") - with open(htp_config_path, "w") as f: - json.dump({"graphs": [{"vtcm_mb": 0, "graph_names": ["*"]}]}, f) - - backend_ext_lib = os.path.join( - snpe_root, "lib", "x86_64-linux-clang", "libQnnHtpNetRunExtensions.so" - ) - ext_config_path = os.path.join(output_dir or ".", "backend_extensions_config.json") - with open(ext_config_path, "w") as f: - json.dump({ - "backend_extensions": { - "shared_library_path": backend_ext_lib, - "config_file_path": htp_config_path, - } - }, f) - - cmd = [ - generator, - "--dlc_path", dlc_path, - "--backend", backend_lib, - "--binary_file", output_name, - "--output_dir", output_dir, - "--config_file", ext_config_path, - ] - - print(f"Generating context binary: {' '.join(cmd)}") - result = subprocess.run(cmd, env=native_env(snpe_root), - capture_output=True, text=True, timeout=600) - - if result.stdout: - print(result.stdout) - - if result.returncode != 0: - print("ERROR: Context binary generation failed:", file=sys.stderr) - if result.stderr: - print(result.stderr, file=sys.stderr) - return False - - # qnn-context-binary-generator outputs .bin in output_dir - expected_bin = os.path.join(output_dir, f"{output_name}.bin") - if not os.path.exists(expected_bin): - print(f"ERROR: Expected output not found at {expected_bin}", file=sys.stderr) - return False - - # Move to the requested output path if different - if expected_bin != output_path: - shutil.move(expected_bin, output_path) - - print(f"Successfully generated context binary: {output_path}") - return True - - # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser( - description="Convert ONNX model to Qualcomm QNN context binary" + description="Convert ONNX model to quantized DLC for Qualcomm HTP inference" ) parser.add_argument("--input", required=True, help="Path to input ONNX model") parser.add_argument("--output", required=True, help="Path for output file") @@ -310,21 +235,14 @@ def main(): parser.add_argument( "--input-list", default=None, help="Input list file for quantization" ) - parser.add_argument( - "--compile", action="store_true", - help="Generate QNN context binary (.bin) for HTP backend" - ) args = parser.parse_args() input_shape = None if args.input_shape: input_shape = tuple(int(x.strip()) for x in args.input_shape.split(",")) - # Determine intermediate DLC path - if args.compile: - dlc_path = os.path.splitext(args.output)[0] + ".dlc" - else: - dlc_path = args.output + # Output is always DLC + dlc_path = args.output # Step 1: Convert ONNX → DLC success = convert_to_dlc( @@ -332,17 +250,13 @@ def main(): input_shape=input_shape, input_name=args.input_name, ) - # Step 2: Quantize (optional, but required for HTP/DSP) + # Step 2: Quantize (required for DSP inference) if success and args.quantize: if not args.input_list: print("ERROR: --quantize requires --input-list", file=sys.stderr) sys.exit(1) success = quantize_dlc(dlc_path, args.input_list, args.snpe_root) - # Step 3: Generate context binary (optional) - if success and args.compile: - success = generate_context_binary(dlc_path, args.output, args.snpe_root) - sys.exit(0 if success else 1) diff --git a/scripts/setup-target-device.sh b/scripts/setup-target-device.sh index 47953d24..24aaa5a3 100755 --- a/scripts/setup-target-device.sh +++ b/scripts/setup-target-device.sh @@ -1,5 +1,6 @@ #!/bin/bash -# Idempotent 
setup script for QCS6490 target device (scifi@10.40.63.143) +# Developer tool for setting up QCS6490 dev boards directly. +# End users: install the synapse-app-sdk .deb package instead. # Deploys QNN SDK libraries, BSP libc++, and configures QNN HTP runtime # # Usage: ./scripts/setup-target-device.sh [--sdk-version v2.34|v2.42] [SNPE_ROOT] @@ -18,7 +19,7 @@ set -euo pipefail # --- Parse arguments --- -SDK_VERSION="v2.42" +SDK_VERSION="v2.34" SNPE_ROOT="" while [[ $# -gt 0 ]]; do diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py index bc74659f..8f10245f 100644 --- a/synapse/cli/deploy_model.py +++ b/synapse/cli/deploy_model.py @@ -41,8 +41,8 @@ def add_commands(subparsers: argparse._SubParsersAction): parser.add_argument( "--name", type=str, - default=None, - help="Model name on device (default: filename without extension)", + required=True, + help="Model name on device (no extension needed, e.g., 'my_model')", ) parser.add_argument( @@ -74,37 +74,22 @@ def add_commands(subparsers: argparse._SubParsersAction): help="Path to SNPE/QAIRT SDK root (or set SNPE_ROOT env var)", ) - parser.add_argument( - "--quantize", - action="store_true", - help=( - "Quantize the model to INT8 after conversion. Required for DSP/NPU inference. " - "Must be used with --input-list pointing to a file of representative inputs. " - "Each line in the input list should be a path to a raw binary file containing " - "float32 data matching the model's input shape (e.g., numpy: " - 'arr.astype(np.float32).tofile("sample_001.raw")).' - ), - ) - parser.add_argument( "--input-list", type=str, - default=None, + required=True, help=( - "Path to a text file listing representative input samples for quantization. " - "Each line is a path to a .raw file (float32 binary). Paths should be " - "relative to the directory containing the input list file." + "Path to a text file listing representative input samples for INT8 quantization " + "(required for DSP inference). Each line is a path to a .raw file (float32 binary). " + "Paths should be relative to the directory containing the input list file. " + "Generate .raw files with: arr.astype(np.float32).tofile('sample.raw')" ), ) parser.add_argument( - "--compile", + "--force", action="store_true", - help=( - "Compile a QNN context binary (.bin) pre-compiled for the HTP backend. " - "This enables DSP inference by bypassing runtime graph compilation. " - "Implies --quantize (HTP requires INT8 models)." - ), + help="Overwrite existing model on device without prompting", ) parser.set_defaults(func=deploy_model) @@ -131,48 +116,22 @@ def deploy_model(args): console.print('[yellow]Expected format: "dim1,dim2,..." 
(e.g., "1,32,64")[/yellow]') return - # Determine model name model_name = args.name - if model_name is None: - model_name = os.path.splitext(os.path.basename(args.model_path))[0] - - model_ext = ".bin" if args.compile else ".dlc" console.print(f"[bold]Deploying model:[/bold] {model_name}") console.print(f"[bold]Source:[/bold] {args.model_path}") - console.print(f"[bold]Target:[/bold] {args.uri}:{DEVICE_MODEL_DIR}/{model_name}{model_ext}") + console.print(f"[bold]Format:[/bold] Quantized DLC (INT8)") console.print() - # --compile implies --quantize (HTP requires INT8) - quantize = args.quantize or args.compile - compile_context = args.compile - - # Validate quantize + input-list - if quantize and not args.input_list: - console.print( - "[bold red]Error:[/bold red] --quantize/--compile requires --input-list " - "with representative input samples" - ) - return - - if args.input_list and not quantize: - console.print( - "[yellow]Note: --input-list provided without --quantize, ignoring[/yellow]" - ) - - # Step 1: Convert model - if compile_context: - console.print("[bold cyan]Converting model to QNN context binary...[/bold cyan]") - else: - console.print("[bold cyan]Converting model to DLC format...[/bold cyan]") + # Step 1: Convert model (always quantize for DSP inference) + console.print("[bold cyan]Converting model to quantized DLC...[/bold cyan]") dlc_path = convert_to_dlc( args.model_path, input_shape=input_shape, snpe_root=args.snpe_root, - quantize=quantize, + quantize=True, input_list=args.input_list, - compile_context=compile_context, console=console, ) @@ -202,15 +161,33 @@ def deploy_model(args): # Step 3: Ensure model directory exists _ensure_model_dir(sftp_conn, console) - # Step 4: Upload the model file - remote_path = f"{DEVICE_MODEL_DIR}/{model_name}{model_ext}" + # Step 4: Check if model already exists on device + remote_path = f"{DEVICE_MODEL_DIR}/{model_name}.dlc" + try: + sftp_conn.stat(remote_path) + if not args.force: + console.print( + f"[yellow]Model '{model_name}.dlc' already exists on device. " + f"Overwrite? [y/N][/yellow] ", + end="", + ) + response = input().strip().lower() + if response not in ("y", "yes"): + console.print("[dim]Aborted.[/dim]") + return + except FileNotFoundError: + pass + + # Step 5: Upload the model file _upload_file(sftp_conn, dlc_path, remote_path, console) console.print() console.print("[bold green]Model deployed successfully![/bold green]") console.print() - console.print("[dim]To use in your app:[/dim]") - console.print(f'[cyan] auto model = synapse::Model::load("{model_name}");[/cyan]') + console.print(f" Model deployed: [cyan]models/{model_name}.dlc[/cyan]") + console.print() + console.print(" To load in your app:") + console.print(f' [cyan]auto model = synapse::create_model("{model_name}");[/cyan]') finally: sftp.close_sftp(ssh, sftp_conn) diff --git a/synapse/utils/model_converter/convert.py b/synapse/utils/model_converter/convert.py index 1af1abb4..f21278c0 100644 --- a/synapse/utils/model_converter/convert.py +++ b/synapse/utils/model_converter/convert.py @@ -17,19 +17,15 @@ def convert_to_dlc( snpe_root: Optional[str] = None, quantize: bool = False, input_list: Optional[str] = None, - compile_context: bool = False, console: Optional[Console] = None, ) -> Optional[str]: """Convert a model for deployment to Synapse devices. 
Handles .pt (PyTorch), .onnx, and .dlc files: - - .pt -> ONNX (on host) -> DLC or .bin (in Docker) - - .onnx -> DLC or .bin (in Docker) + - .pt -> ONNX (on host) -> quantized DLC (in Docker) + - .onnx -> quantized DLC (in Docker) - .dlc -> returns as-is - When compile_context=True, produces a QNN context binary (.bin) that is - pre-compiled for the HTP backend, enabling DSP inference. - Args: model_path: Path to the model file (.pt, .onnx, or .dlc) input_shape: Input shape for the model (required if model has dynamic dims) @@ -37,11 +33,10 @@ def convert_to_dlc( snpe_root: Path to the QAIRT SDK quantize: Whether to quantize the model to INT8 input_list: Path to representative input list file (required if quantize=True) - compile_context: Whether to compile a QNN context binary for HTP console: Rich console for output Returns: - Path to the output file, or None if conversion failed + Path to the output DLC file, or None if conversion failed """ if not os.path.exists(model_path): if console: @@ -64,7 +59,6 @@ def convert_to_dlc( snpe_root=snpe_root, quantize=quantize, input_list=input_list, - compile_context=compile_context, console=console, ) diff --git a/synapse/utils/model_converter/onnx_to_dlc.py b/synapse/utils/model_converter/onnx_to_dlc.py index 3e0d3567..dcf91ada 100644 --- a/synapse/utils/model_converter/onnx_to_dlc.py +++ b/synapse/utils/model_converter/onnx_to_dlc.py @@ -115,10 +115,9 @@ def convert_onnx_to_dlc( snpe_root: Optional[str] = None, quantize: bool = False, input_list: Optional[str] = None, - compile_context: bool = False, console: Optional[Console] = None, ) -> Optional[str]: - """Convert an ONNX model to DLC or QNN context binary using Docker. + """Convert an ONNX model to DLC using Docker. Args: onnx_path: Path to the ONNX model @@ -128,11 +127,10 @@ def convert_onnx_to_dlc( snpe_root: Path to the QAIRT SDK quantize: Whether to quantize the model to INT8 input_list: Path to representative input list file for quantization - compile_context: Whether to compile a QNN context binary (.bin) for HTP console: Rich console for output Returns: - Path to the output file (.dlc or .bin), or None on failure + Path to the output DLC file, or None on failure """ if snpe_root is None: snpe_root = os.environ.get("SNPE_ROOT") or os.environ.get("QAIRT_ROOT") @@ -163,8 +161,7 @@ def convert_onnx_to_dlc( if output_path is None: base_name = os.path.splitext(onnx_filename)[0] - ext = ".bin" if compile_context else ".dlc" - output_path = os.path.join(tempfile.gettempdir(), f"{base_name}{ext}") + output_path = os.path.join(tempfile.gettempdir(), f"{base_name}.dlc") output_dir = os.path.abspath(os.path.dirname(output_path)) output_filename = os.path.basename(output_path) @@ -213,9 +210,6 @@ def convert_onnx_to_dlc( if quantize and input_list: cmd.extend(["--quantize", "--input-list", f"/data/{input_list_filename}"]) - if compile_context: - cmd.append("--compile") - if console: console.print("[dim]Running conversion in Docker container...[/dim]") From 9de15d989b929250fad0825bd7b15cec2cc66459 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 11:15:59 -0700 Subject: [PATCH 07/20] reintroduced quantization to allow CPU inference --- README.md | 13 +++++++-- synapse/cli/deploy_model.py | 55 +++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4c2d3202..d526114c 100644 --- a/README.md +++ b/README.md @@ -65,17 +65,24 @@ Deploy machine learning models to Synapse devices for DSP inference. 
- [QAIRT SDK v2.34](https://softwarecenter.qualcomm.com/) (Qualcomm AI Runtime) - Docker (for model conversion) -**Deploy a model:** +**Deploy a model (float, runs on CPU):** ```bash synapsectl deploy-model model.onnx \ --name my_model \ - --input-list calibration_data.txt \ --snpe-root /path/to/qairt/2.34.0.250424 \ -u ``` -Models are automatically quantized to INT8 and converted to DLC format for on-device DSP compilation. +**Deploy a quantized model (INT8, runs on DSP at ~1ms):** + +```bash +synapsectl deploy-model model.onnx \ + --name my_model \ + --quantize --input-list calibration_data.txt \ + --snpe-root /path/to/qairt/2.34.0.250424 \ + -u +``` **Use in your C++ app:** diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py index 8f10245f..b59e0de0 100644 --- a/synapse/cli/deploy_model.py +++ b/synapse/cli/deploy_model.py @@ -74,14 +74,23 @@ def add_commands(subparsers: argparse._SubParsersAction): help="Path to SNPE/QAIRT SDK root (or set SNPE_ROOT env var)", ) + parser.add_argument( + "--quantize", + action="store_true", + help=( + "Quantize the model to INT8 for DSP inference. Requires --input-list with " + "representative input samples. Quantized models run on the HTP/DSP backend " + "for maximum performance (~1ms). Without quantization, models run on CPU." + ), + ) + parser.add_argument( "--input-list", type=str, - required=True, + default=None, help=( - "Path to a text file listing representative input samples for INT8 quantization " - "(required for DSP inference). Each line is a path to a .raw file (float32 binary). " - "Paths should be relative to the directory containing the input list file. " + "Path to a text file listing representative input samples for INT8 quantization. " + "Each line is a path to a .raw file (float32 binary). Required with --quantize. " "Generate .raw files with: arr.astype(np.float32).tofile('sample.raw')" ), ) @@ -117,20 +126,42 @@ def deploy_model(args): return model_name = args.name + quantize = args.quantize + + # Validate quantize + input-list + if quantize and not args.input_list: + console.print( + "[bold red]Error:[/bold red] --quantize requires --input-list " + "with representative input samples for INT8 calibration." 
+ ) + console.print() + console.print("[dim]Example:[/dim]") + console.print(" synapsectl deploy-model model.onnx --name my_model \\") + console.print(" --quantize --input-list calibration_data.txt \\") + console.print(" --snpe-root /path/to/qairt/2.34.0.250424 -u ") + return + + if quantize: + fmt_str = "Quantized DLC (INT8) — runs on DSP" + else: + fmt_str = "Float DLC — runs on CPU/GPU" console.print(f"[bold]Deploying model:[/bold] {model_name}") console.print(f"[bold]Source:[/bold] {args.model_path}") - console.print(f"[bold]Format:[/bold] Quantized DLC (INT8)") + console.print(f"[bold]Format:[/bold] {fmt_str}") console.print() - # Step 1: Convert model (always quantize for DSP inference) - console.print("[bold cyan]Converting model to quantized DLC...[/bold cyan]") + # Step 1: Convert model + if quantize: + console.print("[bold cyan]Converting model to quantized DLC...[/bold cyan]") + else: + console.print("[bold cyan]Converting model to DLC...[/bold cyan]") dlc_path = convert_to_dlc( args.model_path, input_shape=input_shape, snpe_root=args.snpe_root, - quantize=True, + quantize=quantize, input_list=args.input_list, console=console, ) @@ -185,6 +216,14 @@ def deploy_model(args): console.print("[bold green]Model deployed successfully![/bold green]") console.print() console.print(f" Model deployed: [cyan]models/{model_name}.dlc[/cyan]") + if quantize: + console.print(f" Runtime: [cyan]DSP (quantized INT8)[/cyan]") + else: + console.print(f" Runtime: [cyan]CPU (float32)[/cyan]") + console.print() + console.print( + " [dim]Tip: for faster DSP inference (~1ms), redeploy with --quantize --input-list[/dim]" + ) console.print() console.print(" To load in your app:") console.print(f' [cyan]auto model = synapse::create_model("{model_name}");[/cyan]') From daa322c89f8422043ca6bdb75d35272f5ec632a1 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 13:49:28 -0700 Subject: [PATCH 08/20] readme wip --- README.md | 143 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index d526114c..4078fa79 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Includes `synapsectl` command line utility: % synapsectl --help usage: synapsectl [-h] [--uri URI] [--version] [--verbose] - {discover,info,query,start,stop,configure,logs,read,plot,file,taps,deploy,build,settings} ... + {discover,info,query,start,stop,configure,logs,read,plot,file,taps,deploy,build,settings,deploy-model} ... 
Synapse Device Manager @@ -17,7 +17,7 @@ Includes `synapsectl` command line utility: --verbose, -v Enable verbose output Commands: - {discover,info,query,start,stop,configure,logs,read,plot,file,taps,deploy,build,settings} + {discover,info,query,start,stop,configure,logs,read,plot,file,taps,deploy,build,settings,deploy-model} discover Discover Synapse devices on the network info Get device information query Execute a query on the device @@ -32,6 +32,7 @@ Includes `synapsectl` command line utility: deploy Deploy an application to a Synapse device build Cross-compile and package an application into a .deb without deploying settings Manage the persistent device settings + deploy-model Deploy a machine learning model to a Synapse device As well as the base for a device implementation (`synapse/server`), @@ -57,42 +58,6 @@ And a toy device `synapse-sim` for local development, For more information on deploy and build, visit [synapse-example-app](https://github.com/sciencecorp/synapse-example-app) -## Model Deployment - -Deploy machine learning models to Synapse devices for DSP inference. - -**Prerequisites:** -- [QAIRT SDK v2.34](https://softwarecenter.qualcomm.com/) (Qualcomm AI Runtime) -- Docker (for model conversion) - -**Deploy a model (float, runs on CPU):** - -```bash -synapsectl deploy-model model.onnx \ - --name my_model \ - --snpe-root /path/to/qairt/2.34.0.250424 \ - -u -``` - -**Deploy a quantized model (INT8, runs on DSP at ~1ms):** - -```bash -synapsectl deploy-model model.onnx \ - --name my_model \ - --quantize --input-list calibration_data.txt \ - --snpe-root /path/to/qairt/2.34.0.250424 \ - -u -``` - -**Use in your C++ app:** - -```cpp -auto model = synapse::create_model("my_model"); -if (model && model->is_ready()) { - auto result = model->infer(input_data); -} -``` - ## A Note on Streaming Synapse devices stream data to and from clients with UDP. To minimize packet loss, it is highly recommended that users increase their OS UDP buffer size. @@ -230,3 +195,105 @@ After recording data to a file, you can generate plots to visualize your data. U ``` synapsectl plot --dir ``` + +## Model Deployment + +Deploy machine learning models to Synapse devices. + +### Prerequisites + +1. **Docker** — required for model conversion +2. **QAIRT SDK v2.34** (Qualcomm AI Runtime) — required for model conversion + +#### Installing the QAIRT SDK + +1. Create a free account at [softwarecenter.qualcomm.com](https://softwarecenter.qualcomm.com/) +2. Download and install [Qualcomm Software Center](https://softwarecenter.qualcomm.com/) for your platform (Linux, macOS, or Windows) +3. Open the Software Center, search for "Qualcomm AI Runtime", and install **v2.34 (Linux)** + + > **Note:** Always install the **Linux** version, even on macOS/Windows. The SDK is mounted into a Linux Docker container for model conversion. + + Alternatively on Linux, you can download the `.qik` file directly from the website and install via command line: + ```bash + /opt/qcom/softwarecenter/bin/qik/qik INSTALL "/path/to/Qualcomm_AI_Runtime_SDK.2.34.0.250424.Linux-AnyCPU.qik" + ``` + +4. The SDK installs to `/opt/qcom/aistack/qairt/2.34.0.250424` by default. Note this path — you'll pass it as `--snpe-root` when deploying models. 
+ +### Quick Start — Deploy a Float Model (CPU) + +The simplest path — no calibration data needed, runs on CPU: + +```bash +synapsectl deploy-model model.onnx \ + --name my_model \ + --snpe-root /opt/qcom/aistack/qairt/2.34.0.250424 \ + -u +``` + +### Deploy a Quantized Model (DSP, ~1ms inference) + +For production performance, quantize the model to INT8 for DSP inference. This requires representative input samples for calibration. + +#### Step 1: Create calibration data + +Generate `.raw` files from representative inputs your model will see in production. Each `.raw` file is a flat binary dump of float32 values matching your model's input shape. + +```python +import numpy as np + +# Example: model expects input shape [1, 1920] +# Generate 10 representative samples +for i in range(10): + sample = np.random.randn(1, 1920).astype(np.float32) # replace with real data + sample.tofile(f"sample_{i:03d}.raw") +``` + +#### Step 2: Create an input list file + +Create a text file (e.g., `input_list.txt`) with one `.raw` file path per line. Paths are relative to the directory containing the input list file. + +``` +sample_000.raw +sample_001.raw +sample_002.raw +sample_003.raw +sample_004.raw +sample_005.raw +sample_006.raw +sample_007.raw +sample_008.raw +sample_009.raw +``` + +> **Tip:** Use 10-100 samples that represent the range of inputs your model will see. More diverse samples = better quantization accuracy. + +#### Step 3: Deploy with quantization + +```bash +synapsectl deploy-model model.onnx \ + --name my_model \ + --quantize --input-list input_list.txt \ + --snpe-root /opt/qcom/aistack/qairt/2.34.0.250424 \ + -u +``` + +### Use in Your C++ App + +```cpp +#include + +// Loads models/.dlc from the device model directory +auto model = synapse::create_model("my_model"); + +if (model && model->is_ready()) { + auto result = model->infer(input_data); + // result.success, result.outputs, result.inference_time_us +} +``` + +The runtime is selected automatically: quantized models run on the DSP, float models run on CPU. You can also specify a runtime explicitly: + +```cpp +auto model = synapse::create_model("my_model", synapse::InferenceRuntime::kDsp); +``` From ec2cf4d2c69ab09ab7f93d1f670becd7075e1421 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 15:27:42 -0700 Subject: [PATCH 09/20] improved model deployment in README --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4078fa79..4282065c 100644 --- a/README.md +++ b/README.md @@ -207,18 +207,17 @@ Deploy machine learning models to Synapse devices. #### Installing the QAIRT SDK -1. Create a free account at [softwarecenter.qualcomm.com](https://softwarecenter.qualcomm.com/) -2. Download and install [Qualcomm Software Center](https://softwarecenter.qualcomm.com/) for your platform (Linux, macOS, or Windows) -3. Open the Software Center, search for "Qualcomm AI Runtime", and install **v2.34 (Linux)** - - > **Note:** Always install the **Linux** version, even on macOS/Windows. The SDK is mounted into a Linux Docker container for model conversion. - - Alternatively on Linux, you can download the `.qik` file directly from the website and install via command line: +1. Create a free account at [softwarecenter.qualcomm.com](https://softwarecenter.qualcomm.com/) (no paid license required) +2. Download **Qualcomm Software Center** (Linux `.deb`) and **Qualcomm AI Runtime v2.34** (Linux `.qik`) from the website +3. 
Install both: ```bash - /opt/qcom/softwarecenter/bin/qik/qik INSTALL "/path/to/Qualcomm_AI_Runtime_SDK.2.34.0.250424.Linux-AnyCPU.qik" - ``` + # Install Qualcomm Software Center (includes the qik package manager) + sudo dpkg -i QualcommSoftwareCenter*.deb -4. The SDK installs to `/opt/qcom/aistack/qairt/2.34.0.250424` by default. Note this path — you'll pass it as `--snpe-root` when deploying models. + # Install the QAIRT SDK + sudo /opt/qcom/softwarecenter/bin/qik/qik INSTALL "/path/to/Qualcomm_AI_Runtime_SDK.2.34.0.250424.Linux-AnyCPU.qik" + ``` +4. The SDK installs to `/opt/qcom/aistack/qairt/2.34.0.250424`. You'll pass this path as `--snpe-root` when deploying models. ### Quick Start — Deploy a Float Model (CPU) From ec8a66648b51a2717f4ffe389b3211b96c8d56e0 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 17:17:24 -0700 Subject: [PATCH 10/20] added defaults for --name and --input-shape to make them optional --- synapse/cli/deploy_model.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py index b59e0de0..1f8bbdb4 100644 --- a/synapse/cli/deploy_model.py +++ b/synapse/cli/deploy_model.py @@ -41,8 +41,8 @@ def add_commands(subparsers: argparse._SubParsersAction): parser.add_argument( "--name", type=str, - required=True, - help="Model name on device (no extension needed, e.g., 'my_model')", + default=None, + help="Model name on device (default: filename without extension, e.g., 'my_model')", ) parser.add_argument( @@ -125,7 +125,37 @@ def deploy_model(args): console.print('[yellow]Expected format: "dim1,dim2,..." (e.g., "1,32,64")[/yellow]') return + # Default dynamic dimensions to 1 if the model has them and no --input-shape given + if input_shape is None: + ext = os.path.splitext(args.model_path)[1].lower() + if ext == ".onnx": + try: + import onnx + + onnx_model = onnx.load(args.model_path) + for inp in onnx_model.graph.input: + dims = inp.type.tensor_type.shape.dim + has_dynamic = any(d.dim_param or d.dim_value == 0 for d in dims) + if has_dynamic: + resolved = [] + for d in dims: + if d.dim_param or d.dim_value == 0: + resolved.append(1) + else: + resolved.append(d.dim_value) + input_shape = tuple(resolved) + console.print( + f"[yellow]Note: model has dynamic dimensions, " + f"defaulting to {input_shape}[/yellow]" + ) + break + except Exception: + pass # If onnx isn't installed or can't load, let the converter handle it + + # Default model name to filename without extension model_name = args.name + if model_name is None: + model_name = os.path.splitext(os.path.basename(args.model_path))[0] quantize = args.quantize # Validate quantize + input-list From cab88f063c4e0763710210ab87da47879832ce3f Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 17:25:00 -0700 Subject: [PATCH 11/20] more indepth readme for quantization --- README.md | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4282065c..089f0b54 100644 --- a/README.md +++ b/README.md @@ -230,27 +230,48 @@ synapsectl deploy-model model.onnx \ -u ``` -### Deploy a Quantized Model (DSP, ~1ms inference) +### Deploy a Quantized Model (DSP) -For production performance, quantize the model to INT8 for DSP inference. This requires representative input samples for calibration. +For production performance, quantize the model to INT8 for DSP inference. 
This requires **calibration data** — a small set of example inputs that represent what the model will see in real use. -#### Step 1: Create calibration data +#### What is calibration data and why do I need it? -Generate `.raw` files from representative inputs your model will see in production. Each `.raw` file is a flat binary dump of float32 values matching your model's input shape. +Quantization converts your model from 32-bit floats to 8-bit integers, making it ~4x smaller and much faster on the DSP. But to do this well, the quantizer needs to see what typical input values look like so it can choose the right scale for each layer. Bad calibration data leads to accuracy loss. + +**Good calibration data** is a handful of real (or realistic) inputs from your application. For example: +- If your model processes neural signals, use 10-50 snippets of actual recorded neural data +- If your model processes audio, use 10-50 clips of real audio +- If you don't have real data yet, synthetic data that matches the expected distribution is acceptable + +Ideally, use at least **1000 representative samples** for best accuracy. Fewer samples (50-100) can work for initial testing, but more data gives the quantizer a better picture of your model's value ranges. + +#### Step 1: Create calibration `.raw` files + +Each `.raw` file is a flat binary dump of float32 values matching your model's input shape. Create them with numpy: ```python import numpy as np # Example: model expects input shape [1, 1920] -# Generate 10 representative samples -for i in range(10): - sample = np.random.randn(1, 1920).astype(np.float32) # replace with real data +# Load your real data here — these should be actual inputs, not random noise +for i, sample_data in enumerate(my_real_samples[:20]): + # sample_data should be a numpy array with shape matching your model input + sample_data.astype(np.float32).tofile(f"sample_{i:03d}.raw") +``` + +If you don't have real data yet, you can use synthetic data to get started (accuracy may be lower): + +```python +import numpy as np + +for i in range(20): + sample = np.random.randn(1, 1920).astype(np.float32) sample.tofile(f"sample_{i:03d}.raw") ``` #### Step 2: Create an input list file -Create a text file (e.g., `input_list.txt`) with one `.raw` file path per line. Paths are relative to the directory containing the input list file. +Create a text file called `input_list.txt` listing your `.raw` files, one per line. Put this file in the same directory as the `.raw` files. ``` sample_000.raw @@ -265,8 +286,6 @@ sample_008.raw sample_009.raw ``` -> **Tip:** Use 10-100 samples that represent the range of inputs your model will see. More diverse samples = better quantization accuracy. - #### Step 3: Deploy with quantization ```bash From fdf8b58174e9906e6bdaef3116d0596a1cb66b00 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 17:27:32 -0700 Subject: [PATCH 12/20] removed device setup script --- scripts/setup-target-device.sh | 190 --------------------------------- 1 file changed, 190 deletions(-) delete mode 100755 scripts/setup-target-device.sh diff --git a/scripts/setup-target-device.sh b/scripts/setup-target-device.sh deleted file mode 100755 index 24aaa5a3..00000000 --- a/scripts/setup-target-device.sh +++ /dev/null @@ -1,190 +0,0 @@ -#!/bin/bash -# Developer tool for setting up QCS6490 dev boards directly. -# End users: install the synapse-app-sdk .deb package instead. 
-# Deploys QNN SDK libraries, BSP libc++, and configures QNN HTP runtime -# -# Usage: ./scripts/setup-target-device.sh [--sdk-version v2.34|v2.42] [SNPE_ROOT] -# Defaults: --sdk-version v2.42 -# -# Examples: -# ./scripts/setup-target-device.sh # v2.42 (default) -# ./scripts/setup-target-device.sh --sdk-version v2.34 # full v2.34 stack -# ./scripts/setup-target-device.sh /path/to/custom/sdk # custom SDK path -# -# Prerequisites: -# - sshpass installed on host -# - Device accessible at DEVICE_HOST -# - BSP repo at BSP_ROOT (for Hexagon libc++) - -set -euo pipefail - -# --- Parse arguments --- -SDK_VERSION="v2.34" -SNPE_ROOT="" - -while [[ $# -gt 0 ]]; do - case "$1" in - --sdk-version) - SDK_VERSION="$2" - shift 2 - ;; - --sdk-version=*) - SDK_VERSION="${1#*=}" - shift - ;; - *) - SNPE_ROOT="$1" - shift - ;; - esac -done - -# --- Resolve SDK path from version if not explicitly provided --- -if [ -z "$SNPE_ROOT" ]; then - case "$SDK_VERSION" in - v2.42) - SNPE_ROOT="/home/calvinl/v2.42.0.251225/qairt/2.42.0.251225" - ;; - v2.34) - SNPE_ROOT="/opt/qcom/aistack/qairt/2.34.0.250424" - ;; - *) - echo "ERROR: Unknown SDK version '$SDK_VERSION'. Supported: v2.34, v2.42" - exit 1 - ;; - esac -fi - -echo "=== Using SDK version: $SDK_VERSION ===" -echo " SNPE_ROOT: $SNPE_ROOT" - -DEVICE_HOST="${DEVICE_HOST:-scifi@10.40.63.143}" -DEVICE_PASS="${DEVICE_PASS:-synapse}" -ROOT_PASS="${ROOT_PASS:-oelinux123}" -BSP_ROOT="${BSP_ROOT:-/home/calvinl/Documents/repos/qcs6490-ubun-1-0_amss_standard_oem}" -SDK_LIB="${SNPE_ROOT}/lib/aarch64-ubuntu-gcc9.4" -SDK_HEX="${SNPE_ROOT}/lib/hexagon-v68/unsigned" -SDK_BIN="${SNPE_ROOT}/bin/aarch64-ubuntu-gcc9.4" -BSP_CDSP="${BSP_ROOT}/cdsp_proc/build/ms/dynamic_modules/kodiak.cdsp.prod" - -# Validate SDK paths exist -for dir in "$SDK_LIB" "$SDK_HEX" "$SDK_BIN"; do - if [ ! -d "$dir" ]; then - echo "ERROR: SDK directory not found: $dir" - exit 1 - fi -done - -if [ ! -d "$BSP_CDSP" ]; then - echo "ERROR: BSP CDSP path not found: $BSP_CDSP" - echo " Hexagon libc++ from the BSP is REQUIRED for QNN HTP skel loading." - echo " Set BSP_ROOT to the qcs6490-ubun-1-0_amss_standard_oem directory." - exit 1 -fi - -echo "=== Staging libraries ===" -STAGING=$(mktemp -d) -trap "rm -rf $STAGING" EXIT - -mkdir -p "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" - -# Core QNN/SNPE libraries for /usr/lib/ -# Copy all QNN/SNPE/calculator libs from the SDK — covers both v2.34 and v2.42 -for lib in "$SDK_LIB"/libQnn*.so "$SDK_LIB"/libSnpe*.so "$SDK_LIB"/libSNPE.so "$SDK_LIB"/libcalculator.so; do - [ -f "$lib" ] && cp "$lib" "$STAGING/usr_lib/" -done - -# Hexagon v68 skel libraries for /usr/lib/rfsa/adsp/ -for f in "$SDK_HEX"/*.so; do - cp "$f" "$STAGING/adsp/" -done - -# Hexagon libc++ from BSP (CRITICAL: must match the device's fastrpc_shell) -# The QAIRT SDK does NOT ship these; they come from the device BSP. -# Without these, the QNN HTP skel fails to load with error 0x80000406. 
-cp "$BSP_CDSP/libc++.so.1" "$STAGING/adsp/" -cp "$BSP_CDSP/libc++abi.so.1" "$STAGING/adsp/" -echo "Staged BSP libc++ for Hexagon DSP" - -# Useful debug binaries -for bin in qnn-net-run qnn-platform-validator; do - if [ -f "$SDK_BIN/$bin" ]; then - cp "$SDK_BIN/$bin" "$STAGING/bin/" - fi -done - -# Write device-side setup script (heredoc with single-quoted delimiter prevents local expansion) -cat > "$STAGING/apply.sh" <<'APPLY_EOF' -#!/bin/bash -set -e - -# --- Remove ALL existing QNN/SNPE libs to prevent version mixing --- -rm -f /usr/lib/libQnn*.so /usr/lib/libSnpe*.so /usr/lib/libSNPE*.so -rm -f /usr/lib/libcalculator.so -rm -f /usr/lib/rfsa/adsp/libQnn*.so /usr/lib/rfsa/adsp/libSnpe*.so -rm -f /usr/lib/rfsa/adsp/libCalculator_skel.so - -# --- Install QNN/SNPE libraries to /usr/lib/ --- -cp /tmp/sdk-staging/usr_lib/*.so /usr/lib/ - -# --- Install hexagon-v68 skel + BSP libc++ to /usr/lib/rfsa/adsp/ --- -cp /tmp/sdk-staging/adsp/*.so /usr/lib/rfsa/adsp/ - -# --- Install debug binaries --- -cp /tmp/sdk-staging/bin/* /usr/local/bin/ 2>/dev/null || true -chmod +x /usr/local/bin/qnn-* 2>/dev/null || true - -# --- Set ADSP_LIBRARY_PATH in /etc/environment --- -if ! grep -q ADSP_LIBRARY_PATH /etc/environment 2>/dev/null; then - echo "ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" >> /etc/environment -fi - -# --- Set ADSP_LIBRARY_PATH in cdsprpcd service --- -if ! grep -q ADSP_LIBRARY_PATH /lib/systemd/system/cdsprpcd.service 2>/dev/null; then - sed -i "/\[Service\]/a Environment=ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" /lib/systemd/system/cdsprpcd.service - systemctl daemon-reload -fi -# Always restart cdsprpcd to pick up env var -systemctl restart cdsprpcd -sleep 1 - -# --- Update linker cache --- -ldconfig - -# --- Verify --- -echo "" -echo "=== Verification ===" -echo "QNN libs in /usr/lib/:" -ls /usr/lib/libQnn*.so /usr/lib/libSNPE.so 2>/dev/null | xargs -I{} basename {} -echo "" -echo "Skel + libc++ in /usr/lib/rfsa/adsp/:" -ls /usr/lib/rfsa/adsp/libQnn*.so /usr/lib/rfsa/adsp/libc++*.so* 2>/dev/null | xargs -I{} basename {} -echo "" -echo "ADSP_LIBRARY_PATH in cdsprpcd:" -CDSP_PID=$(pgrep cdsprpcd | head -1) -if [ -n "$CDSP_PID" ]; then - cat /proc/"$CDSP_PID"/environ | tr "\0" "\n" | grep ADSP || echo "(not set)" -else - echo "(cdsprpcd not running)" -fi -echo "" -echo "Calculator test:" -export ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp -export LD_LIBRARY_PATH=/usr/lib -/usr/local/bin/qnn-platform-validator --backend dsp --testBackend 2>&1 | grep -E "Unit Test|supported" -APPLY_EOF - -echo "=== Uploading to device ===" -sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "rm -rf /tmp/sdk-staging && mkdir -p /tmp/sdk-staging" -sshpass -p "$DEVICE_PASS" scp -o StrictHostKeyChecking=no -r "$STAGING/usr_lib" "$STAGING/adsp" "$STAGING/bin" "$STAGING/apply.sh" "$DEVICE_HOST:/tmp/sdk-staging/" - -echo "=== Applying on device as root ===" -sshpass -p "$DEVICE_PASS" ssh -o StrictHostKeyChecking=no "$DEVICE_HOST" "echo '$ROOT_PASS' | su -c 'bash /tmp/sdk-staging/apply.sh' 2>&1" - -echo "" -echo "=== Device setup complete (SDK: $SDK_VERSION) ===" -echo "" -echo "To run on device:" -echo " export ADSP_LIBRARY_PATH=/usr/lib/rfsa/adsp" -echo " export LD_LIBRARY_PATH=/usr/lib:/opt/scifi/lib" -echo " ./synapse-example-app" From b55cdb70893a0049e1f989563dad6b949fb668fe Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Tue, 24 Mar 2026 17:37:35 -0700 Subject: [PATCH 13/20] reverted changes to files.py and updated callsite. 
made the [y/n] show up on overwrite confirmation --- synapse/cli/deploy_model.py | 62 ++++++------------------------------- synapse/cli/files.py | 18 +++++------ 2 files changed, 18 insertions(+), 62 deletions(-) diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py index 1f8bbdb4..3c4c9b52 100644 --- a/synapse/cli/deploy_model.py +++ b/synapse/cli/deploy_model.py @@ -4,12 +4,12 @@ import os from typing import Optional -import paramiko.ssh_exception from rich.console import Console from rich import progress +from rich.prompt import Confirm import synapse.client.sftp as sftp -from synapse.cli.files import find_password, save_password +from synapse.cli.files import setup_connection from synapse.utils.model_converter import convert_to_dlc # Constants @@ -205,7 +205,7 @@ def deploy_model(args): # Step 2: Connect to device via SFTP console.print("[bold cyan]Connecting to device...[/bold cyan]") - connections = _setup_connection( + result = setup_connection( args.uri, args.username, args.env_file, @@ -213,10 +213,10 @@ def deploy_model(args): console, ) - if connections is None: + if result is None: return - ssh, sftp_conn = connections + ssh, sftp_conn = result try: # Step 3: Ensure model directory exists @@ -227,13 +227,10 @@ def deploy_model(args): try: sftp_conn.stat(remote_path) if not args.force: - console.print( - f"[yellow]Model '{model_name}.dlc' already exists on device. " - f"Overwrite? [y/N][/yellow] ", - end="", - ) - response = input().strip().lower() - if response not in ("y", "yes"): + if not Confirm.ask( + f"[yellow]Model '{model_name}.dlc' already exists on device. Overwrite?[/yellow]", + default=False, + ): console.print("[dim]Aborted.[/dim]") return except FileNotFoundError: @@ -262,47 +259,6 @@ def deploy_model(args): sftp.close_sftp(ssh, sftp_conn) -def _setup_connection( - uri: str, - username: str, - env_file: str, - forget_password: bool, - console: Console, -) -> Optional[tuple]: - """Set up SFTP connection to device.""" - hostname = uri.split(":")[0] if ":" in uri else uri - password = find_password(hostname, env_file) - - if password is None: - console.print(f"[bold red]Didn't find any password for {hostname}[/bold red]") - return None - - console.print(f"[dim]Connecting to {hostname}:22 as {username}...[/dim]") - - try: - ssh, sftp_conn = sftp.connect_sftp(hostname, username, password) - except paramiko.ssh_exception.AuthenticationException: - console.print(f"[bold red]Authentication failed for {hostname}[/bold red]") - console.print("[yellow]Incorrect username or password.[/yellow]") - return None - except paramiko.ssh_exception.SSHException as e: - console.print(f"[bold red]SSH connection failed: {e}[/bold red]") - return None - except Exception as e: - console.print(f"[bold red]Connection failed: {e}[/bold red]") - return None - - if ssh is None or sftp_conn is None: - console.print(f"[bold red]Failed to connect to {hostname}[/bold red]") - return None - - if not forget_password: - save_password(password, env_file, hostname) - - console.print(f"[green]Connected to {hostname}[/green]") - return ssh, sftp_conn - - def _ensure_model_dir(sftp_conn, console: Console): """Ensure the model directory exists on the device.""" try: diff --git a/synapse/cli/files.py b/synapse/cli/files.py index f2560d63..ab3844cc 100644 --- a/synapse/cli/files.py +++ b/synapse/cli/files.py @@ -11,6 +11,7 @@ from rich import progress from rich.prompt import Confirm +from synapse import Device import synapse.client.sftp as sftp from synapse.utils.file import format_mode, 
format_time, filesize_binary
@@ -154,29 +155,28 @@ def setup_connection(
     forget_password: bool,
     console: Console,
 ) -> Optional[tuple[paramiko.SSHClient, paramiko.SFTPClient]]:
-    # Strip port if present - SFTP uses port 22, not the gRPC port
-    hostname = uri.split(":")[0] if ":" in uri else uri
+    dev_name = Device(uri).get_name()
     password = find_password(
-        hostname, env_file
+        dev_name, env_file
     )  # Check if password is provided or stored in env file
     if password is None:
-        console.print(f"[bold red]Didnt find any password for {hostname}[/bold red]")
+        console.print(f"[bold red]Didn't find any password for {uri}[/bold red]")
         return

     # Open SFTP connection
     with console.status("Connecting to Synapse device...", spinner="bouncingBall"):
         try:
-            ssh, sftp_conn = sftp.connect_sftp(hostname, username, password)
+            ssh, sftp_conn = sftp.connect_sftp(uri, username, password)
         except paramiko.ssh_exception.AuthenticationException:
-            console.print(f"[bold red]Authentication failed for {hostname}[/bold red]")
+            console.print(f"[bold red]Authentication failed for {uri}[/bold red]")
             console.print("[yellow] Incorrect username or password.")
             return None

     if ssh is None or sftp_conn is None:
-        console.print(f"[bold red]Failed to connect to {hostname}[/bold red]")
+        console.print(f"[bold red]Failed to connect to {uri}[/bold red]")
         return

     # If the connection is successful, we can prompt the user if they want to save the password
-    if not forget_password:
-        save_password(password, env_file, hostname)
+    if not forget_password and dev_name is not None:
+        save_password(password, env_file, dev_name)

     return ssh, sftp_conn

From 974f5812fe46c1ac9b4c951581b74af2b619c353 Mon Sep 17 00:00:00 2001
From: calvinleng-science
Date: Tue, 24 Mar 2026 18:37:34 -0700
Subject: [PATCH 14/20] changed how things are laid out to work with pypi
 installation

---
 setup.py                                     |  6 ++++++
 synapse/client/sftp.py                       |  1 -
 .../utils/model_converter/docker}/Dockerfile |  0
 .../utils/model_converter/docker}/convert.py |  0
 synapse/utils/model_converter/onnx_to_dlc.py | 19 +++++++++----------
 5 files changed, 15 insertions(+), 11 deletions(-)
 rename {model-converter => synapse/utils/model_converter/docker}/Dockerfile (100%)
 rename {model-converter => synapse/utils/model_converter/docker}/convert.py (100%)

diff --git a/setup.py b/setup.py
index 55e722a9..a36c693b 100644
--- a/setup.py
+++ b/setup.py
@@ -11,6 +11,12 @@
     author="Science Team",
     author_email="team@science.xyz",
     packages=find_packages(include=["synapse", "synapse.*"]),
+    package_data={
+        "synapse": [
+            "utils/model_converter/docker/Dockerfile",
+            "utils/model_converter/docker/convert.py",
+        ],
+    },
     long_description=long_description,
     long_description_content_type="text/markdown",
     python_requires=">=3.9",
diff --git a/synapse/client/sftp.py b/synapse/client/sftp.py
index cf75a191..c4d55253 100644
--- a/synapse/client/sftp.py
+++ b/synapse/client/sftp.py
@@ -29,7 +29,6 @@ def connect_sftp(hostname, username, password=None, pass_filename=None, key_file
             logging.error(f"Failed to read password file: {e}")
             return None, None
     try:
-        logging.debug(f"Connecting to {hostname}:{port} as {username}")
         ssh.connect(
             hostname=hostname,
             port=port,
diff --git a/model-converter/Dockerfile b/synapse/utils/model_converter/docker/Dockerfile
similarity index 100%
rename from model-converter/Dockerfile
rename to synapse/utils/model_converter/docker/Dockerfile
diff --git a/model-converter/convert.py b/synapse/utils/model_converter/docker/convert.py
similarity index 100%
rename from model-converter/convert.py
rename to 
synapse/utils/model_converter/docker/convert.py diff --git a/synapse/utils/model_converter/onnx_to_dlc.py b/synapse/utils/model_converter/onnx_to_dlc.py index dcf91ada..5b24f998 100644 --- a/synapse/utils/model_converter/onnx_to_dlc.py +++ b/synapse/utils/model_converter/onnx_to_dlc.py @@ -18,18 +18,17 @@ def _find_model_converter_dir() -> str: - """Locate the model-converter/ directory containing the Dockerfile.""" - # Walk up from this file to the repo root + """Locate the directory containing the Dockerfile for model conversion.""" here = os.path.dirname(os.path.abspath(__file__)) - repo_root = os.path.dirname(os.path.dirname(os.path.dirname(here))) - candidate = os.path.join(repo_root, "model-converter") - if os.path.isdir(candidate) and os.path.isfile( - os.path.join(candidate, "Dockerfile") + docker_dir = os.path.join(here, "docker") + if os.path.isdir(docker_dir) and os.path.isfile( + os.path.join(docker_dir, "Dockerfile") ): - return candidate + return docker_dir + raise FileNotFoundError( - f"model-converter/ directory not found at {candidate}. " - "Make sure you are running from the synapse-python repository." + "Model converter Dockerfile not found. " + "Try reinstalling: pip install science-synapse" ) @@ -72,7 +71,7 @@ def _build_image(console: Optional[Console] = None) -> bool: return False if console: - console.print(f"[green]Docker image {DOCKER_IMAGE} built successfully[/green]") + console.print(f"[green]Docker image built successfully[/green]") return True From 9acf9f70895590b31436eb48c1f452285c240bc9 Mon Sep 17 00:00:00 2001 From: calvinleng-science Date: Wed, 8 Apr 2026 11:50:40 -0700 Subject: [PATCH 15/20] will now bundle qualcomm headers too, resulting .deb should be fully self contained --- synapse/cli/build.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/synapse/cli/build.py b/synapse/cli/build.py index a76fb8cb..c64ec34c 100644 --- a/synapse/cli/build.py +++ b/synapse/cli/build.py @@ -340,6 +340,11 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo lib_dst_dir = os.path.join(staging_dir, "opt", "scifi", "lib") os.makedirs(lib_dst_dir, exist_ok=True) + # QNN libraries are dlopen'd from /usr/lib/ (hardcoded paths in SDK), + # so they must be staged there — not in /opt/scifi/lib/ + qnn_dst_dir = os.path.join(staging_dir, "usr", "lib") + os.makedirs(qnn_dst_dir, exist_ok=True) + try: arch_suffix = detect_arch() image_tag = f"{app_name}:latest-{arch_suffix}" @@ -349,6 +354,7 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo f"[yellow]Extracting SDK libraries from Docker image [bold]{image_tag}[/bold]...[/yellow]" ) + # Extract SDK shared libraries → /opt/scifi/lib/ docker_cmd = [ "docker", "run", @@ -360,11 +366,29 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo image_tag, "/bin/bash", "-c", - "find /usr/lib -name 'libsynapse*.so*' -exec cp -a {} /out/ \\;", + "find /usr/lib -maxdepth 1 -name 'libsynapse*.so*' -exec cp -a {} /out/ \\;", ] subprocess.run(docker_cmd, check=True) + # Extract QNN runtime libraries → /usr/lib/ (dlopen'd by absolute path) + docker_cmd_qnn = [ + "docker", + "run", + "--rm", + "--platform", + platform_opt, + "-v", + f"{qnn_dst_dir}:/out", + image_tag, + "/bin/bash", + "-c", + "find /usr/lib -maxdepth 1 -name 'libQnn*.so' -exec cp -a {} /out/ \\; && " + "if [ -d /usr/lib/rfsa ]; then cp -a /usr/lib/rfsa /out/; fi", + ] + + subprocess.run(docker_cmd_qnn, check=True) + except 
From 0aed90b173925b7d3e6287afe881f97857b9cba4 Mon Sep 17 00:00:00 2001
From: calvinleng-science
Date: Wed, 8 Apr 2026 15:42:33 -0700
Subject: [PATCH 16/20] should make the models directory if it does not exist

---
 synapse/cli/deploy_model.py | 119 ++++++++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 31 deletions(-)

diff --git a/synapse/cli/deploy_model.py b/synapse/cli/deploy_model.py
index 3c4c9b52..f63bfa38 100644
--- a/synapse/cli/deploy_model.py
+++ b/synapse/cli/deploy_model.py
@@ -10,7 +10,6 @@
 
 import synapse.client.sftp as sftp
 from synapse.cli.files import setup_connection
-from synapse.utils.model_converter import convert_to_dlc
 
 # Constants
 DEVICE_MODEL_DIR = "/models"
@@ -174,31 +173,64 @@ def deploy_model(args):
     if quantize:
         fmt_str = "Quantized DLC (INT8) — runs on DSP"
     else:
-        fmt_str = "Float DLC — runs on CPU/GPU"
+        fmt_str = "ONNX (float32) — runs on CPU via ONNX Runtime"
 
     console.print(f"[bold]Deploying model:[/bold] {model_name}")
     console.print(f"[bold]Source:[/bold] {args.model_path}")
     console.print(f"[bold]Format:[/bold] {fmt_str}")
     console.print()
 
-    # Step 1: Convert model
+    # Step 1: Prepare model for deployment
     if quantize:
+        # Quantized path: convert to DLC via Docker (requires QAIRT SDK)
         console.print("[bold cyan]Converting model to quantized DLC...[/bold cyan]")
+
+        from synapse.utils.model_converter import convert_to_dlc
+
+        dlc_path = convert_to_dlc(
+            args.model_path,
+            input_shape=input_shape,
+            snpe_root=args.snpe_root,
+            quantize=quantize,
+            input_list=args.input_list,
+            console=console,
+        )
+
+        if dlc_path is None:
+            console.print("[bold red]Model conversion failed[/bold red]")
+            return
+
+        deploy_path = dlc_path
+        remote_ext = ".dlc"
     else:
-        console.print("[bold cyan]Converting model to DLC...[/bold cyan]")
-
-    dlc_path = convert_to_dlc(
-        args.model_path,
-        input_shape=input_shape,
-        snpe_root=args.snpe_root,
-        quantize=quantize,
-        input_list=args.input_list,
-        console=console,
-    )
+        # Non-quantized path: deploy .onnx directly (no QAIRT SDK needed)
+        ext = os.path.splitext(args.model_path)[1].lower()
 
-    if dlc_path is None:
-        console.print("[bold red]Model conversion failed[/bold red]")
-        return
+        if ext == ".dlc":
+            deploy_path = args.model_path
+            remote_ext = ".dlc"
+        elif ext == ".pt":
+            console.print("[bold cyan]Converting PyTorch model to ONNX...[/bold cyan]")
+            from synapse.utils.model_converter.pt_to_onnx import convert_pt_to_onnx
+
+            onnx_path = convert_pt_to_onnx(
+                args.model_path,
+                input_shape=input_shape,
+                console=console,
+            )
+            if onnx_path is None:
+                console.print("[bold red]Model conversion failed[/bold red]")
+                return
+
+            deploy_path = onnx_path
+            remote_ext = ".onnx"
+        elif ext == ".onnx":
+            deploy_path = args.model_path
+            remote_ext = ".onnx"
+        else:
+            console.print(f"[bold red]Error:[/bold red] Unsupported file type: {ext}")
+            console.print("[yellow]Supported formats: .pt, .onnx, .dlc[/yellow]")
+            return
 
     console.print()
 
@@ -220,15 +252,15 @@
 
     try:
         # Step 3: Ensure model directory exists
-        _ensure_model_dir(sftp_conn, console)
+        _ensure_model_dir(sftp_conn, ssh, console)
 
         # Step 4: Check if model already exists on device
-        remote_path = f"{DEVICE_MODEL_DIR}/{model_name}.dlc"
+        remote_path = f"{DEVICE_MODEL_DIR}/{model_name}{remote_ext}"
         try:
             sftp_conn.stat(remote_path)
             if not args.force:
                 if not Confirm.ask(
-                    f"[yellow]Model '{model_name}.dlc' already exists on device. Overwrite?[/yellow]",
+                    f"[yellow]Model '{model_name}{remote_ext}' already exists on device. Overwrite?[/yellow]",
                     default=False,
                 ):
                     console.print("[dim]Aborted.[/dim]")
@@ -237,16 +269,16 @@
             pass
 
         # Step 5: Upload the model file
-        _upload_file(sftp_conn, dlc_path, remote_path, console)
+        _upload_file(sftp_conn, deploy_path, remote_path, console)
 
         console.print()
         console.print("[bold green]Model deployed successfully![/bold green]")
         console.print()
-        console.print(f"  Model deployed: [cyan]models/{model_name}.dlc[/cyan]")
+        console.print(f"  Model deployed: [cyan]models/{model_name}{remote_ext}[/cyan]")
         if quantize:
             console.print(f"  Runtime: [cyan]DSP (quantized INT8)[/cyan]")
         else:
-            console.print(f"  Runtime: [cyan]CPU (float32)[/cyan]")
+            console.print("  Runtime: [cyan]CPU (float32, ONNX Runtime)[/cyan]")
         console.print()
         console.print(
             "  [dim]Tip: for faster DSP inference (~1ms), redeploy with --quantize --input-list[/dim]"
@@ -259,18 +291,43 @@
         sftp.close_sftp(ssh, sftp_conn)
 
 
-def _ensure_model_dir(sftp_conn, console: Console):
-    """Ensure the model directory exists on the device."""
+def _ensure_model_dir(sftp_conn, ssh, console: Console):
+    """Ensure the model directory exists on the device.
+
+    Tries SFTP mkdir first. If that fails (permission denied in chroot),
+    falls back to SSH exec to create and chown the directory.
+    """
     try:
         sftp_conn.stat(DEVICE_MODEL_DIR)
+        return
     except FileNotFoundError:
-        console.print(f"[blue]Creating model directory: {DEVICE_MODEL_DIR}[/blue]")
-        try:
-            sftp_conn.mkdir(DEVICE_MODEL_DIR)
-        except Exception as e:
-            console.print(
-                f"[yellow]Warning: Could not create model directory: {e}[/yellow]"
-            )
+        pass
+
+    console.print(f"[blue]Creating model directory: {DEVICE_MODEL_DIR}[/blue]")
+
+    # Try SFTP mkdir first
+    try:
+        sftp_conn.mkdir(DEVICE_MODEL_DIR)
+        return
+    except Exception:
+        pass
+
+    # SFTP failed — try SSH command to create with proper ownership
+    real_path = f"/opt/scifi/data{DEVICE_MODEL_DIR}"
+    try:
+        _, stdout, stderr = ssh.exec_command(
+            f"mkdir -p {real_path} && chown $(whoami) {real_path}"
+        )
+        exit_code = stdout.channel.recv_exit_status()
+        if exit_code == 0:
+            console.print(f"[green]Created {DEVICE_MODEL_DIR}[/green]")
+            return
+    except Exception:
+        pass
+
+    console.print(
+        "[bold red]Error: Could not create model directory on device.[/bold red]"
+    )
 
 
 def _upload_file(sftp_conn, local_path: str, remote_path: str, console: Console):

From abd1d5d3bcc1317812318fd13e0844682857e8df Mon Sep 17 00:00:00 2001
From: calvinleng-science
Date: Wed, 8 Apr 2026 15:46:22 -0700
Subject: [PATCH 17/20] bump version to 2.6.0a1 for app-inference pre-release

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a36c693b..72c88684 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name="science-synapse",
-    version="2.5.0",
+    version="2.6.0a1",
     description="Client library and CLI for the Synapse API",
     author="Science Team",
     author_email="team@science.xyz",
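The a1 suffix in patch 17 matters for installation behavior: PEP 440 orders alpha releases before the final release, so a plain pip install keeps resolving to 2.5.0 until 2.6.0 ships. A quick demonstration using the packaging library (already listed in the model-convert extras):

```python
# PEP 440 pre-release ordering: alphas sort before the final release, and
# pip only selects them with --pre or an explicit pin like ==2.6.0a1.
from packaging.version import Version

versions = ["2.5.0", "2.6.0a1", "2.6.0a2", "2.6.0"]
assert sorted(map(Version, versions)) == [Version(v) for v in versions]
print(" < ".join(versions))
```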
From 6832f7ff4de07b95a52253f16f51d254f46e7a8a Mon Sep 17 00:00:00 2001
From: calvinleng-science
Date: Thu, 9 Apr 2026 15:03:43 -0700
Subject: [PATCH 18/20] updated README to specify the Qualcomm SDK is no longer
 needed for the CPU runtime, updated build to package onnxruntime into the deb

---
 README.md            | 22 +++++++++++-----------
 synapse/cli/build.py |  4 +++-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 089f0b54..cdfaa036 100644
--- a/README.md
+++ b/README.md
@@ -203,7 +203,17 @@ Deploy machine learning models to Synapse devices.
 ### Prerequisites
 
 1. **Docker** — required for model conversion
-2. **QAIRT SDK v2.34** (Qualcomm AI Runtime) — required for model conversion
+2. **QAIRT SDK v2.34** (Qualcomm AI Runtime) — required only when converting models for the DSP runtime; not needed for the CPU runtime
+
+### Quick Start — Deploy a Float Model (CPU)
+
+The simplest path — no calibration data needed, runs on CPU via onnxruntime:
+
+```bash
+synapsectl deploy-model model.onnx \
+  --name my_model \
+  -u 
+```
 
 #### Installing the QAIRT SDK
 
@@ -219,16 +229,6 @@ Deploy machine learning models to Synapse devices.
    ```
 4. The SDK installs to `/opt/qcom/aistack/qairt/2.34.0.250424`. You'll pass this path as `--snpe-root` when deploying models.
 
-### Quick Start — Deploy a Float Model (CPU)
-
-The simplest path — no calibration data needed, runs on CPU:
-
-```bash
-synapsectl deploy-model model.onnx \
-  --name my_model \
-  --snpe-root /opt/qcom/aistack/qairt/2.34.0.250424 \
-  -u 
-```
 
 ### Deploy a Quantized Model (DSP)
 
diff --git a/synapse/cli/build.py b/synapse/cli/build.py
index c64ec34c..a02e60b4 100644
--- a/synapse/cli/build.py
+++ b/synapse/cli/build.py
@@ -355,6 +355,7 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo
         )
 
         # Extract SDK shared libraries → /opt/scifi/lib/
+        # Includes libsynapse-app-sdk and its runtime dependency libonnxruntime
         docker_cmd = [
             "docker",
             "run",
@@ -366,7 +367,8 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo
             image_tag,
             "/bin/bash",
             "-c",
-            "find /usr/lib -maxdepth 1 -name 'libsynapse*.so*' -exec cp -a {} /out/ \\;",
+            "find /usr/lib -maxdepth 1 -name 'libsynapse*.so*' -exec cp -a {} /out/ \\; && "
+            "find /usr/lib -maxdepth 1 -name 'libonnxruntime*.so*' -exec cp -a {} /out/ \\;",
         ]
 
         subprocess.run(docker_cmd, check=True)

From 837965a595a20edcac706c2272f4e28782023f36 Mon Sep 17 00:00:00 2001
From: calvinleng-science
Date: Thu, 9 Apr 2026 15:05:27 -0700
Subject: [PATCH 19/20] bump version to 2.6.0a2 for fixed onnxruntime packaging
 and updated README

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 72c88684..d86cbcbc 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name="science-synapse",
-    version="2.6.0a1",
+    version="2.6.0a2",
     description="Client library and CLI for the Synapse API",
     author="Science Team",
     author_email="team@science.xyz",
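Before cutting a respin like 2.6.0a2, it is worth confirming the built .deb actually bundles the runtime libraries the README now promises. A small sketch using dpkg -c to list the archive payload; the package filename is a placeholder assumption:

```python
# List a built app .deb's payload and flag the runtimes it should bundle.
# "my-app_0.1.0_arm64.deb" is a hypothetical filename; substitute your build.
import subprocess

listing = subprocess.run(
    ["dpkg", "-c", "my-app_0.1.0_arm64.deb"],
    capture_output=True, text=True, check=True,
).stdout

for line in listing.splitlines():
    if "libsynapse" in line or "libonnxruntime" in line:
        print(line)
```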
From afce0ec306974dde97a2bed4f2338616b1feffcd Mon Sep 17 00:00:00 2001
From: calvinleng-science
Date: Wed, 22 Apr 2026 18:15:46 -0700
Subject: [PATCH 20/20] app builds will no longer package vcpkg .so's that
 already exist in scifi-headstage-shared-libraries into the resulting app
 .deb, as that blocks installation of scifi-headstage-shared-libraries.
 additionally, tap names will now wrap instead of truncating in the rich UI

---
 synapse/cli/build.py | 32 ++++++++++++++++++++++++++++----
 synapse/cli/taps.py  |  6 +++---
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/synapse/cli/build.py b/synapse/cli/build.py
index a02e60b4..cdda8aef 100644
--- a/synapse/cli/build.py
+++ b/synapse/cli/build.py
@@ -354,8 +354,33 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo
             f"[yellow]Extracting SDK libraries from Docker image [bold]{image_tag}[/bold]...[/yellow]"
         )
 
-        # Extract SDK shared libraries → /opt/scifi/lib/
-        # Includes libsynapse-app-sdk and its runtime dependency libonnxruntime
+        # Skip vcpkg .so files already shipped by scifi-headstage-shared-libraries
+        # to avoid dpkg file-overwrite conflicts when the app deb is installed.
+        extract_script = r"""
+set -e
+filter=/tmp/scifi_shared_libs.txt
+: > "$filter"
+if dpkg-query -W -f='${Status}' scifi-headstage-shared-libraries 2>/dev/null | grep -q "install ok installed"; then
+    dpkg-query -L scifi-headstage-shared-libraries | grep -oE '[^/]+\.so[^/]*$' | sort -u > "$filter" || true
+else
+    echo "WARNING: scifi-headstage-shared-libraries not installed in build image; packaging all vcpkg .so files (may conflict on device)" >&2
+fi
+
+find /usr/lib -maxdepth 1 -name 'libsynapse*.so*' -exec cp -a {} /out/ \;
+
+vcpkg_lib="${VCPKG_ROOT}/build/host/vcpkg_installed/arm64-linux-dynamic-release/lib"
+if [ -d "$vcpkg_lib" ]; then
+    find "$vcpkg_lib" -maxdepth 1 -name '*.so*' -print0 | while IFS= read -r -d '' f; do
+        base=$(basename "$f")
+        if [ -s "$filter" ] && grep -qxF "$base" "$filter"; then
+            echo "Skipping $base (already shipped by scifi-headstage-shared-libraries)" >&2
+        else
+            cp -a "$f" /out/
+        fi
+    done
+fi
+""".strip()
+
         docker_cmd = [
             "docker",
             "run",
@@ -367,8 +392,7 @@ def build_deb_package(app_dir: str, app_name: str, version: str = "0.1.0") -> bo
             image_tag,
             "/bin/bash",
             "-c",
-            "find /usr/lib -maxdepth 1 -name 'libsynapse*.so*' -exec cp -a {} /out/ \\; && "
-            "find /usr/lib -maxdepth 1 -name 'libonnxruntime*.so*' -exec cp -a {} /out/ \\;",
+            extract_script,
         ]
 
         subprocess.run(docker_cmd, check=True)
diff --git a/synapse/cli/taps.py b/synapse/cli/taps.py
index 0a697a36..27db77e2 100644
--- a/synapse/cli/taps.py
+++ b/synapse/cli/taps.py
@@ -112,9 +112,9 @@ def list_taps(args):
     taps = tap.list_taps()
 
     table = Table(title="Available Taps", show_lines=True)
-    table.add_column("Name", style="cyan")
-    table.add_column("Message Type", style="green")
-    table.add_column("Endpoint", style="green")
+    table.add_column("Name", style="cyan", overflow="fold")
+    table.add_column("Message Type", style="green", overflow="fold")
+    table.add_column("Endpoint", style="green", overflow="fold")
 
     for tap in taps:
         table.add_row(tap.name, tap.message_type, tap.endpoint)
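Closing the loop on the CPU deployment path this series builds up: a float model can be smoke-tested locally with onnxruntime before synapsectl deploy-model uploads it. A minimal sketch; the model filename and the choice to pin dynamic dimensions to 1 are assumptions:

```python
# Local sanity check for the CPU (float32) path: load the ONNX model and
# run one inference on random data to confirm it executes end to end.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
inp = session.get_inputs()[0]

# Dynamic dimensions are reported as strings or None; pin them to 1 here.
shape = [d if isinstance(d, int) else 1 for d in inp.shape]
x = np.random.rand(*shape).astype(np.float32)

outputs = session.run(None, {inp.name: x})
print("output shapes:", [tuple(o.shape) for o in outputs])
```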