diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 56ad55d..77fb5ba 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -22,14 +22,14 @@ jobs: packages: write steps: - uses: actions/checkout@v6 - - uses: docker/setup-buildx-action@v3 - - uses: docker/login-action@v3 + - uses: docker/setup-buildx-action@v4 + - uses: docker/login-action@v4 if: github.event_name != 'pull_request' with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - uses: docker/metadata-action@v5 + - uses: docker/metadata-action@v6 id: meta with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} @@ -38,7 +38,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=sha - - uses: docker/build-push-action@v6 + - uses: docker/build-push-action@v7 with: context: . push: ${{ github.event_name != 'pull_request' }} diff --git a/package-lock.json b/package-lock.json index 96515f8..3294dd5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -483,15 +483,15 @@ } }, "node_modules/@eslint/config-array": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.23.2.tgz", - "integrity": "sha512-YF+fE6LV4v5MGWRGj7G404/OZzGNepVF8fxk7jqmqo3lrza7a0uUcDnROGRBG1WFC1omYUS/Wp1f42i0M+3Q3A==", + "version": "0.23.3", + "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.23.3.tgz", + "integrity": "sha512-j+eEWmB6YYLwcNOdlwQ6L2OsptI/LO6lNBuLIqe5R7RetD658HLoF+Mn7LzYmAWWNNzdC6cqP+L6r8ujeYXWLw==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@eslint/object-schema": "^3.0.2", + "@eslint/object-schema": "^3.0.3", "debug": "^4.3.1", - "minimatch": "^10.2.1" + "minimatch": "^10.2.4" }, "engines": { "node": "^20.19.0 || ^22.13.0 || >=24" @@ -511,9 +511,9 @@ } }, "node_modules/@eslint/core": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@eslint/core/-/core-1.1.0.tgz", - "integrity": "sha512-/nr9K9wkr3P1EzFTdFdMoLuo1PmIxjmwvPozwoSodjNBdefGujXQUF93u1DDZpEaTuDvMsIQddsd35BwtrW9Xw==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-1.1.1.tgz", + "integrity": "sha512-QUPblTtE51/7/Zhfv8BDwO0qkkzQL7P/aWWbqcf4xWLEYn1oKjdO0gglQBB4GAsu7u6wjijbCmzsUTy6mnk6oQ==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -524,9 +524,9 @@ } }, "node_modules/@eslint/object-schema": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-3.0.2.tgz", - "integrity": "sha512-HOy56KJt48Bx8KmJ+XGQNSUMT/6dZee/M54XyUyuvTvPXJmsERRvBchsUVx1UMe1WwIH49XLAczNC7V2INsuUw==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-3.0.3.tgz", + "integrity": "sha512-iM869Pugn9Nsxbh/YHRqYiqd23AmIbxJOcpUMOuWCVNdoQJ5ZtwL6h3t0bcZzJUlC3Dq9jCFCESBZnX0GTv7iQ==", "dev": true, "license": "Apache-2.0", "engines": { @@ -534,13 +534,13 @@ } }, "node_modules/@eslint/plugin-kit": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.6.0.tgz", - "integrity": "sha512-bIZEUzOI1jkhviX2cp5vNyXQc6olzb2ohewQubuYlMXZ2Q/XjBO0x0XhGPvc9fjSIiUN0vw+0hq53BJ4eQSJKQ==", + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.6.1.tgz", + "integrity": "sha512-iH1B076HoAshH1mLpHMgwdGeTs0CYwL0SPMkGuSebZrwBp16v415e9NZXg2jtrqPVQjf6IANe2Vtlr5KswtcZQ==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@eslint/core": "^1.1.0", + "@eslint/core": "^1.1.1", "levn": "^0.4.1" }, "engines": { @@ -1512,9 +1512,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "25.3.3", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.3.tgz", - "integrity": "sha512-DpzbrH7wIcBaJibpKo9nnSQL0MTRdnWttGyE5haGwK86xgMOkFLp7vEyfQPGLOJh5wNYiJ3V9PmUMDhV9u8kkQ==", + "version": "25.3.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.5.tgz", + "integrity": "sha512-oX8xrhvpiyRCQkG1MFchB09f+cXftgIXb3a7UUa4Y3wpmZPw5tyZGTLWhlESOLq1Rq6oDlc8npVU2/9xiCuXMA==", "license": "MIT", "dependencies": { "undici-types": "~7.18.0" @@ -3337,18 +3337,18 @@ } }, "node_modules/eslint": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-10.0.2.tgz", - "integrity": "sha512-uYixubwmqJZH+KLVYIVKY1JQt7tysXhtj21WSvjcSmU5SVNzMus1bgLe+pAt816yQ8opKfheVVoPLqvVMGejYw==", + "version": "10.0.3", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-10.0.3.tgz", + "integrity": "sha512-COV33RzXZkqhG9P2rZCFl9ZmJ7WL+gQSCRzE7RhkbclbQPtLAWReL7ysA0Sh4c8Im2U9ynybdR56PV0XcKvqaQ==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.2", - "@eslint/config-array": "^0.23.2", + "@eslint/config-array": "^0.23.3", "@eslint/config-helpers": "^0.5.2", - "@eslint/core": "^1.1.0", - "@eslint/plugin-kit": "^0.6.0", + "@eslint/core": "^1.1.1", + "@eslint/plugin-kit": "^0.6.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.4.2", @@ -3357,7 +3357,7 @@ "cross-spawn": "^7.0.6", "debug": "^4.3.2", "escape-string-regexp": "^4.0.0", - "eslint-scope": "^9.1.1", + "eslint-scope": "^9.1.2", "eslint-visitor-keys": "^5.0.1", "espree": "^11.1.1", "esquery": "^1.7.0", @@ -3370,7 +3370,7 @@ "imurmurhash": "^0.1.4", "is-glob": "^4.0.0", "json-stable-stringify-without-jsonify": "^1.0.1", - "minimatch": "^10.2.1", + "minimatch": "^10.2.4", "natural-compare": "^1.4.0", "optionator": "^0.9.3" }, @@ -3409,9 +3409,9 @@ } }, "node_modules/eslint-scope": { - "version": "9.1.1", - "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-9.1.1.tgz", - "integrity": "sha512-GaUN0sWim5qc8KVErfPBWmc31LEsOkrUJbvJZV+xuL3u2phMUK4HIvXlWAakfC8W4nzlK+chPEAkYOYb5ZScIw==", + "version": "9.1.2", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-9.1.2.tgz", + "integrity": "sha512-xS90H51cKw0jltxmvmHy2Iai1LIqrfbw57b79w/J7MfvDfkIkFZ+kj6zC3BjtUwh150HsSSdxXZcsuv72miDFQ==", "dev": true, "license": "BSD-2-Clause", "dependencies": { @@ -3683,12 +3683,12 @@ } }, "node_modules/express-rate-limit": { - "version": "8.2.1", - "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.2.1.tgz", - "integrity": "sha512-PCZEIEIxqwhzw4KF0n7QF4QqruVTcF73O5kFKUnGOyjbCCgizBBiFaYpd/fnBLUMPw/BWw9OsiN7GgrNYr7j6g==", + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.0.tgz", + "integrity": "sha512-KJzBawY6fB9FiZGdE/0aftepZ91YlaGIrV8vgblRM3J8X+dHx/aiowJWwkx6LIGyuqGiANsjSwwrbb8mifOJ4Q==", "license": "MIT", "dependencies": { - "ip-address": "10.0.1" + "ip-address": "10.1.0" }, "engines": { "node": ">= 16" @@ -4211,9 +4211,9 @@ "license": "ISC" }, "node_modules/ip-address": { - "version": "10.0.1", - "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.0.1.tgz", - "integrity": "sha512-NWv9YLW4PoW2B7xtzaS3NCot75m6nK7Icdv0o3lfMceJVRfSoQwqD4wEH5rLwoKJwUiZ/rfpiVBhnaF0FK4HoA==", + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", "license": "MIT", "engines": { "node": ">= 12" @@ -5211,9 +5211,9 @@ } }, "node_modules/openai": { - "version": "6.25.0", - "resolved": "https://registry.npmjs.org/openai/-/openai-6.25.0.tgz", - "integrity": "sha512-mEh6VZ2ds2AGGokWARo18aPISI1OhlgdEIC1ewhkZr8pSIT31dec0ecr9Nhxx0JlybyOgoAT1sWeKtwPZzJyww==", + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", "license": "Apache-2.0", "bin": { "openai": "bin/cli" @@ -6383,6 +6383,9 @@ "win32" ] }, + "node_modules/sqlite-vec/node_modules/sqlite-vec-linux-arm64": { + "optional": true + }, "node_modules/stackback": { "version": "0.0.2", "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", diff --git a/src/api/middleware.ts b/src/api/middleware.ts index 64d1597..c8629c8 100644 --- a/src/api/middleware.ts +++ b/src/api/middleware.ts @@ -130,7 +130,7 @@ export function checkApiKey(req: IncomingMessage, res: ServerResponse): boolean const keyBuf = Buffer.alloc(COMPARE_LEN); Buffer.from(token).copy(tokenBuf); Buffer.from(apiKey).copy(keyBuf); - if (token.length !== apiKey.length || !timingSafeEqual(tokenBuf, keyBuf)) { + if (!timingSafeEqual(tokenBuf, keyBuf)) { sendError(res, 401, "UNAUTHORIZED", "Invalid API key"); return false; } diff --git a/src/cli/index.ts b/src/cli/index.ts index b92bc7f..6daa1ba 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -59,7 +59,7 @@ import { createPackFromSource, } from "../core/packs.js"; -import { execSync } from "node:child_process"; +import { spawnSync } from "node:child_process"; import { createRequire } from "node:module"; import { FileWatcher, DEFAULT_WATCH_EXTENSIONS } from "../core/watcher.js"; import { indexRepository, parseRepoUrl } from "../core/repo.js"; @@ -2290,17 +2290,29 @@ program console.log(`Current version: ${currentVersion}`); try { - const latest = execSync("npm view libscope version", { encoding: "utf-8" }).trim(); + const versionResult = spawnSync("npm", ["view", "libscope", "version"], { + encoding: "utf-8", + }); + if (versionResult.error || versionResult.status !== 0) { + throw new Error(versionResult.stderr?.trim() || "Failed to check latest version"); + } + const latest = versionResult.stdout.trim(); if (latest === currentVersion) { console.log("✓ Already up to date."); return; } console.log(`Latest version: ${latest}`); console.log("Updating..."); - execSync("npm install -g libscope@latest", { stdio: "inherit" }); + const installResult = spawnSync("npm", ["install", "-g", "libscope@latest"], { + stdio: "inherit", + }); + if (installResult.error || installResult.status !== 0) { + throw new Error("npm install failed"); + } console.log(`✓ Updated to ${latest}`); - } catch { + } catch (err) { console.error("Failed to update. Try manually: npm install -g libscope@latest"); + if (err instanceof Error) console.error(err.message); process.exit(1); } }); diff --git a/src/config.ts b/src/config.ts index 2815b14..ffab3da 100644 --- a/src/config.ts +++ b/src/config.ts @@ -156,6 +156,18 @@ export function loadConfig(): LibScopeConfig { const projectConfig = loadJsonFile(getProjectConfigPath()); const envOverrides = getEnvOverrides(); + if ( + userConfig.embedding?.openaiApiKey || + userConfig.llm?.openaiApiKey || + userConfig.llm?.anthropicApiKey + ) { + getLogger().warn( + "API keys found in config file (~/.libscope/config.json). " + + "This is deprecated — please use environment variables (OPENAI_API_KEY, ANTHROPIC_API_KEY) instead. " + + "Keys in the config file will no longer be written back after the next save.", + ); + } + const config: LibScopeConfig = { embedding: { ...DEFAULT_CONFIG.embedding, @@ -286,6 +298,15 @@ export function saveUserConfig(config: Partial): void { ...config.logging, }, }; + + // Security: never persist API keys to disk — use environment variables instead. + // Keys are read from env vars (OPENAI_API_KEY, ANTHROPIC_API_KEY) at runtime. + delete merged.embedding.openaiApiKey; + if (merged.llm) { + delete merged.llm.openaiApiKey; + delete merged.llm.anthropicApiKey; + } + writeFileSync(getUserConfigPath(), JSON.stringify(merged, null, 2), "utf-8"); invalidateConfigCache(); } diff --git a/src/core/analytics.ts b/src/core/analytics.ts index 1a185ab..b226340 100644 --- a/src/core/analytics.ts +++ b/src/core/analytics.ts @@ -1,7 +1,9 @@ import type Database from "better-sqlite3"; import { randomUUID } from "node:crypto"; import { statSync } from "node:fs"; +import { z } from "zod"; import { getLogger } from "../logger.js"; +import { validateRow, validateRows } from "../db/validate.js"; export interface SearchLogEntry { query: string; @@ -73,9 +75,18 @@ export function logSearch(db: Database.Database, entry: SearchLogEntry): string /** Return overview stats for the knowledge base. */ export function getStats(db: Database.Database, dbPath?: string): OverviewStats { - const row = db - .prepare( - ` + const StatsRowSchema = z.object({ + doc_count: z.number(), + chunk_count: z.number(), + topic_count: z.number(), + search_count: z.number(), + avg_latency: z.number().nullable(), + }); + const row = validateRow( + StatsRowSchema, + db + .prepare( + ` SELECT (SELECT COUNT(*) FROM documents) AS doc_count, (SELECT COUNT(*) FROM chunks) AS chunk_count, @@ -83,14 +94,10 @@ export function getStats(db: Database.Database, dbPath?: string): OverviewStats (SELECT COUNT(*) FROM search_log) AS search_count, (SELECT AVG(latency_ms) FROM search_log) AS avg_latency `, - ) - .get() as { - doc_count: number; - chunk_count: number; - topic_count: number; - search_count: number; - avg_latency: number | null; - }; + ) + .get(), + "getStats.row", + ); let databaseSizeBytes = 0; if (dbPath) { @@ -113,23 +120,40 @@ export function getStats(db: Database.Database, dbPath?: string): OverviewStats /** Return the most frequently returned documents in search results. */ export function getPopularDocuments(db: Database.Database, limit = 10): PopularDocument[] { - return db - .prepare( - `SELECT dh.document_id AS documentId, d.title, COUNT(*) AS hitCount + const PopularDocSchema = z.object({ + documentId: z.string(), + title: z.string(), + hitCount: z.number(), + }); + return validateRows( + PopularDocSchema, + db + .prepare( + `SELECT dh.document_id AS documentId, d.title, COUNT(*) AS hitCount FROM document_hits dh JOIN documents d ON d.id = dh.document_id GROUP BY dh.document_id ORDER BY hitCount DESC LIMIT ?`, - ) - .all(limit) as PopularDocument[]; + ) + .all(limit), + "getPopularDocuments.rows", + ); } /** Return documents that have never appeared in search results within the last N days. */ export function getStaleDocuments(db: Database.Database, days = 90): StaleDocument[] { - return db - .prepare( - `SELECT d.id AS documentId, d.title, d.created_at AS createdAt, d.updated_at AS updatedAt + const StaleDocSchema = z.object({ + documentId: z.string(), + title: z.string(), + createdAt: z.string(), + updatedAt: z.string(), + }); + return validateRows( + StaleDocSchema, + db + .prepare( + `SELECT d.id AS documentId, d.title, d.created_at AS createdAt, d.updated_at AS updatedAt FROM documents d WHERE d.id NOT IN ( SELECT DISTINCT dh.document_id @@ -138,34 +162,50 @@ export function getStaleDocuments(db: Database.Database, days = 90): StaleDocume WHERE sl.created_at >= datetime('now', ?) ) ORDER BY d.updated_at ASC`, - ) - .all(`-${days} days`) as StaleDocument[]; + ) + .all(`-${days} days`), + "getStaleDocuments.rows", + ); } /** Return the most frequent search queries. */ export function getTopQueries(db: Database.Database, limit = 10): TopQuery[] { - return db - .prepare( - `SELECT query, COUNT(*) AS count, ROUND(AVG(latency_ms)) AS avgLatencyMs + const TopQuerySchema = z.object({ + query: z.string(), + count: z.number(), + avgLatencyMs: z.number(), + }); + return validateRows( + TopQuerySchema, + db + .prepare( + `SELECT query, COUNT(*) AS count, ROUND(AVG(latency_ms)) AS avgLatencyMs FROM search_log GROUP BY query ORDER BY count DESC LIMIT ?`, - ) - .all(limit) as TopQuery[]; + ) + .all(limit), + "getTopQueries.rows", + ); } /** Return search counts per day for the last N days. */ export function getSearchTrends(db: Database.Database, days = 30): SearchTrend[] { - return db - .prepare( - `SELECT DATE(created_at) AS date, COUNT(*) AS count + const SearchTrendSchema = z.object({ date: z.string(), count: z.number() }); + return validateRows( + SearchTrendSchema, + db + .prepare( + `SELECT DATE(created_at) AS date, COUNT(*) AS count FROM search_log WHERE created_at >= datetime('now', ?) GROUP BY DATE(created_at) ORDER BY date ASC`, - ) - .all(`-${days} days`) as SearchTrend[]; + ) + .all(`-${days} days`), + "getSearchTrends.rows", + ); } // --- Search-query analytics (search_queries table, migration v11) --- @@ -200,36 +240,55 @@ export interface SearchAnalytics { export function getSearchAnalytics(db: Database.Database, days = 30): SearchAnalytics { const since = `-${days} days`; - const totals = db - .prepare( - `SELECT COUNT(*) AS total, AVG(result_count) AS avg_results + const TotalsSchema = z.object({ total: z.number(), avg_results: z.number().nullable() }); + const totals = validateRow( + TotalsSchema, + db + .prepare( + `SELECT COUNT(*) AS total, AVG(result_count) AS avg_results FROM search_queries WHERE created_at >= datetime('now', ?)`, - ) - .get(since) as { total: number; avg_results: number | null }; + ) + .get(since), + "getSearchAnalytics.totals", + ); - const topQueries = db - .prepare( - `SELECT query, COUNT(*) AS count FROM search_queries + const QueryCountSchema = z.object({ query: z.string(), count: z.number() }); + const topQueries = validateRows( + QueryCountSchema, + db + .prepare( + `SELECT query, COUNT(*) AS count FROM search_queries WHERE created_at >= datetime('now', ?) GROUP BY query ORDER BY count DESC LIMIT 10`, - ) - .all(since) as Array<{ query: string; count: number }>; + ) + .all(since), + "getSearchAnalytics.topQueries", + ); - const zeroResultQueries = db - .prepare( - `SELECT query, COUNT(*) AS count FROM search_queries + const zeroResultQueries = validateRows( + QueryCountSchema, + db + .prepare( + `SELECT query, COUNT(*) AS count FROM search_queries WHERE result_count = 0 AND created_at >= datetime('now', ?) GROUP BY query ORDER BY count DESC LIMIT 10`, - ) - .all(since) as Array<{ query: string; count: number }>; + ) + .all(since), + "getSearchAnalytics.zeroResultQueries", + ); - const queriesPerDay = db - .prepare( - `SELECT DATE(created_at) AS date, COUNT(*) AS count FROM search_queries + const DateCountSchema = z.object({ date: z.string(), count: z.number() }); + const queriesPerDay = validateRows( + DateCountSchema, + db + .prepare( + `SELECT DATE(created_at) AS date, COUNT(*) AS count FROM search_queries WHERE created_at >= datetime('now', ?) GROUP BY DATE(created_at) ORDER BY date ASC`, - ) - .all(since) as Array<{ date: string; count: number }>; + ) + .all(since), + "getSearchAnalytics.queriesPerDay", + ); return { totalSearches: totals.total, @@ -248,14 +307,23 @@ export interface KnowledgeGap { /** Identify knowledge gaps: queries that consistently return zero results. */ export function getKnowledgeGaps(db: Database.Database, days = 30): KnowledgeGap[] { - return db - .prepare( - `SELECT query, COUNT(*) AS count, MAX(created_at) AS lastSearched + const KnowledgeGapSchema = z.object({ + query: z.string(), + count: z.number(), + lastSearched: z.string(), + }); + return validateRows( + KnowledgeGapSchema, + db + .prepare( + `SELECT query, COUNT(*) AS count, MAX(created_at) AS lastSearched FROM search_queries WHERE result_count = 0 AND created_at >= datetime('now', ?) GROUP BY query ORDER BY count DESC LIMIT 20`, - ) - .all(`-${days} days`) as KnowledgeGap[]; + ) + .all(`-${days} days`), + "getKnowledgeGaps.rows", + ); } diff --git a/src/core/ratings.ts b/src/core/ratings.ts index c345006..b7a9e52 100644 --- a/src/core/ratings.ts +++ b/src/core/ratings.ts @@ -1,6 +1,25 @@ import type Database from "better-sqlite3"; import { randomUUID } from "node:crypto"; +import { z } from "zod"; import { ValidationError, DocumentNotFoundError } from "../errors.js"; +import { validateRow, validateRows } from "../db/validate.js"; + +const RatingSummaryRowSchema = z.object({ + avg_rating: z.number().nullable(), + total: z.number(), + corrections: z.number(), +}); + +const RatingRowSchema = z.object({ + id: z.string(), + document_id: z.string(), + chunk_id: z.string().nullable(), + rating: z.number(), + feedback: z.string().nullable(), + suggested_correction: z.string().nullable(), + rated_by: z.string(), + created_at: z.string(), +}); export interface RateDocumentInput { documentId: string; @@ -36,9 +55,7 @@ export function rateDocument(db: Database.Database, input: RateDocumentInput): R } // Verify document exists - const doc = db.prepare("SELECT id FROM documents WHERE id = ?").get(input.documentId) as - | { id: string } - | undefined; + const doc = db.prepare("SELECT id FROM documents WHERE id = ?").get(input.documentId); if (!doc) { throw new DocumentNotFoundError(input.documentId); } @@ -47,7 +64,7 @@ export function rateDocument(db: Database.Database, input: RateDocumentInput): R if (input.chunkId) { const chunk = db .prepare("SELECT id FROM chunks WHERE id = ? AND document_id = ?") - .get(input.chunkId, input.documentId) as { id: string } | undefined; + .get(input.chunkId, input.documentId); if (!chunk) { throw new ValidationError( `Chunk '${input.chunkId}' not found for document '${input.documentId}'`, @@ -87,16 +104,16 @@ export function rateDocument(db: Database.Database, input: RateDocumentInput): R /** Get rating summary for a document. */ export function getDocumentRatings(db: Database.Database, documentId: string): RatingSummary { - const doc = db.prepare("SELECT id FROM documents WHERE id = ?").get(documentId) as - | { id: string } - | undefined; + const doc = db.prepare("SELECT id FROM documents WHERE id = ?").get(documentId); if (!doc) { throw new DocumentNotFoundError(documentId); } - const summary = db - .prepare( - ` + const summary = validateRow( + RatingSummaryRowSchema, + db + .prepare( + ` SELECT AVG(rating) AS avg_rating, COUNT(*) AS total, @@ -104,8 +121,10 @@ export function getDocumentRatings(db: Database.Database, documentId: string): R FROM ratings WHERE document_id = ? `, - ) - .get(documentId) as { avg_rating: number | null; total: number; corrections: number }; + ) + .get(documentId), + "getDocumentRatings", + ); return { documentId, @@ -117,25 +136,20 @@ export function getDocumentRatings(db: Database.Database, documentId: string): R /** Get all ratings for a document. */ export function listRatings(db: Database.Database, documentId: string): Rating[] { - const rows = db - .prepare( - ` + const rows = validateRows( + RatingRowSchema, + db + .prepare( + ` SELECT id, document_id, chunk_id, rating, feedback, suggested_correction, rated_by, created_at FROM ratings WHERE document_id = ? ORDER BY created_at DESC `, - ) - .all(documentId) as Array<{ - id: string; - document_id: string; - chunk_id: string | null; - rating: number; - feedback: string | null; - suggested_correction: string | null; - rated_by: string; - created_at: string; - }>; + ) + .all(documentId), + "listRatings", + ); return rows.map((r) => ({ id: r.id, diff --git a/src/core/search.ts b/src/core/search.ts index 70d1edd..9043776 100644 --- a/src/core/search.ts +++ b/src/core/search.ts @@ -1,7 +1,9 @@ import type Database from "better-sqlite3"; import type { EmbeddingProvider } from "../providers/embedding.js"; +import { z } from "zod"; import { withCorrelationId, createChildLogger } from "../logger.js"; import { validateCountRow } from "../utils/db-validation.js"; +import { validateRow, validateRows } from "../db/validate.js"; import { logSearch, recordSearchQuery } from "./analytics.js"; import { performance } from "node:perf_hooks"; @@ -100,12 +102,6 @@ export interface SearchResult { contextAfter?: ContextChunk[] | undefined; } -interface ChunkRow { - id: string; - content: string; - chunk_index: number; -} - // --------------------------------------------------------------------------- // Title boost multiplier: chunks whose document title contains any query word // receive this multiplicative boost to their final score. @@ -141,7 +137,8 @@ function reciprocalRankFusion(listA: SearchResult[], listB: SearchResult[]): Sea const map = new Map(); for (let i = 0; i < listA.length; i++) { - const r = listA[i]!; + const r = listA[i]; + if (r === undefined) continue; const key = r.chunkId; const existing = map.get(key); if (existing) { @@ -152,7 +149,8 @@ function reciprocalRankFusion(listA: SearchResult[], listB: SearchResult[]): Sea } for (let i = 0; i < listB.length; i++) { - const r = listB[i]!; + const r = listB[i]; + if (r === undefined) continue; const key = r.chunkId; const existing = map.get(key); if (existing) { @@ -202,29 +200,43 @@ function fetchContextChunks( documentId: string, contextSize: number, ): { before: ContextChunk[]; after: ContextChunk[] } { - const currentRow = db - .prepare(`SELECT chunk_index FROM chunks WHERE id = ? AND document_id = ?`) - .get(chunkId, documentId) as { chunk_index: number } | undefined; + const CurrentRowSchema = z.object({ chunk_index: z.number() }).optional(); + const currentRow = validateRow( + CurrentRowSchema, + db + .prepare(`SELECT chunk_index FROM chunks WHERE id = ? AND document_id = ?`) + .get(chunkId, documentId), + "fetchContextChunks.currentRow", + ); if (!currentRow) return { before: [], after: [] }; const idx = currentRow.chunk_index; - const beforeRows = db - .prepare( - `SELECT id, content, chunk_index FROM chunks + const ChunkRowSchema = z.object({ id: z.string(), content: z.string(), chunk_index: z.number() }); + const beforeRows = validateRows( + ChunkRowSchema, + db + .prepare( + `SELECT id, content, chunk_index FROM chunks WHERE document_id = ? AND chunk_index >= ? AND chunk_index < ? ORDER BY chunk_index ASC`, - ) - .all(documentId, Math.max(0, idx - contextSize), idx) as ChunkRow[]; + ) + .all(documentId, Math.max(0, idx - contextSize), idx), + "fetchContextChunks.beforeRows", + ); - const afterRows = db - .prepare( - `SELECT id, content, chunk_index FROM chunks + const afterRows = validateRows( + ChunkRowSchema, + db + .prepare( + `SELECT id, content, chunk_index FROM chunks WHERE document_id = ? AND chunk_index > ? AND chunk_index <= ? ORDER BY chunk_index ASC`, - ) - .all(documentId, idx, idx + contextSize) as ChunkRow[]; + ) + .all(documentId, idx, idx + contextSize), + "fetchContextChunks.afterRows", + ); return { before: beforeRows.map((r) => ({ @@ -284,14 +296,17 @@ function applyMMR(results: SearchResult[], diversity: number): SearchResult[] { const selected: SearchResult[] = []; const remaining = [...results]; - selected.push(remaining.shift()!); + const first = remaining.shift(); + if (!first) return selected; + selected.push(first); while (remaining.length > 0) { let bestIdx = 0; let bestMmrScore = -Infinity; for (let i = 0; i < remaining.length; i++) { - const candidate = remaining[i]!; + const candidate = remaining[i]; + if (candidate === undefined) continue; let maxSim = 0; for (const sel of selected) { const sim = 1 - Math.abs(candidate.score - sel.score); @@ -304,7 +319,9 @@ function applyMMR(results: SearchResult[], diversity: number): SearchResult[] { } } - selected.push(remaining.splice(bestIdx, 1)[0]!); + const picked = remaining.splice(bestIdx, 1)[0]; + if (picked === undefined) break; + selected.push(picked); } return selected; @@ -602,19 +619,20 @@ function vectorSearch( sql += ` ORDER BY candidates.distance`; - const rows = db.prepare(sql).all(...params) as Array<{ - chunk_id: string; - distance: number; - document_id: string; - chunk_content: string; - title: string; - source_type: string; - library: string | null; - version: string | null; - topic_id: string | null; - url: string | null; - avg_rating: number | null; - }>; + const VectorRowSchema = z.object({ + chunk_id: z.string(), + distance: z.number(), + document_id: z.string(), + chunk_content: z.string(), + title: z.string(), + source_type: z.string(), + library: z.string().nullable(), + version: z.string().nullable(), + topic_id: z.string().nullable(), + url: z.string().nullable(), + avg_rating: z.number().nullable().optional(), + }); + const rows = validateRows(VectorRowSchema, db.prepare(sql).all(...params), "vectorSearch.rows"); // totalCount: if we got fewer rows than the ANN candidate limit, we know // the true total (all candidates survived filtering). Otherwise the real @@ -637,7 +655,7 @@ function vectorSearch( topicId: row.topic_id, url: row.url, score: similarity, - avgRating: needsRatingJoin ? row.avg_rating : null, + avgRating: row.avg_rating ?? null, scoreExplanation: { method: "vector" as SearchMethod, rawScore: row.distance, @@ -709,18 +727,19 @@ function keywordSearch( params.push(limit); params.push(offset); - const rows = db.prepare(sql).all(...params) as Array<{ - chunk_id: string; - document_id: string; - chunk_content: string; - title: string; - source_type: string; - library: string | null; - version: string | null; - topic_id: string | null; - url: string | null; - avg_rating: number | null; - }>; + const KeywordRowSchema = z.object({ + chunk_id: z.string(), + document_id: z.string(), + chunk_content: z.string(), + title: z.string(), + source_type: z.string(), + library: z.string().nullable(), + version: z.string().nullable(), + topic_id: z.string().nullable(), + url: z.string().nullable(), + avg_rating: z.number().nullable().optional(), + }); + const rows = validateRows(KeywordRowSchema, db.prepare(sql).all(...params), "keywordSearch.rows"); const totalCount = lazyCount( db, @@ -747,7 +766,7 @@ function keywordSearch( topicId: row.topic_id, url: row.url, score: rankScore, - avgRating: needsRatingJoin ? row.avg_rating : null, + avgRating: row.avg_rating ?? null, scoreExplanation: { method: "keyword" as SearchMethod, rawScore: rankScore, @@ -784,14 +803,19 @@ function attachRatings(db: Database.Database, results: SearchResult[]): SearchRe if (results.length === 0) return results; const ids = [...new Set(results.map((r) => r.documentId))]; const placeholders = ids.map(() => "?").join(", "); - const rows = db - .prepare( - `SELECT document_id, AVG(rating) AS avg_rating + const RatingRowSchema = z.object({ document_id: z.string(), avg_rating: z.number().nullable() }); + const rows = validateRows( + RatingRowSchema, + db + .prepare( + `SELECT document_id, AVG(rating) AS avg_rating FROM ratings WHERE document_id IN (${placeholders}) GROUP BY document_id`, - ) - .all(...ids) as Array<{ document_id: string; avg_rating: number | null }>; + ) + .all(...ids), + "attachRatings.rows", + ); const ratingMap = new Map(rows.map((r) => [r.document_id, r.avg_rating])); return results.map((r) => ({ ...r, avgRating: ratingMap.get(r.documentId) ?? null })); } @@ -852,19 +876,20 @@ function fts5Search( params.push(limit); params.push(offset); - let rows = db.prepare(sql).all(...params) as Array<{ - chunk_id: string; - document_id: string; - chunk_content: string; - title: string; - source_type: string; - library: string | null; - version: string | null; - topic_id: string | null; - url: string | null; - fts_rank: number; - avg_rating: number | null; - }>; + const Fts5RowSchema = z.object({ + chunk_id: z.string(), + document_id: z.string(), + chunk_content: z.string(), + title: z.string(), + source_type: z.string(), + library: z.string().nullable(), + version: z.string().nullable(), + topic_id: z.string().nullable(), + url: z.string().nullable(), + fts_rank: z.number(), + avg_rating: z.number().nullable().optional(), + }); + let rows = validateRows(Fts5RowSchema, db.prepare(sql).all(...params), "fts5Search.rows"); // If AND returned nothing, retry with OR for recall if (rows.length === 0 && words.length > 1) { @@ -905,7 +930,7 @@ function fts5Search( orParams.push(limit); orParams.push(offset); - rows = db.prepare(orSql).all(...orParams) as typeof rows; + rows = validateRows(Fts5RowSchema, db.prepare(orSql).all(...orParams), "fts5Search.orRows"); } const totalCount = lazyCount( @@ -933,7 +958,7 @@ function fts5Search( topicId: row.topic_id, url: row.url, score: bm25Score, - avgRating: needsRatingJoin ? row.avg_rating : null, + avgRating: row.avg_rating ?? null, scoreExplanation: { method: "fts5" as SearchMethod, rawScore: row.fts_rank, diff --git a/src/core/tags.ts b/src/core/tags.ts index f37180d..1dc38e1 100644 --- a/src/core/tags.ts +++ b/src/core/tags.ts @@ -1,9 +1,53 @@ import type Database from "better-sqlite3"; import { randomUUID } from "node:crypto"; +import { z } from "zod"; import { DocumentNotFoundError, ValidationError } from "../errors.js"; +import { validateRow, validateRows } from "../db/validate.js"; import { createChildLogger } from "../logger.js"; import type { Document } from "./documents.js"; +const TagRowSchema = z.object({ + id: z.string(), + name: z.string(), + created_at: z.string(), +}); + +const TagWithCountRowSchema = z.object({ + id: z.string(), + name: z.string(), + created_at: z.string(), + document_count: z.number(), +}); + +const DocTagRowSchema = z.object({ + document_id: z.string(), + id: z.string(), + name: z.string(), + created_at: z.string(), +}); + +const DocumentRowSchema = z.object({ + id: z.string(), + source_type: z.string(), + library: z.string().nullable(), + version: z.string().nullable(), + topic_id: z.string().nullable(), + title: z.string(), + content: z.string(), + url: z.string().nullable(), + content_hash: z.string().nullable(), + submitted_by: z.string(), + created_at: z.string(), + updated_at: z.string(), +}); + +const NameRowSchema = z.object({ name: z.string() }); + +const TitleContentRowSchema = z.object({ + title: z.string(), + content: z.string(), +}); + const STOPWORDS = new Set([ "the", "a", @@ -138,13 +182,11 @@ export function createTag(db: Database.Database, name: string): Tag { return { id, name: trimmed, createdAt: new Date().toISOString() }; } - const existing = db - .prepare("SELECT id, name, created_at FROM tags WHERE name = ?") - .get(trimmed) as { - id: string; - name: string; - created_at: string; - }; + const existing = validateRow( + TagRowSchema, + db.prepare("SELECT id, name, created_at FROM tags WHERE name = ?").get(trimmed), + "createTag.existing", + ); log.info({ name: trimmed }, "Tag already exists, returning existing"); return { id: existing.id, name: existing.name, createdAt: existing.created_at }; @@ -159,20 +201,19 @@ export function deleteTag(db: Database.Database, tagId: string): void { /** List all tags with their document counts. */ export function listTags(db: Database.Database): TagWithCount[] { - const rows = db - .prepare( - `SELECT t.id, t.name, t.created_at, COUNT(dt.document_id) AS document_count + const rows = validateRows( + TagWithCountRowSchema, + db + .prepare( + `SELECT t.id, t.name, t.created_at, COUNT(dt.document_id) AS document_count FROM tags t LEFT JOIN document_tags dt ON dt.tag_id = t.id GROUP BY t.id ORDER BY t.name`, - ) - .all() as Array<{ - id: string; - name: string; - created_at: string; - document_count: number; - }>; + ) + .all(), + "listTags", + ); return rows.map((row) => ({ id: row.id, @@ -229,20 +270,19 @@ export function getDocumentTagsBatch( ): Map { if (documentIds.length === 0) return new Map(); const placeholders = documentIds.map(() => "?").join(", "); - const rows = db - .prepare( - `SELECT dt.document_id, t.id, t.name, t.created_at + const rows = validateRows( + DocTagRowSchema, + db + .prepare( + `SELECT dt.document_id, t.id, t.name, t.created_at FROM tags t JOIN document_tags dt ON dt.tag_id = t.id WHERE dt.document_id IN (${placeholders}) ORDER BY t.name`, - ) - .all(...documentIds) as Array<{ - document_id: string; - id: string; - name: string; - created_at: string; - }>; + ) + .all(...documentIds), + "getDocumentTagsBatch", + ); const result = new Map(); for (const row of rows) { @@ -255,19 +295,19 @@ export function getDocumentTagsBatch( /** Get all tags for a specific document. */ export function getDocumentTags(db: Database.Database, documentId: string): Tag[] { - const rows = db - .prepare( - `SELECT t.id, t.name, t.created_at + const rows = validateRows( + TagRowSchema, + db + .prepare( + `SELECT t.id, t.name, t.created_at FROM tags t JOIN document_tags dt ON dt.tag_id = t.id WHERE dt.document_id = ? ORDER BY t.name`, - ) - .all(documentId) as Array<{ - id: string; - name: string; - created_at: string; - }>; + ) + .all(documentId), + "getDocumentTags", + ); return rows.map((row) => ({ id: row.id, @@ -310,20 +350,7 @@ export function getDocumentsByTag( `; const params = [...normalized, normalized.length, limit, offset]; - const rows = db.prepare(sql).all(...params) as Array<{ - id: string; - source_type: string; - library: string | null; - version: string | null; - topic_id: string | null; - title: string; - content: string; - url: string | null; - content_hash: string | null; - submitted_by: string; - created_at: string; - updated_at: string; - }>; + const rows = validateRows(DocumentRowSchema, db.prepare(sql).all(...params), "getDocumentsByTag"); log.info({ tagNames: normalized, resultCount: rows.length }, "Documents retrieved by tags"); @@ -385,13 +412,12 @@ export function suggestTags( const log = createChildLogger({ operation: "suggestTags" }); const limit = maxSuggestions ?? 5; - const row = db.prepare("SELECT title, content FROM documents WHERE id = ?").get(documentId) as - | { title: string; content: string } - | undefined; + const raw = db.prepare("SELECT title, content FROM documents WHERE id = ?").get(documentId); - if (!row) { + if (!raw) { throw new DocumentNotFoundError(documentId); } + const row = validateRow(TitleContentRowSchema, raw, "suggestTags.document"); const fullText = `${row.title} ${row.content}`; const tokens = tokenize(fullText); @@ -407,20 +433,26 @@ export function suggestTags( // Get existing tags already on this document (to exclude them) const existingTags = new Set( - ( + validateRows( + NameRowSchema, db .prepare( `SELECT t.name FROM tags t JOIN document_tags dt ON dt.tag_id = t.id WHERE dt.document_id = ?`, ) - .all(documentId) as Array<{ name: string }> + .all(documentId), + "suggestTags.existingTags", ).map((r) => r.name), ); // Get all known tags in the system for boosting const knownTags = new Set( - (db.prepare("SELECT name FROM tags").all() as Array<{ name: string }>).map((r) => r.name), + validateRows( + NameRowSchema, + db.prepare("SELECT name FROM tags").all(), + "suggestTags.knownTags", + ).map((r) => r.name), ); // Score each term: TF normalized + boost for known tags diff --git a/src/core/versioning.ts b/src/core/versioning.ts index 756fbf5..364a273 100644 --- a/src/core/versioning.ts +++ b/src/core/versioning.ts @@ -1,19 +1,55 @@ import type Database from "better-sqlite3"; import { randomUUID } from "node:crypto"; +import { z } from "zod"; import type { EmbeddingProvider } from "../providers/embedding.js"; import { DocumentNotFoundError } from "../errors.js"; import { getDocument, updateDocument } from "./documents.js"; import { getLogger } from "../logger.js"; +import { validateRow, validateRows } from "../db/validate.js"; export const MAX_VERSIONS_DEFAULT = 10; +export interface VersionMetadata { + library: string | null; + version: string | null; + url: string | null; + topicId: string | null; + sourceType: string; +} + +export const VersionMetadataSchema = z.object({ + library: z.string().nullable(), + version: z.string().nullable(), + url: z.string().nullable(), + topicId: z.string().nullable(), + sourceType: z.string(), +}); + +const MaxVersionRowSchema = z.object({ + max_version: z.number().nullable(), +}); + +const VersionRowSchema = z.object({ + id: z.string(), + document_id: z.string(), + version: z.number(), + title: z.string(), + content: z.string(), + metadata: z.string().nullable(), + created_at: z.string(), +}); + +const CountRowSchema = z.object({ + cnt: z.number(), +}); + export interface DocumentVersion { id: string; documentId: string; version: number; title: string; content: string; - metadata: Record | null; + metadata: VersionMetadata | null; createdAt: string; } @@ -23,12 +59,15 @@ export function saveVersion(db: Database.Database, documentId: string): Document const row = db .prepare("SELECT MAX(version) as max_version FROM document_versions WHERE document_id = ?") - .get(documentId) as { max_version: number | null } | undefined; + .get(documentId); - const nextVersion = (row?.max_version ?? 0) + 1; + const maxVersion = row + ? (validateRow(MaxVersionRowSchema, row, "saveVersion.maxVersion").max_version ?? 0) + : 0; + const nextVersion = maxVersion + 1; const id = randomUUID(); - const metadata: Record = { + const metadata: VersionMetadata = { library: doc.library, version: doc.version, url: doc.url, @@ -57,20 +96,16 @@ export function getVersionHistory(db: Database.Database, documentId: string): Do // Verify document exists getDocument(db, documentId); - const rows = db - .prepare( - `SELECT id, document_id, version, title, content, metadata, created_at + const rows = validateRows( + VersionRowSchema, + db + .prepare( + `SELECT id, document_id, version, title, content, metadata, created_at FROM document_versions WHERE document_id = ? ORDER BY version DESC`, - ) - .all(documentId) as Array<{ - id: string; - document_id: string; - version: number; - title: string; - content: string; - metadata: string | null; - created_at: string; - }>; + ) + .all(documentId), + "getVersionHistory", + ); return rows.map(mapRow); } @@ -81,27 +116,18 @@ export function getVersion( documentId: string, version: number, ): DocumentVersion { - const row = db + const raw = db .prepare( `SELECT id, document_id, version, title, content, metadata, created_at FROM document_versions WHERE document_id = ? AND version = ?`, ) - .get(documentId, version) as - | { - id: string; - document_id: string; - version: number; - title: string; - content: string; - metadata: string | null; - created_at: string; - } - | undefined; - - if (!row) { + .get(documentId, version); + + if (!raw) { throw new DocumentNotFoundError(`Version ${version} of document ${documentId}`); } + const row = validateRow(VersionRowSchema, raw, "getVersion"); return mapRow(row); } @@ -117,7 +143,7 @@ export async function rollbackToVersion( // Save current state as a new version before rollback saveVersion(db, documentId); - const metadata = target.metadata as Record | null; + const metadata = target.metadata; // Restore the document to the target version's state await updateDocument(db, provider, documentId, { @@ -141,11 +167,16 @@ export function pruneVersions( documentId: string, maxVersions: number = MAX_VERSIONS_DEFAULT, ): number { - const countResult = db + const raw = db .prepare(`SELECT COUNT(*) AS cnt FROM document_versions WHERE document_id = ?`) - .get(documentId) as { cnt: number } | undefined; + .get(documentId); - if (!countResult || countResult.cnt <= maxVersions) { + if (!raw) { + return 0; + } + const countResult = validateRow(CountRowSchema, raw, "pruneVersions.count"); + + if (countResult.cnt <= maxVersions) { return 0; } @@ -162,19 +193,12 @@ export function pruneVersions( return result.changes; } -function mapRow(row: { - id: string; - document_id: string; - version: number; - title: string; - content: string; - metadata: string | null; - created_at: string; -}): DocumentVersion { - let metadata: Record | null = null; +function mapRow(row: z.infer): DocumentVersion { + let metadata: VersionMetadata | null = null; if (row.metadata) { try { - metadata = JSON.parse(row.metadata) as Record; + const parsed: unknown = JSON.parse(row.metadata); + metadata = validateRow(VersionMetadataSchema, parsed, "document_versions.metadata"); } catch (err) { getLogger().warn( { err, versionId: row.id }, diff --git a/src/core/webhooks.ts b/src/core/webhooks.ts index c6c7e77..07e9209 100644 --- a/src/core/webhooks.ts +++ b/src/core/webhooks.ts @@ -64,6 +64,19 @@ export interface Webhook { failureCount: number; } +/** + * Payload sent to webhook subscribers. + * + * The `data` field shape varies by event type: + * - "document.created" / "document.updated" / "document.deleted": + * { documentId: string; title: string; library?: string; version?: string } + * - "document.rated": + * { documentId: string; rating: number; feedback?: string } + * - "search.executed": + * { query: string; resultCount: number; topicId?: string } + * + * Kept as Record to avoid a breaking change to the public API. + */ export interface WebhookPayload { event: WebhookEvent; timestamp: string; @@ -201,9 +214,10 @@ export async function createWebhook( validateEvents(events); await validateWebhookUrlSsrf(url); - if (!process.env[SECRET_KEY_ENV]) { - getLogger().warn( - "LIBSCOPE_SECRET_KEY is not set — webhook secrets stored in plaintext. Set this env var to enable at-rest encryption.", + if (secret && !process.env[SECRET_KEY_ENV]) { + throw new ValidationError( + "Cannot store webhook secret: LIBSCOPE_SECRET_KEY environment variable is not set. " + + "Set it to enable at-rest encryption, or register the webhook without a secret.", ); } @@ -261,6 +275,13 @@ export async function updateWebhook( validateEvents(updates.events); } + if (updates.secret && !process.env[SECRET_KEY_ENV]) { + throw new ValidationError( + "Cannot store webhook secret: LIBSCOPE_SECRET_KEY environment variable is not set. " + + "Set it to enable at-rest encryption, or update the webhook without a secret.", + ); + } + const url = updates.url ?? existing.url; const events = updates.events ?? existing.events; const secret = diff --git a/src/core/workspace.ts b/src/core/workspace.ts index c284769..cfb2c14 100644 --- a/src/core/workspace.ts +++ b/src/core/workspace.ts @@ -10,6 +10,7 @@ import { import { join } from "node:path"; import { homedir } from "node:os"; import { ValidationError } from "../errors.js"; +import { getLogger } from "../logger.js"; export interface Workspace { name: string; @@ -110,7 +111,11 @@ export function listWorkspaces(): Workspace[] { createdAt = (typeof metaObj.createdAt === "string" ? metaObj.createdAt : undefined) ?? statSync(wsPath).birthtime.toISOString(); - } catch { + } catch (err) { + getLogger().warn( + { err, path: metaPath }, + "Failed to parse workspace file — using defaults", + ); createdAt = statSync(wsPath).birthtime.toISOString(); } } else { @@ -144,8 +149,11 @@ export function getActiveWorkspace(): string { try { const config = JSON.parse(readFileSync(projectConfig, "utf-8")) as { workspace?: string }; if (config.workspace) return config.workspace; - } catch { - // ignore parse errors + } catch (err) { + getLogger().warn( + { err, path: projectConfig }, + "Failed to parse workspace file — using defaults", + ); } } @@ -154,8 +162,11 @@ export function getActiveWorkspace(): string { try { const active = readFileSync(activeFile, "utf-8").trim(); if (active) return active; - } catch { - // ignore read errors + } catch (err) { + getLogger().warn( + { err, path: activeFile }, + "Failed to read active workspace file — using defaults", + ); } } diff --git a/src/db/connection.ts b/src/db/connection.ts index 43a3853..ba49aed 100644 --- a/src/db/connection.ts +++ b/src/db/connection.ts @@ -23,10 +23,8 @@ export function getDatabase(dbPath?: string): Database.Database { if (db) { if (cachedPath && cachedPath !== resolvedPath) { - const log = getLogger(); - log.warn( - { existingPath: cachedPath, requestedPath: resolvedPath }, - "getDatabase() called with a different path than the existing connection; returning cached connection. Call closeDatabase() first to connect to a different database.", + throw new DatabaseError( + `getDatabase() called with path "${resolvedPath}" but a connection to "${cachedPath}" is already open. Call closeDatabase() first to switch databases.`, ); } return db; diff --git a/src/db/validate.ts b/src/db/validate.ts new file mode 100644 index 0000000..933985b --- /dev/null +++ b/src/db/validate.ts @@ -0,0 +1,14 @@ +import { z } from "zod"; +import { DatabaseError } from "../errors.js"; + +export function validateRow(schema: z.ZodType, row: unknown, context: string): T { + const result = schema.safeParse(row); + if (!result.success) { + throw new DatabaseError(`DB row validation failed in ${context}: ${result.error.message}`); + } + return result.data; +} + +export function validateRows(schema: z.ZodType, rows: unknown[], context: string): T[] { + return rows.map((row) => validateRow(schema, row, context)); +} diff --git a/tests/unit/api.test.ts b/tests/unit/api.test.ts index 984d71c..f703715 100644 --- a/tests/unit/api.test.ts +++ b/tests/unit/api.test.ts @@ -727,20 +727,25 @@ describe("API routes", () => { describe("Webhooks API", () => { it("should create a webhook via POST /api/v1/webhooks", async () => { - const req = createMockReq("POST", "/api/v1/webhooks", { - url: "https://example.com/hook", - events: ["document.created"], - secret: "my-secret", - }); - const { res, getStatus, getBody } = createMockRes(); - - await handleRequest(req, res, db, provider); - - expect(getStatus()).toBe(201); - const parsed = parseResponse(getBody()); - expect(parsed.data.url).toBe("https://example.com/hook"); - expect(parsed.data.hasSecret).toBe(true); - expect(parsed.data.secret).toBeUndefined(); + process.env.LIBSCOPE_SECRET_KEY = "test-key"; + try { + const req = createMockReq("POST", "/api/v1/webhooks", { + url: "https://example.com/hook", + events: ["document.created"], + secret: "my-secret", + }); + const { res, getStatus, getBody } = createMockRes(); + + await handleRequest(req, res, db, provider); + + expect(getStatus()).toBe(201); + const parsed = parseResponse(getBody()); + expect(parsed.data.url).toBe("https://example.com/hook"); + expect(parsed.data.hasSecret).toBe(true); + expect(parsed.data.secret).toBeUndefined(); + } finally { + delete process.env.LIBSCOPE_SECRET_KEY; + } }); it("should list webhooks via GET /api/v1/webhooks", async () => { @@ -899,6 +904,17 @@ describe("middleware — API key authentication", () => { expect(checkApiKey(req, res)).toBe(true); }); + it("should reject a token of different length than the API key", () => { + process.env.LIBSCOPE_API_KEY = "test-key"; + const req = createMockReq("GET", "/api/v1/health"); + req.headers.authorization = "Bearer short"; + const { res, getStatus, getBody } = createMockRes(); + expect(checkApiKey(req, res)).toBe(false); + expect(getStatus()).toBe(401); + const parsed = parseResponse(getBody()); + expect(parsed.error.code).toBe("UNAUTHORIZED"); + }); + it("should reject non-Bearer authorization schemes", () => { process.env.LIBSCOPE_API_KEY = "test-key"; const req = createMockReq("GET", "/api/v1/health"); diff --git a/tests/unit/config-save.test.ts b/tests/unit/config-save.test.ts new file mode 100644 index 0000000..f29867e --- /dev/null +++ b/tests/unit/config-save.test.ts @@ -0,0 +1,107 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { mkdirSync, readFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { randomUUID } from "node:crypto"; +import { initLogger } from "../../src/logger.js"; +import type { LibScopeConfig } from "../../src/config.js"; + +// Create a unique temp HOME for each test run — must be initialized before module load +let tempHome: string = join(tmpdir(), `libscope-config-save-test-${process.pid}`); +mkdirSync(tempHome, { recursive: true }); + +vi.mock("node:os", async (importOriginal) => { + const orig = await importOriginal(); + return { + ...orig, + homedir: (): string => tempHome, + }; +}); + +// Dynamic import after mock is set up +const { saveUserConfig, invalidateConfigCache } = await import("../../src/config.js"); + +function readSavedConfig(): LibScopeConfig { + const written = readFileSync(join(tempHome, ".libscope", "config.json"), "utf-8"); + return JSON.parse(written) as LibScopeConfig; +} + +describe("saveUserConfig credential stripping", () => { + beforeEach(() => { + initLogger("silent"); + tempHome = join(tmpdir(), `libscope-config-save-test-${randomUUID()}`); + mkdirSync(tempHome, { recursive: true }); + invalidateConfigCache(); + }); + + afterEach(() => { + try { + rmSync(tempHome, { recursive: true, force: true }); + } catch { + // ignore cleanup errors + } + }); + + it("should not persist embedding.openaiApiKey to disk", () => { + saveUserConfig({ embedding: { provider: "openai", openaiApiKey: "sk-test-key" } }); + + const written = readFileSync(join(tempHome, ".libscope", "config.json"), "utf-8"); + const parsed = readSavedConfig(); + + expect(parsed.embedding.provider).toBe("openai"); + expect(parsed.embedding.openaiApiKey).toBeUndefined(); + expect(written).not.toContain("sk-test-key"); + }); + + it("should not persist llm.openaiApiKey to disk", () => { + saveUserConfig({ llm: { provider: "openai", openaiApiKey: "sk-llm-key" } }); + + const written = readFileSync(join(tempHome, ".libscope", "config.json"), "utf-8"); + const parsed = readSavedConfig(); + + expect(parsed.llm?.openaiApiKey).toBeUndefined(); + expect(written).not.toContain("sk-llm-key"); + }); + + it("should not persist llm.anthropicApiKey to disk", () => { + saveUserConfig({ llm: { provider: "anthropic", anthropicApiKey: "sk-ant-key" } }); + + const written = readFileSync(join(tempHome, ".libscope", "config.json"), "utf-8"); + const parsed = readSavedConfig(); + + expect(parsed.llm?.anthropicApiKey).toBeUndefined(); + expect(written).not.toContain("sk-ant-key"); + }); + + it("should strip all credential fields simultaneously", () => { + saveUserConfig({ + embedding: { provider: "openai", openaiApiKey: "sk-embed" }, + llm: { provider: "openai", openaiApiKey: "sk-llm", anthropicApiKey: "sk-ant" }, + }); + + const written = readFileSync(join(tempHome, ".libscope", "config.json"), "utf-8"); + + expect(written).not.toContain("sk-embed"); + expect(written).not.toContain("sk-llm"); + expect(written).not.toContain("sk-ant"); + expect(written).not.toContain("ApiKey"); + }); + + it("should preserve non-credential config fields", () => { + saveUserConfig({ + embedding: { + provider: "openai", + openaiModel: "text-embedding-3-large", + openaiApiKey: "sk-test", + }, + logging: { level: "debug" }, + }); + + const parsed = readSavedConfig(); + + expect(parsed.embedding.provider).toBe("openai"); + expect(parsed.embedding.openaiModel).toBe("text-embedding-3-large"); + expect(parsed.logging.level).toBe("debug"); + expect(parsed.embedding.openaiApiKey).toBeUndefined(); + }); +}); diff --git a/tests/unit/webhooks.test.ts b/tests/unit/webhooks.test.ts index d759133..1c37167 100644 --- a/tests/unit/webhooks.test.ts +++ b/tests/unit/webhooks.test.ts @@ -52,13 +52,28 @@ describe("webhooks", () => { }); it("should create a webhook with a secret", async () => { - const webhook = await createWebhook( - db, - "https://example.com/hook", - ["document.created"], - "my-secret", - ); - expect(webhook.secret).toBe("my-secret"); + process.env.LIBSCOPE_SECRET_KEY = "test-key"; + try { + const webhook = await createWebhook( + db, + "https://example.com/hook", + ["document.created"], + "my-secret", + ); + expect(webhook.secret).toBeTruthy(); + } finally { + delete process.env.LIBSCOPE_SECRET_KEY; + } + }); + + it("should throw ValidationError when secret is provided but LIBSCOPE_SECRET_KEY is not set", async () => { + delete process.env.LIBSCOPE_SECRET_KEY; + await expect( + createWebhook(db, "https://example.com/hook", ["document.created"], "my-secret"), + ).rejects.toThrow(ValidationError); + await expect( + createWebhook(db, "https://example.com/hook", ["document.created"], "my-secret"), + ).rejects.toThrow(/LIBSCOPE_SECRET_KEY/); }); it("should create a webhook with multiple events", async () => { @@ -166,8 +181,24 @@ describe("webhooks", () => { it("should update webhook secret", async () => { const webhook = await createWebhook(db, "https://example.com/hook", ["document.created"]); - const updated = await updateWebhook(db, webhook.id, { secret: "new-secret" }); - expect(updated.secret).toBe("new-secret"); + process.env.LIBSCOPE_SECRET_KEY = "test-key"; + try { + const updated = await updateWebhook(db, webhook.id, { secret: "new-secret" }); + expect(updated.secret).toBeTruthy(); + } finally { + delete process.env.LIBSCOPE_SECRET_KEY; + } + }); + + it("should throw ValidationError when updating secret without LIBSCOPE_SECRET_KEY", async () => { + delete process.env.LIBSCOPE_SECRET_KEY; + const webhook = await createWebhook(db, "https://example.com/hook", ["document.created"]); + await expect(updateWebhook(db, webhook.id, { secret: "new-secret" })).rejects.toThrow( + ValidationError, + ); + await expect(updateWebhook(db, webhook.id, { secret: "new-secret" })).rejects.toThrow( + /LIBSCOPE_SECRET_KEY/, + ); }); it("should reject invalid URL on update", async () => { @@ -287,7 +318,9 @@ describe("webhooks", () => { const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200 }); vi.stubGlobal("fetch", mockFetch); + process.env.LIBSCOPE_SECRET_KEY = "test-key"; await createWebhook(db, "https://example.com/hook", ["document.created"], "my-secret"); + delete process.env.LIBSCOPE_SECRET_KEY; fireWebhooks(db, "document.created", { docId: "123" }); await vi.waitFor(() => { diff --git a/tests/unit/zod-validate.test.ts b/tests/unit/zod-validate.test.ts new file mode 100644 index 0000000..0993417 --- /dev/null +++ b/tests/unit/zod-validate.test.ts @@ -0,0 +1,62 @@ +import { describe, it, expect } from "vitest"; +import { z } from "zod"; +import { validateRow, validateRows } from "../../src/db/validate.js"; +import { DatabaseError } from "../../src/errors.js"; + +const TestSchema = z.object({ id: z.string(), count: z.number() }); + +describe("validateRow (Zod)", () => { + it("returns parsed data for a valid row", () => { + const result = validateRow(TestSchema, { id: "abc", count: 5 }, "test"); + expect(result).toEqual({ id: "abc", count: 5 }); + }); + + it("strips unknown keys", () => { + const result = validateRow(TestSchema, { id: "abc", count: 5, extra: true }, "test"); + expect(result).toEqual({ id: "abc", count: 5 }); + }); + + it("throws DatabaseError when a required field is missing", () => { + expect(() => validateRow(TestSchema, { id: "abc" }, "myContext")).toThrow(DatabaseError); + expect(() => validateRow(TestSchema, { id: "abc" }, "myContext")).toThrow("myContext"); + }); + + it("throws DatabaseError when a field has the wrong type", () => { + expect(() => validateRow(TestSchema, { id: "abc", count: "notANumber" }, "ctx")).toThrow( + DatabaseError, + ); + }); + + it("throws DatabaseError for null input", () => { + expect(() => validateRow(TestSchema, null, "ctx")).toThrow(DatabaseError); + }); + + it("throws DatabaseError for undefined input", () => { + expect(() => validateRow(TestSchema, undefined, "ctx")).toThrow(DatabaseError); + }); + + it("works with optional schema", () => { + const OptSchema = TestSchema.optional(); + expect(validateRow(OptSchema, undefined, "ctx")).toBeUndefined(); + expect(validateRow(OptSchema, { id: "x", count: 1 }, "ctx")).toEqual({ id: "x", count: 1 }); + }); +}); + +describe("validateRows (Zod)", () => { + it("returns parsed data for valid rows", () => { + const rows = [ + { id: "a", count: 1 }, + { id: "b", count: 2 }, + ]; + expect(validateRows(TestSchema, rows, "test")).toEqual(rows); + }); + + it("returns empty array for empty input", () => { + expect(validateRows(TestSchema, [], "test")).toEqual([]); + }); + + it("throws DatabaseError if any row is invalid", () => { + const rows = [{ id: "a", count: 1 }, { id: "b" }]; + expect(() => validateRows(TestSchema, rows, "ctx")).toThrow(DatabaseError); + }); +});