diff --git a/src/generator/WorkerPool.test.ts b/src/generator/WorkerPool.test.ts new file mode 100644 index 0000000..ff093c5 --- /dev/null +++ b/src/generator/WorkerPool.test.ts @@ -0,0 +1,177 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { + computeShards, + generateDeterministicShards, + createShardTasks, + ShardRange, + ShardTask, +} from "./WorkerPool"; + +describe("WorkerPool Shard Computation", () => { + describe("computeShards", () => { + it("should divide 10 items across 2 workers evenly", () => { + const shards = computeShards(10, 2, "seed123"); + + expect(shards).toHaveLength(2); + expect(shards[0]).toEqual({ workerId: 0, start: 0, end: 5, count: 5 }); + expect(shards[1]).toEqual({ workerId: 1, start: 5, end: 10, count: 5 }); + }); + + it("should handle uneven division with remainder", () => { + const shards = computeShards(10, 3, "seed123"); + + expect(shards).toHaveLength(3); + expect(shards[0]).toEqual({ workerId: 0, start: 0, end: 4, count: 4 }); + expect(shards[1]).toEqual({ workerId: 1, start: 4, end: 7, count: 3 }); + expect(shards[2]).toEqual({ workerId: 2, start: 7, end: 10, count: 3 }); + }); + + it("should handle 1 worker (no sharding)", () => { + const shards = computeShards(100, 1, "seed123"); + + expect(shards).toHaveLength(1); + expect(shards[0]).toEqual({ workerId: 0, start: 0, end: 100, count: 100 }); + }); + + it("should handle more workers than items", () => { + const shards = computeShards(3, 5, "seed123"); + + expect(shards).toHaveLength(3); + expect(shards[0].count).toBe(1); + expect(shards[1].count).toBe(1); + expect(shards[2].count).toBe(1); + }); + + it("should produce contiguous ranges", () => { + const shards = computeShards(100, 4, "seed123"); + + for (let i = 1; i < shards.length; i++) { + expect(shards[i].start).toBe(shards[i - 1].end); + } + expect(shards[shards.length - 1].end).toBe(100); + }); + + it("should cover the entire range (start to end)", () => { + const totalCount = 50; + const shards = 
computeShards(totalCount, 4, "seed456"); + + const totalGenerated = shards.reduce((sum, s) => sum + s.count, 0); + expect(totalGenerated).toBe(totalCount); + }); + }); + + describe("generateDeterministicShards", () => { + it("should generate shards for multiple collections", () => { + const collections = [ + { collectionName: "users", count: 100 }, + { collectionName: "orders", count: 200 }, + ]; + + const result = generateDeterministicShards(collections, 4, "seed789"); + + expect(result.has("users")).toBe(true); + expect(result.has("orders")).toBe(true); + + const userShards = result.get("users")!; + const orderShards = result.get("orders")!; + + expect(userShards.reduce((sum, s) => sum + s.count, 0)).toBe(100); + expect(orderShards.reduce((sum, s) => sum + s.count, 0)).toBe(200); + }); + + it("should be deterministic for same seed", () => { + const collections = [ + { collectionName: "users", count: 50 }, + ]; + + const result1 = generateDeterministicShards(collections, 3, "fixed-seed"); + const result2 = generateDeterministicShards(collections, 3, "fixed-seed"); + + const shards1 = JSON.stringify(Array.from(result1.entries())); + const shards2 = JSON.stringify(Array.from(result2.entries())); + + expect(shards1).toBe(shards2); + }); + }); + + describe("createShardTasks", () => { + it("should create tasks for all collections and shards", () => { + const collections = [ + { collectionName: "users", count: 10 }, + { collectionName: "orders", count: 20 }, + ]; + + const tasks = createShardTasks(collections, 2, "seed123"); + + const userTasks = tasks.filter((t) => t.collectionIndex === 0); + const orderTasks = tasks.filter((t) => t.collectionIndex === 1); + + expect(userTasks.length).toBe(2); + expect(orderTasks.length).toBe(2); + + expect(userTasks.reduce((sum, t) => sum + t.count, 0)).toBe(10); + expect(orderTasks.reduce((sum, t) => sum + t.count, 0)).toBe(20); + }); + + it("should assign correct rangeStart values", () => { + const collections = [ + { 
collectionName: "items", count: 100 }, + ]; + + const tasks = createShardTasks(collections, 4, "seed123"); + + expect(tasks[0].rangeStart).toBe(0); + expect(tasks[0].count).toBe(25); + + expect(tasks[1].rangeStart).toBe(25); + expect(tasks[1].count).toBe(25); + + expect(tasks[2].rangeStart).toBe(50); + expect(tasks[2].count).toBe(25); + + expect(tasks[3].rangeStart).toBe(75); + expect(tasks[3].count).toBe(25); + }); + + it("should produce task counts that sum to total documents", () => { + const collections = [ + { collectionName: "users", count: 50 }, + { collectionName: "posts", count: 100 }, + { collectionName: "comments", count: 200 }, + ]; + + const tasks = createShardTasks(collections, 4, "seed456"); + + const totalCounts = tasks.reduce((sum, t) => sum + t.count, 0); + expect(totalCounts).toBe(350); + }); + }); + + describe("Deterministic Consistency", () => { + it("should produce identical shards across multiple runs with same seed", () => { + const collections = [ + { collectionName: "test", count: 1000 }, + ]; + + const run1 = createShardTasks(collections, 4, "deterministic-seed"); + const run2 = createShardTasks(collections, 4, "deterministic-seed"); + const run3 = createShardTasks(collections, 4, "deterministic-seed"); + + expect(run1).toEqual(run2); + expect(run2).toEqual(run3); + }); + + it("should produce consistent shard assignments regardless of seed", () => { + const collections = [ + { collectionName: "test", count: 100 }, + ]; + + const run1 = createShardTasks(collections, 2, "seed-A"); + const run2 = createShardTasks(collections, 2, "seed-B"); + const run3 = createShardTasks(collections, 2, "seed-C"); + + expect(run1).toEqual(run2); + expect(run2).toEqual(run3); + }); + }); +}); diff --git a/src/generator/WorkerPool.ts b/src/generator/WorkerPool.ts index a4919f5..b1a38e4 100644 --- a/src/generator/WorkerPool.ts +++ b/src/generator/WorkerPool.ts @@ -127,12 +127,98 @@ export class WorkerPool { getQueuedTaskCount(): number { return 
this.taskQueue.length; } + + async processShardedGeneration( + taskData: Omit, + shardTasks: ShardTask[], + onProgress?: (progress: any, workerId: number) => void + ): Promise { + const results: WorkerResult[] = []; + const pendingTasks = new Map>(); + + for (const shard of shardTasks) { + const workerIndex = shard.workerId % this.workers.length; + + const shardTask: WorkerTask = { + event: "start_generation", + data: { + ...taskData, + shard: { + workerId: shard.workerId, + collectionIndex: shard.collectionIndex, + rangeStart: shard.rangeStart, + count: shard.count, + }, + }, + }; + + const worker = this.workers[workerIndex]; + const taskPromise = new Promise((resolve, reject) => { + const messageHandler = (result: WorkerResult) => { + if (result.event === "progress") { + onProgress?.(result.data, shard.workerId); + return; + } + + worker.off("message", messageHandler); + worker.off("error", errorHandler); + + if (result.event === "error") { + reject(new Error(result.error || "Unknown error")); + } else { + resolve(result); + } + }; + + const errorHandler = (error: Error) => { + worker.off("message", messageHandler); + worker.off("error", errorHandler); + reject(error); + }; + + worker.on("message", messageHandler); + worker.on("error", errorHandler); + + try { + worker.postMessage(shardTask); + } catch (error) { + worker.off("message", messageHandler); + worker.off("error", errorHandler); + reject(error instanceof Error ? error : new Error(String(error))); + } + }); + + pendingTasks.set(shard.workerId, taskPromise); + } + + for (const [workerId, promise] of pendingTasks) { + try { + const result = await promise; + results.push(result); + } catch (error) { + results.push({ + event: "error", + error: error instanceof Error ? 
error.message : String(error), + }); + } + } + + return results; + } } export interface ShardRange { workerId: number; start: number; end: number; + count: number; +} + +export interface ShardTask { + workerId: number; + collectionIndex: number; + rangeStart: number; + count: number; } export function computeShards( @@ -153,6 +239,7 @@ export function computeShards( workerId: i, start: currentStart, end: currentStart + chunkSize, + count: chunkSize, }); currentStart += chunkSize; } @@ -173,3 +260,28 @@ export function generateDeterministicShards( return result; } + +export function createShardTasks( + collections: Array<{ collectionName: string; count: number }>, + workerCount: number, + seed: string | number +): ShardTask[] { + const tasks: ShardTask[] = []; + const shardsByCollection = generateDeterministicShards(collections, workerCount, seed); + + for (let colIndex = 0; colIndex < collections.length; colIndex++) { + const col = collections[colIndex]; + const shards = shardsByCollection.get(col.collectionName) || []; + + for (const shard of shards) { + tasks.push({ + workerId: shard.workerId, + collectionIndex: colIndex, + rangeStart: shard.start, + count: shard.count, + }); + } + } + + return tasks; +} diff --git a/src/generator/adapters/BaseAdapter.ts b/src/generator/adapters/BaseAdapter.ts index cee7115..02f9fec 100644 --- a/src/generator/adapters/BaseAdapter.ts +++ b/src/generator/adapters/BaseAdapter.ts @@ -221,18 +221,22 @@ export abstract class BaseAdapter { ): AsyncGenerator { const random = seedrandom(`${this.seed}_${collection.name}`); - // Skip ahead if rangeStart > 0 to maintain determinism - // Note: seedrandom doesn't support skipping easily without state or manual loops - // For now, we do manual loops to advance the RNG for simplicity/correctness - for (let i = 0; i < rangeStart; i++) { - random(); - } - const syncedSchema = this.schemaMap.get(collection.name) || this.schemaMap.get(collection.name.split(".").pop()!) 
|| collection; + if (rangeStart > 0) { + for (let i = 0; i < rangeStart; i++) { + for (const field of syncedSchema.fields) { + if (field.isPrimaryKey) continue; + if (field.name === "id" && !field.isPrimaryKey) continue; + if (field.type === "reference" || field.isForeignKey) continue; + random(); + } + } + } + const pkFields = syncedSchema.fields.filter((f) => f.isPrimaryKey); const isCompositePK = pkFields.length > 1; diff --git a/src/generator/core/constraints/ConstraintRegistry.test.ts b/src/generator/core/constraints/ConstraintRegistry.test.ts new file mode 100644 index 0000000..e8c71a3 --- /dev/null +++ b/src/generator/core/constraints/ConstraintRegistry.test.ts @@ -0,0 +1,536 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { + ConstraintRegistry, + createDefaultRegistry, + createRangeValidator, + createStringLengthValidator, + createPatternValidator, + createEnumValidator, + createEmailValidator, + createUrlValidator, + createCrossColumnValidator, + createSumOfValidator, + createRatioOfValidator, + createPercentageOfValidator, + createConditionalValidator, +} from "./index"; + +describe("ConstraintRegistry", () => { + let registry: ConstraintRegistry; + + beforeEach(() => { + registry = createDefaultRegistry(); + }); + + describe("basic operations", () => { + it("should register and retrieve validators", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.register("price:range", validator); + + expect(registry.has("price:range")).toBe(true); + expect(registry.getValidator("price:range")).toBe(validator); + }); + + it("should unregister validators", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.register("price:range", validator); + + expect(registry.unregister("price:range")).toBe(true); + expect(registry.has("price:range")).toBe(false); + }); + + it("should register field constraints", () => { + const validator = createEmailValidator("email"); + 
registry.registerFieldConstraint("email", validator); + + const fieldConstraints = registry.getFieldConstraints("email"); + expect(fieldConstraints).toHaveLength(1); + expect(fieldConstraints[0]).toBe(validator); + }); + + it("should register document constraints", () => { + const validator = { + metadata: { + name: "doc:total", + type: "cross_column" as const, + severity: "error" as const, + }, + validate: () => ({ valid: true }), + }; + registry.registerDocumentConstraint(validator); + + const docConstraints = registry.getDocumentConstraints(); + expect(docConstraints).toHaveLength(1); + }); + + it("should clone registry", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.register("price:range", validator); + + const cloned = registry.clone(); + expect(cloned.has("price:range")).toBe(true); + expect(cloned.getValidator("price:range")).toBe(validator); + }); + + it("should clear registry", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.register("price:range", validator); + registry.registerFieldConstraint("email", createEmailValidator("email")); + + registry.clear(); + + expect(registry.has("price:range")).toBe(false); + expect(registry.getFieldConstraints("email")).toHaveLength(0); + }); + + it("should return correct stats", () => { + registry.register("v1", { metadata: { name: "v1", type: "field_validation", severity: "error" }, validate: () => ({ valid: true }) }); + registry.register("v2", { metadata: { name: "v2", type: "field_validation", severity: "error" }, validate: () => ({ valid: true }) }); + registry.registerFieldConstraint("f1", { metadata: { name: "f1", type: "field_validation", severity: "error" }, validate: () => ({ valid: true }) }); + + const stats = registry.getStats(); + expect(stats.totalValidators).toBe(2); + expect(stats.fieldConstraints).toBe(1); + }); + }); + + describe("validateDocument", () => { + it("should validate document with field constraints", 
() => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.register("price:range", validator); + registry.registerFieldConstraint("price", validator); + + const document = { price: 50 }; + const results = registry.validateDocument(document); + + expect(results).toHaveLength(0); + }); + + it("should detect invalid field values", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.registerFieldConstraint("price", validator); + + const document = { price: 150 }; + const results = registry.validateDocument(document); + + expect(results).toHaveLength(1); + expect(results[0].valid).toBe(false); + expect(results[0].fieldName).toBe("price"); + }); + + it("should validate multiple fields", () => { + const priceValidator = createRangeValidator("price", { min: 0, max: 100 }); + const quantityValidator = createRangeValidator("quantity", { min: 1, max: 1000 }); + + registry.registerFieldConstraint("price", priceValidator); + registry.registerFieldConstraint("quantity", quantityValidator); + + const document = { price: 50, quantity: 500 }; + const results = registry.validateDocument(document); + + expect(results).toHaveLength(0); + }); + + it("should report all violations in strict mode", () => { + const priceValidator = createRangeValidator("price", { min: 0, max: 100 }); + const quantityValidator = createRangeValidator("quantity", { min: 1, max: 1000 }); + + registry.registerFieldConstraint("price", priceValidator); + registry.registerFieldConstraint("quantity", quantityValidator); + + const document = { price: 150, quantity: 2000 }; + const results = registry.validateDocument(document); + + expect(results).toHaveLength(2); + }); + }); + + describe("validateBatch", () => { + it("should validate batch of documents", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.registerFieldConstraint("price", validator); + + const documents = [ + { price: 50 }, + { price: 75 
}, + { price: 25 }, + ]; + + const report = registry.validateBatch(documents); + expect(report.totalViolations).toBe(0); + expect(report.documentsWithViolations).toBe(0); + }); + + it("should detect violations in batch", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + registry.registerFieldConstraint("price", validator); + + const documents = [ + { price: 50 }, + { price: 150 }, + { price: 200 }, + ]; + + const report = registry.validateBatch(documents); + expect(report.totalViolations).toBe(2); + expect(report.documentsWithViolations).toBe(2); + }); + + it("should track execution time", () => { + const documents = [{ price: 50 }, { price: 75 }]; + const report = registry.validateBatch(documents); + expect(report.executionTimeMs).toBeGreaterThanOrEqual(0); + }); + }); + + describe("fromSchemaFields", () => { + it("should create constraints from schema fields", () => { + const fields = [ + { name: "status", type: "string", constraints: { enum: ["active", "inactive", "pending"] } }, + { name: "price", type: "number", constraints: { min: 0, max: 1000 } }, + { name: "name", type: "string", constraints: { pattern: "^[A-Z].*" } }, + ]; + + registry.fromSchemaFields(fields); + + expect(registry.has("status:enum")).toBe(true); + expect(registry.has("price:range")).toBe(true); + expect(registry.has("name:pattern")).toBe(true); + }); + + it("should validate enum constraints", () => { + const fields = [ + { name: "status", type: "string", constraints: { enum: ["active", "inactive"] } }, + ]; + + registry.fromSchemaFields(fields); + + const validDoc = { status: "active" }; + const invalidDoc = { status: "deleted" }; + + expect(registry.validateDocument(validDoc)).toHaveLength(0); + expect(registry.validateDocument(invalidDoc as any)).toHaveLength(1); + }); + }); +}); + +describe("Field Validators", () => { + describe("createRangeValidator", () => { + it("should validate values within range", () => { + const validator = 
createRangeValidator("price", { min: 0, max: 100 }); + const result = validator.validate(50, { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(true); + }); + + it("should reject values below minimum", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + const result = validator.validate(-10, { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + expect(result.retryable).toBe(true); + }); + + it("should reject values above maximum", () => { + const validator = createRangeValidator("price", { min: 0, max: 100 }); + const result = validator.validate(150, { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + + it("should handle exclusive bounds", () => { + const validator = createRangeValidator("price", { min: 0, max: 100, inclusive: false }); + const result = validator.validate(0, { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + }); + + describe("createStringLengthValidator", () => { + it("should validate string length", () => { + const validator = createStringLengthValidator("name", { minLength: 3, maxLength: 10 }); + const result = validator.validate("John", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(true); + }); + + it("should reject short strings", () => { + const validator = createStringLengthValidator("name", { minLength: 5 }); + const result = validator.validate("Jo", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + + it("should reject long strings", () => { + const validator = createStringLengthValidator("name", { maxLength: 5 }); + const result = validator.validate("Jonathan", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + }); + + describe("createPatternValidator", () => { + it("should validate matching pattern", () => { + const validator = createPatternValidator("code", /^[A-Z]{3}$/); + const result = validator.validate("ABC", { 
document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(true); + }); + + it("should reject non-matching pattern", () => { + const validator = createPatternValidator("code", /^[A-Z]{3}$/); + const result = validator.validate("abc", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + }); + + describe("createEnumValidator", () => { + it("should validate enum values", () => { + const validator = createEnumValidator("status", ["active", "inactive", "pending"]); + const result = validator.validate("active", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(true); + }); + + it("should reject invalid enum values", () => { + const validator = createEnumValidator("status", ["active", "inactive", "pending"]); + const result = validator.validate("deleted" as "active", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + expect(result.retryable).toBe(false); + }); + }); + + describe("createEmailValidator", () => { + it("should validate valid emails", () => { + const validator = createEmailValidator("email"); + const result = validator.validate("test@example.com", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(true); + }); + + it("should reject invalid emails", () => { + const validator = createEmailValidator("email"); + const result = validator.validate("invalid-email", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + }); + + describe("createUrlValidator", () => { + it("should validate valid URLs", () => { + const validator = createUrlValidator("url"); + const result = validator.validate("https://example.com", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(true); + }); + + it("should reject invalid URLs", () => { + const validator = createUrlValidator("url"); + const result = validator.validate("not-a-url", { document: {}, random: () => 0.5 }); + + expect(result.valid).toBe(false); + }); + }); +}); + 
+describe("Cross-Column Validators", () => { + describe("createCrossColumnValidator", () => { + it("should validate greater than", () => { + const validator = createCrossColumnValidator("discount", { + sourceField: "discount", + targetField: "price", + operator: "lt", + }); + + const result = validator.validate(50, { document: { price: 100, discount: 50 }, random: () => 0.5 }); + expect(result.valid).toBe(true); + }); + + it("should reject when constraint violated", () => { + const validator = createCrossColumnValidator("discount", { + sourceField: "discount", + targetField: "price", + operator: "lt", + }); + + const result = validator.validate(150, { document: { price: 100, discount: 150 }, random: () => 0.5 }); + expect(result.valid).toBe(false); + }); + }); + + describe("createSumOfValidator", () => { + it("should validate sum constraints", () => { + const validator = createSumOfValidator("total", { + targetFields: ["subtotal", "tax"], + sumField: "total", + }); + + const result = validator.validate(120, { + document: { subtotal: 100, tax: 20, total: 120 }, + random: () => 0.5, + }); + + expect(result.valid).toBe(true); + }); + + it("should reject incorrect sums", () => { + const validator = createSumOfValidator("total", { + targetFields: ["subtotal", "tax"], + sumField: "total", + }); + + const result = validator.validate(100, { + document: { subtotal: 100, tax: 20, total: 100 }, + random: () => 0.5, + }); + + expect(result.valid).toBe(false); + }); + }); + + describe("createRatioOfValidator", () => { + it("should validate ratio constraints", () => { + const validator = createRatioOfValidator("ratio", { + numeratorField: "width", + denominatorField: "height", + targetRatio: 2, + tolerance: 0.1, + }); + + const result = validator.validate(100, { + document: { width: 200, height: 100, ratio: 100 }, + random: () => 0.5, + }); + + expect(result.valid).toBe(true); + }); + }); + + describe("createPercentageOfValidator", () => { + it("should validate percentage 
constraints", () => { + const validator = createPercentageOfValidator("percentage", { + partField: "tax", + wholeField: "total", + targetPercentage: 20, + tolerance: 1, + }); + + const result = validator.validate(20, { + document: { total: 100, tax: 20, percentage: 20 }, + random: () => 0.5, + }); + + expect(result.valid).toBe(true); + }); + }); + + describe("createConditionalValidator", () => { + it("should apply then constraint when condition is met", () => { + const validator = createConditionalValidator("discount", { + conditionField: "type", + conditionOperator: "eq", + conditionValue: "premium", + thenField: "discount", + thenConstraint: (value) => ({ + valid: typeof value === "number" && value > 0, + errorMessage: "Premium customers must have discount > 0", + }), + }); + + const result = validator.validate(10, { + document: { type: "premium", discount: 10 }, + random: () => 0.5, + }); + + expect(result.valid).toBe(true); + }); + + it("should skip constraint when condition is not met", () => { + const validator = createConditionalValidator("discount", { + conditionField: "type", + conditionOperator: "eq", + conditionValue: "premium", + thenField: "discount", + thenConstraint: () => ({ valid: false, errorMessage: "Should not apply" }), + }); + + const result = validator.validate(0, { + document: { type: "basic", discount: 0 }, + random: () => 0.5, + }); + + expect(result.valid).toBe(true); + }); + }); +}); + +describe("Integration Scenarios", () => { + let registry: ConstraintRegistry; + + beforeEach(() => { + registry = createDefaultRegistry(); + }); + + it("should validate order with multiple constraints", () => { + registry.registerFieldConstraint("subtotal", createRangeValidator("subtotal", { min: 0 })); + registry.registerFieldConstraint("tax", createRangeValidator("tax", { min: 0 })); + registry.registerFieldConstraint("total", createSumOfValidator("total", { + targetFields: ["subtotal", "tax"], + sumField: "total", + })); + 
registry.registerFieldConstraint("discount", createCrossColumnValidator("discount", { + sourceField: "discount", + targetField: "total", + operator: "lt", + })); + + const validOrder = { + subtotal: 100, + tax: 10, + total: 110, + discount: 20, + }; + + const results = registry.validateDocument(validOrder); + expect(results).toHaveLength(0); + }); + + it("should detect multiple constraint violations in one document", () => { + registry.registerFieldConstraint("price", createRangeValidator("price", { min: 0, max: 100 })); + registry.registerFieldConstraint("quantity", createRangeValidator("quantity", { min: 1, max: 100 })); + + const invalidOrder = { + price: 200, + quantity: -5, + }; + + const results = registry.validateDocument(invalidOrder); + expect(results.length).toBeGreaterThanOrEqual(2); + }); + + it("should handle complex business rules", () => { + registry.registerFieldConstraint("status", createEnumValidator("status", ["draft", "pending", "approved", "rejected"])); + registry.registerFieldConstraint("approved_at", createConditionalValidator("approved_at", { + conditionField: "status", + conditionOperator: "eq", + conditionValue: "approved", + thenField: "approved_at", + thenConstraint: (value) => ({ + valid: value !== null && value !== undefined, + errorMessage: "Approved documents must have approved_at", + }), + elseConstraint: (value) => ({ + valid: value === null || value === undefined, + errorMessage: "Non-approved documents should not have approved_at", + }), + })); + + const approvedDoc = { status: "approved", approved_at: new Date() }; + const draftDoc = { status: "draft", approved_at: null }; + + expect(registry.validateDocument(approvedDoc)).toHaveLength(0); + expect(registry.validateDocument(draftDoc)).toHaveLength(0); + }); +}); diff --git a/src/generator/core/constraints/ConstraintRegistry.ts b/src/generator/core/constraints/ConstraintRegistry.ts new file mode 100644 index 0000000..6459b7d --- /dev/null +++ 
b/src/generator/core/constraints/ConstraintRegistry.ts @@ -0,0 +1,466 @@ +import { + ConstraintMetadata, + ConstraintValidator, + ConstraintContext, + ValidationResult, + ConstraintType, + ConstraintDefinition, + ConstraintViolation, + ConstraintReport, + ConstraintMode, + ConstraintSeverity, +} from "./types"; + +export interface RegistryOptions { + defaultMode?: ConstraintMode; + maxRetries?: number; + throwOnError?: boolean; +} + +export class ConstraintRegistry { + private validators: Map> = new Map(); + private fieldConstraints: Map[]> = new Map(); + private documentConstraints: ConstraintValidator[] = []; + private options: Required; + private priority: Map = new Map(); + + constructor(options: RegistryOptions = {}) { + this.options = { + defaultMode: options.defaultMode ?? "strict", + maxRetries: options.maxRetries ?? 3, + throwOnError: options.throwOnError ?? false, + }; + } + + register( + name: string, + validator: ConstraintValidator, + priority: number = 0 + ): void { + this.validators.set(name, validator as ConstraintValidator); + this.priority.set(name, priority); + } + + registerFieldConstraint( + fieldName: string, + validator: ConstraintValidator, + priority: number = 0 + ): void { + if (!this.fieldConstraints.has(fieldName)) { + this.fieldConstraints.set(fieldName, []); + } + this.fieldConstraints.get(fieldName)!.push(validator); + this.priority.set(`${fieldName}:${validator.metadata.name}`, priority); + } + + registerDocumentConstraint( + validator: ConstraintValidator, + priority: number = 0 + ): void { + this.documentConstraints.push(validator); + } + + unregister(name: string): boolean { + return this.validators.delete(name); + } + + unregisterFieldConstraint(fieldName: string, constraintName: string): boolean { + const constraints = this.fieldConstraints.get(fieldName); + if (!constraints) return false; + const index = constraints.findIndex((v) => v.metadata.name === constraintName); + if (index === -1) return false; + 
constraints.splice(index, 1); + return true; + } + + getValidator(name: string): ConstraintValidator | undefined { + return this.validators.get(name); + } + + getFieldConstraints(fieldName: string): ConstraintValidator[] { + return this.fieldConstraints.get(fieldName) || []; + } + + getDocumentConstraints(): ConstraintValidator[] { + return [...this.documentConstraints]; + } + + getAllValidators(): ConstraintValidator[] { + return Array.from(this.validators.values()); + } + + has(name: string): boolean { + return this.validators.has(name); + } + + setMode(mode: ConstraintMode): void { + this.options.defaultMode = mode; + } + + setMaxRetries(maxRetries: number): void { + this.options.maxRetries = maxRetries; + } + + setThrowOnError(throwOnError: boolean): void { + this.options.throwOnError = throwOnError; + } + + validateDocument( + document: Record, + context?: Partial + ): ValidationResult[] { + const results: ValidationResult[] = []; + const fullContext: ConstraintContext = { + document, + random: context?.random ?? 
(() => Math.random()), + ...context, + }; + + for (const fieldName of Object.keys(document)) { + const fieldConstraints = this.getFieldConstraints(fieldName); + const sortedConstraints = this.sortByPriority( + fieldName, + fieldConstraints + ); + + for (const validator of sortedConstraints) { + const result = validator.validate(document[fieldName], fullContext); + if (!result.valid) { + results.push({ + ...result, + fieldName, + constraintName: validator.metadata.name, + }); + } + } + } + + const sortedDocConstraints = this.sortByPriority( + "__document__", + this.documentConstraints + ); + for (const validator of sortedDocConstraints) { + const result = validator.validate(document, fullContext); + if (!result.valid) { + results.push({ + ...result, + constraintName: validator.metadata.name, + }); + } + } + + return results; + } + + validateBatch( + documents: Record[], + options?: { mode?: ConstraintMode; maxRetries?: number } + ): ConstraintReport { + const mode = options?.mode ?? this.options.defaultMode; + const maxRetries = options?.maxRetries ?? 
this.options.maxRetries; + const startTime = Date.now(); + const violations: ConstraintViolation[] = []; + const documentsWithViolations = new Set(); + + const allFieldNames = new Set(); + for (const doc of documents) { + for (const key of Object.keys(doc)) { + allFieldNames.add(key); + } + } + + for (let docIndex = 0; docIndex < documents.length; docIndex++) { + const doc = documents[docIndex]; + let attempts = 0; + let isValid = false; + let currentDoc = doc; + + while (attempts < maxRetries && !isValid) { + const results = this.validateDocument(currentDoc, { + allDocuments: documents, + documentIndex: docIndex, + }); + + if (results.length === 0) { + isValid = true; + } else { + const retryableViolations = results.filter((r) => r.retryable); + const fatalViolations = results.filter((r) => !r.retryable); + + if (fatalViolations.length > 0) { + for (const violation of fatalViolations) { + violations.push({ + constraintName: violation.constraintName || "unknown", + fieldName: violation.fieldName || "", + documentIndex: docIndex, + value: violation.value, + expected: violation.expected, + actual: violation.actual, + errorMessage: violation.errorMessage || "Validation failed", + severity: "error", + retryable: false, + }); + documentsWithViolations.add(docIndex); + } + isValid = true; + } else if (retryableViolations.length > 0) { + attempts++; + if (attempts >= maxRetries) { + for (const violation of retryableViolations) { + violations.push({ + constraintName: violation.constraintName || "unknown", + fieldName: violation.fieldName || "", + documentIndex: docIndex, + value: violation.value, + expected: violation.expected, + actual: violation.actual, + errorMessage: violation.errorMessage || "Validation failed", + severity: "error", + retryable: false, + }); + documentsWithViolations.add(docIndex); + } + } else { + currentDoc = this.applyCorrections(currentDoc, retryableViolations); + } + } + } + } + } + + return { + totalConstraints: + this.validators.size + + 
Array.from(this.fieldConstraints.values()).reduce(
          (sum, arr) => sum + arr.length,
          0
        ) +
        this.documentConstraints.length,
      totalViolations: violations.length,
      violations,
      documentsWithViolations: documentsWithViolations.size,
      executionTimeMs: Date.now() - startTime,
    };
  }

  // Orders validators for a field (or the "__document__" pseudo-key) by
  // descending priority. Priority is resolved first under the scoped key
  // `<key>:<validatorName>`, then under the bare validator name, defaulting
  // to 0. Returns a fresh array; the registered list is never mutated.
  private sortByPriority(
    key: string,
    validators: ConstraintValidator[]
  ): ConstraintValidator[] {
    return [...validators].sort((a, b) => {
      const aPriority =
        this.priority.get(`${key}:${a.metadata.name}`) ??
        this.priority.get(a.metadata.name) ??
        0;
      const bPriority =
        this.priority.get(`${key}:${b.metadata.name}`) ??
        this.priority.get(b.metadata.name) ??
        0;
      return bPriority - aPriority;
    });
  }

  // Builds a shallow copy of `document` with each violation's `expected`
  // value written over the offending field. Violations lacking a fieldName
  // or an `expected` value are left untouched; the input document is not
  // mutated.
  private applyCorrections(
    document: Record,
    violations: ValidationResult[]
  ): Record {
    const corrected = { ...document };
    for (const violation of violations) {
      if (violation.fieldName && violation.expected !== undefined) {
        corrected[violation.fieldName] = violation.expected;
      }
    }
    return corrected;
  }

  // Translates declarative schema-field constraint descriptors
  // (unique / enum / min-max / pattern / cross-column) into validators and
  // registers each one both globally and as a per-field constraint.
  // Returns `this` so calls can be chained.
  fromSchemaFields(
    fields: Array<{
      name: string;
      type: string;
      constraints?: Record;
      required?: boolean;
    }>
  ): ConstraintRegistry {
    for (const field of fields) {
      if (!field.constraints) continue;

      const constraints = field.constraints;

      if (constraints.unique === true) {
        // NOTE(review): this "unique" validator only rejects null/undefined;
        // it never compares against other documents' values, so duplicate
        // values pass validation. Confirm whether uniqueness is actually
        // enforced elsewhere (e.g. createUniqueValidator in fieldValidators).
        const uniqueValidator = {
          metadata: {
            name: `${field.name}:unique`,
            type: "field_validation" as const,
            severity: "error" as const,
            description: `Field ${field.name} must be unique`,
          },
          validate: (value: unknown) => ({
            valid: value !== undefined && value !== null,
          }),
        };
        this.register(`${field.name}:unique`, uniqueValidator);
        this.registerFieldConstraint(field.name, uniqueValidator);
      }

      if (constraints.enum && Array.isArray(constraints.enum)) {
        const enumValues = constraints.enum as unknown[];
        const enumValidator = {
          metadata: {
            name: `${field.name}:enum`,
            type: "field_validation" as const,
            severity: "error"
as const, + description: `Field ${field.name} must be one of ${enumValues.join(", ")}`, + }, + validate: (value: unknown) => ({ + valid: enumValues.includes(value as string), + expected: enumValues, + actual: value, + }), + }; + this.register(`${field.name}:enum`, enumValidator); + this.registerFieldConstraint(field.name, enumValidator); + } + + if (constraints.min !== undefined || constraints.max !== undefined) { + const rangeValidator = { + metadata: { + name: `${field.name}:range`, + type: "field_validation" as const, + severity: "error" as const, + description: `Field ${field.name} must be between ${constraints.min} and ${constraints.max}`, + }, + validate: (value: unknown) => { + const num = Number(value); + if (isNaN(num)) return { valid: false, actual: value }; + if (constraints.min !== undefined && num < (constraints.min as number)) { + return { valid: false, expected: `>= ${constraints.min}`, actual: num }; + } + if (constraints.max !== undefined && num > (constraints.max as number)) { + return { valid: false, expected: `<= ${constraints.max}`, actual: num }; + } + return { valid: true }; + }, + }; + this.register(`${field.name}:range`, rangeValidator); + this.registerFieldConstraint(field.name, rangeValidator); + } + + if (constraints.pattern) { + const patternValidator = { + metadata: { + name: `${field.name}:pattern`, + type: "field_validation" as const, + severity: "error" as const, + description: `Field ${field.name} must match pattern ${constraints.pattern}`, + }, + validate: (value: unknown) => { + const regex = new RegExp(constraints.pattern as string); + return { + valid: typeof value === "string" && regex.test(value), + actual: value, + }; + }, + }; + this.register(`${field.name}:pattern`, patternValidator); + this.registerFieldConstraint(field.name, patternValidator); + } + + if (constraints.minColumn || constraints.maxColumn || constraints.gtColumn || constraints.ltColumn) { + const crossColValidator = { + metadata: { + name: 
`${field.name}:cross_column`, + type: "cross_column" as const, + severity: "error" as const, + description: `Field ${field.name} has cross-column constraints`, + }, + validate: (value: unknown, context: { document?: Record }) => { + if (!context.document) return { valid: true }; + + let valid = true; + let errorMsg = ""; + + if (constraints.gtColumn && context.document[constraints.gtColumn as string] !== undefined) { + const ref = Number(context.document[constraints.gtColumn as string]); + const val = Number(value); + if (!isNaN(ref) && !isNaN(val) && val <= ref) { + valid = false; + errorMsg = `${field.name} must be > ${constraints.gtColumn}`; + } + } + + if (constraints.ltColumn && context.document[constraints.ltColumn as string] !== undefined) { + const ref = Number(context.document[constraints.ltColumn as string]); + const val = Number(value); + if (!isNaN(ref) && !isNaN(val) && val >= ref) { + valid = false; + errorMsg = `${field.name} must be < ${constraints.ltColumn}`; + } + } + + return { valid, errorMessage: errorMsg || undefined, retryable: true }; + }, + }; + this.register(`${field.name}:cross_column`, crossColValidator); + this.registerFieldConstraint(field.name, crossColValidator); + } + } + + return this; + } + + clone(): ConstraintRegistry { + const cloned = new ConstraintRegistry({ + defaultMode: this.options.defaultMode, + maxRetries: this.options.maxRetries, + throwOnError: this.options.throwOnError, + }); + + for (const [name, validator] of this.validators) { + cloned.register(name, validator, this.priority.get(name) ?? 
0);
    }

    // Re-register every per-field validator on the clone.
    for (const [field, validators] of this.fieldConstraints) {
      for (const validator of validators) {
        cloned.registerFieldConstraint(field, validator);
      }
    }

    // Re-register document-level validators on the clone.
    for (const validator of this.documentConstraints) {
      cloned.registerDocumentConstraint(validator);
    }

    return cloned;
  }

  // Removes every registered validator, field constraint, document
  // constraint, and priority entry, returning the registry to an empty state.
  clear(): void {
    this.validators.clear();
    this.fieldConstraints.clear();
    this.documentConstraints.length = 0;
    this.priority.clear();
  }

  // Returns a snapshot of registry size counters plus a copy of the
  // effective options. `fieldConstraints` counts validators across all
  // fields, not distinct field names.
  // NOTE(review): generic parameters appear stripped in this capture
  // (e.g. `Required` without `<…>`) — restore exact types from VCS.
  getStats(): {
    totalValidators: number;
    fieldConstraints: number;
    documentConstraints: number;
    options: Required;
  } {
    return {
      totalValidators: this.validators.size,
      fieldConstraints: Array.from(this.fieldConstraints.values()).reduce(
        (sum, arr) => sum + arr.length,
        0
      ),
      documentConstraints: this.documentConstraints.length,
      options: { ...this.options },
    };
  }
}

// Factory for a registry with the project-default policy: strict mode,
// up to 3 correction retries, and violations reported rather than thrown.
export function createDefaultRegistry(): ConstraintRegistry {
  return new ConstraintRegistry({
    defaultMode: "strict",
    maxRetries: 3,
    throwOnError: false,
  });
}
diff --git a/src/generator/core/constraints/index.ts b/src/generator/core/constraints/index.ts
new file mode 100644
index 0000000..1d7d368
--- /dev/null
+++ b/src/generator/core/constraints/index.ts
@@ -0,0 +1,3 @@
export * from "./types";
export * from "./ConstraintRegistry";
export * from "./validators";
diff --git a/src/generator/core/constraints/types.ts b/src/generator/core/constraints/types.ts
new file mode 100644
index 0000000..2c6a994
--- /dev/null
+++ b/src/generator/core/constraints/types.ts
@@ -0,0 +1,127 @@
// How a violation is classified when reported.
export type ConstraintSeverity = "error" | "warning" | "info";

// The category of rule a constraint expresses; drives which context a
// validator needs (single field, sibling columns, other tables, etc.).
export type ConstraintType =
  | "field_validation"
  | "cross_column"
  | "cross_table"
  | "temporal"
  | "conditional"
  | "aggregation";

// Batch-validation policy: fail hard, warn, retry with corrections, or skip.
export type ConstraintMode = "strict" | "warn" | "retry" | "skip";

// Descriptive header shared by every constraint/validator.
export interface ConstraintMetadata {
  name: string;
  type: ConstraintType;
  description?: string;
  severity: ConstraintSeverity;
  errorMessage?: string;
}
+export interface ConstraintContext { + document: Record; + allDocuments?: Record[]; + collectionName?: string; + documentIndex?: number; + random: () => number; +} + +export interface ValidationResult { + valid: boolean; + constraintName?: string; + fieldName?: string; + value?: unknown; + expected?: unknown; + actual?: unknown; + errorMessage?: string; + retryable?: boolean; +} + +export interface ConstraintValidator { + readonly metadata: ConstraintMetadata; + validate(value: T, context: ConstraintContext): ValidationResult; +} + +export interface FieldValidationConstraint extends ConstraintMetadata { + type: "field_validation"; + apply: (value: unknown) => unknown; + validate?: (value: unknown) => ValidationResult; +} + +export interface CrossColumnConstraint extends ConstraintMetadata { + type: "cross_column"; + sourceField: string; + targetField: string; + operator: "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "sum_of" | "ratio_of" | "percentage_of"; + value?: number | string; + formula?: string; +} + +export interface CrossTableConstraint extends ConstraintMetadata { + type: "cross_table"; + targetCollection: string; + relationshipField: string; + aggregationType: "count" | "sum" | "avg" | "min" | "max"; + targetField: string; + condition?: Record; +} + +export interface TemporalConstraint extends ConstraintMetadata { + type: "temporal"; + field: string; + comparisonField?: string; + operator: "before" | "after" | "within_days" | "older_than"; + days?: number; + referenceDate?: Date | string; +} + +export interface ConditionalConstraint extends ConstraintMetadata { + type: "conditional"; + condition: { + field: string; + operator: "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "exists" | "not_exists"; + value?: unknown; + }; + thenConstraint: ConstraintDefinition; + elseConstraint?: ConstraintDefinition; +} + +export interface AggregationConstraint extends ConstraintMetadata { + type: "aggregation"; + aggregationType: "sum" | "avg" | "min" | "max" | "count" | 
"stddev"; + field: string; + groupByField?: string; + having?: { + operator: "eq" | "gt" | "gte" | "lt" | "lte" | "ne"; + value: number; + }; +} + +export type ConstraintDefinition = + | FieldValidationConstraint + | CrossColumnConstraint + | CrossTableConstraint + | TemporalConstraint + | ConditionalConstraint + | AggregationConstraint; + +export interface ConstraintViolation { + constraintName: string; + fieldName: string; + documentId?: string | number; + documentIndex?: number; + value: unknown; + expected?: unknown; + actual?: unknown; + errorMessage: string; + severity: ConstraintSeverity; + retryable: boolean; +} + +export interface ConstraintReport { + totalConstraints: number; + totalViolations: number; + violations: ConstraintViolation[]; + documentsWithViolations: number; + executionTimeMs: number; +} diff --git a/src/generator/core/constraints/validators/crossColumnValidators.ts b/src/generator/core/constraints/validators/crossColumnValidators.ts new file mode 100644 index 0000000..b178428 --- /dev/null +++ b/src/generator/core/constraints/validators/crossColumnValidators.ts @@ -0,0 +1,311 @@ +import { + ConstraintValidator, + ConstraintContext, + ValidationResult, +} from "../types"; + +export type ComparisonOperator = "eq" | "ne" | "gt" | "gte" | "lt" | "lte"; + +export interface CrossColumnValidatorOptions { + sourceField: string; + targetField: string; + operator: ComparisonOperator; +} + +function compareValues(a: number | string | Date, b: number | string | Date): number { + if (a instanceof Date || b instanceof Date) { + const dateA = a instanceof Date ? a.getTime() : new Date(a as string).getTime(); + const dateB = b instanceof Date ? 
b.getTime() : new Date(b as string).getTime(); + return dateA - dateB; + } + return Number(a) - Number(b); +} + +export function createCrossColumnValidator( + fieldName: string, + options: CrossColumnValidatorOptions +): ConstraintValidator { + const { sourceField, targetField, operator } = options; + + return { + metadata: { + name: `${fieldName}:cross_column:${operator}`, + type: "cross_column", + severity: "error", + description: `Field ${fieldName} must be ${operator} ${targetField}`, + }, + validate(value: unknown, context: ConstraintContext): ValidationResult { + if (!context.document) { + return { valid: true }; + } + + const targetValue = context.document[targetField]; + if (targetValue === undefined || targetValue === null) { + return { valid: true }; + } + + const comparison = compareValues(value as number | string | Date, targetValue as number | string | Date); + let valid = false; + + switch (operator) { + case "eq": + valid = comparison === 0; + break; + case "ne": + valid = comparison !== 0; + break; + case "gt": + valid = comparison > 0; + break; + case "gte": + valid = comparison >= 0; + break; + case "lt": + valid = comparison < 0; + break; + case "lte": + valid = comparison <= 0; + break; + } + + if (!valid) { + return { + valid: false, + value, + expected: `${operator} ${targetField} (${targetValue})`, + actual: value, + errorMessage: `${fieldName} (${value}) must be ${operator} ${targetField} (${targetValue})`, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export interface SumOfValidatorOptions { + targetFields: string[]; + sumField: string; + tolerance?: number; +} + +export function createSumOfValidator( + fieldName: string, + options: SumOfValidatorOptions +): ConstraintValidator { + const { targetFields, sumField, tolerance = 0 } = options; + + return { + metadata: { + name: `${fieldName}:sum_of`, + type: "cross_column", + severity: "error", + description: `${fieldName} must equal sum of ${targetFields.join(" + 
")}`, + }, + validate(value: unknown, context: ConstraintContext): ValidationResult { + if (!context.document) { + return { valid: true }; + } + + const sum = targetFields.reduce((acc, field) => { + const fieldValue = context.document![field]; + return acc + (typeof fieldValue === "number" ? fieldValue : 0); + }, 0); + + const targetSum = typeof value === "number" ? value : 0; + const diff = Math.abs(targetSum - sum); + + if (diff > tolerance) { + return { + valid: false, + value: targetSum, + expected: sum, + actual: targetSum, + errorMessage: `${fieldName} (${targetSum}) must equal sum of ${targetFields.join(" + ")} (${sum})`, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export interface RatioOfValidatorOptions { + numeratorField: string; + denominatorField: string; + targetRatio: number; + tolerance?: number; +} + +export function createRatioOfValidator( + fieldName: string, + options: RatioOfValidatorOptions +): ConstraintValidator { + const { numeratorField, denominatorField, targetRatio, tolerance = 0.01 } = options; + + return { + metadata: { + name: `${fieldName}:ratio_of`, + type: "cross_column", + severity: "error", + description: `${fieldName} should be approximately ${targetRatio}x ratio of ${numeratorField}/${denominatorField}`, + }, + validate(value: unknown, context: ConstraintContext): ValidationResult { + if (!context.document) { + return { valid: true }; + } + + const numerator = context.document[numeratorField] as number; + const denominator = context.document[denominatorField] as number; + + if (typeof numerator !== "number" || typeof denominator !== "number" || denominator === 0) { + return { valid: false, errorMessage: "Invalid numerator/denominator values" }; + } + + const actualRatio = numerator / denominator; + const diff = Math.abs(actualRatio - targetRatio); + + if (diff > tolerance) { + return { + valid: false, + value, + expected: `~${targetRatio} (actual: ${actualRatio.toFixed(4)})`, + actual: value, + 
errorMessage: `${fieldName} ratio should be approximately ${targetRatio}, got ${actualRatio.toFixed(4)}`, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export interface PercentageOfValidatorOptions { + partField: string; + wholeField: string; + targetPercentage: number; + tolerance?: number; +} + +export function createPercentageOfValidator( + fieldName: string, + options: PercentageOfValidatorOptions +): ConstraintValidator { + const { partField, wholeField, targetPercentage, tolerance = 1 } = options; + + return { + metadata: { + name: `${fieldName}:percentage_of`, + type: "cross_column", + severity: "error", + description: `${fieldName} should be ${targetPercentage}% of ${wholeField} (part: ${partField})`, + }, + validate(value: unknown, context: ConstraintContext): ValidationResult { + if (!context.document) { + return { valid: true }; + } + + const part = context.document[partField] as number; + const whole = context.document[wholeField] as number; + + if (typeof part !== "number" || typeof whole !== "number" || whole === 0) { + return { valid: false, errorMessage: "Invalid part/whole values" }; + } + + const actualPercentage = (part / whole) * 100; + const diff = Math.abs(actualPercentage - targetPercentage); + + if (diff > tolerance) { + return { + valid: false, + value, + expected: `${targetPercentage}% (actual: ${actualPercentage.toFixed(2)}%)`, + actual: value, + errorMessage: `${fieldName} should be ${targetPercentage}% of ${wholeField}, got ${actualPercentage.toFixed(2)}%`, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export interface ConditionalValidatorOptions { + conditionField: string; + conditionOperator: "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "exists" | "not_exists"; + conditionValue?: unknown; + thenField: string; + thenConstraint: (value: unknown) => ValidationResult; + elseConstraint?: (value: unknown) => ValidationResult; +} + +export function createConditionalValidator( + fieldName: 
string, + options: ConditionalValidatorOptions +): ConstraintValidator { + const { + conditionField, + conditionOperator, + conditionValue, + thenField, + thenConstraint, + elseConstraint, + } = options; + + return { + metadata: { + name: `${fieldName}:conditional`, + type: "conditional", + severity: "error", + description: `${fieldName} has conditional constraint based on ${conditionField}`, + }, + validate(value: unknown, context: ConstraintContext): ValidationResult { + if (!context.document) { + return { valid: true }; + } + + const conditionValueInDoc = context.document[conditionField]; + let conditionMet = false; + + switch (conditionOperator) { + case "exists": + conditionMet = conditionValueInDoc !== undefined && conditionValueInDoc !== null; + break; + case "not_exists": + conditionMet = conditionValueInDoc === undefined || conditionValueInDoc === null; + break; + case "eq": + conditionMet = conditionValueInDoc === conditionValue; + break; + case "ne": + conditionMet = conditionValueInDoc !== conditionValue; + break; + case "gt": + conditionMet = Number(conditionValueInDoc) > Number(conditionValue); + break; + case "gte": + conditionMet = Number(conditionValueInDoc) >= Number(conditionValue); + break; + case "lt": + conditionMet = Number(conditionValueInDoc) < Number(conditionValue); + break; + case "lte": + conditionMet = Number(conditionValueInDoc) <= Number(conditionValue); + break; + } + + if (conditionMet) { + const targetValue = context.document[thenField]; + return thenConstraint(targetValue); + } else if (elseConstraint) { + return elseConstraint(value); + } + + return { valid: true }; + }, + }; +} diff --git a/src/generator/core/constraints/validators/fieldValidators.ts b/src/generator/core/constraints/validators/fieldValidators.ts new file mode 100644 index 0000000..a68a860 --- /dev/null +++ b/src/generator/core/constraints/validators/fieldValidators.ts @@ -0,0 +1,284 @@ +import { + ConstraintValidator, + ConstraintContext, + ValidationResult, + 
ConstraintMetadata, +} from "../types"; + +export interface RangeValidatorOptions { + min?: number; + max?: number; + inclusive?: boolean; +} + +export function createRangeValidator( + fieldName: string, + options: RangeValidatorOptions +): ConstraintValidator { + const { min, max, inclusive = true } = options; + + return { + metadata: { + name: `${fieldName}:range`, + type: "field_validation", + severity: "error", + description: `Field ${fieldName} must be ${inclusive ? "between" : "strictly between"} ${min} and ${max}`, + errorMessage: `${fieldName} is out of range [${min}, ${max}]`, + }, + validate(value: number, _context: ConstraintContext): ValidationResult { + const num = Number(value); + if (isNaN(num)) { + return { + valid: false, + value, + expected: `number in range [${min}, ${max}]`, + actual: value, + errorMessage: `${fieldName} must be a number`, + }; + } + + const minOk = min !== undefined + ? inclusive ? num >= min : num > min + : true; + const maxOk = max !== undefined + ? inclusive ? num <= max : num < max + : true; + + if (!minOk || !maxOk) { + return { + valid: false, + value: num, + expected: inclusive + ? `${min !== undefined ? `>= ${min}` : ""} and ${max !== undefined ? `<= ${max}` : ""}`.trim() + : `${min !== undefined ? `> ${min}` : ""} and ${max !== undefined ? `< ${max}` : ""}`.trim(), + actual: num, + errorMessage: this.metadata.errorMessage, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export function createStringLengthValidator( + fieldName: string, + options: { minLength?: number; maxLength?: number } +): ConstraintValidator { + const { minLength, maxLength } = options; + + return { + metadata: { + name: `${fieldName}:length`, + type: "field_validation", + severity: "error", + description: `Field ${fieldName} length must be ${minLength !== undefined ? `at least ${minLength}` : ""} ${maxLength !== undefined ? 
`at most ${maxLength}` : ""}`.trim(), + }, + validate(value: string, _context: ConstraintContext): ValidationResult { + if (typeof value !== "string") { + return { valid: false, value, errorMessage: `${fieldName} must be a string` }; + } + + const len = value.length; + const minOk = minLength !== undefined ? len >= minLength : true; + const maxOk = maxLength !== undefined ? len <= maxLength : true; + + if (!minOk || !maxOk) { + return { + valid: false, + value, + expected: `length ${minLength !== undefined ? `>= ${minLength}` : ""} ${maxLength !== undefined ? `<= ${maxLength}` : ""}`.trim(), + actual: len, + errorMessage: `${fieldName} length ${len} is out of bounds [${minLength}, ${maxLength}]`, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export function createPatternValidator( + fieldName: string, + pattern: string | RegExp +): ConstraintValidator { + const regex = typeof pattern === "string" ? new RegExp(pattern) : pattern; + + return { + metadata: { + name: `${fieldName}:pattern`, + type: "field_validation", + severity: "error", + description: `Field ${fieldName} must match pattern ${regex}`, + }, + validate(value: string, _context: ConstraintContext): ValidationResult { + if (typeof value !== "string") { + return { valid: false, value, errorMessage: `${fieldName} must be a string` }; + } + + const matches = regex.test(value); + if (!matches) { + return { + valid: false, + value, + expected: `matching ${regex}`, + actual: value.substring(0, 50), + errorMessage: `${fieldName} does not match required pattern`, + retryable: true, + }; + } + + return { valid: true }; + }, + }; +} + +export function createEnumValidator( + fieldName: string, + allowedValues: T[] +): ConstraintValidator { + return { + metadata: { + name: `${fieldName}:enum`, + type: "field_validation", + severity: "error", + description: `Field ${fieldName} must be one of: ${allowedValues.join(", ")}`, + }, + validate(value: T, _context: ConstraintContext): ValidationResult 
{
      const isValid = allowedValues.includes(value);
      if (!isValid) {
        return {
          valid: false,
          value,
          expected: allowedValues,
          actual: value,
          // retryable: false — an out-of-enum value cannot be auto-corrected
          // by the retry loop, so it is reported as terminal.
          errorMessage: `${fieldName} value "${value}" is not in allowed values`,
          retryable: false,
        };
      }
      return { valid: true };
    },
  };
}

// Builds a validator asserting that `fieldName` holds a plausibly-formatted
// email address. The regex is intentionally permissive (one "@", at least one
// "." after it, no whitespace) — a format sanity check, not RFC 5322.
export function createEmailValidator(fieldName: string): ConstraintValidator {
  const emailPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

  return {
    metadata: {
      name: `${fieldName}:email`,
      type: "field_validation",
      severity: "error",
      description: `Field ${fieldName} must be a valid email address`,
    },
    validate(value: string, _context: ConstraintContext): ValidationResult {
      // Non-strings fail immediately and are not retryable.
      if (typeof value !== "string") {
        return { valid: false, value, errorMessage: `${fieldName} must be a string` };
      }

      if (!emailPattern.test(value)) {
        return {
          valid: false,
          value,
          expected: "valid email format",
          errorMessage: `${fieldName} is not a valid email address`,
          retryable: true,
        };
      }

      return { valid: true };
    },
  };
}

// Builds a validator asserting that `fieldName` is an http(s) URL.
// Only the scheme and basic shape are checked (case-insensitive); other
// schemes (ftp, mailto, …) are rejected by design.
export function createUrlValidator(fieldName: string): ConstraintValidator {
  const urlPattern = /^https?:\/\/[^\s/$.?#].[^\s]*$/i;

  return {
    metadata: {
      name: `${fieldName}:url`,
      type: "field_validation",
      severity: "error",
      description: `Field ${fieldName} must be a valid URL`,
    },
    validate(value: string, _context: ConstraintContext): ValidationResult {
      if (typeof value !== "string") {
        return { valid: false, value, errorMessage: `${fieldName} must be a string` };
      }

      if (!urlPattern.test(value)) {
        return {
          valid: false,
          value,
          expected: "valid URL format (http:// or https://)",
          errorMessage: `${fieldName} is not a valid URL`,
          retryable: true,
        };
      }

      return { valid: true };
    },
  };
}

// Builds a soft (warning-level) validator guarding the null rate of a field.
// NOTE(review): the check draws context.random() per null value rather than
// measuring an actual null ratio across documents — i.e. nulls are flagged
// probabilistically with probability (1 - maxNullRate). Confirm this
// sampling behavior is intended.
export function createNullRateValidator(
  fieldName: string,
  maxNullRate: number
): ConstraintValidator {
  return {
    metadata: {
      name: `${fieldName}:null_rate`,
      type: "field_validation",
      severity:
"warning", + description: `Field ${fieldName} null rate should not exceed ${(maxNullRate * 100).toFixed(0)}%`, + }, + validate(value: unknown, context: ConstraintContext): ValidationResult { + if (value === null || value === undefined) { + const nullRate = context.random(); + if (nullRate > maxNullRate) { + return { + valid: false, + value, + expected: `null rate <= ${(maxNullRate * 100).toFixed(0)}%`, + actual: "high null rate", + errorMessage: `${fieldName} has excessive null values`, + retryable: true, + }; + } + } + return { valid: true }; + }, + }; +} + +export function createUniqueValidator( + fieldName: string, + getAllValues: () => T[] +): ConstraintValidator { + return { + metadata: { + name: `${fieldName}:unique`, + type: "field_validation", + severity: "error", + description: `Field ${fieldName} must contain unique values`, + }, + validate(value: T, _context: ConstraintContext): ValidationResult { + const allValues = getAllValues(); + const occurrences = allValues.filter((v) => v === value).length; + + if (occurrences > 1) { + return { + valid: false, + value, + expected: "unique value", + actual: `value appears ${occurrences} times`, + errorMessage: `${fieldName} value "${value}" is not unique`, + retryable: false, + }; + } + return { valid: true }; + }, + }; +} diff --git a/src/generator/core/constraints/validators/index.ts b/src/generator/core/constraints/validators/index.ts new file mode 100644 index 0000000..c1c3dc8 --- /dev/null +++ b/src/generator/core/constraints/validators/index.ts @@ -0,0 +1,2 @@ +export * from "./fieldValidators"; +export * from "./crossColumnValidators"; diff --git a/src/generator/deterministic.test.ts b/src/generator/deterministic.test.ts new file mode 100644 index 0000000..25a7d4d --- /dev/null +++ b/src/generator/deterministic.test.ts @@ -0,0 +1,253 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { TestDataGeneratorService } from "./index"; +import { InMemoryAdapter } from 
"./adapters/InMemoryAdapter"; +import { SchemaCollection } from "../types/schemaDesign"; +import { TestDataConfig } from "./types"; +import { computeShards } from "./WorkerPool"; + +describe("Deterministic Sharding Consistency", () => { + let adapter: InMemoryAdapter; + let service: TestDataGeneratorService; + let collections: SchemaCollection[]; + let config: TestDataConfig; + + beforeEach(() => { + adapter = new InMemoryAdapter(); + service = new TestDataGeneratorService(adapter); + + collections = [ + { + id: "users", + name: "users", + fields: [ + { id: "u1", name: "id", type: "integer", isPrimaryKey: true }, + { id: "u2", name: "name", type: "string" }, + { id: "u3", name: "email", type: "string" }, + ], + position: { x: 0, y: 0 }, + }, + ]; + + config = { + collections: [{ collectionName: "users", count: 100 }], + relationships: [], + seed: 42, + }; + }); + + async function generateSingleThreaded(): Promise { + const freshAdapter = new InMemoryAdapter(); + const freshService = new TestDataGeneratorService(freshAdapter); + + const result = await freshService.generateAndPopulate( + collections, + [], + config + ); + + if (!result.success) { + throw new Error((result.errors || []).join(", ")); + } + + return freshAdapter.getData("users"); + } + + async function generateSharded( + shardCount: number + ): Promise { + const shards = computeShards(100, shardCount, config.seed!); + const allResults: any[] = []; + + for (const shard of shards) { + const freshAdapter = new InMemoryAdapter(); + const freshService = new TestDataGeneratorService(freshAdapter); + + const result = await freshService.generateCollectionWithRange( + collections[0], + shard.start, + shard.count, + config, + [] + ); + + if (!result.success) { + throw new Error((result.errors || []).join(", ")); + } + + const data = freshAdapter.getData("users"); + allResults.push(...data); + } + + allResults.sort((a, b) => a.id - b.id); + return allResults; + } + + it("should produce identical output for single 
vs 4-worker sharded generation", async () => { + const singleThreaded = await generateSingleThreaded(); + const sharded4 = await generateSharded(4); + + expect(sharded4).toHaveLength(singleThreaded.length); + expect(sharded4).toEqual(singleThreaded); + }); + + it("should produce identical output for single vs 2-worker sharded generation", async () => { + const singleThreaded = await generateSingleThreaded(); + const sharded2 = await generateSharded(2); + + expect(sharded2).toHaveLength(singleThreaded.length); + expect(sharded2).toEqual(singleThreaded); + }); + + it("should have deterministic IDs across all shards", async () => { + const shards = computeShards(100, 4, config.seed!); + const allIds: number[] = []; + + for (const shard of shards) { + const freshAdapter = new InMemoryAdapter(); + const freshService = new TestDataGeneratorService(freshAdapter); + + await freshService.generateCollectionWithRange( + collections[0], + shard.start, + shard.count, + config, + [] + ); + + const data = freshAdapter.getData("users"); + allIds.push(...data.map((d: any) => d.id)); + } + + allIds.sort((a, b) => a - b); + const expectedIds = Array.from({ length: 100 }, (_, i) => i + 1); + + expect(allIds).toEqual(expectedIds); + }); + + it("should have deterministic email generation across shards", async () => { + const singleThreaded = await generateSingleThreaded(); + const sharded4 = await generateSharded(4); + + const singleEmails = singleThreaded.map((d) => d.email); + const shardedEmails = sharded4.map((d) => d.email); + + expect(shardedEmails).toEqual(singleEmails); + }); + + it("should be reproducible with same seed on different runs", async () => { + const run1 = await generateSingleThreaded(); + const run2 = await generateSingleThreaded(); + + expect(run1).toEqual(run2); + }); + + it("should produce different output with different seeds", async () => { + const configA = { ...config, seed: 123 }; + const configB = { ...config, seed: 456 }; + + const adapterA = new 
InMemoryAdapter(); + const serviceA = new TestDataGeneratorService(adapterA); + const resultA = await serviceA.generateAndPopulate(collections, [], configA); + + const adapterB = new InMemoryAdapter(); + const serviceB = new TestDataGeneratorService(adapterB); + const resultB = await serviceB.generateAndPopulate(collections, [], configB); + + const dataA = adapterA.getData("users"); + const dataB = adapterB.getData("users"); + + expect(dataA).not.toEqual(dataB); + }); +}); + +describe("Range-based Document Generation", () => { + let collections: SchemaCollection[]; + let config: TestDataConfig; + + beforeEach(() => { + collections = [ + { + id: "orders", + name: "orders", + fields: [ + { id: "o1", name: "id", type: "integer", isPrimaryKey: true }, + { id: "o2", name: "product", type: "string" }, + { id: "o3", name: "amount", type: "integer" }, + ], + position: { x: 0, y: 0 }, + }, + ]; + + config = { + collections: [{ collectionName: "orders", count: 50 }], + relationships: [], + seed: 999, + }; + }); + + it("should generate correct document count for each range", async () => { + const shards = computeShards(50, 5, config.seed!); + + for (const shard of shards) { + const adapter = new InMemoryAdapter(); + const service = new TestDataGeneratorService(adapter); + + await service.generateCollectionWithRange( + collections[0], + shard.start, + shard.count, + config, + [] + ); + + const data = adapter.getData("orders"); + expect(data.length).toBe(shard.count); + } + }); + + it("should generate IDs within the correct range", async () => { + const shard = { workerId: 0, start: 10, end: 20, count: 10 }; + + const adapter = new InMemoryAdapter(); + const service = new TestDataGeneratorService(adapter); + + await service.generateCollectionWithRange( + collections[0], + shard.start, + shard.count, + config, + [] + ); + + const data = adapter.getData("orders"); + const ids = data.map((d: any) => d.id).sort((a: number, b: number) => a - b); + + expect(ids).toEqual([11, 12, 13, 
14, 15, 16, 17, 18, 19, 20]); + }); + + it("should combine all shards into complete dataset", async () => { + const shards = computeShards(50, 5, config.seed!); + const allData: any[] = []; + + for (const shard of shards) { + const adapter = new InMemoryAdapter(); + const service = new TestDataGeneratorService(adapter); + + await service.generateCollectionWithRange( + collections[0], + shard.start, + shard.count, + config, + [] + ); + + allData.push(...adapter.getData("orders")); + } + + allData.sort((a, b) => a.id - b.id); + const ids = allData.map((d) => d.id); + + const expectedIds = Array.from({ length: 50 }, (_, i) => i + 1); + expect(ids).toEqual(expectedIds); + }); +}); diff --git a/src/generator/index.ts b/src/generator/index.ts index d1a9bf3..cf700d6 100644 --- a/src/generator/index.ts +++ b/src/generator/index.ts @@ -456,4 +456,89 @@ export class TestDataGeneratorService { public setAdapter(adapter: BaseAdapter) { this.adapter = adapter; } + + async generateCollectionWithRange( + collection: SchemaCollection, + rangeStart: number, + count: number, + config: TestDataConfig, + relationships: SchemaRelationship[], + ): Promise { + const errors: string[] = []; + const collectionResults: CollectionResult[] = []; + + try { + await this.adapter.connect(); + + await this.adapter.initialize( + config, + [collection], + relationships, + config.seed, + ); + + const fullName = this.getFullCollectionName(collection); + this.collectionIdToName.set(collection.id, fullName); + + const colConfig = config.collections.find( + (c) => + c.collectionName === collection.name || + c.collectionName === fullName, + ); + if (!colConfig) { + throw new Error(`No config found for collection: ${collection.name}`); + } + + const effectiveSchema = JSON.parse(JSON.stringify(collection)) as SchemaCollection; + effectiveSchema.fields = effectiveSchema.fields.map((field) => + this.resolveForeignKeyField(field, new Map([[fullName, effectiveSchema]])), + ); + + await 
this.adapter.ensureCollection(fullName, effectiveSchema.fields, true); + + const allowedReferenceFields = this.getAllowedReferenceFields( + effectiveSchema, + relationships, + collection.id, + ); + + const docStream = this.adapter.generateStream( + effectiveSchema, + count, + rangeStart, + ); + + const ids = await this.adapter.writeBatchStream( + fullName, + docStream, + config.batchSize, + allowedReferenceFields, + effectiveSchema.fields, + ); + + collectionResults.push({ + collectionName: fullName, + generatedIds: ids, + documentCount: ids.length, + idType: + ids.length && typeof ids[0] === "number" ? "integer" : "string", + }); + + } catch (err) { + errors.push(`Fatal error: ${err instanceof Error ? err.message : String(err)}`); + } finally { + await this.adapter.disconnect(); + } + + return { + success: errors.length === 0, + collections: collectionResults, + errors, + warnings: [], + totalDocumentsGenerated: collectionResults.reduce( + (sum, r) => sum + r.documentCount, + 0, + ), + }; + } } diff --git a/src/generator/range-determinism.test.ts b/src/generator/range-determinism.test.ts new file mode 100644 index 0000000..41fe955 --- /dev/null +++ b/src/generator/range-determinism.test.ts @@ -0,0 +1,107 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { TestDataGeneratorService } from "./index"; +import { InMemoryAdapter } from "./adapters/InMemoryAdapter"; +import { SchemaCollection } from "../types/schemaDesign"; +import { TestDataConfig } from "./types"; + +describe("generateStream Range Determinism", () => { + let adapter: InMemoryAdapter; + let service: TestDataGeneratorService; + let collections: SchemaCollection[]; + let config: TestDataConfig; + + beforeEach(() => { + adapter = new InMemoryAdapter(); + service = new TestDataGeneratorService(adapter); + + collections = [ + { + id: "users", + name: "users", + fields: [ + { id: "u1", name: "id", type: "integer", isPrimaryKey: true }, + { id: "u2", name: "name", type: "string" }, + { id: 
"u3", name: "email", type: "string" }, + ], + position: { x: 0, y: 0 }, + }, + ]; + + config = { + collections: [{ collectionName: "users", count: 50 }], + relationships: [], + seed: 42, + }; + }); + + it("should produce identical document at index 0 between single and range=0", async () => { + const result1 = await service.generateAndPopulate(collections, [], config); + expect(result1.success).toBe(true); + const data1 = adapter.getData("users"); + const doc0_v1 = { ...data1[0] }; + + const adapter2 = new InMemoryAdapter(); + const service2 = new TestDataGeneratorService(adapter2); + const result2 = await service2.generateCollectionWithRange( + collections[0], 0, 1, config, [] + ); + expect(result2.success).toBe(true); + const data2 = adapter2.getData("users"); + const doc0_v2 = { ...data2[0] }; + + expect(doc0_v1).toEqual(doc0_v2); + }); + + it("should produce identical document at index 25 between single and range=25", async () => { + const result1 = await service.generateAndPopulate(collections, [], config); + expect(result1.success).toBe(true); + const data1 = adapter.getData("users"); + const doc25_v1 = { ...data1[25] }; + + const adapter2 = new InMemoryAdapter(); + const service2 = new TestDataGeneratorService(adapter2); + const result2 = await service2.generateCollectionWithRange( + collections[0], 25, 1, config, [] + ); + expect(result2.success).toBe(true); + const data2 = adapter2.getData("users"); + const doc25_v2 = { ...data2[0] }; + + expect(doc25_v1).toEqual(doc25_v2); + }); + + it("should produce identical documents 25-30 between single and range=25,count=5", async () => { + const result1 = await service.generateAndPopulate(collections, [], config); + expect(result1.success).toBe(true); + const data1 = adapter.getData("users").slice(25, 30); + + const adapter2 = new InMemoryAdapter(); + const service2 = new TestDataGeneratorService(adapter2); + const result2 = await service2.generateCollectionWithRange( + collections[0], 25, 5, config, [] + ); + 
expect(result2.success).toBe(true); + const data2 = adapter2.getData("users"); + + expect(data1).toEqual(data2); + }); + + it("should generate first 25 and last 25 that match single-threaded", async () => { + const result1 = await service.generateAndPopulate(collections, [], config); + expect(result1.success).toBe(true); + const allData = adapter.getData("users"); + + const adapter2 = new InMemoryAdapter(); + const service2 = new TestDataGeneratorService(adapter2); + await service2.generateCollectionWithRange(collections[0], 0, 25, config, []); + const first25 = adapter2.getData("users"); + + const adapter3 = new InMemoryAdapter(); + const service3 = new TestDataGeneratorService(adapter3); + await service3.generateCollectionWithRange(collections[0], 25, 25, config, []); + const last25 = adapter3.getData("users"); + + expect(first25).toEqual(allData.slice(0, 25)); + expect(last25).toEqual(allData.slice(25, 50)); + }); +}); diff --git a/src/generator/worker.cts b/src/generator/worker.cts index c2a29b0..2f09923 100644 --- a/src/generator/worker.cts +++ b/src/generator/worker.cts @@ -19,7 +19,8 @@ if (ipcChannel && typeof ipcChannel.on === 'function') { databaseName, collections, relationships, - config + config, + shard } = msg.data; try { @@ -32,31 +33,40 @@ if (ipcChannel && typeof ipcChannel.on === 'function') { const service = new TestDataGeneratorService(adapter); - // Reconstruct the onProgress callback to send messages back to parent - const generatorConfig = { - ...config, - onProgress: async (progress: any) => { - const payload = { - event: "progress", - data: progress - }; - if (parentPort) { - parentPort.postMessage(payload); - } else if (process.send) { - process.send(payload); - } + const postProgress = async (progress: any) => { + const payload = { + event: "progress", + data: { ...progress, workerId: shard?.workerId } + }; + if (parentPort) { + parentPort.postMessage(payload); + } else if (process.send) { + process.send(payload); } }; - const result = 
await service.generateAndPopulate( - collections, - relationships, - generatorConfig - ); + let result; + if (shard && shard.collectionIndex !== undefined) { + const collection = collections[shard.collectionIndex]; + result = await service.generateCollectionWithRange( + collection, + shard.rangeStart, + shard.count, + { ...config, onProgress: postProgress }, + relationships + ); + } else { + result = await service.generateAndPopulate( + collections, + relationships, + { ...config, onProgress: postProgress } + ); + } const donePayload = { event: "done", - data: result + data: result, + workerId: shard?.workerId }; if (parentPort) parentPort.postMessage(donePayload); else if (process.send) process.send(donePayload); @@ -65,7 +75,8 @@ if (ipcChannel && typeof ipcChannel.on === 'function') { console.error("Worker Error:", error); const errorPayload = { event: "error", - error: error instanceof Error ? error.message : String(error) + error: error instanceof Error ? error.message : String(error), + workerId: shard?.workerId }; if (parentPort) parentPort.postMessage(errorPayload); else if (process.send) process.send(errorPayload);