From 45fa315c9415e6ed7d3dbce19ef8a84c1ee40319 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 00:44:25 +0300 Subject: [PATCH 01/17] RavenDB-26046 - Add CDC Sink documentation Adds full CDC Sink ongoing task documentation in Docusaurus MDX format: - 16 core pages: overview, how-it-works, schema-design, embedded-tables, linked-tables, column-mapping, patching, delete-strategies, property-retention, attachment-handling, configuration-reference, api-reference, monitoring, failover-and-consistency, troubleshooting, server-configuration - 9 PostgreSQL pages: prerequisites-checklist, wal-configuration, permissions-and-roles, initial-setup, replica-identity, replica-identity-manual-setup, cleanup-and-maintenance, monitoring-postgres, studio-ui - 4 PostgreSQL examples: simple-migration, denormalization, event-sourcing, complex-nesting - 1 SQL Server stub: overview - 4 _category_.json navigation files --- .../ongoing-tasks/cdc-sink/_category_.json | 1 + .../ongoing-tasks/cdc-sink/api-reference.mdx | 222 +++++++++++ .../cdc-sink/attachment-handling.mdx | 153 ++++++++ .../ongoing-tasks/cdc-sink/column-mapping.mdx | 192 +++++++++ .../cdc-sink/configuration-reference.mdx | 181 +++++++++ .../cdc-sink/delete-strategies.mdx | 290 ++++++++++++++ .../cdc-sink/embedded-tables.mdx | 295 ++++++++++++++ .../cdc-sink/failover-and-consistency.mdx | 172 +++++++++ .../ongoing-tasks/cdc-sink/how-it-works.mdx | 206 ++++++++++ .../ongoing-tasks/cdc-sink/linked-tables.mdx | 178 +++++++++ .../ongoing-tasks/cdc-sink/monitoring.mdx | 113 ++++++ .../ongoing-tasks/cdc-sink/overview.mdx | 151 ++++++++ .../ongoing-tasks/cdc-sink/patching.mdx | 364 ++++++++++++++++++ .../cdc-sink/postgres/_category_.json | 1 + .../postgres/cleanup-and-maintenance.mdx | 227 +++++++++++ .../postgres/examples/_category_.json | 1 + .../examples/example-complex-nesting.mdx | 230 +++++++++++ .../examples/example-denormalization.mdx | 202 ++++++++++ .../examples/example-event-sourcing.mdx | 185 +++++++++ 
.../examples/example-simple-migration.mdx | 128 ++++++ .../cdc-sink/postgres/initial-setup.mdx | 217 +++++++++++ .../cdc-sink/postgres/monitoring-postgres.mdx | 120 ++++++ .../postgres/permissions-and-roles.mdx | 149 +++++++ .../postgres/prerequisites-checklist.mdx | 157 ++++++++ .../replica-identity-manual-setup.mdx | 141 +++++++ .../cdc-sink/postgres/replica-identity.mdx | 166 ++++++++ .../cdc-sink/postgres/studio-ui.mdx | 96 +++++ .../cdc-sink/postgres/wal-configuration.mdx | 128 ++++++ .../cdc-sink/property-retention.mdx | 155 ++++++++ .../ongoing-tasks/cdc-sink/schema-design.mdx | 328 ++++++++++++++++ .../cdc-sink/server-configuration.mdx | 77 ++++ .../cdc-sink/sql-server/_category_.json | 1 + .../cdc-sink/sql-server/overview.mdx | 38 ++ .../cdc-sink/troubleshooting.mdx | 195 ++++++++++ 34 files changed, 5460 insertions(+) create mode 100644 docs/server/ongoing-tasks/cdc-sink/_category_.json create mode 100644 docs/server/ongoing-tasks/cdc-sink/api-reference.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/monitoring.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/overview.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/patching.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx create mode 100644 
docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/property-retention.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/schema-design.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json create mode 100644 docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx diff --git a/docs/server/ongoing-tasks/cdc-sink/_category_.json b/docs/server/ongoing-tasks/cdc-sink/_category_.json new file mode 100644 index 0000000000..9edf48317b --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/_category_.json @@ -0,0 +1 @@ +{"position": 2, "label": "CDC Sink"} diff --git a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx 
b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx new file mode 100644 index 0000000000..8be425736a --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx @@ -0,0 +1,222 @@ +--- +title: "CDC Sink: API Reference" +sidebar_label: API Reference +description: "Documents the Client API operations for creating, updating, querying, toggling, and deleting CDC Sink tasks programmatically." +sidebar_position: 11 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: API Reference + + + +* CDC Sink tasks can be created, updated, and managed programmatically using + the RavenDB Client API. + +* In this page: + * [Add a CDC Sink Task](#add-a-cdc-sink-task) + * [Update a CDC Sink Task](#update-a-cdc-sink-task) + * [Get Task Info](#get-task-info) + * [Toggle Task State](#toggle-task-state) + * [Delete a Task](#delete-a-task) + + + +--- + +## Add a CDC Sink Task + +Use `AddCdcSinkOperation` to create a new CDC Sink task: + + + + +{`var config = new CdcSinkConfiguration +\{ + Name = "OrdersSync", + ConnectionStringName = "MyPostgresConnection", + Tables = new List<CdcSinkTableConfig> + \{ + new CdcSinkTableConfig + \{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = new List<string> \{ "id" \}, + ColumnsMapping = new Dictionary<string, string> + \{ + ["id"] = "Id", + ["customer_name"] = "CustomerName", + ["total"] = "Total" + \} + \} + \} +\}; + +var result = await store.Maintenance.SendAsync( + new AddCdcSinkOperation(config)); + +long taskId = result.TaskId; +`} + + + + +`AddCdcSinkOperationResult`: + +| Property | Type | Description | +|----------|------|-------------| +| `TaskId` | `long` | Assigned task ID | +| `RaftCommandIndex` | `long` | Raft index of the command | + +--- + +## Update a CDC Sink Task + +Use
`UpdateCdcSinkOperation` to modify an existing task. +Pass the full updated configuration including the `TaskId`: + + + + +{`config.TaskId = taskId; // Must be set +config.Tables.Add(new CdcSinkTableConfig +\{ + Name = "Customers", + SourceTableName = "customers", + PrimaryKeyColumns = new List<string> \{ "id" \}, + ColumnsMapping = new Dictionary<string, string> + \{ + ["id"] = "Id", + ["name"] = "Name", + ["email"] = "Email" + \} +\}); + +await store.Maintenance.SendAsync( + new UpdateCdcSinkOperation(taskId, config)); +`} + + + + + +**PostgreSQL — table changes affect the publication:** + +The replication slot and publication names are fixed at task creation and do not +change when you update the task. However, the **publication** controls which tables +CDC Sink receives changes for, so table changes require database-level updates: + +* **Adding tables** — If CDC Sink has the necessary permissions, it will automatically + run `ALTER PUBLICATION publication_name ADD TABLE table_name;` when the task is updated. If it + does not have permissions, a database administrator must run this manually. +* **Removing tables** — `ALTER PUBLICATION publication_name DROP TABLE table_name;` must be + run manually by a database administrator. CDC Sink does not remove tables from the + publication automatically. + +If the slot and publication were auto-named, a database administrator can look up +the current names from the task error log or from PostgreSQL: + +```sql +SELECT slot_name FROM pg_replication_slots WHERE slot_name LIKE 'rvn_cdc_s_%'; +SELECT pubname FROM pg_publication WHERE pubname LIKE 'rvn_cdc_p_%'; +``` + +See [Initial Setup](./postgres/initial-setup.mdx) for +guidance on slot and publication management. + +Dropped or orphaned slots and publications must be cleaned up manually by the +database administrator. +See [Cleanup and Maintenance](./postgres/cleanup-and-maintenance.mdx).
+ + +--- + +## Get Task Info + +Use `GetOngoingTaskInfoOperation` to retrieve the current state of a CDC Sink task: + + + + +{`var taskInfo = await store.Maintenance.SendAsync( + new GetOngoingTaskInfoOperation(taskId, OngoingTaskType.CdcSink)); +`} + + + + +--- + +## Toggle Task State + +Pause or resume a CDC Sink task using `ToggleOngoingTaskStateOperation`: + + + + +{`// Pause the task +await store.Maintenance.SendAsync( + new ToggleOngoingTaskStateOperation(taskId, OngoingTaskType.CdcSink, disable: true)); + +// Resume the task +await store.Maintenance.SendAsync( + new ToggleOngoingTaskStateOperation(taskId, OngoingTaskType.CdcSink, disable: false)); +`} + + + + + +**PostgreSQL:** Pausing a CDC Sink task stops the replication slot from being consumed. +PostgreSQL retains WAL segments for unconsumed slots, so pausing for an extended period +causes WAL to accumulate on disk. Monitor disk usage if a task is paused for more than +a short time. +See [Monitoring PostgreSQL](./postgres/monitoring-postgres.mdx). + +**SQL Server:** Pausing a CDC Sink task for an extended period (days) may cause SQL Server +to trim the CDC change tables, removing rows that have not yet been consumed. A short +pause is safe. If the task is paused long enough that trimming occurs, CDC Sink will +report an error on resume — the task may need to be recreated rather than simply resumed. + + +--- + +## Delete a Task + +Use `DeleteOngoingTaskOperation` to delete a CDC Sink task: + + + + +{`await store.Maintenance.SendAsync( + new DeleteOngoingTaskOperation(taskId, OngoingTaskType.CdcSink)); +`} + + + + + +Deleting the task in RavenDB does **not** drop the replication slot or publication +in the source database. These must be cleaned up manually by the database administrator. +See [Cleanup and Maintenance](./postgres/cleanup-and-maintenance.mdx). 
+ + +--- + +## Related Articles + +### CDC Sink + +- [Configuration Reference](./configuration-reference.mdx) +- [Overview](./overview.mdx) + +### Client API + +- [Operations: How to Send Operations](../../../client-api/operations/how-to/send-multiple-operations) +- [Ongoing Task Operations](../../../client-api/operations/maintenance/ongoing-tasks/ongoing-task-operations) diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx new file mode 100644 index 0000000000..1f60880eab --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -0,0 +1,153 @@ +--- +title: "CDC Sink: Attachment Handling" +sidebar_label: Attachment Handling +description: "Explains how to store binary SQL columns as RavenDB attachments using AttachmentNameMapping on root and embedded table configurations." +sidebar_position: 9 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Attachment Handling + + + +* Binary SQL columns can be stored as RavenDB **attachments** instead of document + properties using `AttachmentNameMapping`. + +* This applies to both root tables and embedded tables. 
+ +* In this page: + * [Root Table Attachments](#root-table-attachments) + * [Embedded Table Attachments](#embedded-table-attachments) + * [Attachment Naming](#attachment-naming) + * [Attachment Lifecycle](#attachment-lifecycle) + + + +--- + +## Root Table Attachments + +Use `AttachmentNameMapping` to map a binary SQL column to a RavenDB attachment: + + + + +{`new CdcSinkTableConfig +\{ + Name = "Files", + SourceTableName = "files", + PrimaryKeyColumns = ["id"], + ColumnsMapping = new Dictionary + \{ + ["id"] = "Id", + ["filename"] = "Filename", + ["mime_type"] = "MimeType" + \}, + AttachmentNameMapping = new Dictionary + \{ + ["content"] = "file" // SQL column "content" → attachment named "file" + \} +\} +`} + + + + +The binary `content` column is stored as an attachment named `"file"` on the document. +The attachment is stored with content type `application/octet-stream`. + +--- + +## Embedded Table Attachments + +Binary columns on embedded tables are stored as attachments on the **parent** document. +The attachment name is automatically prefixed to ensure uniqueness: + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "photos", + PropertyName = "Photos", + PrimaryKeyColumns = ["photo_num"], + JoinColumns = ["product_id"], + ColumnsMapping = new Dictionary + \{ + ["photo_num"] = "PhotoNum", + ["caption"] = "Caption" + \}, + AttachmentNameMapping = new Dictionary + \{ + ["thumbnail"] = "thumb" + \} +\} +`} + + + + +A photo with `photo_num = 1` creates an attachment named `"Photos/1/thumb"` on the +parent document. The prefix `"Photos/1/"` is generated from the `PropertyName` and +the primary key value. + +--- + +## Attachment Naming + +**Root table attachments:** + +The attachment name is exactly the value you specify in `AttachmentNameMapping`. 
+ +``` +AttachmentNameMapping = { ["content"] = "file" } +→ Attachment name: "file" +``` + +**Embedded table attachments:** + +The attachment name is prefixed with `{PropertyName}/{pkValue}/`: + +``` +PropertyName = "Photos" +PrimaryKeyColumns = ["photo_num"] → photo_num = 1 +AttachmentNameMapping = { ["thumbnail"] = "thumb" } +→ Attachment name: "Photos/1/thumb" +``` + +For composite primary keys, all key values are joined: + +``` +PrimaryKeyColumns = ["date", "seq"] → date='2024-01', seq=3 +→ Attachment name: "Photos/2024-01/3/thumb" +``` + +--- + +## Attachment Lifecycle + +* **INSERT** — Attachment is created on the document +* **UPDATE** — Attachment is replaced with the new binary data +* **DELETE (embedded item)** — All attachments for that item are automatically removed + from the parent document +* **DELETE (root document)** — Document and all its attachments are deleted + +--- + +## Related Articles + +### CDC Sink + +- [Column Mapping](./column-mapping.mdx) +- [Embedded Tables](./embedded-tables.mdx) +- [Configuration Reference](./configuration-reference.mdx) + +### Document Extensions + +- [Attachments: What are Attachments](../../../document-extensions/attachments/what-are-attachments) diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx new file mode 100644 index 0000000000..4eaab62609 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -0,0 +1,192 @@ +--- +title: "CDC Sink: Column Mapping" +sidebar_label: Column Mapping +description: "Explains how to control which SQL columns appear in RavenDB documents and under what property names using ColumnsMapping and AttachmentNameMapping." 
+sidebar_position: 5 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Column Mapping + + + +* Column mapping controls which SQL columns appear in the RavenDB document and + under what property names. + +* In this page: + * [Mapping Columns to Properties](#mapping-columns-to-properties) + * [Unmapped Columns](#unmapped-columns) + * [Attachment Mapping](#attachment-mapping) + * [Schema (Source Table Schema)](#schema-source-table-schema) + + + +--- + +## Mapping Columns to Properties + +`ColumnsMapping` is a `Dictionary` where each entry maps a SQL column +name to a RavenDB document property name: + + + + +{`ColumnsMapping = new Dictionary +\{ + ["id"] = "Id", + ["customer_name"] = "CustomerName", + ["order_date"] = "OrderDate", + ["total_amount"] = "TotalAmount" +\} +`} + + + + +**Key:** SQL column name (case-insensitive match against the column names in CDC events) +**Value:** Property name in the RavenDB document + + +The primary key column(s) do not need to be mapped. When included in `ColumnsMapping`, +they become a regular document property. When omitted, the PK values are still used +to build the document ID — they just won't appear as a named property. + +Including the PK in the mapping is generally useful so the document carries its own +identifier, but it is not required. + + +**Type conversions:** SQL numeric, boolean, and date types are converted to their +JSON equivalents. SQL `NULL` becomes JSON `null`. If you need custom type handling +or derived values, use a `Patch` script. + +**At least one mapping is required.** An empty `ColumnsMapping` is a validation error. + +The same rules apply to embedded table column mappings. 
+ +--- + +## Unmapped Columns + +Columns not listed in `ColumnsMapping` are **not stored** in the document, but they +are available in patch scripts via `$row`. + +This allows you to use data for computations without permanently storing raw SQL values: + + + + +{`ColumnsMapping = new Dictionary +\{ + ["id"] = "Id", + ["name"] = "Name" + // base_price and tax_rate are NOT mapped — won't appear in document +\}, +Patch = "this.FinalPrice = $row.base_price * (1 + $row.tax_rate);" +`} + + + + +In this example, `base_price` and `tax_rate` are available during the patch but +not stored as document properties. Only the computed `FinalPrice` is stored. + + +**Naming context:** +Property names (the values in `ColumnsMapping`) become properties on the RavenDB +document — accessible as `this.FinalPrice` inside a patch script. + +Column names (the keys in `ColumnsMapping`, plus any unmapped columns) are accessible +in patch scripts via `$row.base_price` (for the current row's values) and +`$old?.base_price` (for the previous row's values on UPDATE events). + + +--- + +## Attachment Mapping + +Binary SQL columns (e.g., PostgreSQL `BYTEA`) can be stored as RavenDB attachments +instead of document properties using `AttachmentNameMapping`: + + + + +{`new CdcSinkTableConfig +\{ + Name = "Files", + SourceTableName = "files", + PrimaryKeyColumns = ["id"], + ColumnsMapping = \{ ["id"] = "Id", ["filename"] = "Filename" \}, + AttachmentNameMapping = new Dictionary + \{ + ["content"] = "file" // SQL column "content" → attachment named "file" + \} +\} +`} + + + + +The binary column `content` becomes an attachment named `"file"` on the document. + +**Embedded table attachments:** + +Binary columns on embedded tables are also supported. 
The attachment name is prefixed +with the embedded property path and primary key to ensure uniqueness: + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "photos", + PropertyName = "Photos", + PrimaryKeyColumns = ["photo_num"], + AttachmentNameMapping = \{ ["thumbnail"] = "thumb" \} +\} +`} + + + + +A photo with `photo_num = 1` creates attachment `"Photos/1/thumb"` on the parent document. +When the embedded item is deleted, its attachments are automatically removed. + +--- + +## Schema (Source Table Schema) + +`SourceTableSchema` specifies the SQL schema containing the table. It defaults to +`"public"` for PostgreSQL if omitted. + + + + +{`new CdcSinkTableConfig +\{ + SourceTableSchema = "sales", // Table is in the "sales" schema + SourceTableName = "orders", + // ... +\} +`} + + + + +For most PostgreSQL setups using the default `public` schema, this can be omitted. + +--- + +## Related Articles + +### CDC Sink + +- [Schema Design](./schema-design.mdx) +- [Patching](./patching.mdx) +- [Attachment Handling](./attachment-handling.mdx) +- [Configuration Reference](./configuration-reference.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx new file mode 100644 index 0000000000..2ec550a6e2 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -0,0 +1,181 @@ +--- +title: "CDC Sink: Configuration Reference" +sidebar_label: Configuration Reference +description: "Documents all configuration classes used to define a CDC Sink task, including CdcSinkConfiguration, table configs, and relation types." 
+sidebar_position: 10 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Configuration Reference + + + +* This page documents all configuration classes used to define a CDC Sink task. + +* In this page: + * [CdcSinkConfiguration](#cdcsinkconfiguration) + * [CdcSinkPostgresSettings](#cdcsinkpostgressettings) + * [CdcSinkTableConfig](#cdcsinktableconfig) + * [CdcSinkEmbeddedTableConfig](#cdcsinkembeddedtableconfig) + * [CdcSinkLinkedTableConfig](#cdcsinklinkedtableconfig) + * [CdcSinkOnDeleteConfig](#cdcsinkondeleteconfig) + * [CdcSinkRelationType](#cdcsinkrelationtype) + + + +--- + +## CdcSinkConfiguration + +The top-level configuration object for a CDC Sink task. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `Name` | `string` | ✓ | Unique task name | +| `ConnectionStringName` | `string` | ✓ | Name of the SQL connection string | +| `Tables` | `List<CdcSinkTableConfig>` | ✓ | Root table configurations (at least one required) | +| `Postgres` | `CdcSinkPostgresSettings` | | PostgreSQL-specific settings (slot and publication names) | +| `SkipInitialLoad` | `bool` | | When `true`, skip the initial full-table scan and start streaming CDC changes immediately. Only applies on first startup — has no effect once the initial load has completed. Default: `false` | +| `Disabled` | `bool` | | Pause the task without deleting it. Default: `false` | +| `MentorNode` | `string` | | Preferred cluster node for execution | +| `PinToMentorNode` | `bool` | | Pin the task to the mentor node. Default: `false` | +| `TaskId` | `long` | | Set by the server on creation | + +--- + +## CdcSinkPostgresSettings + +PostgreSQL-specific settings. Assigned to `CdcSinkConfiguration.Postgres`.
+Leave `null` for non-PostgreSQL connections. + +| Property | Type | Description | +|----------|------|-------------| +| `SlotName` | `string` | Name of the PostgreSQL logical replication slot. If omitted on creation, a deterministic hash-based name is used. Immutable once set. Max 63 characters; lowercase letters, digits, and underscores only. | +| `PublicationName` | `string` | Name of the PostgreSQL publication. Same auto-fill and immutability rules as `SlotName`. | + +Setting these explicitly is useful when: +- A database administrator pre-creates the slot and publication with human-readable names +- Migrating from a previous CDC Sink task and reusing an existing slot +- Running multiple environments (dev/staging/prod) with predictable names + +See [Initial Setup](./postgres/initial-setup.mdx) for details. + + +For embedded tables where the join columns are not part of the primary key, the +PostgreSQL table must have `REPLICA IDENTITY` configured so that DELETE events include +the join column values. See [REPLICA IDENTITY](./postgres/replica-identity.mdx). + + +--- + +## CdcSinkTableConfig + +Configures a root table — one SQL table mapped to one RavenDB collection. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `Name` | `string` | ✓ | RavenDB collection name (e.g., `"Orders"`) | +| `SourceTableName` | `string` | ✓ | SQL table name (e.g., `"orders"`) | +| `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | +| `PrimaryKeyColumns` | `List<string>` | ✓ | SQL columns used for document ID generation | +| `ColumnsMapping` | `Dictionary<string, string>` | ✓ | SQL column → document property | +| `AttachmentNameMapping` | `Dictionary<string, string>` | | Binary SQL column → attachment name | +| `Patch` | `string` | | JavaScript patch for INSERT and UPDATE | +| `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior.
Default: delete document | +| `EmbeddedTables` | `List<CdcSinkEmbeddedTableConfig>` | | Nested table configurations | +| `LinkedTables` | `List<CdcSinkLinkedTableConfig>` | | Foreign key reference configurations | +| `Disabled` | `bool` | | Skip this table. Default: `false` | + +--- + +## CdcSinkEmbeddedTableConfig + +Configures a table whose rows are embedded as nested objects within a parent document. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `SourceTableName` | `string` | ✓ | SQL table name | +| `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | +| `PropertyName` | `string` | ✓ | Property name in the parent document (e.g., `"Lines"`) | +| `Type` | `CdcSinkRelationType` | ✓ | `Array`, `Map`, or `Value` | +| `JoinColumns` | `List<string>` | ✓ | FK columns referencing parent's `PrimaryKeyColumns` | +| `PrimaryKeyColumns` | `List<string>` | ✓ | PK columns for matching items on UPDATE/DELETE | +| `ColumnsMapping` | `Dictionary<string, string>` | ✓ | SQL column → embedded property | +| `AttachmentNameMapping` | `Dictionary<string, string>` | | Binary SQL column → attachment name | +| `Patch` | `string` | | JavaScript patch on **parent** document for INSERT/UPDATE | +| `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior for embedded items | +| `CaseSensitiveKeys` | `bool` | | Case-sensitive PK matching. Default: `false` | +| `EmbeddedTables` | `List<CdcSinkEmbeddedTableConfig>` | | Nested embedded tables | +| `Disabled` | `bool` | | Skip this table. Default: `false` | + +--- + +## CdcSinkLinkedTableConfig + +Configures a foreign key reference that becomes a document ID in the parent document. + +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `SourceTableName` | `string` | ✓ | SQL table name of the referenced table | +| `SourceTableSchema` | `string` | | SQL schema name.
Default: `"public"` | +| `PropertyName` | `string` | ✓ | Property name in the parent document (e.g., `"Customer"`) | +| `LinkedCollectionName` | `string` | ✓ | Target RavenDB collection for ID generation (e.g., `"Customers"`) | +| `Type` | `CdcSinkRelationType` | ✓ | `Value` (single reference) or `Array` (multiple references) | +| `JoinColumns` | `List<string>` | ✓ | FK columns used to build the referenced document ID | + +--- + +## CdcSinkOnDeleteConfig + +Controls how DELETE events are handled for a table or embedded table. + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `Patch` | `string` | `null` | JavaScript patch that runs when a DELETE event arrives | +| `IgnoreDeletes` | `bool` | `false` | When `true`, skip the delete — document/item is kept | + +**Available patch variables for OnDelete:** + +* `this` — the document (root or parent for embedded) +* `$row` — all SQL columns from the DELETE event +* `$old` — the embedded item's last known state (for embedded tables) + +**Behavior matrix:** + +| IgnoreDeletes | Patch | Result | +|---------------|-------|--------| +| `false` | `null` | Normal delete (default) | +| `false` | set | Patch runs, then delete proceeds | +| `true` | `null` | DELETE discarded silently | +| `true` | set | Patch runs, delete skipped | + +--- + +## CdcSinkRelationType + +Specifies the structure of embedded or linked data in the document. + +| Value | Description | +|-------|-------------| +| `Array` | One-to-many: stored as a JSON array.
Items matched by PK for UPDATE/DELETE | +| `Map` | One-to-many: stored as a JSON object keyed by PK value(s) | +| `Value` | Many-to-one: stored as a single embedded object or document reference | + +--- + +## Related Articles + +### CDC Sink + +- [Schema Design](./schema-design.mdx) +- [Embedded Tables](./embedded-tables.mdx) +- [Linked Tables](./linked-tables.mdx) +- [Patching](./patching.mdx) +- [Delete Strategies](./delete-strategies.mdx) +- [API Reference](./api-reference.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx b/docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx new file mode 100644 index 0000000000..c8beb3d9a2 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/delete-strategies.mdx @@ -0,0 +1,290 @@ +--- +title: "CDC Sink: Delete Strategies" +sidebar_label: Delete Strategies +description: "Documents configurable behavior for DELETE events in CDC Sink, including archive, audit trail, and silent ignore patterns." +sidebar_position: 7 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Delete Strategies + + + +* CDC Sink provides configurable behavior for DELETE events through the + `CdcSinkOnDeleteConfig` object on both root and embedded table configurations. + +* By default, a DELETE event deletes the corresponding RavenDB document or removes + the embedded item. The `OnDelete` configuration changes this. 
+ +* In this page: + * [Default Behavior](#default-behavior) + * [OnDelete Configuration](#ondelete-configuration) + * [Pattern: Archive](#pattern-archive) + * [Pattern: Audit Trail (Root Document)](#pattern-audit-trail-root-document) + * [Pattern: Silent Ignore](#pattern-silent-ignore) + * [OnDelete for Embedded Tables](#ondelete-for-embedded-tables) + * [Behavior Summary](#behavior-summary) + * [DELETE Routing and REPLICA IDENTITY](#delete-routing-and-replica-identity) + + + +--- + +## Default Behavior + +When `OnDelete` is `null` (or not set): + +* **Root table DELETE** — The corresponding RavenDB document is deleted +* **Embedded table DELETE (Array/Map)** — The item is removed from the parent document's array or map +* **Embedded table DELETE (Value)** — The `Value`-type property on the parent document is set to `null` + +--- + +## OnDelete Configuration + +`CdcSinkOnDeleteConfig` has two properties: + + + + +{`public class CdcSinkOnDeleteConfig +\{ + // JavaScript patch that runs when a DELETE event arrives + // Available variables: this, $row, $old + public string Patch \{ get; set; \} + + // When true, the delete is not applied — document/item is kept + public bool IgnoreDeletes \{ get; set; \} +\} +`} + + + + +* If `Patch` is set, it runs **before** the delete decision is made +* If `IgnoreDeletes = true`, the deletion is skipped after the patch runs +* If `IgnoreDeletes = false` (default), the deletion proceeds after the patch runs + + +When `IgnoreDeletes = true`, CDC Sink skips the delete but still **saves any changes +made to `this`** in the patch. This means a patch can modify the document (e.g., +set an `Archived` flag) and those changes are written to RavenDB even though the +document is not deleted. + + +--- + +## Pattern: Archive + +Keep the document in RavenDB but mark it as deleted.
The patch runs to mark it, +and `IgnoreDeletes = true` prevents the actual deletion: + + + + +{`OnDelete = new CdcSinkOnDeleteConfig +\{ + IgnoreDeletes = true, + Patch = @" + this.Archived = true; + this.ArchivedAt = new Date().toISOString(); + " +\} +`} + + + + +The document remains in RavenDB with `Archived = true`. Queries can filter on this +field to exclude archived records. + +--- + +## Pattern: Audit Trail (Root Document) + +When a root document is deleted, write an audit record to a separate RavenDB +collection using `put()` before the deletion proceeds. + +The document is deleted after the patch runs, so any writes to `this` are lost. +Use `put()` to create a document that survives the deletion: + + + + +{`OnDelete = new CdcSinkOnDeleteConfig +\{ + // IgnoreDeletes defaults to false — delete proceeds after patch + Patch = @" + // Create a permanent audit record in a separate collection + put('DeletedOrders/' + this.Id, \{ + OriginalId: this.Id, + Customer: this.Customer, + Total: this.Total, + DeletedAt: new Date().toISOString() + \}); + " +\} +`} + + + + +The patch creates a document in `DeletedOrders/` before `this` is deleted. +The audit record persists permanently. + +--- + +## Pattern: Silent Ignore + +Discard DELETE events without running any patch. Use this for append-only data +where deletes should never result in document removal: + + + + +{`OnDelete = new CdcSinkOnDeleteConfig +\{ + IgnoreDeletes = true + // No patch — DELETE events are silently discarded +\} +`} + + + + +--- + +## OnDelete for Embedded Tables + +The same `OnDelete` configuration works on embedded tables. 
For embedded tables: + +* `Patch` runs on the **parent document** (not the embedded item) +* `$old` contains the embedded item's last known state before deletion +* `IgnoreDeletes = true` prevents the item from being removed from the array/map + +**Example: Keep deleted items in an audit array** + +Rather than removing a deleted line item from the array, move it to a separate +`DeletedLines` property: + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "order_lines", + PropertyName = "Lines", + // ... + OnDelete = new CdcSinkOnDeleteConfig + \{ + IgnoreDeletes = true, + Patch = @" + // Remove from active Lines + this.Lines = (this.Lines || []) + .filter(l => l.LineId !== $old?.LineId); + + // Add to DeletedLines audit array + this.DeletedLines = this.DeletedLines || []; + this.DeletedLines.push(\{ + LineId: $old?.LineId, + Product: $old?.Product, + Quantity: $old?.Quantity, + DeletedAt: new Date().toISOString() + \}); + " + \} +\} +`} + + + + +With `IgnoreDeletes = true`, CDC Sink does not automatically remove the item — +the patch takes full control of both the `Lines` array and the `DeletedLines` audit trail. + +**Example: Run a patch but still remove the item** + +To run some logic on DELETE while still removing the item from the array, +use `IgnoreDeletes = false` (default) — CDC Sink handles the removal, and the patch +runs before it: + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "order_lines", + PropertyName = "Lines", + // ... + OnDelete = new CdcSinkOnDeleteConfig + \{ + // IgnoreDeletes = false (default) — CDC Sink removes the item after patch + Patch = @" + // Accumulate the total value of the lines that have been removed + this.RemovedTotal = (this.RemovedTotal || 0) + + ($old?.UnitPrice || 0) * ($old?.Quantity || 0); + " + \} +\} +`} + + + + +The `Lines` item is removed by CDC Sink. The patch only needs to handle the +side-effect logic.
+ +--- + +## Behavior Summary + +| IgnoreDeletes | Patch | Behavior | +|---------------|-------|----------| +| `false` | null | Normal delete (default) | +| `false` | set | Patch runs, then delete proceeds | +| `true` | null | DELETE event discarded silently | +| `true` | set | Patch runs, then delete is skipped | + +--- + +## DELETE Routing and REPLICA IDENTITY + +For CDC Sink to route a DELETE event to the correct document or embedded item, +the source database must include the necessary column values in the DELETE event. + +For embedded tables where the join column is not in the primary key, the source +database may need additional configuration. + + +The REPLICA IDENTITY requirement described below is **PostgreSQL-specific**. +Other databases may have different requirements or may not need any extra +configuration for DELETE routing. + + +See: + +* [REPLICA IDENTITY](./postgres/replica-identity.mdx) (PostgreSQL) + +**Skipping REPLICA IDENTITY requirements:** Set `OnDelete.IgnoreDeletes = true` +to discard DELETE events for an embedded table entirely. This skips the REPLICA +IDENTITY check, since delete routing is no longer needed. + +--- + +## Related Articles + +### CDC Sink + +- [Patching](./patching.mdx) +- [Embedded Tables](./embedded-tables.mdx) +- [Configuration Reference](./configuration-reference.mdx) + +### PostgreSQL + +- [REPLICA IDENTITY](./postgres/replica-identity.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx new file mode 100644 index 0000000000..813c702111 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx @@ -0,0 +1,295 @@ +--- +title: "CDC Sink: Embedded Tables" +sidebar_label: Embedded Tables +description: "Covers configuration options, nesting constraints, and how embedded items are updated and deleted in CDC Sink." 
+sidebar_position: 3 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Embedded Tables + + + +* Embedded tables allow CDC Sink to nest SQL table data inside a parent document as + arrays, maps, or single objects. + +* This page covers configuration options, nesting constraints, and how embedded items + are updated and deleted. + +* In this page: + * [Basic Configuration](#basic-configuration) + * [Join Columns and Primary Key Interaction](#join-columns-and-primary-key-interaction) + * [Matching Items on Update and Delete](#matching-items-on-update-and-delete) + * [Deep Nesting](#deep-nesting) + * [Attachments on Embedded Items](#attachments-on-embedded-items) + * [Disabling an Embedded Table](#disabling-an-embedded-table) + + + +--- + +## Basic Configuration + +`CdcSinkEmbeddedTableConfig` is placed inside a root table's `EmbeddedTables` list: + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableSchema = "public", // SQL schema (optional) + SourceTableName = "order_lines", // SQL table name + PropertyName = "Lines", // Property name in parent document + Type = CdcSinkRelationType.Array, // Array, Map, or Value + JoinColumns = ["order_id"], // FK to parent's PrimaryKeyColumns + PrimaryKeyColumns = ["line_id"], // Used to match items on UPDATE/DELETE + ColumnsMapping = new Dictionary<string, string> + \{ + ["line_id"] = "LineId", + ["product"] = "Product", + ["quantity"] = "Quantity" + \} +\} +`} + + + + +--- + +## Join Columns and Primary Key Interaction + +### Purpose of JoinColumns + +`JoinColumns` specifies the foreign key columns in the embedded table that reference +the parent's primary key. CDC Sink uses these columns to route each row to the correct +parent document.
+ +The `JoinColumns` values must exactly match the parent's `PrimaryKeyColumns`: + + + + +{`// Parent root table: +PrimaryKeyColumns = ["order_id"] + +// Embedded table - correct: +JoinColumns = ["order_id"] // References parent's PK column + +// Embedded table - INCORRECT: +JoinColumns = ["customer_id"] // Does not reference the parent's PK +`} + + + + +### DELETE Events + +For DELETE events, the source database must include the join column values so CDC Sink +can route the delete to the correct parent document. + +By default, many databases only include primary key columns in DELETE events. If the +join column is _not_ in the embedded table's primary key, additional source database +configuration is required. + +**PostgreSQL:** See [REPLICA IDENTITY](./postgres/replica-identity.mdx) +for how CDC Sink handles this automatically when it has sufficient permissions, or how +a DBA can configure it manually. + +### Avoiding REPLICA IDENTITY Requirements + +The cleanest solution is to include the join column in the embedded table's primary key: + + + + +{`-- SQL schema: +CREATE TABLE order_lines ( + order_id INT NOT NULL REFERENCES orders(id), + line_id INT NOT NULL, + product VARCHAR(200), + PRIMARY KEY (order_id, line_id) -- order_id in PK means DELETE events include it +); +`} + + + + + + + +{`// Configuration: +PrimaryKeyColumns = ["order_id", "line_id"] +JoinColumns = ["order_id"] +`} + + + + +With `order_id` in the primary key, DELETE events include it by default and no additional +source database configuration is needed. + +Alternatively, set `OnDelete.IgnoreDeletes = true` to skip delete routing entirely +if deletes on that embedded table don't need to be processed. +See [Delete Strategies](./delete-strategies.mdx). + +--- + +## Matching Items on Update and Delete + +When an UPDATE or DELETE arrives for an embedded row, CDC Sink must find the correct +item within the parent document's array or map. 
+ +Items are matched by their full `PrimaryKeyColumns` composite key: + +* **INSERT** — New item appended to array / added to map +* **UPDATE** — Item found by PK match; mapped columns overwritten +* **DELETE** — Item found by PK match; removed from array/map (or OnDelete behavior applied) + +**Composite PKs** work the same way — all PK columns must match: + + + + +{`PrimaryKeyColumns = ["invoice_date", "invoice_seq"] + +// UPDATE event for (invoice_date='2024-01-15', invoice_seq=3) +// → Finds and updates the item where both columns match +`} + + + + +**Case sensitivity:** By default, PK matching is case-insensitive. Set +`CaseSensitiveKeys = true` on `CdcSinkEmbeddedTableConfig` if your keys are +case-sensitive. + +--- + +## Deep Nesting + +Embedded tables can contain their own `EmbeddedTables`, creating hierarchies +with multiple levels. + +**Key constraint:** Every descendant table must carry the **root table's primary key** +as a denormalized column. This is required because CDC Sink routes each row to its +root document in a single pass. + +**Example: Company → Departments → Employees** + +The `employees` table must have `company_id` (the root PK) even though it only +directly joins to `departments`: + + + + +{`CREATE TABLE employees ( + company_id INT NOT NULL, -- Denormalized root PK + dept_id INT NOT NULL, + emp_id INT NOT NULL, + PRIMARY KEY (company_id, dept_id, emp_id) +); +`} + + + + + + + +{`// Configuration for the employees embedded table: +JoinColumns = ["company_id", "dept_id"] // Root PK + parent PK +PrimaryKeyColumns = ["emp_id"] +`} + + + + +**Why is the root PK required?** + +When a CDC Sink event arrives for an `employees` row, CDC Sink needs to: + +1. Find the root document: `Companies/\{company_id\}` +2. Navigate to the correct `Departments` array item: `Departments.find(d => d.dept_id == dept_id)` +3. 
Add or update the `Employees` array item: `Employees.find(e => e.emp_id == emp_id)` + +Without `company_id` in the event, CDC Sink cannot identify the root document +without an additional lookup, which is not supported. + +--- + +## Attachments on Embedded Items + +Binary columns from embedded tables can be stored as RavenDB attachments using +`AttachmentNameMapping`. + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "photos", + PropertyName = "Photos", + PrimaryKeyColumns = ["photo_num"], + JoinColumns = ["product_id"], + ColumnsMapping = \{ ["photo_num"] = "PhotoNum", ["caption"] = "Caption" \}, + AttachmentNameMapping = \{ ["thumbnail"] = "thumb" \} +\} +`} + + + + +A photo with `photo_num = 1` creates an attachment named `"Photos/1/thumb"` on the +parent document. The attachment name is prefixed with the embedded path and PK to +ensure uniqueness within the document. + +When the embedded item is deleted, its attachments are automatically removed. + +--- + +## Disabling an Embedded Table + +Set `Disabled = true` to pause processing for a specific embedded table without +removing it from the configuration: + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "audit_log", + PropertyName = "AuditLog", + Disabled = true, + // ... other config +\} +`} + + + + +Changes from the source table are ignored while `Disabled = true`. When re-enabled, +CDC Sink resumes from the current position — it does not backfill missed events. 
+ +--- + +## Related Articles + +### CDC Sink + +- [Schema Design](./schema-design.mdx) +- [Linked Tables](./linked-tables.mdx) +- [Delete Strategies](./delete-strategies.mdx) +- [Attachment Handling](./attachment-handling.mdx) +- [Configuration Reference](./configuration-reference.mdx) + +### PostgreSQL + +- [REPLICA IDENTITY](./postgres/replica-identity.mdx) +- [REPLICA IDENTITY Manual Setup](./postgres/replica-identity-manual-setup.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx b/docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx new file mode 100644 index 0000000000..5298f4d3a4 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/failover-and-consistency.mdx @@ -0,0 +1,172 @@ +--- +title: "CDC Sink: Failover and Consistency" +sidebar_label: Failover and Consistency +description: "Explains how CDC Sink handles node failover, what consistency guarantees it provides, and why patches must be designed to be idempotent." +sidebar_position: 13 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Failover and Consistency + + + +* This page explains how CDC Sink handles node failover, what consistency guarantees + it provides, and why patches must be designed to be idempotent. + +* In this page: + * [State Storage](#state-storage) + * [Failover Behavior](#failover-behavior) + * [Re-Reading Changes](#re-reading-changes) + * [Consistency Guarantee](#consistency-guarantee) + * [Idempotency Requirements](#idempotency-requirements) + + + +--- + +## State Storage + +CDC Sink persists its progress as a **document in RavenDB** alongside your data. 
+ +This state document records: + +* The last acknowledged position in the source database's change log +* Per-table initial load progress (which tables have completed, and the last key scanned) + +Like any RavenDB document, this state document is subject to **normal replication behavior**. +Different nodes in a cluster may have different versions of this document at any given moment. + +--- + +## Failover Behavior + +When the cluster elects a new mentor node for a CDC Sink task, the new node reads +the **replicated state** — which may be older than the work the previous mentor +completed but had not yet replicated. + +**Example:** + + + + +{`Before failure: +├─ Node A (Mentor) — Processed up to position X, state document not yet replicated +├─ Node B (Replica) — State document shows position X-100 (older, replicated state) +└─ Node C (Replica) — State document shows position X-100 + +Node A crashes. +Node B is elected new mentor. +Node B reads its state document: position X-100. +Node B resumes CDC Sink from position X-100. +`} + + + + +Changes between positions X-100 and X may already be in RavenDB (written by Node A), +or they may not be (if Node A crashed before writing). Node B will re-read and re-apply +them either way. + +--- + +## Re-Reading Changes + +**CDC Sink guarantees that no changes are lost, but some changes may be re-read.** + +When the new mentor resumes from an older position: + +* Changes that were already processed and fully replicated — re-applied (idempotent merge) +* Changes that were processed but not yet replicated — applied for the "first time" on new node +* New changes not yet seen — applied normally + +Re-reading is normal and expected. The document merge strategy ensures that re-applying +an INSERT or UPDATE with the same values is safe — properties are simply overwritten +with the same values they already have. + + +Patches that are not idempotent can produce incorrect results when a change is +re-read after a failover. 
See [Idempotency Requirements](#idempotency-requirements) below. + + +--- + +## Consistency Guarantee + +CDC Sink provides **at-least-once delivery** with **eventual consistency**: + +* ✓ No changes are lost +* ✓ Changes are applied in source transaction order +* ✓ After failover, progress resumes from the last replicated state +* ✓ Idempotent patches ensure re-reads are safe +* ✗ No "exactly once" guarantee — the same change may be applied more than once + +This is the same model used by other ongoing tasks in RavenDB. + +--- + +## Idempotency Requirements + +A patch is **idempotent** if applying it multiple times produces the same result as +applying it once. + +**Column mapping is always idempotent** — overwriting a property with the same value +is a no-op. + +**Patches that use absolute values are idempotent:** + + + + +{`// Idempotent — always sets to the current SQL value +this.Status = $row.is_active ? 'Active' : 'Inactive'; +this.ViewCount = $row.view_count; +`} + + + + +**Patches that increment are NOT idempotent:** + + + + +{`// NOT idempotent — increments again on re-read +this.ViewCount = (this.ViewCount || 0) + 1; +`} + + + + +**Delta patches using $old are idempotent** because `$old` reflects the previous +embedded item state, not the document's accumulated value: + + + + +{`// Idempotent — delta is recomputed correctly on re-read +const oldAmount = $old?.Amount || 0; +const newAmount = $row.amount || 0; +this.RunningTotal = (this.RunningTotal || 0) + (newAmount - oldAmount); +`} + + + + +On re-read, the embedded item stored in the document already holds the new value, so +`$old` and `$row` agree and the recomputed delta is zero — `RunningTotal` is left +unchanged. (If `$old` carried the pre-update value again, the same delta would be added +a second time and the patch would not be idempotent.)
+ +--- + +## Related Articles + +### CDC Sink + +- [How It Works](./how-it-works.mdx) +- [Patching](./patching.mdx) +- [Monitoring](./monitoring.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx new file mode 100644 index 0000000000..0fe36c9b28 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx @@ -0,0 +1,206 @@ +--- +title: "CDC Sink: How It Works" +sidebar_label: How It Works +description: "Describes the internal operation of CDC Sink — how it connects to the source database, loads initial data, streams changes, and handles failover." +sidebar_position: 1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: How It Works + + + +* This page describes the internal operation of CDC Sink — how it connects to the + source database, loads initial data, streams changes, and handles failover. + +* Understanding these mechanics is important when designing patches, planning for + failover, and setting up monitoring. + +* In this page: + * [Startup and Verification](#startup-and-verification) + * [Initial Load](#initial-load) + * [Change Streaming](#change-streaming) + * [Transaction Ordering](#transaction-ordering) + * [State Persistence and Failover](#state-persistence-and-failover) + * [Child Before Parent](#child-before-parent) + + + +--- + +## Startup and Verification + +When a CDC Sink task starts, it first verifies that the source database is properly +configured. 
For PostgreSQL, this includes checking: + +* WAL level is set to `logical` +* The connecting user has sufficient privileges +* REPLICA IDENTITY is configured correctly for embedded tables that need delete routing + +If any check fails, CDC Sink reports the exact issue and the SQL an administrator +needs to run to fix it. The task does not start until all checks pass. + +After verification, CDC Sink creates the necessary change-tracking infrastructure +in the source database, then begins the initial load. + +See the [PostgreSQL Prerequisites Checklist](./postgres/prerequisites-checklist.mdx) +for the full list of requirements. + +--- + +## Initial Load + +Before streaming live changes, CDC Sink performs a full scan of every configured table +(root, embedded, and linked) using keyset pagination ordered by primary key. + +**Progress tracking:** Initial load progress is persisted per-table as a document in +RavenDB. If the task is restarted, it resumes from the last processed key rather than +re-scanning the entire table. + +**Batch pipelining:** While one batch is being written to RavenDB, the next batch is +being read from the source database, keeping both systems busy. + +**Ordering:** Tables are scanned in dependency order. Root tables are loaded first, +then embedded tables. This minimises the number of stub documents created (see +[Child Before Parent](#child-before-parent) below). + + +**Change tracking is set up before the initial load begins — but CDC streaming does +not start until after the initial load completes.** The sequence is: + +1. Change-tracking infrastructure is created in the source database (e.g., a replication + slot and publication for PostgreSQL, or CDC capture for SQL Server) +2. The full initial table scan runs — changes made during this time are captured by the + source database but not yet consumed +3. 
Once the initial load is complete, CDC streaming starts from the position captured in step 1 + +This guarantees no changes are missed: anything that happened during the initial load +is retained by the source database and will be processed immediately after. + +For very large databases this means the source database must retain sufficient change +history for the duration of the initial load (e.g., enough WAL disk space on PostgreSQL, +or a long enough retention window on SQL Server CDC tables). Plan accordingly before +starting the initial load on large tables. + + +--- + +## Change Streaming + +After the initial load completes, CDC Sink begins consuming changes from the source +database's change log, starting from the position captured **before** the initial load began. + +Changes arrive grouped by source database transaction, preserving the exact order +of operations. A transaction is only applied to RavenDB after it is fully committed +in the source database — partial transactions are never written. + +**Document merging:** When an UPDATE arrives, CDC Sink merges the new column values +onto the existing RavenDB document. Properties that are not part of the column mapping +are preserved. This allows RavenDB-side annotations and computed fields to coexist with +CDC-managed properties. + +See [Property Retention](./property-retention.mdx) for details. + +--- + +## Transaction Ordering + +CDC Sink preserves the full order of operations within a source database transaction. +If a single transaction performs multiple operations on the same row, all operations +are applied in order. + +**Example:** A source transaction that does: + + + + +{`BEGIN; +INSERT INTO items (id, name) VALUES (1, 'Alpha'); +UPDATE items SET name = 'Beta' WHERE id = 1; +DELETE FROM items WHERE id = 1; +INSERT INTO items (id, name) VALUES (1, 'Gamma'); +UPDATE items SET name = 'Delta' WHERE id = 1; +COMMIT; +`} + + + + +CDC Sink applies all five operations in order. 
The final document state is `name = 'Delta'`. + +Multiple documents modified in the same transaction are also applied atomically within +a single RavenDB batch. + +--- + +## State Persistence and Failover + +### State Storage + +CDC Sink persists its progress as a **document in RavenDB**, alongside your data. +This document records: + +* The last acknowledged position in the source database's change log (LSN for PostgreSQL) +* Per-table initial load progress (which tables completed, and the last key scanned) + +Like any RavenDB document, this state document is subject to normal replication behavior. +Different nodes in a cluster may have different versions of this document at any point in time. + +### Failover Behavior + +When the cluster elects a new mentor node for the CDC Sink task, the new node reads +the **replicated** state document — which may be older than the work the previous +mentor had completed but not yet replicated. + +The new mentor resumes from that replicated state. This means: + +* **No data is lost** — CDC Sink resumes from a known position and the source database + retains all changes from that position onward +* **Some changes may be re-read** — Changes between the replicated state and the + previous mentor's actual progress will be processed again + +Re-reading is normal and expected. The document merge strategy means that re-applying +the same INSERT or UPDATE is safe — column values are simply overwritten with the same values. + + +Patches that are not idempotent can produce incorrect results if the same change is +re-read after a failover. Design patches to handle re-processing safely. +See [Patching](./patching.mdx) for guidance. 
+ + +--- + +## Child Before Parent + +If an embedded row arrives before its parent row exists in RavenDB — which can happen +during initial load when tables are scanned in parallel, or due to relaxed foreign key +constraints in the source database — CDC Sink creates a **stub document** containing +only the embedded data. + +When the parent row arrives later, its columns are merged onto the stub document. +The final document contains both the parent fields and all embedded items that arrived earlier. + +This ensures no data is lost regardless of the order in which rows are processed. + +--- + +## Related Articles + +### CDC Sink + +- [Overview](./overview.mdx) +- [Schema Design](./schema-design.mdx) +- [Patching](./patching.mdx) +- [Property Retention](./property-retention.mdx) +- [Failover and Consistency](./failover-and-consistency.mdx) + +### PostgreSQL + +- [Prerequisites Checklist](./postgres/prerequisites-checklist.mdx) +- [Initial Setup](./postgres/initial-setup.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx new file mode 100644 index 0000000000..c90c521685 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx @@ -0,0 +1,178 @@ +--- +title: "CDC Sink: Linked Tables" +sidebar_label: Linked Tables +description: "Explains how linked tables create document ID references in parent documents by converting foreign key values into RavenDB document IDs." +sidebar_position: 4 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Linked Tables + + + +* A **linked table** creates a document ID reference in the parent document rather + than embedding data. 
A foreign key value becomes a RavenDB document ID pointing + to a related collection. + +* In this page: + * [Basic Configuration](#basic-configuration) + * [Composite Foreign Keys](#composite-foreign-keys) + * [Array References](#array-references) + * [Linked vs Embedded](#linked-vs-embedded) + + + +--- + +## Basic Configuration + +`CdcSinkLinkedTableConfig` is placed inside a root table's `LinkedTables` list: + + + + +{`new CdcSinkTableConfig +\{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = ["id"], + ColumnsMapping = \{ ["id"] = "Id", ["customer_id"] = "CustomerId" \}, + LinkedTables = + [ + new CdcSinkLinkedTableConfig + \{ + SourceTableName = "customers", + PropertyName = "Customer", // Property name in document + LinkedCollectionName = "Customers", // Target collection for ID + Type = CdcSinkRelationType.Value, // Single reference + JoinColumns = ["customer_id"] // FK used to build the document ID + \} + ] +\} +`} + + + + +With `customer_id = 42`, the Orders document gets: + + + + +{`\{ + "Id": 1, + "CustomerId": 42, + "Customer": "Customers/42" +\} +`} + + + + +The `Customer` property is a RavenDB document ID. Load the referenced document using +[includes](../../../client-api/session/loading-entities#load-with-includes) to avoid +a second network call. 
+ +--- + +## Composite Foreign Keys + +When the target table has a composite primary key, the linked reference includes +all parts of that key: + + + + +{`new CdcSinkLinkedTableConfig +\{ + SourceTableName = "customers", + PropertyName = "Customer", + LinkedCollectionName = "Customers", + Type = CdcSinkRelationType.Value, + JoinColumns = ["customer_region", "customer_id"] // Must match Customers PK order +\} +`} + + + + +With `customer_region = 'US'` and `customer_id = 42`, the document gets: + +``` +"Customer": "Customers/US/42" +``` + +--- + +## Array References + +Use `Type = CdcSinkRelationType.Array` for one-to-many references, where a parent +row has multiple foreign keys pointing to the same collection: + + + + +{`new CdcSinkLinkedTableConfig +\{ + SourceTableName = "tags", + PropertyName = "Tags", + LinkedCollectionName = "Tags", + Type = CdcSinkRelationType.Array, + JoinColumns = ["tag_id"] +\} +`} + + + + +This creates an array of document references: + + + + +{`\{ + "Tags": ["Tags/primary", "Tags/urgent", "Tags/follow-up"] +\} +`} + + + + +--- + +## Linked vs Embedded + +| Consideration | Embedded | Linked | +|--------------|----------|--------| +| Data stored | Full nested object/array inside document | Document ID reference only | +| Load pattern | Single document load | Load parent + include references | +| Document size | Grows with embedded items | Parent document stays small | +| Updates to referenced data | Reflected via CDC | Reflected via CDC on the referenced collection | +| Independence | Child has no meaning without parent | Referenced entity exists independently | +| Typical use | Orders own LineItems | Orders reference Customers | + +**Choose embedded** when the child data belongs to the parent and is always read +together with it. + +**Choose linked** when the referenced entity is independently meaningful and shared +across many parents, and you want to avoid duplicating data. 
+ +--- + +## Related Articles + +### CDC Sink + +- [Schema Design](./schema-design.mdx) +- [Embedded Tables](./embedded-tables.mdx) +- [Configuration Reference](./configuration-reference.mdx) + +### Client API + +- [Load with Includes](../../../client-api/session/loading-entities#load-with-includes) diff --git a/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx new file mode 100644 index 0000000000..cbacbf99f8 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx @@ -0,0 +1,113 @@ +--- +title: "CDC Sink: Monitoring" +sidebar_label: Monitoring +description: "Explains how to monitor a CDC Sink task — its running state, progress, fallback behavior, and statistics available through Management Studio and the API." +sidebar_position: 12 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Monitoring + + + +* This page explains how to monitor a CDC Sink task — its running state, progress, + fallback behavior, and statistics available through the Management Studio and API. 
+ +* In this page: + * [Task State](#task-state) + * [Fallback Mode](#fallback-mode) + * [Statistics](#statistics) + * [Notifications](#notifications) + + + +--- + +## Task State + +A CDC Sink task can be in one of the following states: + +| State | Description | +|-------|-------------| +| `Active` | Running normally — streaming changes or waiting for new changes | +| `ActiveByAnotherNode` | Another cluster node is the mentor; this node is a replica | +| `Disabled` | Manually disabled via Studio or API | +| `Error` | The task encountered an error and stopped | +| `FallbackMode` | Connection to the source database was lost; retrying | +| `NotOnThisNode` | This node does not hold the task | + +The current state is visible in the **Ongoing Tasks** view in the Management Studio. + +--- + +## Fallback Mode + +When CDC Sink cannot reach the source database, it enters **fallback mode** rather +than failing immediately. + +In fallback mode: +* The task continues retrying the connection at regular intervals +* No changes are applied while the connection is down +* The task automatically resumes streaming once the source is reachable again + +The maximum time the task will remain in fallback mode before reporting an error +is controlled by the `CdcSink.MaxFallbackTimeInSec` configuration key. +See [Server Configuration](./server-configuration.mdx). 
+ +--- + +## Statistics + +CDC Sink exposes runtime statistics through the `GetOngoingTaskInfoOperation`: + + + + +{`var taskInfo = await store.Maintenance.SendAsync( + new GetOngoingTaskInfoOperation(taskId, OngoingTaskType.CdcSink)); +`} + + + + +The returned object includes: + +| Field | Description | +|-------|-------------| +| `TaskState` | Current state of the task | +| `MentorNode` | Configured preferred node | +| `ResponsibleNode` | Node currently running the task | +| `Error` | Error message if the task is in error state | + +Detailed per-table statistics — including row counts and last processed position — +are available through the Management Studio's ongoing tasks detail view. + +--- + +## Notifications + +CDC Sink participates in RavenDB's standard alert system. If the task enters an +error state or fallback mode, an alert is raised and visible in: + +* The **Notification Center** in the Management Studio (bell icon) +* The cluster's alert log + +Alerts include the error message and which table or operation triggered the failure, +making it straightforward to diagnose the root cause. + +--- + +## Related Articles + +### CDC Sink + +- [How It Works](./how-it-works.mdx) +- [Failover and Consistency](./failover-and-consistency.mdx) +- [Server Configuration](./server-configuration.mdx) +- [Troubleshooting](./troubleshooting.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/overview.mdx new file mode 100644 index 0000000000..a8716827a5 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/overview.mdx @@ -0,0 +1,151 @@ +--- +title: "CDC Sink: Overview" +sidebar_label: Overview +description: "An introduction to CDC Sink, the RavenDB ongoing task that reads Change Data Capture streams from a relational database and writes documents into RavenDB." 
+sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Overview + + + +* **CDC Sink** is a RavenDB ongoing task that reads **Change Data Capture (CDC)** + streams from a relational database and writes the resulting documents into RavenDB. + +* CDC Sink is the reverse of ETL: instead of pushing data _from_ RavenDB _to_ SQL, + CDC Sink _pulls_ data _from_ SQL _into_ RavenDB. + The relational database is the source of truth; RavenDB receives a continuously-updated + document model derived from it. + +* CDC Sink maps normalized relational tables into rich, nested RavenDB documents — + automatically and in real time. + +* Supported source databases: + * **PostgreSQL** — via logical replication + * Additional source databases planned for future versions + +* In this page: + * [Why Use CDC Sink](#why-use-cdc-sink) + * [How It Works](#how-it-works) + * [Task Lifecycle](#task-lifecycle) + * [Licensing](#licensing) + + + +--- + +## Why Use CDC Sink + +CDC Sink solves the problem of moving data from a relational database into RavenDB +without requiring changes to the source system. + +* **Migrate from SQL to RavenDB** + Transform normalized SQL tables (orders, order_lines, customers) into rich RavenDB + documents where an Order contains embedded LineItems and a reference to the Customer — + automatically and continuously, without changing your SQL application. + +* **Build a read-optimized view** + Your transactional system uses a relational database, but your API layer needs + denormalized documents. CDC Sink creates and maintains those documents without + touching your existing application. 
+ +* **Gradual migration** + Keep your SQL application running while RavenDB documents are built in the background. + Applications can start reading from RavenDB while writes still go to the relational database. + +* **Event-driven side effects** + Using JavaScript patches, every INSERT, UPDATE, or DELETE in the source database can + trigger custom logic in RavenDB — computing derived fields, maintaining running totals, + or writing custom transformations. + +--- + +## How It Works + +A CDC Sink task continuously reads changes from the source relational database and +applies them to RavenDB documents. + +### Initial Load + +When a CDC Sink task starts for the first time, it performs a full scan of every +configured table using keyset pagination. This populates RavenDB with the current +state of the data before streaming begins. + +Initial load progress is persisted per-table. If the task is restarted, it resumes +from where it left off rather than re-scanning. + +### Change Streaming + +After the initial load, CDC Sink switches to streaming changes in real time. +Changes are grouped into transactions, preserving the exact order of operations +from the source database. Partial transactions are never written to RavenDB — +all changes within a source database transaction are applied together. + +### Document Model + +The relational model is mapped to RavenDB documents through configuration: + +* **Root tables** map to RavenDB collections (one document per row) +* **Embedded tables** become nested arrays or objects within parent documents +* **Linked tables** become document ID references + +See [Schema Design](./schema-design.mdx) for details. + +--- + +## Task Lifecycle + +1. **Create** — Define the task in Studio or via the Client API + Specify the connection string, table mappings, and transformation options + +2. **Verify** — CDC Sink verifies the source database is properly configured + Checks permissions, replication prerequisites, and table configuration + +3. 
**Initial Load** — Full table scan populates RavenDB with current data + Progress is tracked per-table and persists across restarts + +4. **Stream** — Real-time change streaming begins + All INSERTs, UPDATEs, and DELETEs are applied to RavenDB documents as they occur + +5. **Monitor** — View statistics, errors, and progress in Studio + +6. **Retire** — Delete the task in RavenDB when no longer needed + PostgreSQL artifacts (replication slot, publication) must be cleaned up by + the database administrator separately + +--- + +## Licensing + + +CDC Sink is available on an **Enterprise** license. + + +Learn more about licensing [here](../../../start/licensing/licensing-overview). + +--- + +## Related Articles + +### CDC Sink + +- [How It Works](./how-it-works.mdx) +- [Schema Design](./schema-design.mdx) +- [Configuration Reference](./configuration-reference.mdx) + +### PostgreSQL + +- [Prerequisites Checklist](./postgres/prerequisites-checklist.mdx) +- [Initial Setup](./postgres/initial-setup.mdx) + +### Server + +- [ETL Basics](../etl/basics) +- [Queue Sink Overview](../queue-sink/overview) diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx new file mode 100644 index 0000000000..0a03290f52 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -0,0 +1,364 @@ +--- +title: "CDC Sink: Patching" +sidebar_label: Patching +description: "Explains how JavaScript patch scripts work in CDC Sink, covering available variables, patch scope, common scenarios, idempotency, and limitations." 
+sidebar_position: 6 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Patching + + + +* A **patch** is a JavaScript snippet that runs on a document after column mapping is applied. + Patches let you transform data, compute derived fields, maintain aggregates, load related + documents, and control document metadata. + +* Patches are defined per-table (root or embedded) and per-operation (INSERT/UPDATE + or DELETE via `OnDelete.Patch`). + +* In this page: + * [When Patches Run](#when-patches-run) + * [Available Variables](#available-variables) + * [Patch Scope: Root vs Embedded](#patch-scope-root-vs-embedded) + * [Scenario: Column Transformation](#scenario-column-transformation) + * [Scenario: Aggregation with Embedded Tables](#scenario-aggregation-with-embedded-tables) + * [Scenario: Delta Computations](#scenario-delta-computations) + * [Scenario: Computed Derived Fields](#scenario-computed-derived-fields) + * [Scenario: Document Metadata](#scenario-document-metadata) + * [Scenario: Loading Related Documents](#scenario-loading-related-documents) + * [Idempotency and Failover](#idempotency-and-failover) + * [Step Limit](#step-limit) + * [Capabilities and Limitations](#capabilities-and-limitations) + + + +--- + +## When Patches Run + +| Event | Patch property | Runs? | +|-------|---------------|-------| +| INSERT | `Patch` | ✓ After column mapping | +| UPDATE | `Patch` | ✓ After column mapping, on merged document | +| DELETE (root table) | `OnDelete.Patch` | ✓ Before document is deleted | +| DELETE (embedded table) | `OnDelete.Patch` | ✓ On parent document, after the item is removed from it | + +The `Patch` property on a table handles INSERT and UPDATE. 
+The `OnDelete.Patch` on `CdcSinkOnDeleteConfig` handles DELETE separately. + +--- + +## Available Variables + +| Variable | Available | Type | Description | +|----------|-----------|------|-------------| +| `this` | Always | object | The document being modified (root or parent for embedded) | +| `$row` | Always | object | All SQL columns from the CDC event (mapped and unmapped) | +| `$old` | UPDATE and DELETE | object \| null | Previous state of the item; null for INSERT | +| `get(id)` | Always | function | Load a RavenDB document by ID | + +--- + +## Patch Scope: Root vs Embedded + +**Root table patches** operate on the root document: + + + + +{`Patch = "this.Status = $row.is_active ? 'Active' : 'Inactive';" +`} + + + + +`this` is the root document (for example, an `Orders` document). + +**Embedded table patches** operate on the **parent** document, not the embedded item: + + + + +{`// Patch on order_lines embedded table +Patch = "this.TotalQuantity = (this.Lines || []).reduce((s, l) => s + l.Quantity, 0);" +`} + + + + +`this` is the parent Orders document. This lets you recompute parent-level aggregates +whenever an embedded item changes. + +--- + +## Scenario: Column Transformation + +Transform or combine SQL columns into RavenDB properties: + + + + +{`this.FullName = ($row.first_name + ' ' + $row.last_name).trim(); +this.Status = $row.is_active ? 'Active' : 'Inactive'; +this.CreatedAt = new Date($row.created_unix * 1000).toISOString(); +`} + + + + +--- + +## Scenario: Aggregation with Embedded Tables + +Recompute parent-level totals whenever an embedded item is added or updated. + + +If you use a `Patch` to maintain an aggregate, you **must** also provide an +`OnDelete.Patch` that reverses the aggregate when an item is deleted. +Without it, deletes will leave the aggregate in an incorrect state. 
+ + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "order_lines", + PropertyName = "Lines", + PrimaryKeyColumns = ["line_id"], + JoinColumns = ["order_id"], + ColumnsMapping = \{ ["line_id"] = "LineId", ["quantity"] = "Quantity" \}, + + // Runs on INSERT and UPDATE — recomputes total from current Lines array + Patch = @" + this.TotalQuantity = (this.Lines || []) + .reduce((sum, line) => sum + (line.Quantity || 0), 0); + ", + + // REQUIRED: Runs on DELETE — recomputes after item is removed from array + OnDelete = new CdcSinkOnDeleteConfig + \{ + Patch = @" + this.TotalQuantity = (this.Lines || []) + .reduce((sum, line) => sum + (line.Quantity || 0), 0); + " + \} +\} +`} + + + + +The OnDelete patch runs after the item has already been removed from the array, +so re-summing `this.Lines` gives the correct post-deletion total. + +--- + +## Scenario: Delta Computations + +Use `$old` to compute a delta when an embedded item changes, keeping a running total +without recomputing from scratch. + + +Running total patches using `$old` must have a matching `OnDelete.Patch` that subtracts +the deleted item's value. Without it, deletes leave the running total incorrect. 
+ + + + + +{`new CdcSinkEmbeddedTableConfig +\{ + SourceTableName = "invoice_lines", + PropertyName = "Lines", + PrimaryKeyColumns = ["line_id"], + JoinColumns = ["invoice_id"], + ColumnsMapping = \{ ["line_id"] = "LineId", ["amount"] = "Amount" \}, + + // INSERT: $old is null, so delta = new amount (0 → new) + // UPDATE: $old has previous Amount, delta = new - old + Patch = @" + const oldAmount = $old?.Amount || 0; + const newAmount = $row.amount || 0; + this.RunningTotal = (this.RunningTotal || 0) + (newAmount - oldAmount); + ", + + // REQUIRED: Subtract the deleted item's amount using $old + OnDelete = new CdcSinkOnDeleteConfig + \{ + Patch = @" + const deletedAmount = $old?.Amount || 0; + this.RunningTotal = (this.RunningTotal || 0) - deletedAmount; + " + \} +\} +`} + + + + +The `$old` variable in the OnDelete patch contains the item's last known state before deletion. + +--- + +## Scenario: Computed Derived Fields + +Compute fields from unmapped columns that you don't want to store directly: + + + + +{`ColumnsMapping = \{ ["id"] = "Id", ["name"] = "Name" \}, +// base_price and tax_rate are NOT in ColumnsMapping +Patch = @" + this.FinalPrice = $row.base_price * (1 + $row.tax_rate); + this.Discount = $row.is_vip ? 
$row.base_price * 0.1 : 0; +" +`} + + + + +--- + +## Scenario: Document Metadata + +Patches can set RavenDB document metadata, including expiration: + + + + +{`this['@metadata'] = this['@metadata'] || \{\}; + +if ($row.expires_at) \{ + this['@metadata']['@expires'] = new Date($row.expires_at).toISOString(); +\} + +this['@metadata']['SourceTable'] = 'orders'; +`} + + + + +--- + +## Scenario: Loading Related Documents + +Use `get()` to load a related RavenDB document and denormalize its data: + + + + +{`const customer = get('Customers/' + $row.customer_id); + +if (customer) \{ + this.CustomerName = customer.Name; + this.CustomerEmail = customer.Email; + this.CustomerTier = customer.Tier; +\} else \{ + // Document doesn't exist yet — race condition or not yet synced + this.CustomerName = null; + this.CustomerEmail = null; +\} +`} + + + + +`get()` returns `null` if the document does not exist or has not yet been created +by CDC Sink (race condition when multiple tables are loading in parallel). +Always check for null before accessing properties. + +**When to use `get()`:** + +* Denormalizing slowly-changing reference data (customer name, category, region) +* Capturing a snapshot of related data at insert time + +**Prefer linked tables** for simple foreign key references — they are cleaner and +do not have the null-handling complexity of `get()`. + +--- + +## Idempotency and Failover + +After a failover, CDC Sink resumes from the replicated state, which may be older +than the last work the previous mentor node completed. Some changes may be re-read +and re-applied. 
+ +**Patches that are not idempotent can produce incorrect results when re-applied.** + + + + +{`// NOT idempotent — increments again on re-read +this.ViewCount = (this.ViewCount || 0) + 1; + +// Idempotent — absolute value from SQL source +this.ViewCount = $row.view_count; + +// Idempotent — delta via $old (both first-read and re-read produce same result) +const oldVal = $old?.Amount || 0; +const newVal = $row.amount || 0; +this.RunningTotal = (this.RunningTotal || 0) + (newVal - oldVal); +`} + + + + +Column mapping itself is idempotent — overwriting a property with the same value +is always safe. + +--- + +## Step Limit + +Patch scripts are bounded by a **step quota** rather than a time limit. Each operation +in the script (assignment, loop iteration, function call) consumes steps. + +The limit is controlled by the `Patching.MaxStepsForScript` configuration setting. +See [Patching Configuration](../../../server/configuration/patching-configuration). + +If a patch exceeds the limit, the CDC operation fails and is retried. + +Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nested loops. 
+ +--- + +## Capabilities and Limitations + +**What patches can do:** + +* Access `this`, `$row`, `$old` +* Load related documents with `get()` +* Compute and transform property values +* Set and modify document metadata +* Conditional logic, loops, array methods +* Use built-in JavaScript: `Date`, `Math`, `JSON`, `Array` + +**What patches cannot do:** + +* Make HTTP calls or API requests +* Access the file system +* Load documents from other RavenDB databases + +--- + +## Related Articles + +### CDC Sink + +- [Delete Strategies](./delete-strategies.mdx) +- [Column Mapping](./column-mapping.mdx) +- [Property Retention](./property-retention.mdx) +- [Failover and Consistency](./failover-and-consistency.mdx) + +### Server + +- [Patching Configuration](../../../server/configuration/patching-configuration) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json b/docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json new file mode 100644 index 0000000000..697a3e48e5 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/_category_.json @@ -0,0 +1 @@ +{"position": 16, "label": "PostgreSQL"} \ No newline at end of file diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx new file mode 100644 index 0000000000..c83a62e5c0 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx @@ -0,0 +1,227 @@ +--- +title: "CDC Sink for PostgreSQL: Cleanup and Maintenance" +sidebar_label: Cleanup and Maintenance +description: "How to identify and remove orphaned PostgreSQL replication slots and publications after a CDC Sink task is deleted, and how to avoid slot accumulation." 
+sidebar_position: 6 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Cleanup and Maintenance + + + +* When a CDC Sink task is deleted from RavenDB, the associated PostgreSQL objects + — the replication slot and publication — are **not** automatically removed. + +* These must be cleaned up manually by a database administrator. + +* In this page: + * [Why Manual Cleanup Is Required](#why-manual-cleanup-is-required) + * [Finding Orphaned Slots and Publications](#finding-orphaned-slots-and-publications) + * [Dropping a Replication Slot](#dropping-a-replication-slot) + * [Dropping a Publication](#dropping-a-publication) + * [Too Many Replication Slots](#too-many-replication-slots) + * [Configuration Changes That Rename Slots](#configuration-changes-that-rename-slots) + + + +--- + +## Why Manual Cleanup Is Required + +A replication slot — even an inactive one — prevents PostgreSQL from discarding WAL segments that +have not yet been consumed. If a slot is not being consumed (because the CDC Sink +task was deleted), PostgreSQL will accumulate WAL on disk indefinitely. + +This can lead to: + +* Disk space exhaustion on the PostgreSQL server +* Degraded performance as old WAL segments pile up + +A database administrator must drop unused replication slots. + + +**Why RavenDB does not drop slots automatically:** + +There are valid reasons to keep a slot after a task is deleted. For example, you +may want to create a new CDC Sink task that resumes from the same position (by +reusing the existing slot), or you may want to review what changes are pending +before cleaning up. The CDC Sink user may also not have the permissions required +to drop replication slots, even if it had the permissions to create them. 
+ +For these reasons, slot and publication lifecycle management is the responsibility +of the database administrator. + + +--- + +## Finding Orphaned Slots and Publications + +List all CDC Sink replication slots: + + + + +{`SELECT slot_name, active, confirmed_flush_lsn +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +`} + + + + +A slot with `active = false` is not currently being consumed. Compare the list to your active CDC +Sink tasks in RavenDB — any slot whose corresponding task no longer exists is orphaned. + +List all CDC Sink publications: + + + + +{`SELECT p.pubname, c.relname AS table_name +FROM pg_publication p +JOIN pg_publication_rel pr ON pr.prpubid = p.oid +JOIN pg_class c ON c.oid = pr.prrelid +WHERE p.pubname LIKE 'rvn_cdc_p_%' +ORDER BY p.pubname, c.relname; +`} + + + + +--- + +## Dropping a Replication Slot + + + + +{`SELECT pg_drop_replication_slot('rvn_cdc_s_<suffix>'); +`} + + + + + +You cannot drop an active replication slot (one with `active = true`). The CDC Sink +task must be stopped or deleted in RavenDB before the slot can be dropped. + + +--- + +## Dropping a Publication + + + + +{`DROP PUBLICATION IF EXISTS "rvn_cdc_p_<suffix>"; +`} + + + + +Publications are not consumed like slots — they do not accumulate data or hold WAL +segments. However, they should still be dropped to keep the database clean. + +--- + +## Too Many Replication Slots + +PostgreSQL limits the total number of replication slots to `max_replication_slots`. +Once this limit is reached, no new CDC Sink tasks can start (they will fail with a +connection error). + +Check how many slots are in use: + + + + +{`SELECT count(*) FROM pg_replication_slots; +SHOW max_replication_slots; +`} + + + + +To resolve this: + +1. Identify inactive slots: + + + + +{`SELECT slot_name, active +FROM pg_replication_slots +WHERE active = false; +`} + + + + +2. Drop slots for tasks that no longer exist: + + + + +{`SELECT pg_drop_replication_slot('rvn_cdc_s_<suffix>'); +`} + + + + +3. 
If needed, increase `max_replication_slots` in `postgresql.conf` and restart + PostgreSQL. + +--- + +## Configuration Changes That Rename Slots + +The replication slot name is derived from the task name, database name, and table +names. If you update a CDC Sink task in a way that changes any of these — such as +adding a table, removing a table, or renaming the task — the expected slot and +publication names change. + +What happens: + +* CDC Sink will look for a slot/publication with the new name +* If it has permissions, it will create them +* The old slot and publication are **not deleted** — they become orphaned + +After updating a task configuration that changes table membership: + +1. Let the task restart and create the new slot/publication +2. Identify the old slot (it will be inactive): + + + + +{`SELECT slot_name, active +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%' + AND active = false; +`} + + + + +3. Drop the old slot and publication + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Initial Setup](./initial-setup.mdx) +- [Monitoring PostgreSQL](./monitoring-postgres.mdx) +- [WAL Configuration](./wal-configuration.mdx) + +### CDC Sink + +- [API Reference](../api-reference.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json new file mode 100644 index 0000000000..4d070cfb37 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json @@ -0,0 +1 @@ +{"position": 9, "label": "Examples"} diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx new file mode 100644 index 0000000000..6bcc58b0b4 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx @@ -0,0 +1,230 @@ +--- +title: "CDC Sink Example: Complex Nesting with Linked Tables" 
+sidebar_label: Complex Nesting with Linked Tables +description: "Multi-level embedded table structure combined with linked table references, representing a product catalog with variants, attributes, and a category reference." +sidebar_position: 3 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink Example: Complex Nesting with Linked Tables + + + +* This example shows a multi-level embedded table structure combined with linked + table references, representing a product catalog with variants, attributes, and + a category reference. + +* In this page: + * [Source Schema](#source-schema) + * [REPLICA IDENTITY Setup](#replica-identity-setup) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + + + +## Source Schema + + + + +{`CREATE TABLE categories ( + category_id SERIAL PRIMARY KEY, + name TEXT NOT NULL +); + +CREATE TABLE products ( + product_id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + category_id INT REFERENCES categories(category_id) +); + +CREATE TABLE product_variants ( + variant_id SERIAL PRIMARY KEY, + product_id INT NOT NULL REFERENCES products(product_id), + sku TEXT NOT NULL, + price NUMERIC(10,2) +); + +CREATE TABLE variant_attributes ( + attr_id SERIAL PRIMARY KEY, + product_id INT NOT NULL REFERENCES products(product_id), -- denormalized root PK (required for deep nesting) + variant_id INT NOT NULL REFERENCES product_variants(variant_id), + attr_name TEXT NOT NULL, + attr_value TEXT NOT NULL +); +`} + + + + +## REPLICA IDENTITY Setup + +Both `product_variants` and `variant_attributes` have surrogate PKs with join +columns that are not part of their primary keys. 
Rather than `REPLICA IDENTITY FULL` +(which includes all columns), we use targeted unique indexes covering just the join +and PK columns: + + + + +{`-- product_variants: join column is product_id, PK is variant_id +CREATE UNIQUE INDEX product_variants_replica_idx + ON product_variants (product_id, variant_id); +ALTER TABLE product_variants + REPLICA IDENTITY USING INDEX product_variants_replica_idx; + +-- variant_attributes: join columns are product_id + variant_id, PK is attr_id +CREATE UNIQUE INDEX variant_attributes_replica_idx + ON variant_attributes (product_id, variant_id, attr_id); +ALTER TABLE variant_attributes + REPLICA IDENTITY USING INDEX variant_attributes_replica_idx; +`} + + + + +See [REPLICA IDENTITY](../replica-identity.mdx) for more details. + +## Task Configuration + + + + +{`var config = new CdcSinkConfiguration +\{ + Name = "ProductCatalogSync", + ConnectionStringName = "MyPostgresConnection", + Tables = + [ + new CdcSinkTableConfig + \{ + Name = "Products", + SourceTableName = "products", + PrimaryKeyColumns = ["product_id"], + ColumnsMapping = new Dictionary<string, string> + \{ + ["product_id"] = "ProductId", + ["name"] = "Name" + \}, + // Linked table: category_id FK → document ID in Categories collection + LinkedTables = + [ + new CdcSinkLinkedTableConfig + \{ + SourceTableName = "categories", + PropertyName = "Category", + LinkedCollectionName = "Categories", + Type = CdcSinkRelationType.Value, + JoinColumns = ["category_id"] + \} + ], + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "product_variants", + PropertyName = "Variants", + Type = CdcSinkRelationType.Array, + JoinColumns = ["product_id"], + PrimaryKeyColumns = ["variant_id"], + ColumnsMapping = new Dictionary<string, string> + \{ + ["variant_id"] = "VariantId", + ["sku"] = "Sku", + ["price"] = "Price" + \}, + // Deep-nested: attributes within each variant + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "variant_attributes", + PropertyName = 
"Attributes", + Type = CdcSinkRelationType.Array, + // JoinColumns must include the ROOT PK for deep nesting + JoinColumns = ["product_id", "variant_id"], + PrimaryKeyColumns = ["attr_id"], + ColumnsMapping = new Dictionary<string, string> + \{ + ["attr_id"] = "AttrId", + ["attr_name"] = "Name", + ["attr_value"] = "Value" + \} + \} + ] + \} + ] + \} + ] +\}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +`} + + + + +## Resulting Documents + + + + +{`\{ + "ProductId": 42, + "Name": "Hiking Boot", + "Category": "categories/3", + "Variants": [ + \{ + "VariantId": 101, + "Sku": "HB-BLK-10", + "Price": 89.99, + "Attributes": [ + \{ "AttrId": 1, "Name": "Color", "Value": "Black" \}, + \{ "AttrId": 2, "Name": "Size", "Value": "10" \} + ] + \}, + \{ + "VariantId": 102, + "Sku": "HB-BRN-11", + "Price": 89.99, + "Attributes": [ + \{ "AttrId": 3, "Name": "Color", "Value": "Brown" \}, + \{ "AttrId": 4, "Name": "Size", "Value": "11" \} + ] + \} + ], + "@metadata": \{ "@collection": "Products" \} +\} +`} + + + + + +This example assumes the `Categories` collection is also synced by CDC Sink, through a separate +root table configuration that is not shown here. `"categories/3"` is a standard RavenDB document ID +that enables the use of RavenDB includes when querying products. 
+ + +--- + +## Related Articles + +### CDC Sink Examples + +- [Simple Migration](./example-simple-migration.mdx) +- [Denormalization](./example-denormalization.mdx) +- [Event Sourcing](./example-event-sourcing.mdx) + +### CDC Sink + +- [Schema Design](../../schema-design.mdx) +- [Embedded Tables](../../embedded-tables.mdx) +- [Linked Tables](../../linked-tables.mdx) +- [REPLICA IDENTITY](../replica-identity.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx new file mode 100644 index 0000000000..9213a85115 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx @@ -0,0 +1,202 @@ +--- +title: "CDC Sink Example: Denormalization with Embedded Tables" +sidebar_label: Denormalization with Embedded Tables +description: "How to merge a normalized SQL schema (orders + order_lines) into denormalized RavenDB documents with embedded arrays using CDC Sink." +sidebar_position: 1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink Example: Denormalization with Embedded Tables + + + +* This example shows how to merge a normalized SQL schema (orders + order_lines) + into denormalized RavenDB documents with embedded arrays. 
+ +* In this page: + * [Source Schema](#source-schema) + * [REPLICA IDENTITY Setup](#replica-identity-setup) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + * [What Happens on Change Events](#what-happens-on-change-events) + + + +## Source Schema + + + + +{`CREATE TABLE orders ( + order_id SERIAL PRIMARY KEY, + customer TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + created_at TIMESTAMPTZ DEFAULT now() +); + +CREATE TABLE order_lines ( + line_id SERIAL PRIMARY KEY, + order_id INT NOT NULL REFERENCES orders(order_id), + product TEXT NOT NULL, + qty INT NOT NULL, + unit_price NUMERIC(10,2) NOT NULL +); +`} + + + + +## REPLICA IDENTITY Setup + +`order_lines` has a surrogate PK (`line_id`). The join column `order_id` is not +part of the primary key. Without `REPLICA IDENTITY FULL`, DELETE events for +`order_lines` rows would not include `order_id`, and CDC Sink could not find +the parent document. + + + + +{`ALTER TABLE order_lines REPLICA IDENTITY FULL; +`} + + + + +See [REPLICA IDENTITY](../replica-identity.mdx). 
+ +## Task Configuration + + + + +{`var config = new CdcSinkConfiguration +\{ + Name = "OrdersSync", + ConnectionStringName = "MyPostgresConnection", + Tables = new List<CdcSinkTableConfig> + \{ + new CdcSinkTableConfig + \{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = new List<string> \{ "order_id" \}, + ColumnsMapping = new Dictionary<string, string> + \{ + ["order_id"] = "OrderId", + ["customer"] = "Customer", + ["status"] = "Status", + ["created_at"] = "CreatedAt" + \}, + EmbeddedTables = new List<CdcSinkEmbeddedTableConfig> + \{ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "order_lines", + PropertyName = "Lines", + Type = CdcSinkRelationType.Array, + JoinColumns = new List<string> \{ "order_id" \}, + PrimaryKeyColumns = new List<string> \{ "line_id" \}, + ColumnsMapping = new Dictionary<string, string> + \{ + ["line_id"] = "LineId", + ["product"] = "Product", + ["qty"] = "Qty", + ["unit_price"] = "UnitPrice" + \} + \} + \} + \} + \} +\}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +`} + + + + +## Resulting Documents + +SQL rows: + + + + +{`orders: order_id=1, customer='Acme Corp', status='pending', created_at='2024-06-01 09:00:00+00' +order_lines: line_id=1, order_id=1, product='Widget A', qty=3, unit_price=9.99 +order_lines: line_id=2, order_id=1, product='Widget B', qty=1, unit_price=24.99 +`} + + + + +RavenDB document `orders/1`: + + + + +{`\{ + "OrderId": 1, + "Customer": "Acme Corp", + "Status": "pending", + "CreatedAt": "2024-06-01T09:00:00+00:00", + "Lines": [ + \{ + "LineId": 1, + "Product": "Widget A", + "Qty": 3, + "UnitPrice": 9.99 + \}, + \{ + "LineId": 2, + "Product": "Widget B", + "Qty": 1, + "UnitPrice": 24.99 + \} + ], + "@metadata": \{ "@collection": "Orders" \} +\} +`} + + + + +## What Happens on Change Events + +**INSERT into `order_lines`:** +A new item is appended to the `Lines` array. + +**UPDATE to `order_lines`:** +CDC Sink finds the item by `line_id` within the `Lines` array and updates its properties. + +**DELETE from `order_lines`:** +CDC Sink finds the item by `line_id` and removes it from the array. 
+(Requires `REPLICA IDENTITY FULL` as configured above.) + +**UPDATE to `orders`:** +Only the root document properties (`Status`, etc.) are updated. The `Lines` array +is not affected. + +**DELETE from `orders`:** +The entire `orders/1` document is deleted, including all embedded lines. + +--- + +## Related Articles + +### CDC Sink Examples + +- [Simple Migration](./example-simple-migration.mdx) +- [Complex Nesting](./example-complex-nesting.mdx) + +### CDC Sink + +- [Embedded Tables](../../embedded-tables.mdx) +- [REPLICA IDENTITY](../replica-identity.mdx) +- [Delete Strategies](../../delete-strategies.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx new file mode 100644 index 0000000000..df7ace832d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx @@ -0,0 +1,185 @@ +--- +title: "CDC Sink Example: Event Sourcing with Aggregation" +sidebar_label: Event Sourcing with Aggregation +description: "How to use CDC Sink patches to maintain a computed aggregate on a RavenDB document as individual event rows arrive from PostgreSQL." +sidebar_position: 2 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink Example: Event Sourcing with Aggregation + + + +* This example shows how to use CDC Sink patches to maintain a computed aggregate + on a RavenDB document as individual event rows arrive from PostgreSQL. 
+ +* In this page: + * [Source Schema](#source-schema) + * [Goal](#goal) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + * [Handling Deletes](#handling-deletes) + + + +## Source Schema + +An accounts table and a transactions table: + + + + +{`CREATE TABLE accounts ( + account_id SERIAL PRIMARY KEY, + owner TEXT NOT NULL, + currency TEXT NOT NULL DEFAULT 'USD' +); + +CREATE TABLE transactions ( + txn_id SERIAL PRIMARY KEY, + account_id INT NOT NULL REFERENCES accounts(account_id), + amount NUMERIC(12,2) NOT NULL, + type TEXT NOT NULL, -- 'credit' or 'debit' + created_at TIMESTAMPTZ DEFAULT now() +); +`} + + + + +## Goal + +Store each account as a RavenDB document with a `Balance` field that reflects the +running total of all transactions. Transaction rows are embedded as an array for +history, and `Balance` is maintained using patch logic. + +## Task Configuration + + + + +{`var config = new CdcSinkConfiguration +\{ + Name = "AccountsSync", + ConnectionStringName = "MyPostgresConnection", + Tables = + [ + new CdcSinkTableConfig + \{ + Name = "Accounts", + SourceTableName = "accounts", + PrimaryKeyColumns = ["account_id"], + ColumnsMapping = new Dictionary<string, string> + \{ + ["account_id"] = "AccountId", + ["owner"] = "Owner", + ["currency"] = "Currency" + \}, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "transactions", + PropertyName = "Transactions", + Type = CdcSinkRelationType.Array, + JoinColumns = ["account_id"], + PrimaryKeyColumns = ["txn_id"], + ColumnsMapping = new Dictionary<string, string> + \{ + ["txn_id"] = "TxnId", + ["amount"] = "Amount", + ["type"] = "Type", + ["created_at"] = "CreatedAt" + \}, + // Patch runs on the parent document for INSERT/UPDATE + Patch = """ + const oldAmount = $old?.Amount || 0; + const newAmount = $row.amount || 0; + const sign = $row.type === 'credit' ? 1 : -1; + const oldSign = $old?.Type === 'credit' ? 
1 : -1; + this.Balance = (this.Balance || 0) + - (oldSign * oldAmount) + + (sign * newAmount); + """, + OnDelete = new CdcSinkOnDeleteConfig + \{ + Patch = """ + const deletedAmount = $old?.Amount || 0; + const sign = $old?.Type === 'credit' ? 1 : -1; + this.Balance = (this.Balance || 0) - (sign * deletedAmount); + """ + \} + \} + ] + \} + ] +\}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +`} + + + + +## Resulting Documents + +After three transactions (credit 100, debit 30, credit 50): + + + + +{`\{ + "AccountId": 1, + "Owner": "Alice", + "Currency": "USD", + "Balance": 120.00, + "Transactions": [ + \{ "TxnId": 1, "Amount": 100.00, "Type": "credit", "CreatedAt": "..." \}, + \{ "TxnId": 2, "Amount": 30.00, "Type": "debit", "CreatedAt": "..." \}, + \{ "TxnId": 3, "Amount": 50.00, "Type": "credit", "CreatedAt": "..." \} + ], + "@metadata": \{ "@collection": "Accounts" \} +\} +`} + + + + +## Handling Deletes + +The `OnDelete.Patch` reverses the contribution of the deleted transaction to +`Balance`. This uses `$old` (the embedded item's last known state) rather than +`$row`, because for a DELETE event the embedded item's mapped values are in `$old`. + +Without the `OnDelete.Patch`, deleting a transaction row from SQL would remove +it from the `Transactions` array but leave `Balance` stale. The delete patch +keeps `Balance` consistent. + + +The patch uses delta logic (`$old` → `$row`) for idempotency. If a change is +re-applied after a failover, `$old` still reflects the state before the original +update, so the delta produces the same result. +See [Failover and Consistency](../../failover-and-consistency.mdx). 
+ + +--- + +## Related Articles + +### CDC Sink Examples + +- [Simple Migration](./example-simple-migration.mdx) +- [Denormalization](./example-denormalization.mdx) +- [Complex Nesting](./example-complex-nesting.mdx) + +### CDC Sink + +- [Patching](../../patching.mdx) +- [Delete Strategies](../../delete-strategies.mdx) +- [Failover and Consistency](../../failover-and-consistency.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx new file mode 100644 index 0000000000..aad066f6bd --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx @@ -0,0 +1,128 @@ +--- +title: "CDC Sink Example: Simple Table Migration" +sidebar_label: Simple Table Migration +description: "Minimal setup to replicate a single SQL table into a RavenDB collection using CDC Sink." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink Example: Simple Table Migration + + + +* This example shows the minimal setup to replicate a single SQL table into a + RavenDB collection. 
+ +* In this page: + * [Source Schema](#source-schema) + * [Task Configuration](#task-configuration) + * [Resulting Documents](#resulting-documents) + + + +## Source Schema + +A simple customers table: + + + + +{`CREATE TABLE customers ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + email TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT now() +); +`} + + + + +## Task Configuration + + + + +{`var config = new CdcSinkConfiguration +\{ + Name = "CustomersSync", + ConnectionStringName = "MyPostgresConnection", + Tables = new List<CdcSinkTableConfig> + \{ + new CdcSinkTableConfig + \{ + Name = "Customers", + SourceTableName = "customers", + PrimaryKeyColumns = new List<string> \{ "id" \}, + ColumnsMapping = new Dictionary<string, string> + \{ + ["id"] = "Id", + ["name"] = "Name", + ["email"] = "Email", + ["created_at"] = "CreatedAt" + \} + \} + \} +\}; + +await store.Maintenance.SendAsync(new AddCdcSinkOperation(config)); +`} + + + + +## Resulting Documents + +SQL row: + + + + +{`id=1, name='Alice', email='alice@example.com', created_at='2024-01-15 10:30:00+00' +`} + + + + +RavenDB document in collection `Customers` with ID `customers/1`: + + + + +{`\{ + "Id": 1, + "Name": "Alice", + "Email": "alice@example.com", + "CreatedAt": "2024-01-15T10:30:00+00:00", + "@metadata": \{ + "@collection": "Customers" + \} +\} +`} + + + + +Document IDs are generated as `{collection}/{pk}` — for example, `customers/1` +for a row with `id = 1`. 
+ +--- + +## Related Articles + +### CDC Sink Examples + +- [Denormalization](./example-denormalization.mdx) +- [Event Sourcing](./example-event-sourcing.mdx) +- [Complex Nesting](./example-complex-nesting.mdx) + +### CDC Sink + +- [Schema Design](../../schema-design.mdx) +- [Column Mapping](../../column-mapping.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx new file mode 100644 index 0000000000..28e9872ddd --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx @@ -0,0 +1,217 @@ +--- +title: "CDC Sink for PostgreSQL: Initial Setup" +sidebar_label: Initial Setup +description: "How CDC Sink creates and verifies the replication slot and publication on startup, with options for automatic or manual setup and custom naming." +sidebar_position: 3 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Initial Setup + + + +* When CDC Sink starts, it verifies and (if it has the necessary permissions) creates + the **replication slot** and **publication** required for logical replication. + +* If the CDC Sink user does not have permission to create these objects, a database + administrator must create them manually before the task can start. 
+ +* In this page: + * [Automatic Setup](#automatic-setup) + * [Manual Setup](#manual-setup) + * [Specifying Custom Slot and Publication Names](#specifying-custom-slot-and-publication-names) + * [Slot and Publication Naming (Auto-generated)](#slot-and-publication-naming-auto-generated) + * [Verifying Setup](#verifying-setup) + + + +--- + +## Automatic Setup + +If the CDC Sink user has the required permissions (see +[Permissions and Roles](./permissions-and-roles.mdx)), +CDC Sink will: + +1. Generate the replication slot and publication names (a GUID-based identifier) and store them with the task +2. Check whether they already exist +3. Create them if they do not exist +4. Begin the initial load + +No manual database administration is needed in this case. + +--- + +## Manual Setup + +If the CDC Sink user does not have permission to create replication slots or +publications, a database administrator must create them before the task is started. + +**Step 1: Determine the slot and publication names** + +The simplest approach is to specify the names explicitly in `CdcSinkPostgresSettings` +so both you and the database administrator know what names to use. +See [Specifying Custom Slot and Publication Names](#specifying-custom-slot-and-publication-names). + +If using auto-generated names (no `Postgres` settings), CDC Sink generates names +using the `rvn_cdc_s_` and `rvn_cdc_p_` prefixes followed by a GUID. +See [Slot and Publication Naming](#slot-and-publication-naming-auto-generated) +for the naming scheme. + +You can also find the names CDC Sink expects by creating the task (it will fail to start) +and reading the error message, which includes the expected names. + +**Step 2: Create the publication** + +Create a publication that includes all the tables CDC Sink will replicate: + + + + +{`CREATE PUBLICATION rvn_cdc_p_<guid> +FOR TABLE orders, order_lines, customers; +`} + + + + +The publication must include all tables from the task's root and embedded table +configurations. 
+ +**Step 3: Create the replication slot** + + + + +{`SELECT pg_create_logical_replication_slot( + 'rvn_cdc_s_<guid>', + 'pgoutput' +); +`} + + + + +**Step 4: Start the CDC Sink task** + +Once the slot and publication exist, CDC Sink will detect them on startup and +proceed with the initial load. + + +The slot and publication names are determined at task creation time and do not change +when you later modify the task. If you created these objects manually, the same names +will continue to be used regardless of subsequent configuration changes. + + +--- + +## Specifying Custom Slot and Publication Names + +You can explicitly specify the replication slot and publication names by setting +`CdcSinkPostgresSettings` on the task configuration: + + + + +{`var config = new CdcSinkConfiguration +\{ + Name = "OrdersSync", + ConnectionStringName = "MyPostgresConnection", + Postgres = new CdcSinkPostgresSettings + \{ + SlotName = "orders_sync_slot", + PublicationName = "orders_sync_pub" + \}, + Tables = [ ... ] +\}; +`} + + + + +Custom names are useful when: + +* A database administrator pre-creates the slot and publication with human-readable + names before starting the task +* You are migrating from a previous CDC Sink task and want to reuse an existing slot + to avoid re-reading history +* You need predictable names across environments (dev/staging/prod) + +**Immutability:** Once set, `SlotName` and `PublicationName` are fixed for the +lifetime of the task. If you need different names, delete the task and create a new one. + +--- + +## Slot and Publication Naming (Auto-generated) + +When `CdcSinkPostgresSettings` is not set (or `SlotName`/`PublicationName` are null), +CDC Sink generates unique names at task creation time using a GUID: + +* **Slot name**: `rvn_cdc_s_<guid>` +* **Publication name**: `rvn_cdc_p_<guid>` + +The GUID is generated once when the task is first created and stored with the task. 
+ +**Example:** + + + + +{`rvn_cdc_s_a3f1b2c4d5e6f7a8b9c0d1e2f3a4b5c6 +rvn_cdc_p_a3f1b2c4d5e6f7a8b9c0d1e2f3a4b5c6 +`} + + + + +The slot and publication share the same identifier, making it easy to match them to +a specific task. + + +The slot and publication names are generated **once at task creation** and then stored +with the task. Subsequent modifications to the task (adding tables, changing mappings, +etc.) do not change the slot or publication names — the original names are always used. + + +--- + +## Verifying Setup + +To verify the slot and publication were created: + + + + +{`-- View CDC Sink replication slots +SELECT slot_name, plugin, slot_type, active +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; + +-- View CDC Sink publications +SELECT p.pubname, c.relname AS table_name +FROM pg_publication p +JOIN pg_publication_rel pr ON pr.prpubid = p.oid +JOIN pg_class c ON c.oid = pr.prrelid +WHERE p.pubname LIKE 'rvn_cdc_p_%' +ORDER BY p.pubname, c.relname; +`} + + + + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Prerequisites Checklist](./prerequisites-checklist.mdx) +- [Permissions and Roles](./permissions-and-roles.mdx) +- [REPLICA IDENTITY](./replica-identity.mdx) +- [Cleanup and Maintenance](./cleanup-and-maintenance.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx new file mode 100644 index 0000000000..78390309b6 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/monitoring-postgres.mdx @@ -0,0 +1,120 @@ +--- +title: "CDC Sink for PostgreSQL: Monitoring PostgreSQL" +sidebar_label: Monitoring PostgreSQL +description: "PostgreSQL-side monitoring for CDC Sink, including replication slot health, replication lag, and WAL disk usage queries." 
+sidebar_position: 7 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Monitoring PostgreSQL + + + +* This page covers PostgreSQL-side monitoring for CDC Sink — replication slot health, + lag, and WAL usage. + +* For RavenDB-side monitoring, see [Monitoring](../monitoring.mdx). + +* In this page: + * [Replication Slot Health](#replication-slot-health) + * [Replication Lag](#replication-lag) + * [WAL Disk Usage](#wal-disk-usage) + + + +--- + +## Replication Slot Health + +Check that CDC Sink replication slots are active and being consumed: + + + + +{`SELECT slot_name, active, confirmed_flush_lsn +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +`} + + + + +| Column | Meaning | +|--------|---------| +| `active` | `true` if CDC Sink is connected and consuming; `false` if the connection is down | +| `confirmed_flush_lsn` | The LSN up to which changes have been confirmed as consumed | + +A slot with `active = false` means CDC Sink is not currently connected. This is +expected during failover or when the task is paused. If the slot remains inactive +for an extended period, investigate the task state in RavenDB Studio. + +--- + +## Replication Lag + +Replication lag measures how far behind the slot is relative to the current WAL position: + + + + +{`SELECT slot_name, + pg_current_wal_lsn() - confirmed_flush_lsn AS lag_bytes +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +`} + + + + +A consistently growing `lag_bytes` means CDC Sink is not keeping up with the rate +of changes in the source database. 
Consider: + +* Increasing `CdcSink.MaxBatchSize` to process more changes per batch +* Reducing load on the source database +* Checking the per-table processing statistics in the Management Studio for slow scripts — + complex patch scripts are a common cause of processing slowdowns + +--- + +## WAL Disk Usage + +PostgreSQL retains WAL segments until all replication slots have consumed them. +An inactive or slow slot can cause WAL to accumulate on disk. + +Check approximate WAL retained per slot: + + + + +{`SELECT slot_name, active, + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS retained_wal_bytes +FROM pg_replication_slots +WHERE slot_name LIKE 'rvn_cdc_s_%'; +`} + + + + +High `retained_wal_bytes` on an inactive slot indicates the slot is not being +consumed and is holding WAL. If the slot corresponds to a deleted or abandoned +CDC Sink task, drop it. +See [Cleanup and Maintenance](./cleanup-and-maintenance.mdx). + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Cleanup and Maintenance](./cleanup-and-maintenance.mdx) + +### CDC Sink + +- [Monitoring](../monitoring.mdx) +- [Failover and Consistency](../failover-and-consistency.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx new file mode 100644 index 0000000000..beb6b070d4 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/permissions-and-roles.mdx @@ -0,0 +1,149 @@ +--- +title: "CDC Sink for PostgreSQL: Permissions and Roles" +sidebar_label: Permissions and Roles +description: "PostgreSQL permissions required by the CDC Sink user, including minimum privileges, automatic setup permissions, and dedicated user creation examples." 
+sidebar_position: 2 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Permissions and Roles + + + +* This page documents the PostgreSQL permissions required by the CDC Sink user, + and how to grant them. + +* In this page: + * [Minimum Permissions](#minimum-permissions) + * [Permissions for Automatic Setup](#permissions-for-automatic-setup) + * [Creating a Dedicated CDC User](#creating-a-dedicated-cdc-user) + + + +--- + +## Minimum Permissions + +At a minimum, the CDC Sink user needs: + +* **`REPLICATION`** attribute — allows the user to initiate logical replication +* **`SELECT`** on each table being replicated — required for the initial load phase + +Example: + + + + +{`-- Grant replication privilege +ALTER USER cdc_user REPLICATION; + +-- Grant SELECT on each table +GRANT SELECT ON TABLE orders TO cdc_user; +GRANT SELECT ON TABLE order_lines TO cdc_user; +GRANT SELECT ON TABLE customers TO cdc_user; +`} + + + + +With only these permissions, a database administrator must manually create the +replication slot and publication before starting the CDC Sink task. +See [Initial Setup](./initial-setup.mdx). + +--- + +## Permissions for Automatic Setup + +If you want CDC Sink to create and manage the replication slot and publication +automatically, the user needs additional permissions: + +**Create/drop replication slots:** + +Already covered by the `REPLICATION` attribute, which is what PostgreSQL checks for slot management: + + + + +{`ALTER USER cdc_user REPLICATION; +`} + + + + +PostgreSQL has no predefined `pg_replication_slot_admin` role, and `SUPERUSER` is not required for slot management on any supported version (10+). + +**Create/drop publications:** + +The user must either own the tables being published, or have `SUPERUSER`. 
+ +In addition, a non-superuser needs the `CREATE` privilege on the database to create a publication: + + + + +{`GRANT CREATE ON DATABASE mydb TO cdc_user; +`} + + + + + +Granting `SUPERUSER` gives the user unrestricted access to the database server. +For production environments, prefer granting only the specific privileges listed +above rather than `SUPERUSER`. + + + +For added security in production, consider having your database administrator create +the replication slot and publication manually with the minimal permissions shown above, +rather than granting CDC Sink the ability to manage them automatically. The CDC Sink +user then only needs `REPLICATION` privilege and `SELECT` on the relevant tables. +See [Initial Setup](./initial-setup.mdx). + + +--- + +## Creating a Dedicated CDC User + +It is recommended to use a dedicated database user for CDC Sink rather than an +application or admin user. + +Example setup: + + + + +{`-- Create the user +CREATE USER cdc_user WITH PASSWORD 'secure_password' REPLICATION; + +-- Grant SELECT on tables to replicate +GRANT SELECT ON TABLE orders TO cdc_user; +GRANT SELECT ON TABLE order_lines TO cdc_user; + +-- Slot management needs no extra grant: the REPLICATION attribute set above +-- already allows creating and dropping replication slots. + +-- If user needs to create publications (requires CREATE on the database and table ownership, or superuser): +-- Option A: Grant CREATE on the database and ownership of every published table +GRANT CREATE ON DATABASE mydb TO cdc_user; +ALTER TABLE orders OWNER TO cdc_user; +ALTER TABLE order_lines OWNER TO cdc_user; +-- Option B: Create publications as a superuser before starting the task +`} + + + + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Prerequisites Checklist](./prerequisites-checklist.mdx) +- [Initial Setup](./initial-setup.mdx) +- [Cleanup and Maintenance](./cleanup-and-maintenance.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx new file mode 100644 index 0000000000..ef223b2502 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx 
@@ -0,0 +1,157 @@ +--- +title: "CDC Sink for PostgreSQL: Prerequisites Checklist" +sidebar_label: Prerequisites Checklist +description: "Verify all requirements before creating a CDC Sink task for PostgreSQL, including WAL configuration, user permissions, table requirements, and network access." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Prerequisites Checklist + + + +* Before creating a CDC Sink task for PostgreSQL, verify that each of these requirements + is in place. + +* In this page: + * [Source Database Requirements](#source-database-requirements) + * [User Permissions](#user-permissions) + * [Table Requirements](#table-requirements) + * [Network Access](#network-access) + + + +--- + +## Source Database Requirements + +* **PostgreSQL version**: 10 or later (logical replication introduced in version 10) + +* **WAL level**: `wal_level` must be set to `logical` + + Verify with: + + + + +{`SHOW wal_level; +`} + + + + + If the result is not `logical`, see [WAL Configuration](./wal-configuration.mdx). + +* **`max_replication_slots`**: Must be at least 1 (one slot per CDC Sink task) + + Verify with: + + + + +{`SHOW max_replication_slots; +`} + + + + +* **`max_wal_senders`**: Must be at least 1 + + Verify with: + + + + +{`SHOW max_wal_senders; +`} + + + + +--- + +## User Permissions + +The database user in the connection string must have sufficient permissions. 
+ +**Minimum required permissions:** + + + + +{`-- Replication privilege (required for logical replication) +ALTER USER cdc_user REPLICATION; + +-- SELECT on each table CDC Sink will read +GRANT SELECT ON TABLE orders TO cdc_user; +GRANT SELECT ON TABLE order_lines TO cdc_user; +`} + + + + +**Optional (allows CDC Sink to configure replication automatically):** + + + + +{`-- Create/drop replication slots +-- This requires the REPLICATION attribute (granted above) or SUPERUSER + +-- Create/drop publications +-- This requires CREATE on the database and ownership of the tables being published, or SUPERUSER +`} + + + + +If the CDC Sink user does not have permission to create publications and replication +slots, a database administrator must set them up manually. +See [Initial Setup](./initial-setup.mdx). + +Full details: [Permissions and Roles](./permissions-and-roles.mdx). + +--- + +## Table Requirements + +* **Primary key**: Each root table and each embedded table must have a primary key + (or a unique index used as a replica identity). + +* **REPLICA IDENTITY**: For embedded tables where the join columns are not part of + the primary key, the table must have `REPLICA IDENTITY FULL` or a replica identity + index that includes the join columns. Without this, DELETE events include only the + primary key columns, not the join columns, and CDC Sink cannot locate the parent document that holds the embedded item to remove. + + See [REPLICA IDENTITY](./replica-identity.mdx). + +* **Published columns**: All columns referenced in `PrimaryKeyColumns`, `JoinColumns`, + and `ColumnsMapping` must exist in the SQL table. + +--- + +## Network Access + +* The RavenDB server must be able to open a TCP connection to the PostgreSQL host + on the configured port (default: 5432). + +* The connection must remain open for the duration of the replication stream. + Ensure that firewalls, proxies, and load balancers do not terminate idle or + long-lived connections. 
+ +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [WAL Configuration](./wal-configuration.mdx) +- [Permissions and Roles](./permissions-and-roles.mdx) +- [Initial Setup](./initial-setup.mdx) +- [REPLICA IDENTITY](./replica-identity.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx new file mode 100644 index 0000000000..d1172cad60 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity-manual-setup.mdx @@ -0,0 +1,141 @@ +--- +title: "CDC Sink for PostgreSQL: Manual REPLICA IDENTITY Setup" +sidebar_label: Manual REPLICA IDENTITY Setup +description: "Step-by-step instructions for a database administrator to configure REPLICA IDENTITY manually when CDC Sink lacks permission to alter tables." +sidebar_position: 5 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Manual REPLICA IDENTITY Setup + + + +* When CDC Sink does not have permission to alter tables, a database administrator + must configure `REPLICA IDENTITY` manually before the task starts. + +* In this page: + * [When Manual Setup Is Required](#when-manual-setup-is-required) + * [Setting REPLICA IDENTITY FULL](#setting-replica-identity-full) + * [Using an Index Instead of FULL](#using-an-index-instead-of-full) + * [Verifying the Configuration](#verifying-the-configuration) + + + +--- + +## When Manual Setup Is Required + +CDC Sink attempts to set `REPLICA IDENTITY FULL` automatically on any embedded +table whose join columns are not part of the primary key. 
This requires: + +* The CDC Sink user owns the table, **or** +* The CDC Sink user has `SUPERUSER` + +If neither condition is met, CDC Sink will start but embedded table deletes may +not work correctly. A database administrator must configure `REPLICA IDENTITY` +manually. + +--- + +## Setting REPLICA IDENTITY FULL + +The simplest approach is to set `REPLICA IDENTITY FULL` on all embedded tables +that CDC Sink will replicate: + + + + +{`ALTER TABLE order_lines REPLICA IDENTITY FULL; +ALTER TABLE line_attributes REPLICA IDENTITY FULL; +`} + + + + +`REPLICA IDENTITY FULL` includes all column values in DELETE and UPDATE events. +This is the most compatible option but increases WAL volume for tables with many +or large columns. + +--- + +## Using an Index Instead of FULL + +If WAL size is a concern, you can use a specific unique index that covers both +the join columns and PK columns of the embedded table. + +**Step 1: Create a unique index covering the required columns** + + + + +{`-- For order_lines: join column is order_id, PK is line_id +CREATE UNIQUE INDEX order_lines_replica_idx + ON order_lines (order_id, line_id); +`} + + + + +**Step 2: Set REPLICA IDENTITY to use this index** + + + + +{`ALTER TABLE order_lines + REPLICA IDENTITY USING INDEX order_lines_replica_idx; +`} + + + + +This instructs PostgreSQL to include only those indexed columns in DELETE and +UPDATE events, rather than all columns. + + +The index must be `UNIQUE` and `NOT DEFERRABLE`. It cannot include expressions +or partial predicates. All columns in the index must be `NOT NULL`. 
+ + +--- + +## Verifying the Configuration + +Confirm that the desired `REPLICA IDENTITY` mode is set: + + + + +{`SELECT c.relname, c.relreplident, + CASE c.relreplident + WHEN 'd' THEN 'DEFAULT' + WHEN 'f' THEN 'FULL' + WHEN 'i' THEN 'INDEX' + WHEN 'n' THEN 'NOTHING' + END AS mode +FROM pg_class c +JOIN pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'public' +ORDER BY c.relname; +`} + + + + +Tables configured with `FULL` or `INDEX` are ready for CDC Sink embedded table +delete processing. + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [REPLICA IDENTITY](./replica-identity.mdx) +- [Initial Setup](./initial-setup.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx new file mode 100644 index 0000000000..571d76a7fb --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/replica-identity.mdx @@ -0,0 +1,166 @@ +--- +title: "CDC Sink for PostgreSQL: REPLICA IDENTITY" +sidebar_label: REPLICA IDENTITY +description: "How PostgreSQL REPLICA IDENTITY affects CDC Sink's ability to process DELETE events for embedded tables, and when it must be configured." +sidebar_position: 4 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: REPLICA IDENTITY + + + +* **REPLICA IDENTITY** controls what column values PostgreSQL includes in DELETE + (and UPDATE) events in the logical replication stream. + +* CDC Sink requires old row values in DELETE events to identify which embedded + item to remove from a parent document. Without them, embedded table deletes + cannot be routed correctly. 
+ +* In this page: + * [Why REPLICA IDENTITY Matters](#why-replica-identity-matters) + * [REPLICA IDENTITY Options](#replica-identity-options) + * [When Is REPLICA IDENTITY Required?](#when-is-replica-identity-required) + * [Checking Current Setting](#checking-current-setting) + * [Automatic vs Manual Configuration](#automatic-vs-manual-configuration) + + + +--- + +## Why REPLICA IDENTITY Matters + +When CDC Sink receives a DELETE event for an embedded table, it needs to: + +1. Find the parent document (using the join column values from the old row) +2. Find the matching item within the embedded array or map (using the PK column values) +3. Remove it + +If the DELETE event does not include old row values for the join columns and PK +columns, CDC Sink cannot perform steps 1 or 2. + +By default, PostgreSQL only includes the primary key columns in DELETE events. +If the embedded table's join columns are part of the primary key, this is sufficient. +If they are not — which is the common case when a table has a surrogate PK — the +DELETE event will be missing the columns needed to route it. + +--- + +## REPLICA IDENTITY Options + +PostgreSQL supports four REPLICA IDENTITY modes: + +| Mode | What's included in DELETE/UPDATE events | Notes | +|------|----------------------------------------|-------| +| `DEFAULT` | Primary key columns only | Default for tables with a PK | +| `FULL` | All columns | Works for any table structure; increases WAL size | +| `INDEX` | Columns covered by a specific unique index | More targeted than FULL | +| `NOTHING` | No old values | Insufficient for CDC Sink embedded table deletes | + +For CDC Sink, `FULL` is the simplest and most compatible choice. +`INDEX` works if the index covers both the join columns and PK columns. + +--- + +## When Is REPLICA IDENTITY Required? + +| Scenario | REPLICA IDENTITY needed? 
| +|----------|------------------------| +| Root table deletes | No — root documents are deleted by document ID | +| Embedded table deletes where join columns ARE part of the PK | No — `DEFAULT` is sufficient | +| Embedded table deletes where join columns are NOT part of the PK | **Yes** — `DEFAULT` is insufficient | +| Deep-nested embedded tables | Depends on the join column placement | + +**Example where REPLICA IDENTITY is needed:** + + + + +{`-- order_lines has a surrogate PK (line_id), with order_id as foreign key +-- order_id is NOT part of the primary key +CREATE TABLE order_lines ( + line_id SERIAL PRIMARY KEY, -- PK + order_id INT NOT NULL, -- FK to orders (not part of PK) + product TEXT, + qty INT +); + +-- Without REPLICA IDENTITY FULL, a DELETE event only contains line_id. +-- CDC Sink cannot determine which parent document (order_id) to update. +ALTER TABLE order_lines REPLICA IDENTITY FULL; +`} + + + + +**Example where REPLICA IDENTITY is NOT needed:** + + + + +{`-- If order_id is part of a composite PK, DEFAULT is sufficient +CREATE TABLE order_lines ( + order_id INT NOT NULL, + line_num INT NOT NULL, + product TEXT, + qty INT, + PRIMARY KEY (order_id, line_num) +); +`} + + + + +--- + +## Checking Current Setting + + + + +{`SELECT c.relname, c.relreplident +FROM pg_class c +JOIN pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'public' +ORDER BY c.relname; +`} + + + + +The `relreplident` column: + +| Value | Meaning | +|-------|---------| +| `d` | DEFAULT (primary key only) | +| `f` | FULL (all columns) | +| `i` | INDEX (specific unique index) | +| `n` | NOTHING | + +--- + +## Automatic vs Manual Configuration + +If the CDC Sink user has sufficient permissions (table ownership or `SUPERUSER`), +CDC Sink will automatically set `REPLICA IDENTITY FULL` on embedded tables that +require it. 
+ +If the CDC Sink user does not have permission to alter tables, a database +administrator must configure `REPLICA IDENTITY` before starting the task. +See [Manual Setup](./replica-identity-manual-setup.mdx). + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Manual REPLICA IDENTITY Setup](./replica-identity-manual-setup.mdx) +- [Initial Setup](./initial-setup.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx new file mode 100644 index 0000000000..0cedc92e5f --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/studio-ui.mdx @@ -0,0 +1,96 @@ +--- +title: "CDC Sink for PostgreSQL: Studio UI" +sidebar_label: Studio UI +description: "How to create, monitor, and edit CDC Sink tasks for PostgreSQL through the RavenDB Management Studio Ongoing Tasks interface." +sidebar_position: 8 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Studio UI + + + +* CDC Sink tasks are created and managed through the **Ongoing Tasks** section of the + Management Studio. + +* In this page: + * [Creating a CDC Sink Task](#creating-a-cdc-sink-task) + * [Monitoring Task State](#monitoring-task-state) + * [Editing a Task](#editing-a-task) + + + +--- + +## Creating a CDC Sink Task + +To create a CDC Sink task in the Studio: + +1. Navigate to **Databases** → your database → **Ongoing Tasks** +2. Click **Add Task** and select **CDC Sink** +3. Configure the connection string pointing to the PostgreSQL source +4. Add one or more root tables with their column mappings +5. (Optional) Add embedded tables and linked tables for each root table +6. (Optional) Configure patches for INSERT/UPDATE/DELETE events +7. 
Click **Save** + +--- + +## Monitoring Task State + +The task list shows: + +* **Task name** and **connection string name** +* **State** — Active, Disabled, Error, or FallbackMode +* **Responsible node** — the cluster node currently running the task +* **Progress** — during initial load, shows which tables have been scanned + +Clicking on a task opens the detail view with per-table statistics. + +--- + +## Editing a Task + +To edit a task, click its name in the Ongoing Tasks list. The same configuration +form used for creation opens in edit mode. + + +The replication slot and publication names are **fixed at task creation** and do not +change when you edit the task — regardless of whether names were auto-generated or +specified explicitly. + +When you add or remove tables, CDC Sink continues using the same slot and publication +names. The **publication** in PostgreSQL controls which tables are replicated and must +be kept in sync with the task configuration: + +* **Adding a table:** If CDC Sink has the necessary permissions, it will automatically + run `ALTER PUBLICATION publication_name ADD TABLE table_name;` when the task is saved. If it does + not have permissions, a database administrator must run this manually. +* **Removing a table:** `ALTER PUBLICATION publication_name DROP TABLE table_name;` must be run
+ manually by a database administrator — CDC Sink does not remove tables from the + publication automatically. + +Orphaned slots and publications must be dropped manually by the database administrator. +See [Cleanup and Maintenance](./cleanup-and-maintenance.mdx). + + +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Initial Setup](./initial-setup.mdx) +- [Cleanup and Maintenance](./cleanup-and-maintenance.mdx) + +### CDC Sink + +- [Monitoring](../monitoring.mdx) +- [API Reference](../api-reference.mdx) +- [Configuration Reference](../configuration-reference.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx new file mode 100644 index 0000000000..e4263bfdf8 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/wal-configuration.mdx @@ -0,0 +1,128 @@ +--- +title: "CDC Sink for PostgreSQL: WAL Configuration" +sidebar_label: WAL Configuration +description: "How to verify and configure the PostgreSQL Write-Ahead Log level to enable logical replication required by CDC Sink." +sidebar_position: 1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: WAL Configuration + + + +* CDC Sink uses PostgreSQL **logical replication**, which requires the Write-Ahead Log + (WAL) to be configured at the `logical` level. + +* This page explains how to verify the current setting and change it if needed.
+ +* In this page: + * [Check Current WAL Level](#check-current-wal-level) + * [Enable Logical Replication](#enable-logical-replication) + * [Other Required Settings](#other-required-settings) + + + +--- + +## Check Current WAL Level + +Connect to your PostgreSQL instance and run: + + + + +{`SHOW wal_level; +`} + + + + +If the result is `logical`, no changes are needed. + +If the result is `replica` or `minimal`, logical replication is not enabled and +must be configured before CDC Sink can run. + +--- + +## Enable Logical Replication + +Edit `postgresql.conf` and set the following: + + + + +{`wal_level = logical +`} + + + + +This change requires a **PostgreSQL restart**. + +After restarting, verify the change took effect: + + + + +{`SHOW wal_level; +-- Should return: logical +`} + + + + + +Changing `wal_level` requires a full server restart, not just a configuration reload. +Plan for a brief maintenance window. + + +--- + +## Other Required Settings + +CDC Sink uses one replication slot per task. Ensure the following `postgresql.conf` settings are +sufficient for the number of CDC Sink tasks you plan to run: + + + + +{`max_replication_slots = 10 # at least 1 per CDC Sink task +max_wal_senders = 10 # at least 1 per active replication connection +`} + + + + +The defaults in a standard PostgreSQL installation are typically sufficient for +a small number of tasks, but you may need to increase them if you have many +concurrent CDC Sink tasks or other replication consumers. + +Check current values: + + + + +{`SHOW max_replication_slots; +SHOW max_wal_senders; +`} + + + + +These settings also require a **server restart** if changed.
+ +--- + +## Related Articles + +### CDC Sink for PostgreSQL + +- [Prerequisites Checklist](./prerequisites-checklist.mdx) +- [Permissions and Roles](./permissions-and-roles.mdx) +- [Cleanup and Maintenance](./cleanup-and-maintenance.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx new file mode 100644 index 0000000000..e150618ccf --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx @@ -0,0 +1,155 @@ +--- +title: "CDC Sink: Property Retention" +sidebar_label: Property Retention +description: "Explains how CDC Sink merges UPDATE events onto existing RavenDB documents, preserving properties that are not part of the column mapping." +sidebar_position: 8 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Property Retention + + + +* When CDC Sink applies an UPDATE to an existing RavenDB document, it **merges** + the new values onto the existing document rather than replacing it entirely. + +* Properties that are not part of the column mapping are preserved across updates. + This allows RavenDB-side data to coexist safely with CDC-managed properties. + +* In this page: + * [How Merging Works](#how-merging-works) + * [What Is and Isn't Preserved](#what-is-and-isnt-preserved) + * [Editing Documents Directly in RavenDB](#editing-documents-directly-in-ravendb) + * [Implications for Patches](#implications-for-patches) + + + +--- + +## How Merging Works + +When a CDC UPDATE arrives for a document that already exists: + +1. The existing document is loaded +2. Mapped column values from the CDC event overwrite the corresponding properties +3. All other properties on the document are left unchanged +4. 
The merged document is written back + +**Example:** + +Initial SQL row → initial document: + + + + +{`\{ + "Id": 1, + "Name": "Alice", + "Email": "alice@example.com", + "InternalNotes": "VIP customer", + "@metadata": \{ "@collection": "Customers" \} +\} +`} + + + + +SQL UPDATE: `UPDATE customers SET email = 'alice.new@example.com' WHERE id = 1` + +Document after CDC UPDATE: + + + + +{`\{ + "Id": 1, + "Name": "Alice", + "Email": "alice.new@example.com", + "InternalNotes": "VIP customer", + "@metadata": \{ "@collection": "Customers" \} +\} +`} + + + + +`Email` is updated from SQL, while `InternalNotes` (not in `ColumnsMapping`) is preserved. + +--- + +## What Is and Isn't Preserved + +**Preserved across CDC updates:** + +* Properties not listed in `ColumnsMapping` +* Properties set in RavenDB directly (annotations, computed values, flags) +* Document metadata (unless the patch explicitly modifies it) + +**Overwritten on CDC update:** + +* Any property mapped via `ColumnsMapping` — always updated to match the current SQL value + +If you manually edit a property that is part of `ColumnsMapping`, the next CDC UPDATE +for that row will overwrite your edit with the SQL value. + +--- + +## Editing Documents Directly in RavenDB + +You can safely add properties to CDC-managed documents: + + + + +{`\{ + "Id": 1, + "Name": "Alice", + "Email": "alice@ex.com", + "InternalNotes": "...", + "ReviewedAt": "...", + "Tags": ["vip"] +\} +`} + + + + +Properties managed by CDC (those in `ColumnsMapping`) will be overwritten on +the next UPDATE from the source database. Do not rely on manual edits to mapped +properties surviving future CDC updates. + +**CDC Sink does not detect or protect manual edits to mapped properties.** +If you need to preserve a value that comes from SQL, consider adding a separate +RavenDB-only property for your annotation and leaving the SQL-mapped property as-is. 
+ +--- + +## Implications for Patches + +Patches run after column mapping and can set additional properties that are not +from `ColumnsMapping`. These patch-computed properties follow the same merge rules: + +* If a patch sets `this.ComputedField = ...`, that value persists across future events + where the patch doesn't explicitly change it +* If a patch sets a property that is also in `ColumnsMapping`, the mapped SQL value is written again on every event + before the patch runs (mapping is applied before patching), so a value the patch wrote in an earlier event is not retained unless the patch sets it again + +For aggregates maintained via patches (e.g., `RunningTotal`), the patch itself +is responsible for keeping the value correct across INSERT, UPDATE, and DELETE events. +See [Patching](./patching.mdx). + +--- + +## Related Articles + +### CDC Sink + +- [Column Mapping](./column-mapping.mdx) +- [Patching](./patching.mdx) +- [How It Works](./how-it-works.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx new file mode 100644 index 0000000000..e874dbeeeb --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -0,0 +1,328 @@ +--- +title: "CDC Sink: Schema Design" +sidebar_label: Schema Design +description: "Explains how CDC Sink maps a relational schema to a RavenDB document model using root tables, embedded tables, and linked tables." +sidebar_position: 2 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Schema Design + + + +* CDC Sink maps a relational schema to a RavenDB document model through configuration. + This page explains the three building blocks — root tables, embedded tables, and + linked tables — and how to combine them.
+ +* In this page: + * [Root Tables](#root-tables) + * [Embedded Tables](#embedded-tables) + * [Linked Tables](#linked-tables) + * [Primary Key and Join Column Requirements](#primary-key-and-join-column-requirements) + * [Multi-Level Nesting](#multi-level-nesting) + * [Relation Types](#relation-types) + * [Choosing Between Embedded and Linked](#choosing-between-embedded-and-linked) + + + +--- + +## Root Tables + +A **root table** maps a SQL table to a RavenDB collection. Each row in the SQL table +becomes one document. + + + + +{`new CdcSinkTableConfig +\{ + Name = "Orders", // RavenDB collection name + SourceTableSchema = "public", // SQL schema (optional, default: "public") + SourceTableName = "orders", // SQL table name + PrimaryKeyColumns = ["id"], // Used for document ID generation + ColumnsMapping = new Dictionary<string, string> + \{ + ["id"] = "Id", + ["customer_name"] = "CustomerName", + ["total"] = "Total" + \} +\} +`} + + + + +**Document ID generation:** `\{CollectionName\}/\{pk1\}/\{pk2\}/...` +A row with `id = 42` and collection `Orders` becomes document `Orders/42`. +A composite PK `(region, id)` with values `(US, 42)` becomes `Orders/US/42`. + +**Column mapping:** Only mapped columns appear in the document. Unmapped columns are +still available in patch scripts via `$row` but are not stored in the document. + +--- + +## Embedded Tables + +An **embedded table** creates nested data within a parent document. For example, a +SQL `order_lines` table becomes an array inside each `Orders` document.
+ + + + +{`new CdcSinkTableConfig +\{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = ["id"], + ColumnsMapping = \{ ["id"] = "Id", ["customer_name"] = "CustomerName" \}, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "order_lines", + PropertyName = "Lines", // Property in parent document + Type = CdcSinkRelationType.Array, // Array of items + JoinColumns = ["order_id"], // FK referencing parent's PK + PrimaryKeyColumns = ["line_id"], // Used to match items on update/delete + ColumnsMapping = + \{ + ["line_id"] = "LineId", + ["product"] = "Product", + ["quantity"] = "Quantity" + \} + \} + ] +\} +`} + + + + +This produces documents like: + + + + +{`\{ + "Id": 1, + "CustomerName": "Alice", + "Lines": [ + \{ "LineId": 1, "Product": "Apples", "Quantity": 5 \}, + \{ "LineId": 2, "Product": "Bananas", "Quantity": 3 \} + ], + "@metadata": \{ "@collection": "Orders" \} +\} +`} + + + + +--- + +## Linked Tables + +A **linked table** creates a document ID reference rather than embedding data. +A foreign key in the source row becomes a RavenDB document ID. + + + + +{`new CdcSinkTableConfig +\{ + Name = "Orders", + SourceTableName = "orders", + PrimaryKeyColumns = ["id"], + ColumnsMapping = \{ ["id"] = "Id", ["customer_id"] = "CustomerId" \}, + LinkedTables = + [ + new CdcSinkLinkedTableConfig + \{ + SourceTableName = "customers", + PropertyName = "Customer", // Property in parent document + LinkedCollectionName = "Customers", // Target collection for ID + Type = CdcSinkRelationType.Value, // Single reference + JoinColumns = ["customer_id"] // FK used to build the ID + \} + ] +\} +`} + + + + +With `customer_id = 42`, the document gets `"Customer": "Customers/42"`. + +--- + +## Primary Key and Join Column Requirements + +### Root Tables + +The `PrimaryKeyColumns` list defines which SQL columns are used to generate the document ID. +All PK columns must be present in every INSERT, UPDATE, and DELETE event. 
+ +### Embedded Tables (One Level) + +An embedded table needs: + +* **PrimaryKeyColumns** — Used to match items within the parent's array for UPDATE and DELETE +* **JoinColumns** — Foreign key referencing the parent's `PrimaryKeyColumns` + +The `JoinColumns` must exactly match the parent's `PrimaryKeyColumns`: + +| Parent PK | Required JoinColumns | Valid? | +|-----------|---------------------|--------| +| `[id]` | `[order_id]` where `order_id` = parent's `id` | ✓ | +| `[id, year]` | `[order_id, order_year]` | ✓ | +| `[id]` | `[customer_id, order_id]` | ✗ Extra column not from parent PK | +| `[id, year]` | `[order_id]` | ✗ Missing `order_year` | + +### DELETE Events and REPLICA IDENTITY + +For DELETE events, the source database must include the join column values so CDC Sink +can route the delete to the correct parent document. + +If the join column is not part of the SQL table's primary key, the source database may +need additional configuration to include it in DELETE events. + +See [REPLICA IDENTITY](./postgres/replica-identity.mdx) for +the PostgreSQL-specific requirement and how CDC Sink handles it automatically. + +--- + +## Multi-Level Nesting + +Embedded tables can themselves have embedded tables, creating arbitrarily deep hierarchies. 
+ +**Example: Company → Departments → Employees** + + + + +{`new CdcSinkTableConfig +\{ + Name = "Companies", + SourceTableName = "companies", + PrimaryKeyColumns = ["company_id"], + ColumnsMapping = \{ ["company_id"] = "CompanyId", ["name"] = "Name" \}, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "departments", + PropertyName = "Departments", + Type = CdcSinkRelationType.Array, + JoinColumns = ["company_id"], // Root FK + PrimaryKeyColumns = ["dept_id"], + ColumnsMapping = \{ ["dept_id"] = "DeptId", ["dept_name"] = "DeptName" \}, + EmbeddedTables = + [ + new CdcSinkEmbeddedTableConfig + \{ + SourceTableName = "employees", + PropertyName = "Employees", + Type = CdcSinkRelationType.Array, + JoinColumns = ["company_id", "dept_id"], // Root FK + parent FK + PrimaryKeyColumns = ["emp_id"], + ColumnsMapping = \{ ["emp_id"] = "EmpId", ["emp_name"] = "EmpName" \} + \} + ] + \} + ] +\} +`} + + + + +**Critical requirement for deep nesting:** All descendant tables must carry the root +table's primary key as a denormalized column. The `employees` table must have +`company_id` even though it joins directly to `departments` via `dept_id`. + +This is required because CDC Sink needs to route every row to the correct root document +in a single pass, without additional lookups. + +**SQL schema to support this:** + + + + +{`CREATE TABLE employees ( + company_id INT NOT NULL, -- Denormalized root FK + dept_id INT NOT NULL, -- Parent FK + emp_id INT NOT NULL, -- Local PK + emp_name VARCHAR(200), + PRIMARY KEY (company_id, dept_id, emp_id) +); +`} + + + + +Including all routing columns in the primary key also avoids the need for REPLICA IDENTITY +configuration — the default DELETE events include all PK columns. 
+ +--- + +## Relation Types + +The `Type` property on embedded and linked tables controls the document structure: + +| Type | Use Case | Document Structure | +|------|----------|--------------------| +| `Array` | One-to-many: parent has many children | `"Lines": [\{ ... \}, \{ ... \}]` | +| `Map` | One-to-many with direct key lookup | `"Lines": \{ "1": \{ ... \}, "2": \{ ... \} \}` | +| `Value` | Many-to-one: parent has one child/reference | `"Customer": \{ ... \}` or `"Customer": "Customers/42"` | + +**Array** — Items are matched by `PrimaryKeyColumns` for UPDATE and DELETE. +Use when you need to iterate over all items. + +**Map** — Items are stored as a JSON object keyed by the primary key value(s). +Use when you need fast direct-key access within the document. + +**Value** — Stores a single embedded object or document reference. +Use for many-to-one relationships (many orders share one customer). + +--- + +## Choosing Between Embedded and Linked + +| Consideration | Embedded | Linked | +|--------------|----------|--------| +| Data location | Stored inside parent document | Stored in a separate document | +| Access pattern | Read parent to get all data | Load parent, then load referenced doc | +| Updates | Automatic via CDC | Automatic via CDC for each table | +| Document size | Grows with embedded items | Parent stays small | +| Use case | Parent owns child (orders own lines) | Independent entities (orders reference customers) | + +**Use embedded tables** when: +* The child entity has no meaning outside the parent (order lines without an order) +* You always read the parent and child together +* You want a single-document read + +**Use linked tables** when: +* The referenced entity is independently meaningful (customers exist without orders) +* The referenced entity is shared by many parents +* You want RavenDB's include loading to handle the join + +--- + +## Related Articles + +### CDC Sink + +- [Embedded Tables](./embedded-tables.mdx) +- [Linked 
Tables](./linked-tables.mdx) +- [Column Mapping](./column-mapping.mdx) +- [Configuration Reference](./configuration-reference.mdx) + +### PostgreSQL + +- [REPLICA IDENTITY](./postgres/replica-identity.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx new file mode 100644 index 0000000000..c3e33d951d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx @@ -0,0 +1,77 @@ +--- +title: "CDC Sink: Server Configuration" +sidebar_label: Server Configuration +description: "Documents the RavenDB server configuration keys that control CDC Sink task behavior, including batch size, fallback timeout, and poll interval." +sidebar_position: 15 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Server Configuration + + + +* This page documents the RavenDB server configuration keys that control CDC Sink + task behavior. + +* In this page: + * [Configuration Keys](#configuration-keys) + + + +--- + +## Configuration Keys + +These keys can be set in `settings.json` or passed as environment variables. +See [Configuration Overview](../../../server/configuration/configuration-options). + +--- + +#### `CdcSink.MaxBatchSize` + +**Default:** `1024` + +The maximum number of change events processed in a single batch. Larger values +increase throughput but also increase memory usage per batch. + +--- + +#### `CdcSink.MaxFallbackTimeInSec` + +**Default:** `900` (15 minutes) + +How long the task will remain in fallback mode (continuously retrying) after losing +connection to the source database before reporting an error. 
+ +Set to `0` to disable fallback mode entirely — the task will move to error state +immediately on connection failure. + +--- + +#### `CdcSink.PollIntervalInSec` + +**Default:** `1` + +How frequently CDC Sink polls the source database for new change events when the +stream is idle. A shorter interval reduces latency but increases polling load on +the source. + +--- + +## Related Articles + +### CDC Sink + +- [Overview](./overview.mdx) +- [Monitoring](./monitoring.mdx) +- [Failover and Consistency](./failover-and-consistency.mdx) + +### Server Configuration + +- [Configuration Overview](../../../server/configuration/configuration-options) diff --git a/docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json b/docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json new file mode 100644 index 0000000000..53959810fc --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/sql-server/_category_.json @@ -0,0 +1 @@ +{"position": 17, "label": "SQL Server"} diff --git a/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx new file mode 100644 index 0000000000..0552dc23a4 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx @@ -0,0 +1,38 @@ +--- +title: "CDC Sink for SQL Server: Overview" +sidebar_label: Overview +description: "Overview of CDC Sink support for SQL Server, including planned availability and links to existing PostgreSQL documentation." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for SQL Server: Overview + + + +* CDC Sink support for SQL Server is planned for a future release. 
+ + + + +This section will cover the prerequisites, setup, and SQL Server-specific +configuration for CDC Sink when SQL Server support is available. + +In the meantime, see the [PostgreSQL documentation](../postgres/prerequisites-checklist.mdx) +for the full CDC Sink feature documentation. + + +--- + +## Related Articles + +### CDC Sink + +- [Overview](../overview.mdx) +- [PostgreSQL: Prerequisites Checklist](../postgres/prerequisites-checklist.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx new file mode 100644 index 0000000000..8afe70e1e3 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -0,0 +1,195 @@ +--- +title: "CDC Sink: Troubleshooting" +sidebar_label: Troubleshooting +description: "Covers common problems encountered when running CDC Sink tasks and how to resolve them, including startup failures, error states, and missing documents." +sidebar_position: 14 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Troubleshooting + + + +* This page covers common problems encountered when running CDC Sink tasks and how + to resolve them. + +* In this page: + * [Task Won't Start](#task-wont-start) + * [Task Enters Error State](#task-enters-error-state) + * [Documents Not Appearing](#documents-not-appearing) + * [Embedded Items Missing or Incorrect](#embedded-items-missing-or-incorrect) + * [DELETE Not Applied](#delete-not-applied) + * [Patch Errors](#patch-errors) + + + +--- + +## Task Won't Start + +**Symptom:** Task remains in `Error` state immediately after creation. 
+ +**Common causes:** + +* **Invalid connection string** — verify the connection string name matches one defined + in the SQL connection strings section of the Management Studio. + +* **Connection refused** — the source database is unreachable from the RavenDB server. + Verify host, port, and firewall rules. + +* **Authentication failure** — the credentials in the connection string are incorrect + or the user does not have the required permissions. + See the PostgreSQL [Permissions and Roles](./postgres/permissions-and-roles.mdx) page. + +* **WAL level not set to `logical`** — CDC Sink requires logical replication enabled + on the source. + See [WAL Configuration](./postgres/wal-configuration.mdx). + +Check the error message in the Notification Center (bell icon in Studio) for the +specific failure reason. + +--- + +## Task Enters Error State + +**Symptom:** Task was running, then stopped with an error. + +**Common causes:** + +* **Replication slot dropped** — if the PostgreSQL replication slot was dropped + externally, CDC Sink cannot resume. The task must be deleted and recreated (a new + slot will be created on next start). + +* **Source table schema changed** — adding or removing columns from the SQL table + may cause mapping errors. Update the task configuration to match the new schema. + +* **Replication lag too large** — if CDC Sink falls behind significantly, PostgreSQL + may retain WAL segments that fill the disk. See your PostgreSQL documentation on + `max_slot_wal_keep_size` (the cap on WAL retained for replication slots) and replication slot lag. + +* **Exceeded fallback timeout** — the source was unreachable for longer than + `CdcSink.MaxFallbackTimeInSec`. The task moves to error state after this timeout. + Restore connectivity and re-enable the task. + +--- + +## Documents Not Appearing + +**Symptom:** SQL rows exist but corresponding RavenDB documents are not created. + +**Check:** + +1. **Initial load in progress** — CDC Sink performs a full table scan before streaming.
+ For large tables this can take time. Check the task progress in Studio. + +2. **Table is disabled** — verify `Disabled` is not set to `true` on the `CdcSinkTableConfig`. + +3. **Primary key mismatch** — the columns listed in `PrimaryKeyColumns` must match the + actual SQL primary key. If they don't match, CDC Sink cannot generate a document ID. + +4. **ColumnsMapping is empty** — at least one column must be mapped. A table with no + column mappings will produce empty documents. + +5. **Task is paused** — check that the task state is `Active`, not `Disabled`. + +--- + +## Embedded Items Missing or Incorrect + +**Symptom:** Parent document exists but embedded array/map is empty or items are missing. + +**Check:** + +1. **JoinColumns mismatch** — the `JoinColumns` in `CdcSinkEmbeddedTableConfig` must + reference the same columns (by name) that the SQL foreign key uses. Verify these + match the parent table's `PrimaryKeyColumns`. + +2. **PrimaryKeyColumns wrong** — the embedded table's `PrimaryKeyColumns` are used to + match items on UPDATE and DELETE. If they're wrong, new items may be created as + duplicates or existing items may not be found. + +3. **DELETE not reflected** — if embedded item rows are being deleted in SQL but the + items remain in RavenDB, check whether `REPLICA IDENTITY` is configured for the + embedded table. Without it, PostgreSQL does not include the old row values in the + DELETE event, and CDC Sink cannot identify which item to remove. + See [REPLICA IDENTITY](./postgres/replica-identity.mdx). + +4. **Type mismatch (Array vs Map)** — if the `Type` was changed from `Array` to `Map` + or vice versa after documents were already created, existing documents retain the + old structure. Re-process from scratch or migrate existing documents. + +--- + +## DELETE Not Applied + +**Symptom:** A row was deleted from SQL but the RavenDB document still exists. + +**Check:** + +1. 
**`IgnoreDeletes = true`** — if `OnDelete.IgnoreDeletes` is set to `true`, the + delete is intentionally discarded. Verify this is the intended behavior. + +2. **Missing REPLICA IDENTITY (embedded tables)** — for embedded table items, + PostgreSQL must include old row values in DELETE events so CDC Sink knows which + parent document to update. If `REPLICA IDENTITY` is not set to `FULL` (or to + an index that includes join columns), the DELETE event may be missing the columns + needed to route it. + See [REPLICA IDENTITY](./postgres/replica-identity.mdx). + +3. **Custom patch returns without deleting** — if an `OnDelete.Patch` is configured + that does not allow the delete to proceed (combined with `IgnoreDeletes = true`), + the document is kept. Review the patch logic. + +--- + +## Patch Errors + +**Symptom:** Task enters error state with a message referencing a JavaScript patch failure. + +**Check:** + +1. **Script syntax error** — test the patch script in the Management Studio's patch + editor before using it in CDC Sink. + +2. **Null reference** — `$row` properties and `$old` may be `null` for certain event + types. Use optional chaining: `$old?.Amount || 0`. + +3. **`get()` returns null** — a document loaded with `get()` may not exist yet if CDC + Sink processes tables out of dependency order. Guard with a null check: + + + + +{`const related = get("Collection/123"); +if (related) \{ ... \} +`} + + + + +4. **Step limit exceeded** — patch scripts have a step quota. If the script is very + long-running or loops over large arrays, it may hit `Patching.MaxStepsForScript`. + See the [configuration reference](../../../server/configuration/patching-configuration) for this setting. 
+ +--- + +## Related Articles + +### CDC Sink + +- [How It Works](./how-it-works.mdx) +- [Monitoring](./monitoring.mdx) +- [Patching](./patching.mdx) +- [Failover and Consistency](./failover-and-consistency.mdx) + +### PostgreSQL + +- [WAL Configuration](./postgres/wal-configuration.mdx) +- [Permissions and Roles](./postgres/permissions-and-roles.mdx) +- [REPLICA IDENTITY](./postgres/replica-identity.mdx) From 8512e38009a4aea26f62d37419899659afb7ff1e Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 05:17:54 +0300 Subject: [PATCH 02/17] RavenDB-26046 - Update CDC Sink docs for new Columns API and additional features - Replace ColumnsMapping (Dictionary) + AttachmentNameMapping (Dictionary) with unified Columns list of CdcColumnMapping { Column, Name, Type } across all files - Add CdcColumnType enum documentation (Default, Json, Attachment) - Add REST API endpoints table to configuration-reference - Add CdcSink.PollIntervalInSec to server-configuration - Add error handling details to monitoring (threshold, fallback, exponential backoff) - Add ALTER PUBLICATION auto-fix note to postgres/initial-setup - Fix how-it-works: sequential scan description, Child Before Parent section - Fix Startup and Verification: split into per-database subsections - Update all prose references from ColumnsMapping to Columns list --- .../ongoing-tasks/cdc-sink/api-reference.mdx | 24 ++-- .../cdc-sink/attachment-handling.mdx | 51 ++++--- .../ongoing-tasks/cdc-sink/column-mapping.mdx | 131 ++++++++---------- .../cdc-sink/configuration-reference.mdx | 53 ++++++- .../cdc-sink/embedded-tables.mdx | 24 ++-- .../ongoing-tasks/cdc-sink/how-it-works.mdx | 36 +++-- .../ongoing-tasks/cdc-sink/linked-tables.mdx | 6 +- .../ongoing-tasks/cdc-sink/monitoring.mdx | 21 +++ .../ongoing-tasks/cdc-sink/patching.mdx | 12 +- .../examples/example-complex-nesting.mdx | 34 ++--- .../examples/example-denormalization.mdx | 28 ++-- .../examples/example-event-sourcing.mdx | 26 ++-- 
.../examples/example-simple-migration.mdx | 14 +- .../cdc-sink/postgres/initial-setup.mdx | 3 +- .../postgres/prerequisites-checklist.mdx | 2 +- .../cdc-sink/property-retention.mdx | 14 +- .../ongoing-tasks/cdc-sink/schema-design.mdx | 36 ++--- .../cdc-sink/server-configuration.mdx | 2 + .../cdc-sink/troubleshooting.mdx | 2 +- 19 files changed, 289 insertions(+), 230 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx index 8be425736a..94af8e2688 100644 --- a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx @@ -48,12 +48,12 @@ Use `AddCdcSinkOperation` to create a new CDC Sink task: Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = new List \{ "id" \}, - ColumnsMapping = new Dictionary - \{ - ["id"] = "Id", - ["customer_name"] = "CustomerName", - ["total"] = "Total" - \} + Columns = + [ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "customer_name", Name = "CustomerName" \}, + new() \{ Column = "total", Name = "Total" \}, + ] \} \} \}; @@ -90,12 +90,12 @@ config.Tables.Add(new CdcSinkTableConfig Name = "Customers", SourceTableName = "customers", PrimaryKeyColumns = new List \{ "id" \}, - ColumnsMapping = new Dictionary - \{ - ["id"] = "Id", - ["name"] = "Name", - ["email"] = "Email" - \} + Columns = + [ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "name", Name = "Name" \}, + new() \{ Column = "email", Name = "Email" \}, + ] \}); await store.Maintenance.SendAsync( diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index 1f60880eab..5a77cb1cbd 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -1,7 +1,7 @@ --- title: "CDC Sink: Attachment Handling" sidebar_label: Attachment Handling -description: "Explains how 
to store binary SQL columns as RavenDB attachments using AttachmentNameMapping on root and embedded table configurations." +description: "Explains how to store binary SQL columns as RavenDB attachments using CdcColumnType.Attachment on root and embedded table configurations." sidebar_position: 9 --- @@ -16,8 +16,8 @@ import LanguageContent from "@site/src/components/LanguageContent"; -* Binary SQL columns can be stored as RavenDB **attachments** instead of document - properties using `AttachmentNameMapping`. +* Binary SQL columns (bytea, varbinary, etc.) can be stored as RavenDB **attachments** + by setting `Type = CdcColumnType.Attachment` on the column mapping entry. * This applies to both root tables and embedded tables. @@ -33,7 +33,8 @@ import LanguageContent from "@site/src/components/LanguageContent"; ## Root Table Attachments -Use `AttachmentNameMapping` to map a binary SQL column to a RavenDB attachment: +Set `Type = CdcColumnType.Attachment` on a `Columns` entry to store a binary SQL +column as a RavenDB attachment: @@ -43,16 +44,13 @@ Use `AttachmentNameMapping` to map a binary SQL column to a RavenDB attachment: Name = "Files", SourceTableName = "files", PrimaryKeyColumns = ["id"], - ColumnsMapping = new Dictionary - \{ - ["id"] = "Id", - ["filename"] = "Filename", - ["mime_type"] = "MimeType" - \}, - AttachmentNameMapping = new Dictionary - \{ - ["content"] = "file" // SQL column "content" → attachment named "file" - \} + Columns = + [ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "filename", Name = "Filename" \}, + new() \{ Column = "mime_type", Name = "MimeType" \}, + new() \{ Column = "content", Name = "file", Type = CdcColumnType.Attachment \}, + ] \} `} @@ -60,7 +58,8 @@ Use `AttachmentNameMapping` to map a binary SQL column to a RavenDB attachment: The binary `content` column is stored as an attachment named `"file"` on the document. -The attachment is stored with content type `application/octet-stream`. 
+The `Name` value becomes the attachment name. The attachment is stored with content +type `application/octet-stream`. --- @@ -78,15 +77,12 @@ The attachment name is automatically prefixed to ensure uniqueness: PropertyName = "Photos", PrimaryKeyColumns = ["photo_num"], JoinColumns = ["product_id"], - ColumnsMapping = new Dictionary - \{ - ["photo_num"] = "PhotoNum", - ["caption"] = "Caption" - \}, - AttachmentNameMapping = new Dictionary - \{ - ["thumbnail"] = "thumb" - \} + Columns = + [ + new() \{ Column = "photo_num", Name = "PhotoNum" \}, + new() \{ Column = "caption", Name = "Caption" \}, + new() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, + ] \} `} @@ -103,10 +99,11 @@ the primary key value. **Root table attachments:** -The attachment name is exactly the value you specify in `AttachmentNameMapping`. +The attachment name is exactly the `Name` value on the mapping entry with +`Type = CdcColumnType.Attachment`. ``` -AttachmentNameMapping = { ["content"] = "file" } +Column = "content", Name = "file", Type = CdcColumnType.Attachment → Attachment name: "file" ``` @@ -117,7 +114,7 @@ The attachment name is prefixed with `{PropertyName}/{pkValue}/`: ``` PropertyName = "Photos" PrimaryKeyColumns = ["photo_num"] → photo_num = 1 -AttachmentNameMapping = { ["thumbnail"] = "thumb" } +Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment → Attachment name: "Photos/1/thumb" ``` diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 4eaab62609..3c2e95e8d8 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -1,7 +1,7 @@ --- title: "CDC Sink: Column Mapping" sidebar_label: Column Mapping -description: "Explains how to control which SQL columns appear in RavenDB documents and under what property names using ColumnsMapping and AttachmentNameMapping." 
+description: "Explains how to control which SQL columns appear in RavenDB documents and under what property names using the Columns list of CdcColumnMapping entries." sidebar_position: 5 --- @@ -16,13 +16,15 @@ import LanguageContent from "@site/src/components/LanguageContent"; -* Column mapping controls which SQL columns appear in the RavenDB document and - under what property names. +* Column mapping controls which SQL columns appear in the RavenDB document, + under what property names, and how they are stored. It uses a `Columns` list + of `CdcColumnMapping` objects (not a dictionary). * In this page: * [Mapping Columns to Properties](#mapping-columns-to-properties) + * [Column Types](#column-types) * [Unmapped Columns](#unmapped-columns) - * [Attachment Mapping](#attachment-mapping) + * [Validation Rules](#validation-rules) * [Schema (Source Table Schema)](#schema-source-table-schema) @@ -31,29 +33,29 @@ import LanguageContent from "@site/src/components/LanguageContent"; ## Mapping Columns to Properties -`ColumnsMapping` is a `Dictionary` where each entry maps a SQL column -name to a RavenDB document property name: +`Columns` is a `List<CdcColumnMapping>` where each entry maps a SQL column +to a RavenDB document property: -{`ColumnsMapping = new Dictionary -\{ - ["id"] = "Id", - ["customer_name"] = "CustomerName", - ["order_date"] = "OrderDate", - ["total_amount"] = "TotalAmount" -\} +{`Columns = +[ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "customer_name", Name = "CustomerName" \}, + new() \{ Column = "order_date", Name = "OrderDate" \}, + new() \{ Column = "total_amount", Name = "TotalAmount" \}, +] `} -**Key:** SQL column name (case-insensitive match against the column names in CDC events) -**Value:** Property name in the RavenDB document +**`Column`:** SQL column name (case-insensitive match against the column names in CDC events) +**`Name`:** Property name in the RavenDB document -The primary key column(s) do not need to be mapped. 
When included in `ColumnsMapping`, +The primary key column(s) do not need to be mapped. When included in `Columns`, they become a regular document property. When omitted, the PK values are still used to build the document ID — they just won't appear as a named property. @@ -61,19 +63,38 @@ Including the PK in the mapping is generally useful so the document carries its identifier, but it is not required. -**Type conversions:** SQL numeric, boolean, and date types are converted to their -JSON equivalents. SQL `NULL` becomes JSON `null`. If you need custom type handling -or derived values, use a `Patch` script. +--- -**At least one mapping is required.** An empty `ColumnsMapping` is a validation error. +## Column Types -The same rules apply to embedded table column mappings. +Each `CdcColumnMapping` entry has an optional `Type` property of type `CdcColumnType` +that controls how the SQL value is stored in RavenDB: + + + + +{`Columns = +[ + new() \{ Column = "order_id", Name = "OrderId" \}, + new() \{ Column = "metadata", Name = "Metadata", Type = CdcColumnType.Json \}, // Parsed as native JSON + new() \{ Column = "receipt", Name = "receipt.pdf", Type = CdcColumnType.Attachment \}, // Stored as attachment +] +`} + + + + +| Type | Behavior | +|------|----------| +| `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. This is the default when `Type` is omitted. | +| `Json` | Parses the string value as a native JSON object or array in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | +| `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary, `string`→UTF-8 text, `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. 
| --- ## Unmapped Columns -Columns not listed in `ColumnsMapping` are **not stored** in the document, but they +Columns not listed in `Columns` are **not stored** in the document, but they are available in patch scripts via `$row`. This allows you to use data for computations without permanently storing raw SQL values: @@ -81,12 +102,12 @@ This allows you to use data for computations without permanently storing raw SQL -{`ColumnsMapping = new Dictionary -\{ - ["id"] = "Id", - ["name"] = "Name" +{`Columns = +[ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "name", Name = "Name" \}, // base_price and tax_rate are NOT mapped — won't appear in document -\}, +], Patch = "this.FinalPrice = $row.base_price * (1 + $row.tax_rate);" `} @@ -98,64 +119,24 @@ not stored as document properties. Only the computed `FinalPrice` is stored. **Naming context:** -Property names (the values in `ColumnsMapping`) become properties on the RavenDB +Property names (the `Name` values in `Columns`) become properties on the RavenDB document — accessible as `this.FinalPrice` inside a patch script. -Column names (the keys in `ColumnsMapping`, plus any unmapped columns) are accessible +Column names (the `Column` values in `Columns`, plus any unmapped columns) are accessible in patch scripts via `$row.base_price` (for the current row's values) and `$old?.base_price` (for the previous row's values on UPDATE events). 
--- -## Attachment Mapping +## Validation Rules -Binary SQL columns (e.g., PostgreSQL `BYTEA`) can be stored as RavenDB attachments -instead of document properties using `AttachmentNameMapping`: - - - - -{`new CdcSinkTableConfig -\{ - Name = "Files", - SourceTableName = "files", - PrimaryKeyColumns = ["id"], - ColumnsMapping = \{ ["id"] = "Id", ["filename"] = "Filename" \}, - AttachmentNameMapping = new Dictionary - \{ - ["content"] = "file" // SQL column "content" → attachment named "file" - \} -\} -`} - - - - -The binary column `content` becomes an attachment named `"file"` on the document. - -**Embedded table attachments:** - -Binary columns on embedded tables are also supported. The attachment name is prefixed -with the embedded property path and primary key to ensure uniqueness: - - - - -{`new CdcSinkEmbeddedTableConfig -\{ - SourceTableName = "photos", - PropertyName = "Photos", - PrimaryKeyColumns = ["photo_num"], - AttachmentNameMapping = \{ ["thumbnail"] = "thumb" \} -\} -`} - - - +The following rules apply to both root table and embedded table column mappings: -A photo with `photo_num = 1` creates attachment `"Photos/1/thumb"` on the parent document. -When the embedded item is deleted, its attachments are automatically removed. +- Each `Column` name must be unique within a table (no duplicate SQL column entries). +- Each `Name` must be unique within a table (no duplicate property or attachment names). +- Both `Column` and `Name` are required and must be non-empty strings. +- At least one entry is required — an empty `Columns` list is a validation error. 
+ --- diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx index 2ec550a6e2..60e0224f6d 100644 --- a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -26,6 +26,9 @@ import LanguageContent from "@site/src/components/LanguageContent"; * [CdcSinkLinkedTableConfig](#cdcsinklinkedtableconfig) * [CdcSinkOnDeleteConfig](#cdcsinkondeleteconfig) * [CdcSinkRelationType](#cdcsinkrelationtype) + * [CdcColumnMapping](#cdccolumnmapping) + * [CdcColumnType](#cdccolumntype) + * [REST API Endpoints](#rest-api-endpoints) @@ -56,14 +59,17 @@ Leave `null` for non-PostgreSQL connections. | Property | Type | Description | |----------|------|-------------| -| `SlotName` | `string` | Name of the PostgreSQL logical replication slot. If omitted on creation, a deterministic hash-based name is used. Immutable once set. Max 63 characters, alphanumeric and underscores only. | -| `PublicationName` | `string` | Name of the PostgreSQL publication. Same auto-fill and immutability rules as `SlotName`. | +| `SlotName` | `string` | Name of the PostgreSQL logical replication slot. If omitted, a name of the form `rvn_cdc_s_{guid}` is auto-generated when the task is created. Immutable once set. Max 63 characters, alphanumeric and underscores only. | +| `PublicationName` | `string` | Name of the PostgreSQL publication. Same auto-generated naming and immutability rules as `SlotName`. | Setting these explicitly is useful when: - A database administrator pre-creates the slot and publication with human-readable names - Migrating from a previous CDC Sink task and reusing an existing slot - Running multiple environments (dev/staging/prod) with predictable names +If tables are missing from the publication, CDC Sink attempts `ALTER PUBLICATION ... ADD TABLE` +automatically on startup (requires sufficient permissions). 
+ See [Initial Setup](./postgres/initial-setup.mdx) for details. @@ -84,8 +90,7 @@ Configures a root table — one SQL table mapped to one RavenDB collection. | `SourceTableName` | `string` | ✓ | SQL table name (e.g., `"orders"`) | | `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | | `PrimaryKeyColumns` | `List` | ✓ | SQL columns used for document ID generation | -| `ColumnsMapping` | `Dictionary` | ✓ | SQL column → document property | -| `AttachmentNameMapping` | `Dictionary` | | Binary SQL column → attachment name | +| `Columns` | `List` | ✓ | Column mappings — each entry defines a SQL column, its document property name, and how to store it | | `Patch` | `string` | | JavaScript patch for INSERT and UPDATE | | `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior. Default: delete document | | `EmbeddedTables` | `List` | | Nested table configurations | @@ -106,8 +111,7 @@ Configures a table whose rows are embedded as nested objects within a parent doc | `Type` | `CdcSinkRelationType` | ✓ | `Array`, `Map`, or `Value` | | `JoinColumns` | `List` | ✓ | FK columns referencing parent's `PrimaryKeyColumns` | | `PrimaryKeyColumns` | `List` | ✓ | PK columns for matching items on UPDATE/DELETE | -| `ColumnsMapping` | `Dictionary` | ✓ | SQL column → embedded property | -| `AttachmentNameMapping` | `Dictionary` | | Binary SQL column → attachment name | +| `Columns` | `List` | ✓ | Column mappings — each entry defines a SQL column, its document property name, and how to store it | | `Patch` | `string` | | JavaScript patch on **parent** document for INSERT/UPDATE | | `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior for embedded items | | `CaseSensitiveKeys` | `bool` | | Case-sensitive PK matching. Default: `false` | @@ -131,6 +135,30 @@ Configures a foreign key reference that becomes a document ID in the parent docu --- +## CdcColumnMapping + +A single column mapping entry within a `Columns` list. 
+ +| Property | Type | Required | Description | +|----------|------|----------|-------------| +| `Column` | `string` | ✓ | SQL column name (case-insensitive) | +| `Name` | `string` | ✓ | Document property name (or attachment name when `Type = Attachment`) | +| `Type` | `CdcColumnType` | | How to store the value. Default: `Default` | + +--- + +## CdcColumnType + +Controls how a SQL column value is stored in RavenDB. + +| Value | Behavior | +|-------|----------| +| `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. | +| `Json` | Parses the string value as a native JSON object or array in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | +| `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary, `string`→UTF-8 text, `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | + +--- + ## CdcSinkOnDeleteConfig Controls how DELETE events are handled for a table or embedded table. @@ -169,6 +197,19 @@ Specifies the structure of embedded or linked data in the document. 
--- +## REST API Endpoints + +| Method | Path | Auth | Description | +|--------|------|------|-------------| +| PUT | `/databases/{db}/admin/cdc-sink` | DatabaseAdmin | Create CDC Sink task | +| PUT | `/databases/{db}/admin/cdc-sink?id={taskId}` | DatabaseAdmin | Update CDC Sink task | +| POST | `/databases/{db}/admin/cdc-sink/test` | DatabaseAdmin | Test a patch script | +| POST | `/databases/{db}/admin/cdc-sink/verify` | DatabaseAdmin | Verify source database connectivity and CDC setup | +| GET | `/databases/{db}/cdc-sink/performance` | ValidUser | Get performance stats | +| GET | `/databases/{db}/cdc-sink/performance/live` | ValidUser | WebSocket for live performance stats | + +--- + ## Related Articles ### CDC Sink diff --git a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx index 813c702111..d7ed2734b5 100644 --- a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx @@ -49,12 +49,12 @@ import LanguageContent from "@site/src/components/LanguageContent"; Type = CdcSinkRelationType.Array, // Array, Map, or Value JoinColumns = ["order_id"], // FK to parent's PrimaryKeyColumns PrimaryKeyColumns = ["line_id"], // Used to match items on UPDATE/DELETE - ColumnsMapping = new Dictionary - \{ - ["line_id"] = "LineId", - ["product"] = "Product", - ["quantity"] = "Quantity" - \} + Columns = + [ + new() \{ Column = "line_id", Name = "LineId" \}, + new() \{ Column = "product", Name = "Product" \}, + new() \{ Column = "quantity", Name = "Quantity" \}, + ] \} `} @@ -226,8 +226,8 @@ without an additional lookup, which is not supported. ## Attachments on Embedded Items -Binary columns from embedded tables can be stored as RavenDB attachments using -`AttachmentNameMapping`. +Binary columns from embedded tables can be stored as RavenDB attachments by adding +them to `Columns` with `Type = CdcColumnType.Attachment`. 
@@ -238,8 +238,12 @@ Binary columns from embedded tables can be stored as RavenDB attachments using PropertyName = "Photos", PrimaryKeyColumns = ["photo_num"], JoinColumns = ["product_id"], - ColumnsMapping = \{ ["photo_num"] = "PhotoNum", ["caption"] = "Caption" \}, - AttachmentNameMapping = \{ ["thumbnail"] = "thumb" \} + Columns = + [ + new() \{ Column = "photo_num", Name = "PhotoNum" \}, + new() \{ Column = "caption", Name = "Caption" \}, + new() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, + ] \} `} diff --git a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx index 0fe36c9b28..ef8c24ba26 100644 --- a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx @@ -24,6 +24,7 @@ import LanguageContent from "@site/src/components/LanguageContent"; * In this page: * [Startup and Verification](#startup-and-verification) + * [PostgreSQL](#postgresql) * [Initial Load](#initial-load) * [Change Streaming](#change-streaming) * [Transaction Ordering](#transaction-ordering) @@ -36,19 +37,21 @@ import LanguageContent from "@site/src/components/LanguageContent"; ## Startup and Verification -When a CDC Sink task starts, it first verifies that the source database is properly -configured. For PostgreSQL, this includes checking: +When a CDC Sink task starts, it verifies that the source database is properly configured +before doing anything else. If any check fails, CDC Sink reports the exact issue and the +SQL an administrator needs to run to fix it. The task does not start until all checks pass. + +After verification, CDC Sink creates the necessary change-tracking infrastructure +in the source database, then begins the initial load. 
+ +### PostgreSQL + +CDC Sink checks: * WAL level is set to `logical` * The connecting user has sufficient privileges * REPLICA IDENTITY is configured correctly for embedded tables that need delete routing -If any check fails, CDC Sink reports the exact issue and the SQL an administrator -needs to run to fix it. The task does not start until all checks pass. - -After verification, CDC Sink creates the necessary change-tracking infrastructure -in the source database, then begins the initial load. - See the [PostgreSQL Prerequisites Checklist](./postgres/prerequisites-checklist.mdx) for the full list of requirements. @@ -178,15 +181,18 @@ See [Patching](./patching.mdx) for guidance. ## Child Before Parent -If an embedded row arrives before its parent row exists in RavenDB — which can happen -during initial load when tables are scanned in parallel, or due to relaxed foreign key -constraints in the source database — CDC Sink creates a **stub document** containing -only the embedded data. +Tables are scanned one at a time during the initial load. This means a child table can +contain rows that reference a parent row inserted *after* the parent table scan had +already completed. For example: Orders are fully scanned, then while scanning OrderLines, +an OrderLine is encountered that points to an Order that was inserted after the Orders +scan finished. -When the parent row arrives later, its columns are merged onto the stub document. -The final document contains both the parent fields and all embedded items that arrived earlier. +In this case, CDC Sink creates a **stub document** for the parent containing only the +embedded child data. When CDC streaming begins after the initial load, it picks up the +missing parent row and merges its columns onto the stub document. -This ensures no data is lost regardless of the order in which rows are processed. +The final document will contain both the parent fields and all embedded items. 
No data +is lost — the brief intermediate state is resolved automatically once streaming starts. --- diff --git a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx index c90c521685..67b44711a4 100644 --- a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx @@ -42,7 +42,11 @@ import LanguageContent from "@site/src/components/LanguageContent"; Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - ColumnsMapping = \{ ["id"] = "Id", ["customer_id"] = "CustomerId" \}, + Columns = + [ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "customer_id", Name = "CustomerId" \}, + ], LinkedTables = [ new CdcSinkLinkedTableConfig diff --git a/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx index cbacbf99f8..010c0f4381 100644 --- a/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx @@ -103,6 +103,27 @@ making it straightforward to diagnose the root cause. --- +## Error Handling + +CDC Sink tolerates individual document processing errors — each failure is logged +and recorded, but processing continues for the remaining documents in the batch. +The CDC position advances as long as the error ratio stays within acceptable bounds. + +**Error threshold:** +When cumulative errors reach **100** and also exceed the number of successes, +the batch throws and the task enters fallback mode. + +**Fallback mode on error:** +The task enters exponential backoff, starting at 5 seconds and doubling on each +retry, up to the limit set by `CdcSink.MaxFallbackTimeInSec` (default 15 minutes). +See [Server Configuration](./server-configuration.mdx) and [Fallback Mode](#fallback-mode). + +**Patch errors:** +Patch errors — including `MaxSteps exceeded` — fail only the affected document. +The rest of the batch continues normally. 
+ +--- + ## Related Articles ### CDC Sink diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index 0a03290f52..75fce25b1c 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -61,7 +61,7 @@ The `OnDelete.Patch` on `CdcSinkOnDeleteConfig` handles DELETE separately. |----------|-----------|------|-------------| | `this` | Always | object | The document being modified (root or parent for embedded) | | `$row` | Always | object | All SQL columns from the CDC event (mapped and unmapped) | -| `$old` | UPDATE only | object \| null | Previous state of the item; null for INSERT | +| `$old` | Always | object \| null | Previous state of the document (root patches) or embedded item (embedded patches). Null on INSERT. | | `get(id)` | Always | function | Load a RavenDB document by ID | --- @@ -134,7 +134,7 @@ Without it, deletes will leave the aggregate in an incorrect state. PropertyName = "Lines", PrimaryKeyColumns = ["line_id"], JoinColumns = ["order_id"], - ColumnsMapping = \{ ["line_id"] = "LineId", ["quantity"] = "Quantity" \}, + Columns = [ new() \{ Column = "line_id", Name = "LineId" \}, new() \{ Column = "quantity", Name = "Quantity" \} ], // Runs on INSERT and UPDATE — recomputes total from current Lines array Patch = @" @@ -180,7 +180,7 @@ the deleted item's value. Without it, deletes leave the running total incorrect. 
PropertyName = "Lines", PrimaryKeyColumns = ["line_id"], JoinColumns = ["invoice_id"], - ColumnsMapping = \{ ["line_id"] = "LineId", ["amount"] = "Amount" \}, + Columns = [ new() \{ Column = "line_id", Name = "LineId" \}, new() \{ Column = "amount", Name = "Amount" \} ], // INSERT: $old is null, so delta = new amount (0 → new) // UPDATE: $old has previous Amount, delta = new - old @@ -215,8 +215,8 @@ Compute fields from unmapped columns that you don't want to store directly: -{`ColumnsMapping = \{ ["id"] = "Id", ["name"] = "Name" \}, -// base_price and tax_rate are NOT in ColumnsMapping +{`Columns = [ new() \{ Column = "id", Name = "Id" \}, new() \{ Column = "name", Name = "Name" \} ], +// base_price and tax_rate are NOT in Columns Patch = @" this.FinalPrice = $row.base_price * (1 + $row.tax_rate); this.Discount = $row.is_vip ? $row.base_price * 0.1 : 0; @@ -337,6 +337,8 @@ Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nes * Access `this`, `$row`, `$old` * Load related documents with `get()` +* Create or replace documents with `put()` +* Delete documents with `del()` * Compute and transform property values * Set and modify document metadata * Conditional logic, loops, array methods diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx index 6bcc58b0b4..060e210d4f 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx @@ -107,11 +107,11 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. 
Name = "Products", SourceTableName = "products", PrimaryKeyColumns = ["product_id"], - ColumnsMapping = new Dictionary - \{ - ["product_id"] = "ProductId", - ["name"] = "Name" - \}, + Columns = + [ + new() \{ Column = "product_id", Name = "ProductId" \}, + new() \{ Column = "name", Name = "Name" \}, + ], // Linked table: category_id FK → document ID in Categories collection LinkedTables = [ @@ -133,12 +133,12 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. Type = CdcSinkRelationType.Array, JoinColumns = ["product_id"], PrimaryKeyColumns = ["variant_id"], - ColumnsMapping = new Dictionary - \{ - ["variant_id"] = "VariantId", - ["sku"] = "Sku", - ["price"] = "Price" - \}, + Columns = + [ + new() \{ Column = "variant_id", Name = "VariantId" \}, + new() \{ Column = "sku", Name = "Sku" \}, + new() \{ Column = "price", Name = "Price" \}, + ], // Deep-nested: attributes within each variant EmbeddedTables = [ @@ -150,12 +150,12 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. // JoinColumns must include the ROOT PK for deep nesting JoinColumns = ["product_id", "variant_id"], PrimaryKeyColumns = ["attr_id"], - ColumnsMapping = new Dictionary - \{ - ["attr_id"] = "AttrId", - ["attr_name"] = "Name", - ["attr_value"] = "Value" - \} + Columns = + [ + new() \{ Column = "attr_id", Name = "AttrId" \}, + new() \{ Column = "attr_name", Name = "Name" \}, + new() \{ Column = "attr_value", Name = "Value" \}, + ] \} ] \} diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx index 9213a85115..772d2a5042 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx @@ -86,13 +86,13 @@ See [REPLICA IDENTITY](../replica-identity.mdx). 
Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = new List \{ "order_id" \}, - ColumnsMapping = new Dictionary - \{ - ["order_id"] = "OrderId", - ["customer"] = "Customer", - ["status"] = "Status", - ["created_at"] = "CreatedAt" - \}, + Columns = + [ + new() \{ Column = "order_id", Name = "OrderId" \}, + new() \{ Column = "customer", Name = "Customer" \}, + new() \{ Column = "status", Name = "Status" \}, + new() \{ Column = "created_at", Name = "CreatedAt" \}, + ], EmbeddedTables = new List \{ new CdcSinkEmbeddedTableConfig @@ -102,13 +102,13 @@ See [REPLICA IDENTITY](../replica-identity.mdx). Type = CdcSinkRelationType.Array, JoinColumns = new List \{ "order_id" \}, PrimaryKeyColumns = new List \{ "line_id" \}, - ColumnsMapping = new Dictionary - \{ - ["line_id"] = "LineId", - ["product"] = "Product", - ["qty"] = "Qty", - ["unit_price"] = "UnitPrice" - \} + Columns = + [ + new() \{ Column = "line_id", Name = "LineId" \}, + new() \{ Column = "product", Name = "Product" \}, + new() \{ Column = "qty", Name = "Qty" \}, + new() \{ Column = "unit_price", Name = "UnitPrice" \}, + ] \} \} \} diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx index df7ace832d..ecd57e96f8 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx @@ -75,12 +75,12 @@ history, and `Balance` is maintained using patch logic. 
Name = "Accounts", SourceTableName = "accounts", PrimaryKeyColumns = ["account_id"], - ColumnsMapping = new Dictionary - \{ - ["account_id"] = "AccountId", - ["owner"] = "Owner", - ["currency"] = "Currency" - \}, + Columns = + [ + new() \{ Column = "account_id", Name = "AccountId" \}, + new() \{ Column = "owner", Name = "Owner" \}, + new() \{ Column = "currency", Name = "Currency" \}, + ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -90,13 +90,13 @@ history, and `Balance` is maintained using patch logic. Type = CdcSinkRelationType.Array, JoinColumns = ["account_id"], PrimaryKeyColumns = ["txn_id"], - ColumnsMapping = new Dictionary - \{ - ["txn_id"] = "TxnId", - ["amount"] = "Amount", - ["type"] = "Type", - ["created_at"] = "CreatedAt" - \}, + Columns = + [ + new() \{ Column = "txn_id", Name = "TxnId" \}, + new() \{ Column = "amount", Name = "Amount" \}, + new() \{ Column = "type", Name = "Type" \}, + new() \{ Column = "created_at", Name = "CreatedAt" \}, + ], // Patch runs on the parent document for INSERT/UPDATE Patch = """ const oldAmount = $old?.Amount || 0; diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx index aad066f6bd..de82dc81e0 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx @@ -60,13 +60,13 @@ A simple customers table: Name = "Customers", SourceTableName = "customers", PrimaryKeyColumns = new List \{ "id" \}, - ColumnsMapping = new Dictionary - \{ - ["id"] = "Id", - ["name"] = "Name", - ["email"] = "Email", - ["created_at"] = "CreatedAt" - \} + Columns = + [ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "name", Name = "Name" \}, + new() \{ Column = "email", Name = "Email" \}, + new() \{ Column = "created_at", Name = "CreatedAt" \}, + ] \} \} \}; diff --git 
a/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx index 28e9872ddd..d491506ee1 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/initial-setup.mdx @@ -42,7 +42,8 @@ CDC Sink will: 1. Generate the replication slot and publication names (a GUID-based identifier) and store them with the task 2. Check whether they already exist 3. Create them if they do not exist -4. Begin the initial load +4. If configured tables are missing from the publication, CDC Sink attempts `ALTER PUBLICATION ... ADD TABLE` automatically (requires the user to have publication ownership or superuser access) +5. Begin the initial load No manual database administration is needed in this case. diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx index ef223b2502..04f24fc05e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/prerequisites-checklist.mdx @@ -132,7 +132,7 @@ Full details: [Permissions and Roles](./permissions-and-roles.mdx). See [REPLICA IDENTITY](./replica-identity.mdx). * **Published columns**: All columns referenced in `PrimaryKeyColumns`, `JoinColumns`, - and `ColumnsMapping` must exist in the SQL table. + and `Columns` entries must exist in the SQL table. --- diff --git a/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx index e150618ccf..95fd92e5ff 100644 --- a/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx @@ -79,7 +79,7 @@ Document after CDC UPDATE: -`Email` is updated from SQL, while `InternalNotes` (not in `ColumnsMapping`) is preserved. 
+`Email` is updated from SQL, while `InternalNotes` (not in the `Columns` list) is preserved. --- @@ -87,15 +87,15 @@ Document after CDC UPDATE: **Preserved across CDC updates:** -* Properties not listed in `ColumnsMapping` +* Properties not listed in the `Columns` list * Properties set in RavenDB directly (annotations, computed values, flags) * Document metadata (unless the patch explicitly modifies it) **Overwritten on CDC update:** -* Any property mapped via `ColumnsMapping` — always updated to match the current SQL value +* Any property mapped via the `Columns` list — always updated to match the current SQL value -If you manually edit a property that is part of `ColumnsMapping`, the next CDC UPDATE +If you manually edit a property that is part of the `Columns` list, the next CDC UPDATE for that row will overwrite your edit with the SQL value. --- @@ -120,7 +120,7 @@ You can safely add properties to CDC-managed documents: -Properties managed by CDC (those in `ColumnsMapping`) will be overwritten on +Properties managed by CDC (those in the `Columns` list) will be overwritten on the next UPDATE from the source database. Do not rely on manual edits to mapped properties surviving future CDC updates. @@ -133,11 +133,11 @@ RavenDB-only property for your annotation and leaving the SQL-mapped property as ## Implications for Patches Patches run after column mapping and can set additional properties that are not -from `ColumnsMapping`. These patch-computed properties follow the same merge rules: +from the `Columns` list. 
These patch-computed properties follow the same merge rules: * If a patch sets `this.ComputedField = ...`, that value persists across future events where the patch doesn't explicitly change it -* If a patch sets a property that is also in `ColumnsMapping`, the column mapping +* If a patch sets a property that is also in the `Columns` list, the column mapping value takes precedence (mapping is applied before patching) For aggregates maintained via patches (e.g., `RunningTotal`), the patch itself diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx index e874dbeeeb..2aa35d58a4 100644 --- a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -47,12 +47,12 @@ becomes one document. SourceTableSchema = "public", // SQL schema (optional, default: "public") SourceTableName = "orders", // SQL table name PrimaryKeyColumns = ["id"], // Used for document ID generation - ColumnsMapping = new Dictionary - \{ - ["id"] = "Id", - ["customer_name"] = "CustomerName", - ["total"] = "Total" - \} + Columns = + [ + new() \{ Column = "id", Name = "Id" \}, + new() \{ Column = "customer_name", Name = "CustomerName" \}, + new() \{ Column = "total", Name = "Total" \}, + ] \} `} @@ -63,7 +63,7 @@ becomes one document. A row with `id = 42` and collection `Orders` becomes document `Orders/42`. A composite PK `(region, id)` with values `(US, 42)` becomes `Orders/US/42`. -**Column mapping:** Only mapped columns appear in the document. Unmapped columns are +**Column mapping:** Only columns listed in `Columns` appear in the document. Unmapped columns are still available in patch scripts via `$row` but are not stored in the document. --- @@ -81,7 +81,7 @@ SQL `order_lines` table becomes an array inside each `Orders` document. 
Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - ColumnsMapping = \{ ["id"] = "Id", ["customer_name"] = "CustomerName" \}, + Columns = [ new() \{ Column = "id", Name = "Id" \}, new() \{ Column = "customer_name", Name = "CustomerName" \} ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -91,12 +91,12 @@ SQL `order_lines` table becomes an array inside each `Orders` document. Type = CdcSinkRelationType.Array, // Array of items JoinColumns = ["order_id"], // FK referencing parent's PK PrimaryKeyColumns = ["line_id"], // Used to match items on update/delete - ColumnsMapping = - \{ - ["line_id"] = "LineId", - ["product"] = "Product", - ["quantity"] = "Quantity" - \} + Columns = + [ + new() \{ Column = "line_id", Name = "LineId" \}, + new() \{ Column = "product", Name = "Product" \}, + new() \{ Column = "quantity", Name = "Quantity" \}, + ] \} ] \} @@ -139,7 +139,7 @@ A foreign key in the source row becomes a RavenDB document ID. Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - ColumnsMapping = \{ ["id"] = "Id", ["customer_id"] = "CustomerId" \}, + Columns = [ new() \{ Column = "id", Name = "Id" \}, new() \{ Column = "customer_id", Name = "CustomerId" \} ], LinkedTables = [ new CdcSinkLinkedTableConfig @@ -211,7 +211,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Name = "Companies", SourceTableName = "companies", PrimaryKeyColumns = ["company_id"], - ColumnsMapping = \{ ["company_id"] = "CompanyId", ["name"] = "Name" \}, + Columns = [ new() \{ Column = "company_id", Name = "CompanyId" \}, new() \{ Column = "name", Name = "Name" \} ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -221,7 +221,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Type = CdcSinkRelationType.Array, JoinColumns = ["company_id"], // Root FK PrimaryKeyColumns = ["dept_id"], - ColumnsMapping = \{ ["dept_id"] = "DeptId", ["dept_name"] = "DeptName" \}, + Columns = [ 
new() \{ Column = "dept_id", Name = "DeptId" \}, new() \{ Column = "dept_name", Name = "DeptName" \} ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -231,7 +231,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Type = CdcSinkRelationType.Array, JoinColumns = ["company_id", "dept_id"], // Root FK + parent FK PrimaryKeyColumns = ["emp_id"], - ColumnsMapping = \{ ["emp_id"] = "EmpId", ["emp_name"] = "EmpName" \} + Columns = [ new() \{ Column = "emp_id", Name = "EmpId" \}, new() \{ Column = "emp_name", Name = "EmpName" \} ] \} ] \} diff --git a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx index c3e33d951d..34780fe361 100644 --- a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx @@ -62,6 +62,8 @@ How frequently CDC Sink polls the source database for new change events when the stream is idle. A shorter interval reduces latency but increases polling load on the source. +PostgreSQL uses logical replication streaming and ignores this setting. + --- ## Related Articles diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx index 8afe70e1e3..030c7b355b 100644 --- a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -93,7 +93,7 @@ specific failure reason. 3. **Primary key mismatch** — the columns listed in `PrimaryKeyColumns` must match the actual SQL primary key. If they don't match, CDC Sink cannot generate a document ID. -4. **ColumnsMapping is empty** — at least one column must be mapped. A table with no +4. **Columns list is empty** — at least one column must be mapped. A table with no column mappings will produce empty documents. 5. **Task is paused** — check that the task state is `Active`, not `Disabled`. 
From 43af5dab1ccf806da6ea017289cc1c3c07f289b7 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 05:20:33 +0300 Subject: [PATCH 03/17] Use explicit CdcColumnMapping() class name; note text columns in attachment handling - Replace all new() shorthand with new CdcColumnMapping() across all files - attachment-handling: clarify that text columns (text, nvarchar, etc.) as well as binary columns can use Type = CdcColumnType.Attachment --- .../ongoing-tasks/cdc-sink/api-reference.mdx | 12 +++++----- .../cdc-sink/attachment-handling.mdx | 21 +++++++++--------- .../ongoing-tasks/cdc-sink/column-mapping.mdx | 18 +++++++-------- .../cdc-sink/embedded-tables.mdx | 12 +++++----- .../ongoing-tasks/cdc-sink/linked-tables.mdx | 4 ++-- .../ongoing-tasks/cdc-sink/patching.mdx | 6 ++--- .../examples/example-complex-nesting.mdx | 16 +++++++------- .../examples/example-denormalization.mdx | 16 +++++++------- .../examples/example-event-sourcing.mdx | 14 ++++++------ .../examples/example-simple-migration.mdx | 8 +++---- .../ongoing-tasks/cdc-sink/schema-design.mdx | 22 +++++++++---------- 11 files changed, 75 insertions(+), 74 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx index 94af8e2688..448aa7b634 100644 --- a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx @@ -50,9 +50,9 @@ Use `AddCdcSinkOperation` to create a new CDC Sink task: PrimaryKeyColumns = new List \{ "id" \}, Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "customer_name", Name = "CustomerName" \}, - new() \{ Column = "total", Name = "Total" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \}, + new CdcColumnMapping() \{ Column = "total", Name = "Total" \}, ] \} \} @@ -92,9 +92,9 @@ config.Tables.Add(new CdcSinkTableConfig PrimaryKeyColumns = 
new List \{ "id" \}, Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "name", Name = "Name" \}, - new() \{ Column = "email", Name = "Email" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, + new CdcColumnMapping() \{ Column = "email", Name = "Email" \}, ] \}); diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index 5a77cb1cbd..25415e7dc2 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -1,7 +1,7 @@ --- title: "CDC Sink: Attachment Handling" sidebar_label: Attachment Handling -description: "Explains how to store binary SQL columns as RavenDB attachments using CdcColumnType.Attachment on root and embedded table configurations." +description: "Explains how to store binary or text SQL columns as RavenDB attachments using CdcColumnType.Attachment on root and embedded table configurations." sidebar_position: 9 --- @@ -16,8 +16,9 @@ import LanguageContent from "@site/src/components/LanguageContent"; -* Binary SQL columns (bytea, varbinary, etc.) can be stored as RavenDB **attachments** - by setting `Type = CdcColumnType.Attachment` on the column mapping entry. +* SQL columns — binary (`bytea`, `varbinary`, etc.) or text (`text`, `nvarchar`, etc.) + — can be stored as RavenDB **attachments** by setting `Type = CdcColumnType.Attachment` + on the column mapping entry. * This applies to both root tables and embedded tables. 
@@ -46,10 +47,10 @@ column as a RavenDB attachment: PrimaryKeyColumns = ["id"], Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "filename", Name = "Filename" \}, - new() \{ Column = "mime_type", Name = "MimeType" \}, - new() \{ Column = "content", Name = "file", Type = CdcColumnType.Attachment \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "filename", Name = "Filename" \}, + new CdcColumnMapping() \{ Column = "mime_type", Name = "MimeType" \}, + new CdcColumnMapping() \{ Column = "content", Name = "file", Type = CdcColumnType.Attachment \}, ] \} `} @@ -79,9 +80,9 @@ The attachment name is automatically prefixed to ensure uniqueness: JoinColumns = ["product_id"], Columns = [ - new() \{ Column = "photo_num", Name = "PhotoNum" \}, - new() \{ Column = "caption", Name = "Caption" \}, - new() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, + new CdcColumnMapping() \{ Column = "photo_num", Name = "PhotoNum" \}, + new CdcColumnMapping() \{ Column = "caption", Name = "Caption" \}, + new CdcColumnMapping() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, ] \} `} diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 3c2e95e8d8..0ada70e559 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -41,10 +41,10 @@ to a RavenDB document property: {`Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "customer_name", Name = "CustomerName" \}, - new() \{ Column = "order_date", Name = "OrderDate" \}, - new() \{ Column = "total_amount", Name = "TotalAmount" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \}, + new CdcColumnMapping() \{ Column = "order_date", Name = "OrderDate" \}, + new 
CdcColumnMapping() \{ Column = "total_amount", Name = "TotalAmount" \}, ] `} @@ -75,9 +75,9 @@ that controls how the SQL value is stored in RavenDB: {`Columns = [ - new() \{ Column = "order_id", Name = "OrderId" \}, - new() \{ Column = "metadata", Name = "Metadata", Type = CdcColumnType.Json \}, // Parsed as native JSON - new() \{ Column = "receipt", Name = "receipt.pdf", Type = CdcColumnType.Attachment \}, // Stored as attachment + new CdcColumnMapping() \{ Column = "order_id", Name = "OrderId" \}, + new CdcColumnMapping() \{ Column = "metadata", Name = "Metadata", Type = CdcColumnType.Json \}, // Parsed as native JSON + new CdcColumnMapping() \{ Column = "receipt", Name = "receipt.pdf", Type = CdcColumnType.Attachment \}, // Stored as attachment ] `} @@ -104,8 +104,8 @@ This allows you to use data for computations without permanently storing raw SQL {`Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "name", Name = "Name" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, // base_price and tax_rate are NOT mapped — won't appear in document ], Patch = "this.FinalPrice = $row.base_price * (1 + $row.tax_rate);" diff --git a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx index d7ed2734b5..3af139bee0 100644 --- a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx @@ -51,9 +51,9 @@ import LanguageContent from "@site/src/components/LanguageContent"; PrimaryKeyColumns = ["line_id"], // Used to match items on UPDATE/DELETE Columns = [ - new() \{ Column = "line_id", Name = "LineId" \}, - new() \{ Column = "product", Name = "Product" \}, - new() \{ Column = "quantity", Name = "Quantity" \}, + new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, + new CdcColumnMapping() \{ Column = "product", Name = "Product" \}, + new CdcColumnMapping() 
\{ Column = "quantity", Name = "Quantity" \}, ] \} `} @@ -240,9 +240,9 @@ them to `Columns` with `Type = CdcColumnType.Attachment`. JoinColumns = ["product_id"], Columns = [ - new() \{ Column = "photo_num", Name = "PhotoNum" \}, - new() \{ Column = "caption", Name = "Caption" \}, - new() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, + new CdcColumnMapping() \{ Column = "photo_num", Name = "PhotoNum" \}, + new CdcColumnMapping() \{ Column = "caption", Name = "Caption" \}, + new CdcColumnMapping() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, ] \} `} diff --git a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx index 67b44711a4..003b7aa55b 100644 --- a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx @@ -44,8 +44,8 @@ import LanguageContent from "@site/src/components/LanguageContent"; PrimaryKeyColumns = ["id"], Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "customer_id", Name = "CustomerId" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "customer_id", Name = "CustomerId" \}, ], LinkedTables = [ diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index 75fce25b1c..eefdc9ebe3 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -134,7 +134,7 @@ Without it, deletes will leave the aggregate in an incorrect state. 
PropertyName = "Lines", PrimaryKeyColumns = ["line_id"], JoinColumns = ["order_id"], - Columns = [ new() \{ Column = "line_id", Name = "LineId" \}, new() \{ Column = "quantity", Name = "Quantity" \} ], + Columns = [ new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, new CdcColumnMapping() \{ Column = "quantity", Name = "Quantity" \} ], // Runs on INSERT and UPDATE — recomputes total from current Lines array Patch = @" @@ -180,7 +180,7 @@ the deleted item's value. Without it, deletes leave the running total incorrect. PropertyName = "Lines", PrimaryKeyColumns = ["line_id"], JoinColumns = ["invoice_id"], - Columns = [ new() \{ Column = "line_id", Name = "LineId" \}, new() \{ Column = "amount", Name = "Amount" \} ], + Columns = [ new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, new CdcColumnMapping() \{ Column = "amount", Name = "Amount" \} ], // INSERT: $old is null, so delta = new amount (0 → new) // UPDATE: $old has previous Amount, delta = new - old @@ -215,7 +215,7 @@ Compute fields from unmapped columns that you don't want to store directly: -{`Columns = [ new() \{ Column = "id", Name = "Id" \}, new() \{ Column = "name", Name = "Name" \} ], +{`Columns = [ new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "name", Name = "Name" \} ], // base_price and tax_rate are NOT in Columns Patch = @" this.FinalPrice = $row.base_price * (1 + $row.tax_rate); diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx index 060e210d4f..5766906484 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx @@ -109,8 +109,8 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. 
PrimaryKeyColumns = ["product_id"], Columns = [ - new() \{ Column = "product_id", Name = "ProductId" \}, - new() \{ Column = "name", Name = "Name" \}, + new CdcColumnMapping() \{ Column = "product_id", Name = "ProductId" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, ], // Linked table: category_id FK → document ID in Categories collection LinkedTables = @@ -135,9 +135,9 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. PrimaryKeyColumns = ["variant_id"], Columns = [ - new() \{ Column = "variant_id", Name = "VariantId" \}, - new() \{ Column = "sku", Name = "Sku" \}, - new() \{ Column = "price", Name = "Price" \}, + new CdcColumnMapping() \{ Column = "variant_id", Name = "VariantId" \}, + new CdcColumnMapping() \{ Column = "sku", Name = "Sku" \}, + new CdcColumnMapping() \{ Column = "price", Name = "Price" \}, ], // Deep-nested: attributes within each variant EmbeddedTables = @@ -152,9 +152,9 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. PrimaryKeyColumns = ["attr_id"], Columns = [ - new() \{ Column = "attr_id", Name = "AttrId" \}, - new() \{ Column = "attr_name", Name = "Name" \}, - new() \{ Column = "attr_value", Name = "Value" \}, + new CdcColumnMapping() \{ Column = "attr_id", Name = "AttrId" \}, + new CdcColumnMapping() \{ Column = "attr_name", Name = "Name" \}, + new CdcColumnMapping() \{ Column = "attr_value", Name = "Value" \}, ] \} ] diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx index 772d2a5042..5cae4c6008 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx @@ -88,10 +88,10 @@ See [REPLICA IDENTITY](../replica-identity.mdx). 
PrimaryKeyColumns = new List \{ "order_id" \}, Columns = [ - new() \{ Column = "order_id", Name = "OrderId" \}, - new() \{ Column = "customer", Name = "Customer" \}, - new() \{ Column = "status", Name = "Status" \}, - new() \{ Column = "created_at", Name = "CreatedAt" \}, + new CdcColumnMapping() \{ Column = "order_id", Name = "OrderId" \}, + new CdcColumnMapping() \{ Column = "customer", Name = "Customer" \}, + new CdcColumnMapping() \{ Column = "status", Name = "Status" \}, + new CdcColumnMapping() \{ Column = "created_at", Name = "CreatedAt" \}, ], EmbeddedTables = new List \{ @@ -104,10 +104,10 @@ See [REPLICA IDENTITY](../replica-identity.mdx). PrimaryKeyColumns = new List \{ "line_id" \}, Columns = [ - new() \{ Column = "line_id", Name = "LineId" \}, - new() \{ Column = "product", Name = "Product" \}, - new() \{ Column = "qty", Name = "Qty" \}, - new() \{ Column = "unit_price", Name = "UnitPrice" \}, + new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, + new CdcColumnMapping() \{ Column = "product", Name = "Product" \}, + new CdcColumnMapping() \{ Column = "qty", Name = "Qty" \}, + new CdcColumnMapping() \{ Column = "unit_price", Name = "UnitPrice" \}, ] \} \} diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx index ecd57e96f8..4045733e3c 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx @@ -77,9 +77,9 @@ history, and `Balance` is maintained using patch logic. 
PrimaryKeyColumns = ["account_id"], Columns = [ - new() \{ Column = "account_id", Name = "AccountId" \}, - new() \{ Column = "owner", Name = "Owner" \}, - new() \{ Column = "currency", Name = "Currency" \}, + new CdcColumnMapping() \{ Column = "account_id", Name = "AccountId" \}, + new CdcColumnMapping() \{ Column = "owner", Name = "Owner" \}, + new CdcColumnMapping() \{ Column = "currency", Name = "Currency" \}, ], EmbeddedTables = [ @@ -92,10 +92,10 @@ history, and `Balance` is maintained using patch logic. PrimaryKeyColumns = ["txn_id"], Columns = [ - new() \{ Column = "txn_id", Name = "TxnId" \}, - new() \{ Column = "amount", Name = "Amount" \}, - new() \{ Column = "type", Name = "Type" \}, - new() \{ Column = "created_at", Name = "CreatedAt" \}, + new CdcColumnMapping() \{ Column = "txn_id", Name = "TxnId" \}, + new CdcColumnMapping() \{ Column = "amount", Name = "Amount" \}, + new CdcColumnMapping() \{ Column = "type", Name = "Type" \}, + new CdcColumnMapping() \{ Column = "created_at", Name = "CreatedAt" \}, ], // Patch runs on the parent document for INSERT/UPDATE Patch = """ diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx index de82dc81e0..9503109eea 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx @@ -62,10 +62,10 @@ A simple customers table: PrimaryKeyColumns = new List \{ "id" \}, Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "name", Name = "Name" \}, - new() \{ Column = "email", Name = "Email" \}, - new() \{ Column = "created_at", Name = "CreatedAt" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, + new CdcColumnMapping() \{ Column = "email", Name = "Email" \}, + new CdcColumnMapping() \{ 
Column = "created_at", Name = "CreatedAt" \}, ] \} \} diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx index 2aa35d58a4..334e86ddec 100644 --- a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -49,9 +49,9 @@ becomes one document. PrimaryKeyColumns = ["id"], // Used for document ID generation Columns = [ - new() \{ Column = "id", Name = "Id" \}, - new() \{ Column = "customer_name", Name = "CustomerName" \}, - new() \{ Column = "total", Name = "Total" \}, + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \}, + new CdcColumnMapping() \{ Column = "total", Name = "Total" \}, ] \} `} @@ -81,7 +81,7 @@ SQL `order_lines` table becomes an array inside each `Orders` document. Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - Columns = [ new() \{ Column = "id", Name = "Id" \}, new() \{ Column = "customer_name", Name = "CustomerName" \} ], + Columns = [ new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \} ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -93,9 +93,9 @@ SQL `order_lines` table becomes an array inside each `Orders` document. PrimaryKeyColumns = ["line_id"], // Used to match items on update/delete Columns = [ - new() \{ Column = "line_id", Name = "LineId" \}, - new() \{ Column = "product", Name = "Product" \}, - new() \{ Column = "quantity", Name = "Quantity" \}, + new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, + new CdcColumnMapping() \{ Column = "product", Name = "Product" \}, + new CdcColumnMapping() \{ Column = "quantity", Name = "Quantity" \}, ] \} ] @@ -139,7 +139,7 @@ A foreign key in the source row becomes a RavenDB document ID. 
Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - Columns = [ new() \{ Column = "id", Name = "Id" \}, new() \{ Column = "customer_id", Name = "CustomerId" \} ], + Columns = [ new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "customer_id", Name = "CustomerId" \} ], LinkedTables = [ new CdcSinkLinkedTableConfig @@ -211,7 +211,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Name = "Companies", SourceTableName = "companies", PrimaryKeyColumns = ["company_id"], - Columns = [ new() \{ Column = "company_id", Name = "CompanyId" \}, new() \{ Column = "name", Name = "Name" \} ], + Columns = [ new CdcColumnMapping() \{ Column = "company_id", Name = "CompanyId" \}, new CdcColumnMapping() \{ Column = "name", Name = "Name" \} ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -221,7 +221,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Type = CdcSinkRelationType.Array, JoinColumns = ["company_id"], // Root FK PrimaryKeyColumns = ["dept_id"], - Columns = [ new() \{ Column = "dept_id", Name = "DeptId" \}, new() \{ Column = "dept_name", Name = "DeptName" \} ], + Columns = [ new CdcColumnMapping() \{ Column = "dept_id", Name = "DeptId" \}, new CdcColumnMapping() \{ Column = "dept_name", Name = "DeptName" \} ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -231,7 +231,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Type = CdcSinkRelationType.Array, JoinColumns = ["company_id", "dept_id"], // Root FK + parent FK PrimaryKeyColumns = ["emp_id"], - Columns = [ new() \{ Column = "emp_id", Name = "EmpId" \}, new() \{ Column = "emp_name", Name = "EmpName" \} ] + Columns = [ new CdcColumnMapping() \{ Column = "emp_id", Name = "EmpId" \}, new CdcColumnMapping() \{ Column = "emp_name", Name = "EmpName" \} ] \} ] \} From d144d298777b071ee09ec05c332006e89e6b00ec Mon Sep 17 00:00:00 2001 From: Ayende Rahien 
Date: Fri, 3 Apr 2026 05:21:07 +0300 Subject: [PATCH 04/17] attachment-handling: add SQL schema examples for root and embedded table sections --- .../cdc-sink/attachment-handling.mdx | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index 25415e7dc2..0045001cb2 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -34,8 +34,23 @@ import LanguageContent from "@site/src/components/LanguageContent"; ## Root Table Attachments -Set `Type = CdcColumnType.Attachment` on a `Columns` entry to store a binary SQL -column as a RavenDB attachment: +Given a SQL table with a binary column: + + + + +{`CREATE TABLE files ( + id SERIAL PRIMARY KEY, + filename TEXT NOT NULL, + mime_type TEXT NOT NULL, + content BYTEA NOT NULL -- binary attachment +); +`} + + + + +Set `Type = CdcColumnType.Attachment` on the `content` column entry: @@ -58,7 +73,7 @@ column as a RavenDB attachment: -The binary `content` column is stored as an attachment named `"file"` on the document. +The `content` column is stored as an attachment named `"file"` on the document. The `Name` value becomes the attachment name. The attachment is stored with content type `application/octet-stream`. @@ -67,7 +82,31 @@ type `application/octet-stream`. ## Embedded Table Attachments Binary columns on embedded tables are stored as attachments on the **parent** document. -The attachment name is automatically prefixed to ensure uniqueness: +The attachment name is automatically prefixed to ensure uniqueness. 
+ +Given a parent `products` table and a child `photos` table: + + + + +{`CREATE TABLE products ( + product_id SERIAL PRIMARY KEY, + name TEXT NOT NULL +); + +CREATE TABLE photos ( + product_id INT NOT NULL REFERENCES products(product_id), + photo_num INT NOT NULL, + caption TEXT, + thumbnail BYTEA, -- binary attachment + PRIMARY KEY (product_id, photo_num) +); +`} + + + + +Configure the embedded table with `Type = CdcColumnType.Attachment` on the binary column: From 116779965d6b8ff72eaee81f88a5cfd016091d1e Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 05:21:40 +0300 Subject: [PATCH 05/17] attachment-handling: text columns use text/plain content type, binary uses application/octet-stream --- docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx | 5 +++-- docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx | 2 +- .../ongoing-tasks/cdc-sink/configuration-reference.mdx | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index 0045001cb2..f384239070 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -74,8 +74,9 @@ Set `Type = CdcColumnType.Attachment` on the `content` column entry: The `content` column is stored as an attachment named `"file"` on the document. -The `Name` value becomes the attachment name. The attachment is stored with content -type `application/octet-stream`. +The `Name` value becomes the attachment name. The content type is determined by the +SQL column type: `application/octet-stream` for binary columns, `text/plain` for +text columns. 
--- diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 0ada70e559..f3a0d19888 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -88,7 +88,7 @@ that controls how the SQL value is stored in RavenDB: |------|----------| | `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. This is the default when `Type` is omitted. | | `Json` | Parses the string value as a native JSON object or array in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | -| `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary, `string`→UTF-8 text, `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | +| `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary (`application/octet-stream`), `string`→UTF-8 text (`text/plain`), `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | --- diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx index 60e0224f6d..a03dda95a1 100644 --- a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -155,7 +155,7 @@ Controls how a SQL column value is stored in RavenDB. |-------|----------| | `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. | | `Json` | Parses the string value as a native JSON object or array in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. 
| -| `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary, `string`→UTF-8 text, `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | +| `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary (`application/octet-stream`), `string`→UTF-8 text (`text/plain`), `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | --- From ac39ae713a29ecad330b1a7ab72a0a1192c8791e Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 05:24:46 +0300 Subject: [PATCH 06/17] CdcColumnType.Json: any JSON value, not just objects/arrays --- docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx | 2 +- docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index f3a0d19888..9968856040 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -87,7 +87,7 @@ that controls how the SQL value is stored in RavenDB: | Type | Behavior | |------|----------| | `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. This is the default when `Type` is omitted. | -| `Json` | Parses the string value as a native JSON object or array in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | +| `Json` | Parses the string value as a native JSON value (object, array, string, number, boolean, etc.) in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | | `Attachment` | Stores the raw column value as a RavenDB attachment. 
`byte[]`→binary (`application/octet-stream`), `string`→UTF-8 text (`text/plain`), `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | --- diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx index a03dda95a1..ddc9355e98 100644 --- a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -154,7 +154,7 @@ Controls how a SQL column value is stored in RavenDB. | Value | Behavior | |-------|----------| | `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. | -| `Json` | Parses the string value as a native JSON object or array in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | +| `Json` | Parses the string value as a native JSON value (object, array, string, number, boolean, etc.) in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | | `Attachment` | Stores the raw column value as a RavenDB attachment. `byte[]`→binary (`application/octet-stream`), `string`→UTF-8 text (`text/plain`), `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. 
| --- From 5707fe442e2dde78e9e0c7a441a4dd73ccb0d6fa Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 05:28:50 +0300 Subject: [PATCH 07/17] patching, troubleshooting: replace get() with load() for loading related documents --- docs/server/ongoing-tasks/cdc-sink/patching.mdx | 12 ++++++------ .../ongoing-tasks/cdc-sink/troubleshooting.mdx | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index eefdc9ebe3..c78a843f64 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -251,12 +251,12 @@ this['@metadata']['SourceTable'] = 'orders'; ## Scenario: Loading Related Documents -Use `get()` to load a related RavenDB document and denormalize its data: +Use `load()` to load a related RavenDB document and denormalize its data: -{`const customer = get('Customers/' + $row.customer_id); +{`const customer = load('Customers/' + $row.customer_id); if (customer) \{ this.CustomerName = customer.Name; @@ -272,17 +272,17 @@ if (customer) \{ -`get()` returns `null` if the document does not exist or has not yet been created +`load()` returns `null` if the document does not exist or has not yet been created by CDC Sink (race condition when multiple tables are loading in parallel). Always check for null before accessing properties. -**When to use `get()`:** +**When to use `load()`:** * Denormalizing slowly-changing reference data (customer name, category, region) * Capturing a snapshot of related data at insert time **Prefer linked tables** for simple foreign key references — they are cleaner and -do not have the null-handling complexity of `get()`. +do not have the null-handling complexity of `load()`. 
--- @@ -336,7 +336,7 @@ Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nes **What patches can do:** * Access `this`, `$row`, `$old` -* Load related documents with `get()` +* Load related documents with `load()` * Create or replace documents with `put()` * Delete documents with `del()` * Compute and transform property values diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx index 030c7b355b..90816bcdf0 100644 --- a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -160,13 +160,13 @@ specific failure reason. 2. **Null reference** — `$row` properties and `$old` may be `null` for certain event types. Use optional chaining: `$old?.Amount || 0`. -3. **`get()` returns null** — a document loaded with `get()` may not exist yet if CDC +3. **`load()` returns null** — a document loaded with `load()` may not exist yet if CDC Sink processes tables out of dependency order. Guard with a null check: -{`const related = get("Collection/123"); +{`const related = load("Collection/123"); if (related) \{ ... 
\} `} From 5a7f3b1128736b1c2985c0039aed33b44806aa06 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Fri, 3 Apr 2026 05:32:47 +0300 Subject: [PATCH 08/17] Reformat long lines in code blocks; fix CdcColumnMapping Type= continuation style --- .../cdc-sink/attachment-handling.mdx | 12 ++++++-- .../ongoing-tasks/cdc-sink/column-mapping.mdx | 14 +++++++-- .../cdc-sink/embedded-tables.mdx | 6 +++- .../ongoing-tasks/cdc-sink/patching.mdx | 18 +++++++++-- .../examples/example-complex-nesting.mdx | 3 +- .../ongoing-tasks/cdc-sink/schema-design.mdx | 30 +++++++++++++++---- 6 files changed, 69 insertions(+), 14 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index f384239070..1b52eefd36 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -65,7 +65,11 @@ Set `Type = CdcColumnType.Attachment` on the `content` column entry: new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "filename", Name = "Filename" \}, new CdcColumnMapping() \{ Column = "mime_type", Name = "MimeType" \}, - new CdcColumnMapping() \{ Column = "content", Name = "file", Type = CdcColumnType.Attachment \}, + new CdcColumnMapping() + \{ + Column = "content", Name = "file", + Type = CdcColumnType.Attachment + \}, ] \} `} @@ -122,7 +126,11 @@ Configure the embedded table with `Type = CdcColumnType.Attachment` on the binar [ new CdcColumnMapping() \{ Column = "photo_num", Name = "PhotoNum" \}, new CdcColumnMapping() \{ Column = "caption", Name = "Caption" \}, - new CdcColumnMapping() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, + new CdcColumnMapping() + \{ + Column = "thumbnail", Name = "thumb", + Type = CdcColumnType.Attachment + \}, ] \} `} diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx 
b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 9968856040..22222da6ba 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -76,8 +76,18 @@ that controls how the SQL value is stored in RavenDB: {`Columns = [ new CdcColumnMapping() \{ Column = "order_id", Name = "OrderId" \}, - new CdcColumnMapping() \{ Column = "metadata", Name = "Metadata", Type = CdcColumnType.Json \}, // Parsed as native JSON - new CdcColumnMapping() \{ Column = "receipt", Name = "receipt.pdf", Type = CdcColumnType.Attachment \}, // Stored as attachment + // Parsed as a native JSON value + new CdcColumnMapping() + \{ + Column = "metadata", Name = "Metadata", + Type = CdcColumnType.Json + \}, + // Stored as an attachment + new CdcColumnMapping() + \{ + Column = "receipt", Name = "receipt.pdf", + Type = CdcColumnType.Attachment + \}, ] `} diff --git a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx index 3af139bee0..a50058f60e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx @@ -242,7 +242,11 @@ them to `Columns` with `Type = CdcColumnType.Attachment`. [ new CdcColumnMapping() \{ Column = "photo_num", Name = "PhotoNum" \}, new CdcColumnMapping() \{ Column = "caption", Name = "Caption" \}, - new CdcColumnMapping() \{ Column = "thumbnail", Name = "thumb", Type = CdcColumnType.Attachment \}, + new CdcColumnMapping() + \{ + Column = "thumbnail", Name = "thumb", + Type = CdcColumnType.Attachment + \}, ] \} `} diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index c78a843f64..d71b97d3e2 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -134,7 +134,11 @@ Without it, deletes will leave the aggregate in an incorrect state. 
PropertyName = "Lines", PrimaryKeyColumns = ["line_id"], JoinColumns = ["order_id"], - Columns = [ new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, new CdcColumnMapping() \{ Column = "quantity", Name = "Quantity" \} ], + Columns = + [ + new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, + new CdcColumnMapping() \{ Column = "quantity", Name = "Quantity" \}, + ], // Runs on INSERT and UPDATE — recomputes total from current Lines array Patch = @" @@ -180,7 +184,11 @@ the deleted item's value. Without it, deletes leave the running total incorrect. PropertyName = "Lines", PrimaryKeyColumns = ["line_id"], JoinColumns = ["invoice_id"], - Columns = [ new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, new CdcColumnMapping() \{ Column = "amount", Name = "Amount" \} ], + Columns = + [ + new CdcColumnMapping() \{ Column = "line_id", Name = "LineId" \}, + new CdcColumnMapping() \{ Column = "amount", Name = "Amount" \}, + ], // INSERT: $old is null, so delta = new amount (0 → new) // UPDATE: $old has previous Amount, delta = new - old @@ -215,7 +223,11 @@ Compute fields from unmapped columns that you don't want to store directly: -{`Columns = [ new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "name", Name = "Name" \} ], +{`Columns = +[ + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, +], // base_price and tax_rate are NOT in Columns Patch = @" this.FinalPrice = $row.base_price * (1 + $row.tax_rate); diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx index 5766906484..60988a0428 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx @@ -53,7 +53,8 @@ CREATE TABLE 
product_variants ( CREATE TABLE variant_attributes ( attr_id SERIAL PRIMARY KEY, - product_id INT NOT NULL REFERENCES products(product_id), -- denormalized root PK (required for deep nesting) + -- denormalized root PK (required for deep nesting) + product_id INT NOT NULL REFERENCES products(product_id), variant_id INT NOT NULL REFERENCES product_variants(variant_id), attr_name TEXT NOT NULL, attr_value TEXT NOT NULL diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx index 334e86ddec..e9a59e82d8 100644 --- a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -81,7 +81,11 @@ SQL `order_lines` table becomes an array inside each `Orders` document. Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - Columns = [ new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \} ], + Columns = + [ + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \}, + ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -139,7 +143,11 @@ A foreign key in the source row becomes a RavenDB document ID. 
Name = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], - Columns = [ new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, new CdcColumnMapping() \{ Column = "customer_id", Name = "CustomerId" \} ], + Columns = + [ + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "customer_id", Name = "CustomerId" \}, + ], LinkedTables = [ new CdcSinkLinkedTableConfig @@ -211,7 +219,11 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Name = "Companies", SourceTableName = "companies", PrimaryKeyColumns = ["company_id"], - Columns = [ new CdcColumnMapping() \{ Column = "company_id", Name = "CompanyId" \}, new CdcColumnMapping() \{ Column = "name", Name = "Name" \} ], + Columns = + [ + new CdcColumnMapping() \{ Column = "company_id", Name = "CompanyId" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, + ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -221,7 +233,11 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Type = CdcSinkRelationType.Array, JoinColumns = ["company_id"], // Root FK PrimaryKeyColumns = ["dept_id"], - Columns = [ new CdcColumnMapping() \{ Column = "dept_id", Name = "DeptId" \}, new CdcColumnMapping() \{ Column = "dept_name", Name = "DeptName" \} ], + Columns = + [ + new CdcColumnMapping() \{ Column = "dept_id", Name = "DeptId" \}, + new CdcColumnMapping() \{ Column = "dept_name", Name = "DeptName" \}, + ], EmbeddedTables = [ new CdcSinkEmbeddedTableConfig @@ -231,7 +247,11 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h Type = CdcSinkRelationType.Array, JoinColumns = ["company_id", "dept_id"], // Root FK + parent FK PrimaryKeyColumns = ["emp_id"], - Columns = [ new CdcColumnMapping() \{ Column = "emp_id", Name = "EmpId" \}, new CdcColumnMapping() \{ Column = "emp_name", Name = "EmpName" \} ] + Columns = + [ + new CdcColumnMapping() \{ Column = "emp_id", Name = "EmpId" 
\}, + new CdcColumnMapping() \{ Column = "emp_name", Name = "EmpName" \}, + ] \} ] \} From 4d904a67782a2c494269d79581da084ae24c588f Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Sat, 4 Apr 2026 00:39:43 +0300 Subject: [PATCH 09/17] RavenDB-26046 - CDC Sink: add PostgreSQL type mapping page, fix $row/$old documentation - Add postgres/type-mapping.mdx: full reference table of PostgreSQL column types and their JavaScript/CLR equivalents (scalars, arrays, json/jsonb, bytea, pgvector) - Add patching.mdx "$row and $old: Names and Types" - Fix cleanup-and-maintenance.mdx: replace obsolete "Configuration Changes That Rename Slots" section (described hash-based naming, no longer accurate) with correct "Slot and Publication Names Are Immutable" section reflecting enforced immutability --- .../ongoing-tasks/cdc-sink/column-mapping.mdx | 35 ++++- .../ongoing-tasks/cdc-sink/patching.mdx | 127 ++++++++++++++++++ .../postgres/cleanup-and-maintenance.mdx | 39 ++---- .../postgres/examples/_category_.json | 2 +- 4 files changed, 169 insertions(+), 34 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 22222da6ba..29949bf9e5 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -94,11 +94,32 @@ that controls how the SQL value is stored in RavenDB: -| Type | Behavior | -|------|----------| -| `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. This is the default when `Type` is omitted. | -| `Json` | Parses the string value as a native JSON value (object, array, string, number, boolean, etc.) in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | -| `Attachment` | Stores the raw column value as a RavenDB attachment. 
`byte[]`→binary (`application/octet-stream`), `string`→UTF-8 text (`text/plain`), `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | +**`CdcColumnType.Default`** — Standard SQL-to-JSON type conversion. This is the default when `Type` is omitted. +Type conversions are source-database-specific. For PostgreSQL: + +* `smallint` / `integer` / `oid` → `long` +* `bigint` → `long` +* `real` → `float` +* `double precision` → `double` +* `numeric` / `decimal` → `decimal` +* `boolean` → `bool` +* `date` → `DateOnly` +* `timestamp` / `timestamptz` → `DateTime` (UTC) +* `uuid` → `string` +* `varchar` / `text` / `char` → `string` +* `json` / `jsonb` → `string` (raw JSON text, not parsed) +* SQL arrays → JSON arrays +* `vector` (pgvector) → `float[]` + +See [PostgreSQL Type Mapping](./postgres/type-mapping.mdx) for the full reference table. + +**`CdcColumnType.Json`** — Parses the string value as a native JSON value (object, array, +string, number, boolean, etc.) in the document. Use for PostgreSQL `json`/`jsonb` or +SQL Server `nvarchar(max)` with JSON content. + +**`CdcColumnType.Attachment`** — Stores the raw column value as a RavenDB attachment. +`byte[]` → binary (`application/octet-stream`), `string` → UTF-8 text (`text/plain`), +`float[]`/`double[]` → raw vector data. The `Name` field becomes the attachment name. 
--- @@ -181,3 +202,7 @@ For most PostgreSQL setups using the default `public` schema, this can be omitte - [Patching](./patching.mdx) - [Attachment Handling](./attachment-handling.mdx) - [Configuration Reference](./configuration-reference.mdx) + +### PostgreSQL + +- [Type Mapping](./postgres/type-mapping.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index d71b97d3e2..4bec406439 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -26,6 +26,8 @@ import LanguageContent from "@site/src/components/LanguageContent"; * In this page: * [When Patches Run](#when-patches-run) * [Available Variables](#available-variables) + * [$row and $old: Names and Types](#row-and-old-names-and-types) + * [$row Column Types](#row-column-types) * [Patch Scope: Root vs Embedded](#patch-scope-root-vs-embedded) * [Scenario: Column Transformation](#scenario-column-transformation) * [Scenario: Aggregation with Embedded Tables](#scenario-aggregation-with-embedded-tables) @@ -66,6 +68,127 @@ The `OnDelete.Patch` on `CdcSinkOnDeleteConfig` handles DELETE separately. 
--- +## $row and $old: Names and Types + +`$row` and `$old` look similar but represent fundamentally different things: + +| | `$row` | `$old` | +|--|--------|--------| +| **Contains** | The incoming SQL row from the CDC event | The previous state of the RavenDB document (or embedded item) | +| **Property names** | Original SQL column names (`customer_name`) | Mapped RavenDB property names (`CustomerName`) | +| **Value types** | Raw SQL types (see [$row Column Types](#row-column-types)) | Stored JSON types (whatever was last written to RavenDB) | +| **Available on INSERT** | ✓ | ✗ (null) | +| **Available on UPDATE** | ✓ | ✓ | +| **Available on DELETE** | ✓ | ✓ | + +Given this column mapping: + + + + +{`Columns = +[ + new CdcColumnMapping() \{ Column = "customer_name", Name = "CustomerName" \}, + new CdcColumnMapping() \{ Column = "amount", Name = "Amount" \}, +] +`} + + + + +In the patch script: + + + + +{`// $row — SQL column names, raw SQL types +$row.customer_name // string +$row.amount // number (from numeric/decimal column) + +// $old — mapped property names, stored JSON types +$old.CustomerName // string (as stored in RavenDB) +$old.Amount // number (as stored in RavenDB) + +// this — the document after column mapping has been applied for this event +this.CustomerName // string (just set from $row.customer_name) +this.Amount // number (just set from $row.amount) +`} + + + + + +Unmapped columns — those not listed in `Columns` — appear in `$row` but are never +written to the document and therefore never appear in `$old`. + + +--- + +## $row Column Types + +`$row` exposes all SQL columns using their **original column names** (not the mapped +RavenDB property names). The JavaScript type of each value depends on the source +database type. 
+ +### PostgreSQL + +| PostgreSQL Type | JavaScript Type in `$row` | Notes | +|----------------|--------------------------|-------| +| `integer` / `bigint` | `number` | | +| `real` / `double precision` | `number` | | +| `numeric` | `number` | | +| `boolean` | `boolean` | | +| `date` / `timestamp` / `timestamptz` | `string` | Formatted date string | +| `uuid` | `string` | | +| `varchar` / `text` / `char` | `string` | | +| `bytea` | `string` | Base64-encoded | +| `json` / `jsonb` | `string` | Raw JSON text — call `JSON.parse()` to access as an object / array | +| `text[]` / `varchar[]` / `int[]` / etc. | `Array` | Elements are always strings, even for numeric array types | +| `vector` (pgvector) | `Array` | Elements are JavaScript numbers (floats) | + +**JSON/JSONB columns require explicit parsing:** + + + + +{`var meta = JSON.parse($row.metadata); +this.Priority = meta.priority; +this.Label = meta.label; +`} + + + + +**Array columns:** + + + + +{`this.TagCount = $row.tags.length; +this.FirstTag = $row.tags[0]; +this.Tags = $row.tags; + +// INT[] elements are strings — parse explicitly if numeric arithmetic is needed +var first = parseInt($row.scores[0], 10); +`} + + + + +**pgvector columns (elements are numbers, not strings):** + + + + +{`this.EmbeddingSum = $row.embedding.reduce(function(a, b) \{ return a + b; \}, 0); +this.Embedding = $row.embedding; +`} + + + + +--- + ## Patch Scope: Root vs Embedded **Root table patches** operate on the root document: @@ -373,6 +496,10 @@ Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nes - [Property Retention](./property-retention.mdx) - [Failover and Consistency](./failover-and-consistency.mdx) +### PostgreSQL + +- [Type Mapping](./postgres/type-mapping.mdx) + ### Server - [Patching Configuration](../../../server/configuration/patching-configuration) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx 
b/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx index c83a62e5c0..538fb59942 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/cleanup-and-maintenance.mdx @@ -180,37 +180,20 @@ WHERE active = false; --- -## Configuration Changes That Rename Slots +## Slot and Publication Names Are Immutable -The replication slot name is derived from the task name, database name, and table -names. If you update a CDC Sink task in a way that changes any of these — such as -adding a table, removing a table, or renaming the task — the expected slot and -publication names change. +The replication slot name and publication name are generated **once at task creation** +using a GUID and stored with the task. They do not change when you modify the task — +adding or removing tables, renaming the task, or changing any other configuration +always reuses the same slot and publication. -What happens: +Attempting to specify a different `SlotName` or `PublicationName` when updating a +task results in an error. -* CDC Sink will look for a slot/publication with the new name -* If it has permissions, it will create them -* The old slot and publication are **not deleted** — they become orphaned - -After updating a task configuration that changes table membership: - -1. Let the task restart and create the new slot/publication -2. Identify the old slot (it will be inactive): - - - - -{`SELECT slot_name, active -FROM pg_replication_slots -WHERE slot_name LIKE 'rvn_cdc_s_%' - AND active = false; -`} - - - - -3. Drop the old slot and publication +If you need to use a different slot or publication, delete the task and create a new one. +The old slot and publication from the deleted task must then be cleaned up manually — +see [Dropping a Replication Slot](#dropping-a-replication-slot) and +[Dropping a Publication](#dropping-a-publication) below. 
--- diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json index 4d070cfb37..10c3f5c05e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/_category_.json @@ -1 +1 @@ -{"position": 9, "label": "Examples"} +{"position": 10, "label": "Examples"} From ee05eb625c5c97eae4eff04c91408ecf196d938e Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Tue, 7 Apr 2026 01:10:01 +0300 Subject: [PATCH 10/17] RavenDB-26046 - Align CDC Sink docs with actual code API from PR #4698 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Name → CollectionName on CdcSinkTableConfig across all files - Remove Type from CdcSinkLinkedTableConfig (linked tables have no relation type) - Remove Disabled from CdcSinkEmbeddedTableConfig, add LinkedTables - Add FactoryName table (Npgsql, SqlClient, MySql) to configuration-reference - Add CdcColumnMapping and CdcColumnType reference sections - Add put(id, document) and del(id) to patch capabilities - Add JSON Columns section to column-mapping - Remove non-existent Array References section from linked-tables - Remove non-existent Disabling an Embedded Table section from embedded-tables - Update server-configuration descriptions (MaxBatchSize, MaxFallbackTimeInSec, PollIntervalInSec applies to SQL Server only) - Fix licensing link in overview - Add SQL Server and MySQL/MariaDB as supported source databases --- .../ongoing-tasks/cdc-sink/api-reference.mdx | 4 +- .../cdc-sink/attachment-handling.mdx | 2 +- .../ongoing-tasks/cdc-sink/column-mapping.mdx | 30 +++++++++++++ .../cdc-sink/configuration-reference.mdx | 25 +++++++---- .../cdc-sink/embedded-tables.mdx | 26 ----------- .../ongoing-tasks/cdc-sink/linked-tables.mdx | 43 +------------------ .../ongoing-tasks/cdc-sink/overview.mdx | 5 ++- .../ongoing-tasks/cdc-sink/patching.mdx | 
8 ++-- .../examples/example-complex-nesting.mdx | 3 +- .../examples/example-denormalization.mdx | 2 +- .../examples/example-event-sourcing.mdx | 2 +- .../examples/example-simple-migration.mdx | 2 +- .../cdc-sink/property-retention.mdx | 14 +++--- .../ongoing-tasks/cdc-sink/schema-design.mdx | 15 +++---- .../cdc-sink/server-configuration.mdx | 17 +++----- 15 files changed, 83 insertions(+), 115 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx index 448aa7b634..f2def3578a 100644 --- a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx @@ -45,7 +45,7 @@ Use `AddCdcSinkOperation` to create a new CDC Sink task: \{ new CdcSinkTableConfig \{ - Name = "Orders", + CollectionName = "Orders", SourceTableName = "orders", PrimaryKeyColumns = new List \{ "id" \}, Columns = @@ -87,7 +87,7 @@ Pass the full updated configuration including the `TaskId`: {`config.TaskId = taskId; // Must be set config.Tables.Add(new CdcSinkTableConfig \{ - Name = "Customers", + CollectionName = "Customers", SourceTableName = "customers", PrimaryKeyColumns = new List \{ "id" \}, Columns = diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index 1b52eefd36..f7645bca8b 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -57,7 +57,7 @@ Set `Type = CdcColumnType.Attachment` on the `content` column entry: {`new CdcSinkTableConfig \{ - Name = "Files", + CollectionName = "Files", SourceTableName = "files", PrimaryKeyColumns = ["id"], Columns = diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 29949bf9e5..1e63b66fc5 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ 
b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -24,6 +24,7 @@ import LanguageContent from "@site/src/components/LanguageContent"; * [Mapping Columns to Properties](#mapping-columns-to-properties) * [Column Types](#column-types) * [Unmapped Columns](#unmapped-columns) + * [JSON Columns](#json-columns) * [Validation Rules](#validation-rules) * [Schema (Source Table Schema)](#schema-source-table-schema) @@ -160,6 +161,35 @@ in patch scripts via `$row.base_price` (for the current row's values) and --- +## JSON Columns + +SQL columns that contain JSON data (e.g., PostgreSQL `jsonb`, SQL Server `nvarchar` +with JSON content) can be parsed into native JSON objects or arrays using +`CdcColumnType.Json`: + + + + +{`Columns = +[ + new CdcColumnMapping() \{ Column = "id", Name = "Id" \}, + new CdcColumnMapping() \{ Column = "name", Name = "Name" \}, + new CdcColumnMapping() + \{ + Column = "metadata", Name = "Metadata", + Type = CdcColumnType.Json + \}, +] +`} + + + + +Without `CdcColumnType.Json`, the JSON column value would be stored as a raw string. +With it, the value is parsed and stored as a nested object or array in the document. + +--- + ## Validation Rules The following rules apply to both root table and embedded table column mappings: diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx index ddc9355e98..ac255fa74f 100644 --- a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -1,7 +1,7 @@ --- title: "CDC Sink: Configuration Reference" sidebar_label: Configuration Reference -description: "Documents all configuration classes used to define a CDC Sink task, including CdcSinkConfiguration, table configs, and relation types." 
+description: "Documents all configuration classes used to define a CDC Sink task, including CdcSinkConfiguration, table configs, CdcColumnMapping, and related types." sidebar_position: 10 --- @@ -24,10 +24,10 @@ import LanguageContent from "@site/src/components/LanguageContent"; * [CdcSinkTableConfig](#cdcsinktableconfig) * [CdcSinkEmbeddedTableConfig](#cdcsinkembeddedtableconfig) * [CdcSinkLinkedTableConfig](#cdcsinklinkedtableconfig) - * [CdcSinkOnDeleteConfig](#cdcsinkondeleteconfig) - * [CdcSinkRelationType](#cdcsinkrelationtype) * [CdcColumnMapping](#cdccolumnmapping) * [CdcColumnType](#cdccolumntype) + * [CdcSinkOnDeleteConfig](#cdcsinkondeleteconfig) + * [CdcSinkRelationType](#cdcsinkrelationtype) * [REST API Endpoints](#rest-api-endpoints) @@ -41,7 +41,7 @@ The top-level configuration object for a CDC Sink task. | Property | Type | Required | Description | |----------|------|----------|-------------| | `Name` | `string` | ✓ | Unique task name | -| `ConnectionStringName` | `string` | ✓ | Name of the SQL connection string | +| `ConnectionStringName` | `string` | ✓ | Name of the SQL connection string (uses the existing `SqlConnectionString` type — the same as SQL ETL). The connection string's `FactoryName` determines the source database engine. | | `Tables` | `List` | ✓ | Root table configurations (at least one required) | | `Postgres` | `CdcSinkPostgresSettings` | | PostgreSQL-specific settings (slot and publication names) | | `SkipInitialLoad` | `bool` | | When `true`, skip the initial full-table scan and start streaming CDC changes immediately. Only applies on first startup — has no effect once the initial load has completed. Default: `false` | @@ -50,6 +50,14 @@ The top-level configuration object for a CDC Sink task. | `PinToMentorNode` | `bool` | | Pin the task to the mentor node. 
Default: `false` | | `TaskId` | `long` | | Set by the server on creation | +**Supported `FactoryName` values (set on the connection string):** + +| FactoryName | Database Engine | +|-------------|-----------------| +| `Npgsql` | PostgreSQL | +| `System.Data.SqlClient` or `Microsoft.Data.SqlClient` | SQL Server | +| `MySql.Data.MySqlClient` or `MySqlConnector.MySqlConnectorFactory` | MySQL / MariaDB | + --- ## CdcSinkPostgresSettings @@ -86,7 +94,7 @@ Configures a root table — one SQL table mapped to one RavenDB collection. | Property | Type | Required | Description | |----------|------|----------|-------------| -| `Name` | `string` | ✓ | RavenDB collection name (e.g., `"Orders"`) | +| `CollectionName` | `string` | ✓ | RavenDB collection name (e.g., `"Orders"`) | | `SourceTableName` | `string` | ✓ | SQL table name (e.g., `"orders"`) | | `SourceTableSchema` | `string` | | SQL schema name. Default: `"public"` | | `PrimaryKeyColumns` | `List` | ✓ | SQL columns used for document ID generation | @@ -116,7 +124,7 @@ Configures a table whose rows are embedded as nested objects within a parent doc | `OnDelete` | `CdcSinkOnDeleteConfig` | | Delete behavior for embedded items | | `CaseSensitiveKeys` | `bool` | | Case-sensitive PK matching. Default: `false` | | `EmbeddedTables` | `List` | | Nested embedded tables | -| `Disabled` | `bool` | | Skip this table. Default: `false` | +| `LinkedTables` | `List` | | Linked tables within embedded items | --- @@ -130,7 +138,6 @@ Configures a foreign key reference that becomes a document ID in the parent docu | `SourceTableSchema` | `string` | | SQL schema name. 
Default: `"public"` | | `PropertyName` | `string` | ✓ | Property name in the parent document (e.g., `"Customer"`) | | `LinkedCollectionName` | `string` | ✓ | Target RavenDB collection for ID generation (e.g., `"Customers"`) | -| `Type` | `CdcSinkRelationType` | ✓ | `Value` (single reference) or `Array` (multiple references) | | `JoinColumns` | `List` | ✓ | FK columns used to build the referenced document ID | --- @@ -187,13 +194,13 @@ Controls how DELETE events are handled for a table or embedded table. ## CdcSinkRelationType -Specifies the structure of embedded or linked data in the document. +Specifies the structure of embedded data in the document. | Value | Description | |-------|-------------| | `Array` | One-to-many: stored as a JSON array. Items matched by PK for UPDATE/DELETE | | `Map` | One-to-many: stored as a JSON object keyed by PK value(s) | -| `Value` | Many-to-one: stored as a single embedded object or document reference | +| `Value` | Many-to-one: stored as a single embedded object | --- diff --git a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx index a50058f60e..dde63a2bef 100644 --- a/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/embedded-tables.mdx @@ -28,7 +28,6 @@ import LanguageContent from "@site/src/components/LanguageContent"; * [Matching Items on Update and Delete](#matching-items-on-update-and-delete) * [Deep Nesting](#deep-nesting) * [Attachments on Embedded Items](#attachments-on-embedded-items) - * [Disabling an Embedded Table](#disabling-an-embedded-table) @@ -262,31 +261,6 @@ When the embedded item is deleted, its attachments are automatically removed. 
--- -## Disabling an Embedded Table - -Set `Disabled = true` to pause processing for a specific embedded table without -removing it from the configuration: - - - - -{`new CdcSinkEmbeddedTableConfig -\{ - SourceTableName = "audit_log", - PropertyName = "AuditLog", - Disabled = true, - // ... other config -\} -`} - - - - -Changes from the source table are ignored while `Disabled = true`. When re-enabled, -CDC Sink resumes from the current position — it does not backfill missed events. - ---- - ## Related Articles ### CDC Sink diff --git a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx index 003b7aa55b..e0ecc863fa 100644 --- a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx @@ -23,7 +23,6 @@ import LanguageContent from "@site/src/components/LanguageContent"; * In this page: * [Basic Configuration](#basic-configuration) * [Composite Foreign Keys](#composite-foreign-keys) - * [Array References](#array-references) * [Linked vs Embedded](#linked-vs-embedded) @@ -32,14 +31,14 @@ import LanguageContent from "@site/src/components/LanguageContent"; ## Basic Configuration -`CdcSinkLinkedTableConfig` is placed inside a root table's `LinkedTables` list: +`CdcSinkLinkedTableConfig` is placed inside a root table's (or embedded table's) `LinkedTables` list: {`new CdcSinkTableConfig \{ - Name = "Orders", + CollectionName = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], Columns = @@ -54,7 +53,6 @@ import LanguageContent from "@site/src/components/LanguageContent"; SourceTableName = "customers", PropertyName = "Customer", // Property name in document LinkedCollectionName = "Customers", // Target collection for ID - Type = CdcSinkRelationType.Value, // Single reference JoinColumns = ["customer_id"] // FK used to build the document ID \} ] @@ -98,7 +96,6 @@ all parts of that key: SourceTableName = "customers", PropertyName = "Customer", 
LinkedCollectionName = "Customers", - Type = CdcSinkRelationType.Value, JoinColumns = ["customer_region", "customer_id"] // Must match Customers PK order \} `} @@ -114,42 +111,6 @@ With `customer_region = 'US'` and `customer_id = 42`, the document gets: --- -## Array References - -Use `Type = CdcSinkRelationType.Array` for one-to-many references, where a parent -row has multiple foreign keys pointing to the same collection: - - - - -{`new CdcSinkLinkedTableConfig -\{ - SourceTableName = "tags", - PropertyName = "Tags", - LinkedCollectionName = "Tags", - Type = CdcSinkRelationType.Array, - JoinColumns = ["tag_id"] -\} -`} - - - - -This creates an array of document references: - - - - -{`\{ - "Tags": ["Tags/primary", "Tags/urgent", "Tags/follow-up"] -\} -`} - - - - ---- - ## Linked vs Embedded | Consideration | Embedded | Linked | diff --git a/docs/server/ongoing-tasks/cdc-sink/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/overview.mdx index a8716827a5..7a2adb1909 100644 --- a/docs/server/ongoing-tasks/cdc-sink/overview.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/overview.mdx @@ -29,7 +29,8 @@ import LanguageContent from "@site/src/components/LanguageContent"; * Supported source databases: * **PostgreSQL** — via logical replication - * Additional source databases planned for future versions + * **SQL Server** — via native CDC (polling change tables) + * **MySQL / MariaDB** — via binlog replication * In this page: * [Why Use CDC Sink](#why-use-cdc-sink) @@ -128,7 +129,7 @@ See [Schema Design](./schema-design.mdx) for details. CDC Sink is available on an **Enterprise** license. -Learn more about licensing [here](../../../start/licensing/licensing-overview). +Learn more about licensing [here](../../../licensing/overview). 
--- diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index 4bec406439..1b3579b912 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -64,7 +64,7 @@ The `OnDelete.Patch` on `CdcSinkOnDeleteConfig` handles DELETE separately. | `this` | Always | object | The document being modified (root or parent for embedded) | | `$row` | Always | object | All SQL columns from the CDC event (mapped and unmapped) | | `$old` | Always | object \| null | Previous state of the document (root patches) or embedded item (embedded patches). Null on INSERT. | -| `get(id)` | Always | function | Load a RavenDB document by ID | +| `load(id)` | Always | function | Load a RavenDB document by ID | --- @@ -471,9 +471,9 @@ Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nes **What patches can do:** * Access `this`, `$row`, `$old` -* Load related documents with `load()` -* Create or replace documents with `put()` -* Delete documents with `del()` +* Load related documents with `load(id)` +* Create or replace documents with `put(id, document)` +* Delete documents with `del(id)` * Compute and transform property values * Set and modify document metadata * Conditional logic, loops, array methods diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx index 60988a0428..9520887f56 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-complex-nesting.mdx @@ -105,7 +105,7 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. 
[ new CdcSinkTableConfig \{ - Name = "Products", + CollectionName = "Products", SourceTableName = "products", PrimaryKeyColumns = ["product_id"], Columns = @@ -121,7 +121,6 @@ See [REPLICA IDENTITY](../replica-identity.mdx) for more details. SourceTableName = "categories", PropertyName = "Category", LinkedCollectionName = "Categories", - Type = CdcSinkRelationType.Value, JoinColumns = ["category_id"] \} ], diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx index 5cae4c6008..73cd1c7dfb 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-denormalization.mdx @@ -83,7 +83,7 @@ See [REPLICA IDENTITY](../replica-identity.mdx). \{ new CdcSinkTableConfig \{ - Name = "Orders", + CollectionName = "Orders", SourceTableName = "orders", PrimaryKeyColumns = new List \{ "order_id" \}, Columns = diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx index 4045733e3c..a5c8304f71 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-event-sourcing.mdx @@ -72,7 +72,7 @@ history, and `Balance` is maintained using patch logic. 
[ new CdcSinkTableConfig \{ - Name = "Accounts", + CollectionName = "Accounts", SourceTableName = "accounts", PrimaryKeyColumns = ["account_id"], Columns = diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx index 9503109eea..b6634e057e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/examples/example-simple-migration.mdx @@ -57,7 +57,7 @@ A simple customers table: \{ new CdcSinkTableConfig \{ - Name = "Customers", + CollectionName = "Customers", SourceTableName = "customers", PrimaryKeyColumns = new List \{ "id" \}, Columns = diff --git a/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx index 95fd92e5ff..8f64d7c68e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/property-retention.mdx @@ -79,7 +79,7 @@ Document after CDC UPDATE: -`Email` is updated from SQL, while `InternalNotes` (not in `the `Columns` list`) is preserved. +`Email` is updated from SQL, while `InternalNotes` (not in the `Columns` list) is preserved. 
--- @@ -87,15 +87,15 @@ Document after CDC UPDATE: **Preserved across CDC updates:** -* Properties not listed in `the `Columns` list` +* Properties not listed in the `Columns` list * Properties set in RavenDB directly (annotations, computed values, flags) * Document metadata (unless the patch explicitly modifies it) **Overwritten on CDC update:** -* Any property mapped via `the `Columns` list` — always updated to match the current SQL value +* Any property mapped via the `Columns` list — always updated to match the current SQL value -If you manually edit a property that is part of `the `Columns` list`, the next CDC UPDATE +If you manually edit a property that is part of the `Columns` list, the next CDC UPDATE for that row will overwrite your edit with the SQL value. --- @@ -120,7 +120,7 @@ You can safely add properties to CDC-managed documents: -Properties managed by CDC (those in `the `Columns` list`) will be overwritten on +Properties managed by CDC (those in the `Columns` list) will be overwritten on the next UPDATE from the source database. Do not rely on manual edits to mapped properties surviving future CDC updates. @@ -133,11 +133,11 @@ RavenDB-only property for your annotation and leaving the SQL-mapped property as ## Implications for Patches Patches run after column mapping and can set additional properties that are not -from `the `Columns` list`. These patch-computed properties follow the same merge rules: +from the `Columns` list. 
These patch-computed properties follow the same merge rules: * If a patch sets `this.ComputedField = ...`, that value persists across future events where the patch doesn't explicitly change it -* If a patch sets a property that is also in `the `Columns` list`, the column mapping +* If a patch sets a property that is also in the `Columns` list, the column mapping value takes precedence (mapping is applied before patching) For aggregates maintained via patches (e.g., `RunningTotal`), the patch itself diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx index e9a59e82d8..3958b38dca 100644 --- a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -43,7 +43,7 @@ becomes one document. {`new CdcSinkTableConfig \{ - Name = "Orders", // RavenDB collection name + CollectionName = "Orders", // RavenDB collection name SourceTableSchema = "public", // SQL schema (optional, default: "public") SourceTableName = "orders", // SQL table name PrimaryKeyColumns = ["id"], // Used for document ID generation @@ -78,7 +78,7 @@ SQL `order_lines` table becomes an array inside each `Orders` document. {`new CdcSinkTableConfig \{ - Name = "Orders", + CollectionName = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], Columns = @@ -140,7 +140,7 @@ A foreign key in the source row becomes a RavenDB document ID. {`new CdcSinkTableConfig \{ - Name = "Orders", + CollectionName = "Orders", SourceTableName = "orders", PrimaryKeyColumns = ["id"], Columns = @@ -155,7 +155,6 @@ A foreign key in the source row becomes a RavenDB document ID. 
SourceTableName = "customers", PropertyName = "Customer", // Property in parent document LinkedCollectionName = "Customers", // Target collection for ID - Type = CdcSinkRelationType.Value, // Single reference JoinColumns = ["customer_id"] // FK used to build the ID \} ] @@ -216,7 +215,7 @@ Embedded tables can themselves have embedded tables, creating arbitrarily deep h {`new CdcSinkTableConfig \{ - Name = "Companies", + CollectionName = "Companies", SourceTableName = "companies", PrimaryKeyColumns = ["company_id"], Columns = @@ -293,13 +292,13 @@ configuration — the default DELETE events include all PK columns. ## Relation Types -The `Type` property on embedded and linked tables controls the document structure: +The `Type` property on embedded tables controls the document structure: | Type | Use Case | Document Structure | |------|----------|--------------------| | `Array` | One-to-many: parent has many children | `"Lines": [\{ ... \}, \{ ... \}]` | | `Map` | One-to-many with direct key lookup | `"Lines": \{ "1": \{ ... \}, "2": \{ ... \} \}` | -| `Value` | Many-to-one: parent has one child/reference | `"Customer": \{ ... \}` or `"Customer": "Customers/42"` | +| `Value` | Many-to-one: parent has one child | `"Customer": \{ ... \}` | **Array** — Items are matched by `PrimaryKeyColumns` for UPDATE and DELETE. Use when you need to iterate over all items. @@ -307,7 +306,7 @@ Use when you need to iterate over all items. **Map** — Items are stored as a JSON object keyed by the primary key value(s). Use when you need fast direct-key access within the document. -**Value** — Stores a single embedded object or document reference. +**Value** — Stores a single embedded object. Use for many-to-one relationships (many orders share one customer). 
--- diff --git a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx index 34780fe361..fe680783b2 100644 --- a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx @@ -37,8 +37,9 @@ See [Configuration Overview](../../../server/configuration/configuration-options **Default:** `1024` -The maximum number of change events processed in a single batch. Larger values -increase throughput but also increase memory usage per batch. +Target number of change rows processed in a single batch before writing to RavenDB. A batch may exceed this size when a source database transaction contains more rows, since transactions are never split across batches. + +Larger values increase throughput but also increase memory usage per batch. --- @@ -46,11 +47,9 @@ increase throughput but also increase memory usage per batch. **Default:** `900` (15 minutes) -How long the task will remain in fallback mode (continuously retrying) after losing -connection to the source database before reporting an error. +Maximum number of seconds the CDC Sink process will stay in fallback mode after a failure before retrying. Fallback duration doubles on each consecutive failure, up to this cap. -Set to `0` to disable fallback mode entirely — the task will move to error state -immediately on connection failure. +Set to `0` to disable fallback mode entirely — the task will retry immediately on each failure without exponential backoff. --- @@ -58,11 +57,9 @@ immediately on connection failure. **Default:** `1` -How frequently CDC Sink polls the source database for new change events when the -stream is idle. A shorter interval reduces latency but increases polling load on -the source. +How frequently (in seconds) the CDC Sink polls the source database for new change rows. This setting applies to **SQL Server** only, which uses polling-based change capture. 
-PostgreSQL uses logical replication streaming and ignores this setting. +PostgreSQL and MySQL use streaming replication and ignore this setting. --- From cee24cbbdee890375a887c71605dafa21460aa3b Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Tue, 7 Apr 2026 01:10:11 +0300 Subject: [PATCH 11/17] RavenDB-26046 - Add PostgreSQL, SQL Server, and MySQL overview pages - postgres/overview.mdx: connection string, logical replication explanation, prerequisites summary, section index - sql-server/overview.mdx: expand from stub to full page with connection string, CDC prerequisites, polling behavior, SourceTableSchema default - mysql/overview.mdx + _category_.json: connection string, binlog prerequisites, streaming behavior, required privileges --- .../cdc-sink/mysql/_category_.json | 4 + .../ongoing-tasks/cdc-sink/mysql/overview.mdx | 110 ++++++++++++++++ .../cdc-sink/postgres/overview.mdx | 123 ++++++++++++++++++ .../cdc-sink/sql-server/overview.mdx | 78 +++++++++-- 4 files changed, 307 insertions(+), 8 deletions(-) create mode 100644 docs/server/ongoing-tasks/cdc-sink/mysql/_category_.json create mode 100644 docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx create mode 100644 docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx diff --git a/docs/server/ongoing-tasks/cdc-sink/mysql/_category_.json b/docs/server/ongoing-tasks/cdc-sink/mysql/_category_.json new file mode 100644 index 0000000000..8ae06a8e97 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/mysql/_category_.json @@ -0,0 +1,4 @@ +{ + "position": 18, + "label": "MySQL" +} diff --git a/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx new file mode 100644 index 0000000000..6c92f20c39 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx @@ -0,0 +1,110 @@ +--- +title: "CDC Sink for MySQL: Overview" +sidebar_label: Overview +description: "CDC Sink support for MySQL and MariaDB — using binlog replication to stream 
changes into RavenDB in real time." +sidebar_position: 0 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for MySQL: Overview + + + +* CDC Sink supports MySQL and MariaDB as source databases using + [binary log (binlog) replication](https://dev.mysql.com/doc/refman/8.0/en/replication.html). + +* MySQL streams row-level changes in real time through the binlog. CDC Sink consumes + this stream and applies INSERTs, UPDATEs, and DELETEs to RavenDB documents as they + occur. + + + +--- + +## Connection String + +Use a `SqlConnectionString` with `FactoryName` set to `"MySql.Data.MySqlClient"` or +`"MySqlConnector.MySqlConnectorFactory"`: + + + + +{`store.Maintenance.Send(new PutConnectionStringOperation( + new SqlConnectionString + \{ + Name = "MyMySqlConnection", + FactoryName = "MySqlConnector.MySqlConnectorFactory", + ConnectionString = "Server=myserver;Port=3306;Database=mydb;User=ravendb_cdc;Password=..." + \})); +`} + + + + +--- + +## Prerequisites + +MySQL must have binary logging enabled with row-based format: + + + + +{`# my.cnf / my.ini +[mysqld] +log-bin = mysql-bin +binlog_format = ROW +binlog_row_image = FULL +server-id = 1 +`} + + + + +The CDC Sink user needs the following privileges: + + + + +{`GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'ravendb_cdc'@'%'; +GRANT SELECT ON mydb.* TO 'ravendb_cdc'@'%'; +`} + + + + +* `REPLICATION SLAVE` — required to read the binlog stream +* `REPLICATION CLIENT` — required to query binlog status +* `SELECT` — required for the initial full-table load + +--- + +## Streaming Behavior + +Like PostgreSQL, MySQL CDC Sink uses streaming replication (via the binlog), not +polling. 
The `CdcSink.PollIntervalInSec` server configuration setting does not +apply to MySQL. + +--- + +## Configuration + +The CDC Sink task configuration is the same across all database engines. +See [Configuration Reference](../configuration-reference.mdx) for full details. + +The `SourceTableSchema` corresponds to the MySQL database name. + +--- + +## Related Articles + +### CDC Sink + +- [Overview](../overview.mdx) +- [Configuration Reference](../configuration-reference.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx new file mode 100644 index 0000000000..ef07f3bd6d --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx @@ -0,0 +1,123 @@ +--- +title: "CDC Sink for PostgreSQL: Overview" +sidebar_label: Overview +description: "CDC Sink support for PostgreSQL — using logical replication to stream changes into RavenDB in real time." +sidebar_position: -1 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink for PostgreSQL: Overview + + + +* CDC Sink supports PostgreSQL as a source database using + [logical replication](https://www.postgresql.org/docs/current/logical-replication.html). + +* PostgreSQL streams row-level changes in real time through a logical replication slot. + CDC Sink consumes this stream and applies INSERTs, UPDATEs, and DELETEs to RavenDB + documents as they occur. 
+ + + +--- + +## Connection String + +Use a `SqlConnectionString` with `FactoryName` set to `"Npgsql"`: + + + + +{`store.Maintenance.Send(new PutConnectionStringOperation( + new SqlConnectionString + \{ + Name = "MyPostgresConnection", + FactoryName = "Npgsql", + ConnectionString = "Host=myserver;Port=5432;Database=mydb;Username=ravendb_cdc;Password=..." + \})); +`} + + + + +--- + +## How It Works + +PostgreSQL CDC Sink uses logical replication, which provides a continuous stream of +changes without polling. This gives low-latency, near-real-time replication. + +Key PostgreSQL concepts: + +* **Replication slot** — a server-side cursor that tracks how far a consumer has read. + CDC Sink creates and manages one automatically. +* **Publication** — defines which tables are included in the replication stream. + CDC Sink creates one automatically for the configured tables. +* **WAL (Write-Ahead Log)** — PostgreSQL retains WAL segments as long as a replication + slot has not consumed them. An inactive or paused task causes WAL to accumulate. + +--- + +## Prerequisites + +PostgreSQL requires several configuration settings and permissions before CDC Sink +can connect. See [Prerequisites Checklist](./prerequisites-checklist.mdx) for the +full list, including: + +* WAL level set to `logical` +* Sufficient `max_replication_slots` and `max_wal_senders` +* A database user with `REPLICATION` privilege +* `REPLICA IDENTITY` configuration for embedded tables with surrogate keys + +--- + +## PostgreSQL-Specific Settings + +The `CdcSinkPostgresSettings` object on `CdcSinkConfiguration` controls the replication +slot and publication names: + + + + +{`Postgres = new CdcSinkPostgresSettings +\{ + SlotName = "my_slot", + PublicationName = "my_publication" +\} +`} + + + + +If omitted, both names are generated automatically once, at task creation, and stored with the task. +See [Initial Setup](./initial-setup.mdx) for details.
+ +--- + +## Configuration + +The CDC Sink task configuration is the same across all database engines. +See [Configuration Reference](../configuration-reference.mdx) for full details. + +The `SourceTableSchema` defaults to `"public"` when using PostgreSQL. + +--- + +## In This Section + +* [Prerequisites Checklist](./prerequisites-checklist.mdx) — what to verify before creating a task +* [Initial Setup](./initial-setup.mdx) — creating the replication slot and publication +* [WAL Configuration](./wal-configuration.mdx) — PostgreSQL WAL settings +* [Permissions and Roles](./permissions-and-roles.mdx) — required database permissions +* [REPLICA IDENTITY](./replica-identity.mdx) — configuring DELETE event content +* [Monitoring PostgreSQL](./monitoring-postgres.mdx) — replication lag and WAL monitoring +* [Cleanup and Maintenance](./cleanup-and-maintenance.mdx) — dropping orphaned slots +* [Studio UI](./studio-ui.mdx) — creating tasks through the Studio interface + +--- diff --git a/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx index 0552dc23a4..8bb1add581 100644 --- a/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx @@ -1,7 +1,7 @@ --- title: "CDC Sink for SQL Server: Overview" sidebar_label: Overview -description: "Overview of CDC Sink support for SQL Server, including planned availability and links to existing PostgreSQL documentation." +description: "CDC Sink support for SQL Server — using native SQL Server Change Data Capture to stream changes into RavenDB." sidebar_position: 0 --- @@ -16,17 +16,78 @@ import LanguageContent from "@site/src/components/LanguageContent"; -* CDC Sink support for SQL Server is planned for a future release. 
+* CDC Sink supports SQL Server as a source database using native + [SQL Server Change Data Capture](https://learn.microsoft.com/en-us/sql/relational-databases/track-changes/about-change-data-capture-sql-server). + +* SQL Server CDC captures row-level changes from the transaction log and stores them + in change tables. CDC Sink polls these change tables to stream INSERTs, UPDATEs, + and DELETEs into RavenDB documents. - -This section will cover the prerequisites, setup, and SQL Server-specific -configuration for CDC Sink when SQL Server support is available. +--- -In the meantime, see the [PostgreSQL documentation](../postgres/prerequisites-checklist.mdx) -for the full CDC Sink feature documentation. - +## Connection String + +Use a `SqlConnectionString` with `FactoryName` set to `"System.Data.SqlClient"` or +`"Microsoft.Data.SqlClient"`: + + + + +{`store.Maintenance.Send(new PutConnectionStringOperation( + new SqlConnectionString + \{ + Name = "MySqlServerConnection", + FactoryName = "Microsoft.Data.SqlClient", + ConnectionString = "Server=myserver;Database=mydb;Trusted_Connection=True;" + \})); +`} + + + + +--- + +## Prerequisites + +SQL Server CDC must be enabled on the source database and on each table you want +to capture: + + + + +{`-- Enable CDC on the database +EXEC sys.sp_cdc_enable_db; + +-- Enable CDC on a table +EXEC sys.sp_cdc_enable_table + @source_schema = N'dbo', + @source_name = N'orders', + @role_name = NULL; +`} + + + + +The SQL Server Agent service must be running for CDC to function. + +--- + +## Polling Behavior + +Unlike PostgreSQL (which streams changes through logical replication), SQL Server CDC Sink uses polling. +The `CdcSink.PollIntervalInSec` server configuration key controls how frequently +the task polls for new changes. See [Server Configuration](../server-configuration.mdx). + +--- + +## Configuration + +The CDC Sink task configuration is the same across all database engines.
+See [Configuration Reference](../configuration-reference.mdx) for full details. + +The `SourceTableSchema` defaults to `"dbo"` when using SQL Server. --- @@ -35,4 +96,5 @@ for the full CDC Sink feature documentation. ### CDC Sink - [Overview](../overview.mdx) +- [Configuration Reference](../configuration-reference.mdx) - [PostgreSQL: Prerequisites Checklist](../postgres/prerequisites-checklist.mdx) From 4720d23d6cf644df96533806e3e856bf1aca6f75 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Tue, 7 Apr 2026 01:10:17 +0300 Subject: [PATCH 12/17] RavenDB-26046 - Add Source Schema Changes and Partial Export/Import troubleshooting sections - Source Schema Changes: how each database engine handles DDL changes on source tables while CDC Sink is running (adding/removing/renaming columns, SQL Server capture instance limitations) - Partial Export/Import and State Loss: @cdc-states collection, recovery guidance, SkipInitialLoad workaround, LSN editing risks --- .../cdc-sink/troubleshooting.mdx | 166 +++++++++++++++++- 1 file changed, 165 insertions(+), 1 deletion(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx index 90816bcdf0..2d4daf3c14 100644 --- a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -26,6 +26,8 @@ import LanguageContent from "@site/src/components/LanguageContent"; * [Embedded Items Missing or Incorrect](#embedded-items-missing-or-incorrect) * [DELETE Not Applied](#delete-not-applied) * [Patch Errors](#patch-errors) + * [Source Schema Changes](#source-schema-changes) + * [Partial Export/Import and State Loss](#partial-exportimport-and-state-loss) @@ -67,7 +69,7 @@ specific failure reason. slot will be created on next start). * **Source table schema changed** — adding or removing columns from the SQL table - may cause mapping errors. Update the task configuration to match the new schema. 
+ may cause mapping errors. See [Source Schema Changes](#source-schema-changes) below. * **Replication lag too large** — if CDC Sink falls behind significantly, PostgreSQL may retain WAL segments that fill the disk. See your PostgreSQL documentation on @@ -179,6 +181,168 @@ if (related) \{ ... \} --- +## Source Schema Changes + +CDC Sink resolves column metadata (names, types, positions) when the task starts or +restarts. **Schema changes on the source table while the task is running are not +detected mid-stream.** The behavior depends on the type of change and the database +engine. + +### Adding a Column (Unmapped) + +If a new column is added to the source table but is not referenced in the CDC Sink +configuration, it is **silently ignored** across all database engines. No action is +needed — the new column simply does not appear in the RavenDB document. + +The new column is still available in patch scripts via `$row` after the task is +restarted. + +### Removing a Mapped Column + +If a column that is referenced in the `Columns` mapping is dropped from the source +table: + +* **On next restart:** CDC Sink validates all mapped column names against the source + table and raises an error: `"Column 'X' not found in source columns"`. The task + enters error state and retries with exponential backoff until the configuration is + updated to remove the missing column. + +* **Mid-stream (before restart):** Behavior varies by engine: + * **PostgreSQL** — the cached column list from the replication stream becomes stale. + This may cause incorrect value mapping or errors. The task will recover the correct + state on the next restart. + * **SQL Server** — the CDC change table is independent of the source table schema and + still contains the old column, so no immediate error occurs. The column continues + to appear in change rows until the capture instance is recreated. + * **MySQL** — the missing column is silently treated as null in the document. 
No error + is raised until the task restarts. + +**Resolution:** Update the CDC Sink task configuration to remove the column from the +`Columns` list, then re-enable the task. + +### Renaming a Column + +A column rename is effectively a remove + add. The old mapped column name will fail +validation on restart (same as removing a mapped column). Update the `Columns` mapping +to use the new column name. + +### SQL Server: Capture Instance Limitations + +SQL Server CDC uses **capture instances** — snapshots of the table schema at the time +`sp_cdc_enable_table` was executed. The CDC change tables reflect the schema as it +was when the capture instance was created, **not** the current table schema. + +This means: + +* **Adding a column** to the source table does not add it to the CDC change table. + To capture the new column, you must create a new capture instance: + + + + +{`-- Disable the old capture instance +EXEC sys.sp_cdc_disable_table + @source_schema = N'dbo', + @source_name = N'orders', + @capture_instance = N'dbo_orders'; + +-- Re-enable with the updated schema +EXEC sys.sp_cdc_enable_table + @source_schema = N'dbo', + @source_name = N'orders', + @role_name = NULL; +`} + + + + + After recreating the capture instance, restart the CDC Sink task so it picks up the + new column list. + +* **Removing a column** from the source table does not remove it from the existing + capture instance — the change table still reports the old column with null values + for new rows. No immediate error occurs, but the data is stale. + + + +When planning schema migrations on a SQL Server source database, account for the +capture instance lifecycle. The recommended sequence is: + +1. Disable the CDC Sink task in RavenDB +2. Apply the schema change (`ALTER TABLE`) +3. Recreate the capture instance (`sp_cdc_disable_table` + `sp_cdc_enable_table`) +4. Update the CDC Sink task configuration if column mappings changed +5. 
Re-enable the CDC Sink task + + + +### General Recommendation + +For any schema change on the source database that affects mapped columns: + +1. Disable the CDC Sink task +2. Apply the schema change on the source database +3. Update the CDC Sink task configuration to match +4. Re-enable the task + +This avoids mid-stream mismatches and ensures a clean restart with validated column +metadata. + +--- + +## Partial Export/Import and State Loss + +**Symptom:** After importing a database export, CDC Sink re-reads all data from the +source database from the beginning, performing a full initial load even though the +task configuration is present and the data already exists in RavenDB. + +**Cause:** + +CDC Sink stores two separate pieces of information: + +* **Task configuration** — stored in the cluster-wide **database record**. This is + always included in any export/import because it is part of the database definition. +* **Processing state (last LSN)** — stored as a **document** in the `@cdc-states` + system collection. This tracks the last position read from the source database + (e.g., the PostgreSQL LSN or the SQL Server CDC LSN). It also tracks + per-table initial load progress. + +If you export and import the database but **skip importing documents** (or skip system +documents), the task configuration is restored from the database record — but the +processing state document is missing. CDC Sink sees a valid configuration with no +recorded position and starts from the beginning: performing a full initial load of +every configured table, then streaming all changes from the start. + +For large source databases, this can take a significant amount of time and place +heavy load on both the source database and RavenDB. + +**How to avoid this:** + +* When exporting a database that has CDC Sink tasks, always include documents in the + export — specifically the `@cdc-states` collection.
+* If using Smuggler or the REST API for selective export/import, ensure system + documents are included. +* If state loss has already occurred and the data in RavenDB is already up to date, + you can delete and recreate the task with `SkipInitialLoad = true` to skip the full + table scan and begin streaming from the current position. Note that imported tasks + are restored in a **disabled** state by default — you must explicitly enable the task + after import. This gives you the opportunity to inspect and correct the configuration + before the task starts processing. + + + +There is no _supported_ way to manually set the last LSN position. While the state +document in `@cdc-states` can technically be edited, doing so is risky: setting an +incorrect LSN can cause CDC Sink to skip over changes (data loss) or land in the +middle of a transaction boundary, producing errors or inconsistent documents. If the +state document is lost, the safest options are to re-read from the beginning or to +recreate the task with `SkipInitialLoad` (which skips the initial load but may miss +changes that occurred between the export and the recreation). 
+ + + +--- + ## Related Articles ### CDC Sink From ac0d035c1ba62d997eb1b567555f4b808896d454 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Tue, 7 Apr 2026 14:08:02 +0300 Subject: [PATCH 13/17] RavenDB-26056 Fix broken links, address PR comments, document config update behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broken links fixed: - api-reference: send-multiple-operations → what-are-operations - attachment-handling: what-are-attachments → attachments/overview - column-mapping, patching: postgres/type-mapping.mdx (nonexistent) → cross-reference to patching.mdx#row-column-types PR comment fixes: - MySQL overview: rename MyMySqlConnection → MySqlConnection - Postgres overview: slot/publication names are GUID-based on first use, not deterministic hash-based New content: - how-it-works: "Updating the Task Configuration" section explaining that config changes only apply to new CDC events going forward. Existing documents are not retroactively re-processed. To apply changes to all documents, delete and recreate the task. --- .../ongoing-tasks/cdc-sink/api-reference.mdx | 2 +- .../cdc-sink/attachment-handling.mdx | 2 +- .../ongoing-tasks/cdc-sink/column-mapping.mdx | 4 +-- .../ongoing-tasks/cdc-sink/how-it-works.mdx | 35 ++++++++++++++++++- .../ongoing-tasks/cdc-sink/mysql/overview.mdx | 2 +- .../ongoing-tasks/cdc-sink/patching.mdx | 2 +- .../cdc-sink/postgres/overview.mdx | 2 +- 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx index f2def3578a..c5074617f6 100644 --- a/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/api-reference.mdx @@ -218,5 +218,5 @@ See [Cleanup and Maintenance](./postgres/cleanup-and-maintenance.mdx). 
### Client API -- [Operations: How to Send Operations](../../../client-api/operations/how-to/send-multiple-operations) +- [What are Operations](../../../client-api/operations/what-are-operations) - [Ongoing Task Operations](../../../client-api/operations/maintenance/ongoing-tasks/ongoing-task-operations) diff --git a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx index f7645bca8b..208e03d497 100644 --- a/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/attachment-handling.mdx @@ -196,4 +196,4 @@ PrimaryKeyColumns = ["date", "seq"] → date='2024-01', seq=3 ### Document Extensions -- [Attachments: What are Attachments](../../../document-extensions/attachments/what-are-attachments) +- [Attachments Overview](../../../document-extensions/attachments/overview) diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 1e63b66fc5..33f36503d8 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -112,7 +112,7 @@ Type conversions are source-database-specific. For PostgreSQL: * SQL arrays → JSON arrays * `vector` (pgvector) → `float[]` -See [PostgreSQL Type Mapping](./postgres/type-mapping.mdx) for the full reference table. +See [Patching: $row Column Types](./patching.mdx#row-column-types) for the full reference table. **`CdcColumnType.Json`** — Parses the string value as a native JSON value (object, array, string, number, boolean, etc.) in the document. 
Use for PostgreSQL `json`/`jsonb` or @@ -235,4 +235,4 @@ For most PostgreSQL setups using the default `public` schema, this can be omitte ### PostgreSQL -- [Type Mapping](./postgres/type-mapping.mdx) +- [Patching: $row Column Types](./patching.mdx#row-column-types) diff --git a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx index ef8c24ba26..94c3047800 100644 --- a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx @@ -29,6 +29,7 @@ import LanguageContent from "@site/src/components/LanguageContent"; * [Change Streaming](#change-streaming) * [Transaction Ordering](#transaction-ordering) * [State Persistence and Failover](#state-persistence-and-failover) + * [Updating the Task Configuration](#updating-the-task-configuration) * [Child Before Parent](#child-before-parent) @@ -179,7 +180,39 @@ See [Patching](./patching.mdx) for guidance. --- -## Child Before Parent +## Updating the Task Configuration + +When you update a CDC Sink task configuration — for example, adding or removing columns +from the `Columns` list, changing a `Patch` script, or modifying embedded table +configuration — the changes take effect **only for new CDC events going forward**. + +CDC Sink does not retroactively re-process existing documents. Documents that were +already created retain their current structure. This means: + +* **Adding a new column mapping** — Existing documents will not have the new property. + Only documents created or updated by a subsequent CDC event will include it. +* **Removing a column mapping** — Existing documents retain the property. It will no + longer be updated by CDC events, but it is not removed from existing documents. +* **Changing a patch script** — The new patch runs on future events only. Existing + documents reflect the results of the previous patch. 
+* **Adding an embedded or linked table** — Existing parent documents will not have the + new embedded data until a CDC event for the embedded table row arrives. + + + +To apply configuration changes to **all** documents (not just new events), delete +the CDC Sink task and recreate it. The new task will perform a fresh initial load, +re-processing all rows with the updated configuration. + +If you only need to backfill a specific property on existing documents, you can also +use a RavenDB [patch by query](../../../client-api/operations/patching/set-based) +to update existing documents independently of the CDC Sink task. + + + +--- + + Tables are scanned one at a time during the initial load. This means a child table can contain rows that reference a parent row inserted *after* the parent table scan had diff --git a/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx index 6c92f20c39..f0ae2aee1e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx @@ -38,7 +38,7 @@ Use a `SqlConnectionString` with `FactoryName` set to `"MySql.Data.MySqlClient"` {`store.Maintenance.Send(new PutConnectionStringOperation( new SqlConnectionString \{ - Name = "MyMySqlConnection", + Name = "MySqlConnection", FactoryName = "MySqlConnector.MySqlConnectorFactory", ConnectionString = "Server=myserver;Port=3306;Database=mydb;User=ravendb_cdc;Password=..." 
\})); diff --git a/docs/server/ongoing-tasks/cdc-sink/patching.mdx b/docs/server/ongoing-tasks/cdc-sink/patching.mdx index 1b3579b912..2b7bf52eef 100644 --- a/docs/server/ongoing-tasks/cdc-sink/patching.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/patching.mdx @@ -498,7 +498,7 @@ Keep patches focused and efficient — prefer `.filter()` + `.reduce()` over nes ### PostgreSQL -- [Type Mapping](./postgres/type-mapping.mdx) +- [Column Mapping](./column-mapping.mdx) ### Server diff --git a/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx index ef07f3bd6d..f8aebd302e 100644 --- a/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/postgres/overview.mdx @@ -95,7 +95,7 @@ slot and publication names: -If omitted, deterministic hash-based names are generated automatically. +If omitted, GUID-based names are generated automatically on first use. See [Initial Setup](./initial-setup.mdx) for details. 
--- From 3eee4c4d37a4850f0636808a0fdebdfecd0ff882 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Tue, 7 Apr 2026 15:32:52 +0300 Subject: [PATCH 14/17] RavenDB-26056 Extract Source Schema Changes to dedicated page Move schema change documentation from troubleshooting into its own page with per-engine detail: - PostgreSQL: auto-detects via RelationMessage, most resilient - MySQL: detects via TableMapEvent column types, auto-recovers - SQL Server: requires explicit capture instance procedure (create new instance, drain old, then drop) - Quick reference table, SQL examples, recovery mechanism - Troubleshooting retains a short summary with link to the new page --- .../cdc-sink/source-schema-changes.mdx | 253 ++++++++++++++++++ .../cdc-sink/troubleshooting.mdx | 111 +------- 2 files changed, 260 insertions(+), 104 deletions(-) create mode 100644 docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx diff --git a/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx b/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx new file mode 100644 index 0000000000..63113a7981 --- /dev/null +++ b/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx @@ -0,0 +1,253 @@ +--- +title: "CDC Sink: Source Schema Changes" +sidebar_label: Source Schema Changes +description: "How to safely modify your source database schema while a CDC Sink task is running — behavior per database engine, recovery, and the SQL Server capture instance procedure." 
+sidebar_position: 13 +--- + +import Admonition from '@theme/Admonition'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import LanguageSwitcher from "@site/src/components/LanguageSwitcher"; +import LanguageContent from "@site/src/components/LanguageContent"; + +# CDC Sink: Source Schema Changes + + + +* This page explains how CDC Sink handles schema changes (columns added, removed, + renamed, or retyped) on the source database tables while the task is running. + +* The behavior differs by database engine. PostgreSQL is the most resilient, + MySQL detects most changes automatically, and SQL Server requires an explicit + procedure for every schema change. + +* In this page: + * [Quick Reference](#quick-reference) + * [PostgreSQL](#postgresql) + * [MySQL](#mysql) + * [SQL Server](#sql-server) + * [Recovery From Errors](#recovery-from-errors) + * [General Recommendation](#general-recommendation) + + + +--- + +## Quick Reference + +| Change | MySQL | PostgreSQL | SQL Server | +|--------|-------|------------|------------| +| Add column (unmapped) | Transparent | Transparent | Requires [procedure](#sql-server-schema-change-procedure) | +| Add column (mapped) | Update config + restart | Update config + restart | Requires [procedure](#sql-server-schema-change-procedure) + config update | +| Drop unmapped column | Transparent | Transparent | Requires [procedure](#sql-server-schema-change-procedure) | +| Drop mapped column | Enters fallback (config error) | Enters fallback (config error) | Requires [procedure](#sql-server-schema-change-procedure) | +| Change column type | Restarts, usually recovers | Transparent (auto-rebuild) | Requires [procedure](#sql-server-schema-change-procedure) | +| Rename mapped column | Enters fallback | Enters fallback | Requires [procedure](#sql-server-schema-change-procedure) | + +--- + +## PostgreSQL + +PostgreSQL logical replication sends `RelationMessage` events inline in 
the WAL stream +whenever the schema changes. CDC Sink detects these automatically and rebuilds its +column mapping on the fly. This makes PostgreSQL the **most resilient** to schema changes. + +* **Adding or dropping unmapped columns** — transparent. CDC Sink auto-rebuilds and + continues without interruption. + +* **Changing a column type** — usually transparent. A new `RelationMessage` triggers + an automatic rebuild of type categories. + +* **Dropping or renaming a mapped column** — CDC Sink enters fallback because the + mapped column name is no longer found in the new schema. Update the task + configuration to match the new column name, and CDC Sink recovers on the next retry. + +### PostgreSQL Examples + + + + +{`-- Transparent: adding an unmapped column +ALTER TABLE orders ADD COLUMN internal_notes TEXT; +-- CDC Sink auto-rebuilds and continues + +-- Transparent: changing a column type +ALTER TABLE orders ALTER COLUMN status TYPE VARCHAR(100); +-- CDC Sink auto-rebuilds and continues + +-- Requires config update: renaming a mapped column +ALTER TABLE orders RENAME COLUMN name TO full_name; +-- CDC Sink enters fallback. Update column mapping from 'name' to 'full_name'. +`} + + + + +--- + +## MySQL + +MySQL CDC uses binlog replication. Each row event is preceded by a `TableMapEvent` +containing column type information. CDC Sink compares the column types at mapped +positions against the expected types from `INFORMATION_SCHEMA`. If they don't match, +the process detects a schema change. + +* **Adding or dropping unmapped columns** — transparent. The unmapped column is ignored. + +* **Changing a column type** — CDC Sink detects the type change via binlog column types, + restarts, re-resolves from `INFORMATION_SCHEMA`, and continues. + +* **Dropping or renaming a mapped column** — CDC Sink enters fallback because the + mapped column is no longer found. Update the task configuration to match the new + schema. 
+ +### MySQL Examples + + + + +{`-- Transparent: adding an unmapped column +ALTER TABLE orders ADD COLUMN internal_notes TEXT; +-- CDC Sink continues without interruption + +-- Auto-recovers: changing a column type +ALTER TABLE orders MODIFY COLUMN status VARCHAR(100); +-- CDC Sink detects change, restarts, and recovers automatically + +-- Requires config update: dropping a mapped column +ALTER TABLE orders DROP COLUMN legacy_field; +-- CDC Sink enters fallback. Update the config to remove 'legacy_field' from Columns. +`} + + + + +--- + +## SQL Server + +SQL Server CDC uses **capture instances** — immutable snapshots of the table schema +at the time `sp_cdc_enable_table` was executed. The CDC change tables reflect the +schema as it was when the capture instance was created, **not** the current table +schema. This means **any** schema change on a CDC-tracked table requires creating +a new capture instance. + +SQL Server supports at most **two** capture instances per table at a time. The CDC +Sink always reads from the oldest active instance. When the old instance is dropped, +CDC Sink detects this (via `fn_cdc_get_min_lsn` returning all zeros), restarts, and +picks up the new instance. + +### SQL Server Schema Change Procedure + +For **any** schema change on a SQL Server CDC-tracked table, follow this procedure: + + + + +{`-- Step 1: ALTER the table +ALTER TABLE dbo.orders ADD priority INT DEFAULT 0; + +-- Step 2: Create a NEW capture instance (the old one still exists) +EXEC sys.sp_cdc_enable_table + @source_schema = N'dbo', + @source_name = N'orders', + @capture_instance = N'dbo_orders_v2', + @role_name = NULL; + +-- Step 3: WAIT — let CDC Sink drain the OLD capture instance. +-- The sink reads from the oldest instance and processes all remaining changes. +-- Monitor the CDC Sink task status in Studio to confirm it has caught up. 
+ +-- Step 4: Drop the OLD capture instance +EXEC sys.sp_cdc_disable_table + @source_schema = N'dbo', + @source_name = N'orders', + @capture_instance = N'dbo_orders'; + +-- CDC Sink detects the drop, restarts, and picks up dbo_orders_v2. +`} + + + + +**What happens at each step:** + +1. **After step 2** — Two capture instances exist. CDC Sink continues reading from the + oldest one (`dbo_orders`). New changes are captured by both instances. +2. **After step 3** — CDC Sink has processed all changes from the old instance. It's + safe to drop it. +3. **After step 4** — `fn_cdc_get_min_lsn('dbo_orders')` returns all zeros. CDC Sink + detects this on the next poll cycle, enters fallback, and restarts against the new + instance. + +If the schema change also affects column mappings, update the CDC Sink task +configuration after the new capture instance is active. + + + +Do not drop the old capture instance before CDC Sink has drained it — you may lose +changes that were captured in the old instance but not yet processed. + + + +### SQL Server Agent + +CDC requires SQL Server Agent to be running. The capture job (which populates change +tables) and cleanup job (which purges old changes) are Agent jobs. + + + + +{`-- Check Agent status +SELECT * FROM sys.dm_server_services WHERE servicename LIKE '%Agent%'; +`} + + + + +Without the Agent, changes are never captured and CDC Sink polls indefinitely without +finding data. + +--- + +## Recovery From Errors + +All three providers use the same recovery mechanism: + +1. **Error detected** — process enters fallback mode with exponential backoff (starting + at 5 seconds, up to `CdcSink.MaxFallbackTimeInSec`) +2. **On each retry** — process restarts: re-resolves schema, re-opens connections +3. **If the error persists** (e.g., mapped column still missing) — retries continue + with increasing backoff. A notification is raised for each error, visible in Studio. +4. 
**After fixing the configuration** — the process recovers automatically on the next + retry cycle. No manual restart is needed. + +--- + +## General Recommendation + +For schema changes that affect mapped columns on any database engine: + +1. Update the CDC Sink task configuration to match the new schema +2. Apply the schema change on the source database +3. For SQL Server: follow the [capture instance procedure](#sql-server-schema-change-procedure) above, and update the task configuration only after the new capture instance is active + +CDC Sink will detect the change and recover automatically. Disabling the task +beforehand is not required but can avoid a brief fallback cycle. + +--- + +## Related Articles + +### CDC Sink + +- [Configuration Reference](./configuration-reference.mdx) +- [Column Mapping](./column-mapping.mdx) +- [How It Works: Updating the Task Configuration](./how-it-works.mdx#updating-the-task-configuration) +- [Troubleshooting](./troubleshooting.mdx) + +### SQL Server + +- [SQL Server Overview](./sql-server/overview.mdx) diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx index 2d4daf3c14..5c8b275645 100644 --- a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -69,7 +69,7 @@ specific failure reason. slot will be created on next start). * **Source table schema changed** — adding or removing columns from the SQL table - may cause mapping errors. See [Source Schema Changes](#source-schema-changes) below. + may cause mapping errors. See [Source Schema Changes](./source-schema-changes.mdx). * **Replication lag too large** — if CDC Sink falls behind significantly, PostgreSQL may retain WAL segments that fill the disk. See your PostgreSQL documentation on @@ -183,110 +183,13 @@ if (related) \{ ... \} ## Source Schema Changes -CDC Sink resolves column metadata (names, types, positions) when the task starts or -restarts.
**Schema changes on the source table while the task is running are not -detected mid-stream.** The behavior depends on the type of change and the database -engine. +Adding, removing, renaming, or retyping columns on the source database while a CDC +Sink task is running requires different handling depending on the database engine. +PostgreSQL and MySQL detect most changes automatically; SQL Server requires an explicit +capture instance procedure for every schema change. -### Adding a Column (Unmapped) - -If a new column is added to the source table but is not referenced in the CDC Sink -configuration, it is **silently ignored** across all database engines. No action is -needed — the new column simply does not appear in the RavenDB document. - -The new column is still available in patch scripts via `$row` after the task is -restarted. - -### Removing a Mapped Column - -If a column that is referenced in the `Columns` mapping is dropped from the source -table: - -* **On next restart:** CDC Sink validates all mapped column names against the source - table and raises an error: `"Column 'X' not found in source columns"`. The task - enters error state and retries with exponential backoff until the configuration is - updated to remove the missing column. - -* **Mid-stream (before restart):** Behavior varies by engine: - * **PostgreSQL** — the cached column list from the replication stream becomes stale. - This may cause incorrect value mapping or errors. The task will recover the correct - state on the next restart. - * **SQL Server** — the CDC change table is independent of the source table schema and - still contains the old column, so no immediate error occurs. The column continues - to appear in change rows until the capture instance is recreated. - * **MySQL** — the missing column is silently treated as null in the document. No error - is raised until the task restarts. 
- -**Resolution:** Update the CDC Sink task configuration to remove the column from the -`Columns` list, then re-enable the task. - -### Renaming a Column - -A column rename is effectively a remove + add. The old mapped column name will fail -validation on restart (same as removing a mapped column). Update the `Columns` mapping -to use the new column name. - -### SQL Server: Capture Instance Limitations - -SQL Server CDC uses **capture instances** — snapshots of the table schema at the time -`sp_cdc_enable_table` was executed. The CDC change tables reflect the schema as it -was when the capture instance was created, **not** the current table schema. - -This means: - -* **Adding a column** to the source table does not add it to the CDC change table. - To capture the new column, you must create a new capture instance: - - - - -{`-- Disable the old capture instance -EXEC sys.sp_cdc_disable_table - @source_schema = N'dbo', - @source_name = N'orders', - @capture_instance = N'dbo_orders'; - --- Re-enable with the updated schema -EXEC sys.sp_cdc_enable_table - @source_schema = N'dbo', - @source_name = N'orders', - @role_name = NULL; -`} - - - - - After recreating the capture instance, restart the CDC Sink task so it picks up the - new column list. - -* **Removing a column** from the source table does not remove it from the existing - capture instance — the change table still reports the old column with null values - for new rows. No immediate error occurs, but the data is stale. - - - -When planning schema migrations on a SQL Server source database, account for the -capture instance lifecycle. The recommended sequence is: - -1. Disable the CDC Sink task in RavenDB -2. Apply the schema change (`ALTER TABLE`) -3. Recreate the capture instance (`sp_cdc_disable_table` + `sp_cdc_enable_table`) -4. Update the CDC Sink task configuration if column mappings changed -5. 
Re-enable the CDC Sink task - - - -### General Recommendation - -For any schema change on the source database that affects mapped columns: - -1. Disable the CDC Sink task -2. Apply the schema change on the source database -3. Update the CDC Sink task configuration to match -4. Re-enable the task - -This avoids mid-stream mismatches and ensures a clean restart with validated column -metadata. +See [Source Schema Changes](./source-schema-changes.mdx) for the full guide, including +per-engine behavior, examples, and the SQL Server procedure. --- From 2115556490628ae4be8b2317954b480e09e636b7 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Tue, 7 Apr 2026 15:43:22 +0300 Subject: [PATCH 15/17] RavenDB-26056 MySQL: warn to apply schema changes one at a time MySQL CDC detects changes by column position. Compound ALTER TABLE statements (add + drop, ADD COLUMN ... AFTER ...) cause positional shifts that are hard to resolve. Apply one change at a time and let CDC Sink catch up between each. --- .../cdc-sink/source-schema-changes.mdx | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx b/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx index 63113a7981..cfd12c7fba 100644 --- a/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx @@ -54,11 +54,10 @@ PostgreSQL logical replication sends `RelationMessage` events inline in the WAL whenever the schema changes. CDC Sink detects these automatically and rebuilds its column mapping on the fly. This makes PostgreSQL the **most resilient** to schema changes. -* **Adding or dropping unmapped columns** — transparent. CDC Sink auto-rebuilds and - continues without interruption. +* **Adding or dropping unmapped columns** — transparent. * **Changing a column type** — usually transparent. 
A new `RelationMessage` triggers - an automatic rebuild of type categories. + an update to the internal column mapping inside the sink. * **Dropping or renaming a mapped column** — CDC Sink enters fallback because the mapped column name is no longer found in the new schema. Update the task @@ -95,6 +94,8 @@ positions against the expected types from `INFORMATION_SCHEMA`. If they don't ma the process detects a schema change. * **Adding or dropping unmapped columns** — transparent. The unmapped column is ignored. + The CDC Sink may restart itself to re-resovle the current schema from `INFORMATION_SCHEMA`, + but normal operations will resume momentarily. * **Changing a column type** — CDC Sink detects the type change via binlog column types, restarts, re-resolves from `INFORMATION_SCHEMA`, and continues. @@ -103,6 +104,19 @@ the process detects a schema change. mapped column is no longer found. Update the task configuration to match the new schema. + + +**Apply schema changes one at a time.** MySQL CDC Sink detects schema changes by +comparing column types at specific positions. If multiple changes are applied in a +single `ALTER TABLE` statement (e.g., `ADD COLUMN ... AFTER ...`, or adding and +removing columns together), the positional shifts can compound and make it difficult +for CDC Sink to resolve the new schema correctly. + +Apply one schema change, let CDC Sink catch up and confirm it is processing normally, +then apply the next change. This ensures each change is simple and isolated. + + + ### MySQL Examples From 6bb2ee007870a12af839a00597be684baacc8466 Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Wed, 15 Apr 2026 09:29:42 +0000 Subject: [PATCH 16/17] RavenDB-26046 Address PR #2387 review comments Fix typo, missing heading, misleading wording, and inconsistent descriptions flagged during code review.
--- docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx | 7 +++++-- docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx | 2 +- docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx | 3 ++- docs/server/ongoing-tasks/cdc-sink/monitoring.mdx | 4 ++-- docs/server/ongoing-tasks/cdc-sink/schema-design.mdx | 3 ++- .../ongoing-tasks/cdc-sink/source-schema-changes.mdx | 2 +- docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx | 7 ++++--- 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx index 33f36503d8..3d4e6fdbde 100644 --- a/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/column-mapping.mdx @@ -155,8 +155,11 @@ Property names (the `Name` values in `Columns`) become properties on the RavenDB document — accessible as `this.FinalPrice` inside a patch script. Column names (the `Column` values in `Columns`, plus any unmapped columns) are accessible -in patch scripts via `$row.base_price` (for the current row's values) and -`$old?.base_price` (for the previous row's values on UPDATE events). +in patch scripts via `$row.base_price` (for the current row's values). + +`$old` contains the previous state of the document and uses **mapped property names** +(the `Name` values), not SQL column names. Unmapped columns do not appear in `$old`. +See [Patching: $row and $old](./patching.mdx#row-and-old-names-and-types) for details. --- diff --git a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx index 94c3047800..414ef0e4d5 100644 --- a/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/how-it-works.mdx @@ -212,7 +212,7 @@ to update existing documents independently of the CDC Sink task. --- - +## Child Before Parent Tables are scanned one at a time during the initial load. 
This means a child table can contain rows that reference a parent row inserted *after* the parent table scan had diff --git a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx index e0ecc863fa..b54a563f58 100644 --- a/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/linked-tables.mdx @@ -77,7 +77,8 @@ With `customer_id = 42`, the Orders document gets: -The `Customer` property is a RavenDB document ID. Load the referenced document using +The `Customer` property is a RavenDB document ID, built from the `LinkedCollectionName` +and the join column values. Load the referenced document using [includes](../../../client-api/session/loading-entities#load-with-includes) to avoid a second network call. diff --git a/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx index 010c0f4381..ad48b4ba9b 100644 --- a/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/monitoring.mdx @@ -56,8 +56,8 @@ In fallback mode: * No changes are applied while the connection is down * The task automatically resumes streaming once the source is reachable again -The maximum time the task will remain in fallback mode before reporting an error -is controlled by the `CdcSink.MaxFallbackTimeInSec` configuration key. +Retry delay doubles on each consecutive failure, up to the cap set by +`CdcSink.MaxFallbackTimeInSec` (default 15 minutes). See [Server Configuration](./server-configuration.mdx). --- diff --git a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx index 3958b38dca..9c9edd6568 100644 --- a/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/schema-design.mdx @@ -62,6 +62,7 @@ becomes one document. 
**Document ID generation:** `\{CollectionName\}/\{pk1\}/\{pk2\}/...` A row with `id = 42` and collection `Orders` becomes document `Orders/42`. A composite PK `(region, id)` with values `(US, 42)` becomes `Orders/US/42`. +CDC Sink uses the `CollectionName` verbatim as the document ID prefix. **Column mapping:** Only columns listed in `Columns` appear in the document. Unmapped columns are still available in patch scripts via `$row` but are not stored in the document. @@ -182,7 +183,7 @@ An embedded table needs: * **PrimaryKeyColumns** — Used to match items within the parent's array for UPDATE and DELETE * **JoinColumns** — Foreign key referencing the parent's `PrimaryKeyColumns` -The `JoinColumns` must exactly match the parent's `PrimaryKeyColumns`: +The `JoinColumns` must correspond 1:1 (same count and order) to the parent's `PrimaryKeyColumns`: | Parent PK | Required JoinColumns | Valid? | |-----------|---------------------|--------| diff --git a/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx b/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx index cfd12c7fba..387f1d2c4b 100644 --- a/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/source-schema-changes.mdx @@ -94,7 +94,7 @@ positions against the expected types from `INFORMATION_SCHEMA`. If they don't ma the process detects a schema change. * **Adding or dropping unmapped columns** — transparent. The unmapped column is ignored. - The CDC Sink may restart itself to re-resovle the current schema from `INFORMATION_SCHEMA`, + The CDC Sink may restart itself to re-resolve the current schema from `INFORMATION_SCHEMA`, but normal operations will resume momentarily. 
* **Changing a column type** — CDC Sink detects the type change via binlog column types, diff --git a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx index 5c8b275645..e34d32aa55 100644 --- a/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/troubleshooting.mdx @@ -75,9 +75,10 @@ specific failure reason. may retain WAL segments that fill the disk. See your PostgreSQL documentation on `wal_keep_size` and replication slot lag. -* **Exceeded fallback timeout** — the source was unreachable for longer than - `CdcSink.MaxFallbackTimeInSec`. The task moves to error state after this timeout. - Restore connectivity and re-enable the task. +* **Persistent connection failure** — the source has been unreachable through multiple + retry cycles. Retry delay doubles on each failure up to the `CdcSink.MaxFallbackTimeInSec` + cap (default 15 minutes). Restore connectivity and the task resumes automatically + on the next retry. 
--- From 863d222e1517076eb67f0a379449d431ac90059b Mon Sep 17 00:00:00 2001 From: Ayende Rahien Date: Wed, 15 Apr 2026 09:37:20 +0000 Subject: [PATCH 17/17] RavenDB-26046 Fix config key names and add missing Postgres timeout setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename CdcSink.PollIntervalInSec to CdcSink.SqlServer.PollIntervalInSec to match actual code (3 files) - Add missing CdcSink.Postgres.ReplicationTimeoutInSec (default 10s) - Fix type conversion: decimal→double should be numeric/decimal→decimal --- .../cdc-sink/configuration-reference.mdx | 2 +- .../ongoing-tasks/cdc-sink/mysql/overview.mdx | 2 +- .../cdc-sink/server-configuration.mdx | 14 ++++++++++++-- .../ongoing-tasks/cdc-sink/sql-server/overview.mdx | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx index ac255fa74f..809ce1ddcf 100644 --- a/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/configuration-reference.mdx @@ -160,7 +160,7 @@ Controls how a SQL column value is stored in RavenDB. | Value | Behavior | |-------|----------| -| `Default` | Standard type conversion: int→long, decimal→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. | +| `Default` | Standard type conversion: int→long, numeric/decimal→decimal, real→float, double precision→double, date→DateOnly, timestamp→DateTime, uuid→string, varchar/text→string. SQL arrays→JSON arrays. JSON/JSONB stored as plain string. | | `Json` | Parses the string value as a native JSON value (object, array, string, number, boolean, etc.) in the document. Use for PostgreSQL `json`/`jsonb` or SQL Server `nvarchar(max)` with JSON content. | | `Attachment` | Stores the raw column value as a RavenDB attachment. 
`byte[]`→binary (`application/octet-stream`), `string`→UTF-8 text (`text/plain`), `float[]`/`double[]`→raw vector data. The `Name` field becomes the attachment name. | diff --git a/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx index f0ae2aee1e..5a4bba128b 100644 --- a/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/mysql/overview.mdx @@ -88,7 +88,7 @@ GRANT SELECT ON mydb.* TO 'ravendb_cdc'@'%'; ## Streaming Behavior Like PostgreSQL, MySQL CDC Sink uses streaming replication (via the binlog), not -polling. The `CdcSink.PollIntervalInSec` server configuration setting does not +polling. The `CdcSink.SqlServer.PollIntervalInSec` server configuration setting does not apply to MySQL. --- diff --git a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx index fe680783b2..aa6d656ff7 100644 --- a/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/server-configuration.mdx @@ -53,16 +53,26 @@ Set to `0` to disable fallback mode entirely — the task will retry immediately --- -#### `CdcSink.PollIntervalInSec` +#### `CdcSink.SqlServer.PollIntervalInSec` **Default:** `1` -How frequently (in seconds) the CDC Sink polls the source database for new change rows. This setting applies to **SQL Server** only, which uses polling-based change capture. +How frequently (in seconds) the SQL Server CDC Sink polls for new change rows. Lower values reduce latency but increase load on the source database. PostgreSQL and MySQL use streaming replication and ignore this setting. --- +#### `CdcSink.Postgres.ReplicationTimeoutInSec` + +**Default:** `10` + +Timeout (in seconds) for the PostgreSQL replication connection. Controls both the server-side `wal_sender_timeout` (keepalives arrive at roughly half this interval) and the client-side `WalReceiverTimeout`. 
Lower values detect dead connections faster but increase keepalive traffic. + +SQL Server and MySQL ignore this setting. + +--- + ## Related Articles ### CDC Sink diff --git a/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx index 8bb1add581..9b103d38bb 100644 --- a/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx +++ b/docs/server/ongoing-tasks/cdc-sink/sql-server/overview.mdx @@ -77,7 +77,7 @@ The SQL Server Agent service must be running for CDC to function. ## Polling Behavior Unlike PostgreSQL (which uses streaming replication), SQL Server CDC Sink uses polling. -The `CdcSink.PollIntervalInSec` server configuration key controls how frequently +The `CdcSink.SqlServer.PollIntervalInSec` server configuration key controls how frequently the task polls for new changes. See [Server Configuration](../server-configuration.mdx). ---