diff --git a/.gitignore b/.gitignore
index a3c97ff992..e23903ec9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 CLAUDE.md
+# PR1-description.md is a working artifact used when manually opening PR1; not committed.
+PR1-description.md
 target
 .idea
 *.iml
@@ -10,6 +12,7 @@ metastore_db/
 spark-warehouse/
 dependency-reduced-pom.xml
 native/proto/src/generated
+contrib/example/native/src/generated
 prebuild
 .flattened-pom.xml
 rat.txt
diff --git a/contrib/example/native/Cargo.toml b/contrib/example/native/Cargo.toml
new file mode 100644
index 0000000000..698814c603
--- /dev/null
+++ b/contrib/example/native/Cargo.toml
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "comet-contrib-example"
+description = "Worked reference implementation of a Comet contrib extension. Not published; bundled as a SPI dispatch test fixture."
+# Contrib crates live OUTSIDE the workspace root directory (`native/`) but are listed as
+# workspace members in `native/Cargo.toml`. Cargo's auto-discovery walks up the directory
+# tree, so without the explicit pointer it can't find `native/Cargo.toml` from
+# `contrib/example/native/`.
+workspace = "../../../native"
+version = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+edition = { workspace = true }
+
+[lib]
+# rlib (not cdylib): linked INTO core's cdylib via the `contrib-example` Cargo feature
+# flag on the core crate. There is exactly one libcomet.{so,dylib,dll} at runtime; the
+# contrib's #[ctor] runs during that single library's init.
+crate-type = ["rlib"]
+
+[dependencies]
+# Depend on the thin SPI crate, NOT on core. This is what breaks the cycle: core
+# depends on contribs (Cargo feature -> rlib link); both depend on contrib-spi; nothing
+# depends back on core from a contrib.
+comet-contrib-spi = { path = "../../../native/contrib-spi" }
+datafusion = { workspace = true }
+# Used only in unit tests to construct a TestCtx that implements ContribPlannerContext;
+# kept in [dependencies] (not [dev-dependencies]) because the trait's typed methods take
+# spark_expression / spark_operator proto refs and the impl module is not test-gated.
+datafusion-comet-proto = { workspace = true }
+prost = "0.14.3"
+ctor = "0.4"
+log = "0.4"
+
+# Each contrib runs its own prost-build over its own .proto files (see build.rs).
+[build-dependencies]
+prost-build = "0.14.3"
diff --git a/contrib/example/native/build.rs b/contrib/example/native/build.rs
new file mode 100644
index 0000000000..236360962e
--- /dev/null
+++ b/contrib/example/native/build.rs
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Build script for the example contrib's proto. Mirrors `native/proto/build.rs`.
+//!
+//! Each contrib runs its own `prost-build` invocation against its own `.proto` files.
+//! This keeps core's proto crate format-agnostic and lets contribs evolve their wire
+//! format independently. The generated Rust types live under `src/generated/` and are
+//! gitignored.
+
+use std::{fs, io::Result, path::Path};
+
+fn main() -> Result<()> {
+    println!("cargo:rerun-if-changed=src/proto/");
+
+    let out_dir = "src/generated";
+    if !Path::new(out_dir).is_dir() {
+        fs::create_dir(out_dir)?;
+    }
+
+    prost_build::Config::new()
+        .out_dir(out_dir)
+        .compile_protos(&["src/proto/example_op.proto"], &["src/proto"])?;
+    Ok(())
+}
diff --git a/contrib/example/native/src/lib.rs b/contrib/example/native/src/lib.rs
new file mode 100644
index 0000000000..8bd0753fe2
--- /dev/null
+++ b/contrib/example/native/src/lib.rs
@@ -0,0 +1,240 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Worked reference implementation of a Comet contrib extension.
+//!
+//! Demonstrates two patterns future contribs will follow:
+//!
+//!   1. **Dispatch wiring** -- registers a `ContribOperatorPlanner` against a stable
+//!      `kind` string at lib-init time via `#[ctor::ctor]`. The planner is called from
+//!      core's `OpStruct::ContribOp` dispatcher with the contrib's payload bytes.
+//!
+//!   2. **Proto layer** -- the contrib has its own `proto/` directory with its own
+//!      `.proto` schema (`example_op.proto`). `build.rs` runs `prost-build` over it;
+//!      generated Rust types live under `src/generated/` (gitignored). The planner
+//!      decodes the payload via `prost::Message::decode` -- the same way real contribs
+//!      (Delta etc.) will.
+//!
+//! Two planner kinds are registered:
+//!
+//!   * `example-no-op`            -- returns a sentinel error. Tests use this to verify
+//!                                   the dispatch chain end-to-end.
+//!   * `example-constant-scan`    -- decodes an `ExampleConstantScan` payload, returns
+//!                                   an `EmptyExec` sized by the payload's `row_count`.
+//!                                   Real contribs (Delta) follow the same pattern,
+//!                                   just with their own message and operator.
+//!
+//! The whole crate is gated by `native/core/Cargo.toml`'s `contrib-example` feature flag.
+//! Build core without that feature (`cargo build --no-default-features`) and zero bytes
+//! of this crate end up in `libcomet`.
+
+use std::sync::Arc;
+
+use comet_contrib_spi::{
+    register_contrib_planner, ContribError, ContribOperatorPlanner, ContribPlannerContext,
+};
+use datafusion::arrow::datatypes::Schema;
+use datafusion::physical_plan::empty::EmptyExec;
+use datafusion::physical_plan::ExecutionPlan;
+use prost::Message;
+
+/// Generated Rust types for the contrib's proto schema. `build.rs` writes the module
+/// here at compile time; `src/generated/` is gitignored.
+pub mod proto {
+    include!(concat!("generated/", "comet.contrib.example.rs"));
+}
+
+/// Sentinel kind used by tests to verify dispatch reaches this contrib at all.
+pub const EXAMPLE_NO_OP_KIND: &str = "example-no-op";
+
+/// Kind for the proto-decoding constant-scan planner. Demonstrates the
+/// proto-decode-and-build path real contribs will use.
+pub const EXAMPLE_CONSTANT_SCAN_KIND: &str = "example-constant-scan";
+
+/// A planner that intentionally does no plan-building work. Returns a sentinel error so
+/// dispatch tests can assert the message reaches this code path. The payload is ignored;
+/// children are ignored.
+struct NoOpPlanner;
+
+impl ContribOperatorPlanner for NoOpPlanner {
+    fn plan(
+        &self,
+        _ctx: &dyn ContribPlannerContext,
+        _payload: &[u8],
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>, ContribError> {
+        Err(ContribError::Plan(format!(
+            "comet-contrib-example: NoOpPlanner reached for kind={EXAMPLE_NO_OP_KIND:?}; \
+             this is the expected sentinel for SPI dispatch tests"
+        )))
+    }
+}
+
+/// Decodes the payload as an `ExampleConstantScan` proto and returns an `EmptyExec`
+/// with a schema-less output. Real contribs use the same decode-then-build pattern --
+/// they just decode richer messages and return richer execs.
+struct ConstantScanPlanner;
+
+impl ContribOperatorPlanner for ConstantScanPlanner {
+    fn plan(
+        &self,
+        _ctx: &dyn ContribPlannerContext,
+        payload: &[u8],
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>, ContribError> {
+        let msg = proto::ExampleConstantScan::decode(payload).map_err(|e| {
+            ContribError::BadPayload(format!(
+                "ExampleConstantScan: decode failed: {e}"
+            ))
+        })?;
+        log::debug!(
+            "comet-contrib-example: ConstantScanPlanner produces {} synthetic rows",
+            msg.row_count
+        );
+        // For the worked example we don't actually populate rows -- EmptyExec is fine to
+        // demonstrate the build path. Real contribs return their domain-specific exec
+        // (Delta returns the file scan + DV filter wrap).
+        Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty()))))
+    }
+}
+
+/// Registers all of the example contrib's planners against the contrib registry at
+/// library-init time. `#[ctor::ctor]` runs this constructor before `main`/`JNI_OnLoad`.
+/// Comet's `libcomet` cdylib is the single library the JVM loads; this constructor runs
+/// during that one library's init.
+///
+/// # Panic safety
+///
+/// The body is wrapped in `catch_unwind` and writes to stderr on failure. A panic inside
+/// `#[ctor]` aborts the entire JVM process before `JNI_OnLoad` runs and produces no
+/// diagnostic on macOS/Linux without this wrapper. Every contrib's `#[ctor]` should
+/// follow the same pattern; see `docs/source/contributor-guide/contrib-extensions.md`.
+///
+/// # Logging
+///
+/// `log::*!` macros inside `#[ctor]` are no-ops because Comet's logger is initialised
+/// later, in `Java_org_apache_comet_NativeBase_init`. Use `eprintln!` (or nothing) for
+/// any ctor diagnostics that must be visible.
+#[ctor::ctor]
+fn register() {
+    let _ = std::panic::catch_unwind(|| {
+        register_contrib_planner(EXAMPLE_NO_OP_KIND, Arc::new(NoOpPlanner));
+        register_contrib_planner(EXAMPLE_CONSTANT_SCAN_KIND, Arc::new(ConstantScanPlanner));
+    })
+    .map_err(|panic| {
+        eprintln!(
+            "comet-contrib-example: #[ctor] panicked during planner registration; \
+             contrib will not be available. panic={panic:?}"
+        );
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use comet_contrib_spi::{lookup_contrib_planner_by_kind, ParquetDatasourceParams};
+    use datafusion::arrow::datatypes::SchemaRef;
+    use datafusion::execution::context::SessionContext;
+    use datafusion::execution::object_store::ObjectStoreUrl;
+    use datafusion::physical_expr::PhysicalExpr;
+    use datafusion_comet_proto::{spark_expression, spark_operator};
+    use std::collections::HashMap;
+
+    /// Minimal `ContribPlannerContext` for unit-testing contrib planners that don't
+    /// actually need to build a parquet exec. All methods that the tests don't exercise
+    /// panic if invoked.
+    struct TestCtx {
+        ctx: Arc<SessionContext>,
+    }
+    impl ContribPlannerContext for TestCtx {
+        fn session_ctx(&self) -> &Arc<SessionContext> {
+            &self.ctx
+        }
+        fn build_physical_expr(
+            &self,
+            _expr: &spark_expression::Expr,
+            _input_schema: SchemaRef,
+        ) -> Result<Arc<dyn PhysicalExpr>, ContribError> {
+            unimplemented!("TestCtx: build_physical_expr not used by this test")
+        }
+        fn convert_spark_schema(
+            &self,
+            _fields: &[spark_operator::SparkStructField],
+        ) -> SchemaRef {
+            unimplemented!("TestCtx: convert_spark_schema not used by this test")
+        }
+        fn prepare_object_store(
+            &self,
+            _url: String,
+            _configs: &HashMap<String, String>,
+        ) -> Result<(ObjectStoreUrl, datafusion::object_store::path::Path), ContribError> {
+            unimplemented!("TestCtx: prepare_object_store not used by this test")
+        }
+        fn build_parquet_datasource_exec(
+            &self,
+            _params: ParquetDatasourceParams,
+        ) -> Result<Arc<dyn ExecutionPlan>, ContribError> {
+            unimplemented!("TestCtx: build_parquet_datasource_exec not used by this test")
+        }
+    }
+
+    fn test_ctx() -> TestCtx {
+        TestCtx {
+            ctx: Arc::new(SessionContext::new()),
+        }
+    }
+
+    #[test]
+    fn ctor_registers_both_planners() {
+        // The #[ctor] above runs at process-init time for test binaries too.
+        assert!(lookup_contrib_planner_by_kind(EXAMPLE_NO_OP_KIND).is_some());
+        assert!(lookup_contrib_planner_by_kind(EXAMPLE_CONSTANT_SCAN_KIND).is_some());
+    }
+
+    #[test]
+    fn constant_scan_decodes_payload_and_builds() {
+        let payload = proto::ExampleConstantScan { row_count: 42 }.encode_to_vec();
+        let planner = ConstantScanPlanner;
+        let ctx = test_ctx();
+        let plan = planner.plan(&ctx, &payload, vec![]).expect("decode + build");
+        assert!(plan.schema().fields().is_empty());
+    }
+
+    #[test]
+    fn constant_scan_handles_zero_rows() {
+        // Worked-example coverage: row_count = 0 must not be a special case.
+        let payload = proto::ExampleConstantScan { row_count: 0 }.encode_to_vec();
+        let planner = ConstantScanPlanner;
+        let ctx = test_ctx();
+        let plan = planner.plan(&ctx, &payload, vec![]).expect("decode + build");
+        assert!(plan.schema().fields().is_empty());
+    }
+
+    #[test]
+    fn constant_scan_surfaces_bad_payload() {
+        let planner = ConstantScanPlanner;
+        let ctx = test_ctx();
+        let bad = b"not a valid proto";
+        let err = planner
+            .plan(&ctx, bad, vec![])
+            .expect_err("garbage should fail decode");
+        match err {
+            ContribError::BadPayload(_) => {}
+            other => panic!("expected BadPayload, got {other:?}"),
+        }
+    }
+}
diff --git a/contrib/example/native/src/proto/example_op.proto b/contrib/example/native/src/proto/example_op.proto
new file mode 100644
index 0000000000..59ae4ca761
--- /dev/null
+++ b/contrib/example/native/src/proto/example_op.proto
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+syntax = "proto3";
+
+// Contrib-private proto package. Each contrib's proto messages live under their own
+// package so symbols never collide with core or with other contribs.
+package comet.contrib.example;
+
+// Trivial reference message used by the worked-example contrib. A real contrib's proto
+// carries whatever fields its native operator needs (file paths, predicates, schemas,
+// deletion vectors, etc.).
+//
+// The contrib's Scala side fills this message and serializes it into the
+// `ContribOp.payload` bytes; the contrib's Rust side decodes the bytes back into this
+// struct in its `ContribOperatorPlanner::plan`.
+message ExampleConstantScan {
+  // Number of rows the synthetic constant scan should emit. Bounded by the contrib's
+  // planner -- this is a test reference, not a useful operator.
+  uint32 row_count = 1;
+}
diff --git a/contrib/example/pom.xml b/contrib/example/pom.xml
new file mode 100644
index 0000000000..6f67ac682d
--- /dev/null
+++ b/contrib/example/pom.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.datafusion</groupId>
+    <artifactId>comet-parent-spark${spark.version.short}_${scala.binary.version}</artifactId>
+    <version>0.17.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <!--
+    Deps-only pom for the example contrib. `<packaging>pom</packaging>` means this
+    module produces a metadata artifact (no JAR, no compiled classes). Its sole
+    purpose is to list external Maven dependencies the contrib needs at compile
+    time; spark/pom.xml's contrib-example profile depends on it via
+    `<type>pom</type>` to pull those deps onto comet-spark's classpath transitively.
+
+    The example contrib happens to have NO external deps (it only references
+    comet-spark's own SPI types, which are visible because the contrib's sources
+    are compiled INTO comet-spark via source injection). Real contribs add their
+    deps here ;e.g., a Delta contrib would add `io.delta:delta-spark`. The empty
+    `<dependencies>` block stays as the documented insertion point.
+
+    Not separately published; `maven.deploy.skip` inherits its default (true) from
+    the parent pom. Out-of-tree contribs do not exist in this distribution model.
+  -->
+  <artifactId>comet-contrib-example-deps-spark${spark.version.short}_${scala.binary.version}</artifactId>
+  <name>comet-contrib-example-deps</name>
+  <packaging>pom</packaging>
+
+  <dependencies>
+    <!-- The example contrib has no external deps. Add yours here when you copy
+         this file as the template for a real contrib. -->
+  </dependencies>
+</project>
diff --git a/contrib/example/src/main/resources/META-INF/services/org.apache.comet.spi.CometScanRuleExtension b/contrib/example/src/main/resources/META-INF/services/org.apache.comet.spi.CometScanRuleExtension
new file mode 100644
index 0000000000..13c4689816
--- /dev/null
+++ b/contrib/example/src/main/resources/META-INF/services/org.apache.comet.spi.CometScanRuleExtension
@@ -0,0 +1 @@
+org.apache.comet.contrib.example.ExampleScanRuleExtension
diff --git a/contrib/example/src/main/scala/org/apache/comet/contrib/example/ExampleScanRuleExtension.scala b/contrib/example/src/main/scala/org/apache/comet/contrib/example/ExampleScanRuleExtension.scala
new file mode 100644
index 0000000000..6ef10587e0
--- /dev/null
+++ b/contrib/example/src/main/scala/org/apache/comet/contrib/example/ExampleScanRuleExtension.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.example
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.execution.datasources.HadoopFsRelation
+
+import org.apache.comet.spi.CometScanRuleExtension
+
+/**
+ * Worked-reference `CometScanRuleExtension` for new contrib authors. This implementation is
+ * intentionally trivial: it does not match any scan and never transforms anything. What it proves
+ * end-to-end at runtime is:
+ *
+ *   1. `META-INF/services/org.apache.comet.spi.CometScanRuleExtension` discovery works:
+ *      `CometExtensionRegistry.load()` finds this class via ServiceLoader as soon as the contrib
+ *      JAR is on the classpath.
+ *
+ * 2. The wiring in `CometScanRule.transformV1Scan` / `transformV2Scan` actually iterates
+ * extensions: even though this one returns `false` from `matchesV1` and `matchesV2`, the registry
+ * call happens for every scan.
+ *
+ * Real contribs replace `matchesV1` / `transformV1` with real probes against the scan's
+ * `relation.fileFormat` (e.g. Delta would detect `DeltaParquetFileFormat`) and `transformV1` with
+ * the contrib's native dispatch.
+ *
+ * The matching native-side counterpart lives in `contrib/example/native/src/lib.rs` -- it
+ * registers a `ContribOperatorPlanner` under the same kind string used by any future Scala-side
+ * serde this example might add.
+ */
+class ExampleScanRuleExtension extends CometScanRuleExtension with Logging {
+  override val name: String = "example"
+
+  override def matchesV1(relation: HadoopFsRelation): Boolean = {
+    // Sentinel: only match if a synthetic option declares this contrib should claim the
+    // scan. Production contribs replace this with a real file-format probe; here we want
+    // the test to be able to opt in deterministically.
+    relation.options
+      .get(ExampleScanRuleExtension.MarkerOptionKey)
+      .contains(ExampleScanRuleExtension.MarkerOptionValue)
+  }
+
+  // matchesV2 / transformV1 / transformV2 inherit the trait defaults (`false` / `None`).
+  // This example only demonstrates V1 discovery. A real contrib would override the
+  // transform methods to build its native plan.
+}
+
+object ExampleScanRuleExtension {
+
+  /**
+   * Test-only option key. A Spark read can set this on `HadoopFsRelation.options` to trigger
+   * `ExampleScanRuleExtension.matchesV1` and verify the SPI is being consulted.
+   */
+  val MarkerOptionKey: String = "comet.contrib.example.marker"
+
+  /** Sentinel value the marker option must equal. */
+  val MarkerOptionValue: String = "match"
+}
diff --git a/contrib/example/src/test/scala/org/apache/comet/contrib/example/ExampleScanRuleExtensionSuite.scala b/contrib/example/src/test/scala/org/apache/comet/contrib/example/ExampleScanRuleExtensionSuite.scala
new file mode 100644
index 0000000000..7de0fd6b34
--- /dev/null
+++ b/contrib/example/src/test/scala/org/apache/comet/contrib/example/ExampleScanRuleExtensionSuite.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.example
+
+import org.scalatest.funsuite.AnyFunSuite
+
+import org.apache.comet.spi.CometExtensionRegistry
+
+/**
+ * Verifies the JVM half of the contrib SPI by going through public API only:
+ *
+ * * `CometExtensionRegistry.load()` discovers this contrib via its
+ * `META-INF/services/org.apache.comet.spi.CometScanRuleExtension` entry. * The discovered
+ * extension is the `ExampleScanRuleExtension` defined in this module. * `matchesV1` honours the
+ * test-only marker option so a real `CometScanRule.transformV1Scan` integration test could
+ * deterministically opt in.
+ *
+ * Native-side dispatch (the `OpStruct::ContribOp` arm in core's planner that delegates to the
+ * example's Rust `NoOpPlanner`) is exercised by core's own integration tests when built with the
+ * `contrib-example` Cargo feature on -- not duplicated here.
+ */
+class ExampleScanRuleExtensionSuite extends AnyFunSuite {
+
+  test("CometExtensionRegistry discovers ExampleScanRuleExtension via ServiceLoader") {
+    // The registry caches discovery results across calls; reset so this test sees a
+    // deterministic load against the current test classpath.
+    CometExtensionRegistry.resetForTesting()
+    CometExtensionRegistry.load()
+
+    val found = CometExtensionRegistry.scanExtensions.find(_.name == "example")
+    assert(found.isDefined, "ServiceLoader should have discovered the example contrib")
+    assert(found.get.isInstanceOf[ExampleScanRuleExtension])
+  }
+
+  test("matchesV1 returns true only when the marker option is set") {
+    val ext = new ExampleScanRuleExtension
+
+    // We construct a minimal HadoopFsRelation just enough to call matchesV1. The trait
+    // method only reads `relation.options` so we don't need a real file format/schema.
+    //
+    // Use getOrCreate so the test reuses any already-running singleton SparkSession
+    // (e.g., from another suite). Critically, DO NOT call `stop()` in a finally block:
+    // stop() tears down the JVM-wide singleton and breaks every other test sharing it.
+    val sparkSession = org.apache.spark.sql.SparkSession
+      .builder()
+      .master("local[1]")
+      .appName("ExampleScanRuleExtensionSuite")
+      .getOrCreate()
+    val relationWithoutMarker = new org.apache.spark.sql.execution.datasources.HadoopFsRelation(
+      location = new org.apache.spark.sql.execution.datasources.InMemoryFileIndex(
+        sparkSession,
+        Seq.empty,
+        Map.empty,
+        None),
+      partitionSchema = new org.apache.spark.sql.types.StructType(),
+      dataSchema = new org.apache.spark.sql.types.StructType(),
+      bucketSpec = None,
+      fileFormat = new org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat(),
+      options = Map.empty)(sparkSession)
+    assert(!ext.matchesV1(relationWithoutMarker), "no marker -> no match")
+
+    val relationWithMarker = relationWithoutMarker.copy(options = Map(
+      ExampleScanRuleExtension.MarkerOptionKey ->
+        ExampleScanRuleExtension.MarkerOptionValue))(sparkSession)
+    assert(ext.matchesV1(relationWithMarker), "marker present -> match")
+  }
+}
diff --git a/docs/source/contributor-guide/contrib-extensions.md b/docs/source/contributor-guide/contrib-extensions.md
new file mode 100644
index 0000000000..e7a49421dd
--- /dev/null
+++ b/docs/source/contributor-guide/contrib-extensions.md
@@ -0,0 +1,898 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# Authoring a Comet contrib extension
+
+A Comet *contrib* is a self-contained extension that lives alongside core but ships
+independently. Contribs add support for a specific table format or operator class
+without core having to know about them at build time.
+
+The first contrib in the tree is
+[`contrib/example/`](https://github.com/apache/datafusion-comet/tree/main/contrib/example) —
+read it top-to-bottom as the worked reference. This guide adds the architectural context
+and walks through every integration point that the example does not exercise.
+
+## Architecture at a glance
+
+A contrib has two halves, both **bundled into Comet's published artifacts at build
+time** when their matching flags are enabled. Nothing about a contrib is independently
+distributable — the contrib lives inside Comet's release.
+
+- **JVM half** — Scala/Java classes plus generated Java proto. Built as a Maven
+  submodule under `contrib/<name>/` and **shaded into `comet-spark.jar`** via the
+  `-Pcontrib-<name>` Maven profile on `spark/pom.xml`. With no profile active, the
+  contrib's classes are not in the published JAR. The contrib's `META-INF/services/`
+  entries are bundled along with the classes; ServiceLoader at runtime then discovers
+  them from inside `comet-spark.jar` itself.
+- **Native half** — a Rust `rlib` crate (NOT `cdylib`) **linked into `libcomet`** via
+  the matching `--features contrib-<name>` Cargo flag on the core crate. The contrib's
+  `#[ctor]` registers its operator planners during library load.
+
+The two halves are symmetric: contribs are build-time options on Comet, JVM and
+native both. `mvn install -Pcontrib-example && cargo build --features contrib-example`
+produces a Comet build that includes the example contrib in both `comet-spark.jar` and
+`libcomet`; a vanilla build of either side produces an artifact with zero contrib
+surface.
+
+The wire format between JVM and native uses a single generic envelope on the operator
+proto, `ContribOp { kind, payload }`. Core's planner dispatches by `kind`; the contrib's
+native crate registers planners against the same `kind` string the contrib's JVM code
+writes into the proto.
+
+## Required files (mirror `contrib/example/` exactly)
+
+A contrib is a directory of sources plus a deps-only Maven pom. The contrib's
+Scala/Java sources are pulled into `comet-spark`'s compile by a profile on
+`spark/pom.xml`; the contrib's Rust sources are pulled into `libcomet` by a Cargo
+feature on `native/core`. The `pom.xml` exists solely to enumerate external Maven
+deps (e.g., `io.delta:delta-spark` for a Delta contrib); it does NOT produce code
+and does NOT depend on `comet-spark` (those two together would create a Maven
+reactor cycle).
+
+```
+contrib/<name>/
+  pom.xml                                                          ← <packaging>pom</packaging>; declares external Maven deps only
+  src/main/scala/org/apache/comet/contrib/<name>/
+    <SomeClass>.scala                                              ← CometScanRuleExtension / CometOperatorSerdeExtension impl
+  src/main/resources/META-INF/services/
+    org.apache.comet.spi.CometScanRuleExtension                    ← one line per extension class
+    org.apache.comet.spi.CometOperatorSerdeExtension               ← (only if you implement serdes)
+  src/test/scala/org/apache/comet/contrib/<name>/
+    <SomeClass>Suite.scala                                         ← integration test (runs as part of comet-spark's tests when profile active)
+  native/
+    Cargo.toml                                                     ← rlib crate, workspace = "../../../native"
+    build.rs                                                       ← runs prost-build over your proto schema
+    src/lib.rs                                                     ← ContribOperatorPlanner impl + #[ctor] registration
+    src/proto/<your_op>.proto                                      ← contrib-private proto schema (also used by JVM-side protoc generation)
+    src/generated/                                                 ← (gitignored) prost-build output
+```
+
+The `pom.xml` is a `<packaging>pom</packaging>` with one job: list the contrib's
+external Maven deps. A Delta contrib's pom would carry `<dependency>` entries for
+`io.delta:delta-spark`. `spark/pom.xml`'s `contrib-<name>` profile depends on this
+deps-pom via `<type>pom</type>`, which transitively resolves the listed deps onto
+comet-spark's classpath.
+
+Plus a handful of build-config edits (collected under "Wiring into core", below).
+
+### Prerequisites
+
+You need:
+
+- The same toolchain Comet's main build uses: JDK 11+ (Maven build), Rust stable, `protoc`
+  (pulled in automatically by `protoc-jar-maven-plugin` and `prost-build`).
+- The contrib's `<name>` decided in advance — it becomes a Cargo feature flag
+  (`contrib-<name>`), an artifact ID, a JNI symbol prefix if your contrib calls into its
+  own Rust, and a `kind` string component for every `ContribOp`. Choose a short, stable
+  identifier; renames are breaking.
+
+### `.gitignore`
+
+The generated proto outputs are checked in nowhere. Add a line to the repo-root
+`.gitignore` mirroring the existing `contrib/example/native/src/generated` entry:
+
+```
+contrib/<name>/native/src/generated
+```
+
+`contrib/<name>/target/` is already gitignored by the repo-root pattern.
+
+### Workspace placement constraint
+
+`contrib/<name>/native/Cargo.toml` uses `workspace = "../../../native"`. This relative
+path assumes contribs live exactly at `<repo>/contrib/<name>/native`. Deeper nesting
+breaks the workspace lookup; place the contrib at the documented depth.
+
+## Wiring into core
+
+Five edits, three per side (JVM) + two (native):
+
+### JVM side
+
+1. **Root `pom.xml`** — add `<module>contrib/<name></module>` so Maven always builds
+   the contrib's deps-pom. The pom is tiny (no code, no JAR — just `<packaging>pom</packaging>`).
+2. **`contrib/<name>/pom.xml`** — create a `<packaging>pom</packaging>` file enumerating
+   your external Maven deps. Copy `contrib/example/pom.xml` as the template; the
+   example's `<dependencies>` block is empty (no external deps needed). A Delta-style
+   contrib would add e.g.:
+
+   ```xml
+   <dependencies>
+     <dependency>
+       <groupId>io.delta</groupId>
+       <artifactId>delta-spark_${scala.binary.version}</artifactId>
+       <version>3.3.2</version>
+       <scope>provided</scope>
+     </dependency>
+   </dependencies>
+   ```
+
+   Use `<scope>provided</scope>` for deps the user supplies on their Spark classpath;
+   `<scope>compile</scope>` if the contrib ships them itself (shaded into comet-spark
+   via the inherited shade execution).
+
+3. **`spark/pom.xml`** — add a `contrib-<name>` profile under `<profiles>`. Copy the
+   `contrib-example` profile as the template. The profile (a) depends on the contrib's
+   deps-pom via `<type>pom</type>`, (b) uses `build-helper-maven-plugin` to add the
+   contrib's source/test directories, (c) uses `maven-resources-plugin` to merge in
+   `META-INF/services` entries, and (d) uses `protoc-jar-maven-plugin` to generate
+   the contrib's Java protos. See `contrib/example`'s entry in `spark/pom.xml` for
+   the verbatim block to copy.
+
+### Native side
+
+2. **`native/Cargo.toml`** — add `../contrib/<name>/native` to the workspace `members`
+   list (NOT `default-members` — contribs are consumed via core's feature flags).
+3. **`native/core/Cargo.toml`** — add a `contrib-<name>` feature gate and a matching
+   optional `dep:` entry, mirroring the `contrib-example` lines:
+
+   ```toml
+   [dependencies]
+   comet-contrib-<name> = { path = "../../contrib/<name>/native", optional = true }
+
+   [features]
+   contrib-<name> = ["dep:comet-contrib-<name>"]
+   ```
+
+   Do **not** add the feature to `default = [...]`. Production builds carry zero contrib
+   surface by design; users opt in explicitly.
+4. **`native/core/src/lib.rs`** — add the matching feature-gated `extern crate` so the
+   contrib's `#[ctor]` is linked in when the feature is on:
+
+   ```rust
+   #[cfg(feature = "contrib-<name>")]
+   extern crate comet_contrib_<name>;
+   ```
+
+## Build matrix
+
+```bash
+# Vanilla Comet build: zero contribs on either side.
+mvn install
+cargo build
+
+# Build with the example contrib bundled into both halves.
+mvn install -Pcontrib-example
+cargo build --features contrib-example
+
+# Multiple contribs at once.
+mvn install -Pcontrib-example,contrib-delta
+cargo build --features 'contrib-example contrib-delta'
+
+# Verify the slim native build path.
+cargo build --no-default-features
+```
+
+The JVM and native flags MUST agree for a contrib to work. Activating only the Maven
+profile gives you a `comet-spark.jar` whose serde produces `ContribOp` envelopes the
+native side can't dispatch (you'll get
+`No contrib planner registered for ContribOp.kind=...`). Activating only the Cargo
+feature gives you a `libcomet` ready to dispatch a contrib whose serde isn't on the
+classpath, so the registered planner sits dormant. The contributor guide and release
+notes call out both flags together.
+
+A core test under `#[cfg(not(any(feature = "contrib-example")))]` (today's form;
+the `any(...)` will list every contrib feature once more are added) asserts
+`registered_contrib_kinds()` is empty in the slim build. When you add a new
+`contrib-<name>` feature, **extend that `cfg` predicate** (see
+`native/core/src/execution/planner/contrib.rs`'s `production_build_has_no_contrib_planners_registered`)
+to add `feature = "contrib-<name>"` so the canary still compiles on your contrib's CI row.
+
+## SPI stability
+
+The contrib SPI is currently **alpha** — minor Comet versions may carry breaking
+changes during the early-adopter period. Because contribs ship in-tree (as part of
+Comet's release), every Comet build is internally consistent — a `0.18.x`
+`comet-spark.jar` is bundled with `0.18.x` contribs. Version-skew concerns
+("contrib JAR built against 0.17, Comet runtime 0.18") don't apply.
+
+What stability guarantees the SPI does aim for:
+
+- `ParquetDatasourceParams` and `ContribError` are `#[non_exhaustive]` so additive
+  changes (new fields / variants) are minor bumps, not breaks. Use
+  `ParquetDatasourceParams::new(...)` + `with_*` setters rather than struct-literal
+  syntax; consumers of `ContribError` must include a wildcard match arm.
+- Scala SPI traits add new methods with default implementations (default `false` /
+  `None`). Override only the methods you need; an additive method change is a minor
+  bump. Abstract-method additions are breaking and called out in release notes.
+- Releases that change the SPI in a breaking way will say so explicitly.
+
+## SPI surface
+
+### JVM side: `org.apache.comet.spi`
+
+| Trait / Object | Purpose |
+|---|---|
+| `CometScanRuleExtension` | Intercept scan-tree transformation. See subsections below. |
+| `CometOperatorSerdeExtension` | Contribute additional `SparkPlan` class → `CometOperatorSerde` mappings to `CometExecRule`. See subsections below. |
+| `CometExtensionRegistry` | Process-wide singleton. `load()` is invoked lazily from `CometScanRule._apply` / `CometExecRule._apply` the first time Comet runs against a Comet-enabled session — sessions that never enable Comet pay zero ServiceLoader cost. Subsequent calls are no-ops. `resetForTesting()` (public) clears the registry between tests. |
+
+#### `CometScanRuleExtension`
+
+- `name: String` — human label used in logs and warnings.
+- `preTransform(plan, session): SparkPlan` (default identity) — tree-level pre-pass run
+  once per plan before per-scan dispatch. **V1-only.** Use it to undo wrapper rewrites
+  applied by your format's own Catalyst strategy (Delta's `PreprocessTableWithDVs` is
+  the canonical case). Skipped entirely when `spark.comet.scan.enabled=false` — your
+  wrappers become load-bearing in that mode and stripping them would be a correctness
+  bug. `CometScanRule` logs a warning when an extension replaces a `FileSourceScanExec`
+  whose relation it does not claim; this catches accidental cross-format corruption.
+- `matchesV1(relation): Boolean` (default `false`) / `transformV1(plan, scanExec, session): Option[SparkPlan]`
+  — V1 dispatch. Make `matchesV1` cheap (typically a file-format class probe).
+- `matchesV2(scanExec): Boolean` (default `false`) / `transformV2(scanExec, session): Option[SparkPlan]`
+  — V2 dispatch. Unlike V1, `transformV2` does **not** receive a plan-tree reference;
+  any wrapper-stripping a V2 contrib needs must happen against `scanExec.scan` /
+  `scanExec.children` directly.
+
+Dispatch iterates registered extensions in registration order; the first one whose
+`match*` returns `true` AND `transform*` returns `Some` wins. `None` from
+`transform*` is treated as "decline this instance" and dispatch continues to the next
+matching extension before falling back to core.
+
+Pass state from `preTransform` to `transformV1` via Spark's `TreeNodeTag` mechanism —
+do NOT use external mutable state, which leaks across plan invocations.
+
+#### `CometOperatorSerdeExtension`
+
+```scala
+trait CometOperatorSerdeExtension {
+  def name: String
+  def serdes: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]]
+}
+```
+
+Two dispatch shapes are supported:
+
+**Class-keyed** — the contrib defines its own `SparkPlan` subclass (typical for
+operator-style contribs):
+
+```scala
+case class CometMyFormatScanExec(...) extends CometNativeExec { /* ... */ }
+
+class MyFormatSerdeExtension extends CometOperatorSerdeExtension {
+  override def name: String = "myformat"
+  override def serdes: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] =
+    Map(classOf[CometMyFormatScanExec] -> CometMyFormatScanSerde)
+}
+```
+
+The merged map across all extensions is computed once at registry load time;
+`CometExecRule` consults it via `.get(op.getClass)`. Duplicate class keys across
+contribs are logged as a warning at load — the convention is **one contrib defines a
+class, that contrib owns its serde**.
+
+**Predicate-keyed (marker-class with scanImpl tag)** — required when the contrib uses
+core's `CometScanExec` as a marker disambiguated by a `scanImpl` string. `CometScanExec`
+is a Scala case class shared with core, so two contribs marking different tag values
+on the same class would otherwise collide. Override `matchOperator` instead of (or in
+addition to) populating `serdes`, and declare your tag(s) via `nativeParquetScanImpls`
+if your scan goes through Comet's tuned ParquetSource:
+
+```scala
+class MyFormatSerdeExtension extends CometOperatorSerdeExtension {
+  override def name: String = "myformat"
+
+  // Your contrib's scanImpl marker. Pick a stable string; no central registry of these
+  // exists in core, but conventionally contribs use snake-case like "native_<name>_compat".
+  private val MyScanImpl = "native_myformat_compat"
+
+  override def matchOperator(op: SparkPlan): Option[CometOperatorSerde[_]] = op match {
+    case s: CometScanExec if s.scanImpl == MyScanImpl => Some(CometMyFormatScan)
+    case _ => None
+  }
+
+  // Tell core's CometScanExec.supportedDataFilters to apply DataFusion-style filter
+  // exclusions to this tag. Required when your scan goes through Comet's tuned
+  // ParquetSource (the same path SCAN_NATIVE_DATAFUSION uses).
+  override def nativeParquetScanImpls: Set[String] = Set(MyScanImpl)
+}
+```
+
+`CometExecRule` checks `matchOperator` only after the class-keyed `serdes` map misses,
+so the two patterns coexist. Multiple registered extensions' `matchOperator` calls are
+tried in registration order; the first `Some` wins.
+
+Core's CometConf defines `SCAN_NATIVE_DATAFUSION` / `SCAN_NATIVE_ICEBERG_COMPAT` for
+core's own scan variants. Contribs are expected to define their own scanImpl strings
+inside their own code (not in `CometConf`); registering via `nativeParquetScanImpls`
+is the SPI hook that lets `CometScanExec.supportedDataFilters` apply the right filter
+treatment without core needing to know the contrib's tag name.
+
+##### `CometOperatorSerde[T <: SparkPlan]` contract
+
+The serde itself lives in `org.apache.comet.serde.CometOperatorSerde` (not in the `spi`
+package). Implement four members:
+
+```scala
+class CometMyFormatScanSerde extends CometOperatorSerde[CometMyFormatScanExec] {
+  override def enabledConfig: Option[ConfigEntry[Boolean]] =
+    Some(CometConf.COMET_MYFORMAT_ENABLED)
+
+  override def requiresNativeChildren: Boolean = false
+
+  override def getSupportLevel(op: CometMyFormatScanExec): SupportLevel =
+    Compatible(None)
+
+  override def convert(
+      op: CometMyFormatScanExec,
+      builder: Operator.Builder,
+      childOp: Operator*): Option[Operator] = {
+    // Build your contrib-private payload message and wrap in ContribOp.
+    // See "Building a ContribOp envelope" below.
+    Some(builder
+      .setContribOp(ContribOp.newBuilder()
+        .setKind("myformat-scan")
+        .setPayload(myPayload.toByteString))
+      .build())
+  }
+
+  override def createExec(nativeOp: Operator, op: CometMyFormatScanExec): CometNativeExec =
+    new CometMyFormatScanExec(nativeOp, op.output, op.child, /* ... */)
+}
+```
+
+`convert` MUST return `Some(builder.setContribOp(...).build())` for the dispatch to
+reach your native planner; returning `None` falls the operator back to Spark.
+
+### Native side: `comet-contrib-spi` crate
+
+| Item | Purpose |
+|---|---|
+| `trait ContribOperatorPlanner` | Implemented by the contrib's native crate. `plan(ctx, payload, children) -> Arc<dyn ExecutionPlan>` receives a `&dyn ContribPlannerContext` (handle to core's planner services), the contrib-private payload bytes, and the already-built native children. |
+| `trait ContribPlannerContext` | Implemented by core. Exposes the parquet exec builder, expression planner, schema conversion, object-store registration, and the `SessionContext` itself. Contribs reach into core through this trait rather than depending on `datafusion-comet` directly. |
+| `struct ParquetDatasourceParams` | `#[non_exhaustive]` argument bundle for the parquet exec builder. Construct via `ParquetDatasourceParams::new(required_schema, object_store_url, file_groups)` and chain `with_*` setters. |
+| `register_contrib_planner(kind, planner)` | Process-wide registry. Called from the contrib's `#[ctor::ctor]` at library load. |
+| `lookup_contrib_planner_by_kind(kind)` | Used by core's planner; contribs rarely call directly. |
+| `registered_contrib_kinds()` | Diagnostic snapshot of registered kinds. |
+| `ContribError` | `#[non_exhaustive]` error type. Variants: `Plan(String)`, `BadPayload(String)`, `WrongChildCount { expected: String, actual: usize }`. Pattern matches MUST include a wildcard arm. |
+| `ScopedContribPlannerRegistration` | (`#[cfg(any(test, feature = "test-utils"))]`) RAII guard that registers a planner for the lifetime of the guard and removes it on drop. Use in unit tests that exercise dispatch without polluting the global registry. |
+| `_clear_for_test()` | (`#[cfg(any(test, feature = "test-utils"))]`) Wipes the registry entirely. **Test escape hatch only** — using it in parallel with other registry consumers is unsafe; prefer `ScopedContribPlannerRegistration`. |
+
+The SPI crate depends only on `datafusion`, `datafusion-comet-proto`, and
+`object_store`. Core links contribs via Cargo feature flags; contribs depend on the SPI
+crate; nothing depends back on core from a contrib — the dependency graph is a DAG.
+
+#### Why `ContribOperatorPlanner` is `Send + Sync` but `ContribPlannerContext` isn't
+
+The planner trait is stored in an `Arc` inside a process-wide registry shared across
+threads, so `Send + Sync` is load-bearing. The context is short-lived: a `&dyn`
+reference passed for the duration of one synchronous `plan()` call, so the bound would
+only restrict implementations without adding safety. Core's `PhysicalPlanner` carries
+JNI handles that aren't `Send`; requiring it would force an `Arc<Mutex<...>>` dance
+for no gain.
+
+Contribs that want to spawn async work during `plan()` must capture only the
+`Arc<SessionContext>` (which **is** `Send + Sync`) before crossing a thread boundary —
+not the `&dyn ContribPlannerContext` itself.
+
+#### Why `payload: &[u8]` instead of `Bytes`
+
+The dispatcher already owns the decoded `ContribOp` proto; passing `&[u8]` is zero-copy
+and avoids forcing every contrib to depend on the `bytes` crate. `prost::Message::decode`
+accepts `&[u8]` directly. Contribs that want `Bytes` for downstream zero-copy work can
+convert via `bytes::Bytes::copy_from_slice(payload)` — one allocation, once per plan
+call.
+
+#### `ContribError::WrongChildCount` convention
+
+`expected` is a free-form human description; conventionally a phrase like `"exactly 1"`
+or `"0 or 1"`. The dispatcher displays:
+`wrong child count: expected exactly 1, got 2`.
+
+#### Error message convention
+
+The dispatcher wraps every `ContribError` with `format!("contrib planner {kind:?}: {e}")`,
+so contribs should NOT re-prefix their messages with their own `kind`. Write:
+
+```rust
+ContribError::Plan(format!("file not found: {path}"))
+```
+
+not:
+
+```rust
+ContribError::Plan(format!("myformat-scan: file not found: {path}"))  // double prefix
+```
+
+## Proto layer
+
+Each contrib carries its own `.proto` schema defining the message its `ContribOp.payload`
+carries. Both halves of the contrib generate code from the same `.proto` source:
+
+- **Rust**, in the contrib's `build.rs` via `prost-build`.
+- **Java**, in the contrib's `pom.xml` via `protoc-jar-maven-plugin`.
+
+Use your own proto **package name** (e.g., `comet.contrib.<name>`) so symbols never
+collide with core or with other contribs. Add `contrib/<name>/native/src/generated/`
+to `.gitignore`.
+
+### Proto, native side
+
+`contrib/example/native/build.rs` is the template:
+
+```rust
+use std::{fs, io::Result, path::Path};
+
+fn main() -> Result<()> {
+    // rerun-if-changed so cargo rebuilds when you edit your .proto during dev.
+    println!("cargo:rerun-if-changed=src/proto/");
+
+    let out_dir = "src/generated";
+    if !Path::new(out_dir).is_dir() {
+        fs::create_dir(out_dir)?;
+    }
+    prost_build::Config::new()
+        .out_dir(out_dir)
+        .compile_protos(&["src/proto/<your_op>.proto"], &["src/proto"])?;
+    Ok(())
+}
+```
+
+Note: writing into `src/generated/` rather than `$OUT_DIR` is a deliberate deviation
+from idiomatic prost. It lets `lib.rs` do
+`include!(concat!("generated/", "comet.contrib.example.rs"))` with a stable filesystem
+path — convenient for editor tooling. The file is gitignored.
+
+The contrib's `Cargo.toml` adds `prost-build` to `[build-dependencies]` and `prost`
+to `[dependencies]`.
+
+### Proto, JVM side
+
+Add `protoc-jar-maven-plugin` to your contrib `pom.xml`, pointing at your `.proto`
+schema. Generated Java classes end up under `target/generated-sources/protobuf/java/`
+and get compiled into the contrib's JAR by the inherited `scala-maven-plugin`:
+
+```xml
+<dependencies>
+  <dependency>
+    <groupId>com.google.protobuf</groupId>
+    <artifactId>protobuf-java</artifactId>
+    <version>${protobuf.version}</version>
+  </dependency>
+</dependencies>
+
+<build>
+  <plugins>
+    <plugin>
+      <groupId>com.github.os72</groupId>
+      <artifactId>protoc-jar-maven-plugin</artifactId>
+      <version>${protoc-jar-maven-plugin.version}</version>
+      <executions>
+        <execution>
+          <phase>generate-sources</phase>
+          <goals><goal>run</goal></goals>
+          <configuration>
+            <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
+            <inputDirectories>
+              <include>native/src/proto</include>
+            </inputDirectories>
+          </configuration>
+        </execution>
+      </executions>
+    </plugin>
+  </plugins>
+</build>
+```
+
+**Shading is handled automatically.** When the `contrib-<name>` profile on
+`spark/pom.xml` bundles your contrib into `comet-spark.jar`, the inherited shade
+execution relocates `com.google.protobuf` to `${comet.shade.packageName}.protobuf`
+across both your classes and `comet-spark`'s. Don't add your own `maven-shade-plugin`
+execution to the contrib pom; that would shade twice and break the runtime types.
+
+### Building a `ContribOp` envelope
+
+From your `CometOperatorSerde.convert`:
+
+```scala
+import org.apache.comet.serde.OperatorOuterClass.{ContribOp, Operator}
+import comet.contrib.myformat.{MyOpProto}  // your generated Java proto
+
+val payload: MyOpProto = MyOpProto.newBuilder()
+  .setSomeField(scanState.someField)
+  .build()
+
+val envelope = ContribOp.newBuilder()
+  .setKind("myformat-scan")
+  .setPayload(payload.toByteString)
+  .build()
+
+Some(builder.setContribOp(envelope).build())
+```
+
+A note on the proto naming. `operator.proto` declares
+`oneof op_struct { ... ContribOp contrib_op = 117; ... }`. So `op_struct` is the
+*oneof name* and `contrib_op` is the *field name* on that oneof. The two code
+generators surface this differently:
+
+- **Rust (prost):** pattern-matches as `match operator.op_struct { Some(OpStruct::ContribOp(c)) => ... }`.
+- **Java (protoc):** uses the field-name-derived builder method `Operator.Builder.setContribOp(ContribOp)`.
+
+Both manipulate the same wire-format slot — the difference is purely how the code
+generators expose `oneof` membership.
+
+## Wire-format flow
+
+1. Your Scala code intercepts a `FileSourceScanExec` (or `BatchScanExec`) matching your
+   format, returning a `CometMyFormatScanExec` from `transformV1`/`transformV2`.
+2. `CometExecRule` later picks up the `CometMyFormatScanExec` instance, finds your serde
+   via the class-keyed dispatch, and calls `serde.convert(op, builder, childOp...)`.
+3. Your `convert` builds a contrib-private proto message (whatever fields you need),
+   serializes it, wraps in `ContribOp { kind, payload }`, and stuffs it into the
+   operator builder via `setContribOp`.
+4. The proto is shipped through JNI to native.
+5. Core's native planner sees `OpStruct::ContribOp`, validates `kind` (non-empty,
+   under 16 MiB payload, registered), looks up the planner, calls
+   `planner.plan(ctx, payload, children)`.
+6. Your native crate decodes `payload` into your own proto type and returns an
+   `Arc<dyn ExecutionPlan>`. Use `ctx` to reach core's parquet builder, expression
+   planner, etc. (see the next section).
+7. Core wraps the result in a `SparkPlan` and continues planning.
+
+## Walking a real `plan()` body
+
+The example contrib's planners return `EmptyExec` — none of the `ContribPlannerContext`
+methods are exercised. A file-scan contrib's `plan()` typically threads through all of
+them:
+
+```rust
+use std::sync::Arc;
+use comet_contrib_spi::{
+    ContribError, ContribOperatorPlanner, ContribPlannerContext, ParquetDatasourceParams,
+};
+use datafusion::physical_plan::ExecutionPlan;
+use prost::Message;
+
+use crate::proto::MyFormatScan;
+
+pub struct MyFormatScanPlanner;
+
+impl ContribOperatorPlanner for MyFormatScanPlanner {
+    fn plan(
+        &self,
+        ctx: &dyn ContribPlannerContext,
+        payload: &[u8],
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>, ContribError> {
+        // 1. Decode your contrib-private payload.
+        let scan = MyFormatScan::decode(payload)
+            .map_err(|e| ContribError::BadPayload(format!("decode MyFormatScan: {e}")))?;
+
+        // 2. Translate the Spark proto schemas into Arrow schemas via core.
+        let required_schema = ctx.convert_spark_schema(&scan.required_schema);
+        let data_schema = ctx.convert_spark_schema(&scan.data_schema);
+        let partition_schema = ctx.convert_spark_schema(&scan.partition_schema);
+
+        // 3. Lift Catalyst data-filter Exprs into PhysicalExprs core can execute.
+        let data_filters = scan
+            .data_filters
+            .iter()
+            .map(|e| ctx.build_physical_expr(e, required_schema.clone()))
+            .collect::<Result<Vec<_>, _>>()?;
+
+        // 4. Register the object store for the scheme + host the files live in. The
+        //    returned ObjectStoreUrl is the canonical key every PartitionedFile in your
+        //    file_groups must reference. The returned Path is only relevant if you are
+        //    constructing PartitionedFiles whose location is rooted at the same prefix;
+        //    most file-scan contribs build per-file Paths from the raw URL inside
+        //    `build_partitioned_files` below and can discard this Path entirely.
+        let any_file_url = scan.tasks
+            .first()
+            .map(|t| t.file_path.clone())
+            .ok_or_else(|| ContribError::Plan("empty file list".into()))?;
+        let object_store_options = scan.object_store_options.clone();
+        let (object_store_url, _root_path) =
+            ctx.prepare_object_store(any_file_url, &object_store_options)?;
+
+        // 5. Build the file_groups: Vec<Vec<PartitionedFile>>, one inner Vec per
+        //    desired DataFusion partition. The contrib owns this -- see the helper
+        //    sketch below.
+        let file_groups = build_partitioned_files(&scan.tasks)?;
+
+        // 6. Hand the bundle to core's tuned ParquetSource. as_str() because
+        //    with_session_timezone takes `impl Into<String>` and `&String` doesn't
+        //    impl that; `&str` does.
+        let exec = ctx.build_parquet_datasource_exec(
+            ParquetDatasourceParams::new(
+                required_schema.clone(),
+                object_store_url,
+                file_groups,
+            )
+            .with_data_schema(data_schema)
+            .with_partition_schema(partition_schema)
+            .with_data_filters(data_filters)
+            .with_session_timezone(scan.session_timezone.as_str())
+            .with_case_sensitive(scan.case_sensitive),
+        )?;
+
+        // 7. Optionally wrap the parquet exec in contrib-specific operators
+        //    (e.g. a Delta DV filter).
+        Ok(exec)
+    }
+}
+```
+
+### `build_partitioned_files` — contrib-owned helper sketch
+
+`Vec<Vec<PartitionedFile>>` is the format `init_datasource_exec` consumes. Each inner
+`Vec` becomes one DataFusion partition; each `PartitionedFile` carries an
+`ObjectMeta.location` (a path inside the registered object store) plus optional
+partition-column values. Minimal one-file-per-partition implementation:
+
+```rust
+use datafusion::datasource::listing::PartitionedFile;
+use object_store::path::Path;
+use url::Url;
+
+fn build_partitioned_files(
+    tasks: &[crate::proto::FileTask],
+) -> Result<Vec<Vec<PartitionedFile>>, ContribError> {
+    let mut groups = Vec::with_capacity(tasks.len());
+    for task in tasks {
+        let url = Url::parse(&task.file_path)
+            .map_err(|e| ContribError::Plan(format!("invalid file URL: {e}")))?;
+        // Path within the object store -- starts at the bucket root for s3://,
+        // at the filesystem root for file://, etc.
+        let path = Path::from_url_path(url.path())
+            .map_err(|e| ContribError::Plan(format!("path from URL: {e}")))?;
+        let mut pf = PartitionedFile::new(String::new(), task.file_size);
+        pf.object_meta.location = path;
+        // pf.partition_values = vec![/* ScalarValues per partition column */];
+        groups.push(vec![pf]);
+    }
+    Ok(groups)
+}
+```
+
+Real-world contribs typically:
+
+- Combine many small non-partitioned files into a single inner `Vec` (fewer
+  DataFusion partitions) and split very large files across multiple partitions with
+  `PartitionedFile::new_with_range`.
+- Populate `partition_values` from the format's metadata so partition pruning works.
+- Apply format-specific filters (e.g., Delta's pre-materialized deleted-row indexes,
+  Iceberg's equality deletes) as wrappers around the parquet exec, NOT as
+  PartitionedFile mutations.
+
+### Pieces a contrib owns inside itself
+
+Not exposed through `ContribPlannerContext`:
+
+- Reading the format's transaction log / manifest (kernel-rs for Delta, iceberg-rust
+  for Iceberg).
+- Resolving file paths to absolute URLs on the driver.
+- Computing per-file deletion-vector / equality-delete row indexes.
+- Wrapping the parquet exec in a per-row-filter operator if the format needs it.
+
+Use `ctx` for things that already exist inside core (object-store registry, parquet
+plumbing, expression planner); reimplement the format-specific parts in your contrib.
+
+## `#[ctor]` registration: panic safety + logging
+
+The contrib's native crate registers its planners during library init via
+`#[ctor::ctor]`. Three quirks to get right:
+
+**Panics in `#[ctor]` abort the JVM process** before `JNI_OnLoad` runs, with no
+diagnostic on macOS/Linux. Wrap every ctor body in `std::panic::catch_unwind` and
+emit a stderr message on failure:
+
+```rust
+#[ctor::ctor]
+fn register() {
+    let _ = std::panic::catch_unwind(|| {
+        register_contrib_planner(MY_KIND, Arc::new(MyPlanner));
+    })
+    .map_err(|panic| {
+        eprintln!("comet-contrib-myname: #[ctor] panicked: {panic:?}");
+    });
+}
+```
+
+**`log::*!` macros inside `#[ctor]` are no-ops.** Comet's logger is initialised later,
+in `Java_org_apache_comet_NativeBase_init`. Any diagnostic you need from the ctor body
+must go through `eprintln!`.
+
+**Cross-platform caveats.** `#[ctor::ctor]` works on Linux / macOS / Windows MSVC, but
+the order of ctor execution across rlibs is link-order dependent and not guaranteed
+across compiler versions. Your contrib's ctor **MUST NOT** depend on another contrib
+already being registered.
+
+The corresponding JVM rule: **do not call `CometExtensionRegistry.load()` from a
+class's static initializer** (Scala `object` init, or a JVM-level static block). Scala
+monitors are reentrant so it won't deadlock, but re-entry would observe the partially-
+built state and shadow the in-flight publication.
+
+## Logging conventions
+
+- **From the contrib's Scala code**: use `org.slf4j.Logger` / Comet's `Logging` trait.
+  Lifetime-event logs (extension discovered, contrib registered) at INFO; per-plan
+  decisions at DEBUG; correctness violations at WARN.
+- **From the contrib's Rust `#[ctor]`**: `eprintln!` only (logger not yet initialised).
+- **From the contrib's Rust `plan()` body and runtime code**: `log::*` macros. Choose a
+  `target:` matching your crate name so users can filter:
+  `log::debug!(target: "comet::contrib::myname", "built plan with {n} files")`.
+- **Error context**: pre-format error messages with enough context that the dispatcher's
+  `contrib planner "myname-scan": <your-message>` wrapper reads sensibly. Do not
+  re-prefix with your `kind`.
+
+## Diagnosing a misconfigured contrib
+
+The most common first-hour problem is "I packaged my JAR and it does nothing." Three
+signals to check:
+
+- `CometExtensionRegistry` logs at INFO. When discovery runs and finds zero entries,
+  it emits:
+  ```
+  Comet contrib extensions: none discovered on classpath
+    (no META-INF/services entries for CometScanRuleExtension or
+     CometOperatorSerdeExtension)
+  ```
+  Confirm your JAR ships the `META-INF/services/...CometScanRuleExtension` file with
+  the correct fully-qualified extension class on its own line.
+- ServiceLoader instantiation failures are logged at WARN with `Failed to load a
+  CometScanRuleExtension entry; skipping`. Causes: missing no-arg constructor on the
+  extension class, exception thrown by the constructor.
+- Duplicate-class collisions across contribs are logged at WARN with
+  `Multiple Comet contrib extensions claim the same exec class ...`. The merged
+  `CometExecRule` dispatch is last-write-wins on collision; if your contrib's serde
+  silently stops working when another contrib JAR is present, this is the line to
+  look for.
+- `register_contrib_planner` is last-write-wins on duplicate `kind`. Registration
+  logs a WARN: `replacing existing planner for kind=...`. Two contribs that both
+  register `kind="delta-scan"` (the second clobbers the first) will surface here.
+- `registered_contrib_kinds()` (Rust) returns the kinds currently registered. If your
+  contrib's kind is missing under a build that should include it, the Cargo feature is
+  off or the `extern crate` in `native/core/src/lib.rs` is missing.
+
+Set the logger for `org.apache.comet.spi.CometExtensionRegistry` to INFO/WARN to surface
+both messages.
+
+### Classloader interaction
+
+`CometExtensionRegistry.load()` uses `Thread.currentThread().getContextClassLoader()`
+first, with `getClass.getClassLoader` as fallback. Either should see Comet and the
+contrib JAR in typical Spark deploy modes (`--jars`, `--packages`, application
+classpath). Discovery is **lazy** — triggered the first time `CometScanRule._apply` or
+`CometExecRule._apply` runs against a Comet-enabled session. By that point all
+`--jars`-injected JARs are on the classpath, so order-of-arrival inside the driver
+JVM is not a concern.
+
+## Maven packaging
+
+Contribs are in-tree only — they ship as part of Comet's release. The contrib's
+Maven module produces a standalone JAR (built unconditionally so the workspace stays
+consistent), but the JAR is **not deployed**: `maven.deploy.skip=true` inherits from
+the parent pom. The contrib's classes reach users through `comet-spark.jar`, which
+bundles them via the `contrib-<name>` profile on `spark/pom.xml`.
+
+If your contrib pulls in a third-party library, declare the dep in your contrib's pom
+in `compile` scope (no `provided` — the contrib's classes go through the same shade
+execution as core's, and any deps the contrib pulls need to be visible to that shade).
+Avoid third-party deps where you can; the more your contrib drags in, the more
+likely the shade hits a relocation collision with `comet-spark`'s own includes.
+
+### Multi-Spark-version support
+
+Comet itself ships a per-Spark-minor-version artifact via the
+`spark.version.short` Maven profile (`3.4`, `3.5`, `4.0`). Your contrib follows the
+same model:
+
+- Pick the matching Spark profile when building (`-Dspark.version.short=3.5`).
+- The resulting artifact ID encodes the Spark version
+  (`comet-contrib-<name>-spark3.5_2.13`).
+- If your contrib must support multiple Spark minor versions, publish one artifact per
+  profile, mirroring Comet. Shim code that differs across Spark versions belongs under
+  `src/main/scala-${shims.majorVerSrc}/` (see Comet's `common/`/`spark/` modules for
+  the existing pattern).
+
+## Testing
+
+`contrib/example/` demonstrates the JVM-side test pattern:
+
+- A unit test that calls `CometExtensionRegistry.resetForTesting()` and `load()`,
+  then asserts the contrib's extension is discovered via ServiceLoader. Catches
+  packaging mistakes (missing `META-INF/services`, wrong class name).
+- Per-method unit tests for the extension's `matches*` / `transform*` logic.
+
+For native unit tests of a `ContribOperatorPlanner`, use `ScopedContribPlannerRegistration`
+from `comet-contrib-spi` to install and tear down planners without polluting the
+global registry:
+
+```rust
+use comet_contrib_spi::ScopedContribPlannerRegistration;
+
+#[test]
+fn my_planner_round_trip() {
+    let _guard = ScopedContribPlannerRegistration::new(
+        "myformat-scan",
+        Arc::new(MyFormatScanPlanner),
+    );
+    // ... exercise dispatch ...
+}
+```
+
+Pair with `#[serial_test::serial]` if your test asserts on `registered_contrib_kinds()`
+(which other tests' guards may be temporarily mutating in parallel).
+
+### End-to-end (Rust + Scala round-trip)
+
+A full integration test wires the Spark plan through real JNI and asserts the contrib's
+native planner ran:
+
+1. Build a `SparkSession` configured with `spark.sql.extensions =
+   org.apache.comet.CometSparkSessionExtensions` and the contrib JAR on the classpath
+   (sbt: `Test/unmanagedClasspath`; Maven: the contrib's own test scope already has it).
+2. Submit a query that hits your format's table reader.
+3. Inspect the produced physical plan for your contrib's exec class
+   (`plan.exists(_.isInstanceOf[CometMyFormatScanExec])`).
+4. Run the plan and assert against the result (e.g., a row count that only your native
+   planner could produce, distinguishable from a Spark fall-back).
+
+The example contrib's test fixture doubles as smoke coverage for the SPI dispatch path
+itself (kind lookup, payload decode, error wrapping) under Comet's own CI when the
+`contrib-example` feature is enabled.
+
+## Payload size cap
+
+The native dispatcher enforces a hard ceiling of **16 MiB** on `ContribOp.payload`
+(`MAX_CONTRIB_PAYLOAD_BYTES` in `native/core/src/execution/planner.rs`). A malformed
+JVM-side serde (or one that accidentally accumulates state across plan calls)
+producing a larger payload is rejected with a clear error message before the contrib's
+`plan()` runs. The cap is comfortably above any plausible file-scan payload (Delta
+with ~100k tasks weighs in around 3–4 MiB) and well below "heap pressure" territory.
+If your contrib has a legitimate need for a higher ceiling, file an issue with the
+size you need and the use case — the cap is a guardrail, not a feature.
+
+## Registry implementation note
+
+The native contrib planner registry uses `ArcSwap<HashMap<String, Arc<...>>>` —
+lock-free for readers, RCU swap for writers. Reads on the `ContribOp` dispatch hot
+path are a single atomic load plus an `Arc` ref-count bump; there is no
+reader-writer contention because writes happen exclusively during library init
+(sequential `#[ctor]` registrations, no concurrent writers). Contribs never call
+the registry primitives directly.
+
+## See also
+
+- [`contrib/example/`](https://github.com/apache/datafusion-comet/tree/main/contrib/example) —
+  the worked reference.
+- [`native/contrib-spi/`](https://github.com/apache/datafusion-comet/tree/main/native/contrib-spi) —
+  the leaf SPI crate.
+- [`spark/src/main/scala/org/apache/comet/spi/`](https://github.com/apache/datafusion-comet/tree/main/spark/src/main/scala/org/apache/comet/spi) —
+  the JVM SPI traits.
diff --git a/native/Cargo.lock b/native/Cargo.lock
index df3c3b03c0..187a75665a 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -1485,6 +1485,30 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "comet-contrib-example"
+version = "0.17.0"
+dependencies = [
+ "comet-contrib-spi",
+ "ctor 0.4.3",
+ "datafusion",
+ "datafusion-comet-proto",
+ "log",
+ "prost",
+ "prost-build",
+]
+
+[[package]]
+name = "comet-contrib-spi"
+version = "0.17.0"
+dependencies = [
+ "arc-swap",
+ "datafusion",
+ "datafusion-comet-proto",
+ "log",
+ "object_store",
+]
+
 [[package]]
 name = "comfy-table"
 version = "7.2.2"
@@ -1740,16 +1764,32 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "ctor"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6"
+dependencies = [
+ "ctor-proc-macro 0.0.6",
+ "dtor 0.0.6",
+]
+
 [[package]]
 name = "ctor"
 version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e"
 dependencies = [
- "ctor-proc-macro",
- "dtor",
+ "ctor-proc-macro 0.0.7",
+ "dtor 0.1.1",
 ]
 
+[[package]]
+name = "ctor-proc-macro"
+version = "0.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2"
+
 [[package]]
 name = "ctor-proc-macro"
 version = "0.0.7"
@@ -1966,6 +2006,8 @@ dependencies = [
  "aws-config",
  "aws-credential-types",
  "bytes",
+ "comet-contrib-example",
+ "comet-contrib-spi",
  "criterion",
  "datafusion",
  "datafusion-comet-common",
@@ -2852,15 +2894,30 @@ dependencies = [
  "const-random",
 ]
 
+[[package]]
+name = "dtor"
+version = "0.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8"
+dependencies = [
+ "dtor-proc-macro 0.0.5",
+]
+
 [[package]]
 name = "dtor"
 version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301"
 dependencies = [
- "dtor-proc-macro",
+ "dtor-proc-macro 0.0.6",
 ]
 
+[[package]]
+name = "dtor-proc-macro"
+version = "0.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055"
+
 [[package]]
 name = "dtor-proc-macro"
 version = "0.0.6"
@@ -4633,7 +4690,7 @@ version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "97b31d3d8e99a85d83b73ec26647f5607b80578ed9375810b6e44ffa3590a236"
 dependencies = [
- "ctor",
+ "ctor 0.6.3",
  "opendal-core",
  "opendal-layer-concurrent-limit",
  "opendal-layer-logging",
diff --git a/native/Cargo.toml b/native/Cargo.toml
index d1b5c74af9..9c5f5816bd 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -16,8 +16,14 @@
 # under the License.
 
 [workspace]
-default-members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle"]
-members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle", "hdfs", "fs-hdfs"]
+default-members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle", "contrib-spi"]
+# `contrib-spi` is the thin SPI surface that BOTH core and contribs depend on -- breaking
+# what would otherwise be a cyclic dep between core (links contribs via Cargo features)
+# and contribs (need core types). Contrib crates themselves live under
+# `../contrib/<name>/native` and are workspace members so workspace lockfile + workspace
+# dependencies apply; they're NOT default-members because they're consumed via core's
+# optional Cargo feature flags rather than built standalone.
+members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle", "hdfs", "fs-hdfs", "contrib-spi", "../contrib/example/native"]
 resolver = "2"
 
 [workspace.package]
diff --git a/native/contrib-spi/Cargo.toml b/native/contrib-spi/Cargo.toml
new file mode 100644
index 0000000000..fbead2c17e
--- /dev/null
+++ b/native/contrib-spi/Cargo.toml
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "comet-contrib-spi"
+description = "Stable SPI surface that contrib crates and Comet's core both depend on. Defines the ContribOperatorPlanner trait, ContribPlannerContext, the process-wide registry, and the lightweight error type. Separating this from the core crate breaks what would otherwise be a cyclic dependency (core links contribs via Cargo feature flags; contribs need core types)."
+version = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+edition = { workspace = true }
+
+[dependencies]
+# Public types in the SPI reference these crates. Pinning matches core via workspace.
+datafusion = { workspace = true }
+datafusion-comet-proto = { workspace = true }
+# Surface the `Path` type on the SPI's prepare_object_store return value.
+object_store = { workspace = true }
+log = "0.4"
+# Lock-free registry primitive. Reads (per ContribOp dispatch, hot path) are one atomic
+# load + ref-count bump; writes (per contrib's #[ctor] at lib init) are an RCU swap.
+arc-swap = "1"
+
+[features]
+# Off by default. When enabled, the crate exposes `ScopedContribPlannerRegistration` and
+# `_clear_for_test` for downstream test code that needs to register planners without
+# polluting the process-wide registry. The same surfaces are unconditionally available
+# under `#[cfg(test)]` for the SPI's own unit tests.
+test-utils = []
diff --git a/native/contrib-spi/src/lib.rs b/native/contrib-spi/src/lib.rs
new file mode 100644
index 0000000000..bf5348fa2c
--- /dev/null
+++ b/native/contrib-spi/src/lib.rs
@@ -0,0 +1,534 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Thin SPI crate shared between Comet's core and every contrib crate.
+//!
+//! Both core (`datafusion-comet`) and individual contribs (`comet-contrib-example`,
+//! `comet-contrib-delta`, ...) depend on THIS crate, NOT on each other. This avoids a
+//! cyclic dependency: core wires contribs in via Cargo feature flags, and contribs need
+//! the SPI types to implement the trait. With the SPI in a third crate, the dependency
+//! graph is a DAG.
+//!
+//! Surface:
+//!   * [`ContribOperatorPlanner`]   -- the trait contribs implement.
+//!   * [`ContribPlannerContext`]    -- the trait core implements; gives contribs access
+//!                                     to the parquet exec builder, expression planner,
+//!                                     object-store registration, and session context.
+//!   * [`ParquetDatasourceParams`]  -- argument bundle for the parquet exec builder.
+//!   * [`register_contrib_planner`] / [`lookup_contrib_planner_by_kind`] --
+//!                                     process-wide registry, expected to be populated
+//!                                     from a contrib's `#[ctor]`.
+//!   * [`registered_contrib_kinds`] -- diagnostics.
+
+use std::{
+    collections::HashMap,
+    sync::{Arc, OnceLock},
+};
+
+use arc_swap::ArcSwap;
+
+use datafusion::{
+    arrow::datatypes::SchemaRef,
+    common::ScalarValue,
+    datasource::listing::PartitionedFile,
+    execution::{context::SessionContext, object_store::ObjectStoreUrl},
+    physical_expr::PhysicalExpr,
+    physical_plan::{expressions::Column, ExecutionPlan},
+};
+use datafusion_comet_proto::{spark_expression, spark_operator};
+
+/// Implemented by each contrib. Called from core's planner when an `OpStruct::ContribOp`
+/// with the contrib's `kind` is encountered.
+///
+/// The contract is intentionally minimal:
+///   * `ctx` is a handle to core-side planner services (parquet exec builder,
+///     expression planner, object-store registration, session context). Contribs reach
+///     into core through this trait rather than depending on core directly, which keeps
+///     the dependency graph acyclic.
+///   * `payload` is the raw bytes from `ContribOp.payload`. The contrib decodes it into
+///     whatever proto / serde format it uses internally; core never inspects.
+///   * `children` is the list of already-built native children (in spark-plan child
+///     order). The contrib uses these to build its `ExecutionPlan` if it needs child
+///     inputs.
+///   * The returned `Arc<dyn ExecutionPlan>` is the contrib's operator. Core wraps it
+///     into a `SparkPlan` and threads it through the rest of the plan tree.
+///
+/// Implementations MUST be `Send + Sync` and idempotent -- the same `(payload, children)`
+/// must always produce a functionally equivalent plan, so core can cache or re-plan.
+pub trait ContribOperatorPlanner: Send + Sync {
+    fn plan(
+        &self,
+        ctx: &dyn ContribPlannerContext,
+        payload: &[u8],
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>, ContribError>;
+}
+
+/// Argument bundle for [`ContribPlannerContext::build_parquet_datasource_exec`]. Mirrors
+/// core's internal `init_datasource_exec` signature one-to-one, so the trait method is a
+/// thin forward.
+///
+/// `#[non_exhaustive]` so adding fields in future is a minor SemVer bump, not a break.
+/// Contribs construct via [`ParquetDatasourceParams::new`] (required fields only) +
+/// `with_*` builder setters; never by struct-literal syntax.
+///
+/// `session_timezone` is owned (`String`) so contribs can pass a runtime-computed value
+/// (from a session config lookup) without juggling lifetimes. The string is one-time
+/// per plan call, so the allocation is negligible.
+#[non_exhaustive]
+pub struct ParquetDatasourceParams {
+    pub required_schema: SchemaRef,
+    pub data_schema: Option<SchemaRef>,
+    pub partition_schema: Option<SchemaRef>,
+    pub object_store_url: ObjectStoreUrl,
+    pub file_groups: Vec<Vec<PartitionedFile>>,
+    pub projection_vector: Option<Vec<usize>>,
+    pub data_filters: Option<Vec<Arc<dyn PhysicalExpr>>>,
+    pub default_values: Option<HashMap<Column, ScalarValue>>,
+    pub session_timezone: String,
+    pub case_sensitive: bool,
+    pub return_null_struct_if_all_fields_missing: bool,
+    pub encryption_enabled: bool,
+    pub use_field_id: bool,
+    pub ignore_missing_field_id: bool,
+}
+
+impl ParquetDatasourceParams {
+    /// Minimal constructor with the parameters every parquet scan needs. All `Option`s
+    /// default to `None`, all `bool`s to `false`, and `session_timezone` to `"UTC"`. Use
+    /// the `with_*` setters to populate the rest.
+    pub fn new(
+        required_schema: SchemaRef,
+        object_store_url: ObjectStoreUrl,
+        file_groups: Vec<Vec<PartitionedFile>>,
+    ) -> Self {
+        Self {
+            required_schema,
+            data_schema: None,
+            partition_schema: None,
+            object_store_url,
+            file_groups,
+            projection_vector: None,
+            data_filters: None,
+            default_values: None,
+            session_timezone: "UTC".to_string(),
+            case_sensitive: false,
+            return_null_struct_if_all_fields_missing: false,
+            encryption_enabled: false,
+            use_field_id: false,
+            ignore_missing_field_id: false,
+        }
+    }
+
+    pub fn with_data_schema(mut self, schema: SchemaRef) -> Self {
+        self.data_schema = Some(schema);
+        self
+    }
+    pub fn with_partition_schema(mut self, schema: SchemaRef) -> Self {
+        self.partition_schema = Some(schema);
+        self
+    }
+    pub fn with_projection_vector(mut self, projection: Vec<usize>) -> Self {
+        self.projection_vector = Some(projection);
+        self
+    }
+    pub fn with_data_filters(mut self, filters: Vec<Arc<dyn PhysicalExpr>>) -> Self {
+        self.data_filters = Some(filters);
+        self
+    }
+    pub fn with_default_values(mut self, values: HashMap<Column, ScalarValue>) -> Self {
+        self.default_values = Some(values);
+        self
+    }
+    /// Accepts anything that can be turned into a `String` -- string literals,
+    /// `&str` borrowed from session config, owned `String`s -- without lifetime games.
+    pub fn with_session_timezone(mut self, tz: impl Into<String>) -> Self {
+        self.session_timezone = tz.into();
+        self
+    }
+    pub fn with_case_sensitive(mut self, b: bool) -> Self {
+        self.case_sensitive = b;
+        self
+    }
+    pub fn with_return_null_struct_if_all_fields_missing(mut self, b: bool) -> Self {
+        self.return_null_struct_if_all_fields_missing = b;
+        self
+    }
+    pub fn with_encryption_enabled(mut self, b: bool) -> Self {
+        self.encryption_enabled = b;
+        self
+    }
+    pub fn with_use_field_id(mut self, b: bool) -> Self {
+        self.use_field_id = b;
+        self
+    }
+    pub fn with_ignore_missing_field_id(mut self, b: bool) -> Self {
+        self.ignore_missing_field_id = b;
+        self
+    }
+}
+
+/// Planner services exposed by core to contribs. Core implements this trait against its
+/// `PhysicalPlanner` + `SessionContext`; contribs receive a `&dyn ContribPlannerContext`
+/// in their [`ContribOperatorPlanner::plan`] call and reach into core through it.
+///
+/// All trait methods are infallible at the trait-bound level but return `ContribError`
+/// for runtime failures, so contribs can propagate without converting between error
+/// types.
+// Note: no `Send + Sync` bound -- `&dyn ContribPlannerContext` is only held for the
+// duration of a synchronous `plan()` call, so it doesn't need to cross threads. The
+// natural core-side impl borrows the `PhysicalPlanner` (which carries JNI handles that
+// aren't `Send`), and adding the bound here would force an awkward `Arc<Mutex<...>>`
+// dance for no gain.
+pub trait ContribPlannerContext {
+    /// The session context the plan is being built under. Contribs need this to register
+    /// object stores on `runtime_env()` and to read session-level configs (timezone,
+    /// case sensitivity, etc) that aren't already on `ParquetDatasourceParams`.
+    fn session_ctx(&self) -> &Arc<SessionContext>;
+
+    /// Convert a Catalyst-side Spark expression proto into a DataFusion `PhysicalExpr`
+    /// against the given input schema. Used by file-scan contribs to convert data
+    /// filters from their proto-side `Expr` form into the typed `PhysicalExpr`s that
+    /// `ParquetSource` consumes.
+    fn build_physical_expr(
+        &self,
+        expr: &spark_expression::Expr,
+        input_schema: SchemaRef,
+    ) -> Result<Arc<dyn PhysicalExpr>, ContribError>;
+
+    /// Convert a slice of Spark struct fields (the proto representation of a Spark
+    /// schema) into an Arrow `SchemaRef`. This is a pure proto-to-arrow conversion --
+    /// no side effects, no session state.
+    fn convert_spark_schema(&self, fields: &[spark_operator::SparkStructField]) -> SchemaRef;
+
+    /// Register an object store on the runtime env for the given URL's scheme + bucket,
+    /// using `object_store_configs` for credentials / endpoint overrides. Returns
+    /// `(ObjectStoreUrl, Path)`: the URL the contrib attaches to its `PartitionedFile`s,
+    /// and the canonical path within that store (caller may discard if not needed --
+    /// most file-scan contribs use it to set `partitioned_file.object_meta.location`).
+    fn prepare_object_store(
+        &self,
+        any_file_url: String,
+        object_store_configs: &HashMap<String, String>,
+    ) -> Result<(ObjectStoreUrl, object_store::path::Path), ContribError>;
+
+    /// Build a `DataSourceExec` over Comet's tuned `ParquetSource`. This is the single
+    /// most important method on the trait -- every file-scan contrib (Delta, Iceberg)
+    /// goes through here so the contrib doesn't have to rebuild Comet's parquet plumbing.
+    fn build_parquet_datasource_exec(
+        &self,
+        params: ParquetDatasourceParams,
+    ) -> Result<Arc<dyn ExecutionPlan>, ContribError>;
+}
+
+/// Error type returned by [`ContribOperatorPlanner::plan`] and the trait methods on
+/// [`ContribPlannerContext`]. Kept distinct from core's `ExecutionError` so this crate
+/// stays free of core's dependency tree. Core converts `ContribError` into its own
+/// `ExecutionError` at the dispatch site.
+///
+/// `#[non_exhaustive]` so adding variants in the future is a minor SemVer bump, not a
+/// break. Pattern matchers in contribs MUST include a wildcard arm.
+#[non_exhaustive]
+#[derive(Debug)]
+pub enum ContribError {
+    /// Generic failure. Use this for cases that don't fit the more specific variants.
+    Plan(String),
+    /// The contrib received a payload it couldn't decode (wrong proto schema, missing
+    /// required field, etc.).
+    BadPayload(String),
+    /// The contrib received a child count it can't handle. `expected` is a free-form
+    /// human description, conventionally a phrase like `"exactly 1"` or `"0 or 1"` so
+    /// the error message reads `wrong child count: expected exactly 1, got 2`.
+    WrongChildCount { expected: String, actual: usize },
+}
+
+impl std::fmt::Display for ContribError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ContribError::Plan(msg) => write!(f, "{msg}"),
+            ContribError::BadPayload(msg) => write!(f, "bad payload: {msg}"),
+            ContribError::WrongChildCount { expected, actual } => {
+                write!(f, "wrong child count: expected {expected}, got {actual}")
+            }
+            // Defense for external callers reading `Display` after a future variant is
+            // added under #[non_exhaustive]: their `match` is non-exhaustive even with
+            // a wildcard, but our `Display` impl always falls through to the Debug repr
+            // so the dispatcher's `format!("contrib planner ...: {e}")` still produces
+            // something useful. (Note: inside this crate the wildcard is unreachable
+            // because #[non_exhaustive] is only enforced across crate boundaries --
+            // adding a variant here will require an explicit arm anyway. The wildcard
+            // exists to keep downstream `Display`-as-source consumers working.)
+            #[allow(unreachable_patterns)]
+            other => write!(f, "{other:?}"),
+        }
+    }
+}
+
+impl std::error::Error for ContribError {}
+
+/// Process-wide registry of contrib operator planners, keyed by `ContribOp.kind`.
+///
+/// `ArcSwap<HashMap<...>>` gives lock-free reads (one atomic load + Arc ref-count bump)
+/// on the dispatch hot path. Writes happen exclusively during library init from
+/// `#[ctor]`s (sequential, single-threaded) and use `rcu` to swap an updated map atom.
+/// The init-once / read-many access pattern is exactly what `ArcSwap` is designed for;
+/// the previous `RwLock<HashMap>` would have introduced reader-writer contention for
+/// no gain since there are effectively no concurrent writes.
+type RegistryMap = HashMap<String, Arc<dyn ContribOperatorPlanner>>;
+
+fn registry() -> &'static ArcSwap<RegistryMap> {
+    static REGISTRY: OnceLock<ArcSwap<RegistryMap>> = OnceLock::new();
+    REGISTRY.get_or_init(|| ArcSwap::from_pointee(HashMap::new()))
+}
+
+/// Register a contrib operator planner under the given `kind` identifier. Last-write-wins
+/// on duplicates (logged as a warning). Thread-safe; intended to be called from a
+/// contrib's `#[ctor]` constructor at library-init time.
+pub fn register_contrib_planner(
+    kind: impl Into<String>,
+    planner: Arc<dyn ContribOperatorPlanner>,
+) {
+    let kind = kind.into();
+    registry().rcu(|current| {
+        let mut new_map: RegistryMap = (**current).clone();
+        if new_map.contains_key(&kind) {
+            log::warn!(
+                "register_contrib_planner: replacing existing planner for kind={kind:?}; \
+                 second registration usually indicates a misconfigured test harness"
+            );
+        }
+        new_map.insert(kind.clone(), Arc::clone(&planner));
+        new_map
+    });
+}
+
+/// Look up the contrib planner registered for `kind`, or `None` if no contrib is loaded
+/// for that operator. Core's dispatcher uses this to route `OpStruct::ContribOp` payloads.
+pub fn lookup_contrib_planner_by_kind(kind: &str) -> Option<Arc<dyn ContribOperatorPlanner>> {
+    registry().load().get(kind).cloned()
+}
+
+/// Return a snapshot of all registered contrib kinds, for diagnostics and tests.
+pub fn registered_contrib_kinds() -> Vec<String> {
+    let snapshot = registry().load();
+    let mut kinds: Vec<String> = snapshot.keys().cloned().collect();
+    kinds.sort();
+    kinds
+}
+
+/// RAII guard that registers a planner for the lifetime of the guard and removes it on
+/// drop. Use in tests that want a planner registered without polluting the process
+/// registry for other tests running in parallel.
+///
+/// Not `Send` because dropping it requires the registry write lock; tests using this
+/// guard should mark themselves `#[serial_test::serial]` if they assert on
+/// `registered_contrib_kinds()` (whose snapshot is affected by other threads' guards).
+#[cfg(any(test, feature = "test-utils"))]
+pub struct ScopedContribPlannerRegistration {
+    kind: String,
+    previous: Option<Arc<dyn ContribOperatorPlanner>>,
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+impl ScopedContribPlannerRegistration {
+    /// Install `planner` under `kind` for the lifetime of the returned guard. The
+    /// previously-registered planner (if any) is restored on drop.
+    pub fn new(kind: impl Into<String>, planner: Arc<dyn ContribOperatorPlanner>) -> Self {
+        let kind = kind.into();
+        // Snapshot the previous binding BEFORE the rcu so retries (under contention) don't
+        // observe our own write as the previous value.
+        let previous = registry().load().get(&kind).cloned();
+        registry().rcu(|current| {
+            let mut new_map: RegistryMap = (**current).clone();
+            new_map.insert(kind.clone(), Arc::clone(&planner));
+            new_map
+        });
+        Self { kind, previous }
+    }
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+impl Drop for ScopedContribPlannerRegistration {
+    fn drop(&mut self) {
+        let kind = std::mem::take(&mut self.kind);
+        let previous = self.previous.take();
+        registry().rcu(|current| {
+            let mut new_map: RegistryMap = (**current).clone();
+            match &previous {
+                Some(prev) => {
+                    new_map.insert(kind.clone(), Arc::clone(prev));
+                }
+                None => {
+                    new_map.remove(&kind);
+                }
+            }
+            new_map
+        });
+    }
+}
+
+/// Clear the registry. **Test-only escape hatch.** Use [`ScopedContribPlannerRegistration`]
+/// instead in any test that runs in parallel with other registry users -- this function
+/// removes the entries every other concurrent test depends on.
+#[cfg(any(test, feature = "test-utils"))]
+pub fn _clear_for_test() {
+    registry().store(Arc::new(HashMap::new()));
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use datafusion::arrow::datatypes::Schema;
+    use datafusion::physical_plan::empty::EmptyExec;
+    use std::sync::Arc;
+
+    struct AlwaysEmpty;
+    impl ContribOperatorPlanner for AlwaysEmpty {
+        fn plan(
+            &self,
+            _ctx: &dyn ContribPlannerContext,
+            _payload: &[u8],
+            _children: Vec<Arc<dyn ExecutionPlan>>,
+        ) -> Result<Arc<dyn ExecutionPlan>, ContribError> {
+            Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty()))))
+        }
+    }
+
+    // Use globally-unique kinds so concurrent tests in the same binary don't collide
+    // through the process-wide registry. The `_test_` prefix is reserved for unit tests.
+
+    #[test]
+    fn unknown_kind_returns_none() {
+        // Independent of any registrations: a kind no one ever registers stays None.
+        let probe = "_test_definitely_unregistered_a8f3c1e";
+        assert!(lookup_contrib_planner_by_kind(probe).is_none());
+    }
+
+    #[test]
+    fn scoped_registration_round_trip() {
+        let kind = "_test_scoped_registration_a";
+        assert!(lookup_contrib_planner_by_kind(kind).is_none());
+        {
+            let _guard = ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty));
+            assert!(lookup_contrib_planner_by_kind(kind).is_some());
+        }
+        // Dropping the guard removes the entry.
+        assert!(lookup_contrib_planner_by_kind(kind).is_none());
+    }
+
+    #[test]
+    fn scoped_registration_restores_previous() {
+        let kind = "_test_scoped_registration_b";
+        let _outer =
+            ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty));
+        {
+            // Inner guard temporarily replaces the outer planner; drop restores outer.
+            let _inner =
+                ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty));
+            assert!(lookup_contrib_planner_by_kind(kind).is_some());
+        }
+        assert!(lookup_contrib_planner_by_kind(kind).is_some());
+    }
+
+    #[test]
+    fn parquet_datasource_params_constructor_defaults() {
+        use datafusion::arrow::datatypes::{DataType, Field, Schema};
+        use datafusion::execution::object_store::ObjectStoreUrl;
+
+        let schema: SchemaRef = Arc::new(Schema::new(vec![Field::new(
+            "id",
+            DataType::Int64,
+            false,
+        )]));
+        let url = ObjectStoreUrl::parse("file://").unwrap();
+        let p = ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![]);
+
+        assert_eq!(p.required_schema.fields().len(), 1);
+        assert!(p.data_schema.is_none());
+        assert!(p.partition_schema.is_none());
+        assert!(p.projection_vector.is_none());
+        assert!(p.data_filters.is_none());
+        assert!(p.default_values.is_none());
+        assert_eq!(p.session_timezone, "UTC");
+        assert!(!p.case_sensitive);
+        assert!(!p.return_null_struct_if_all_fields_missing);
+        assert!(!p.encryption_enabled);
+        assert!(!p.use_field_id);
+        assert!(!p.ignore_missing_field_id);
+    }
+
+    #[test]
+    fn parquet_datasource_params_setters_apply() {
+        use datafusion::arrow::datatypes::{DataType, Field, Schema};
+        use datafusion::execution::object_store::ObjectStoreUrl;
+
+        let schema: SchemaRef = Arc::new(Schema::new(vec![Field::new(
+            "id",
+            DataType::Int64,
+            false,
+        )]));
+        let url = ObjectStoreUrl::parse("file://").unwrap();
+        let p = ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![])
+            .with_data_schema(Arc::clone(&schema))
+            .with_session_timezone("America/Los_Angeles")
+            .with_case_sensitive(true)
+            .with_use_field_id(true)
+            .with_ignore_missing_field_id(true)
+            .with_encryption_enabled(true);
+
+        // Distinguishable bool tuple: a swap in `init_datasource_exec`'s arg order
+        // would fail this assertion in core's planner::contrib tests.
+        assert_eq!(p.session_timezone, "America/Los_Angeles");
+        assert!(p.case_sensitive);
+        assert!(!p.return_null_struct_if_all_fields_missing);
+        assert!(p.encryption_enabled);
+        assert!(p.use_field_id);
+        assert!(p.ignore_missing_field_id);
+        assert!(p.data_schema.is_some());
+    }
+
+    #[test]
+    fn contrib_error_display_preserves_variant_info() {
+        // The dispatcher wraps `e` via Display: `format!("contrib planner {kind:?}: {e}")`.
+        // These cases assert each variant's discriminating info survives that path.
+        let plan = ContribError::Plan("plan-context-message".into()).to_string();
+        assert!(plan.contains("plan-context-message"));
+
+        let bad = ContribError::BadPayload("decoding failed at field 7".into()).to_string();
+        assert!(bad.starts_with("bad payload: "));
+        assert!(bad.contains("decoding failed at field 7"));
+
+        let wcc = ContribError::WrongChildCount {
+            expected: "exactly 1".into(),
+            actual: 3,
+        }
+        .to_string();
+        assert!(wcc.contains("exactly 1"));
+        assert!(wcc.contains("got 3"));
+    }
+
+    #[test]
+    fn registered_contrib_kinds_reflects_current_state() {
+        let kind = "_test_kinds_snapshot_only";
+        let _guard = ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty));
+        let kinds = registered_contrib_kinds();
+        assert!(
+            kinds.iter().any(|k| k == kind),
+            "expected snapshot to include {kind:?}, got {kinds:?}"
+        );
+    }
+}
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 4fb3ed4c5d..0f42b2a6e1 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -73,6 +73,11 @@ reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-
 object_store_opendal = { version = "0.56.0", optional = true }
 hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]}
 opendal = { version = "0.56.0", optional = true, features = ["services-hdfs"] }
+# Contrib rlibs. Each is gated by a matching `contrib-<name>` Cargo feature defined
+# below in [features]. When the feature is on, the contrib's rlib is linked into core's
+# cdylib and its #[ctor] runs at library load.
+comet-contrib-spi = { path = "../contrib-spi" }
+comet-contrib-example = { path = "../../contrib/example/native", optional = true }
 iceberg = { workspace = true }
 iceberg-storage-opendal = { workspace = true }
 serde_json = "1.0"
@@ -95,11 +100,23 @@ datafusion-functions-nested = { version = "53.1.0" }
 
 [features]
 backtrace = ["datafusion/backtrace"]
+# Released cdylib ships with hdfs-opendal only -- no contrib surface. This keeps
+# `registered_contrib_kinds()` empty in production so users see only the contribs they
+# explicitly opted into (Delta, Iceberg, ...). CI / dev builds turn on `contrib-example`
+# (and the example's unit tests run under its own crate's test profile, which always
+# links the example regardless of this list).
 default = ["hdfs-opendal"]
 hdfs = ["datafusion-comet-objectstore-hdfs"]
 hdfs-opendal = ["opendal", "object_store_opendal", "hdfs-sys"]
 jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
 
+# Contrib feature flags. Each flag pulls a contrib rlib into core's cdylib so contrib
+# Rust code is linked into the single libcomet at build time; the contrib's #[ctor]
+# registers its operator planners during library init. The single-cdylib architecture
+# (rather than separate cdylib per contrib) is documented in
+# docs/source/contributor-guide/contrib-extensions.md.
+contrib-example = ["dep:comet-contrib-example"]
+
 # exclude optional packages from cargo machete verifications
 [package.metadata.cargo-machete]
 ignored = ["hdfs-sys", "paste"]
diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs
index f5b04cc51d..c35a9a6e25 100644
--- a/native/core/src/execution/jni_api.rs
+++ b/native/core/src/execution/jni_api.rs
@@ -232,6 +232,11 @@ fn op_name(op: &OpStruct) -> &'static str {
         OpStruct::Explode(_) => "Explode",
         OpStruct::CsvScan(_) => "CsvScan",
         OpStruct::ShuffleScan(_) => "ShuffleScan",
+        // Contrib operators carry their concrete identity in `ContribOp.kind`, but
+        // `op_name` returns `&'static str` for tracing/error messages. Keep the label
+        // generic here; downstream code that needs the specific contrib reads `kind`
+        // off the proto directly.
+        OpStruct::ContribOp(_) => "ContribOp",
     }
 }
 
diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs
index b00f140026..b3b18e9f75 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -17,6 +17,7 @@
 
 //! Converts Spark physical plan to DataFusion physical plan
 
+pub mod contrib;
 pub mod expression_registry;
 pub mod macros;
 pub mod operator_registry;
@@ -1959,6 +1960,83 @@ impl PhysicalPlanner {
                     )),
                 ))
             }
+            OpStruct::ContribOp(contrib_op) => {
+                // Dispatch the ContribOp envelope to a contrib-registered planner keyed
+                // by `kind`. The contrib's #[ctor] in its rlib (linked into core's cdylib
+                // via a Cargo feature flag) populates the registry at lib-init time, so
+                // by the time we reach this arm the registry is already warm. Missing
+                // registrations typically mean the JVM JAR is on the classpath but core
+                // was built without the corresponding `contrib-<name>` Cargo feature.
+                use crate::execution::planner::contrib::{
+                    lookup_contrib_planner_by_kind, CorePlannerContext,
+                };
+                let kind = contrib_op.kind.as_str();
+                if kind.trim().is_empty() {
+                    return Err(GeneralError(format!(
+                        "ContribOp.kind={kind:?} is empty or whitespace -- the JVM-side \
+                         serde produced a malformed envelope (every contrib must set a \
+                         stable kind string)"
+                    )));
+                }
+
+                // Look up the planner first so a bogus kind produces the "not registered"
+                // error rather than a misleading "payload too big" one (in case the kind
+                // is garbage and the payload also happens to be oversized).
+                let planner = lookup_contrib_planner_by_kind(kind).ok_or_else(|| {
+                    GeneralError(format!(
+                        "No contrib planner registered for ContribOp.kind={kind:?}; \
+                         did you build core with the corresponding `contrib-{kind}` \
+                         Cargo feature (or its workspace equivalent)?"
+                    ))
+                })?;
+
+                // Payload-size guard. By the time we get here prost has already decoded
+                // `contrib_op.payload` into a heap-allocated Vec<u8>, so this guard does
+                // NOT fence the proto-decode allocation itself. What it does fence: the
+                // contrib's plan() body from being invoked with an absurd payload, which
+                // typically does its own prost decode against contrib-private types --
+                // potentially several more allocations. 16 MiB is comfortably above any
+                // plausible file-scan payload (Delta with 100k tasks weighs in around
+                // 3-4 MiB) and well below "we should be worried about heap pressure".
+                // Moving the check pre-decode would require a streaming Operator parser;
+                // not worth the complexity given typical payloads are <1 MiB.
+                const MAX_CONTRIB_PAYLOAD_BYTES: usize = 16 * 1024 * 1024;
+                if contrib_op.payload.len() > MAX_CONTRIB_PAYLOAD_BYTES {
+                    return Err(GeneralError(format!(
+                        "ContribOp.kind={kind:?} payload size {} bytes exceeds limit \
+                         of {} bytes -- inspect the contrib's serde for accidental \
+                         data accumulation",
+                        contrib_op.payload.len(),
+                        MAX_CONTRIB_PAYLOAD_BYTES,
+                    )));
+                }
+
+                // Recursively build native children. The contrib gets them as
+                // `Arc<dyn ExecutionPlan>` rather than the richer `SparkPlan` because the
+                // SPI is intentionally minimal — contribs only need the DataFusion-level
+                // plan surface.
+                let mut child_scans: Vec<ScanExec> = Vec::new();
+                let mut child_shuffle_scans: Vec<ShuffleScanExec> = Vec::new();
+                let mut native_children: Vec<Arc<dyn ExecutionPlan>> = Vec::new();
+                for child in &spark_plan.children {
+                    let (mut s, mut ss, child_plan) =
+                        self.create_plan(child, inputs, partition_count)?;
+                    child_scans.append(&mut s);
+                    child_shuffle_scans.append(&mut ss);
+                    native_children.push(child_plan.native_plan.clone());
+                }
+
+                let ctx = CorePlannerContext { planner: self };
+                let exec = planner
+                    .plan(&ctx, &contrib_op.payload, native_children)
+                    .map_err(|e| GeneralError(format!("contrib planner {kind:?}: {e}")))?;
+
+                Ok((
+                    child_scans,
+                    child_shuffle_scans,
+                    Arc::new(SparkPlan::new(spark_plan.plan_id, exec, vec![])),
+                ))
+            }
             _ => Err(GeneralError(format!(
                 "Unsupported or unregistered operator type: {:?}",
                 spark_plan.op_struct
diff --git a/native/core/src/execution/planner/contrib.rs b/native/core/src/execution/planner/contrib.rs
new file mode 100644
index 0000000000..1cf9c15179
--- /dev/null
+++ b/native/core/src/execution/planner/contrib.rs
@@ -0,0 +1,249 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Re-exports + core-side `ContribPlannerContext` adapter.
+//!
+//! The SPI trait + registry live in the standalone `comet-contrib-spi` crate so both
+//! core and contribs can depend on them without forming a dependency cycle (core links
+//! contribs via Cargo feature flags, contribs need the SPI types). This module:
+//!
+//!   1. re-exports the parts of the SPI core itself imports, so existing
+//!      `crate::execution::planner::contrib::...` paths keep resolving;
+//!   2. provides `CorePlannerContext`, a thin adapter that lets a `&PhysicalPlanner` be
+//!      passed to contribs as a `&dyn ContribPlannerContext`.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::object_store::ObjectStoreUrl;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_comet_proto::{spark_expression, spark_operator};
+
+pub use comet_contrib_spi::lookup_contrib_planner_by_kind;
+#[allow(unused_imports)] // surfaced for tests + diagnostics
+pub use comet_contrib_spi::{
+    register_contrib_planner, registered_contrib_kinds, ContribError, ContribOperatorPlanner,
+    ContribPlannerContext, ParquetDatasourceParams,
+};
+
+use crate::execution::planner::PhysicalPlanner;
+use crate::parquet::parquet_exec::init_datasource_exec;
+use crate::parquet::parquet_support::prepare_object_store_with_configs;
+
+/// Adapter that exposes a `&PhysicalPlanner` (plus the session_ctx it carries) as a
+/// `ContribPlannerContext`. Construction is cheap -- just borrows the planner. The
+/// dispatcher creates one per ContribOp arm.
+pub(crate) struct CorePlannerContext<'a> {
+    pub(crate) planner: &'a PhysicalPlanner,
+}
+
+impl ContribPlannerContext for CorePlannerContext<'_> {
+    fn session_ctx(&self) -> &Arc<SessionContext> {
+        self.planner.session_ctx()
+    }
+
+    fn build_physical_expr(
+        &self,
+        expr: &spark_expression::Expr,
+        input_schema: SchemaRef,
+    ) -> Result<Arc<dyn PhysicalExpr>, ContribError> {
+        self.planner
+            .create_expr(expr, input_schema)
+            .map_err(|e| ContribError::Plan(format!("create_expr: {e}")))
+    }
+
+    fn convert_spark_schema(&self, fields: &[spark_operator::SparkStructField]) -> SchemaRef {
+        super::convert_spark_types_to_arrow_schema(fields)
+    }
+
+    fn prepare_object_store(
+        &self,
+        url: String,
+        configs: &HashMap<String, String>,
+    ) -> Result<(ObjectStoreUrl, object_store::path::Path), ContribError> {
+        prepare_object_store_with_configs(self.planner.session_ctx().runtime_env(), url, configs)
+            .map_err(|e| ContribError::Plan(format!("prepare_object_store_with_configs: {e}")))
+    }
+
+    fn build_parquet_datasource_exec(
+        &self,
+        params: ParquetDatasourceParams,
+    ) -> Result<Arc<dyn ExecutionPlan>, ContribError> {
+        init_datasource_exec(
+            params.required_schema,
+            params.data_schema,
+            params.partition_schema,
+            params.object_store_url,
+            params.file_groups,
+            params.projection_vector,
+            params.data_filters,
+            params.default_values,
+            &params.session_timezone,
+            params.case_sensitive,
+            params.return_null_struct_if_all_fields_missing,
+            self.planner.session_ctx(),
+            params.encryption_enabled,
+            params.use_field_id,
+            params.ignore_missing_field_id,
+        )
+        .map(|e| e as Arc<dyn ExecutionPlan>)
+        .map_err(|e| ContribError::Plan(format!("init_datasource_exec: {e}")))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::execution::planner::PhysicalPlanner;
+    use datafusion::arrow::datatypes::{DataType, Field, Schema};
+    use datafusion::execution::context::SessionContext;
+    use datafusion::execution::object_store::ObjectStoreUrl;
+
+    /// Production-build assertion: when no contrib feature is enabled, the registry
+    /// must be empty. Catches an accidental re-introduction of a contrib into core's
+    /// `default = [...]` feature set. Compiled out under any active contrib feature
+    /// (the test binary always links its crate's dependencies, so the assertion would
+    /// be wrong under those flags).
+    ///
+    /// MAINTENANCE: when adding a new `contrib-<name>` feature to `native/core/Cargo.toml`,
+    /// extend the `not(any(...))` predicate below with the new feature name so the
+    /// canary still compiles under that contrib's standalone CI matrix entry.
+    #[cfg(not(any(feature = "contrib-example")))]
+    #[test]
+    fn production_build_has_no_contrib_planners_registered() {
+        // Direct read through the SPI's public API. This test is the canary for
+        // the contributor-guide claim that release builds carry zero contrib surface.
+        let kinds = comet_contrib_spi::registered_contrib_kinds();
+        assert!(
+            kinds.is_empty(),
+            "default cdylib leaked contrib planners: {kinds:?}. \
+             Check native/core/Cargo.toml's `default = [...]` for contrib features."
+        );
+    }
+
+    #[test]
+    fn core_planner_context_builds_parquet_exec_with_expected_schema() {
+        // Smoke test for the adapter: build a minimal DataSourceExec through the SPI
+        // trait method and verify the schema flowed through. Catches a coarse class of
+        // bugs where init_datasource_exec call-site args go out of order -- a swap that
+        // sent `required_schema` into the `data_schema` slot would produce a different
+        // output schema.
+        let session_ctx = Arc::new(SessionContext::new());
+        let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0);
+        let ctx = CorePlannerContext { planner: &planner };
+
+        let schema: SchemaRef = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new("name", DataType::Utf8, false),
+        ]));
+        let url = ObjectStoreUrl::parse("file://").unwrap();
+        let params = ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![])
+            .with_session_timezone("UTC")
+            .with_case_sensitive(true);
+
+        let exec = ctx
+            .build_parquet_datasource_exec(params)
+            .expect("adapter should build a DataSourceExec");
+
+        // The exec's reported schema must equal the required_schema we passed in.
+        let out_schema = exec.schema();
+        assert_eq!(out_schema.fields().len(), 2);
+        assert_eq!(out_schema.field(0).name(), "id");
+        assert_eq!(out_schema.field(1).name(), "name");
+    }
+
+    #[test]
+    fn core_planner_context_session_ctx_round_trip() {
+        let session_ctx = Arc::new(SessionContext::new());
+        let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0);
+        let ctx = CorePlannerContext { planner: &planner };
+        // Arc identity check -- the contrib gets back the same SessionContext core was
+        // built with, not a copy.
+        assert!(Arc::ptr_eq(ctx.session_ctx(), &session_ctx));
+    }
+
+    #[test]
+    fn core_planner_context_converts_empty_schema() {
+        let session_ctx = Arc::new(SessionContext::new());
+        let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0);
+        let ctx = CorePlannerContext { planner: &planner };
+        let schema = ctx.convert_spark_schema(&[]);
+        assert_eq!(schema.fields().len(), 0);
+    }
+
+    #[test]
+    fn core_planner_context_encryption_flag_reaches_init_datasource_exec() {
+        // Cross-crate positional-arg-swap guard. `init_datasource_exec` takes five `bool`
+        // parameters in a row (case_sensitive, return_null_struct_..., encryption_enabled,
+        // use_field_id, ignore_missing_field_id); a swap of two of them at the call site
+        // in `build_parquet_datasource_exec` would compile fine and break silently. We
+        // exploit the asymmetry that `encryption_enabled=true` triggers an encryption-
+        // factory lookup that fails when no factory is registered, while every other
+        // bool being `true` keeps the call succeeding. So:
+        //   * Default (all bools false) -> Ok
+        //   * Same call with `encryption_enabled=true` -> Err on factory lookup
+        // If a swap accidentally routed e.g. `use_field_id` into the encryption slot, the
+        // "default" variant below would fail (because use_field_id is true here in the
+        // params struct, and the swapped slot would now enable encryption).
+        //
+        // SCOPE: this test catches swaps that involve the `encryption_enabled` slot.
+        // Swaps among the other four bools (case_sensitive / return_null_... /
+        // use_field_id / ignore_missing_field_id) are NOT caught -- the two witnesses
+        // below either set all four to true (witness #1) or all four to false
+        // (witness #2), so a permutation among them is invisible. Adding a new bool to
+        // ParquetDatasourceParams / init_datasource_exec should be accompanied by a new
+        // asymmetry witness that exercises THAT new flag.
+        let session_ctx = Arc::new(SessionContext::new());
+        let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0);
+        let ctx = CorePlannerContext { planner: &planner };
+
+        let schema: SchemaRef = Arc::new(Schema::new(vec![Field::new(
+            "id",
+            DataType::Int64,
+            false,
+        )]));
+        let url = ObjectStoreUrl::parse("file://").unwrap();
+
+        // Witness #1: all five bools `true` EXCEPT encryption_enabled. Must succeed --
+        // confirms case_sensitive / use_field_id / etc. are NOT routed into the
+        // encryption slot.
+        let no_encryption = ParquetDatasourceParams::new(Arc::clone(&schema), url.clone(), vec![])
+            .with_case_sensitive(true)
+            .with_return_null_struct_if_all_fields_missing(true)
+            .with_use_field_id(true)
+            .with_ignore_missing_field_id(true)
+            .with_encryption_enabled(false);
+        ctx.build_parquet_datasource_exec(no_encryption)
+            .expect("encryption_enabled=false must not trigger factory lookup");
+
+        // Witness #2: only encryption_enabled is true. Must fail with the encryption-factory
+        // not-found error. Confirms encryption_enabled actually reaches the encryption slot.
+        let with_encryption =
+            ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![]).with_encryption_enabled(true);
+        let err = ctx
+            .build_parquet_datasource_exec(with_encryption)
+            .expect_err("encryption_enabled=true should fail without a factory");
+        let msg = format!("{err}");
+        assert!(
+            msg.contains("encryption") || msg.contains("Encryption") || msg.contains("factory"),
+            "expected encryption-factory error, got: {msg}"
+        );
+    }
+}
diff --git a/native/core/src/execution/planner/operator_registry.rs b/native/core/src/execution/planner/operator_registry.rs
index eb31184461..302c3c9489 100644
--- a/native/core/src/execution/planner/operator_registry.rs
+++ b/native/core/src/execution/planner/operator_registry.rs
@@ -151,5 +151,34 @@ fn get_operator_type(spark_operator: &Operator) -> Option<OperatorType> {
         OpStruct::Explode(_) => None, // Not yet in OperatorType enum
         OpStruct::CsvScan(_) => Some(OperatorType::CsvScan),
         OpStruct::ShuffleScan(_) => None, // Not yet in OperatorType enum
+        // Contrib operators go through the contrib registry instead, keyed by
+        // ContribOp.kind. Returning None here keeps `OperatorRegistry::can_handle` false
+        // for contribs so they don't get caught by the in-tree registry; the dispatch
+        // arm in `planner.rs` for OpStruct::ContribOp handles them explicitly via
+        // `lookup_contrib_planner_by_kind`.
+        OpStruct::ContribOp(_) => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use datafusion_comet_proto::spark_operator::{operator::OpStruct, ContribOp, Operator};
+
+    #[test]
+    fn contrib_op_is_not_handled_by_in_tree_registry() {
+        // Guard against a future refactor that wires ContribOp into the in-tree
+        // operator registry by accident (which would double-dispatch contribs).
+        let op = Operator {
+            op_struct: Some(OpStruct::ContribOp(ContribOp {
+                kind: "anything".into(),
+                payload: vec![],
+            })),
+            ..Default::default()
+        };
+        assert!(
+            get_operator_type(&op).is_none(),
+            "ContribOp must not be mapped to an in-tree OperatorType"
+        );
     }
 }
diff --git a/native/core/src/lib.rs b/native/core/src/lib.rs
index 7d0b6a5454..4d74a7f52f 100644
--- a/native/core/src/lib.rs
+++ b/native/core/src/lib.rs
@@ -29,6 +29,13 @@ extern crate core;
 #[macro_use]
 extern crate datafusion_comet_jni_bridge;
 
+// Pull in contrib crates so their #[ctor] registration runs when libcomet is loaded.
+// Each is gated by a Cargo feature flag (see `[features]` in core's Cargo.toml). With the
+// feature off the `extern crate` line is removed by cfg and zero bytes of the contrib end
+// up in the built cdylib.
+#[cfg(feature = "contrib-example")]
+extern crate comet_contrib_example;
+
 use jni::{
     objects::{JClass, JString},
     EnvUnowned,
diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto
index 7cefe06da7..9e1f1b1767 100644
--- a/native/proto/src/proto/operator.proto
+++ b/native/proto/src/proto/operator.proto
@@ -53,9 +53,34 @@ message Operator {
     Explode explode = 114;
     CsvScan csv_scan = 115;
     ShuffleScan shuffle_scan = 116;
+    // Generic envelope for operators contributed by an extension. The contrib's JVM-side
+    // serde fills the payload with a contrib-private proto message; core's native planner
+    // dispatches to a `ContribOperatorPlanner` registered (at lib-init time) by the
+    // contrib's Rust crate, keyed by `kind`. Lets core stay format-agnostic while contrib
+    // authors evolve their own wire format on a separate cadence.
+    ContribOp contrib_op = 117;
   }
 }
 
+// Envelope for a contrib-contributed operator. Core's native planner dispatches by `kind`
+// to a `ContribOperatorPlanner` registered at library-init time (see
+// `native/core/src/execution/planner/contrib.rs`); the `payload` is opaque to core --
+// the contrib's planner decodes it into its own proto type. Each contrib ships:
+//   * a JVM JAR (Scala extension code + ServiceLoader entry), discovered via classpath
+//   * a Rust crate (rlib) compiled INTO core's cdylib via a Cargo feature flag on core,
+//     not shipped as a separate cdylib
+// Reusing the same envelope for every contrib keeps core's proto stable when contribs
+// ship/evolve independently.
+message ContribOp {
+  // Stable identifier the contrib registered under (e.g. "delta-scan", "example-constant").
+  string kind = 1;
+  // Contrib-private payload bytes. Format defined by the contrib's own proto schema.
+  bytes payload = 2;
+  // Reserve tags for future additive evolution (e.g. payload_format_version, compression,
+  // contrib_version) without risking accidental tag reuse by an evolving contrib.
+  reserved 3 to 9;
+}
+
 message SparkPartitionedFile {
   string file_path = 1;
   int64 start = 2;
diff --git a/pom.xml b/pom.xml
index 7419fecc92..e6855e1805 100644
--- a/pom.xml
+++ b/pom.xml
@@ -38,6 +38,23 @@ under the License.
     <module>common</module>
     <module>spark</module>
     <module>spark-integration</module>
+    <!--
+      Each contrib has a deps-only Maven pom (packaging=pom, no code, no shading).
+      The pom exists ONLY to declare the contrib's external Maven dependencies
+      (e.g. delta-spark for the Delta contrib). The contrib's Scala/Java sources
+      are compiled INTO comet-spark.jar by the matching `-Pcontrib-<name>` profile
+      on spark/pom.xml, with the deps pom pulled in via `<type>pom</type>` so its
+      transitive deps land on spark's compile classpath.
+
+      No reactor cycle: the contrib pom has no `<dependency>` on `comet-spark`;
+      it's a leaf list of external deps. spark/pom.xml depends on the contrib pom
+      (one direction).
+
+      The contrib's Rust rlib under contrib/<name>/native/ is similarly compiled
+      into libcomet via the matching Cargo feature on native/core. See
+      docs/source/contributor-guide/contrib-extensions.md.
+    -->
+    <module>contrib/example</module>
   </modules>
 
   <properties>
@@ -1151,6 +1168,14 @@ under the License.
             <exclude>dev/release/rat_exclude_files.txt</exclude>
             <exclude>dev/release/requirements.txt</exclude>
             <exclude>native/proto/src/generated/**</exclude>
+            <!-- Contrib build outputs: prost-build's generated Rust sits under
+                 contrib/<name>/native/src/generated/, and the META-INF/services
+                 ServiceLoader entries are plain-text class names with no comment
+                 syntax that supports a license header. Patterns are glob-style
+                 (no leading `/`) so they match against the path RAT sees
+                 regardless of which module the plugin runs against. -->
+            <exclude>**/native/src/generated/**</exclude>
+            <exclude>**/META-INF/services/**</exclude>
             <exclude>benchmarks/tpc/queries/**</exclude>
             <exclude>.claude/**</exclude>
           </excludes>
diff --git a/spark/pom.xml b/spark/pom.xml
index d3c18ccf87..f26f2f50bc 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -351,6 +351,124 @@ under the License.
         </plugins>
       </build>
     </profile>
+
+    <!--
+      Contrib bundling profiles. Symmetric with native/core/Cargo.toml's `contrib-<name>`
+      Cargo features: activating the profile compiles the contrib's Scala/Java sources
+      into comet-spark.jar (along with its META-INF/services entries and its
+      contrib-private generated proto classes). With no profile active, the contrib's
+      sources sit under contrib/<name>/ but nothing references them.
+
+      No separate Maven module per contrib; the contrib lives as a source directory
+      under contrib/<name>/, mirroring how contrib/<name>/native/ is an rlib crate
+      whose sources are compiled INTO libcomet rather than into a separate cdylib.
+      This avoids a circular Maven dep (the contrib's Scala extends SPI traits in
+      comet-spark; comet-spark would consume the contrib in turn).
+
+      Adding a new contrib? Copy the contrib-example block below, swap `example` for
+      your contrib's name, and ensure the source/resource/proto paths point at your
+      contrib's directory.
+    -->
+    <profile>
+      <id>contrib-example</id>
+      <!--
+        Pull the contrib's external Maven deps onto comet-spark's compile classpath.
+        comet-contrib-<name>-deps is a `<packaging>pom</packaging>` artifact whose
+        sole purpose is to enumerate those deps; we depend on it as `<type>pom</type>`
+        so Maven resolves its transitive deps. The example contrib's deps pom is
+        empty (no external deps); real contribs add their deps there.
+      -->
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.datafusion</groupId>
+          <artifactId>comet-contrib-example-deps-spark${spark.version.short}_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+          <type>pom</type>
+        </dependency>
+      </dependencies>
+      <build>
+        <plugins>
+          <!--
+            Inject the contrib's Scala sources into comet-spark's compile. The shim
+            module already uses this pattern (see common/pom.xml's add-shim-source
+            execution) for per-Spark-version source roots.
+          -->
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-contrib-example-source</id>
+                <phase>generate-sources</phase>
+                <goals><goal>add-source</goal></goals>
+                <configuration>
+                  <sources>
+                    <source>../contrib/example/src/main/scala</source>
+                  </sources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-contrib-example-test-source</id>
+                <phase>generate-test-sources</phase>
+                <goals><goal>add-test-source</goal></goals>
+                <configuration>
+                  <sources>
+                    <source>../contrib/example/src/test/scala</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <!--
+            Add the contrib's META-INF/services/ resources to comet-spark's resource
+            roots. The shade execution's ServicesResourceTransformer merges them with
+            any existing entries.
+          -->
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-resources-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>copy-contrib-example-resources</id>
+                <phase>process-resources</phase>
+                <goals><goal>copy-resources</goal></goals>
+                <configuration>
+                  <outputDirectory>${project.build.outputDirectory}</outputDirectory>
+                  <resources>
+                    <resource>
+                      <directory>../contrib/example/src/main/resources</directory>
+                    </resource>
+                  </resources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <!--
+            Generate the contrib's Java protos into comet-spark's target/. The contrib's
+            own .proto schema lives under contrib/example/native/src/proto/ alongside
+            the Rust-side schema (one source of truth). The contrib's classes can then
+            import comet.contrib.example.* generated classes directly.
+          -->
+          <plugin>
+            <groupId>com.github.os72</groupId>
+            <artifactId>protoc-jar-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>generate-contrib-example-proto</id>
+                <phase>generate-sources</phase>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
+                  <inputDirectories>
+                    <include>../contrib/example/native/src/proto</include>
+                  </inputDirectories>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
   </profiles>
 
   <build>
@@ -435,6 +553,16 @@ under the License.
                   <shadedPattern>${comet.shade.packageName}.guava.thirdparty</shadedPattern>
                 </relocation>
               </relocations>
+              <!--
+                Merge META-INF/services/ entries across the artifacts being shaded
+                rather than letting the last one win. This is what makes a
+                ServiceLoader-discoverable contrib's META-INF/services file survive
+                being bundled into comet-spark.jar via the contrib-<name> profile
+                below.
+              -->
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+              </transformers>
             </configuration>
           </execution>
         </executions>
diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
index 679005d9b1..311d2d2a6f 100644
--- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
+++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
@@ -87,6 +87,10 @@ class CometSparkSessionExtensions
     with Logging
     with ShimCometSparkSessionExtensions {
   override def apply(extensions: SparkSessionExtensions): Unit = {
+    // Note: contrib extension discovery happens lazily inside CometScanRule /
+    // CometExecRule (the first time either runs against a Comet-enabled session).
+    // Sessions that never enable Comet pay zero ServiceLoader cost.
+
     extensions.injectColumnar { session => CometScanColumnar(session) }
     extensions.injectColumnar { session => CometExecColumnar(session) }
     // Pre-3.5 only: tag AQE DPP regions so the conversion rules below leave them Spark-native.
diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
index 72c2bea9e4..1a4a916f8f 100644
--- a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
+++ b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
@@ -54,6 +54,7 @@ import org.apache.comet.rules.CometExecRule.allExecs
 import org.apache.comet.serde._
 import org.apache.comet.serde.operator._
 import org.apache.comet.shims.{ShimCometStreaming, ShimSubqueryBroadcast}
+import org.apache.comet.spi.CometExtensionRegistry
 
 object CometExecRule {
 
@@ -270,6 +271,27 @@ case class CometExecRule(session: SparkSession)
       case scan: CometBatchScanExec if scan.wrapped.scan.isInstanceOf[CSVScan] =>
         convertToComet(scan, CometCsvNativeScanExec).getOrElse(scan)
 
+      // Contrib marker dispatch: a `CometScanExec` tagged with a contrib's `scanImpl`
+      // (i.e. listed in `nativeParquetScanImpls`) is routed through that contrib's
+      // `matchOperator` serde rather than the generic `CometScanWrapper` below. Without
+      // this, the marker would only get JVM-side parquet bytes-reuse, never reaching the
+      // contrib's `serialize` and therefore missing format-specific concerns -- e.g.
+      // Delta column-mapping physical-name substitution and `InputFileBlockHolder`
+      // population for `input_file_name()` / `_metadata.file_path`. If the contrib's
+      // `matchOperator` chooses to return None (e.g. the contrib only wants the full
+      // native conversion for certain scans), the marker falls back to the generic
+      // wrapper path -- which is also what happens when conversion itself returns None.
+      case scan: CometScanExec
+          if CometExtensionRegistry.nativeParquetScanImpls.contains(scan.scanImpl) =>
+        val handler = CometExtensionRegistry.serdeExtensions.iterator
+          .flatMap(_.matchOperator(scan))
+          .nextOption()
+          .map(_.asInstanceOf[CometOperatorSerde[SparkPlan]])
+        handler match {
+          case Some(h) => convertToComet(scan, h).getOrElse(scan)
+          case None => convertToComet(scan, CometScanWrapper).getOrElse(scan)
+        }
+
       // Comet JVM + native scan for V1 and V2
       case op if isCometScan(op) =>
         convertToComet(op, CometScanWrapper).getOrElse(op)
@@ -349,8 +371,27 @@ case class CometExecRule(session: SparkSession)
         // if all children are native (or if this is a leaf node) then see if there is a
         // registered handler for creating a fully native plan
         if (op.children.forall(_.isInstanceOf[CometNativeExec])) {
+          // Contrib SPI: each registered CometOperatorSerdeExtension contributes a
+          // SparkPlan-class -> CometOperatorSerde map. The merged map is pre-computed
+          // once at registry load time (CometExtensionRegistry.mergedSerdes) so we
+          // don't rebuild a HashMap on every operator transform. Contribs own classes
+          // that aren't in `allExecs`, so this merge never overrides a core mapping in
+          // practice; duplicate-class detection at load() time logs a warning if it
+          // does happen.
+          // Three-step dispatch:
+          //   1. core's built-in class-keyed map (allExecs)
+          //   2. contrib serde-extensions' class-keyed map (mergedSerdes)
+          //   3. contrib serde-extensions' predicate-based matchOperator hook
+          //      (for marker-class patterns where one shared SparkPlan class --
+          //      e.g. CometScanExec -- is disambiguated by a runtime tag)
           val handler = allExecs
             .get(op.getClass)
+            .orElse(CometExtensionRegistry.mergedSerdes.get(op.getClass))
+            .orElse {
+              CometExtensionRegistry.serdeExtensions.iterator
+                .flatMap(_.matchOperator(op))
+                .nextOption()
+            }
             .map(_.asInstanceOf[CometOperatorSerde[SparkPlan]])
           handler match {
             case Some(handler) =>
@@ -545,6 +586,12 @@ case class CometExecRule(session: SparkSession)
     // We shouldn't transform Spark query plan if Comet is not loaded.
     if (!isCometLoaded(conf)) return plan
 
+    // Lazy contrib discovery. Mirrors the call in CometScanRule._apply -- either rule may
+    // be the first to run depending on which path of the plan tree fires first. load() is
+    // idempotent (AtomicBoolean gate), so the duplicate call is a no-op in steady state
+    // but makes each rule self-contained instead of relying on CometScanRule running first.
+    CometExtensionRegistry.load()
+
     // Comet does not support structured streaming. Fall back to Spark for any plan that
     // belongs to a streaming query (detected via StreamSourceAwareSparkPlan.getStream).
     if (ShimCometStreaming.isStreamingPlan(plan)) return plan
diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
index 64b69be1e9..64d555a80f 100644
--- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
+++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
@@ -50,6 +50,7 @@ import org.apache.comet.parquet.CometParquetUtils.{encryptionEnabled, isEncrypti
 import org.apache.comet.parquet.Native
 import org.apache.comet.serde.operator.{CometIcebergNativeScan, CometNativeScan}
 import org.apache.comet.shims.{CometTypeShim, ShimCometStreaming, ShimFileFormat, ShimSubqueryBroadcast}
+import org.apache.comet.spi.CometExtensionRegistry
 
 /**
  * Spark physical optimizer rule for replacing Spark scans with Comet scans.
@@ -75,6 +76,11 @@ case class CometScanRule(session: SparkSession)
   private def _apply(plan: SparkPlan): SparkPlan = {
     if (!isCometLoaded(conf)) return plan
 
+    // Lazy contrib discovery: by the time we get here Comet is enabled. load() is
+    // idempotent so subsequent invocations across plans / sessions are free. Sessions
+    // that never reach this point pay zero ServiceLoader cost.
+    CometExtensionRegistry.load()
+
     // Comet does not support structured streaming. The parallel guard in
     // CometExecRule only stops operator wrapping, so without this check we
     // would still rewrite scans to CometScanExec in a streaming plan.
@@ -114,7 +120,71 @@ case class CometScanRule(session: SparkSession)
       metadataTableSuffix.exists(suffix => scanExec.table.name().endsWith(suffix))
     }
 
-    val fullPlan = plan
+    // Contrib SPI tree-level pre-pass. Each registered extension gets a chance to rewrite
+    // the whole plan tree before per-scan dispatch begins. Used by contribs that need to
+    // undo wrapper rewrites from their own Catalyst strategies (Delta's
+    // `PreprocessTableWithDVs` is the canonical case). Fold in registration order so
+    // contribs see each other's outputs deterministically. Extensions that don't override
+    // `preTransform` inherit the trait's identity default -- zero overhead.
+    //
+    // Gated on COMET_NATIVE_SCAN_ENABLED: if the user has disabled Comet scan, the
+    // contribs' Catalyst wrappers (Delta's DV filter, etc.) are load-bearing and stripping
+    // them turns into a correctness bug. Leave the plan tree as Spark wrote it.
+    //
+    // Corruption guard: snapshot every FileSourceScanExec the extension does NOT claim
+    // before the pass, and verify each one is still present (by reference) afterwards.
+    // Contribs' `preTransform` MUST only rewrite scans they recognise; this guard catches
+    // the most dangerous violation (a contrib stripping or substituting an unrelated
+    // format's scan) regardless of whether the replacement keeps the same SparkPlan
+    // class. Plan-tree reordering is tolerated -- we only care that the scan still
+    // exists in the tree, not where.
+    //
+    // Cost: O(K * (P + S)) where K = scanExtensions.size, P = plan node count,
+    // S = scan count. IdentityHashMap gives O(1) survivor lookup; the dominant term
+    // is the tree traversals. For typical K=1..3 this is negligible.
+    //
+    // V2 scope: V2 BatchScanExecs are NOT inspected. preTransform is documented V1-only
+    // (see CometScanRuleExtension.preTransform); V2 wrapper-stripping happens per-scan
+    // inside `transformV2` and doesn't have the same tree-level corruption surface.
+    val prepped =
+      if (!CometConf.COMET_NATIVE_SCAN_ENABLED.get(conf)) {
+        plan
+      } else {
+        CometExtensionRegistry.scanExtensions.foldLeft(plan) { (p, ext) =>
+          val unclaimedBefore = p.collect {
+            case s: FileSourceScanExec if !ext.matchesV1(s.relation) => s
+          }
+          val after = ext.preTransform(p, session)
+          if (unclaimedBefore.nonEmpty) {
+            // IDENTITY semantics, NOT value-equality: Spark case classes (including
+            // FileSourceScanExec) compare equal when their fields match, so a self-join
+            // with two reads against the same table after AQE deduplication can produce
+            // two value-equal-but-reference-distinct scans. A standard mutable.Set would
+            // collapse them and we'd emit a false-positive warning. IdentityHashMap
+            // gives us O(1) lookup with reference-equality semantics.
+            val survivors =
+              new java.util.IdentityHashMap[FileSourceScanExec, java.lang.Boolean]()
+            after.foreach {
+              case s: FileSourceScanExec => survivors.put(s, java.lang.Boolean.TRUE)
+              case _ =>
+            }
+            unclaimedBefore.foreach { b =>
+              if (!survivors.containsKey(b)) {
+                logWarning(
+                  s"CometScanRuleExtension '${ext.name}'.preTransform removed or " +
+                    s"replaced a FileSourceScanExec it does not claim " +
+                    s"(matchesV1=false on its relation, ${b.relation.fileFormat}). " +
+                    s"This is a contract violation -- preTransform must only rewrite " +
+                    s"scans the extension recognises. See " +
+                    s"CometScanRuleExtension.preTransform doc.")
+              }
+            }
+          }
+          after
+        }
+      }
+
+    val fullPlan = prepped
 
     def transformScan(scanNode: SparkPlan): SparkPlan = scanNode match {
       // Tagged by CometSpark34AqeDppFallbackRule on Spark < 3.5 to keep a peer scan
@@ -141,7 +211,7 @@ case class CometScanRule(session: SparkSession)
         }
     }
 
-    plan.transform {
+    prepped.transform {
       case scan if isSupportedScanNode(scan) => transformScan(scan)
     }
   }
@@ -161,6 +231,27 @@ case class CometScanRule(session: SparkSession)
       return withInfo(scanExec, "AQE Dynamic Partition Pruning requires Spark 3.5+")
     }
 
+    // Contrib SPI dispatch: offer the scan to every registered CometScanRuleExtension
+    // before core's built-in file-format logic. Loop in registration order; the FIRST
+    // extension whose `matchesV1` returns true AND whose `transformV1` returns Some(_)
+    // wins -- its replacement plan is returned. An extension that returns None from
+    // `transformV1` means "I match this scan shape but decline to accelerate this
+    // specific instance"; the loop continues to the next extension before falling back
+    // to core's built-in file-format logic. This lets multiple contribs coexist (e.g.
+    // Iceberg + Delta both loaded) without one's decline silently masking another.
+    scanExec.relation match {
+      case r: HadoopFsRelation =>
+        val replacement = CometExtensionRegistry.scanExtensions.iterator
+          .filter(_.matchesV1(r))
+          .flatMap(ext => ext.transformV1(plan, scanExec, session))
+          .nextOption()
+        replacement match {
+          case Some(plan) => return plan
+          case None => // no extension produced a replacement; fall through
+        }
+      case _ => // SPI only operates on HadoopFsRelation V1 scans
+    }
+
     scanExec.relation match {
       case r: HadoopFsRelation =>
         if (!CometScanExec.isFileFormatSupported(r.fileFormat)) {
@@ -259,6 +350,18 @@ case class CometScanRule(session: SparkSession)
 
   private def transformV2Scan(scanExec: BatchScanExec): SparkPlan = {
 
+    // Contrib SPI dispatch (V2): mirrors transformV1Scan. Loop in registration order;
+    // first matching extension whose transformV2 returns Some wins. Decline = continue
+    // to next extension.
+    val replacement = CometExtensionRegistry.scanExtensions.iterator
+      .filter(_.matchesV2(scanExec))
+      .flatMap(ext => ext.transformV2(scanExec, session))
+      .nextOption()
+    replacement match {
+      case Some(plan) => return plan
+      case None => // no extension produced a replacement; fall through
+    }
+
     scanExec.scan match {
       case scan: CSVScan if COMET_CSV_V2_NATIVE_ENABLED.get() =>
         val fallbackReasons = new ListBuffer[String]()
@@ -677,33 +780,14 @@ case class CometScanRule(session: SparkSession)
       case _ => false
     }
 
+  // Delegate to the companion object's pure helper so the implementation lives in one
+  // place. Kept as a class-level method so existing in-class callers (transformV1Scan,
+  // transformV2Scan) compile unchanged.
   private def isSchemaSupported(
       scanExec: FileSourceScanExec,
       scanImpl: String,
-      r: HadoopFsRelation): Boolean = {
-    val fallbackReasons = new ListBuffer[String]()
-    val typeChecker = CometScanTypeChecker(scanImpl)
-    val schemaSupported =
-      typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
-    if (!schemaSupported) {
-      withInfo(
-        scanExec,
-        s"Unsupported schema ${scanExec.requiredSchema} " +
-          s"for $scanImpl: ${fallbackReasons.mkString(", ")}")
-      return false
-    }
-    val partitionSchemaSupported =
-      typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons)
-    if (!partitionSchemaSupported) {
-      withInfo(
-        scanExec,
-        s"Unsupported partitioning schema ${scanExec.requiredSchema} " +
-          s"for $scanImpl: ${fallbackReasons
-              .mkString(", ")}")
-      return false
-    }
-    true
-  }
+      r: HadoopFsRelation): Boolean =
+    CometScanRule.isSchemaSupported(scanExec, scanImpl, r)
 }
 
 case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with CometTypeShim {
@@ -743,6 +827,39 @@ case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with C
 
 object CometScanRule extends Logging {
 
+  /**
+   * Schema-support check + fallback-reason emission, callable from contrib extensions under
+   * `org.apache.comet.contrib.*`. Pure function; no shared state with CometScanRule instances.
+   * `private[comet]` keeps it out of the public API while letting subpackages (contribs) reach
+   * it.
+   */
+  private[comet] def isSchemaSupported(
+      scanExec: FileSourceScanExec,
+      scanImpl: String,
+      r: HadoopFsRelation): Boolean = {
+    val fallbackReasons = new ListBuffer[String]()
+    val typeChecker = CometScanTypeChecker(scanImpl)
+    val schemaSupported =
+      typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
+    if (!schemaSupported) {
+      org.apache.comet.CometSparkSessionExtensions.withInfo(
+        scanExec,
+        s"Unsupported schema ${scanExec.requiredSchema} " +
+          s"for $scanImpl: ${fallbackReasons.mkString(", ")}")
+      return false
+    }
+    val partitionSchemaSupported =
+      typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons)
+    if (!partitionSchemaSupported) {
+      org.apache.comet.CometSparkSessionExtensions.withInfo(
+        scanExec,
+        s"Unsupported partitioning schema ${scanExec.requiredSchema} " +
+          s"for $scanImpl: ${fallbackReasons.mkString(", ")}")
+      return false
+    }
+    true
+  }
+
   /**
    * Tag set on a scan (`FileSourceScanExec` or `BatchScanExec`) that should be left as a plain
    * Spark scan rather than converted to a Comet scan. Written by
diff --git a/spark/src/main/scala/org/apache/comet/spi/CometExtensionRegistry.scala b/spark/src/main/scala/org/apache/comet/spi/CometExtensionRegistry.scala
new file mode 100644
index 0000000000..e8ab6fc2f6
--- /dev/null
+++ b/spark/src/main/scala/org/apache/comet/spi/CometExtensionRegistry.scala
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.spi
+
+import java.util.ServiceLoader
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.internal.Logging
+
+/**
+ * Process-wide singleton that exposes the contrib extensions bundled into comet-spark.jar.
+ *
+ * Discovery uses `java.util.ServiceLoader` against `META-INF/services/` entries inside
+ * comet-spark.jar. Those entries get there at build time: each contrib (under `contrib/<name>/`)
+ * carries its own `META-INF/services/` files, and the `contrib-<name>` Maven profile on
+ * spark/pom.xml shades the contrib's classes plus those service entries into the published
+ * comet-spark.jar. A vanilla `mvn install` produces a comet-spark.jar with zero contribs; a `mvn
+ * install -Pcontrib-example` build bundles the example contrib. The native side mirrors this
+ * exactly via `--features contrib-example` on the Rust core crate.
+ *
+ * Discovery is idempotent: the first `load()` call enumerates the service entries; subsequent
+ * calls are no-ops. `load()` is invoked lazily from `CometScanRule._apply` and
+ * `CometExecRule._apply` the first time either rule runs against a Comet-enabled session. Spark
+ * sessions that never enable Comet pay zero ServiceLoader cost.
+ *
+ * Failures to instantiate individual extensions are logged at WARN but do NOT fail Comet startup
+ * -- a misconfigured contrib shouldn't take down the whole Spark session.
+ */
+object CometExtensionRegistry extends Logging {
+
+  private val loaded = new AtomicBoolean(false)
+  @volatile private var scanExts: Seq[CometScanRuleExtension] = Seq.empty
+  @volatile private var serdeExts: Seq[CometOperatorSerdeExtension] = Seq.empty
+
+  /**
+   * Discover contrib extensions on the classpath. Idempotent. Safe to call from multiple threads
+   * (only the first call performs discovery).
+   */
+  def load(): Unit = synchronized {
+    // `synchronized` (not just compareAndSet) so that concurrent callers wait for the
+    // first thread's writes to `scanExts` / `serdeExts` / `mergedSerdesCache` to publish
+    // before they return. The previous AtomicBoolean-only gate allowed thread B to
+    // observe `loaded=true` and read `Seq.empty` while thread A was still mid-loadOne.
+    // CometScanRule._apply and CometExecRule._apply both call this on first invocation,
+    // and AQE can run them concurrently across sub-queries, so the race is reachable.
+    //
+    // Contribs MUST NOT call `load()` from a `#[ctor]`-equivalent (JVM-side: a class's
+    // static initializer or trait's `object` init) -- Scala monitors are reentrant so
+    // re-entry won't deadlock, but the inner call would observe the partially-built
+    // state and re-trigger `loadOne`, shadowing the in-flight publication.
+    if (loaded.get()) return
+    val newScanExts = loadOne[CometScanRuleExtension]("CometScanRuleExtension")
+    val newSerdeExts = loadOne[CometOperatorSerdeExtension]("CometOperatorSerdeExtension")
+    val newMerged = newSerdeExts.flatMap(_.serdes).toMap
+    val newNativeParquetTags = newSerdeExts.flatMap(_.nativeParquetScanImpls).toSet
+    // Publish the @volatile fields BEFORE flipping `loaded` so other threads either see
+    // the empty defaults (and may re-enter -- benign, blocked by the monitor) or the
+    // fully-populated state (and may skip -- also benign).
+    scanExts = newScanExts
+    serdeExts = newSerdeExts
+    mergedSerdesCache = newMerged
+    nativeParquetScanImplsCache = newNativeParquetTags
+    loaded.set(true)
+    // Call `init()` AFTER publishing the volatile fields and flipping `loaded`. This lets
+    // an extension's `init` synchronously call back into the registry (e.g. to read its
+    // sibling extensions) without observing a half-built state, and it lets `init` register
+    // executor-side callbacks on `CometExecRDD` without racing the first compute call.
+    // Failures are isolated per extension so one broken contrib doesn't take down the others.
+    newSerdeExts.foreach { ext =>
+      try ext.init()
+      catch {
+        case scala.util.control.NonFatal(e) =>
+          logWarning(s"CometOperatorSerdeExtension '${ext.name}' init failed; continuing", e)
+      }
+    }
+    if (newScanExts.nonEmpty || newSerdeExts.nonEmpty) {
+      logInfo(
+        s"Comet contrib extensions loaded: " +
+          s"scan=[${newScanExts.map(_.name).mkString(", ")}], " +
+          s"serde=[${newSerdeExts.map(_.name).mkString(", ")}]")
+      detectDuplicateSerdeClasses(newSerdeExts)
+    } else {
+      // Positive signal that discovery ran. Comet-spark.jar's contrib content depends on
+      // which `-Pcontrib-<name>` Maven profiles were active at build time; this line is
+      // what tells a user whose contrib went missing whether to suspect their Comet build
+      // or their classpath.
+      logInfo(
+        "Comet contrib extensions: none discovered. comet-spark.jar was built " +
+          "without any contrib profiles enabled, or the contrib's META-INF/services " +
+          "entries were not bundled correctly.")
+    }
+  }
+
+  /** Registered scan-rule extensions, in classpath discovery order. */
+  def scanExtensions: Seq[CometScanRuleExtension] = scanExts
+
+  /** Registered operator-serde extensions, in classpath discovery order. */
+  def serdeExtensions: Seq[CometOperatorSerdeExtension] = serdeExts
+
+  /**
+   * Pre-merged serde map across all registered extensions, keyed by the `Class[_ <: SparkPlan]`
+   * the contrib uses for class-keyed dispatch in `CometExecRule`. Computed once at `load()` time;
+   * an empty map until `load()` has run.
+   */
+  def mergedSerdes: Map[
+    Class[_ <: org.apache.spark.sql.execution.SparkPlan],
+    org.apache.comet.serde.CometOperatorSerde[_]] = mergedSerdesCache
+
+  @volatile private var mergedSerdesCache: Map[
+    Class[_ <: org.apache.spark.sql.execution.SparkPlan],
+    org.apache.comet.serde.CometOperatorSerde[_]] = Map.empty
+
+  /**
+   * Union of every registered extension's `nativeParquetScanImpls`. Consumed by
+   * `CometScanExec.supportedDataFilters` to decide whether the marker scan's filter set should
+   * get the same native-parquet exclusions as `SCAN_NATIVE_DATAFUSION`. Computed once at `load()`
+   * time; empty until `load()` has run.
+   */
+  def nativeParquetScanImpls: Set[String] = nativeParquetScanImplsCache
+
+  @volatile private var nativeParquetScanImplsCache: Set[String] = Set.empty
+
+  /**
+   * Log a warning when two registered contribs claim the same `Class[_ <: SparkPlan]` for serde
+   * dispatch. The convention documented in `contrib-extensions.md` is that each contrib defines
+   * its own exec class and registers a serde keyed on that class; a collision usually means a
+   * contrib subclassed a core exec by mistake.
+   *
+   * Detection only -- the last-write-wins toMap behavior stands. We log so the user has a chance
+   * to notice; preventing the override would be a harder migration path (silent drop of one
+   * contrib's exec).
+   */
+  private def detectDuplicateSerdeClasses(exts: Seq[CometOperatorSerdeExtension]): Unit = {
+    val perClassOwners = scala.collection.mutable.Map
+      .empty[
+        Class[_ <: org.apache.spark.sql.execution.SparkPlan],
+        scala.collection.mutable.ArrayBuffer[String]]
+    exts.foreach { ext =>
+      ext.serdes.keys.foreach { cls =>
+        perClassOwners
+          .getOrElseUpdate(cls, scala.collection.mutable.ArrayBuffer.empty)
+          .+=(ext.name)
+      }
+    }
+    perClassOwners.foreach { case (cls, owners) =>
+      if (owners.size > 1) {
+        logWarning(
+          s"Multiple Comet contrib extensions claim the same exec class " +
+            s"${cls.getName}: [${owners.mkString(", ")}]. Last-write-wins; " +
+            s"this usually indicates a contrib has subclassed a core or " +
+            s"another contrib's exec instead of defining its own.")
+      }
+    }
+  }
+
+  /**
+   * Test-only: reset the registry to the empty state. Lets unit tests re-run discovery with a
+   * different classpath / overridden services. Not for production use.
+   *
+   * Visibility is `public` (rather than `private[comet]`) because contribs are not required to be
+   * packaged under `org.apache.comet.*`; a contrib living under e.g. `io.delta.comet.contrib`
+   * must still be able to reset between tests. The method's name carries the "test-only" contract
+   * by convention.
+   */
+  def resetForTesting(): Unit = synchronized {
+    // synchronized so concurrent `load()` callers don't observe torn state -- e.g.
+    // `loaded=false` with `scanExts` still populated, which would let a subsequent
+    // `load()` short-circuit on the AtomicBoolean and never re-discover.
+    loaded.set(false)
+    scanExts = Seq.empty
+    serdeExts = Seq.empty
+    mergedSerdesCache = Map.empty
+    nativeParquetScanImplsCache = Set.empty
+    // Also clear any executor-side callbacks registered via the SPI's `init` hook so the
+    // next `load()` re-registers from scratch. Without this the test that exercises
+    // `resetForTesting` + `load` would accumulate handlers across reset boundaries.
+    org.apache.spark.sql.comet.CometExecRDD.clearPartitionMetadataHandlers()
+    org.apache.spark.sql.comet.PlanDataInjector.clearContribInjectors()
+  }
+
+  private def loadOne[T](label: String)(implicit ct: scala.reflect.ClassTag[T]): Seq[T] = {
+    val cls = ct.runtimeClass.asInstanceOf[Class[T]]
+    val loader = Option(Thread.currentThread().getContextClassLoader)
+      .getOrElse(getClass.getClassLoader)
+    try {
+      val it = ServiceLoader.load(cls, loader).iterator().asScala
+      val out = scala.collection.mutable.ArrayBuffer.empty[T]
+      while (it.hasNext) {
+        // Pull each extension under its own try so one broken contrib doesn't sink the
+        // rest of the registry. ServiceLoader.next() can throw if the extension fails to
+        // instantiate (missing class, ctor exception, etc.).
+        try out += it.next()
+        catch {
+          case scala.util.control.NonFatal(e) =>
+            logWarning(s"Failed to load a $label entry; skipping: ${e.getMessage}", e)
+        }
+      }
+      out.toSeq
+    } catch {
+      case scala.util.control.NonFatal(e) =>
+        logWarning(s"$label discovery failed; no contrib extensions of this kind loaded", e)
+        Seq.empty
+    }
+  }
+}
diff --git a/spark/src/main/scala/org/apache/comet/spi/CometOperatorSerdeExtension.scala b/spark/src/main/scala/org/apache/comet/spi/CometOperatorSerdeExtension.scala
new file mode 100644
index 0000000000..345271f9a4
--- /dev/null
+++ b/spark/src/main/scala/org/apache/comet/spi/CometOperatorSerdeExtension.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.spi
+
+import org.apache.spark.sql.execution.SparkPlan
+
+import org.apache.comet.serde.CometOperatorSerde
+
+/**
+ * SPI hook that lets a contrib extension contribute additional operator-to-native serdes to
+ * `CometExecRule`. Used when a contrib needs to translate a contrib-specific physical operator
+ * (e.g. `CometDeltaNativeScanExec` for Delta) into a native plan -- the contrib provides the
+ * serde, and `CometExecRule` calls it during plan transformation.
+ *
+ * `CometExecRule` discovers implementations via `CometExtensionRegistry.serdeExtensions`
+ * (ServiceLoader-backed). Each contrib JAR ships a
+ * `META-INF/services/org.apache.comet.spi.CometOperatorSerdeExtension` resource listing its
+ * extension class.
+ *
+ * Implementations MUST be stateless / safe to share across query executions.
+ */
+trait CometOperatorSerdeExtension {
+
+  /** Human-readable name shown in logs and error messages. */
+  def name: String
+
+  /**
+   * Mapping of SparkPlan class -> serde. The contrib lists every operator class it knows how to
+   * translate to native. `CometExecRule` merges these mappings with its built-in `allExecs` to
+   * dispatch by class identity at conversion time.
+   *
+   * Convention: each contrib's mapping should reference only classes the contrib itself defines,
+   * so two contribs never claim ownership of the same operator class.
+   */
+  def serdes: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] = Map.empty
+
+  /**
+   * Predicate-based dispatch hook for contribs whose serde key cannot be expressed as a unique
+   * `SparkPlan` class. The canonical case is the `CometScanExec` marker-with-`scanImpl`-tag
+   * pattern: a contrib's `CometScanRuleExtension.transformV1` returns `CometScanExec(scanExec,
+   * session, "my-contrib-tag")`, but `CometScanExec` is a case class shared with core, so a
+   * class-keyed map can't disambiguate by the tag. The contrib overrides this method to inspect
+   * the plan and return its serde:
+   *
+   * {{{
+   *   private val MyScanImpl = "native_myformat_compat"   // contrib-local constant
+   *
+   *   override def matchOperator(op: SparkPlan): Option[CometOperatorSerde[_]] = op match {
+   *     case s: CometScanExec if s.scanImpl == MyScanImpl => Some(CometMyFormatScan)
+   *     case _ => None
+   *   }
+   * }}}
+   *
+   * `CometExecRule` consults `matchOperator` only after the class-keyed `serdes` map misses, so
+   * contribs with a unique exec class never need to implement this. Multiple registered
+   * extensions' `matchOperator` returns are tried in registration order; the first `Some` wins.
+   */
+  def matchOperator(op: SparkPlan): Option[CometOperatorSerde[_]] = None
+
+  /**
+   * Declares which `scanImpl` string tags this contrib produces from
+   * `CometScanRuleExtension.transformV1` when using the `CometScanExec(marker, scanImpl=X)`
+   * pattern. Tags listed here get `CometScanExec.supportedDataFilters`'s native-parquet filter
+   * exclusions (drop dynamic pruning + IsNull/IsNotNull on ArrayType columns), the same treatment
+   * `SCAN_NATIVE_DATAFUSION` receives.
+   *
+   * Override only if your contrib uses the marker-class pattern AND your native side goes through
+   * Comet's tuned `ParquetSource`. Contribs that define their own `SparkPlan` subclass (rather
+   * than reusing `CometScanExec`) don't need this; they control filter selection themselves.
+   *
+   * Example: a Delta contrib that uses `CometScanExec(..., scanImpl="native_delta_compat")` would
+   * override this to `Set("native_delta_compat")`.
+   */
+  def nativeParquetScanImpls: Set[String] = Set.empty
+
+  /**
+   * One-shot initialization hook invoked exactly once per JVM by `CometExtensionRegistry.load`
+   * after this extension has been instantiated. Use to register executor-side callbacks that
+   * can't be expressed declaratively in the `serdes` map -- e.g. a per-partition metadata
+   * handler on `CometExecRDD.registerPartitionMetadataHandler` for populating Spark
+   * thread-locals from a contrib's serialized per-partition payload.
+   *
+   * Default no-op so existing extensions don't have to opt in. Implementations MUST be safe
+   * to call once per JVM (e.g. don't accumulate state across query executions). Failures are
+   * logged and isolated: a broken `init` on one contrib doesn't take down the others.
+   */
+  def init(): Unit = ()
+}
diff --git a/spark/src/main/scala/org/apache/comet/spi/CometScanRuleExtension.scala b/spark/src/main/scala/org/apache/comet/spi/CometScanRuleExtension.scala
new file mode 100644
index 0000000000..9c273bf47f
--- /dev/null
+++ b/spark/src/main/scala/org/apache/comet/spi/CometScanRuleExtension.scala
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.spi
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
+import org.apache.spark.sql.execution.datasources.HadoopFsRelation
+import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
+
+/**
+ * SPI hook that lets a contrib extension intercept scan transformation in `CometScanRule`.
+ * Contribs typically use this to recognise a specific table format (Delta, Hudi, etc.) and route
+ * it through a contrib-specific native execution path.
+ *
+ * `CometScanRule` discovers implementations via `CometExtensionRegistry.scanExtensions`
+ * (ServiceLoader-backed) and offers each candidate scan to every registered extension in
+ * registration order. The first extension whose [[matchesV1]] (or [[matchesV2]]) returns true AND
+ * whose [[transformV1]] (or [[transformV2]]) returns `Some(_)` wins -- its returned plan replaces
+ * the scan subtree. An extension whose `matches` is true but whose `transform` returns `None` is
+ * treated as "declined this instance"; dispatch continues to the next matching extension. After
+ * every matching extension has declined, core's built-in file-format dispatch handles the scan as
+ * before.
+ *
+ * Contribs are discovered via the standard Java ServiceLoader. Each contrib JAR ships a
+ * `META-INF/services/org.apache.comet.spi.CometScanRuleExtension` resource listing its extension
+ * class.
+ *
+ * Implementations MUST be safe to invoke from `CometScanRule`'s `apply` method -- specifically:
+ * pure, stateless, side-effect-free with respect to the plan tree (any state needed should be
+ * derived from `scanExec` / `relation` / the surrounding plan). The registry caches instances
+ * across plans, so per-plan state on the implementation will leak between queries.
+ */
+trait CometScanRuleExtension {
+
+  /** Human-readable name shown in logs and error messages. Should be unique per extension. */
+  def name: String
+
+  /**
+   * Tree-level pre-pass run once per plan before per-scan dispatch begins. Default: identity.
+   *
+   * Use this to undo wrapper rewrites that a format's own Catalyst strategy applied. The
+   * canonical example is Delta's `PreprocessTableWithDVs` strategy, which wraps every DV-bearing
+   * Delta scan in a `Project(Filter(...))` subtree referencing a synthetic
+   * `__delta_internal_is_row_deleted` column produced by Delta's own reader. Comet reads via its
+   * own parquet path; without unwrapping that subtree, the synthetic column never gets produced
+   * and the downstream `Filter` silently drops every row. The Delta contrib's `preTransform`
+   * strips the wrapper so the clean scan reaches per-scan dispatch.
+   *
+   * '''V1-only.''' `preTransform` runs once for the whole plan and the rewritten tree is what
+   * later `transformV1` calls see via their `plan` argument. `transformV2` does NOT receive a
+   * plan-tree reference -- only the matched `BatchScanExec`. V2 contribs that need
+   * wrapper-stripping must do that work inside `transformV2` against `scanExec.scan` /
+   * `scanExec.children` directly.
+   *
+   * '''Disabled when scan conversion is off.''' `CometScanRule` skips the entire preTransform
+   * fold when `spark.comet.scan.enabled=false`. A contrib's own wrappers (Delta's DV filter,
+   * etc.) are load-bearing in that case; stripping them turns into a correctness bug.
+   *
+   * '''MUST NOT modify scans the extension does not recognise.''' Multiple registered extensions
+   * are folded over the plan in registration order; an extension that rewrites scans outside its
+   * format's domain will silently corrupt other formats' plans. `CometScanRule` logs a warning
+   * when a `FileSourceScanExec` is replaced by an extension whose `matchesV1` returns false
+   * against the original scan's relation -- contribs that trip this warning should narrow their
+   * pattern match.
+   *
+   * '''State sharing.''' Shared state between this pre-pass and later `transformV1` calls is the
+   * contrib's problem. The recommended pattern is to attach a Spark `TreeNodeTag` to nodes during
+   * `preTransform` and read it during `transformV1`. Spark's tag mechanism is tree-immutable-safe
+   * and survives plan transformations -- preferred over external mutable state which leaks across
+   * plans.
+   */
+  def preTransform(plan: SparkPlan, session: SparkSession): SparkPlan = plan
+
+  /**
+   * Whether this extension wants to handle the given V1 scan. Implementations should make a cheap
+   * decision here (typically file-format class-name probe) so non-matching paths add no per-scan
+   * overhead.
+   *
+   * Default returns false; override `matchesV1` and `transformV1` for V1 scan support.
+   */
+  def matchesV1(relation: HadoopFsRelation): Boolean = false
+
+  /**
+   * Transform the matched V1 scan. Called only when `matchesV1` returned true.
+   *
+   * Returning `None` means "I matched the scan shape but ultimately can't accelerate this
+   * specific instance" -- `CometScanRule` then continues to the NEXT registered extension whose
+   * `matchesV1` is true, falling back to core's built-in file-format dispatch only after every
+   * matching extension has declined. Returning `Some(plan)` ends dispatch and replaces the scan
+   * subtree with `plan`.
+   */
+  def transformV1(
+      plan: SparkPlan,
+      scanExec: FileSourceScanExec,
+      session: SparkSession): Option[SparkPlan] = None
+
+  /**
+   * Whether this extension wants to handle the given V2 batch scan. See `matchesV1`.
+   *
+   * Default returns false; override `matchesV2` and `transformV2` for V2 scan support.
+   */
+  def matchesV2(scanExec: BatchScanExec): Boolean = false
+
+  /**
+   * Transform the matched V2 scan. Called only when `matchesV2` returned true.
+   *
+   * Same semantics as `transformV1`: `None` falls through to the next matching extension;
+   * `Some(plan)` ends dispatch. Note that unlike `transformV1`, this method does NOT receive a
+   * plan-tree reference -- `preTransform` rewrites are not visible here. V2 contribs that need
+   * wrapper-stripping must operate on `scanExec.scan` / `scanExec.children` directly.
+   */
+  def transformV2(scanExec: BatchScanExec, session: SparkSession): Option[SparkPlan] = None
+}
diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala
index 47eda98a11..072ff6d2bf 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala
@@ -111,6 +111,16 @@ private[spark] class CometExecRDD(
       serializedPlan
     }
 
+    // Invoke registered per-partition metadata handlers. This is the SPI hook contribs
+    // use to populate executor thread-locals (e.g. `InputFileBlockHolder`) from their
+    // serialized per-partition payloads before the native iterator runs. The Delta
+    // contrib uses it so `input_file_name()` and Delta's `_metadata.file_path` resolve
+    // correctly; without this, UPDATE/DELETE/MERGE/CDC paths see empty file_path and
+    // throw `DELTA_FILE_TO_OVERWRITE_NOT_FOUND`. Handlers are called for every
+    // partition with non-empty plan data and are expected to no-op when the partition
+    // doesn't carry their proto. The registered handlers list is a `@volatile` read.
+    CometExecRDD.runPartitionMetadataHandlers(partition.planDataByKey, context)
+
     // Create shuffle block iterators for inputs that are CometShuffledBatchRDD
     val shuffleBlockIters = shuffleScanIndices.flatMap { idx =>
       inputRDDs(idx) match {
@@ -163,6 +173,56 @@ private[spark] class CometExecRDD(
 
 object CometExecRDD {
 
+  /**
+   * SPI hook signature: a callback contribs register to inspect a partition's per-partition
+   * planning data BEFORE the native iterator starts producing rows on this task. Receives the
+   * `Map[String, Array[Byte]]` of serialized per-partition payloads keyed by `sourceKey` (the
+   * same shape contribs serialize into `perPartitionByKey` at planning time). Plus the active
+   * `TaskContext` so handlers can register completion listeners.
+   *
+   * Canonical use: the Delta contrib reads its `DeltaScan` payload, extracts the AddFile path,
+   * and calls `InputFileBlockHolder.set` so `input_file_name()` and Delta's `_metadata.file_path`
+   * resolve to the file being read (otherwise UPDATE/DELETE/MERGE/CDC throw
+   * `DELTA_FILE_TO_OVERWRITE_NOT_FOUND`).
+   *
+   * The signature is deliberately the data shape, NOT a Spark-internal partition type, so contribs
+   * don't have to live under `org.apache.spark.*` to see it. Handlers MUST:
+   *   - be stateless and free of contrib-specific assumptions on partitions that don't carry
+   *     their proto (no-op silently when their expected key/payload shape isn't present),
+   *   - register a task-completion listener for any thread-local they set, so the value is
+   *     cleared at the end of the task, and
+   *   - tolerate parse failures defensively -- another contrib may own this key.
+   */
+  type PartitionMetadataHandler = (Map[String, Array[Byte]], TaskContext) => Unit
+
+  @volatile private var partitionMetadataHandlers: Vector[PartitionMetadataHandler] = Vector.empty
+
+  /**
+   * Register a per-partition metadata handler. Called once per contrib at extension-load
+   * time (from `CometOperatorSerdeExtension.init`). Registration is idempotent on the
+   * same function reference but does not de-duplicate equivalent lambdas; contribs are
+   * expected to register exactly once.
+   */
+  def registerPartitionMetadataHandler(h: PartitionMetadataHandler): Unit = synchronized {
+    if (!partitionMetadataHandlers.contains(h)) {
+      partitionMetadataHandlers = partitionMetadataHandlers :+ h
+    }
+  }
+
+  /**
+   * Test-only / contrib reset. Visibility is `public` to mirror `resetForTesting` on the registry.
+   */
+  def clearPartitionMetadataHandlers(): Unit = synchronized {
+    partitionMetadataHandlers = Vector.empty
+  }
+
+  private[comet] def runPartitionMetadataHandlers(
+      planDataByKey: Map[String, Array[Byte]],
+      context: TaskContext): Unit = {
+    val hs = partitionMetadataHandlers
+    if (hs.nonEmpty) hs.foreach(_(planDataByKey, context))
+  }
+
   /**
    * Creates an RDD for native execution with optional per-partition planning data.
    */
diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
index 652fdfc96d..7e9b2bfa80 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala
@@ -159,7 +159,13 @@ case class CometScanExec(
    * on array columns (see [[isNullCheckOnArrayColumn]]).
    */
   lazy val supportedDataFilters: Seq[Expression] = {
-    if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION) {
+    // Contribs that use the CometScanExec marker pattern with their own scanImpl
+    // string can declare that their scan goes through Comet's tuned ParquetSource
+    // (and therefore wants DataFusion-style filter exclusions) by registering the
+    // tag via `CometOperatorSerdeExtension.nativeParquetScanImpls`. Core doesn't
+    // need to know any contrib's marker name; the registry is the source of truth.
+    if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION ||
+      CometScanExec.contribNativeParquetScanImpls.contains(scanImpl)) {
       dataFilters
         .filterNot(isDynamicPruningFilter)
         .filterNot(isNullCheckOnArrayColumn)
@@ -534,6 +540,17 @@ case class CometScanExec(
 
 object CometScanExec {
 
+  /**
+   * Set of contrib-registered scanImpl tags whose CometScanExec should use Comet's native-parquet
+   * filter exclusion semantics (drop dynamic pruning + IsNull/IsNotNull on ArrayType columns).
+   * Populated lazily from
+   * `CometExtensionRegistry.serdeExtensions.flatMap(_.nativeParquetScanImpls)`. Each access
+   * re-reads the volatile field on `CometExtensionRegistry`; the cost is one HashSet lookup per
+   * CometScanExec construction, which is dwarfed by Spark's own per-plan work.
+   */
+  private[comet] def contribNativeParquetScanImpls: Set[String] =
+    org.apache.comet.spi.CometExtensionRegistry.nativeParquetScanImpls
+
   def apply(
       scanExec: FileSourceScanExec,
       session: SparkSession,
diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala
index f315aae6e2..bf0b414e86 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala
@@ -59,6 +59,34 @@ import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregat
 import org.apache.comet.serde.QueryPlanSerde.{aggExprToProto, exprToProto, isStringCollationType, supportedSortType}
 import org.apache.comet.serde.operator.CometSink
 
+/**
+ * Generic source of per-partition planning data for a Comet native exec. Implementations expose
+ * the trio of inputs that `CometExecRDD.compute` needs to feed `PlanDataInjector.injectPlanData`:
+ *
+ *   - `planDataSourceKey`: stable identifier the matching `PlanDataInjector.getKey` reproduces
+ *     by hashing the operator's common payload. Must agree between driver-side (RDD construction)
+ *     and executor-side (injector lookup) views of the SAME operator's proto.
+ *   - `planDataCommonBytes`: serialized common block (schemas, table root, filters, ...) the
+ *     contrib's `serialize` produced once per scan.
+ *   - `planDataPerPartitionBytes`: array of serialized per-partition payloads, one entry per
+ *     partition, carrying that partition's task list / file list / ranges.
+ *
+ * `findAllPlanData` checks for this trait BEFORE the hardcoded `CometNativeScanExec` /
+ * `CometIcebergNativeScanExec` arms so contribs can plug in without core-side enumeration.
+ * Core's own scans implement the trait too for symmetry (no behavioural change -- the trait's
+ * defaults just delegate to their existing accessors).
+ *
+ * Implementations whose driver-side `commonData` / `perPartitionData` require Spark's standard
+ * `prepare -> waitForSubqueries` lifecycle (typically because DPP `InSubqueryExec` values land
+ * in the per-partition payload) override `ensureSubqueriesResolvedIfApplicable` to trigger it.
+ */
+trait PlanDataSource { self: SparkPlan =>
+  def planDataSourceKey: String
+  def planDataCommonBytes: Array[Byte]
+  def planDataPerPartitionBytes: Array[Array[Byte]]
+  def ensureSubqueriesResolvedIfApplicable(): Unit = ()
+}
+
 /**
  * Trait for injecting per-partition planning data into operator nodes.
  *
@@ -79,14 +107,35 @@ private[comet] trait PlanDataInjector {
 /**
  * Registry and utilities for injecting per-partition planning data into operator trees.
  */
-private[comet] object PlanDataInjector {
+object PlanDataInjector {
 
-  // Registry of injectors for different operator types
-  private val injectors: Seq[PlanDataInjector] = Seq(
+  // Built-in injectors for core operator types. Contribs add to this via
+  // `registerInjector` from their `CometOperatorSerdeExtension.init` -- the
+  // generic SPI route -- so core stays format-agnostic.
+  private val builtinInjectors: Seq[PlanDataInjector] = Seq(
     IcebergPlanDataInjector,
-    NativeScanPlanDataInjector
-    // Future: DeltaPlanDataInjector, HudiPlanDataInjector, etc.
-  )
+    NativeScanPlanDataInjector)
+
+  @volatile private var contribInjectors: Vector[PlanDataInjector] = Vector.empty
+
+  /**
+   * SPI: register a contrib-side `PlanDataInjector`. Called once per contrib at
+   * extension-load time (from `CometOperatorSerdeExtension.init`). Registration is
+   * idempotent on the same instance but not de-duplicated across structurally-equal
+   * implementations -- contribs are expected to register exactly once.
+   */
+  def registerInjector(injector: PlanDataInjector): Unit = synchronized {
+    if (!contribInjectors.contains(injector)) {
+      contribInjectors = contribInjectors :+ injector
+    }
+  }
+
+  /** Test-only reset, mirroring `CometExtensionRegistry.resetForTesting`. */
+  def clearContribInjectors(): Unit = synchronized {
+    contribInjectors = Vector.empty
+  }
+
+  private def injectors: Seq[PlanDataInjector] = builtinInjectors ++ contribInjectors
 
   /**
    * Injects planning data into an Operator tree by finding nodes that need injection and applying
@@ -617,6 +666,13 @@ abstract class CometNativeExec extends CometExec {
           _: CometBroadcastExchangeExec | _: BroadcastQueryStageExec |
           _: CometSparkToColumnarExec | _: CometLocalTableScanExec =>
         func(plan)
+      // Any other leaf `CometNativeExec` (e.g. a contrib-defined leaf scan such as the Delta
+      // contrib's `CometDeltaNativeScanExec`) is a Comet input boundary -- recursing into its
+      // (non-existent) children would otherwise leave it invisible to the caller, which then
+      // misinterprets a leaf-only plan as having no inputs at all and crashes on
+      // `firstNonBroadcastPlan.get`. Treat it the same as the explicit list above.
+      case p: CometNativeExec if p.children.isEmpty =>
+        func(plan)
       case _: CometPlan =>
         // Other Comet operators, continue to traverse the tree.
         plan.children.foreach(foreachUntilCometInput(_)(func))
@@ -664,6 +720,16 @@ abstract class CometNativeExec extends CometExec {
   private def findAllPlanData(
       plan: SparkPlan): (Map[String, Array[Byte]], Map[String, Array[Array[Byte]]]) = {
     plan match {
+      // Contribs (e.g. the Delta contrib's `CometDeltaNativeScanExec`) implement
+      // `PlanDataSource` to expose their per-partition payload and matching
+      // `sourceKey`. Checked before the explicit core cases below so subclasses can
+      // override the trait without colliding with the hardcoded matches.
+      case src: PlanDataSource =>
+        src.ensureSubqueriesResolvedIfApplicable()
+        (
+          Map(src.planDataSourceKey -> src.planDataCommonBytes),
+          Map(src.planDataSourceKey -> src.planDataPerPartitionBytes))
+
       case iceberg: CometIcebergNativeScanExec =>
         // Trigger Spark's standard prepare -> waitForSubqueries lifecycle so DPP
         // InSubqueryExec values are resolved before commonData is read. Without this,