diff --git a/.gitignore b/.gitignore index a3c97ff992..e23903ec9e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ CLAUDE.md +# PR1-description.md is a working artifact used when manually opening PR1; not committed. +PR1-description.md target .idea *.iml @@ -10,6 +12,7 @@ metastore_db/ spark-warehouse/ dependency-reduced-pom.xml native/proto/src/generated +contrib/example/native/src/generated prebuild .flattened-pom.xml rat.txt diff --git a/contrib/example/native/Cargo.toml b/contrib/example/native/Cargo.toml new file mode 100644 index 0000000000..698814c603 --- /dev/null +++ b/contrib/example/native/Cargo.toml @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "comet-contrib-example" +description = "Worked reference implementation of a Comet contrib extension. Not published; bundled as a SPI dispatch test fixture." +# Contrib crates live OUTSIDE the workspace root directory (`native/`) but are listed as +# workspace members in `native/Cargo.toml`. Cargo's auto-discovery walks up the directory +# tree, so without the explicit pointer it can't find `native/Cargo.toml` from +# `contrib/example/native/`. +workspace = "../../../native" +version = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +edition = { workspace = true } + +[lib] +# rlib (not cdylib): linked INTO core's cdylib via the `contrib-example` Cargo feature +# flag on the core crate. There is exactly one libcomet.{so,dylib,dll} at runtime; the +# contrib's #[ctor] runs during that single library's init. +crate-type = ["rlib"] + +[dependencies] +# Depend on the thin SPI crate, NOT on core. This is what breaks the cycle: core +# depends on contribs (Cargo feature -> rlib link); both depend on contrib-spi; nothing +# depends back on core from a contrib. +comet-contrib-spi = { path = "../../../native/contrib-spi" } +datafusion = { workspace = true } +# Used only in unit tests to construct a TestCtx that implements ContribPlannerContext; +# kept in [dependencies] (not [dev-dependencies]) because the trait's typed methods take +# spark_expression / spark_operator proto refs and the impl module is not test-gated. +datafusion-comet-proto = { workspace = true } +prost = "0.14.3" +ctor = "0.4" +log = "0.4" + +# Each contrib runs its own prost-build over its own .proto files (see build.rs). +[build-dependencies] +prost-build = "0.14.3" diff --git a/contrib/example/native/build.rs b/contrib/example/native/build.rs new file mode 100644 index 0000000000..236360962e --- /dev/null +++ b/contrib/example/native/build.rs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Build script for the example contrib's proto. Mirrors `native/proto/build.rs`. +//! +//! Each contrib runs its own `prost-build` invocation against its own `.proto` files. +//! This keeps core's proto crate format-agnostic and lets contribs evolve their wire +//! format independently. The generated Rust types live under `src/generated/` and are +//! gitignored. + +use std::{fs, io::Result, path::Path}; + +fn main() -> Result<()> { + println!("cargo:rerun-if-changed=src/proto/"); + + let out_dir = "src/generated"; + if !Path::new(out_dir).is_dir() { + fs::create_dir(out_dir)?; + } + + prost_build::Config::new() + .out_dir(out_dir) + .compile_protos(&["src/proto/example_op.proto"], &["src/proto"])?; + Ok(()) +} diff --git a/contrib/example/native/src/lib.rs b/contrib/example/native/src/lib.rs new file mode 100644 index 0000000000..8bd0753fe2 --- /dev/null +++ b/contrib/example/native/src/lib.rs @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Worked reference implementation of a Comet contrib extension. +//! +//! Demonstrates two patterns future contribs will follow: +//! +//! 1. **Dispatch wiring** -- registers a `ContribOperatorPlanner` against a stable +//! `kind` string at lib-init time via `#[ctor::ctor]`. The planner is called from +//! core's `OpStruct::ContribOp` dispatcher with the contrib's payload bytes. +//! +//! 2. **Proto layer** -- the contrib has its own `proto/` directory with its own +//! `.proto` schema (`example_op.proto`). `build.rs` runs `prost-build` over it; +//! generated Rust types live under `src/generated/` (gitignored). The planner +//! decodes the payload via `prost::Message::decode` -- the same way real contribs +//! (Delta etc.) will. +//! +//! Two planner kinds are registered: +//! +//! * `example-no-op` -- returns a sentinel error. Tests use this to verify +//! the dispatch chain end-to-end. +//! * `example-constant-scan` -- decodes an `ExampleConstantScan` payload, returns +//! an `EmptyExec` sized by the payload's `row_count`. +//! Real contribs (Delta) follow the same pattern, +//! just with their own message and operator. +//! +//! The whole crate is gated by `native/core/Cargo.toml`'s `contrib-example` feature flag. +//! Build core without that feature (`cargo build --no-default-features`) and zero bytes +//! of this crate end up in `libcomet`. + +use std::sync::Arc; + +use comet_contrib_spi::{ + register_contrib_planner, ContribError, ContribOperatorPlanner, ContribPlannerContext, +}; +use datafusion::arrow::datatypes::Schema; +use datafusion::physical_plan::empty::EmptyExec; +use datafusion::physical_plan::ExecutionPlan; +use prost::Message; + +/// Generated Rust types for the contrib's proto schema. `build.rs` writes the module +/// here at compile time; `src/generated/` is gitignored. +pub mod proto { + include!(concat!("generated/", "comet.contrib.example.rs")); +} + +/// Sentinel kind used by tests to verify dispatch reaches this contrib at all. +pub const EXAMPLE_NO_OP_KIND: &str = "example-no-op"; + +/// Kind for the proto-decoding constant-scan planner. Demonstrates the +/// proto-decode-and-build path real contribs will use. +pub const EXAMPLE_CONSTANT_SCAN_KIND: &str = "example-constant-scan"; + +/// A planner that intentionally does no plan-building work. Returns a sentinel error so +/// dispatch tests can assert the message reaches this code path. The payload is ignored; +/// children are ignored. +struct NoOpPlanner; + +impl ContribOperatorPlanner for NoOpPlanner { + fn plan( + &self, + _ctx: &dyn ContribPlannerContext, + _payload: &[u8], + _children: Vec>, + ) -> Result, ContribError> { + Err(ContribError::Plan(format!( + "comet-contrib-example: NoOpPlanner reached for kind={EXAMPLE_NO_OP_KIND:?}; \ + this is the expected sentinel for SPI dispatch tests" + ))) + } +} + +/// Decodes the payload as an `ExampleConstantScan` proto and returns an `EmptyExec` +/// with a schema-less output. Real contribs use the same decode-then-build pattern -- +/// they just decode richer messages and return richer execs. +struct ConstantScanPlanner; + +impl ContribOperatorPlanner for ConstantScanPlanner { + fn plan( + &self, + _ctx: &dyn ContribPlannerContext, + payload: &[u8], + _children: Vec>, + ) -> Result, ContribError> { + let msg = proto::ExampleConstantScan::decode(payload).map_err(|e| { + ContribError::BadPayload(format!( + "ExampleConstantScan: decode failed: {e}" + )) + })?; + log::debug!( + "comet-contrib-example: ConstantScanPlanner produces {} synthetic rows", + msg.row_count + ); + // For the worked example we don't actually populate rows -- EmptyExec is fine to + // demonstrate the build path. Real contribs return their domain-specific exec + // (Delta returns the file scan + DV filter wrap). + Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty())))) + } +} + +/// Registers all of the example contrib's planners against the contrib registry at +/// library-init time. `#[ctor::ctor]` runs this constructor before `main`/`JNI_OnLoad`. +/// Comet's `libcomet` cdylib is the single library the JVM loads; this constructor runs +/// during that one library's init. +/// +/// # Panic safety +/// +/// The body is wrapped in `catch_unwind` and writes to stderr on failure. A panic inside +/// `#[ctor]` aborts the entire JVM process before `JNI_OnLoad` runs and produces no +/// diagnostic on macOS/Linux without this wrapper. Every contrib's `#[ctor]` should +/// follow the same pattern; see `docs/source/contributor-guide/contrib-extensions.md`. +/// +/// # Logging +/// +/// `log::*!` macros inside `#[ctor]` are no-ops because Comet's logger is initialised +/// later, in `Java_org_apache_comet_NativeBase_init`. Use `eprintln!` (or nothing) for +/// any ctor diagnostics that must be visible. +#[ctor::ctor] +fn register() { + let _ = std::panic::catch_unwind(|| { + register_contrib_planner(EXAMPLE_NO_OP_KIND, Arc::new(NoOpPlanner)); + register_contrib_planner(EXAMPLE_CONSTANT_SCAN_KIND, Arc::new(ConstantScanPlanner)); + }) + .map_err(|panic| { + eprintln!( + "comet-contrib-example: #[ctor] panicked during planner registration; \ + contrib will not be available. panic={panic:?}" + ); + }); +} + +#[cfg(test)] +mod tests { + use super::*; + use comet_contrib_spi::{lookup_contrib_planner_by_kind, ParquetDatasourceParams}; + use datafusion::arrow::datatypes::SchemaRef; + use datafusion::execution::context::SessionContext; + use datafusion::execution::object_store::ObjectStoreUrl; + use datafusion::physical_expr::PhysicalExpr; + use datafusion_comet_proto::{spark_expression, spark_operator}; + use std::collections::HashMap; + + /// Minimal `ContribPlannerContext` for unit-testing contrib planners that don't + /// actually need to build a parquet exec. All methods that the tests don't exercise + /// panic if invoked. + struct TestCtx { + ctx: Arc, + } + impl ContribPlannerContext for TestCtx { + fn session_ctx(&self) -> &Arc { + &self.ctx + } + fn build_physical_expr( + &self, + _expr: &spark_expression::Expr, + _input_schema: SchemaRef, + ) -> Result, ContribError> { + unimplemented!("TestCtx: build_physical_expr not used by this test") + } + fn convert_spark_schema( + &self, + _fields: &[spark_operator::SparkStructField], + ) -> SchemaRef { + unimplemented!("TestCtx: convert_spark_schema not used by this test") + } + fn prepare_object_store( + &self, + _url: String, + _configs: &HashMap, + ) -> Result<(ObjectStoreUrl, datafusion::object_store::path::Path), ContribError> { + unimplemented!("TestCtx: prepare_object_store not used by this test") + } + fn build_parquet_datasource_exec( + &self, + _params: ParquetDatasourceParams, + ) -> Result, ContribError> { + unimplemented!("TestCtx: build_parquet_datasource_exec not used by this test") + } + } + + fn test_ctx() -> TestCtx { + TestCtx { + ctx: Arc::new(SessionContext::new()), + } + } + + #[test] + fn ctor_registers_both_planners() { + // The #[ctor] above runs at process-init time for test binaries too. + assert!(lookup_contrib_planner_by_kind(EXAMPLE_NO_OP_KIND).is_some()); + assert!(lookup_contrib_planner_by_kind(EXAMPLE_CONSTANT_SCAN_KIND).is_some()); + } + + #[test] + fn constant_scan_decodes_payload_and_builds() { + let payload = proto::ExampleConstantScan { row_count: 42 }.encode_to_vec(); + let planner = ConstantScanPlanner; + let ctx = test_ctx(); + let plan = planner.plan(&ctx, &payload, vec![]).expect("decode + build"); + assert!(plan.schema().fields().is_empty()); + } + + #[test] + fn constant_scan_handles_zero_rows() { + // Worked-example coverage: row_count = 0 must not be a special case. + let payload = proto::ExampleConstantScan { row_count: 0 }.encode_to_vec(); + let planner = ConstantScanPlanner; + let ctx = test_ctx(); + let plan = planner.plan(&ctx, &payload, vec![]).expect("decode + build"); + assert!(plan.schema().fields().is_empty()); + } + + #[test] + fn constant_scan_surfaces_bad_payload() { + let planner = ConstantScanPlanner; + let ctx = test_ctx(); + let bad = b"not a valid proto"; + let err = planner + .plan(&ctx, bad, vec![]) + .expect_err("garbage should fail decode"); + match err { + ContribError::BadPayload(_) => {} + other => panic!("expected BadPayload, got {other:?}"), + } + } +} diff --git a/contrib/example/native/src/proto/example_op.proto b/contrib/example/native/src/proto/example_op.proto new file mode 100644 index 0000000000..59ae4ca761 --- /dev/null +++ b/contrib/example/native/src/proto/example_op.proto @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +syntax = "proto3"; + +// Contrib-private proto package. Each contrib's proto messages live under their own +// package so symbols never collide with core or with other contribs. +package comet.contrib.example; + +// Trivial reference message used by the worked-example contrib. A real contrib's proto +// carries whatever fields its native operator needs (file paths, predicates, schemas, +// deletion vectors, etc.). +// +// The contrib's Scala side fills this message and serializes it into the +// `ContribOp.payload` bytes; the contrib's Rust side decodes the bytes back into this +// struct in its `ContribOperatorPlanner::plan`. +message ExampleConstantScan { + // Number of rows the synthetic constant scan should emit. Bounded by the contrib's + // planner -- this is a test reference, not a useful operator. + uint32 row_count = 1; +} diff --git a/contrib/example/pom.xml b/contrib/example/pom.xml new file mode 100644 index 0000000000..6f67ac682d --- /dev/null +++ b/contrib/example/pom.xml @@ -0,0 +1,56 @@ + + + + + + 4.0.0 + + org.apache.datafusion + comet-parent-spark${spark.version.short}_${scala.binary.version} + 0.17.0-SNAPSHOT + ../../pom.xml + + + + comet-contrib-example-deps-spark${spark.version.short}_${scala.binary.version} + comet-contrib-example-deps + pom + + + + + diff --git a/contrib/example/src/main/resources/META-INF/services/org.apache.comet.spi.CometScanRuleExtension b/contrib/example/src/main/resources/META-INF/services/org.apache.comet.spi.CometScanRuleExtension new file mode 100644 index 0000000000..13c4689816 --- /dev/null +++ b/contrib/example/src/main/resources/META-INF/services/org.apache.comet.spi.CometScanRuleExtension @@ -0,0 +1 @@ +org.apache.comet.contrib.example.ExampleScanRuleExtension diff --git a/contrib/example/src/main/scala/org/apache/comet/contrib/example/ExampleScanRuleExtension.scala b/contrib/example/src/main/scala/org/apache/comet/contrib/example/ExampleScanRuleExtension.scala new file mode 100644 index 0000000000..6ef10587e0 --- /dev/null +++ b/contrib/example/src/main/scala/org/apache/comet/contrib/example/ExampleScanRuleExtension.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.contrib.example + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.datasources.HadoopFsRelation + +import org.apache.comet.spi.CometScanRuleExtension + +/** + * Worked-reference `CometScanRuleExtension` for new contrib authors. This implementation is + * intentionally trivial: it does not match any scan and never transforms anything. What it proves + * end-to-end at runtime is: + * + * 1. `META-INF/services/org.apache.comet.spi.CometScanRuleExtension` discovery works: + * `CometExtensionRegistry.load()` finds this class via ServiceLoader as soon as the contrib + * JAR is on the classpath. + * + * 2. The wiring in `CometScanRule.transformV1Scan` / `transformV2Scan` actually iterates + * extensions: even though this one returns `false` from `matchesV1` and `matchesV2`, the registry + * call happens for every scan. + * + * Real contribs replace `matchesV1` / `transformV1` with real probes against the scan's + * `relation.fileFormat` (e.g. Delta would detect `DeltaParquetFileFormat`) and `transformV1` with + * the contrib's native dispatch. + * + * The matching native-side counterpart lives in `contrib/example/native/src/lib.rs` -- it + * registers a `ContribOperatorPlanner` under the same kind string used by any future Scala-side + * serde this example might add. + */ +class ExampleScanRuleExtension extends CometScanRuleExtension with Logging { + override val name: String = "example" + + override def matchesV1(relation: HadoopFsRelation): Boolean = { + // Sentinel: only match if a synthetic option declares this contrib should claim the + // scan. Production contribs replace this with a real file-format probe; here we want + // the test to be able to opt in deterministically. + relation.options + .get(ExampleScanRuleExtension.MarkerOptionKey) + .contains(ExampleScanRuleExtension.MarkerOptionValue) + } + + // matchesV2 / transformV1 / transformV2 inherit the trait defaults (`false` / `None`). + // This example only demonstrates V1 discovery. A real contrib would override the + // transform methods to build its native plan. +} + +object ExampleScanRuleExtension { + + /** + * Test-only option key. A Spark read can set this on `HadoopFsRelation.options` to trigger + * `ExampleScanRuleExtension.matchesV1` and verify the SPI is being consulted. + */ + val MarkerOptionKey: String = "comet.contrib.example.marker" + + /** Sentinel value the marker option must equal. */ + val MarkerOptionValue: String = "match" +} diff --git a/contrib/example/src/test/scala/org/apache/comet/contrib/example/ExampleScanRuleExtensionSuite.scala b/contrib/example/src/test/scala/org/apache/comet/contrib/example/ExampleScanRuleExtensionSuite.scala new file mode 100644 index 0000000000..7de0fd6b34 --- /dev/null +++ b/contrib/example/src/test/scala/org/apache/comet/contrib/example/ExampleScanRuleExtensionSuite.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.contrib.example + +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.comet.spi.CometExtensionRegistry + +/** + * Verifies the JVM half of the contrib SPI by going through public API only: + * + * * `CometExtensionRegistry.load()` discovers this contrib via its + * `META-INF/services/org.apache.comet.spi.CometScanRuleExtension` entry. * The discovered + * extension is the `ExampleScanRuleExtension` defined in this module. * `matchesV1` honours the + * test-only marker option so a real `CometScanRule.transformV1Scan` integration test could + * deterministically opt in. + * + * Native-side dispatch (the `OpStruct::ContribOp` arm in core's planner that delegates to the + * example's Rust `NoOpPlanner`) is exercised by core's own integration tests when built with the + * `contrib-example` Cargo feature on -- not duplicated here. + */ +class ExampleScanRuleExtensionSuite extends AnyFunSuite { + + test("CometExtensionRegistry discovers ExampleScanRuleExtension via ServiceLoader") { + // The registry caches discovery results across calls; reset so this test sees a + // deterministic load against the current test classpath. + CometExtensionRegistry.resetForTesting() + CometExtensionRegistry.load() + + val found = CometExtensionRegistry.scanExtensions.find(_.name == "example") + assert(found.isDefined, "ServiceLoader should have discovered the example contrib") + assert(found.get.isInstanceOf[ExampleScanRuleExtension]) + } + + test("matchesV1 returns true only when the marker option is set") { + val ext = new ExampleScanRuleExtension + + // We construct a minimal HadoopFsRelation just enough to call matchesV1. The trait + // method only reads `relation.options` so we don't need a real file format/schema. + // + // Use getOrCreate so the test reuses any already-running singleton SparkSession + // (e.g., from another suite). Critically, DO NOT call `stop()` in a finally block: + // stop() tears down the JVM-wide singleton and breaks every other test sharing it. + val sparkSession = org.apache.spark.sql.SparkSession + .builder() + .master("local[1]") + .appName("ExampleScanRuleExtensionSuite") + .getOrCreate() + val relationWithoutMarker = new org.apache.spark.sql.execution.datasources.HadoopFsRelation( + location = new org.apache.spark.sql.execution.datasources.InMemoryFileIndex( + sparkSession, + Seq.empty, + Map.empty, + None), + partitionSchema = new org.apache.spark.sql.types.StructType(), + dataSchema = new org.apache.spark.sql.types.StructType(), + bucketSpec = None, + fileFormat = new org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat(), + options = Map.empty)(sparkSession) + assert(!ext.matchesV1(relationWithoutMarker), "no marker -> no match") + + val relationWithMarker = relationWithoutMarker.copy(options = Map( + ExampleScanRuleExtension.MarkerOptionKey -> + ExampleScanRuleExtension.MarkerOptionValue))(sparkSession) + assert(ext.matchesV1(relationWithMarker), "marker present -> match") + } +} diff --git a/docs/source/contributor-guide/contrib-extensions.md b/docs/source/contributor-guide/contrib-extensions.md new file mode 100644 index 0000000000..e7a49421dd --- /dev/null +++ b/docs/source/contributor-guide/contrib-extensions.md @@ -0,0 +1,898 @@ + + +# Authoring a Comet contrib extension + +A Comet *contrib* is a self-contained extension that lives alongside core but ships +independently. Contribs add support for a specific table format or operator class +without core having to know about them at build time. + +The first contrib in the tree is +[`contrib/example/`](https://github.com/apache/datafusion-comet/tree/main/contrib/example) — +read it top-to-bottom as the worked reference. This guide adds the architectural context +and walks through every integration point that the example does not exercise. + +## Architecture at a glance + +A contrib has two halves, both **bundled into Comet's published artifacts at build +time** when their matching flags are enabled. Nothing about a contrib is independently +distributable — the contrib lives inside Comet's release. + +- **JVM half** — Scala/Java classes plus generated Java proto. Built as a Maven + submodule under `contrib//` and **shaded into `comet-spark.jar`** via the + `-Pcontrib-` Maven profile on `spark/pom.xml`. With no profile active, the + contrib's classes are not in the published JAR. The contrib's `META-INF/services/` + entries are bundled along with the classes; ServiceLoader at runtime then discovers + them from inside `comet-spark.jar` itself. +- **Native half** — a Rust `rlib` crate (NOT `cdylib`) **linked into `libcomet`** via + the matching `--features contrib-` Cargo flag on the core crate. The contrib's + `#[ctor]` registers its operator planners during library load. + +The two halves are symmetric: contribs are build-time options on Comet, JVM and +native both. `mvn install -Pcontrib-example && cargo build --features contrib-example` +produces a Comet build that includes the example contrib in both `comet-spark.jar` and +`libcomet`; a vanilla build of either side produces an artifact with zero contrib +surface. + +The wire format between JVM and native uses a single generic envelope on the operator +proto, `ContribOp { kind, payload }`. Core's planner dispatches by `kind`; the contrib's +native crate registers planners against the same `kind` string the contrib's JVM code +writes into the proto. + +## Required files (mirror `contrib/example/` exactly) + +A contrib is a directory of sources plus a deps-only Maven pom. The contrib's +Scala/Java sources are pulled into `comet-spark`'s compile by a profile on +`spark/pom.xml`; the contrib's Rust sources are pulled into `libcomet` by a Cargo +feature on `native/core`. The `pom.xml` exists solely to enumerate external Maven +deps (e.g., `io.delta:delta-spark` for a Delta contrib); it does NOT produce code +and does NOT depend on `comet-spark` (those two together would create a Maven +reactor cycle). + +``` +contrib// + pom.xml ← pom; declares external Maven deps only + src/main/scala/org/apache/comet/contrib// + .scala ← CometScanRuleExtension / CometOperatorSerdeExtension impl + src/main/resources/META-INF/services/ + org.apache.comet.spi.CometScanRuleExtension ← one line per extension class + org.apache.comet.spi.CometOperatorSerdeExtension ← (only if you implement serdes) + src/test/scala/org/apache/comet/contrib// + Suite.scala ← integration test (runs as part of comet-spark's tests when profile active) + native/ + Cargo.toml ← rlib crate, workspace = "../../../native" + build.rs ← runs prost-build over your proto schema + src/lib.rs ← ContribOperatorPlanner impl + #[ctor] registration + src/proto/.proto ← contrib-private proto schema (also used by JVM-side protoc generation) + src/generated/ ← (gitignored) prost-build output +``` + +The `pom.xml` is a `pom` with one job: list the contrib's +external Maven deps. A Delta contrib's pom would carry `` entries for +`io.delta:delta-spark`. `spark/pom.xml`'s `contrib-` profile depends on this +deps-pom via `pom`, which transitively resolves the listed deps onto +comet-spark's classpath. + +Plus a handful of build-config edits (collected under "Wiring into core", below). + +### Prerequisites + +You need: + +- The same toolchain Comet's main build uses: JDK 11+ (Maven build), Rust stable, `protoc` + (pulled in automatically by `protoc-jar-maven-plugin` and `prost-build`). +- The contrib's `` decided in advance — it becomes a Cargo feature flag + (`contrib-`), an artifact ID, a JNI symbol prefix if your contrib calls into its + own Rust, and a `kind` string component for every `ContribOp`. Choose a short, stable + identifier; renames are breaking. + +### `.gitignore` + +The generated proto outputs are checked in nowhere. Add a line to the repo-root +`.gitignore` mirroring the existing `contrib/example/native/src/generated` entry: + +``` +contrib//native/src/generated +``` + +`contrib//target/` is already gitignored by the repo-root pattern. + +### Workspace placement constraint + +`contrib//native/Cargo.toml` uses `workspace = "../../../native"`. This relative +path assumes contribs live exactly at `/contrib//native`. Deeper nesting +breaks the workspace lookup; place the contrib at the documented depth. + +## Wiring into core + +Five edits, three per side (JVM) + two (native): + +### JVM side + +1. **Root `pom.xml`** — add `contrib/` so Maven always builds + the contrib's deps-pom. The pom is tiny (no code, no JAR — just `pom`). +2. **`contrib//pom.xml`** — create a `pom` file enumerating + your external Maven deps. Copy `contrib/example/pom.xml` as the template; the + example's `` block is empty (no external deps needed). A Delta-style + contrib would add e.g.: + + ```xml + + + io.delta + delta-spark_${scala.binary.version} + 3.3.2 + provided + + + ``` + + Use `provided` for deps the user supplies on their Spark classpath; + `compile` if the contrib ships them itself (shaded into comet-spark + via the inherited shade execution). + +3. **`spark/pom.xml`** — add a `contrib-` profile under ``. Copy the + `contrib-example` profile as the template. The profile (a) depends on the contrib's + deps-pom via `pom`, (b) uses `build-helper-maven-plugin` to add the + contrib's source/test directories, (c) uses `maven-resources-plugin` to merge in + `META-INF/services` entries, and (d) uses `protoc-jar-maven-plugin` to generate + the contrib's Java protos. See `contrib/example`'s entry in `spark/pom.xml` for + the verbatim block to copy. + +### Native side + +2. **`native/Cargo.toml`** — add `../contrib//native` to the workspace `members` + list (NOT `default-members` — contribs are consumed via core's feature flags). +3. **`native/core/Cargo.toml`** — add a `contrib-` feature gate and a matching + optional `dep:` entry, mirroring the `contrib-example` lines: + + ```toml + [dependencies] + comet-contrib- = { path = "../../contrib//native", optional = true } + + [features] + contrib- = ["dep:comet-contrib-"] + ``` + + Do **not** add the feature to `default = [...]`. Production builds carry zero contrib + surface by design; users opt in explicitly. +4. **`native/core/src/lib.rs`** — add the matching feature-gated `extern crate` so the + contrib's `#[ctor]` is linked in when the feature is on: + + ```rust + #[cfg(feature = "contrib-")] + extern crate comet_contrib_; + ``` + +## Build matrix + +```bash +# Vanilla Comet build: zero contribs on either side. +mvn install +cargo build + +# Build with the example contrib bundled into both halves. +mvn install -Pcontrib-example +cargo build --features contrib-example + +# Multiple contribs at once. +mvn install -Pcontrib-example,contrib-delta +cargo build --features 'contrib-example contrib-delta' + +# Verify the slim native build path. +cargo build --no-default-features +``` + +The JVM and native flags MUST agree for a contrib to work. Activating only the Maven +profile gives you a `comet-spark.jar` whose serde produces `ContribOp` envelopes the +native side can't dispatch (you'll get +`No contrib planner registered for ContribOp.kind=...`). Activating only the Cargo +feature gives you a `libcomet` ready to dispatch a contrib whose serde isn't on the +classpath, so the registered planner sits dormant. The contributor guide and release +notes call out both flags together. + +A core test under `#[cfg(not(any(feature = "contrib-example")))]` (today's form; +the `any(...)` will list every contrib feature once more are added) asserts +`registered_contrib_kinds()` is empty in the slim build. When you add a new +`contrib-` feature, **extend that `cfg` predicate** (see +`native/core/src/execution/planner/contrib.rs`'s `production_build_has_no_contrib_planners_registered`) +to add `feature = "contrib-"` so the canary still compiles on your contrib's CI row. + +## SPI stability + +The contrib SPI is currently **alpha** — minor Comet versions may carry breaking +changes during the early-adopter period. Because contribs ship in-tree (as part of +Comet's release), every Comet build is internally consistent — a `0.18.x` +`comet-spark.jar` is bundled with `0.18.x` contribs. Version-skew concerns +("contrib JAR built against 0.17, Comet runtime 0.18") don't apply. + +What stability guarantees the SPI does aim for: + +- `ParquetDatasourceParams` and `ContribError` are `#[non_exhaustive]` so additive + changes (new fields / variants) are minor bumps, not breaks. Use + `ParquetDatasourceParams::new(...)` + `with_*` setters rather than struct-literal + syntax; consumers of `ContribError` must include a wildcard match arm. +- Scala SPI traits add new methods with default implementations (default `false` / + `None`). Override only the methods you need; an additive method change is a minor + bump. Abstract-method additions are breaking and called out in release notes. +- Releases that change the SPI in a breaking way will say so explicitly. + +## SPI surface + +### JVM side: `org.apache.comet.spi` + +| Trait / Object | Purpose | +|---|---| +| `CometScanRuleExtension` | Intercept scan-tree transformation. See subsections below. | +| `CometOperatorSerdeExtension` | Contribute additional `SparkPlan` class → `CometOperatorSerde` mappings to `CometExecRule`. See subsections below. | +| `CometExtensionRegistry` | Process-wide singleton. `load()` is invoked lazily from `CometScanRule._apply` / `CometExecRule._apply` the first time Comet runs against a Comet-enabled session — sessions that never enable Comet pay zero ServiceLoader cost. Subsequent calls are no-ops. `resetForTesting()` (public) clears the registry between tests. | + +#### `CometScanRuleExtension` + +- `name: String` — human label used in logs and warnings. +- `preTransform(plan, session): SparkPlan` (default identity) — tree-level pre-pass run + once per plan before per-scan dispatch. **V1-only.** Use it to undo wrapper rewrites + applied by your format's own Catalyst strategy (Delta's `PreprocessTableWithDVs` is + the canonical case). Skipped entirely when `spark.comet.scan.enabled=false` — your + wrappers become load-bearing in that mode and stripping them would be a correctness + bug. `CometScanRule` logs a warning when an extension replaces a `FileSourceScanExec` + whose relation it does not claim; this catches accidental cross-format corruption. +- `matchesV1(relation): Boolean` (default `false`) / `transformV1(plan, scanExec, session): Option[SparkPlan]` + — V1 dispatch. Make `matchesV1` cheap (typically a file-format class probe). +- `matchesV2(scanExec): Boolean` (default `false`) / `transformV2(scanExec, session): Option[SparkPlan]` + — V2 dispatch. Unlike V1, `transformV2` does **not** receive a plan-tree reference; + any wrapper-stripping a V2 contrib needs must happen against `scanExec.scan` / + `scanExec.children` directly. + +Dispatch iterates registered extensions in registration order; the first one whose +`match*` returns `true` AND `transform*` returns `Some` wins. `None` from +`transform*` is treated as "decline this instance" and dispatch continues to the next +matching extension before falling back to core. + +Pass state from `preTransform` to `transformV1` via Spark's `TreeNodeTag` mechanism — +do NOT use external mutable state, which leaks across plan invocations. + +#### `CometOperatorSerdeExtension` + +```scala +trait CometOperatorSerdeExtension { + def name: String + def serdes: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] +} +``` + +Two dispatch shapes are supported: + +**Class-keyed** — the contrib defines its own `SparkPlan` subclass (typical for +operator-style contribs): + +```scala +case class CometMyFormatScanExec(...) extends CometNativeExec { /* ... */ } + +class MyFormatSerdeExtension extends CometOperatorSerdeExtension { + override def name: String = "myformat" + override def serdes: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] = + Map(classOf[CometMyFormatScanExec] -> CometMyFormatScanSerde) +} +``` + +The merged map across all extensions is computed once at registry load time; +`CometExecRule` consults it via `.get(op.getClass)`. Duplicate class keys across +contribs are logged as a warning at load — the convention is **one contrib defines a +class, that contrib owns its serde**. + +**Predicate-keyed (marker-class with scanImpl tag)** — required when the contrib uses +core's `CometScanExec` as a marker disambiguated by a `scanImpl` string. `CometScanExec` +is a Scala case class shared with core, so two contribs marking different tag values +on the same class would otherwise collide. Override `matchOperator` instead of (or in +addition to) populating `serdes`, and declare your tag(s) via `nativeParquetScanImpls` +if your scan goes through Comet's tuned ParquetSource: + +```scala +class MyFormatSerdeExtension extends CometOperatorSerdeExtension { + override def name: String = "myformat" + + // Your contrib's scanImpl marker. Pick a stable string; no central registry of these + // exists in core, but conventionally contribs use snake-case like "native__compat". + private val MyScanImpl = "native_myformat_compat" + + override def matchOperator(op: SparkPlan): Option[CometOperatorSerde[_]] = op match { + case s: CometScanExec if s.scanImpl == MyScanImpl => Some(CometMyFormatScan) + case _ => None + } + + // Tell core's CometScanExec.supportedDataFilters to apply DataFusion-style filter + // exclusions to this tag. Required when your scan goes through Comet's tuned + // ParquetSource (the same path SCAN_NATIVE_DATAFUSION uses). + override def nativeParquetScanImpls: Set[String] = Set(MyScanImpl) +} +``` + +`CometExecRule` checks `matchOperator` only after the class-keyed `serdes` map misses, +so the two patterns coexist. Multiple registered extensions' `matchOperator` calls are +tried in registration order; the first `Some` wins. + +Core's CometConf defines `SCAN_NATIVE_DATAFUSION` / `SCAN_NATIVE_ICEBERG_COMPAT` for +core's own scan variants. Contribs are expected to define their own scanImpl strings +inside their own code (not in `CometConf`); registering via `nativeParquetScanImpls` +is the SPI hook that lets `CometScanExec.supportedDataFilters` apply the right filter +treatment without core needing to know the contrib's tag name. + +##### `CometOperatorSerde[T <: SparkPlan]` contract + +The serde itself lives in `org.apache.comet.serde.CometOperatorSerde` (not in the `spi` +package). Implement four members: + +```scala +class CometMyFormatScanSerde extends CometOperatorSerde[CometMyFormatScanExec] { + override def enabledConfig: Option[ConfigEntry[Boolean]] = + Some(CometConf.COMET_MYFORMAT_ENABLED) + + override def requiresNativeChildren: Boolean = false + + override def getSupportLevel(op: CometMyFormatScanExec): SupportLevel = + Compatible(None) + + override def convert( + op: CometMyFormatScanExec, + builder: Operator.Builder, + childOp: Operator*): Option[Operator] = { + // Build your contrib-private payload message and wrap in ContribOp. + // See "Building a ContribOp envelope" below. + Some(builder + .setContribOp(ContribOp.newBuilder() + .setKind("myformat-scan") + .setPayload(myPayload.toByteString)) + .build()) + } + + override def createExec(nativeOp: Operator, op: CometMyFormatScanExec): CometNativeExec = + new CometMyFormatScanExec(nativeOp, op.output, op.child, /* ... */) +} +``` + +`convert` MUST return `Some(builder.setContribOp(...).build())` for the dispatch to +reach your native planner; returning `None` falls the operator back to Spark. + +### Native side: `comet-contrib-spi` crate + +| Item | Purpose | +|---|---| +| `trait ContribOperatorPlanner` | Implemented by the contrib's native crate. `plan(ctx, payload, children) -> Arc` receives a `&dyn ContribPlannerContext` (handle to core's planner services), the contrib-private payload bytes, and the already-built native children. | +| `trait ContribPlannerContext` | Implemented by core. Exposes the parquet exec builder, expression planner, schema conversion, object-store registration, and the `SessionContext` itself. Contribs reach into core through this trait rather than depending on `datafusion-comet` directly. | +| `struct ParquetDatasourceParams` | `#[non_exhaustive]` argument bundle for the parquet exec builder. Construct via `ParquetDatasourceParams::new(required_schema, object_store_url, file_groups)` and chain `with_*` setters. | +| `register_contrib_planner(kind, planner)` | Process-wide registry. Called from the contrib's `#[ctor::ctor]` at library load. | +| `lookup_contrib_planner_by_kind(kind)` | Used by core's planner; contribs rarely call directly. | +| `registered_contrib_kinds()` | Diagnostic snapshot of registered kinds. | +| `ContribError` | `#[non_exhaustive]` error type. Variants: `Plan(String)`, `BadPayload(String)`, `WrongChildCount { expected: String, actual: usize }`. Pattern matches MUST include a wildcard arm. | +| `ScopedContribPlannerRegistration` | (`#[cfg(any(test, feature = "test-utils"))]`) RAII guard that registers a planner for the lifetime of the guard and removes it on drop. Use in unit tests that exercise dispatch without polluting the global registry. | +| `_clear_for_test()` | (`#[cfg(any(test, feature = "test-utils"))]`) Wipes the registry entirely. **Test escape hatch only** — using it in parallel with other registry consumers is unsafe; prefer `ScopedContribPlannerRegistration`. | + +The SPI crate depends only on `datafusion`, `datafusion-comet-proto`, and +`object_store`. Core links contribs via Cargo feature flags; contribs depend on the SPI +crate; nothing depends back on core from a contrib — the dependency graph is a DAG. + +#### Why `ContribOperatorPlanner` is `Send + Sync` but `ContribPlannerContext` isn't + +The planner trait is stored in an `Arc` inside a process-wide registry shared across +threads, so `Send + Sync` is load-bearing. The context is short-lived: a `&dyn` +reference passed for the duration of one synchronous `plan()` call, so the bound would +only restrict implementations without adding safety. Core's `PhysicalPlanner` carries +JNI handles that aren't `Send`; requiring it would force an `Arc>` dance +for no gain. + +Contribs that want to spawn async work during `plan()` must capture only the +`Arc` (which **is** `Send + Sync`) before crossing a thread boundary — +not the `&dyn ContribPlannerContext` itself. + +#### Why `payload: &[u8]` instead of `Bytes` + +The dispatcher already owns the decoded `ContribOp` proto; passing `&[u8]` is zero-copy +and avoids forcing every contrib to depend on the `bytes` crate. `prost::Message::decode` +accepts `&[u8]` directly. Contribs that want `Bytes` for downstream zero-copy work can +convert via `bytes::Bytes::copy_from_slice(payload)` — one allocation, once per plan +call. + +#### `ContribError::WrongChildCount` convention + +`expected` is a free-form human description; conventionally a phrase like `"exactly 1"` +or `"0 or 1"`. The dispatcher displays: +`wrong child count: expected exactly 1, got 2`. + +#### Error message convention + +The dispatcher wraps every `ContribError` with `format!("contrib planner {kind:?}: {e}")`, +so contribs should NOT re-prefix their messages with their own `kind`. Write: + +```rust +ContribError::Plan(format!("file not found: {path}")) +``` + +not: + +```rust +ContribError::Plan(format!("myformat-scan: file not found: {path}")) // double prefix +``` + +## Proto layer + +Each contrib carries its own `.proto` schema defining the message its `ContribOp.payload` +carries. Both halves of the contrib generate code from the same `.proto` source: + +- **Rust**, in the contrib's `build.rs` via `prost-build`. +- **Java**, in the contrib's `pom.xml` via `protoc-jar-maven-plugin`. + +Use your own proto **package name** (e.g., `comet.contrib.`) so symbols never +collide with core or with other contribs. Add `contrib//native/src/generated/` +to `.gitignore`. + +### Proto, native side + +`contrib/example/native/build.rs` is the template: + +```rust +use std::{fs, io::Result, path::Path}; + +fn main() -> Result<()> { + // rerun-if-changed so cargo rebuilds when you edit your .proto during dev. + println!("cargo:rerun-if-changed=src/proto/"); + + let out_dir = "src/generated"; + if !Path::new(out_dir).is_dir() { + fs::create_dir(out_dir)?; + } + prost_build::Config::new() + .out_dir(out_dir) + .compile_protos(&["src/proto/.proto"], &["src/proto"])?; + Ok(()) +} +``` + +Note: writing into `src/generated/` rather than `$OUT_DIR` is a deliberate deviation +from idiomatic prost. It lets `lib.rs` do +`include!(concat!("generated/", "comet.contrib.example.rs"))` with a stable filesystem +path — convenient for editor tooling. The file is gitignored. + +The contrib's `Cargo.toml` adds `prost-build` to `[build-dependencies]` and `prost` +to `[dependencies]`. + +### Proto, JVM side + +Add `protoc-jar-maven-plugin` to your contrib `pom.xml`, pointing at your `.proto` +schema. Generated Java classes end up under `target/generated-sources/protobuf/java/` +and get compiled into the contrib's JAR by the inherited `scala-maven-plugin`: + +```xml + + + com.google.protobuf + protobuf-java + ${protobuf.version} + + + + + + + com.github.os72 + protoc-jar-maven-plugin + ${protoc-jar-maven-plugin.version} + + + generate-sources + run + + com.google.protobuf:protoc:${protobuf.version} + + native/src/proto + + + + + + + +``` + +**Shading is handled automatically.** When the `contrib-` profile on +`spark/pom.xml` bundles your contrib into `comet-spark.jar`, the inherited shade +execution relocates `com.google.protobuf` to `${comet.shade.packageName}.protobuf` +across both your classes and `comet-spark`'s. Don't add your own `maven-shade-plugin` +execution to the contrib pom; that would shade twice and break the runtime types. + +### Building a `ContribOp` envelope + +From your `CometOperatorSerde.convert`: + +```scala +import org.apache.comet.serde.OperatorOuterClass.{ContribOp, Operator} +import comet.contrib.myformat.{MyOpProto} // your generated Java proto + +val payload: MyOpProto = MyOpProto.newBuilder() + .setSomeField(scanState.someField) + .build() + +val envelope = ContribOp.newBuilder() + .setKind("myformat-scan") + .setPayload(payload.toByteString) + .build() + +Some(builder.setContribOp(envelope).build()) +``` + +A note on the proto naming. `operator.proto` declares +`oneof op_struct { ... ContribOp contrib_op = 117; ... }`. So `op_struct` is the +*oneof name* and `contrib_op` is the *field name* on that oneof. The two code +generators surface this differently: + +- **Rust (prost):** pattern-matches as `match operator.op_struct { Some(OpStruct::ContribOp(c)) => ... }`. +- **Java (protoc):** uses the field-name-derived builder method `Operator.Builder.setContribOp(ContribOp)`. + +Both manipulate the same wire-format slot — the difference is purely how the code +generators expose `oneof` membership. + +## Wire-format flow + +1. Your Scala code intercepts a `FileSourceScanExec` (or `BatchScanExec`) matching your + format, returning a `CometMyFormatScanExec` from `transformV1`/`transformV2`. +2. `CometExecRule` later picks up the `CometMyFormatScanExec` instance, finds your serde + via the class-keyed dispatch, and calls `serde.convert(op, builder, childOp...)`. +3. Your `convert` builds a contrib-private proto message (whatever fields you need), + serializes it, wraps in `ContribOp { kind, payload }`, and stuffs it into the + operator builder via `setContribOp`. +4. The proto is shipped through JNI to native. +5. Core's native planner sees `OpStruct::ContribOp`, validates `kind` (non-empty, + under 16 MiB payload, registered), looks up the planner, calls + `planner.plan(ctx, payload, children)`. +6. Your native crate decodes `payload` into your own proto type and returns an + `Arc`. Use `ctx` to reach core's parquet builder, expression + planner, etc. (see the next section). +7. Core wraps the result in a `SparkPlan` and continues planning. + +## Walking a real `plan()` body + +The example contrib's planners return `EmptyExec` — none of the `ContribPlannerContext` +methods are exercised. A file-scan contrib's `plan()` typically threads through all of +them: + +```rust +use std::sync::Arc; +use comet_contrib_spi::{ + ContribError, ContribOperatorPlanner, ContribPlannerContext, ParquetDatasourceParams, +}; +use datafusion::physical_plan::ExecutionPlan; +use prost::Message; + +use crate::proto::MyFormatScan; + +pub struct MyFormatScanPlanner; + +impl ContribOperatorPlanner for MyFormatScanPlanner { + fn plan( + &self, + ctx: &dyn ContribPlannerContext, + payload: &[u8], + _children: Vec>, + ) -> Result, ContribError> { + // 1. Decode your contrib-private payload. + let scan = MyFormatScan::decode(payload) + .map_err(|e| ContribError::BadPayload(format!("decode MyFormatScan: {e}")))?; + + // 2. Translate the Spark proto schemas into Arrow schemas via core. + let required_schema = ctx.convert_spark_schema(&scan.required_schema); + let data_schema = ctx.convert_spark_schema(&scan.data_schema); + let partition_schema = ctx.convert_spark_schema(&scan.partition_schema); + + // 3. Lift Catalyst data-filter Exprs into PhysicalExprs core can execute. + let data_filters = scan + .data_filters + .iter() + .map(|e| ctx.build_physical_expr(e, required_schema.clone())) + .collect::, _>>()?; + + // 4. Register the object store for the scheme + host the files live in. The + // returned ObjectStoreUrl is the canonical key every PartitionedFile in your + // file_groups must reference. The returned Path is only relevant if you are + // constructing PartitionedFiles whose location is rooted at the same prefix; + // most file-scan contribs build per-file Paths from the raw URL inside + // `build_partitioned_files` below and can discard this Path entirely. + let any_file_url = scan.tasks + .first() + .map(|t| t.file_path.clone()) + .ok_or_else(|| ContribError::Plan("empty file list".into()))?; + let object_store_options = scan.object_store_options.clone(); + let (object_store_url, _root_path) = + ctx.prepare_object_store(any_file_url, &object_store_options)?; + + // 5. Build the file_groups: Vec>, one inner Vec per + // desired DataFusion partition. The contrib owns this -- see the helper + // sketch below. + let file_groups = build_partitioned_files(&scan.tasks)?; + + // 6. Hand the bundle to core's tuned ParquetSource. as_str() because + // with_session_timezone takes `impl Into` and `&String` doesn't + // impl that; `&str` does. + let exec = ctx.build_parquet_datasource_exec( + ParquetDatasourceParams::new( + required_schema.clone(), + object_store_url, + file_groups, + ) + .with_data_schema(data_schema) + .with_partition_schema(partition_schema) + .with_data_filters(data_filters) + .with_session_timezone(scan.session_timezone.as_str()) + .with_case_sensitive(scan.case_sensitive), + )?; + + // 7. Optionally wrap the parquet exec in contrib-specific operators + // (e.g. a Delta DV filter). + Ok(exec) + } +} +``` + +### `build_partitioned_files` — contrib-owned helper sketch + +`Vec>` is the format `init_datasource_exec` consumes. Each inner +`Vec` becomes one DataFusion partition; each `PartitionedFile` carries an +`ObjectMeta.location` (a path inside the registered object store) plus optional +partition-column values. Minimal one-file-per-partition implementation: + +```rust +use datafusion::datasource::listing::PartitionedFile; +use object_store::path::Path; +use url::Url; + +fn build_partitioned_files( + tasks: &[crate::proto::FileTask], +) -> Result>, ContribError> { + let mut groups = Vec::with_capacity(tasks.len()); + for task in tasks { + let url = Url::parse(&task.file_path) + .map_err(|e| ContribError::Plan(format!("invalid file URL: {e}")))?; + // Path within the object store -- starts at the bucket root for s3://, + // at the filesystem root for file://, etc. + let path = Path::from_url_path(url.path()) + .map_err(|e| ContribError::Plan(format!("path from URL: {e}")))?; + let mut pf = PartitionedFile::new(String::new(), task.file_size); + pf.object_meta.location = path; + // pf.partition_values = vec![/* ScalarValues per partition column */]; + groups.push(vec![pf]); + } + Ok(groups) +} +``` + +Real-world contribs typically: + +- Combine many small non-partitioned files into a single inner `Vec` (fewer + DataFusion partitions) and split very large files across multiple partitions with + `PartitionedFile::new_with_range`. +- Populate `partition_values` from the format's metadata so partition pruning works. +- Apply format-specific filters (e.g., Delta's pre-materialized deleted-row indexes, + Iceberg's equality deletes) as wrappers around the parquet exec, NOT as + PartitionedFile mutations. + +### Pieces a contrib owns inside itself + +Not exposed through `ContribPlannerContext`: + +- Reading the format's transaction log / manifest (kernel-rs for Delta, iceberg-rust + for Iceberg). +- Resolving file paths to absolute URLs on the driver. +- Computing per-file deletion-vector / equality-delete row indexes. +- Wrapping the parquet exec in a per-row-filter operator if the format needs it. + +Use `ctx` for things that already exist inside core (object-store registry, parquet +plumbing, expression planner); reimplement the format-specific parts in your contrib. + +## `#[ctor]` registration: panic safety + logging + +The contrib's native crate registers its planners during library init via +`#[ctor::ctor]`. Three quirks to get right: + +**Panics in `#[ctor]` abort the JVM process** before `JNI_OnLoad` runs, with no +diagnostic on macOS/Linux. Wrap every ctor body in `std::panic::catch_unwind` and +emit a stderr message on failure: + +```rust +#[ctor::ctor] +fn register() { + let _ = std::panic::catch_unwind(|| { + register_contrib_planner(MY_KIND, Arc::new(MyPlanner)); + }) + .map_err(|panic| { + eprintln!("comet-contrib-myname: #[ctor] panicked: {panic:?}"); + }); +} +``` + +**`log::*!` macros inside `#[ctor]` are no-ops.** Comet's logger is initialised later, +in `Java_org_apache_comet_NativeBase_init`. Any diagnostic you need from the ctor body +must go through `eprintln!`. + +**Cross-platform caveats.** `#[ctor::ctor]` works on Linux / macOS / Windows MSVC, but +the order of ctor execution across rlibs is link-order dependent and not guaranteed +across compiler versions. Your contrib's ctor **MUST NOT** depend on another contrib +already being registered. + +The corresponding JVM rule: **do not call `CometExtensionRegistry.load()` from a +class's static initializer** (Scala `object` init, or a JVM-level static block). Scala +monitors are reentrant so it won't deadlock, but re-entry would observe the partially- +built state and shadow the in-flight publication. + +## Logging conventions + +- **From the contrib's Scala code**: use `org.slf4j.Logger` / Comet's `Logging` trait. + Lifetime-event logs (extension discovered, contrib registered) at INFO; per-plan + decisions at DEBUG; correctness violations at WARN. +- **From the contrib's Rust `#[ctor]`**: `eprintln!` only (logger not yet initialised). +- **From the contrib's Rust `plan()` body and runtime code**: `log::*` macros. Choose a + `target:` matching your crate name so users can filter: + `log::debug!(target: "comet::contrib::myname", "built plan with {n} files")`. +- **Error context**: pre-format error messages with enough context that the dispatcher's + `contrib planner "myname-scan": ` wrapper reads sensibly. Do not + re-prefix with your `kind`. + +## Diagnosing a misconfigured contrib + +The most common first-hour problem is "I packaged my JAR and it does nothing." Three +signals to check: + +- `CometExtensionRegistry` logs at INFO. When discovery runs and finds zero entries, + it emits: + ``` + Comet contrib extensions: none discovered on classpath + (no META-INF/services entries for CometScanRuleExtension or + CometOperatorSerdeExtension) + ``` + Confirm your JAR ships the `META-INF/services/...CometScanRuleExtension` file with + the correct fully-qualified extension class on its own line. +- ServiceLoader instantiation failures are logged at WARN with `Failed to load a + CometScanRuleExtension entry; skipping`. Causes: missing no-arg constructor on the + extension class, exception thrown by the constructor. +- Duplicate-class collisions across contribs are logged at WARN with + `Multiple Comet contrib extensions claim the same exec class ...`. The merged + `CometExecRule` dispatch is last-write-wins on collision; if your contrib's serde + silently stops working when another contrib JAR is present, this is the line to + look for. +- `register_contrib_planner` is last-write-wins on duplicate `kind`. Registration + logs a WARN: `replacing existing planner for kind=...`. Two contribs that both + register `kind="delta-scan"` (the second clobbers the first) will surface here. +- `registered_contrib_kinds()` (Rust) returns the kinds currently registered. If your + contrib's kind is missing under a build that should include it, the Cargo feature is + off or the `extern crate` in `native/core/src/lib.rs` is missing. + +Set the logger for `org.apache.comet.spi.CometExtensionRegistry` to INFO/WARN to surface +both messages. + +### Classloader interaction + +`CometExtensionRegistry.load()` uses `Thread.currentThread().getContextClassLoader()` +first, with `getClass.getClassLoader` as fallback. Either should see Comet and the +contrib JAR in typical Spark deploy modes (`--jars`, `--packages`, application +classpath). Discovery is **lazy** — triggered the first time `CometScanRule._apply` or +`CometExecRule._apply` runs against a Comet-enabled session. By that point all +`--jars`-injected JARs are on the classpath, so order-of-arrival inside the driver +JVM is not a concern. + +## Maven packaging + +Contribs are in-tree only — they ship as part of Comet's release. The contrib's +Maven module produces a standalone JAR (built unconditionally so the workspace stays +consistent), but the JAR is **not deployed**: `maven.deploy.skip=true` inherits from +the parent pom. The contrib's classes reach users through `comet-spark.jar`, which +bundles them via the `contrib-` profile on `spark/pom.xml`. + +If your contrib pulls in a third-party library, declare the dep in your contrib's pom +in `compile` scope (no `provided` — the contrib's classes go through the same shade +execution as core's, and any deps the contrib pulls need to be visible to that shade). +Avoid third-party deps where you can; the more your contrib drags in, the more +likely the shade hits a relocation collision with `comet-spark`'s own includes. + +### Multi-Spark-version support + +Comet itself ships a per-Spark-minor-version artifact via the +`spark.version.short` Maven profile (`3.4`, `3.5`, `4.0`). Your contrib follows the +same model: + +- Pick the matching Spark profile when building (`-Dspark.version.short=3.5`). +- The resulting artifact ID encodes the Spark version + (`comet-contrib--spark3.5_2.13`). +- If your contrib must support multiple Spark minor versions, publish one artifact per + profile, mirroring Comet. Shim code that differs across Spark versions belongs under + `src/main/scala-${shims.majorVerSrc}/` (see Comet's `common/`/`spark/` modules for + the existing pattern). + +## Testing + +`contrib/example/` demonstrates the JVM-side test pattern: + +- A unit test that calls `CometExtensionRegistry.resetForTesting()` and `load()`, + then asserts the contrib's extension is discovered via ServiceLoader. Catches + packaging mistakes (missing `META-INF/services`, wrong class name). +- Per-method unit tests for the extension's `matches*` / `transform*` logic. + +For native unit tests of a `ContribOperatorPlanner`, use `ScopedContribPlannerRegistration` +from `comet-contrib-spi` to install and tear down planners without polluting the +global registry: + +```rust +use comet_contrib_spi::ScopedContribPlannerRegistration; + +#[test] +fn my_planner_round_trip() { + let _guard = ScopedContribPlannerRegistration::new( + "myformat-scan", + Arc::new(MyFormatScanPlanner), + ); + // ... exercise dispatch ... +} +``` + +Pair with `#[serial_test::serial]` if your test asserts on `registered_contrib_kinds()` +(which other tests' guards may be temporarily mutating in parallel). + +### End-to-end (Rust + Scala round-trip) + +A full integration test wires the Spark plan through real JNI and asserts the contrib's +native planner ran: + +1. Build a `SparkSession` configured with `spark.sql.extensions = + org.apache.comet.CometSparkSessionExtensions` and the contrib JAR on the classpath + (sbt: `Test/unmanagedClasspath`; Maven: the contrib's own test scope already has it). +2. Submit a query that hits your format's table reader. +3. Inspect the produced physical plan for your contrib's exec class + (`plan.exists(_.isInstanceOf[CometMyFormatScanExec])`). +4. Run the plan and assert against the result (e.g., a row count that only your native + planner could produce, distinguishable from a Spark fall-back). + +The example contrib's test fixture doubles as smoke coverage for the SPI dispatch path +itself (kind lookup, payload decode, error wrapping) under Comet's own CI when the +`contrib-example` feature is enabled. + +## Payload size cap + +The native dispatcher enforces a hard ceiling of **16 MiB** on `ContribOp.payload` +(`MAX_CONTRIB_PAYLOAD_BYTES` in `native/core/src/execution/planner.rs`). A malformed +JVM-side serde (or one that accidentally accumulates state across plan calls) +producing a larger payload is rejected with a clear error message before the contrib's +`plan()` runs. The cap is comfortably above any plausible file-scan payload (Delta +with ~100k tasks weighs in around 3–4 MiB) and well below "heap pressure" territory. +If your contrib has a legitimate need for a higher ceiling, file an issue with the +size you need and the use case — the cap is a guardrail, not a feature. + +## Registry implementation note + +The native contrib planner registry uses `ArcSwap>>` — +lock-free for readers, RCU swap for writers. Reads on the `ContribOp` dispatch hot +path are a single atomic load plus an `Arc` ref-count bump; there is no +reader-writer contention because writes happen exclusively during library init +(sequential `#[ctor]` registrations, no concurrent writers). Contribs never call +the registry primitives directly. + +## See also + +- [`contrib/example/`](https://github.com/apache/datafusion-comet/tree/main/contrib/example) — + the worked reference. +- [`native/contrib-spi/`](https://github.com/apache/datafusion-comet/tree/main/native/contrib-spi) — + the leaf SPI crate. +- [`spark/src/main/scala/org/apache/comet/spi/`](https://github.com/apache/datafusion-comet/tree/main/spark/src/main/scala/org/apache/comet/spi) — + the JVM SPI traits. diff --git a/native/Cargo.lock b/native/Cargo.lock index df3c3b03c0..187a75665a 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1485,6 +1485,30 @@ dependencies = [ "memchr", ] +[[package]] +name = "comet-contrib-example" +version = "0.17.0" +dependencies = [ + "comet-contrib-spi", + "ctor 0.4.3", + "datafusion", + "datafusion-comet-proto", + "log", + "prost", + "prost-build", +] + +[[package]] +name = "comet-contrib-spi" +version = "0.17.0" +dependencies = [ + "arc-swap", + "datafusion", + "datafusion-comet-proto", + "log", + "object_store", +] + [[package]] name = "comfy-table" version = "7.2.2" @@ -1740,16 +1764,32 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6" +dependencies = [ + "ctor-proc-macro 0.0.6", + "dtor 0.0.6", +] + [[package]] name = "ctor" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" dependencies = [ - "ctor-proc-macro", - "dtor", + "ctor-proc-macro 0.0.7", + "dtor 0.1.1", ] +[[package]] +name = "ctor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2" + [[package]] name = "ctor-proc-macro" version = "0.0.7" @@ -1966,6 +2006,8 @@ dependencies = [ "aws-config", "aws-credential-types", "bytes", + "comet-contrib-example", + "comet-contrib-spi", "criterion", "datafusion", "datafusion-comet-common", @@ -2852,15 +2894,30 @@ dependencies = [ "const-random", ] +[[package]] +name = "dtor" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8" +dependencies = [ + "dtor-proc-macro 0.0.5", +] + [[package]] name = "dtor" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" dependencies = [ - "dtor-proc-macro", + "dtor-proc-macro 0.0.6", ] +[[package]] +name = "dtor-proc-macro" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055" + [[package]] name = "dtor-proc-macro" version = "0.0.6" @@ -4633,7 +4690,7 @@ version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b31d3d8e99a85d83b73ec26647f5607b80578ed9375810b6e44ffa3590a236" dependencies = [ - "ctor", + "ctor 0.6.3", "opendal-core", "opendal-layer-concurrent-limit", "opendal-layer-logging", diff --git a/native/Cargo.toml b/native/Cargo.toml index d1b5c74af9..9c5f5816bd 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -16,8 +16,14 @@ # under the License. [workspace] -default-members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle"] -members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle", "hdfs", "fs-hdfs"] +default-members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle", "contrib-spi"] +# `contrib-spi` is the thin SPI surface that BOTH core and contribs depend on -- breaking +# what would otherwise be a cyclic dep between core (links contribs via Cargo features) +# and contribs (need core types). Contrib crates themselves live under +# `../contrib//native` and are workspace members so workspace lockfile + workspace +# dependencies apply; they're NOT default-members because they're consumed via core's +# optional Cargo feature flags rather than built standalone. +members = ["core", "spark-expr", "common", "proto", "jni-bridge", "shuffle", "hdfs", "fs-hdfs", "contrib-spi", "../contrib/example/native"] resolver = "2" [workspace.package] diff --git a/native/contrib-spi/Cargo.toml b/native/contrib-spi/Cargo.toml new file mode 100644 index 0000000000..fbead2c17e --- /dev/null +++ b/native/contrib-spi/Cargo.toml @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "comet-contrib-spi" +description = "Stable SPI surface that contrib crates and Comet's core both depend on. Defines the ContribOperatorPlanner trait, ContribPlannerContext, the process-wide registry, and the lightweight error type. Separating this from the core crate breaks what would otherwise be a cyclic dependency (core links contribs via Cargo feature flags; contribs need core types)." +version = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +edition = { workspace = true } + +[dependencies] +# Public types in the SPI reference these crates. Pinning matches core via workspace. +datafusion = { workspace = true } +datafusion-comet-proto = { workspace = true } +# Surface the `Path` type on the SPI's prepare_object_store return value. +object_store = { workspace = true } +log = "0.4" +# Lock-free registry primitive. Reads (per ContribOp dispatch, hot path) are one atomic +# load + ref-count bump; writes (per contrib's #[ctor] at lib init) are an RCU swap. +arc-swap = "1" + +[features] +# Off by default. When enabled, the crate exposes `ScopedContribPlannerRegistration` and +# `_clear_for_test` for downstream test code that needs to register planners without +# polluting the process-wide registry. The same surfaces are unconditionally available +# under `#[cfg(test)]` for the SPI's own unit tests. +test-utils = [] diff --git a/native/contrib-spi/src/lib.rs b/native/contrib-spi/src/lib.rs new file mode 100644 index 0000000000..bf5348fa2c --- /dev/null +++ b/native/contrib-spi/src/lib.rs @@ -0,0 +1,534 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Thin SPI crate shared between Comet's core and every contrib crate. +//! +//! Both core (`datafusion-comet`) and individual contribs (`comet-contrib-example`, +//! `comet-contrib-delta`, ...) depend on THIS crate, NOT on each other. This avoids a +//! cyclic dependency: core wires contribs in via Cargo feature flags, and contribs need +//! the SPI types to implement the trait. With the SPI in a third crate, the dependency +//! graph is a DAG. +//! +//! Surface: +//! * [`ContribOperatorPlanner`] -- the trait contribs implement. +//! * [`ContribPlannerContext`] -- the trait core implements; gives contribs access +//! to the parquet exec builder, expression planner, +//! object-store registration, and session context. +//! * [`ParquetDatasourceParams`] -- argument bundle for the parquet exec builder. +//! * [`register_contrib_planner`] / [`lookup_contrib_planner_by_kind`] -- +//! process-wide registry, expected to be populated +//! from a contrib's `#[ctor]`. +//! * [`registered_contrib_kinds`] -- diagnostics. + +use std::{ + collections::HashMap, + sync::{Arc, OnceLock}, +}; + +use arc_swap::ArcSwap; + +use datafusion::{ + arrow::datatypes::SchemaRef, + common::ScalarValue, + datasource::listing::PartitionedFile, + execution::{context::SessionContext, object_store::ObjectStoreUrl}, + physical_expr::PhysicalExpr, + physical_plan::{expressions::Column, ExecutionPlan}, +}; +use datafusion_comet_proto::{spark_expression, spark_operator}; + +/// Implemented by each contrib. Called from core's planner when an `OpStruct::ContribOp` +/// with the contrib's `kind` is encountered. +/// +/// The contract is intentionally minimal: +/// * `ctx` is a handle to core-side planner services (parquet exec builder, +/// expression planner, object-store registration, session context). Contribs reach +/// into core through this trait rather than depending on core directly, which keeps +/// the dependency graph acyclic. +/// * `payload` is the raw bytes from `ContribOp.payload`. The contrib decodes it into +/// whatever proto / serde format it uses internally; core never inspects. +/// * `children` is the list of already-built native children (in spark-plan child +/// order). The contrib uses these to build its `ExecutionPlan` if it needs child +/// inputs. +/// * The returned `Arc` is the contrib's operator. Core wraps it +/// into a `SparkPlan` and threads it through the rest of the plan tree. +/// +/// Implementations MUST be `Send + Sync` and idempotent -- the same `(payload, children)` +/// must always produce a functionally equivalent plan, so core can cache or re-plan. +pub trait ContribOperatorPlanner: Send + Sync { + fn plan( + &self, + ctx: &dyn ContribPlannerContext, + payload: &[u8], + children: Vec>, + ) -> Result, ContribError>; +} + +/// Argument bundle for [`ContribPlannerContext::build_parquet_datasource_exec`]. Mirrors +/// core's internal `init_datasource_exec` signature one-to-one, so the trait method is a +/// thin forward. +/// +/// `#[non_exhaustive]` so adding fields in future is a minor SemVer bump, not a break. +/// Contribs construct via [`ParquetDatasourceParams::new`] (required fields only) + +/// `with_*` builder setters; never by struct-literal syntax. +/// +/// `session_timezone` is owned (`String`) so contribs can pass a runtime-computed value +/// (from a session config lookup) without juggling lifetimes. The string is one-time +/// per plan call, so the allocation is negligible. +#[non_exhaustive] +pub struct ParquetDatasourceParams { + pub required_schema: SchemaRef, + pub data_schema: Option, + pub partition_schema: Option, + pub object_store_url: ObjectStoreUrl, + pub file_groups: Vec>, + pub projection_vector: Option>, + pub data_filters: Option>>, + pub default_values: Option>, + pub session_timezone: String, + pub case_sensitive: bool, + pub return_null_struct_if_all_fields_missing: bool, + pub encryption_enabled: bool, + pub use_field_id: bool, + pub ignore_missing_field_id: bool, +} + +impl ParquetDatasourceParams { + /// Minimal constructor with the parameters every parquet scan needs. All `Option`s + /// default to `None`, all `bool`s to `false`, and `session_timezone` to `"UTC"`. Use + /// the `with_*` setters to populate the rest. + pub fn new( + required_schema: SchemaRef, + object_store_url: ObjectStoreUrl, + file_groups: Vec>, + ) -> Self { + Self { + required_schema, + data_schema: None, + partition_schema: None, + object_store_url, + file_groups, + projection_vector: None, + data_filters: None, + default_values: None, + session_timezone: "UTC".to_string(), + case_sensitive: false, + return_null_struct_if_all_fields_missing: false, + encryption_enabled: false, + use_field_id: false, + ignore_missing_field_id: false, + } + } + + pub fn with_data_schema(mut self, schema: SchemaRef) -> Self { + self.data_schema = Some(schema); + self + } + pub fn with_partition_schema(mut self, schema: SchemaRef) -> Self { + self.partition_schema = Some(schema); + self + } + pub fn with_projection_vector(mut self, projection: Vec) -> Self { + self.projection_vector = Some(projection); + self + } + pub fn with_data_filters(mut self, filters: Vec>) -> Self { + self.data_filters = Some(filters); + self + } + pub fn with_default_values(mut self, values: HashMap) -> Self { + self.default_values = Some(values); + self + } + /// Accepts anything that can be turned into a `String` -- string literals, + /// `&str` borrowed from session config, owned `String`s -- without lifetime games. + pub fn with_session_timezone(mut self, tz: impl Into) -> Self { + self.session_timezone = tz.into(); + self + } + pub fn with_case_sensitive(mut self, b: bool) -> Self { + self.case_sensitive = b; + self + } + pub fn with_return_null_struct_if_all_fields_missing(mut self, b: bool) -> Self { + self.return_null_struct_if_all_fields_missing = b; + self + } + pub fn with_encryption_enabled(mut self, b: bool) -> Self { + self.encryption_enabled = b; + self + } + pub fn with_use_field_id(mut self, b: bool) -> Self { + self.use_field_id = b; + self + } + pub fn with_ignore_missing_field_id(mut self, b: bool) -> Self { + self.ignore_missing_field_id = b; + self + } +} + +/// Planner services exposed by core to contribs. Core implements this trait against its +/// `PhysicalPlanner` + `SessionContext`; contribs receive a `&dyn ContribPlannerContext` +/// in their [`ContribOperatorPlanner::plan`] call and reach into core through it. +/// +/// All trait methods are infallible at the trait-bound level but return `ContribError` +/// for runtime failures, so contribs can propagate without converting between error +/// types. +// Note: no `Send + Sync` bound -- `&dyn ContribPlannerContext` is only held for the +// duration of a synchronous `plan()` call, so it doesn't need to cross threads. The +// natural core-side impl borrows the `PhysicalPlanner` (which carries JNI handles that +// aren't `Send`), and adding the bound here would force an awkward `Arc>` +// dance for no gain. +pub trait ContribPlannerContext { + /// The session context the plan is being built under. Contribs need this to register + /// object stores on `runtime_env()` and to read session-level configs (timezone, + /// case sensitivity, etc) that aren't already on `ParquetDatasourceParams`. + fn session_ctx(&self) -> &Arc; + + /// Convert a Catalyst-side Spark expression proto into a DataFusion `PhysicalExpr` + /// against the given input schema. Used by file-scan contribs to convert data + /// filters from their proto-side `Expr` form into the typed `PhysicalExpr`s that + /// `ParquetSource` consumes. + fn build_physical_expr( + &self, + expr: &spark_expression::Expr, + input_schema: SchemaRef, + ) -> Result, ContribError>; + + /// Convert a slice of Spark struct fields (the proto representation of a Spark + /// schema) into an Arrow `SchemaRef`. This is a pure proto-to-arrow conversion -- + /// no side effects, no session state. + fn convert_spark_schema(&self, fields: &[spark_operator::SparkStructField]) -> SchemaRef; + + /// Register an object store on the runtime env for the given URL's scheme + bucket, + /// using `object_store_configs` for credentials / endpoint overrides. Returns + /// `(ObjectStoreUrl, Path)`: the URL the contrib attaches to its `PartitionedFile`s, + /// and the canonical path within that store (caller may discard if not needed -- + /// most file-scan contribs use it to set `partitioned_file.object_meta.location`). + fn prepare_object_store( + &self, + any_file_url: String, + object_store_configs: &HashMap, + ) -> Result<(ObjectStoreUrl, object_store::path::Path), ContribError>; + + /// Build a `DataSourceExec` over Comet's tuned `ParquetSource`. This is the single + /// most important method on the trait -- every file-scan contrib (Delta, Iceberg) + /// goes through here so the contrib doesn't have to rebuild Comet's parquet plumbing. + fn build_parquet_datasource_exec( + &self, + params: ParquetDatasourceParams, + ) -> Result, ContribError>; +} + +/// Error type returned by [`ContribOperatorPlanner::plan`] and the trait methods on +/// [`ContribPlannerContext`]. Kept distinct from core's `ExecutionError` so this crate +/// stays free of core's dependency tree. Core converts `ContribError` into its own +/// `ExecutionError` at the dispatch site. +/// +/// `#[non_exhaustive]` so adding variants in the future is a minor SemVer bump, not a +/// break. Pattern matchers in contribs MUST include a wildcard arm. +#[non_exhaustive] +#[derive(Debug)] +pub enum ContribError { + /// Generic failure. Use this for cases that don't fit the more specific variants. + Plan(String), + /// The contrib received a payload it couldn't decode (wrong proto schema, missing + /// required field, etc.). + BadPayload(String), + /// The contrib received a child count it can't handle. `expected` is a free-form + /// human description, conventionally a phrase like `"exactly 1"` or `"0 or 1"` so + /// the error message reads `wrong child count: expected exactly 1, got 2`. + WrongChildCount { expected: String, actual: usize }, +} + +impl std::fmt::Display for ContribError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ContribError::Plan(msg) => write!(f, "{msg}"), + ContribError::BadPayload(msg) => write!(f, "bad payload: {msg}"), + ContribError::WrongChildCount { expected, actual } => { + write!(f, "wrong child count: expected {expected}, got {actual}") + } + // Defense for external callers reading `Display` after a future variant is + // added under #[non_exhaustive]: their `match` is non-exhaustive even with + // a wildcard, but our `Display` impl always falls through to the Debug repr + // so the dispatcher's `format!("contrib planner ...: {e}")` still produces + // something useful. (Note: inside this crate the wildcard is unreachable + // because #[non_exhaustive] is only enforced across crate boundaries -- + // adding a variant here will require an explicit arm anyway. The wildcard + // exists to keep downstream `Display`-as-source consumers working.) + #[allow(unreachable_patterns)] + other => write!(f, "{other:?}"), + } + } +} + +impl std::error::Error for ContribError {} + +/// Process-wide registry of contrib operator planners, keyed by `ContribOp.kind`. +/// +/// `ArcSwap>` gives lock-free reads (one atomic load + Arc ref-count bump) +/// on the dispatch hot path. Writes happen exclusively during library init from +/// `#[ctor]`s (sequential, single-threaded) and use `rcu` to swap an updated map atom. +/// The init-once / read-many access pattern is exactly what `ArcSwap` is designed for; +/// the previous `RwLock` would have introduced reader-writer contention for +/// no gain since there are effectively no concurrent writes. +type RegistryMap = HashMap>; + +fn registry() -> &'static ArcSwap { + static REGISTRY: OnceLock> = OnceLock::new(); + REGISTRY.get_or_init(|| ArcSwap::from_pointee(HashMap::new())) +} + +/// Register a contrib operator planner under the given `kind` identifier. Last-write-wins +/// on duplicates (logged as a warning). Thread-safe; intended to be called from a +/// contrib's `#[ctor]` constructor at library-init time. +pub fn register_contrib_planner( + kind: impl Into, + planner: Arc, +) { + let kind = kind.into(); + registry().rcu(|current| { + let mut new_map: RegistryMap = (**current).clone(); + if new_map.contains_key(&kind) { + log::warn!( + "register_contrib_planner: replacing existing planner for kind={kind:?}; \ + second registration usually indicates a misconfigured test harness" + ); + } + new_map.insert(kind.clone(), Arc::clone(&planner)); + new_map + }); +} + +/// Look up the contrib planner registered for `kind`, or `None` if no contrib is loaded +/// for that operator. Core's dispatcher uses this to route `OpStruct::ContribOp` payloads. +pub fn lookup_contrib_planner_by_kind(kind: &str) -> Option> { + registry().load().get(kind).cloned() +} + +/// Return a snapshot of all registered contrib kinds, for diagnostics and tests. +pub fn registered_contrib_kinds() -> Vec { + let snapshot = registry().load(); + let mut kinds: Vec = snapshot.keys().cloned().collect(); + kinds.sort(); + kinds +} + +/// RAII guard that registers a planner for the lifetime of the guard and removes it on +/// drop. Use in tests that want a planner registered without polluting the process +/// registry for other tests running in parallel. +/// +/// Not `Send` because dropping it requires the registry write lock; tests using this +/// guard should mark themselves `#[serial_test::serial]` if they assert on +/// `registered_contrib_kinds()` (whose snapshot is affected by other threads' guards). +#[cfg(any(test, feature = "test-utils"))] +pub struct ScopedContribPlannerRegistration { + kind: String, + previous: Option>, +} + +#[cfg(any(test, feature = "test-utils"))] +impl ScopedContribPlannerRegistration { + /// Install `planner` under `kind` for the lifetime of the returned guard. The + /// previously-registered planner (if any) is restored on drop. + pub fn new(kind: impl Into, planner: Arc) -> Self { + let kind = kind.into(); + // Snapshot the previous binding BEFORE the rcu so retries (under contention) don't + // observe our own write as the previous value. + let previous = registry().load().get(&kind).cloned(); + registry().rcu(|current| { + let mut new_map: RegistryMap = (**current).clone(); + new_map.insert(kind.clone(), Arc::clone(&planner)); + new_map + }); + Self { kind, previous } + } +} + +#[cfg(any(test, feature = "test-utils"))] +impl Drop for ScopedContribPlannerRegistration { + fn drop(&mut self) { + let kind = std::mem::take(&mut self.kind); + let previous = self.previous.take(); + registry().rcu(|current| { + let mut new_map: RegistryMap = (**current).clone(); + match &previous { + Some(prev) => { + new_map.insert(kind.clone(), Arc::clone(prev)); + } + None => { + new_map.remove(&kind); + } + } + new_map + }); + } +} + +/// Clear the registry. **Test-only escape hatch.** Use [`ScopedContribPlannerRegistration`] +/// instead in any test that runs in parallel with other registry users -- this function +/// removes the entries every other concurrent test depends on. +#[cfg(any(test, feature = "test-utils"))] +pub fn _clear_for_test() { + registry().store(Arc::new(HashMap::new())); +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion::arrow::datatypes::Schema; + use datafusion::physical_plan::empty::EmptyExec; + use std::sync::Arc; + + struct AlwaysEmpty; + impl ContribOperatorPlanner for AlwaysEmpty { + fn plan( + &self, + _ctx: &dyn ContribPlannerContext, + _payload: &[u8], + _children: Vec>, + ) -> Result, ContribError> { + Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty())))) + } + } + + // Use globally-unique kinds so concurrent tests in the same binary don't collide + // through the process-wide registry. The `_test_` prefix is reserved for unit tests. + + #[test] + fn unknown_kind_returns_none() { + // Independent of any registrations: a kind no one ever registers stays None. + let probe = "_test_definitely_unregistered_a8f3c1e"; + assert!(lookup_contrib_planner_by_kind(probe).is_none()); + } + + #[test] + fn scoped_registration_round_trip() { + let kind = "_test_scoped_registration_a"; + assert!(lookup_contrib_planner_by_kind(kind).is_none()); + { + let _guard = ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty)); + assert!(lookup_contrib_planner_by_kind(kind).is_some()); + } + // Dropping the guard removes the entry. + assert!(lookup_contrib_planner_by_kind(kind).is_none()); + } + + #[test] + fn scoped_registration_restores_previous() { + let kind = "_test_scoped_registration_b"; + let _outer = + ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty)); + { + // Inner guard temporarily replaces the outer planner; drop restores outer. + let _inner = + ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty)); + assert!(lookup_contrib_planner_by_kind(kind).is_some()); + } + assert!(lookup_contrib_planner_by_kind(kind).is_some()); + } + + #[test] + fn parquet_datasource_params_constructor_defaults() { + use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::execution::object_store::ObjectStoreUrl; + + let schema: SchemaRef = Arc::new(Schema::new(vec![Field::new( + "id", + DataType::Int64, + false, + )])); + let url = ObjectStoreUrl::parse("file://").unwrap(); + let p = ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![]); + + assert_eq!(p.required_schema.fields().len(), 1); + assert!(p.data_schema.is_none()); + assert!(p.partition_schema.is_none()); + assert!(p.projection_vector.is_none()); + assert!(p.data_filters.is_none()); + assert!(p.default_values.is_none()); + assert_eq!(p.session_timezone, "UTC"); + assert!(!p.case_sensitive); + assert!(!p.return_null_struct_if_all_fields_missing); + assert!(!p.encryption_enabled); + assert!(!p.use_field_id); + assert!(!p.ignore_missing_field_id); + } + + #[test] + fn parquet_datasource_params_setters_apply() { + use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::execution::object_store::ObjectStoreUrl; + + let schema: SchemaRef = Arc::new(Schema::new(vec![Field::new( + "id", + DataType::Int64, + false, + )])); + let url = ObjectStoreUrl::parse("file://").unwrap(); + let p = ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![]) + .with_data_schema(Arc::clone(&schema)) + .with_session_timezone("America/Los_Angeles") + .with_case_sensitive(true) + .with_use_field_id(true) + .with_ignore_missing_field_id(true) + .with_encryption_enabled(true); + + // Distinguishable bool tuple: a swap in `init_datasource_exec`'s arg order + // would fail this assertion in core's planner::contrib tests. + assert_eq!(p.session_timezone, "America/Los_Angeles"); + assert!(p.case_sensitive); + assert!(!p.return_null_struct_if_all_fields_missing); + assert!(p.encryption_enabled); + assert!(p.use_field_id); + assert!(p.ignore_missing_field_id); + assert!(p.data_schema.is_some()); + } + + #[test] + fn contrib_error_display_preserves_variant_info() { + // The dispatcher wraps `e` via Display: `format!("contrib planner {kind:?}: {e}")`. + // These cases assert each variant's discriminating info survives that path. + let plan = ContribError::Plan("plan-context-message".into()).to_string(); + assert!(plan.contains("plan-context-message")); + + let bad = ContribError::BadPayload("decoding failed at field 7".into()).to_string(); + assert!(bad.starts_with("bad payload: ")); + assert!(bad.contains("decoding failed at field 7")); + + let wcc = ContribError::WrongChildCount { + expected: "exactly 1".into(), + actual: 3, + } + .to_string(); + assert!(wcc.contains("exactly 1")); + assert!(wcc.contains("got 3")); + } + + #[test] + fn registered_contrib_kinds_reflects_current_state() { + let kind = "_test_kinds_snapshot_only"; + let _guard = ScopedContribPlannerRegistration::new(kind, Arc::new(AlwaysEmpty)); + let kinds = registered_contrib_kinds(); + assert!( + kinds.iter().any(|k| k == kind), + "expected snapshot to include {kind:?}, got {kinds:?}" + ); + } +} diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 4fb3ed4c5d..0f42b2a6e1 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -73,6 +73,11 @@ reqwest = { version = "0.12", default-features = false, features = ["rustls-tls- object_store_opendal = { version = "0.56.0", optional = true } hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]} opendal = { version = "0.56.0", optional = true, features = ["services-hdfs"] } +# Contrib rlibs. Each is gated by a matching `contrib-` Cargo feature defined +# below in [features]. When the feature is on, the contrib's rlib is linked into core's +# cdylib and its #[ctor] runs at library load. +comet-contrib-spi = { path = "../contrib-spi" } +comet-contrib-example = { path = "../../contrib/example/native", optional = true } iceberg = { workspace = true } iceberg-storage-opendal = { workspace = true } serde_json = "1.0" @@ -95,11 +100,23 @@ datafusion-functions-nested = { version = "53.1.0" } [features] backtrace = ["datafusion/backtrace"] +# Released cdylib ships with hdfs-opendal only -- no contrib surface. This keeps +# `registered_contrib_kinds()` empty in production so users see only the contribs they +# explicitly opted into (Delta, Iceberg, ...). CI / dev builds turn on `contrib-example` +# (and the example's unit tests run under its own crate's test profile, which always +# links the example regardless of this list). default = ["hdfs-opendal"] hdfs = ["datafusion-comet-objectstore-hdfs"] hdfs-opendal = ["opendal", "object_store_opendal", "hdfs-sys"] jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"] +# Contrib feature flags. Each flag pulls a contrib rlib into core's cdylib so contrib +# Rust code is linked into the single libcomet at build time; the contrib's #[ctor] +# registers its operator planners during library init. The single-cdylib architecture +# (rather than separate cdylib per contrib) is documented in +# docs/source/contributor-guide/contrib-extensions.md. +contrib-example = ["dep:comet-contrib-example"] + # exclude optional packages from cargo machete verifications [package.metadata.cargo-machete] ignored = ["hdfs-sys", "paste"] diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index f5b04cc51d..c35a9a6e25 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -232,6 +232,11 @@ fn op_name(op: &OpStruct) -> &'static str { OpStruct::Explode(_) => "Explode", OpStruct::CsvScan(_) => "CsvScan", OpStruct::ShuffleScan(_) => "ShuffleScan", + // Contrib operators carry their concrete identity in `ContribOp.kind`, but + // `op_name` returns `&'static str` for tracing/error messages. Keep the label + // generic here; downstream code that needs the specific contrib reads `kind` + // off the proto directly. + OpStruct::ContribOp(_) => "ContribOp", } } diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index b00f140026..b3b18e9f75 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -17,6 +17,7 @@ //! Converts Spark physical plan to DataFusion physical plan +pub mod contrib; pub mod expression_registry; pub mod macros; pub mod operator_registry; @@ -1959,6 +1960,83 @@ impl PhysicalPlanner { )), )) } + OpStruct::ContribOp(contrib_op) => { + // Dispatch the ContribOp envelope to a contrib-registered planner keyed + // by `kind`. The contrib's #[ctor] in its rlib (linked into core's cdylib + // via a Cargo feature flag) populates the registry at lib-init time, so + // by the time we reach this arm the registry is already warm. Missing + // registrations typically mean the JVM JAR is on the classpath but core + // was built without the corresponding `contrib-` Cargo feature. + use crate::execution::planner::contrib::{ + lookup_contrib_planner_by_kind, CorePlannerContext, + }; + let kind = contrib_op.kind.as_str(); + if kind.trim().is_empty() { + return Err(GeneralError(format!( + "ContribOp.kind={kind:?} is empty or whitespace -- the JVM-side \ + serde produced a malformed envelope (every contrib must set a \ + stable kind string)" + ))); + } + + // Look up the planner first so a bogus kind produces the "not registered" + // error rather than a misleading "payload too big" one (in case the kind + // is garbage and the payload also happens to be oversized). + let planner = lookup_contrib_planner_by_kind(kind).ok_or_else(|| { + GeneralError(format!( + "No contrib planner registered for ContribOp.kind={kind:?}; \ + did you build core with the corresponding `contrib-{kind}` \ + Cargo feature (or its workspace equivalent)?" + )) + })?; + + // Payload-size guard. By the time we get here prost has already decoded + // `contrib_op.payload` into a heap-allocated Vec, so this guard does + // NOT fence the proto-decode allocation itself. What it does fence: the + // contrib's plan() body from being invoked with an absurd payload, which + // typically does its own prost decode against contrib-private types -- + // potentially several more allocations. 16 MiB is comfortably above any + // plausible file-scan payload (Delta with 100k tasks weighs in around + // 3-4 MiB) and well below "we should be worried about heap pressure". + // Moving the check pre-decode would require a streaming Operator parser; + // not worth the complexity given typical payloads are <1 MiB. + const MAX_CONTRIB_PAYLOAD_BYTES: usize = 16 * 1024 * 1024; + if contrib_op.payload.len() > MAX_CONTRIB_PAYLOAD_BYTES { + return Err(GeneralError(format!( + "ContribOp.kind={kind:?} payload size {} bytes exceeds limit \ + of {} bytes -- inspect the contrib's serde for accidental \ + data accumulation", + contrib_op.payload.len(), + MAX_CONTRIB_PAYLOAD_BYTES, + ))); + } + + // Recursively build native children. The contrib gets them as + // `Arc` rather than the richer `SparkPlan` because the + // SPI is intentionally minimal — contribs only need the DataFusion-level + // plan surface. + let mut child_scans: Vec = Vec::new(); + let mut child_shuffle_scans: Vec = Vec::new(); + let mut native_children: Vec> = Vec::new(); + for child in &spark_plan.children { + let (mut s, mut ss, child_plan) = + self.create_plan(child, inputs, partition_count)?; + child_scans.append(&mut s); + child_shuffle_scans.append(&mut ss); + native_children.push(child_plan.native_plan.clone()); + } + + let ctx = CorePlannerContext { planner: self }; + let exec = planner + .plan(&ctx, &contrib_op.payload, native_children) + .map_err(|e| GeneralError(format!("contrib planner {kind:?}: {e}")))?; + + Ok(( + child_scans, + child_shuffle_scans, + Arc::new(SparkPlan::new(spark_plan.plan_id, exec, vec![])), + )) + } _ => Err(GeneralError(format!( "Unsupported or unregistered operator type: {:?}", spark_plan.op_struct diff --git a/native/core/src/execution/planner/contrib.rs b/native/core/src/execution/planner/contrib.rs new file mode 100644 index 0000000000..1cf9c15179 --- /dev/null +++ b/native/core/src/execution/planner/contrib.rs @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Re-exports + core-side `ContribPlannerContext` adapter. +//! +//! The SPI trait + registry live in the standalone `comet-contrib-spi` crate so both +//! core and contribs can depend on them without forming a dependency cycle (core links +//! contribs via Cargo feature flags, contribs need the SPI types). This module: +//! +//! 1. re-exports the parts of the SPI core itself imports, so existing +//! `crate::execution::planner::contrib::...` paths keep resolving; +//! 2. provides `CorePlannerContext`, a thin adapter that lets a `&PhysicalPlanner` be +//! passed to contribs as a `&dyn ContribPlannerContext`. + +use std::collections::HashMap; +use std::sync::Arc; + +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::execution::context::SessionContext; +use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_comet_proto::{spark_expression, spark_operator}; + +pub use comet_contrib_spi::lookup_contrib_planner_by_kind; +#[allow(unused_imports)] // surfaced for tests + diagnostics +pub use comet_contrib_spi::{ + register_contrib_planner, registered_contrib_kinds, ContribError, ContribOperatorPlanner, + ContribPlannerContext, ParquetDatasourceParams, +}; + +use crate::execution::planner::PhysicalPlanner; +use crate::parquet::parquet_exec::init_datasource_exec; +use crate::parquet::parquet_support::prepare_object_store_with_configs; + +/// Adapter that exposes a `&PhysicalPlanner` (plus the session_ctx it carries) as a +/// `ContribPlannerContext`. Construction is cheap -- just borrows the planner. The +/// dispatcher creates one per ContribOp arm. +pub(crate) struct CorePlannerContext<'a> { + pub(crate) planner: &'a PhysicalPlanner, +} + +impl ContribPlannerContext for CorePlannerContext<'_> { + fn session_ctx(&self) -> &Arc { + self.planner.session_ctx() + } + + fn build_physical_expr( + &self, + expr: &spark_expression::Expr, + input_schema: SchemaRef, + ) -> Result, ContribError> { + self.planner + .create_expr(expr, input_schema) + .map_err(|e| ContribError::Plan(format!("create_expr: {e}"))) + } + + fn convert_spark_schema(&self, fields: &[spark_operator::SparkStructField]) -> SchemaRef { + super::convert_spark_types_to_arrow_schema(fields) + } + + fn prepare_object_store( + &self, + url: String, + configs: &HashMap, + ) -> Result<(ObjectStoreUrl, object_store::path::Path), ContribError> { + prepare_object_store_with_configs(self.planner.session_ctx().runtime_env(), url, configs) + .map_err(|e| ContribError::Plan(format!("prepare_object_store_with_configs: {e}"))) + } + + fn build_parquet_datasource_exec( + &self, + params: ParquetDatasourceParams, + ) -> Result, ContribError> { + init_datasource_exec( + params.required_schema, + params.data_schema, + params.partition_schema, + params.object_store_url, + params.file_groups, + params.projection_vector, + params.data_filters, + params.default_values, + ¶ms.session_timezone, + params.case_sensitive, + params.return_null_struct_if_all_fields_missing, + self.planner.session_ctx(), + params.encryption_enabled, + params.use_field_id, + params.ignore_missing_field_id, + ) + .map(|e| e as Arc) + .map_err(|e| ContribError::Plan(format!("init_datasource_exec: {e}"))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::execution::planner::PhysicalPlanner; + use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::execution::context::SessionContext; + use datafusion::execution::object_store::ObjectStoreUrl; + + /// Production-build assertion: when no contrib feature is enabled, the registry + /// must be empty. Catches an accidental re-introduction of a contrib into core's + /// `default = [...]` feature set. Compiled out under any active contrib feature + /// (the test binary always links its crate's dependencies, so the assertion would + /// be wrong under those flags). + /// + /// MAINTENANCE: when adding a new `contrib-` feature to `native/core/Cargo.toml`, + /// extend the `not(any(...))` predicate below with the new feature name so the + /// canary still compiles under that contrib's standalone CI matrix entry. + #[cfg(not(any(feature = "contrib-example")))] + #[test] + fn production_build_has_no_contrib_planners_registered() { + // Direct read through the SPI's public API. This test is the canary for + // the contributor-guide claim that release builds carry zero contrib surface. + let kinds = comet_contrib_spi::registered_contrib_kinds(); + assert!( + kinds.is_empty(), + "default cdylib leaked contrib planners: {kinds:?}. \ + Check native/core/Cargo.toml's `default = [...]` for contrib features." + ); + } + + #[test] + fn core_planner_context_builds_parquet_exec_with_expected_schema() { + // Smoke test for the adapter: build a minimal DataSourceExec through the SPI + // trait method and verify the schema flowed through. Catches a coarse class of + // bugs where init_datasource_exec call-site args go out of order -- a swap that + // sent `required_schema` into the `data_schema` slot would produce a different + // output schema. + let session_ctx = Arc::new(SessionContext::new()); + let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0); + let ctx = CorePlannerContext { planner: &planner }; + + let schema: SchemaRef = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ])); + let url = ObjectStoreUrl::parse("file://").unwrap(); + let params = ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![]) + .with_session_timezone("UTC") + .with_case_sensitive(true); + + let exec = ctx + .build_parquet_datasource_exec(params) + .expect("adapter should build a DataSourceExec"); + + // The exec's reported schema must equal the required_schema we passed in. + let out_schema = exec.schema(); + assert_eq!(out_schema.fields().len(), 2); + assert_eq!(out_schema.field(0).name(), "id"); + assert_eq!(out_schema.field(1).name(), "name"); + } + + #[test] + fn core_planner_context_session_ctx_round_trip() { + let session_ctx = Arc::new(SessionContext::new()); + let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0); + let ctx = CorePlannerContext { planner: &planner }; + // Arc identity check -- the contrib gets back the same SessionContext core was + // built with, not a copy. + assert!(Arc::ptr_eq(ctx.session_ctx(), &session_ctx)); + } + + #[test] + fn core_planner_context_converts_empty_schema() { + let session_ctx = Arc::new(SessionContext::new()); + let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0); + let ctx = CorePlannerContext { planner: &planner }; + let schema = ctx.convert_spark_schema(&[]); + assert_eq!(schema.fields().len(), 0); + } + + #[test] + fn core_planner_context_encryption_flag_reaches_init_datasource_exec() { + // Cross-crate positional-arg-swap guard. `init_datasource_exec` takes five `bool` + // parameters in a row (case_sensitive, return_null_struct_..., encryption_enabled, + // use_field_id, ignore_missing_field_id); a swap of two of them at the call site + // in `build_parquet_datasource_exec` would compile fine and break silently. We + // exploit the asymmetry that `encryption_enabled=true` triggers an encryption- + // factory lookup that fails when no factory is registered, while every other + // bool being `true` keeps the call succeeding. So: + // * Default (all bools false) -> Ok + // * Same call with `encryption_enabled=true` -> Err on factory lookup + // If a swap accidentally routed e.g. `use_field_id` into the encryption slot, the + // "default" variant below would fail (because use_field_id is true here in the + // params struct, and the swapped slot would now enable encryption). + // + // SCOPE: this test catches swaps that involve the `encryption_enabled` slot. + // Swaps among the other four bools (case_sensitive / return_null_... / + // use_field_id / ignore_missing_field_id) are NOT caught -- the two witnesses + // below either set all four to true (witness #1) or all four to false + // (witness #2), so a permutation among them is invisible. Adding a new bool to + // ParquetDatasourceParams / init_datasource_exec should be accompanied by a new + // asymmetry witness that exercises THAT new flag. + let session_ctx = Arc::new(SessionContext::new()); + let planner = PhysicalPlanner::new(Arc::clone(&session_ctx), 0); + let ctx = CorePlannerContext { planner: &planner }; + + let schema: SchemaRef = Arc::new(Schema::new(vec![Field::new( + "id", + DataType::Int64, + false, + )])); + let url = ObjectStoreUrl::parse("file://").unwrap(); + + // Witness #1: all five bools `true` EXCEPT encryption_enabled. Must succeed -- + // confirms case_sensitive / use_field_id / etc. are NOT routed into the + // encryption slot. + let no_encryption = ParquetDatasourceParams::new(Arc::clone(&schema), url.clone(), vec![]) + .with_case_sensitive(true) + .with_return_null_struct_if_all_fields_missing(true) + .with_use_field_id(true) + .with_ignore_missing_field_id(true) + .with_encryption_enabled(false); + ctx.build_parquet_datasource_exec(no_encryption) + .expect("encryption_enabled=false must not trigger factory lookup"); + + // Witness #2: only encryption_enabled is true. Must fail with the encryption-factory + // not-found error. Confirms encryption_enabled actually reaches the encryption slot. + let with_encryption = + ParquetDatasourceParams::new(Arc::clone(&schema), url, vec![]).with_encryption_enabled(true); + let err = ctx + .build_parquet_datasource_exec(with_encryption) + .expect_err("encryption_enabled=true should fail without a factory"); + let msg = format!("{err}"); + assert!( + msg.contains("encryption") || msg.contains("Encryption") || msg.contains("factory"), + "expected encryption-factory error, got: {msg}" + ); + } +} diff --git a/native/core/src/execution/planner/operator_registry.rs b/native/core/src/execution/planner/operator_registry.rs index eb31184461..302c3c9489 100644 --- a/native/core/src/execution/planner/operator_registry.rs +++ b/native/core/src/execution/planner/operator_registry.rs @@ -151,5 +151,34 @@ fn get_operator_type(spark_operator: &Operator) -> Option { OpStruct::Explode(_) => None, // Not yet in OperatorType enum OpStruct::CsvScan(_) => Some(OperatorType::CsvScan), OpStruct::ShuffleScan(_) => None, // Not yet in OperatorType enum + // Contrib operators go through the contrib registry instead, keyed by + // ContribOp.kind. Returning None here keeps `OperatorRegistry::can_handle` false + // for contribs so they don't get caught by the in-tree registry; the dispatch + // arm in `planner.rs` for OpStruct::ContribOp handles them explicitly via + // `lookup_contrib_planner_by_kind`. + OpStruct::ContribOp(_) => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion_comet_proto::spark_operator::{operator::OpStruct, ContribOp, Operator}; + + #[test] + fn contrib_op_is_not_handled_by_in_tree_registry() { + // Guard against a future refactor that wires ContribOp into the in-tree + // operator registry by accident (which would double-dispatch contribs). + let op = Operator { + op_struct: Some(OpStruct::ContribOp(ContribOp { + kind: "anything".into(), + payload: vec![], + })), + ..Default::default() + }; + assert!( + get_operator_type(&op).is_none(), + "ContribOp must not be mapped to an in-tree OperatorType" + ); } } diff --git a/native/core/src/lib.rs b/native/core/src/lib.rs index 7d0b6a5454..4d74a7f52f 100644 --- a/native/core/src/lib.rs +++ b/native/core/src/lib.rs @@ -29,6 +29,13 @@ extern crate core; #[macro_use] extern crate datafusion_comet_jni_bridge; +// Pull in contrib crates so their #[ctor] registration runs when libcomet is loaded. +// Each is gated by a Cargo feature flag (see `[features]` in core's Cargo.toml). With the +// feature off the `extern crate` line is removed by cfg and zero bytes of the contrib end +// up in the built cdylib. +#[cfg(feature = "contrib-example")] +extern crate comet_contrib_example; + use jni::{ objects::{JClass, JString}, EnvUnowned, diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto index 7cefe06da7..9e1f1b1767 100644 --- a/native/proto/src/proto/operator.proto +++ b/native/proto/src/proto/operator.proto @@ -53,9 +53,34 @@ message Operator { Explode explode = 114; CsvScan csv_scan = 115; ShuffleScan shuffle_scan = 116; + // Generic envelope for operators contributed by an extension. The contrib's JVM-side + // serde fills the payload with a contrib-private proto message; core's native planner + // dispatches to a `ContribOperatorPlanner` registered (at lib-init time) by the + // contrib's Rust crate, keyed by `kind`. Lets core stay format-agnostic while contrib + // authors evolve their own wire format on a separate cadence. + ContribOp contrib_op = 117; } } +// Envelope for a contrib-contributed operator. Core's native planner dispatches by `kind` +// to a `ContribOperatorPlanner` registered at library-init time (see +// `native/core/src/execution/planner/contrib.rs`); the `payload` is opaque to core -- +// the contrib's planner decodes it into its own proto type. Each contrib ships: +// * a JVM JAR (Scala extension code + ServiceLoader entry), discovered via classpath +// * a Rust crate (rlib) compiled INTO core's cdylib via a Cargo feature flag on core, +// not shipped as a separate cdylib +// Reusing the same envelope for every contrib keeps core's proto stable when contribs +// ship/evolve independently. +message ContribOp { + // Stable identifier the contrib registered under (e.g. "delta-scan", "example-constant"). + string kind = 1; + // Contrib-private payload bytes. Format defined by the contrib's own proto schema. + bytes payload = 2; + // Reserve tags for future additive evolution (e.g. payload_format_version, compression, + // contrib_version) without risking accidental tag reuse by an evolving contrib. + reserved 3 to 9; +} + message SparkPartitionedFile { string file_path = 1; int64 start = 2; diff --git a/pom.xml b/pom.xml index 7419fecc92..e6855e1805 100644 --- a/pom.xml +++ b/pom.xml @@ -38,6 +38,23 @@ under the License. common spark spark-integration + + contrib/example @@ -1151,6 +1168,14 @@ under the License. dev/release/rat_exclude_files.txt dev/release/requirements.txt native/proto/src/generated/** + + **/native/src/generated/** + **/META-INF/services/** benchmarks/tpc/queries/** .claude/** diff --git a/spark/pom.xml b/spark/pom.xml index d3c18ccf87..f26f2f50bc 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -351,6 +351,124 @@ under the License. + + + + contrib-example + + + + org.apache.datafusion + comet-contrib-example-deps-spark${spark.version.short}_${scala.binary.version} + ${project.version} + pom + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-contrib-example-source + generate-sources + add-source + + + ../contrib/example/src/main/scala + + + + + add-contrib-example-test-source + generate-test-sources + add-test-source + + + ../contrib/example/src/test/scala + + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-contrib-example-resources + process-resources + copy-resources + + ${project.build.outputDirectory} + + + ../contrib/example/src/main/resources + + + + + + + + + com.github.os72 + protoc-jar-maven-plugin + + + generate-contrib-example-proto + generate-sources + run + + com.google.protobuf:protoc:${protobuf.version} + + ../contrib/example/native/src/proto + + + + + + + + @@ -435,6 +553,16 @@ under the License. ${comet.shade.packageName}.guava.thirdparty + + + + diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index 679005d9b1..311d2d2a6f 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -87,6 +87,10 @@ class CometSparkSessionExtensions with Logging with ShimCometSparkSessionExtensions { override def apply(extensions: SparkSessionExtensions): Unit = { + // Note: contrib extension discovery happens lazily inside CometScanRule / + // CometExecRule (the first time either runs against a Comet-enabled session). + // Sessions that never enable Comet pay zero ServiceLoader cost. + extensions.injectColumnar { session => CometScanColumnar(session) } extensions.injectColumnar { session => CometExecColumnar(session) } // Pre-3.5 only: tag AQE DPP regions so the conversion rules below leave them Spark-native. diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala index 72c2bea9e4..1a4a916f8f 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala @@ -54,6 +54,7 @@ import org.apache.comet.rules.CometExecRule.allExecs import org.apache.comet.serde._ import org.apache.comet.serde.operator._ import org.apache.comet.shims.{ShimCometStreaming, ShimSubqueryBroadcast} +import org.apache.comet.spi.CometExtensionRegistry object CometExecRule { @@ -270,6 +271,27 @@ case class CometExecRule(session: SparkSession) case scan: CometBatchScanExec if scan.wrapped.scan.isInstanceOf[CSVScan] => convertToComet(scan, CometCsvNativeScanExec).getOrElse(scan) + // Contrib marker dispatch: a `CometScanExec` tagged with a contrib's `scanImpl` + // (i.e. listed in `nativeParquetScanImpls`) is routed through that contrib's + // `matchOperator` serde rather than the generic `CometScanWrapper` below. Without + // this, the marker would only get JVM-side parquet bytes-reuse, never reaching the + // contrib's `serialize` and therefore missing format-specific concerns -- e.g. + // Delta column-mapping physical-name substitution and `InputFileBlockHolder` + // population for `input_file_name()` / `_metadata.file_path`. If the contrib's + // `matchOperator` chooses to return None (e.g. the contrib only wants the full + // native conversion for certain scans), the marker falls back to the generic + // wrapper path -- which is also what happens when conversion itself returns None. + case scan: CometScanExec + if CometExtensionRegistry.nativeParquetScanImpls.contains(scan.scanImpl) => + val handler = CometExtensionRegistry.serdeExtensions.iterator + .flatMap(_.matchOperator(scan)) + .nextOption() + .map(_.asInstanceOf[CometOperatorSerde[SparkPlan]]) + handler match { + case Some(h) => convertToComet(scan, h).getOrElse(scan) + case None => convertToComet(scan, CometScanWrapper).getOrElse(scan) + } + // Comet JVM + native scan for V1 and V2 case op if isCometScan(op) => convertToComet(op, CometScanWrapper).getOrElse(op) @@ -349,8 +371,27 @@ case class CometExecRule(session: SparkSession) // if all children are native (or if this is a leaf node) then see if there is a // registered handler for creating a fully native plan if (op.children.forall(_.isInstanceOf[CometNativeExec])) { + // Contrib SPI: each registered CometOperatorSerdeExtension contributes a + // SparkPlan-class -> CometOperatorSerde map. The merged map is pre-computed + // once at registry load time (CometExtensionRegistry.mergedSerdes) so we + // don't rebuild a HashMap on every operator transform. Contribs own classes + // that aren't in `allExecs`, so this merge never overrides a core mapping in + // practice; duplicate-class detection at load() time logs a warning if it + // does happen. + // Three-step dispatch: + // 1. core's built-in class-keyed map (allExecs) + // 2. contrib serde-extensions' class-keyed map (mergedSerdes) + // 3. contrib serde-extensions' predicate-based matchOperator hook + // (for marker-class patterns where one shared SparkPlan class -- + // e.g. CometScanExec -- is disambiguated by a runtime tag) val handler = allExecs .get(op.getClass) + .orElse(CometExtensionRegistry.mergedSerdes.get(op.getClass)) + .orElse { + CometExtensionRegistry.serdeExtensions.iterator + .flatMap(_.matchOperator(op)) + .nextOption() + } .map(_.asInstanceOf[CometOperatorSerde[SparkPlan]]) handler match { case Some(handler) => @@ -545,6 +586,12 @@ case class CometExecRule(session: SparkSession) // We shouldn't transform Spark query plan if Comet is not loaded. if (!isCometLoaded(conf)) return plan + // Lazy contrib discovery. Mirrors the call in CometScanRule._apply -- either rule may + // be the first to run depending on which path of the plan tree fires first. load() is + // idempotent (AtomicBoolean gate), so the duplicate call is a no-op in steady state + // but makes each rule self-contained instead of relying on CometScanRule running first. + CometExtensionRegistry.load() + // Comet does not support structured streaming. Fall back to Spark for any plan that // belongs to a streaming query (detected via StreamSourceAwareSparkPlan.getStream). if (ShimCometStreaming.isStreamingPlan(plan)) return plan diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala index 64b69be1e9..64d555a80f 100644 --- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala +++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala @@ -50,6 +50,7 @@ import org.apache.comet.parquet.CometParquetUtils.{encryptionEnabled, isEncrypti import org.apache.comet.parquet.Native import org.apache.comet.serde.operator.{CometIcebergNativeScan, CometNativeScan} import org.apache.comet.shims.{CometTypeShim, ShimCometStreaming, ShimFileFormat, ShimSubqueryBroadcast} +import org.apache.comet.spi.CometExtensionRegistry /** * Spark physical optimizer rule for replacing Spark scans with Comet scans. @@ -75,6 +76,11 @@ case class CometScanRule(session: SparkSession) private def _apply(plan: SparkPlan): SparkPlan = { if (!isCometLoaded(conf)) return plan + // Lazy contrib discovery: by the time we get here Comet is enabled. load() is + // idempotent so subsequent invocations across plans / sessions are free. Sessions + // that never reach this point pay zero ServiceLoader cost. + CometExtensionRegistry.load() + // Comet does not support structured streaming. The parallel guard in // CometExecRule only stops operator wrapping, so without this check we // would still rewrite scans to CometScanExec in a streaming plan. @@ -114,7 +120,71 @@ case class CometScanRule(session: SparkSession) metadataTableSuffix.exists(suffix => scanExec.table.name().endsWith(suffix)) } - val fullPlan = plan + // Contrib SPI tree-level pre-pass. Each registered extension gets a chance to rewrite + // the whole plan tree before per-scan dispatch begins. Used by contribs that need to + // undo wrapper rewrites from their own Catalyst strategies (Delta's + // `PreprocessTableWithDVs` is the canonical case). Fold in registration order so + // contribs see each other's outputs deterministically. Extensions that don't override + // `preTransform` inherit the trait's identity default -- zero overhead. + // + // Gated on COMET_NATIVE_SCAN_ENABLED: if the user has disabled Comet scan, the + // contribs' Catalyst wrappers (Delta's DV filter, etc.) are load-bearing and stripping + // them turns into a correctness bug. Leave the plan tree as Spark wrote it. + // + // Corruption guard: snapshot every FileSourceScanExec the extension does NOT claim + // before the pass, and verify each one is still present (by reference) afterwards. + // Contribs' `preTransform` MUST only rewrite scans they recognise; this guard catches + // the most dangerous violation (a contrib stripping or substituting an unrelated + // format's scan) regardless of whether the replacement keeps the same SparkPlan + // class. Plan-tree reordering is tolerated -- we only care that the scan still + // exists in the tree, not where. + // + // Cost: O(K * (P + S)) where K = scanExtensions.size, P = plan node count, + // S = scan count. IdentityHashMap gives O(1) survivor lookup; the dominant term + // is the tree traversals. For typical K=1..3 this is negligible. + // + // V2 scope: V2 BatchScanExecs are NOT inspected. preTransform is documented V1-only + // (see CometScanRuleExtension.preTransform); V2 wrapper-stripping happens per-scan + // inside `transformV2` and doesn't have the same tree-level corruption surface. + val prepped = + if (!CometConf.COMET_NATIVE_SCAN_ENABLED.get(conf)) { + plan + } else { + CometExtensionRegistry.scanExtensions.foldLeft(plan) { (p, ext) => + val unclaimedBefore = p.collect { + case s: FileSourceScanExec if !ext.matchesV1(s.relation) => s + } + val after = ext.preTransform(p, session) + if (unclaimedBefore.nonEmpty) { + // IDENTITY semantics, NOT value-equality: Spark case classes (including + // FileSourceScanExec) compare equal when their fields match, so a self-join + // with two reads against the same table after AQE deduplication can produce + // two value-equal-but-reference-distinct scans. A standard mutable.Set would + // collapse them and we'd emit a false-positive warning. IdentityHashMap + // gives us O(1) lookup with reference-equality semantics. + val survivors = + new java.util.IdentityHashMap[FileSourceScanExec, java.lang.Boolean]() + after.foreach { + case s: FileSourceScanExec => survivors.put(s, java.lang.Boolean.TRUE) + case _ => + } + unclaimedBefore.foreach { b => + if (!survivors.containsKey(b)) { + logWarning( + s"CometScanRuleExtension '${ext.name}'.preTransform removed or " + + s"replaced a FileSourceScanExec it does not claim " + + s"(matchesV1=false on its relation, ${b.relation.fileFormat}). " + + s"This is a contract violation -- preTransform must only rewrite " + + s"scans the extension recognises. See " + + s"CometScanRuleExtension.preTransform doc.") + } + } + } + after + } + } + + val fullPlan = prepped def transformScan(scanNode: SparkPlan): SparkPlan = scanNode match { // Tagged by CometSpark34AqeDppFallbackRule on Spark < 3.5 to keep a peer scan @@ -141,7 +211,7 @@ case class CometScanRule(session: SparkSession) } } - plan.transform { + prepped.transform { case scan if isSupportedScanNode(scan) => transformScan(scan) } } @@ -161,6 +231,27 @@ case class CometScanRule(session: SparkSession) return withInfo(scanExec, "AQE Dynamic Partition Pruning requires Spark 3.5+") } + // Contrib SPI dispatch: offer the scan to every registered CometScanRuleExtension + // before core's built-in file-format logic. Loop in registration order; the FIRST + // extension whose `matchesV1` returns true AND whose `transformV1` returns Some(_) + // wins -- its replacement plan is returned. An extension that returns None from + // `transformV1` means "I match this scan shape but decline to accelerate this + // specific instance"; the loop continues to the next extension before falling back + // to core's built-in file-format logic. This lets multiple contribs coexist (e.g. + // Iceberg + Delta both loaded) without one's decline silently masking another. + scanExec.relation match { + case r: HadoopFsRelation => + val replacement = CometExtensionRegistry.scanExtensions.iterator + .filter(_.matchesV1(r)) + .flatMap(ext => ext.transformV1(plan, scanExec, session)) + .nextOption() + replacement match { + case Some(plan) => return plan + case None => // no extension produced a replacement; fall through + } + case _ => // SPI only operates on HadoopFsRelation V1 scans + } + scanExec.relation match { case r: HadoopFsRelation => if (!CometScanExec.isFileFormatSupported(r.fileFormat)) { @@ -259,6 +350,18 @@ case class CometScanRule(session: SparkSession) private def transformV2Scan(scanExec: BatchScanExec): SparkPlan = { + // Contrib SPI dispatch (V2): mirrors transformV1Scan. Loop in registration order; + // first matching extension whose transformV2 returns Some wins. Decline = continue + // to next extension. + val replacement = CometExtensionRegistry.scanExtensions.iterator + .filter(_.matchesV2(scanExec)) + .flatMap(ext => ext.transformV2(scanExec, session)) + .nextOption() + replacement match { + case Some(plan) => return plan + case None => // no extension produced a replacement; fall through + } + scanExec.scan match { case scan: CSVScan if COMET_CSV_V2_NATIVE_ENABLED.get() => val fallbackReasons = new ListBuffer[String]() @@ -677,33 +780,14 @@ case class CometScanRule(session: SparkSession) case _ => false } + // Delegate to the companion object's pure helper so the implementation lives in one + // place. Kept as a class-level method so existing in-class callers (transformV1Scan, + // transformV2Scan) compile unchanged. private def isSchemaSupported( scanExec: FileSourceScanExec, scanImpl: String, - r: HadoopFsRelation): Boolean = { - val fallbackReasons = new ListBuffer[String]() - val typeChecker = CometScanTypeChecker(scanImpl) - val schemaSupported = - typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons) - if (!schemaSupported) { - withInfo( - scanExec, - s"Unsupported schema ${scanExec.requiredSchema} " + - s"for $scanImpl: ${fallbackReasons.mkString(", ")}") - return false - } - val partitionSchemaSupported = - typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons) - if (!partitionSchemaSupported) { - withInfo( - scanExec, - s"Unsupported partitioning schema ${scanExec.requiredSchema} " + - s"for $scanImpl: ${fallbackReasons - .mkString(", ")}") - return false - } - true - } + r: HadoopFsRelation): Boolean = + CometScanRule.isSchemaSupported(scanExec, scanImpl, r) } case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with CometTypeShim { @@ -743,6 +827,39 @@ case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with C object CometScanRule extends Logging { + /** + * Schema-support check + fallback-reason emission, callable from contrib extensions under + * `org.apache.comet.contrib.*`. Pure function; no shared state with CometScanRule instances. + * `private[comet]` keeps it out of the public API while letting subpackages (contribs) reach + * it. + */ + private[comet] def isSchemaSupported( + scanExec: FileSourceScanExec, + scanImpl: String, + r: HadoopFsRelation): Boolean = { + val fallbackReasons = new ListBuffer[String]() + val typeChecker = CometScanTypeChecker(scanImpl) + val schemaSupported = + typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons) + if (!schemaSupported) { + org.apache.comet.CometSparkSessionExtensions.withInfo( + scanExec, + s"Unsupported schema ${scanExec.requiredSchema} " + + s"for $scanImpl: ${fallbackReasons.mkString(", ")}") + return false + } + val partitionSchemaSupported = + typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons) + if (!partitionSchemaSupported) { + org.apache.comet.CometSparkSessionExtensions.withInfo( + scanExec, + s"Unsupported partitioning schema ${scanExec.requiredSchema} " + + s"for $scanImpl: ${fallbackReasons.mkString(", ")}") + return false + } + true + } + /** * Tag set on a scan (`FileSourceScanExec` or `BatchScanExec`) that should be left as a plain * Spark scan rather than converted to a Comet scan. Written by diff --git a/spark/src/main/scala/org/apache/comet/spi/CometExtensionRegistry.scala b/spark/src/main/scala/org/apache/comet/spi/CometExtensionRegistry.scala new file mode 100644 index 0000000000..e8ab6fc2f6 --- /dev/null +++ b/spark/src/main/scala/org/apache/comet/spi/CometExtensionRegistry.scala @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.spi + +import java.util.ServiceLoader +import java.util.concurrent.atomic.AtomicBoolean + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.internal.Logging + +/** + * Process-wide singleton that exposes the contrib extensions bundled into comet-spark.jar. + * + * Discovery uses `java.util.ServiceLoader` against `META-INF/services/` entries inside + * comet-spark.jar. Those entries get there at build time: each contrib (under `contrib//`) + * carries its own `META-INF/services/` files, and the `contrib-` Maven profile on + * spark/pom.xml shades the contrib's classes plus those service entries into the published + * comet-spark.jar. A vanilla `mvn install` produces a comet-spark.jar with zero contribs; a `mvn + * install -Pcontrib-example` build bundles the example contrib. The native side mirrors this + * exactly via `--features contrib-example` on the Rust core crate. + * + * Discovery is idempotent: the first `load()` call enumerates the service entries; subsequent + * calls are no-ops. `load()` is invoked lazily from `CometScanRule._apply` and + * `CometExecRule._apply` the first time either rule runs against a Comet-enabled session. Spark + * sessions that never enable Comet pay zero ServiceLoader cost. + * + * Failures to instantiate individual extensions are logged at WARN but do NOT fail Comet startup + * -- a misconfigured contrib shouldn't take down the whole Spark session. + */ +object CometExtensionRegistry extends Logging { + + private val loaded = new AtomicBoolean(false) + @volatile private var scanExts: Seq[CometScanRuleExtension] = Seq.empty + @volatile private var serdeExts: Seq[CometOperatorSerdeExtension] = Seq.empty + + /** + * Discover contrib extensions on the classpath. Idempotent. Safe to call from multiple threads + * (only the first call performs discovery). + */ + def load(): Unit = synchronized { + // `synchronized` (not just compareAndSet) so that concurrent callers wait for the + // first thread's writes to `scanExts` / `serdeExts` / `mergedSerdesCache` to publish + // before they return. The previous AtomicBoolean-only gate allowed thread B to + // observe `loaded=true` and read `Seq.empty` while thread A was still mid-loadOne. + // CometScanRule._apply and CometExecRule._apply both call this on first invocation, + // and AQE can run them concurrently across sub-queries, so the race is reachable. + // + // Contribs MUST NOT call `load()` from a `#[ctor]`-equivalent (JVM-side: a class's + // static initializer or trait's `object` init) -- Scala monitors are reentrant so + // re-entry won't deadlock, but the inner call would observe the partially-built + // state and re-trigger `loadOne`, shadowing the in-flight publication. + if (loaded.get()) return + val newScanExts = loadOne[CometScanRuleExtension]("CometScanRuleExtension") + val newSerdeExts = loadOne[CometOperatorSerdeExtension]("CometOperatorSerdeExtension") + val newMerged = newSerdeExts.flatMap(_.serdes).toMap + val newNativeParquetTags = newSerdeExts.flatMap(_.nativeParquetScanImpls).toSet + // Publish the @volatile fields BEFORE flipping `loaded` so other threads either see + // the empty defaults (and may re-enter -- benign, blocked by the monitor) or the + // fully-populated state (and may skip -- also benign). + scanExts = newScanExts + serdeExts = newSerdeExts + mergedSerdesCache = newMerged + nativeParquetScanImplsCache = newNativeParquetTags + loaded.set(true) + // Call `init()` AFTER publishing the volatile fields and flipping `loaded`. This lets + // an extension's `init` synchronously call back into the registry (e.g. to read its + // sibling extensions) without observing a half-built state, and it lets `init` register + // executor-side callbacks on `CometExecRDD` without racing the first compute call. + // Failures are isolated per extension so one broken contrib doesn't take down the others. + newSerdeExts.foreach { ext => + try ext.init() + catch { + case scala.util.control.NonFatal(e) => + logWarning(s"CometOperatorSerdeExtension '${ext.name}' init failed; continuing", e) + } + } + if (newScanExts.nonEmpty || newSerdeExts.nonEmpty) { + logInfo( + s"Comet contrib extensions loaded: " + + s"scan=[${newScanExts.map(_.name).mkString(", ")}], " + + s"serde=[${newSerdeExts.map(_.name).mkString(", ")}]") + detectDuplicateSerdeClasses(newSerdeExts) + } else { + // Positive signal that discovery ran. Comet-spark.jar's contrib content depends on + // which `-Pcontrib-` Maven profiles were active at build time; this line is + // what tells a user whose contrib went missing whether to suspect their Comet build + // or their classpath. + logInfo( + "Comet contrib extensions: none discovered. comet-spark.jar was built " + + "without any contrib profiles enabled, or the contrib's META-INF/services " + + "entries were not bundled correctly.") + } + } + + /** Registered scan-rule extensions, in classpath discovery order. */ + def scanExtensions: Seq[CometScanRuleExtension] = scanExts + + /** Registered operator-serde extensions, in classpath discovery order. */ + def serdeExtensions: Seq[CometOperatorSerdeExtension] = serdeExts + + /** + * Pre-merged serde map across all registered extensions, keyed by the `Class[_ <: SparkPlan]` + * the contrib uses for class-keyed dispatch in `CometExecRule`. Computed once at `load()` time; + * an empty map until `load()` has run. + */ + def mergedSerdes: Map[ + Class[_ <: org.apache.spark.sql.execution.SparkPlan], + org.apache.comet.serde.CometOperatorSerde[_]] = mergedSerdesCache + + @volatile private var mergedSerdesCache: Map[ + Class[_ <: org.apache.spark.sql.execution.SparkPlan], + org.apache.comet.serde.CometOperatorSerde[_]] = Map.empty + + /** + * Union of every registered extension's `nativeParquetScanImpls`. Consumed by + * `CometScanExec.supportedDataFilters` to decide whether the marker scan's filter set should + * get the same native-parquet exclusions as `SCAN_NATIVE_DATAFUSION`. Computed once at `load()` + * time; empty until `load()` has run. + */ + def nativeParquetScanImpls: Set[String] = nativeParquetScanImplsCache + + @volatile private var nativeParquetScanImplsCache: Set[String] = Set.empty + + /** + * Log a warning when two registered contribs claim the same `Class[_ <: SparkPlan]` for serde + * dispatch. The convention documented in `contrib-extensions.md` is that each contrib defines + * its own exec class and registers a serde keyed on that class; a collision usually means a + * contrib subclassed a core exec by mistake. + * + * Detection only -- the last-write-wins toMap behavior stands. We log so the user has a chance + * to notice; preventing the override would be a harder migration path (silent drop of one + * contrib's exec). + */ + private def detectDuplicateSerdeClasses(exts: Seq[CometOperatorSerdeExtension]): Unit = { + val perClassOwners = scala.collection.mutable.Map + .empty[ + Class[_ <: org.apache.spark.sql.execution.SparkPlan], + scala.collection.mutable.ArrayBuffer[String]] + exts.foreach { ext => + ext.serdes.keys.foreach { cls => + perClassOwners + .getOrElseUpdate(cls, scala.collection.mutable.ArrayBuffer.empty) + .+=(ext.name) + } + } + perClassOwners.foreach { case (cls, owners) => + if (owners.size > 1) { + logWarning( + s"Multiple Comet contrib extensions claim the same exec class " + + s"${cls.getName}: [${owners.mkString(", ")}]. Last-write-wins; " + + s"this usually indicates a contrib has subclassed a core or " + + s"another contrib's exec instead of defining its own.") + } + } + } + + /** + * Test-only: reset the registry to the empty state. Lets unit tests re-run discovery with a + * different classpath / overridden services. Not for production use. + * + * Visibility is `public` (rather than `private[comet]`) because contribs are not required to be + * packaged under `org.apache.comet.*`; a contrib living under e.g. `io.delta.comet.contrib` + * must still be able to reset between tests. The method's name carries the "test-only" contract + * by convention. + */ + def resetForTesting(): Unit = synchronized { + // synchronized so concurrent `load()` callers don't observe torn state -- e.g. + // `loaded=false` with `scanExts` still populated, which would let a subsequent + // `load()` short-circuit on the AtomicBoolean and never re-discover. + loaded.set(false) + scanExts = Seq.empty + serdeExts = Seq.empty + mergedSerdesCache = Map.empty + nativeParquetScanImplsCache = Set.empty + // Also clear any executor-side callbacks registered via the SPI's `init` hook so the + // next `load()` re-registers from scratch. Without this the test that exercises + // `resetForTesting` + `load` would accumulate handlers across reset boundaries. + org.apache.spark.sql.comet.CometExecRDD.clearPartitionMetadataHandlers() + org.apache.spark.sql.comet.PlanDataInjector.clearContribInjectors() + } + + private def loadOne[T](label: String)(implicit ct: scala.reflect.ClassTag[T]): Seq[T] = { + val cls = ct.runtimeClass.asInstanceOf[Class[T]] + val loader = Option(Thread.currentThread().getContextClassLoader) + .getOrElse(getClass.getClassLoader) + try { + val it = ServiceLoader.load(cls, loader).iterator().asScala + val out = scala.collection.mutable.ArrayBuffer.empty[T] + while (it.hasNext) { + // Pull each extension under its own try so one broken contrib doesn't sink the + // rest of the registry. ServiceLoader.next() can throw if the extension fails to + // instantiate (missing class, ctor exception, etc.). + try out += it.next() + catch { + case scala.util.control.NonFatal(e) => + logWarning(s"Failed to load a $label entry; skipping: ${e.getMessage}", e) + } + } + out.toSeq + } catch { + case scala.util.control.NonFatal(e) => + logWarning(s"$label discovery failed; no contrib extensions of this kind loaded", e) + Seq.empty + } + } +} diff --git a/spark/src/main/scala/org/apache/comet/spi/CometOperatorSerdeExtension.scala b/spark/src/main/scala/org/apache/comet/spi/CometOperatorSerdeExtension.scala new file mode 100644 index 0000000000..345271f9a4 --- /dev/null +++ b/spark/src/main/scala/org/apache/comet/spi/CometOperatorSerdeExtension.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.spi + +import org.apache.spark.sql.execution.SparkPlan + +import org.apache.comet.serde.CometOperatorSerde + +/** + * SPI hook that lets a contrib extension contribute additional operator-to-native serdes to + * `CometExecRule`. Used when a contrib needs to translate a contrib-specific physical operator + * (e.g. `CometDeltaNativeScanExec` for Delta) into a native plan -- the contrib provides the + * serde, and `CometExecRule` calls it during plan transformation. + * + * `CometExecRule` discovers implementations via `CometExtensionRegistry.serdeExtensions` + * (ServiceLoader-backed). Each contrib JAR ships a + * `META-INF/services/org.apache.comet.spi.CometOperatorSerdeExtension` resource listing its + * extension class. + * + * Implementations MUST be stateless / safe to share across query executions. + */ +trait CometOperatorSerdeExtension { + + /** Human-readable name shown in logs and error messages. */ + def name: String + + /** + * Mapping of SparkPlan class -> serde. The contrib lists every operator class it knows how to + * translate to native. `CometExecRule` merges these mappings with its built-in `allExecs` to + * dispatch by class identity at conversion time. + * + * Convention: each contrib's mapping should reference only classes the contrib itself defines, + * so two contribs never claim ownership of the same operator class. + */ + def serdes: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] = Map.empty + + /** + * Predicate-based dispatch hook for contribs whose serde key cannot be expressed as a unique + * `SparkPlan` class. The canonical case is the `CometScanExec` marker-with-`scanImpl`-tag + * pattern: a contrib's `CometScanRuleExtension.transformV1` returns `CometScanExec(scanExec, + * session, "my-contrib-tag")`, but `CometScanExec` is a case class shared with core, so a + * class-keyed map can't disambiguate by the tag. The contrib overrides this method to inspect + * the plan and return its serde: + * + * {{{ + * private val MyScanImpl = "native_myformat_compat" // contrib-local constant + * + * override def matchOperator(op: SparkPlan): Option[CometOperatorSerde[_]] = op match { + * case s: CometScanExec if s.scanImpl == MyScanImpl => Some(CometMyFormatScan) + * case _ => None + * } + * }}} + * + * `CometExecRule` consults `matchOperator` only after the class-keyed `serdes` map misses, so + * contribs with a unique exec class never need to implement this. Multiple registered + * extensions' `matchOperator` returns are tried in registration order; the first `Some` wins. + */ + def matchOperator(op: SparkPlan): Option[CometOperatorSerde[_]] = None + + /** + * Declares which `scanImpl` string tags this contrib produces from + * `CometScanRuleExtension.transformV1` when using the `CometScanExec(marker, scanImpl=X)` + * pattern. Tags listed here get `CometScanExec.supportedDataFilters`'s native-parquet filter + * exclusions (drop dynamic pruning + IsNull/IsNotNull on ArrayType columns), the same treatment + * `SCAN_NATIVE_DATAFUSION` receives. + * + * Override only if your contrib uses the marker-class pattern AND your native side goes through + * Comet's tuned `ParquetSource`. Contribs that define their own `SparkPlan` subclass (rather + * than reusing `CometScanExec`) don't need this; they control filter selection themselves. + * + * Example: a Delta contrib that uses `CometScanExec(..., scanImpl="native_delta_compat")` would + * override this to `Set("native_delta_compat")`. + */ + def nativeParquetScanImpls: Set[String] = Set.empty + + /** + * One-shot initialization hook invoked exactly once per JVM by `CometExtensionRegistry.load` + * after this extension has been instantiated. Use to register executor-side callbacks that + * can't be expressed declaratively in the `serdes` map -- e.g. a per-partition metadata + * handler on `CometExecRDD.registerPartitionMetadataHandler` for populating Spark + * thread-locals from a contrib's serialized per-partition payload. + * + * Default no-op so existing extensions don't have to opt in. Implementations MUST be safe + * to call once per JVM (e.g. don't accumulate state across query executions). Failures are + * logged and isolated: a broken `init` on one contrib doesn't take down the others. + */ + def init(): Unit = () +} diff --git a/spark/src/main/scala/org/apache/comet/spi/CometScanRuleExtension.scala b/spark/src/main/scala/org/apache/comet/spi/CometScanRuleExtension.scala new file mode 100644 index 0000000000..9c273bf47f --- /dev/null +++ b/spark/src/main/scala/org/apache/comet/spi/CometScanRuleExtension.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.spi + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.HadoopFsRelation +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec + +/** + * SPI hook that lets a contrib extension intercept scan transformation in `CometScanRule`. + * Contribs typically use this to recognise a specific table format (Delta, Hudi, etc.) and route + * it through a contrib-specific native execution path. + * + * `CometScanRule` discovers implementations via `CometExtensionRegistry.scanExtensions` + * (ServiceLoader-backed) and offers each candidate scan to every registered extension in + * registration order. The first extension whose [[matchesV1]] (or [[matchesV2]]) returns true AND + * whose [[transformV1]] (or [[transformV2]]) returns `Some(_)` wins -- its returned plan replaces + * the scan subtree. An extension whose `matches` is true but whose `transform` returns `None` is + * treated as "declined this instance"; dispatch continues to the next matching extension. After + * every matching extension has declined, core's built-in file-format dispatch handles the scan as + * before. + * + * Contribs are discovered via the standard Java ServiceLoader. Each contrib JAR ships a + * `META-INF/services/org.apache.comet.spi.CometScanRuleExtension` resource listing its extension + * class. + * + * Implementations MUST be safe to invoke from `CometScanRule`'s `apply` method -- specifically: + * pure, stateless, side-effect-free with respect to the plan tree (any state needed should be + * derived from `scanExec` / `relation` / the surrounding plan). The registry caches instances + * across plans, so per-plan state on the implementation will leak between queries. + */ +trait CometScanRuleExtension { + + /** Human-readable name shown in logs and error messages. Should be unique per extension. */ + def name: String + + /** + * Tree-level pre-pass run once per plan before per-scan dispatch begins. Default: identity. + * + * Use this to undo wrapper rewrites that a format's own Catalyst strategy applied. The + * canonical example is Delta's `PreprocessTableWithDVs` strategy, which wraps every DV-bearing + * Delta scan in a `Project(Filter(...))` subtree referencing a synthetic + * `__delta_internal_is_row_deleted` column produced by Delta's own reader. Comet reads via its + * own parquet path; without unwrapping that subtree, the synthetic column never gets produced + * and the downstream `Filter` silently drops every row. The Delta contrib's `preTransform` + * strips the wrapper so the clean scan reaches per-scan dispatch. + * + * '''V1-only.''' `preTransform` runs once for the whole plan and the rewritten tree is what + * later `transformV1` calls see via their `plan` argument. `transformV2` does NOT receive a + * plan-tree reference -- only the matched `BatchScanExec`. V2 contribs that need + * wrapper-stripping must do that work inside `transformV2` against `scanExec.scan` / + * `scanExec.children` directly. + * + * '''Disabled when scan conversion is off.''' `CometScanRule` skips the entire preTransform + * fold when `spark.comet.scan.enabled=false`. A contrib's own wrappers (Delta's DV filter, + * etc.) are load-bearing in that case; stripping them turns into a correctness bug. + * + * '''MUST NOT modify scans the extension does not recognise.''' Multiple registered extensions + * are folded over the plan in registration order; an extension that rewrites scans outside its + * format's domain will silently corrupt other formats' plans. `CometScanRule` logs a warning + * when a `FileSourceScanExec` is replaced by an extension whose `matchesV1` returns false + * against the original scan's relation -- contribs that trip this warning should narrow their + * pattern match. + * + * '''State sharing.''' Shared state between this pre-pass and later `transformV1` calls is the + * contrib's problem. The recommended pattern is to attach a Spark `TreeNodeTag` to nodes during + * `preTransform` and read it during `transformV1`. Spark's tag mechanism is tree-immutable-safe + * and survives plan transformations -- preferred over external mutable state which leaks across + * plans. + */ + def preTransform(plan: SparkPlan, session: SparkSession): SparkPlan = plan + + /** + * Whether this extension wants to handle the given V1 scan. Implementations should make a cheap + * decision here (typically file-format class-name probe) so non-matching paths add no per-scan + * overhead. + * + * Default returns false; override `matchesV1` and `transformV1` for V1 scan support. + */ + def matchesV1(relation: HadoopFsRelation): Boolean = false + + /** + * Transform the matched V1 scan. Called only when `matchesV1` returned true. + * + * Returning `None` means "I matched the scan shape but ultimately can't accelerate this + * specific instance" -- `CometScanRule` then continues to the NEXT registered extension whose + * `matchesV1` is true, falling back to core's built-in file-format dispatch only after every + * matching extension has declined. Returning `Some(plan)` ends dispatch and replaces the scan + * subtree with `plan`. + */ + def transformV1( + plan: SparkPlan, + scanExec: FileSourceScanExec, + session: SparkSession): Option[SparkPlan] = None + + /** + * Whether this extension wants to handle the given V2 batch scan. See `matchesV1`. + * + * Default returns false; override `matchesV2` and `transformV2` for V2 scan support. + */ + def matchesV2(scanExec: BatchScanExec): Boolean = false + + /** + * Transform the matched V2 scan. Called only when `matchesV2` returned true. + * + * Same semantics as `transformV1`: `None` falls through to the next matching extension; + * `Some(plan)` ends dispatch. Note that unlike `transformV1`, this method does NOT receive a + * plan-tree reference -- `preTransform` rewrites are not visible here. V2 contribs that need + * wrapper-stripping must operate on `scanExec.scan` / `scanExec.children` directly. + */ + def transformV2(scanExec: BatchScanExec, session: SparkSession): Option[SparkPlan] = None +} diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala index 47eda98a11..072ff6d2bf 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala @@ -111,6 +111,16 @@ private[spark] class CometExecRDD( serializedPlan } + // Invoke registered per-partition metadata handlers. This is the SPI hook contribs + // use to populate executor thread-locals (e.g. `InputFileBlockHolder`) from their + // serialized per-partition payloads before the native iterator runs. The Delta + // contrib uses it so `input_file_name()` and Delta's `_metadata.file_path` resolve + // correctly; without this, UPDATE/DELETE/MERGE/CDC paths see empty file_path and + // throw `DELTA_FILE_TO_OVERWRITE_NOT_FOUND`. Handlers are called for every + // partition with non-empty plan data and are expected to no-op when the partition + // doesn't carry their proto. The registered handlers list is a `@volatile` read. + CometExecRDD.runPartitionMetadataHandlers(partition.planDataByKey, context) + // Create shuffle block iterators for inputs that are CometShuffledBatchRDD val shuffleBlockIters = shuffleScanIndices.flatMap { idx => inputRDDs(idx) match { @@ -163,6 +173,56 @@ private[spark] class CometExecRDD( object CometExecRDD { + /** + * SPI hook signature: a callback contribs register to inspect a partition's per-partition + * planning data BEFORE the native iterator starts producing rows on this task. Receives the + * `Map[String, Array[Byte]]` of serialized per-partition payloads keyed by `sourceKey` (the + * same shape contribs serialize into `perPartitionByKey` at planning time). Plus the active + * `TaskContext` so handlers can register completion listeners. + * + * Canonical use: the Delta contrib reads its `DeltaScan` payload, extracts the AddFile path, + * and calls `InputFileBlockHolder.set` so `input_file_name()` and Delta's `_metadata.file_path` + * resolve to the file being read (otherwise UPDATE/DELETE/MERGE/CDC throw + * `DELTA_FILE_TO_OVERWRITE_NOT_FOUND`). + * + * The signature is deliberately the data shape, NOT a Spark-internal partition type, so contribs + * don't have to live under `org.apache.spark.*` to see it. Handlers MUST: + * - be stateless and free of contrib-specific assumptions on partitions that don't carry + * their proto (no-op silently when their expected key/payload shape isn't present), + * - register a task-completion listener for any thread-local they set, so the value is + * cleared at the end of the task, and + * - tolerate parse failures defensively -- another contrib may own this key. + */ + type PartitionMetadataHandler = (Map[String, Array[Byte]], TaskContext) => Unit + + @volatile private var partitionMetadataHandlers: Vector[PartitionMetadataHandler] = Vector.empty + + /** + * Register a per-partition metadata handler. Called once per contrib at extension-load + * time (from `CometOperatorSerdeExtension.init`). Registration is idempotent on the + * same function reference but does not de-duplicate equivalent lambdas; contribs are + * expected to register exactly once. + */ + def registerPartitionMetadataHandler(h: PartitionMetadataHandler): Unit = synchronized { + if (!partitionMetadataHandlers.contains(h)) { + partitionMetadataHandlers = partitionMetadataHandlers :+ h + } + } + + /** + * Test-only / contrib reset. Visibility is `public` to mirror `resetForTesting` on the registry. + */ + def clearPartitionMetadataHandlers(): Unit = synchronized { + partitionMetadataHandlers = Vector.empty + } + + private[comet] def runPartitionMetadataHandlers( + planDataByKey: Map[String, Array[Byte]], + context: TaskContext): Unit = { + val hs = partitionMetadataHandlers + if (hs.nonEmpty) hs.foreach(_(planDataByKey, context)) + } + /** * Creates an RDD for native execution with optional per-partition planning data. */ diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index 652fdfc96d..7e9b2bfa80 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -159,7 +159,13 @@ case class CometScanExec( * on array columns (see [[isNullCheckOnArrayColumn]]). */ lazy val supportedDataFilters: Seq[Expression] = { - if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION) { + // Contribs that use the CometScanExec marker pattern with their own scanImpl + // string can declare that their scan goes through Comet's tuned ParquetSource + // (and therefore wants DataFusion-style filter exclusions) by registering the + // tag via `CometOperatorSerdeExtension.nativeParquetScanImpls`. Core doesn't + // need to know any contrib's marker name; the registry is the source of truth. + if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION || + CometScanExec.contribNativeParquetScanImpls.contains(scanImpl)) { dataFilters .filterNot(isDynamicPruningFilter) .filterNot(isNullCheckOnArrayColumn) @@ -534,6 +540,17 @@ case class CometScanExec( object CometScanExec { + /** + * Set of contrib-registered scanImpl tags whose CometScanExec should use Comet's native-parquet + * filter exclusion semantics (drop dynamic pruning + IsNull/IsNotNull on ArrayType columns). + * Populated lazily from + * `CometExtensionRegistry.serdeExtensions.flatMap(_.nativeParquetScanImpls)`. Each access + * re-reads the volatile field on `CometExtensionRegistry`; the cost is one HashSet lookup per + * CometScanExec construction, which is dwarfed by Spark's own per-plan work. + */ + private[comet] def contribNativeParquetScanImpls: Set[String] = + org.apache.comet.spi.CometExtensionRegistry.nativeParquetScanImpls + def apply( scanExec: FileSourceScanExec, session: SparkSession, diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala index f315aae6e2..bf0b414e86 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala @@ -59,6 +59,34 @@ import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregat import org.apache.comet.serde.QueryPlanSerde.{aggExprToProto, exprToProto, isStringCollationType, supportedSortType} import org.apache.comet.serde.operator.CometSink +/** + * Generic source of per-partition planning data for a Comet native exec. Implementations expose + * the trio of inputs that `CometExecRDD.compute` needs to feed `PlanDataInjector.injectPlanData`: + * + * - `planDataSourceKey`: stable identifier the matching `PlanDataInjector.getKey` reproduces + * by hashing the operator's common payload. Must agree between driver-side (RDD construction) + * and executor-side (injector lookup) views of the SAME operator's proto. + * - `planDataCommonBytes`: serialized common block (schemas, table root, filters, ...) the + * contrib's `serialize` produced once per scan. + * - `planDataPerPartitionBytes`: array of serialized per-partition payloads, one entry per + * partition, carrying that partition's task list / file list / ranges. + * + * `findAllPlanData` checks for this trait BEFORE the hardcoded `CometNativeScanExec` / + * `CometIcebergNativeScanExec` arms so contribs can plug in without core-side enumeration. + * Core's own scans implement the trait too for symmetry (no behavioural change -- the trait's + * defaults just delegate to their existing accessors). + * + * Implementations whose driver-side `commonData` / `perPartitionData` require Spark's standard + * `prepare -> waitForSubqueries` lifecycle (typically because DPP `InSubqueryExec` values land + * in the per-partition payload) override `ensureSubqueriesResolvedIfApplicable` to trigger it. + */ +trait PlanDataSource { self: SparkPlan => + def planDataSourceKey: String + def planDataCommonBytes: Array[Byte] + def planDataPerPartitionBytes: Array[Array[Byte]] + def ensureSubqueriesResolvedIfApplicable(): Unit = () +} + /** * Trait for injecting per-partition planning data into operator nodes. * @@ -79,14 +107,35 @@ private[comet] trait PlanDataInjector { /** * Registry and utilities for injecting per-partition planning data into operator trees. */ -private[comet] object PlanDataInjector { +object PlanDataInjector { - // Registry of injectors for different operator types - private val injectors: Seq[PlanDataInjector] = Seq( + // Built-in injectors for core operator types. Contribs add to this via + // `registerInjector` from their `CometOperatorSerdeExtension.init` -- the + // generic SPI route -- so core stays format-agnostic. + private val builtinInjectors: Seq[PlanDataInjector] = Seq( IcebergPlanDataInjector, - NativeScanPlanDataInjector - // Future: DeltaPlanDataInjector, HudiPlanDataInjector, etc. - ) + NativeScanPlanDataInjector) + + @volatile private var contribInjectors: Vector[PlanDataInjector] = Vector.empty + + /** + * SPI: register a contrib-side `PlanDataInjector`. Called once per contrib at + * extension-load time (from `CometOperatorSerdeExtension.init`). Registration is + * idempotent on the same instance but not de-duplicated across structurally-equal + * implementations -- contribs are expected to register exactly once. + */ + def registerInjector(injector: PlanDataInjector): Unit = synchronized { + if (!contribInjectors.contains(injector)) { + contribInjectors = contribInjectors :+ injector + } + } + + /** Test-only reset, mirroring `CometExtensionRegistry.resetForTesting`. */ + def clearContribInjectors(): Unit = synchronized { + contribInjectors = Vector.empty + } + + private def injectors: Seq[PlanDataInjector] = builtinInjectors ++ contribInjectors /** * Injects planning data into an Operator tree by finding nodes that need injection and applying @@ -617,6 +666,13 @@ abstract class CometNativeExec extends CometExec { _: CometBroadcastExchangeExec | _: BroadcastQueryStageExec | _: CometSparkToColumnarExec | _: CometLocalTableScanExec => func(plan) + // Any other leaf `CometNativeExec` (e.g. a contrib-defined leaf scan such as the Delta + // contrib's `CometDeltaNativeScanExec`) is a Comet input boundary -- recursing into its + // (non-existent) children would otherwise leave it invisible to the caller, which then + // misinterprets a leaf-only plan as having no inputs at all and crashes on + // `firstNonBroadcastPlan.get`. Treat it the same as the explicit list above. + case p: CometNativeExec if p.children.isEmpty => + func(plan) case _: CometPlan => // Other Comet operators, continue to traverse the tree. plan.children.foreach(foreachUntilCometInput(_)(func)) @@ -664,6 +720,16 @@ abstract class CometNativeExec extends CometExec { private def findAllPlanData( plan: SparkPlan): (Map[String, Array[Byte]], Map[String, Array[Array[Byte]]]) = { plan match { + // Contribs (e.g. the Delta contrib's `CometDeltaNativeScanExec`) implement + // `PlanDataSource` to expose their per-partition payload and matching + // `sourceKey`. Checked before the explicit core cases below so subclasses can + // override the trait without colliding with the hardcoded matches. + case src: PlanDataSource => + src.ensureSubqueriesResolvedIfApplicable() + ( + Map(src.planDataSourceKey -> src.planDataCommonBytes), + Map(src.planDataSourceKey -> src.planDataPerPartitionBytes)) + case iceberg: CometIcebergNativeScanExec => // Trigger Spark's standard prepare -> waitForSubqueries lifecycle so DPP // InSubqueryExec values are resolved before commonData is read. Without this,