diff --git a/Cargo.lock b/Cargo.lock index 9c8f2c593555a..1d0a311b42333 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5515,6 +5515,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap 2.13.0", "itoa", "memchr", "serde", @@ -5872,11 +5873,12 @@ dependencies = [ [[package]] name = "substrait" -version = "0.62.2" +version = "0.62.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +checksum = "4fb31133a233ae4e8fad7c9228fd87767927d9f4d60c3b0f3184be0a445aac1c" dependencies = [ "heck", + "indexmap 2.13.0", "pbjson 0.8.0", "pbjson-build 0.8.0", "pbjson-types", diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 1c901f6d4a0e6..dc4daf1ab7532 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4456,49 +4456,49 @@ mod tests { [ { "Plan": { + "Node Type": "Projection", "Expressions": [ "employee_csv.id" ], - "Node Type": "Projection", - "Output": [ - "id" - ], "Plans": [ { - "Condition": "employee_csv.state IN ()", "Node Type": "Filter", - "Output": [ - "id", - "state" - ], + "Condition": "employee_csv.state IN ()", "Plans": [ { "Node Type": "Subquery", - "Output": [ - "state" - ], "Plans": [ { "Node Type": "TableScan", + "Relation Name": "employee_csv", + "Plans": [], "Output": [ "state" - ], - "Plans": [], - "Relation Name": "employee_csv" + ] } + ], + "Output": [ + "state" ] }, { "Node Type": "TableScan", + "Relation Name": "employee_csv", + "Plans": [], "Output": [ "id", "state" - ], - "Plans": [], - "Relation Name": "employee_csv" + ] } + ], + "Output": [ + "id", + "state" ] } + ], + "Output": [ + "id" ] } } diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 717b455193354..9916892058569 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -646,11 +646,11 @@ logical_plan 02)--{ 03)----"Plan": { 04)------"Node Type": "Values", -05)------"Output": [ -06)--------"column1" -07)------], -08)------"Plans": [], -09)------"Values": "(Int64(1))" +05)------"Values": "(Int64(1))", +06)------"Plans": [], +07)------"Output": [ +08)--------"column1" +09)------] 10)----} 11)--} 12)] diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 927c326b88fbc..ba14d0e7cedfa 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -44,7 +44,7 @@ object_store = { workspace = true } # We need to match the version in substrait, so we don't use the workspace version here pbjson-types = { version = "0.8.0" } prost = { workspace = true } -substrait = { version = "0.62", features = ["serde"] } +substrait = { version = "0.62.3", features = ["serde"] } url = { workspace = true } tokio = { workspace = true, features = ["fs"] } diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs b/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs index 50d93a4600a00..dae6c625ef55b 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs @@ -56,6 +56,9 @@ pub(crate) fn from_substrait_field_reference( Some(RootType::Expression(_)) => not_impl_err!( "Expression root type in field reference is not supported" ), + Some(RootType::LambdaParameterReference(_)) => not_impl_err!( + "Lambda parameter reference in field reference is not yet supported" + ), } } _ => not_impl_err!( diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs b/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs index ad38b6addee0b..455ce914f11ce 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs @@ -43,7 +43,7 @@ use prost::Message; use std::sync::Arc; use substrait::proto; use substrait::proto::expression::Literal; -use substrait::proto::expression::literal::user_defined::Val; +use substrait::proto::expression::literal::user_defined::{TypeAnchorType, Val}; use substrait::proto::expression::literal::{ IntervalCompound, IntervalDayToSecond, IntervalYearToMonth, LiteralType, interval_day_to_second, @@ -474,11 +474,12 @@ pub(crate) fn from_substrait_literal( ))) }; - if let Some(name) = consumer - .get_extensions() - .types - .get(&user_defined.type_reference) - { + let type_ref = match user_defined.type_anchor_type { + Some(TypeAnchorType::TypeReference(ref_val)) => ref_val, + _ => 0, + }; + + if let Some(name) = consumer.get_extensions().types.get(&type_ref) { match name.as_ref() { FLOAT_16_TYPE_NAME => { // Rules for encoding fp16 Substrait literals are defined as part of Arrow here: @@ -518,14 +519,14 @@ pub(crate) fn from_substrait_literal( _ => { return not_impl_err!( "Unsupported Substrait user defined type with ref {} and name {}", - user_defined.type_reference, + type_ref, name ); } } } else { #[expect(deprecated)] - match user_defined.type_reference { + match type_ref { // Kept for backwards compatibility, producers should useIntervalYearToMonth instead INTERVAL_YEAR_MONTH_TYPE_REF => { let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { @@ -568,7 +569,7 @@ pub(crate) fn from_substrait_literal( _ => { return not_impl_err!( "Unsupported Substrait user defined type literal with ref {}", - user_defined.type_reference + type_ref ); } } diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs index 5d98850c72cca..a0468dbd451b9 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs @@ -93,6 +93,9 @@ pub async fn from_substrait_rex( RexType::DynamicParameter(expr) => { consumer.consume_dynamic_parameter(expr, input_schema).await } + RexType::Lambda(_) | RexType::LambdaInvocation(_) => { + not_impl_err!("Lambda expressions are not yet supported") + } }, None => substrait_err!("Expression must set rex_type: {expression:?}"), } diff --git a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs index a23f1faed1eb0..f03caa529a8d3 100644 --- a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs @@ -141,7 +141,12 @@ use substrait::proto::{ /// /// // and user-defined literals /// fn consume_user_defined_literal(&self, literal: &proto::expression::literal::UserDefined) -> Result { -/// let type_string = self.extensions.types.get(&literal.type_reference).unwrap(); +/// // extract type_reference from the new TypeAnchorType oneof +/// let type_ref = match literal.type_anchor_type { +/// Some(proto::expression::literal::user_defined::TypeAnchorType::TypeReference(r)) => r, +/// _ => 0, +/// }; +/// let type_string = self.extensions.types.get(&type_ref).unwrap(); /// match type_string.as_str() { /// "u!foo" => not_impl_err!("handle foo conversion"), /// "u!bar" => not_impl_err!("handle bar conversion"), @@ -444,10 +449,15 @@ pub trait SubstraitConsumer: Send + Sync + Sized { &self, user_defined_literal: &proto::expression::literal::UserDefined, ) -> datafusion::common::Result { - substrait_err!( - "Missing handler for user-defined literals {}", - user_defined_literal.type_reference - ) + let type_ref = match user_defined_literal.type_anchor_type { + Some( + proto::expression::literal::user_defined::TypeAnchorType::TypeReference( + ref_val, + ), + ) => ref_val, + _ => 0, + }; + substrait_err!("Missing handler for user-defined literals {}", type_ref) } } diff --git a/datafusion/substrait/src/logical_plan/producer/expr/literal.rs b/datafusion/substrait/src/logical_plan/producer/expr/literal.rs index 8882c992dca1c..bbed7ee9be417 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/literal.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/literal.rs @@ -117,7 +117,7 @@ pub(crate) fn to_substrait_literal( ( LiteralType::UserDefined( substrait::proto::expression::literal::UserDefined { - type_reference: type_anchor, + type_anchor_type: Some(substrait::proto::expression::literal::user_defined::TypeAnchorType::TypeReference(type_anchor)), type_parameters: vec![], val: Some(substrait::proto::expression::literal::user_defined::Val::Value( pbjson_types::Any { diff --git a/datafusion/substrait/tests/utils.rs b/datafusion/substrait/tests/utils.rs index 6a6824579b4e8..4d9b5ca83e5e0 100644 --- a/datafusion/substrait/tests/utils.rs +++ b/datafusion/substrait/tests/utils.rs @@ -486,6 +486,7 @@ pub mod test { // Enum is deprecated #[expect(deprecated)] RexType::Enum(_) => {} + RexType::Lambda(_) | RexType::LambdaInvocation(_) => {} } Ok(()) }