diff --git a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs
index 5057564d370cf..ecda021aa390f 100644
--- a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs
+++ b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs
@@ -139,16 +139,16 @@ pub fn to_substrait_rex(
         }
         Expr::WindowFunction(expr) => producer.handle_window_function(expr, schema),
         Expr::InList(expr) => producer.handle_in_list(expr, schema),
-        Expr::Exists(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
+        Expr::Exists(expr) => producer.handle_exists(expr, schema),
         Expr::InSubquery(expr) => producer.handle_in_subquery(expr, schema),
-        Expr::ScalarSubquery(expr) => {
-            not_impl_err!("Cannot convert {expr:?} to Substrait")
-        }
+        Expr::ScalarSubquery(expr) => producer.handle_scalar_subquery(expr, schema),
         #[expect(deprecated)]
         Expr::Wildcard { .. } => not_impl_err!("Cannot convert {expr:?} to Substrait"),
         Expr::GroupingSet(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
         Expr::Placeholder(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
         Expr::OuterReferenceColumn(_, _) => {
+            // OuterReferenceColumn requires tracking outer query schema context for correlated
+            // subqueries. This is a complex feature that is not yet implemented.
             not_impl_err!("Cannot convert {expr:?} to Substrait")
         }
         Expr::Unnest(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
diff --git a/datafusion/substrait/src/logical_plan/producer/expr/subquery.rs b/datafusion/substrait/src/logical_plan/producer/expr/subquery.rs
index f2e6ff551223c..7e9a2d54335ff 100644
--- a/datafusion/substrait/src/logical_plan/producer/expr/subquery.rs
+++ b/datafusion/substrait/src/logical_plan/producer/expr/subquery.rs
@@ -17,8 +17,9 @@
 
 use crate::logical_plan::producer::SubstraitProducer;
 use datafusion::common::DFSchemaRef;
-use datafusion::logical_expr::expr::InSubquery;
-use substrait::proto::expression::subquery::InPredicate;
+use datafusion::logical_expr::Subquery;
+use datafusion::logical_expr::expr::{Exists, InSubquery};
+use substrait::proto::expression::subquery::{InPredicate, Scalar, SetPredicate};
 use substrait::proto::expression::{RexType, ScalarFunction};
 use substrait::proto::function_argument::ArgType;
 use substrait::proto::{Expression, FunctionArgument};
@@ -70,3 +71,86 @@ pub fn from_in_subquery(
         Ok(substrait_subquery)
     }
 }
+
+/// Convert a DataFusion scalar subquery into a Substrait `Scalar` subquery expression.
+///
+/// The subquery's logical plan is converted via `producer.handle_plan` and becomes the
+/// `input` of the `Scalar` subquery. `_schema` is currently unused.
+///
+/// # Errors
+///
+/// Propagates any error returned while converting the subquery plan.
+pub fn from_scalar_subquery(
+    producer: &mut impl SubstraitProducer,
+    subquery: &Subquery,
+    _schema: &DFSchemaRef,
+) -> datafusion::common::Result<Expression> {
+    let subquery_plan = producer.handle_plan(subquery.subquery.as_ref())?;
+
+    Ok(Expression {
+        rex_type: Some(RexType::Subquery(Box::new(
+            substrait::proto::expression::Subquery {
+                subquery_type: Some(
+                    substrait::proto::expression::subquery::SubqueryType::Scalar(
+                        Box::new(Scalar {
+                            input: Some(subquery_plan),
+                        }),
+                    ),
+                ),
+            },
+        ))),
+    })
+}
+
+/// Convert a DataFusion `EXISTS` expression into a Substrait `SetPredicate` subquery
+/// expression with the `Exists` predicate operation.
+///
+/// When `exists.negated` is set (`NOT EXISTS`), the set predicate is wrapped in a call
+/// to the `not` scalar function. `_schema` is currently unused.
+///
+/// # Errors
+///
+/// Propagates any error returned while converting the subquery plan.
+pub fn from_exists(
+    producer: &mut impl SubstraitProducer,
+    exists: &Exists,
+    _schema: &DFSchemaRef,
+) -> datafusion::common::Result<Expression> {
+    let subquery_plan = producer.handle_plan(exists.subquery.subquery.as_ref())?;
+
+    let substrait_exists = Expression {
+        rex_type: Some(RexType::Subquery(Box::new(
+            substrait::proto::expression::Subquery {
+                subquery_type: Some(
+                    substrait::proto::expression::subquery::SubqueryType::SetPredicate(
+                        Box::new(SetPredicate {
+                            predicate_op: substrait::proto::expression::subquery::set_predicate::PredicateOp::Exists as i32,
+                            tuples: Some(subquery_plan),
+                        }),
+                    ),
+                ),
+            },
+        ))),
+    };
+
+    // Handle negated EXISTS (NOT EXISTS)
+    if exists.negated {
+        let function_anchor = producer.register_function("not".to_string());
+
+        // The struct literal below sets a deprecated field (presumably `args`).
+        #[expect(deprecated)]
+        Ok(Expression {
+            rex_type: Some(RexType::ScalarFunction(ScalarFunction {
+                function_reference: function_anchor,
+                arguments: vec![FunctionArgument {
+                    arg_type: Some(ArgType::Value(substrait_exists)),
+                }],
+                output_type: None,
+                args: vec![],
+                options: vec![],
+            })),
+        })
+    } else {
+        Ok(substrait_exists)
+    }
+}
diff --git a/datafusion/substrait/src/logical_plan/producer/substrait_producer.rs b/datafusion/substrait/src/logical_plan/producer/substrait_producer.rs
index ffc920ffe609e..dcb14c2d8a276 100644
--- a/datafusion/substrait/src/logical_plan/producer/substrait_producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer/substrait_producer.rs
@@ -18,16 +18,18 @@
 use crate::extensions::Extensions;
 use crate::logical_plan::producer::{
     from_aggregate, from_aggregate_function, from_alias, from_between, from_binary_expr,
-    from_case, from_cast, from_column, from_distinct, from_empty_relation, from_filter,
-    from_in_list, from_in_subquery, from_join, from_like, from_limit, from_literal,
-    from_projection, from_repartition, from_scalar_function, from_sort,
-    from_subquery_alias, from_table_scan, from_try_cast, from_unary_expr, from_union,
-    from_values, from_window, from_window_function, to_substrait_rel, to_substrait_rex,
+    from_case, from_cast, from_column, from_distinct, from_empty_relation, from_exists,
+    from_filter, from_in_list, from_in_subquery, from_join, from_like, from_limit,
+    from_literal, from_projection, from_repartition, from_scalar_function,
+    from_scalar_subquery, from_sort, from_subquery_alias, from_table_scan, from_try_cast,
+    from_unary_expr, from_union, from_values, from_window, from_window_function,
+    to_substrait_rel, to_substrait_rex,
 };
 use datafusion::common::{Column, DFSchemaRef, ScalarValue, substrait_err};
 use datafusion::execution::SessionState;
 use datafusion::execution::registry::SerializerRegistry;
-use datafusion::logical_expr::expr::{Alias, InList, InSubquery, WindowFunction};
+use datafusion::logical_expr::Subquery;
+use datafusion::logical_expr::expr::{Alias, Exists, InList, InSubquery, WindowFunction};
 use datafusion::logical_expr::{
     Aggregate, Between, BinaryExpr, Case, Cast, Distinct, EmptyRelation, Expr, Extension,
     Filter, Join, Like, Limit, LogicalPlan, Projection, Repartition, Sort, SubqueryAlias,
@@ -361,6 +363,24 @@ pub trait SubstraitProducer: Send + Sync + Sized {
     ) -> datafusion::common::Result<Expression> {
         from_in_subquery(self, in_subquery, schema)
     }
+
+    /// Converts a scalar subquery; the default delegates to [`from_scalar_subquery`].
+    fn handle_scalar_subquery(
+        &mut self,
+        subquery: &Subquery,
+        schema: &DFSchemaRef,
+    ) -> datafusion::common::Result<Expression> {
+        from_scalar_subquery(self, subquery, schema)
+    }
+
+    /// Converts an `EXISTS` expression; the default delegates to [`from_exists`].
+    fn handle_exists(
+        &mut self,
+        exists: &Exists,
+        schema: &DFSchemaRef,
+    ) -> datafusion::common::Result<Expression> {
+        from_exists(self, exists, schema)
+    }
 }
 
 pub struct DefaultSubstraitProducer<'a> {