Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions datafusion/substrait/src/logical_plan/producer/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,16 @@ pub fn to_substrait_rex(
}
Expr::WindowFunction(expr) => producer.handle_window_function(expr, schema),
Expr::InList(expr) => producer.handle_in_list(expr, schema),
Expr::Exists(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
Expr::Exists(expr) => producer.handle_exists(expr, schema),
Expr::InSubquery(expr) => producer.handle_in_subquery(expr, schema),
Expr::ScalarSubquery(expr) => {
not_impl_err!("Cannot convert {expr:?} to Substrait")
}
Expr::ScalarSubquery(expr) => producer.handle_scalar_subquery(expr, schema),
#[expect(deprecated)]
Expr::Wildcard { .. } => not_impl_err!("Cannot convert {expr:?} to Substrait"),
Expr::GroupingSet(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
Expr::Placeholder(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
Expr::OuterReferenceColumn(_, _) => {
// OuterReferenceColumn requires tracking outer query schema context for correlated

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still getting a lot of This feature is not implemented: Cannot convert OuterReferenceColumn errors when running the tests so maybe this PR can partially close the issue instead of completely? Unless you're still working on it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah for the remaining issues I intend to open follow up PRs. My goal is to resolve all the issue related to substrait in the next few weeks.

// subqueries. This is a complex feature that is not yet implemented.
not_impl_err!("Cannot convert {expr:?} to Substrait")
}
Expr::Unnest(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),
Expand Down
72 changes: 70 additions & 2 deletions datafusion/substrait/src/logical_plan/producer/expr/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@

use crate::logical_plan::producer::SubstraitProducer;
use datafusion::common::DFSchemaRef;
use datafusion::logical_expr::expr::InSubquery;
use substrait::proto::expression::subquery::InPredicate;
use datafusion::logical_expr::Subquery;
use datafusion::logical_expr::expr::{Exists, InSubquery};
use substrait::proto::expression::subquery::{InPredicate, Scalar, SetPredicate};
use substrait::proto::expression::{RexType, ScalarFunction};
use substrait::proto::function_argument::ArgType;
use substrait::proto::{Expression, FunctionArgument};
Expand Down Expand Up @@ -70,3 +71,70 @@ pub fn from_in_subquery(
Ok(substrait_subquery)
}
}

/// Convert DataFusion ScalarSubquery to Substrait Scalar subquery type
pub fn from_scalar_subquery(
producer: &mut impl SubstraitProducer,
subquery: &Subquery,
_schema: &DFSchemaRef,
) -> datafusion::common::Result<Expression> {
let subquery_plan = producer.handle_plan(subquery.subquery.as_ref())?;

Ok(Expression {
rex_type: Some(RexType::Subquery(Box::new(
substrait::proto::expression::Subquery {
subquery_type: Some(
substrait::proto::expression::subquery::SubqueryType::Scalar(
Box::new(Scalar {
input: Some(subquery_plan),
}),
),
),
},
))),
})
}

/// Convert DataFusion Exists expression to Substrait SetPredicate subquery type
pub fn from_exists(
producer: &mut impl SubstraitProducer,
exists: &Exists,
_schema: &DFSchemaRef,
) -> datafusion::common::Result<Expression> {
let subquery_plan = producer.handle_plan(exists.subquery.subquery.as_ref())?;

let substrait_exists = Expression {
rex_type: Some(RexType::Subquery(Box::new(
substrait::proto::expression::Subquery {
subquery_type: Some(
substrait::proto::expression::subquery::SubqueryType::SetPredicate(
Box::new(SetPredicate {
predicate_op: substrait::proto::expression::subquery::set_predicate::PredicateOp::Exists as i32,
tuples: Some(subquery_plan),
}),
),
),
},
))),
};

// Handle negated EXISTS (NOT EXISTS)
if exists.negated {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no PREDICATE_OP_NOT_EXISTS in the spec so I think this a reasonable workaround. Minor note, the consumer hardcodes negated:false so I don't think NOT EXISTS/NOT IN will round-trip correctly (Exists/InSubquery)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had look into it earlier, I was hoping to open a seperate discussion so not to clutter this PR.

let function_anchor = producer.register_function("not".to_string());

#[expect(deprecated)]
Ok(Expression {
rex_type: Some(RexType::ScalarFunction(ScalarFunction {
function_reference: function_anchor,
arguments: vec![FunctionArgument {
arg_type: Some(ArgType::Value(substrait_exists)),
}],
output_type: None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this is done consistently across the codebase, but this technically makes this substrait invalid. From the documentation:

    // Must be set to the return type of the function, exactly as derived
    // using the declaration in the extension.
    Type output_type = 3;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I wish that the substrait-rs library were at a state where it could handle this for you, but it just isn't there yet)

args: vec![],
options: vec![],
})),
})
} else {
Ok(substrait_exists)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,18 @@
use crate::extensions::Extensions;
use crate::logical_plan::producer::{
from_aggregate, from_aggregate_function, from_alias, from_between, from_binary_expr,
from_case, from_cast, from_column, from_distinct, from_empty_relation, from_filter,
from_in_list, from_in_subquery, from_join, from_like, from_limit, from_literal,
from_projection, from_repartition, from_scalar_function, from_sort,
from_subquery_alias, from_table_scan, from_try_cast, from_unary_expr, from_union,
from_values, from_window, from_window_function, to_substrait_rel, to_substrait_rex,
from_case, from_cast, from_column, from_distinct, from_empty_relation, from_exists,
from_filter, from_in_list, from_in_subquery, from_join, from_like, from_limit,
from_literal, from_projection, from_repartition, from_scalar_function,
from_scalar_subquery, from_sort, from_subquery_alias, from_table_scan, from_try_cast,
from_unary_expr, from_union, from_values, from_window, from_window_function,
to_substrait_rel, to_substrait_rex,
};
use datafusion::common::{Column, DFSchemaRef, ScalarValue, substrait_err};
use datafusion::execution::SessionState;
use datafusion::execution::registry::SerializerRegistry;
use datafusion::logical_expr::expr::{Alias, InList, InSubquery, WindowFunction};
use datafusion::logical_expr::Subquery;
use datafusion::logical_expr::expr::{Alias, Exists, InList, InSubquery, WindowFunction};
use datafusion::logical_expr::{
Aggregate, Between, BinaryExpr, Case, Cast, Distinct, EmptyRelation, Expr, Extension,
Filter, Join, Like, Limit, LogicalPlan, Projection, Repartition, Sort, SubqueryAlias,
Expand Down Expand Up @@ -361,6 +363,22 @@ pub trait SubstraitProducer: Send + Sync + Sized {
) -> datafusion::common::Result<Expression> {
from_in_subquery(self, in_subquery, schema)
}

fn handle_scalar_subquery(
&mut self,
subquery: &Subquery,
schema: &DFSchemaRef,
) -> datafusion::common::Result<Expression> {
from_scalar_subquery(self, subquery, schema)
}

fn handle_exists(
&mut self,
exists: &Exists,
schema: &DFSchemaRef,
) -> datafusion::common::Result<Expression> {
from_exists(self, exists, schema)
}
}

pub struct DefaultSubstraitProducer<'a> {
Expand Down