-
Notifications
You must be signed in to change notification settings - Fork 1.9k
fix: sqllogictest cannot convert <subquery> to Substrait #19739
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,9 @@ | |
|
|
||
| use crate::logical_plan::producer::SubstraitProducer; | ||
| use datafusion::common::DFSchemaRef; | ||
| use datafusion::logical_expr::expr::InSubquery; | ||
| use substrait::proto::expression::subquery::InPredicate; | ||
| use datafusion::logical_expr::Subquery; | ||
| use datafusion::logical_expr::expr::{Exists, InSubquery}; | ||
| use substrait::proto::expression::subquery::{InPredicate, Scalar, SetPredicate}; | ||
| use substrait::proto::expression::{RexType, ScalarFunction}; | ||
| use substrait::proto::function_argument::ArgType; | ||
| use substrait::proto::{Expression, FunctionArgument}; | ||
|
|
@@ -70,3 +71,70 @@ pub fn from_in_subquery( | |
| Ok(substrait_subquery) | ||
| } | ||
| } | ||
|
|
||
| /// Convert DataFusion ScalarSubquery to Substrait Scalar subquery type | ||
| pub fn from_scalar_subquery( | ||
| producer: &mut impl SubstraitProducer, | ||
| subquery: &Subquery, | ||
| _schema: &DFSchemaRef, | ||
| ) -> datafusion::common::Result<Expression> { | ||
| let subquery_plan = producer.handle_plan(subquery.subquery.as_ref())?; | ||
|
|
||
| Ok(Expression { | ||
| rex_type: Some(RexType::Subquery(Box::new( | ||
| substrait::proto::expression::Subquery { | ||
| subquery_type: Some( | ||
| substrait::proto::expression::subquery::SubqueryType::Scalar( | ||
| Box::new(Scalar { | ||
| input: Some(subquery_plan), | ||
| }), | ||
| ), | ||
| ), | ||
| }, | ||
| ))), | ||
| }) | ||
| } | ||
|
|
||
| /// Convert DataFusion Exists expression to Substrait SetPredicate subquery type | ||
| pub fn from_exists( | ||
| producer: &mut impl SubstraitProducer, | ||
| exists: &Exists, | ||
| _schema: &DFSchemaRef, | ||
| ) -> datafusion::common::Result<Expression> { | ||
| let subquery_plan = producer.handle_plan(exists.subquery.subquery.as_ref())?; | ||
|
|
||
| let substrait_exists = Expression { | ||
| rex_type: Some(RexType::Subquery(Box::new( | ||
| substrait::proto::expression::Subquery { | ||
| subquery_type: Some( | ||
| substrait::proto::expression::subquery::SubqueryType::SetPredicate( | ||
| Box::new(SetPredicate { | ||
| predicate_op: substrait::proto::expression::subquery::set_predicate::PredicateOp::Exists as i32, | ||
| tuples: Some(subquery_plan), | ||
| }), | ||
| ), | ||
| ), | ||
| }, | ||
| ))), | ||
| }; | ||
|
|
||
| // Handle negated EXISTS (NOT EXISTS) | ||
| if exists.negated { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's no
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had look into it earlier, I was hoping to open a seperate discussion so not to clutter this PR. |
||
| let function_anchor = producer.register_function("not".to_string()); | ||
|
|
||
| #[expect(deprecated)] | ||
| Ok(Expression { | ||
| rex_type: Some(RexType::ScalarFunction(ScalarFunction { | ||
| function_reference: function_anchor, | ||
| arguments: vec![FunctionArgument { | ||
| arg_type: Some(ArgType::Value(substrait_exists)), | ||
| }], | ||
| output_type: None, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if this is done consistently across the codebase, but this technically makes this substrait invalid. From the documentation: // Must be set to the return type of the function, exactly as derived
// using the declaration in the extension.
Type output_type = 3;
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I wish that the |
||
| args: vec![], | ||
| options: vec![], | ||
| })), | ||
| }) | ||
| } else { | ||
| Ok(substrait_exists) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm still getting a lot of
This feature is not implemented: Cannot convert OuterReferenceColumnerrors when running the tests so maybe this PR can partially close the issue instead of completely? Unless you're still working on itThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah for the remaining issues I intend to open follow up PRs. My goal is to resolve all the issue related to substrait in the next few weeks.