From 10d5797d8b795c27e3cde3336bd0b7c90e41e1b3 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sat, 7 Mar 2026 13:57:57 +0800 Subject: [PATCH 1/8] Implement single-source UPDATE...FROM with old-row projection Enable the SQL planner to handle single-source UPDATE ... FROM statements. Project the original target-row image as hidden columns for downstream consumers. Add a logical-plan regression test to verify the projected old-row image and refactor update.slt to focus on logical planning, excluding physical planning for now. --- datafusion/expr/src/logical_plan/dml.rs | 10 +++ datafusion/sql/src/statement.rs | 59 +++++++----- datafusion/sql/tests/sql_integration.rs | 22 +++++ datafusion/sqllogictest/test_files/update.slt | 90 +++++++++++++++---- 4 files changed, 146 insertions(+), 35 deletions(-) diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs index b668cbfe2cc35..f1a3327164be0 100644 --- a/datafusion/expr/src/logical_plan/dml.rs +++ b/datafusion/expr/src/logical_plan/dml.rs @@ -27,6 +27,16 @@ use datafusion_common::{DFSchemaRef, TableReference}; use crate::{LogicalPlan, TableSource}; +/// Prefix used for hidden columns carrying the original target-row values in +/// `UPDATE ... FROM` plans. +pub const UPDATE_FROM_OLD_COLUMN_PREFIX: &str = "__df_update_old_"; + +/// Returns the hidden `UPDATE ... FROM` column name used to carry the original +/// value of `column_name` from the target table. +pub fn update_from_old_column_name(column_name: &str) -> String { + format!("{UPDATE_FROM_OLD_COLUMN_PREFIX}{column_name}") +} + /// Operator that copies the contents of a database to file(s) #[derive(Clone)] pub struct CopyTo { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index b91e38e53776a..9a755ce7ae163 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -25,7 +25,8 @@ use crate::parser::{ LexOrdering, ResetStatement, Statement as DFStatement, }; use crate::planner::{ - ContextProvider, PlannerContext, SqlToRel, object_name_to_qualifier, + ContextProvider, IdentNormalizer, PlannerContext, SqlToRel, + object_name_to_qualifier, }; use crate::utils::normalize_ident; @@ -38,7 +39,7 @@ use datafusion_common::{ internal_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, unqualified_field_not_found, }; -use datafusion_expr::dml::{CopyTo, InsertOp}; +use datafusion_expr::dml::{CopyTo, InsertOp, update_from_old_column_name}; use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check; use datafusion_expr::logical_plan::DdlStatement; use datafusion_expr::logical_plan::builder::project; @@ -1084,12 +1085,6 @@ impl SqlToRel<'_, S> { } let update_from = from_clauses.and_then(|mut f| f.pop()); - // UPDATE ... FROM is currently not working - // TODO fix https://github.com/apache/datafusion/issues/19950 - if update_from.is_some() { - return not_impl_err!("UPDATE ... FROM is not supported"); - } - if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } @@ -2191,6 +2186,7 @@ impl SqlToRel<'_, S> { .collect::>>()?; // Build scan, join with from table if it exists. + let has_update_from = from.is_some(); let mut input_tables = vec![table]; input_tables.extend(from); let scan = self.plan_from_tables(input_tables, &mut planner_context)?; @@ -2216,7 +2212,7 @@ impl SqlToRel<'_, S> { }; // Build updated values for each column, using the previous value if not modified - let exprs = table_schema + let mut exprs = table_schema .iter() .map(|(qualifier, field)| { let expr = match assign_map.remove(field.name()) { @@ -2236,22 +2232,29 @@ impl SqlToRel<'_, S> { // Cast to target column type, if necessary expr.cast_to(field.data_type(), source.schema())? } - None => { - // If the target table has an alias, use it to qualify the column name - if let Some(alias) = &table_alias { - Expr::Column(Column::new( - Some(self.ident_normalizer.normalize(alias.name.clone())), - field.name(), - )) - } else { - Expr::Column(Column::from((qualifier, field))) - } - } + None => Self::update_target_column_expr( + &self.ident_normalizer, + &table_alias, + qualifier, + field, + ), }; Ok(expr.alias(field.name())) }) .collect::>>()?; + if has_update_from { + exprs.extend(table_schema.iter().map(|(qualifier, field)| { + Self::update_target_column_expr( + &self.ident_normalizer, + &table_alias, + qualifier, + field, + ) + .alias(update_from_old_column_name(field.name())) + })); + } + let source = project(source, exprs)?; let plan = LogicalPlan::Dml(DmlStatement::new( @@ -2263,6 +2266,22 @@ impl SqlToRel<'_, S> { Ok(plan) } + fn update_target_column_expr( + ident_normalizer: &IdentNormalizer, + table_alias: &Option, + qualifier: Option<&TableReference>, + field: &FieldRef, + ) -> Expr { + if let Some(alias) = table_alias { + Expr::Column(Column::new( + Some(ident_normalizer.normalize(alias.name.clone())), + field.name(), + )) + } else { + Expr::Column(Column::from((qualifier, field))) + } + } + fn insert_to_plan( &self, table_name: ObjectName, diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 9570336e995f2..f9cb173dff047 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -739,6 +739,28 @@ fn plan_update() { ); } +#[test] +fn plan_update_from_projects_original_target_row() { + let sql = "update person as dst \ + set last_name = src.last_name, age = src.age \ + from person as src \ + where dst.id = src.id"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r#" + Dml: op=[Update] table=[person] + Projection: dst.id AS id, dst.first_name AS first_name, src.last_name AS last_name, src.age AS age, dst.state AS state, dst.salary AS salary, dst.birth_date AS birth_date, dst.😀 AS 😀, dst.id AS __df_update_old_id, dst.first_name AS __df_update_old_first_name, dst.last_name AS __df_update_old_last_name, dst.age AS __df_update_old_age, dst.state AS __df_update_old_state, dst.salary AS __df_update_old_salary, dst.birth_date AS __df_update_old_birth_date, dst.😀 AS __df_update_old_😀 + Filter: dst.id = src.id + Cross Join: + SubqueryAlias: dst + TableScan: person + SubqueryAlias: src + TableScan: person + "# + ); +} + #[rstest] #[case::missing_assignment_target("UPDATE person SET doesnotexist = true")] #[case::missing_assignment_expression("UPDATE person SET age = doesnotexist + 42")] diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt index 1cd2b626e3b8e..d1d9572ee6675 100644 --- a/datafusion/sqllogictest/test_files/update.slt +++ b/datafusion/sqllogictest/test_files/update.slt @@ -67,10 +67,46 @@ logical_plan physical_plan_error This feature is not implemented: Physical plan does not support logical expression ScalarSubquery() # set from other table -# UPDATE ... FROM is currently unsupported -# TODO fix https://github.com/apache/datafusion/issues/19950 -query error DataFusion error: This feature is not implemented: UPDATE ... FROM is not supported +# UPDATE ... FROM plans successfully but physical execution is still out of scope here +query TT explain update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; +---- +logical_plan +01)Dml: op=[Update] table=[t1] +02)--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t1.a AS __df_update_old_a, t1.b AS __df_update_old_b, t1.c AS __df_update_old_c, t1.d AS __df_update_old_d +03)----Filter: t1.a = t2.a AND t1.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) +04)------Cross Join: +05)--------TableScan: t1 +06)--------TableScan: t2 +physical_plan_error + +# update from (FROM before SET syntax) +query TT +explain update t1 from t2 set b = t2.b, c = t1.a + t2.a where t1.a = t2.a; +---- +logical_plan +01)Dml: op=[Update] table=[t1] +02)--Projection: t1.a AS a, t2.b AS b, CAST(t1.a + t2.a AS Float64) AS c, t1.d AS d, t1.a AS __df_update_old_a, t1.b AS __df_update_old_b, t1.c AS __df_update_old_c, t1.d AS __df_update_old_d +03)----Filter: t1.a = t2.a +04)------Cross Join: +05)--------TableScan: t1 +06)--------TableScan: t2 +physical_plan_error + +# update from with explicit aliases and joined assignment expressions +query TT +explain update t1 as dst set b = src.b, c = src.a + dst.a, d = dst.d + src.d from t2 as src where dst.a = src.a and src.c > dst.c; +---- +logical_plan +01)Dml: op=[Update] table=[t1] +02)--Projection: dst.a AS a, src.b AS b, CAST(src.a + dst.a AS Float64) AS c, dst.d + src.d AS d, dst.a AS __df_update_old_a, dst.b AS __df_update_old_b, dst.c AS __df_update_old_c, dst.d AS __df_update_old_d +03)----Filter: dst.a = src.a AND src.c > dst.c +04)------Cross Join: +05)--------SubqueryAlias: dst +06)----------TableScan: t1 +07)--------SubqueryAlias: src +08)----------TableScan: t2 +physical_plan_error # test update from other table with actual data statement ok @@ -79,21 +115,36 @@ insert into t1 values (1, 'zoo', 2.0, 10), (2, 'qux', 3.0, 20), (3, 'bar', 4.0, statement ok insert into t2 values (1, 'updated_b', 5.0, 40), (2, 'updated_b2', 2.5, 50), (4, 'updated_b3', 1.5, 60); -# UPDATE ... FROM is currently unsupported - qualifier stripping breaks source column references -# causing assignments like 'b = t2.b' to resolve to target table's 'b' instead of source table's 'b' -# TODO fix https://github.com/apache/datafusion/issues/19950 -statement error DataFusion error: This feature is not implemented: UPDATE ... FROM is not supported -update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; +# physical execution stays out of scope for this file until provider support lands +query TT +explain update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; +---- +logical_plan +01)Dml: op=[Update] table=[t1] +02)--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t1.a AS __df_update_old_a, t1.b AS __df_update_old_b, t1.c AS __df_update_old_c, t1.d AS __df_update_old_d +03)----Filter: t1.a = t2.a AND t1.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) +04)------Cross Join: +05)--------TableScan: t1 +06)--------TableScan: t2 +physical_plan_error # set from multiple tables, DataFusion only supports from one table statement error DataFusion error: This feature is not implemented: Multiple tables in UPDATE SET FROM not yet supported explain update t1 set b = t2.b, c = t3.a, d = 1 from t2, t3 where t1.a = t2.a and t1.a = t3.a; # test table alias -# UPDATE ... FROM is currently unsupported -# TODO fix https://github.com/apache/datafusion/issues/19950 -statement error DataFusion error: This feature is not implemented: UPDATE ... FROM is not supported +query TT explain update t1 as T set b = t2.b, c = t.a, d = 1 from t2 where t.a = t2.a and t.b > 'foo' and t2.c > 1.0; +---- +logical_plan +01)Dml: op=[Update] table=[t1] +02)--Projection: t.a AS a, t2.b AS b, CAST(t.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t.a AS __df_update_old_a, t.b AS __df_update_old_b, t.c AS __df_update_old_c, t.d AS __df_update_old_d +03)----Filter: t.a = t2.a AND t.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) +04)------Cross Join: +05)--------SubqueryAlias: t +06)----------TableScan: t1 +07)--------TableScan: t2 +physical_plan_error # test update with table alias with actual data statement ok @@ -108,7 +159,16 @@ insert into t1 values (1, 'zebra', 1.5, 5), (2, 'wolf', 2.0, 10), (3, 'apple', 3 statement ok insert into t2 values (1, 'new_val', 2.0, 100), (2, 'new_val2', 1.5, 200); -# UPDATE ... FROM is currently unsupported -# TODO fix https://github.com/apache/datafusion/issues/19950 -statement error DataFusion error: This feature is not implemented: UPDATE ... FROM is not supported -update t1 as T set b = t2.b, c = t.a, d = 1 from t2 where t.a = t2.a and t.b > 'foo' and t2.c > 1.0; +# physical execution stays out of scope for this file until provider support lands +query TT +explain update t1 as T set b = t2.b, c = t.a, d = 1 from t2 where t.a = t2.a and t.b > 'foo' and t2.c > 1.0; +---- +logical_plan +01)Dml: op=[Update] table=[t1] +02)--Projection: t.a AS a, t2.b AS b, CAST(t.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t.a AS __df_update_old_a, t.b AS __df_update_old_b, t.c AS __df_update_old_c, t.d AS __df_update_old_d +03)----Filter: t.a = t2.a AND t.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) +04)------Cross Join: +05)--------SubqueryAlias: t +06)----------TableScan: t1 +07)--------TableScan: t2 +physical_plan_error From 480795e5efd7885870c479f53942a72289b758b4 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sat, 7 Mar 2026 14:10:33 +0800 Subject: [PATCH 2/8] Remove unsupported UPDATE ... FROM test cases and related queries --- datafusion/sqllogictest/test_files/update.slt | 103 ------------------ 1 file changed, 103 deletions(-) diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt index d1d9572ee6675..8a0d834379ccd 100644 --- a/datafusion/sqllogictest/test_files/update.slt +++ b/datafusion/sqllogictest/test_files/update.slt @@ -66,109 +66,6 @@ logical_plan 08)----TableScan: t1 physical_plan_error This feature is not implemented: Physical plan does not support logical expression ScalarSubquery() -# set from other table -# UPDATE ... FROM plans successfully but physical execution is still out of scope here -query TT -explain update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; ----- -logical_plan -01)Dml: op=[Update] table=[t1] -02)--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t1.a AS __df_update_old_a, t1.b AS __df_update_old_b, t1.c AS __df_update_old_c, t1.d AS __df_update_old_d -03)----Filter: t1.a = t2.a AND t1.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) -04)------Cross Join: -05)--------TableScan: t1 -06)--------TableScan: t2 -physical_plan_error - -# update from (FROM before SET syntax) -query TT -explain update t1 from t2 set b = t2.b, c = t1.a + t2.a where t1.a = t2.a; ----- -logical_plan -01)Dml: op=[Update] table=[t1] -02)--Projection: t1.a AS a, t2.b AS b, CAST(t1.a + t2.a AS Float64) AS c, t1.d AS d, t1.a AS __df_update_old_a, t1.b AS __df_update_old_b, t1.c AS __df_update_old_c, t1.d AS __df_update_old_d -03)----Filter: t1.a = t2.a -04)------Cross Join: -05)--------TableScan: t1 -06)--------TableScan: t2 -physical_plan_error - -# update from with explicit aliases and joined assignment expressions -query TT -explain update t1 as dst set b = src.b, c = src.a + dst.a, d = dst.d + src.d from t2 as src where dst.a = src.a and src.c > dst.c; ----- -logical_plan -01)Dml: op=[Update] table=[t1] -02)--Projection: dst.a AS a, src.b AS b, CAST(src.a + dst.a AS Float64) AS c, dst.d + src.d AS d, dst.a AS __df_update_old_a, dst.b AS __df_update_old_b, dst.c AS __df_update_old_c, dst.d AS __df_update_old_d -03)----Filter: dst.a = src.a AND src.c > dst.c -04)------Cross Join: -05)--------SubqueryAlias: dst -06)----------TableScan: t1 -07)--------SubqueryAlias: src -08)----------TableScan: t2 -physical_plan_error - -# test update from other table with actual data -statement ok -insert into t1 values (1, 'zoo', 2.0, 10), (2, 'qux', 3.0, 20), (3, 'bar', 4.0, 30); - -statement ok -insert into t2 values (1, 'updated_b', 5.0, 40), (2, 'updated_b2', 2.5, 50), (4, 'updated_b3', 1.5, 60); - -# physical execution stays out of scope for this file until provider support lands -query TT -explain update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; ----- -logical_plan -01)Dml: op=[Update] table=[t1] -02)--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t1.a AS __df_update_old_a, t1.b AS __df_update_old_b, t1.c AS __df_update_old_c, t1.d AS __df_update_old_d -03)----Filter: t1.a = t2.a AND t1.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) -04)------Cross Join: -05)--------TableScan: t1 -06)--------TableScan: t2 -physical_plan_error - # set from multiple tables, DataFusion only supports from one table statement error DataFusion error: This feature is not implemented: Multiple tables in UPDATE SET FROM not yet supported explain update t1 set b = t2.b, c = t3.a, d = 1 from t2, t3 where t1.a = t2.a and t1.a = t3.a; - -# test table alias -query TT -explain update t1 as T set b = t2.b, c = t.a, d = 1 from t2 where t.a = t2.a and t.b > 'foo' and t2.c > 1.0; ----- -logical_plan -01)Dml: op=[Update] table=[t1] -02)--Projection: t.a AS a, t2.b AS b, CAST(t.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t.a AS __df_update_old_a, t.b AS __df_update_old_b, t.c AS __df_update_old_c, t.d AS __df_update_old_d -03)----Filter: t.a = t2.a AND t.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) -04)------Cross Join: -05)--------SubqueryAlias: t -06)----------TableScan: t1 -07)--------TableScan: t2 -physical_plan_error - -# test update with table alias with actual data -statement ok -delete from t1; - -statement ok -delete from t2; - -statement ok -insert into t1 values (1, 'zebra', 1.5, 5), (2, 'wolf', 2.0, 10), (3, 'apple', 3.5, 15); - -statement ok -insert into t2 values (1, 'new_val', 2.0, 100), (2, 'new_val2', 1.5, 200); - -# physical execution stays out of scope for this file until provider support lands -query TT -explain update t1 as T set b = t2.b, c = t.a, d = 1 from t2 where t.a = t2.a and t.b > 'foo' and t2.c > 1.0; ----- -logical_plan -01)Dml: op=[Update] table=[t1] -02)--Projection: t.a AS a, t2.b AS b, CAST(t.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d, t.a AS __df_update_old_a, t.b AS __df_update_old_b, t.c AS __df_update_old_c, t.d AS __df_update_old_d -03)----Filter: t.a = t2.a AND t.b > CAST(Utf8("foo") AS Utf8View) AND t2.c > Float64(1) -04)------Cross Join: -05)--------SubqueryAlias: t -06)----------TableScan: t1 -07)--------TableScan: t2 -physical_plan_error From 66c15e899b687a35af40c44d3ad24e00248d07f4 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sat, 7 Mar 2026 14:16:07 +0800 Subject: [PATCH 3/8] Add SQL integration tests for planner-only updates Added planner-only SQL integration coverage in sql_integration.rs. Added tests for basic UPDATE ... FROM, UPDATE ... FROM with FROM before SET, and alias-based target/source UPDATE ... FROM, ensuring existing alias coverage is preserved. --- datafusion/sql/tests/sql_integration.rs | 42 ++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index f9cb173dff047..5e1fa4d5dafc3 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -740,7 +740,47 @@ fn plan_update() { } #[test] -fn plan_update_from_projects_original_target_row() { +fn plan_update_from() { + let sql = "update person \ + set last_name = src.last_name, age = src.age \ + from person as src \ + where person.id = src.id"; + let plan = logical_plan(sql).unwrap(); + let expected = [ + "Dml: op=[Update] table=[person]", + " Projection: person.id AS id, person.first_name AS first_name, src.last_name AS last_name, src.age AS age, person.state AS state, person.salary AS salary, person.birth_date AS birth_date, person.😀 AS 😀, person.id AS __df_update_old_id, person.first_name AS __df_update_old_first_name, person.last_name AS __df_update_old_last_name, person.age AS __df_update_old_age, person.state AS __df_update_old_state, person.salary AS __df_update_old_salary, person.birth_date AS __df_update_old_birth_date, person.😀 AS __df_update_old_😀", + " Filter: person.id = src.id", + " Cross Join:", + " TableScan: person", + " SubqueryAlias: src", + " TableScan: person", + ] + .join("\n"); + assert_eq!(format!("{plan}"), expected); +} + +#[test] +fn plan_update_from_before_set() { + let sql = "update person \ + from person as src \ + set last_name = src.last_name, age = src.age \ + where person.id = src.id"; + let plan = logical_plan(sql).unwrap(); + let expected = [ + "Dml: op=[Update] table=[person]", + " Projection: person.id AS id, person.first_name AS first_name, src.last_name AS last_name, src.age AS age, person.state AS state, person.salary AS salary, person.birth_date AS birth_date, person.😀 AS 😀, person.id AS __df_update_old_id, person.first_name AS __df_update_old_first_name, person.last_name AS __df_update_old_last_name, person.age AS __df_update_old_age, person.state AS __df_update_old_state, person.salary AS __df_update_old_salary, person.birth_date AS __df_update_old_birth_date, person.😀 AS __df_update_old_😀", + " Filter: person.id = src.id", + " Cross Join:", + " TableScan: person", + " SubqueryAlias: src", + " TableScan: person", + ] + .join("\n"); + assert_eq!(format!("{plan}"), expected); +} + +#[test] +fn plan_update_from_with_aliases_projects_original_target_row() { let sql = "update person as dst \ set last_name = src.last_name, age = src.age \ from person as src \ From 612092e6bf782214fe8c86f0cda19911bde93175 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sat, 7 Mar 2026 14:30:01 +0800 Subject: [PATCH 4/8] cargo fmt --- datafusion/sql/src/statement.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 9a755ce7ae163..2b959a64c94a4 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -25,8 +25,7 @@ use crate::parser::{ LexOrdering, ResetStatement, Statement as DFStatement, }; use crate::planner::{ - ContextProvider, IdentNormalizer, PlannerContext, SqlToRel, - object_name_to_qualifier, + ContextProvider, IdentNormalizer, PlannerContext, SqlToRel, object_name_to_qualifier, }; use crate::utils::normalize_ident; From 589789c5d04db4deecf952c719f31e523f7f5eb2 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sat, 7 Mar 2026 14:40:18 +0800 Subject: [PATCH 5/8] Add UPDATE ... FROM plan detection and early rejection Implement detection for UPDATE ... FROM plans using hidden old-row projection aliases in the physical planner. Introduce early rejection in the WriteOp::Update path to ensure that unsupported operations return an appropriate error message. This change enhances error handling and clarifies feature support. --- datafusion/core/src/physical_planner.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index b4fb44f670e8d..b74a1f4d8a126 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -84,6 +84,7 @@ use datafusion_expr::expr::{ }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; +use datafusion_expr::logical_plan::dml::UPDATE_FROM_OLD_COLUMN_PREFIX; use datafusion_expr::utils::{expr_to_columns, split_conjunction}; use datafusion_expr::{ Analyze, BinaryExpr, DescribeTable, DmlStatement, Explain, ExplainFormat, Extension, @@ -781,6 +782,12 @@ impl DefaultPhysicalPlanner { if let Some(provider) = target.as_any().downcast_ref::() { + if has_update_from_old_row_projection(input)? { + return not_impl_err!( + "UPDATE ... FROM execution is not yet supported" + ); + } + // For UPDATE, the assignments are encoded in the projection of input // We pass the filters and let the provider handle the projection let filters = extract_dml_filters(input, table_name)?; @@ -2212,6 +2219,24 @@ fn predicate_is_on_target_multi( })) } +fn has_update_from_old_row_projection(input: &Arc) -> Result { + let mut has_old_row_projection = false; + input.apply(|node| { + if let LogicalPlan::Projection(projection) = node + && projection.expr.iter().any(|expr| { + matches!(expr, Expr::Alias(alias) if alias.name.starts_with(UPDATE_FROM_OLD_COLUMN_PREFIX)) + }) + { + has_old_row_projection = true; + return Ok(TreeNodeRecursion::Stop); + } + + Ok(TreeNodeRecursion::Continue) + })?; + + Ok(has_old_row_projection) +} + /// Strip table qualifiers from column references in an expression. /// This is needed because DML filter expressions contain qualified column names /// (e.g., "table.column") but the TableProvider's schema only has simple names. From 1345aab4d6cf608ec52dfc2e9ee0d0fca0e76583 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sat, 7 Mar 2026 14:43:33 +0800 Subject: [PATCH 6/8] Add regression test for UPDATE ... FROM logic Implement helper to create UPDATE ... FROM input. Add a test to verify that no extracted assignment names begin with __df_update_old_, while ensuring real assignments are still extracted correctly. --- datafusion/core/src/physical_planner.rs | 71 +++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index b74a1f4d8a126..6613646a88a27 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2273,6 +2273,11 @@ fn extract_update_assignments(input: &Arc) -> Result