diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index cc263dfe3e619..5d54dcefb01e8 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -17,6 +17,7 @@ //! Runtime configuration, via [`ConfigOptions`] +use arrow::array::timezone::Tz; use arrow_ipc::CompressionType; #[cfg(feature = "parquet_encryption")] @@ -720,6 +721,42 @@ impl From for Option { } } +/// A validated timezone configuration value. +#[derive(Debug, Copy, Clone)] +pub struct ConfigTimeZone(Tz); + +impl ConfigTimeZone { + /// Returns the parsed timezone value. + pub fn tz(&self) -> Tz { + self.0 + } +} + +impl FromStr for ConfigTimeZone { + type Err = DataFusionError; + + fn from_str(value: &str) -> Result { + value + .parse::() + .map(Self) + .map_err(|e| DataFusionError::Configuration(e.to_string())) + } +} + +impl Display for ConfigTimeZone { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq for ConfigTimeZone { + fn eq(&self, other: &Self) -> bool { + self.to_string() == other.to_string() + } +} + +impl Eq for ConfigTimeZone {} + config_namespace! { /// Options related to query execution /// @@ -770,7 +807,7 @@ config_namespace! { /// The default time zone /// /// Some functions, e.g. `now` return timestamps in this time zone - pub time_zone: Option, default = None + pub time_zone: Option, default = None /// Parquet options pub parquet: ParquetOptions, default = Default::default() @@ -2238,6 +2275,38 @@ impl ConfigField for Option { } } +impl ConfigField for Option { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + match self { + Some(tz) => v.some(key, tz, description), + None => v.none(key, description), + } + } + + fn set(&mut self, key: &str, value: &str) -> Result<()> { + if !key.is_empty() { + return _config_err!( + "Config field is an optional timezone and does not have nested field \"{}\"", + key + ); + } + *self = Some(value.parse()?); + Ok(()) + } + + fn reset(&mut self, key: &str) -> Result<()> { + if key.is_empty() { + *self = None; + Ok(()) + } else { + _config_err!( + "Config field is an optional timezone and does not have nested field \"{}\"", + key + ) + } + } +} + /// Default transformation to parse a [`ConfigField`] for a string. /// /// This uses [`FromStr`] to parse the data. @@ -4324,6 +4393,45 @@ mod tests { ); } + #[test] + fn test_execution_time_zone_validation() { + use crate::config::{ConfigOptions, ConfigTimeZone}; + + let mut config = ConfigOptions::default(); + + for valid in ["+08:00", "-08:00", "+0800", "+08", "Asia/Taipei"] { + config.set("datafusion.execution.time_zone", valid).unwrap(); + assert_eq!( + config.execution.time_zone.as_ref().map(ToString::to_string), + Some(valid.parse::().unwrap().to_string()) + ); + } + + let previous = config.execution.time_zone; + for invalid in ["+08:00:00", "08:00", "08", "Asia/Taipei2"] { + let err = config + .set("datafusion.execution.time_zone", invalid) + .unwrap_err(); + assert_eq!( + err.to_string(), + format!( + "Invalid or Unsupported Configuration: Parser error: Invalid timezone {invalid:?}: failed to parse timezone" + ) + ); + assert_eq!(config.execution.time_zone, previous); + } + + let err = config + .set("datafusion.execution.time_zone.name", "UTC") + .unwrap_err(); + assert!( + err.strip_backtrace() + .contains("does not have nested field \"name\""), + "Unexpected error {err:?}" + ); + assert_eq!(config.execution.time_zone, previous); + } + #[cfg(feature = "parquet")] #[test] fn set_cdc_enabled_flag() { diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index b758aeb5209e8..e379236d361f9 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -1970,7 +1970,12 @@ async fn test_config_options_work_for_scalar_func() -> Result<()> { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - let tz = args.config_options.execution.time_zone.clone(); + let tz = args + .config_options + .execution + .time_zone + .as_ref() + .map(ToString::to_string); Ok(ColumnarValue::Scalar(ScalarValue::from(tz))) } } @@ -1980,7 +1985,7 @@ async fn test_config_options_work_for_scalar_func() -> Result<()> { }); let mut config = SessionConfig::new(); - config.options_mut().execution.time_zone = Some("AEST".into()); + config.options_mut().execution.time_zone = Some("Australia/Sydney".parse().unwrap()); let ctx = SessionContext::new_with_config(config); @@ -1993,7 +1998,7 @@ async fn test_config_options_work_for_scalar_func() -> Result<()> { let expected_schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); let expected = RecordBatch::try_new( SchemaRef::from(expected_schema), - vec![create_array!(Utf8, ["AEST"])], + vec![create_array!(Utf8, ["Australia/Sydney"])], )?; assert_eq!(expected, actual[0]); diff --git a/datafusion/ffi/src/tests/udf_udaf_udwf.rs b/datafusion/ffi/src/tests/udf_udaf_udwf.rs index b393f5db3a506..4548e37eb996c 100644 --- a/datafusion/ffi/src/tests/udf_udaf_udwf.rs +++ b/datafusion/ffi/src/tests/udf_udaf_udwf.rs @@ -99,7 +99,12 @@ impl ScalarUDFImpl for TimeZoneUDF { &self, args: ScalarFunctionArgs, ) -> datafusion_common::Result { - let tz = args.config_options.execution.time_zone.clone(); + let tz = args + .config_options + .execution + .time_zone + .as_ref() + .map(ToString::to_string); Ok(ColumnarValue::Scalar(ScalarValue::from(tz))) } } diff --git a/datafusion/ffi/tests/ffi_udf.rs b/datafusion/ffi/tests/ffi_udf.rs index dffaf83c479b1..18085eb7d529f 100644 --- a/datafusion/ffi/tests/ffi_udf.rs +++ b/datafusion/ffi/tests/ffi_udf.rs @@ -135,7 +135,8 @@ mod tests { assert!(result[0].column(0).as_string::().is_null(0)); let mut config = SessionConfig::new(); - config.options_mut().execution.time_zone = Some("AEST".into()); + config.options_mut().execution.time_zone = + Some("Australia/Sydney".parse().unwrap()); let ctx = SessionContext::new_with_config(config); @@ -148,7 +149,7 @@ mod tests { assert!(result.len() == 1); assert!(!result[0].column(0).as_string::().is_null(0)); let result = result[0].column(0).as_string::().value(0); - assert_eq!(result, "AEST"); + assert_eq!(result, "Australia/Sydney"); Ok(()) } diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 90ea145d5d2c0..7e9e99cc37caa 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -113,7 +113,7 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_field = Field::new("a", DataType::Utf8, false).into(); let arg_fields = vec![arg_field]; let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("UTC".into()); + options.execution.time_zone = Some("UTC".parse().unwrap()); let config_options = Arc::new(options); let to_timestamp_udf = to_timestamp(config_options.as_ref()); diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index d07a3b1caf13b..c75907c4923e9 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::timezone::Tz; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Date32; use chrono::{Datelike, NaiveDate, TimeZone}; @@ -121,7 +120,7 @@ impl ScalarUDFImpl for CurrentDateFunc { .execution .time_zone .as_ref() - .and_then(|tz| tz.parse::().ok()) + .map(|tz| tz.tz()) .map_or_else( || datetime_to_days(&now_ts), |tz| { diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 92f4ae5e66f02..a40a3b9482b5e 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::timezone::Tz; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Time64; use arrow::datatypes::TimeUnit::Nanosecond; @@ -118,7 +117,7 @@ impl ScalarUDFImpl for CurrentTimeFunc { .execution .time_zone .as_ref() - .and_then(|tz| tz.parse::().ok()) + .map(|tz| tz.tz()) .map_or_else( || datetime_to_time_nanos(&now_ts), |tz| { @@ -160,7 +159,7 @@ mod tests { config.execution.time_zone = if tz.is_empty() { None } else { - Some(tz.to_string()) + Some(tz.parse().unwrap()) }; let schema = Arc::new(DFSchema::empty()); SimplifyContext::builder() diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index 82bb1251b2045..e5acab5665112 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -87,7 +87,7 @@ impl NowFunc { .execution .time_zone .as_ref() - .map(|tz| Arc::from(tz.as_str())), + .map(|tz| Arc::from(tz.to_string())), } } } diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index f4507ab250559..8b96666cdbf7a 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -319,7 +319,7 @@ macro_rules! impl_to_timestamp_constructors { .execution .time_zone .as_ref() - .map(|tz| Arc::from(tz.as_str())), + .map(|tz| Arc::from(tz.to_string())), } } } @@ -914,30 +914,6 @@ mod tests { udfs } - fn validate_expected_error( - options: &mut ConfigOptions, - args: ScalarFunctionArgs, - expected_err: &str, - ) { - let udfs = udfs_and_timeunit(); - - for (udf, _) in udfs { - match udf - .with_updated_config(options) - .unwrap() - .invoke_with_args(args.clone()) - { - Ok(_) => panic!("Expected error but got success"), - Err(e) => { - assert!( - e.to_string().contains(expected_err), - "Can not find expected error '{expected_err}'. Actual error '{e}'" - ); - } - } - } - } - #[test] fn to_timestamp_arrays_and_nulls() -> Result<()> { // ensure that arrow array implementation is wired up and handles nulls correctly @@ -1042,13 +1018,13 @@ mod tests { let udfs = udfs_and_timeunit(); let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("-05:00".to_string()); + options.execution.time_zone = Some("-05:00".parse().unwrap()); let time_zone: Option> = options .execution .time_zone .as_ref() - .map(|tz| Arc::from(tz.as_str())); + .map(|tz| Arc::from(tz.to_string())); for (udf, time_unit) in udfs { let field = Field::new("arg", Utf8, true).into(); @@ -1152,13 +1128,13 @@ mod tests { let udfs = udfs_and_timeunit(); let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("-05:00".to_string()); + options.execution.time_zone = Some("-05:00".parse().unwrap()); let time_zone: Option> = options .execution .time_zone .as_ref() - .map(|tz| Arc::from(tz.as_str())); + .map(|tz| Arc::from(tz.to_string())); let expr_field = Field::new("arg", Utf8, true).into(); let format_field: Arc = Field::new("fmt", Utf8, true).into(); @@ -1292,113 +1268,6 @@ mod tests { Ok(()) } - #[test] - fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> { - let field: Arc = Field::new("arg", Utf8, true).into(); - let return_field: Arc = - Field::new("f", Timestamp(Nanosecond, None), true).into(); - - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("Invalid/Timezone".to_string()); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "2020-09-08T13:42:29Z".to_string(), - )))], - arg_fields: vec![Arc::clone(&field)], - number_rows: 1, - return_field: Arc::clone(&return_field), - config_options: Arc::new(options.clone()), - }; - - let expected_err = - "Invalid timezone \"Invalid/Timezone\": failed to parse timezone"; - - validate_expected_error(&mut options, args, expected_err); - - Ok(()) - } - - #[test] - fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> { - let expr_field: Arc = Field::new("arg", Utf8, true).into(); - let format_field: Arc = Field::new("fmt", Utf8, true).into(); - let return_field: Arc = - Field::new("f", Timestamp(Nanosecond, None), true).into(); - - let mut options = ConfigOptions::default(); - options.execution.time_zone = Some("Invalid/Timezone".to_string()); - - let expected_err = - "Invalid timezone \"Invalid/Timezone\": failed to parse timezone"; - - let make_args = |value: &str, format: &str| ScalarFunctionArgs { - args: vec![ - ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))), - ], - arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)], - number_rows: 1, - return_field: Arc::clone(&return_field), - config_options: Arc::new(options.clone()), - }; - - for (value, format, _expected_str) in [ - ( - "2020-09-08 09:42:29 -05:00", - "%Y-%m-%d %H:%M:%S %z", - Some("2020-09-08 09:42:29 -05:00"), - ), - ( - "2020-09-08T13:42:29Z", - "%+", - Some("2020-09-08 08:42:29 -05:00"), - ), - ( - "2020-09-08 13:42:29 +0000", - "%Y-%m-%d %H:%M:%S %z", - Some("2020-09-08 08:42:29 -05:00"), - ), - ( - "+0000 2024-01-01 12:00:00", - "%z %Y-%m-%d %H:%M:%S", - Some("2024-01-01 07:00:00 -05:00"), - ), - ( - "20200908134229+0100", - "%Y%m%d%H%M%S%z", - Some("2020-09-08 07:42:29 -05:00"), - ), - ( - "2020-09-08+0230 13:42", - "%Y-%m-%d%z %H:%M", - Some("2020-09-08 06:12:00 -05:00"), - ), - ] { - let args = make_args(value, format); - validate_expected_error(&mut options.clone(), args, expected_err); - } - - let args = ScalarFunctionArgs { - args: vec![ - ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "2020-09-08T13:42:29".to_string(), - ))), - ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "%Y-%m-%dT%H:%M:%S".to_string(), - ))), - ], - arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)], - number_rows: 1, - return_field: Arc::clone(&return_field), - config_options: Arc::new(options.clone()), - }; - - validate_expected_error(&mut options.clone(), args, expected_err); - - Ok(()) - } - #[test] fn to_timestamp_invalid_input_type() -> Result<()> { // pass the wrong type of input array to to_timestamp and test diff --git a/datafusion/spark/src/function/conversion/cast.rs b/datafusion/spark/src/function/conversion/cast.rs index 45d1b336261d7..b4b605674c9dd 100644 --- a/datafusion/spark/src/function/conversion/cast.rs +++ b/datafusion/spark/src/function/conversion/cast.rs @@ -151,7 +151,7 @@ impl SparkCast { .execution .time_zone .as_ref() - .map(|tz| Arc::from(tz.as_str())) + .map(|tz| Arc::from(tz.to_string())) .or_else(|| Some(Arc::from("UTC"))), } } @@ -373,7 +373,7 @@ mod tests { )); let mut config = ConfigOptions::default(); if let Some(tz) = timezone { - config.execution.time_zone = Some(tz.to_string()); + config.execution.time_zone = Some(tz.parse().unwrap()); } ScalarFunctionArgs { args: vec![ @@ -761,7 +761,7 @@ mod tests { true, )); let mut config = ConfigOptions::default(); - config.execution.time_zone = Some("UTC".to_string()); + config.execution.time_zone = Some("UTC".parse().unwrap()); config.execution.enable_ansi_mode = enable_ansi_mode; ScalarFunctionArgs { args: vec![ diff --git a/datafusion/spark/src/function/datetime/date_trunc.rs b/datafusion/spark/src/function/datetime/date_trunc.rs index c8b0fbca36165..c9d63c0090fbf 100644 --- a/datafusion/spark/src/function/datetime/date_trunc.rs +++ b/datafusion/spark/src/function/datetime/date_trunc.rs @@ -115,7 +115,12 @@ impl ScalarUDFImpl for SparkDateTrunc { other => other, }; - let session_tz = info.config_options().execution.time_zone.clone(); + let session_tz = info + .config_options() + .execution + .time_zone + .as_ref() + .map(ToString::to_string); let ts_type = ts_expr.get_type(info.schema())?; // Spark interprets timestamps in the session timezone before truncating, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index a17cb224d1caf..d5d23ede9bc66 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -768,7 +768,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Timestamp With Time Zone // INPUT : [SQLDataType] TimestampTz + [Config] Time Zone // OUTPUT: [ArrowDataType] Timestamp - self.context_provider.options().execution.time_zone.clone() + self.context_provider + .options() + .execution + .time_zone + .as_ref() + .map(|tz| Arc::from(tz.to_string())) } else { // Timestamp Without Time zone None @@ -780,7 +785,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { None | Some(9) => TimeUnit::Nanosecond, _ => unreachable!(), }; - Ok(DataType::Timestamp(precision, tz.map(Into::into))) + Ok(DataType::Timestamp(precision, tz)) } SQLDataType::Date => Ok(DataType::Date32), SQLDataType::Time(None, tz_info) => { diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 06740fa0f5439..529bdbef2174a 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -82,7 +82,7 @@ SET TIME ZONE = '+08' query T select arrow_typeof(now()); ---- -Timestamp(ns, "+08") +Timestamp(ns, "+08:00") query I SELECT count(1) result FROM (SELECT now() as n) a WHERE n > '2000-01-01'::date; diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 0514deba28a33..5047ab0274e45 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -213,24 +213,20 @@ SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ statement ok set datafusion.catalog.information_schema = true -statement ok +statement error DataFusion error: Invalid or Unsupported Configuration: Parser error: Invalid timezone "\+08:00:00": failed to parse timezone SET TIME ZONE = '+08:00:00' -statement error Arrow error: Parser error: Invalid timezone "\+08:00:00": failed to parse timezone -SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ +query TT +SHOW TIME ZONE +---- +datafusion.execution.time_zone +08:00 -statement ok +statement error DataFusion error: Invalid or Unsupported Configuration: Parser error: Invalid timezone "08:00": failed to parse timezone SET TIME ZONE = '08:00' -statement error Arrow error: Parser error: Invalid timezone "08:00": failed to parse timezone -SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ - -statement ok +statement error DataFusion error: Invalid or Unsupported Configuration: Parser error: Invalid timezone "08": failed to parse timezone SET TIME ZONE = '08' -statement error Arrow error: Parser error: Invalid timezone "08": failed to parse timezone -SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ - statement ok SET TIME ZONE = 'Asia/Taipei' @@ -239,12 +235,9 @@ SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ ---- 2000-01-01T00:00:00+08:00 -statement ok +statement error DataFusion error: Invalid or Unsupported Configuration: Parser error: Invalid timezone "Asia/Taipei2": failed to parse timezone SET TIME ZONE = 'Asia/Taipei2' -statement error Arrow error: Parser error: Invalid timezone "Asia/Taipei2": failed to parse timezone -SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ - # reset variable restores default statement ok set datafusion.catalog.information_schema = true