diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs index 7d6c9e9493bf..854c26bf7702 100644 --- a/datafusion/spark/src/function/datetime/mod.rs +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -28,6 +28,7 @@ pub mod make_interval; pub mod next_day; pub mod time_trunc; pub mod trunc; +pub mod unix; use datafusion_expr::ScalarUDF; use datafusion_functions::make_udf_function; @@ -48,6 +49,22 @@ make_udf_function!(make_interval::SparkMakeInterval, make_interval); make_udf_function!(next_day::SparkNextDay, next_day); make_udf_function!(time_trunc::SparkTimeTrunc, time_trunc); make_udf_function!(trunc::SparkTrunc, trunc); +make_udf_function!(unix::SparkUnixDate, unix_date); +make_udf_function!( + unix::SparkUnixTimestamp, + unix_micros, + unix::SparkUnixTimestamp::microseconds +); +make_udf_function!( + unix::SparkUnixTimestamp, + unix_millis, + unix::SparkUnixTimestamp::milliseconds +); +make_udf_function!( + unix::SparkUnixTimestamp, + unix_seconds, + unix::SparkUnixTimestamp::seconds +); pub mod expr_fn { use datafusion_functions::export_functions; @@ -125,6 +142,26 @@ pub mod expr_fn { "Extracts a part of the date or time from a date, time, or timestamp expression.", arg1 arg2 )); + export_functions!(( + unix_date, + "Returns the number of days since epoch (1970-01-01) for the given date `dt`.", + dt + )); + export_functions!(( + unix_micros, + "Returns the number of microseconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp `ts`.", + ts + )); + export_functions!(( + unix_millis, + "Returns the number of milliseconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp `ts`.", + ts + )); + export_functions!(( + unix_seconds, + "Returns the number of seconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp `ts`.", + ts + )); } pub fn functions() -> Vec> { @@ -144,5 +181,9 @@ pub fn functions() -> Vec> { second(), time_trunc(), trunc(), + unix_date(), + unix_micros(), + unix_millis(), + unix_seconds(), ] } diff --git a/datafusion/spark/src/function/datetime/unix.rs b/datafusion/spark/src/function/datetime/unix.rs new file mode 100644 index 000000000000..4254b2ed85d5 --- /dev/null +++ b/datafusion/spark/src/function/datetime/unix.rs @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit}; +use datafusion_common::types::logical_date; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, internal_err}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; +use datafusion_expr::{ + Coercion, ColumnarValue, Expr, ExprSchemable, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, +}; + +/// Returns the number of days since epoch (1970-01-01) for the given date. +/// +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkUnixDate { + signature: Signature, +} + +impl Default for SparkUnixDate { + fn default() -> Self { + Self::new() + } +} + +impl SparkUnixDate { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Native( + logical_date(), + ))], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SparkUnixDate { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "unix_date" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields[0].is_nullable(); + Ok(Arc::new(Field::new(self.name(), DataType::Int32, nullable))) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!("invoke_with_args should not be called on SparkUnixDate") + } + + fn simplify( + &self, + args: Vec, + info: &SimplifyContext, + ) -> Result { + let [date] = take_function_args(self.name(), args)?; + Ok(ExprSimplifyResult::Simplified( + date.cast_to(&DataType::Date32, info.schema())? + .cast_to(&DataType::Int32, info.schema())?, + )) + } +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkUnixTimestamp { + time_unit: TimeUnit, + signature: Signature, + name: &'static str, +} + +impl SparkUnixTimestamp { + pub fn new(name: &'static str, time_unit: TimeUnit) -> Self { + Self { + signature: Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Timestamp)], + Volatility::Immutable, + ), + time_unit, + name, + } + } + + /// Returns the number of microseconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp. + /// + pub fn microseconds() -> Self { + Self::new("unix_micros", TimeUnit::Microsecond) + } + + /// Returns the number of milliseconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp. + /// + pub fn milliseconds() -> Self { + Self::new("unix_millis", TimeUnit::Millisecond) + } + + /// Returns the number of seconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp. + /// + pub fn seconds() -> Self { + Self::new("unix_seconds", TimeUnit::Second) + } +} + +impl ScalarUDFImpl for SparkUnixTimestamp { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields[0].is_nullable(); + Ok(Arc::new(Field::new(self.name(), DataType::Int64, nullable))) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!("invoke_with_args should not be called on `{}`", self.name()) + } + + fn simplify( + &self, + args: Vec, + info: &SimplifyContext, + ) -> Result { + let [ts] = take_function_args(self.name(), args)?; + Ok(ExprSimplifyResult::Simplified( + ts.cast_to( + &DataType::Timestamp(self.time_unit, Some("UTC".into())), + info.schema(), + )? + .cast_to(&DataType::Int64, info.schema())?, + )) + } +} diff --git a/datafusion/sqllogictest/test_files/spark/datetime/unix.slt b/datafusion/sqllogictest/test_files/spark/datetime/unix.slt new file mode 100644 index 000000000000..f6e057bb8d66 --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/datetime/unix.slt @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Unix Date tests + +query I +SELECT unix_date('1970-01-02'::date); +---- +1 + +query I +SELECT unix_date('1900-01-02'::date); +---- +-25566 + + +query I +SELECT unix_date(arrow_cast('1970-01-02', 'Date64')); +---- +1 + +query I +SELECT unix_date(NULL::date); +---- +NULL + +query error Expect TypeSignatureClass::Native\(LogicalType\(Native\(Date\), Date\)\) but received NativeType::String, DataType: Utf8View +SELECT unix_date('1970-01-02'::string); + +# Unix Micro Tests + +query I +SELECT unix_micros('1970-01-01 00:00:01Z'::timestamp); +---- +1000000 + +query I +SELECT unix_micros('1900-01-01 00:00:01Z'::timestamp); +---- +-2208988799000000 + +query I +SELECT unix_micros(arrow_cast('1970-01-01 00:00:01+02:00', 'Timestamp(Microsecond, None)')); +---- +-7199000000 + +query I +SELECT unix_micros(arrow_cast('1970-01-01 00:00:01Z', 'Timestamp(Second, None)')); +---- +1000000 + +query I +SELECT unix_micros(NULL::timestamp); +---- +NULL + +query error Expect TypeSignatureClass::Timestamp but received NativeType::String, DataType: Utf8View +SELECT unix_micros('1970-01-01 00:00:01Z'::string); + + +# Unix Millis Tests + +query I +SELECT unix_millis('1970-01-01 00:00:01Z'::timestamp); +---- +1000 + +query I +SELECT unix_millis('1900-01-01 00:00:01Z'::timestamp); +---- +-2208988799000 + +query I +SELECT unix_millis(arrow_cast('1970-01-01 00:00:01+02:00', 'Timestamp(Microsecond, None)')); +---- +-7199000 + +query I +SELECT unix_millis(arrow_cast('1970-01-01 00:00:01Z', 'Timestamp(Second, None)')); +---- +1000 + +query I +SELECT unix_millis(NULL::timestamp); +---- +NULL + +query error Expect TypeSignatureClass::Timestamp but received NativeType::String, DataType: Utf8View +SELECT unix_millis('1970-01-01 00:00:01Z'::string); + + +# Unix Seconds Tests + +query I +SELECT unix_seconds('1970-01-01 00:00:01Z'::timestamp); +---- +1 + +query I +SELECT unix_seconds('1900-01-01 00:00:01Z'::timestamp); +---- +-2208988799 + +query I +SELECT unix_seconds(arrow_cast('1970-01-01 00:00:01+02:00', 'Timestamp(Microsecond, None)')); +---- +-7199 + +query I +SELECT unix_seconds(arrow_cast('1970-01-01 00:00:01Z', 'Timestamp(Second, None)')); +---- +1 + +query I +SELECT unix_seconds(NULL::timestamp); +---- +NULL + +query error Expect TypeSignatureClass::Timestamp but received NativeType::String, DataType: Utf8View +SELECT unix_seconds('1970-01-01 00:00:01Z'::string);