datafusion_spark/function/datetime/
last_day.rs1use std::sync::Arc;
19
20use arrow::array::{ArrayRef, AsArray, Date32Array};
21use arrow::datatypes::{DataType, Date32Type, Field, FieldRef};
22use chrono::{Datelike, Duration, NaiveDate};
23use datafusion_common::utils::take_function_args;
24use datafusion_common::{Result, ScalarValue, exec_datafusion_err, internal_err};
25use datafusion_expr::{
26 ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
27 Volatility,
28};
29
30#[derive(Debug, PartialEq, Eq, Hash)]
31pub struct SparkLastDay {
32 signature: Signature,
33}
34
35impl Default for SparkLastDay {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl SparkLastDay {
42 pub fn new() -> Self {
43 Self {
44 signature: Signature::exact(vec![DataType::Date32], Volatility::Immutable),
45 }
46 }
47}
48
49impl ScalarUDFImpl for SparkLastDay {
50 fn name(&self) -> &str {
51 "last_day"
52 }
53
54 fn signature(&self) -> &Signature {
55 &self.signature
56 }
57
58 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
59 internal_err!("return_field_from_args should be used instead")
60 }
61
62 fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
63 let Some(field) = args.arg_fields.first() else {
64 return internal_err!("Spark `last_day` expects exactly one argument");
65 };
66
67 Ok(Arc::new(Field::new(
68 self.name(),
69 DataType::Date32,
70 field.is_nullable(),
71 )))
72 }
73
74 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
75 let ScalarFunctionArgs { args, .. } = args;
76 let [arg] = take_function_args("last_day", args)?;
77 match arg {
78 ColumnarValue::Scalar(ScalarValue::Date32(days)) => {
79 if let Some(days) = days {
80 Ok(ColumnarValue::Scalar(ScalarValue::Date32(Some(
81 spark_last_day(days)?,
82 ))))
83 } else {
84 Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
85 }
86 }
87 ColumnarValue::Array(array) => {
88 let result = match array.data_type() {
89 DataType::Date32 => {
90 let result: Date32Array = array
91 .as_primitive::<Date32Type>()
92 .try_unary(spark_last_day)?
93 .with_data_type(DataType::Date32);
94 Ok(Arc::new(result) as ArrayRef)
95 }
96 other => {
97 internal_err!(
98 "Unsupported data type {other:?} for Spark function `last_day`"
99 )
100 }
101 }?;
102 Ok(ColumnarValue::Array(result))
103 }
104 other => {
105 internal_err!("Unsupported arg {other:?} for Spark function `last_day")
106 }
107 }
108 }
109}
110
111fn spark_last_day(days: i32) -> Result<i32> {
112 let date = Date32Type::to_naive_date_opt(days).ok_or_else(|| {
113 exec_datafusion_err!(
114 "Spark `last_day`: Unable to convert days value {days} to date"
115 )
116 })?;
117
118 let (year, month) = (date.year(), date.month());
119 let (next_year, next_month) = if month == 12 {
120 (year + 1, 1)
121 } else {
122 (year, month + 1)
123 };
124
125 let first_day_next_month = NaiveDate::from_ymd_opt(next_year, next_month, 1)
126 .ok_or_else(|| {
127 exec_datafusion_err!(
128 "Spark `last_day`: Unable to parse date from {next_year}, {next_month}, 1"
129 )
130 })?;
131
132 Ok(Date32Type::from_naive_date(
133 first_day_next_month - Duration::days(1),
134 ))
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::Array;

    /// The output field is always `Date32` and copies the nullability of the
    /// argument field.
    #[test]
    fn test_last_day_nullability_matches_input() {
        let func = SparkLastDay::new();

        // (nullability of the argument field, message used if resolution fails)
        let cases = [
            (false, "non-nullable arg should succeed"),
            (true, "nullable arg should succeed"),
        ];

        for (nullable, failure_msg) in cases {
            let arg_field: FieldRef =
                Arc::new(Field::new("arg", DataType::Date32, nullable));

            let out_field = func
                .return_field_from_args(ReturnFieldArgs {
                    arg_fields: &[arg_field],
                    scalar_arguments: &[None],
                })
                .expect(failure_msg);

            assert_eq!(out_field.data_type(), &DataType::Date32);
            assert_eq!(out_field.is_nullable(), nullable);
        }
    }

    /// Scalar inputs: a concrete day maps to the end of its month, and a null
    /// input stays null.
    #[test]
    fn test_last_day_scalar_evaluation() {
        // Day 0 is expected to map to day 30.
        test_scalar_function!(
            SparkLastDay::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Date32(Some(0)))],
            Ok(Some(30)),
            i32,
            DataType::Date32,
            Date32Array
        );

        // Null in, null out.
        test_scalar_function!(
            SparkLastDay::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Date32(None))],
            Ok(None),
            i32,
            DataType::Date32,
            Date32Array
        );
    }
}