datafusion_spark/function/string/
format_string.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::fmt::Write;
20use std::sync::Arc;
21
22use core::num::FpCategory;
23
24use arrow::{
25    array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray},
26    datatypes::{DataType, Field, FieldRef},
27};
28use bigdecimal::{
29    BigDecimal, ToPrimitive,
30    num_bigint::{BigInt, Sign},
31};
32use chrono::{DateTime, Datelike, Timelike, Utc};
33use datafusion_common::{
34    DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err,
35};
36use datafusion_expr::{
37    ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
38    TypeSignature, Volatility,
39};
40
/// Spark-compatible `format_string` expression
/// <https://spark.apache.org/docs/latest/api/sql/index.html#format_string>
///
/// The first argument is the format string; the remaining arguments are the
/// values substituted into it.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct FormatStringFunc {
    /// Accepted argument signature, exposed through `ScalarUDFImpl::signature`.
    signature: Signature,
    /// Alternative function names, exposed through `ScalarUDFImpl::aliases`.
    aliases: Vec<String>,
}
48
49impl Default for FormatStringFunc {
50    fn default() -> Self {
51        Self::new()
52    }
53}
54
55impl FormatStringFunc {
56    pub fn new() -> Self {
57        Self {
58            signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable),
59            aliases: vec![String::from("printf")],
60        }
61    }
62}
63
64impl ScalarUDFImpl for FormatStringFunc {
65    fn as_any(&self) -> &dyn Any {
66        self
67    }
68
69    fn name(&self) -> &str {
70        "format_string"
71    }
72
73    fn aliases(&self) -> &[String] {
74        &self.aliases
75    }
76
77    fn signature(&self) -> &Signature {
78        &self.signature
79    }
80
81    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
82        datafusion_common::internal_err!(
83            "return_type should not be called, use return_field_from_args instead"
84        )
85    }
86
87    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
88        match args.arg_fields[0].data_type() {
89            DataType::Null => {
90                Ok(Arc::new(Field::new("format_string", DataType::Utf8, true)))
91            }
92            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
93                Ok(Arc::clone(&args.arg_fields[0]))
94            }
95            _ => exec_err!(
96                "format_string expects the first argument to be Utf8, LargeUtf8 or Utf8View, got {} instead",
97                args.arg_fields[0].data_type()
98            ),
99        }
100    }
101
102    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
103        let len = args.args.iter().find_map(|arg| match arg {
104            ColumnarValue::Scalar(_) => None,
105            ColumnarValue::Array(a) => Some(a.len()),
106        });
107        let is_scalar = len.is_none();
108        let data_types = args.args[1..]
109            .iter()
110            .map(|arg| arg.data_type())
111            .collect::<Vec<_>>();
112        let fmt_type = args.args[0].data_type();
113
114        match &args.args[0] {
115            ColumnarValue::Scalar(ScalarValue::Null) => {
116                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
117            }
118            ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {
119                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
120            }
121            ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {
122                Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
123            }
124            ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
125                Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
126            }
127            ColumnarValue::Scalar(ScalarValue::Utf8(Some(fmt)))
128            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(fmt)))
129            | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(fmt))) => {
130                let formatter = Formatter::parse(fmt, &data_types)?;
131                let mut result = Vec::with_capacity(len.unwrap_or(1));
132                for i in 0..len.unwrap_or(1) {
133                    let scalars = args.args[1..]
134                        .iter()
135                        .map(|arg| try_to_scalar(arg.clone(), i))
136                        .collect::<Result<Vec<_>>>()?;
137                    let formatted = formatter.format(&scalars)?;
138                    result.push(formatted);
139                }
140                if is_scalar {
141                    let scalar_result = result.pop().unwrap();
142                    match fmt_type {
143                        DataType::Utf8 => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
144                            Some(scalar_result),
145                        ))),
146                        DataType::LargeUtf8 => Ok(ColumnarValue::Scalar(
147                            ScalarValue::LargeUtf8(Some(scalar_result)),
148                        )),
149                        DataType::Utf8View => Ok(ColumnarValue::Scalar(
150                            ScalarValue::Utf8View(Some(scalar_result)),
151                        )),
152                        _ => unreachable!(),
153                    }
154                } else {
155                    let array: ArrayRef = match fmt_type {
156                        DataType::Utf8 => Arc::new(StringArray::from(result)),
157                        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
158                        DataType::Utf8View => Arc::new(StringViewArray::from(result)),
159                        _ => unreachable!(),
160                    };
161                    Ok(ColumnarValue::Array(array))
162                }
163            }
164            ColumnarValue::Array(fmts) => {
165                let mut result = Vec::with_capacity(len.unwrap());
166                for i in 0..len.unwrap() {
167                    let fmt = ScalarValue::try_from_array(fmts, i)?;
168                    match fmt.try_as_str() {
169                        Some(Some(fmt)) => {
170                            let formatter = Formatter::parse(fmt, &data_types)?;
171                            let scalars = args.args[1..]
172                                .iter()
173                                .map(|arg| try_to_scalar(arg.clone(), i))
174                                .collect::<Result<Vec<_>>>()?;
175                            let formatted = formatter.format(&scalars)?;
176                            result.push(Some(formatted));
177                        }
178                        Some(None) => {
179                            result.push(None);
180                        }
181                        _ => unreachable!(),
182                    }
183                }
184                let array: ArrayRef = match fmt_type {
185                    DataType::Utf8 => Arc::new(StringArray::from(result)),
186                    DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
187                    DataType::Utf8View => Arc::new(StringViewArray::from(result)),
188                    _ => unreachable!(),
189                };
190                Ok(ColumnarValue::Array(array))
191            }
192            _ => exec_err!(
193                "The format_string function expects the first argument to be a string"
194            ),
195        }
196    }
197}
198
199fn try_to_scalar(arg: ColumnarValue, index: usize) -> Result<ScalarValue> {
200    match arg {
201        ColumnarValue::Scalar(scalar) => Ok(scalar),
202        ColumnarValue::Array(array) => ScalarValue::try_from_array(&array, index),
203    }
204}
205
/// Compatible with `java.util.Formatter`
///
/// A parsed format string: a sequence of verbatim text pieces and format
/// specifiers that `format` applies to a slice of arguments.
#[derive(Debug)]
pub struct Formatter<'a> {
    /// Parsed pieces of the format string, in the order they are emitted.
    pub elements: Vec<FormatElement<'a>>,
    /// Largest argument index used by any format specifier (0 when there are
    /// none); `format` requires at least this many arguments.
    pub arg_num: usize,
}
212
213impl<'a> Formatter<'a> {
214    pub fn new(elements: Vec<FormatElement<'a>>) -> Self {
215        let arg_num = elements
216            .iter()
217            .map(|element| match element {
218                FormatElement::Format(spec) => spec.argument_index,
219                _ => 0,
220            })
221            .max()
222            .unwrap_or(0);
223        Self { elements, arg_num }
224    }
225
226    /// Parses a printf-style format string into a Formatter with validation.
227    ///
228    /// This method implements a comprehensive parser for Java `java.util.Formatter` syntax,
229    /// processing the format string character by character to identify and validate format
230    /// specifiers against the provided argument types.
231    ///
232    /// # Arguments
233    ///
234    /// * `fmt` - The format string containing literal text and format specifiers
235    /// * `arg_types` - Array of DataFusion DataTypes corresponding to the arguments
236    ///
237    /// # Parsing Process
238    ///
239    /// The parser operates in several phases:
240    ///
241    /// 1. **String Scanning**: Iterates through the format string looking for '%' characters
242    ///    that mark the beginning of format specifiers or special sequences.
243    ///
244    /// 2. **Special Sequence Handling**: Processes escape sequences:
245    ///    - `%%` becomes a literal '%' character
246    ///    - `%n` becomes a newline character
247    ///    - `%<` indicates reuse of the previous argument with a new format specifier
248    ///
249    /// 3. **Argument Index Resolution**: Determines which argument each format specifier refers to:
250    ///    - Sequential indexing: arguments are consumed in order (1, 2, 3, ...)
251    ///    - Positional indexing: explicit argument position using `%n$` syntax
252    ///    - Previous argument reuse: `%<` references the last used argument
253    ///
254    /// 4. **Format Specifier Parsing**: For each format specifier, extracts:
255    ///    - Flags (-, +, space, #, 0, ',', '(')
256    ///    - Width specification (minimum field width)
257    ///    - Precision specification (decimal places or maximum characters)
258    ///    - Conversion type (d, s, f, x, etc.)
259    ///
260    /// 5. **Type Validation**: Verifies that each format specifier's conversion type
261    ///    is compatible with the corresponding argument's DataType. For example:
262    ///    - Integer conversions (%d, %x, %o) require integer DataTypes
263    ///    - String conversions (%s, %S) accept any DataType
264    ///    - Float conversions (%f, %e, %g) require numeric DataTypes
265    ///
266    /// 6. **Element Construction**: Creates FormatElement instances for:
267    ///    - Verbatim text sections (copied directly to output)
268    ///    - Validated format specifiers with their parsed parameters
269    ///
270    /// # Internal State Management
271    ///
272    /// The parser maintains several state variables:
273    /// - `argument_index`: Tracks the current sequential argument position
274    /// - `prev`: Remembers the last used argument index for `%<` references
275    /// - `res`: Accumulates the parsed FormatElement instances
276    /// - `rem`: Points to the remaining unparsed portion of the format string
277    ///
278    /// # Validation and Error Handling
279    ///
280    /// The parser performs extensive validation including:
281    /// - Argument index bounds checking against the provided arg_types array
282    /// - Format specifier syntax validation
283    /// - Type compatibility verification between conversion types and DataTypes
284    /// - Detection of malformed numeric parameters and invalid flag combinations
285    ///
286    /// # Returns
287    ///
288    /// Returns a Formatter containing the parsed elements and the maximum argument
289    /// index encountered, enabling efficient argument validation during formatting.
290    pub fn parse(fmt: &'a str, arg_types: &[DataType]) -> Result<Self> {
291        // find the first %
292        let mut res = Vec::new();
293
294        let mut rem = fmt;
295        let mut argument_index = 0;
296
297        let mut prev: Option<usize> = None;
298
299        while !rem.is_empty() {
300            if let Some((verbatim_prefix, rest)) = rem.split_once('%') {
301                if !verbatim_prefix.is_empty() {
302                    res.push(FormatElement::Verbatim(verbatim_prefix));
303                }
304                if let Some(rest) = rest.strip_prefix('%') {
305                    res.push(FormatElement::Verbatim("%"));
306                    rem = rest;
307                    continue;
308                }
309                if let Some(rest) = rest.strip_prefix('n') {
310                    res.push(FormatElement::Verbatim("\n"));
311                    rem = rest;
312                    continue;
313                }
314                if let Some(rest) = rest.strip_prefix('<') {
315                    // %< means reuse the previous argument
316                    let Some(p) = prev else {
317                        return exec_err!("No previous argument to reference");
318                    };
319                    let (spec, rest) =
320                        take_conversion_specifier(rest, p, &arg_types[p - 1])?;
321                    res.push(FormatElement::Format(spec));
322                    rem = rest;
323                    continue;
324                }
325
326                let (current_argument_index, rest2) = take_numeric_param(rest, false);
327                let (current_argument_index, rest) =
328                    match (current_argument_index, rest2.starts_with('$')) {
329                        (NumericParam::Literal(index), true) => {
330                            (index as usize, &rest2[1..])
331                        }
332                        (NumericParam::FromArgument, true) => {
333                            return exec_err!("Invalid numeric parameter");
334                        }
335                        (_, false) => {
336                            argument_index += 1;
337                            (argument_index, rest)
338                        }
339                    };
340                if current_argument_index == 0 || current_argument_index > arg_types.len()
341                {
342                    return exec_err!(
343                        "Argument index {} is out of bounds",
344                        current_argument_index
345                    );
346                }
347
348                let (spec, rest) = take_conversion_specifier(
349                    rest,
350                    current_argument_index,
351                    &arg_types[current_argument_index - 1],
352                )
353                .map_err(|e| exec_datafusion_err!("{:?}, format string: {:?}", e, fmt))?;
354                res.push(FormatElement::Format(spec));
355                prev = Some(spec.argument_index);
356                rem = rest;
357            } else {
358                res.push(FormatElement::Verbatim(rem));
359                break;
360            }
361        }
362
363        Ok(Self::new(res))
364    }
365
366    pub fn format(&self, args: &[ScalarValue]) -> Result<String> {
367        if args.len() < self.arg_num {
368            return exec_err!(
369                "Expected at least {} arguments, got {}",
370                self.arg_num,
371                args.len()
372            );
373        }
374        let mut string = String::new();
375        for element in &self.elements {
376            match element {
377                FormatElement::Verbatim(text) => {
378                    string.push_str(text);
379                }
380                FormatElement::Format(spec) => {
381                    spec.format(&mut string, &args[spec.argument_index - 1])?;
382                }
383            }
384        }
385        Ok(string)
386    }
387}
388
/// One parsed piece of a format string.
#[derive(Debug)]
pub enum FormatElement<'a> {
    /// Some characters that are copied to the output as-is
    Verbatim(&'a str),
    /// A format specifier, rendered from one of the arguments
    Format(ConversionSpecifier),
}
396
/// Parsed printf conversion specifier
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ConversionSpecifier {
    /// 1-based index of the argument this specifier formats
    pub argument_index: usize,
    /// flag `#`: use the alternate form (`0x`, etc.)?
    pub alt_form: bool,
    /// flag `0`: left-pad with zeros?
    pub zero_pad: bool,
    /// flag `-`: left-adjust (pad with spaces on the right)
    pub left_adj: bool,
    /// flag `' '` (space): indicate sign with a space?
    pub space_sign: bool,
    /// flag `+`: Always show sign? (for signed numbers)
    pub force_sign: bool,
    /// flag `,`: include locale-specific grouping separators
    pub grouping_separator: bool,
    /// flag `(`: enclose negative numbers in parentheses
    pub negative_in_parentheses: bool,
    /// field width
    pub width: NumericParam,
    /// floating point field precision
    pub precision: NumericParam,
    /// conversion type (the letter that ends the specifier)
    pub conversion_type: ConversionType,
}
422
/// Width / precision parameter
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumericParam {
    /// The literal value written in the format string
    Literal(i32),
    /// Get the width from the previous argument
    // NOTE(review): `Formatter::parse` rejects this variant as an explicit
    // argument index (`N$`); confirm where else it is produced or consumed.
    FromArgument,
}
431
/// Printf data type, i.e. the conversion character of a format specifier
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConversionType {
    /// `B`
    BooleanUpper,
    /// `b`
    BooleanLower,
    /// `h`
    ///
    /// Not implemented yet. Can be implemented after <https://github.com/apache/datafusion/pull/17093> is merged
    HexHashLower,
    /// `H`
    HexHashUpper,
    /// `d`
    DecInt,
    /// `o`
    OctInt,
    /// `x`
    HexIntLower,
    /// `X`
    HexIntUpper,
    /// `e`
    SciFloatLower,
    /// `E`
    SciFloatUpper,
    /// `f`
    DecFloatLower,
    /// `g`
    CompactFloatLower,
    /// `G`
    CompactFloatUpper,
    /// `a`
    HexFloatLower,
    /// `A`
    HexFloatUpper,
    /// `t`, followed by a date/time conversion character
    TimeLower(TimeFormat),
    /// `T`, followed by a date/time conversion character
    TimeUpper(TimeFormat),
    /// `c`
    CharLower,
    /// `C`
    CharUpper,
    /// `s`
    StringLower,
    /// `S`
    StringUpper,
}
479
/// Date/time conversion character that follows `%t` / `%T`.
///
/// Variant names encode the character and its case (e.g. `HUpper` is `H`,
/// `KLower` is `k`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeFormat {
    /// `H`: Hour of the day for the 24-hour clock,
    /// formatted as two digits with a leading zero as necessary i.e. 00 - 23. 00 corresponds to midnight.
    HUpper,
    /// `I`: Hour for the 12-hour clock,
    /// formatted as two digits with a leading zero as necessary, i.e. 01 - 12. 01 corresponds to one o'clock (either morning or afternoon).
    IUpper,
    /// `k`: Hour of the day for the 24-hour clock,
    /// i.e. 0 - 23. 0 corresponds to midnight.
    KLower,
    /// `l`: Hour for the 12-hour clock,
    /// i.e. 1 - 12. 1 corresponds to one o'clock (either morning or afternoon).
    LLower,
    /// `M`: Minute within the hour formatted as two digits with a leading zero as necessary, i.e. 00 - 59.
    MUpper,
    /// `S`: Seconds within the minute, formatted as two digits with a leading zero as necessary,
    /// i.e. 00 - 60 ("60" is a special value required to support leap seconds).
    SUpper,
    /// `L`: Millisecond within the second formatted as three digits with leading zeros as necessary, i.e. 000 - 999.
    LUpper,
    /// `N`: Nanosecond within the second, formatted as nine digits with leading zeros as necessary,
    /// i.e. 000000000 - 999999999. The precision of this value is limited by the resolution of the underlying operating system or hardware.
    NUpper,
    /// `p`: Locale-specific morning or afternoon marker in lower case, e.g. "am" or "pm".
    /// Use of the conversion prefix 'T' forces this output to upper case. (Note that 'p' produces lower-case output.
    /// This is different from GNU date and POSIX strftime(3c) which produce upper-case output.)
    PLower,
    /// `z`: RFC 822 style numeric time zone offset from GMT,
    /// e.g. -0800. This value will be adjusted as necessary for Daylight Saving Time.
    /// For long, Long, and Date the time zone used is the default time zone for this instance of the Java virtual machine.
    ZLower,
    /// `Z`: A string representing the abbreviation for the time zone. This value will be adjusted as necessary for Daylight Saving Time.
    /// For long, Long, and Date the time zone used is the default time zone for this instance of the Java virtual machine.
    /// The Formatter's locale will supersede the locale of the argument (if any).
    ZUpper,
    /// `s`: Seconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC,
    /// i.e. Long.MIN_VALUE/1000 to Long.MAX_VALUE/1000.
    SLower,
    /// `Q`: Milliseconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC,
    /// i.e. Long.MIN_VALUE to Long.MAX_VALUE. The precision of this value is limited by the resolution of the underlying operating system or hardware.
    QUpper,
    /// `B`: Locale-specific full month name, e.g. "January", "February".
    BUpper,
    /// `b` or `h`: Locale-specific abbreviated month name, e.g. "Jan", "Feb".
    BLower,
    /// `A`: Locale-specific full weekday name, e.g. "Monday", "Tuesday".
    AUpper,
    /// `a`: Locale-specific abbreviated weekday name, e.g. "Mon", "Tue".
    ALower,
    /// `C`: Four-digit year divided by 100, formatted as two digits with leading zero as necessary, i.e. 00 - 99
    CUpper,
    /// `Y`: Year, formatted to at least four digits with leading zeros as necessary, e.g. 0092 equals 92 CE for the Gregorian calendar.
    YUpper,
    /// `y`: Last two digits of the year, formatted with leading zeros as necessary, i.e. 00 - 99.
    YLower,
    /// `j`: Day of year, formatted as three digits with leading zeros as necessary, e.g. 001 - 366 for the Gregorian calendar. 001 corresponds to the first day of the year.
    JLower,
    /// `m`: Month, formatted as two digits with leading zeros as necessary, i.e. 01 - 13, where "01" is the first month of the year and ("13" is a special value required to support lunar calendars).
    MLower,
    /// `d`: Day of month, formatted as two digits with leading zeros as necessary, i.e. 01 - 31, where "01" is the first day of the month.
    DLower,
    /// `e`: Day of month, formatted as two digits, i.e. 1 - 31 where "1" is the first day of the month.
    ELower,
    /// `R`: Time formatted for the 24-hour clock as "%tH:%tM"
    RUpper,
    /// `T`: Time formatted for the 24-hour clock as "%tH:%tM:%tS"
    TUpper,
    /// `r`: Time formatted for the 12-hour clock as "%tI:%tM:%tS %Tp". The location of the morning or afternoon marker ('%Tp') may be locale-dependent.
    RLower,
    /// `D`: Date formatted as "%tm/%td/%ty"
    DUpper,
    /// `F`: ISO 8601 complete date formatted as "%tY-%tm-%td"
    FUpper,
    /// `c`: Date and time formatted as "%ta %tb %td %tT %tZ %tY", e.g. "Sun Jul 20 16:17:00 EDT 1969"
    CLower,
}
557
558impl TryFrom<char> for TimeFormat {
559    type Error = DataFusionError;
560    fn try_from(value: char) -> Result<Self, Self::Error> {
561        match value {
562            'H' => Ok(TimeFormat::HUpper),
563            'I' => Ok(TimeFormat::IUpper),
564            'k' => Ok(TimeFormat::KLower),
565            'l' => Ok(TimeFormat::LLower),
566            'M' => Ok(TimeFormat::MUpper),
567            'S' => Ok(TimeFormat::SUpper),
568            'L' => Ok(TimeFormat::LUpper),
569            'N' => Ok(TimeFormat::NUpper),
570            'p' => Ok(TimeFormat::PLower),
571            'z' => Ok(TimeFormat::ZLower),
572            'Z' => Ok(TimeFormat::ZUpper),
573            's' => Ok(TimeFormat::SLower),
574            'Q' => Ok(TimeFormat::QUpper),
575            'B' => Ok(TimeFormat::BUpper),
576            'b' | 'h' => Ok(TimeFormat::BLower),
577            'A' => Ok(TimeFormat::AUpper),
578            'a' => Ok(TimeFormat::ALower),
579            'C' => Ok(TimeFormat::CUpper),
580            'Y' => Ok(TimeFormat::YUpper),
581            'y' => Ok(TimeFormat::YLower),
582            'j' => Ok(TimeFormat::JLower),
583            'm' => Ok(TimeFormat::MLower),
584            'd' => Ok(TimeFormat::DLower),
585            'e' => Ok(TimeFormat::ELower),
586            'R' => Ok(TimeFormat::RUpper),
587            'T' => Ok(TimeFormat::TUpper),
588            'r' => Ok(TimeFormat::RLower),
589            'D' => Ok(TimeFormat::DUpper),
590            'F' => Ok(TimeFormat::FUpper),
591            'c' => Ok(TimeFormat::CLower),
592            _ => exec_err!("Invalid time format: {}", value),
593        }
594    }
595}
596
597impl ConversionType {
598    pub fn validate(&self, arg_type: &DataType) -> Result<()> {
599        match self {
600            ConversionType::BooleanLower | ConversionType::BooleanUpper => {
601                if !matches!(arg_type, DataType::Boolean) {
602                    return exec_err!(
603                        "Invalid argument type for boolean conversion: {:?}",
604                        arg_type
605                    );
606                }
607            }
608            ConversionType::CharLower | ConversionType::CharUpper => {
609                if !matches!(
610                    arg_type,
611                    DataType::Int8
612                        | DataType::UInt8
613                        | DataType::Int16
614                        | DataType::UInt16
615                        | DataType::Int32
616                        | DataType::UInt32
617                        | DataType::Int64
618                        | DataType::UInt64
619                ) {
620                    return exec_err!(
621                        "Invalid argument type for char conversion: {:?}",
622                        arg_type
623                    );
624                }
625            }
626            ConversionType::DecInt
627            | ConversionType::OctInt
628            | ConversionType::HexIntLower
629            | ConversionType::HexIntUpper => {
630                if !arg_type.is_integer() {
631                    return exec_err!(
632                        "Invalid argument type for integer conversion: {:?}",
633                        arg_type
634                    );
635                }
636            }
637            ConversionType::SciFloatLower
638            | ConversionType::SciFloatUpper
639            | ConversionType::DecFloatLower
640            | ConversionType::CompactFloatLower
641            | ConversionType::CompactFloatUpper
642            | ConversionType::HexFloatLower
643            | ConversionType::HexFloatUpper => {
644                if !arg_type.is_numeric() {
645                    return exec_err!(
646                        "Invalid argument type for float conversion: {:?}",
647                        arg_type
648                    );
649                }
650            }
651            ConversionType::TimeLower(_) | ConversionType::TimeUpper(_) => {
652                if !arg_type.is_temporal() {
653                    return exec_err!(
654                        "Invalid argument type for time conversion: {:?}",
655                        arg_type
656                    );
657                }
658            }
659            _ => {}
660        }
661        Ok(())
662    }
663
664    fn supports_integer(&self) -> bool {
665        matches!(
666            self,
667            ConversionType::DecInt
668                | ConversionType::HexIntLower
669                | ConversionType::HexIntUpper
670                | ConversionType::OctInt
671                | ConversionType::CharLower
672                | ConversionType::CharUpper
673                | ConversionType::StringLower
674                | ConversionType::StringUpper
675        )
676    }
677
678    fn supports_float(&self) -> bool {
679        matches!(
680            self,
681            ConversionType::DecFloatLower
682                | ConversionType::SciFloatLower
683                | ConversionType::SciFloatUpper
684                | ConversionType::CompactFloatLower
685                | ConversionType::CompactFloatUpper
686                | ConversionType::StringLower
687                | ConversionType::StringUpper
688                | ConversionType::HexFloatLower
689                | ConversionType::HexFloatUpper
690        )
691    }
692
693    fn supports_decimal(&self) -> bool {
694        matches!(
695            self,
696            ConversionType::DecFloatLower
697                | ConversionType::SciFloatLower
698                | ConversionType::SciFloatUpper
699                | ConversionType::CompactFloatLower
700                | ConversionType::CompactFloatUpper
701                | ConversionType::StringLower
702                | ConversionType::StringUpper
703        )
704    }
705
706    fn supports_time(&self) -> bool {
707        matches!(
708            self,
709            ConversionType::TimeLower(_)
710                | ConversionType::TimeUpper(_)
711                | ConversionType::StringLower
712                | ConversionType::StringUpper
713        )
714    }
715
716    fn is_upper(&self) -> bool {
717        matches!(
718            self,
719            ConversionType::BooleanUpper
720                | ConversionType::HexHashUpper
721                | ConversionType::HexIntUpper
722                | ConversionType::SciFloatUpper
723                | ConversionType::CompactFloatUpper
724                | ConversionType::HexFloatUpper
725                | ConversionType::TimeUpper(_)
726                | ConversionType::CharUpper
727                | ConversionType::StringUpper
728        )
729    }
730}
731
732fn take_conversion_specifier<'a>(
733    mut s: &'a str,
734    argument_index: usize,
735    arg_type: &DataType,
736) -> Result<(ConversionSpecifier, &'a str)> {
737    let mut spec = ConversionSpecifier {
738        argument_index,
739        alt_form: false,
740        zero_pad: false,
741        left_adj: false,
742        space_sign: false,
743        force_sign: false,
744        grouping_separator: false,
745        negative_in_parentheses: false,
746        width: NumericParam::Literal(0),
747        precision: NumericParam::FromArgument, // Placeholder - must not be returned!
748        // ignore length modifier
749        conversion_type: ConversionType::DecInt,
750    };
751
752    // parse flags
753    loop {
754        match s.chars().next() {
755            Some('#') => {
756                spec.alt_form = true;
757            }
758            Some('0') => {
759                if spec.left_adj {
760                    return exec_err!("Invalid flag combination: '0' and '-'");
761                }
762                spec.zero_pad = true;
763            }
764            Some('-') => {
765                spec.left_adj = true;
766            }
767            Some(' ') => {
768                if spec.force_sign {
769                    return exec_err!("Invalid flag combination: '+' and ' '");
770                }
771                spec.space_sign = true;
772            }
773            Some('+') => {
774                if spec.space_sign {
775                    return exec_err!("Invalid flag combination: '+' and ' '");
776                }
777                spec.force_sign = true;
778            }
779            Some(',') => {
780                spec.grouping_separator = true;
781            }
782            Some('(') => {
783                spec.negative_in_parentheses = true;
784            }
785            _ => {
786                break;
787            }
788        }
789        s = &s[1..];
790    }
791    // parse width
792    let (w, mut s) = take_numeric_param(s, false);
793    spec.width = w;
794    // parse precision
795    if matches!(s.chars().next(), Some('.')) {
796        s = &s[1..];
797        let (p, s2) = take_numeric_param(s, true);
798        spec.precision = p;
799        s = s2;
800    }
801    let mut chars = s.chars();
802    let mut offset = 1;
803    // parse conversion type
804    spec.conversion_type = match chars.next() {
805        Some('b') => ConversionType::BooleanLower,
806        Some('B') => ConversionType::BooleanUpper,
807        Some('h') => ConversionType::HexHashLower,
808        Some('H') => ConversionType::HexHashUpper,
809        Some('s') => ConversionType::StringLower,
810        Some('S') => ConversionType::StringUpper,
811        Some('c') => ConversionType::CharLower,
812        Some('C') => ConversionType::CharUpper,
813        Some('d') => ConversionType::DecInt,
814        Some('o') => ConversionType::OctInt,
815        Some('x') => ConversionType::HexIntLower,
816        Some('X') => ConversionType::HexIntUpper,
817        Some('e') => ConversionType::SciFloatLower,
818        Some('E') => ConversionType::SciFloatUpper,
819        Some('f') => ConversionType::DecFloatLower,
820        Some('g') => ConversionType::CompactFloatLower,
821        Some('G') => ConversionType::CompactFloatUpper,
822        Some('a') => ConversionType::HexFloatLower,
823        Some('A') => ConversionType::HexFloatUpper,
824        Some('t') => {
825            let Some(chr) = chars.next() else {
826                return exec_err!("Invalid time format: {}", s);
827            };
828            offset += 1;
829            ConversionType::TimeLower(chr.try_into()?)
830        }
831        Some('T') => {
832            let Some(chr) = chars.next() else {
833                return exec_err!("Invalid time format: {}", s);
834            };
835            offset += 1;
836            ConversionType::TimeUpper(chr.try_into()?)
837        }
838        chr => {
839            return plan_err!("Invalid conversion type: {:?}", chr);
840        }
841    };
842
843    spec.conversion_type.validate(arg_type)?;
844    Ok((spec, &s[offset..]))
845}
846
847fn take_numeric_param(s: &str, zero: bool) -> (NumericParam, &str) {
848    match s.chars().next() {
849        Some(digit) if (if zero { '0'..='9' } else { '1'..='9' }).contains(&digit) => {
850            let mut s = s;
851            let mut w = 0;
852            loop {
853                match s.chars().next() {
854                    Some(digit) if digit.is_ascii_digit() => {
855                        w = 10 * w + (digit as i32 - '0' as i32);
856                    }
857                    _ => {
858                        break;
859                    }
860                }
861                s = &s[1..];
862            }
863            (NumericParam::Literal(w), s)
864        }
865        _ => (NumericParam::FromArgument, s),
866    }
867}
868
869impl ConversionSpecifier {
870    pub fn format(&self, string: &mut String, value: &ScalarValue) -> Result<()> {
871        match value {
872            ScalarValue::Boolean(value) => match self.conversion_type {
873                ConversionType::StringLower | ConversionType::StringUpper => {
874                    self.format_string(string, &value.unwrap_or(false).to_string())
875                }
876
877                _ => self.format_boolean(string, value),
878            },
879            ScalarValue::Int8(value) => match (self.conversion_type, value) {
880                (ConversionType::DecInt, Some(value)) => {
881                    self.format_signed(string, *value as i64)
882                }
883                (
884                    ConversionType::HexIntLower
885                    | ConversionType::HexIntUpper
886                    | ConversionType::OctInt,
887                    Some(value),
888                ) => self.format_unsigned(string, (*value as u8) as u64),
889                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
890                    self.format_char(string, *value as u8 as char)
891                }
892                (
893                    ConversionType::StringLower | ConversionType::StringUpper,
894                    Some(value),
895                ) => self.format_string(string, &value.to_string()),
896                (t, None) if t.supports_integer() => self.format_string(string, "null"),
897                _ => {
898                    exec_err!(
899                        "Invalid conversion type: {:?} for Int8",
900                        self.conversion_type
901                    )
902                }
903            },
904            ScalarValue::Int16(value) => match (self.conversion_type, value) {
905                (ConversionType::DecInt, Some(value)) => {
906                    self.format_signed(string, *value as i64)
907                }
908                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
909                    self.format_char(
910                        string,
911                        char::from_u32((*value as u16) as u32).unwrap(),
912                    )
913                }
914                (
915                    ConversionType::HexIntLower
916                    | ConversionType::HexIntUpper
917                    | ConversionType::OctInt,
918                    Some(value),
919                ) => self.format_unsigned(string, (*value as u16) as u64),
920                (
921                    ConversionType::StringLower | ConversionType::StringUpper,
922                    Some(value),
923                ) => self.format_string(string, &value.to_string()),
924                (t, None) if t.supports_integer() => self.format_string(string, "null"),
925                _ => {
926                    exec_err!(
927                        "Invalid conversion type: {:?} for Int16",
928                        self.conversion_type
929                    )
930                }
931            },
932            ScalarValue::Int32(value) => match (self.conversion_type, value) {
933                (ConversionType::DecInt, Some(value)) => {
934                    self.format_signed(string, *value as i64)
935                }
936                (
937                    ConversionType::HexIntLower
938                    | ConversionType::HexIntUpper
939                    | ConversionType::OctInt,
940                    Some(value),
941                ) => self.format_unsigned(string, (*value as u32) as u64),
942                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
943                    self.format_char(string, char::from_u32(*value as u32).unwrap())
944                }
945                (
946                    ConversionType::StringLower | ConversionType::StringUpper,
947                    Some(value),
948                ) => self.format_string(string, &value.to_string()),
949                (t, None) if t.supports_integer() => self.format_string(string, "null"),
950                _ => {
951                    exec_err!(
952                        "Invalid conversion type: {:?} for Int32",
953                        self.conversion_type
954                    )
955                }
956            },
957            ScalarValue::Int64(value) => match (self.conversion_type, value) {
958                (ConversionType::DecInt, Some(value)) => {
959                    self.format_signed(string, *value)
960                }
961                (
962                    ConversionType::HexIntLower
963                    | ConversionType::HexIntUpper
964                    | ConversionType::OctInt,
965                    Some(value),
966                ) => self.format_unsigned(string, *value as u64),
967                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
968                    self.format_char(
969                        string,
970                        char::from_u32((*value as u64) as u32).unwrap(),
971                    )
972                }
973                (
974                    ConversionType::StringLower | ConversionType::StringUpper,
975                    Some(value),
976                ) => self.format_string(string, &value.to_string()),
977                (t, None) if t.supports_integer() => self.format_string(string, "null"),
978                _ => {
979                    exec_err!(
980                        "Invalid conversion type: {:?} for Int64",
981                        self.conversion_type
982                    )
983                }
984            },
985            ScalarValue::UInt8(value) => match (self.conversion_type, value) {
986                (
987                    ConversionType::DecInt
988                    | ConversionType::HexIntLower
989                    | ConversionType::HexIntUpper
990                    | ConversionType::OctInt,
991                    Some(value),
992                ) => self.format_unsigned(string, *value as u64),
993                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
994                    self.format_char(string, *value as char)
995                }
996                (
997                    ConversionType::StringLower | ConversionType::StringUpper,
998                    Some(value),
999                ) => self.format_string(string, &value.to_string()),
1000                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1001                _ => {
1002                    exec_err!(
1003                        "Invalid conversion type: {:?} for UInt8",
1004                        self.conversion_type
1005                    )
1006                }
1007            },
1008            ScalarValue::UInt16(value) => match (self.conversion_type, value) {
1009                (
1010                    ConversionType::DecInt
1011                    | ConversionType::HexIntLower
1012                    | ConversionType::HexIntUpper
1013                    | ConversionType::OctInt,
1014                    Some(value),
1015                ) => self.format_unsigned(string, *value as u64),
1016                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1017                    self.format_char(string, char::from_u32(*value as u32).unwrap())
1018                }
1019                (
1020                    ConversionType::StringLower | ConversionType::StringUpper,
1021                    Some(value),
1022                ) => self.format_string(string, &value.to_string()),
1023                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1024                _ => {
1025                    exec_err!(
1026                        "Invalid conversion type: {:?} for UInt16",
1027                        self.conversion_type
1028                    )
1029                }
1030            },
1031            ScalarValue::UInt32(value) => match (self.conversion_type, value) {
1032                (
1033                    ConversionType::DecInt
1034                    | ConversionType::HexIntLower
1035                    | ConversionType::HexIntUpper
1036                    | ConversionType::OctInt,
1037                    Some(value),
1038                ) => self.format_unsigned(string, *value as u64),
1039                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1040                    self.format_char(string, char::from_u32(*value).unwrap())
1041                }
1042                (
1043                    ConversionType::StringLower | ConversionType::StringUpper,
1044                    Some(value),
1045                ) => self.format_string(string, &value.to_string()),
1046                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1047                _ => {
1048                    exec_err!(
1049                        "Invalid conversion type: {:?} for UInt32",
1050                        self.conversion_type
1051                    )
1052                }
1053            },
1054            ScalarValue::UInt64(value) => match (self.conversion_type, value) {
1055                (
1056                    ConversionType::DecInt
1057                    | ConversionType::HexIntLower
1058                    | ConversionType::HexIntUpper
1059                    | ConversionType::OctInt,
1060                    Some(value),
1061                ) => self.format_unsigned(string, *value),
1062                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1063                    self.format_char(string, char::from_u32(*value as u32).unwrap())
1064                }
1065                (
1066                    ConversionType::StringLower | ConversionType::StringUpper,
1067                    Some(value),
1068                ) => self.format_string(string, &value.to_string()),
1069                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1070                _ => {
1071                    exec_err!(
1072                        "Invalid conversion type: {:?} for UInt64",
1073                        self.conversion_type
1074                    )
1075                }
1076            },
1077            ScalarValue::Float16(value) => match (self.conversion_type, value) {
1078                (
1079                    ConversionType::DecFloatLower
1080                    | ConversionType::SciFloatLower
1081                    | ConversionType::SciFloatUpper
1082                    | ConversionType::CompactFloatLower
1083                    | ConversionType::CompactFloatUpper,
1084                    Some(value),
1085                ) => self.format_float(string, value.to_f64().unwrap()),
1086                (
1087                    ConversionType::StringLower | ConversionType::StringUpper,
1088                    Some(value),
1089                ) => self.format_string(string, &value.to_f32().unwrap().spark_string()),
1090                (
1091                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1092                    Some(value),
1093                ) => self.format_hex_float(string, value.to_f64().unwrap()),
1094                (t, None) if t.supports_float() => self.format_string(string, "null"),
1095                _ => {
1096                    exec_err!(
1097                        "Invalid conversion type: {:?} for Float16",
1098                        self.conversion_type
1099                    )
1100                }
1101            },
1102            ScalarValue::Float32(value) => match (self.conversion_type, value) {
1103                (
1104                    ConversionType::DecFloatLower
1105                    | ConversionType::SciFloatLower
1106                    | ConversionType::SciFloatUpper
1107                    | ConversionType::CompactFloatLower
1108                    | ConversionType::CompactFloatUpper,
1109                    Some(value),
1110                ) => self.format_float(string, *value as f64),
1111                (
1112                    ConversionType::StringLower | ConversionType::StringUpper,
1113                    Some(value),
1114                ) => self.format_string(string, &value.spark_string()),
1115                (
1116                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1117                    Some(value),
1118                ) => self.format_hex_float(string, *value as f64),
1119                (t, None) if t.supports_float() => self.format_string(string, "null"),
1120                _ => {
1121                    exec_err!(
1122                        "Invalid conversion type: {:?} for Float32",
1123                        self.conversion_type
1124                    )
1125                }
1126            },
1127            ScalarValue::Float64(value) => match (self.conversion_type, value) {
1128                (
1129                    ConversionType::DecFloatLower
1130                    | ConversionType::SciFloatLower
1131                    | ConversionType::SciFloatUpper
1132                    | ConversionType::CompactFloatLower
1133                    | ConversionType::CompactFloatUpper,
1134                    Some(value),
1135                ) => self.format_float(string, *value),
1136                (
1137                    ConversionType::StringLower | ConversionType::StringUpper,
1138                    Some(value),
1139                ) => self.format_string(string, &value.spark_string()),
1140                (
1141                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1142                    Some(value),
1143                ) => self.format_hex_float(string, *value),
1144                (t, None) if t.supports_float() => self.format_string(string, "null"),
1145                _ => {
1146                    exec_err!(
1147                        "Invalid conversion type: {:?} for Float64",
1148                        self.conversion_type
1149                    )
1150                }
1151            },
1152            ScalarValue::Utf8(value) => {
1153                let value: &str = match value {
1154                    Some(value) => value.as_str(),
1155                    None => "null",
1156                };
1157                if matches!(
1158                    self.conversion_type,
1159                    ConversionType::StringLower | ConversionType::StringUpper
1160                ) {
1161                    self.format_string(string, value)
1162                } else {
1163                    exec_err!(
1164                        "Invalid conversion type: {:?} for Utf8",
1165                        self.conversion_type
1166                    )
1167                }
1168            }
1169            ScalarValue::LargeUtf8(value) => {
1170                let value: &str = match value {
1171                    Some(value) => value.as_str(),
1172                    None => "null",
1173                };
1174                if matches!(
1175                    self.conversion_type,
1176                    ConversionType::StringLower | ConversionType::StringUpper
1177                ) {
1178                    self.format_string(string, value)
1179                } else {
1180                    exec_err!(
1181                        "Invalid conversion type: {:?} for LargeUtf8",
1182                        self.conversion_type
1183                    )
1184                }
1185            }
1186            ScalarValue::Utf8View(value) => {
1187                let value: &str = match value {
1188                    Some(value) => value.as_str(),
1189                    None => "null",
1190                };
1191                self.format_string(string, value)
1192            }
1193            ScalarValue::Decimal128(value, _, scale) => {
1194                match (self.conversion_type, value) {
1195                    (
1196                        ConversionType::DecFloatLower
1197                        | ConversionType::SciFloatLower
1198                        | ConversionType::SciFloatUpper
1199                        | ConversionType::CompactFloatLower
1200                        | ConversionType::CompactFloatUpper,
1201                        Some(value),
1202                    ) => self.format_decimal(string, &value.to_string(), *scale as i64),
1203                    (
1204                        ConversionType::StringLower | ConversionType::StringUpper,
1205                        Some(value),
1206                    ) => self.format_string(string, &value.to_string()),
1207                    (t, None) if t.supports_decimal() => {
1208                        self.format_string(string, "null")
1209                    }
1210
1211                    _ => {
1212                        exec_err!(
1213                            "Invalid conversion type: {:?} for Decimal128",
1214                            self.conversion_type
1215                        )
1216                    }
1217                }
1218            }
1219            ScalarValue::Decimal256(value, _, scale) => {
1220                match (self.conversion_type, value) {
1221                    (
1222                        ConversionType::DecFloatLower
1223                        | ConversionType::SciFloatLower
1224                        | ConversionType::SciFloatUpper
1225                        | ConversionType::CompactFloatLower
1226                        | ConversionType::CompactFloatUpper,
1227                        Some(value),
1228                    ) => self.format_decimal(string, &value.to_string(), *scale as i64),
1229                    (
1230                        ConversionType::StringLower | ConversionType::StringUpper,
1231                        Some(value),
1232                    ) => self.format_string(string, &value.to_string()),
1233                    (t, None) if t.supports_decimal() => {
1234                        self.format_string(string, "null")
1235                    }
1236
1237                    _ => {
1238                        exec_err!(
1239                            "Invalid conversion type: {:?} for Decimal256",
1240                            self.conversion_type
1241                        )
1242                    }
1243                }
1244            }
1245
1246            ScalarValue::Time32Second(value) => match (self.conversion_type, value) {
1247                (
1248                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1249                    Some(value),
1250                ) => self.format_time(string, *value as i64 * 1000000000, &None),
1251                (
1252                    ConversionType::StringLower | ConversionType::StringUpper,
1253                    Some(value),
1254                ) => self.format_string(string, &value.to_string()),
1255                (t, None) if t.supports_time() => self.format_string(string, "null"),
1256                _ => {
1257                    exec_err!(
1258                        "Invalid conversion type: {:?} for Time32Second",
1259                        self.conversion_type
1260                    )
1261                }
1262            },
1263            ScalarValue::Time32Millisecond(value) => {
1264                match (self.conversion_type, value) {
1265                    (
1266                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1267                        Some(value),
1268                    ) => self.format_time(string, *value as i64 * 1000000, &None),
1269                    (
1270                        ConversionType::StringLower | ConversionType::StringUpper,
1271                        Some(value),
1272                    ) => self.format_string(string, &value.to_string()),
1273                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1274                    _ => {
1275                        exec_err!(
1276                            "Invalid conversion type: {:?} for Time32Millisecond",
1277                            self.conversion_type
1278                        )
1279                    }
1280                }
1281            }
1282            ScalarValue::Time64Microsecond(value) => {
1283                match (self.conversion_type, value) {
1284                    (
1285                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1286                        Some(value),
1287                    ) => self.format_time(string, *value * 1000, &None),
1288                    (
1289                        ConversionType::StringLower | ConversionType::StringUpper,
1290                        Some(value),
1291                    ) => self.format_string(string, &value.to_string()),
1292                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1293                    _ => {
1294                        exec_err!(
1295                            "Invalid conversion type: {:?} for Time64Microsecond",
1296                            self.conversion_type
1297                        )
1298                    }
1299                }
1300            }
1301            ScalarValue::Time64Nanosecond(value) => match (self.conversion_type, value) {
1302                (
1303                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1304                    Some(value),
1305                ) => self.format_time(string, *value, &None),
1306                (
1307                    ConversionType::StringLower | ConversionType::StringUpper,
1308                    Some(value),
1309                ) => self.format_string(string, &value.to_string()),
1310                (t, None) if t.supports_time() => self.format_string(string, "null"),
1311                _ => {
1312                    exec_err!(
1313                        "Invalid conversion type: {:?} for Time64Nanosecond",
1314                        self.conversion_type
1315                    )
1316                }
1317            },
1318            ScalarValue::TimestampSecond(value, zone) => {
1319                match (self.conversion_type, value) {
1320                    (
1321                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1322                        Some(value),
1323                    ) => self.format_time(string, value * 1000000000, zone),
1324                    (
1325                        ConversionType::StringLower | ConversionType::StringUpper,
1326                        Some(value),
1327                    ) => self.format_string(string, &value.to_string()),
1328                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1329                    _ => {
1330                        exec_err!(
1331                            "Invalid conversion type: {:?} for TimestampSecond",
1332                            self.conversion_type
1333                        )
1334                    }
1335                }
1336            }
1337            ScalarValue::TimestampMillisecond(value, zone) => {
1338                match (self.conversion_type, value) {
1339                    (
1340                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1341                        Some(value),
1342                    ) => self.format_time(string, *value * 1000000, zone),
1343                    (
1344                        ConversionType::StringLower | ConversionType::StringUpper,
1345                        Some(value),
1346                    ) => self.format_string(string, &value.to_string()),
1347
1348                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1349                    _ => {
1350                        exec_err!(
1351                            "Invalid conversion type: {:?} for TimestampMillisecond",
1352                            self.conversion_type
1353                        )
1354                    }
1355                }
1356            }
1357            ScalarValue::TimestampMicrosecond(value, zone) => {
1358                match (self.conversion_type, value) {
1359                    (
1360                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1361                        Some(value),
1362                    ) => self.format_time(string, value * 1000, zone),
1363                    (
1364                        ConversionType::StringLower | ConversionType::StringUpper,
1365                        Some(value),
1366                    ) => self.format_string(string, &value.to_string()),
1367                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1368                    _ => {
1369                        exec_err!(
1370                            "Invalid conversion type: {:?} for timestampmicrosecond",
1371                            self.conversion_type
1372                        )
1373                    }
1374                }
1375            }
1376
1377            ScalarValue::TimestampNanosecond(value, zone) => {
1378                match (self.conversion_type, value) {
1379                    (
1380                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1381                        Some(value),
1382                    ) => self.format_time(string, *value, zone),
1383                    (
1384                        ConversionType::StringLower | ConversionType::StringUpper,
1385                        Some(value),
1386                    ) => self.format_string(string, &value.to_string()),
1387                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1388                    _ => {
1389                        exec_err!(
1390                            "Invalid conversion type: {:?} for TimestampNanosecond",
1391                            self.conversion_type
1392                        )
1393                    }
1394                }
1395            }
1396            ScalarValue::Date32(value) => match (self.conversion_type, value) {
1397                (
1398                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1399                    Some(value),
1400                ) => self.format_date(string, *value as i64),
1401                (
1402                    ConversionType::StringLower | ConversionType::StringUpper,
1403                    Some(value),
1404                ) => self.format_string(string, &value.to_string()),
1405                (t, None) if t.supports_time() => self.format_string(string, "null"),
1406                _ => {
1407                    exec_err!(
1408                        "Invalid conversion type: {:?} for Date32",
1409                        self.conversion_type
1410                    )
1411                }
1412            },
1413            ScalarValue::Date64(value) => match (self.conversion_type, value) {
1414                (
1415                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1416                    Some(value),
1417                ) => self.format_date(string, *value),
1418                (
1419                    ConversionType::StringLower | ConversionType::StringUpper,
1420                    Some(value),
1421                ) => self.format_string(string, &value.to_string()),
1422                (t, None) if t.supports_time() => self.format_string(string, "null"),
1423                _ => {
1424                    exec_err!(
1425                        "Invalid conversion type: {:?} for Date64",
1426                        self.conversion_type
1427                    )
1428                }
1429            },
1430            ScalarValue::Null => {
1431                let value = "null".to_string();
1432                self.format_string(string, &value)
1433            }
1434            _ => exec_err!("Invalid scalar value: {:?}", value),
1435        }
1436    }
1437
1438    fn format_hex_float(&self, writer: &mut String, value: f64) -> Result<()> {
1439        // Handle special cases first
1440        let (sign, raw_exponent, mantissa) = value.to_parts();
1441        let is_subnormal = raw_exponent == 0;
1442
1443        let precision = match self.precision {
1444            NumericParam::FromArgument => None,
1445            NumericParam::Literal(p) => Some(p),
1446        };
1447
1448        // Determine if we need to normalize subnormal numbers
1449        // Only normalize when precision is specified and less than full mantissa width
1450        let mantissa_hex_digits = f64::MANTISSA_BITS.div_ceil(4); // 13 for f64
1451        let should_normalize = is_subnormal
1452            && precision.is_some()
1453            && precision.unwrap() < mantissa_hex_digits as i32;
1454
1455        let (value, raw_exponent, mantissa) = if should_normalize {
1456            let value = value * f64::SCALEUP;
1457            let (_, raw_exponent, mantissa) = value.to_parts();
1458            (value, raw_exponent, mantissa)
1459        } else {
1460            (value, raw_exponent, mantissa)
1461        };
1462
1463        let mut temp = String::new();
1464
1465        let sign_char = if sign {
1466            "-"
1467        } else if self.force_sign {
1468            "+"
1469        } else if self.space_sign {
1470            " "
1471        } else {
1472            ""
1473        };
1474        match value.category() {
1475            FpCategory::Nan => {
1476                write!(&mut temp, "NaN")?;
1477            }
1478            FpCategory::Infinite => {
1479                write!(&mut temp, "{sign_char}Infinity")?;
1480            }
1481            FpCategory::Zero => {
1482                write!(&mut temp, "{sign_char}0x0.0p0")?;
1483            }
1484            _ => {
1485                let bias = i32::from(f64::EXPONENT_BIAS);
1486                // Calculate actual exponent
1487                // For subnormal numbers, the exponent is 1 - bias (not 0 - bias)
1488                let exponent = if is_subnormal && !should_normalize {
1489                    1 - bias
1490                } else {
1491                    raw_exponent as i32 - bias
1492                };
1493
1494                // Handle precision for rounding
1495                let final_mantissa = if let Some(p) = precision {
1496                    if p == 0 {
1497                        // For precision 0, we still need at least 1 hex digit
1498                        // Round to the nearest integer mantissa value
1499                        let shift_distance = f64::MANTISSA_BITS as i32 - 4; // Keep 1 hex digit (4 bits)
1500                        let shifted = mantissa >> shift_distance;
1501                        let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1502                        let round_bit = 1u64 << (shift_distance - 1);
1503
1504                        // Round to nearest, ties to even
1505                        if rounding_bits > round_bit
1506                            || (rounding_bits == round_bit && (shifted & 1) != 0)
1507                        {
1508                            (shifted + 1) << shift_distance
1509                        } else {
1510                            shifted << shift_distance
1511                        }
1512                    } else {
1513                        // Apply rounding based on precision
1514                        let precision_bits = p * 4; // Each hex digit is 4 bits
1515                        let keep_bits = f64::MANTISSA_BITS as i32;
1516                        let shift_distance = keep_bits - precision_bits;
1517
1518                        if shift_distance > 0 {
1519                            let shifted = mantissa >> shift_distance;
1520                            let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1521                            let round_bit = 1u64 << (shift_distance - 1);
1522
1523                            // Round to nearest, ties to even
1524                            if rounding_bits > round_bit
1525                                || (rounding_bits == round_bit && (shifted & 1) != 0)
1526                            {
1527                                (shifted + 1) << shift_distance
1528                            } else {
1529                                shifted << shift_distance
1530                            }
1531                        } else {
1532                            mantissa
1533                        }
1534                    }
1535                } else {
1536                    mantissa
1537                };
1538
1539                if is_subnormal && !should_normalize {
1540                    // Original subnormal format: 0x0.xxxp-1022
1541                    if precision.is_some() {
1542                        // precision >= 13, show as subnormal
1543                        let full_hex = format!(
1544                            "{:0width$x}",
1545                            final_mantissa,
1546                            width = mantissa_hex_digits as usize
1547                        );
1548                        write!(&mut temp, "{sign_char}0x0.{full_hex}p{exponent}")?;
1549                    } else {
1550                        // No precision specified, show full subnormal
1551                        let hex_digits = format!(
1552                            "{:0width$x}",
1553                            final_mantissa,
1554                            width = mantissa_hex_digits as usize
1555                        );
1556                        write!(&mut temp, "{sign_char}0x0.{hex_digits}p{exponent}")?;
1557                    }
1558                } else {
1559                    // Normal format or normalized subnormal: 0x1.xxxpN
1560                    if let Some(p) = precision {
1561                        let p = if p == 0 { 1 } else { p };
1562                        let hex_digits = format!("{final_mantissa:x}");
1563                        let formatted_digits = if p as usize >= hex_digits.len() {
1564                            // Pad with zeros to match precision
1565                            format!("{:0<width$}", hex_digits, width = p as usize)
1566                        } else {
1567                            hex_digits[..p as usize].to_string()
1568                        };
1569                        write!(
1570                            &mut temp,
1571                            "{sign_char}0x1.{formatted_digits}p{exponent}"
1572                        )?;
1573                    } else {
1574                        // Default: show all significant digits
1575                        let mut hex_digits = format!("{final_mantissa:x}");
1576                        hex_digits = trim_trailing_0s_hex(&hex_digits).to_owned();
1577                        if hex_digits.is_empty() {
1578                            write!(&mut temp, "{sign_char}0x1.0p{exponent}")?;
1579                        } else {
1580                            write!(&mut temp, "{sign_char}0x1.{hex_digits}p{exponent}")?;
1581                        }
1582                    }
1583                }
1584                if should_normalize {
1585                    let (prefix, exp) = temp.split_once('p').unwrap();
1586                    let iexp = exp.parse::<i32>().unwrap() - f64::SCALEUP_POWER as i32;
1587                    temp = format!("{prefix}p{iexp}");
1588                }
1589            }
1590        };
1591
1592        if self.conversion_type.is_upper() {
1593            temp = temp.to_ascii_uppercase();
1594        }
1595
1596        let NumericParam::Literal(width) = self.width else {
1597            writer.push_str(&temp);
1598            return Ok(());
1599        };
1600        if self.left_adj {
1601            writer.push_str(&temp);
1602            for _ in temp.len()..width as usize {
1603                writer.push(' ');
1604            }
1605        } else if self.zero_pad && value.is_finite() {
1606            let delimiter = if self.conversion_type.is_upper() {
1607                "0X"
1608            } else {
1609                "0x"
1610            };
1611            let (prefix, suffix) = temp.split_once(delimiter).unwrap();
1612            writer.push_str(prefix);
1613            writer.push_str(delimiter);
1614            for _ in temp.len()..width as usize {
1615                writer.push('0');
1616            }
1617            writer.push_str(suffix);
1618        } else {
1619            while temp.len() < width as usize {
1620                temp = " ".to_owned() + &temp;
1621            }
1622            writer.push_str(&temp);
1623        };
1624        Ok(())
1625    }
1626
1627    fn format_char(&self, writer: &mut String, value: char) -> Result<()> {
1628        let upper = self.conversion_type.is_upper();
1629        match self.conversion_type {
1630            ConversionType::CharLower | ConversionType::CharUpper => {
1631                let NumericParam::Literal(width) = self.width else {
1632                    if upper {
1633                        writer.push(value.to_ascii_uppercase());
1634                    } else {
1635                        writer.push(value);
1636                    }
1637                    return Ok(());
1638                };
1639
1640                let start_len = writer.len();
1641                if self.left_adj {
1642                    if upper {
1643                        writer.push(value.to_ascii_uppercase());
1644                    } else {
1645                        writer.push(value);
1646                    }
1647                    while writer.len() - start_len < width as usize {
1648                        writer.push(' ');
1649                    }
1650                } else {
1651                    while writer.len() - start_len + value.len_utf8() < width as usize {
1652                        writer.push(' ');
1653                    }
1654                    if upper {
1655                        writer.push(value.to_ascii_uppercase());
1656                    } else {
1657                        writer.push(value);
1658                    }
1659                }
1660                Ok(())
1661            }
1662            _ => exec_err!(
1663                "Invalid conversion type: {:?} for char",
1664                self.conversion_type
1665            ),
1666        }
1667    }
1668
1669    fn format_boolean(&self, writer: &mut String, value: &Option<bool>) -> Result<()> {
1670        let value = value.unwrap_or(false);
1671
1672        let formatted = match self.conversion_type {
1673            ConversionType::BooleanUpper => {
1674                if value {
1675                    "TRUE"
1676                } else {
1677                    "FALSE"
1678                }
1679            }
1680            ConversionType::BooleanLower => {
1681                if value {
1682                    "true"
1683                } else {
1684                    "false"
1685                }
1686            }
1687            _ => {
1688                return exec_err!(
1689                    "Invalid conversion type: {:?} for boolean array",
1690                    self.conversion_type
1691                );
1692            }
1693        };
1694        self.format_str(writer, formatted)
1695    }
1696
1697    fn format_float(&self, writer: &mut String, value: f64) -> Result<()> {
1698        let mut prefix = String::new();
1699        let mut suffix = String::new();
1700        let mut number = String::new();
1701        let upper = self.conversion_type.is_upper();
1702
1703        // set up the sign
1704        if value.is_sign_negative() {
1705            if self.negative_in_parentheses {
1706                prefix.push('(');
1707                suffix.push(')');
1708            } else {
1709                prefix.push('-');
1710            }
1711        } else if self.space_sign {
1712            prefix.push(' ');
1713        } else if self.force_sign {
1714            prefix.push('+');
1715        }
1716
1717        if value.is_finite() {
1718            let mut use_scientific = false;
1719            let mut strip_trailing_0s = false;
1720            let mut abs = value.abs();
1721            let mut exponent = abs.log10().floor() as i32;
1722            let mut precision = match self.precision {
1723                NumericParam::Literal(p) => p,
1724                _ => 6,
1725            };
1726            match self.conversion_type {
1727                ConversionType::DecFloatLower => {
1728                    // default
1729                }
1730                ConversionType::SciFloatLower => {
1731                    use_scientific = true;
1732                }
1733                ConversionType::SciFloatUpper => {
1734                    use_scientific = true;
1735                }
1736                ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
1737                    strip_trailing_0s = true;
1738                    if precision == 0 {
1739                        precision = 1;
1740                    }
1741                    // exponent signifies significant digits - we must round now
1742                    // to (re)calculate the exponent
1743                    let rounding_factor =
1744                        10.0_f64.powf((precision - 1 - exponent) as f64);
1745                    let rounded_fixed = (abs * rounding_factor).round();
1746                    abs = rounded_fixed / rounding_factor;
1747                    exponent = abs.log10().floor() as i32;
1748                    if exponent < -4 || exponent >= precision {
1749                        use_scientific = true;
1750                        precision -= 1;
1751                    } else {
1752                        // precision specifies the number of significant digits
1753                        precision -= 1 + exponent;
1754                    }
1755                }
1756                _ => {
1757                    return exec_err!(
1758                        "Invalid conversion type: {:?} for float",
1759                        self.conversion_type
1760                    );
1761                }
1762            }
1763
1764            if use_scientific {
1765                // Manual scientific notation formatting for uppercase E
1766                let mantissa = abs / 10.0_f64.powf(exponent as f64);
1767                let exp_char = if upper { 'E' } else { 'e' };
1768                number = format!("{mantissa:.prec$}", prec = precision as usize);
1769                if strip_trailing_0s {
1770                    number = trim_trailing_0s(&number).to_owned();
1771                }
1772                number = format!("{number}{exp_char}{exponent:+03}");
1773            } else {
1774                number = format!("{abs:.prec$}", prec = precision as usize);
1775                if strip_trailing_0s {
1776                    number = trim_trailing_0s(&number).to_owned();
1777                }
1778            }
1779            if self.alt_form && !number.contains('.') {
1780                number += ".";
1781            }
1782        } else {
1783            // not finite
1784            match self.conversion_type {
1785                ConversionType::DecFloatLower
1786                | ConversionType::SciFloatLower
1787                | ConversionType::CompactFloatLower => {
1788                    if value.is_infinite() {
1789                        number.push_str("Infinity")
1790                    } else {
1791                        number.push_str("NaN")
1792                    }
1793                }
1794                ConversionType::SciFloatUpper | ConversionType::CompactFloatUpper => {
1795                    if value.is_infinite() {
1796                        number.push_str("INFINITY")
1797                    } else {
1798                        number.push_str("NAN")
1799                    }
1800                }
1801                _ => {
1802                    return exec_err!(
1803                        "Invalid conversion type: {:?} for float",
1804                        self.conversion_type
1805                    );
1806                }
1807            }
1808        }
1809        // Take care of padding
1810        let NumericParam::Literal(width) = self.width else {
1811            writer.push_str(&prefix);
1812            writer.push_str(&number);
1813            writer.push_str(&suffix);
1814            return Ok(());
1815        };
1816        if self.left_adj {
1817            let mut full_num = prefix + &number + &suffix;
1818            while full_num.len() < width as usize {
1819                full_num.push(' ');
1820            }
1821            writer.push_str(&full_num);
1822        } else if self.zero_pad && value.is_finite() {
1823            while prefix.len() + number.len() + suffix.len() < width as usize {
1824                prefix.push('0');
1825            }
1826            writer.push_str(&prefix);
1827            writer.push_str(&number);
1828            writer.push_str(&suffix);
1829        } else {
1830            let mut full_num = prefix + &number + &suffix;
1831            while full_num.len() < width as usize {
1832                full_num = " ".to_owned() + &full_num;
1833            }
1834            writer.push_str(&full_num);
1835        };
1836
1837        Ok(())
1838    }
1839
1840    fn format_signed(&self, writer: &mut String, value: i64) -> Result<()> {
1841        let negative = value < 0;
1842        let abs_val = value.abs();
1843
1844        let (sign_prefix, sign_suffix) = if negative && self.negative_in_parentheses {
1845            ("(".to_owned(), ")".to_owned())
1846        } else if negative {
1847            ("-".to_owned(), "".to_owned())
1848        } else if self.force_sign {
1849            ("+".to_owned(), "".to_owned())
1850        } else if self.space_sign {
1851            (" ".to_owned(), "".to_owned())
1852        } else {
1853            ("".to_owned(), "".to_owned())
1854        };
1855
1856        let mut mod_spec = *self;
1857        mod_spec.width = match self.width {
1858            NumericParam::Literal(w) => NumericParam::Literal(
1859                w - sign_prefix.len() as i32 - sign_suffix.len() as i32,
1860            ),
1861            _ => NumericParam::FromArgument,
1862        };
1863        let mut formatted = String::new();
1864        mod_spec.format_unsigned(&mut formatted, abs_val as u64)?;
1865        // put the sign a after any leading spaces
1866        let mut actual_number = &formatted[0..];
1867        let mut leading_spaces = &formatted[0..0];
1868        if let Some(first_non_space) = formatted.find(|c| c != ' ') {
1869            actual_number = &formatted[first_non_space..];
1870            leading_spaces = &formatted[0..first_non_space];
1871        }
1872        write!(
1873            writer,
1874            "{}{}{}{}",
1875            leading_spaces.to_owned(),
1876            sign_prefix,
1877            actual_number,
1878            sign_suffix
1879        )
1880        .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1881        Ok(())
1882    }
1883
1884    fn format_unsigned(&self, writer: &mut String, value: u64) -> Result<()> {
1885        let mut s = String::new();
1886        let mut alt_prefix = "";
1887        match self.conversion_type {
1888            ConversionType::DecInt => {
1889                let num_str = format!("{value}");
1890                if self.grouping_separator {
1891                    // Add thousands separators
1892                    let mut result = String::new();
1893                    let chars: Vec<char> = num_str.chars().collect();
1894                    for (i, c) in chars.iter().enumerate() {
1895                        if i > 0 && (chars.len() - i).is_multiple_of(3) {
1896                            result.push(',');
1897                        }
1898                        result.push(*c);
1899                    }
1900                    s = result;
1901                } else {
1902                    s = num_str;
1903                }
1904            }
1905            ConversionType::HexIntLower => {
1906                alt_prefix = "0x";
1907                write!(&mut s, "{value:x}")
1908                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1909            }
1910            ConversionType::HexIntUpper => {
1911                alt_prefix = "0X";
1912                write!(&mut s, "{value:X}")
1913                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1914            }
1915            ConversionType::OctInt => {
1916                alt_prefix = "0";
1917                write!(&mut s, "{value:o}")
1918                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1919            }
1920            _ => {
1921                return exec_err!(
1922                    "Invalid conversion type: {:?} for u64",
1923                    self.conversion_type
1924                );
1925            }
1926        }
1927        let mut prefix = if self.alt_form {
1928            alt_prefix.to_owned()
1929        } else {
1930            String::new()
1931        };
1932
1933        let formatted = if let NumericParam::Literal(width) = self.width {
1934            if self.left_adj {
1935                let mut num_str = prefix + &s;
1936                while num_str.len() < width as usize {
1937                    num_str.push(' ');
1938                }
1939                num_str
1940            } else if self.zero_pad {
1941                while prefix.len() + s.len() < width as usize {
1942                    prefix.push('0');
1943                }
1944                prefix + &s
1945            } else {
1946                let mut num_str = prefix + &s;
1947                while num_str.len() < width as usize {
1948                    num_str = " ".to_owned() + &num_str;
1949                }
1950                num_str
1951            }
1952        } else {
1953            prefix + &s
1954        };
1955        write!(writer, "{formatted}")
1956            .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1957        Ok(())
1958    }
1959
1960    fn format_str(&self, writer: &mut String, value: &str) -> Result<()> {
1961        // Take care of precision, putting the truncated string in `content`
1962        let precision: usize = match self.precision {
1963            NumericParam::Literal(p) => p,
1964            _ => i32::MAX,
1965        }
1966        .try_into()
1967        .unwrap_or_default();
1968        let content_len = {
1969            let mut content_len = precision.min(value.len());
1970            while !value.is_char_boundary(content_len) {
1971                content_len -= 1;
1972            }
1973            content_len
1974        };
1975        let content = &value[..content_len];
1976
1977        // Pad to width if needed, putting the padded string in `s`
1978
1979        if let NumericParam::Literal(width) = self.width {
1980            let start_len = writer.len();
1981            if self.left_adj {
1982                writer.push_str(content);
1983                while writer.len() - start_len < width as usize {
1984                    writer.push(' ');
1985                }
1986            } else {
1987                while writer.len() - start_len + content.len() < width as usize {
1988                    writer.push(' ');
1989                }
1990                writer.push_str(content);
1991            }
1992        } else {
1993            writer.push_str(content);
1994        }
1995        Ok(())
1996    }
1997
1998    fn format_string(&self, writer: &mut String, value: &str) -> Result<()> {
1999        if self.conversion_type.is_upper() {
2000            let upper = value.to_ascii_uppercase();
2001            self.format_str(writer, &upper)
2002        } else {
2003            self.format_str(writer, value)
2004        }
2005    }
2006
2007    fn format_decimal(&self, writer: &mut String, value: &str, scale: i64) -> Result<()> {
2008        let mut prefix = String::new();
2009        let upper = self.conversion_type.is_upper();
2010
2011        // Parse as BigDecimal
2012        let decimal = value
2013            .parse::<BigInt>()
2014            .map_err(|e| exec_datafusion_err!("Failed to parse decimal: {}", e))?;
2015        let decimal = BigDecimal::from_bigint(decimal, scale);
2016
2017        // Handle sign
2018        let is_negative = decimal.sign() == Sign::Minus;
2019        let abs_decimal = decimal.abs();
2020
2021        if is_negative {
2022            prefix.push('-');
2023        } else if self.space_sign {
2024            prefix.push(' ');
2025        } else if self.force_sign {
2026            prefix.push('+');
2027        }
2028
2029        let exp_symb = if upper { 'E' } else { 'e' };
2030        let mut strip_trailing_0s = false;
2031
2032        // Get precision setting
2033        let mut precision = match self.precision {
2034            NumericParam::Literal(p) => p,
2035            _ => 6,
2036        };
2037
2038        let number = match self.conversion_type {
2039            ConversionType::DecFloatLower => {
2040                // Format as fixed-point decimal
2041                self.format_decimal_fixed(&abs_decimal, precision, strip_trailing_0s)?
2042            }
2043            ConversionType::SciFloatLower => self.format_decimal_scientific(
2044                &abs_decimal,
2045                precision,
2046                'e',
2047                strip_trailing_0s,
2048            )?,
2049            ConversionType::SciFloatUpper => self.format_decimal_scientific(
2050                &abs_decimal,
2051                precision,
2052                'E',
2053                strip_trailing_0s,
2054            )?,
2055            ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
2056                strip_trailing_0s = true;
2057                if precision == 0 {
2058                    precision = 1;
2059                }
2060                // Determine if we should use scientific notation
2061                let log10_val = abs_decimal.to_f64().map(|f| f.log10()).unwrap_or(0.0);
2062                if log10_val < -4.0 || log10_val >= precision as f64 {
2063                    self.format_decimal_scientific(
2064                        &abs_decimal,
2065                        precision - 1,
2066                        exp_symb,
2067                        strip_trailing_0s,
2068                    )?
2069                } else {
2070                    self.format_decimal_fixed(
2071                        &abs_decimal,
2072                        precision - 1 - log10_val.floor() as i32,
2073                        strip_trailing_0s,
2074                    )?
2075                }
2076            }
2077            _ => {
2078                return exec_err!(
2079                    "Invalid conversion type: {:?} for decimal",
2080                    self.conversion_type
2081                );
2082            }
2083        };
2084
2085        // Handle padding
2086        let NumericParam::Literal(width) = self.width else {
2087            writer.push_str(&prefix);
2088            writer.push_str(&number);
2089            return Ok(());
2090        };
2091
2092        if self.left_adj {
2093            let mut full_num = prefix + &number;
2094            while full_num.len() < width as usize {
2095                full_num.push(' ');
2096            }
2097            writer.push_str(&full_num);
2098        } else if self.zero_pad {
2099            while prefix.len() + number.len() < width as usize {
2100                prefix.push('0');
2101            }
2102            writer.push_str(&prefix);
2103            writer.push_str(&number);
2104        } else {
2105            let mut full_num = prefix + &number;
2106            while full_num.len() < width as usize {
2107                full_num = " ".to_owned() + &full_num;
2108            }
2109            writer.push_str(&full_num);
2110        }
2111
2112        Ok(())
2113    }
2114
2115    fn format_decimal_fixed(
2116        &self,
2117        decimal: &BigDecimal,
2118        precision: i32,
2119        strip_trailing_0s: bool,
2120    ) -> Result<String> {
2121        if precision <= 0 {
2122            Ok(decimal.round(0).to_string())
2123        } else {
2124            // Use BigDecimal's with_scale method for precise decimal formatting
2125            let scaled = decimal.round(precision as i64);
2126            let mut number = scaled.to_string();
2127            if strip_trailing_0s {
2128                number = trim_trailing_0s(&number).to_owned();
2129            }
2130            Ok(number)
2131        }
2132    }
2133
2134    fn format_decimal_scientific(
2135        &self,
2136        decimal: &BigDecimal,
2137        precision: i32,
2138        exp_char: char,
2139        strip_trailing_0s: bool,
2140    ) -> Result<String> {
2141        // Convert to f64 for scientific notation (may lose precision for very large numbers)
2142        let float_val = decimal.to_f64().unwrap_or(0.0);
2143        if float_val == 0.0 {
2144            return Ok(format!("0{exp_char}+00"));
2145        }
2146
2147        let abs_val = float_val.abs();
2148        let exponent = abs_val.log10().floor() as i32;
2149        let mantissa = abs_val / 10.0_f64.powf(exponent as f64);
2150
2151        let mut number = if precision <= 0 {
2152            format!("{mantissa:.0}")
2153        } else {
2154            format!("{mantissa:.prec$}", prec = precision as usize)
2155        };
2156
2157        if strip_trailing_0s {
2158            number = trim_trailing_0s(&number).to_owned();
2159        }
2160
2161        Ok(format!("{number}{exp_char}{exponent:+03}"))
2162    }
2163
2164    fn format_time(
2165        &self,
2166        writer: &mut String,
2167        timestamp_nanos: i64,
2168        timezone: &Option<Arc<str>>,
2169    ) -> Result<()> {
2170        let upper = self.conversion_type.is_upper();
2171        match &self.conversion_type {
2172            ConversionType::TimeLower(time_format)
2173            | ConversionType::TimeUpper(time_format) => {
2174                let formatted =
2175                    self.format_time_component(timestamp_nanos, *time_format, timezone)?;
2176                let result = if upper {
2177                    formatted.to_uppercase()
2178                } else {
2179                    formatted
2180                };
2181                write!(writer, "{result}")
2182                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2183                Ok(())
2184            }
2185            _ => exec_err!(
2186                "Invalid conversion type for time: {:?}",
2187                self.conversion_type
2188            ),
2189        }
2190    }
2191
2192    fn format_date(&self, writer: &mut String, date_days: i64) -> Result<()> {
2193        // Convert days since epoch to timestamp in nanoseconds
2194        let timestamp_nanos = date_days * 24 * 60 * 60 * 1_000_000_000;
2195        self.format_time(writer, timestamp_nanos, &None)
2196    }
2197
2198    fn format_time_component(
2199        &self,
2200        timestamp_nanos: i64,
2201        time_format: TimeFormat,
2202        _timezone: &Option<Arc<str>>,
2203    ) -> Result<String> {
2204        // Convert nanoseconds to seconds and nanoseconds remainder
2205        let secs = timestamp_nanos / 1_000_000_000;
2206        let nanos = (timestamp_nanos % 1_000_000_000) as u32;
2207
2208        // Create DateTime from timestamp
2209        let dt = DateTime::<Utc>::from_timestamp(secs, nanos).ok_or_else(|| {
2210            exec_datafusion_err!("Invalid timestamp: {}", timestamp_nanos)
2211        })?;
2212
2213        match time_format {
2214            TimeFormat::HUpper => Ok(format!("{:02}", dt.hour())),
2215            TimeFormat::IUpper => {
2216                let hour_12 = match dt.hour12() {
2217                    (true, h) => h,  // PM
2218                    (false, h) => h, // AM
2219                };
2220                Ok(format!("{hour_12:02}"))
2221            }
2222            TimeFormat::KLower => Ok(format!("{}", dt.hour())),
2223            TimeFormat::LLower => {
2224                let hour_12 = match dt.hour12() {
2225                    (true, h) => h,  // PM
2226                    (false, h) => h, // AM
2227                };
2228                Ok(format!("{hour_12}"))
2229            }
2230            TimeFormat::MUpper => Ok(format!("{:02}", dt.minute())),
2231            TimeFormat::SUpper => Ok(format!("{:02}", dt.second())),
2232            TimeFormat::LUpper => Ok(format!("{:03}", dt.timestamp_millis() % 1000)),
2233            TimeFormat::NUpper => Ok(format!("{:09}", dt.nanosecond())),
2234            TimeFormat::PLower => {
2235                let (is_pm, _) = dt.hour12();
2236                Ok(if is_pm {
2237                    "pm".to_string()
2238                } else {
2239                    "am".to_string()
2240                })
2241            }
2242            TimeFormat::ZLower => Ok("+0000".to_string()), // UTC timezone offset
2243            TimeFormat::ZUpper => Ok("UTC".to_string()),   // UTC timezone name
2244            TimeFormat::SLower => Ok(format!("{}", dt.timestamp())),
2245            TimeFormat::QUpper => Ok(format!("{}", dt.timestamp_millis())),
2246            TimeFormat::BUpper => Ok(dt.format("%B").to_string()), // Full month name
2247            TimeFormat::BLower => Ok(dt.format("%b").to_string()), // Abbreviated month name
2248            TimeFormat::AUpper => Ok(dt.format("%A").to_string()), // Full weekday name
2249            TimeFormat::ALower => Ok(dt.format("%a").to_string()), // Abbreviated weekday name
2250            TimeFormat::CUpper => Ok(format!("{:02}", dt.year() / 100)),
2251            TimeFormat::YUpper => Ok(format!("{:04}", dt.year())),
2252            TimeFormat::YLower => Ok(format!("{:02}", dt.year() % 100)),
2253            TimeFormat::JLower => Ok(format!("{:03}", dt.ordinal())), // Day of year
2254            TimeFormat::MLower => Ok(format!("{:02}", dt.month())),
2255            TimeFormat::DLower => Ok(format!("{:02}", dt.day())),
2256            TimeFormat::ELower => Ok(format!("{}", dt.day())),
2257            TimeFormat::RUpper => Ok(dt.format("%H:%M").to_string()),
2258            TimeFormat::TUpper => Ok(dt.format("%H:%M:%S").to_string()),
2259            TimeFormat::RLower => {
2260                let (is_pm, hour_12) = dt.hour12();
2261                let am_pm = if is_pm { "PM" } else { "AM" };
2262                Ok(format!(
2263                    "{:02}:{:02}:{:02} {}",
2264                    hour_12,
2265                    dt.minute(),
2266                    dt.second(),
2267                    am_pm
2268                ))
2269            }
2270            TimeFormat::DUpper => Ok(dt.format("%m/%d/%y").to_string()),
2271            TimeFormat::FUpper => Ok(dt.format("%Y-%m-%d").to_string()),
2272            TimeFormat::CLower => Ok(dt.format("%a %b %d %H:%M:%S UTC %Y").to_string()),
2273        }
2274    }
2275}
2276
/// A displayable floating-point value that can be rendered as a string with
/// dedicated spellings for NaN and the infinities.
trait FloatFormattable: std::fmt::Display {
    /// Floating-point class of the value (NaN, infinite, ...).
    fn category(&self) -> FpCategory;

    /// Whether the value is negative; `spark_string` consults this only to
    /// pick the sign of an infinity.
    fn negative(&self) -> bool;

    /// Returns `"NaN"`, `"Infinity"` or `"-Infinity"` for the special values
    /// and the value's `Display` output for everything else.
    fn spark_string(&self) -> String {
        let special = match self.category() {
            FpCategory::Nan => "NaN",
            FpCategory::Infinite if self.negative() => "-Infinity",
            FpCategory::Infinite => "Infinity",
            _ => return self.to_string(),
        };
        special.to_string()
    }
}
2295
2296impl FloatFormattable for f32 {
2297    fn category(&self) -> FpCategory {
2298        self.classify()
2299    }
2300
2301    fn negative(&self) -> bool {
2302        self.is_sign_negative()
2303    }
2304}
2305
2306impl FloatFormattable for f64 {
2307    fn category(&self) -> FpCategory {
2308        self.classify()
2309    }
2310
2311    fn negative(&self) -> bool {
2312        self.is_sign_negative()
2313    }
2314}
2315
2316trait FloatBits: FloatFormattable {
2317    const MANTISSA_BITS: u8;
2318    const EXPONENT_BIAS: u16;
2319    const SCALEUP_POWER: u8;
2320    const SCALEUP: Self;
2321
2322    fn to_parts(&self) -> (bool, u16, u64);
2323}
2324
2325impl FloatBits for f64 {
2326    const MANTISSA_BITS: u8 = 52;
2327    const EXPONENT_BIAS: u16 = 1023;
2328    const SCALEUP_POWER: u8 = 54;
2329    const SCALEUP: f64 = (1_i64 << Self::SCALEUP_POWER) as f64;
2330
2331    fn to_parts(&self) -> (bool, u16, u64) {
2332        let bits = self.to_bits();
2333        let sign: bool = (bits >> 63) == 1;
2334        let exponent = ((bits >> 52) & 0x7FF) as u16;
2335        let mantissa = bits & 0x000F_FFFF_FFFF_FFFF;
2336        (sign, exponent, mantissa)
2337    }
2338}
2339
/// Strips trailing `'0'` characters from `number`, but only when it contains
/// a `'.'`; strings without one are returned untouched.
///
/// The `'.'` itself is never removed, so `"1.000"` becomes `"1."`.
fn trim_trailing_0s(number: &str) -> &str {
    if !number.contains('.') {
        return number;
    }
    // The '.' guarantees at least one non-'0' character, so the result is
    // never empty here.
    number.trim_end_matches('0')
}
2350
/// Strips trailing `'0'` characters from `number`.
///
/// A string made up only of `'0'` characters (or an empty string) is returned
/// unchanged rather than being trimmed down to nothing.
fn trim_trailing_0s_hex(number: &str) -> &str {
    match number.trim_end_matches('0') {
        "" => number,
        trimmed => trimmed,
    }
}
2359
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::DataType::Utf8;
    use datafusion_common::Result;

    /// Asks `func` for its return field given a single `Utf8` format field
    /// with the requested nullability, and reports whether that return field
    /// is nullable.
    fn output_is_nullable(func: &FormatStringFunc, fmt_nullable: bool) -> Result<bool> {
        let fmt_field: FieldRef = Arc::new(Field::new("fmt", Utf8, fmt_nullable));
        let output = func.return_field_from_args(ReturnFieldArgs {
            arg_fields: &[fmt_field],
            scalar_arguments: &[None],
        })?;
        Ok(output.is_nullable())
    }

    /// The nullability of the result must follow the nullability of the
    /// format argument.
    #[test]
    fn test_format_string_nullability() -> Result<()> {
        let func = FormatStringFunc::new();

        assert!(
            output_is_nullable(&func, true)?,
            "format_string(fmt, ...) should be nullable when fmt is nullable"
        );
        assert!(
            !output_is_nullable(&func, false)?,
            "format_string(fmt, ...) should NOT be nullable when fmt is NOT nullable"
        );

        Ok(())
    }
}
datafusion_spark/function/string/format_string.rs

datafusion_spark/function/string/
format_string.rs