datafusion_spark/function/string/
format_string.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::Write;
19use std::sync::Arc;
20
21use core::num::FpCategory;
22
23use arrow::{
24    array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray},
25    datatypes::{DataType, Field, FieldRef},
26};
27use bigdecimal::{
28    BigDecimal, ToPrimitive,
29    num_bigint::{BigInt, Sign},
30};
31use chrono::{DateTime, Datelike, Timelike, Utc};
32use datafusion_common::{
33    DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err,
34};
35use datafusion_expr::{
36    ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
37    TypeSignature, Volatility,
38};
39
40/// Spark-compatible `format_string` expression
41/// <https://spark.apache.org/docs/latest/api/sql/index.html#format_string>
42#[derive(Debug, PartialEq, Eq, Hash)]
43pub struct FormatStringFunc {
44    signature: Signature,
45    aliases: Vec<String>,
46}
47
48impl Default for FormatStringFunc {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
54impl FormatStringFunc {
55    pub fn new() -> Self {
56        Self {
57            signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable),
58            aliases: vec![String::from("printf")],
59        }
60    }
61}
62
63impl ScalarUDFImpl for FormatStringFunc {
64    fn name(&self) -> &str {
65        "format_string"
66    }
67
68    fn aliases(&self) -> &[String] {
69        &self.aliases
70    }
71
72    fn signature(&self) -> &Signature {
73        &self.signature
74    }
75
76    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
77        datafusion_common::internal_err!(
78            "return_type should not be called, use return_field_from_args instead"
79        )
80    }
81
82    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
83        match args.arg_fields[0].data_type() {
84            DataType::Null => {
85                Ok(Arc::new(Field::new("format_string", DataType::Utf8, true)))
86            }
87            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
88                Ok(Arc::clone(&args.arg_fields[0]))
89            }
90            _ => exec_err!(
91                "format_string expects the first argument to be Utf8, LargeUtf8 or Utf8View, got {} instead",
92                args.arg_fields[0].data_type()
93            ),
94        }
95    }
96
97    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
98        let len = args.args.iter().find_map(|arg| match arg {
99            ColumnarValue::Scalar(_) => None,
100            ColumnarValue::Array(a) => Some(a.len()),
101        });
102        let is_scalar = len.is_none();
103        let data_types = args.args[1..]
104            .iter()
105            .map(|arg| arg.data_type())
106            .collect::<Vec<_>>();
107        let fmt_type = args.args[0].data_type();
108
109        match &args.args[0] {
110            ColumnarValue::Scalar(ScalarValue::Null) => {
111                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
112            }
113            ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {
114                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
115            }
116            ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {
117                Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
118            }
119            ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
120                Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
121            }
122            ColumnarValue::Scalar(ScalarValue::Utf8(Some(fmt)))
123            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(fmt)))
124            | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(fmt))) => {
125                let formatter = Formatter::parse(fmt, &data_types)?;
126                let mut result = Vec::with_capacity(len.unwrap_or(1));
127                for i in 0..len.unwrap_or(1) {
128                    let scalars = args.args[1..]
129                        .iter()
130                        .map(|arg| try_to_scalar(arg.clone(), i))
131                        .collect::<Result<Vec<_>>>()?;
132                    let formatted = formatter.format(&scalars)?;
133                    result.push(formatted);
134                }
135                if is_scalar {
136                    let scalar_result = result.pop().unwrap();
137                    match fmt_type {
138                        DataType::Utf8 => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
139                            Some(scalar_result),
140                        ))),
141                        DataType::LargeUtf8 => Ok(ColumnarValue::Scalar(
142                            ScalarValue::LargeUtf8(Some(scalar_result)),
143                        )),
144                        DataType::Utf8View => Ok(ColumnarValue::Scalar(
145                            ScalarValue::Utf8View(Some(scalar_result)),
146                        )),
147                        _ => unreachable!(),
148                    }
149                } else {
150                    let array: ArrayRef = match fmt_type {
151                        DataType::Utf8 => Arc::new(StringArray::from(result)),
152                        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
153                        DataType::Utf8View => Arc::new(StringViewArray::from(result)),
154                        _ => unreachable!(),
155                    };
156                    Ok(ColumnarValue::Array(array))
157                }
158            }
159            ColumnarValue::Array(fmts) => {
160                let mut result = Vec::with_capacity(len.unwrap());
161                for i in 0..len.unwrap() {
162                    let fmt = ScalarValue::try_from_array(fmts, i)?;
163                    match fmt.try_as_str() {
164                        Some(Some(fmt)) => {
165                            let formatter = Formatter::parse(fmt, &data_types)?;
166                            let scalars = args.args[1..]
167                                .iter()
168                                .map(|arg| try_to_scalar(arg.clone(), i))
169                                .collect::<Result<Vec<_>>>()?;
170                            let formatted = formatter.format(&scalars)?;
171                            result.push(Some(formatted));
172                        }
173                        Some(None) => {
174                            result.push(None);
175                        }
176                        _ => unreachable!(),
177                    }
178                }
179                let array: ArrayRef = match fmt_type {
180                    DataType::Utf8 => Arc::new(StringArray::from(result)),
181                    DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
182                    DataType::Utf8View => Arc::new(StringViewArray::from(result)),
183                    _ => unreachable!(),
184                };
185                Ok(ColumnarValue::Array(array))
186            }
187            _ => exec_err!(
188                "The format_string function expects the first argument to be a string"
189            ),
190        }
191    }
192}
193
194fn try_to_scalar(arg: ColumnarValue, index: usize) -> Result<ScalarValue> {
195    match arg {
196        ColumnarValue::Scalar(scalar) => Ok(scalar),
197        ColumnarValue::Array(array) => ScalarValue::try_from_array(&array, index),
198    }
199}
200
201/// Compatible with `java.util.Formatter`
202#[derive(Debug)]
203pub struct Formatter<'a> {
204    pub elements: Vec<FormatElement<'a>>,
205    pub arg_num: usize,
206}
207
208impl<'a> Formatter<'a> {
209    pub fn new(elements: Vec<FormatElement<'a>>) -> Self {
210        let arg_num = elements
211            .iter()
212            .map(|element| match element {
213                FormatElement::Format(spec) => spec.argument_index,
214                _ => 0,
215            })
216            .max()
217            .unwrap_or(0);
218        Self { elements, arg_num }
219    }
220
221    /// Parses a printf-style format string into a Formatter with validation.
222    ///
223    /// This method implements a comprehensive parser for Java `java.util.Formatter` syntax,
224    /// processing the format string character by character to identify and validate format
225    /// specifiers against the provided argument types.
226    ///
227    /// # Arguments
228    ///
229    /// * `fmt` - The format string containing literal text and format specifiers
230    /// * `arg_types` - Array of DataFusion DataTypes corresponding to the arguments
231    ///
232    /// # Parsing Process
233    ///
234    /// The parser operates in several phases:
235    ///
236    /// 1. **String Scanning**: Iterates through the format string looking for '%' characters
237    ///    that mark the beginning of format specifiers or special sequences.
238    ///
239    /// 2. **Special Sequence Handling**: Processes escape sequences:
240    ///    - `%%` becomes a literal '%' character
241    ///    - `%n` becomes a newline character
242    ///    - `%<` indicates reuse of the previous argument with a new format specifier
243    ///
244    /// 3. **Argument Index Resolution**: Determines which argument each format specifier refers to:
245    ///    - Sequential indexing: arguments are consumed in order (1, 2, 3, ...)
246    ///    - Positional indexing: explicit argument position using `%n$` syntax
247    ///    - Previous argument reuse: `%<` references the last used argument
248    ///
249    /// 4. **Format Specifier Parsing**: For each format specifier, extracts:
250    ///    - Flags (-, +, space, #, 0, ',', '(')
251    ///    - Width specification (minimum field width)
252    ///    - Precision specification (decimal places or maximum characters)
253    ///    - Conversion type (d, s, f, x, etc.)
254    ///
255    /// 5. **Type Validation**: Verifies that each format specifier's conversion type
256    ///    is compatible with the corresponding argument's DataType. For example:
257    ///    - Integer conversions (%d, %x, %o) require integer DataTypes
258    ///    - String conversions (%s, %S) accept any DataType
259    ///    - Float conversions (%f, %e, %g) require numeric DataTypes
260    ///
261    /// 6. **Element Construction**: Creates FormatElement instances for:
262    ///    - Verbatim text sections (copied directly to output)
263    ///    - Validated format specifiers with their parsed parameters
264    ///
265    /// # Internal State Management
266    ///
267    /// The parser maintains several state variables:
268    /// - `argument_index`: Tracks the current sequential argument position
269    /// - `prev`: Remembers the last used argument index for `%<` references
270    /// - `res`: Accumulates the parsed FormatElement instances
271    /// - `rem`: Points to the remaining unparsed portion of the format string
272    ///
273    /// # Validation and Error Handling
274    ///
275    /// The parser performs extensive validation including:
276    /// - Argument index bounds checking against the provided arg_types array
277    /// - Format specifier syntax validation
278    /// - Type compatibility verification between conversion types and DataTypes
279    /// - Detection of malformed numeric parameters and invalid flag combinations
280    ///
281    /// # Returns
282    ///
283    /// Returns a Formatter containing the parsed elements and the maximum argument
284    /// index encountered, enabling efficient argument validation during formatting.
285    pub fn parse(fmt: &'a str, arg_types: &[DataType]) -> Result<Self> {
286        // find the first %
287        let mut res = Vec::new();
288
289        let mut rem = fmt;
290        let mut argument_index = 0;
291
292        let mut prev: Option<usize> = None;
293
294        while !rem.is_empty() {
295            if let Some((verbatim_prefix, rest)) = rem.split_once('%') {
296                if !verbatim_prefix.is_empty() {
297                    res.push(FormatElement::Verbatim(verbatim_prefix));
298                }
299                if let Some(rest) = rest.strip_prefix('%') {
300                    res.push(FormatElement::Verbatim("%"));
301                    rem = rest;
302                    continue;
303                }
304                if let Some(rest) = rest.strip_prefix('n') {
305                    res.push(FormatElement::Verbatim("\n"));
306                    rem = rest;
307                    continue;
308                }
309                if let Some(rest) = rest.strip_prefix('<') {
310                    // %< means reuse the previous argument
311                    let Some(p) = prev else {
312                        return exec_err!("No previous argument to reference");
313                    };
314                    let (spec, rest) =
315                        take_conversion_specifier(rest, p, &arg_types[p - 1])?;
316                    res.push(FormatElement::Format(spec));
317                    rem = rest;
318                    continue;
319                }
320
321                let (current_argument_index, rest2) = take_numeric_param(rest, false);
322                let (current_argument_index, rest) =
323                    match (current_argument_index, rest2.starts_with('$')) {
324                        (NumericParam::Literal(index), true) => {
325                            (index as usize, &rest2[1..])
326                        }
327                        (NumericParam::FromArgument, true) => {
328                            return exec_err!("Invalid numeric parameter");
329                        }
330                        (_, false) => {
331                            argument_index += 1;
332                            (argument_index, rest)
333                        }
334                    };
335                if current_argument_index == 0 || current_argument_index > arg_types.len()
336                {
337                    return exec_err!(
338                        "Argument index {} is out of bounds",
339                        current_argument_index
340                    );
341                }
342
343                let (spec, rest) = take_conversion_specifier(
344                    rest,
345                    current_argument_index,
346                    &arg_types[current_argument_index - 1],
347                )
348                .map_err(|e| exec_datafusion_err!("{:?}, format string: {:?}", e, fmt))?;
349                res.push(FormatElement::Format(spec));
350                prev = Some(spec.argument_index);
351                rem = rest;
352            } else {
353                res.push(FormatElement::Verbatim(rem));
354                break;
355            }
356        }
357
358        Ok(Self::new(res))
359    }
360
361    pub fn format(&self, args: &[ScalarValue]) -> Result<String> {
362        if args.len() < self.arg_num {
363            return exec_err!(
364                "Expected at least {} arguments, got {}",
365                self.arg_num,
366                args.len()
367            );
368        }
369        let mut string = String::new();
370        for element in &self.elements {
371            match element {
372                FormatElement::Verbatim(text) => {
373                    string.push_str(text);
374                }
375                FormatElement::Format(spec) => {
376                    spec.format(&mut string, &args[spec.argument_index - 1])?;
377                }
378            }
379        }
380        Ok(string)
381    }
382}
383
384#[derive(Debug)]
385pub enum FormatElement<'a> {
386    /// Some characters that are copied to the output as-is
387    Verbatim(&'a str),
388    /// A format specifier
389    Format(ConversionSpecifier),
390}
391
392/// Parsed printf conversion specifier
393#[derive(Debug, Clone, Copy, PartialEq, Eq)]
394pub struct ConversionSpecifier {
395    pub argument_index: usize,
396    /// flag `#`: use `0x`, etc?
397    pub alt_form: bool,
398    /// flag `0`: left-pad with zeros?
399    pub zero_pad: bool,
400    /// flag `-`: left-adjust (pad with spaces on the right)
401    pub left_adj: bool,
402    /// flag `' '` (space): indicate sign with a space?
403    pub space_sign: bool,
404    /// flag `+`: Always show sign? (for signed numbers)
405    pub force_sign: bool,
406    /// flag `,`: include locale-specific grouping separators
407    pub grouping_separator: bool,
408    /// flag `(`: enclose negative numbers in parentheses
409    pub negative_in_parentheses: bool,
410    /// field width
411    pub width: NumericParam,
412    /// floating point field precision
413    pub precision: NumericParam,
414    /// data type
415    pub conversion_type: ConversionType,
416}
417
/// A width or precision parameter of a conversion specifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumericParam {
    /// The value is written literally in the format string
    Literal(i32),
    /// The value is taken from the previous argument
    FromArgument,
}
426
427/// Printf data type
428#[derive(Debug, Clone, Copy, PartialEq, Eq)]
429pub enum ConversionType {
430    /// `B`
431    BooleanUpper,
432    /// `b`
433    BooleanLower,
434    /// Not implemented yet. Can be implemented after <https://github.com/apache/datafusion/pull/17093> is merged
435    /// `h`
436    HexHashLower,
437    /// `H`
438    HexHashUpper,
439    /// `d`
440    DecInt,
441    /// `o`
442    OctInt,
443    /// `x`
444    HexIntLower,
445    /// `X`
446    HexIntUpper,
447    /// `e`
448    SciFloatLower,
449    /// `E`
450    SciFloatUpper,
451    /// `f`
452    DecFloatLower,
453    /// `g`
454    CompactFloatLower,
455    /// `G`
456    CompactFloatUpper,
457    /// `a`
458    HexFloatLower,
459    /// `A`
460    HexFloatUpper,
461    /// `t`
462    TimeLower(TimeFormat),
463    /// `T`
464    TimeUpper(TimeFormat),
465    /// `c`
466    CharLower,
467    /// `C`
468    CharUpper,
469    /// `s`
470    StringLower,
471    /// `S`
472    StringUpper,
473}
474
/// Date/time sub-conversion that follows `%t` / `%T`.
///
/// Variant names encode the conversion character and its case
/// (e.g. `HUpper` is `H`, `KLower` is `k`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeFormat {
    /// `H`: hour of the day for the 24-hour clock, formatted as two digits
    /// with a leading zero as necessary, i.e. 00 - 23. 00 corresponds to midnight.
    HUpper,
    /// `I`: hour for the 12-hour clock, formatted as two digits with a leading
    /// zero as necessary, i.e. 01 - 12. 01 corresponds to one o'clock
    /// (either morning or afternoon).
    IUpper,
    /// `k`: hour of the day for the 24-hour clock, i.e. 0 - 23.
    /// 0 corresponds to midnight.
    KLower,
    /// `l`: hour for the 12-hour clock, i.e. 1 - 12. 1 corresponds to one
    /// o'clock (either morning or afternoon).
    LLower,
    /// `M`: minute within the hour, formatted as two digits with a leading
    /// zero as necessary, i.e. 00 - 59.
    MUpper,
    /// `S`: seconds within the minute, formatted as two digits with a leading
    /// zero as necessary, i.e. 00 - 60 ("60" is a special value required to
    /// support leap seconds).
    SUpper,
    /// `L`: millisecond within the second, formatted as three digits with
    /// leading zeros as necessary, i.e. 000 - 999.
    LUpper,
    /// `N`: nanosecond within the second, formatted as nine digits with
    /// leading zeros as necessary, i.e. 000000000 - 999999999. The precision
    /// of this value is limited by the resolution of the underlying operating
    /// system or hardware.
    NUpper,
    /// `p`: locale-specific morning or afternoon marker in lower case,
    /// e.g. "am" or "pm". Use of the conversion prefix `T` forces this output
    /// to upper case. (Note that `p` produces lower-case output. This is
    /// different from GNU date and POSIX strftime(3c), which produce
    /// upper-case output.)
    PLower,
    /// `z`: RFC 822 style numeric time zone offset from GMT, e.g. -0800.
    /// This value will be adjusted as necessary for Daylight Saving Time.
    /// For long, Long, and Date the time zone used is the default time zone
    /// for this instance of the Java virtual machine.
    ZLower,
    /// `Z`: a string representing the abbreviation for the time zone. This
    /// value will be adjusted as necessary for Daylight Saving Time. For long,
    /// Long, and Date the time zone used is the default time zone for this
    /// instance of the Java virtual machine. The Formatter's locale will
    /// supersede the locale of the argument (if any).
    ZUpper,
    /// `s`: seconds since the beginning of the epoch starting at
    /// 1 January 1970 00:00:00 UTC, i.e. Long.MIN_VALUE/1000 to
    /// Long.MAX_VALUE/1000.
    SLower,
    /// `Q`: milliseconds since the beginning of the epoch starting at
    /// 1 January 1970 00:00:00 UTC, i.e. Long.MIN_VALUE to Long.MAX_VALUE.
    /// The precision of this value is limited by the resolution of the
    /// underlying operating system or hardware.
    QUpper,
    /// `B`: locale-specific full month name, e.g. "January", "February".
    BUpper,
    /// `b` (also `h`): locale-specific abbreviated month name,
    /// e.g. "Jan", "Feb".
    BLower,
    /// `A`: locale-specific full weekday name, e.g. "Monday", "Tuesday".
    AUpper,
    /// `a`: locale-specific abbreviated weekday name, e.g. "Mon", "Tue".
    ALower,
    /// `C`: four-digit year divided by 100, formatted as two digits with a
    /// leading zero as necessary, i.e. 00 - 99.
    CUpper,
    /// `Y`: year, formatted to at least four digits with leading zeros as
    /// necessary, e.g. 0092 equals 92 CE for the Gregorian calendar.
    YUpper,
    /// `y`: last two digits of the year, formatted with leading zeros as
    /// necessary, i.e. 00 - 99.
    YLower,
    /// `j`: day of year, formatted as three digits with leading zeros as
    /// necessary, e.g. 001 - 366 for the Gregorian calendar. 001 corresponds
    /// to the first day of the year.
    JLower,
    /// `m`: month, formatted as two digits with leading zeros as necessary,
    /// i.e. 01 - 13, where "01" is the first month of the year ("13" is a
    /// special value required to support lunar calendars).
    MLower,
    /// `d`: day of month, formatted as two digits with leading zeros as
    /// necessary, i.e. 01 - 31, where "01" is the first day of the month.
    DLower,
    /// `e`: day of month, formatted as two digits, i.e. 1 - 31, where "1" is
    /// the first day of the month.
    ELower,
    /// `R`: time formatted for the 24-hour clock as "%tH:%tM".
    RUpper,
    /// `T`: time formatted for the 24-hour clock as "%tH:%tM:%tS".
    TUpper,
    /// `r`: time formatted for the 12-hour clock as "%tI:%tM:%tS %Tp". The
    /// location of the morning or afternoon marker ('%Tp') may be
    /// locale-dependent.
    RLower,
    /// `D`: date formatted as "%tm/%td/%ty".
    DUpper,
    /// `F`: ISO 8601 complete date formatted as "%tY-%tm-%td".
    FUpper,
    /// `c`: date and time formatted as "%ta %tb %td %tT %tZ %tY",
    /// e.g. "Sun Jul 20 16:17:00 EDT 1969".
    CLower,
}
552
553impl TryFrom<char> for TimeFormat {
554    type Error = DataFusionError;
555    fn try_from(value: char) -> Result<Self, Self::Error> {
556        match value {
557            'H' => Ok(TimeFormat::HUpper),
558            'I' => Ok(TimeFormat::IUpper),
559            'k' => Ok(TimeFormat::KLower),
560            'l' => Ok(TimeFormat::LLower),
561            'M' => Ok(TimeFormat::MUpper),
562            'S' => Ok(TimeFormat::SUpper),
563            'L' => Ok(TimeFormat::LUpper),
564            'N' => Ok(TimeFormat::NUpper),
565            'p' => Ok(TimeFormat::PLower),
566            'z' => Ok(TimeFormat::ZLower),
567            'Z' => Ok(TimeFormat::ZUpper),
568            's' => Ok(TimeFormat::SLower),
569            'Q' => Ok(TimeFormat::QUpper),
570            'B' => Ok(TimeFormat::BUpper),
571            'b' | 'h' => Ok(TimeFormat::BLower),
572            'A' => Ok(TimeFormat::AUpper),
573            'a' => Ok(TimeFormat::ALower),
574            'C' => Ok(TimeFormat::CUpper),
575            'Y' => Ok(TimeFormat::YUpper),
576            'y' => Ok(TimeFormat::YLower),
577            'j' => Ok(TimeFormat::JLower),
578            'm' => Ok(TimeFormat::MLower),
579            'd' => Ok(TimeFormat::DLower),
580            'e' => Ok(TimeFormat::ELower),
581            'R' => Ok(TimeFormat::RUpper),
582            'T' => Ok(TimeFormat::TUpper),
583            'r' => Ok(TimeFormat::RLower),
584            'D' => Ok(TimeFormat::DUpper),
585            'F' => Ok(TimeFormat::FUpper),
586            'c' => Ok(TimeFormat::CLower),
587            _ => exec_err!("Invalid time format: {}", value),
588        }
589    }
590}
591
592impl ConversionType {
593    pub fn validate(&self, arg_type: &DataType) -> Result<()> {
594        match self {
595            ConversionType::BooleanLower | ConversionType::BooleanUpper
596                if *arg_type != DataType::Boolean =>
597            {
598                return exec_err!(
599                    "Invalid argument type for boolean conversion: {:?}",
600                    arg_type
601                );
602            }
603            ConversionType::CharLower | ConversionType::CharUpper
604                if !matches!(
605                    arg_type,
606                    DataType::Int8
607                        | DataType::UInt8
608                        | DataType::Int16
609                        | DataType::UInt16
610                        | DataType::Int32
611                        | DataType::UInt32
612                        | DataType::Int64
613                        | DataType::UInt64
614                ) =>
615            {
616                return exec_err!(
617                    "Invalid argument type for char conversion: {:?}",
618                    arg_type
619                );
620            }
621            ConversionType::DecInt
622            | ConversionType::OctInt
623            | ConversionType::HexIntLower
624            | ConversionType::HexIntUpper
625                if !arg_type.is_integer() =>
626            {
627                return exec_err!(
628                    "Invalid argument type for integer conversion: {:?}",
629                    arg_type
630                );
631            }
632            ConversionType::SciFloatLower
633            | ConversionType::SciFloatUpper
634            | ConversionType::DecFloatLower
635            | ConversionType::CompactFloatLower
636            | ConversionType::CompactFloatUpper
637            | ConversionType::HexFloatLower
638            | ConversionType::HexFloatUpper
639                if !arg_type.is_numeric() =>
640            {
641                return exec_err!(
642                    "Invalid argument type for float conversion: {:?}",
643                    arg_type
644                );
645            }
646            ConversionType::TimeLower(_) | ConversionType::TimeUpper(_)
647                if !arg_type.is_temporal() =>
648            {
649                return exec_err!(
650                    "Invalid argument type for time conversion: {:?}",
651                    arg_type
652                );
653            }
654            _ => {}
655        }
656        Ok(())
657    }
658
659    fn supports_integer(&self) -> bool {
660        matches!(
661            self,
662            ConversionType::DecInt
663                | ConversionType::HexIntLower
664                | ConversionType::HexIntUpper
665                | ConversionType::OctInt
666                | ConversionType::CharLower
667                | ConversionType::CharUpper
668                | ConversionType::StringLower
669                | ConversionType::StringUpper
670        )
671    }
672
673    fn supports_float(&self) -> bool {
674        matches!(
675            self,
676            ConversionType::DecFloatLower
677                | ConversionType::SciFloatLower
678                | ConversionType::SciFloatUpper
679                | ConversionType::CompactFloatLower
680                | ConversionType::CompactFloatUpper
681                | ConversionType::StringLower
682                | ConversionType::StringUpper
683                | ConversionType::HexFloatLower
684                | ConversionType::HexFloatUpper
685        )
686    }
687
688    fn supports_decimal(&self) -> bool {
689        matches!(
690            self,
691            ConversionType::DecFloatLower
692                | ConversionType::SciFloatLower
693                | ConversionType::SciFloatUpper
694                | ConversionType::CompactFloatLower
695                | ConversionType::CompactFloatUpper
696                | ConversionType::StringLower
697                | ConversionType::StringUpper
698        )
699    }
700
701    fn supports_time(&self) -> bool {
702        matches!(
703            self,
704            ConversionType::TimeLower(_)
705                | ConversionType::TimeUpper(_)
706                | ConversionType::StringLower
707                | ConversionType::StringUpper
708        )
709    }
710
711    fn is_upper(&self) -> bool {
712        matches!(
713            self,
714            ConversionType::BooleanUpper
715                | ConversionType::HexHashUpper
716                | ConversionType::HexIntUpper
717                | ConversionType::SciFloatUpper
718                | ConversionType::CompactFloatUpper
719                | ConversionType::HexFloatUpper
720                | ConversionType::TimeUpper(_)
721                | ConversionType::CharUpper
722                | ConversionType::StringUpper
723        )
724    }
725}
726
727fn take_conversion_specifier<'a>(
728    mut s: &'a str,
729    argument_index: usize,
730    arg_type: &DataType,
731) -> Result<(ConversionSpecifier, &'a str)> {
732    let mut spec = ConversionSpecifier {
733        argument_index,
734        alt_form: false,
735        zero_pad: false,
736        left_adj: false,
737        space_sign: false,
738        force_sign: false,
739        grouping_separator: false,
740        negative_in_parentheses: false,
741        width: NumericParam::Literal(0),
742        precision: NumericParam::FromArgument, // Placeholder - must not be returned!
743        // ignore length modifier
744        conversion_type: ConversionType::DecInt,
745    };
746
747    // parse flags
748    loop {
749        match s.chars().next() {
750            Some('#') => {
751                spec.alt_form = true;
752            }
753            Some('0') => {
754                if spec.left_adj {
755                    return exec_err!("Invalid flag combination: '0' and '-'");
756                }
757                spec.zero_pad = true;
758            }
759            Some('-') => {
760                spec.left_adj = true;
761            }
762            Some(' ') => {
763                if spec.force_sign {
764                    return exec_err!("Invalid flag combination: '+' and ' '");
765                }
766                spec.space_sign = true;
767            }
768            Some('+') => {
769                if spec.space_sign {
770                    return exec_err!("Invalid flag combination: '+' and ' '");
771                }
772                spec.force_sign = true;
773            }
774            Some(',') => {
775                spec.grouping_separator = true;
776            }
777            Some('(') => {
778                spec.negative_in_parentheses = true;
779            }
780            _ => {
781                break;
782            }
783        }
784        s = &s[1..];
785    }
786    // parse width
787    let (w, mut s) = take_numeric_param(s, false);
788    spec.width = w;
789    // parse precision
790    if matches!(s.chars().next(), Some('.')) {
791        s = &s[1..];
792        let (p, s2) = take_numeric_param(s, true);
793        spec.precision = p;
794        s = s2;
795    }
796    let mut chars = s.chars();
797    let mut offset = 1;
798    // parse conversion type
799    spec.conversion_type = match chars.next() {
800        Some('b') => ConversionType::BooleanLower,
801        Some('B') => ConversionType::BooleanUpper,
802        Some('h') => ConversionType::HexHashLower,
803        Some('H') => ConversionType::HexHashUpper,
804        Some('s') => ConversionType::StringLower,
805        Some('S') => ConversionType::StringUpper,
806        Some('c') => ConversionType::CharLower,
807        Some('C') => ConversionType::CharUpper,
808        Some('d') => ConversionType::DecInt,
809        Some('o') => ConversionType::OctInt,
810        Some('x') => ConversionType::HexIntLower,
811        Some('X') => ConversionType::HexIntUpper,
812        Some('e') => ConversionType::SciFloatLower,
813        Some('E') => ConversionType::SciFloatUpper,
814        Some('f') => ConversionType::DecFloatLower,
815        Some('g') => ConversionType::CompactFloatLower,
816        Some('G') => ConversionType::CompactFloatUpper,
817        Some('a') => ConversionType::HexFloatLower,
818        Some('A') => ConversionType::HexFloatUpper,
819        Some('t') => {
820            let Some(chr) = chars.next() else {
821                return exec_err!("Invalid time format: {}", s);
822            };
823            offset += 1;
824            ConversionType::TimeLower(chr.try_into()?)
825        }
826        Some('T') => {
827            let Some(chr) = chars.next() else {
828                return exec_err!("Invalid time format: {}", s);
829            };
830            offset += 1;
831            ConversionType::TimeUpper(chr.try_into()?)
832        }
833        chr => {
834            return plan_err!("Invalid conversion type: {:?}", chr);
835        }
836    };
837
838    spec.conversion_type.validate(arg_type)?;
839    Ok((spec, &s[offset..]))
840}
841
842fn take_numeric_param(s: &str, zero: bool) -> (NumericParam, &str) {
843    match s.chars().next() {
844        Some(digit) if (if zero { '0'..='9' } else { '1'..='9' }).contains(&digit) => {
845            let mut s = s;
846            let mut w = 0;
847            loop {
848                match s.chars().next() {
849                    Some(digit) if digit.is_ascii_digit() => {
850                        w = 10 * w + (digit as i32 - '0' as i32);
851                    }
852                    _ => {
853                        break;
854                    }
855                }
856                s = &s[1..];
857            }
858            (NumericParam::Literal(w), s)
859        }
860        _ => (NumericParam::FromArgument, s),
861    }
862}
863
864/// Convert a `u32` to a [`char`] for the `%c` conversion. Returns an error if
865/// the value is not a valid Unicode scalar value (i.e. is in the surrogate
866/// range `0xD800..=0xDFFF` or above `0x10FFFF`).
867fn codepoint_to_char(value: u32) -> Result<char> {
868    char::from_u32(value).ok_or_else(|| {
869        exec_datafusion_err!("invalid Unicode scalar value for %c: {value:#x}")
870    })
871}
872
873/// Convert a signed integer to a [`char`] for the `%c` conversion. Returns an
874/// error if the value is negative or is not a valid Unicode scalar value (i.e.
875/// is in the surrogate range `0xD800..=0xDFFF` or above `0x10FFFF`).
876fn signed_to_char(value: i64) -> Result<char> {
877    let codepoint = u32::try_from(value).map_err(|_| {
878        exec_datafusion_err!("invalid Unicode scalar value for %c: {value}")
879    })?;
880    codepoint_to_char(codepoint)
881}
882
883/// Convert an unsigned integer to a [`char`] for the `%c` conversion. Returns
884/// an error if the value does not fit in a `u32` or is not a valid Unicode
885/// scalar value (i.e. is in the surrogate range `0xD800..=0xDFFF` or above
886/// `0x10FFFF`).
887fn unsigned_to_char(value: u64) -> Result<char> {
888    let codepoint = u32::try_from(value).map_err(|_| {
889        exec_datafusion_err!("invalid Unicode scalar value for %c: {value:#x}")
890    })?;
891    codepoint_to_char(codepoint)
892}
893
894/// Convert a non-null integer scalar to a [`char`] for the `%c` conversion.
895fn integer_scalar_to_char(scalar: &ScalarValue) -> Result<char> {
896    match scalar {
897        ScalarValue::Int8(Some(value)) => signed_to_char(*value as i64),
898        ScalarValue::Int16(Some(value)) => signed_to_char(*value as i64),
899        ScalarValue::Int32(Some(value)) => signed_to_char(*value as i64),
900        ScalarValue::Int64(Some(value)) => signed_to_char(*value),
901        ScalarValue::UInt8(Some(value)) => unsigned_to_char(*value as u64),
902        ScalarValue::UInt16(Some(value)) => unsigned_to_char(*value as u64),
903        ScalarValue::UInt32(Some(value)) => unsigned_to_char(*value as u64),
904        ScalarValue::UInt64(Some(value)) => unsigned_to_char(*value),
905        _ => datafusion_common::internal_err!(
906            "integer_scalar_to_char expects a non-null integer scalar, got {scalar:?}"
907        ),
908    }
909}
910
911impl ConversionSpecifier {
912    /// Validates that the grouping separator flag is not used with scientific
913    /// notation conversions, matching Java/Spark behavior which throws
914    /// `FormatFlagsConversionMismatchException` for `%,e` / `%,E`.
915    fn validate_grouping_separator(&self) -> Result<()> {
916        if self.grouping_separator
917            && matches!(
918                self.conversion_type,
919                ConversionType::SciFloatLower | ConversionType::SciFloatUpper
920            )
921        {
922            return exec_err!(
923                "Grouping separator ',' flag is not compatible with scientific notation conversion '{}'",
924                if self.conversion_type == ConversionType::SciFloatUpper {
925                    'E'
926                } else {
927                    'e'
928                }
929            );
930        }
931        Ok(())
932    }
933
934    pub fn format(&self, string: &mut String, value: &ScalarValue) -> Result<()> {
935        match value {
936            ScalarValue::Boolean(value) => match self.conversion_type {
937                ConversionType::StringLower | ConversionType::StringUpper => {
938                    self.format_string(string, &value.unwrap_or(false).to_string())
939                }
940
941                _ => self.format_boolean(string, value),
942            },
943            ScalarValue::Int8(Some(_))
944            | ScalarValue::Int16(Some(_))
945            | ScalarValue::Int32(Some(_))
946            | ScalarValue::Int64(Some(_))
947            | ScalarValue::UInt8(Some(_))
948            | ScalarValue::UInt16(Some(_))
949            | ScalarValue::UInt32(Some(_))
950            | ScalarValue::UInt64(Some(_))
951                if matches!(
952                    self.conversion_type,
953                    ConversionType::CharLower | ConversionType::CharUpper
954                ) =>
955            {
956                self.format_char(string, integer_scalar_to_char(value)?)
957            }
958            ScalarValue::Int8(value) => match (self.conversion_type, value) {
959                (ConversionType::DecInt, Some(value)) => {
960                    self.format_signed(string, *value as i64)
961                }
962                (
963                    ConversionType::HexIntLower
964                    | ConversionType::HexIntUpper
965                    | ConversionType::OctInt,
966                    Some(value),
967                ) => self.format_unsigned(string, (*value as u8) as u64),
968                (
969                    ConversionType::StringLower | ConversionType::StringUpper,
970                    Some(value),
971                ) => self.format_string(string, &value.to_string()),
972                (t, None) if t.supports_integer() => self.format_string(string, "null"),
973                _ => {
974                    exec_err!(
975                        "Invalid conversion type: {:?} for Int8",
976                        self.conversion_type
977                    )
978                }
979            },
980            ScalarValue::Int16(value) => match (self.conversion_type, value) {
981                (ConversionType::DecInt, Some(value)) => {
982                    self.format_signed(string, *value as i64)
983                }
984                (
985                    ConversionType::HexIntLower
986                    | ConversionType::HexIntUpper
987                    | ConversionType::OctInt,
988                    Some(value),
989                ) => self.format_unsigned(string, (*value as u16) as u64),
990                (
991                    ConversionType::StringLower | ConversionType::StringUpper,
992                    Some(value),
993                ) => self.format_string(string, &value.to_string()),
994                (t, None) if t.supports_integer() => self.format_string(string, "null"),
995                _ => {
996                    exec_err!(
997                        "Invalid conversion type: {:?} for Int16",
998                        self.conversion_type
999                    )
1000                }
1001            },
1002            ScalarValue::Int32(value) => match (self.conversion_type, value) {
1003                (ConversionType::DecInt, Some(value)) => {
1004                    self.format_signed(string, *value as i64)
1005                }
1006                (
1007                    ConversionType::HexIntLower
1008                    | ConversionType::HexIntUpper
1009                    | ConversionType::OctInt,
1010                    Some(value),
1011                ) => self.format_unsigned(string, (*value as u32) as u64),
1012                (
1013                    ConversionType::StringLower | ConversionType::StringUpper,
1014                    Some(value),
1015                ) => self.format_string(string, &value.to_string()),
1016                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1017                _ => {
1018                    exec_err!(
1019                        "Invalid conversion type: {:?} for Int32",
1020                        self.conversion_type
1021                    )
1022                }
1023            },
1024            ScalarValue::Int64(value) => match (self.conversion_type, value) {
1025                (ConversionType::DecInt, Some(value)) => {
1026                    self.format_signed(string, *value)
1027                }
1028                (
1029                    ConversionType::HexIntLower
1030                    | ConversionType::HexIntUpper
1031                    | ConversionType::OctInt,
1032                    Some(value),
1033                ) => self.format_unsigned(string, *value as u64),
1034                (
1035                    ConversionType::StringLower | ConversionType::StringUpper,
1036                    Some(value),
1037                ) => self.format_string(string, &value.to_string()),
1038                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1039                _ => {
1040                    exec_err!(
1041                        "Invalid conversion type: {:?} for Int64",
1042                        self.conversion_type
1043                    )
1044                }
1045            },
1046            ScalarValue::UInt8(value) => match (self.conversion_type, value) {
1047                (
1048                    ConversionType::DecInt
1049                    | ConversionType::HexIntLower
1050                    | ConversionType::HexIntUpper
1051                    | ConversionType::OctInt,
1052                    Some(value),
1053                ) => self.format_unsigned(string, *value as u64),
1054                (
1055                    ConversionType::StringLower | ConversionType::StringUpper,
1056                    Some(value),
1057                ) => self.format_string(string, &value.to_string()),
1058                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1059                _ => {
1060                    exec_err!(
1061                        "Invalid conversion type: {:?} for UInt8",
1062                        self.conversion_type
1063                    )
1064                }
1065            },
1066            ScalarValue::UInt16(value) => match (self.conversion_type, value) {
1067                (
1068                    ConversionType::DecInt
1069                    | ConversionType::HexIntLower
1070                    | ConversionType::HexIntUpper
1071                    | ConversionType::OctInt,
1072                    Some(value),
1073                ) => self.format_unsigned(string, *value as u64),
1074                (
1075                    ConversionType::StringLower | ConversionType::StringUpper,
1076                    Some(value),
1077                ) => self.format_string(string, &value.to_string()),
1078                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1079                _ => {
1080                    exec_err!(
1081                        "Invalid conversion type: {:?} for UInt16",
1082                        self.conversion_type
1083                    )
1084                }
1085            },
1086            ScalarValue::UInt32(value) => match (self.conversion_type, value) {
1087                (
1088                    ConversionType::DecInt
1089                    | ConversionType::HexIntLower
1090                    | ConversionType::HexIntUpper
1091                    | ConversionType::OctInt,
1092                    Some(value),
1093                ) => self.format_unsigned(string, *value as u64),
1094                (
1095                    ConversionType::StringLower | ConversionType::StringUpper,
1096                    Some(value),
1097                ) => self.format_string(string, &value.to_string()),
1098                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1099                _ => {
1100                    exec_err!(
1101                        "Invalid conversion type: {:?} for UInt32",
1102                        self.conversion_type
1103                    )
1104                }
1105            },
1106            ScalarValue::UInt64(value) => match (self.conversion_type, value) {
1107                (
1108                    ConversionType::DecInt
1109                    | ConversionType::HexIntLower
1110                    | ConversionType::HexIntUpper
1111                    | ConversionType::OctInt,
1112                    Some(value),
1113                ) => self.format_unsigned(string, *value),
1114                (
1115                    ConversionType::StringLower | ConversionType::StringUpper,
1116                    Some(value),
1117                ) => self.format_string(string, &value.to_string()),
1118                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1119                _ => {
1120                    exec_err!(
1121                        "Invalid conversion type: {:?} for UInt64",
1122                        self.conversion_type
1123                    )
1124                }
1125            },
1126            ScalarValue::Float16(value) => match (self.conversion_type, value) {
1127                (
1128                    ConversionType::DecFloatLower
1129                    | ConversionType::SciFloatLower
1130                    | ConversionType::SciFloatUpper
1131                    | ConversionType::CompactFloatLower
1132                    | ConversionType::CompactFloatUpper,
1133                    Some(value),
1134                ) => self.format_float(string, value.to_f64().unwrap()),
1135                (
1136                    ConversionType::StringLower | ConversionType::StringUpper,
1137                    Some(value),
1138                ) => self.format_string(string, &value.to_f32().unwrap().spark_string()),
1139                (
1140                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1141                    Some(value),
1142                ) => self.format_hex_float(string, value.to_f64().unwrap()),
1143                (t, None) if t.supports_float() => self.format_string(string, "null"),
1144                _ => {
1145                    exec_err!(
1146                        "Invalid conversion type: {:?} for Float16",
1147                        self.conversion_type
1148                    )
1149                }
1150            },
1151            ScalarValue::Float32(value) => match (self.conversion_type, value) {
1152                (
1153                    ConversionType::DecFloatLower
1154                    | ConversionType::SciFloatLower
1155                    | ConversionType::SciFloatUpper
1156                    | ConversionType::CompactFloatLower
1157                    | ConversionType::CompactFloatUpper,
1158                    Some(value),
1159                ) => self.format_float(string, *value as f64),
1160                (
1161                    ConversionType::StringLower | ConversionType::StringUpper,
1162                    Some(value),
1163                ) => self.format_string(string, &value.spark_string()),
1164                (
1165                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1166                    Some(value),
1167                ) => self.format_hex_float(string, *value as f64),
1168                (t, None) if t.supports_float() => self.format_string(string, "null"),
1169                _ => {
1170                    exec_err!(
1171                        "Invalid conversion type: {:?} for Float32",
1172                        self.conversion_type
1173                    )
1174                }
1175            },
1176            ScalarValue::Float64(value) => match (self.conversion_type, value) {
1177                (
1178                    ConversionType::DecFloatLower
1179                    | ConversionType::SciFloatLower
1180                    | ConversionType::SciFloatUpper
1181                    | ConversionType::CompactFloatLower
1182                    | ConversionType::CompactFloatUpper,
1183                    Some(value),
1184                ) => self.format_float(string, *value),
1185                (
1186                    ConversionType::StringLower | ConversionType::StringUpper,
1187                    Some(value),
1188                ) => self.format_string(string, &value.spark_string()),
1189                (
1190                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1191                    Some(value),
1192                ) => self.format_hex_float(string, *value),
1193                (t, None) if t.supports_float() => self.format_string(string, "null"),
1194                _ => {
1195                    exec_err!(
1196                        "Invalid conversion type: {:?} for Float64",
1197                        self.conversion_type
1198                    )
1199                }
1200            },
1201            ScalarValue::Utf8(value) => {
1202                let value: &str = match value {
1203                    Some(value) => value.as_str(),
1204                    None => "null",
1205                };
1206                if matches!(
1207                    self.conversion_type,
1208                    ConversionType::StringLower | ConversionType::StringUpper
1209                ) {
1210                    self.format_string(string, value)
1211                } else {
1212                    exec_err!(
1213                        "Invalid conversion type: {:?} for Utf8",
1214                        self.conversion_type
1215                    )
1216                }
1217            }
1218            ScalarValue::LargeUtf8(value) => {
1219                let value: &str = match value {
1220                    Some(value) => value.as_str(),
1221                    None => "null",
1222                };
1223                if matches!(
1224                    self.conversion_type,
1225                    ConversionType::StringLower | ConversionType::StringUpper
1226                ) {
1227                    self.format_string(string, value)
1228                } else {
1229                    exec_err!(
1230                        "Invalid conversion type: {:?} for LargeUtf8",
1231                        self.conversion_type
1232                    )
1233                }
1234            }
1235            ScalarValue::Utf8View(value) => {
1236                let value: &str = match value {
1237                    Some(value) => value.as_str(),
1238                    None => "null",
1239                };
1240                self.format_string(string, value)
1241            }
1242            ScalarValue::Decimal128(value, _, scale) => {
1243                match (self.conversion_type, value) {
1244                    (
1245                        ConversionType::DecFloatLower
1246                        | ConversionType::SciFloatLower
1247                        | ConversionType::SciFloatUpper
1248                        | ConversionType::CompactFloatLower
1249                        | ConversionType::CompactFloatUpper,
1250                        Some(value),
1251                    ) => self.format_decimal(string, &value.to_string(), *scale as i64),
1252                    (
1253                        ConversionType::StringLower | ConversionType::StringUpper,
1254                        Some(value),
1255                    ) => self.format_string(string, &value.to_string()),
1256                    (t, None) if t.supports_decimal() => {
1257                        self.format_string(string, "null")
1258                    }
1259
1260                    _ => {
1261                        exec_err!(
1262                            "Invalid conversion type: {:?} for Decimal128",
1263                            self.conversion_type
1264                        )
1265                    }
1266                }
1267            }
1268            ScalarValue::Decimal256(value, _, scale) => {
1269                match (self.conversion_type, value) {
1270                    (
1271                        ConversionType::DecFloatLower
1272                        | ConversionType::SciFloatLower
1273                        | ConversionType::SciFloatUpper
1274                        | ConversionType::CompactFloatLower
1275                        | ConversionType::CompactFloatUpper,
1276                        Some(value),
1277                    ) => self.format_decimal(string, &value.to_string(), *scale as i64),
1278                    (
1279                        ConversionType::StringLower | ConversionType::StringUpper,
1280                        Some(value),
1281                    ) => self.format_string(string, &value.to_string()),
1282                    (t, None) if t.supports_decimal() => {
1283                        self.format_string(string, "null")
1284                    }
1285
1286                    _ => {
1287                        exec_err!(
1288                            "Invalid conversion type: {:?} for Decimal256",
1289                            self.conversion_type
1290                        )
1291                    }
1292                }
1293            }
1294
1295            ScalarValue::Time32Second(value) => match (self.conversion_type, value) {
1296                (
1297                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1298                    Some(value),
1299                ) => self.format_time(string, *value as i64 * 1000000000, &None),
1300                (
1301                    ConversionType::StringLower | ConversionType::StringUpper,
1302                    Some(value),
1303                ) => self.format_string(string, &value.to_string()),
1304                (t, None) if t.supports_time() => self.format_string(string, "null"),
1305                _ => {
1306                    exec_err!(
1307                        "Invalid conversion type: {:?} for Time32Second",
1308                        self.conversion_type
1309                    )
1310                }
1311            },
1312            ScalarValue::Time32Millisecond(value) => {
1313                match (self.conversion_type, value) {
1314                    (
1315                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1316                        Some(value),
1317                    ) => self.format_time(string, *value as i64 * 1000000, &None),
1318                    (
1319                        ConversionType::StringLower | ConversionType::StringUpper,
1320                        Some(value),
1321                    ) => self.format_string(string, &value.to_string()),
1322                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1323                    _ => {
1324                        exec_err!(
1325                            "Invalid conversion type: {:?} for Time32Millisecond",
1326                            self.conversion_type
1327                        )
1328                    }
1329                }
1330            }
1331            ScalarValue::Time64Microsecond(value) => {
1332                match (self.conversion_type, value) {
1333                    (
1334                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1335                        Some(value),
1336                    ) => self.format_time(string, *value * 1000, &None),
1337                    (
1338                        ConversionType::StringLower | ConversionType::StringUpper,
1339                        Some(value),
1340                    ) => self.format_string(string, &value.to_string()),
1341                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1342                    _ => {
1343                        exec_err!(
1344                            "Invalid conversion type: {:?} for Time64Microsecond",
1345                            self.conversion_type
1346                        )
1347                    }
1348                }
1349            }
1350            ScalarValue::Time64Nanosecond(value) => match (self.conversion_type, value) {
1351                (
1352                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1353                    Some(value),
1354                ) => self.format_time(string, *value, &None),
1355                (
1356                    ConversionType::StringLower | ConversionType::StringUpper,
1357                    Some(value),
1358                ) => self.format_string(string, &value.to_string()),
1359                (t, None) if t.supports_time() => self.format_string(string, "null"),
1360                _ => {
1361                    exec_err!(
1362                        "Invalid conversion type: {:?} for Time64Nanosecond",
1363                        self.conversion_type
1364                    )
1365                }
1366            },
1367            ScalarValue::TimestampSecond(value, zone) => {
1368                match (self.conversion_type, value) {
1369                    (
1370                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1371                        Some(value),
1372                    ) => self.format_time(string, value * 1000000000, zone),
1373                    (
1374                        ConversionType::StringLower | ConversionType::StringUpper,
1375                        Some(value),
1376                    ) => self.format_string(string, &value.to_string()),
1377                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1378                    _ => {
1379                        exec_err!(
1380                            "Invalid conversion type: {:?} for TimestampSecond",
1381                            self.conversion_type
1382                        )
1383                    }
1384                }
1385            }
1386            ScalarValue::TimestampMillisecond(value, zone) => {
1387                match (self.conversion_type, value) {
1388                    (
1389                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1390                        Some(value),
1391                    ) => self.format_time(string, *value * 1000000, zone),
1392                    (
1393                        ConversionType::StringLower | ConversionType::StringUpper,
1394                        Some(value),
1395                    ) => self.format_string(string, &value.to_string()),
1396
1397                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1398                    _ => {
1399                        exec_err!(
1400                            "Invalid conversion type: {:?} for TimestampMillisecond",
1401                            self.conversion_type
1402                        )
1403                    }
1404                }
1405            }
1406            ScalarValue::TimestampMicrosecond(value, zone) => {
1407                match (self.conversion_type, value) {
1408                    (
1409                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1410                        Some(value),
1411                    ) => self.format_time(string, value * 1000, zone),
1412                    (
1413                        ConversionType::StringLower | ConversionType::StringUpper,
1414                        Some(value),
1415                    ) => self.format_string(string, &value.to_string()),
1416                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1417                    _ => {
1418                        exec_err!(
1419                            "Invalid conversion type: {:?} for timestampmicrosecond",
1420                            self.conversion_type
1421                        )
1422                    }
1423                }
1424            }
1425
1426            ScalarValue::TimestampNanosecond(value, zone) => {
1427                match (self.conversion_type, value) {
1428                    (
1429                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1430                        Some(value),
1431                    ) => self.format_time(string, *value, zone),
1432                    (
1433                        ConversionType::StringLower | ConversionType::StringUpper,
1434                        Some(value),
1435                    ) => self.format_string(string, &value.to_string()),
1436                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1437                    _ => {
1438                        exec_err!(
1439                            "Invalid conversion type: {:?} for TimestampNanosecond",
1440                            self.conversion_type
1441                        )
1442                    }
1443                }
1444            }
1445            ScalarValue::Date32(value) => match (self.conversion_type, value) {
1446                (
1447                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1448                    Some(value),
1449                ) => self.format_date(string, *value as i64),
1450                (
1451                    ConversionType::StringLower | ConversionType::StringUpper,
1452                    Some(value),
1453                ) => self.format_string(string, &value.to_string()),
1454                (t, None) if t.supports_time() => self.format_string(string, "null"),
1455                _ => {
1456                    exec_err!(
1457                        "Invalid conversion type: {:?} for Date32",
1458                        self.conversion_type
1459                    )
1460                }
1461            },
1462            ScalarValue::Date64(value) => match (self.conversion_type, value) {
1463                (
1464                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1465                    Some(value),
1466                ) => self.format_date(string, *value),
1467                (
1468                    ConversionType::StringLower | ConversionType::StringUpper,
1469                    Some(value),
1470                ) => self.format_string(string, &value.to_string()),
1471                (t, None) if t.supports_time() => self.format_string(string, "null"),
1472                _ => {
1473                    exec_err!(
1474                        "Invalid conversion type: {:?} for Date64",
1475                        self.conversion_type
1476                    )
1477                }
1478            },
1479            ScalarValue::Null => {
1480                let value = "null".to_string();
1481                self.format_string(string, &value)
1482            }
1483            _ => exec_err!("Invalid scalar value: {value}"),
1484        }
1485    }
1486
1487    fn format_hex_float(&self, writer: &mut String, value: f64) -> Result<()> {
1488        // Handle special cases first
1489        let (sign, raw_exponent, mantissa) = value.to_parts();
1490        let is_subnormal = raw_exponent == 0;
1491
1492        let precision = match self.precision {
1493            NumericParam::FromArgument => None,
1494            NumericParam::Literal(p) => Some(p),
1495        };
1496
1497        // Determine if we need to normalize subnormal numbers
1498        // Only normalize when precision is specified and less than full mantissa width
1499        let mantissa_hex_digits = f64::MANTISSA_BITS.div_ceil(4); // 13 for f64
1500        let should_normalize = is_subnormal
1501            && precision.is_some()
1502            && precision.unwrap() < mantissa_hex_digits as i32;
1503
1504        let (value, raw_exponent, mantissa) = if should_normalize {
1505            let value = value * f64::SCALEUP;
1506            let (_, raw_exponent, mantissa) = value.to_parts();
1507            (value, raw_exponent, mantissa)
1508        } else {
1509            (value, raw_exponent, mantissa)
1510        };
1511
1512        let mut temp = String::new();
1513
1514        let sign_char = if sign {
1515            "-"
1516        } else if self.force_sign {
1517            "+"
1518        } else if self.space_sign {
1519            " "
1520        } else {
1521            ""
1522        };
1523        match value.category() {
1524            FpCategory::Nan => {
1525                write!(&mut temp, "NaN")?;
1526            }
1527            FpCategory::Infinite => {
1528                write!(&mut temp, "{sign_char}Infinity")?;
1529            }
1530            FpCategory::Zero => {
1531                write!(&mut temp, "{sign_char}0x0.0p0")?;
1532            }
1533            _ => {
1534                let bias = i32::from(f64::EXPONENT_BIAS);
1535                // Calculate actual exponent
1536                // For subnormal numbers, the exponent is 1 - bias (not 0 - bias)
1537                let exponent = if is_subnormal && !should_normalize {
1538                    1 - bias
1539                } else {
1540                    raw_exponent as i32 - bias
1541                };
1542
1543                // Handle precision for rounding
1544                let final_mantissa = if let Some(p) = precision {
1545                    if p == 0 {
1546                        // For precision 0, we still need at least 1 hex digit
1547                        // Round to the nearest integer mantissa value
1548                        let shift_distance = f64::MANTISSA_BITS as i32 - 4; // Keep 1 hex digit (4 bits)
1549                        let shifted = mantissa >> shift_distance;
1550                        let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1551                        let round_bit = 1u64 << (shift_distance - 1);
1552
1553                        // Round to nearest, ties to even
1554                        if rounding_bits > round_bit
1555                            || (rounding_bits == round_bit && (shifted & 1) != 0)
1556                        {
1557                            (shifted + 1) << shift_distance
1558                        } else {
1559                            shifted << shift_distance
1560                        }
1561                    } else {
1562                        // Apply rounding based on precision
1563                        let precision_bits = p * 4; // Each hex digit is 4 bits
1564                        let keep_bits = f64::MANTISSA_BITS as i32;
1565                        let shift_distance = keep_bits - precision_bits;
1566
1567                        if shift_distance > 0 {
1568                            let shifted = mantissa >> shift_distance;
1569                            let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1570                            let round_bit = 1u64 << (shift_distance - 1);
1571
1572                            // Round to nearest, ties to even
1573                            if rounding_bits > round_bit
1574                                || (rounding_bits == round_bit && (shifted & 1) != 0)
1575                            {
1576                                (shifted + 1) << shift_distance
1577                            } else {
1578                                shifted << shift_distance
1579                            }
1580                        } else {
1581                            mantissa
1582                        }
1583                    }
1584                } else {
1585                    mantissa
1586                };
1587
1588                if is_subnormal && !should_normalize {
1589                    // Original subnormal format: 0x0.xxxp-1022
1590                    if precision.is_some() {
1591                        // precision >= 13, show as subnormal
1592                        let full_hex = format!(
1593                            "{:0width$x}",
1594                            final_mantissa,
1595                            width = mantissa_hex_digits as usize
1596                        );
1597                        write!(&mut temp, "{sign_char}0x0.{full_hex}p{exponent}")?;
1598                    } else {
1599                        // No precision specified, show full subnormal
1600                        let hex_digits = format!(
1601                            "{:0width$x}",
1602                            final_mantissa,
1603                            width = mantissa_hex_digits as usize
1604                        );
1605                        write!(&mut temp, "{sign_char}0x0.{hex_digits}p{exponent}")?;
1606                    }
1607                } else {
1608                    // Normal format or normalized subnormal: 0x1.xxxpN
1609                    if let Some(p) = precision {
1610                        let p = if p == 0 { 1 } else { p };
1611                        let hex_digits = format!("{final_mantissa:x}");
1612                        let formatted_digits = if p as usize >= hex_digits.len() {
1613                            // Pad with zeros to match precision
1614                            format!("{:0<width$}", hex_digits, width = p as usize)
1615                        } else {
1616                            hex_digits[..p as usize].to_string()
1617                        };
1618                        write!(
1619                            &mut temp,
1620                            "{sign_char}0x1.{formatted_digits}p{exponent}"
1621                        )?;
1622                    } else {
1623                        // Default: show all significant digits
1624                        let mut hex_digits = format!("{final_mantissa:x}");
1625                        hex_digits = trim_trailing_0s_hex(&hex_digits).to_owned();
1626                        if hex_digits.is_empty() {
1627                            write!(&mut temp, "{sign_char}0x1.0p{exponent}")?;
1628                        } else {
1629                            write!(&mut temp, "{sign_char}0x1.{hex_digits}p{exponent}")?;
1630                        }
1631                    }
1632                }
1633                if should_normalize {
1634                    let (prefix, exp) = temp.split_once('p').unwrap();
1635                    let iexp = exp.parse::<i32>().unwrap() - f64::SCALEUP_POWER as i32;
1636                    temp = format!("{prefix}p{iexp}");
1637                }
1638            }
1639        };
1640
1641        if self.conversion_type.is_upper() {
1642            temp = temp.to_ascii_uppercase();
1643        }
1644
1645        let NumericParam::Literal(width) = self.width else {
1646            writer.push_str(&temp);
1647            return Ok(());
1648        };
1649        if self.left_adj {
1650            writer.push_str(&temp);
1651            for _ in temp.len()..width as usize {
1652                writer.push(' ');
1653            }
1654        } else if self.zero_pad && value.is_finite() {
1655            let delimiter = if self.conversion_type.is_upper() {
1656                "0X"
1657            } else {
1658                "0x"
1659            };
1660            let (prefix, suffix) = temp.split_once(delimiter).unwrap();
1661            writer.push_str(prefix);
1662            writer.push_str(delimiter);
1663            for _ in temp.len()..width as usize {
1664                writer.push('0');
1665            }
1666            writer.push_str(suffix);
1667        } else {
1668            while temp.len() < width as usize {
1669                temp = " ".to_owned() + &temp;
1670            }
1671            writer.push_str(&temp);
1672        };
1673        Ok(())
1674    }
1675
1676    fn format_char(&self, writer: &mut String, value: char) -> Result<()> {
1677        let upper = self.conversion_type.is_upper();
1678        match self.conversion_type {
1679            ConversionType::CharLower | ConversionType::CharUpper => {
1680                let NumericParam::Literal(width) = self.width else {
1681                    if upper {
1682                        writer.push(value.to_ascii_uppercase());
1683                    } else {
1684                        writer.push(value);
1685                    }
1686                    return Ok(());
1687                };
1688
1689                let start_len = writer.len();
1690                if self.left_adj {
1691                    if upper {
1692                        writer.push(value.to_ascii_uppercase());
1693                    } else {
1694                        writer.push(value);
1695                    }
1696                    while writer.len() - start_len < width as usize {
1697                        writer.push(' ');
1698                    }
1699                } else {
1700                    while writer.len() - start_len + value.len_utf8() < width as usize {
1701                        writer.push(' ');
1702                    }
1703                    if upper {
1704                        writer.push(value.to_ascii_uppercase());
1705                    } else {
1706                        writer.push(value);
1707                    }
1708                }
1709                Ok(())
1710            }
1711            _ => exec_err!(
1712                "Invalid conversion type: {:?} for char",
1713                self.conversion_type
1714            ),
1715        }
1716    }
1717
1718    fn format_boolean(&self, writer: &mut String, value: &Option<bool>) -> Result<()> {
1719        let value = value.unwrap_or(false);
1720
1721        let formatted = match self.conversion_type {
1722            ConversionType::BooleanUpper => {
1723                if value {
1724                    "TRUE"
1725                } else {
1726                    "FALSE"
1727                }
1728            }
1729            ConversionType::BooleanLower => {
1730                if value {
1731                    "true"
1732                } else {
1733                    "false"
1734                }
1735            }
1736            _ => {
1737                return exec_err!(
1738                    "Invalid conversion type: {:?} for boolean array",
1739                    self.conversion_type
1740                );
1741            }
1742        };
1743        self.format_str(writer, formatted)
1744    }
1745
1746    fn format_float(&self, writer: &mut String, value: f64) -> Result<()> {
1747        self.validate_grouping_separator()?;
1748
1749        let mut prefix = String::new();
1750        let mut suffix = String::new();
1751        let mut number = String::new();
1752        let upper = self.conversion_type.is_upper();
1753
1754        // set up the sign
1755        if value.is_sign_negative() {
1756            if self.negative_in_parentheses {
1757                prefix.push('(');
1758                suffix.push(')');
1759            } else {
1760                prefix.push('-');
1761            }
1762        } else if self.space_sign {
1763            prefix.push(' ');
1764        } else if self.force_sign {
1765            prefix.push('+');
1766        }
1767
1768        if value.is_finite() {
1769            let mut use_scientific = false;
1770            let mut strip_trailing_0s = false;
1771            let mut abs = value.abs();
1772            let mut exponent = abs.log10().floor() as i32;
1773            let mut precision = match self.precision {
1774                NumericParam::Literal(p) => p,
1775                _ => 6,
1776            };
1777            match self.conversion_type {
1778                ConversionType::DecFloatLower => {
1779                    // default
1780                }
1781                ConversionType::SciFloatLower => {
1782                    use_scientific = true;
1783                }
1784                ConversionType::SciFloatUpper => {
1785                    use_scientific = true;
1786                }
1787                ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
1788                    strip_trailing_0s = true;
1789                    if precision == 0 {
1790                        precision = 1;
1791                    }
1792                    // exponent signifies significant digits - we must round now
1793                    // to (re)calculate the exponent
1794                    let rounding_factor =
1795                        10.0_f64.powf((precision - 1 - exponent) as f64);
1796                    let rounded_fixed = (abs * rounding_factor).round();
1797                    abs = rounded_fixed / rounding_factor;
1798                    exponent = abs.log10().floor() as i32;
1799                    if exponent < -4 || exponent >= precision {
1800                        use_scientific = true;
1801                        precision -= 1;
1802                    } else {
1803                        // precision specifies the number of significant digits
1804                        precision -= 1 + exponent;
1805                    }
1806                }
1807                _ => {
1808                    return exec_err!(
1809                        "Invalid conversion type: {:?} for float",
1810                        self.conversion_type
1811                    );
1812                }
1813            }
1814
1815            if use_scientific {
1816                // Manual scientific notation formatting for uppercase E
1817                let mantissa = abs / 10.0_f64.powf(exponent as f64);
1818                let exp_char = if upper { 'E' } else { 'e' };
1819                number = format!("{mantissa:.prec$}", prec = precision as usize);
1820                if strip_trailing_0s {
1821                    number = trim_trailing_0s(&number).to_owned();
1822                }
1823                number = format!("{number}{exp_char}{exponent:+03}");
1824            } else {
1825                number = format!("{abs:.prec$}", prec = precision as usize);
1826                if strip_trailing_0s {
1827                    number = trim_trailing_0s(&number).to_owned();
1828                }
1829                if self.grouping_separator {
1830                    number = insert_thousands_separator(&number);
1831                }
1832            }
1833            if self.alt_form && !number.contains('.') {
1834                number += ".";
1835            }
1836        } else {
1837            // not finite
1838            match self.conversion_type {
1839                ConversionType::DecFloatLower
1840                | ConversionType::SciFloatLower
1841                | ConversionType::CompactFloatLower => {
1842                    if value.is_infinite() {
1843                        number.push_str("Infinity")
1844                    } else {
1845                        number.push_str("NaN")
1846                    }
1847                }
1848                ConversionType::SciFloatUpper | ConversionType::CompactFloatUpper => {
1849                    if value.is_infinite() {
1850                        number.push_str("INFINITY")
1851                    } else {
1852                        number.push_str("NAN")
1853                    }
1854                }
1855                _ => {
1856                    return exec_err!(
1857                        "Invalid conversion type: {:?} for float",
1858                        self.conversion_type
1859                    );
1860                }
1861            }
1862        }
1863        // Take care of padding
1864        let NumericParam::Literal(width) = self.width else {
1865            writer.push_str(&prefix);
1866            writer.push_str(&number);
1867            writer.push_str(&suffix);
1868            return Ok(());
1869        };
1870        if self.left_adj {
1871            let mut full_num = prefix + &number + &suffix;
1872            while full_num.len() < width as usize {
1873                full_num.push(' ');
1874            }
1875            writer.push_str(&full_num);
1876        } else if self.zero_pad && value.is_finite() {
1877            while prefix.len() + number.len() + suffix.len() < width as usize {
1878                prefix.push('0');
1879            }
1880            writer.push_str(&prefix);
1881            writer.push_str(&number);
1882            writer.push_str(&suffix);
1883        } else {
1884            let mut full_num = prefix + &number + &suffix;
1885            while full_num.len() < width as usize {
1886                full_num = " ".to_owned() + &full_num;
1887            }
1888            writer.push_str(&full_num);
1889        };
1890
1891        Ok(())
1892    }
1893
1894    fn format_signed(&self, writer: &mut String, value: i64) -> Result<()> {
1895        let negative = value < 0;
1896        let abs_val = value.abs();
1897
1898        let (sign_prefix, sign_suffix) = if negative && self.negative_in_parentheses {
1899            ("(".to_owned(), ")".to_owned())
1900        } else if negative {
1901            ("-".to_owned(), "".to_owned())
1902        } else if self.force_sign {
1903            ("+".to_owned(), "".to_owned())
1904        } else if self.space_sign {
1905            (" ".to_owned(), "".to_owned())
1906        } else {
1907            ("".to_owned(), "".to_owned())
1908        };
1909
1910        let mut mod_spec = *self;
1911        mod_spec.width = match self.width {
1912            NumericParam::Literal(w) => NumericParam::Literal(
1913                w - sign_prefix.len() as i32 - sign_suffix.len() as i32,
1914            ),
1915            _ => NumericParam::FromArgument,
1916        };
1917        let mut formatted = String::new();
1918        mod_spec.format_unsigned(&mut formatted, abs_val as u64)?;
1919        // put the sign a after any leading spaces
1920        let mut actual_number = &formatted[0..];
1921        let mut leading_spaces = &formatted[0..0];
1922        if let Some(first_non_space) = formatted.find(|c| c != ' ') {
1923            actual_number = &formatted[first_non_space..];
1924            leading_spaces = &formatted[0..first_non_space];
1925        }
1926        write!(
1927            writer,
1928            "{}{}{}{}",
1929            leading_spaces.to_owned(),
1930            sign_prefix,
1931            actual_number,
1932            sign_suffix
1933        )
1934        .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1935        Ok(())
1936    }
1937
1938    fn format_unsigned(&self, writer: &mut String, value: u64) -> Result<()> {
1939        let mut s = String::new();
1940        let mut alt_prefix = "";
1941        match self.conversion_type {
1942            ConversionType::DecInt => {
1943                let num_str = format!("{value}");
1944                s = if self.grouping_separator {
1945                    insert_thousands_separator(&num_str)
1946                } else {
1947                    num_str
1948                };
1949            }
1950            ConversionType::HexIntLower => {
1951                alt_prefix = "0x";
1952                write!(&mut s, "{value:x}")
1953                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1954            }
1955            ConversionType::HexIntUpper => {
1956                alt_prefix = "0X";
1957                write!(&mut s, "{value:X}")
1958                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1959            }
1960            ConversionType::OctInt => {
1961                alt_prefix = "0";
1962                write!(&mut s, "{value:o}")
1963                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1964            }
1965            _ => {
1966                return exec_err!(
1967                    "Invalid conversion type: {:?} for u64",
1968                    self.conversion_type
1969                );
1970            }
1971        }
1972        let mut prefix = if self.alt_form {
1973            alt_prefix.to_owned()
1974        } else {
1975            String::new()
1976        };
1977
1978        let formatted = if let NumericParam::Literal(width) = self.width {
1979            if self.left_adj {
1980                let mut num_str = prefix + &s;
1981                while num_str.len() < width as usize {
1982                    num_str.push(' ');
1983                }
1984                num_str
1985            } else if self.zero_pad {
1986                while prefix.len() + s.len() < width as usize {
1987                    prefix.push('0');
1988                }
1989                prefix + &s
1990            } else {
1991                let mut num_str = prefix + &s;
1992                while num_str.len() < width as usize {
1993                    num_str = " ".to_owned() + &num_str;
1994                }
1995                num_str
1996            }
1997        } else {
1998            prefix + &s
1999        };
2000        write!(writer, "{formatted}")
2001            .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2002        Ok(())
2003    }
2004
2005    fn format_str(&self, writer: &mut String, value: &str) -> Result<()> {
2006        // Take care of precision, putting the truncated string in `content`
2007        let precision: usize = match self.precision {
2008            NumericParam::Literal(p) => p,
2009            _ => i32::MAX,
2010        }
2011        .try_into()
2012        .unwrap_or_default();
2013        let content_len = {
2014            let mut content_len = precision.min(value.len());
2015            while !value.is_char_boundary(content_len) {
2016                content_len -= 1;
2017            }
2018            content_len
2019        };
2020        let content = &value[..content_len];
2021
2022        // Pad to width if needed, putting the padded string in `s`
2023
2024        if let NumericParam::Literal(width) = self.width {
2025            let start_len = writer.len();
2026            if self.left_adj {
2027                writer.push_str(content);
2028                while writer.len() - start_len < width as usize {
2029                    writer.push(' ');
2030                }
2031            } else {
2032                while writer.len() - start_len + content.len() < width as usize {
2033                    writer.push(' ');
2034                }
2035                writer.push_str(content);
2036            }
2037        } else {
2038            writer.push_str(content);
2039        }
2040        Ok(())
2041    }
2042
2043    fn format_string(&self, writer: &mut String, value: &str) -> Result<()> {
2044        if self.conversion_type.is_upper() {
2045            let upper = value.to_ascii_uppercase();
2046            self.format_str(writer, &upper)
2047        } else {
2048            self.format_str(writer, value)
2049        }
2050    }
2051
2052    fn format_decimal(&self, writer: &mut String, value: &str, scale: i64) -> Result<()> {
2053        self.validate_grouping_separator()?;
2054
2055        let mut prefix = String::new();
2056        let upper = self.conversion_type.is_upper();
2057
2058        // Parse as BigDecimal
2059        let decimal = value
2060            .parse::<BigInt>()
2061            .map_err(|e| exec_datafusion_err!("Failed to parse decimal: {}", e))?;
2062        let decimal = BigDecimal::from_bigint(decimal, scale);
2063
2064        // Handle sign
2065        // TODO: `negative_in_parentheses` (the `(` flag) is not implemented here.
2066        // Java/Spark wrap negative values in parentheses when this flag is set
2067        // (e.g. `%(,.2f` with -1234.5 → "(1,234.50)"), but this path always
2068        // uses a minus sign. See `format_float` for the correct implementation.
2069        let is_negative = decimal.sign() == Sign::Minus;
2070        let abs_decimal = decimal.abs();
2071
2072        if is_negative {
2073            prefix.push('-');
2074        } else if self.space_sign {
2075            prefix.push(' ');
2076        } else if self.force_sign {
2077            prefix.push('+');
2078        }
2079
2080        let exp_symb = if upper { 'E' } else { 'e' };
2081        let mut strip_trailing_0s = false;
2082
2083        // Get precision setting
2084        let mut precision = match self.precision {
2085            NumericParam::Literal(p) => p,
2086            _ => 6,
2087        };
2088
2089        let number = match self.conversion_type {
2090            ConversionType::DecFloatLower => {
2091                // Format as fixed-point decimal
2092                let mut n = self.format_decimal_fixed(
2093                    &abs_decimal,
2094                    precision,
2095                    strip_trailing_0s,
2096                )?;
2097                if self.grouping_separator {
2098                    n = insert_thousands_separator(&n);
2099                }
2100                n
2101            }
2102            ConversionType::SciFloatLower => self.format_decimal_scientific(
2103                &abs_decimal,
2104                precision,
2105                'e',
2106                strip_trailing_0s,
2107            )?,
2108            ConversionType::SciFloatUpper => self.format_decimal_scientific(
2109                &abs_decimal,
2110                precision,
2111                'E',
2112                strip_trailing_0s,
2113            )?,
2114            ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
2115                strip_trailing_0s = true;
2116                if precision == 0 {
2117                    precision = 1;
2118                }
2119                // Determine if we should use scientific notation
2120                let log10_val = abs_decimal.to_f64().map(|f| f.log10()).unwrap_or(0.0);
2121                if log10_val < -4.0 || log10_val >= precision as f64 {
2122                    self.format_decimal_scientific(
2123                        &abs_decimal,
2124                        precision - 1,
2125                        exp_symb,
2126                        strip_trailing_0s,
2127                    )?
2128                } else {
2129                    let mut n = self.format_decimal_fixed(
2130                        &abs_decimal,
2131                        precision - 1 - log10_val.floor() as i32,
2132                        strip_trailing_0s,
2133                    )?;
2134                    if self.grouping_separator {
2135                        n = insert_thousands_separator(&n);
2136                    }
2137                    n
2138                }
2139            }
2140            _ => {
2141                return exec_err!(
2142                    "Invalid conversion type: {:?} for decimal",
2143                    self.conversion_type
2144                );
2145            }
2146        };
2147
2148        // Handle padding
2149        let NumericParam::Literal(width) = self.width else {
2150            writer.push_str(&prefix);
2151            writer.push_str(&number);
2152            return Ok(());
2153        };
2154
2155        if self.left_adj {
2156            let mut full_num = prefix + &number;
2157            while full_num.len() < width as usize {
2158                full_num.push(' ');
2159            }
2160            writer.push_str(&full_num);
2161        } else if self.zero_pad {
2162            while prefix.len() + number.len() < width as usize {
2163                prefix.push('0');
2164            }
2165            writer.push_str(&prefix);
2166            writer.push_str(&number);
2167        } else {
2168            let mut full_num = prefix + &number;
2169            while full_num.len() < width as usize {
2170                full_num = " ".to_owned() + &full_num;
2171            }
2172            writer.push_str(&full_num);
2173        }
2174
2175        Ok(())
2176    }
2177
2178    fn format_decimal_fixed(
2179        &self,
2180        decimal: &BigDecimal,
2181        precision: i32,
2182        strip_trailing_0s: bool,
2183    ) -> Result<String> {
2184        if precision <= 0 {
2185            Ok(decimal.round(0).to_string())
2186        } else {
2187            // Use BigDecimal's with_scale method for precise decimal formatting
2188            let scaled = decimal.round(precision as i64);
2189            let mut number = scaled.to_string();
2190            if strip_trailing_0s {
2191                number = trim_trailing_0s(&number).to_owned();
2192            }
2193            Ok(number)
2194        }
2195    }
2196
2197    fn format_decimal_scientific(
2198        &self,
2199        decimal: &BigDecimal,
2200        precision: i32,
2201        exp_char: char,
2202        strip_trailing_0s: bool,
2203    ) -> Result<String> {
2204        // Convert to f64 for scientific notation (may lose precision for very large numbers)
2205        let float_val = decimal.to_f64().unwrap_or(0.0);
2206        if float_val == 0.0 {
2207            return Ok(format!("0{exp_char}+00"));
2208        }
2209
2210        let abs_val = float_val.abs();
2211        let exponent = abs_val.log10().floor() as i32;
2212        let mantissa = abs_val / 10.0_f64.powf(exponent as f64);
2213
2214        let mut number = if precision <= 0 {
2215            format!("{mantissa:.0}")
2216        } else {
2217            format!("{mantissa:.prec$}", prec = precision as usize)
2218        };
2219
2220        if strip_trailing_0s {
2221            number = trim_trailing_0s(&number).to_owned();
2222        }
2223
2224        Ok(format!("{number}{exp_char}{exponent:+03}"))
2225    }
2226
2227    fn format_time(
2228        &self,
2229        writer: &mut String,
2230        timestamp_nanos: i64,
2231        timezone: &Option<Arc<str>>,
2232    ) -> Result<()> {
2233        let upper = self.conversion_type.is_upper();
2234        match &self.conversion_type {
2235            ConversionType::TimeLower(time_format)
2236            | ConversionType::TimeUpper(time_format) => {
2237                let formatted =
2238                    self.format_time_component(timestamp_nanos, *time_format, timezone)?;
2239                let result = if upper {
2240                    formatted.to_uppercase()
2241                } else {
2242                    formatted
2243                };
2244                write!(writer, "{result}")
2245                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2246                Ok(())
2247            }
2248            _ => exec_err!(
2249                "Invalid conversion type for time: {:?}",
2250                self.conversion_type
2251            ),
2252        }
2253    }
2254
2255    fn format_date(&self, writer: &mut String, date_days: i64) -> Result<()> {
2256        // Convert days since epoch to timestamp in nanoseconds
2257        let timestamp_nanos = date_days * 24 * 60 * 60 * 1_000_000_000;
2258        self.format_time(writer, timestamp_nanos, &None)
2259    }
2260
2261    fn format_time_component(
2262        &self,
2263        timestamp_nanos: i64,
2264        time_format: TimeFormat,
2265        _timezone: &Option<Arc<str>>,
2266    ) -> Result<String> {
2267        // Convert nanoseconds to seconds and nanoseconds remainder
2268        let secs = timestamp_nanos / 1_000_000_000;
2269        let nanos = (timestamp_nanos % 1_000_000_000) as u32;
2270
2271        // Create DateTime from timestamp
2272        let dt = DateTime::<Utc>::from_timestamp(secs, nanos).ok_or_else(|| {
2273            exec_datafusion_err!("Invalid timestamp: {}", timestamp_nanos)
2274        })?;
2275
2276        match time_format {
2277            TimeFormat::HUpper => Ok(format!("{:02}", dt.hour())),
2278            TimeFormat::IUpper => {
2279                let hour_12 = match dt.hour12() {
2280                    (true, h) => h,  // PM
2281                    (false, h) => h, // AM
2282                };
2283                Ok(format!("{hour_12:02}"))
2284            }
2285            TimeFormat::KLower => Ok(format!("{}", dt.hour())),
2286            TimeFormat::LLower => {
2287                let hour_12 = match dt.hour12() {
2288                    (true, h) => h,  // PM
2289                    (false, h) => h, // AM
2290                };
2291                Ok(format!("{hour_12}"))
2292            }
2293            TimeFormat::MUpper => Ok(format!("{:02}", dt.minute())),
2294            TimeFormat::SUpper => Ok(format!("{:02}", dt.second())),
2295            TimeFormat::LUpper => Ok(format!("{:03}", dt.timestamp_millis() % 1000)),
2296            TimeFormat::NUpper => Ok(format!("{:09}", dt.nanosecond())),
2297            TimeFormat::PLower => {
2298                let (is_pm, _) = dt.hour12();
2299                Ok(if is_pm {
2300                    "pm".to_string()
2301                } else {
2302                    "am".to_string()
2303                })
2304            }
2305            TimeFormat::ZLower => Ok("+0000".to_string()), // UTC timezone offset
2306            TimeFormat::ZUpper => Ok("UTC".to_string()),   // UTC timezone name
2307            TimeFormat::SLower => Ok(format!("{}", dt.timestamp())),
2308            TimeFormat::QUpper => Ok(format!("{}", dt.timestamp_millis())),
2309            TimeFormat::BUpper => Ok(dt.format("%B").to_string()), // Full month name
2310            TimeFormat::BLower => Ok(dt.format("%b").to_string()), // Abbreviated month name
2311            TimeFormat::AUpper => Ok(dt.format("%A").to_string()), // Full weekday name
2312            TimeFormat::ALower => Ok(dt.format("%a").to_string()), // Abbreviated weekday name
2313            TimeFormat::CUpper => Ok(format!("{:02}", dt.year() / 100)),
2314            TimeFormat::YUpper => Ok(format!("{:04}", dt.year())),
2315            TimeFormat::YLower => Ok(format!("{:02}", dt.year() % 100)),
2316            TimeFormat::JLower => Ok(format!("{:03}", dt.ordinal())), // Day of year
2317            TimeFormat::MLower => Ok(format!("{:02}", dt.month())),
2318            TimeFormat::DLower => Ok(format!("{:02}", dt.day())),
2319            TimeFormat::ELower => Ok(format!("{}", dt.day())),
2320            TimeFormat::RUpper => Ok(dt.format("%H:%M").to_string()),
2321            TimeFormat::TUpper => Ok(dt.format("%H:%M:%S").to_string()),
2322            TimeFormat::RLower => {
2323                let (is_pm, hour_12) = dt.hour12();
2324                let am_pm = if is_pm { "PM" } else { "AM" };
2325                Ok(format!(
2326                    "{:02}:{:02}:{:02} {}",
2327                    hour_12,
2328                    dt.minute(),
2329                    dt.second(),
2330                    am_pm
2331                ))
2332            }
2333            TimeFormat::DUpper => Ok(dt.format("%m/%d/%y").to_string()),
2334            TimeFormat::FUpper => Ok(dt.format("%Y-%m-%d").to_string()),
2335            TimeFormat::CLower => Ok(dt.format("%a %b %d %H:%M:%S UTC %Y").to_string()),
2336        }
2337    }
2338}
2339
/// Floating-point values that can be rendered using Spark's spelling of the
/// special values.
trait FloatFormattable: std::fmt::Display {
    /// Floating-point category of the value.
    fn category(&self) -> FpCategory;

    /// Whether the value counts as negative; selects between `Infinity` and
    /// `-Infinity` in [`FloatFormattable::spark_string`].
    fn negative(&self) -> bool;

    /// Text form of the value: `NaN`, `Infinity` / `-Infinity` for the special
    /// values, and the type's `Display` output for everything else.
    fn spark_string(&self) -> String {
        let category = self.category();
        if category == FpCategory::Nan {
            return "NaN".to_string();
        }
        if category == FpCategory::Infinite {
            let text = if self.negative() {
                "-Infinity"
            } else {
                "Infinity"
            };
            return text.to_string();
        }
        self.to_string()
    }
}
2358
2359impl FloatFormattable for f32 {
2360    fn category(&self) -> FpCategory {
2361        self.classify()
2362    }
2363
2364    fn negative(&self) -> bool {
2365        self.is_sign_negative()
2366    }
2367}
2368
2369impl FloatFormattable for f64 {
2370    fn category(&self) -> FpCategory {
2371        self.classify()
2372    }
2373
2374    fn negative(&self) -> bool {
2375        self.is_sign_negative()
2376    }
2377}
2378
/// Access to the raw binary layout of a floating-point type.
///
/// NOTE(review): the consumers of this trait are outside this excerpt;
/// presumably it backs bit-level (hexadecimal) float formatting — confirm.
trait FloatBits: FloatFormattable {
    /// Width of the mantissa field in bits (52 in the `f64` implementation).
    const MANTISSA_BITS: u8;
    /// Exponent bias of the format (1023 in the `f64` implementation).
    const EXPONENT_BIAS: u16;
    /// Power of two from which `SCALEUP` is built (54 in the `f64` implementation).
    const SCALEUP_POWER: u8;
    /// Two raised to `SCALEUP_POWER`, expressed in the float type itself.
    // NOTE(review): presumably used to scale very small values up before
    // their bits are inspected — confirm against the caller.
    const SCALEUP: Self;

    /// Splits the value into `(sign bit set, raw exponent field, raw mantissa field)`.
    fn to_parts(&self) -> (bool, u16, u64);
}
2387
2388impl FloatBits for f64 {
2389    const MANTISSA_BITS: u8 = 52;
2390    const EXPONENT_BIAS: u16 = 1023;
2391    const SCALEUP_POWER: u8 = 54;
2392    const SCALEUP: f64 = (1_i64 << Self::SCALEUP_POWER) as f64;
2393
2394    fn to_parts(&self) -> (bool, u16, u64) {
2395        let bits = self.to_bits();
2396        let sign: bool = (bits >> 63) == 1;
2397        let exponent = ((bits >> 52) & 0x7FF) as u16;
2398        let mantissa = bits & 0x000F_FFFF_FFFF_FFFF;
2399        (sign, exponent, mantissa)
2400    }
2401}
2402
/// Inserts thousands separators (`,`) into the integer part of a numeric string.
/// For example, `"1234567.89"` becomes `"1,234,567.89"`.
///
/// A single leading `+` or `-` is kept in front and is not counted as a digit,
/// so `"-123456"` becomes `"-123,456"`. Everything from the first `.` onwards
/// is copied unchanged. If the integer part contains anything other than ASCII
/// digits (for example `"Infinity"`), the input is returned as is.
fn insert_thousands_separator(number: &str) -> String {
    let (int_part, frac_part) = match number.find('.') {
        Some(pos) => number.split_at(pos),
        None => (number, ""),
    };

    // Peel off an optional sign so that grouping is computed on digits only.
    let sign_len = usize::from(int_part.starts_with(['+', '-']));
    let (sign, digits) = int_part.split_at(sign_len);

    if !digits.bytes().all(|b| b.is_ascii_digit()) {
        return number.to_owned();
    }

    let mut result = String::with_capacity(number.len() + digits.len() / 3);
    result.push_str(sign);
    for (i, c) in digits.char_indices() {
        // A separator goes before every digit that starts a full group of three.
        if i > 0 && (digits.len() - i) % 3 == 0 {
            result.push(',');
        }
        result.push(c);
    }
    result.push_str(frac_part);
    result
}
2420
/// Removes trailing `0` characters from a decimal string.
///
/// Only strings containing a `.` are touched; anything else is returned
/// unchanged. The decimal point itself is never removed, so `"1.000"` becomes
/// `"1."`.
fn trim_trailing_0s(number: &str) -> &str {
    if !number.contains('.') {
        return number;
    }
    number.trim_end_matches('0')
}
2431
/// Removes trailing `0` characters from a digit string, whether or not it
/// contains a `.`.
///
/// A string made up solely of `0`s (or an empty string) is returned unchanged.
fn trim_trailing_0s_hex(number: &str) -> &str {
    match number.trim_end_matches('0') {
        // Nothing but zeros: keep the input as it is.
        "" => number,
        trimmed => trimmed,
    }
}
2440
/// Unit tests for `FormatStringFunc` and the string helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::StringArray;
    use arrow::datatypes::DataType::Utf8;

    /// The nullability of the returned field follows the nullability of the
    /// format-string argument's field.
    #[test]
    fn test_format_string_nullability() -> Result<()> {
        let func = FormatStringFunc::new();
        let nullable_format: FieldRef = Arc::new(Field::new("fmt", Utf8, true));

        let out_nullable = func.return_field_from_args(ReturnFieldArgs {
            arg_fields: &[nullable_format],
            scalar_arguments: &[None],
        })?;

        assert!(
            out_nullable.is_nullable(),
            "format_string(fmt, ...) should be nullable when fmt is nullable"
        );
        let non_nullable_format: FieldRef = Arc::new(Field::new("fmt", Utf8, false));

        let out_non_nullable = func.return_field_from_args(ReturnFieldArgs {
            arg_fields: &[non_nullable_format],
            scalar_arguments: &[None],
        })?;

        assert!(
            !out_non_nullable.is_nullable(),
            "format_string(fmt, ...) should NOT be nullable when fmt is NOT nullable"
        );

        Ok(())
    }

    /// `%c` must return an error mentioning the invalid Unicode scalar value
    /// for every out-of-range integer argument, whatever its width.
    #[test]
    fn test_format_char_invalid_codepoint_errors() {
        use arrow::datatypes::Field;
        use datafusion_common::config::ConfigOptions;

        let func = FormatStringFunc::new();
        // Spark/Java reject any negative integer or any value outside
        // `0..=0x10FFFF` (and the surrogate range) regardless of integer
        // width, so all of these inputs must surface a SQL error rather than
        // panicking or silently reinterpreting the bits as unsigned.
        let cases: Vec<(&str, ScalarValue)> = vec![
            ("Int8(-1)", ScalarValue::Int8(Some(-1))),
            ("Int16(-1)", ScalarValue::Int16(Some(-1))),
            ("Int16(-10000)", ScalarValue::Int16(Some(-10000))),
            ("Int32(-1)", ScalarValue::Int32(Some(-1))),
            ("Int32(0x110000)", ScalarValue::Int32(Some(0x110000))),
            ("Int64(0x1FFFFFFFF)", ScalarValue::Int64(Some(0x1FFFFFFFF))),
            ("Int64(-1)", ScalarValue::Int64(Some(-1))),
            ("UInt16(0xD800)", ScalarValue::UInt16(Some(0xD800))),
            ("UInt32(0x110000)", ScalarValue::UInt32(Some(0x110000))),
            (
                "UInt64(0x1_0000_0000)",
                ScalarValue::UInt64(Some(0x1_0000_0000)),
            ),
        ];

        // Each case is invoked directly as a one-row scalar call so that the
        // returned error can be inspected.
        for (label, value) in cases {
            let fmt = ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string())));
            let arg_data_type = value.data_type();
            let arg = ColumnarValue::Scalar(value);
            let arg_fields = vec![
                Arc::new(Field::new("fmt", Utf8, false)),
                Arc::new(Field::new("v", arg_data_type, false)),
            ];
            let res = func.invoke_with_args(ScalarFunctionArgs {
                args: vec![fmt, arg],
                number_rows: 1,
                arg_fields,
                return_field: Arc::new(Field::new("o", Utf8, false)),
                config_options: Arc::new(ConfigOptions::default()),
            });
            assert!(
                res.is_err(),
                "format_string('[%c]', {label}) should error, got Ok"
            );
            let err = res.unwrap_err().to_string();
            assert!(
                err.contains("invalid Unicode scalar value for %c"),
                "unexpected error for {label}: {err}"
            );
        }
    }

    /// `%c` renders valid code points supplied through several integer types.
    #[test]
    fn test_format_char_valid_codepoint_succeeds() {
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Int32(Some(0x1F680))),
            ],
            Ok(Some("[\u{1F680}]")),
            &str,
            Utf8,
            StringArray
        );
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::UInt32(Some(0x10FFFF))),
            ],
            Ok(Some("[\u{10FFFF}]")),
            &str,
            Utf8,
            StringArray
        );
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Int16(Some(65))),
            ],
            Ok(Some("[A]")),
            &str,
            Utf8,
            StringArray
        );
        // Int8 / UInt8 can never produce an invalid codepoint for non-negative
        // values, but they must still flow through the validating helper.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Int8(Some(97))),
            ],
            Ok(Some("[a]")),
            &str,
            Utf8,
            StringArray
        );
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::UInt8(Some(255))),
            ],
            Ok(Some("[\u{00FF}]")),
            &str,
            Utf8,
            StringArray
        );
    }

    /// Direct checks of `insert_thousands_separator` on unsigned decimal
    /// strings, with and without a fractional part.
    #[test]
    fn test_insert_thousands_separator() {
        assert_eq!(insert_thousands_separator("1234567.89"), "1,234,567.89");
        assert_eq!(insert_thousands_separator("123.45"), "123.45");
        assert_eq!(insert_thousands_separator("1234"), "1,234");
        assert_eq!(insert_thousands_separator("12"), "12");
        assert_eq!(insert_thousands_separator("0.5"), "0.5");
        assert_eq!(
            insert_thousands_separator("1234567890.1234"),
            "1,234,567,890.1234"
        );
        assert_eq!(insert_thousands_separator("1000"), "1,000");
        assert_eq!(insert_thousands_separator("100"), "100");
    }

    /// `%,.2f` groups the integer part of a `Float64` argument.
    #[test]
    fn test_grouping_separator_float() -> Result<()> {
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// `%,.2f` groups the integer part of a `Decimal128` argument.
    #[test]
    fn test_grouping_separator_decimal() -> Result<()> {
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `,` flag combined with `%e` / `%E` on a `Float64` argument is an
    /// execution error.
    #[test]
    fn test_grouping_separator_scientific_float() -> Result<()> {
        // %,e — Java/Spark reject grouping separator with scientific notation
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        // %,E — uppercase scientific also rejected
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,E".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'E'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        // %,.0e — precision 0 scientific with grouping also rejected
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `,` flag with `%g` / `%G` on a `Float64` argument: commas appear
    /// only when the result is in fixed-point form.
    #[test]
    fn test_grouping_separator_compact_float() -> Result<()> {
        // %,g with large number — triggers scientific, no commas
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1.23457e+06")),
            &str,
            Utf8,
            StringArray
        );
        // %,g with small number — triggers fixed-point, commas in integer part
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(12345.6))),
            ],
            Ok(Some("12,345.6")),
            &str,
            Utf8,
            StringArray
        );
        // %,.0g — precision 0 compact with grouping (large number, scientific)
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1e+06")),
            &str,
            Utf8,
            StringArray
        );
        // %,G — uppercase compact
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,G".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1.23457E+06")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `,` flag combined with `%e` on a `Decimal128` argument is an
    /// execution error.
    #[test]
    fn test_grouping_separator_scientific_decimal() -> Result<()> {
        // %,e on decimal — Java/Spark reject grouping separator with scientific notation
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        // %,.0e on decimal — also rejected
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `,` flag with `%g` on a `Decimal128` argument: commas appear only
    /// when the result is in fixed-point form.
    #[test]
    fn test_grouping_separator_compact_decimal() -> Result<()> {
        // %,g on decimal — large number triggers scientific, no commas
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("1.23457e+06")),
            &str,
            Utf8,
            StringArray
        );
        // %,g on decimal — small number triggers fixed-point, commas expected
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(1234560), 10, 2)),
            ],
            Ok(Some("12,345.6")),
            &str,
            Utf8,
            StringArray
        );
        // %,.0g on decimal — precision 0 compact with grouping (scientific)
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("1e+06")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// Grouping combined with a width and the `0`, `+` and `-` flags on a
    /// `Float64` argument.
    #[test]
    fn test_grouping_separator_width_sign_float() -> Result<()> {
        // %0,15.2f — zero-pad + grouping + width
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("0001,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        // %+,15.2f — force-sign + grouping + width (space-padded)
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("  +1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        // %-,15.2f — left-adjust + grouping + width
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%-,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1,234,567.89   ")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// Grouping combined with a width and the `0` and `+` flags on a
    /// `Decimal128` argument.
    #[test]
    fn test_grouping_separator_width_sign_decimal() -> Result<()> {
        // %0,15.2f — zero-pad + grouping + width on decimal
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("0001,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        // %+,15.2f — force-sign + grouping + width on decimal
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("  +1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `(` flag wraps a negative `Float64` in parentheses, together with
    /// grouping and width.
    #[test]
    fn test_grouping_separator_parentheses_float() -> Result<()> {
        // %(,15.2f with negative — parentheses + grouping + width
        // Java: String.format("%(,15.2f", -1234.5) → "     (1,234.50)"
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%(,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(-1234.5))),
            ],
            Ok(Some("     (1,234.50)")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// Pins the current (known-divergent) output of the `(` flag on a
    /// negative `Decimal128`: a minus sign instead of parentheses.
    #[test]
    fn test_grouping_separator_parentheses_decimal() -> Result<()> {
        // %(,15.2f on negative decimal — format_decimal ignores negative_in_parentheses,
        // always uses '-'. Check TODO in fn format_decimal
        // Java: String.format("%(,15.2f", -1234.5) → "     (1,234.50)"
        // Ours: "      -1,234.50" (minus sign, no parens)
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%(,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(-123450), 10, 2)),
            ],
            Ok(Some("      -1,234.50")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }
}
datafusion_spark/function/string/format_string.rs

datafusion_spark/function/string/
format_string.rs