datafusion_spark/function/string/
format_string.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::fmt::Write;
20use std::sync::Arc;
21
22use core::num::FpCategory;
23
24use arrow::{
25    array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray},
26    datatypes::DataType,
27};
28use bigdecimal::{
29    num_bigint::{BigInt, Sign},
30    BigDecimal, ToPrimitive,
31};
32use chrono::{DateTime, Datelike, Timelike, Utc};
33use datafusion_common::{
34    exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue,
35};
36use datafusion_expr::{
37    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature,
38    Volatility,
39};
40
41/// Spark-compatible `format_string` expression
42/// <https://spark.apache.org/docs/latest/api/sql/index.html#format_string>
43#[derive(Debug, PartialEq, Eq, Hash)]
44pub struct FormatStringFunc {
45    signature: Signature,
46    aliases: Vec<String>,
47}
48
49impl Default for FormatStringFunc {
50    fn default() -> Self {
51        Self::new()
52    }
53}
54
55impl FormatStringFunc {
56    pub fn new() -> Self {
57        Self {
58            signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable),
59            aliases: vec![String::from("printf")],
60        }
61    }
62}
63
64impl ScalarUDFImpl for FormatStringFunc {
65    fn as_any(&self) -> &dyn Any {
66        self
67    }
68
69    fn name(&self) -> &str {
70        "format_string"
71    }
72
73    fn aliases(&self) -> &[String] {
74        &self.aliases
75    }
76
77    fn signature(&self) -> &Signature {
78        &self.signature
79    }
80
81    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
82        match arg_types[0] {
83            DataType::Null => Ok(DataType::Utf8),
84            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(arg_types[0].clone()),
85            _ => plan_err!("The format_string function expects the first argument to be Utf8, LargeUtf8 or Utf8View")
86        }
87    }
88
89    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
90        let len = args.args.iter().find_map(|arg| match arg {
91            ColumnarValue::Scalar(_) => None,
92            ColumnarValue::Array(a) => Some(a.len()),
93        });
94        let is_scalar = len.is_none();
95        let data_types = args.args[1..]
96            .iter()
97            .map(|arg| arg.data_type())
98            .collect::<Vec<_>>();
99        let fmt_type = args.args[0].data_type();
100
101        match &args.args[0] {
102            ColumnarValue::Scalar(ScalarValue::Null) => {
103                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
104            }
105            ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {
106                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
107            }
108            ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {
109                Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
110            }
111            ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
112                Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
113            }
114            ColumnarValue::Scalar(ScalarValue::Utf8(Some(fmt)))
115            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(fmt)))
116            | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(fmt))) => {
117                let formatter = Formatter::parse(fmt, &data_types)?;
118                let mut result = Vec::with_capacity(len.unwrap_or(1));
119                for i in 0..len.unwrap_or(1) {
120                    let scalars = args.args[1..]
121                        .iter()
122                        .map(|arg| try_to_scalar(arg.clone(), i))
123                        .collect::<Result<Vec<_>>>()?;
124                    let formatted = formatter.format(&scalars)?;
125                    result.push(formatted);
126                }
127                if is_scalar {
128                    let scalar_result = result.pop().unwrap();
129                    match fmt_type {
130                        DataType::Utf8 => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
131                            Some(scalar_result),
132                        ))),
133                        DataType::LargeUtf8 => Ok(ColumnarValue::Scalar(
134                            ScalarValue::LargeUtf8(Some(scalar_result)),
135                        )),
136                        DataType::Utf8View => Ok(ColumnarValue::Scalar(
137                            ScalarValue::Utf8View(Some(scalar_result)),
138                        )),
139                        _ => unreachable!(),
140                    }
141                } else {
142                    let array: ArrayRef = match fmt_type {
143                        DataType::Utf8 => Arc::new(StringArray::from(result)),
144                        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
145                        DataType::Utf8View => Arc::new(StringViewArray::from(result)),
146                        _ => unreachable!(),
147                    };
148                    Ok(ColumnarValue::Array(array))
149                }
150            }
151            ColumnarValue::Array(fmts) => {
152                let mut result = Vec::with_capacity(len.unwrap());
153                for i in 0..len.unwrap() {
154                    let fmt = ScalarValue::try_from_array(fmts, i)?;
155                    match fmt.try_as_str() {
156                        Some(Some(fmt)) => {
157                            let formatter = Formatter::parse(fmt, &data_types)?;
158                            let scalars = args.args[1..]
159                                .iter()
160                                .map(|arg| try_to_scalar(arg.clone(), i))
161                                .collect::<Result<Vec<_>>>()?;
162                            let formatted = formatter.format(&scalars)?;
163                            result.push(Some(formatted));
164                        }
165                        Some(None) => {
166                            result.push(None);
167                        }
168                        _ => unreachable!(),
169                    }
170                }
171                let array: ArrayRef = match fmt_type {
172                    DataType::Utf8 => Arc::new(StringArray::from(result)),
173                    DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
174                    DataType::Utf8View => Arc::new(StringViewArray::from(result)),
175                    _ => unreachable!(),
176                };
177                Ok(ColumnarValue::Array(array))
178            }
179            _ => exec_err!(
180                "The format_string function expects the first argument to be a string"
181            ),
182        }
183    }
184}
185
186fn try_to_scalar(arg: ColumnarValue, index: usize) -> Result<ScalarValue> {
187    match arg {
188        ColumnarValue::Scalar(scalar) => Ok(scalar),
189        ColumnarValue::Array(array) => ScalarValue::try_from_array(&array, index),
190    }
191}
192
193/// Compatible with `java.util.Formatter`
194#[derive(Debug)]
195pub struct Formatter<'a> {
196    pub elements: Vec<FormatElement<'a>>,
197    pub arg_num: usize,
198}
199
200impl<'a> Formatter<'a> {
201    pub fn new(elements: Vec<FormatElement<'a>>) -> Self {
202        let arg_num = elements
203            .iter()
204            .map(|element| match element {
205                FormatElement::Format(spec) => spec.argument_index,
206                _ => 0,
207            })
208            .max()
209            .unwrap_or(0);
210        Self { elements, arg_num }
211    }
212
213    /// Parses a printf-style format string into a Formatter with validation.
214    ///
215    /// This method implements a comprehensive parser for Java `java.util.Formatter` syntax,
216    /// processing the format string character by character to identify and validate format
217    /// specifiers against the provided argument types.
218    ///
219    /// # Arguments
220    ///
221    /// * `fmt` - The format string containing literal text and format specifiers
222    /// * `arg_types` - Array of DataFusion DataTypes corresponding to the arguments
223    ///
224    /// # Parsing Process
225    ///
226    /// The parser operates in several phases:
227    ///
228    /// 1. **String Scanning**: Iterates through the format string looking for '%' characters
229    ///    that mark the beginning of format specifiers or special sequences.
230    ///
231    /// 2. **Special Sequence Handling**: Processes escape sequences:
232    ///    - `%%` becomes a literal '%' character
233    ///    - `%n` becomes a newline character
234    ///    - `%<` indicates reuse of the previous argument with a new format specifier
235    ///
236    /// 3. **Argument Index Resolution**: Determines which argument each format specifier refers to:
237    ///    - Sequential indexing: arguments are consumed in order (1, 2, 3, ...)
238    ///    - Positional indexing: explicit argument position using `%n$` syntax
239    ///    - Previous argument reuse: `%<` references the last used argument
240    ///
241    /// 4. **Format Specifier Parsing**: For each format specifier, extracts:
242    ///    - Flags (-, +, space, #, 0, ',', '(')
243    ///    - Width specification (minimum field width)
244    ///    - Precision specification (decimal places or maximum characters)
245    ///    - Conversion type (d, s, f, x, etc.)
246    ///
247    /// 5. **Type Validation**: Verifies that each format specifier's conversion type
248    ///    is compatible with the corresponding argument's DataType. For example:
249    ///    - Integer conversions (%d, %x, %o) require integer DataTypes
250    ///    - String conversions (%s, %S) accept any DataType
251    ///    - Float conversions (%f, %e, %g) require numeric DataTypes
252    ///
253    /// 6. **Element Construction**: Creates FormatElement instances for:
254    ///    - Verbatim text sections (copied directly to output)
255    ///    - Validated format specifiers with their parsed parameters
256    ///
257    /// # Internal State Management
258    ///
259    /// The parser maintains several state variables:
260    /// - `argument_index`: Tracks the current sequential argument position
261    /// - `prev`: Remembers the last used argument index for `%<` references
262    /// - `res`: Accumulates the parsed FormatElement instances
263    /// - `rem`: Points to the remaining unparsed portion of the format string
264    ///
265    /// # Validation and Error Handling
266    ///
267    /// The parser performs extensive validation including:
268    /// - Argument index bounds checking against the provided arg_types array
269    /// - Format specifier syntax validation
270    /// - Type compatibility verification between conversion types and DataTypes
271    /// - Detection of malformed numeric parameters and invalid flag combinations
272    ///
273    /// # Returns
274    ///
275    /// Returns a Formatter containing the parsed elements and the maximum argument
276    /// index encountered, enabling efficient argument validation during formatting.
277    pub fn parse(fmt: &'a str, arg_types: &[DataType]) -> Result<Self> {
278        // find the first %
279        let mut res = Vec::new();
280
281        let mut rem = fmt;
282        let mut argument_index = 0;
283
284        let mut prev: Option<usize> = None;
285
286        while !rem.is_empty() {
287            if let Some((verbatim_prefix, rest)) = rem.split_once('%') {
288                if !verbatim_prefix.is_empty() {
289                    res.push(FormatElement::Verbatim(verbatim_prefix));
290                }
291                if let Some(rest) = rest.strip_prefix('%') {
292                    res.push(FormatElement::Verbatim("%"));
293                    rem = rest;
294                    continue;
295                }
296                if let Some(rest) = rest.strip_prefix('n') {
297                    res.push(FormatElement::Verbatim("\n"));
298                    rem = rest;
299                    continue;
300                }
301                if let Some(rest) = rest.strip_prefix('<') {
302                    // %< means reuse the previous argument
303                    let Some(p) = prev else {
304                        return exec_err!("No previous argument to reference");
305                    };
306                    let (spec, rest) =
307                        take_conversion_specifier(rest, p, arg_types[p - 1].clone())?;
308                    res.push(FormatElement::Format(spec));
309                    rem = rest;
310                    continue;
311                }
312
313                let (current_argument_index, rest2) = take_numeric_param(rest, false);
314                let (current_argument_index, rest) =
315                    match (current_argument_index, rest2.starts_with('$')) {
316                        (NumericParam::Literal(index), true) => {
317                            (index as usize, &rest2[1..])
318                        }
319                        (NumericParam::FromArgument, true) => {
320                            return exec_err!("Invalid numeric parameter")
321                        }
322                        (_, false) => {
323                            argument_index += 1;
324                            (argument_index, rest)
325                        }
326                    };
327                if current_argument_index == 0 || current_argument_index > arg_types.len()
328                {
329                    return exec_err!(
330                        "Argument index {} is out of bounds",
331                        current_argument_index
332                    );
333                }
334
335                let (spec, rest) = take_conversion_specifier(
336                    rest,
337                    current_argument_index,
338                    arg_types[current_argument_index - 1].clone(),
339                )
340                .map_err(|e| exec_datafusion_err!("{:?}, format string: {:?}", e, fmt))?;
341                res.push(FormatElement::Format(spec));
342                prev = Some(spec.argument_index);
343                rem = rest;
344            } else {
345                res.push(FormatElement::Verbatim(rem));
346                break;
347            }
348        }
349
350        Ok(Self::new(res))
351    }
352
353    pub fn format(&self, args: &[ScalarValue]) -> Result<String> {
354        if args.len() < self.arg_num {
355            return exec_err!(
356                "Expected at least {} arguments, got {}",
357                self.arg_num,
358                args.len()
359            );
360        }
361        let mut string = String::new();
362        for element in &self.elements {
363            match element {
364                FormatElement::Verbatim(text) => {
365                    string.push_str(text);
366                }
367                FormatElement::Format(spec) => {
368                    spec.format(&mut string, &args[spec.argument_index - 1])?;
369                }
370            }
371        }
372        Ok(string)
373    }
374}
375
/// One parsed piece of a format string: either literal text or a conversion
/// specifier that is filled in from an argument.
#[derive(Debug)]
pub enum FormatElement<'a> {
    /// Some characters that are copied to the output as-is
    Verbatim(&'a str),
    /// A format specifier
    Format(ConversionSpecifier),
}
383
/// Parsed printf conversion specifier
///
/// Records which argument the specifier applies to, the flags that were set,
/// the width / precision parameters and the conversion to perform.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ConversionSpecifier {
    /// 1-based index of the argument this specifier formats
    pub argument_index: usize,
    /// flag `#`: use `0x`, etc?
    pub alt_form: bool,
    /// flag `0`: left-pad with zeros?
    pub zero_pad: bool,
    /// flag `-`: left-adjust (pad with spaces on the right)
    pub left_adj: bool,
    /// flag `' '` (space): indicate sign with a space?
    pub space_sign: bool,
    /// flag `+`: Always show sign? (for signed numbers)
    pub force_sign: bool,
    /// flag `,`: include locale-specific grouping separators
    pub grouping_separator: bool,
    /// flag `(`: enclose negative numbers in parentheses
    pub negative_in_parentheses: bool,
    /// field width
    pub width: NumericParam,
    /// floating point field precision
    pub precision: NumericParam,
    /// data type
    pub conversion_type: ConversionType,
}
409
/// Width / precision parameter
///
/// The parser also uses this type for the number in an explicit `N$`
/// argument position.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumericParam {
    /// The literal width
    Literal(i32),
    /// Get the width from the previous argument
    ///
    /// NOTE(review): the specifier parser initialises `precision` with this
    /// variant as a placeholder, and the format-string parser rejects it in
    /// front of `$`, so it presumably also stands for "no literal number
    /// given" - confirm against `take_numeric_param`.
    FromArgument,
}
418
/// Printf data type
///
/// Each variant corresponds to the conversion character (shown in its doc
/// comment) that ends a format specifier. The time conversions carry the
/// [`TimeFormat`] selected by the character that follows `t` / `T`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConversionType {
    /// `B`
    BooleanUpper,
    /// `b`
    BooleanLower,
    /// `h`
    ///
    /// Not implemented yet. Can be implemented after <https://github.com/apache/datafusion/pull/17093> is merged
    HexHashLower,
    /// `H`
    HexHashUpper,
    /// `d`
    DecInt,
    /// `o`
    OctInt,
    /// `x`
    HexIntLower,
    /// `X`
    HexIntUpper,
    /// `e`
    SciFloatLower,
    /// `E`
    SciFloatUpper,
    /// `f`
    DecFloatLower,
    /// `g`
    CompactFloatLower,
    /// `G`
    CompactFloatUpper,
    /// `a`
    HexFloatLower,
    /// `A`
    HexFloatUpper,
    /// `t`
    TimeLower(TimeFormat),
    /// `T`
    TimeUpper(TimeFormat),
    /// `c`
    CharLower,
    /// `C`
    CharUpper,
    /// `s`
    StringLower,
    /// `S`
    StringUpper,
}
466
/// Date/time conversion selected by the character that follows `%t` / `%T`.
///
/// The character shown first in each variant's documentation is the suffix
/// that maps to it; the descriptions follow the `java.util.Formatter`
/// date/time conversion documentation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeFormat {
    /// `H`: Hour of the day for the 24-hour clock,
    /// formatted as two digits with a leading zero as necessary i.e. 00 - 23. 00 corresponds to midnight.
    HUpper,
    /// `I`: Hour for the 12-hour clock,
    /// formatted as two digits with a leading zero as necessary, i.e. 01 - 12. 01 corresponds to one o'clock (either morning or afternoon).
    IUpper,
    /// `k`: Hour of the day for the 24-hour clock,
    /// i.e. 0 - 23. 0 corresponds to midnight.
    KLower,
    /// `l`: Hour for the 12-hour clock,
    /// i.e. 1 - 12. 1 corresponds to one o'clock (either morning or afternoon).
    LLower,
    /// `M`: Minute within the hour formatted as two digits with a leading zero as necessary, i.e. 00 - 59.
    MUpper,
    /// `S`: Seconds within the minute, formatted as two digits with a leading zero as necessary,
    /// i.e. 00 - 60 ("60" is a special value required to support leap seconds).
    SUpper,
    /// `L`: Millisecond within the second formatted as three digits with leading zeros as necessary, i.e. 000 - 999.
    LUpper,
    /// `N`: Nanosecond within the second, formatted as nine digits with leading zeros as necessary,
    /// i.e. 000000000 - 999999999. The precision of this value is limited by the resolution of the underlying operating system or hardware.
    NUpper,
    /// `p`: Locale-specific morning or afternoon marker in lower case, e.g. "am" or "pm".
    /// Use of the conversion prefix 'T' forces this output to upper case. (Note that 'p' produces lower-case output.
    /// This is different from GNU date and POSIX strftime(3c) which produce upper-case output.)
    PLower,
    /// `z`: RFC 822 style numeric time zone offset from GMT,
    /// e.g. -0800. This value will be adjusted as necessary for Daylight Saving Time.
    /// For long, Long, and Date the time zone used is the default time zone for this instance of the Java virtual machine.
    ZLower,
    /// `Z`: A string representing the abbreviation for the time zone. This value will be adjusted as necessary for Daylight Saving Time.
    /// For long, Long, and Date the time zone used is the default time zone for this instance of the Java virtual machine.
    /// The Formatter's locale will supersede the locale of the argument (if any).
    ZUpper,
    /// `s`: Seconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC,
    /// i.e. Long.MIN_VALUE/1000 to Long.MAX_VALUE/1000.
    SLower,
    /// `Q`: Milliseconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC,
    /// i.e. Long.MIN_VALUE to Long.MAX_VALUE. The precision of this value is limited by the resolution of the underlying operating system or hardware.
    QUpper,
    /// `B`: Locale-specific full month name, e.g. "January", "February".
    BUpper,
    /// `b` or `h`: Locale-specific abbreviated month name, e.g. "Jan", "Feb".
    BLower,
    /// `A`: Locale-specific full weekday name, e.g. "Monday", "Tuesday".
    AUpper,
    /// `a`: Locale-specific abbreviated weekday name, e.g. "Mon", "Tue".
    ALower,
    /// `C`: Four-digit year divided by 100, formatted as two digits with leading zero as necessary, i.e. 00 - 99
    CUpper,
    /// `Y`: Year, formatted to at least four digits with leading zeros as necessary, e.g. 0092 equals 92 CE for the Gregorian calendar.
    YUpper,
    /// `y`: Last two digits of the year, formatted with leading zeros as necessary, i.e. 00 - 99.
    YLower,
    /// `j`: Day of year, formatted as three digits with leading zeros as necessary, e.g. 001 - 366 for the Gregorian calendar. 001 corresponds to the first day of the year.
    JLower,
    /// `m`: Month, formatted as two digits with leading zeros as necessary, i.e. 01 - 13, where "01" is the first month of the year and ("13" is a special value required to support lunar calendars).
    MLower,
    /// `d`: Day of month, formatted as two digits with leading zeros as necessary, i.e. 01 - 31, where "01" is the first day of the month.
    DLower,
    /// `e`: Day of month, formatted as two digits, i.e. 1 - 31 where "1" is the first day of the month.
    ELower,
    /// `R`: Time formatted for the 24-hour clock as "%tH:%tM"
    RUpper,
    /// `T`: Time formatted for the 24-hour clock as "%tH:%tM:%tS"
    TUpper,
    /// `r`: Time formatted for the 12-hour clock as "%tI:%tM:%tS %Tp". The location of the morning or afternoon marker ('%Tp') may be locale-dependent.
    RLower,
    /// `D`: Date formatted as "%tm/%td/%ty"
    DUpper,
    /// `F`: ISO 8601 complete date formatted as "%tY-%tm-%td"
    FUpper,
    /// `c`: Date and time formatted as "%ta %tb %td %tT %tZ %tY", e.g. "Sun Jul 20 16:17:00 EDT 1969"
    CLower,
}
544
545impl TryFrom<char> for TimeFormat {
546    type Error = DataFusionError;
547    fn try_from(value: char) -> Result<Self, Self::Error> {
548        match value {
549            'H' => Ok(TimeFormat::HUpper),
550            'I' => Ok(TimeFormat::IUpper),
551            'k' => Ok(TimeFormat::KLower),
552            'l' => Ok(TimeFormat::LLower),
553            'M' => Ok(TimeFormat::MUpper),
554            'S' => Ok(TimeFormat::SUpper),
555            'L' => Ok(TimeFormat::LUpper),
556            'N' => Ok(TimeFormat::NUpper),
557            'p' => Ok(TimeFormat::PLower),
558            'z' => Ok(TimeFormat::ZLower),
559            'Z' => Ok(TimeFormat::ZUpper),
560            's' => Ok(TimeFormat::SLower),
561            'Q' => Ok(TimeFormat::QUpper),
562            'B' => Ok(TimeFormat::BUpper),
563            'b' | 'h' => Ok(TimeFormat::BLower),
564            'A' => Ok(TimeFormat::AUpper),
565            'a' => Ok(TimeFormat::ALower),
566            'C' => Ok(TimeFormat::CUpper),
567            'Y' => Ok(TimeFormat::YUpper),
568            'y' => Ok(TimeFormat::YLower),
569            'j' => Ok(TimeFormat::JLower),
570            'm' => Ok(TimeFormat::MLower),
571            'd' => Ok(TimeFormat::DLower),
572            'e' => Ok(TimeFormat::ELower),
573            'R' => Ok(TimeFormat::RUpper),
574            'T' => Ok(TimeFormat::TUpper),
575            'r' => Ok(TimeFormat::RLower),
576            'D' => Ok(TimeFormat::DUpper),
577            'F' => Ok(TimeFormat::FUpper),
578            'c' => Ok(TimeFormat::CLower),
579            _ => exec_err!("Invalid time format: {}", value),
580        }
581    }
582}
583
584impl ConversionType {
585    pub fn validate(&self, arg_type: DataType) -> Result<()> {
586        match self {
587            ConversionType::BooleanLower | ConversionType::BooleanUpper => {
588                if !matches!(arg_type, DataType::Boolean) {
589                    return exec_err!(
590                        "Invalid argument type for boolean conversion: {:?}",
591                        arg_type
592                    );
593                }
594            }
595            ConversionType::CharLower | ConversionType::CharUpper => {
596                if !matches!(
597                    arg_type,
598                    DataType::Int8
599                        | DataType::UInt8
600                        | DataType::Int16
601                        | DataType::UInt16
602                        | DataType::Int32
603                        | DataType::UInt32
604                        | DataType::Int64
605                        | DataType::UInt64
606                ) {
607                    return exec_err!(
608                        "Invalid argument type for char conversion: {:?}",
609                        arg_type
610                    );
611                }
612            }
613            ConversionType::DecInt
614            | ConversionType::OctInt
615            | ConversionType::HexIntLower
616            | ConversionType::HexIntUpper => {
617                if !arg_type.is_integer() {
618                    return exec_err!(
619                        "Invalid argument type for integer conversion: {:?}",
620                        arg_type
621                    );
622                }
623            }
624            ConversionType::SciFloatLower
625            | ConversionType::SciFloatUpper
626            | ConversionType::DecFloatLower
627            | ConversionType::CompactFloatLower
628            | ConversionType::CompactFloatUpper
629            | ConversionType::HexFloatLower
630            | ConversionType::HexFloatUpper => {
631                if !arg_type.is_numeric() {
632                    return exec_err!(
633                        "Invalid argument type for float conversion: {:?}",
634                        arg_type
635                    );
636                }
637            }
638            ConversionType::TimeLower(_) | ConversionType::TimeUpper(_) => {
639                if !arg_type.is_temporal() {
640                    return exec_err!(
641                        "Invalid argument type for time conversion: {:?}",
642                        arg_type
643                    );
644                }
645            }
646            _ => {}
647        }
648        Ok(())
649    }
650
651    fn supports_integer(&self) -> bool {
652        matches!(
653            self,
654            ConversionType::DecInt
655                | ConversionType::HexIntLower
656                | ConversionType::HexIntUpper
657                | ConversionType::OctInt
658                | ConversionType::CharLower
659                | ConversionType::CharUpper
660                | ConversionType::StringLower
661                | ConversionType::StringUpper
662        )
663    }
664
665    fn supports_float(&self) -> bool {
666        matches!(
667            self,
668            ConversionType::DecFloatLower
669                | ConversionType::SciFloatLower
670                | ConversionType::SciFloatUpper
671                | ConversionType::CompactFloatLower
672                | ConversionType::CompactFloatUpper
673                | ConversionType::StringLower
674                | ConversionType::StringUpper
675                | ConversionType::HexFloatLower
676                | ConversionType::HexFloatUpper
677        )
678    }
679
680    fn supports_decimal(&self) -> bool {
681        matches!(
682            self,
683            ConversionType::DecFloatLower
684                | ConversionType::SciFloatLower
685                | ConversionType::SciFloatUpper
686                | ConversionType::CompactFloatLower
687                | ConversionType::CompactFloatUpper
688                | ConversionType::StringLower
689                | ConversionType::StringUpper
690        )
691    }
692
693    fn supports_time(&self) -> bool {
694        matches!(
695            self,
696            ConversionType::TimeLower(_)
697                | ConversionType::TimeUpper(_)
698                | ConversionType::StringLower
699                | ConversionType::StringUpper
700        )
701    }
702
703    fn is_upper(&self) -> bool {
704        matches!(
705            self,
706            ConversionType::BooleanUpper
707                | ConversionType::HexHashUpper
708                | ConversionType::HexIntUpper
709                | ConversionType::SciFloatUpper
710                | ConversionType::CompactFloatUpper
711                | ConversionType::HexFloatUpper
712                | ConversionType::TimeUpper(_)
713                | ConversionType::CharUpper
714                | ConversionType::StringUpper
715        )
716    }
717}
718
719fn take_conversion_specifier(
720    mut s: &str,
721    argument_index: usize,
722    arg_type: DataType,
723) -> Result<(ConversionSpecifier, &str)> {
724    let mut spec = ConversionSpecifier {
725        argument_index,
726        alt_form: false,
727        zero_pad: false,
728        left_adj: false,
729        space_sign: false,
730        force_sign: false,
731        grouping_separator: false,
732        negative_in_parentheses: false,
733        width: NumericParam::Literal(0),
734        precision: NumericParam::FromArgument, // Placeholder - must not be returned!
735        // ignore length modifier
736        conversion_type: ConversionType::DecInt,
737    };
738
739    // parse flags
740    loop {
741        match s.chars().next() {
742            Some('#') => {
743                spec.alt_form = true;
744            }
745            Some('0') => {
746                if spec.left_adj {
747                    return exec_err!("Invalid flag combination: '0' and '-'");
748                }
749                spec.zero_pad = true;
750            }
751            Some('-') => {
752                spec.left_adj = true;
753            }
754            Some(' ') => {
755                if spec.force_sign {
756                    return exec_err!("Invalid flag combination: '+' and ' '");
757                }
758                spec.space_sign = true;
759            }
760            Some('+') => {
761                if spec.space_sign {
762                    return exec_err!("Invalid flag combination: '+' and ' '");
763                }
764                spec.force_sign = true;
765            }
766            Some(',') => {
767                spec.grouping_separator = true;
768            }
769            Some('(') => {
770                spec.negative_in_parentheses = true;
771            }
772            _ => {
773                break;
774            }
775        }
776        s = &s[1..];
777    }
778    // parse width
779    let (w, mut s) = take_numeric_param(s, false);
780    spec.width = w;
781    // parse precision
782    if matches!(s.chars().next(), Some('.')) {
783        s = &s[1..];
784        let (p, s2) = take_numeric_param(s, true);
785        spec.precision = p;
786        s = s2;
787    }
788    let mut chars = s.chars();
789    let mut offset = 1;
790    // parse conversion type
791    spec.conversion_type = match chars.next() {
792        Some('b') => ConversionType::BooleanLower,
793        Some('B') => ConversionType::BooleanUpper,
794        Some('h') => ConversionType::HexHashLower,
795        Some('H') => ConversionType::HexHashUpper,
796        Some('s') => ConversionType::StringLower,
797        Some('S') => ConversionType::StringUpper,
798        Some('c') => ConversionType::CharLower,
799        Some('C') => ConversionType::CharUpper,
800        Some('d') => ConversionType::DecInt,
801        Some('o') => ConversionType::OctInt,
802        Some('x') => ConversionType::HexIntLower,
803        Some('X') => ConversionType::HexIntUpper,
804        Some('e') => ConversionType::SciFloatLower,
805        Some('E') => ConversionType::SciFloatUpper,
806        Some('f') => ConversionType::DecFloatLower,
807        Some('g') => ConversionType::CompactFloatLower,
808        Some('G') => ConversionType::CompactFloatUpper,
809        Some('a') => ConversionType::HexFloatLower,
810        Some('A') => ConversionType::HexFloatUpper,
811        Some('t') => {
812            let Some(chr) = chars.next() else {
813                return exec_err!("Invalid time format: {}", s);
814            };
815            offset += 1;
816            ConversionType::TimeLower(chr.try_into()?)
817        }
818        Some('T') => {
819            let Some(chr) = chars.next() else {
820                return exec_err!("Invalid time format: {}", s);
821            };
822            offset += 1;
823            ConversionType::TimeUpper(chr.try_into()?)
824        }
825        chr => {
826            return plan_err!("Invalid conversion type: {:?}", chr);
827        }
828    };
829
830    spec.conversion_type.validate(arg_type)?;
831    Ok((spec, &s[offset..]))
832}
833
834fn take_numeric_param(s: &str, zero: bool) -> (NumericParam, &str) {
835    match s.chars().next() {
836        Some(digit) if (if zero { '0'..='9' } else { '1'..='9' }).contains(&digit) => {
837            let mut s = s;
838            let mut w = 0;
839            loop {
840                match s.chars().next() {
841                    Some(digit) if digit.is_ascii_digit() => {
842                        w = 10 * w + (digit as i32 - '0' as i32);
843                    }
844                    _ => {
845                        break;
846                    }
847                }
848                s = &s[1..];
849            }
850            (NumericParam::Literal(w), s)
851        }
852        _ => (NumericParam::FromArgument, s),
853    }
854}
855
856impl ConversionSpecifier {
857    pub fn format(&self, string: &mut String, value: &ScalarValue) -> Result<()> {
858        match value {
859            ScalarValue::Boolean(value) => match self.conversion_type {
860                ConversionType::StringLower | ConversionType::StringUpper => {
861                    self.format_string(string, &value.unwrap_or(false).to_string())
862                }
863
864                _ => self.format_boolean(string, value),
865            },
866            ScalarValue::Int8(value) => match (self.conversion_type, value) {
867                (ConversionType::DecInt, Some(value)) => {
868                    self.format_signed(string, *value as i64)
869                }
870                (
871                    ConversionType::HexIntLower
872                    | ConversionType::HexIntUpper
873                    | ConversionType::OctInt,
874                    Some(value),
875                ) => self.format_unsigned(string, (*value as u8) as u64),
876                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
877                    self.format_char(string, *value as u8 as char)
878                }
879                (
880                    ConversionType::StringLower | ConversionType::StringUpper,
881                    Some(value),
882                ) => self.format_string(string, &value.to_string()),
883                (t, None) if t.supports_integer() => self.format_string(string, "null"),
884                _ => {
885                    exec_err!(
886                        "Invalid conversion type: {:?} for Int8",
887                        self.conversion_type
888                    )
889                }
890            },
891            ScalarValue::Int16(value) => match (self.conversion_type, value) {
892                (ConversionType::DecInt, Some(value)) => {
893                    self.format_signed(string, *value as i64)
894                }
895                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
896                    self.format_char(
897                        string,
898                        char::from_u32((*value as u16) as u32).unwrap(),
899                    )
900                }
901                (
902                    ConversionType::HexIntLower
903                    | ConversionType::HexIntUpper
904                    | ConversionType::OctInt,
905                    Some(value),
906                ) => self.format_unsigned(string, (*value as u16) as u64),
907                (
908                    ConversionType::StringLower | ConversionType::StringUpper,
909                    Some(value),
910                ) => self.format_string(string, &value.to_string()),
911                (t, None) if t.supports_integer() => self.format_string(string, "null"),
912                _ => {
913                    exec_err!(
914                        "Invalid conversion type: {:?} for Int16",
915                        self.conversion_type
916                    )
917                }
918            },
919            ScalarValue::Int32(value) => match (self.conversion_type, value) {
920                (ConversionType::DecInt, Some(value)) => {
921                    self.format_signed(string, *value as i64)
922                }
923                (
924                    ConversionType::HexIntLower
925                    | ConversionType::HexIntUpper
926                    | ConversionType::OctInt,
927                    Some(value),
928                ) => self.format_unsigned(string, (*value as u32) as u64),
929                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
930                    self.format_char(string, char::from_u32(*value as u32).unwrap())
931                }
932                (
933                    ConversionType::StringLower | ConversionType::StringUpper,
934                    Some(value),
935                ) => self.format_string(string, &value.to_string()),
936                (t, None) if t.supports_integer() => self.format_string(string, "null"),
937                _ => {
938                    exec_err!(
939                        "Invalid conversion type: {:?} for Int32",
940                        self.conversion_type
941                    )
942                }
943            },
944            ScalarValue::Int64(value) => match (self.conversion_type, value) {
945                (ConversionType::DecInt, Some(value)) => {
946                    self.format_signed(string, *value)
947                }
948                (
949                    ConversionType::HexIntLower
950                    | ConversionType::HexIntUpper
951                    | ConversionType::OctInt,
952                    Some(value),
953                ) => self.format_unsigned(string, *value as u64),
954                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
955                    self.format_char(
956                        string,
957                        char::from_u32((*value as u64) as u32).unwrap(),
958                    )
959                }
960                (
961                    ConversionType::StringLower | ConversionType::StringUpper,
962                    Some(value),
963                ) => self.format_string(string, &value.to_string()),
964                (t, None) if t.supports_integer() => self.format_string(string, "null"),
965                _ => {
966                    exec_err!(
967                        "Invalid conversion type: {:?} for Int64",
968                        self.conversion_type
969                    )
970                }
971            },
972            ScalarValue::UInt8(value) => match (self.conversion_type, value) {
973                (
974                    ConversionType::DecInt
975                    | ConversionType::HexIntLower
976                    | ConversionType::HexIntUpper
977                    | ConversionType::OctInt,
978                    Some(value),
979                ) => self.format_unsigned(string, *value as u64),
980                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
981                    self.format_char(string, *value as char)
982                }
983                (
984                    ConversionType::StringLower | ConversionType::StringUpper,
985                    Some(value),
986                ) => self.format_string(string, &value.to_string()),
987                (t, None) if t.supports_integer() => self.format_string(string, "null"),
988                _ => {
989                    exec_err!(
990                        "Invalid conversion type: {:?} for UInt8",
991                        self.conversion_type
992                    )
993                }
994            },
995            ScalarValue::UInt16(value) => match (self.conversion_type, value) {
996                (
997                    ConversionType::DecInt
998                    | ConversionType::HexIntLower
999                    | ConversionType::HexIntUpper
1000                    | ConversionType::OctInt,
1001                    Some(value),
1002                ) => self.format_unsigned(string, *value as u64),
1003                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1004                    self.format_char(string, char::from_u32(*value as u32).unwrap())
1005                }
1006                (
1007                    ConversionType::StringLower | ConversionType::StringUpper,
1008                    Some(value),
1009                ) => self.format_string(string, &value.to_string()),
1010                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1011                _ => {
1012                    exec_err!(
1013                        "Invalid conversion type: {:?} for UInt16",
1014                        self.conversion_type
1015                    )
1016                }
1017            },
1018            ScalarValue::UInt32(value) => match (self.conversion_type, value) {
1019                (
1020                    ConversionType::DecInt
1021                    | ConversionType::HexIntLower
1022                    | ConversionType::HexIntUpper
1023                    | ConversionType::OctInt,
1024                    Some(value),
1025                ) => self.format_unsigned(string, *value as u64),
1026                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1027                    self.format_char(string, char::from_u32(*value).unwrap())
1028                }
1029                (
1030                    ConversionType::StringLower | ConversionType::StringUpper,
1031                    Some(value),
1032                ) => self.format_string(string, &value.to_string()),
1033                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1034                _ => {
1035                    exec_err!(
1036                        "Invalid conversion type: {:?} for UInt32",
1037                        self.conversion_type
1038                    )
1039                }
1040            },
1041            ScalarValue::UInt64(value) => match (self.conversion_type, value) {
1042                (
1043                    ConversionType::DecInt
1044                    | ConversionType::HexIntLower
1045                    | ConversionType::HexIntUpper
1046                    | ConversionType::OctInt,
1047                    Some(value),
1048                ) => self.format_unsigned(string, *value),
1049                (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1050                    self.format_char(string, char::from_u32(*value as u32).unwrap())
1051                }
1052                (
1053                    ConversionType::StringLower | ConversionType::StringUpper,
1054                    Some(value),
1055                ) => self.format_string(string, &value.to_string()),
1056                (t, None) if t.supports_integer() => self.format_string(string, "null"),
1057                _ => {
1058                    exec_err!(
1059                        "Invalid conversion type: {:?} for UInt64",
1060                        self.conversion_type
1061                    )
1062                }
1063            },
1064            ScalarValue::Float16(value) => match (self.conversion_type, value) {
1065                (
1066                    ConversionType::DecFloatLower
1067                    | ConversionType::SciFloatLower
1068                    | ConversionType::SciFloatUpper
1069                    | ConversionType::CompactFloatLower
1070                    | ConversionType::CompactFloatUpper,
1071                    Some(value),
1072                ) => self.format_float(string, value.to_f64().unwrap()),
1073                (
1074                    ConversionType::StringLower | ConversionType::StringUpper,
1075                    Some(value),
1076                ) => self.format_string(string, &value.to_f32().unwrap().spark_string()),
1077                (
1078                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1079                    Some(value),
1080                ) => self.format_hex_float(string, value.to_f64().unwrap()),
1081                (t, None) if t.supports_float() => self.format_string(string, "null"),
1082                _ => {
1083                    exec_err!(
1084                        "Invalid conversion type: {:?} for Float16",
1085                        self.conversion_type
1086                    )
1087                }
1088            },
1089            ScalarValue::Float32(value) => match (self.conversion_type, value) {
1090                (
1091                    ConversionType::DecFloatLower
1092                    | ConversionType::SciFloatLower
1093                    | ConversionType::SciFloatUpper
1094                    | ConversionType::CompactFloatLower
1095                    | ConversionType::CompactFloatUpper,
1096                    Some(value),
1097                ) => self.format_float(string, *value as f64),
1098                (
1099                    ConversionType::StringLower | ConversionType::StringUpper,
1100                    Some(value),
1101                ) => self.format_string(string, &value.spark_string()),
1102                (
1103                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1104                    Some(value),
1105                ) => self.format_hex_float(string, *value as f64),
1106                (t, None) if t.supports_float() => self.format_string(string, "null"),
1107                _ => {
1108                    exec_err!(
1109                        "Invalid conversion type: {:?} for Float32",
1110                        self.conversion_type
1111                    )
1112                }
1113            },
1114            ScalarValue::Float64(value) => match (self.conversion_type, value) {
1115                (
1116                    ConversionType::DecFloatLower
1117                    | ConversionType::SciFloatLower
1118                    | ConversionType::SciFloatUpper
1119                    | ConversionType::CompactFloatLower
1120                    | ConversionType::CompactFloatUpper,
1121                    Some(value),
1122                ) => self.format_float(string, *value),
1123                (
1124                    ConversionType::StringLower | ConversionType::StringUpper,
1125                    Some(value),
1126                ) => self.format_string(string, &value.spark_string()),
1127                (
1128                    ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1129                    Some(value),
1130                ) => self.format_hex_float(string, *value),
1131                (t, None) if t.supports_float() => self.format_string(string, "null"),
1132                _ => {
1133                    exec_err!(
1134                        "Invalid conversion type: {:?} for Float64",
1135                        self.conversion_type
1136                    )
1137                }
1138            },
1139            ScalarValue::Utf8(value) => {
1140                let value: &str = match value {
1141                    Some(value) => value.as_str(),
1142                    None => "null",
1143                };
1144                if matches!(
1145                    self.conversion_type,
1146                    ConversionType::StringLower | ConversionType::StringUpper
1147                ) {
1148                    self.format_string(string, value)
1149                } else {
1150                    exec_err!(
1151                        "Invalid conversion type: {:?} for Utf8",
1152                        self.conversion_type
1153                    )
1154                }
1155            }
1156            ScalarValue::LargeUtf8(value) => {
1157                let value: &str = match value {
1158                    Some(value) => value.as_str(),
1159                    None => "null",
1160                };
1161                if matches!(
1162                    self.conversion_type,
1163                    ConversionType::StringLower | ConversionType::StringUpper
1164                ) {
1165                    self.format_string(string, value)
1166                } else {
1167                    exec_err!(
1168                        "Invalid conversion type: {:?} for LargeUtf8",
1169                        self.conversion_type
1170                    )
1171                }
1172            }
1173            ScalarValue::Utf8View(value) => {
1174                let value: &str = match value {
1175                    Some(value) => value.as_str(),
1176                    None => "null",
1177                };
1178                self.format_string(string, value)
1179            }
1180            ScalarValue::Decimal128(value, _, scale) => {
1181                match (self.conversion_type, value) {
1182                    (
1183                        ConversionType::DecFloatLower
1184                        | ConversionType::SciFloatLower
1185                        | ConversionType::SciFloatUpper
1186                        | ConversionType::CompactFloatLower
1187                        | ConversionType::CompactFloatUpper,
1188                        Some(value),
1189                    ) => self.format_decimal(string, value.to_string(), *scale as i64),
1190                    (
1191                        ConversionType::StringLower | ConversionType::StringUpper,
1192                        Some(value),
1193                    ) => self.format_string(string, &value.to_string()),
1194                    (t, None) if t.supports_decimal() => {
1195                        self.format_string(string, "null")
1196                    }
1197
1198                    _ => {
1199                        exec_err!(
1200                            "Invalid conversion type: {:?} for Decimal128",
1201                            self.conversion_type
1202                        )
1203                    }
1204                }
1205            }
1206            ScalarValue::Decimal256(value, _, scale) => {
1207                match (self.conversion_type, value) {
1208                    (
1209                        ConversionType::DecFloatLower
1210                        | ConversionType::SciFloatLower
1211                        | ConversionType::SciFloatUpper
1212                        | ConversionType::CompactFloatLower
1213                        | ConversionType::CompactFloatUpper,
1214                        Some(value),
1215                    ) => self.format_decimal(string, value.to_string(), *scale as i64),
1216                    (
1217                        ConversionType::StringLower | ConversionType::StringUpper,
1218                        Some(value),
1219                    ) => self.format_string(string, &value.to_string()),
1220                    (t, None) if t.supports_decimal() => {
1221                        self.format_string(string, "null")
1222                    }
1223
1224                    _ => {
1225                        exec_err!(
1226                            "Invalid conversion type: {:?} for Decimal256",
1227                            self.conversion_type
1228                        )
1229                    }
1230                }
1231            }
1232
1233            ScalarValue::Time32Second(value) => match (self.conversion_type, value) {
1234                (
1235                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1236                    Some(value),
1237                ) => self.format_time(string, *value as i64 * 1000000000, &None),
1238                (
1239                    ConversionType::StringLower | ConversionType::StringUpper,
1240                    Some(value),
1241                ) => self.format_string(string, &value.to_string()),
1242                (t, None) if t.supports_time() => self.format_string(string, "null"),
1243                _ => {
1244                    exec_err!(
1245                        "Invalid conversion type: {:?} for Time32Second",
1246                        self.conversion_type
1247                    )
1248                }
1249            },
1250            ScalarValue::Time32Millisecond(value) => {
1251                match (self.conversion_type, value) {
1252                    (
1253                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1254                        Some(value),
1255                    ) => self.format_time(string, *value as i64 * 1000000, &None),
1256                    (
1257                        ConversionType::StringLower | ConversionType::StringUpper,
1258                        Some(value),
1259                    ) => self.format_string(string, &value.to_string()),
1260                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1261                    _ => {
1262                        exec_err!(
1263                            "Invalid conversion type: {:?} for Time32Millisecond",
1264                            self.conversion_type
1265                        )
1266                    }
1267                }
1268            }
1269            ScalarValue::Time64Microsecond(value) => {
1270                match (self.conversion_type, value) {
1271                    (
1272                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1273                        Some(value),
1274                    ) => self.format_time(string, *value * 1000, &None),
1275                    (
1276                        ConversionType::StringLower | ConversionType::StringUpper,
1277                        Some(value),
1278                    ) => self.format_string(string, &value.to_string()),
1279                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1280                    _ => {
1281                        exec_err!(
1282                            "Invalid conversion type: {:?} for Time64Microsecond",
1283                            self.conversion_type
1284                        )
1285                    }
1286                }
1287            }
1288            ScalarValue::Time64Nanosecond(value) => match (self.conversion_type, value) {
1289                (
1290                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1291                    Some(value),
1292                ) => self.format_time(string, *value, &None),
1293                (
1294                    ConversionType::StringLower | ConversionType::StringUpper,
1295                    Some(value),
1296                ) => self.format_string(string, &value.to_string()),
1297                (t, None) if t.supports_time() => self.format_string(string, "null"),
1298                _ => {
1299                    exec_err!(
1300                        "Invalid conversion type: {:?} for Time64Nanosecond",
1301                        self.conversion_type
1302                    )
1303                }
1304            },
1305            ScalarValue::TimestampSecond(value, zone) => {
1306                match (self.conversion_type, value) {
1307                    (
1308                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1309                        Some(value),
1310                    ) => self.format_time(string, value * 1000000000, zone),
1311                    (
1312                        ConversionType::StringLower | ConversionType::StringUpper,
1313                        Some(value),
1314                    ) => self.format_string(string, &value.to_string()),
1315                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1316                    _ => {
1317                        exec_err!(
1318                            "Invalid conversion type: {:?} for TimestampSecond",
1319                            self.conversion_type
1320                        )
1321                    }
1322                }
1323            }
1324            ScalarValue::TimestampMillisecond(value, zone) => {
1325                match (self.conversion_type, value) {
1326                    (
1327                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1328                        Some(value),
1329                    ) => self.format_time(string, *value * 1000000, zone),
1330                    (
1331                        ConversionType::StringLower | ConversionType::StringUpper,
1332                        Some(value),
1333                    ) => self.format_string(string, &value.to_string()),
1334
1335                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1336                    _ => {
1337                        exec_err!(
1338                            "Invalid conversion type: {:?} for TimestampMillisecond",
1339                            self.conversion_type
1340                        )
1341                    }
1342                }
1343            }
1344            ScalarValue::TimestampMicrosecond(value, zone) => {
1345                match (self.conversion_type, value) {
1346                    (
1347                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1348                        Some(value),
1349                    ) => self.format_time(string, value * 1000, zone),
1350                    (
1351                        ConversionType::StringLower | ConversionType::StringUpper,
1352                        Some(value),
1353                    ) => self.format_string(string, &value.to_string()),
1354                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1355                    _ => {
1356                        exec_err!(
1357                            "Invalid conversion type: {:?} for timestampmicrosecond",
1358                            self.conversion_type
1359                        )
1360                    }
1361                }
1362            }
1363
1364            ScalarValue::TimestampNanosecond(value, zone) => {
1365                match (self.conversion_type, value) {
1366                    (
1367                        ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1368                        Some(value),
1369                    ) => self.format_time(string, *value, zone),
1370                    (
1371                        ConversionType::StringLower | ConversionType::StringUpper,
1372                        Some(value),
1373                    ) => self.format_string(string, &value.to_string()),
1374                    (t, None) if t.supports_time() => self.format_string(string, "null"),
1375                    _ => {
1376                        exec_err!(
1377                            "Invalid conversion type: {:?} for TimestampNanosecond",
1378                            self.conversion_type
1379                        )
1380                    }
1381                }
1382            }
1383            ScalarValue::Date32(value) => match (self.conversion_type, value) {
1384                (
1385                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1386                    Some(value),
1387                ) => self.format_date(string, *value as i64),
1388                (
1389                    ConversionType::StringLower | ConversionType::StringUpper,
1390                    Some(value),
1391                ) => self.format_string(string, &value.to_string()),
1392                (t, None) if t.supports_time() => self.format_string(string, "null"),
1393                _ => {
1394                    exec_err!(
1395                        "Invalid conversion type: {:?} for Date32",
1396                        self.conversion_type
1397                    )
1398                }
1399            },
1400            ScalarValue::Date64(value) => match (self.conversion_type, value) {
1401                (
1402                    ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1403                    Some(value),
1404                ) => self.format_date(string, *value),
1405                (
1406                    ConversionType::StringLower | ConversionType::StringUpper,
1407                    Some(value),
1408                ) => self.format_string(string, &value.to_string()),
1409                (t, None) if t.supports_time() => self.format_string(string, "null"),
1410                _ => {
1411                    exec_err!(
1412                        "Invalid conversion type: {:?} for Date64",
1413                        self.conversion_type
1414                    )
1415                }
1416            },
1417            ScalarValue::Null => {
1418                let value = "null".to_string();
1419                self.format_string(string, &value)
1420            }
1421            _ => exec_err!("Invalid scalar value: {:?}", value),
1422        }
1423    }
1424
1425    fn format_hex_float(&self, writer: &mut String, value: f64) -> Result<()> {
1426        // Handle special cases first
1427        let (sign, raw_exponent, mantissa) = value.to_parts();
1428        let is_subnormal = raw_exponent == 0;
1429
1430        let precision = match self.precision {
1431            NumericParam::FromArgument => None,
1432            NumericParam::Literal(p) => Some(p),
1433        };
1434
1435        // Determine if we need to normalize subnormal numbers
1436        // Only normalize when precision is specified and less than full mantissa width
1437        let mantissa_hex_digits = f64::MANTISSA_BITS.div_ceil(4); // 13 for f64
1438        let should_normalize = is_subnormal
1439            && precision.is_some()
1440            && precision.unwrap() < mantissa_hex_digits as i32;
1441
1442        let (value, raw_exponent, mantissa) = if should_normalize {
1443            let value = value * f64::SCALEUP;
1444            let (_, raw_exponent, mantissa) = value.to_parts();
1445            (value, raw_exponent, mantissa)
1446        } else {
1447            (value, raw_exponent, mantissa)
1448        };
1449
1450        let mut temp = String::new();
1451
1452        let sign_char = if sign {
1453            "-"
1454        } else if self.force_sign {
1455            "+"
1456        } else if self.space_sign {
1457            " "
1458        } else {
1459            ""
1460        };
1461        match value.category() {
1462            FpCategory::Nan => {
1463                write!(&mut temp, "NaN")?;
1464            }
1465            FpCategory::Infinite => {
1466                write!(&mut temp, "{sign_char}Infinity")?;
1467            }
1468            FpCategory::Zero => {
1469                write!(&mut temp, "{sign_char}0x0.0p0")?;
1470            }
1471            _ => {
1472                let bias = i32::from(f64::EXPONENT_BIAS);
1473                // Calculate actual exponent
1474                // For subnormal numbers, the exponent is 1 - bias (not 0 - bias)
1475                let exponent = if is_subnormal && !should_normalize {
1476                    1 - bias
1477                } else {
1478                    raw_exponent as i32 - bias
1479                };
1480
1481                // Handle precision for rounding
1482                let final_mantissa = if let Some(p) = precision {
1483                    if p == 0 {
1484                        // For precision 0, we still need at least 1 hex digit
1485                        // Round to the nearest integer mantissa value
1486                        let shift_distance = f64::MANTISSA_BITS as i32 - 4; // Keep 1 hex digit (4 bits)
1487                        let shifted = mantissa >> shift_distance;
1488                        let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1489                        let round_bit = 1u64 << (shift_distance - 1);
1490
1491                        // Round to nearest, ties to even
1492                        if rounding_bits > round_bit
1493                            || (rounding_bits == round_bit && (shifted & 1) != 0)
1494                        {
1495                            (shifted + 1) << shift_distance
1496                        } else {
1497                            shifted << shift_distance
1498                        }
1499                    } else {
1500                        // Apply rounding based on precision
1501                        let precision_bits = p * 4; // Each hex digit is 4 bits
1502                        let keep_bits = f64::MANTISSA_BITS as i32;
1503                        let shift_distance = keep_bits - precision_bits;
1504
1505                        if shift_distance > 0 {
1506                            let shifted = mantissa >> shift_distance;
1507                            let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1508                            let round_bit = 1u64 << (shift_distance - 1);
1509
1510                            // Round to nearest, ties to even
1511                            if rounding_bits > round_bit
1512                                || (rounding_bits == round_bit && (shifted & 1) != 0)
1513                            {
1514                                (shifted + 1) << shift_distance
1515                            } else {
1516                                shifted << shift_distance
1517                            }
1518                        } else {
1519                            mantissa
1520                        }
1521                    }
1522                } else {
1523                    mantissa
1524                };
1525
1526                if is_subnormal && !should_normalize {
1527                    // Original subnormal format: 0x0.xxxp-1022
1528                    if precision.is_some() {
1529                        // precision >= 13, show as subnormal
1530                        let full_hex = format!(
1531                            "{:0width$x}",
1532                            final_mantissa,
1533                            width = mantissa_hex_digits as usize
1534                        );
1535                        write!(&mut temp, "{sign_char}0x0.{full_hex}p{exponent}")?;
1536                    } else {
1537                        // No precision specified, show full subnormal
1538                        let hex_digits = format!(
1539                            "{:0width$x}",
1540                            final_mantissa,
1541                            width = mantissa_hex_digits as usize
1542                        );
1543                        write!(&mut temp, "{sign_char}0x0.{hex_digits}p{exponent}")?;
1544                    }
1545                } else {
1546                    // Normal format or normalized subnormal: 0x1.xxxpN
1547                    if let Some(p) = precision {
1548                        let p = if p == 0 { 1 } else { p };
1549                        let hex_digits = format!("{final_mantissa:x}");
1550                        let formatted_digits = if p as usize >= hex_digits.len() {
1551                            // Pad with zeros to match precision
1552                            format!("{:0<width$}", hex_digits, width = p as usize)
1553                        } else {
1554                            hex_digits[..p as usize].to_string()
1555                        };
1556                        write!(
1557                            &mut temp,
1558                            "{sign_char}0x1.{formatted_digits}p{exponent}"
1559                        )?;
1560                    } else {
1561                        // Default: show all significant digits
1562                        let mut hex_digits = format!("{final_mantissa:x}");
1563                        hex_digits = trim_trailing_0s_hex(&hex_digits).to_owned();
1564                        if hex_digits.is_empty() {
1565                            write!(&mut temp, "{sign_char}0x1.0p{exponent}")?;
1566                        } else {
1567                            write!(&mut temp, "{sign_char}0x1.{hex_digits}p{exponent}")?;
1568                        }
1569                    }
1570                }
1571                if should_normalize {
1572                    let (prefix, exp) = temp.split_once('p').unwrap();
1573                    let iexp = exp.parse::<i32>().unwrap() - f64::SCALEUP_POWER as i32;
1574                    temp = format!("{prefix}p{iexp}");
1575                }
1576            }
1577        };
1578
1579        if self.conversion_type.is_upper() {
1580            temp = temp.to_ascii_uppercase();
1581        }
1582
1583        let NumericParam::Literal(width) = self.width else {
1584            writer.push_str(&temp);
1585            return Ok(());
1586        };
1587        if self.left_adj {
1588            writer.push_str(&temp);
1589            for _ in temp.len()..width as usize {
1590                writer.push(' ');
1591            }
1592        } else if self.zero_pad && value.is_finite() {
1593            let delimiter = if self.conversion_type.is_upper() {
1594                "0X"
1595            } else {
1596                "0x"
1597            };
1598            let (prefix, suffix) = temp.split_once(delimiter).unwrap();
1599            writer.push_str(prefix);
1600            writer.push_str(delimiter);
1601            for _ in temp.len()..width as usize {
1602                writer.push('0');
1603            }
1604            writer.push_str(suffix);
1605        } else {
1606            while temp.len() < width as usize {
1607                temp = " ".to_owned() + &temp;
1608            }
1609            writer.push_str(&temp);
1610        };
1611        Ok(())
1612    }
1613
1614    fn format_char(&self, writer: &mut String, value: char) -> Result<()> {
1615        let upper = self.conversion_type.is_upper();
1616        match self.conversion_type {
1617            ConversionType::CharLower | ConversionType::CharUpper => {
1618                let NumericParam::Literal(width) = self.width else {
1619                    if upper {
1620                        writer.push(value.to_ascii_uppercase());
1621                    } else {
1622                        writer.push(value);
1623                    }
1624                    return Ok(());
1625                };
1626
1627                let start_len = writer.len();
1628                if self.left_adj {
1629                    if upper {
1630                        writer.push(value.to_ascii_uppercase());
1631                    } else {
1632                        writer.push(value);
1633                    }
1634                    while writer.len() - start_len < width as usize {
1635                        writer.push(' ');
1636                    }
1637                } else {
1638                    while writer.len() - start_len + value.len_utf8() < width as usize {
1639                        writer.push(' ');
1640                    }
1641                    if upper {
1642                        writer.push(value.to_ascii_uppercase());
1643                    } else {
1644                        writer.push(value);
1645                    }
1646                }
1647                Ok(())
1648            }
1649            _ => exec_err!(
1650                "Invalid conversion type: {:?} for char",
1651                self.conversion_type
1652            ),
1653        }
1654    }
1655
1656    fn format_boolean(&self, writer: &mut String, value: &Option<bool>) -> Result<()> {
1657        let value = value.unwrap_or(false);
1658
1659        let formatted = match self.conversion_type {
1660            ConversionType::BooleanUpper => {
1661                if value {
1662                    "TRUE"
1663                } else {
1664                    "FALSE"
1665                }
1666            }
1667            ConversionType::BooleanLower => {
1668                if value {
1669                    "true"
1670                } else {
1671                    "false"
1672                }
1673            }
1674            _ => {
1675                return exec_err!(
1676                    "Invalid conversion type: {:?} for boolean array",
1677                    self.conversion_type
1678                )
1679            }
1680        };
1681        self.format_str(writer, formatted)
1682    }
1683
1684    fn format_float(&self, writer: &mut String, value: f64) -> Result<()> {
1685        let mut prefix = String::new();
1686        let mut suffix = String::new();
1687        let mut number = String::new();
1688        let upper = self.conversion_type.is_upper();
1689
1690        // set up the sign
1691        if value.is_sign_negative() {
1692            if self.negative_in_parentheses {
1693                prefix.push('(');
1694                suffix.push(')');
1695            } else {
1696                prefix.push('-');
1697            }
1698        } else if self.space_sign {
1699            prefix.push(' ');
1700        } else if self.force_sign {
1701            prefix.push('+');
1702        }
1703
1704        if value.is_finite() {
1705            let mut use_scientific = false;
1706            let mut strip_trailing_0s = false;
1707            let mut abs = value.abs();
1708            let mut exponent = abs.log10().floor() as i32;
1709            let mut precision = match self.precision {
1710                NumericParam::Literal(p) => p,
1711                _ => 6,
1712            };
1713            match self.conversion_type {
1714                ConversionType::DecFloatLower => {
1715                    // default
1716                }
1717                ConversionType::SciFloatLower => {
1718                    use_scientific = true;
1719                }
1720                ConversionType::SciFloatUpper => {
1721                    use_scientific = true;
1722                }
1723                ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
1724                    strip_trailing_0s = true;
1725                    if precision == 0 {
1726                        precision = 1;
1727                    }
1728                    // exponent signifies significant digits - we must round now
1729                    // to (re)calculate the exponent
1730                    let rounding_factor =
1731                        10.0_f64.powf((precision - 1 - exponent) as f64);
1732                    let rounded_fixed = (abs * rounding_factor).round();
1733                    abs = rounded_fixed / rounding_factor;
1734                    exponent = abs.log10().floor() as i32;
1735                    if exponent < -4 || exponent >= precision {
1736                        use_scientific = true;
1737                        precision -= 1;
1738                    } else {
1739                        // precision specifies the number of significant digits
1740                        precision -= 1 + exponent;
1741                    }
1742                }
1743                _ => {
1744                    return exec_err!(
1745                        "Invalid conversion type: {:?} for float",
1746                        self.conversion_type
1747                    )
1748                }
1749            }
1750
1751            if use_scientific {
1752                // Manual scientific notation formatting for uppercase E
1753                let mantissa = abs / 10.0_f64.powf(exponent as f64);
1754                let exp_char = if upper { 'E' } else { 'e' };
1755                number = format!("{mantissa:.prec$}", prec = precision as usize);
1756                if strip_trailing_0s {
1757                    number = trim_trailing_0s(&number).to_owned();
1758                }
1759                number = format!("{number}{exp_char}{exponent:+03}");
1760            } else {
1761                number = format!("{abs:.prec$}", prec = precision as usize);
1762                if strip_trailing_0s {
1763                    number = trim_trailing_0s(&number).to_owned();
1764                }
1765            }
1766            if self.alt_form && !number.contains('.') {
1767                number += ".";
1768            }
1769        } else {
1770            // not finite
1771            match self.conversion_type {
1772                ConversionType::DecFloatLower
1773                | ConversionType::SciFloatLower
1774                | ConversionType::CompactFloatLower => {
1775                    if value.is_infinite() {
1776                        number.push_str("Infinity")
1777                    } else {
1778                        number.push_str("NaN")
1779                    }
1780                }
1781                ConversionType::SciFloatUpper | ConversionType::CompactFloatUpper => {
1782                    if value.is_infinite() {
1783                        number.push_str("INFINITY")
1784                    } else {
1785                        number.push_str("NAN")
1786                    }
1787                }
1788                _ => {
1789                    return exec_err!(
1790                        "Invalid conversion type: {:?} for float",
1791                        self.conversion_type
1792                    )
1793                }
1794            }
1795        }
1796        // Take care of padding
1797        let NumericParam::Literal(width) = self.width else {
1798            writer.push_str(&prefix);
1799            writer.push_str(&number);
1800            writer.push_str(&suffix);
1801            return Ok(());
1802        };
1803        if self.left_adj {
1804            let mut full_num = prefix + &number + &suffix;
1805            while full_num.len() < width as usize {
1806                full_num.push(' ');
1807            }
1808            writer.push_str(&full_num);
1809        } else if self.zero_pad && value.is_finite() {
1810            while prefix.len() + number.len() + suffix.len() < width as usize {
1811                prefix.push('0');
1812            }
1813            writer.push_str(&prefix);
1814            writer.push_str(&number);
1815            writer.push_str(&suffix);
1816        } else {
1817            let mut full_num = prefix + &number + &suffix;
1818            while full_num.len() < width as usize {
1819                full_num = " ".to_owned() + &full_num;
1820            }
1821            writer.push_str(&full_num);
1822        };
1823
1824        Ok(())
1825    }
1826
1827    fn format_signed(&self, writer: &mut String, value: i64) -> Result<()> {
1828        let negative = value < 0;
1829        let abs_val = value.abs();
1830
1831        let (sign_prefix, sign_suffix) = if negative && self.negative_in_parentheses {
1832            ("(".to_owned(), ")".to_owned())
1833        } else if negative {
1834            ("-".to_owned(), "".to_owned())
1835        } else if self.force_sign {
1836            ("+".to_owned(), "".to_owned())
1837        } else if self.space_sign {
1838            (" ".to_owned(), "".to_owned())
1839        } else {
1840            ("".to_owned(), "".to_owned())
1841        };
1842
1843        let mut mod_spec = *self;
1844        mod_spec.width = match self.width {
1845            NumericParam::Literal(w) => NumericParam::Literal(
1846                w - sign_prefix.len() as i32 - sign_suffix.len() as i32,
1847            ),
1848            _ => NumericParam::FromArgument,
1849        };
1850        let mut formatted = String::new();
1851        mod_spec.format_unsigned(&mut formatted, abs_val as u64)?;
1852        // put the sign a after any leading spaces
1853        let mut actual_number = &formatted[0..];
1854        let mut leading_spaces = &formatted[0..0];
1855        if let Some(first_non_space) = formatted.find(|c| c != ' ') {
1856            actual_number = &formatted[first_non_space..];
1857            leading_spaces = &formatted[0..first_non_space];
1858        }
1859        write!(
1860            writer,
1861            "{}{}{}{}",
1862            leading_spaces.to_owned(),
1863            sign_prefix,
1864            actual_number,
1865            sign_suffix
1866        )
1867        .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1868        Ok(())
1869    }
1870
1871    fn format_unsigned(&self, writer: &mut String, value: u64) -> Result<()> {
1872        let mut s = String::new();
1873        let mut alt_prefix = "";
1874        match self.conversion_type {
1875            ConversionType::DecInt => {
1876                let num_str = format!("{value}");
1877                if self.grouping_separator {
1878                    // Add thousands separators
1879                    let mut result = String::new();
1880                    let chars: Vec<char> = num_str.chars().collect();
1881                    for (i, c) in chars.iter().enumerate() {
1882                        if i > 0 && (chars.len() - i).is_multiple_of(3) {
1883                            result.push(',');
1884                        }
1885                        result.push(*c);
1886                    }
1887                    s = result;
1888                } else {
1889                    s = num_str;
1890                }
1891            }
1892            ConversionType::HexIntLower => {
1893                alt_prefix = "0x";
1894                write!(&mut s, "{value:x}")
1895                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1896            }
1897            ConversionType::HexIntUpper => {
1898                alt_prefix = "0X";
1899                write!(&mut s, "{value:X}")
1900                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1901            }
1902            ConversionType::OctInt => {
1903                alt_prefix = "0";
1904                write!(&mut s, "{value:o}")
1905                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1906            }
1907            _ => {
1908                return exec_err!(
1909                    "Invalid conversion type: {:?} for u64",
1910                    self.conversion_type
1911                )
1912            }
1913        }
1914        let mut prefix = if self.alt_form {
1915            alt_prefix.to_owned()
1916        } else {
1917            String::new()
1918        };
1919
1920        let formatted = if let NumericParam::Literal(width) = self.width {
1921            if self.left_adj {
1922                let mut num_str = prefix + &s;
1923                while num_str.len() < width as usize {
1924                    num_str.push(' ');
1925                }
1926                num_str
1927            } else if self.zero_pad {
1928                while prefix.len() + s.len() < width as usize {
1929                    prefix.push('0');
1930                }
1931                prefix + &s
1932            } else {
1933                let mut num_str = prefix + &s;
1934                while num_str.len() < width as usize {
1935                    num_str = " ".to_owned() + &num_str;
1936                }
1937                num_str
1938            }
1939        } else {
1940            prefix + &s
1941        };
1942        write!(writer, "{formatted}")
1943            .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1944        Ok(())
1945    }
1946
1947    fn format_str(&self, writer: &mut String, value: &str) -> Result<()> {
1948        // Take care of precision, putting the truncated string in `content`
1949        let precision: usize = match self.precision {
1950            NumericParam::Literal(p) => p,
1951            _ => i32::MAX,
1952        }
1953        .try_into()
1954        .unwrap_or_default();
1955        let content_len = {
1956            let mut content_len = precision.min(value.len());
1957            while !value.is_char_boundary(content_len) {
1958                content_len -= 1;
1959            }
1960            content_len
1961        };
1962        let content = &value[..content_len];
1963
1964        // Pad to width if needed, putting the padded string in `s`
1965
1966        if let NumericParam::Literal(width) = self.width {
1967            let start_len = writer.len();
1968            if self.left_adj {
1969                writer.push_str(content);
1970                while writer.len() - start_len < width as usize {
1971                    writer.push(' ');
1972                }
1973            } else {
1974                while writer.len() - start_len + content.len() < width as usize {
1975                    writer.push(' ');
1976                }
1977                writer.push_str(content);
1978            }
1979        } else {
1980            writer.push_str(content);
1981        }
1982        Ok(())
1983    }
1984
1985    fn format_string(&self, writer: &mut String, value: &str) -> Result<()> {
1986        if self.conversion_type.is_upper() {
1987            let upper = value.to_ascii_uppercase();
1988            self.format_str(writer, &upper)
1989        } else {
1990            self.format_str(writer, value)
1991        }
1992    }
1993
1994    fn format_decimal(
1995        &self,
1996        writer: &mut String,
1997        value: String,
1998        scale: i64,
1999    ) -> Result<()> {
2000        let mut prefix = String::new();
2001        let upper = self.conversion_type.is_upper();
2002
2003        // Parse as BigDecimal
2004        let decimal = value
2005            .parse::<BigInt>()
2006            .map_err(|e| exec_datafusion_err!("Failed to parse decimal: {}", e))?;
2007        let decimal = BigDecimal::from_bigint(decimal, scale);
2008
2009        // Handle sign
2010        let is_negative = decimal.sign() == Sign::Minus;
2011        let abs_decimal = decimal.abs();
2012
2013        if is_negative {
2014            prefix.push('-');
2015        } else if self.space_sign {
2016            prefix.push(' ');
2017        } else if self.force_sign {
2018            prefix.push('+');
2019        }
2020
2021        let exp_symb = if upper { 'E' } else { 'e' };
2022        let mut strip_trailing_0s = false;
2023
2024        // Get precision setting
2025        let mut precision = match self.precision {
2026            NumericParam::Literal(p) => p,
2027            _ => 6,
2028        };
2029
2030        let number = match self.conversion_type {
2031            ConversionType::DecFloatLower => {
2032                // Format as fixed-point decimal
2033                self.format_decimal_fixed(&abs_decimal, precision, strip_trailing_0s)?
2034            }
2035            ConversionType::SciFloatLower => self.format_decimal_scientific(
2036                &abs_decimal,
2037                precision,
2038                'e',
2039                strip_trailing_0s,
2040            )?,
2041            ConversionType::SciFloatUpper => self.format_decimal_scientific(
2042                &abs_decimal,
2043                precision,
2044                'E',
2045                strip_trailing_0s,
2046            )?,
2047            ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
2048                strip_trailing_0s = true;
2049                if precision == 0 {
2050                    precision = 1;
2051                }
2052                // Determine if we should use scientific notation
2053                let log10_val = abs_decimal.to_f64().map(|f| f.log10()).unwrap_or(0.0);
2054                if log10_val < -4.0 || log10_val >= precision as f64 {
2055                    self.format_decimal_scientific(
2056                        &abs_decimal,
2057                        precision - 1,
2058                        exp_symb,
2059                        strip_trailing_0s,
2060                    )?
2061                } else {
2062                    self.format_decimal_fixed(
2063                        &abs_decimal,
2064                        precision - 1 - log10_val.floor() as i32,
2065                        strip_trailing_0s,
2066                    )?
2067                }
2068            }
2069            _ => {
2070                return exec_err!(
2071                    "Invalid conversion type: {:?} for decimal",
2072                    self.conversion_type
2073                )
2074            }
2075        };
2076
2077        // Handle padding
2078        let NumericParam::Literal(width) = self.width else {
2079            writer.push_str(&prefix);
2080            writer.push_str(&number);
2081            return Ok(());
2082        };
2083
2084        if self.left_adj {
2085            let mut full_num = prefix + &number;
2086            while full_num.len() < width as usize {
2087                full_num.push(' ');
2088            }
2089            writer.push_str(&full_num);
2090        } else if self.zero_pad {
2091            while prefix.len() + number.len() < width as usize {
2092                prefix.push('0');
2093            }
2094            writer.push_str(&prefix);
2095            writer.push_str(&number);
2096        } else {
2097            let mut full_num = prefix + &number;
2098            while full_num.len() < width as usize {
2099                full_num = " ".to_owned() + &full_num;
2100            }
2101            writer.push_str(&full_num);
2102        }
2103
2104        Ok(())
2105    }
2106
2107    fn format_decimal_fixed(
2108        &self,
2109        decimal: &BigDecimal,
2110        precision: i32,
2111        strip_trailing_0s: bool,
2112    ) -> Result<String> {
2113        if precision <= 0 {
2114            Ok(decimal.round(0).to_string())
2115        } else {
2116            // Use BigDecimal's with_scale method for precise decimal formatting
2117            let scaled = decimal.round(precision as i64);
2118            let mut number = scaled.to_string();
2119            if strip_trailing_0s {
2120                number = trim_trailing_0s(&number).to_owned();
2121            }
2122            Ok(number)
2123        }
2124    }
2125
2126    fn format_decimal_scientific(
2127        &self,
2128        decimal: &BigDecimal,
2129        precision: i32,
2130        exp_char: char,
2131        strip_trailing_0s: bool,
2132    ) -> Result<String> {
2133        // Convert to f64 for scientific notation (may lose precision for very large numbers)
2134        let float_val = decimal.to_f64().unwrap_or(0.0);
2135        if float_val == 0.0 {
2136            return Ok(format!("0{exp_char}+00"));
2137        }
2138
2139        let abs_val = float_val.abs();
2140        let exponent = abs_val.log10().floor() as i32;
2141        let mantissa = abs_val / 10.0_f64.powf(exponent as f64);
2142
2143        let mut number = if precision <= 0 {
2144            format!("{mantissa:.0}")
2145        } else {
2146            format!("{mantissa:.prec$}", prec = precision as usize)
2147        };
2148
2149        if strip_trailing_0s {
2150            number = trim_trailing_0s(&number).to_owned();
2151        }
2152
2153        Ok(format!("{number}{exp_char}{exponent:+03}"))
2154    }
2155
2156    fn format_time(
2157        &self,
2158        writer: &mut String,
2159        timestamp_nanos: i64,
2160        timezone: &Option<Arc<str>>,
2161    ) -> Result<()> {
2162        let upper = self.conversion_type.is_upper();
2163        match &self.conversion_type {
2164            ConversionType::TimeLower(time_format)
2165            | ConversionType::TimeUpper(time_format) => {
2166                let formatted =
2167                    self.format_time_component(timestamp_nanos, *time_format, timezone)?;
2168                let result = if upper {
2169                    formatted.to_uppercase()
2170                } else {
2171                    formatted
2172                };
2173                write!(writer, "{result}")
2174                    .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2175                Ok(())
2176            }
2177            _ => exec_err!(
2178                "Invalid conversion type for time: {:?}",
2179                self.conversion_type
2180            ),
2181        }
2182    }
2183
2184    fn format_date(&self, writer: &mut String, date_days: i64) -> Result<()> {
2185        // Convert days since epoch to timestamp in nanoseconds
2186        let timestamp_nanos = date_days * 24 * 60 * 60 * 1_000_000_000;
2187        self.format_time(writer, timestamp_nanos, &None)
2188    }
2189
2190    fn format_time_component(
2191        &self,
2192        timestamp_nanos: i64,
2193        time_format: TimeFormat,
2194        _timezone: &Option<Arc<str>>,
2195    ) -> Result<String> {
2196        // Convert nanoseconds to seconds and nanoseconds remainder
2197        let secs = timestamp_nanos / 1_000_000_000;
2198        let nanos = (timestamp_nanos % 1_000_000_000) as u32;
2199
2200        // Create DateTime from timestamp
2201        let dt = DateTime::<Utc>::from_timestamp(secs, nanos).ok_or_else(|| {
2202            exec_datafusion_err!("Invalid timestamp: {}", timestamp_nanos)
2203        })?;
2204
2205        match time_format {
2206            TimeFormat::HUpper => Ok(format!("{:02}", dt.hour())),
2207            TimeFormat::IUpper => {
2208                let hour_12 = match dt.hour12() {
2209                    (true, h) => h,  // PM
2210                    (false, h) => h, // AM
2211                };
2212                Ok(format!("{hour_12:02}"))
2213            }
2214            TimeFormat::KLower => Ok(format!("{}", dt.hour())),
2215            TimeFormat::LLower => {
2216                let hour_12 = match dt.hour12() {
2217                    (true, h) => h,  // PM
2218                    (false, h) => h, // AM
2219                };
2220                Ok(format!("{hour_12}"))
2221            }
2222            TimeFormat::MUpper => Ok(format!("{:02}", dt.minute())),
2223            TimeFormat::SUpper => Ok(format!("{:02}", dt.second())),
2224            TimeFormat::LUpper => Ok(format!("{:03}", dt.timestamp_millis() % 1000)),
2225            TimeFormat::NUpper => Ok(format!("{:09}", dt.nanosecond())),
2226            TimeFormat::PLower => {
2227                let (is_pm, _) = dt.hour12();
2228                Ok(if is_pm {
2229                    "pm".to_string()
2230                } else {
2231                    "am".to_string()
2232                })
2233            }
2234            TimeFormat::ZLower => Ok("+0000".to_string()), // UTC timezone offset
2235            TimeFormat::ZUpper => Ok("UTC".to_string()),   // UTC timezone name
2236            TimeFormat::SLower => Ok(format!("{}", dt.timestamp())),
2237            TimeFormat::QUpper => Ok(format!("{}", dt.timestamp_millis())),
2238            TimeFormat::BUpper => Ok(dt.format("%B").to_string()), // Full month name
2239            TimeFormat::BLower => Ok(dt.format("%b").to_string()), // Abbreviated month name
2240            TimeFormat::AUpper => Ok(dt.format("%A").to_string()), // Full weekday name
2241            TimeFormat::ALower => Ok(dt.format("%a").to_string()), // Abbreviated weekday name
2242            TimeFormat::CUpper => Ok(format!("{:02}", dt.year() / 100)),
2243            TimeFormat::YUpper => Ok(format!("{:04}", dt.year())),
2244            TimeFormat::YLower => Ok(format!("{:02}", dt.year() % 100)),
2245            TimeFormat::JLower => Ok(format!("{:03}", dt.ordinal())), // Day of year
2246            TimeFormat::MLower => Ok(format!("{:02}", dt.month())),
2247            TimeFormat::DLower => Ok(format!("{:02}", dt.day())),
2248            TimeFormat::ELower => Ok(format!("{}", dt.day())),
2249            TimeFormat::RUpper => Ok(dt.format("%H:%M").to_string()),
2250            TimeFormat::TUpper => Ok(dt.format("%H:%M:%S").to_string()),
2251            TimeFormat::RLower => {
2252                let (is_pm, hour_12) = dt.hour12();
2253                let am_pm = if is_pm { "PM" } else { "AM" };
2254                Ok(format!(
2255                    "{:02}:{:02}:{:02} {}",
2256                    hour_12,
2257                    dt.minute(),
2258                    dt.second(),
2259                    am_pm
2260                ))
2261            }
2262            TimeFormat::DUpper => Ok(dt.format("%m/%d/%y").to_string()),
2263            TimeFormat::FUpper => Ok(dt.format("%Y-%m-%d").to_string()),
2264            TimeFormat::CLower => Ok(dt.format("%a %b %d %H:%M:%S UTC %Y").to_string()),
2265        }
2266    }
2267}
2268
/// Floating-point values that can be rendered with Spark-style spellings for
/// the non-finite cases.
trait FloatFormattable: std::fmt::Display {
    /// Floating-point classification of the value.
    fn category(&self) -> FpCategory;

    /// Whether the value counts as negative; consulted by [`Self::spark_string`]
    /// only to pick the sign of an infinity.
    fn negative(&self) -> bool;

    /// Text form of the value: `NaN`, `Infinity` or `-Infinity` for non-finite
    /// values, and the type's `Display` output for everything else.
    fn spark_string(&self) -> String {
        let class = self.category();
        if matches!(class, FpCategory::Nan) {
            return String::from("NaN");
        }
        if matches!(class, FpCategory::Infinite) {
            let spelled = if self.negative() {
                "-Infinity"
            } else {
                "Infinity"
            };
            return String::from(spelled);
        }
        self.to_string()
    }
}
2287
2288impl FloatFormattable for f32 {
2289    fn category(&self) -> FpCategory {
2290        self.classify()
2291    }
2292
2293    fn negative(&self) -> bool {
2294        self.is_sign_negative()
2295    }
2296}
2297
2298impl FloatFormattable for f64 {
2299    fn category(&self) -> FpCategory {
2300        self.classify()
2301    }
2302
2303    fn negative(&self) -> bool {
2304        self.is_sign_negative()
2305    }
2306}
2307
2308trait FloatBits: FloatFormattable {
2309    const MANTISSA_BITS: u8;
2310    const EXPONENT_BIAS: u16;
2311    const SCALEUP_POWER: u8;
2312    const SCALEUP: Self;
2313
2314    fn to_parts(&self) -> (bool, u16, u64);
2315}
2316
2317impl FloatBits for f64 {
2318    const MANTISSA_BITS: u8 = 52;
2319    const EXPONENT_BIAS: u16 = 1023;
2320    const SCALEUP_POWER: u8 = 54;
2321    const SCALEUP: f64 = (1_i64 << Self::SCALEUP_POWER) as f64;
2322
2323    fn to_parts(&self) -> (bool, u16, u64) {
2324        let bits = self.to_bits();
2325        let sign: bool = (bits >> 63) == 1;
2326        let exponent = ((bits >> 52) & 0x7FF) as u16;
2327        let mantissa = bits & 0x000F_FFFF_FFFF_FFFF;
2328        (sign, exponent, mantissa)
2329    }
2330}
2331
/// Strips trailing `'0'` characters from a number that contains a decimal
/// point; input without a `'.'` is returned untouched.
///
/// The decimal point itself is never removed, so `"1.000"` becomes `"1."`.
fn trim_trailing_0s(number: &str) -> &str {
    if !number.contains('.') {
        return number;
    }
    // The '.' guarantees the trimmed result is never empty.
    number.trim_end_matches('0')
}
2342
/// Strips trailing `'0'` characters from a digit string, whether or not it
/// contains a `'.'`.
///
/// A string consisting only of zeros (or an empty string) is returned
/// unchanged rather than being trimmed down to nothing.
fn trim_trailing_0s_hex(number: &str) -> &str {
    match number.trim_end_matches('0') {
        // Nothing but zeros: keep the input exactly as it was.
        "" => number,
        trimmed => trimmed,
    }
}
datafusion_spark/function/string/format_string.rs

datafusion_spark/function/string/
format_string.rs