1use std::any::Any;
19use std::fmt::Write;
20use std::sync::Arc;
21
22use core::num::FpCategory;
23
24use arrow::{
25 array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray},
26 datatypes::DataType,
27};
28use bigdecimal::{
29 num_bigint::{BigInt, Sign},
30 BigDecimal, ToPrimitive,
31};
32use chrono::{DateTime, Datelike, Timelike, Utc};
33use datafusion_common::{
34 exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue,
35};
36use datafusion_expr::{
37 ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature,
38 Volatility,
39};
40
/// Scalar UDF implementation exposed under the name `format_string`
/// (alias `printf`). The formatting logic lives in its `ScalarUDFImpl`
/// implementation.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct FormatStringFunc {
    /// Argument signature handed back by `signature()`.
    signature: Signature,
    /// Alternative function names handed back by `aliases()`.
    aliases: Vec<String>,
}
48
49impl Default for FormatStringFunc {
50 fn default() -> Self {
51 Self::new()
52 }
53}
54
55impl FormatStringFunc {
56 pub fn new() -> Self {
57 Self {
58 signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable),
59 aliases: vec![String::from("printf")],
60 }
61 }
62}
63
64impl ScalarUDFImpl for FormatStringFunc {
65 fn as_any(&self) -> &dyn Any {
66 self
67 }
68
69 fn name(&self) -> &str {
70 "format_string"
71 }
72
73 fn aliases(&self) -> &[String] {
74 &self.aliases
75 }
76
77 fn signature(&self) -> &Signature {
78 &self.signature
79 }
80
81 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
82 match arg_types[0] {
83 DataType::Null => Ok(DataType::Utf8),
84 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(arg_types[0].clone()),
85 _ => plan_err!("The format_string function expects the first argument to be Utf8, LargeUtf8 or Utf8View")
86 }
87 }
88
89 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
90 let len = args.args.iter().find_map(|arg| match arg {
91 ColumnarValue::Scalar(_) => None,
92 ColumnarValue::Array(a) => Some(a.len()),
93 });
94 let is_scalar = len.is_none();
95 let data_types = args.args[1..]
96 .iter()
97 .map(|arg| arg.data_type())
98 .collect::<Vec<_>>();
99 let fmt_type = args.args[0].data_type();
100
101 match &args.args[0] {
102 ColumnarValue::Scalar(ScalarValue::Null) => {
103 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
104 }
105 ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {
106 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
107 }
108 ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {
109 Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
110 }
111 ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
112 Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
113 }
114 ColumnarValue::Scalar(ScalarValue::Utf8(Some(fmt)))
115 | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(fmt)))
116 | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(fmt))) => {
117 let formatter = Formatter::parse(fmt, &data_types)?;
118 let mut result = Vec::with_capacity(len.unwrap_or(1));
119 for i in 0..len.unwrap_or(1) {
120 let scalars = args.args[1..]
121 .iter()
122 .map(|arg| try_to_scalar(arg.clone(), i))
123 .collect::<Result<Vec<_>>>()?;
124 let formatted = formatter.format(&scalars)?;
125 result.push(formatted);
126 }
127 if is_scalar {
128 let scalar_result = result.pop().unwrap();
129 match fmt_type {
130 DataType::Utf8 => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
131 Some(scalar_result),
132 ))),
133 DataType::LargeUtf8 => Ok(ColumnarValue::Scalar(
134 ScalarValue::LargeUtf8(Some(scalar_result)),
135 )),
136 DataType::Utf8View => Ok(ColumnarValue::Scalar(
137 ScalarValue::Utf8View(Some(scalar_result)),
138 )),
139 _ => unreachable!(),
140 }
141 } else {
142 let array: ArrayRef = match fmt_type {
143 DataType::Utf8 => Arc::new(StringArray::from(result)),
144 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
145 DataType::Utf8View => Arc::new(StringViewArray::from(result)),
146 _ => unreachable!(),
147 };
148 Ok(ColumnarValue::Array(array))
149 }
150 }
151 ColumnarValue::Array(fmts) => {
152 let mut result = Vec::with_capacity(len.unwrap());
153 for i in 0..len.unwrap() {
154 let fmt = ScalarValue::try_from_array(fmts, i)?;
155 match fmt.try_as_str() {
156 Some(Some(fmt)) => {
157 let formatter = Formatter::parse(fmt, &data_types)?;
158 let scalars = args.args[1..]
159 .iter()
160 .map(|arg| try_to_scalar(arg.clone(), i))
161 .collect::<Result<Vec<_>>>()?;
162 let formatted = formatter.format(&scalars)?;
163 result.push(Some(formatted));
164 }
165 Some(None) => {
166 result.push(None);
167 }
168 _ => unreachable!(),
169 }
170 }
171 let array: ArrayRef = match fmt_type {
172 DataType::Utf8 => Arc::new(StringArray::from(result)),
173 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
174 DataType::Utf8View => Arc::new(StringViewArray::from(result)),
175 _ => unreachable!(),
176 };
177 Ok(ColumnarValue::Array(array))
178 }
179 _ => exec_err!(
180 "The format_string function expects the first argument to be a string"
181 ),
182 }
183 }
184}
185
186fn try_to_scalar(arg: ColumnarValue, index: usize) -> Result<ScalarValue> {
187 match arg {
188 ColumnarValue::Scalar(scalar) => Ok(scalar),
189 ColumnarValue::Array(array) => ScalarValue::try_from_array(&array, index),
190 }
191}
192
/// A parsed format string: a sequence of literal text pieces and conversion
/// specifiers, borrowing literal text from the original format string.
#[derive(Debug)]
pub struct Formatter<'a> {
    /// Parsed pieces, in the order they appear in the format string.
    pub elements: Vec<FormatElement<'a>>,
    /// Largest 1-based argument index referenced by any specifier (0 when the
    /// format string contains no specifier); `format` requires at least this
    /// many arguments.
    pub arg_num: usize,
}
199
200impl<'a> Formatter<'a> {
201 pub fn new(elements: Vec<FormatElement<'a>>) -> Self {
202 let arg_num = elements
203 .iter()
204 .map(|element| match element {
205 FormatElement::Format(spec) => spec.argument_index,
206 _ => 0,
207 })
208 .max()
209 .unwrap_or(0);
210 Self { elements, arg_num }
211 }
212
213 pub fn parse(fmt: &'a str, arg_types: &[DataType]) -> Result<Self> {
278 let mut res = Vec::new();
280
281 let mut rem = fmt;
282 let mut argument_index = 0;
283
284 let mut prev: Option<usize> = None;
285
286 while !rem.is_empty() {
287 if let Some((verbatim_prefix, rest)) = rem.split_once('%') {
288 if !verbatim_prefix.is_empty() {
289 res.push(FormatElement::Verbatim(verbatim_prefix));
290 }
291 if let Some(rest) = rest.strip_prefix('%') {
292 res.push(FormatElement::Verbatim("%"));
293 rem = rest;
294 continue;
295 }
296 if let Some(rest) = rest.strip_prefix('n') {
297 res.push(FormatElement::Verbatim("\n"));
298 rem = rest;
299 continue;
300 }
301 if let Some(rest) = rest.strip_prefix('<') {
302 let Some(p) = prev else {
304 return exec_err!("No previous argument to reference");
305 };
306 let (spec, rest) =
307 take_conversion_specifier(rest, p, arg_types[p - 1].clone())?;
308 res.push(FormatElement::Format(spec));
309 rem = rest;
310 continue;
311 }
312
313 let (current_argument_index, rest2) = take_numeric_param(rest, false);
314 let (current_argument_index, rest) =
315 match (current_argument_index, rest2.starts_with('$')) {
316 (NumericParam::Literal(index), true) => {
317 (index as usize, &rest2[1..])
318 }
319 (NumericParam::FromArgument, true) => {
320 return exec_err!("Invalid numeric parameter")
321 }
322 (_, false) => {
323 argument_index += 1;
324 (argument_index, rest)
325 }
326 };
327 if current_argument_index == 0 || current_argument_index > arg_types.len()
328 {
329 return exec_err!(
330 "Argument index {} is out of bounds",
331 current_argument_index
332 );
333 }
334
335 let (spec, rest) = take_conversion_specifier(
336 rest,
337 current_argument_index,
338 arg_types[current_argument_index - 1].clone(),
339 )
340 .map_err(|e| exec_datafusion_err!("{:?}, format string: {:?}", e, fmt))?;
341 res.push(FormatElement::Format(spec));
342 prev = Some(spec.argument_index);
343 rem = rest;
344 } else {
345 res.push(FormatElement::Verbatim(rem));
346 break;
347 }
348 }
349
350 Ok(Self::new(res))
351 }
352
353 pub fn format(&self, args: &[ScalarValue]) -> Result<String> {
354 if args.len() < self.arg_num {
355 return exec_err!(
356 "Expected at least {} arguments, got {}",
357 self.arg_num,
358 args.len()
359 );
360 }
361 let mut string = String::new();
362 for element in &self.elements {
363 match element {
364 FormatElement::Verbatim(text) => {
365 string.push_str(text);
366 }
367 FormatElement::Format(spec) => {
368 spec.format(&mut string, &args[spec.argument_index - 1])?;
369 }
370 }
371 }
372 Ok(string)
373 }
374}
375
/// One piece of a parsed format string.
#[derive(Debug)]
pub enum FormatElement<'a> {
    /// Text copied to the output unchanged.
    Verbatim(&'a str),
    /// A conversion specifier that formats one argument.
    Format(ConversionSpecifier),
}
383
/// A single parsed conversion specifier (flags, width, precision and
/// conversion type) together with the argument it applies to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ConversionSpecifier {
    /// 1-based index of the argument this specifier formats.
    pub argument_index: usize,
    /// Set by the `#` flag.
    pub alt_form: bool,
    /// Set by the `0` flag.
    pub zero_pad: bool,
    /// Set by the `-` flag.
    pub left_adj: bool,
    /// Set by the ` ` (space) flag.
    pub space_sign: bool,
    /// Set by the `+` flag.
    pub force_sign: bool,
    /// Set by the `,` flag.
    pub grouping_separator: bool,
    /// Set by the `(` flag.
    pub negative_in_parentheses: bool,
    /// Width parsed from the specifier; the parser initializes it to
    /// `NumericParam::Literal(0)`.
    pub width: NumericParam,
    /// Precision parsed after a `.`; stays `NumericParam::FromArgument` when
    /// the specifier has no `.`.
    pub precision: NumericParam,
    /// The conversion selected by the trailing conversion character(s).
    pub conversion_type: ConversionType,
}
409
/// A numeric parameter of a specifier (argument index, width or precision).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumericParam {
    /// A number written literally in the format string.
    Literal(i32),
    /// Produced by the parser when no number is written at that position.
    /// NOTE(review): the name suggests the value is read from an argument, but
    /// no such lookup is visible in this part of the file — confirm how the
    /// formatting code interprets it.
    FromArgument,
}
418
/// The conversion requested by a specifier. Each variant notes the conversion
/// character the parser maps to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConversionType {
    /// `B`
    BooleanUpper,
    /// `b`
    BooleanLower,
    /// `h`
    HexHashLower,
    /// `H`
    HexHashUpper,
    /// `d`
    DecInt,
    /// `o`
    OctInt,
    /// `x`
    HexIntLower,
    /// `X`
    HexIntUpper,
    /// `e`
    SciFloatLower,
    /// `E`
    SciFloatUpper,
    /// `f`
    DecFloatLower,
    /// `g`
    CompactFloatLower,
    /// `G`
    CompactFloatUpper,
    /// `a`
    HexFloatLower,
    /// `A`
    HexFloatUpper,
    /// `t` followed by a time-format character
    TimeLower(TimeFormat),
    /// `T` followed by a time-format character
    TimeUpper(TimeFormat),
    /// `c`
    CharLower,
    /// `C`
    CharUpper,
    /// `s`
    StringLower,
    /// `S`
    StringUpper,
}
466
/// The character that follows `t`/`T` in a time conversion. Each variant notes
/// the character that `TryFrom<char>` maps to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeFormat {
    /// `H`
    HUpper,
    /// `I`
    IUpper,
    /// `k`
    KLower,
    /// `l`
    LLower,
    /// `M`
    MUpper,
    /// `S`
    SUpper,
    /// `L`
    LUpper,
    /// `N`
    NUpper,
    /// `p`
    PLower,
    /// `z`
    ZLower,
    /// `Z`
    ZUpper,
    /// `s`
    SLower,
    /// `Q`
    QUpper,
    /// `B`
    BUpper,
    /// `b` or `h`
    BLower,
    /// `A`
    AUpper,
    /// `a`
    ALower,
    /// `C`
    CUpper,
    /// `Y`
    YUpper,
    /// `y`
    YLower,
    /// `j`
    JLower,
    /// `m`
    MLower,
    /// `d`
    DLower,
    /// `e`
    ELower,
    /// `R`
    RUpper,
    /// `T`
    TUpper,
    /// `r`
    RLower,
    /// `D`
    DUpper,
    /// `F`
    FUpper,
    /// `c`
    CLower,
}
544
545impl TryFrom<char> for TimeFormat {
546 type Error = DataFusionError;
547 fn try_from(value: char) -> Result<Self, Self::Error> {
548 match value {
549 'H' => Ok(TimeFormat::HUpper),
550 'I' => Ok(TimeFormat::IUpper),
551 'k' => Ok(TimeFormat::KLower),
552 'l' => Ok(TimeFormat::LLower),
553 'M' => Ok(TimeFormat::MUpper),
554 'S' => Ok(TimeFormat::SUpper),
555 'L' => Ok(TimeFormat::LUpper),
556 'N' => Ok(TimeFormat::NUpper),
557 'p' => Ok(TimeFormat::PLower),
558 'z' => Ok(TimeFormat::ZLower),
559 'Z' => Ok(TimeFormat::ZUpper),
560 's' => Ok(TimeFormat::SLower),
561 'Q' => Ok(TimeFormat::QUpper),
562 'B' => Ok(TimeFormat::BUpper),
563 'b' | 'h' => Ok(TimeFormat::BLower),
564 'A' => Ok(TimeFormat::AUpper),
565 'a' => Ok(TimeFormat::ALower),
566 'C' => Ok(TimeFormat::CUpper),
567 'Y' => Ok(TimeFormat::YUpper),
568 'y' => Ok(TimeFormat::YLower),
569 'j' => Ok(TimeFormat::JLower),
570 'm' => Ok(TimeFormat::MLower),
571 'd' => Ok(TimeFormat::DLower),
572 'e' => Ok(TimeFormat::ELower),
573 'R' => Ok(TimeFormat::RUpper),
574 'T' => Ok(TimeFormat::TUpper),
575 'r' => Ok(TimeFormat::RLower),
576 'D' => Ok(TimeFormat::DUpper),
577 'F' => Ok(TimeFormat::FUpper),
578 'c' => Ok(TimeFormat::CLower),
579 _ => exec_err!("Invalid time format: {}", value),
580 }
581 }
582}
583
584impl ConversionType {
585 pub fn validate(&self, arg_type: DataType) -> Result<()> {
586 match self {
587 ConversionType::BooleanLower | ConversionType::BooleanUpper => {
588 if !matches!(arg_type, DataType::Boolean) {
589 return exec_err!(
590 "Invalid argument type for boolean conversion: {:?}",
591 arg_type
592 );
593 }
594 }
595 ConversionType::CharLower | ConversionType::CharUpper => {
596 if !matches!(
597 arg_type,
598 DataType::Int8
599 | DataType::UInt8
600 | DataType::Int16
601 | DataType::UInt16
602 | DataType::Int32
603 | DataType::UInt32
604 | DataType::Int64
605 | DataType::UInt64
606 ) {
607 return exec_err!(
608 "Invalid argument type for char conversion: {:?}",
609 arg_type
610 );
611 }
612 }
613 ConversionType::DecInt
614 | ConversionType::OctInt
615 | ConversionType::HexIntLower
616 | ConversionType::HexIntUpper => {
617 if !arg_type.is_integer() {
618 return exec_err!(
619 "Invalid argument type for integer conversion: {:?}",
620 arg_type
621 );
622 }
623 }
624 ConversionType::SciFloatLower
625 | ConversionType::SciFloatUpper
626 | ConversionType::DecFloatLower
627 | ConversionType::CompactFloatLower
628 | ConversionType::CompactFloatUpper
629 | ConversionType::HexFloatLower
630 | ConversionType::HexFloatUpper => {
631 if !arg_type.is_numeric() {
632 return exec_err!(
633 "Invalid argument type for float conversion: {:?}",
634 arg_type
635 );
636 }
637 }
638 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_) => {
639 if !arg_type.is_temporal() {
640 return exec_err!(
641 "Invalid argument type for time conversion: {:?}",
642 arg_type
643 );
644 }
645 }
646 _ => {}
647 }
648 Ok(())
649 }
650
651 fn supports_integer(&self) -> bool {
652 matches!(
653 self,
654 ConversionType::DecInt
655 | ConversionType::HexIntLower
656 | ConversionType::HexIntUpper
657 | ConversionType::OctInt
658 | ConversionType::CharLower
659 | ConversionType::CharUpper
660 | ConversionType::StringLower
661 | ConversionType::StringUpper
662 )
663 }
664
665 fn supports_float(&self) -> bool {
666 matches!(
667 self,
668 ConversionType::DecFloatLower
669 | ConversionType::SciFloatLower
670 | ConversionType::SciFloatUpper
671 | ConversionType::CompactFloatLower
672 | ConversionType::CompactFloatUpper
673 | ConversionType::StringLower
674 | ConversionType::StringUpper
675 | ConversionType::HexFloatLower
676 | ConversionType::HexFloatUpper
677 )
678 }
679
680 fn supports_decimal(&self) -> bool {
681 matches!(
682 self,
683 ConversionType::DecFloatLower
684 | ConversionType::SciFloatLower
685 | ConversionType::SciFloatUpper
686 | ConversionType::CompactFloatLower
687 | ConversionType::CompactFloatUpper
688 | ConversionType::StringLower
689 | ConversionType::StringUpper
690 )
691 }
692
693 fn supports_time(&self) -> bool {
694 matches!(
695 self,
696 ConversionType::TimeLower(_)
697 | ConversionType::TimeUpper(_)
698 | ConversionType::StringLower
699 | ConversionType::StringUpper
700 )
701 }
702
703 fn is_upper(&self) -> bool {
704 matches!(
705 self,
706 ConversionType::BooleanUpper
707 | ConversionType::HexHashUpper
708 | ConversionType::HexIntUpper
709 | ConversionType::SciFloatUpper
710 | ConversionType::CompactFloatUpper
711 | ConversionType::HexFloatUpper
712 | ConversionType::TimeUpper(_)
713 | ConversionType::CharUpper
714 | ConversionType::StringUpper
715 )
716 }
717}
718
719fn take_conversion_specifier(
720 mut s: &str,
721 argument_index: usize,
722 arg_type: DataType,
723) -> Result<(ConversionSpecifier, &str)> {
724 let mut spec = ConversionSpecifier {
725 argument_index,
726 alt_form: false,
727 zero_pad: false,
728 left_adj: false,
729 space_sign: false,
730 force_sign: false,
731 grouping_separator: false,
732 negative_in_parentheses: false,
733 width: NumericParam::Literal(0),
734 precision: NumericParam::FromArgument, conversion_type: ConversionType::DecInt,
737 };
738
739 loop {
741 match s.chars().next() {
742 Some('#') => {
743 spec.alt_form = true;
744 }
745 Some('0') => {
746 if spec.left_adj {
747 return exec_err!("Invalid flag combination: '0' and '-'");
748 }
749 spec.zero_pad = true;
750 }
751 Some('-') => {
752 spec.left_adj = true;
753 }
754 Some(' ') => {
755 if spec.force_sign {
756 return exec_err!("Invalid flag combination: '+' and ' '");
757 }
758 spec.space_sign = true;
759 }
760 Some('+') => {
761 if spec.space_sign {
762 return exec_err!("Invalid flag combination: '+' and ' '");
763 }
764 spec.force_sign = true;
765 }
766 Some(',') => {
767 spec.grouping_separator = true;
768 }
769 Some('(') => {
770 spec.negative_in_parentheses = true;
771 }
772 _ => {
773 break;
774 }
775 }
776 s = &s[1..];
777 }
778 let (w, mut s) = take_numeric_param(s, false);
780 spec.width = w;
781 if matches!(s.chars().next(), Some('.')) {
783 s = &s[1..];
784 let (p, s2) = take_numeric_param(s, true);
785 spec.precision = p;
786 s = s2;
787 }
788 let mut chars = s.chars();
789 let mut offset = 1;
790 spec.conversion_type = match chars.next() {
792 Some('b') => ConversionType::BooleanLower,
793 Some('B') => ConversionType::BooleanUpper,
794 Some('h') => ConversionType::HexHashLower,
795 Some('H') => ConversionType::HexHashUpper,
796 Some('s') => ConversionType::StringLower,
797 Some('S') => ConversionType::StringUpper,
798 Some('c') => ConversionType::CharLower,
799 Some('C') => ConversionType::CharUpper,
800 Some('d') => ConversionType::DecInt,
801 Some('o') => ConversionType::OctInt,
802 Some('x') => ConversionType::HexIntLower,
803 Some('X') => ConversionType::HexIntUpper,
804 Some('e') => ConversionType::SciFloatLower,
805 Some('E') => ConversionType::SciFloatUpper,
806 Some('f') => ConversionType::DecFloatLower,
807 Some('g') => ConversionType::CompactFloatLower,
808 Some('G') => ConversionType::CompactFloatUpper,
809 Some('a') => ConversionType::HexFloatLower,
810 Some('A') => ConversionType::HexFloatUpper,
811 Some('t') => {
812 let Some(chr) = chars.next() else {
813 return exec_err!("Invalid time format: {}", s);
814 };
815 offset += 1;
816 ConversionType::TimeLower(chr.try_into()?)
817 }
818 Some('T') => {
819 let Some(chr) = chars.next() else {
820 return exec_err!("Invalid time format: {}", s);
821 };
822 offset += 1;
823 ConversionType::TimeUpper(chr.try_into()?)
824 }
825 chr => {
826 return plan_err!("Invalid conversion type: {:?}", chr);
827 }
828 };
829
830 spec.conversion_type.validate(arg_type)?;
831 Ok((spec, &s[offset..]))
832}
833
834fn take_numeric_param(s: &str, zero: bool) -> (NumericParam, &str) {
835 match s.chars().next() {
836 Some(digit) if (if zero { '0'..='9' } else { '1'..='9' }).contains(&digit) => {
837 let mut s = s;
838 let mut w = 0;
839 loop {
840 match s.chars().next() {
841 Some(digit) if digit.is_ascii_digit() => {
842 w = 10 * w + (digit as i32 - '0' as i32);
843 }
844 _ => {
845 break;
846 }
847 }
848 s = &s[1..];
849 }
850 (NumericParam::Literal(w), s)
851 }
852 _ => (NumericParam::FromArgument, s),
853 }
854}
855
856impl ConversionSpecifier {
857 pub fn format(&self, string: &mut String, value: &ScalarValue) -> Result<()> {
858 match value {
859 ScalarValue::Boolean(value) => match self.conversion_type {
860 ConversionType::StringLower | ConversionType::StringUpper => {
861 self.format_string(string, &value.unwrap_or(false).to_string())
862 }
863
864 _ => self.format_boolean(string, value),
865 },
866 ScalarValue::Int8(value) => match (self.conversion_type, value) {
867 (ConversionType::DecInt, Some(value)) => {
868 self.format_signed(string, *value as i64)
869 }
870 (
871 ConversionType::HexIntLower
872 | ConversionType::HexIntUpper
873 | ConversionType::OctInt,
874 Some(value),
875 ) => self.format_unsigned(string, (*value as u8) as u64),
876 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
877 self.format_char(string, *value as u8 as char)
878 }
879 (
880 ConversionType::StringLower | ConversionType::StringUpper,
881 Some(value),
882 ) => self.format_string(string, &value.to_string()),
883 (t, None) if t.supports_integer() => self.format_string(string, "null"),
884 _ => {
885 exec_err!(
886 "Invalid conversion type: {:?} for Int8",
887 self.conversion_type
888 )
889 }
890 },
891 ScalarValue::Int16(value) => match (self.conversion_type, value) {
892 (ConversionType::DecInt, Some(value)) => {
893 self.format_signed(string, *value as i64)
894 }
895 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
896 self.format_char(
897 string,
898 char::from_u32((*value as u16) as u32).unwrap(),
899 )
900 }
901 (
902 ConversionType::HexIntLower
903 | ConversionType::HexIntUpper
904 | ConversionType::OctInt,
905 Some(value),
906 ) => self.format_unsigned(string, (*value as u16) as u64),
907 (
908 ConversionType::StringLower | ConversionType::StringUpper,
909 Some(value),
910 ) => self.format_string(string, &value.to_string()),
911 (t, None) if t.supports_integer() => self.format_string(string, "null"),
912 _ => {
913 exec_err!(
914 "Invalid conversion type: {:?} for Int16",
915 self.conversion_type
916 )
917 }
918 },
919 ScalarValue::Int32(value) => match (self.conversion_type, value) {
920 (ConversionType::DecInt, Some(value)) => {
921 self.format_signed(string, *value as i64)
922 }
923 (
924 ConversionType::HexIntLower
925 | ConversionType::HexIntUpper
926 | ConversionType::OctInt,
927 Some(value),
928 ) => self.format_unsigned(string, (*value as u32) as u64),
929 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
930 self.format_char(string, char::from_u32(*value as u32).unwrap())
931 }
932 (
933 ConversionType::StringLower | ConversionType::StringUpper,
934 Some(value),
935 ) => self.format_string(string, &value.to_string()),
936 (t, None) if t.supports_integer() => self.format_string(string, "null"),
937 _ => {
938 exec_err!(
939 "Invalid conversion type: {:?} for Int32",
940 self.conversion_type
941 )
942 }
943 },
944 ScalarValue::Int64(value) => match (self.conversion_type, value) {
945 (ConversionType::DecInt, Some(value)) => {
946 self.format_signed(string, *value)
947 }
948 (
949 ConversionType::HexIntLower
950 | ConversionType::HexIntUpper
951 | ConversionType::OctInt,
952 Some(value),
953 ) => self.format_unsigned(string, *value as u64),
954 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
955 self.format_char(
956 string,
957 char::from_u32((*value as u64) as u32).unwrap(),
958 )
959 }
960 (
961 ConversionType::StringLower | ConversionType::StringUpper,
962 Some(value),
963 ) => self.format_string(string, &value.to_string()),
964 (t, None) if t.supports_integer() => self.format_string(string, "null"),
965 _ => {
966 exec_err!(
967 "Invalid conversion type: {:?} for Int64",
968 self.conversion_type
969 )
970 }
971 },
972 ScalarValue::UInt8(value) => match (self.conversion_type, value) {
973 (
974 ConversionType::DecInt
975 | ConversionType::HexIntLower
976 | ConversionType::HexIntUpper
977 | ConversionType::OctInt,
978 Some(value),
979 ) => self.format_unsigned(string, *value as u64),
980 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
981 self.format_char(string, *value as char)
982 }
983 (
984 ConversionType::StringLower | ConversionType::StringUpper,
985 Some(value),
986 ) => self.format_string(string, &value.to_string()),
987 (t, None) if t.supports_integer() => self.format_string(string, "null"),
988 _ => {
989 exec_err!(
990 "Invalid conversion type: {:?} for UInt8",
991 self.conversion_type
992 )
993 }
994 },
995 ScalarValue::UInt16(value) => match (self.conversion_type, value) {
996 (
997 ConversionType::DecInt
998 | ConversionType::HexIntLower
999 | ConversionType::HexIntUpper
1000 | ConversionType::OctInt,
1001 Some(value),
1002 ) => self.format_unsigned(string, *value as u64),
1003 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1004 self.format_char(string, char::from_u32(*value as u32).unwrap())
1005 }
1006 (
1007 ConversionType::StringLower | ConversionType::StringUpper,
1008 Some(value),
1009 ) => self.format_string(string, &value.to_string()),
1010 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1011 _ => {
1012 exec_err!(
1013 "Invalid conversion type: {:?} for UInt16",
1014 self.conversion_type
1015 )
1016 }
1017 },
1018 ScalarValue::UInt32(value) => match (self.conversion_type, value) {
1019 (
1020 ConversionType::DecInt
1021 | ConversionType::HexIntLower
1022 | ConversionType::HexIntUpper
1023 | ConversionType::OctInt,
1024 Some(value),
1025 ) => self.format_unsigned(string, *value as u64),
1026 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1027 self.format_char(string, char::from_u32(*value).unwrap())
1028 }
1029 (
1030 ConversionType::StringLower | ConversionType::StringUpper,
1031 Some(value),
1032 ) => self.format_string(string, &value.to_string()),
1033 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1034 _ => {
1035 exec_err!(
1036 "Invalid conversion type: {:?} for UInt32",
1037 self.conversion_type
1038 )
1039 }
1040 },
1041 ScalarValue::UInt64(value) => match (self.conversion_type, value) {
1042 (
1043 ConversionType::DecInt
1044 | ConversionType::HexIntLower
1045 | ConversionType::HexIntUpper
1046 | ConversionType::OctInt,
1047 Some(value),
1048 ) => self.format_unsigned(string, *value),
1049 (ConversionType::CharLower | ConversionType::CharUpper, Some(value)) => {
1050 self.format_char(string, char::from_u32(*value as u32).unwrap())
1051 }
1052 (
1053 ConversionType::StringLower | ConversionType::StringUpper,
1054 Some(value),
1055 ) => self.format_string(string, &value.to_string()),
1056 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1057 _ => {
1058 exec_err!(
1059 "Invalid conversion type: {:?} for UInt64",
1060 self.conversion_type
1061 )
1062 }
1063 },
1064 ScalarValue::Float16(value) => match (self.conversion_type, value) {
1065 (
1066 ConversionType::DecFloatLower
1067 | ConversionType::SciFloatLower
1068 | ConversionType::SciFloatUpper
1069 | ConversionType::CompactFloatLower
1070 | ConversionType::CompactFloatUpper,
1071 Some(value),
1072 ) => self.format_float(string, value.to_f64().unwrap()),
1073 (
1074 ConversionType::StringLower | ConversionType::StringUpper,
1075 Some(value),
1076 ) => self.format_string(string, &value.to_f32().unwrap().spark_string()),
1077 (
1078 ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1079 Some(value),
1080 ) => self.format_hex_float(string, value.to_f64().unwrap()),
1081 (t, None) if t.supports_float() => self.format_string(string, "null"),
1082 _ => {
1083 exec_err!(
1084 "Invalid conversion type: {:?} for Float16",
1085 self.conversion_type
1086 )
1087 }
1088 },
1089 ScalarValue::Float32(value) => match (self.conversion_type, value) {
1090 (
1091 ConversionType::DecFloatLower
1092 | ConversionType::SciFloatLower
1093 | ConversionType::SciFloatUpper
1094 | ConversionType::CompactFloatLower
1095 | ConversionType::CompactFloatUpper,
1096 Some(value),
1097 ) => self.format_float(string, *value as f64),
1098 (
1099 ConversionType::StringLower | ConversionType::StringUpper,
1100 Some(value),
1101 ) => self.format_string(string, &value.spark_string()),
1102 (
1103 ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1104 Some(value),
1105 ) => self.format_hex_float(string, *value as f64),
1106 (t, None) if t.supports_float() => self.format_string(string, "null"),
1107 _ => {
1108 exec_err!(
1109 "Invalid conversion type: {:?} for Float32",
1110 self.conversion_type
1111 )
1112 }
1113 },
1114 ScalarValue::Float64(value) => match (self.conversion_type, value) {
1115 (
1116 ConversionType::DecFloatLower
1117 | ConversionType::SciFloatLower
1118 | ConversionType::SciFloatUpper
1119 | ConversionType::CompactFloatLower
1120 | ConversionType::CompactFloatUpper,
1121 Some(value),
1122 ) => self.format_float(string, *value),
1123 (
1124 ConversionType::StringLower | ConversionType::StringUpper,
1125 Some(value),
1126 ) => self.format_string(string, &value.spark_string()),
1127 (
1128 ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1129 Some(value),
1130 ) => self.format_hex_float(string, *value),
1131 (t, None) if t.supports_float() => self.format_string(string, "null"),
1132 _ => {
1133 exec_err!(
1134 "Invalid conversion type: {:?} for Float64",
1135 self.conversion_type
1136 )
1137 }
1138 },
1139 ScalarValue::Utf8(value) => {
1140 let value: &str = match value {
1141 Some(value) => value.as_str(),
1142 None => "null",
1143 };
1144 if matches!(
1145 self.conversion_type,
1146 ConversionType::StringLower | ConversionType::StringUpper
1147 ) {
1148 self.format_string(string, value)
1149 } else {
1150 exec_err!(
1151 "Invalid conversion type: {:?} for Utf8",
1152 self.conversion_type
1153 )
1154 }
1155 }
1156 ScalarValue::LargeUtf8(value) => {
1157 let value: &str = match value {
1158 Some(value) => value.as_str(),
1159 None => "null",
1160 };
1161 if matches!(
1162 self.conversion_type,
1163 ConversionType::StringLower | ConversionType::StringUpper
1164 ) {
1165 self.format_string(string, value)
1166 } else {
1167 exec_err!(
1168 "Invalid conversion type: {:?} for LargeUtf8",
1169 self.conversion_type
1170 )
1171 }
1172 }
1173 ScalarValue::Utf8View(value) => {
1174 let value: &str = match value {
1175 Some(value) => value.as_str(),
1176 None => "null",
1177 };
1178 self.format_string(string, value)
1179 }
1180 ScalarValue::Decimal128(value, _, scale) => {
1181 match (self.conversion_type, value) {
1182 (
1183 ConversionType::DecFloatLower
1184 | ConversionType::SciFloatLower
1185 | ConversionType::SciFloatUpper
1186 | ConversionType::CompactFloatLower
1187 | ConversionType::CompactFloatUpper,
1188 Some(value),
1189 ) => self.format_decimal(string, value.to_string(), *scale as i64),
1190 (
1191 ConversionType::StringLower | ConversionType::StringUpper,
1192 Some(value),
1193 ) => self.format_string(string, &value.to_string()),
1194 (t, None) if t.supports_decimal() => {
1195 self.format_string(string, "null")
1196 }
1197
1198 _ => {
1199 exec_err!(
1200 "Invalid conversion type: {:?} for Decimal128",
1201 self.conversion_type
1202 )
1203 }
1204 }
1205 }
1206 ScalarValue::Decimal256(value, _, scale) => {
1207 match (self.conversion_type, value) {
1208 (
1209 ConversionType::DecFloatLower
1210 | ConversionType::SciFloatLower
1211 | ConversionType::SciFloatUpper
1212 | ConversionType::CompactFloatLower
1213 | ConversionType::CompactFloatUpper,
1214 Some(value),
1215 ) => self.format_decimal(string, value.to_string(), *scale as i64),
1216 (
1217 ConversionType::StringLower | ConversionType::StringUpper,
1218 Some(value),
1219 ) => self.format_string(string, &value.to_string()),
1220 (t, None) if t.supports_decimal() => {
1221 self.format_string(string, "null")
1222 }
1223
1224 _ => {
1225 exec_err!(
1226 "Invalid conversion type: {:?} for Decimal256",
1227 self.conversion_type
1228 )
1229 }
1230 }
1231 }
1232
1233 ScalarValue::Time32Second(value) => match (self.conversion_type, value) {
1234 (
1235 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1236 Some(value),
1237 ) => self.format_time(string, *value as i64 * 1000000000, &None),
1238 (
1239 ConversionType::StringLower | ConversionType::StringUpper,
1240 Some(value),
1241 ) => self.format_string(string, &value.to_string()),
1242 (t, None) if t.supports_time() => self.format_string(string, "null"),
1243 _ => {
1244 exec_err!(
1245 "Invalid conversion type: {:?} for Time32Second",
1246 self.conversion_type
1247 )
1248 }
1249 },
1250 ScalarValue::Time32Millisecond(value) => {
1251 match (self.conversion_type, value) {
1252 (
1253 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1254 Some(value),
1255 ) => self.format_time(string, *value as i64 * 1000000, &None),
1256 (
1257 ConversionType::StringLower | ConversionType::StringUpper,
1258 Some(value),
1259 ) => self.format_string(string, &value.to_string()),
1260 (t, None) if t.supports_time() => self.format_string(string, "null"),
1261 _ => {
1262 exec_err!(
1263 "Invalid conversion type: {:?} for Time32Millisecond",
1264 self.conversion_type
1265 )
1266 }
1267 }
1268 }
1269 ScalarValue::Time64Microsecond(value) => {
1270 match (self.conversion_type, value) {
1271 (
1272 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1273 Some(value),
1274 ) => self.format_time(string, *value * 1000, &None),
1275 (
1276 ConversionType::StringLower | ConversionType::StringUpper,
1277 Some(value),
1278 ) => self.format_string(string, &value.to_string()),
1279 (t, None) if t.supports_time() => self.format_string(string, "null"),
1280 _ => {
1281 exec_err!(
1282 "Invalid conversion type: {:?} for Time64Microsecond",
1283 self.conversion_type
1284 )
1285 }
1286 }
1287 }
1288 ScalarValue::Time64Nanosecond(value) => match (self.conversion_type, value) {
1289 (
1290 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1291 Some(value),
1292 ) => self.format_time(string, *value, &None),
1293 (
1294 ConversionType::StringLower | ConversionType::StringUpper,
1295 Some(value),
1296 ) => self.format_string(string, &value.to_string()),
1297 (t, None) if t.supports_time() => self.format_string(string, "null"),
1298 _ => {
1299 exec_err!(
1300 "Invalid conversion type: {:?} for Time64Nanosecond",
1301 self.conversion_type
1302 )
1303 }
1304 },
1305 ScalarValue::TimestampSecond(value, zone) => {
1306 match (self.conversion_type, value) {
1307 (
1308 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1309 Some(value),
1310 ) => self.format_time(string, value * 1000000000, zone),
1311 (
1312 ConversionType::StringLower | ConversionType::StringUpper,
1313 Some(value),
1314 ) => self.format_string(string, &value.to_string()),
1315 (t, None) if t.supports_time() => self.format_string(string, "null"),
1316 _ => {
1317 exec_err!(
1318 "Invalid conversion type: {:?} for TimestampSecond",
1319 self.conversion_type
1320 )
1321 }
1322 }
1323 }
1324 ScalarValue::TimestampMillisecond(value, zone) => {
1325 match (self.conversion_type, value) {
1326 (
1327 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1328 Some(value),
1329 ) => self.format_time(string, *value * 1000000, zone),
1330 (
1331 ConversionType::StringLower | ConversionType::StringUpper,
1332 Some(value),
1333 ) => self.format_string(string, &value.to_string()),
1334
1335 (t, None) if t.supports_time() => self.format_string(string, "null"),
1336 _ => {
1337 exec_err!(
1338 "Invalid conversion type: {:?} for TimestampMillisecond",
1339 self.conversion_type
1340 )
1341 }
1342 }
1343 }
1344 ScalarValue::TimestampMicrosecond(value, zone) => {
1345 match (self.conversion_type, value) {
1346 (
1347 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1348 Some(value),
1349 ) => self.format_time(string, value * 1000, zone),
1350 (
1351 ConversionType::StringLower | ConversionType::StringUpper,
1352 Some(value),
1353 ) => self.format_string(string, &value.to_string()),
1354 (t, None) if t.supports_time() => self.format_string(string, "null"),
1355 _ => {
1356 exec_err!(
1357 "Invalid conversion type: {:?} for timestampmicrosecond",
1358 self.conversion_type
1359 )
1360 }
1361 }
1362 }
1363
1364 ScalarValue::TimestampNanosecond(value, zone) => {
1365 match (self.conversion_type, value) {
1366 (
1367 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1368 Some(value),
1369 ) => self.format_time(string, *value, zone),
1370 (
1371 ConversionType::StringLower | ConversionType::StringUpper,
1372 Some(value),
1373 ) => self.format_string(string, &value.to_string()),
1374 (t, None) if t.supports_time() => self.format_string(string, "null"),
1375 _ => {
1376 exec_err!(
1377 "Invalid conversion type: {:?} for TimestampNanosecond",
1378 self.conversion_type
1379 )
1380 }
1381 }
1382 }
1383 ScalarValue::Date32(value) => match (self.conversion_type, value) {
1384 (
1385 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1386 Some(value),
1387 ) => self.format_date(string, *value as i64),
1388 (
1389 ConversionType::StringLower | ConversionType::StringUpper,
1390 Some(value),
1391 ) => self.format_string(string, &value.to_string()),
1392 (t, None) if t.supports_time() => self.format_string(string, "null"),
1393 _ => {
1394 exec_err!(
1395 "Invalid conversion type: {:?} for Date32",
1396 self.conversion_type
1397 )
1398 }
1399 },
1400 ScalarValue::Date64(value) => match (self.conversion_type, value) {
1401 (
1402 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1403 Some(value),
1404 ) => self.format_date(string, *value),
1405 (
1406 ConversionType::StringLower | ConversionType::StringUpper,
1407 Some(value),
1408 ) => self.format_string(string, &value.to_string()),
1409 (t, None) if t.supports_time() => self.format_string(string, "null"),
1410 _ => {
1411 exec_err!(
1412 "Invalid conversion type: {:?} for Date64",
1413 self.conversion_type
1414 )
1415 }
1416 },
1417 ScalarValue::Null => {
1418 let value = "null".to_string();
1419 self.format_string(string, &value)
1420 }
1421 _ => exec_err!("Invalid scalar value: {:?}", value),
1422 }
1423 }
1424
1425 fn format_hex_float(&self, writer: &mut String, value: f64) -> Result<()> {
1426 let (sign, raw_exponent, mantissa) = value.to_parts();
1428 let is_subnormal = raw_exponent == 0;
1429
1430 let precision = match self.precision {
1431 NumericParam::FromArgument => None,
1432 NumericParam::Literal(p) => Some(p),
1433 };
1434
1435 let mantissa_hex_digits = f64::MANTISSA_BITS.div_ceil(4); let should_normalize = is_subnormal
1439 && precision.is_some()
1440 && precision.unwrap() < mantissa_hex_digits as i32;
1441
1442 let (value, raw_exponent, mantissa) = if should_normalize {
1443 let value = value * f64::SCALEUP;
1444 let (_, raw_exponent, mantissa) = value.to_parts();
1445 (value, raw_exponent, mantissa)
1446 } else {
1447 (value, raw_exponent, mantissa)
1448 };
1449
1450 let mut temp = String::new();
1451
1452 let sign_char = if sign {
1453 "-"
1454 } else if self.force_sign {
1455 "+"
1456 } else if self.space_sign {
1457 " "
1458 } else {
1459 ""
1460 };
1461 match value.category() {
1462 FpCategory::Nan => {
1463 write!(&mut temp, "NaN")?;
1464 }
1465 FpCategory::Infinite => {
1466 write!(&mut temp, "{sign_char}Infinity")?;
1467 }
1468 FpCategory::Zero => {
1469 write!(&mut temp, "{sign_char}0x0.0p0")?;
1470 }
1471 _ => {
1472 let bias = i32::from(f64::EXPONENT_BIAS);
1473 let exponent = if is_subnormal && !should_normalize {
1476 1 - bias
1477 } else {
1478 raw_exponent as i32 - bias
1479 };
1480
1481 let final_mantissa = if let Some(p) = precision {
1483 if p == 0 {
1484 let shift_distance = f64::MANTISSA_BITS as i32 - 4; let shifted = mantissa >> shift_distance;
1488 let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1489 let round_bit = 1u64 << (shift_distance - 1);
1490
1491 if rounding_bits > round_bit
1493 || (rounding_bits == round_bit && (shifted & 1) != 0)
1494 {
1495 (shifted + 1) << shift_distance
1496 } else {
1497 shifted << shift_distance
1498 }
1499 } else {
1500 let precision_bits = p * 4; let keep_bits = f64::MANTISSA_BITS as i32;
1503 let shift_distance = keep_bits - precision_bits;
1504
1505 if shift_distance > 0 {
1506 let shifted = mantissa >> shift_distance;
1507 let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1508 let round_bit = 1u64 << (shift_distance - 1);
1509
1510 if rounding_bits > round_bit
1512 || (rounding_bits == round_bit && (shifted & 1) != 0)
1513 {
1514 (shifted + 1) << shift_distance
1515 } else {
1516 shifted << shift_distance
1517 }
1518 } else {
1519 mantissa
1520 }
1521 }
1522 } else {
1523 mantissa
1524 };
1525
1526 if is_subnormal && !should_normalize {
1527 if precision.is_some() {
1529 let full_hex = format!(
1531 "{:0width$x}",
1532 final_mantissa,
1533 width = mantissa_hex_digits as usize
1534 );
1535 write!(&mut temp, "{sign_char}0x0.{full_hex}p{exponent}")?;
1536 } else {
1537 let hex_digits = format!(
1539 "{:0width$x}",
1540 final_mantissa,
1541 width = mantissa_hex_digits as usize
1542 );
1543 write!(&mut temp, "{sign_char}0x0.{hex_digits}p{exponent}")?;
1544 }
1545 } else {
1546 if let Some(p) = precision {
1548 let p = if p == 0 { 1 } else { p };
1549 let hex_digits = format!("{final_mantissa:x}");
1550 let formatted_digits = if p as usize >= hex_digits.len() {
1551 format!("{:0<width$}", hex_digits, width = p as usize)
1553 } else {
1554 hex_digits[..p as usize].to_string()
1555 };
1556 write!(
1557 &mut temp,
1558 "{sign_char}0x1.{formatted_digits}p{exponent}"
1559 )?;
1560 } else {
1561 let mut hex_digits = format!("{final_mantissa:x}");
1563 hex_digits = trim_trailing_0s_hex(&hex_digits).to_owned();
1564 if hex_digits.is_empty() {
1565 write!(&mut temp, "{sign_char}0x1.0p{exponent}")?;
1566 } else {
1567 write!(&mut temp, "{sign_char}0x1.{hex_digits}p{exponent}")?;
1568 }
1569 }
1570 }
1571 if should_normalize {
1572 let (prefix, exp) = temp.split_once('p').unwrap();
1573 let iexp = exp.parse::<i32>().unwrap() - f64::SCALEUP_POWER as i32;
1574 temp = format!("{prefix}p{iexp}");
1575 }
1576 }
1577 };
1578
1579 if self.conversion_type.is_upper() {
1580 temp = temp.to_ascii_uppercase();
1581 }
1582
1583 let NumericParam::Literal(width) = self.width else {
1584 writer.push_str(&temp);
1585 return Ok(());
1586 };
1587 if self.left_adj {
1588 writer.push_str(&temp);
1589 for _ in temp.len()..width as usize {
1590 writer.push(' ');
1591 }
1592 } else if self.zero_pad && value.is_finite() {
1593 let delimiter = if self.conversion_type.is_upper() {
1594 "0X"
1595 } else {
1596 "0x"
1597 };
1598 let (prefix, suffix) = temp.split_once(delimiter).unwrap();
1599 writer.push_str(prefix);
1600 writer.push_str(delimiter);
1601 for _ in temp.len()..width as usize {
1602 writer.push('0');
1603 }
1604 writer.push_str(suffix);
1605 } else {
1606 while temp.len() < width as usize {
1607 temp = " ".to_owned() + &temp;
1608 }
1609 writer.push_str(&temp);
1610 };
1611 Ok(())
1612 }
1613
1614 fn format_char(&self, writer: &mut String, value: char) -> Result<()> {
1615 let upper = self.conversion_type.is_upper();
1616 match self.conversion_type {
1617 ConversionType::CharLower | ConversionType::CharUpper => {
1618 let NumericParam::Literal(width) = self.width else {
1619 if upper {
1620 writer.push(value.to_ascii_uppercase());
1621 } else {
1622 writer.push(value);
1623 }
1624 return Ok(());
1625 };
1626
1627 let start_len = writer.len();
1628 if self.left_adj {
1629 if upper {
1630 writer.push(value.to_ascii_uppercase());
1631 } else {
1632 writer.push(value);
1633 }
1634 while writer.len() - start_len < width as usize {
1635 writer.push(' ');
1636 }
1637 } else {
1638 while writer.len() - start_len + value.len_utf8() < width as usize {
1639 writer.push(' ');
1640 }
1641 if upper {
1642 writer.push(value.to_ascii_uppercase());
1643 } else {
1644 writer.push(value);
1645 }
1646 }
1647 Ok(())
1648 }
1649 _ => exec_err!(
1650 "Invalid conversion type: {:?} for char",
1651 self.conversion_type
1652 ),
1653 }
1654 }
1655
1656 fn format_boolean(&self, writer: &mut String, value: &Option<bool>) -> Result<()> {
1657 let value = value.unwrap_or(false);
1658
1659 let formatted = match self.conversion_type {
1660 ConversionType::BooleanUpper => {
1661 if value {
1662 "TRUE"
1663 } else {
1664 "FALSE"
1665 }
1666 }
1667 ConversionType::BooleanLower => {
1668 if value {
1669 "true"
1670 } else {
1671 "false"
1672 }
1673 }
1674 _ => {
1675 return exec_err!(
1676 "Invalid conversion type: {:?} for boolean array",
1677 self.conversion_type
1678 )
1679 }
1680 };
1681 self.format_str(writer, formatted)
1682 }
1683
1684 fn format_float(&self, writer: &mut String, value: f64) -> Result<()> {
1685 let mut prefix = String::new();
1686 let mut suffix = String::new();
1687 let mut number = String::new();
1688 let upper = self.conversion_type.is_upper();
1689
1690 if value.is_sign_negative() {
1692 if self.negative_in_parentheses {
1693 prefix.push('(');
1694 suffix.push(')');
1695 } else {
1696 prefix.push('-');
1697 }
1698 } else if self.space_sign {
1699 prefix.push(' ');
1700 } else if self.force_sign {
1701 prefix.push('+');
1702 }
1703
1704 if value.is_finite() {
1705 let mut use_scientific = false;
1706 let mut strip_trailing_0s = false;
1707 let mut abs = value.abs();
1708 let mut exponent = abs.log10().floor() as i32;
1709 let mut precision = match self.precision {
1710 NumericParam::Literal(p) => p,
1711 _ => 6,
1712 };
1713 match self.conversion_type {
1714 ConversionType::DecFloatLower => {
1715 }
1717 ConversionType::SciFloatLower => {
1718 use_scientific = true;
1719 }
1720 ConversionType::SciFloatUpper => {
1721 use_scientific = true;
1722 }
1723 ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
1724 strip_trailing_0s = true;
1725 if precision == 0 {
1726 precision = 1;
1727 }
1728 let rounding_factor =
1731 10.0_f64.powf((precision - 1 - exponent) as f64);
1732 let rounded_fixed = (abs * rounding_factor).round();
1733 abs = rounded_fixed / rounding_factor;
1734 exponent = abs.log10().floor() as i32;
1735 if exponent < -4 || exponent >= precision {
1736 use_scientific = true;
1737 precision -= 1;
1738 } else {
1739 precision -= 1 + exponent;
1741 }
1742 }
1743 _ => {
1744 return exec_err!(
1745 "Invalid conversion type: {:?} for float",
1746 self.conversion_type
1747 )
1748 }
1749 }
1750
1751 if use_scientific {
1752 let mantissa = abs / 10.0_f64.powf(exponent as f64);
1754 let exp_char = if upper { 'E' } else { 'e' };
1755 number = format!("{mantissa:.prec$}", prec = precision as usize);
1756 if strip_trailing_0s {
1757 number = trim_trailing_0s(&number).to_owned();
1758 }
1759 number = format!("{number}{exp_char}{exponent:+03}");
1760 } else {
1761 number = format!("{abs:.prec$}", prec = precision as usize);
1762 if strip_trailing_0s {
1763 number = trim_trailing_0s(&number).to_owned();
1764 }
1765 }
1766 if self.alt_form && !number.contains('.') {
1767 number += ".";
1768 }
1769 } else {
1770 match self.conversion_type {
1772 ConversionType::DecFloatLower
1773 | ConversionType::SciFloatLower
1774 | ConversionType::CompactFloatLower => {
1775 if value.is_infinite() {
1776 number.push_str("Infinity")
1777 } else {
1778 number.push_str("NaN")
1779 }
1780 }
1781 ConversionType::SciFloatUpper | ConversionType::CompactFloatUpper => {
1782 if value.is_infinite() {
1783 number.push_str("INFINITY")
1784 } else {
1785 number.push_str("NAN")
1786 }
1787 }
1788 _ => {
1789 return exec_err!(
1790 "Invalid conversion type: {:?} for float",
1791 self.conversion_type
1792 )
1793 }
1794 }
1795 }
1796 let NumericParam::Literal(width) = self.width else {
1798 writer.push_str(&prefix);
1799 writer.push_str(&number);
1800 writer.push_str(&suffix);
1801 return Ok(());
1802 };
1803 if self.left_adj {
1804 let mut full_num = prefix + &number + &suffix;
1805 while full_num.len() < width as usize {
1806 full_num.push(' ');
1807 }
1808 writer.push_str(&full_num);
1809 } else if self.zero_pad && value.is_finite() {
1810 while prefix.len() + number.len() + suffix.len() < width as usize {
1811 prefix.push('0');
1812 }
1813 writer.push_str(&prefix);
1814 writer.push_str(&number);
1815 writer.push_str(&suffix);
1816 } else {
1817 let mut full_num = prefix + &number + &suffix;
1818 while full_num.len() < width as usize {
1819 full_num = " ".to_owned() + &full_num;
1820 }
1821 writer.push_str(&full_num);
1822 };
1823
1824 Ok(())
1825 }
1826
1827 fn format_signed(&self, writer: &mut String, value: i64) -> Result<()> {
1828 let negative = value < 0;
1829 let abs_val = value.abs();
1830
1831 let (sign_prefix, sign_suffix) = if negative && self.negative_in_parentheses {
1832 ("(".to_owned(), ")".to_owned())
1833 } else if negative {
1834 ("-".to_owned(), "".to_owned())
1835 } else if self.force_sign {
1836 ("+".to_owned(), "".to_owned())
1837 } else if self.space_sign {
1838 (" ".to_owned(), "".to_owned())
1839 } else {
1840 ("".to_owned(), "".to_owned())
1841 };
1842
1843 let mut mod_spec = *self;
1844 mod_spec.width = match self.width {
1845 NumericParam::Literal(w) => NumericParam::Literal(
1846 w - sign_prefix.len() as i32 - sign_suffix.len() as i32,
1847 ),
1848 _ => NumericParam::FromArgument,
1849 };
1850 let mut formatted = String::new();
1851 mod_spec.format_unsigned(&mut formatted, abs_val as u64)?;
1852 let mut actual_number = &formatted[0..];
1854 let mut leading_spaces = &formatted[0..0];
1855 if let Some(first_non_space) = formatted.find(|c| c != ' ') {
1856 actual_number = &formatted[first_non_space..];
1857 leading_spaces = &formatted[0..first_non_space];
1858 }
1859 write!(
1860 writer,
1861 "{}{}{}{}",
1862 leading_spaces.to_owned(),
1863 sign_prefix,
1864 actual_number,
1865 sign_suffix
1866 )
1867 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1868 Ok(())
1869 }
1870
1871 fn format_unsigned(&self, writer: &mut String, value: u64) -> Result<()> {
1872 let mut s = String::new();
1873 let mut alt_prefix = "";
1874 match self.conversion_type {
1875 ConversionType::DecInt => {
1876 let num_str = format!("{value}");
1877 if self.grouping_separator {
1878 let mut result = String::new();
1880 let chars: Vec<char> = num_str.chars().collect();
1881 for (i, c) in chars.iter().enumerate() {
1882 if i > 0 && (chars.len() - i).is_multiple_of(3) {
1883 result.push(',');
1884 }
1885 result.push(*c);
1886 }
1887 s = result;
1888 } else {
1889 s = num_str;
1890 }
1891 }
1892 ConversionType::HexIntLower => {
1893 alt_prefix = "0x";
1894 write!(&mut s, "{value:x}")
1895 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1896 }
1897 ConversionType::HexIntUpper => {
1898 alt_prefix = "0X";
1899 write!(&mut s, "{value:X}")
1900 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1901 }
1902 ConversionType::OctInt => {
1903 alt_prefix = "0";
1904 write!(&mut s, "{value:o}")
1905 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1906 }
1907 _ => {
1908 return exec_err!(
1909 "Invalid conversion type: {:?} for u64",
1910 self.conversion_type
1911 )
1912 }
1913 }
1914 let mut prefix = if self.alt_form {
1915 alt_prefix.to_owned()
1916 } else {
1917 String::new()
1918 };
1919
1920 let formatted = if let NumericParam::Literal(width) = self.width {
1921 if self.left_adj {
1922 let mut num_str = prefix + &s;
1923 while num_str.len() < width as usize {
1924 num_str.push(' ');
1925 }
1926 num_str
1927 } else if self.zero_pad {
1928 while prefix.len() + s.len() < width as usize {
1929 prefix.push('0');
1930 }
1931 prefix + &s
1932 } else {
1933 let mut num_str = prefix + &s;
1934 while num_str.len() < width as usize {
1935 num_str = " ".to_owned() + &num_str;
1936 }
1937 num_str
1938 }
1939 } else {
1940 prefix + &s
1941 };
1942 write!(writer, "{formatted}")
1943 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1944 Ok(())
1945 }
1946
1947 fn format_str(&self, writer: &mut String, value: &str) -> Result<()> {
1948 let precision: usize = match self.precision {
1950 NumericParam::Literal(p) => p,
1951 _ => i32::MAX,
1952 }
1953 .try_into()
1954 .unwrap_or_default();
1955 let content_len = {
1956 let mut content_len = precision.min(value.len());
1957 while !value.is_char_boundary(content_len) {
1958 content_len -= 1;
1959 }
1960 content_len
1961 };
1962 let content = &value[..content_len];
1963
1964 if let NumericParam::Literal(width) = self.width {
1967 let start_len = writer.len();
1968 if self.left_adj {
1969 writer.push_str(content);
1970 while writer.len() - start_len < width as usize {
1971 writer.push(' ');
1972 }
1973 } else {
1974 while writer.len() - start_len + content.len() < width as usize {
1975 writer.push(' ');
1976 }
1977 writer.push_str(content);
1978 }
1979 } else {
1980 writer.push_str(content);
1981 }
1982 Ok(())
1983 }
1984
1985 fn format_string(&self, writer: &mut String, value: &str) -> Result<()> {
1986 if self.conversion_type.is_upper() {
1987 let upper = value.to_ascii_uppercase();
1988 self.format_str(writer, &upper)
1989 } else {
1990 self.format_str(writer, value)
1991 }
1992 }
1993
1994 fn format_decimal(
1995 &self,
1996 writer: &mut String,
1997 value: String,
1998 scale: i64,
1999 ) -> Result<()> {
2000 let mut prefix = String::new();
2001 let upper = self.conversion_type.is_upper();
2002
2003 let decimal = value
2005 .parse::<BigInt>()
2006 .map_err(|e| exec_datafusion_err!("Failed to parse decimal: {}", e))?;
2007 let decimal = BigDecimal::from_bigint(decimal, scale);
2008
2009 let is_negative = decimal.sign() == Sign::Minus;
2011 let abs_decimal = decimal.abs();
2012
2013 if is_negative {
2014 prefix.push('-');
2015 } else if self.space_sign {
2016 prefix.push(' ');
2017 } else if self.force_sign {
2018 prefix.push('+');
2019 }
2020
2021 let exp_symb = if upper { 'E' } else { 'e' };
2022 let mut strip_trailing_0s = false;
2023
2024 let mut precision = match self.precision {
2026 NumericParam::Literal(p) => p,
2027 _ => 6,
2028 };
2029
2030 let number = match self.conversion_type {
2031 ConversionType::DecFloatLower => {
2032 self.format_decimal_fixed(&abs_decimal, precision, strip_trailing_0s)?
2034 }
2035 ConversionType::SciFloatLower => self.format_decimal_scientific(
2036 &abs_decimal,
2037 precision,
2038 'e',
2039 strip_trailing_0s,
2040 )?,
2041 ConversionType::SciFloatUpper => self.format_decimal_scientific(
2042 &abs_decimal,
2043 precision,
2044 'E',
2045 strip_trailing_0s,
2046 )?,
2047 ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
2048 strip_trailing_0s = true;
2049 if precision == 0 {
2050 precision = 1;
2051 }
2052 let log10_val = abs_decimal.to_f64().map(|f| f.log10()).unwrap_or(0.0);
2054 if log10_val < -4.0 || log10_val >= precision as f64 {
2055 self.format_decimal_scientific(
2056 &abs_decimal,
2057 precision - 1,
2058 exp_symb,
2059 strip_trailing_0s,
2060 )?
2061 } else {
2062 self.format_decimal_fixed(
2063 &abs_decimal,
2064 precision - 1 - log10_val.floor() as i32,
2065 strip_trailing_0s,
2066 )?
2067 }
2068 }
2069 _ => {
2070 return exec_err!(
2071 "Invalid conversion type: {:?} for decimal",
2072 self.conversion_type
2073 )
2074 }
2075 };
2076
2077 let NumericParam::Literal(width) = self.width else {
2079 writer.push_str(&prefix);
2080 writer.push_str(&number);
2081 return Ok(());
2082 };
2083
2084 if self.left_adj {
2085 let mut full_num = prefix + &number;
2086 while full_num.len() < width as usize {
2087 full_num.push(' ');
2088 }
2089 writer.push_str(&full_num);
2090 } else if self.zero_pad {
2091 while prefix.len() + number.len() < width as usize {
2092 prefix.push('0');
2093 }
2094 writer.push_str(&prefix);
2095 writer.push_str(&number);
2096 } else {
2097 let mut full_num = prefix + &number;
2098 while full_num.len() < width as usize {
2099 full_num = " ".to_owned() + &full_num;
2100 }
2101 writer.push_str(&full_num);
2102 }
2103
2104 Ok(())
2105 }
2106
2107 fn format_decimal_fixed(
2108 &self,
2109 decimal: &BigDecimal,
2110 precision: i32,
2111 strip_trailing_0s: bool,
2112 ) -> Result<String> {
2113 if precision <= 0 {
2114 Ok(decimal.round(0).to_string())
2115 } else {
2116 let scaled = decimal.round(precision as i64);
2118 let mut number = scaled.to_string();
2119 if strip_trailing_0s {
2120 number = trim_trailing_0s(&number).to_owned();
2121 }
2122 Ok(number)
2123 }
2124 }
2125
2126 fn format_decimal_scientific(
2127 &self,
2128 decimal: &BigDecimal,
2129 precision: i32,
2130 exp_char: char,
2131 strip_trailing_0s: bool,
2132 ) -> Result<String> {
2133 let float_val = decimal.to_f64().unwrap_or(0.0);
2135 if float_val == 0.0 {
2136 return Ok(format!("0{exp_char}+00"));
2137 }
2138
2139 let abs_val = float_val.abs();
2140 let exponent = abs_val.log10().floor() as i32;
2141 let mantissa = abs_val / 10.0_f64.powf(exponent as f64);
2142
2143 let mut number = if precision <= 0 {
2144 format!("{mantissa:.0}")
2145 } else {
2146 format!("{mantissa:.prec$}", prec = precision as usize)
2147 };
2148
2149 if strip_trailing_0s {
2150 number = trim_trailing_0s(&number).to_owned();
2151 }
2152
2153 Ok(format!("{number}{exp_char}{exponent:+03}"))
2154 }
2155
2156 fn format_time(
2157 &self,
2158 writer: &mut String,
2159 timestamp_nanos: i64,
2160 timezone: &Option<Arc<str>>,
2161 ) -> Result<()> {
2162 let upper = self.conversion_type.is_upper();
2163 match &self.conversion_type {
2164 ConversionType::TimeLower(time_format)
2165 | ConversionType::TimeUpper(time_format) => {
2166 let formatted =
2167 self.format_time_component(timestamp_nanos, *time_format, timezone)?;
2168 let result = if upper {
2169 formatted.to_uppercase()
2170 } else {
2171 formatted
2172 };
2173 write!(writer, "{result}")
2174 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2175 Ok(())
2176 }
2177 _ => exec_err!(
2178 "Invalid conversion type for time: {:?}",
2179 self.conversion_type
2180 ),
2181 }
2182 }
2183
2184 fn format_date(&self, writer: &mut String, date_days: i64) -> Result<()> {
2185 let timestamp_nanos = date_days * 24 * 60 * 60 * 1_000_000_000;
2187 self.format_time(writer, timestamp_nanos, &None)
2188 }
2189
2190 fn format_time_component(
2191 &self,
2192 timestamp_nanos: i64,
2193 time_format: TimeFormat,
2194 _timezone: &Option<Arc<str>>,
2195 ) -> Result<String> {
2196 let secs = timestamp_nanos / 1_000_000_000;
2198 let nanos = (timestamp_nanos % 1_000_000_000) as u32;
2199
2200 let dt = DateTime::<Utc>::from_timestamp(secs, nanos).ok_or_else(|| {
2202 exec_datafusion_err!("Invalid timestamp: {}", timestamp_nanos)
2203 })?;
2204
2205 match time_format {
2206 TimeFormat::HUpper => Ok(format!("{:02}", dt.hour())),
2207 TimeFormat::IUpper => {
2208 let hour_12 = match dt.hour12() {
2209 (true, h) => h, (false, h) => h, };
2212 Ok(format!("{hour_12:02}"))
2213 }
2214 TimeFormat::KLower => Ok(format!("{}", dt.hour())),
2215 TimeFormat::LLower => {
2216 let hour_12 = match dt.hour12() {
2217 (true, h) => h, (false, h) => h, };
2220 Ok(format!("{hour_12}"))
2221 }
2222 TimeFormat::MUpper => Ok(format!("{:02}", dt.minute())),
2223 TimeFormat::SUpper => Ok(format!("{:02}", dt.second())),
2224 TimeFormat::LUpper => Ok(format!("{:03}", dt.timestamp_millis() % 1000)),
2225 TimeFormat::NUpper => Ok(format!("{:09}", dt.nanosecond())),
2226 TimeFormat::PLower => {
2227 let (is_pm, _) = dt.hour12();
2228 Ok(if is_pm {
2229 "pm".to_string()
2230 } else {
2231 "am".to_string()
2232 })
2233 }
2234 TimeFormat::ZLower => Ok("+0000".to_string()), TimeFormat::ZUpper => Ok("UTC".to_string()), TimeFormat::SLower => Ok(format!("{}", dt.timestamp())),
2237 TimeFormat::QUpper => Ok(format!("{}", dt.timestamp_millis())),
2238 TimeFormat::BUpper => Ok(dt.format("%B").to_string()), TimeFormat::BLower => Ok(dt.format("%b").to_string()), TimeFormat::AUpper => Ok(dt.format("%A").to_string()), TimeFormat::ALower => Ok(dt.format("%a").to_string()), TimeFormat::CUpper => Ok(format!("{:02}", dt.year() / 100)),
2243 TimeFormat::YUpper => Ok(format!("{:04}", dt.year())),
2244 TimeFormat::YLower => Ok(format!("{:02}", dt.year() % 100)),
2245 TimeFormat::JLower => Ok(format!("{:03}", dt.ordinal())), TimeFormat::MLower => Ok(format!("{:02}", dt.month())),
2247 TimeFormat::DLower => Ok(format!("{:02}", dt.day())),
2248 TimeFormat::ELower => Ok(format!("{}", dt.day())),
2249 TimeFormat::RUpper => Ok(dt.format("%H:%M").to_string()),
2250 TimeFormat::TUpper => Ok(dt.format("%H:%M:%S").to_string()),
2251 TimeFormat::RLower => {
2252 let (is_pm, hour_12) = dt.hour12();
2253 let am_pm = if is_pm { "PM" } else { "AM" };
2254 Ok(format!(
2255 "{:02}:{:02}:{:02} {}",
2256 hour_12,
2257 dt.minute(),
2258 dt.second(),
2259 am_pm
2260 ))
2261 }
2262 TimeFormat::DUpper => Ok(dt.format("%m/%d/%y").to_string()),
2263 TimeFormat::FUpper => Ok(dt.format("%Y-%m-%d").to_string()),
2264 TimeFormat::CLower => Ok(dt.format("%a %b %d %H:%M:%S UTC %Y").to_string()),
2265 }
2266 }
2267}
2268
/// Floating-point values that can be rendered as plain text.
trait FloatFormattable: std::fmt::Display {
    /// Returns the IEEE 754 category of the value.
    fn category(&self) -> FpCategory;

    /// Returns `true` when the value is negative.
    fn negative(&self) -> bool;

    /// Renders the value as text: `NaN` for NaN, `Infinity` / `-Infinity`
    /// for the infinities and the `Display` output for everything else.
    fn spark_string(&self) -> String {
        let category = self.category();
        if matches!(category, FpCategory::Nan) {
            return "NaN".to_string();
        }
        if matches!(category, FpCategory::Infinite) {
            let text = if self.negative() {
                "-Infinity"
            } else {
                "Infinity"
            };
            return text.to_string();
        }
        self.to_string()
    }
}
2287
2288impl FloatFormattable for f32 {
2289 fn category(&self) -> FpCategory {
2290 self.classify()
2291 }
2292
2293 fn negative(&self) -> bool {
2294 self.is_sign_negative()
2295 }
2296}
2297
2298impl FloatFormattable for f64 {
2299 fn category(&self) -> FpCategory {
2300 self.classify()
2301 }
2302
2303 fn negative(&self) -> bool {
2304 self.is_sign_negative()
2305 }
2306}
2307
2308trait FloatBits: FloatFormattable {
2309 const MANTISSA_BITS: u8;
2310 const EXPONENT_BIAS: u16;
2311 const SCALEUP_POWER: u8;
2312 const SCALEUP: Self;
2313
2314 fn to_parts(&self) -> (bool, u16, u64);
2315}
2316
2317impl FloatBits for f64 {
2318 const MANTISSA_BITS: u8 = 52;
2319 const EXPONENT_BIAS: u16 = 1023;
2320 const SCALEUP_POWER: u8 = 54;
2321 const SCALEUP: f64 = (1_i64 << Self::SCALEUP_POWER) as f64;
2322
2323 fn to_parts(&self) -> (bool, u16, u64) {
2324 let bits = self.to_bits();
2325 let sign: bool = (bits >> 63) == 1;
2326 let exponent = ((bits >> 52) & 0x7FF) as u16;
2327 let mantissa = bits & 0x000F_FFFF_FFFF_FFFF;
2328 (sign, exponent, mantissa)
2329 }
2330}
2331
/// Removes trailing `0` characters from a number that contains a decimal
/// point; numbers without a `.` are returned unchanged.
///
/// The decimal point itself is kept, so `"1.000"` becomes `"1."`.
fn trim_trailing_0s(number: &str) -> &str {
    if !number.contains('.') {
        return number;
    }
    // The `.` is never a `0`, so trimming always stops at or after it.
    number.trim_end_matches('0')
}
2342
/// Removes trailing `0` characters from a string of hex digits.
///
/// A string consisting solely of zeros (or an empty string) is returned
/// unchanged.
fn trim_trailing_0s_hex(number: &str) -> &str {
    let trimmed = number.trim_end_matches('0');
    if trimmed.is_empty() {
        number
    } else {
        trimmed
    }
}