1use std::fmt::Write;
19use std::sync::Arc;
20
21use core::num::FpCategory;
22
23use arrow::{
24 array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray},
25 datatypes::{DataType, Field, FieldRef},
26};
27use bigdecimal::{
28 BigDecimal, ToPrimitive,
29 num_bigint::{BigInt, Sign},
30};
31use chrono::{DateTime, Datelike, Timelike, Utc};
32use datafusion_common::{
33 DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err,
34};
35use datafusion_expr::{
36 ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
37 TypeSignature, Volatility,
38};
39
40#[derive(Debug, PartialEq, Eq, Hash)]
43pub struct FormatStringFunc {
44 signature: Signature,
45 aliases: Vec<String>,
46}
47
48impl Default for FormatStringFunc {
49 fn default() -> Self {
50 Self::new()
51 }
52}
53
54impl FormatStringFunc {
55 pub fn new() -> Self {
56 Self {
57 signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable),
58 aliases: vec![String::from("printf")],
59 }
60 }
61}
62
63impl ScalarUDFImpl for FormatStringFunc {
64 fn name(&self) -> &str {
65 "format_string"
66 }
67
68 fn aliases(&self) -> &[String] {
69 &self.aliases
70 }
71
72 fn signature(&self) -> &Signature {
73 &self.signature
74 }
75
76 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
77 datafusion_common::internal_err!(
78 "return_type should not be called, use return_field_from_args instead"
79 )
80 }
81
82 fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
83 match args.arg_fields[0].data_type() {
84 DataType::Null => {
85 Ok(Arc::new(Field::new("format_string", DataType::Utf8, true)))
86 }
87 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
88 Ok(Arc::clone(&args.arg_fields[0]))
89 }
90 _ => exec_err!(
91 "format_string expects the first argument to be Utf8, LargeUtf8 or Utf8View, got {} instead",
92 args.arg_fields[0].data_type()
93 ),
94 }
95 }
96
97 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
98 let len = args.args.iter().find_map(|arg| match arg {
99 ColumnarValue::Scalar(_) => None,
100 ColumnarValue::Array(a) => Some(a.len()),
101 });
102 let is_scalar = len.is_none();
103 let data_types = args.args[1..]
104 .iter()
105 .map(|arg| arg.data_type())
106 .collect::<Vec<_>>();
107 let fmt_type = args.args[0].data_type();
108
109 match &args.args[0] {
110 ColumnarValue::Scalar(ScalarValue::Null) => {
111 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
112 }
113 ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {
114 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
115 }
116 ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {
117 Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
118 }
119 ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
120 Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
121 }
122 ColumnarValue::Scalar(ScalarValue::Utf8(Some(fmt)))
123 | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(fmt)))
124 | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(fmt))) => {
125 let formatter = Formatter::parse(fmt, &data_types)?;
126 let mut result = Vec::with_capacity(len.unwrap_or(1));
127 for i in 0..len.unwrap_or(1) {
128 let scalars = args.args[1..]
129 .iter()
130 .map(|arg| try_to_scalar(arg.clone(), i))
131 .collect::<Result<Vec<_>>>()?;
132 let formatted = formatter.format(&scalars)?;
133 result.push(formatted);
134 }
135 if is_scalar {
136 let scalar_result = result.pop().unwrap();
137 match fmt_type {
138 DataType::Utf8 => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
139 Some(scalar_result),
140 ))),
141 DataType::LargeUtf8 => Ok(ColumnarValue::Scalar(
142 ScalarValue::LargeUtf8(Some(scalar_result)),
143 )),
144 DataType::Utf8View => Ok(ColumnarValue::Scalar(
145 ScalarValue::Utf8View(Some(scalar_result)),
146 )),
147 _ => unreachable!(),
148 }
149 } else {
150 let array: ArrayRef = match fmt_type {
151 DataType::Utf8 => Arc::new(StringArray::from(result)),
152 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
153 DataType::Utf8View => Arc::new(StringViewArray::from(result)),
154 _ => unreachable!(),
155 };
156 Ok(ColumnarValue::Array(array))
157 }
158 }
159 ColumnarValue::Array(fmts) => {
160 let mut result = Vec::with_capacity(len.unwrap());
161 for i in 0..len.unwrap() {
162 let fmt = ScalarValue::try_from_array(fmts, i)?;
163 match fmt.try_as_str() {
164 Some(Some(fmt)) => {
165 let formatter = Formatter::parse(fmt, &data_types)?;
166 let scalars = args.args[1..]
167 .iter()
168 .map(|arg| try_to_scalar(arg.clone(), i))
169 .collect::<Result<Vec<_>>>()?;
170 let formatted = formatter.format(&scalars)?;
171 result.push(Some(formatted));
172 }
173 Some(None) => {
174 result.push(None);
175 }
176 _ => unreachable!(),
177 }
178 }
179 let array: ArrayRef = match fmt_type {
180 DataType::Utf8 => Arc::new(StringArray::from(result)),
181 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(result)),
182 DataType::Utf8View => Arc::new(StringViewArray::from(result)),
183 _ => unreachable!(),
184 };
185 Ok(ColumnarValue::Array(array))
186 }
187 _ => exec_err!(
188 "The format_string function expects the first argument to be a string"
189 ),
190 }
191 }
192}
193
194fn try_to_scalar(arg: ColumnarValue, index: usize) -> Result<ScalarValue> {
195 match arg {
196 ColumnarValue::Scalar(scalar) => Ok(scalar),
197 ColumnarValue::Array(array) => ScalarValue::try_from_array(&array, index),
198 }
199}
200
201#[derive(Debug)]
203pub struct Formatter<'a> {
204 pub elements: Vec<FormatElement<'a>>,
205 pub arg_num: usize,
206}
207
208impl<'a> Formatter<'a> {
209 pub fn new(elements: Vec<FormatElement<'a>>) -> Self {
210 let arg_num = elements
211 .iter()
212 .map(|element| match element {
213 FormatElement::Format(spec) => spec.argument_index,
214 _ => 0,
215 })
216 .max()
217 .unwrap_or(0);
218 Self { elements, arg_num }
219 }
220
221 pub fn parse(fmt: &'a str, arg_types: &[DataType]) -> Result<Self> {
286 let mut res = Vec::new();
288
289 let mut rem = fmt;
290 let mut argument_index = 0;
291
292 let mut prev: Option<usize> = None;
293
294 while !rem.is_empty() {
295 if let Some((verbatim_prefix, rest)) = rem.split_once('%') {
296 if !verbatim_prefix.is_empty() {
297 res.push(FormatElement::Verbatim(verbatim_prefix));
298 }
299 if let Some(rest) = rest.strip_prefix('%') {
300 res.push(FormatElement::Verbatim("%"));
301 rem = rest;
302 continue;
303 }
304 if let Some(rest) = rest.strip_prefix('n') {
305 res.push(FormatElement::Verbatim("\n"));
306 rem = rest;
307 continue;
308 }
309 if let Some(rest) = rest.strip_prefix('<') {
310 let Some(p) = prev else {
312 return exec_err!("No previous argument to reference");
313 };
314 let (spec, rest) =
315 take_conversion_specifier(rest, p, &arg_types[p - 1])?;
316 res.push(FormatElement::Format(spec));
317 rem = rest;
318 continue;
319 }
320
321 let (current_argument_index, rest2) = take_numeric_param(rest, false);
322 let (current_argument_index, rest) =
323 match (current_argument_index, rest2.starts_with('$')) {
324 (NumericParam::Literal(index), true) => {
325 (index as usize, &rest2[1..])
326 }
327 (NumericParam::FromArgument, true) => {
328 return exec_err!("Invalid numeric parameter");
329 }
330 (_, false) => {
331 argument_index += 1;
332 (argument_index, rest)
333 }
334 };
335 if current_argument_index == 0 || current_argument_index > arg_types.len()
336 {
337 return exec_err!(
338 "Argument index {} is out of bounds",
339 current_argument_index
340 );
341 }
342
343 let (spec, rest) = take_conversion_specifier(
344 rest,
345 current_argument_index,
346 &arg_types[current_argument_index - 1],
347 )
348 .map_err(|e| exec_datafusion_err!("{:?}, format string: {:?}", e, fmt))?;
349 res.push(FormatElement::Format(spec));
350 prev = Some(spec.argument_index);
351 rem = rest;
352 } else {
353 res.push(FormatElement::Verbatim(rem));
354 break;
355 }
356 }
357
358 Ok(Self::new(res))
359 }
360
361 pub fn format(&self, args: &[ScalarValue]) -> Result<String> {
362 if args.len() < self.arg_num {
363 return exec_err!(
364 "Expected at least {} arguments, got {}",
365 self.arg_num,
366 args.len()
367 );
368 }
369 let mut string = String::new();
370 for element in &self.elements {
371 match element {
372 FormatElement::Verbatim(text) => {
373 string.push_str(text);
374 }
375 FormatElement::Format(spec) => {
376 spec.format(&mut string, &args[spec.argument_index - 1])?;
377 }
378 }
379 }
380 Ok(string)
381 }
382}
383
384#[derive(Debug)]
385pub enum FormatElement<'a> {
386 Verbatim(&'a str),
388 Format(ConversionSpecifier),
390}
391
392#[derive(Debug, Clone, Copy, PartialEq, Eq)]
394pub struct ConversionSpecifier {
395 pub argument_index: usize,
396 pub alt_form: bool,
398 pub zero_pad: bool,
400 pub left_adj: bool,
402 pub space_sign: bool,
404 pub force_sign: bool,
406 pub grouping_separator: bool,
408 pub negative_in_parentheses: bool,
410 pub width: NumericParam,
412 pub precision: NumericParam,
414 pub conversion_type: ConversionType,
416}
417
/// A numeric parameter of a conversion specifier (argument index, width or
/// precision).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumericParam {
    /// The number written literally in the format string.
    Literal(i32),
    /// No literal number was written in the format string.
    FromArgument,
}
426
427#[derive(Debug, Clone, Copy, PartialEq, Eq)]
429pub enum ConversionType {
430 BooleanUpper,
432 BooleanLower,
434 HexHashLower,
437 HexHashUpper,
439 DecInt,
441 OctInt,
443 HexIntLower,
445 HexIntUpper,
447 SciFloatLower,
449 SciFloatUpper,
451 DecFloatLower,
453 CompactFloatLower,
455 CompactFloatUpper,
457 HexFloatLower,
459 HexFloatUpper,
461 TimeLower(TimeFormat),
463 TimeUpper(TimeFormat),
465 CharLower,
467 CharUpper,
469 StringLower,
471 StringUpper,
473}
474
/// The character that follows `t` / `T` in a time conversion. Each variant is
/// named after the character that selects it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TimeFormat {
    /// `H`
    HUpper,
    /// `I`
    IUpper,
    /// `k`
    KLower,
    /// `l`
    LLower,
    /// `M`
    MUpper,
    /// `S`
    SUpper,
    /// `L`
    LUpper,
    /// `N`
    NUpper,
    /// `p`
    PLower,
    /// `z`
    ZLower,
    /// `Z`
    ZUpper,
    /// `s`
    SLower,
    /// `Q`
    QUpper,
    /// `B`
    BUpper,
    /// `b` or `h`
    BLower,
    /// `A`
    AUpper,
    /// `a`
    ALower,
    /// `C`
    CUpper,
    /// `Y`
    YUpper,
    /// `y`
    YLower,
    /// `j`
    JLower,
    /// `m`
    MLower,
    /// `d`
    DLower,
    /// `e`
    ELower,
    /// `R`
    RUpper,
    /// `T`
    TUpper,
    /// `r`
    RLower,
    /// `D`
    DUpper,
    /// `F`
    FUpper,
    /// `c`
    CLower,
}
552
553impl TryFrom<char> for TimeFormat {
554 type Error = DataFusionError;
555 fn try_from(value: char) -> Result<Self, Self::Error> {
556 match value {
557 'H' => Ok(TimeFormat::HUpper),
558 'I' => Ok(TimeFormat::IUpper),
559 'k' => Ok(TimeFormat::KLower),
560 'l' => Ok(TimeFormat::LLower),
561 'M' => Ok(TimeFormat::MUpper),
562 'S' => Ok(TimeFormat::SUpper),
563 'L' => Ok(TimeFormat::LUpper),
564 'N' => Ok(TimeFormat::NUpper),
565 'p' => Ok(TimeFormat::PLower),
566 'z' => Ok(TimeFormat::ZLower),
567 'Z' => Ok(TimeFormat::ZUpper),
568 's' => Ok(TimeFormat::SLower),
569 'Q' => Ok(TimeFormat::QUpper),
570 'B' => Ok(TimeFormat::BUpper),
571 'b' | 'h' => Ok(TimeFormat::BLower),
572 'A' => Ok(TimeFormat::AUpper),
573 'a' => Ok(TimeFormat::ALower),
574 'C' => Ok(TimeFormat::CUpper),
575 'Y' => Ok(TimeFormat::YUpper),
576 'y' => Ok(TimeFormat::YLower),
577 'j' => Ok(TimeFormat::JLower),
578 'm' => Ok(TimeFormat::MLower),
579 'd' => Ok(TimeFormat::DLower),
580 'e' => Ok(TimeFormat::ELower),
581 'R' => Ok(TimeFormat::RUpper),
582 'T' => Ok(TimeFormat::TUpper),
583 'r' => Ok(TimeFormat::RLower),
584 'D' => Ok(TimeFormat::DUpper),
585 'F' => Ok(TimeFormat::FUpper),
586 'c' => Ok(TimeFormat::CLower),
587 _ => exec_err!("Invalid time format: {}", value),
588 }
589 }
590}
591
592impl ConversionType {
593 pub fn validate(&self, arg_type: &DataType) -> Result<()> {
594 match self {
595 ConversionType::BooleanLower | ConversionType::BooleanUpper
596 if *arg_type != DataType::Boolean =>
597 {
598 return exec_err!(
599 "Invalid argument type for boolean conversion: {:?}",
600 arg_type
601 );
602 }
603 ConversionType::CharLower | ConversionType::CharUpper
604 if !matches!(
605 arg_type,
606 DataType::Int8
607 | DataType::UInt8
608 | DataType::Int16
609 | DataType::UInt16
610 | DataType::Int32
611 | DataType::UInt32
612 | DataType::Int64
613 | DataType::UInt64
614 ) =>
615 {
616 return exec_err!(
617 "Invalid argument type for char conversion: {:?}",
618 arg_type
619 );
620 }
621 ConversionType::DecInt
622 | ConversionType::OctInt
623 | ConversionType::HexIntLower
624 | ConversionType::HexIntUpper
625 if !arg_type.is_integer() =>
626 {
627 return exec_err!(
628 "Invalid argument type for integer conversion: {:?}",
629 arg_type
630 );
631 }
632 ConversionType::SciFloatLower
633 | ConversionType::SciFloatUpper
634 | ConversionType::DecFloatLower
635 | ConversionType::CompactFloatLower
636 | ConversionType::CompactFloatUpper
637 | ConversionType::HexFloatLower
638 | ConversionType::HexFloatUpper
639 if !arg_type.is_numeric() =>
640 {
641 return exec_err!(
642 "Invalid argument type for float conversion: {:?}",
643 arg_type
644 );
645 }
646 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_)
647 if !arg_type.is_temporal() =>
648 {
649 return exec_err!(
650 "Invalid argument type for time conversion: {:?}",
651 arg_type
652 );
653 }
654 _ => {}
655 }
656 Ok(())
657 }
658
659 fn supports_integer(&self) -> bool {
660 matches!(
661 self,
662 ConversionType::DecInt
663 | ConversionType::HexIntLower
664 | ConversionType::HexIntUpper
665 | ConversionType::OctInt
666 | ConversionType::CharLower
667 | ConversionType::CharUpper
668 | ConversionType::StringLower
669 | ConversionType::StringUpper
670 )
671 }
672
673 fn supports_float(&self) -> bool {
674 matches!(
675 self,
676 ConversionType::DecFloatLower
677 | ConversionType::SciFloatLower
678 | ConversionType::SciFloatUpper
679 | ConversionType::CompactFloatLower
680 | ConversionType::CompactFloatUpper
681 | ConversionType::StringLower
682 | ConversionType::StringUpper
683 | ConversionType::HexFloatLower
684 | ConversionType::HexFloatUpper
685 )
686 }
687
688 fn supports_decimal(&self) -> bool {
689 matches!(
690 self,
691 ConversionType::DecFloatLower
692 | ConversionType::SciFloatLower
693 | ConversionType::SciFloatUpper
694 | ConversionType::CompactFloatLower
695 | ConversionType::CompactFloatUpper
696 | ConversionType::StringLower
697 | ConversionType::StringUpper
698 )
699 }
700
701 fn supports_time(&self) -> bool {
702 matches!(
703 self,
704 ConversionType::TimeLower(_)
705 | ConversionType::TimeUpper(_)
706 | ConversionType::StringLower
707 | ConversionType::StringUpper
708 )
709 }
710
711 fn is_upper(&self) -> bool {
712 matches!(
713 self,
714 ConversionType::BooleanUpper
715 | ConversionType::HexHashUpper
716 | ConversionType::HexIntUpper
717 | ConversionType::SciFloatUpper
718 | ConversionType::CompactFloatUpper
719 | ConversionType::HexFloatUpper
720 | ConversionType::TimeUpper(_)
721 | ConversionType::CharUpper
722 | ConversionType::StringUpper
723 )
724 }
725}
726
727fn take_conversion_specifier<'a>(
728 mut s: &'a str,
729 argument_index: usize,
730 arg_type: &DataType,
731) -> Result<(ConversionSpecifier, &'a str)> {
732 let mut spec = ConversionSpecifier {
733 argument_index,
734 alt_form: false,
735 zero_pad: false,
736 left_adj: false,
737 space_sign: false,
738 force_sign: false,
739 grouping_separator: false,
740 negative_in_parentheses: false,
741 width: NumericParam::Literal(0),
742 precision: NumericParam::FromArgument, conversion_type: ConversionType::DecInt,
745 };
746
747 loop {
749 match s.chars().next() {
750 Some('#') => {
751 spec.alt_form = true;
752 }
753 Some('0') => {
754 if spec.left_adj {
755 return exec_err!("Invalid flag combination: '0' and '-'");
756 }
757 spec.zero_pad = true;
758 }
759 Some('-') => {
760 spec.left_adj = true;
761 }
762 Some(' ') => {
763 if spec.force_sign {
764 return exec_err!("Invalid flag combination: '+' and ' '");
765 }
766 spec.space_sign = true;
767 }
768 Some('+') => {
769 if spec.space_sign {
770 return exec_err!("Invalid flag combination: '+' and ' '");
771 }
772 spec.force_sign = true;
773 }
774 Some(',') => {
775 spec.grouping_separator = true;
776 }
777 Some('(') => {
778 spec.negative_in_parentheses = true;
779 }
780 _ => {
781 break;
782 }
783 }
784 s = &s[1..];
785 }
786 let (w, mut s) = take_numeric_param(s, false);
788 spec.width = w;
789 if matches!(s.chars().next(), Some('.')) {
791 s = &s[1..];
792 let (p, s2) = take_numeric_param(s, true);
793 spec.precision = p;
794 s = s2;
795 }
796 let mut chars = s.chars();
797 let mut offset = 1;
798 spec.conversion_type = match chars.next() {
800 Some('b') => ConversionType::BooleanLower,
801 Some('B') => ConversionType::BooleanUpper,
802 Some('h') => ConversionType::HexHashLower,
803 Some('H') => ConversionType::HexHashUpper,
804 Some('s') => ConversionType::StringLower,
805 Some('S') => ConversionType::StringUpper,
806 Some('c') => ConversionType::CharLower,
807 Some('C') => ConversionType::CharUpper,
808 Some('d') => ConversionType::DecInt,
809 Some('o') => ConversionType::OctInt,
810 Some('x') => ConversionType::HexIntLower,
811 Some('X') => ConversionType::HexIntUpper,
812 Some('e') => ConversionType::SciFloatLower,
813 Some('E') => ConversionType::SciFloatUpper,
814 Some('f') => ConversionType::DecFloatLower,
815 Some('g') => ConversionType::CompactFloatLower,
816 Some('G') => ConversionType::CompactFloatUpper,
817 Some('a') => ConversionType::HexFloatLower,
818 Some('A') => ConversionType::HexFloatUpper,
819 Some('t') => {
820 let Some(chr) = chars.next() else {
821 return exec_err!("Invalid time format: {}", s);
822 };
823 offset += 1;
824 ConversionType::TimeLower(chr.try_into()?)
825 }
826 Some('T') => {
827 let Some(chr) = chars.next() else {
828 return exec_err!("Invalid time format: {}", s);
829 };
830 offset += 1;
831 ConversionType::TimeUpper(chr.try_into()?)
832 }
833 chr => {
834 return plan_err!("Invalid conversion type: {:?}", chr);
835 }
836 };
837
838 spec.conversion_type.validate(arg_type)?;
839 Ok((spec, &s[offset..]))
840}
841
842fn take_numeric_param(s: &str, zero: bool) -> (NumericParam, &str) {
843 match s.chars().next() {
844 Some(digit) if (if zero { '0'..='9' } else { '1'..='9' }).contains(&digit) => {
845 let mut s = s;
846 let mut w = 0;
847 loop {
848 match s.chars().next() {
849 Some(digit) if digit.is_ascii_digit() => {
850 w = 10 * w + (digit as i32 - '0' as i32);
851 }
852 _ => {
853 break;
854 }
855 }
856 s = &s[1..];
857 }
858 (NumericParam::Literal(w), s)
859 }
860 _ => (NumericParam::FromArgument, s),
861 }
862}
863
864fn codepoint_to_char(value: u32) -> Result<char> {
868 char::from_u32(value).ok_or_else(|| {
869 exec_datafusion_err!("invalid Unicode scalar value for %c: {value:#x}")
870 })
871}
872
873fn signed_to_char(value: i64) -> Result<char> {
877 let codepoint = u32::try_from(value).map_err(|_| {
878 exec_datafusion_err!("invalid Unicode scalar value for %c: {value}")
879 })?;
880 codepoint_to_char(codepoint)
881}
882
883fn unsigned_to_char(value: u64) -> Result<char> {
888 let codepoint = u32::try_from(value).map_err(|_| {
889 exec_datafusion_err!("invalid Unicode scalar value for %c: {value:#x}")
890 })?;
891 codepoint_to_char(codepoint)
892}
893
894fn integer_scalar_to_char(scalar: &ScalarValue) -> Result<char> {
896 match scalar {
897 ScalarValue::Int8(Some(value)) => signed_to_char(*value as i64),
898 ScalarValue::Int16(Some(value)) => signed_to_char(*value as i64),
899 ScalarValue::Int32(Some(value)) => signed_to_char(*value as i64),
900 ScalarValue::Int64(Some(value)) => signed_to_char(*value),
901 ScalarValue::UInt8(Some(value)) => unsigned_to_char(*value as u64),
902 ScalarValue::UInt16(Some(value)) => unsigned_to_char(*value as u64),
903 ScalarValue::UInt32(Some(value)) => unsigned_to_char(*value as u64),
904 ScalarValue::UInt64(Some(value)) => unsigned_to_char(*value),
905 _ => datafusion_common::internal_err!(
906 "integer_scalar_to_char expects a non-null integer scalar, got {scalar:?}"
907 ),
908 }
909}
910
911impl ConversionSpecifier {
912 fn validate_grouping_separator(&self) -> Result<()> {
916 if self.grouping_separator
917 && matches!(
918 self.conversion_type,
919 ConversionType::SciFloatLower | ConversionType::SciFloatUpper
920 )
921 {
922 return exec_err!(
923 "Grouping separator ',' flag is not compatible with scientific notation conversion '{}'",
924 if self.conversion_type == ConversionType::SciFloatUpper {
925 'E'
926 } else {
927 'e'
928 }
929 );
930 }
931 Ok(())
932 }
933
934 pub fn format(&self, string: &mut String, value: &ScalarValue) -> Result<()> {
935 match value {
936 ScalarValue::Boolean(value) => match self.conversion_type {
937 ConversionType::StringLower | ConversionType::StringUpper => {
938 self.format_string(string, &value.unwrap_or(false).to_string())
939 }
940
941 _ => self.format_boolean(string, value),
942 },
943 ScalarValue::Int8(Some(_))
944 | ScalarValue::Int16(Some(_))
945 | ScalarValue::Int32(Some(_))
946 | ScalarValue::Int64(Some(_))
947 | ScalarValue::UInt8(Some(_))
948 | ScalarValue::UInt16(Some(_))
949 | ScalarValue::UInt32(Some(_))
950 | ScalarValue::UInt64(Some(_))
951 if matches!(
952 self.conversion_type,
953 ConversionType::CharLower | ConversionType::CharUpper
954 ) =>
955 {
956 self.format_char(string, integer_scalar_to_char(value)?)
957 }
958 ScalarValue::Int8(value) => match (self.conversion_type, value) {
959 (ConversionType::DecInt, Some(value)) => {
960 self.format_signed(string, *value as i64)
961 }
962 (
963 ConversionType::HexIntLower
964 | ConversionType::HexIntUpper
965 | ConversionType::OctInt,
966 Some(value),
967 ) => self.format_unsigned(string, (*value as u8) as u64),
968 (
969 ConversionType::StringLower | ConversionType::StringUpper,
970 Some(value),
971 ) => self.format_string(string, &value.to_string()),
972 (t, None) if t.supports_integer() => self.format_string(string, "null"),
973 _ => {
974 exec_err!(
975 "Invalid conversion type: {:?} for Int8",
976 self.conversion_type
977 )
978 }
979 },
980 ScalarValue::Int16(value) => match (self.conversion_type, value) {
981 (ConversionType::DecInt, Some(value)) => {
982 self.format_signed(string, *value as i64)
983 }
984 (
985 ConversionType::HexIntLower
986 | ConversionType::HexIntUpper
987 | ConversionType::OctInt,
988 Some(value),
989 ) => self.format_unsigned(string, (*value as u16) as u64),
990 (
991 ConversionType::StringLower | ConversionType::StringUpper,
992 Some(value),
993 ) => self.format_string(string, &value.to_string()),
994 (t, None) if t.supports_integer() => self.format_string(string, "null"),
995 _ => {
996 exec_err!(
997 "Invalid conversion type: {:?} for Int16",
998 self.conversion_type
999 )
1000 }
1001 },
1002 ScalarValue::Int32(value) => match (self.conversion_type, value) {
1003 (ConversionType::DecInt, Some(value)) => {
1004 self.format_signed(string, *value as i64)
1005 }
1006 (
1007 ConversionType::HexIntLower
1008 | ConversionType::HexIntUpper
1009 | ConversionType::OctInt,
1010 Some(value),
1011 ) => self.format_unsigned(string, (*value as u32) as u64),
1012 (
1013 ConversionType::StringLower | ConversionType::StringUpper,
1014 Some(value),
1015 ) => self.format_string(string, &value.to_string()),
1016 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1017 _ => {
1018 exec_err!(
1019 "Invalid conversion type: {:?} for Int32",
1020 self.conversion_type
1021 )
1022 }
1023 },
1024 ScalarValue::Int64(value) => match (self.conversion_type, value) {
1025 (ConversionType::DecInt, Some(value)) => {
1026 self.format_signed(string, *value)
1027 }
1028 (
1029 ConversionType::HexIntLower
1030 | ConversionType::HexIntUpper
1031 | ConversionType::OctInt,
1032 Some(value),
1033 ) => self.format_unsigned(string, *value as u64),
1034 (
1035 ConversionType::StringLower | ConversionType::StringUpper,
1036 Some(value),
1037 ) => self.format_string(string, &value.to_string()),
1038 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1039 _ => {
1040 exec_err!(
1041 "Invalid conversion type: {:?} for Int64",
1042 self.conversion_type
1043 )
1044 }
1045 },
1046 ScalarValue::UInt8(value) => match (self.conversion_type, value) {
1047 (
1048 ConversionType::DecInt
1049 | ConversionType::HexIntLower
1050 | ConversionType::HexIntUpper
1051 | ConversionType::OctInt,
1052 Some(value),
1053 ) => self.format_unsigned(string, *value as u64),
1054 (
1055 ConversionType::StringLower | ConversionType::StringUpper,
1056 Some(value),
1057 ) => self.format_string(string, &value.to_string()),
1058 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1059 _ => {
1060 exec_err!(
1061 "Invalid conversion type: {:?} for UInt8",
1062 self.conversion_type
1063 )
1064 }
1065 },
1066 ScalarValue::UInt16(value) => match (self.conversion_type, value) {
1067 (
1068 ConversionType::DecInt
1069 | ConversionType::HexIntLower
1070 | ConversionType::HexIntUpper
1071 | ConversionType::OctInt,
1072 Some(value),
1073 ) => self.format_unsigned(string, *value as u64),
1074 (
1075 ConversionType::StringLower | ConversionType::StringUpper,
1076 Some(value),
1077 ) => self.format_string(string, &value.to_string()),
1078 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1079 _ => {
1080 exec_err!(
1081 "Invalid conversion type: {:?} for UInt16",
1082 self.conversion_type
1083 )
1084 }
1085 },
1086 ScalarValue::UInt32(value) => match (self.conversion_type, value) {
1087 (
1088 ConversionType::DecInt
1089 | ConversionType::HexIntLower
1090 | ConversionType::HexIntUpper
1091 | ConversionType::OctInt,
1092 Some(value),
1093 ) => self.format_unsigned(string, *value as u64),
1094 (
1095 ConversionType::StringLower | ConversionType::StringUpper,
1096 Some(value),
1097 ) => self.format_string(string, &value.to_string()),
1098 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1099 _ => {
1100 exec_err!(
1101 "Invalid conversion type: {:?} for UInt32",
1102 self.conversion_type
1103 )
1104 }
1105 },
1106 ScalarValue::UInt64(value) => match (self.conversion_type, value) {
1107 (
1108 ConversionType::DecInt
1109 | ConversionType::HexIntLower
1110 | ConversionType::HexIntUpper
1111 | ConversionType::OctInt,
1112 Some(value),
1113 ) => self.format_unsigned(string, *value),
1114 (
1115 ConversionType::StringLower | ConversionType::StringUpper,
1116 Some(value),
1117 ) => self.format_string(string, &value.to_string()),
1118 (t, None) if t.supports_integer() => self.format_string(string, "null"),
1119 _ => {
1120 exec_err!(
1121 "Invalid conversion type: {:?} for UInt64",
1122 self.conversion_type
1123 )
1124 }
1125 },
1126 ScalarValue::Float16(value) => match (self.conversion_type, value) {
1127 (
1128 ConversionType::DecFloatLower
1129 | ConversionType::SciFloatLower
1130 | ConversionType::SciFloatUpper
1131 | ConversionType::CompactFloatLower
1132 | ConversionType::CompactFloatUpper,
1133 Some(value),
1134 ) => self.format_float(string, value.to_f64().unwrap()),
1135 (
1136 ConversionType::StringLower | ConversionType::StringUpper,
1137 Some(value),
1138 ) => self.format_string(string, &value.to_f32().unwrap().spark_string()),
1139 (
1140 ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1141 Some(value),
1142 ) => self.format_hex_float(string, value.to_f64().unwrap()),
1143 (t, None) if t.supports_float() => self.format_string(string, "null"),
1144 _ => {
1145 exec_err!(
1146 "Invalid conversion type: {:?} for Float16",
1147 self.conversion_type
1148 )
1149 }
1150 },
1151 ScalarValue::Float32(value) => match (self.conversion_type, value) {
1152 (
1153 ConversionType::DecFloatLower
1154 | ConversionType::SciFloatLower
1155 | ConversionType::SciFloatUpper
1156 | ConversionType::CompactFloatLower
1157 | ConversionType::CompactFloatUpper,
1158 Some(value),
1159 ) => self.format_float(string, *value as f64),
1160 (
1161 ConversionType::StringLower | ConversionType::StringUpper,
1162 Some(value),
1163 ) => self.format_string(string, &value.spark_string()),
1164 (
1165 ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1166 Some(value),
1167 ) => self.format_hex_float(string, *value as f64),
1168 (t, None) if t.supports_float() => self.format_string(string, "null"),
1169 _ => {
1170 exec_err!(
1171 "Invalid conversion type: {:?} for Float32",
1172 self.conversion_type
1173 )
1174 }
1175 },
1176 ScalarValue::Float64(value) => match (self.conversion_type, value) {
1177 (
1178 ConversionType::DecFloatLower
1179 | ConversionType::SciFloatLower
1180 | ConversionType::SciFloatUpper
1181 | ConversionType::CompactFloatLower
1182 | ConversionType::CompactFloatUpper,
1183 Some(value),
1184 ) => self.format_float(string, *value),
1185 (
1186 ConversionType::StringLower | ConversionType::StringUpper,
1187 Some(value),
1188 ) => self.format_string(string, &value.spark_string()),
1189 (
1190 ConversionType::HexFloatLower | ConversionType::HexFloatUpper,
1191 Some(value),
1192 ) => self.format_hex_float(string, *value),
1193 (t, None) if t.supports_float() => self.format_string(string, "null"),
1194 _ => {
1195 exec_err!(
1196 "Invalid conversion type: {:?} for Float64",
1197 self.conversion_type
1198 )
1199 }
1200 },
1201 ScalarValue::Utf8(value) => {
1202 let value: &str = match value {
1203 Some(value) => value.as_str(),
1204 None => "null",
1205 };
1206 if matches!(
1207 self.conversion_type,
1208 ConversionType::StringLower | ConversionType::StringUpper
1209 ) {
1210 self.format_string(string, value)
1211 } else {
1212 exec_err!(
1213 "Invalid conversion type: {:?} for Utf8",
1214 self.conversion_type
1215 )
1216 }
1217 }
1218 ScalarValue::LargeUtf8(value) => {
1219 let value: &str = match value {
1220 Some(value) => value.as_str(),
1221 None => "null",
1222 };
1223 if matches!(
1224 self.conversion_type,
1225 ConversionType::StringLower | ConversionType::StringUpper
1226 ) {
1227 self.format_string(string, value)
1228 } else {
1229 exec_err!(
1230 "Invalid conversion type: {:?} for LargeUtf8",
1231 self.conversion_type
1232 )
1233 }
1234 }
1235 ScalarValue::Utf8View(value) => {
1236 let value: &str = match value {
1237 Some(value) => value.as_str(),
1238 None => "null",
1239 };
1240 self.format_string(string, value)
1241 }
1242 ScalarValue::Decimal128(value, _, scale) => {
1243 match (self.conversion_type, value) {
1244 (
1245 ConversionType::DecFloatLower
1246 | ConversionType::SciFloatLower
1247 | ConversionType::SciFloatUpper
1248 | ConversionType::CompactFloatLower
1249 | ConversionType::CompactFloatUpper,
1250 Some(value),
1251 ) => self.format_decimal(string, &value.to_string(), *scale as i64),
1252 (
1253 ConversionType::StringLower | ConversionType::StringUpper,
1254 Some(value),
1255 ) => self.format_string(string, &value.to_string()),
1256 (t, None) if t.supports_decimal() => {
1257 self.format_string(string, "null")
1258 }
1259
1260 _ => {
1261 exec_err!(
1262 "Invalid conversion type: {:?} for Decimal128",
1263 self.conversion_type
1264 )
1265 }
1266 }
1267 }
1268 ScalarValue::Decimal256(value, _, scale) => {
1269 match (self.conversion_type, value) {
1270 (
1271 ConversionType::DecFloatLower
1272 | ConversionType::SciFloatLower
1273 | ConversionType::SciFloatUpper
1274 | ConversionType::CompactFloatLower
1275 | ConversionType::CompactFloatUpper,
1276 Some(value),
1277 ) => self.format_decimal(string, &value.to_string(), *scale as i64),
1278 (
1279 ConversionType::StringLower | ConversionType::StringUpper,
1280 Some(value),
1281 ) => self.format_string(string, &value.to_string()),
1282 (t, None) if t.supports_decimal() => {
1283 self.format_string(string, "null")
1284 }
1285
1286 _ => {
1287 exec_err!(
1288 "Invalid conversion type: {:?} for Decimal256",
1289 self.conversion_type
1290 )
1291 }
1292 }
1293 }
1294
1295 ScalarValue::Time32Second(value) => match (self.conversion_type, value) {
1296 (
1297 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1298 Some(value),
1299 ) => self.format_time(string, *value as i64 * 1000000000, &None),
1300 (
1301 ConversionType::StringLower | ConversionType::StringUpper,
1302 Some(value),
1303 ) => self.format_string(string, &value.to_string()),
1304 (t, None) if t.supports_time() => self.format_string(string, "null"),
1305 _ => {
1306 exec_err!(
1307 "Invalid conversion type: {:?} for Time32Second",
1308 self.conversion_type
1309 )
1310 }
1311 },
1312 ScalarValue::Time32Millisecond(value) => {
1313 match (self.conversion_type, value) {
1314 (
1315 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1316 Some(value),
1317 ) => self.format_time(string, *value as i64 * 1000000, &None),
1318 (
1319 ConversionType::StringLower | ConversionType::StringUpper,
1320 Some(value),
1321 ) => self.format_string(string, &value.to_string()),
1322 (t, None) if t.supports_time() => self.format_string(string, "null"),
1323 _ => {
1324 exec_err!(
1325 "Invalid conversion type: {:?} for Time32Millisecond",
1326 self.conversion_type
1327 )
1328 }
1329 }
1330 }
1331 ScalarValue::Time64Microsecond(value) => {
1332 match (self.conversion_type, value) {
1333 (
1334 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1335 Some(value),
1336 ) => self.format_time(string, *value * 1000, &None),
1337 (
1338 ConversionType::StringLower | ConversionType::StringUpper,
1339 Some(value),
1340 ) => self.format_string(string, &value.to_string()),
1341 (t, None) if t.supports_time() => self.format_string(string, "null"),
1342 _ => {
1343 exec_err!(
1344 "Invalid conversion type: {:?} for Time64Microsecond",
1345 self.conversion_type
1346 )
1347 }
1348 }
1349 }
1350 ScalarValue::Time64Nanosecond(value) => match (self.conversion_type, value) {
1351 (
1352 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1353 Some(value),
1354 ) => self.format_time(string, *value, &None),
1355 (
1356 ConversionType::StringLower | ConversionType::StringUpper,
1357 Some(value),
1358 ) => self.format_string(string, &value.to_string()),
1359 (t, None) if t.supports_time() => self.format_string(string, "null"),
1360 _ => {
1361 exec_err!(
1362 "Invalid conversion type: {:?} for Time64Nanosecond",
1363 self.conversion_type
1364 )
1365 }
1366 },
1367 ScalarValue::TimestampSecond(value, zone) => {
1368 match (self.conversion_type, value) {
1369 (
1370 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1371 Some(value),
1372 ) => self.format_time(string, value * 1000000000, zone),
1373 (
1374 ConversionType::StringLower | ConversionType::StringUpper,
1375 Some(value),
1376 ) => self.format_string(string, &value.to_string()),
1377 (t, None) if t.supports_time() => self.format_string(string, "null"),
1378 _ => {
1379 exec_err!(
1380 "Invalid conversion type: {:?} for TimestampSecond",
1381 self.conversion_type
1382 )
1383 }
1384 }
1385 }
1386 ScalarValue::TimestampMillisecond(value, zone) => {
1387 match (self.conversion_type, value) {
1388 (
1389 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1390 Some(value),
1391 ) => self.format_time(string, *value * 1000000, zone),
1392 (
1393 ConversionType::StringLower | ConversionType::StringUpper,
1394 Some(value),
1395 ) => self.format_string(string, &value.to_string()),
1396
1397 (t, None) if t.supports_time() => self.format_string(string, "null"),
1398 _ => {
1399 exec_err!(
1400 "Invalid conversion type: {:?} for TimestampMillisecond",
1401 self.conversion_type
1402 )
1403 }
1404 }
1405 }
1406 ScalarValue::TimestampMicrosecond(value, zone) => {
1407 match (self.conversion_type, value) {
1408 (
1409 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1410 Some(value),
1411 ) => self.format_time(string, value * 1000, zone),
1412 (
1413 ConversionType::StringLower | ConversionType::StringUpper,
1414 Some(value),
1415 ) => self.format_string(string, &value.to_string()),
1416 (t, None) if t.supports_time() => self.format_string(string, "null"),
1417 _ => {
1418 exec_err!(
1419 "Invalid conversion type: {:?} for timestampmicrosecond",
1420 self.conversion_type
1421 )
1422 }
1423 }
1424 }
1425
1426 ScalarValue::TimestampNanosecond(value, zone) => {
1427 match (self.conversion_type, value) {
1428 (
1429 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1430 Some(value),
1431 ) => self.format_time(string, *value, zone),
1432 (
1433 ConversionType::StringLower | ConversionType::StringUpper,
1434 Some(value),
1435 ) => self.format_string(string, &value.to_string()),
1436 (t, None) if t.supports_time() => self.format_string(string, "null"),
1437 _ => {
1438 exec_err!(
1439 "Invalid conversion type: {:?} for TimestampNanosecond",
1440 self.conversion_type
1441 )
1442 }
1443 }
1444 }
1445 ScalarValue::Date32(value) => match (self.conversion_type, value) {
1446 (
1447 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1448 Some(value),
1449 ) => self.format_date(string, *value as i64),
1450 (
1451 ConversionType::StringLower | ConversionType::StringUpper,
1452 Some(value),
1453 ) => self.format_string(string, &value.to_string()),
1454 (t, None) if t.supports_time() => self.format_string(string, "null"),
1455 _ => {
1456 exec_err!(
1457 "Invalid conversion type: {:?} for Date32",
1458 self.conversion_type
1459 )
1460 }
1461 },
1462 ScalarValue::Date64(value) => match (self.conversion_type, value) {
1463 (
1464 ConversionType::TimeLower(_) | ConversionType::TimeUpper(_),
1465 Some(value),
1466 ) => self.format_date(string, *value),
1467 (
1468 ConversionType::StringLower | ConversionType::StringUpper,
1469 Some(value),
1470 ) => self.format_string(string, &value.to_string()),
1471 (t, None) if t.supports_time() => self.format_string(string, "null"),
1472 _ => {
1473 exec_err!(
1474 "Invalid conversion type: {:?} for Date64",
1475 self.conversion_type
1476 )
1477 }
1478 },
1479 ScalarValue::Null => {
1480 let value = "null".to_string();
1481 self.format_string(string, &value)
1482 }
1483 _ => exec_err!("Invalid scalar value: {value}"),
1484 }
1485 }
1486
1487 fn format_hex_float(&self, writer: &mut String, value: f64) -> Result<()> {
1488 let (sign, raw_exponent, mantissa) = value.to_parts();
1490 let is_subnormal = raw_exponent == 0;
1491
1492 let precision = match self.precision {
1493 NumericParam::FromArgument => None,
1494 NumericParam::Literal(p) => Some(p),
1495 };
1496
1497 let mantissa_hex_digits = f64::MANTISSA_BITS.div_ceil(4); let should_normalize = is_subnormal
1501 && precision.is_some()
1502 && precision.unwrap() < mantissa_hex_digits as i32;
1503
1504 let (value, raw_exponent, mantissa) = if should_normalize {
1505 let value = value * f64::SCALEUP;
1506 let (_, raw_exponent, mantissa) = value.to_parts();
1507 (value, raw_exponent, mantissa)
1508 } else {
1509 (value, raw_exponent, mantissa)
1510 };
1511
1512 let mut temp = String::new();
1513
1514 let sign_char = if sign {
1515 "-"
1516 } else if self.force_sign {
1517 "+"
1518 } else if self.space_sign {
1519 " "
1520 } else {
1521 ""
1522 };
1523 match value.category() {
1524 FpCategory::Nan => {
1525 write!(&mut temp, "NaN")?;
1526 }
1527 FpCategory::Infinite => {
1528 write!(&mut temp, "{sign_char}Infinity")?;
1529 }
1530 FpCategory::Zero => {
1531 write!(&mut temp, "{sign_char}0x0.0p0")?;
1532 }
1533 _ => {
1534 let bias = i32::from(f64::EXPONENT_BIAS);
1535 let exponent = if is_subnormal && !should_normalize {
1538 1 - bias
1539 } else {
1540 raw_exponent as i32 - bias
1541 };
1542
1543 let final_mantissa = if let Some(p) = precision {
1545 if p == 0 {
1546 let shift_distance = f64::MANTISSA_BITS as i32 - 4; let shifted = mantissa >> shift_distance;
1550 let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1551 let round_bit = 1u64 << (shift_distance - 1);
1552
1553 if rounding_bits > round_bit
1555 || (rounding_bits == round_bit && (shifted & 1) != 0)
1556 {
1557 (shifted + 1) << shift_distance
1558 } else {
1559 shifted << shift_distance
1560 }
1561 } else {
1562 let precision_bits = p * 4; let keep_bits = f64::MANTISSA_BITS as i32;
1565 let shift_distance = keep_bits - precision_bits;
1566
1567 if shift_distance > 0 {
1568 let shifted = mantissa >> shift_distance;
1569 let rounding_bits = mantissa & ((1u64 << shift_distance) - 1);
1570 let round_bit = 1u64 << (shift_distance - 1);
1571
1572 if rounding_bits > round_bit
1574 || (rounding_bits == round_bit && (shifted & 1) != 0)
1575 {
1576 (shifted + 1) << shift_distance
1577 } else {
1578 shifted << shift_distance
1579 }
1580 } else {
1581 mantissa
1582 }
1583 }
1584 } else {
1585 mantissa
1586 };
1587
1588 if is_subnormal && !should_normalize {
1589 if precision.is_some() {
1591 let full_hex = format!(
1593 "{:0width$x}",
1594 final_mantissa,
1595 width = mantissa_hex_digits as usize
1596 );
1597 write!(&mut temp, "{sign_char}0x0.{full_hex}p{exponent}")?;
1598 } else {
1599 let hex_digits = format!(
1601 "{:0width$x}",
1602 final_mantissa,
1603 width = mantissa_hex_digits as usize
1604 );
1605 write!(&mut temp, "{sign_char}0x0.{hex_digits}p{exponent}")?;
1606 }
1607 } else {
1608 if let Some(p) = precision {
1610 let p = if p == 0 { 1 } else { p };
1611 let hex_digits = format!("{final_mantissa:x}");
1612 let formatted_digits = if p as usize >= hex_digits.len() {
1613 format!("{:0<width$}", hex_digits, width = p as usize)
1615 } else {
1616 hex_digits[..p as usize].to_string()
1617 };
1618 write!(
1619 &mut temp,
1620 "{sign_char}0x1.{formatted_digits}p{exponent}"
1621 )?;
1622 } else {
1623 let mut hex_digits = format!("{final_mantissa:x}");
1625 hex_digits = trim_trailing_0s_hex(&hex_digits).to_owned();
1626 if hex_digits.is_empty() {
1627 write!(&mut temp, "{sign_char}0x1.0p{exponent}")?;
1628 } else {
1629 write!(&mut temp, "{sign_char}0x1.{hex_digits}p{exponent}")?;
1630 }
1631 }
1632 }
1633 if should_normalize {
1634 let (prefix, exp) = temp.split_once('p').unwrap();
1635 let iexp = exp.parse::<i32>().unwrap() - f64::SCALEUP_POWER as i32;
1636 temp = format!("{prefix}p{iexp}");
1637 }
1638 }
1639 };
1640
1641 if self.conversion_type.is_upper() {
1642 temp = temp.to_ascii_uppercase();
1643 }
1644
1645 let NumericParam::Literal(width) = self.width else {
1646 writer.push_str(&temp);
1647 return Ok(());
1648 };
1649 if self.left_adj {
1650 writer.push_str(&temp);
1651 for _ in temp.len()..width as usize {
1652 writer.push(' ');
1653 }
1654 } else if self.zero_pad && value.is_finite() {
1655 let delimiter = if self.conversion_type.is_upper() {
1656 "0X"
1657 } else {
1658 "0x"
1659 };
1660 let (prefix, suffix) = temp.split_once(delimiter).unwrap();
1661 writer.push_str(prefix);
1662 writer.push_str(delimiter);
1663 for _ in temp.len()..width as usize {
1664 writer.push('0');
1665 }
1666 writer.push_str(suffix);
1667 } else {
1668 while temp.len() < width as usize {
1669 temp = " ".to_owned() + &temp;
1670 }
1671 writer.push_str(&temp);
1672 };
1673 Ok(())
1674 }
1675
1676 fn format_char(&self, writer: &mut String, value: char) -> Result<()> {
1677 let upper = self.conversion_type.is_upper();
1678 match self.conversion_type {
1679 ConversionType::CharLower | ConversionType::CharUpper => {
1680 let NumericParam::Literal(width) = self.width else {
1681 if upper {
1682 writer.push(value.to_ascii_uppercase());
1683 } else {
1684 writer.push(value);
1685 }
1686 return Ok(());
1687 };
1688
1689 let start_len = writer.len();
1690 if self.left_adj {
1691 if upper {
1692 writer.push(value.to_ascii_uppercase());
1693 } else {
1694 writer.push(value);
1695 }
1696 while writer.len() - start_len < width as usize {
1697 writer.push(' ');
1698 }
1699 } else {
1700 while writer.len() - start_len + value.len_utf8() < width as usize {
1701 writer.push(' ');
1702 }
1703 if upper {
1704 writer.push(value.to_ascii_uppercase());
1705 } else {
1706 writer.push(value);
1707 }
1708 }
1709 Ok(())
1710 }
1711 _ => exec_err!(
1712 "Invalid conversion type: {:?} for char",
1713 self.conversion_type
1714 ),
1715 }
1716 }
1717
1718 fn format_boolean(&self, writer: &mut String, value: &Option<bool>) -> Result<()> {
1719 let value = value.unwrap_or(false);
1720
1721 let formatted = match self.conversion_type {
1722 ConversionType::BooleanUpper => {
1723 if value {
1724 "TRUE"
1725 } else {
1726 "FALSE"
1727 }
1728 }
1729 ConversionType::BooleanLower => {
1730 if value {
1731 "true"
1732 } else {
1733 "false"
1734 }
1735 }
1736 _ => {
1737 return exec_err!(
1738 "Invalid conversion type: {:?} for boolean array",
1739 self.conversion_type
1740 );
1741 }
1742 };
1743 self.format_str(writer, formatted)
1744 }
1745
1746 fn format_float(&self, writer: &mut String, value: f64) -> Result<()> {
1747 self.validate_grouping_separator()?;
1748
1749 let mut prefix = String::new();
1750 let mut suffix = String::new();
1751 let mut number = String::new();
1752 let upper = self.conversion_type.is_upper();
1753
1754 if value.is_sign_negative() {
1756 if self.negative_in_parentheses {
1757 prefix.push('(');
1758 suffix.push(')');
1759 } else {
1760 prefix.push('-');
1761 }
1762 } else if self.space_sign {
1763 prefix.push(' ');
1764 } else if self.force_sign {
1765 prefix.push('+');
1766 }
1767
1768 if value.is_finite() {
1769 let mut use_scientific = false;
1770 let mut strip_trailing_0s = false;
1771 let mut abs = value.abs();
1772 let mut exponent = abs.log10().floor() as i32;
1773 let mut precision = match self.precision {
1774 NumericParam::Literal(p) => p,
1775 _ => 6,
1776 };
1777 match self.conversion_type {
1778 ConversionType::DecFloatLower => {
1779 }
1781 ConversionType::SciFloatLower => {
1782 use_scientific = true;
1783 }
1784 ConversionType::SciFloatUpper => {
1785 use_scientific = true;
1786 }
1787 ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
1788 strip_trailing_0s = true;
1789 if precision == 0 {
1790 precision = 1;
1791 }
1792 let rounding_factor =
1795 10.0_f64.powf((precision - 1 - exponent) as f64);
1796 let rounded_fixed = (abs * rounding_factor).round();
1797 abs = rounded_fixed / rounding_factor;
1798 exponent = abs.log10().floor() as i32;
1799 if exponent < -4 || exponent >= precision {
1800 use_scientific = true;
1801 precision -= 1;
1802 } else {
1803 precision -= 1 + exponent;
1805 }
1806 }
1807 _ => {
1808 return exec_err!(
1809 "Invalid conversion type: {:?} for float",
1810 self.conversion_type
1811 );
1812 }
1813 }
1814
1815 if use_scientific {
1816 let mantissa = abs / 10.0_f64.powf(exponent as f64);
1818 let exp_char = if upper { 'E' } else { 'e' };
1819 number = format!("{mantissa:.prec$}", prec = precision as usize);
1820 if strip_trailing_0s {
1821 number = trim_trailing_0s(&number).to_owned();
1822 }
1823 number = format!("{number}{exp_char}{exponent:+03}");
1824 } else {
1825 number = format!("{abs:.prec$}", prec = precision as usize);
1826 if strip_trailing_0s {
1827 number = trim_trailing_0s(&number).to_owned();
1828 }
1829 if self.grouping_separator {
1830 number = insert_thousands_separator(&number);
1831 }
1832 }
1833 if self.alt_form && !number.contains('.') {
1834 number += ".";
1835 }
1836 } else {
1837 match self.conversion_type {
1839 ConversionType::DecFloatLower
1840 | ConversionType::SciFloatLower
1841 | ConversionType::CompactFloatLower => {
1842 if value.is_infinite() {
1843 number.push_str("Infinity")
1844 } else {
1845 number.push_str("NaN")
1846 }
1847 }
1848 ConversionType::SciFloatUpper | ConversionType::CompactFloatUpper => {
1849 if value.is_infinite() {
1850 number.push_str("INFINITY")
1851 } else {
1852 number.push_str("NAN")
1853 }
1854 }
1855 _ => {
1856 return exec_err!(
1857 "Invalid conversion type: {:?} for float",
1858 self.conversion_type
1859 );
1860 }
1861 }
1862 }
1863 let NumericParam::Literal(width) = self.width else {
1865 writer.push_str(&prefix);
1866 writer.push_str(&number);
1867 writer.push_str(&suffix);
1868 return Ok(());
1869 };
1870 if self.left_adj {
1871 let mut full_num = prefix + &number + &suffix;
1872 while full_num.len() < width as usize {
1873 full_num.push(' ');
1874 }
1875 writer.push_str(&full_num);
1876 } else if self.zero_pad && value.is_finite() {
1877 while prefix.len() + number.len() + suffix.len() < width as usize {
1878 prefix.push('0');
1879 }
1880 writer.push_str(&prefix);
1881 writer.push_str(&number);
1882 writer.push_str(&suffix);
1883 } else {
1884 let mut full_num = prefix + &number + &suffix;
1885 while full_num.len() < width as usize {
1886 full_num = " ".to_owned() + &full_num;
1887 }
1888 writer.push_str(&full_num);
1889 };
1890
1891 Ok(())
1892 }
1893
1894 fn format_signed(&self, writer: &mut String, value: i64) -> Result<()> {
1895 let negative = value < 0;
1896 let abs_val = value.abs();
1897
1898 let (sign_prefix, sign_suffix) = if negative && self.negative_in_parentheses {
1899 ("(".to_owned(), ")".to_owned())
1900 } else if negative {
1901 ("-".to_owned(), "".to_owned())
1902 } else if self.force_sign {
1903 ("+".to_owned(), "".to_owned())
1904 } else if self.space_sign {
1905 (" ".to_owned(), "".to_owned())
1906 } else {
1907 ("".to_owned(), "".to_owned())
1908 };
1909
1910 let mut mod_spec = *self;
1911 mod_spec.width = match self.width {
1912 NumericParam::Literal(w) => NumericParam::Literal(
1913 w - sign_prefix.len() as i32 - sign_suffix.len() as i32,
1914 ),
1915 _ => NumericParam::FromArgument,
1916 };
1917 let mut formatted = String::new();
1918 mod_spec.format_unsigned(&mut formatted, abs_val as u64)?;
1919 let mut actual_number = &formatted[0..];
1921 let mut leading_spaces = &formatted[0..0];
1922 if let Some(first_non_space) = formatted.find(|c| c != ' ') {
1923 actual_number = &formatted[first_non_space..];
1924 leading_spaces = &formatted[0..first_non_space];
1925 }
1926 write!(
1927 writer,
1928 "{}{}{}{}",
1929 leading_spaces.to_owned(),
1930 sign_prefix,
1931 actual_number,
1932 sign_suffix
1933 )
1934 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1935 Ok(())
1936 }
1937
1938 fn format_unsigned(&self, writer: &mut String, value: u64) -> Result<()> {
1939 let mut s = String::new();
1940 let mut alt_prefix = "";
1941 match self.conversion_type {
1942 ConversionType::DecInt => {
1943 let num_str = format!("{value}");
1944 s = if self.grouping_separator {
1945 insert_thousands_separator(&num_str)
1946 } else {
1947 num_str
1948 };
1949 }
1950 ConversionType::HexIntLower => {
1951 alt_prefix = "0x";
1952 write!(&mut s, "{value:x}")
1953 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1954 }
1955 ConversionType::HexIntUpper => {
1956 alt_prefix = "0X";
1957 write!(&mut s, "{value:X}")
1958 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1959 }
1960 ConversionType::OctInt => {
1961 alt_prefix = "0";
1962 write!(&mut s, "{value:o}")
1963 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
1964 }
1965 _ => {
1966 return exec_err!(
1967 "Invalid conversion type: {:?} for u64",
1968 self.conversion_type
1969 );
1970 }
1971 }
1972 let mut prefix = if self.alt_form {
1973 alt_prefix.to_owned()
1974 } else {
1975 String::new()
1976 };
1977
1978 let formatted = if let NumericParam::Literal(width) = self.width {
1979 if self.left_adj {
1980 let mut num_str = prefix + &s;
1981 while num_str.len() < width as usize {
1982 num_str.push(' ');
1983 }
1984 num_str
1985 } else if self.zero_pad {
1986 while prefix.len() + s.len() < width as usize {
1987 prefix.push('0');
1988 }
1989 prefix + &s
1990 } else {
1991 let mut num_str = prefix + &s;
1992 while num_str.len() < width as usize {
1993 num_str = " ".to_owned() + &num_str;
1994 }
1995 num_str
1996 }
1997 } else {
1998 prefix + &s
1999 };
2000 write!(writer, "{formatted}")
2001 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2002 Ok(())
2003 }
2004
2005 fn format_str(&self, writer: &mut String, value: &str) -> Result<()> {
2006 let precision: usize = match self.precision {
2008 NumericParam::Literal(p) => p,
2009 _ => i32::MAX,
2010 }
2011 .try_into()
2012 .unwrap_or_default();
2013 let content_len = {
2014 let mut content_len = precision.min(value.len());
2015 while !value.is_char_boundary(content_len) {
2016 content_len -= 1;
2017 }
2018 content_len
2019 };
2020 let content = &value[..content_len];
2021
2022 if let NumericParam::Literal(width) = self.width {
2025 let start_len = writer.len();
2026 if self.left_adj {
2027 writer.push_str(content);
2028 while writer.len() - start_len < width as usize {
2029 writer.push(' ');
2030 }
2031 } else {
2032 while writer.len() - start_len + content.len() < width as usize {
2033 writer.push(' ');
2034 }
2035 writer.push_str(content);
2036 }
2037 } else {
2038 writer.push_str(content);
2039 }
2040 Ok(())
2041 }
2042
2043 fn format_string(&self, writer: &mut String, value: &str) -> Result<()> {
2044 if self.conversion_type.is_upper() {
2045 let upper = value.to_ascii_uppercase();
2046 self.format_str(writer, &upper)
2047 } else {
2048 self.format_str(writer, value)
2049 }
2050 }
2051
2052 fn format_decimal(&self, writer: &mut String, value: &str, scale: i64) -> Result<()> {
2053 self.validate_grouping_separator()?;
2054
2055 let mut prefix = String::new();
2056 let upper = self.conversion_type.is_upper();
2057
2058 let decimal = value
2060 .parse::<BigInt>()
2061 .map_err(|e| exec_datafusion_err!("Failed to parse decimal: {}", e))?;
2062 let decimal = BigDecimal::from_bigint(decimal, scale);
2063
2064 let is_negative = decimal.sign() == Sign::Minus;
2070 let abs_decimal = decimal.abs();
2071
2072 if is_negative {
2073 prefix.push('-');
2074 } else if self.space_sign {
2075 prefix.push(' ');
2076 } else if self.force_sign {
2077 prefix.push('+');
2078 }
2079
2080 let exp_symb = if upper { 'E' } else { 'e' };
2081 let mut strip_trailing_0s = false;
2082
2083 let mut precision = match self.precision {
2085 NumericParam::Literal(p) => p,
2086 _ => 6,
2087 };
2088
2089 let number = match self.conversion_type {
2090 ConversionType::DecFloatLower => {
2091 let mut n = self.format_decimal_fixed(
2093 &abs_decimal,
2094 precision,
2095 strip_trailing_0s,
2096 )?;
2097 if self.grouping_separator {
2098 n = insert_thousands_separator(&n);
2099 }
2100 n
2101 }
2102 ConversionType::SciFloatLower => self.format_decimal_scientific(
2103 &abs_decimal,
2104 precision,
2105 'e',
2106 strip_trailing_0s,
2107 )?,
2108 ConversionType::SciFloatUpper => self.format_decimal_scientific(
2109 &abs_decimal,
2110 precision,
2111 'E',
2112 strip_trailing_0s,
2113 )?,
2114 ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => {
2115 strip_trailing_0s = true;
2116 if precision == 0 {
2117 precision = 1;
2118 }
2119 let log10_val = abs_decimal.to_f64().map(|f| f.log10()).unwrap_or(0.0);
2121 if log10_val < -4.0 || log10_val >= precision as f64 {
2122 self.format_decimal_scientific(
2123 &abs_decimal,
2124 precision - 1,
2125 exp_symb,
2126 strip_trailing_0s,
2127 )?
2128 } else {
2129 let mut n = self.format_decimal_fixed(
2130 &abs_decimal,
2131 precision - 1 - log10_val.floor() as i32,
2132 strip_trailing_0s,
2133 )?;
2134 if self.grouping_separator {
2135 n = insert_thousands_separator(&n);
2136 }
2137 n
2138 }
2139 }
2140 _ => {
2141 return exec_err!(
2142 "Invalid conversion type: {:?} for decimal",
2143 self.conversion_type
2144 );
2145 }
2146 };
2147
2148 let NumericParam::Literal(width) = self.width else {
2150 writer.push_str(&prefix);
2151 writer.push_str(&number);
2152 return Ok(());
2153 };
2154
2155 if self.left_adj {
2156 let mut full_num = prefix + &number;
2157 while full_num.len() < width as usize {
2158 full_num.push(' ');
2159 }
2160 writer.push_str(&full_num);
2161 } else if self.zero_pad {
2162 while prefix.len() + number.len() < width as usize {
2163 prefix.push('0');
2164 }
2165 writer.push_str(&prefix);
2166 writer.push_str(&number);
2167 } else {
2168 let mut full_num = prefix + &number;
2169 while full_num.len() < width as usize {
2170 full_num = " ".to_owned() + &full_num;
2171 }
2172 writer.push_str(&full_num);
2173 }
2174
2175 Ok(())
2176 }
2177
2178 fn format_decimal_fixed(
2179 &self,
2180 decimal: &BigDecimal,
2181 precision: i32,
2182 strip_trailing_0s: bool,
2183 ) -> Result<String> {
2184 if precision <= 0 {
2185 Ok(decimal.round(0).to_string())
2186 } else {
2187 let scaled = decimal.round(precision as i64);
2189 let mut number = scaled.to_string();
2190 if strip_trailing_0s {
2191 number = trim_trailing_0s(&number).to_owned();
2192 }
2193 Ok(number)
2194 }
2195 }
2196
2197 fn format_decimal_scientific(
2198 &self,
2199 decimal: &BigDecimal,
2200 precision: i32,
2201 exp_char: char,
2202 strip_trailing_0s: bool,
2203 ) -> Result<String> {
2204 let float_val = decimal.to_f64().unwrap_or(0.0);
2206 if float_val == 0.0 {
2207 return Ok(format!("0{exp_char}+00"));
2208 }
2209
2210 let abs_val = float_val.abs();
2211 let exponent = abs_val.log10().floor() as i32;
2212 let mantissa = abs_val / 10.0_f64.powf(exponent as f64);
2213
2214 let mut number = if precision <= 0 {
2215 format!("{mantissa:.0}")
2216 } else {
2217 format!("{mantissa:.prec$}", prec = precision as usize)
2218 };
2219
2220 if strip_trailing_0s {
2221 number = trim_trailing_0s(&number).to_owned();
2222 }
2223
2224 Ok(format!("{number}{exp_char}{exponent:+03}"))
2225 }
2226
2227 fn format_time(
2228 &self,
2229 writer: &mut String,
2230 timestamp_nanos: i64,
2231 timezone: &Option<Arc<str>>,
2232 ) -> Result<()> {
2233 let upper = self.conversion_type.is_upper();
2234 match &self.conversion_type {
2235 ConversionType::TimeLower(time_format)
2236 | ConversionType::TimeUpper(time_format) => {
2237 let formatted =
2238 self.format_time_component(timestamp_nanos, *time_format, timezone)?;
2239 let result = if upper {
2240 formatted.to_uppercase()
2241 } else {
2242 formatted
2243 };
2244 write!(writer, "{result}")
2245 .map_err(|e| exec_datafusion_err!("Write error: {}", e))?;
2246 Ok(())
2247 }
2248 _ => exec_err!(
2249 "Invalid conversion type for time: {:?}",
2250 self.conversion_type
2251 ),
2252 }
2253 }
2254
2255 fn format_date(&self, writer: &mut String, date_days: i64) -> Result<()> {
2256 let timestamp_nanos = date_days * 24 * 60 * 60 * 1_000_000_000;
2258 self.format_time(writer, timestamp_nanos, &None)
2259 }
2260
2261 fn format_time_component(
2262 &self,
2263 timestamp_nanos: i64,
2264 time_format: TimeFormat,
2265 _timezone: &Option<Arc<str>>,
2266 ) -> Result<String> {
2267 let secs = timestamp_nanos / 1_000_000_000;
2269 let nanos = (timestamp_nanos % 1_000_000_000) as u32;
2270
2271 let dt = DateTime::<Utc>::from_timestamp(secs, nanos).ok_or_else(|| {
2273 exec_datafusion_err!("Invalid timestamp: {}", timestamp_nanos)
2274 })?;
2275
2276 match time_format {
2277 TimeFormat::HUpper => Ok(format!("{:02}", dt.hour())),
2278 TimeFormat::IUpper => {
2279 let hour_12 = match dt.hour12() {
2280 (true, h) => h, (false, h) => h, };
2283 Ok(format!("{hour_12:02}"))
2284 }
2285 TimeFormat::KLower => Ok(format!("{}", dt.hour())),
2286 TimeFormat::LLower => {
2287 let hour_12 = match dt.hour12() {
2288 (true, h) => h, (false, h) => h, };
2291 Ok(format!("{hour_12}"))
2292 }
2293 TimeFormat::MUpper => Ok(format!("{:02}", dt.minute())),
2294 TimeFormat::SUpper => Ok(format!("{:02}", dt.second())),
2295 TimeFormat::LUpper => Ok(format!("{:03}", dt.timestamp_millis() % 1000)),
2296 TimeFormat::NUpper => Ok(format!("{:09}", dt.nanosecond())),
2297 TimeFormat::PLower => {
2298 let (is_pm, _) = dt.hour12();
2299 Ok(if is_pm {
2300 "pm".to_string()
2301 } else {
2302 "am".to_string()
2303 })
2304 }
2305 TimeFormat::ZLower => Ok("+0000".to_string()), TimeFormat::ZUpper => Ok("UTC".to_string()), TimeFormat::SLower => Ok(format!("{}", dt.timestamp())),
2308 TimeFormat::QUpper => Ok(format!("{}", dt.timestamp_millis())),
2309 TimeFormat::BUpper => Ok(dt.format("%B").to_string()), TimeFormat::BLower => Ok(dt.format("%b").to_string()), TimeFormat::AUpper => Ok(dt.format("%A").to_string()), TimeFormat::ALower => Ok(dt.format("%a").to_string()), TimeFormat::CUpper => Ok(format!("{:02}", dt.year() / 100)),
2314 TimeFormat::YUpper => Ok(format!("{:04}", dt.year())),
2315 TimeFormat::YLower => Ok(format!("{:02}", dt.year() % 100)),
2316 TimeFormat::JLower => Ok(format!("{:03}", dt.ordinal())), TimeFormat::MLower => Ok(format!("{:02}", dt.month())),
2318 TimeFormat::DLower => Ok(format!("{:02}", dt.day())),
2319 TimeFormat::ELower => Ok(format!("{}", dt.day())),
2320 TimeFormat::RUpper => Ok(dt.format("%H:%M").to_string()),
2321 TimeFormat::TUpper => Ok(dt.format("%H:%M:%S").to_string()),
2322 TimeFormat::RLower => {
2323 let (is_pm, hour_12) = dt.hour12();
2324 let am_pm = if is_pm { "PM" } else { "AM" };
2325 Ok(format!(
2326 "{:02}:{:02}:{:02} {}",
2327 hour_12,
2328 dt.minute(),
2329 dt.second(),
2330 am_pm
2331 ))
2332 }
2333 TimeFormat::DUpper => Ok(dt.format("%m/%d/%y").to_string()),
2334 TimeFormat::FUpper => Ok(dt.format("%Y-%m-%d").to_string()),
2335 TimeFormat::CLower => Ok(dt.format("%a %b %d %H:%M:%S UTC %Y").to_string()),
2336 }
2337 }
2338}
2339
/// Floating-point values that can be rendered as plain text, with special
/// spellings for NaN and the infinities.
trait FloatFormattable: std::fmt::Display {
    /// Classification of the value (NaN, infinite, zero, subnormal or normal).
    fn category(&self) -> FpCategory;

    /// Whether the sign of the value is negative.
    fn negative(&self) -> bool;

    /// Text form of the value: `NaN`, `Infinity` / `-Infinity`, or the
    /// `Display` output for every other value.
    fn spark_string(&self) -> String {
        match self.category() {
            FpCategory::Infinite if self.negative() => String::from("-Infinity"),
            FpCategory::Infinite => String::from("Infinity"),
            FpCategory::Nan => String::from("NaN"),
            _ => self.to_string(),
        }
    }
}
2358
2359impl FloatFormattable for f32 {
2360 fn category(&self) -> FpCategory {
2361 self.classify()
2362 }
2363
2364 fn negative(&self) -> bool {
2365 self.is_sign_negative()
2366 }
2367}
2368
2369impl FloatFormattable for f64 {
2370 fn category(&self) -> FpCategory {
2371 self.classify()
2372 }
2373
2374 fn negative(&self) -> bool {
2375 self.is_sign_negative()
2376 }
2377}
2378
2379trait FloatBits: FloatFormattable {
2380 const MANTISSA_BITS: u8;
2381 const EXPONENT_BIAS: u16;
2382 const SCALEUP_POWER: u8;
2383 const SCALEUP: Self;
2384
2385 fn to_parts(&self) -> (bool, u16, u64);
2386}
2387
2388impl FloatBits for f64 {
2389 const MANTISSA_BITS: u8 = 52;
2390 const EXPONENT_BIAS: u16 = 1023;
2391 const SCALEUP_POWER: u8 = 54;
2392 const SCALEUP: f64 = (1_i64 << Self::SCALEUP_POWER) as f64;
2393
2394 fn to_parts(&self) -> (bool, u16, u64) {
2395 let bits = self.to_bits();
2396 let sign: bool = (bits >> 63) == 1;
2397 let exponent = ((bits >> 52) & 0x7FF) as u16;
2398 let mantissa = bits & 0x000F_FFFF_FFFF_FFFF;
2399 (sign, exponent, mantissa)
2400 }
2401}
2402
/// Inserts a `,` between every group of three characters of the integer part
/// of `number` (counted from the right); everything from the first `.` onwards
/// is copied unchanged.
fn insert_thousands_separator(number: &str) -> String {
    let split_at = number.find('.').unwrap_or(number.len());
    let (whole, fraction) = number.split_at(split_at);

    let mut grouped = String::with_capacity(number.len() + number.len() / 3);
    for (offset, ch) in whole.char_indices() {
        let remaining = whole.len() - offset;
        if offset != 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(ch);
    }
    grouped.push_str(fraction);
    grouped
}
2420
/// Removes trailing `0` characters from `number` when it contains a decimal
/// point; the point itself is kept. Text without a `.` is returned unchanged.
fn trim_trailing_0s(number: &str) -> &str {
    if !number.contains('.') {
        return number;
    }
    // The `.` is never a `0`, so trimming always stops at or before it.
    number.trim_end_matches('0')
}
2431
/// Removes trailing `0` characters from a string of hex digits. Input that
/// consists only of zeros (or is empty) is returned unchanged.
fn trim_trailing_0s_hex(number: &str) -> &str {
    number
        .char_indices()
        .rev()
        .find(|&(_, ch)| ch != '0')
        .map_or(number, |(idx, ch)| &number[..idx + ch.len_utf8()])
}
2440
/// Unit tests for `format_string`: return-field nullability, `%c` code point
/// validation, and the `,` grouping-separator flag for floats and decimals.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::StringArray;
    use arrow::datatypes::DataType::Utf8;

    /// The returned field must carry the same nullability as the format argument.
    #[test]
    fn test_format_string_nullability() -> Result<()> {
        let func = FormatStringFunc::new();
        let nullable_format: FieldRef = Arc::new(Field::new("fmt", Utf8, true));

        let out_nullable = func.return_field_from_args(ReturnFieldArgs {
            arg_fields: &[nullable_format],
            scalar_arguments: &[None],
        })?;

        assert!(
            out_nullable.is_nullable(),
            "format_string(fmt, ...) should be nullable when fmt is nullable"
        );
        let non_nullable_format: FieldRef = Arc::new(Field::new("fmt", Utf8, false));

        let out_non_nullable = func.return_field_from_args(ReturnFieldArgs {
            arg_fields: &[non_nullable_format],
            scalar_arguments: &[None],
        })?;

        assert!(
            !out_non_nullable.is_nullable(),
            "format_string(fmt, ...) should NOT be nullable when fmt is NOT nullable"
        );

        Ok(())
    }

    /// `%c` must reject integers that are not Unicode scalar values instead of
    /// producing output.
    #[test]
    fn test_format_char_invalid_codepoint_errors() {
        use arrow::datatypes::Field;
        use datafusion_common::config::ConfigOptions;

        let func = FormatStringFunc::new();
        // Each case is outside the set of Unicode scalar values: negative numbers,
        // values above U+10FFFF, and the surrogate code point U+D800.
        let cases: Vec<(&str, ScalarValue)> = vec![
            ("Int8(-1)", ScalarValue::Int8(Some(-1))),
            ("Int16(-1)", ScalarValue::Int16(Some(-1))),
            ("Int16(-10000)", ScalarValue::Int16(Some(-10000))),
            ("Int32(-1)", ScalarValue::Int32(Some(-1))),
            ("Int32(0x110000)", ScalarValue::Int32(Some(0x110000))),
            ("Int64(0x1FFFFFFFF)", ScalarValue::Int64(Some(0x1FFFFFFFF))),
            ("Int64(-1)", ScalarValue::Int64(Some(-1))),
            ("UInt16(0xD800)", ScalarValue::UInt16(Some(0xD800))),
            ("UInt32(0x110000)", ScalarValue::UInt32(Some(0x110000))),
            (
                "UInt64(0x1_0000_0000)",
                ScalarValue::UInt64(Some(0x1_0000_0000)),
            ),
        ];

        for (label, value) in cases {
            let fmt = ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string())));
            let arg_data_type = value.data_type();
            let arg = ColumnarValue::Scalar(value);
            let arg_fields = vec![
                Arc::new(Field::new("fmt", Utf8, false)),
                Arc::new(Field::new("v", arg_data_type, false)),
            ];
            let res = func.invoke_with_args(ScalarFunctionArgs {
                args: vec![fmt, arg],
                number_rows: 1,
                arg_fields,
                return_field: Arc::new(Field::new("o", Utf8, false)),
                config_options: Arc::new(ConfigOptions::default()),
            });
            assert!(
                res.is_err(),
                "format_string('[%c]', {label}) should error, got Ok"
            );
            // The failure must be the dedicated %c error, not an unrelated one.
            let err = res.unwrap_err().to_string();
            assert!(
                err.contains("invalid Unicode scalar value for %c"),
                "unexpected error for {label}: {err}"
            );
        }
    }

    /// `%c` accepts valid code points from every tested integer type.
    #[test]
    fn test_format_char_valid_codepoint_succeeds() {
        // Supplementary-plane code point supplied as Int32.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Int32(Some(0x1F680))),
            ],
            Ok(Some("[\u{1F680}]")),
            &str,
            Utf8,
            StringArray
        );
        // U+10FFFF is the largest Unicode scalar value.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::UInt32(Some(0x10FFFF))),
            ],
            Ok(Some("[\u{10FFFF}]")),
            &str,
            Utf8,
            StringArray
        );
        // ASCII 'A' supplied as Int16.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Int16(Some(65))),
            ],
            Ok(Some("[A]")),
            &str,
            Utf8,
            StringArray
        );
        // ASCII 'a' supplied as Int8.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Int8(Some(97))),
            ],
            Ok(Some("[a]")),
            &str,
            Utf8,
            StringArray
        );
        // UInt8(255) is expected to render as U+00FF.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("[%c]".to_string()))),
                ColumnarValue::Scalar(ScalarValue::UInt8(Some(255))),
            ],
            Ok(Some("[\u{00FF}]")),
            &str,
            Utf8,
            StringArray
        );
    }

    /// Direct checks of the helper: commas every three integer digits, fraction untouched.
    #[test]
    fn test_insert_thousands_separator() {
        assert_eq!(insert_thousands_separator("1234567.89"), "1,234,567.89");
        assert_eq!(insert_thousands_separator("123.45"), "123.45");
        assert_eq!(insert_thousands_separator("1234"), "1,234");
        assert_eq!(insert_thousands_separator("12"), "12");
        assert_eq!(insert_thousands_separator("0.5"), "0.5");
        assert_eq!(
            insert_thousands_separator("1234567890.1234"),
            "1,234,567,890.1234"
        );
        assert_eq!(insert_thousands_separator("1000"), "1,000");
        assert_eq!(insert_thousands_separator("100"), "100");
    }

    /// `%,.2f` groups the integer part of a Float64.
    #[test]
    fn test_grouping_separator_float() -> Result<()> {
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// `%,.2f` groups the integer part of a Decimal128 (123456789 at scale 2).
    #[test]
    fn test_grouping_separator_decimal() -> Result<()> {
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `,` flag combined with `%e` / `%E` is rejected for Float64 input.
    #[test]
    fn test_grouping_separator_scientific_float() -> Result<()> {
        // Lowercase conversion.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        // Uppercase conversion; the message names 'E'.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,E".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'E'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        // An explicit precision does not change the outcome.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// `%,g` / `%,G` on Float64: output in scientific form carries no grouping,
    /// output in fixed form is grouped.
    #[test]
    fn test_grouping_separator_compact_float() -> Result<()> {
        // Rendered in scientific form, so no separators appear.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1.23457e+06")),
            &str,
            Utf8,
            StringArray
        );
        // Rendered in fixed form, so the integer part is grouped.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(12345.6))),
            ],
            Ok(Some("12,345.6")),
            &str,
            Utf8,
            StringArray
        );
        // Precision 0, still scientific form.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1e+06")),
            &str,
            Utf8,
            StringArray
        );
        // Uppercase variant uses an uppercase exponent marker.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,G".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1.23457E+06")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// The `,` flag combined with `%e` is rejected for Decimal128 input as well.
    #[test]
    fn test_grouping_separator_scientific_decimal() -> Result<()> {
        // Default precision.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        // Explicit precision 0.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Err(DataFusionError::Execution(
                "Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
            )),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// `%,g` on Decimal128 is expected to match the Float64 results for the same values.
    #[test]
    fn test_grouping_separator_compact_decimal() -> Result<()> {
        // 1234567.89: scientific form, no separators.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("1.23457e+06")),
            &str,
            Utf8,
            StringArray
        );
        // 12345.60: fixed form, grouped.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(1234560), 10, 2)),
            ],
            Ok(Some("12,345.6")),
            &str,
            Utf8,
            StringArray
        );
        // Precision 0, scientific form.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("1e+06")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// Grouping combined with width 15 and the `0`, `+` and `-` flags on Float64.
    ///
    /// Every expected string is exactly 15 characters long.
    #[test]
    fn test_grouping_separator_width_sign_float() -> Result<()> {
        // Zero padding: three '0' characters precede the 12-character number.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("0001,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        // Explicit sign, right-aligned: two spaces precede the 13-character number.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("  +1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        // Left-aligned: three spaces follow the 12-character number.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%-,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
            ],
            Ok(Some("1,234,567.89   ")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// Grouping combined with width 15 and the `0` and `+` flags on Decimal128.
    ///
    /// Every expected string is exactly 15 characters long.
    #[test]
    fn test_grouping_separator_width_sign_decimal() -> Result<()> {
        // Zero padding: three '0' characters precede the 12-character number.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("0001,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        // Explicit sign, right-aligned: two spaces precede the 13-character number.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
            ],
            Ok(Some("  +1,234,567.89")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// `%(,15.2f` on a negative Float64 wraps the grouped value in parentheses.
    #[test]
    fn test_grouping_separator_parentheses_float() -> Result<()> {
        // "(1,234.50)" is 10 characters, so five spaces pad it to width 15.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%(,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Float64(Some(-1234.5))),
            ],
            Ok(Some("     (1,234.50)")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }

    /// `%(,15.2f` on a negative Decimal128 is expected to keep the minus sign.
    ///
    /// NOTE(review): this differs from the Float64 expectation, which uses
    /// parentheses; the test pins the decimal behavior as-is. Confirm whether
    /// the difference is intentional.
    #[test]
    fn test_grouping_separator_parentheses_decimal() -> Result<()> {
        // "-1,234.50" is 9 characters, so six spaces pad it to width 15.
        test_scalar_function!(
            FormatStringFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%(,15.2f".to_string()))),
                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(-123450), 10, 2)),
            ],
            Ok(Some("      -1,234.50")),
            &str,
            Utf8,
            StringArray
        );
        Ok(())
    }
}