1use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a nanosecond count.
///
/// `O` is subtracted (wrapping) from every entry before it is used as a decimal digit: pass
/// `b'0'` for ASCII input and `0` for entries that are already numeric. The `N` digits are
/// treated as the leading digits of a nine-digit fraction, so the accumulated value is
/// multiplied by `10^(9 - N)`.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for digit in &digits[..N] {
        value = value * 10 + digit.wrapping_sub(O) as u32;
    }
    value * 10_u32.pow((9 - N) as _)
}
38
/// Fixed-size scratch state used while parsing a timestamp string.
struct TimestampParser {
    /// The input bytes with `b'0'` subtracted (wrapping); positions past the end of the
    /// input keep their initial value of `0`
    digits: [u8; 32],
    /// Bit `i` is set when `digits[i]` is below 10, i.e. input byte `i` was an ASCII digit
    mask: u32,
}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306 let bytes = s.as_bytes();
307 if bytes.len() < 4 {
308 return None;
309 }
310
311 let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312 Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313 Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314 _ => (None, bytes),
315 };
316
317 if bytes.len() < 4 {
318 return None;
319 }
320
321 let mut digits = [b'0'; 6];
322
323 let bytes = match (bytes[1], bytes[2]) {
325 (b':', _) => {
326 digits[1] = bytes[0];
327 &bytes[2..]
328 }
329 (_, b':') => {
330 digits[0] = bytes[0];
331 digits[1] = bytes[1];
332 &bytes[3..]
333 }
334 _ => return None,
335 };
336
337 if bytes.len() < 2 {
338 return None; }
340
341 digits[2] = bytes[0];
343 digits[3] = bytes[1];
344
345 let nanoseconds = match bytes.get(2) {
346 Some(b':') => {
347 if bytes.len() < 5 {
348 return None;
349 }
350
351 digits[4] = bytes[3];
353 digits[5] = bytes[4];
354
355 match bytes.get(5) {
357 Some(b'.') => {
358 let decimal = &bytes[6..];
359 if decimal.iter().any(|x| !x.is_ascii_digit()) {
360 return None;
361 }
362 match decimal.len() {
363 0 => return None,
364 1 => parse_nanos::<1, b'0'>(decimal),
365 2 => parse_nanos::<2, b'0'>(decimal),
366 3 => parse_nanos::<3, b'0'>(decimal),
367 4 => parse_nanos::<4, b'0'>(decimal),
368 5 => parse_nanos::<5, b'0'>(decimal),
369 6 => parse_nanos::<6, b'0'>(decimal),
370 7 => parse_nanos::<7, b'0'>(decimal),
371 8 => parse_nanos::<8, b'0'>(decimal),
372 _ => parse_nanos::<9, b'0'>(decimal),
373 }
374 }
375 Some(_) => return None,
376 None => 0,
377 }
378 }
379 Some(_) => return None,
380 None => 0,
381 };
382
383 digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384 if digits.iter().any(|x| *x > 9) {
385 return None;
386 }
387
388 let hour = match (digits[0] * 10 + digits[1], am) {
389 (12, Some(true)) => 0, (h @ 1..=11, Some(true)) => h, (12, Some(false)) => 12, (h @ 1..=11, Some(false)) => h + 12, (_, Some(_)) => return None,
394 (h, None) => h,
395 };
396
397 let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399 60 => (59, nanoseconds + 1_000_000_000),
400 s => (s, nanoseconds),
401 };
402
403 NaiveTime::from_hms_nano_opt(
404 hour as _,
405 (digits[2] * 10 + digits[3]) as _,
406 second as _,
407 nanoseconds,
408 )
409}
410
/// Parses a string into the native value of an [`ArrowPrimitiveType`].
pub trait Parser: ArrowPrimitiveType {
    /// Parses `string`, returning `None` when it is not a valid value for this type.
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parses `string` using a caller-supplied format.
    ///
    /// The default implementation ignores `_format` and delegates to [`Parser::parse`].
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
/// Returns `true` when `opt` is `Some` and `f` accepts the contained value.
///
/// `f` is not called for `None`.
#[inline(always)]
fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
    opt.map_or(false, f)
}
474
475macro_rules! parser_primitive {
476 ($t:ty) => {
477 impl Parser for $t {
478 fn parse(string: &str) -> Option<Self::Native> {
479 if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
480 return None;
481 }
482 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
483 string.as_bytes(),
484 ) {
485 (Some(n), x) if x == string.len() => Some(n),
486 _ => None,
487 }
488 }
489 }
490 };
491}
492parser_primitive!(UInt64Type);
493parser_primitive!(UInt32Type);
494parser_primitive!(UInt16Type);
495parser_primitive!(UInt8Type);
496parser_primitive!(Int64Type);
497parser_primitive!(Int32Type);
498parser_primitive!(Int16Type);
499parser_primitive!(Int8Type);
500parser_primitive!(DurationNanosecondType);
501parser_primitive!(DurationMicrosecondType);
502parser_primitive!(DurationMillisecondType);
503parser_primitive!(DurationSecondType);
504
505impl Parser for TimestampNanosecondType {
506 fn parse(string: &str) -> Option<i64> {
507 string_to_timestamp_nanos(string).ok()
508 }
509}
510
511impl Parser for TimestampMicrosecondType {
512 fn parse(string: &str) -> Option<i64> {
513 let nanos = string_to_timestamp_nanos(string).ok();
514 nanos.map(|x| x / 1000)
515 }
516}
517
518impl Parser for TimestampMillisecondType {
519 fn parse(string: &str) -> Option<i64> {
520 let nanos = string_to_timestamp_nanos(string).ok();
521 nanos.map(|x| x / 1_000_000)
522 }
523}
524
525impl Parser for TimestampSecondType {
526 fn parse(string: &str) -> Option<i64> {
527 let nanos = string_to_timestamp_nanos(string).ok();
528 nanos.map(|x| x / 1_000_000_000)
529 }
530}
531
532impl Parser for Time64NanosecondType {
533 fn parse(string: &str) -> Option<Self::Native> {
535 string_to_time_nanoseconds(string)
536 .ok()
537 .or_else(|| string.parse::<Self::Native>().ok())
538 }
539
540 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
541 let nt = NaiveTime::parse_from_str(string, format).ok()?;
542 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
543 }
544}
545
546impl Parser for Time64MicrosecondType {
547 fn parse(string: &str) -> Option<Self::Native> {
549 string_to_time_nanoseconds(string)
550 .ok()
551 .map(|nanos| nanos / 1_000)
552 .or_else(|| string.parse::<Self::Native>().ok())
553 }
554
555 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
556 let nt = NaiveTime::parse_from_str(string, format).ok()?;
557 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
558 }
559}
560
561impl Parser for Time32MillisecondType {
562 fn parse(string: &str) -> Option<Self::Native> {
564 string_to_time_nanoseconds(string)
565 .ok()
566 .map(|nanos| (nanos / 1_000_000) as i32)
567 .or_else(|| string.parse::<Self::Native>().ok())
568 }
569
570 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
571 let nt = NaiveTime::parse_from_str(string, format).ok()?;
572 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
573 }
574}
575
576impl Parser for Time32SecondType {
577 fn parse(string: &str) -> Option<Self::Native> {
579 string_to_time_nanoseconds(string)
580 .ok()
581 .map(|nanos| (nanos / 1_000_000_000) as i32)
582 .or_else(|| string.parse::<Self::Native>().ok())
583 }
584
585 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
586 let nt = NaiveTime::parse_from_str(string, format).ok()?;
587 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
588 }
589}
590
/// Offset subtracted from `NaiveDate::num_days_from_ce()` to obtain a `Date32` day count.
// NOTE(review): presumably the CE day number of 1970-01-01 (the Unix epoch) — confirm.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;
593
/// Error message used when a date-time cannot be expressed as an `i64` nanosecond timestamp.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
596
597fn parse_date(string: &str) -> Option<NaiveDate> {
598 if string.len() > 10 {
599 return string_to_datetime(&Utc, string)
601 .map(|dt| dt.date_naive())
602 .ok();
603 };
604 let mut digits = [0; 10];
605 let mut mask = 0;
606
607 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
609 *o = i.wrapping_sub(b'0');
610 mask |= ((*o < 10) as u16) << idx
611 }
612
613 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
614
615 if digits[4] != HYPHEN {
617 let (year, month, day) = match (mask, string.len()) {
618 (0b11111111, 8) => (
619 digits[0] as u16 * 1000
620 + digits[1] as u16 * 100
621 + digits[2] as u16 * 10
622 + digits[3] as u16,
623 digits[4] * 10 + digits[5],
624 digits[6] * 10 + digits[7],
625 ),
626 _ => return None,
627 };
628 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
629 }
630
631 let (month, day) = match mask {
632 0b1101101111 => {
633 if digits[7] != HYPHEN {
634 return None;
635 }
636 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
637 }
638 0b101101111 => {
639 if digits[7] != HYPHEN {
640 return None;
641 }
642 (digits[5] * 10 + digits[6], digits[8])
643 }
644 0b110101111 => {
645 if digits[6] != HYPHEN {
646 return None;
647 }
648 (digits[5], digits[7] * 10 + digits[8])
649 }
650 0b10101111 => {
651 if digits[6] != HYPHEN {
652 return None;
653 }
654 (digits[5], digits[7])
655 }
656 _ => return None,
657 };
658
659 let year =
660 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
661
662 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
663}
664
665impl Parser for Date32Type {
666 fn parse(string: &str) -> Option<i32> {
667 let date = parse_date(string)?;
668 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
669 }
670
671 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
672 let date = NaiveDate::parse_from_str(string, format).ok()?;
673 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
674 }
675}
676
677impl Parser for Date64Type {
678 fn parse(string: &str) -> Option<i64> {
679 if string.len() <= 10 {
680 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
681 Some(datetime.and_utc().timestamp_millis())
682 } else {
683 let date_time = string_to_datetime(&Utc, string).ok()?;
684 Some(date_time.timestamp_millis())
685 }
686 }
687
688 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
689 use chrono::format::Fixed;
690 use chrono::format::StrftimeItems;
691 let fmt = StrftimeItems::new(format);
692 let has_zone = fmt.into_iter().any(|item| match item {
693 chrono::format::Item::Fixed(fixed_item) => matches!(
694 fixed_item,
695 Fixed::RFC2822
696 | Fixed::RFC3339
697 | Fixed::TimezoneName
698 | Fixed::TimezoneOffsetColon
699 | Fixed::TimezoneOffsetColonZ
700 | Fixed::TimezoneOffset
701 | Fixed::TimezoneOffsetZ
702 ),
703 _ => false,
704 });
705 if has_zone {
706 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
707 Some(date_time.timestamp_millis())
708 } else {
709 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
710 Some(date_time.and_utc().timestamp_millis())
711 }
712 }
713}
714
/// Finishes parsing a decimal string written in exponent notation (`e` / `E`).
///
/// Called by `parse_decimal` once it meets the exponent marker. The caller passes the state
/// it has accumulated so far:
///
/// - `s`: the complete input string
/// - `digits`: number of significant digits already folded into `result`
/// - `fractionals`: number of fractional digits already folded into `result`
/// - `result`: the unscaled value accumulated so far
/// - `index`: byte index the caller was at (the decimal point when a fraction was being
///   read, otherwise the exponent marker)
/// - `precision` / `scale`: target decimal precision and scale
///
/// Returns the value rescaled to `scale`, or a [`ArrowError::ParseError`] when the string
/// is malformed or the value needs more than `precision` digits.
fn parse_e_notation<T: DecimalType>(
    s: &str,
    mut digits: u16,
    mut fractionals: i16,
    mut result: T::Native,
    index: usize,
    precision: u16,
    scale: i16,
) -> Result<T::Native, ArrowError> {
    // Magnitude of the exponent; its sign is tracked in `pos_shift_direction`
    // NOTE(review): `exp` is an `i16` grown with plain `*=` / `+=`, so a long run of
    // exponent digits looks like it could overflow — confirm.
    let mut exp: i16 = 0;
    let base = T::Native::usize_as(10);

    let mut exp_start: bool = false;
    // Stays true unless the exponent carries an explicit '-'
    let mut pos_shift_direction: bool = true;

    // Resume reading after the part of the string the caller already consumed
    let mut bs;
    if fractionals > 0 {
        // Skip past the point (the `+ 1`) and the fractional digits already read
        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
    } else {
        bs = s.as_bytes().iter().skip(index);
    }

    while let Some(b) = bs.next() {
        match b {
            // Further mantissa digits that precede the exponent marker
            b'0'..=b'9' => {
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                if fractionals > 0 {
                    fractionals += 1;
                }
                digits += 1;
            }
            &b'e' | &b'E' => {
                exp_start = true;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        };

        if exp_start {
            // The byte after the marker is either a sign or the first exponent digit
            pos_shift_direction = match bs.next() {
                Some(&b'-') => false,
                Some(&b'+') => true,
                Some(b) => {
                    if !b.is_ascii_digit() {
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }

                    exp *= 10;
                    exp += (b - b'0') as i16;

                    true
                }
                None => {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )))
                }
            };

            // Everything that remains must be exponent digits; this drains the iterator,
            // which also ends the outer loop
            for b in bs.by_ref() {
                if !b.is_ascii_digit() {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )));
                }
                exp *= 10;
                exp += (b - b'0') as i16;
            }
        }
    }

    if digits == 0 && fractionals == 0 && exp == 0 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    if !pos_shift_direction {
        // A negative exponent larger than `digits + scale` yields zero
        if exp - (digits as i16 + scale) > 0 {
            return Ok(T::Native::usize_as(0));
        }
        exp *= -1;
    }

    // Number of fractional digits once the exponent has shifted the point
    exp = fractionals - exp;
    // With a negative exponent the shifted fraction can be longer than the digits read
    if !pos_shift_direction && exp > digits as i16 {
        digits = exp as u16;
    }
    // Power of ten still needed (positive) or to be dropped (negative) to reach `scale`
    exp = scale - exp;

    if (digits as i16 + exp) as u16 > precision {
        return Err(ArrowError::ParseError(format!(
            "parse decimal overflow ({s})"
        )));
    }

    if exp < 0 {
        result = result.div_wrapping(base.pow_wrapping(-exp as _));
    } else {
        result = result.mul_wrapping(base.pow_wrapping(exp as _));
    }

    Ok(result)
}
834
/// Parses the string `s` into the native integer representation of a decimal with the
/// given `precision` and `scale`.
///
/// Accepts an optional leading `+` / `-`, integer digits, an optional fractional part and
/// an optional exponent (`e` / `E`, handled by `parse_e_notation`). Leading zeros are not
/// counted as digits.
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] when `s` is empty, is only a sign, contains an
/// unexpected character, has no digits around a decimal point, or needs more than
/// `precision` digits.
pub fn parse_decimal<T: DecimalType>(
    s: &str,
    precision: u8,
    scale: i8,
) -> Result<T::Native, ArrowError> {
    let mut result = T::Native::usize_as(0);
    // Fractional digits folded into `result` so far
    let mut fractionals: i8 = 0;
    // Significant digits folded into `result` so far
    let mut digits: u8 = 0;
    let base = T::Native::usize_as(10);

    let bs = s.as_bytes();
    let (signed, negative) = match bs.first() {
        Some(b'-') => (true, true),
        Some(b'+') => (true, false),
        _ => (false, false),
    };

    if bs.is_empty() || signed && bs.len() == 1 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    // Walk the bytes with their indices, skipping the sign if there is one
    let mut bs = bs.iter().enumerate().skip(signed as usize);

    let mut is_e_notation = false;

    // Accumulation uses wrapping arithmetic; range is enforced through the digit-count
    // checks against `precision`
    #[allow(clippy::question_mark)]
    while let Some((index, b)) = bs.next() {
        match b {
            b'0'..=b'9' => {
                if digits == 0 && *b == b'0' {
                    // Leading zeros carry no information
                    continue;
                }
                digits += 1;
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
            }
            b'.' => {
                let point_index = index;

                // Consume the fractional part with the same iterator
                for (_, b) in bs.by_ref() {
                    if !b.is_ascii_digit() {
                        if *b == b'e' || *b == b'E' {
                            result = match parse_e_notation::<T>(
                                s,
                                digits as u16,
                                fractionals as i16,
                                result,
                                point_index,
                                precision as u16,
                                scale as i16,
                            ) {
                                Err(e) => return Err(e),
                                Ok(v) => v,
                            };

                            is_e_notation = true;

                            break;
                        }
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }
                    if fractionals == scale && scale != 0 {
                        // Enough fractional digits have been kept; the rest are only
                        // validated, not accumulated
                        // NOTE(review): with `scale == 0` this branch is never taken, so
                        // fractional digits keep being accumulated — confirm intended.
                        continue;
                    }
                    fractionals += 1;
                    digits += 1;
                    result = result.mul_wrapping(base);
                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                }

                if is_e_notation {
                    break;
                }

                // A lone "." (no digits on either side) is not a number
                if digits == 0 {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )));
                }
            }
            b'e' | b'E' => {
                result = match parse_e_notation::<T>(
                    s,
                    digits as u16,
                    fractionals as i16,
                    result,
                    index,
                    precision as u16,
                    scale as i16,
                ) {
                    Err(e) => return Err(e),
                    Ok(v) => v,
                };

                is_e_notation = true;

                break;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        }
    }

    // `parse_e_notation` already rescaled and range-checked its result
    if !is_e_notation {
        if fractionals < scale {
            // Pad with zeros up to `scale` fractional digits
            let exp = scale - fractionals;
            if exp as u8 + digits > precision {
                return Err(ArrowError::ParseError(format!(
                    "parse decimal overflow ({s})"
                )));
            }
            let mul = base.pow_wrapping(exp as _);
            result = result.mul_wrapping(mul);
        } else if digits > precision {
            return Err(ArrowError::ParseError(format!(
                "parse decimal overflow ({s})"
            )));
        }
    }

    Ok(if negative {
        result.neg_wrapping()
    } else {
        result
    })
}
978
979pub fn parse_interval_year_month(
981 value: &str,
982) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
983 let config = IntervalParseConfig::new(IntervalUnit::Year);
984 let interval = Interval::parse(value, &config)?;
985
986 let months = interval.to_year_months().map_err(|_| {
987 ArrowError::CastError(format!(
988 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
989 ))
990 })?;
991
992 Ok(IntervalYearMonthType::make_value(0, months))
993}
994
995pub fn parse_interval_day_time(
997 value: &str,
998) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
999 let config = IntervalParseConfig::new(IntervalUnit::Day);
1000 let interval = Interval::parse(value, &config)?;
1001
1002 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1003 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1004 )))?;
1005
1006 Ok(IntervalDayTimeType::make_value(days, millis))
1007}
1008
1009pub fn parse_interval_month_day_nano_config(
1011 value: &str,
1012 config: IntervalParseConfig,
1013) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1014 let interval = Interval::parse(value, &config)?;
1015
1016 let (months, days, nanos) = interval.to_month_day_nanos();
1017
1018 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1019}
1020
1021pub fn parse_interval_month_day_nano(
1023 value: &str,
1024) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1025 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1026}
1027
/// Nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one 24-hour day; only compiled for tests
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1034
/// Options controlling how interval strings are parsed.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// Unit assumed for an amount that is not followed by a unit name
    default_unit: IntervalUnit,
}
1044
1045impl IntervalParseConfig {
1046 pub fn new(default_unit: IntervalUnit) -> Self {
1048 Self { default_unit }
1049 }
1050}
1051
/// The units an interval amount can be expressed in.
///
/// Each variant's discriminant is a distinct single bit of a `u16`.
// NOTE(review): presumably so that sets of units can be combined into a bitmask —
// confirm against the code that consumes the discriminants.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A century (parsed from names such as "century", "centuries")
    Century     = 0b_0000_0000_0001,
    /// A decade
    Decade      = 0b_0000_0000_0010,
    /// A year
    Year        = 0b_0000_0000_0100,
    /// A month
    Month       = 0b_0000_0000_1000,
    /// A week
    Week        = 0b_0000_0001_0000,
    /// A day
    Day         = 0b_0000_0010_0000,
    /// An hour
    Hour        = 0b_0000_0100_0000,
    /// A minute
    Minute      = 0b_0000_1000_0000,
    /// A second
    Second      = 0b_0001_0000_0000,
    /// A millisecond
    Millisecond = 0b_0010_0000_0000,
    /// A microsecond
    Microsecond = 0b_0100_0000_0000,
    /// A nanosecond
    Nanosecond  = 0b_1000_0000_0000,
}
1083
1084impl FromStr for IntervalUnit {
1089 type Err = ArrowError;
1090
1091 fn from_str(s: &str) -> Result<Self, ArrowError> {
1092 match s.to_lowercase().as_str() {
1093 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1094 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1095 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1096 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1097 "w" | "week" | "weeks" => Ok(Self::Week),
1098 "d" | "day" | "days" => Ok(Self::Day),
1099 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1100 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1101 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1102 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1103 Ok(Self::Millisecond)
1104 }
1105 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1106 Ok(Self::Microsecond)
1107 }
1108 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1109 _ => Err(ArrowError::InvalidArgumentError(format!(
1110 "Unknown interval type: {s}"
1111 ))),
1112 }
1113 }
1114}
1115
1116impl IntervalUnit {
1117 fn from_str_or_config(
1118 s: Option<&str>,
1119 config: &IntervalParseConfig,
1120 ) -> Result<Self, ArrowError> {
1121 match s {
1122 Some(s) => s.parse(),
1123 None => Ok(config.default_unit),
1124 }
1125 }
1126}
1127
/// An interval as a tuple of two `i32` values and one `i64` value.
// NOTE(review): by name these are presumably (months, days, nanoseconds) — confirm.
pub type MonthDayNano = (i32, i32, i64);
1130
/// Number of decimal places to which the fractional part of an interval amount is scaled;
/// also the maximum number of fractional digits accepted when parsing one
const INTERVAL_PRECISION: u32 = 15;
1133
/// A decimal interval amount split into integer and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer part of the amount
    integer: i64,
    /// The fractional part scaled by `10^INTERVAL_PRECISION`; negative when the amount is
    /// negative
    frac: i64,
}
1141
#[cfg(test)]
impl IntervalAmount {
    /// Builds an amount directly from its integer part and already-scaled fractional part.
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1148
1149impl FromStr for IntervalAmount {
1150 type Err = ArrowError;
1151
1152 fn from_str(s: &str) -> Result<Self, Self::Err> {
1153 match s.split_once('.') {
1154 Some((integer, frac))
1155 if frac.len() <= INTERVAL_PRECISION as usize
1156 && !frac.is_empty()
1157 && !frac.starts_with('-') =>
1158 {
1159 let explicit_neg = integer.starts_with('-');
1162 let integer = if integer.is_empty() || integer == "-" {
1163 Ok(0)
1164 } else {
1165 integer.parse::<i64>().map_err(|_| {
1166 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1167 })
1168 }?;
1169
1170 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1171 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1172 })?;
1173
1174 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1176
1177 let frac = if integer < 0 || explicit_neg {
1179 -frac
1180 } else {
1181 frac
1182 };
1183
1184 let result = Self { integer, frac };
1185
1186 Ok(result)
1187 }
1188 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1189 "Failed to parse {s} as interval amount"
1190 ))),
1191 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1192 Err(ArrowError::ParseError(format!(
1193 "{s} exceeds the precision available for interval amount"
1194 )))
1195 }
1196 Some(_) | None => {
1197 let integer = s.parse::<i64>().map_err(|_| {
1198 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1199 })?;
1200
1201 let result = Self { integer, frac: 0 };
1202 Ok(result)
1203 }
1204 }
1205 }
1206}
1207
/// An interval accumulated as separate month, day and nanosecond components.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Month component
    months: i32,
    /// Day component
    days: i32,
    /// Nanosecond component
    nanos: i64,
}
1214
1215impl Interval {
1216 fn new(months: i32, days: i32, nanos: i64) -> Self {
1217 Self {
1218 months,
1219 days,
1220 nanos,
1221 }
1222 }
1223
1224 fn to_year_months(&self) -> Result<i32, ArrowError> {
1225 match (self.months, self.days, self.nanos) {
1226 (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1227 _ => Err(ArrowError::InvalidArgumentError(format!(
1228 "Unable to represent interval with days and nanos as year-months: {:?}",
1229 self
1230 ))),
1231 }
1232 }
1233
1234 fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1235 let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1236
1237 match self.nanos {
1238 nanos if nanos % NANOS_PER_MILLIS == 0 => {
1239 let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1240 ArrowError::InvalidArgumentError(format!(
1241 "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1242 self.nanos
1243 ))
1244 })?;
1245
1246 Ok((days, millis))
1247 }
1248 nanos => Err(ArrowError::InvalidArgumentError(format!(
1249 "Unable to represent {nanos} as milliseconds"
1250 ))),
1251 }
1252 }
1253
1254 fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1255 (self.months, self.days, self.nanos)
1256 }
1257
1258 fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1261 let components = parse_interval_components(value, config)?;
1262
1263 components
1264 .into_iter()
1265 .try_fold(Self::default(), |result, (amount, unit)| {
1266 result.add(amount, unit)
1267 })
1268 }
1269
1270 fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1277 let result = match unit {
1278 IntervalUnit::Century => {
1279 let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1280 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1281 let months = months_int
1282 .add_checked(month_frac)?
1283 .try_into()
1284 .map_err(|_| {
1285 ArrowError::ParseError(format!(
1286 "Unable to represent {} centuries as months in a signed 32-bit integer",
1287 &amount.integer
1288 ))
1289 })?;
1290
1291 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1292 }
1293 IntervalUnit::Decade => {
1294 let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1295
1296 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1297 let months = months_int
1298 .add_checked(month_frac)?
1299 .try_into()
1300 .map_err(|_| {
1301 ArrowError::ParseError(format!(
1302 "Unable to represent {} decades as months in a signed 32-bit integer",
1303 &amount.integer
1304 ))
1305 })?;
1306
1307 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1308 }
1309 IntervalUnit::Year => {
1310 let months_int = amount.integer.mul_checked(12)?;
1311 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1312 let months = months_int
1313 .add_checked(month_frac)?
1314 .try_into()
1315 .map_err(|_| {
1316 ArrowError::ParseError(format!(
1317 "Unable to represent {} years as months in a signed 32-bit integer",
1318 &amount.integer
1319 ))
1320 })?;
1321
1322 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1323 }
1324 IntervalUnit::Month => {
1325 let months = amount.integer.try_into().map_err(|_| {
1326 ArrowError::ParseError(format!(
1327 "Unable to represent {} months in a signed 32-bit integer",
1328 &amount.integer
1329 ))
1330 })?;
1331
1332 let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1333 let days = days.try_into().map_err(|_| {
1334 ArrowError::ParseError(format!(
1335 "Unable to represent {} months as days in a signed 32-bit integer",
1336 amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1337 ))
1338 })?;
1339
1340 Self::new(
1341 self.months.add_checked(months)?,
1342 self.days.add_checked(days)?,
1343 self.nanos,
1344 )
1345 }
1346 IntervalUnit::Week => {
1347 let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1348 ArrowError::ParseError(format!(
1349 "Unable to represent {} weeks as days in a signed 32-bit integer",
1350 &amount.integer
1351 ))
1352 })?;
1353
1354 let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1355
1356 Self::new(
1357 self.months,
1358 self.days.add_checked(days)?,
1359 self.nanos.add_checked(nanos)?,
1360 )
1361 }
1362 IntervalUnit::Day => {
1363 let days = amount.integer.try_into().map_err(|_| {
1364 ArrowError::InvalidArgumentError(format!(
1365 "Unable to represent {} days in a signed 32-bit integer",
1366 amount.integer
1367 ))
1368 })?;
1369
1370 let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1371
1372 Self::new(
1373 self.months,
1374 self.days.add_checked(days)?,
1375 self.nanos.add_checked(nanos)?,
1376 )
1377 }
1378 IntervalUnit::Hour => {
1379 let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1380 let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1381 let nanos = nanos_int.add_checked(nanos_frac)?;
1382
1383 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1384 }
1385 IntervalUnit::Minute => {
1386 let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1387 let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1388
1389 let nanos = nanos_int.add_checked(nanos_frac)?;
1390
1391 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1392 }
1393 IntervalUnit::Second => {
1394 let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1395 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1396 let nanos = nanos_int.add_checked(nanos_frac)?;
1397
1398 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1399 }
1400 IntervalUnit::Millisecond => {
1401 let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1402 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1403 let nanos = nanos_int.add_checked(nanos_frac)?;
1404
1405 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1406 }
1407 IntervalUnit::Microsecond => {
1408 let nanos_int = amount.integer.mul_checked(1_000)?;
1409 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1410 let nanos = nanos_int.add_checked(nanos_frac)?;
1411
1412 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1413 }
1414 IntervalUnit::Nanosecond => {
1415 let nanos_int = amount.integer;
1416 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1417 let nanos = nanos_int.add_checked(nanos_frac)?;
1418
1419 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1420 }
1421 };
1422
1423 Ok(result)
1424 }
1425}
1426
1427fn parse_interval_components(
1429 value: &str,
1430 config: &IntervalParseConfig,
1431) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1432 let raw_pairs = split_interval_components(value);
1433
1434 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1436 .iter()
1437 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1438 .collect()
1439 else {
1440 return Err(ArrowError::ParseError(format!(
1441 "Invalid input syntax for type interval: {value:?}"
1442 )));
1443 };
1444
1445 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1447
1448 let mut observed_interval_types = 0;
1450 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1451 if observed_interval_types & (*unit as u16) != 0 {
1452 return Err(ArrowError::ParseError(format!(
1453 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1454 value,
1455 raw_unit.unwrap_or_default(),
1456 )));
1457 }
1458
1459 observed_interval_types |= *unit as u16;
1460 }
1461
1462 let result = amounts.iter().copied().zip(units.iter().copied());
1463
1464 Ok(result.collect::<Vec<_>>())
1465}
1466
1467fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1473 let mut result = vec![];
1474 let mut words = value.split(char::is_whitespace);
1475 while let Some(word) = words.next() {
1476 if let Some(split_word_at) = word.find(not_interval_amount) {
1477 let (amount, unit) = word.split_at(split_word_at);
1478 result.push((amount, Some(unit)));
1479 } else if let Some(unit) = words.next() {
1480 result.push((word, Some(unit)));
1481 } else {
1482 result.push((word, None));
1483 break;
1484 }
1485 }
1486 result
1487}
1488
/// Returns `true` when `c` is neither an ASCII digit, a `.` nor a `-`.
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1493
1494#[cfg(test)]
1495mod tests {
1496 use super::*;
1497 use arrow_array::temporal_conversions::date32_to_datetime;
1498 use arrow_buffer::i256;
1499
1500 #[test]
1501 fn test_parse_nanos() {
1502 assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1503 assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1504 assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1505 }
1506
1507 #[test]
1508 fn string_to_timestamp_timezone() {
1509 assert_eq!(
1511 1599572549190855000,
1512 parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1513 );
1514 assert_eq!(
1515 1599572549190855000,
1516 parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1517 );
1518 assert_eq!(
1519 1599572549000000000,
1520 parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1521 ); assert_eq!(
1523 1599590549190855000,
1524 parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1525 );
1526 }
1527
1528 #[test]
1529 fn string_to_timestamp_timezone_space() {
1530 assert_eq!(
1532 1599572549190855000,
1533 parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1534 );
1535 assert_eq!(
1536 1599572549190855000,
1537 parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1538 );
1539 assert_eq!(
1540 1599572549000000000,
1541 parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1542 ); assert_eq!(
1544 1599590549190855000,
1545 parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1546 );
1547 }
1548
1549 #[test]
1550 #[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
1552 let naive_datetime = NaiveDateTime::new(
1556 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1557 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1558 );
1559
1560 assert_eq!(
1562 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1563 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1564 );
1565
1566 assert_eq!(
1567 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1568 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1569 );
1570
1571 let datetime_whole_secs = NaiveDateTime::new(
1574 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1575 NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1576 )
1577 .and_utc();
1578
1579 assert_eq!(
1581 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1582 parse_timestamp("2020-09-08T13:42:29").unwrap()
1583 );
1584
1585 assert_eq!(
1586 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1587 parse_timestamp("2020-09-08 13:42:29").unwrap()
1588 );
1589
1590 let datetime_no_time = NaiveDateTime::new(
1594 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1595 NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1596 )
1597 .and_utc();
1598
1599 assert_eq!(
1600 datetime_no_time.timestamp_nanos_opt().unwrap(),
1601 parse_timestamp("2020-09-08").unwrap()
1602 )
1603 }
1604
1605 #[test]
1606 fn string_to_timestamp_chrono() {
1607 let cases = [
1608 "2020-09-08T13:42:29Z",
1609 "1969-01-01T00:00:00.1Z",
1610 "2020-09-08T12:00:12.12345678+00:00",
1611 "2020-09-08T12:00:12+00:00",
1612 "2020-09-08T12:00:12.1+00:00",
1613 "2020-09-08T12:00:12.12+00:00",
1614 "2020-09-08T12:00:12.123+00:00",
1615 "2020-09-08T12:00:12.1234+00:00",
1616 "2020-09-08T12:00:12.12345+00:00",
1617 "2020-09-08T12:00:12.123456+00:00",
1618 "2020-09-08T12:00:12.1234567+00:00",
1619 "2020-09-08T12:00:12.12345678+00:00",
1620 "2020-09-08T12:00:12.123456789+00:00",
1621 "2020-09-08T12:00:12.12345678912z",
1622 "2020-09-08T12:00:12.123456789123Z",
1623 "2020-09-08T12:00:12.123456789123+02:00",
1624 "2020-09-08T12:00:12.12345678912345Z",
1625 "2020-09-08T12:00:12.1234567891234567+02:00",
1626 "2020-09-08T12:00:60Z",
1627 "2020-09-08T12:00:60.123Z",
1628 "2020-09-08T12:00:60.123456+02:00",
1629 "2020-09-08T12:00:60.1234567891234567+02:00",
1630 "2020-09-08T12:00:60.999999999+02:00",
1631 "2020-09-08t12:00:12.12345678+00:00",
1632 "2020-09-08t12:00:12+00:00",
1633 "2020-09-08t12:00:12Z",
1634 ];
1635
1636 for case in cases {
1637 let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1638 let chrono_utc = chrono.with_timezone(&Utc);
1639
1640 let custom = string_to_datetime(&Utc, case).unwrap();
1641 assert_eq!(chrono_utc, custom)
1642 }
1643 }
1644
1645 #[test]
1646 fn string_to_timestamp_naive() {
1647 let cases = [
1648 "2018-11-13T17:11:10.011375885995",
1649 "2030-12-04T17:11:10.123",
1650 "2030-12-04T17:11:10.1234",
1651 "2030-12-04T17:11:10.123456",
1652 ];
1653 for case in cases {
1654 let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1655 let custom = string_to_datetime(&Utc, case).unwrap();
1656 assert_eq!(chrono, custom.naive_utc())
1657 }
1658 }
1659
1660 #[test]
1661 fn string_to_timestamp_invalid() {
1662 let cases = [
1664 ("", "timestamp must contain at least 10 characters"),
1665 ("SS", "timestamp must contain at least 10 characters"),
1666 ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1667 ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1668 ("1997-01-31 09:26:56.123Z", "error parsing time"),
1669 ("1997:01:31T09:26:56.123Z", "error parsing date"),
1670 ("1997:1:31T09:26:56.123Z", "error parsing date"),
1671 ("1997-01-32T09:26:56.123Z", "error parsing date"),
1672 ("1997-13-32T09:26:56.123Z", "error parsing date"),
1673 ("1997-02-29T09:26:56.123Z", "error parsing date"),
1674 ("2015-02-30T17:35:20-08:00", "error parsing date"),
1675 ("1997-01-10T9:26:56.123Z", "error parsing time"),
1676 ("2015-01-20T25:35:20-08:00", "error parsing time"),
1677 ("1997-01-10T09:61:56.123Z", "error parsing time"),
1678 ("1997-01-10T09:61:90.123Z", "error parsing time"),
1679 ("1997-01-10T12:00:6.123Z", "error parsing time"),
1680 ("1997-01-31T092656.123Z", "error parsing time"),
1681 ("1997-01-10T12:00:06.", "error parsing time"),
1682 ("1997-01-10T12:00:06. ", "error parsing time"),
1683 ];
1684
1685 for (s, ctx) in cases {
1686 let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1687 let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1688 assert_eq!(actual, expected)
1689 }
1690 }
1691
1692 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1694 let result = string_to_timestamp_nanos(s);
1695 if let Err(e) = &result {
1696 eprintln!("Error parsing timestamp '{s}': {e:?}");
1697 }
1698 result
1699 }
1700
1701 #[test]
1702 fn string_without_timezone_to_timestamp() {
1703 let naive_datetime = NaiveDateTime::new(
1706 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1707 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1708 );
1709
1710 assert_eq!(
1712 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1713 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1714 );
1715
1716 assert_eq!(
1717 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1718 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1719 );
1720
1721 let naive_datetime = NaiveDateTime::new(
1722 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1723 NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1724 );
1725
1726 assert_eq!(
1728 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1729 parse_timestamp("2020-09-08T13:42:29").unwrap()
1730 );
1731
1732 assert_eq!(
1733 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1734 parse_timestamp("2020-09-08 13:42:29").unwrap()
1735 );
1736
1737 let tz: Tz = "+02:00".parse().unwrap();
1738 let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1739 let utc = date.naive_utc().to_string();
1740 assert_eq!(utc, "2020-09-08 11:42:29");
1741 let local = date.naive_local().to_string();
1742 assert_eq!(local, "2020-09-08 13:42:29");
1743
1744 let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1745 let utc = date.naive_utc().to_string();
1746 assert_eq!(utc, "2020-09-08 13:42:29");
1747 let local = date.naive_local().to_string();
1748 assert_eq!(local, "2020-09-08 15:42:29");
1749
1750 let dt =
1751 NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1752 let local: Tz = "+08:00".parse().unwrap();
1753
1754 let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1756 assert_eq!(dt, date.naive_utc());
1757 assert_ne!(dt, date.naive_local());
1758
1759 let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1761 assert_eq!(dt, date.naive_local());
1762 assert_ne!(dt, date.naive_utc());
1763 }
1764
1765 #[test]
1766 fn parse_date32() {
1767 let cases = [
1768 "2020-09-08",
1769 "2020-9-8",
1770 "2020-09-8",
1771 "2020-9-08",
1772 "2020-12-1",
1773 "1690-2-5",
1774 "2020-09-08 01:02:03",
1775 ];
1776 for case in cases {
1777 let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1778 let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1779 .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1780 .unwrap();
1781 assert_eq!(v.date(), expected);
1782 }
1783
1784 let err_cases = [
1785 "",
1786 "80-01-01",
1787 "342",
1788 "Foo",
1789 "2020-09-08-03",
1790 "2020--04-03",
1791 "2020--",
1792 "2020-09-08 01",
1793 "2020-09-08 01:02",
1794 "2020-09-08 01-02-03",
1795 "2020-9-8 01:02:03",
1796 "2020-09-08 1:2:3",
1797 ];
1798 for case in err_cases {
1799 assert_eq!(Date32Type::parse(case), None);
1800 }
1801 }
1802
1803 #[test]
1804 fn parse_time64_nanos() {
1805 assert_eq!(
1806 Time64NanosecondType::parse("02:10:01.1234567899999999"),
1807 Some(7_801_123_456_789)
1808 );
1809 assert_eq!(
1810 Time64NanosecondType::parse("02:10:01.1234567"),
1811 Some(7_801_123_456_700)
1812 );
1813 assert_eq!(
1814 Time64NanosecondType::parse("2:10:01.1234567"),
1815 Some(7_801_123_456_700)
1816 );
1817 assert_eq!(
1818 Time64NanosecondType::parse("12:10:01.123456789 AM"),
1819 Some(601_123_456_789)
1820 );
1821 assert_eq!(
1822 Time64NanosecondType::parse("12:10:01.123456789 am"),
1823 Some(601_123_456_789)
1824 );
1825 assert_eq!(
1826 Time64NanosecondType::parse("2:10:01.12345678 PM"),
1827 Some(51_001_123_456_780)
1828 );
1829 assert_eq!(
1830 Time64NanosecondType::parse("2:10:01.12345678 pm"),
1831 Some(51_001_123_456_780)
1832 );
1833 assert_eq!(
1834 Time64NanosecondType::parse("02:10:01"),
1835 Some(7_801_000_000_000)
1836 );
1837 assert_eq!(
1838 Time64NanosecondType::parse("2:10:01"),
1839 Some(7_801_000_000_000)
1840 );
1841 assert_eq!(
1842 Time64NanosecondType::parse("12:10:01 AM"),
1843 Some(601_000_000_000)
1844 );
1845 assert_eq!(
1846 Time64NanosecondType::parse("12:10:01 am"),
1847 Some(601_000_000_000)
1848 );
1849 assert_eq!(
1850 Time64NanosecondType::parse("2:10:01 PM"),
1851 Some(51_001_000_000_000)
1852 );
1853 assert_eq!(
1854 Time64NanosecondType::parse("2:10:01 pm"),
1855 Some(51_001_000_000_000)
1856 );
1857 assert_eq!(
1858 Time64NanosecondType::parse("02:10"),
1859 Some(7_800_000_000_000)
1860 );
1861 assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1862 assert_eq!(
1863 Time64NanosecondType::parse("12:10 AM"),
1864 Some(600_000_000_000)
1865 );
1866 assert_eq!(
1867 Time64NanosecondType::parse("12:10 am"),
1868 Some(600_000_000_000)
1869 );
1870 assert_eq!(
1871 Time64NanosecondType::parse("2:10 PM"),
1872 Some(51_000_000_000_000)
1873 );
1874 assert_eq!(
1875 Time64NanosecondType::parse("2:10 pm"),
1876 Some(51_000_000_000_000)
1877 );
1878
1879 assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1881
1882 assert_eq!(
1884 Time64NanosecondType::parse("23:59:60"),
1885 Some(86_400_000_000_000)
1886 );
1887
1888 assert_eq!(
1890 Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1891 Some(7_801_123_456_700)
1892 );
1893 }
1894
1895 #[test]
1896 fn parse_time64_micros() {
1897 assert_eq!(
1899 Time64MicrosecondType::parse("02:10:01.1234"),
1900 Some(7_801_123_400)
1901 );
1902 assert_eq!(
1903 Time64MicrosecondType::parse("2:10:01.1234"),
1904 Some(7_801_123_400)
1905 );
1906 assert_eq!(
1907 Time64MicrosecondType::parse("12:10:01.123456 AM"),
1908 Some(601_123_456)
1909 );
1910 assert_eq!(
1911 Time64MicrosecondType::parse("12:10:01.123456 am"),
1912 Some(601_123_456)
1913 );
1914 assert_eq!(
1915 Time64MicrosecondType::parse("2:10:01.12345 PM"),
1916 Some(51_001_123_450)
1917 );
1918 assert_eq!(
1919 Time64MicrosecondType::parse("2:10:01.12345 pm"),
1920 Some(51_001_123_450)
1921 );
1922 assert_eq!(
1923 Time64MicrosecondType::parse("02:10:01"),
1924 Some(7_801_000_000)
1925 );
1926 assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1927 assert_eq!(
1928 Time64MicrosecondType::parse("12:10:01 AM"),
1929 Some(601_000_000)
1930 );
1931 assert_eq!(
1932 Time64MicrosecondType::parse("12:10:01 am"),
1933 Some(601_000_000)
1934 );
1935 assert_eq!(
1936 Time64MicrosecondType::parse("2:10:01 PM"),
1937 Some(51_001_000_000)
1938 );
1939 assert_eq!(
1940 Time64MicrosecondType::parse("2:10:01 pm"),
1941 Some(51_001_000_000)
1942 );
1943 assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1944 assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1945 assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1946 assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1947 assert_eq!(
1948 Time64MicrosecondType::parse("2:10 PM"),
1949 Some(51_000_000_000)
1950 );
1951 assert_eq!(
1952 Time64MicrosecondType::parse("2:10 pm"),
1953 Some(51_000_000_000)
1954 );
1955
1956 assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1958
1959 assert_eq!(
1961 Time64MicrosecondType::parse("23:59:60"),
1962 Some(86_400_000_000)
1963 );
1964
1965 assert_eq!(
1967 Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1968 Some(7_801_123_400)
1969 );
1970 }
1971
1972 #[test]
1973 fn parse_time32_millis() {
1974 assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1976 assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1977 assert_eq!(
1978 Time32MillisecondType::parse("12:10:01.123 AM"),
1979 Some(601_123)
1980 );
1981 assert_eq!(
1982 Time32MillisecondType::parse("12:10:01.123 am"),
1983 Some(601_123)
1984 );
1985 assert_eq!(
1986 Time32MillisecondType::parse("2:10:01.12 PM"),
1987 Some(51_001_120)
1988 );
1989 assert_eq!(
1990 Time32MillisecondType::parse("2:10:01.12 pm"),
1991 Some(51_001_120)
1992 );
1993 assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
1994 assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
1995 assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
1996 assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
1997 assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
1998 assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
1999 assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2000 assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2001 assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2002 assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2003 assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2004 assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2005
2006 assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2008
2009 assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2011
2012 assert_eq!(
2014 Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2015 Some(7_801_100)
2016 );
2017 }
2018
2019 #[test]
2020 fn parse_time32_secs() {
2021 assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2023 assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2024 assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2025 assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2026 assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2027 assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2028 assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2029 assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2030 assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2031 assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2032 assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2033 assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2034 assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2035
2036 assert_eq!(Time32SecondType::parse("1"), Some(1));
2038
2039 assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2041
2042 assert_eq!(
2044 Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2045 Some(7_801)
2046 );
2047 }
2048
2049 #[test]
2050 fn test_string_to_time_invalid() {
2051 let cases = [
2052 "25:00",
2053 "9:00:",
2054 "009:00",
2055 "09:0:00",
2056 "25:00:00",
2057 "13:00 AM",
2058 "13:00 PM",
2059 "12:00. AM",
2060 "09:0:00",
2061 "09:01:0",
2062 "09:01:1",
2063 "9:1:0",
2064 "09:01:0",
2065 "1:00.123",
2066 "1:00:00.123f",
2067 " 9:00:00",
2068 ":09:00",
2069 "T9:00:00",
2070 "AM",
2071 ];
2072 for case in cases {
2073 assert!(string_to_time(case).is_none(), "{case}");
2074 }
2075 }
2076
2077 #[test]
2078 fn test_string_to_time_chrono() {
2079 let cases = [
2080 ("1:00", "%H:%M"),
2081 ("12:00", "%H:%M"),
2082 ("13:00", "%H:%M"),
2083 ("24:00", "%H:%M"),
2084 ("1:00:00", "%H:%M:%S"),
2085 ("12:00:30", "%H:%M:%S"),
2086 ("13:00:59", "%H:%M:%S"),
2087 ("24:00:60", "%H:%M:%S"),
2088 ("09:00:00", "%H:%M:%S%.f"),
2089 ("0:00:30.123456", "%H:%M:%S%.f"),
2090 ("0:00 AM", "%I:%M %P"),
2091 ("1:00 AM", "%I:%M %P"),
2092 ("12:00 AM", "%I:%M %P"),
2093 ("13:00 AM", "%I:%M %P"),
2094 ("0:00 PM", "%I:%M %P"),
2095 ("1:00 PM", "%I:%M %P"),
2096 ("12:00 PM", "%I:%M %P"),
2097 ("13:00 PM", "%I:%M %P"),
2098 ("1:00 pM", "%I:%M %P"),
2099 ("1:00 Pm", "%I:%M %P"),
2100 ("1:00 aM", "%I:%M %P"),
2101 ("1:00 Am", "%I:%M %P"),
2102 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2103 ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2104 ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2105 ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2106 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2107 ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2108 ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2109 ];
2110 for (s, format) in cases {
2111 let chrono = NaiveTime::parse_from_str(s, format).ok();
2112 let custom = string_to_time(s);
2113 assert_eq!(chrono, custom, "{s}");
2114 }
2115 }
2116
2117 #[test]
2118 fn test_parse_interval() {
2119 let config = IntervalParseConfig::new(IntervalUnit::Month);
2120
2121 assert_eq!(
2122 Interval::new(1i32, 0i32, 0i64),
2123 Interval::parse("1 month", &config).unwrap(),
2124 );
2125
2126 assert_eq!(
2127 Interval::new(2i32, 0i32, 0i64),
2128 Interval::parse("2 month", &config).unwrap(),
2129 );
2130
2131 assert_eq!(
2132 Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2133 Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2134 );
2135
2136 assert_eq!(
2137 Interval::new(0i32, 15i32, 0),
2138 Interval::parse("0.5 months", &config).unwrap(),
2139 );
2140
2141 assert_eq!(
2142 Interval::new(0i32, 15i32, 0),
2143 Interval::parse(".5 months", &config).unwrap(),
2144 );
2145
2146 assert_eq!(
2147 Interval::new(0i32, -15i32, 0),
2148 Interval::parse("-0.5 months", &config).unwrap(),
2149 );
2150
2151 assert_eq!(
2152 Interval::new(0i32, -15i32, 0),
2153 Interval::parse("-.5 months", &config).unwrap(),
2154 );
2155
2156 assert_eq!(
2157 Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2158 Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2159 );
2160
2161 assert_eq!(
2162 Interval::parse("1 centurys 1 month", &config)
2163 .unwrap_err()
2164 .to_string(),
2165 r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2166 );
2167
2168 assert_eq!(
2169 Interval::new(37i32, 0i32, 0i64),
2170 Interval::parse("3 year 1 month", &config).unwrap(),
2171 );
2172
2173 assert_eq!(
2174 Interval::new(35i32, 0i32, 0i64),
2175 Interval::parse("3 year -1 month", &config).unwrap(),
2176 );
2177
2178 assert_eq!(
2179 Interval::new(-37i32, 0i32, 0i64),
2180 Interval::parse("-3 year -1 month", &config).unwrap(),
2181 );
2182
2183 assert_eq!(
2184 Interval::new(-35i32, 0i32, 0i64),
2185 Interval::parse("-3 year 1 month", &config).unwrap(),
2186 );
2187
2188 assert_eq!(
2189 Interval::new(0i32, 5i32, 0i64),
2190 Interval::parse("5 days", &config).unwrap(),
2191 );
2192
2193 assert_eq!(
2194 Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2195 Interval::parse("7 days 3 hours", &config).unwrap(),
2196 );
2197
2198 assert_eq!(
2199 Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2200 Interval::parse("7 days 5 minutes", &config).unwrap(),
2201 );
2202
2203 assert_eq!(
2204 Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2205 Interval::parse("7 days -5 minutes", &config).unwrap(),
2206 );
2207
2208 assert_eq!(
2209 Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2210 Interval::parse("-7 days 5 hours", &config).unwrap(),
2211 );
2212
2213 assert_eq!(
2214 Interval::new(
2215 0i32,
2216 -7i32,
2217 -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2218 ),
2219 Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2220 );
2221
2222 assert_eq!(
2223 Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2224 Interval::parse("1 year 25 millisecond", &config).unwrap(),
2225 );
2226
2227 assert_eq!(
2228 Interval::new(
2229 12i32,
2230 1i32,
2231 (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2232 ),
2233 Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2234 );
2235
2236 assert_eq!(
2237 Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2238 Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2239 );
2240
2241 assert_eq!(
2242 Interval::new(12i32, 1i32, 1000i64),
2243 Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2244 );
2245
2246 assert_eq!(
2247 Interval::new(12i32, 1i32, 1i64),
2248 Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2249 );
2250
2251 assert_eq!(
2252 Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2253 Interval::parse("1 month -1 second", &config).unwrap(),
2254 );
2255
2256 assert_eq!(
2257 Interval::new(
2258 -13i32,
2259 -8i32,
2260 -NANOS_PER_HOUR
2261 - NANOS_PER_MINUTE
2262 - NANOS_PER_SECOND
2263 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2264 ),
2265 Interval::parse(
2266 "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2267 &config
2268 )
2269 .unwrap(),
2270 );
2271
2272 assert_eq!(
2274 Interval::new(1, 0, 0),
2275 Interval::parse("1", &config).unwrap()
2276 );
2277 assert_eq!(
2278 Interval::new(42, 0, 0),
2279 Interval::parse("42", &config).unwrap()
2280 );
2281 assert_eq!(
2282 Interval::new(0, 0, 42_000_000_000),
2283 Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2284 );
2285
2286 assert_eq!(
2288 Interval::new(1, 0, 0),
2289 Interval::parse("1 mon", &config).unwrap()
2290 );
2291 assert_eq!(
2292 Interval::new(1, 0, 0),
2293 Interval::parse("1 mons", &config).unwrap()
2294 );
2295 assert_eq!(
2296 Interval::new(0, 0, 1_000_000),
2297 Interval::parse("1 ms", &config).unwrap()
2298 );
2299 assert_eq!(
2300 Interval::new(0, 0, 1_000),
2301 Interval::parse("1 us", &config).unwrap()
2302 );
2303
2304 assert_eq!(
2306 Interval::new(0, 0, 1_000),
2307 Interval::parse("1us", &config).unwrap()
2308 );
2309 assert_eq!(
2310 Interval::new(0, 0, NANOS_PER_SECOND),
2311 Interval::parse("1s", &config).unwrap()
2312 );
2313 assert_eq!(
2314 Interval::new(1, 2, 10_864_000_000_000),
2315 Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2316 );
2317
2318 assert_eq!(
2319 Interval::new(
2320 -13i32,
2321 -8i32,
2322 -NANOS_PER_HOUR
2323 - NANOS_PER_MINUTE
2324 - NANOS_PER_SECOND
2325 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2326 ),
2327 Interval::parse(
2328 "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2329 &config
2330 )
2331 .unwrap(),
2332 );
2333
2334 assert_eq!(
2335 Interval::parse("1h s", &config).unwrap_err().to_string(),
2336 r#"Parser error: Invalid input syntax for type interval: "1h s""#
2337 );
2338
2339 assert_eq!(
2340 Interval::parse("1XX", &config).unwrap_err().to_string(),
2341 r#"Parser error: Invalid input syntax for type interval: "1XX""#
2342 );
2343 }
2344
2345 #[test]
2346 fn test_duplicate_interval_type() {
2347 let config = IntervalParseConfig::new(IntervalUnit::Month);
2348
2349 let err = Interval::parse("1 month 1 second 1 second", &config)
2350 .expect_err("parsing interval should have failed");
2351 assert_eq!(
2352 r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2353 format!("{err:?}")
2354 );
2355
2356 let err = Interval::parse("1 century 2 centuries", &config)
2358 .expect_err("parsing interval should have failed");
2359 assert_eq!(
2360 r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2361 format!("{err:?}")
2362 );
2363 }
2364
2365 #[test]
2366 fn test_interval_amount_parsing() {
2367 let result = IntervalAmount::from_str("123").unwrap();
2369 let expected = IntervalAmount::new(123, 0);
2370
2371 assert_eq!(result, expected);
2372
2373 let result = IntervalAmount::from_str("0.3").unwrap();
2375 let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2376
2377 assert_eq!(result, expected);
2378
2379 let result = IntervalAmount::from_str("-3.5").unwrap();
2381 let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2382
2383 assert_eq!(result, expected);
2384
2385 let result = IntervalAmount::from_str("3.");
2387 assert!(result.is_err());
2388
2389 let result = IntervalAmount::from_str("3.-5");
2391 assert!(result.is_err());
2392 }
2393
2394 #[test]
2395 fn test_interval_precision() {
2396 let config = IntervalParseConfig::new(IntervalUnit::Month);
2397
2398 let result = Interval::parse("100000.1 days", &config).unwrap();
2399 let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2400
2401 assert_eq!(result, expected);
2402 }
2403
2404 #[test]
2405 fn test_interval_addition() {
2406 let start = Interval::new(1, 2, 3);
2408 let expected = Interval::new(4921, 2, 3);
2409
2410 let result = start
2411 .add(
2412 IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2413 IntervalUnit::Century,
2414 )
2415 .unwrap();
2416
2417 assert_eq!(result, expected);
2418
2419 let start = Interval::new(1, 2, 3);
2421 let expected = Interval::new(1231, 2, 3);
2422
2423 let result = start
2424 .add(
2425 IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2426 IntervalUnit::Decade,
2427 )
2428 .unwrap();
2429
2430 assert_eq!(result, expected);
2431
2432 let start = Interval::new(1, 2, 3);
2434 let expected = Interval::new(364, 2, 3);
2435
2436 let result = start
2437 .add(
2438 IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2439 IntervalUnit::Year,
2440 )
2441 .unwrap();
2442
2443 assert_eq!(result, expected);
2444
2445 let start = Interval::new(1, 2, 3);
2447 let expected = Interval::new(2, 17, 3);
2448
2449 let result = start
2450 .add(
2451 IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2452 IntervalUnit::Month,
2453 )
2454 .unwrap();
2455
2456 assert_eq!(result, expected);
2457
2458 let start = Interval::new(1, 25, 3);
2460 let expected = Interval::new(1, 11, 3);
2461
2462 let result = start
2463 .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2464 .unwrap();
2465
2466 assert_eq!(result, expected);
2467
2468 let start = Interval::new(12, 15, 3);
2470 let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2471
2472 let result = start
2473 .add(
2474 IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2475 IntervalUnit::Day,
2476 )
2477 .unwrap();
2478
2479 assert_eq!(result, expected);
2480
2481 let start = Interval::new(1, 2, 3);
2483 let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2484
2485 let result = start
2486 .add(
2487 IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2488 IntervalUnit::Hour,
2489 )
2490 .unwrap();
2491
2492 assert_eq!(result, expected);
2493
2494 let start = Interval::new(0, 0, -3);
2496 let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2497
2498 let result = start
2499 .add(
2500 IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2501 IntervalUnit::Minute,
2502 )
2503 .unwrap();
2504
2505 assert_eq!(result, expected);
2506 }
2507
/// A timestamp this far in the past must be rejected by `parse_timestamp`,
/// and the error text must end with `ERR_NANOSECONDS_NOT_SUPPORTED`.
#[test]
fn string_to_timestamp_old() {
    // `unwrap_err` panics (failing the test) if parsing unexpectedly succeeds.
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2514
2515 #[test]
2516 fn test_parse_decimal_with_parameter() {
2517 let tests = [
2518 ("0", 0i128),
2519 ("123.123", 123123i128),
2520 ("123.1234", 123123i128),
2521 ("123.1", 123100i128),
2522 ("123", 123000i128),
2523 ("-123.123", -123123i128),
2524 ("-123.1234", -123123i128),
2525 ("-123.1", -123100i128),
2526 ("-123", -123000i128),
2527 ("0.0000123", 0i128),
2528 ("12.", 12000i128),
2529 ("-12.", -12000i128),
2530 ("00.1", 100i128),
2531 ("-00.1", -100i128),
2532 ("12345678912345678.1234", 12345678912345678123i128),
2533 ("-12345678912345678.1234", -12345678912345678123i128),
2534 ("99999999999999999.999", 99999999999999999999i128),
2535 ("-99999999999999999.999", -99999999999999999999i128),
2536 (".123", 123i128),
2537 ("-.123", -123i128),
2538 ("123.", 123000i128),
2539 ("-123.", -123000i128),
2540 ];
2541 for (s, i) in tests {
2542 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2543 assert_eq!(i, result_128.unwrap());
2544 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2545 assert_eq!(i256::from_i128(i), result_256.unwrap());
2546 }
2547
2548 let e_notation_tests = [
2549 ("1.23e3", "1230.0", 2),
2550 ("5.6714e+2", "567.14", 4),
2551 ("5.6714e-2", "0.056714", 4),
2552 ("5.6714e-2", "0.056714", 3),
2553 ("5.6741214125e2", "567.41214125", 4),
2554 ("8.91E4", "89100.0", 2),
2555 ("3.14E+5", "314000.0", 2),
2556 ("2.718e0", "2.718", 2),
2557 ("9.999999e-1", "0.9999999", 4),
2558 ("1.23e+3", "1230", 2),
2559 ("1.234559e+3", "1234.559", 2),
2560 ("1.00E-10", "0.0000000001", 11),
2561 ("1.23e-4", "0.000123", 2),
2562 ("9.876e7", "98760000.0", 2),
2563 ("5.432E+8", "543200000.0", 10),
2564 ("1.234567e9", "1234567000.0", 2),
2565 ("1.234567e2", "123.45670000", 2),
2566 ("4749.3e-5", "0.047493", 10),
2567 ("4749.3e+5", "474930000", 10),
2568 ("4749.3e-5", "0.047493", 1),
2569 ("4749.3e+5", "474930000", 1),
2570 ("0E-8", "0", 10),
2571 ("0E+6", "0", 10),
2572 ("1E-8", "0.00000001", 10),
2573 ("12E+6", "12000000", 10),
2574 ("12E-6", "0.000012", 10),
2575 ("0.1e-6", "0.0000001", 10),
2576 ("0.1e+6", "100000", 10),
2577 ("0.12e-6", "0.00000012", 10),
2578 ("0.12e+6", "120000", 10),
2579 ("000000000001e0", "000000000001", 3),
2580 ("000001.1034567002e0", "000001.1034567002", 3),
2581 ("1.234e16", "12340000000000000", 0),
2582 ("123.4e16", "1234000000000000000", 0),
2583 ];
2584 for (e, d, scale) in e_notation_tests {
2585 let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2586 let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2587 assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2588 let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2589 let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2590 assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2591 }
2592 let can_not_parse_tests = [
2593 "123,123",
2594 ".",
2595 "123.123.123",
2596 "",
2597 "+",
2598 "-",
2599 "e",
2600 "1.3e+e3",
2601 "5.6714ee-2",
2602 "4.11ee-+4",
2603 "4.11e++4",
2604 "1.1e.12",
2605 "1.23e+3.",
2606 "1.23e+3.1",
2607 ];
2608 for s in can_not_parse_tests {
2609 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2610 assert_eq!(
2611 format!("Parser error: can't parse the string value {s} to decimal"),
2612 result_128.unwrap_err().to_string()
2613 );
2614 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2615 assert_eq!(
2616 format!("Parser error: can't parse the string value {s} to decimal"),
2617 result_256.unwrap_err().to_string()
2618 );
2619 }
2620 let overflow_parse_tests = [
2621 ("12345678", 3),
2622 ("1.2345678e7", 3),
2623 ("12345678.9", 3),
2624 ("1.23456789e+7", 3),
2625 ("99999999.99", 3),
2626 ("9.999999999e7", 3),
2627 ("12345678908765.123456", 3),
2628 ("123456789087651234.56e-4", 3),
2629 ("1234560000000", 0),
2630 ("1.23456e12", 0),
2631 ];
2632 for (s, scale) in overflow_parse_tests {
2633 let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2634 let expected_128 = "Parser error: parse decimal overflow";
2635 let actual_128 = result_128.unwrap_err().to_string();
2636
2637 assert!(
2638 actual_128.contains(expected_128),
2639 "actual: '{actual_128}', expected: '{expected_128}'"
2640 );
2641
2642 let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2643 let expected_256 = "Parser error: parse decimal overflow";
2644 let actual_256 = result_256.unwrap_err().to_string();
2645
2646 assert!(
2647 actual_256.contains(expected_256),
2648 "actual: '{actual_256}', expected: '{expected_256}'"
2649 );
2650 }
2651
2652 let edge_tests_128 = [
2653 (
2654 "99999999999999999999999999999999999999",
2655 99999999999999999999999999999999999999i128,
2656 0,
2657 ),
2658 (
2659 "999999999999999999999999999999999999.99",
2660 99999999999999999999999999999999999999i128,
2661 2,
2662 ),
2663 (
2664 "9999999999999999999999999.9999999999999",
2665 99999999999999999999999999999999999999i128,
2666 13,
2667 ),
2668 (
2669 "9999999999999999999999999",
2670 99999999999999999999999990000000000000i128,
2671 13,
2672 ),
2673 (
2674 "0.99999999999999999999999999999999999999",
2675 99999999999999999999999999999999999999i128,
2676 38,
2677 ),
2678 (
2679 "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2680 0i128,
2681 15,
2682 ),
2683 (
2684 "1.016744e-320",
2685 0i128,
2686 15,
2687 ),
2688 (
2689 "-1e3",
2690 -1000000000i128,
2691 6,
2692 ),
2693 (
2694 "+1e3",
2695 1000000000i128,
2696 6,
2697 ),
2698 (
2699 "-1e31",
2700 -10000000000000000000000000000000000000i128,
2701 6,
2702 ),
2703 ];
2704 for (s, i, scale) in edge_tests_128 {
2705 let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2706 assert_eq!(i, result_128.unwrap());
2707 }
2708 let edge_tests_256 = [
2709 (
2710 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2711 i256::from_string(
2712 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2713 )
2714 .unwrap(),
2715 0,
2716 ),
2717 (
2718 "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2719 i256::from_string(
2720 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2721 )
2722 .unwrap(),
2723 4,
2724 ),
2725 (
2726 "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2727 i256::from_string(
2728 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2729 )
2730 .unwrap(),
2731 26,
2732 ),
2733 (
2734 "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2735 i256::from_string(
2736 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2737 )
2738 .unwrap(),
2739 26,
2740 ),
2741 (
2742 "99999999999999999999999999999999999999999999999999",
2743 i256::from_string(
2744 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2745 )
2746 .unwrap(),
2747 26,
2748 ),
2749 (
2750 "9.9999999999999999999999999999999999999999999999999e+49",
2751 i256::from_string(
2752 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2753 )
2754 .unwrap(),
2755 26,
2756 ),
2757 ];
2758 for (s, i, scale) in edge_tests_256 {
2759 let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2760 assert_eq!(i, result.unwrap());
2761 }
2762 }
2763
/// Degenerate inputs must parse to `None`: the empty string and a lone `+`
/// for the integer and float types, and the empty string for the timestamp
/// and date types.
#[test]
fn test_parse_empty() {
    for input in ["", "+"] {
        assert_eq!(Int32Type::parse(input), None);
        assert_eq!(Int64Type::parse(input), None);
        assert_eq!(UInt32Type::parse(input), None);
        assert_eq!(UInt64Type::parse(input), None);
        assert_eq!(Float32Type::parse(input), None);
        assert_eq!(Float64Type::parse(input), None);
    }

    // Temporal types are only checked against the empty string.
    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
2781
/// Parsing the bare number `"1"` with a config built from
/// `IntervalUnit::Second` must yield an interval of exactly one second:
/// zero months, zero days and `NANOS_PER_SECOND` nanoseconds.
#[test]
fn test_parse_interval_month_day_nano_config() {
    let config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", config).unwrap();

    assert_eq!(
        (parsed.months, parsed.days, parsed.nanoseconds),
        (0, 0, NANOS_PER_SECOND)
    );
}
2793}