1use arrow_array::ArrowNativeTypeOp;
22use arrow_array::timezone::Tz;
23use arrow_array::types::*;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a nanosecond count.
///
/// Every entry has `O` subtracted from it (wrapping) before use, so callers pass
/// `O = b'0'` for raw ASCII digits and `O = 0` for values that are already decoded.
/// The `N` digits are treated as the leading digits of a nine-digit fraction, so
/// the accumulated value is multiplied by `10^(9 - N)`.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for digit in &digits[..N] {
        value = value * 10 + digit.wrapping_sub(O) as u32;
    }
    // Pad on the right so that e.g. "5" means 500_000_000 ns.
    let scale = 10_u32.pow((9 - N) as _);
    value * scale
}
38
/// Scratch state used to parse the fixed-position prefix of a timestamp string.
struct TimestampParser {
    /// Up to the first 32 input bytes, each with `b'0'` subtracted (wrapping).
    /// Positions beyond the input length stay `0`.
    digits: [u8; 32],
    /// Bit `i` is set when input byte `i` was an ASCII digit (`digits[i] < 10`).
    mask: u32,
}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
/// Parses a time-of-day string into a [`NaiveTime`], returning `None` on any
/// malformed input.
///
/// Accepted shapes, as implemented below:
/// - `H:MM` or `HH:MM`
/// - `H:MM:SS` or `HH:MM:SS`
/// - a seconds form followed by `.` and one or more fractional digits (only the
///   first nine contribute to the result)
/// - any of the above followed by a space and `AM`/`PM` in any letter case, in
///   which case the hour must be in `1..=12`
///
/// A seconds value of `60` is passed to chrono as second 59 with an extra
/// 1_000_000_000 nanoseconds.
fn string_to_time(s: &str) -> Option<NaiveTime> {
    let bytes = s.as_bytes();
    // The shortest accepted input is "H:MM"; this also keeps `len() - 3` below
    // from underflowing.
    if bytes.len() < 4 {
        return None;
    }

    // Strip an optional " AM" / " PM" suffix: `Some(true)` is AM, `Some(false)`
    // is PM, `None` means 24-hour time.
    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
        _ => (None, bytes),
    };

    // Re-check the length now that the suffix may have been removed.
    if bytes.len() < 4 {
        return None;
    }

    // HHMMSS as ASCII; fields that are absent keep their b'0' default.
    let mut digits = [b'0'; 6];

    // Extract the hour, which may be one or two characters wide.
    let bytes = match (bytes[1], bytes[2]) {
        (b':', _) => {
            digits[1] = bytes[0];
            &bytes[2..]
        }
        (_, b':') => {
            digits[0] = bytes[0];
            digits[1] = bytes[1];
            &bytes[3..]
        }
        _ => return None,
    };

    // Minutes are always two characters.
    if bytes.len() < 2 {
        return None;
    }

    digits[2] = bytes[0];
    digits[3] = bytes[1];

    let nanoseconds = match bytes.get(2) {
        Some(b':') => {
            // ":SS" needs three more bytes after the minutes.
            if bytes.len() < 5 {
                return None;
            }

            digits[4] = bytes[3];
            digits[5] = bytes[4];

            // Optional fractional seconds.
            match bytes.get(5) {
                Some(b'.') => {
                    let decimal = &bytes[6..];
                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
                        return None;
                    }
                    // Digits past the ninth are validated above but not used.
                    match decimal.len() {
                        0 => return None,
                        1 => parse_nanos::<1, b'0'>(decimal),
                        2 => parse_nanos::<2, b'0'>(decimal),
                        3 => parse_nanos::<3, b'0'>(decimal),
                        4 => parse_nanos::<4, b'0'>(decimal),
                        5 => parse_nanos::<5, b'0'>(decimal),
                        6 => parse_nanos::<6, b'0'>(decimal),
                        7 => parse_nanos::<7, b'0'>(decimal),
                        8 => parse_nanos::<8, b'0'>(decimal),
                        _ => parse_nanos::<9, b'0'>(decimal),
                    }
                }
                Some(_) => return None,
                None => 0,
            }
        }
        Some(_) => return None,
        None => 0,
    };

    // Convert ASCII to digit values and reject anything that was not a digit.
    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
    if digits.iter().any(|x| *x > 9) {
        return None;
    }

    // Map 12-hour clock values onto 0..=23; hours outside 1..=12 are invalid
    // when an AM/PM suffix is present.
    let hour = match (digits[0] * 10 + digits[1], am) {
        (12, Some(true)) => 0,
        (h @ 1..=11, Some(true)) => h,
        (12, Some(false)) => 12,
        (h @ 1..=11, Some(false)) => h + 12,
        (_, Some(_)) => return None,
        (h, None) => h,
    };

    // Second 60 becomes second 59 plus one extra second of nanoseconds.
    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
        60 => (59, nanoseconds + 1_000_000_000),
        s => (s, nanoseconds),
    };

    NaiveTime::from_hms_nano_opt(
        hour as _,
        (digits[2] * 10 + digits[3]) as _,
        second as _,
        nanoseconds,
    )
}
410
/// Parses strings into the native value of an Arrow primitive type.
pub trait Parser: ArrowPrimitiveType {
    /// Parses `string`, returning `None` if it is not a valid value for this type.
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parses `string` according to `_format`.
    ///
    /// The default implementation ignores the format and delegates to
    /// [`Parser::parse`]; implementations for formatted types override it.
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
466macro_rules! parser_primitive {
467 ($t:ty) => {
468 impl Parser for $t {
469 fn parse(string: &str) -> Option<Self::Native> {
470 if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471 return None;
472 }
473 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474 string.as_bytes(),
475 ) {
476 (Some(n), x) if x == string.len() => Some(n),
477 _ => None,
478 }
479 }
480 }
481 };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497 fn parse(string: &str) -> Option<i64> {
498 string_to_timestamp_nanos(string).ok()
499 }
500}
501
502impl Parser for TimestampMicrosecondType {
503 fn parse(string: &str) -> Option<i64> {
504 let nanos = string_to_timestamp_nanos(string).ok();
505 nanos.map(|x| x / 1000)
506 }
507}
508
509impl Parser for TimestampMillisecondType {
510 fn parse(string: &str) -> Option<i64> {
511 let nanos = string_to_timestamp_nanos(string).ok();
512 nanos.map(|x| x / 1_000_000)
513 }
514}
515
516impl Parser for TimestampSecondType {
517 fn parse(string: &str) -> Option<i64> {
518 let nanos = string_to_timestamp_nanos(string).ok();
519 nanos.map(|x| x / 1_000_000_000)
520 }
521}
522
523impl Parser for Time64NanosecondType {
524 fn parse(string: &str) -> Option<Self::Native> {
526 string_to_time_nanoseconds(string)
527 .ok()
528 .or_else(|| string.parse::<Self::Native>().ok())
529 }
530
531 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532 let nt = NaiveTime::parse_from_str(string, format).ok()?;
533 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534 }
535}
536
537impl Parser for Time64MicrosecondType {
538 fn parse(string: &str) -> Option<Self::Native> {
540 string_to_time_nanoseconds(string)
541 .ok()
542 .map(|nanos| nanos / 1_000)
543 .or_else(|| string.parse::<Self::Native>().ok())
544 }
545
546 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547 let nt = NaiveTime::parse_from_str(string, format).ok()?;
548 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549 }
550}
551
552impl Parser for Time32MillisecondType {
553 fn parse(string: &str) -> Option<Self::Native> {
555 string_to_time_nanoseconds(string)
556 .ok()
557 .map(|nanos| (nanos / 1_000_000) as i32)
558 .or_else(|| string.parse::<Self::Native>().ok())
559 }
560
561 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562 let nt = NaiveTime::parse_from_str(string, format).ok()?;
563 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564 }
565}
566
567impl Parser for Time32SecondType {
568 fn parse(string: &str) -> Option<Self::Native> {
570 string_to_time_nanoseconds(string)
571 .ok()
572 .map(|nanos| (nanos / 1_000_000_000) as i32)
573 .or_else(|| string.parse::<Self::Native>().ok())
574 }
575
576 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577 let nt = NaiveTime::parse_from_str(string, format).ok()?;
578 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579 }
580}
581
/// Offset subtracted from chrono's `num_days_from_ce()` to obtain a day count
/// relative to the Unix epoch (1970-01-01).
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a datetime cannot be expressed as an `i64` number of
/// nanoseconds.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
/// Splits a sign-prefixed date such as `+10999-12-31` or `-0012-05-06` into
/// `(year, month, day)`.
///
/// The caller must ensure `string` starts with `+` or `-`. The year needs at
/// least four digits after the sign. Month and day are returned as parsed,
/// without range checks. Returns `None` if the layout does not match or any
/// field fails to parse.
fn parse_extended_ymd(string: &str) -> Option<(i32, u32, u32)> {
    debug_assert!(string.starts_with('+') || string.starts_with('-'));

    // Skip the sign, then cut at the hyphen that ends the year digits.
    let (year_digits, month_day) = string[1..].split_once('-')?;
    if year_digits.len() < 4 {
        return None;
    }

    // Parse the year together with its sign.
    let signed_year = &string[..=year_digits.len()];
    let year = signed_year.parse::<i32>().ok()?;

    let (month, day) = month_day.split_once('-')?;
    let month = month.parse::<u32>().ok()?;
    let day = day.parse::<u32>().ok()?;

    Some((year, month, day))
}
613
614fn parse_date(string: &str) -> Option<NaiveDate> {
615 if string.starts_with('+') || string.starts_with('-') {
623 let (year, month, day) = parse_extended_ymd(string)?;
624 return NaiveDate::from_ymd_opt(year, month, day);
625 }
626
627 if string.len() > 10 {
628 return string_to_datetime(&Utc, string)
630 .map(|dt| dt.date_naive())
631 .ok();
632 };
633 let mut digits = [0; 10];
634 let mut mask = 0;
635
636 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
638 *o = i.wrapping_sub(b'0');
639 mask |= ((*o < 10) as u16) << idx
640 }
641
642 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
643
644 if digits[4] != HYPHEN {
646 let (year, month, day) = match (mask, string.len()) {
647 (0b11111111, 8) => (
648 digits[0] as u16 * 1000
649 + digits[1] as u16 * 100
650 + digits[2] as u16 * 10
651 + digits[3] as u16,
652 digits[4] * 10 + digits[5],
653 digits[6] * 10 + digits[7],
654 ),
655 _ => return None,
656 };
657 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
658 }
659
660 let (month, day) = match mask {
661 0b1101101111 => {
662 if digits[7] != HYPHEN {
663 return None;
664 }
665 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
666 }
667 0b101101111 => {
668 if digits[7] != HYPHEN {
669 return None;
670 }
671 (digits[5] * 10 + digits[6], digits[8])
672 }
673 0b110101111 => {
674 if digits[6] != HYPHEN {
675 return None;
676 }
677 (digits[5], digits[7] * 10 + digits[8])
678 }
679 0b10101111 => {
680 if digits[6] != HYPHEN {
681 return None;
682 }
683 (digits[5], digits[7])
684 }
685 _ => return None,
686 };
687
688 let year =
689 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
690
691 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
692}
693
694fn parse_date_to_days(string: &str) -> Option<i32> {
703 if string.starts_with('+') || string.starts_with('-') {
704 let (year, month, day) = parse_extended_ymd(string)?;
705 let y = year as i64;
706 let era = y.div_euclid(400);
707 let yoe = y.rem_euclid(400) as i32;
708 let nd = NaiveDate::from_ymd_opt(yoe, month, day)?;
709 let in_era = (nd.num_days_from_ce() - EPOCH_DAYS_FROM_CE) as i64;
710 return i32::try_from(era * 146_097 + in_era).ok();
711 }
712 parse_date(string).map(|nd| nd.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
713}
714
715impl Parser for Date32Type {
716 fn parse(string: &str) -> Option<i32> {
717 parse_date_to_days(string)
718 }
719
720 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
721 let date = NaiveDate::parse_from_str(string, format).ok()?;
722 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
723 }
724}
725
726impl Parser for Date64Type {
727 fn parse(string: &str) -> Option<i64> {
728 if string.len() <= 10 {
729 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
730 Some(datetime.and_utc().timestamp_millis())
731 } else {
732 let date_time = string_to_datetime(&Utc, string).ok()?;
733 Some(date_time.timestamp_millis())
734 }
735 }
736
737 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
738 use chrono::format::Fixed;
739 use chrono::format::StrftimeItems;
740 let fmt = StrftimeItems::new(format);
741 let has_zone = fmt.into_iter().any(|item| match item {
742 chrono::format::Item::Fixed(fixed_item) => matches!(
743 fixed_item,
744 Fixed::RFC2822
745 | Fixed::RFC3339
746 | Fixed::TimezoneName
747 | Fixed::TimezoneOffsetColon
748 | Fixed::TimezoneOffsetColonZ
749 | Fixed::TimezoneOffset
750 | Fixed::TimezoneOffsetZ
751 ),
752 _ => false,
753 });
754 if has_zone {
755 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
756 Some(date_time.timestamp_millis())
757 } else {
758 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
759 Some(date_time.and_utc().timestamp_millis())
760 }
761 }
762}
763
/// Finishes parsing a decimal written in scientific (`e`/`E`) notation.
///
/// `parse_decimal` calls this once it meets an exponent marker, handing over the
/// state it has accumulated so far.
///
/// - `s`: the complete input string (also used in error messages)
/// - `digits`: number of significant digits accumulated so far
/// - `fractionals`: number of fractional digits accumulated so far
/// - `result`: the value accumulated so far, as an unscaled integer
/// - `index`: byte offset into `s`; scanning resumes at `index + fractionals`
/// - `precision`, `scale`: the target decimal type's precision and scale
///
/// Returns the value rescaled to `scale`, or an [`ArrowError::ParseError`] if the
/// remaining input is malformed or the value needs more than `precision` digits.
/// All arithmetic on `result` uses wrapping operations.
fn parse_e_notation<T: DecimalType>(
    s: &str,
    mut digits: u16,
    mut fractionals: i16,
    mut result: T::Native,
    index: usize,
    precision: u16,
    scale: i16,
) -> Result<T::Native, ArrowError> {
    let mut exp: i16 = 0;
    let base = T::Native::usize_as(10);

    // True for a non-negative exponent, false once a '-' sign is seen.
    let mut pos_shift_direction: bool = true;

    // Resume after the fractional digits the caller has already consumed.
    let mut bs = s.as_bytes().iter().skip(index + fractionals as usize);

    // Consume any remaining mantissa digits up to the exponent marker. These
    // are always fractional digits, since the caller handles integer digits.
    for b in bs.by_ref() {
        match b {
            b'0'..=b'9' => {
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                fractionals += 1;
                digits += 1;
            }
            b'e' | b'E' => {
                break;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        };
    }

    // Parse the exponent: at most one sign, and decimal digits.
    let mut signed = false;
    for b in bs {
        match b {
            b'-' if !signed => {
                pos_shift_direction = false;
                signed = true;
            }
            b'+' if !signed => {
                pos_shift_direction = true;
                signed = true;
            }
            b if b.is_ascii_digit() => {
                exp *= 10;
                exp += (b - b'0') as i16;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        }
    }

    // Nothing was parsed at all: no mantissa digits and a zero exponent.
    if digits == 0 && fractionals == 0 && exp == 0 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    if !pos_shift_direction {
        // The negative exponent is larger than the digit count plus the scale,
        // so the result is returned as zero.
        if exp - (digits as i16 + scale) > 0 {
            return Ok(T::Native::usize_as(0));
        }
        exp *= -1;
    }

    // `exp` now becomes the number of fractional digits the value really has
    // once the exponent is applied.
    exp = fractionals - exp;
    // With a negative exponent the fraction can be longer than the digits
    // seen; count those places as digits for the precision check.
    if !pos_shift_direction && exp > digits as i16 {
        digits = exp as u16;
    }
    // `exp` finally becomes the power of ten needed to reach the target scale.
    exp = scale - exp;

    if (digits as i16 + exp) as u16 > precision {
        return Err(ArrowError::ParseError(format!(
            "parse decimal overflow ({s})"
        )));
    }

    // Rescale: divide when there are too many fractional digits, multiply
    // when there are too few.
    if exp < 0 {
        result = result.div_wrapping(base.pow_wrapping(-exp as _));
    } else {
        result = result.mul_wrapping(base.pow_wrapping(exp as _));
    }

    Ok(result)
}
868
/// Parses `s` into the unscaled native integer of a decimal with the given
/// `precision` and `scale`.
///
/// The input may carry a leading `+` or `-`, a decimal point, and an `e`/`E`
/// exponent. Leading zeros are not counted as digits. In plain notation,
/// fractional digits beyond `scale` are dropped rather than rounded.
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] when the input is malformed or the value
/// needs more than `precision` digits.
///
/// Arithmetic on the accumulated value uses wrapping operations.
pub fn parse_decimal<T: DecimalType>(
    s: &str,
    precision: u8,
    scale: i8,
) -> Result<T::Native, ArrowError> {
    let mut result = T::Native::usize_as(0);
    let mut fractionals: i8 = 0;
    let mut digits: u8 = 0;
    let base = T::Native::usize_as(10);

    let bs = s.as_bytes();

    // The input must end in a digit, or in '.' provided the string has more than
    // one character. This also rejects the empty string.
    if !bs
        .last()
        .is_some_and(|b| b.is_ascii_digit() || (b == &b'.' && s.len() > 1))
    {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    let (signed, negative) = match bs.first() {
        Some(b'-') => (true, true),
        Some(b'+') => (true, false),
        _ => (false, false),
    };

    // Skip the sign byte, if any; indices still refer to positions in `s`.
    let mut bs = bs.iter().enumerate().skip(signed as usize);

    let mut is_e_notation = false;

    // Overflow is checked against `precision` after the loop, so the
    // accumulation itself may wrap.
    while let Some((index, b)) = bs.next() {
        match b {
            b'0'..=b'9' => {
                // Leading zeros do not count towards the digit total.
                if digits == 0 && *b == b'0' {
                    continue;
                }
                digits += 1;
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
            }
            b'.' => {
                let point_index = index;

                // Fractional part: runs until the end of input or an exponent.
                for (_, b) in bs.by_ref() {
                    if !b.is_ascii_digit() {
                        if *b == b'e' || *b == b'E' {
                            result = parse_e_notation::<T>(
                                s,
                                digits as u16,
                                fractionals as i16,
                                result,
                                point_index + 1,
                                precision as u16,
                                scale as i16,
                            )?;

                            is_e_notation = true;

                            break;
                        }
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }
                    // Digits beyond the target scale are validated above but
                    // otherwise ignored.
                    if fractionals == scale {
                        continue;
                    }
                    fractionals += 1;
                    digits += 1;
                    result = result.mul_wrapping(base);
                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                }

                // The exponent helper has already consumed the rest of the input.
                if is_e_notation {
                    break;
                }
            }
            b'e' | b'E' => {
                // Exponent directly after the integer part.
                result = parse_e_notation::<T>(
                    s,
                    digits as u16,
                    fractionals as i16,
                    result,
                    index,
                    precision as u16,
                    scale as i16,
                )?;

                is_e_notation = true;

                break;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        }
    }

    // Plain notation: pad with zeros up to `scale` and enforce `precision`.
    // The exponent path has already rescaled and checked its result.
    if !is_e_notation {
        if fractionals < scale {
            let exp = scale - fractionals;
            if exp as u8 + digits > precision {
                return Err(ArrowError::ParseError(format!(
                    "parse decimal overflow ({s})"
                )));
            }
            let mul = base.pow_wrapping(exp as _);
            result = result.mul_wrapping(mul);
        } else if digits > precision {
            return Err(ArrowError::ParseError(format!(
                "parse decimal overflow ({s})"
            )));
        }
    }

    Ok(if negative {
        result.neg_wrapping()
    } else {
        result
    })
}
1008
1009pub fn parse_interval_year_month(
1011 value: &str,
1012) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
1013 let config = IntervalParseConfig::new(IntervalUnit::Year);
1014 let interval = Interval::parse(value, &config)?;
1015
1016 let months = interval.to_year_months().map_err(|_| {
1017 ArrowError::CastError(format!(
1018 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
1019 ))
1020 })?;
1021
1022 Ok(IntervalYearMonthType::make_value(0, months))
1023}
1024
1025pub fn parse_interval_day_time(
1027 value: &str,
1028) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1029 let config = IntervalParseConfig::new(IntervalUnit::Day);
1030 let interval = Interval::parse(value, &config)?;
1031
1032 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1033 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1034 )))?;
1035
1036 Ok(IntervalDayTimeType::make_value(days, millis))
1037}
1038
1039pub fn parse_interval_month_day_nano_config(
1041 value: &str,
1042 config: IntervalParseConfig,
1043) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1044 let interval = Interval::parse(value, &config)?;
1045
1046 let (months, days, nanos) = interval.to_month_day_nanos();
1047
1048 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1049}
1050
1051pub fn parse_interval_month_day_nano(
1053 value: &str,
1054) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1055 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1056}
1057
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one 24-hour day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1064
/// Configuration for parsing interval strings.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// Unit used for an interval component that does not name its own unit.
    default_unit: IntervalUnit,
}
1074
impl IntervalParseConfig {
    /// Creates a configuration whose unit-less components are read as
    /// `default_unit`.
    pub fn new(default_unit: IntervalUnit) -> Self {
        Self { default_unit }
    }
}
1081
/// Units that an interval component can be expressed in.
///
/// Each variant has a distinct single-bit `u16` discriminant.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A century (100 years).
    Century     = 0b_0000_0000_0001,
    /// A decade (10 years).
    Decade      = 0b_0000_0000_0010,
    /// A year (12 months).
    Year        = 0b_0000_0000_0100,
    /// A month.
    Month       = 0b_0000_0000_1000,
    /// A week (7 days).
    Week        = 0b_0000_0001_0000,
    /// A day.
    Day         = 0b_0000_0010_0000,
    /// An hour.
    Hour        = 0b_0000_0100_0000,
    /// A minute.
    Minute      = 0b_0000_1000_0000,
    /// A second.
    Second      = 0b_0001_0000_0000,
    /// A millisecond.
    Millisecond = 0b_0010_0000_0000,
    /// A microsecond.
    Microsecond = 0b_0100_0000_0000,
    /// A nanosecond.
    Nanosecond  = 0b_1000_0000_0000,
}
1113
1114impl FromStr for IntervalUnit {
1119 type Err = ArrowError;
1120
1121 fn from_str(s: &str) -> Result<Self, ArrowError> {
1122 match s.to_lowercase().as_str() {
1123 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1124 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1125 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1126 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1127 "w" | "week" | "weeks" => Ok(Self::Week),
1128 "d" | "day" | "days" => Ok(Self::Day),
1129 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1130 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1131 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1132 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1133 Ok(Self::Millisecond)
1134 }
1135 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1136 Ok(Self::Microsecond)
1137 }
1138 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1139 _ => Err(ArrowError::InvalidArgumentError(format!(
1140 "Unknown interval type: {s}"
1141 ))),
1142 }
1143 }
1144}
1145
1146impl IntervalUnit {
1147 fn from_str_or_config(
1148 s: Option<&str>,
1149 config: &IntervalParseConfig,
1150 ) -> Result<Self, ArrowError> {
1151 match s {
1152 Some(s) => s.parse(),
1153 None => Ok(config.default_unit),
1154 }
1155 }
1156}
1157
/// Tuple form of an interval; presumably `(months, days, nanoseconds)`. TODO
/// confirm against callers.
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal digits kept for the fractional part of an interval amount.
const INTERVAL_PRECISION: u32 = 15;
1163
/// A decimal interval quantity, split into integer and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The part before the decimal point.
    integer: i64,
    /// The part after the decimal point, scaled by `10^INTERVAL_PRECISION`
    /// and negative when the amount is negative.
    frac: i64,
}
1171
#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor that stores `integer` and `frac` as given.
    fn new(integer: i64, frac: i64) -> Self {
        Self { integer, frac }
    }
}
1178
1179impl FromStr for IntervalAmount {
1180 type Err = ArrowError;
1181
1182 fn from_str(s: &str) -> Result<Self, Self::Err> {
1183 match s.split_once('.') {
1184 Some((integer, frac))
1185 if frac.len() <= INTERVAL_PRECISION as usize
1186 && !frac.is_empty()
1187 && !frac.starts_with('-') =>
1188 {
1189 let explicit_neg = integer.starts_with('-');
1192 let integer = if integer.is_empty() || integer == "-" {
1193 Ok(0)
1194 } else {
1195 integer.parse::<i64>().map_err(|_| {
1196 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1197 })
1198 }?;
1199
1200 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1201 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1202 })?;
1203
1204 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1206
1207 let frac = if integer < 0 || explicit_neg {
1209 -frac
1210 } else {
1211 frac
1212 };
1213
1214 let result = Self { integer, frac };
1215
1216 Ok(result)
1217 }
1218 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1219 "Failed to parse {s} as interval amount"
1220 ))),
1221 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1222 Err(ArrowError::ParseError(format!(
1223 "{s} exceeds the precision available for interval amount"
1224 )))
1225 }
1226 Some(_) | None => {
1227 let integer = s.parse::<i64>().map_err(|_| {
1228 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1229 })?;
1230
1231 let result = Self { integer, frac: 0 };
1232 Ok(result)
1233 }
1234 }
1235 }
1236}
1237
/// An interval accumulated from parsed components, kept as separate month, day
/// and nanosecond counts.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Whole months.
    months: i32,
    /// Whole days.
    days: i32,
    /// Nanoseconds.
    nanos: i64,
}
1244
1245impl Interval {
1246 fn new(months: i32, days: i32, nanos: i64) -> Self {
1247 Self {
1248 months,
1249 days,
1250 nanos,
1251 }
1252 }
1253
1254 fn to_year_months(&self) -> Result<i32, ArrowError> {
1255 match (self.months, self.days, self.nanos) {
1256 (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1257 _ => Err(ArrowError::InvalidArgumentError(format!(
1258 "Unable to represent interval with days and nanos as year-months: {self:?}"
1259 ))),
1260 }
1261 }
1262
1263 fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1264 let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1265
1266 match self.nanos {
1267 nanos if nanos % NANOS_PER_MILLIS == 0 => {
1268 let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1269 ArrowError::InvalidArgumentError(format!(
1270 "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1271 self.nanos
1272 ))
1273 })?;
1274
1275 Ok((days, millis))
1276 }
1277 nanos => Err(ArrowError::InvalidArgumentError(format!(
1278 "Unable to represent {nanos} as milliseconds"
1279 ))),
1280 }
1281 }
1282
1283 fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1284 (self.months, self.days, self.nanos)
1285 }
1286
1287 fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1290 let components = parse_interval_components(value, config)?;
1291
1292 components
1293 .into_iter()
1294 .try_fold(Self::default(), |result, (amount, unit)| {
1295 result.add(amount, unit)
1296 })
1297 }
1298
1299 fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1306 let result = match unit {
1307 IntervalUnit::Century => {
1308 let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1309 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1310 let months = months_int
1311 .add_checked(month_frac)?
1312 .try_into()
1313 .map_err(|_| {
1314 ArrowError::ParseError(format!(
1315 "Unable to represent {} centuries as months in a signed 32-bit integer",
1316 &amount.integer
1317 ))
1318 })?;
1319
1320 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1321 }
1322 IntervalUnit::Decade => {
1323 let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1324
1325 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1326 let months = months_int
1327 .add_checked(month_frac)?
1328 .try_into()
1329 .map_err(|_| {
1330 ArrowError::ParseError(format!(
1331 "Unable to represent {} decades as months in a signed 32-bit integer",
1332 &amount.integer
1333 ))
1334 })?;
1335
1336 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1337 }
1338 IntervalUnit::Year => {
1339 let months_int = amount.integer.mul_checked(12)?;
1340 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1341 let months = months_int
1342 .add_checked(month_frac)?
1343 .try_into()
1344 .map_err(|_| {
1345 ArrowError::ParseError(format!(
1346 "Unable to represent {} years as months in a signed 32-bit integer",
1347 &amount.integer
1348 ))
1349 })?;
1350
1351 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1352 }
1353 IntervalUnit::Month => {
1354 let months = amount.integer.try_into().map_err(|_| {
1355 ArrowError::ParseError(format!(
1356 "Unable to represent {} months in a signed 32-bit integer",
1357 &amount.integer
1358 ))
1359 })?;
1360
1361 let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1362 let days = days.try_into().map_err(|_| {
1363 ArrowError::ParseError(format!(
1364 "Unable to represent {} months as days in a signed 32-bit integer",
1365 amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1366 ))
1367 })?;
1368
1369 Self::new(
1370 self.months.add_checked(months)?,
1371 self.days.add_checked(days)?,
1372 self.nanos,
1373 )
1374 }
1375 IntervalUnit::Week => {
1376 let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1377 ArrowError::ParseError(format!(
1378 "Unable to represent {} weeks as days in a signed 32-bit integer",
1379 &amount.integer
1380 ))
1381 })?;
1382
1383 let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1384
1385 Self::new(
1386 self.months,
1387 self.days.add_checked(days)?,
1388 self.nanos.add_checked(nanos)?,
1389 )
1390 }
1391 IntervalUnit::Day => {
1392 let days = amount.integer.try_into().map_err(|_| {
1393 ArrowError::InvalidArgumentError(format!(
1394 "Unable to represent {} days in a signed 32-bit integer",
1395 amount.integer
1396 ))
1397 })?;
1398
1399 let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1400
1401 Self::new(
1402 self.months,
1403 self.days.add_checked(days)?,
1404 self.nanos.add_checked(nanos)?,
1405 )
1406 }
1407 IntervalUnit::Hour => {
1408 let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1409 let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1410 let nanos = nanos_int.add_checked(nanos_frac)?;
1411
1412 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1413 }
1414 IntervalUnit::Minute => {
1415 let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1416 let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1417
1418 let nanos = nanos_int.add_checked(nanos_frac)?;
1419
1420 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1421 }
1422 IntervalUnit::Second => {
1423 let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1424 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1425 let nanos = nanos_int.add_checked(nanos_frac)?;
1426
1427 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1428 }
1429 IntervalUnit::Millisecond => {
1430 let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1431 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1432 let nanos = nanos_int.add_checked(nanos_frac)?;
1433
1434 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1435 }
1436 IntervalUnit::Microsecond => {
1437 let nanos_int = amount.integer.mul_checked(1_000)?;
1438 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1439 let nanos = nanos_int.add_checked(nanos_frac)?;
1440
1441 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1442 }
1443 IntervalUnit::Nanosecond => {
1444 let nanos_int = amount.integer;
1445 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1446 let nanos = nanos_int.add_checked(nanos_frac)?;
1447
1448 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1449 }
1450 };
1451
1452 Ok(result)
1453 }
1454}
1455
1456fn parse_interval_components(
1458 value: &str,
1459 config: &IntervalParseConfig,
1460) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1461 let raw_pairs = split_interval_components(value);
1462
1463 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1465 .iter()
1466 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1467 .collect()
1468 else {
1469 return Err(ArrowError::ParseError(format!(
1470 "Invalid input syntax for type interval: {value:?}"
1471 )));
1472 };
1473
1474 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1476
1477 let mut observed_interval_types = 0;
1479 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1480 if observed_interval_types & (*unit as u16) != 0 {
1481 return Err(ArrowError::ParseError(format!(
1482 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1483 value,
1484 raw_unit.unwrap_or_default(),
1485 )));
1486 }
1487
1488 observed_interval_types |= *unit as u16;
1489 }
1490
1491 let result = amounts.iter().copied().zip(units.iter().copied());
1492
1493 Ok(result.collect::<Vec<_>>())
1494}
1495
1496fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1502 let mut result = vec![];
1503 let mut words = value.split(char::is_whitespace);
1504 while let Some(word) = words.next() {
1505 if let Some(split_word_at) = word.find(not_interval_amount) {
1506 let (amount, unit) = word.split_at(split_word_at);
1507 result.push((amount, Some(unit)));
1508 } else if let Some(unit) = words.next() {
1509 result.push((word, Some(unit)));
1510 } else {
1511 result.push((word, None));
1512 break;
1513 }
1514 }
1515 result
1516}
1517
/// Returns `true` when `c` cannot be part of an interval amount, i.e. it is neither an
/// ASCII digit, a `.` nor a `-`.
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1522
1523#[cfg(test)]
1524mod tests {
1525 use super::*;
1526 use arrow_array::temporal_conversions::date32_to_datetime;
1527 use arrow_buffer::i256;
1528
1529 #[test]
1530 fn test_parse_nanos() {
1531 assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1532 assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1533 assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1534 }
1535
1536 #[test]
1537 fn string_to_timestamp_timezone() {
1538 assert_eq!(
1540 1599572549190855000,
1541 parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1542 );
1543 assert_eq!(
1544 1599572549190855000,
1545 parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1546 );
1547 assert_eq!(
1548 1599572549000000000,
1549 parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1550 ); assert_eq!(
1552 1599590549190855000,
1553 parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1554 );
1555 }
1556
1557 #[test]
1558 fn string_to_timestamp_timezone_space() {
1559 assert_eq!(
1561 1599572549190855000,
1562 parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1563 );
1564 assert_eq!(
1565 1599572549190855000,
1566 parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1567 );
1568 assert_eq!(
1569 1599572549000000000,
1570 parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1571 ); assert_eq!(
1573 1599590549190855000,
1574 parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1575 );
1576 }
1577
1578 #[test]
1579 #[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
1581 let naive_datetime = NaiveDateTime::new(
1585 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1586 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1587 );
1588
1589 assert_eq!(
1591 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1592 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1593 );
1594
1595 assert_eq!(
1596 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1597 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1598 );
1599
1600 let datetime_whole_secs = NaiveDateTime::new(
1603 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1604 NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1605 )
1606 .and_utc();
1607
1608 assert_eq!(
1610 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1611 parse_timestamp("2020-09-08T13:42:29").unwrap()
1612 );
1613
1614 assert_eq!(
1615 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1616 parse_timestamp("2020-09-08 13:42:29").unwrap()
1617 );
1618
1619 let datetime_no_time = NaiveDateTime::new(
1623 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1624 NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1625 )
1626 .and_utc();
1627
1628 assert_eq!(
1629 datetime_no_time.timestamp_nanos_opt().unwrap(),
1630 parse_timestamp("2020-09-08").unwrap()
1631 )
1632 }
1633
1634 #[test]
1635 fn string_to_timestamp_chrono() {
1636 let cases = [
1637 "2020-09-08T13:42:29Z",
1638 "1969-01-01T00:00:00.1Z",
1639 "2020-09-08T12:00:12.12345678+00:00",
1640 "2020-09-08T12:00:12+00:00",
1641 "2020-09-08T12:00:12.1+00:00",
1642 "2020-09-08T12:00:12.12+00:00",
1643 "2020-09-08T12:00:12.123+00:00",
1644 "2020-09-08T12:00:12.1234+00:00",
1645 "2020-09-08T12:00:12.12345+00:00",
1646 "2020-09-08T12:00:12.123456+00:00",
1647 "2020-09-08T12:00:12.1234567+00:00",
1648 "2020-09-08T12:00:12.12345678+00:00",
1649 "2020-09-08T12:00:12.123456789+00:00",
1650 "2020-09-08T12:00:12.12345678912z",
1651 "2020-09-08T12:00:12.123456789123Z",
1652 "2020-09-08T12:00:12.123456789123+02:00",
1653 "2020-09-08T12:00:12.12345678912345Z",
1654 "2020-09-08T12:00:12.1234567891234567+02:00",
1655 "2020-09-08T12:00:60Z",
1656 "2020-09-08T12:00:60.123Z",
1657 "2020-09-08T12:00:60.123456+02:00",
1658 "2020-09-08T12:00:60.1234567891234567+02:00",
1659 "2020-09-08T12:00:60.999999999+02:00",
1660 "2020-09-08t12:00:12.12345678+00:00",
1661 "2020-09-08t12:00:12+00:00",
1662 "2020-09-08t12:00:12Z",
1663 ];
1664
1665 for case in cases {
1666 let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1667 let chrono_utc = chrono.with_timezone(&Utc);
1668
1669 let custom = string_to_datetime(&Utc, case).unwrap();
1670 assert_eq!(chrono_utc, custom)
1671 }
1672 }
1673
1674 #[test]
1675 fn string_to_timestamp_naive() {
1676 let cases = [
1677 "2018-11-13T17:11:10.011375885995",
1678 "2030-12-04T17:11:10.123",
1679 "2030-12-04T17:11:10.1234",
1680 "2030-12-04T17:11:10.123456",
1681 ];
1682 for case in cases {
1683 let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1684 let custom = string_to_datetime(&Utc, case).unwrap();
1685 assert_eq!(chrono, custom.naive_utc())
1686 }
1687 }
1688
1689 #[test]
1690 fn string_to_timestamp_invalid() {
1691 let cases = [
1693 ("", "timestamp must contain at least 10 characters"),
1694 ("SS", "timestamp must contain at least 10 characters"),
1695 ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1696 ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1697 ("1997-01-31 09:26:56.123Z", "error parsing time"),
1698 ("1997:01:31T09:26:56.123Z", "error parsing date"),
1699 ("1997:1:31T09:26:56.123Z", "error parsing date"),
1700 ("1997-01-32T09:26:56.123Z", "error parsing date"),
1701 ("1997-13-32T09:26:56.123Z", "error parsing date"),
1702 ("1997-02-29T09:26:56.123Z", "error parsing date"),
1703 ("2015-02-30T17:35:20-08:00", "error parsing date"),
1704 ("1997-01-10T9:26:56.123Z", "error parsing time"),
1705 ("2015-01-20T25:35:20-08:00", "error parsing time"),
1706 ("1997-01-10T09:61:56.123Z", "error parsing time"),
1707 ("1997-01-10T09:61:90.123Z", "error parsing time"),
1708 ("1997-01-10T12:00:6.123Z", "error parsing time"),
1709 ("1997-01-31T092656.123Z", "error parsing time"),
1710 ("1997-01-10T12:00:06.", "error parsing time"),
1711 ("1997-01-10T12:00:06. ", "error parsing time"),
1712 ];
1713
1714 for (s, ctx) in cases {
1715 let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1716 let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1717 assert_eq!(actual, expected)
1718 }
1719 }
1720
1721 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1723 let result = string_to_timestamp_nanos(s);
1724 if let Err(e) = &result {
1725 eprintln!("Error parsing timestamp '{s}': {e:?}");
1726 }
1727 result
1728 }
1729
1730 #[test]
1731 fn string_without_timezone_to_timestamp() {
1732 let naive_datetime = NaiveDateTime::new(
1735 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1736 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1737 );
1738
1739 assert_eq!(
1741 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1742 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1743 );
1744
1745 assert_eq!(
1746 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1747 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1748 );
1749
1750 let naive_datetime = NaiveDateTime::new(
1751 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1752 NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1753 );
1754
1755 assert_eq!(
1757 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1758 parse_timestamp("2020-09-08T13:42:29").unwrap()
1759 );
1760
1761 assert_eq!(
1762 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1763 parse_timestamp("2020-09-08 13:42:29").unwrap()
1764 );
1765
1766 let tz: Tz = "+02:00".parse().unwrap();
1767 let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1768 let utc = date.naive_utc().to_string();
1769 assert_eq!(utc, "2020-09-08 11:42:29");
1770 let local = date.naive_local().to_string();
1771 assert_eq!(local, "2020-09-08 13:42:29");
1772
1773 let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1774 let utc = date.naive_utc().to_string();
1775 assert_eq!(utc, "2020-09-08 13:42:29");
1776 let local = date.naive_local().to_string();
1777 assert_eq!(local, "2020-09-08 15:42:29");
1778
1779 let dt =
1780 NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1781 let local: Tz = "+08:00".parse().unwrap();
1782
1783 let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1785 assert_eq!(dt, date.naive_utc());
1786 assert_ne!(dt, date.naive_local());
1787
1788 let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1790 assert_eq!(dt, date.naive_local());
1791 assert_ne!(dt, date.naive_utc());
1792 }
1793
1794 #[test]
1795 fn parse_date32() {
1796 let cases = [
1797 "2020-09-08",
1798 "2020-9-8",
1799 "2020-09-8",
1800 "2020-9-08",
1801 "2020-12-1",
1802 "1690-2-5",
1803 "2020-09-08 01:02:03",
1804 ];
1805 for case in cases {
1806 let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1807 let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1808 .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1809 .unwrap();
1810 assert_eq!(v.date(), expected);
1811 }
1812
1813 let err_cases = [
1814 "",
1815 "80-01-01",
1816 "342",
1817 "Foo",
1818 "2020-09-08-03",
1819 "2020--04-03",
1820 "2020--",
1821 "2020-09-08 01",
1822 "2020-09-08 01:02",
1823 "2020-09-08 01-02-03",
1824 "2020-9-8 01:02:03",
1825 "2020-09-08 1:2:3",
1826 ];
1827 for case in err_cases {
1828 assert_eq!(Date32Type::parse(case), None);
1829 }
1830 }
1831
1832 #[test]
1833 fn parse_date32_extended_year() {
1834 let cases: &[(&str, i32)] = &[
1836 ("+1970-01-01", 0),
1837 ("+2024-01-01", 19_723),
1838 ("-0001-01-01", -719_893),
1839 ("+29349-01-26", 10_000_000),
1840 ("+2739877-01-03", 1_000_000_000),
1841 ("+5881580-07-11", i32::MAX),
1843 ("-5877641-06-23", i32::MIN),
1844 ];
1845 for (input, expected) in cases {
1846 assert_eq!(Date32Type::parse(input), Some(*expected), "input: {input}");
1847 }
1848
1849 assert_eq!(Date32Type::parse("+5881580-07-12"), None);
1851 assert_eq!(Date32Type::parse("-5877641-06-22"), None);
1852 assert_eq!(Date32Type::parse("+2739877-02-30"), None);
1854 assert_eq!(Date32Type::parse("+2739877-13-01"), None);
1855 assert_eq!(Date32Type::parse("-2739877-02-30"), None);
1856 }
1857
1858 #[test]
1859 fn parse_time64_nanos() {
1860 assert_eq!(
1861 Time64NanosecondType::parse("02:10:01.1234567899999999"),
1862 Some(7_801_123_456_789)
1863 );
1864 assert_eq!(
1865 Time64NanosecondType::parse("02:10:01.1234567"),
1866 Some(7_801_123_456_700)
1867 );
1868 assert_eq!(
1869 Time64NanosecondType::parse("2:10:01.1234567"),
1870 Some(7_801_123_456_700)
1871 );
1872 assert_eq!(
1873 Time64NanosecondType::parse("12:10:01.123456789 AM"),
1874 Some(601_123_456_789)
1875 );
1876 assert_eq!(
1877 Time64NanosecondType::parse("12:10:01.123456789 am"),
1878 Some(601_123_456_789)
1879 );
1880 assert_eq!(
1881 Time64NanosecondType::parse("2:10:01.12345678 PM"),
1882 Some(51_001_123_456_780)
1883 );
1884 assert_eq!(
1885 Time64NanosecondType::parse("2:10:01.12345678 pm"),
1886 Some(51_001_123_456_780)
1887 );
1888 assert_eq!(
1889 Time64NanosecondType::parse("02:10:01"),
1890 Some(7_801_000_000_000)
1891 );
1892 assert_eq!(
1893 Time64NanosecondType::parse("2:10:01"),
1894 Some(7_801_000_000_000)
1895 );
1896 assert_eq!(
1897 Time64NanosecondType::parse("12:10:01 AM"),
1898 Some(601_000_000_000)
1899 );
1900 assert_eq!(
1901 Time64NanosecondType::parse("12:10:01 am"),
1902 Some(601_000_000_000)
1903 );
1904 assert_eq!(
1905 Time64NanosecondType::parse("2:10:01 PM"),
1906 Some(51_001_000_000_000)
1907 );
1908 assert_eq!(
1909 Time64NanosecondType::parse("2:10:01 pm"),
1910 Some(51_001_000_000_000)
1911 );
1912 assert_eq!(
1913 Time64NanosecondType::parse("02:10"),
1914 Some(7_800_000_000_000)
1915 );
1916 assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1917 assert_eq!(
1918 Time64NanosecondType::parse("12:10 AM"),
1919 Some(600_000_000_000)
1920 );
1921 assert_eq!(
1922 Time64NanosecondType::parse("12:10 am"),
1923 Some(600_000_000_000)
1924 );
1925 assert_eq!(
1926 Time64NanosecondType::parse("2:10 PM"),
1927 Some(51_000_000_000_000)
1928 );
1929 assert_eq!(
1930 Time64NanosecondType::parse("2:10 pm"),
1931 Some(51_000_000_000_000)
1932 );
1933
1934 assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1936
1937 assert_eq!(
1939 Time64NanosecondType::parse("23:59:60"),
1940 Some(86_400_000_000_000)
1941 );
1942
1943 assert_eq!(
1945 Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1946 Some(7_801_123_456_700)
1947 );
1948 }
1949
1950 #[test]
1951 fn parse_time64_micros() {
1952 assert_eq!(
1954 Time64MicrosecondType::parse("02:10:01.1234"),
1955 Some(7_801_123_400)
1956 );
1957 assert_eq!(
1958 Time64MicrosecondType::parse("2:10:01.1234"),
1959 Some(7_801_123_400)
1960 );
1961 assert_eq!(
1962 Time64MicrosecondType::parse("12:10:01.123456 AM"),
1963 Some(601_123_456)
1964 );
1965 assert_eq!(
1966 Time64MicrosecondType::parse("12:10:01.123456 am"),
1967 Some(601_123_456)
1968 );
1969 assert_eq!(
1970 Time64MicrosecondType::parse("2:10:01.12345 PM"),
1971 Some(51_001_123_450)
1972 );
1973 assert_eq!(
1974 Time64MicrosecondType::parse("2:10:01.12345 pm"),
1975 Some(51_001_123_450)
1976 );
1977 assert_eq!(
1978 Time64MicrosecondType::parse("02:10:01"),
1979 Some(7_801_000_000)
1980 );
1981 assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1982 assert_eq!(
1983 Time64MicrosecondType::parse("12:10:01 AM"),
1984 Some(601_000_000)
1985 );
1986 assert_eq!(
1987 Time64MicrosecondType::parse("12:10:01 am"),
1988 Some(601_000_000)
1989 );
1990 assert_eq!(
1991 Time64MicrosecondType::parse("2:10:01 PM"),
1992 Some(51_001_000_000)
1993 );
1994 assert_eq!(
1995 Time64MicrosecondType::parse("2:10:01 pm"),
1996 Some(51_001_000_000)
1997 );
1998 assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1999 assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
2000 assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
2001 assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
2002 assert_eq!(
2003 Time64MicrosecondType::parse("2:10 PM"),
2004 Some(51_000_000_000)
2005 );
2006 assert_eq!(
2007 Time64MicrosecondType::parse("2:10 pm"),
2008 Some(51_000_000_000)
2009 );
2010
2011 assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
2013
2014 assert_eq!(
2016 Time64MicrosecondType::parse("23:59:60"),
2017 Some(86_400_000_000)
2018 );
2019
2020 assert_eq!(
2022 Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
2023 Some(7_801_123_400)
2024 );
2025 }
2026
2027 #[test]
2028 fn parse_time32_millis() {
2029 assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
2031 assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
2032 assert_eq!(
2033 Time32MillisecondType::parse("12:10:01.123 AM"),
2034 Some(601_123)
2035 );
2036 assert_eq!(
2037 Time32MillisecondType::parse("12:10:01.123 am"),
2038 Some(601_123)
2039 );
2040 assert_eq!(
2041 Time32MillisecondType::parse("2:10:01.12 PM"),
2042 Some(51_001_120)
2043 );
2044 assert_eq!(
2045 Time32MillisecondType::parse("2:10:01.12 pm"),
2046 Some(51_001_120)
2047 );
2048 assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2049 assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2050 assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2051 assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2052 assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2053 assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2054 assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2055 assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2056 assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2057 assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2058 assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2059 assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2060
2061 assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2063
2064 assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2066
2067 assert_eq!(
2069 Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2070 Some(7_801_100)
2071 );
2072 }
2073
2074 #[test]
2075 fn parse_time32_secs() {
2076 assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2078 assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2079 assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2080 assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2081 assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2082 assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2083 assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2084 assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2085 assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2086 assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2087 assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2088 assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2089 assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2090
2091 assert_eq!(Time32SecondType::parse("1"), Some(1));
2093
2094 assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2096
2097 assert_eq!(
2099 Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2100 Some(7_801)
2101 );
2102 }
2103
2104 #[test]
2105 fn test_string_to_time_invalid() {
2106 let cases = [
2107 "25:00",
2108 "9:00:",
2109 "009:00",
2110 "09:0:00",
2111 "25:00:00",
2112 "13:00 AM",
2113 "13:00 PM",
2114 "12:00. AM",
2115 "09:0:00",
2116 "09:01:0",
2117 "09:01:1",
2118 "9:1:0",
2119 "09:01:0",
2120 "1:00.123",
2121 "1:00:00.123f",
2122 " 9:00:00",
2123 ":09:00",
2124 "T9:00:00",
2125 "AM",
2126 ];
2127 for case in cases {
2128 assert!(string_to_time(case).is_none(), "{case}");
2129 }
2130 }
2131
2132 #[test]
2133 fn test_string_to_time_chrono() {
2134 let cases = [
2135 ("1:00", "%H:%M"),
2136 ("12:00", "%H:%M"),
2137 ("13:00", "%H:%M"),
2138 ("24:00", "%H:%M"),
2139 ("1:00:00", "%H:%M:%S"),
2140 ("12:00:30", "%H:%M:%S"),
2141 ("13:00:59", "%H:%M:%S"),
2142 ("24:00:60", "%H:%M:%S"),
2143 ("09:00:00", "%H:%M:%S%.f"),
2144 ("0:00:30.123456", "%H:%M:%S%.f"),
2145 ("0:00 AM", "%I:%M %P"),
2146 ("1:00 AM", "%I:%M %P"),
2147 ("12:00 AM", "%I:%M %P"),
2148 ("13:00 AM", "%I:%M %P"),
2149 ("0:00 PM", "%I:%M %P"),
2150 ("1:00 PM", "%I:%M %P"),
2151 ("12:00 PM", "%I:%M %P"),
2152 ("13:00 PM", "%I:%M %P"),
2153 ("1:00 pM", "%I:%M %P"),
2154 ("1:00 Pm", "%I:%M %P"),
2155 ("1:00 aM", "%I:%M %P"),
2156 ("1:00 Am", "%I:%M %P"),
2157 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2158 ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2159 ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2160 ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2161 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2162 ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2163 ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2164 ];
2165 for (s, format) in cases {
2166 let chrono = NaiveTime::parse_from_str(s, format).ok();
2167 let custom = string_to_time(s);
2168 assert_eq!(chrono, custom, "{s}");
2169 }
2170 }
2171
2172 #[test]
2173 fn test_parse_interval() {
2174 let config = IntervalParseConfig::new(IntervalUnit::Month);
2175
2176 assert_eq!(
2177 Interval::new(1i32, 0i32, 0i64),
2178 Interval::parse("1 month", &config).unwrap(),
2179 );
2180
2181 assert_eq!(
2182 Interval::new(2i32, 0i32, 0i64),
2183 Interval::parse("2 month", &config).unwrap(),
2184 );
2185
2186 assert_eq!(
2187 Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2188 Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2189 );
2190
2191 assert_eq!(
2192 Interval::new(0i32, 15i32, 0),
2193 Interval::parse("0.5 months", &config).unwrap(),
2194 );
2195
2196 assert_eq!(
2197 Interval::new(0i32, 15i32, 0),
2198 Interval::parse(".5 months", &config).unwrap(),
2199 );
2200
2201 assert_eq!(
2202 Interval::new(0i32, -15i32, 0),
2203 Interval::parse("-0.5 months", &config).unwrap(),
2204 );
2205
2206 assert_eq!(
2207 Interval::new(0i32, -15i32, 0),
2208 Interval::parse("-.5 months", &config).unwrap(),
2209 );
2210
2211 assert_eq!(
2212 Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2213 Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2214 );
2215
2216 assert_eq!(
2217 Interval::parse("1 centurys 1 month", &config)
2218 .unwrap_err()
2219 .to_string(),
2220 r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2221 );
2222
2223 assert_eq!(
2224 Interval::new(37i32, 0i32, 0i64),
2225 Interval::parse("3 year 1 month", &config).unwrap(),
2226 );
2227
2228 assert_eq!(
2229 Interval::new(35i32, 0i32, 0i64),
2230 Interval::parse("3 year -1 month", &config).unwrap(),
2231 );
2232
2233 assert_eq!(
2234 Interval::new(-37i32, 0i32, 0i64),
2235 Interval::parse("-3 year -1 month", &config).unwrap(),
2236 );
2237
2238 assert_eq!(
2239 Interval::new(-35i32, 0i32, 0i64),
2240 Interval::parse("-3 year 1 month", &config).unwrap(),
2241 );
2242
2243 assert_eq!(
2244 Interval::new(0i32, 5i32, 0i64),
2245 Interval::parse("5 days", &config).unwrap(),
2246 );
2247
2248 assert_eq!(
2249 Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2250 Interval::parse("7 days 3 hours", &config).unwrap(),
2251 );
2252
2253 assert_eq!(
2254 Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2255 Interval::parse("7 days 5 minutes", &config).unwrap(),
2256 );
2257
2258 assert_eq!(
2259 Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2260 Interval::parse("7 days -5 minutes", &config).unwrap(),
2261 );
2262
2263 assert_eq!(
2264 Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2265 Interval::parse("-7 days 5 hours", &config).unwrap(),
2266 );
2267
2268 assert_eq!(
2269 Interval::new(
2270 0i32,
2271 -7i32,
2272 -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2273 ),
2274 Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2275 );
2276
2277 assert_eq!(
2278 Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2279 Interval::parse("1 year 25 millisecond", &config).unwrap(),
2280 );
2281
2282 assert_eq!(
2283 Interval::new(
2284 12i32,
2285 1i32,
2286 (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2287 ),
2288 Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2289 );
2290
2291 assert_eq!(
2292 Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2293 Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2294 );
2295
2296 assert_eq!(
2297 Interval::new(12i32, 1i32, 1000i64),
2298 Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2299 );
2300
2301 assert_eq!(
2302 Interval::new(12i32, 1i32, 1i64),
2303 Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2304 );
2305
2306 assert_eq!(
2307 Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2308 Interval::parse("1 month -1 second", &config).unwrap(),
2309 );
2310
2311 assert_eq!(
2312 Interval::new(
2313 -13i32,
2314 -8i32,
2315 -NANOS_PER_HOUR
2316 - NANOS_PER_MINUTE
2317 - NANOS_PER_SECOND
2318 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2319 ),
2320 Interval::parse(
2321 "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2322 &config
2323 )
2324 .unwrap(),
2325 );
2326
2327 assert_eq!(
2329 Interval::new(1, 0, 0),
2330 Interval::parse("1", &config).unwrap()
2331 );
2332 assert_eq!(
2333 Interval::new(42, 0, 0),
2334 Interval::parse("42", &config).unwrap()
2335 );
2336 assert_eq!(
2337 Interval::new(0, 0, 42_000_000_000),
2338 Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2339 );
2340
2341 assert_eq!(
2343 Interval::new(1, 0, 0),
2344 Interval::parse("1 mon", &config).unwrap()
2345 );
2346 assert_eq!(
2347 Interval::new(1, 0, 0),
2348 Interval::parse("1 mons", &config).unwrap()
2349 );
2350 assert_eq!(
2351 Interval::new(0, 0, 1_000_000),
2352 Interval::parse("1 ms", &config).unwrap()
2353 );
2354 assert_eq!(
2355 Interval::new(0, 0, 1_000),
2356 Interval::parse("1 us", &config).unwrap()
2357 );
2358
2359 assert_eq!(
2361 Interval::new(0, 0, 1_000),
2362 Interval::parse("1us", &config).unwrap()
2363 );
2364 assert_eq!(
2365 Interval::new(0, 0, NANOS_PER_SECOND),
2366 Interval::parse("1s", &config).unwrap()
2367 );
2368 assert_eq!(
2369 Interval::new(1, 2, 10_864_000_000_000),
2370 Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2371 );
2372
2373 assert_eq!(
2374 Interval::new(
2375 -13i32,
2376 -8i32,
2377 -NANOS_PER_HOUR
2378 - NANOS_PER_MINUTE
2379 - NANOS_PER_SECOND
2380 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2381 ),
2382 Interval::parse(
2383 "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2384 &config
2385 )
2386 .unwrap(),
2387 );
2388
2389 assert_eq!(
2390 Interval::parse("1h s", &config).unwrap_err().to_string(),
2391 r#"Parser error: Invalid input syntax for type interval: "1h s""#
2392 );
2393
2394 assert_eq!(
2395 Interval::parse("1XX", &config).unwrap_err().to_string(),
2396 r#"Parser error: Invalid input syntax for type interval: "1XX""#
2397 );
2398 }
2399
2400 #[test]
2401 fn test_duplicate_interval_type() {
2402 let config = IntervalParseConfig::new(IntervalUnit::Month);
2403
2404 let err = Interval::parse("1 month 1 second 1 second", &config)
2405 .expect_err("parsing interval should have failed");
2406 assert_eq!(
2407 r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2408 format!("{err:?}")
2409 );
2410
2411 let err = Interval::parse("1 century 2 centuries", &config)
2413 .expect_err("parsing interval should have failed");
2414 assert_eq!(
2415 r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2416 format!("{err:?}")
2417 );
2418 }
2419
2420 #[test]
2421 fn test_interval_amount_parsing() {
2422 let result = IntervalAmount::from_str("123").unwrap();
2424 let expected = IntervalAmount::new(123, 0);
2425
2426 assert_eq!(result, expected);
2427
2428 let result = IntervalAmount::from_str("0.3").unwrap();
2430 let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2431
2432 assert_eq!(result, expected);
2433
2434 let result = IntervalAmount::from_str("-3.5").unwrap();
2436 let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2437
2438 assert_eq!(result, expected);
2439
2440 let result = IntervalAmount::from_str("3.");
2442 assert!(result.is_err());
2443
2444 let result = IntervalAmount::from_str("3.-5");
2446 assert!(result.is_err());
2447 }
2448
2449 #[test]
2450 fn test_interval_precision() {
2451 let config = IntervalParseConfig::new(IntervalUnit::Month);
2452
2453 let result = Interval::parse("100000.1 days", &config).unwrap();
2454 let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2455
2456 assert_eq!(result, expected);
2457 }
2458
#[test]
fn test_interval_addition() {
    // Builds the fractional component of an `IntervalAmount`: `digits`
    // interpreted as a decimal fraction with `places` digits, i.e.
    // `frac(5, 1)` is ".5" and `frac(25, 2)` is ".25".
    let frac = |digits: i64, places: u32| digits * 10_i64.pow(INTERVAL_PRECISION - places);

    // 4.1 centuries on top of 1 month: months go 1 -> 4921, days/nanos untouched.
    let result = Interval::new(1, 2, 3)
        .add(IntervalAmount::new(4, frac(1, 1)), IntervalUnit::Century)
        .unwrap();
    assert_eq!(result, Interval::new(4921, 2, 3));

    // 10.25 decades: months go 1 -> 1231.
    let result = Interval::new(1, 2, 3)
        .add(IntervalAmount::new(10, frac(25, 2)), IntervalUnit::Decade)
        .unwrap();
    assert_eq!(result, Interval::new(1231, 2, 3));

    // 30.3 years: months go 1 -> 364; the expected days and nanos are unchanged.
    let result = Interval::new(1, 2, 3)
        .add(IntervalAmount::new(30, frac(3, 1)), IntervalUnit::Year)
        .unwrap();
    assert_eq!(result, Interval::new(364, 2, 3));

    // 1.5 months: one extra month and the half month shows up as 15 extra days.
    let result = Interval::new(1, 2, 3)
        .add(IntervalAmount::new(1, frac(5, 1)), IntervalUnit::Month)
        .unwrap();
    assert_eq!(result, Interval::new(2, 17, 3));

    // -2 weeks: days go 25 -> 11.
    let result = Interval::new(1, 25, 3)
        .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
        .unwrap();
    assert_eq!(result, Interval::new(1, 11, 3));

    // 2.2 days: two extra days, and the remaining 0.2 day lands in the
    // nanosecond component as 17_280 seconds.
    let result = Interval::new(12, 15, 3)
        .add(IntervalAmount::new(2, frac(2, 1)), IntervalUnit::Day)
        .unwrap();
    assert_eq!(
        result,
        Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND)
    );

    // 12.5 hours: 45_000 seconds added to the nanosecond component.
    let result = Interval::new(1, 2, 3)
        .add(IntervalAmount::new(12, frac(5, 1)), IntervalUnit::Hour)
        .unwrap();
    assert_eq!(result, Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND));

    // -1.5 minutes on a negative start: 90 seconds subtracted from -3 ns.
    let result = Interval::new(0, 0, -3)
        .add(IntervalAmount::new(-1, frac(-5, 1)), IntervalUnit::Minute)
        .unwrap();
    assert_eq!(result, Interval::new(0, 0, -90_000_000_000 - 3));
}
2562
#[test]
fn string_to_timestamp_old() {
    // NOTE(review): this date is presumably too early to be represented as a
    // nanosecond timestamp; the test only pins the error suffix.
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();

    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2569
#[test]
fn test_parse_decimal_with_parameter() {
    // Long digit runs below are written as `concat!` chunks of ten characters
    // (and integer literals with `_` separators) so their lengths can be
    // checked by eye; the resulting values are ordinary `&'static str` / integers.

    // --- Plain decimal strings, precision 20 / scale 3 -----------------------
    // Expected values are the unscaled integers; fractional digits past the
    // scale do not contribute in these cases.
    let fixed_scale_cases = [
        ("0", 0i128),
        ("123.123", 123123i128),
        ("123.1234", 123123i128),
        ("123.1", 123100i128),
        ("123", 123000i128),
        ("-123.123", -123123i128),
        ("-123.1234", -123123i128),
        ("-123.1", -123100i128),
        ("-123", -123000i128),
        ("0.0000123", 0i128),
        ("12.", 12000i128),
        ("-12.", -12000i128),
        ("00.1", 100i128),
        ("-00.1", -100i128),
        ("12345678912345678.1234", 12_345_678_912_345_678_123i128),
        ("-12345678912345678.1234", -12_345_678_912_345_678_123i128),
        ("99999999999999999.999", 99_999_999_999_999_999_999i128),
        ("-99999999999999999.999", -99_999_999_999_999_999_999i128),
        (".123", 123i128),
        ("-.123", -123i128),
        ("123.", 123000i128),
        ("-123.", -123000i128),
    ];
    for (input, want) in fixed_scale_cases {
        let got_128 = parse_decimal::<Decimal128Type>(input, 20, 3).unwrap();
        assert_eq!(want, got_128);
        let got_256 = parse_decimal::<Decimal256Type>(input, 20, 3).unwrap();
        assert_eq!(i256::from_i128(want), got_256);
    }

    // --- Scientific notation --------------------------------------------------
    // Each exponent-form input must parse to the same value as its plain
    // spelling at the given scale (precision 20).
    let e_notation_tests = [
        ("1.23e3", "1230.0", 2),
        ("5.6714e+2", "567.14", 4),
        ("5.6714e-2", "0.056714", 4),
        ("5.6714e-2", "0.056714", 3),
        ("5.6741214125e2", "567.41214125", 4),
        ("8.91E4", "89100.0", 2),
        ("3.14E+5", "314000.0", 2),
        ("2.718e0", "2.718", 2),
        ("9.999999e-1", "0.9999999", 4),
        ("1.23e+3", "1230", 2),
        ("1.234559e+3", "1234.559", 2),
        ("1.00E-10", "0.0000000001", 11),
        ("1.23e-4", "0.000123", 2),
        ("9.876e7", "98760000.0", 2),
        ("5.432E+8", "543200000.0", 10),
        ("1.234567e9", "1234567000.0", 2),
        ("1.234567e2", "123.45670000", 2),
        ("4749.3e-5", "0.047493", 10),
        ("4749.3e+5", "474930000", 10),
        ("4749.3e-5", "0.047493", 1),
        ("4749.3e+5", "474930000", 1),
        ("0E-8", "0", 10),
        ("0E+6", "0", 10),
        ("1E-8", "0.00000001", 10),
        ("12E+6", "12000000", 10),
        ("12E-6", "0.000012", 10),
        ("0.1e-6", "0.0000001", 10),
        ("0.1e+6", "100000", 10),
        ("0.12e-6", "0.00000012", 10),
        ("0.12e+6", "120000", 10),
        ("000000000001e0", "000000000001", 3),
        ("000001.1034567002e0", "000001.1034567002", 3),
        ("1.234e16", "12340000000000000", 0),
        ("123.4e16", "1234000000000000000", 0),
    ];
    for (exp_form, plain_form, scale) in e_notation_tests {
        let from_exp_128 = parse_decimal::<Decimal128Type>(exp_form, 20, scale);
        let from_plain_128 = parse_decimal::<Decimal128Type>(plain_form, 20, scale);
        assert_eq!(from_exp_128.unwrap(), from_plain_128.unwrap());

        let from_exp_256 = parse_decimal::<Decimal256Type>(exp_form, 20, scale);
        let from_plain_256 = parse_decimal::<Decimal256Type>(plain_form, 20, scale);
        assert_eq!(from_exp_256.unwrap(), from_plain_256.unwrap());
    }

    // --- Malformed input --------------------------------------------------------
    // Every entry must be rejected with the generic "can't parse" message that
    // echoes the offending string.
    let can_not_parse_tests = [
        "123,123",
        ".",
        "123.123.123",
        "",
        "+",
        "-",
        "e",
        "1.3e+e3",
        "5.6714ee-2",
        "4.11ee-+4",
        "4.11e++4",
        "1.1e.12",
        "1.23e+3.",
        "1.23e+3.1",
        "1e",
        "1e+",
        "1e-",
    ];
    for s in can_not_parse_tests {
        let expected = format!("Parser error: can't parse the string value {s} to decimal");

        let err_128 = parse_decimal::<Decimal128Type>(s, 20, 3).unwrap_err();
        assert_eq!(expected, err_128.to_string());

        let err_256 = parse_decimal::<Decimal256Type>(s, 20, 3).unwrap_err();
        assert_eq!(expected, err_256.to_string());
    }

    // --- Overflow ---------------------------------------------------------------
    // With precision 10 each of these needs more digits than allowed at the
    // given scale and must report an overflow.
    let overflow_parse_tests = [
        ("12345678", 3),
        ("1.2345678e7", 3),
        ("12345678.9", 3),
        ("1.23456789e+7", 3),
        ("99999999.99", 3),
        ("9.999999999e7", 3),
        ("12345678908765.123456", 3),
        ("123456789087651234.56e-4", 3),
        ("1234560000000", 0),
        ("12345678900.0", 0),
        ("1.23456e12", 0),
    ];
    let expected_128 = "Parser error: parse decimal overflow";
    let expected_256 = "Parser error: parse decimal overflow";
    for (input, scale) in overflow_parse_tests {
        let actual_128 = parse_decimal::<Decimal128Type>(input, 10, scale)
            .unwrap_err()
            .to_string();
        assert!(
            actual_128.contains(expected_128),
            "actual: '{actual_128}', expected: '{expected_128}'"
        );

        let actual_256 = parse_decimal::<Decimal256Type>(input, 10, scale)
            .unwrap_err()
            .to_string();
        assert!(
            actual_256.contains(expected_256),
            "actual: '{actual_256}', expected: '{expected_256}'"
        );
    }

    // --- Decimal128 edge cases (precision 38) -----------------------------------
    // 38 nines: the largest value with 38 decimal digits.
    const NINES_38: i128 = 99_999_999_999_999_999_999_999_999_999_999_999_999i128;
    let edge_tests_128 = [
        (
            // 38 nines
            concat!("9999999999", "9999999999", "9999999999", "99999999"),
            NINES_38,
            0,
        ),
        (
            // 36 nines, then ".99"
            concat!("9999999999", "9999999999", "9999999999", "999999", ".99"),
            NINES_38,
            2,
        ),
        (
            // 25 nines, ".", 13 nines
            concat!("9999999999", "9999999999", "99999", ".", "9999999999", "999"),
            NINES_38,
            13,
        ),
        (
            // 25 nines with no fraction: padded with 13 zeros at scale 13
            concat!("9999999999", "9999999999", "99999"),
            99_999_999_999_999_999_999_999_990_000_000_000_000i128,
            13,
        ),
        (
            // "0." followed by 38 nines
            concat!("0.", "9999999999", "9999999999", "9999999999", "99999999"),
            NINES_38,
            38,
        ),
        (
            // NOTE(review): intended to be the plain-decimal spelling of the
            // `1.016744e-320` case that follows, i.e. "0.", 319 zeros, then
            // "1016744" -- confirm the zero count against the original literal.
            concat!(
                "0.",
                "0000000000", "0000000000", "0000000000", "0000000000", "0000000000",
                "0000000000", "0000000000", "0000000000", "0000000000", "0000000000",
                "0000000000", "0000000000", "0000000000", "0000000000", "0000000000",
                "0000000000", "0000000000", "0000000000", "0000000000", "0000000000",
                "0000000000", "0000000000", "0000000000", "0000000000", "0000000000",
                "0000000000", "0000000000", "0000000000", "0000000000", "0000000000",
                "0000000000", "000000000",
                "1016744",
            ),
            0i128,
            15,
        ),
        ("1.016744e-320", 0i128, 15),
        ("-1e3", -1000000000i128, 6),
        ("+1e3", 1000000000i128, 6),
        (
            "-1e31",
            -10_000_000_000_000_000_000_000_000_000_000_000_000i128,
            6,
        ),
    ];
    for (input, want, scale) in edge_tests_128 {
        let got = parse_decimal::<Decimal128Type>(input, 38, scale);
        assert_eq!(want, got.unwrap());
    }

    // --- Decimal256 edge cases (precision 76) -----------------------------------
    // 76 nines.
    const NINES_76: &str = concat!(
        "9999999999", "9999999999", "9999999999", "9999999999",
        "9999999999", "9999999999", "9999999999", "999999",
    );
    // 50 nines followed by 26 zeros.
    const NINES_50_ZEROS_26: &str = concat!(
        "9999999999", "9999999999", "9999999999", "9999999999", "9999999999",
        "0000000000", "0000000000", "000000",
    );
    let as_i256 = |digits: &str| i256::from_string(digits).unwrap();
    let edge_tests_256 = [
        (NINES_76, as_i256(NINES_76), 0),
        (
            // 72 nines, then ".9999"
            concat!(
                "9999999999", "9999999999", "9999999999", "9999999999",
                "9999999999", "9999999999", "9999999999", "99",
                ".9999",
            ),
            as_i256(NINES_76),
            4,
        ),
        (
            // 50 nines, ".", 26 nines
            concat!(
                "9999999999", "9999999999", "9999999999", "9999999999", "9999999999",
                ".",
                "9999999999", "9999999999", "999999",
            ),
            as_i256(NINES_76),
            26,
        ),
        (
            // "9.", 75 nines, "e49"
            concat!(
                "9.",
                "9999999999", "9999999999", "9999999999", "9999999999",
                "9999999999", "9999999999", "9999999999", "99999",
                "e49",
            ),
            as_i256(NINES_76),
            26,
        ),
        (
            // 50 nines with no fraction
            concat!(
                "9999999999", "9999999999", "9999999999", "9999999999", "9999999999",
            ),
            as_i256(NINES_50_ZEROS_26),
            26,
        ),
        (
            // "9.", 49 nines, "e+49"
            concat!(
                "9.",
                "9999999999", "9999999999", "9999999999", "9999999999", "999999999",
                "e+49",
            ),
            as_i256(NINES_50_ZEROS_26),
            26,
        ),
    ];
    for (input, want, scale) in edge_tests_256 {
        let got = parse_decimal::<Decimal256Type>(input, 76, scale);
        assert_eq!(want, got.unwrap());
    }

    // --- Scale 0 ------------------------------------------------------------------
    // Columns: input, expected integer, precision. Any fractional part is
    // absent from the expected values.
    let zero_scale_tests = [
        (".123", 0, 3),
        ("0.123", 0, 3),
        ("1.0", 1, 3),
        ("1.2", 1, 3),
        ("1.00", 1, 3),
        ("1.23", 1, 3),
        ("1.000", 1, 3),
        ("1.123", 1, 3),
        ("123.0", 123, 3),
        ("123.4", 123, 3),
        ("123.00", 123, 3),
        ("123.45", 123, 3),
        ("123.000000000000000000004", 123, 3),
        ("0.123e2", 12, 3),
        ("0.123e4", 1230, 10),
        ("1.23e4", 12300, 10),
        ("12.3e4", 123000, 10),
        ("123e4", 1230000, 10),
        (
            // "2", 36 zeros, "2.0"
            concat!("2", "0000000000", "0000000000", "0000000000", "000000", "2.0"),
            20_000_000_000_000_000_000_000_000_000_000_000_002,
            38,
        ),
    ];
    for (input, want, precision) in zero_scale_tests {
        let got_128 = parse_decimal::<Decimal128Type>(input, precision, 0).unwrap();
        assert_eq!(want, got_128);
    }

    // Malformed input is rejected the same way at scale 0.
    let can_not_parse_zero_scale = [".", "blag", "", "+", "-", "e"];
    for s in can_not_parse_zero_scale {
        let err_128 = parse_decimal::<Decimal128Type>(s, 5, 0).unwrap_err();
        assert_eq!(
            format!("Parser error: can't parse the string value {s} to decimal"),
            err_128.to_string(),
        );
    }
}
2845
#[test]
fn test_parse_empty() {
    // Neither the empty string nor a lone sign may parse as any numeric type.
    for text in ["", "+"] {
        assert_eq!(Int32Type::parse(text), None);
        assert_eq!(Int64Type::parse(text), None);
        assert_eq!(UInt32Type::parse(text), None);
        assert_eq!(UInt64Type::parse(text), None);
        assert_eq!(Float32Type::parse(text), None);
        assert_eq!(Float64Type::parse(text), None);
    }

    // Temporal types are only checked against the empty string.
    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
2863
#[test]
fn test_parse_interval_month_day_nano_config() {
    // A bare "1" carries no unit of its own; with a config built from
    // `IntervalUnit::Second` the expected result is exactly one second.
    let seconds_config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", seconds_config).unwrap();

    assert_eq!(
        (parsed.months, parsed.days, parsed.nanoseconds),
        (0, 0, NANOS_PER_SECOND)
    );
}
2875}