arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a nanosecond count.
///
/// Each entry has the offset `O` subtracted before being used as a decimal digit
/// (`O = b'0'` for raw ASCII, `O = 0` for already-decoded digits). The `N` digits are
/// read as the leading digits of a 9-digit fraction, so `"123"` becomes `123_000_000`.
///
/// `N` is expected to be at most 9.
///
/// # Panics
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &digit in &digits[..N] {
        value = value * 10 + digit.wrapping_sub(O) as u32;
    }
    // Scale up to nanoseconds: pad with the (9 - N) trailing zeros that were not written
    value * 10_u32.pow((9 - N) as u32)
}
38
/// Helper for parsing RFC3339 timestamps
///
/// Captures at most the first 32 bytes of the input (see `TimestampParser::new`);
/// positions past the end of the input keep a `0` digit and a `0` mask bit.
struct TimestampParser {
    /// The timestamp bytes to parse minus `b'0'`
    ///
    /// This makes interpretation as an integer inexpensive
    digits: [u8; 32],
    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
    ///
    /// Bit `i` describes `digits[i]`
    mask: u32,
}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306    let bytes = s.as_bytes();
307    if bytes.len() < 4 {
308        return None;
309    }
310
311    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314        _ => (None, bytes),
315    };
316
317    if bytes.len() < 4 {
318        return None;
319    }
320
321    let mut digits = [b'0'; 6];
322
323    // Extract hour
324    let bytes = match (bytes[1], bytes[2]) {
325        (b':', _) => {
326            digits[1] = bytes[0];
327            &bytes[2..]
328        }
329        (_, b':') => {
330            digits[0] = bytes[0];
331            digits[1] = bytes[1];
332            &bytes[3..]
333        }
334        _ => return None,
335    };
336
337    if bytes.len() < 2 {
338        return None; // Minutes required
339    }
340
341    // Extract minutes
342    digits[2] = bytes[0];
343    digits[3] = bytes[1];
344
345    let nanoseconds = match bytes.get(2) {
346        Some(b':') => {
347            if bytes.len() < 5 {
348                return None;
349            }
350
351            // Extract seconds
352            digits[4] = bytes[3];
353            digits[5] = bytes[4];
354
355            // Extract sub-seconds if any
356            match bytes.get(5) {
357                Some(b'.') => {
358                    let decimal = &bytes[6..];
359                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360                        return None;
361                    }
362                    match decimal.len() {
363                        0 => return None,
364                        1 => parse_nanos::<1, b'0'>(decimal),
365                        2 => parse_nanos::<2, b'0'>(decimal),
366                        3 => parse_nanos::<3, b'0'>(decimal),
367                        4 => parse_nanos::<4, b'0'>(decimal),
368                        5 => parse_nanos::<5, b'0'>(decimal),
369                        6 => parse_nanos::<6, b'0'>(decimal),
370                        7 => parse_nanos::<7, b'0'>(decimal),
371                        8 => parse_nanos::<8, b'0'>(decimal),
372                        _ => parse_nanos::<9, b'0'>(decimal),
373                    }
374                }
375                Some(_) => return None,
376                None => 0,
377            }
378        }
379        Some(_) => return None,
380        None => 0,
381    };
382
383    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384    if digits.iter().any(|x| *x > 9) {
385        return None;
386    }
387
388    let hour = match (digits[0] * 10 + digits[1], am) {
389        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393        (_, Some(_)) => return None,
394        (h, None) => h,
395    };
396
397    // Handle leap second
398    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399        60 => (59, nanoseconds + 1_000_000_000),
400        s => (s, nanoseconds),
401    };
402
403    NaiveTime::from_hms_nano_opt(
404        hour as _,
405        (digits[2] * 10 + digits[3]) as _,
406        second as _,
407        nanoseconds,
408    )
409}
410
/// Specialized parsing implementations to convert strings to Arrow types.
///
/// This is used by the CSV and JSON readers and can be used directly as well.
///
/// # Example
///
/// To parse a string to a [`Date32Type`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::Date32Type;
/// let date = Date32Type::parse("2021-01-01").unwrap();
/// assert_eq!(date, 18628);
/// ```
///
/// To parse a string to a [`TimestampNanosecondType`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::TimestampNanosecondType;
/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
/// assert_eq!(ts, 1609459200123456789);
/// ```
pub trait Parser: ArrowPrimitiveType {
    /// Parse a string to the native type
    ///
    /// Returns `None` if `string` is not a valid representation of the type
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parse a string to the native type with a format string
    ///
    /// When not implemented, the format string is unused, and this method is equivalent to
    /// [`Parser::parse`]. The date and time implementations in this module override it and
    /// pass `format` to `chrono`'s `parse_from_str`.
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
/// Returns `true` if `opt` is `Some` and `f` returns `true` for the contained value
///
/// Stand-in for `Option::is_some_and`, which is only stable since 1.70 and so
/// can't be used while the current MSRV is lower
#[inline(always)]
fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
    opt.map_or(false, f)
}
474
475macro_rules! parser_primitive {
476    ($t:ty) => {
477        impl Parser for $t {
478            fn parse(string: &str) -> Option<Self::Native> {
479                if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
480                    return None;
481                }
482                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
483                    string.as_bytes(),
484                ) {
485                    (Some(n), x) if x == string.len() => Some(n),
486                    _ => None,
487                }
488            }
489        }
490    };
491}
492parser_primitive!(UInt64Type);
493parser_primitive!(UInt32Type);
494parser_primitive!(UInt16Type);
495parser_primitive!(UInt8Type);
496parser_primitive!(Int64Type);
497parser_primitive!(Int32Type);
498parser_primitive!(Int16Type);
499parser_primitive!(Int8Type);
500parser_primitive!(DurationNanosecondType);
501parser_primitive!(DurationMicrosecondType);
502parser_primitive!(DurationMillisecondType);
503parser_primitive!(DurationSecondType);
504
505impl Parser for TimestampNanosecondType {
506    fn parse(string: &str) -> Option<i64> {
507        string_to_timestamp_nanos(string).ok()
508    }
509}
510
511impl Parser for TimestampMicrosecondType {
512    fn parse(string: &str) -> Option<i64> {
513        let nanos = string_to_timestamp_nanos(string).ok();
514        nanos.map(|x| x / 1000)
515    }
516}
517
518impl Parser for TimestampMillisecondType {
519    fn parse(string: &str) -> Option<i64> {
520        let nanos = string_to_timestamp_nanos(string).ok();
521        nanos.map(|x| x / 1_000_000)
522    }
523}
524
525impl Parser for TimestampSecondType {
526    fn parse(string: &str) -> Option<i64> {
527        let nanos = string_to_timestamp_nanos(string).ok();
528        nanos.map(|x| x / 1_000_000_000)
529    }
530}
531
532impl Parser for Time64NanosecondType {
533    // Will truncate any fractions of a nanosecond
534    fn parse(string: &str) -> Option<Self::Native> {
535        string_to_time_nanoseconds(string)
536            .ok()
537            .or_else(|| string.parse::<Self::Native>().ok())
538    }
539
540    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
541        let nt = NaiveTime::parse_from_str(string, format).ok()?;
542        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
543    }
544}
545
546impl Parser for Time64MicrosecondType {
547    // Will truncate any fractions of a microsecond
548    fn parse(string: &str) -> Option<Self::Native> {
549        string_to_time_nanoseconds(string)
550            .ok()
551            .map(|nanos| nanos / 1_000)
552            .or_else(|| string.parse::<Self::Native>().ok())
553    }
554
555    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
556        let nt = NaiveTime::parse_from_str(string, format).ok()?;
557        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
558    }
559}
560
561impl Parser for Time32MillisecondType {
562    // Will truncate any fractions of a millisecond
563    fn parse(string: &str) -> Option<Self::Native> {
564        string_to_time_nanoseconds(string)
565            .ok()
566            .map(|nanos| (nanos / 1_000_000) as i32)
567            .or_else(|| string.parse::<Self::Native>().ok())
568    }
569
570    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
571        let nt = NaiveTime::parse_from_str(string, format).ok()?;
572        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
573    }
574}
575
576impl Parser for Time32SecondType {
577    // Will truncate any fractions of a second
578    fn parse(string: &str) -> Option<Self::Native> {
579        string_to_time_nanoseconds(string)
580            .ok()
581            .map(|nanos| (nanos / 1_000_000_000) as i32)
582            .or_else(|| string.parse::<Self::Native>().ok())
583    }
584
585    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
586        let nt = NaiveTime::parse_from_str(string, format).ok()?;
587        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
588    }
589}
590
/// Day number of 1970-01-01 as reported by chrono's `num_days_from_ce`, which counts
/// 0001-01-01 as day 1
///
/// Subtracted from `num_days_from_ce()` to obtain days since the Unix epoch
const EPOCH_DAYS_FROM_CE: i32 = 719_163;
593
/// Error message used when a datetime cannot be represented as `i64` nanoseconds
/// since the Unix epoch (see `to_timestamp_nanos`)
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
596
597fn parse_date(string: &str) -> Option<NaiveDate> {
598    if string.len() > 10 {
599        // Try to parse as datetime and return just the date part
600        return string_to_datetime(&Utc, string)
601            .map(|dt| dt.date_naive())
602            .ok();
603    };
604    let mut digits = [0; 10];
605    let mut mask = 0;
606
607    // Treating all bytes the same way, helps LLVM vectorise this correctly
608    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
609        *o = i.wrapping_sub(b'0');
610        mask |= ((*o < 10) as u16) << idx
611    }
612
613    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
614
615    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
616    if digits[4] != HYPHEN {
617        let (year, month, day) = match (mask, string.len()) {
618            (0b11111111, 8) => (
619                digits[0] as u16 * 1000
620                    + digits[1] as u16 * 100
621                    + digits[2] as u16 * 10
622                    + digits[3] as u16,
623                digits[4] * 10 + digits[5],
624                digits[6] * 10 + digits[7],
625            ),
626            _ => return None,
627        };
628        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
629    }
630
631    let (month, day) = match mask {
632        0b1101101111 => {
633            if digits[7] != HYPHEN {
634                return None;
635            }
636            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
637        }
638        0b101101111 => {
639            if digits[7] != HYPHEN {
640                return None;
641            }
642            (digits[5] * 10 + digits[6], digits[8])
643        }
644        0b110101111 => {
645            if digits[6] != HYPHEN {
646                return None;
647            }
648            (digits[5], digits[7] * 10 + digits[8])
649        }
650        0b10101111 => {
651            if digits[6] != HYPHEN {
652                return None;
653            }
654            (digits[5], digits[7])
655        }
656        _ => return None,
657    };
658
659    let year =
660        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
661
662    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
663}
664
665impl Parser for Date32Type {
666    fn parse(string: &str) -> Option<i32> {
667        let date = parse_date(string)?;
668        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
669    }
670
671    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
672        let date = NaiveDate::parse_from_str(string, format).ok()?;
673        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
674    }
675}
676
677impl Parser for Date64Type {
678    fn parse(string: &str) -> Option<i64> {
679        if string.len() <= 10 {
680            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
681            Some(datetime.and_utc().timestamp_millis())
682        } else {
683            let date_time = string_to_datetime(&Utc, string).ok()?;
684            Some(date_time.timestamp_millis())
685        }
686    }
687
688    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
689        use chrono::format::Fixed;
690        use chrono::format::StrftimeItems;
691        let fmt = StrftimeItems::new(format);
692        let has_zone = fmt.into_iter().any(|item| match item {
693            chrono::format::Item::Fixed(fixed_item) => matches!(
694                fixed_item,
695                Fixed::RFC2822
696                    | Fixed::RFC3339
697                    | Fixed::TimezoneName
698                    | Fixed::TimezoneOffsetColon
699                    | Fixed::TimezoneOffsetColonZ
700                    | Fixed::TimezoneOffset
701                    | Fixed::TimezoneOffsetZ
702            ),
703            _ => false,
704        });
705        if has_zone {
706            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
707            Some(date_time.timestamp_millis())
708        } else {
709            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
710            Some(date_time.and_utc().timestamp_millis())
711        }
712    }
713}
714
715fn parse_e_notation<T: DecimalType>(
716    s: &str,
717    mut digits: u16,
718    mut fractionals: i16,
719    mut result: T::Native,
720    index: usize,
721    precision: u16,
722    scale: i16,
723) -> Result<T::Native, ArrowError> {
724    let mut exp: i16 = 0;
725    let base = T::Native::usize_as(10);
726
727    let mut exp_start: bool = false;
728    // e has a plus sign
729    let mut pos_shift_direction: bool = true;
730
731    // skip to point or exponent index
732    let mut bs;
733    if fractionals > 0 {
734        // it's a fraction, so the point index needs to be skipped, so +1
735        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
736    } else {
737        // it's actually an integer that is already written into the result, so let's skip on to e
738        bs = s.as_bytes().iter().skip(index);
739    }
740
741    while let Some(b) = bs.next() {
742        match b {
743            b'0'..=b'9' => {
744                result = result.mul_wrapping(base);
745                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
746                if fractionals > 0 {
747                    fractionals += 1;
748                }
749                digits += 1;
750            }
751            &b'e' | &b'E' => {
752                exp_start = true;
753            }
754            _ => {
755                return Err(ArrowError::ParseError(format!(
756                    "can't parse the string value {s} to decimal"
757                )));
758            }
759        };
760
761        if exp_start {
762            pos_shift_direction = match bs.next() {
763                Some(&b'-') => false,
764                Some(&b'+') => true,
765                Some(b) => {
766                    if !b.is_ascii_digit() {
767                        return Err(ArrowError::ParseError(format!(
768                            "can't parse the string value {s} to decimal"
769                        )));
770                    }
771
772                    exp *= 10;
773                    exp += (b - b'0') as i16;
774
775                    true
776                }
777                None => {
778                    return Err(ArrowError::ParseError(format!(
779                        "can't parse the string value {s} to decimal"
780                    )))
781                }
782            };
783
784            for b in bs.by_ref() {
785                if !b.is_ascii_digit() {
786                    return Err(ArrowError::ParseError(format!(
787                        "can't parse the string value {s} to decimal"
788                    )));
789                }
790                exp *= 10;
791                exp += (b - b'0') as i16;
792            }
793        }
794    }
795
796    if digits == 0 && fractionals == 0 && exp == 0 {
797        return Err(ArrowError::ParseError(format!(
798            "can't parse the string value {s} to decimal"
799        )));
800    }
801
802    if !pos_shift_direction {
803        // exponent has a large negative sign
804        // 1.12345e-30 => 0.0{29}12345, scale = 5
805        if exp - (digits as i16 + scale) > 0 {
806            return Ok(T::Native::usize_as(0));
807        }
808        exp *= -1;
809    }
810
811    // point offset
812    exp = fractionals - exp;
813    // We have zeros on the left, we need to count them
814    if !pos_shift_direction && exp > digits as i16 {
815        digits = exp as u16;
816    }
817    // Number of numbers to be removed or added
818    exp = scale - exp;
819
820    if (digits as i16 + exp) as u16 > precision {
821        return Err(ArrowError::ParseError(format!(
822            "parse decimal overflow ({s})"
823        )));
824    }
825
826    if exp < 0 {
827        result = result.div_wrapping(base.pow_wrapping(-exp as _));
828    } else {
829        result = result.mul_wrapping(base.pow_wrapping(exp as _));
830    }
831
832    Ok(result)
833}
834
835/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
836/// The result value can't be out of bounds.
837pub fn parse_decimal<T: DecimalType>(
838    s: &str,
839    precision: u8,
840    scale: i8,
841) -> Result<T::Native, ArrowError> {
842    let mut result = T::Native::usize_as(0);
843    let mut fractionals: i8 = 0;
844    let mut digits: u8 = 0;
845    let base = T::Native::usize_as(10);
846
847    let bs = s.as_bytes();
848    let (signed, negative) = match bs.first() {
849        Some(b'-') => (true, true),
850        Some(b'+') => (true, false),
851        _ => (false, false),
852    };
853
854    if bs.is_empty() || signed && bs.len() == 1 {
855        return Err(ArrowError::ParseError(format!(
856            "can't parse the string value {s} to decimal"
857        )));
858    }
859
860    // Iterate over the raw input bytes, skipping the sign if any
861    let mut bs = bs.iter().enumerate().skip(signed as usize);
862
863    let mut is_e_notation = false;
864
865    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
866    // Thus, if we validate the precision correctly, we can skip overflow checks.
867    #[allow(clippy::question_mark)]
868    while let Some((index, b)) = bs.next() {
869        match b {
870            b'0'..=b'9' => {
871                if digits == 0 && *b == b'0' {
872                    // Ignore leading zeros.
873                    continue;
874                }
875                digits += 1;
876                result = result.mul_wrapping(base);
877                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
878            }
879            b'.' => {
880                let point_index = index;
881
882                for (_, b) in bs.by_ref() {
883                    if !b.is_ascii_digit() {
884                        if *b == b'e' || *b == b'E' {
885                            result = match parse_e_notation::<T>(
886                                s,
887                                digits as u16,
888                                fractionals as i16,
889                                result,
890                                point_index,
891                                precision as u16,
892                                scale as i16,
893                            ) {
894                                Err(e) => return Err(e),
895                                Ok(v) => v,
896                            };
897
898                            is_e_notation = true;
899
900                            break;
901                        }
902                        return Err(ArrowError::ParseError(format!(
903                            "can't parse the string value {s} to decimal"
904                        )));
905                    }
906                    if fractionals == scale && scale != 0 {
907                        // We have processed all the digits that we need. All that
908                        // is left is to validate that the rest of the string contains
909                        // valid digits.
910                        continue;
911                    }
912                    fractionals += 1;
913                    digits += 1;
914                    result = result.mul_wrapping(base);
915                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
916                }
917
918                if is_e_notation {
919                    break;
920                }
921
922                // Fail on "."
923                if digits == 0 {
924                    return Err(ArrowError::ParseError(format!(
925                        "can't parse the string value {s} to decimal"
926                    )));
927                }
928            }
929            b'e' | b'E' => {
930                result = match parse_e_notation::<T>(
931                    s,
932                    digits as u16,
933                    fractionals as i16,
934                    result,
935                    index,
936                    precision as u16,
937                    scale as i16,
938                ) {
939                    Err(e) => return Err(e),
940                    Ok(v) => v,
941                };
942
943                is_e_notation = true;
944
945                break;
946            }
947            _ => {
948                return Err(ArrowError::ParseError(format!(
949                    "can't parse the string value {s} to decimal"
950                )));
951            }
952        }
953    }
954
955    if !is_e_notation {
956        if fractionals < scale {
957            let exp = scale - fractionals;
958            if exp as u8 + digits > precision {
959                return Err(ArrowError::ParseError(format!(
960                    "parse decimal overflow ({s})"
961                )));
962            }
963            let mul = base.pow_wrapping(exp as _);
964            result = result.mul_wrapping(mul);
965        } else if digits > precision {
966            return Err(ArrowError::ParseError(format!(
967                "parse decimal overflow ({s})"
968            )));
969        }
970    }
971
972    Ok(if negative {
973        result.neg_wrapping()
974    } else {
975        result
976    })
977}
978
979/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
980pub fn parse_interval_year_month(
981    value: &str,
982) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
983    let config = IntervalParseConfig::new(IntervalUnit::Year);
984    let interval = Interval::parse(value, &config)?;
985
986    let months = interval.to_year_months().map_err(|_| {
987        ArrowError::CastError(format!(
988            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
989        ))
990    })?;
991
992    Ok(IntervalYearMonthType::make_value(0, months))
993}
994
995/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
996pub fn parse_interval_day_time(
997    value: &str,
998) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
999    let config = IntervalParseConfig::new(IntervalUnit::Day);
1000    let interval = Interval::parse(value, &config)?;
1001
1002    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1003        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1004    )))?;
1005
1006    Ok(IntervalDayTimeType::make_value(days, millis))
1007}
1008
1009/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1010pub fn parse_interval_month_day_nano_config(
1011    value: &str,
1012    config: IntervalParseConfig,
1013) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1014    let interval = Interval::parse(value, &config)?;
1015
1016    let (months, days, nanos) = interval.to_month_day_nanos();
1017
1018    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1019}
1020
1021/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1022pub fn parse_interval_month_day_nano(
1023    value: &str,
1024) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1025    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1026}
1027
/// Number of nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Number of nanoseconds in one second
const NANOS_PER_SECOND: i64 = NANOS_PER_MILLIS * 1_000;
/// Number of nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = NANOS_PER_SECOND * 60;
/// Number of nanoseconds in one hour
const NANOS_PER_HOUR: i64 = NANOS_PER_MINUTE * 60;
/// Number of nanoseconds in one day (only compiled for test builds)
#[cfg(test)]
const NANOS_PER_DAY: i64 = NANOS_PER_HOUR * 24;
1034
/// Config to parse interval strings
///
/// Currently stores the `default_unit` to use if the string doesn't have one specified
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// The default unit to use if none is specified
    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
    ///
    /// Only consulted for an amount that is not followed by a unit name
    default_unit: IntervalUnit,
}

impl IntervalParseConfig {
    /// Create a new [IntervalParseConfig] with the given default unit
    ///
    /// `default_unit` is stored as-is and applied to amounts that have no unit of their own
    pub fn new(default_unit: IntervalUnit) -> Self {
        Self { default_unit }
    }
}
1051
/// The units an interval amount can be expressed in
///
/// Every variant owns exactly one bit of a `u16`, so a set of units can be
/// tracked as a bitfield (e.g. to detect a unit that is given twice).
#[rustfmt::skip]
#[derive(Clone, Copy, Debug)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 1 << 0,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1083
1084/// Logic for parsing interval unit strings
1085///
1086/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1087/// for a list of unit names supported by PostgreSQL which we try to match here.
1088impl FromStr for IntervalUnit {
1089    type Err = ArrowError;
1090
1091    fn from_str(s: &str) -> Result<Self, ArrowError> {
1092        match s.to_lowercase().as_str() {
1093            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1094            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1095            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1096            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1097            "w" | "week" | "weeks" => Ok(Self::Week),
1098            "d" | "day" | "days" => Ok(Self::Day),
1099            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1100            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1101            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1102            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1103                Ok(Self::Millisecond)
1104            }
1105            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1106                Ok(Self::Microsecond)
1107            }
1108            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1109            _ => Err(ArrowError::InvalidArgumentError(format!(
1110                "Unknown interval type: {s}"
1111            ))),
1112        }
1113    }
1114}
1115
1116impl IntervalUnit {
1117    fn from_str_or_config(
1118        s: Option<&str>,
1119        config: &IntervalParseConfig,
1120    ) -> Result<Self, ArrowError> {
1121        match s {
1122            Some(s) => s.parse(),
1123            None => Ok(config.default_unit),
1124        }
1125    }
1126}
1127
/// A tuple representing (months, days, nanoseconds) in an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal digits kept for the fractional part of an interval amount
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds.
/// A parsed fraction is therefore below 10^15, which leaves room in an `i64`
/// for the largest multiplier applied to it when spilling into smaller units
/// (`7 * 24 * 6 * 6` for weeks).
const INTERVAL_PRECISION: u32 = 15;
1133
/// The numeric amount of one interval component, split into an integer part
/// and a fixed-point fractional part
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer component of the interval amount
    integer: i64,
    /// The fractional component multiplied by 10^INTERVAL_PRECISION
    ///
    /// When parsed from a string it carries the sign of the amount,
    /// e.g. `-1.5` yields `integer = -1` and `frac = -5 * 10^14`
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the integer part and the already scaled fractional part
    fn new(integer: i64, frac: i64) -> Self {
        Self { integer, frac }
    }
}
1148
1149impl FromStr for IntervalAmount {
1150    type Err = ArrowError;
1151
1152    fn from_str(s: &str) -> Result<Self, Self::Err> {
1153        match s.split_once('.') {
1154            Some((integer, frac))
1155                if frac.len() <= INTERVAL_PRECISION as usize
1156                    && !frac.is_empty()
1157                    && !frac.starts_with('-') =>
1158            {
1159                // integer will be "" for values like ".5"
1160                // and "-" for values like "-.5"
1161                let explicit_neg = integer.starts_with('-');
1162                let integer = if integer.is_empty() || integer == "-" {
1163                    Ok(0)
1164                } else {
1165                    integer.parse::<i64>().map_err(|_| {
1166                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1167                    })
1168                }?;
1169
1170                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1171                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1172                })?;
1173
1174                // scale fractional part by interval precision
1175                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1176
1177                // propagate the sign of the integer part to the fractional part
1178                let frac = if integer < 0 || explicit_neg {
1179                    -frac
1180                } else {
1181                    frac
1182                };
1183
1184                let result = Self { integer, frac };
1185
1186                Ok(result)
1187            }
1188            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1189                "Failed to parse {s} as interval amount"
1190            ))),
1191            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1192                Err(ArrowError::ParseError(format!(
1193                    "{s} exceeds the precision available for interval amount"
1194                )))
1195            }
1196            Some(_) | None => {
1197                let integer = s.parse::<i64>().map_err(|_| {
1198                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1199                })?;
1200
1201                let result = Self { integer, frac: 0 };
1202                Ok(result)
1203            }
1204        }
1205    }
1206}
1207
/// An interval accumulated from parsed components, kept as three independent
/// fields; the default value has all of them set to zero
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Number of months
    months: i32,
    /// Number of days
    days: i32,
    /// Number of nanoseconds
    nanos: i64,
}
1214
1215impl Interval {
1216    fn new(months: i32, days: i32, nanos: i64) -> Self {
1217        Self {
1218            months,
1219            days,
1220            nanos,
1221        }
1222    }
1223
1224    fn to_year_months(&self) -> Result<i32, ArrowError> {
1225        match (self.months, self.days, self.nanos) {
1226            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1227            _ => Err(ArrowError::InvalidArgumentError(format!(
1228                "Unable to represent interval with days and nanos as year-months: {:?}",
1229                self
1230            ))),
1231        }
1232    }
1233
1234    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1235        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1236
1237        match self.nanos {
1238            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1239                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1240                    ArrowError::InvalidArgumentError(format!(
1241                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1242                        self.nanos
1243                    ))
1244                })?;
1245
1246                Ok((days, millis))
1247            }
1248            nanos => Err(ArrowError::InvalidArgumentError(format!(
1249                "Unable to represent {nanos} as milliseconds"
1250            ))),
1251        }
1252    }
1253
1254    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1255        (self.months, self.days, self.nanos)
1256    }
1257
1258    /// Parse string value in traditional Postgres format such as
1259    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1260    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1261        let components = parse_interval_components(value, config)?;
1262
1263        components
1264            .into_iter()
1265            .try_fold(Self::default(), |result, (amount, unit)| {
1266                result.add(amount, unit)
1267            })
1268    }
1269
1270    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1271    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1272    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1273    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1274    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1275    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1276    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1277        let result = match unit {
1278            IntervalUnit::Century => {
1279                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1280                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1281                let months = months_int
1282                    .add_checked(month_frac)?
1283                    .try_into()
1284                    .map_err(|_| {
1285                        ArrowError::ParseError(format!(
1286                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1287                            &amount.integer
1288                        ))
1289                    })?;
1290
1291                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1292            }
1293            IntervalUnit::Decade => {
1294                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1295
1296                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1297                let months = months_int
1298                    .add_checked(month_frac)?
1299                    .try_into()
1300                    .map_err(|_| {
1301                        ArrowError::ParseError(format!(
1302                            "Unable to represent {} decades as months in a signed 32-bit integer",
1303                            &amount.integer
1304                        ))
1305                    })?;
1306
1307                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1308            }
1309            IntervalUnit::Year => {
1310                let months_int = amount.integer.mul_checked(12)?;
1311                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1312                let months = months_int
1313                    .add_checked(month_frac)?
1314                    .try_into()
1315                    .map_err(|_| {
1316                        ArrowError::ParseError(format!(
1317                            "Unable to represent {} years as months in a signed 32-bit integer",
1318                            &amount.integer
1319                        ))
1320                    })?;
1321
1322                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1323            }
1324            IntervalUnit::Month => {
1325                let months = amount.integer.try_into().map_err(|_| {
1326                    ArrowError::ParseError(format!(
1327                        "Unable to represent {} months in a signed 32-bit integer",
1328                        &amount.integer
1329                    ))
1330                })?;
1331
1332                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1333                let days = days.try_into().map_err(|_| {
1334                    ArrowError::ParseError(format!(
1335                        "Unable to represent {} months as days in a signed 32-bit integer",
1336                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1337                    ))
1338                })?;
1339
1340                Self::new(
1341                    self.months.add_checked(months)?,
1342                    self.days.add_checked(days)?,
1343                    self.nanos,
1344                )
1345            }
1346            IntervalUnit::Week => {
1347                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1348                    ArrowError::ParseError(format!(
1349                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1350                        &amount.integer
1351                    ))
1352                })?;
1353
1354                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1355
1356                Self::new(
1357                    self.months,
1358                    self.days.add_checked(days)?,
1359                    self.nanos.add_checked(nanos)?,
1360                )
1361            }
1362            IntervalUnit::Day => {
1363                let days = amount.integer.try_into().map_err(|_| {
1364                    ArrowError::InvalidArgumentError(format!(
1365                        "Unable to represent {} days in a signed 32-bit integer",
1366                        amount.integer
1367                    ))
1368                })?;
1369
1370                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1371
1372                Self::new(
1373                    self.months,
1374                    self.days.add_checked(days)?,
1375                    self.nanos.add_checked(nanos)?,
1376                )
1377            }
1378            IntervalUnit::Hour => {
1379                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1380                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1381                let nanos = nanos_int.add_checked(nanos_frac)?;
1382
1383                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1384            }
1385            IntervalUnit::Minute => {
1386                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1387                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1388
1389                let nanos = nanos_int.add_checked(nanos_frac)?;
1390
1391                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1392            }
1393            IntervalUnit::Second => {
1394                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1395                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1396                let nanos = nanos_int.add_checked(nanos_frac)?;
1397
1398                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1399            }
1400            IntervalUnit::Millisecond => {
1401                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1402                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1403                let nanos = nanos_int.add_checked(nanos_frac)?;
1404
1405                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1406            }
1407            IntervalUnit::Microsecond => {
1408                let nanos_int = amount.integer.mul_checked(1_000)?;
1409                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1410                let nanos = nanos_int.add_checked(nanos_frac)?;
1411
1412                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1413            }
1414            IntervalUnit::Nanosecond => {
1415                let nanos_int = amount.integer;
1416                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1417                let nanos = nanos_int.add_checked(nanos_frac)?;
1418
1419                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1420            }
1421        };
1422
1423        Ok(result)
1424    }
1425}
1426
1427/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1428fn parse_interval_components(
1429    value: &str,
1430    config: &IntervalParseConfig,
1431) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1432    let raw_pairs = split_interval_components(value);
1433
1434    // parse amounts and units
1435    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1436        .iter()
1437        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1438        .collect()
1439    else {
1440        return Err(ArrowError::ParseError(format!(
1441            "Invalid input syntax for type interval: {value:?}"
1442        )));
1443    };
1444
1445    // collect parsed results
1446    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1447
1448    // duplicate units?
1449    let mut observed_interval_types = 0;
1450    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1451        if observed_interval_types & (*unit as u16) != 0 {
1452            return Err(ArrowError::ParseError(format!(
1453                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1454                value,
1455                raw_unit.unwrap_or_default(),
1456            )));
1457        }
1458
1459        observed_interval_types |= *unit as u16;
1460    }
1461
1462    let result = amounts.iter().copied().zip(units.iter().copied());
1463
1464    Ok(result.collect::<Vec<_>>())
1465}
1466
1467/// Split an interval into a vec of amounts and units.
1468///
1469/// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space.
1470///
1471/// This should match the behavior of PostgreSQL's interval parser.
1472fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1473    let mut result = vec![];
1474    let mut words = value.split(char::is_whitespace);
1475    while let Some(word) = words.next() {
1476        if let Some(split_word_at) = word.find(not_interval_amount) {
1477            let (amount, unit) = word.split_at(split_word_at);
1478            result.push((amount, Some(unit)));
1479        } else if let Some(unit) = words.next() {
1480            result.push((word, Some(unit)));
1481        } else {
1482            result.push((word, None));
1483            break;
1484        }
1485    }
1486    result
1487}
1488
/// Returns `true` when `c` cannot be part of an interval numeric amount,
/// i.e. when it is neither an ASCII digit, a `.` nor a `-`
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1493
1494#[cfg(test)]
1495mod tests {
1496    use super::*;
1497    use arrow_array::temporal_conversions::date32_to_datetime;
1498    use arrow_buffer::i256;
1499
1500    #[test]
1501    fn test_parse_nanos() {
1502        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1503        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1504        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1505    }
1506
1507    #[test]
1508    fn string_to_timestamp_timezone() {
1509        // Explicit timezone
1510        assert_eq!(
1511            1599572549190855000,
1512            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1513        );
1514        assert_eq!(
1515            1599572549190855000,
1516            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1517        );
1518        assert_eq!(
1519            1599572549000000000,
1520            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1521        ); // no fractional part
1522        assert_eq!(
1523            1599590549190855000,
1524            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1525        );
1526    }
1527
1528    #[test]
1529    fn string_to_timestamp_timezone_space() {
1530        // Ensure space rather than T between time and date is accepted
1531        assert_eq!(
1532            1599572549190855000,
1533            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1534        );
1535        assert_eq!(
1536            1599572549190855000,
1537            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1538        );
1539        assert_eq!(
1540            1599572549000000000,
1541            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1542        ); // no fractional part
1543        assert_eq!(
1544            1599590549190855000,
1545            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1546        );
1547    }
1548
1549    #[test]
1550    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1551    fn string_to_timestamp_no_timezone() {
1552        // This test is designed to succeed in regardless of the local
1553        // timezone the test machine is running. Thus it is still
1554        // somewhat susceptible to bugs in the use of chrono
1555        let naive_datetime = NaiveDateTime::new(
1556            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1557            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1558        );
1559
1560        // Ensure both T and ' ' variants work
1561        assert_eq!(
1562            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1563            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1564        );
1565
1566        assert_eq!(
1567            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1568            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1569        );
1570
1571        // Also ensure that parsing timestamps with no fractional
1572        // second part works as well
1573        let datetime_whole_secs = NaiveDateTime::new(
1574            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1575            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1576        )
1577        .and_utc();
1578
1579        // Ensure both T and ' ' variants work
1580        assert_eq!(
1581            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1582            parse_timestamp("2020-09-08T13:42:29").unwrap()
1583        );
1584
1585        assert_eq!(
1586            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1587            parse_timestamp("2020-09-08 13:42:29").unwrap()
1588        );
1589
1590        // ensure without time work
1591        // no time, should be the nano second at
1592        // 2020-09-08 0:0:0
1593        let datetime_no_time = NaiveDateTime::new(
1594            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1595            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1596        )
1597        .and_utc();
1598
1599        assert_eq!(
1600            datetime_no_time.timestamp_nanos_opt().unwrap(),
1601            parse_timestamp("2020-09-08").unwrap()
1602        )
1603    }
1604
1605    #[test]
1606    fn string_to_timestamp_chrono() {
1607        let cases = [
1608            "2020-09-08T13:42:29Z",
1609            "1969-01-01T00:00:00.1Z",
1610            "2020-09-08T12:00:12.12345678+00:00",
1611            "2020-09-08T12:00:12+00:00",
1612            "2020-09-08T12:00:12.1+00:00",
1613            "2020-09-08T12:00:12.12+00:00",
1614            "2020-09-08T12:00:12.123+00:00",
1615            "2020-09-08T12:00:12.1234+00:00",
1616            "2020-09-08T12:00:12.12345+00:00",
1617            "2020-09-08T12:00:12.123456+00:00",
1618            "2020-09-08T12:00:12.1234567+00:00",
1619            "2020-09-08T12:00:12.12345678+00:00",
1620            "2020-09-08T12:00:12.123456789+00:00",
1621            "2020-09-08T12:00:12.12345678912z",
1622            "2020-09-08T12:00:12.123456789123Z",
1623            "2020-09-08T12:00:12.123456789123+02:00",
1624            "2020-09-08T12:00:12.12345678912345Z",
1625            "2020-09-08T12:00:12.1234567891234567+02:00",
1626            "2020-09-08T12:00:60Z",
1627            "2020-09-08T12:00:60.123Z",
1628            "2020-09-08T12:00:60.123456+02:00",
1629            "2020-09-08T12:00:60.1234567891234567+02:00",
1630            "2020-09-08T12:00:60.999999999+02:00",
1631            "2020-09-08t12:00:12.12345678+00:00",
1632            "2020-09-08t12:00:12+00:00",
1633            "2020-09-08t12:00:12Z",
1634        ];
1635
1636        for case in cases {
1637            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1638            let chrono_utc = chrono.with_timezone(&Utc);
1639
1640            let custom = string_to_datetime(&Utc, case).unwrap();
1641            assert_eq!(chrono_utc, custom)
1642        }
1643    }
1644
/// Timestamps without any offset must match chrono's `NaiveDateTime` parser
/// when `string_to_datetime` is asked to interpret them in UTC.
#[test]
fn string_to_timestamp_naive() {
    const NAIVE_FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.f";

    let inputs = [
        "2018-11-13T17:11:10.011375885995",
        "2030-12-04T17:11:10.123",
        "2030-12-04T17:11:10.1234",
        "2030-12-04T17:11:10.123456",
    ];

    for input in inputs {
        let reference = NaiveDateTime::parse_from_str(input, NAIVE_FORMAT).unwrap();
        let parsed = string_to_datetime(&Utc, input).unwrap();
        assert_eq!(reference, parsed.naive_utc())
    }
}
1659
/// Malformed timestamps must be rejected with an error that quotes the input
/// and names the stage of parsing that failed.
#[test]
fn string_to_timestamp_invalid() {
    // The distinct failure reasons reported by the parser.
    const TOO_SHORT: &str = "timestamp must contain at least 10 characters";
    const BAD_DATE: &str = "error parsing date";
    const BAD_TIME: &str = "error parsing time";
    const BAD_SEPARATOR: &str = "invalid timestamp separator";

    let cases = [
        ("", TOO_SHORT),
        ("SS", TOO_SHORT),
        ("Wed, 18 Feb 2015 23:16:09 GMT", BAD_DATE),
        ("1997-01-31H09:26:56.123Z", BAD_SEPARATOR),
        ("1997-01-31  09:26:56.123Z", BAD_TIME),
        ("1997:01:31T09:26:56.123Z", BAD_DATE),
        ("1997:1:31T09:26:56.123Z", BAD_DATE),
        ("1997-01-32T09:26:56.123Z", BAD_DATE),
        ("1997-13-32T09:26:56.123Z", BAD_DATE),
        ("1997-02-29T09:26:56.123Z", BAD_DATE),
        ("2015-02-30T17:35:20-08:00", BAD_DATE),
        ("1997-01-10T9:26:56.123Z", BAD_TIME),
        ("2015-01-20T25:35:20-08:00", BAD_TIME),
        ("1997-01-10T09:61:56.123Z", BAD_TIME),
        ("1997-01-10T09:61:90.123Z", BAD_TIME),
        ("1997-01-10T12:00:6.123Z", BAD_TIME),
        ("1997-01-31T092656.123Z", BAD_TIME),
        ("1997-01-10T12:00:06.", BAD_TIME),
        ("1997-01-10T12:00:06. ", BAD_TIME),
    ];

    for (input, reason) in cases {
        let expected = format!("Parser error: Error parsing timestamp from '{input}': {reason}");
        let actual = string_to_datetime(&Utc, input).unwrap_err().to_string();
        assert_eq!(actual, expected)
    }
}
1691
1692    // Parse a timestamp to timestamp int with a useful human readable error message
1693    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1694        let result = string_to_timestamp_nanos(s);
1695        if let Err(e) = &result {
1696            eprintln!("Error parsing timestamp '{s}': {e:?}");
1697        }
1698        result
1699    }
1700
/// A timestamp without an explicit offset is interpreted in the timezone given
/// to the parser, whereas a trailing `Z` is always read as UTC.
#[test]
fn string_without_timezone_to_timestamp() {
    // string without timezone should always output the same regardless the local or session timezone
    let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();
    let nanos_at = |time: NaiveTime| {
        NaiveDateTime::new(day, time)
            .and_utc()
            .timestamp_nanos_opt()
            .unwrap()
    };

    // Both the `T` and the ' ' separator work when fractional seconds are present...
    let expected = nanos_at(NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap());
    for input in ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"] {
        assert_eq!(expected, parse_timestamp(input).unwrap());
    }

    // ...and when they are absent.
    let expected = nanos_at(NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap());
    for input in ["2020-09-08T13:42:29", "2020-09-08 13:42:29"] {
        assert_eq!(expected, parse_timestamp(input).unwrap());
    }

    let plus_two: Tz = "+02:00".parse().unwrap();

    // No offset in the string: the wall-clock time belongs to `plus_two`.
    let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29").unwrap();
    assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 11:42:29");
    assert_eq!(parsed.naive_local().to_string(), "2020-09-08 13:42:29");

    // Explicit `Z`: the wall-clock time is UTC and is shifted into `plus_two`.
    let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29Z").unwrap();
    assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 13:42:29");
    assert_eq!(parsed.naive_local().to_string(), "2020-09-08 15:42:29");

    let reference =
        NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
    let plus_eight: Tz = "+08:00".parse().unwrap();

    // Parsed as offset from UTC
    let parsed = string_to_datetime(&plus_eight, "2020-09-08T13:42:29Z").unwrap();
    assert_eq!(reference, parsed.naive_utc());
    assert_ne!(reference, parsed.naive_local());

    // Parsed as offset from local
    let parsed = string_to_datetime(&plus_eight, "2020-09-08 13:42:29").unwrap();
    assert_eq!(reference, parsed.naive_local());
    assert_ne!(reference, parsed.naive_utc());
}
1764
/// `Date32Type::parse` accepts `YYYY-M-D` dates (with or without zero padding)
/// and a zero-padded date followed by a full `HH:MM:SS` time, and rejects
/// everything in `err_cases`.
///
/// Accepted inputs are converted back with `date32_to_datetime` and compared
/// with the date chrono parses from the same string.
#[test]
fn parse_date32() {
    let cases = [
        "2020-09-08",
        "2020-9-8",
        "2020-09-8",
        "2020-9-08",
        "2020-12-1",
        "1690-2-5",
        "2020-09-08 01:02:03",
    ];
    for case in cases {
        let days = Date32Type::parse(case)
            .unwrap_or_else(|| panic!("Date32Type::parse rejected '{case}'"));
        let v = date32_to_datetime(days).unwrap();
        // `or_else` keeps the date-time fallback lazy: it only runs when the
        // plain date format did not match.
        let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
            .or_else(|_| NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
            .unwrap();
        assert_eq!(v.date(), expected, "{case}");
    }

    let err_cases = [
        "",
        "80-01-01",
        "342",
        "Foo",
        "2020-09-08-03",
        "2020--04-03",
        "2020--",
        "2020-09-08 01",
        "2020-09-08 01:02",
        "2020-09-08 01-02-03",
        "2020-9-8 01:02:03",
        "2020-09-08 1:2:3",
    ];
    for case in err_cases {
        assert_eq!(Date32Type::parse(case), None, "{case}");
    }
}
1802
/// `Time64NanosecondType::parse` on 24-hour and AM/PM clock strings.
///
/// Expected values are nanoseconds since midnight; fractional digits beyond
/// the ninth are dropped.
#[test]
fn parse_time64_nanos() {
    let clock_strings: &[(&str, i64)] = &[
        ("02:10:01.1234567899999999", 7_801_123_456_789),
        ("02:10:01.1234567", 7_801_123_456_700),
        ("2:10:01.1234567", 7_801_123_456_700),
        ("12:10:01.123456789 AM", 601_123_456_789),
        ("12:10:01.123456789 am", 601_123_456_789),
        ("2:10:01.12345678 PM", 51_001_123_456_780),
        ("2:10:01.12345678 pm", 51_001_123_456_780),
        ("02:10:01", 7_801_000_000_000),
        ("2:10:01", 7_801_000_000_000),
        ("12:10:01 AM", 601_000_000_000),
        ("12:10:01 am", 601_000_000_000),
        ("2:10:01 PM", 51_001_000_000_000),
        ("2:10:01 pm", 51_001_000_000_000),
        ("02:10", 7_800_000_000_000),
        ("2:10", 7_800_000_000_000),
        ("12:10 AM", 600_000_000_000),
        ("12:10 am", 600_000_000_000),
        ("2:10 PM", 51_000_000_000_000),
        ("2:10 pm", 51_000_000_000_000),
    ];
    for &(input, nanos) in clock_strings {
        assert_eq!(Time64NanosecondType::parse(input), Some(nanos), "{input}");
    }

    // parse directly as nanoseconds
    assert_eq!(Time64NanosecondType::parse("1"), Some(1));

    // leap second
    let one_day_nanos = 86_400_000_000_000;
    assert_eq!(Time64NanosecondType::parse("23:59:60"), Some(one_day_nanos));

    // custom format
    let custom =
        Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f");
    assert_eq!(custom, Some(7_801_123_456_700));
}
1894
/// `Time64MicrosecondType::parse` on 24-hour and AM/PM clock strings.
///
/// Expected values are microseconds since midnight.
#[test]
fn parse_time64_micros() {
    // expected formats
    let clock_strings: &[(&str, i64)] = &[
        ("02:10:01.1234", 7_801_123_400),
        ("2:10:01.1234", 7_801_123_400),
        ("12:10:01.123456 AM", 601_123_456),
        ("12:10:01.123456 am", 601_123_456),
        ("2:10:01.12345 PM", 51_001_123_450),
        ("2:10:01.12345 pm", 51_001_123_450),
        ("02:10:01", 7_801_000_000),
        ("2:10:01", 7_801_000_000),
        ("12:10:01 AM", 601_000_000),
        ("12:10:01 am", 601_000_000),
        ("2:10:01 PM", 51_001_000_000),
        ("2:10:01 pm", 51_001_000_000),
        ("02:10", 7_800_000_000),
        ("2:10", 7_800_000_000),
        ("12:10 AM", 600_000_000),
        ("12:10 am", 600_000_000),
        ("2:10 PM", 51_000_000_000),
        ("2:10 pm", 51_000_000_000),
    ];
    for &(input, micros) in clock_strings {
        assert_eq!(Time64MicrosecondType::parse(input), Some(micros), "{input}");
    }

    // parse directly as microseconds
    assert_eq!(Time64MicrosecondType::parse("1"), Some(1));

    // leap second
    let one_day_micros = 86_400_000_000;
    assert_eq!(Time64MicrosecondType::parse("23:59:60"), Some(one_day_micros));

    // custom format
    let custom =
        Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f");
    assert_eq!(custom, Some(7_801_123_400));
}
1971
/// `Time32MillisecondType::parse` on 24-hour and AM/PM clock strings.
///
/// Expected values are milliseconds since midnight.
#[test]
fn parse_time32_millis() {
    // expected formats
    let clock_strings: &[(&str, i32)] = &[
        ("02:10:01.1", 7_801_100),
        ("2:10:01.1", 7_801_100),
        ("12:10:01.123 AM", 601_123),
        ("12:10:01.123 am", 601_123),
        ("2:10:01.12 PM", 51_001_120),
        ("2:10:01.12 pm", 51_001_120),
        ("02:10:01", 7_801_000),
        ("2:10:01", 7_801_000),
        ("12:10:01 AM", 601_000),
        ("12:10:01 am", 601_000),
        ("2:10:01 PM", 51_001_000),
        ("2:10:01 pm", 51_001_000),
        ("02:10", 7_800_000),
        ("2:10", 7_800_000),
        ("12:10 AM", 600_000),
        ("12:10 am", 600_000),
        ("2:10 PM", 51_000_000),
        ("2:10 pm", 51_000_000),
    ];
    for &(input, millis) in clock_strings {
        assert_eq!(Time32MillisecondType::parse(input), Some(millis), "{input}");
    }

    // parse directly as milliseconds
    assert_eq!(Time32MillisecondType::parse("1"), Some(1));

    // leap second
    let one_day_millis = 86_400_000;
    assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(one_day_millis));

    // custom format
    let custom = Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f");
    assert_eq!(custom, Some(7_801_100));
}
2018
/// `Time32SecondType::parse` on 24-hour and AM/PM clock strings.
///
/// Expected values are whole seconds since midnight; a fractional part in the
/// input does not contribute to the result.
#[test]
fn parse_time32_secs() {
    // expected formats
    let clock_strings: &[(&str, i32)] = &[
        ("02:10:01.1", 7_801),
        ("02:10:01", 7_801),
        ("2:10:01", 7_801),
        ("12:10:01 AM", 601),
        ("12:10:01 am", 601),
        ("2:10:01 PM", 51_001),
        ("2:10:01 pm", 51_001),
        ("02:10", 7_800),
        ("2:10", 7_800),
        ("12:10 AM", 600),
        ("12:10 am", 600),
        ("2:10 PM", 51_000),
        ("2:10 pm", 51_000),
    ];
    for &(input, secs) in clock_strings {
        assert_eq!(Time32SecondType::parse(input), Some(secs), "{input}");
    }

    // parse directly as seconds
    assert_eq!(Time32SecondType::parse("1"), Some(1));

    // leap second
    let one_day_secs = 86400;
    assert_eq!(Time32SecondType::parse("23:59:60"), Some(one_day_secs));

    // custom format
    let custom = Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S");
    assert_eq!(custom, Some(7_801));
}
2048
/// Every string listed here must be rejected by `string_to_time`.
#[test]
fn test_string_to_time_invalid() {
    const REJECTED: &[&str] = &[
        "25:00",
        "9:00:",
        "009:00",
        "09:0:00",
        "25:00:00",
        "13:00 AM",
        "13:00 PM",
        "12:00. AM",
        "09:0:00",
        "09:01:0",
        "09:01:1",
        "9:1:0",
        "09:01:0",
        "1:00.123",
        "1:00:00.123f",
        " 9:00:00",
        ":09:00",
        "T9:00:00",
        "AM",
    ];

    for &input in REJECTED {
        let parsed = string_to_time(input);
        assert!(parsed.is_none(), "{input}");
    }
}
2076
/// `string_to_time` must agree with chrono's `NaiveTime::parse_from_str` for
/// each input, both when chrono accepts the string and when it rejects it.
#[test]
fn test_string_to_time_chrono() {
    // chrono format strings matching the shapes of the inputs below
    const HM: &str = "%H:%M";
    const HMS: &str = "%H:%M:%S";
    const HMS_FRAC: &str = "%H:%M:%S%.f";
    const HM_MERIDIEM: &str = "%I:%M %P";
    const HMS_FRAC_MERIDIEM: &str = "%I:%M:%S%.f %P";

    let cases = [
        ("1:00", HM),
        ("12:00", HM),
        ("13:00", HM),
        ("24:00", HM),
        ("1:00:00", HMS),
        ("12:00:30", HMS),
        ("13:00:59", HMS),
        ("24:00:60", HMS),
        ("09:00:00", HMS_FRAC),
        ("0:00:30.123456", HMS_FRAC),
        ("0:00 AM", HM_MERIDIEM),
        ("1:00 AM", HM_MERIDIEM),
        ("12:00 AM", HM_MERIDIEM),
        ("13:00 AM", HM_MERIDIEM),
        ("0:00 PM", HM_MERIDIEM),
        ("1:00 PM", HM_MERIDIEM),
        ("12:00 PM", HM_MERIDIEM),
        ("13:00 PM", HM_MERIDIEM),
        ("1:00 pM", HM_MERIDIEM),
        ("1:00 Pm", HM_MERIDIEM),
        ("1:00 aM", HM_MERIDIEM),
        ("1:00 Am", HM_MERIDIEM),
        ("1:00:30.123456 PM", HMS_FRAC_MERIDIEM),
        ("1:00:30.123456789 PM", HMS_FRAC_MERIDIEM),
        ("1:00:30.123456789123 PM", HMS_FRAC_MERIDIEM),
        ("1:00:30.1234 PM", HMS_FRAC_MERIDIEM),
        ("1:00:30.123456 PM", HMS_FRAC_MERIDIEM),
        ("1:00:30.123456789123456789 PM", HMS_FRAC_MERIDIEM),
        ("1:00:30.12F456 PM", HMS_FRAC_MERIDIEM),
    ];

    for (input, chrono_format) in cases {
        let reference = NaiveTime::parse_from_str(input, chrono_format).ok();
        let parsed = string_to_time(input);
        assert_eq!(reference, parsed, "{input}");
    }
}
2116
/// Exercises `Interval::parse` with `IntervalUnit::Month` as the default unit.
///
/// Expected values are written as `Interval::new(months, days, nanos)`.
#[test]
fn test_parse_interval() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    // Expected value for the long all-negative input, which appears twice
    // below: once with a space before every unit and once mostly without.
    let all_units_negative = || {
        Interval::new(
            -13,
            -8,
            -NANOS_PER_HOUR
                - NANOS_PER_MINUTE
                - NANOS_PER_SECOND
                - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64,
        )
    };

    let accepted = [
        (Interval::new(1, 0, 0), "1 month"),
        (Interval::new(2, 0, 0), "2 month"),
        (
            Interval::new(-1, -18, -(NANOS_PER_DAY / 5)),
            "-1.5 months -3.2 days",
        ),
        (Interval::new(0, 15, 0), "0.5 months"),
        (Interval::new(0, 15, 0), ".5 months"),
        (Interval::new(0, -15, 0), "-0.5 months"),
        (Interval::new(0, -15, 0), "-.5 months"),
        (
            Interval::new(2, 10, 9 * NANOS_PER_HOUR),
            "2.1 months 7.25 days 3 hours",
        ),
        (Interval::new(37, 0, 0), "3 year 1 month"),
        (Interval::new(35, 0, 0), "3 year -1 month"),
        (Interval::new(-37, 0, 0), "-3 year -1 month"),
        (Interval::new(-35, 0, 0), "-3 year 1 month"),
        (Interval::new(0, 5, 0), "5 days"),
        (Interval::new(0, 7, 3 * NANOS_PER_HOUR), "7 days 3 hours"),
        (Interval::new(0, 7, 5 * NANOS_PER_MINUTE), "7 days 5 minutes"),
        (
            Interval::new(0, 7, -5 * NANOS_PER_MINUTE),
            "7 days -5 minutes",
        ),
        (Interval::new(0, -7, 5 * NANOS_PER_HOUR), "-7 days 5 hours"),
        (
            Interval::new(
                0,
                -7,
                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND,
            ),
            "-7 days -5 hours -5 minutes -5 seconds",
        ),
        (
            Interval::new(12, 0, 25 * NANOS_PER_MILLIS),
            "1 year 25 millisecond",
        ),
        (
            Interval::new(
                12,
                1,
                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64,
            ),
            "1 year 1 day 0.000000001 seconds",
        ),
        (
            Interval::new(12, 1, NANOS_PER_MILLIS / 10),
            "1 year 1 day 0.1 milliseconds",
        ),
        (Interval::new(12, 1, 1000), "1 year 1 day 1 microsecond"),
        (Interval::new(12, 1, 1), "1 year 1 day 1 nanoseconds"),
        (Interval::new(1, 0, -NANOS_PER_SECOND), "1 month -1 second"),
        (
            all_units_negative(),
            "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
        ),
        // no units
        (Interval::new(1, 0, 0), "1"),
        (Interval::new(42, 0, 0), "42"),
        // shorter units
        (Interval::new(1, 0, 0), "1 mon"),
        (Interval::new(1, 0, 0), "1 mons"),
        (Interval::new(0, 0, 1_000_000), "1 ms"),
        (Interval::new(0, 0, 1_000), "1 us"),
        // no space
        (Interval::new(0, 0, 1_000), "1us"),
        (Interval::new(0, 0, NANOS_PER_SECOND), "1s"),
        (
            Interval::new(1, 2, 10_864_000_000_000),
            "1mon 2days 3hr 1min 4sec",
        ),
        (
            all_units_negative(),
            "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
        ),
    ];
    for (expected, input) in accepted {
        assert_eq!(
            expected,
            Interval::parse(input, &config).unwrap(),
            "{input}"
        );
    }

    // no units: the bare number takes the unit configured as the default
    let seconds_default = IntervalParseConfig::new(IntervalUnit::Second);
    assert_eq!(
        Interval::new(0, 0, 42_000_000_000),
        Interval::parse("42", &seconds_default).unwrap()
    );

    let rejected = [
        (
            "1 centurys 1 month",
            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#,
        ),
        (
            "1h s",
            r#"Parser error: Invalid input syntax for type interval: "1h s""#,
        ),
        (
            "1XX",
            r#"Parser error: Invalid input syntax for type interval: "1XX""#,
        ),
    ];
    for (input, expected_message) in rejected {
        let message = Interval::parse(input, &config).unwrap_err().to_string();
        assert_eq!(message, expected_message);
    }
}
2344
/// Naming the same unit twice in one interval string is an error, including
/// when the two mentions use the singular and the plural spelling.
#[test]
fn test_duplicate_interval_type() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    // (input, `Debug` rendering of the expected error)
    let cases = [
        (
            "1 month 1 second 1 second",
            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
        ),
        // test with singular and plural forms
        (
            "1 century 2 centuries",
            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
        ),
    ];

    for (input, expected_debug) in cases {
        let err =
            Interval::parse(input, &config).expect_err("parsing interval should have failed");
        assert_eq!(expected_debug, format!("{err:?}"));
    }
}
2364
/// `IntervalAmount::from_str` splits a decimal string into an integer part and
/// a fractional part scaled to `INTERVAL_PRECISION` digits.
#[test]
fn test_interval_amount_parsing() {
    // Scaled value of a single leading fractional digit (i.e. of `0.1`).
    let tenth = 10_i64.pow(INTERVAL_PRECISION - 1);

    // integer
    assert_eq!(
        IntervalAmount::from_str("123").unwrap(),
        IntervalAmount::new(123, 0)
    );

    // positive w/ fractional
    assert_eq!(
        IntervalAmount::from_str("0.3").unwrap(),
        IntervalAmount::new(0, 3 * tenth)
    );

    // negative w/ fractional
    assert_eq!(
        IntervalAmount::from_str("-3.5").unwrap(),
        IntervalAmount::new(-3, -5 * tenth)
    );

    // invalid: missing fractional
    assert!(IntervalAmount::from_str("3.").is_err());

    // invalid: sign in fractional
    assert!(IntervalAmount::from_str("3.-5").is_err());
}
2393
/// A large day count with a fractional part keeps its whole days and turns the
/// remaining tenth of a day into nanoseconds.
#[test]
fn test_interval_precision() {
    let month_default = IntervalParseConfig::new(IntervalUnit::Month);
    let tenth_of_a_day = NANOS_PER_DAY / 10;

    assert_eq!(
        Interval::parse("100000.1 days", &month_default).unwrap(),
        Interval::new(0, 100_000, tenth_of_a_day)
    );
}
2403
/// `Interval::add` applied to one amount per unit, from centuries down to
/// minutes.
///
/// Each row is `(label, start, amount, unit, expected)`, with intervals written
/// as `Interval::new(months, days, nanos)`.
#[test]
fn test_interval_addition() {
    // Fractional part of an `IntervalAmount`: `digits` placed in the first
    // `places` fractional positions.
    let frac = |digits: i64, places: u32| digits * 10_i64.pow(INTERVAL_PRECISION - places);

    let cases = [
        (
            "add 4.1 centuries",
            Interval::new(1, 2, 3),
            IntervalAmount::new(4, frac(1, 1)),
            IntervalUnit::Century,
            Interval::new(4921, 2, 3),
        ),
        (
            "add 10.25 decades",
            Interval::new(1, 2, 3),
            IntervalAmount::new(10, frac(25, 2)),
            IntervalUnit::Decade,
            Interval::new(1231, 2, 3),
        ),
        // reminder: Postgres logic does not spill to days/nanos when interval is larger than a month
        (
            "add 30.3 years",
            Interval::new(1, 2, 3),
            IntervalAmount::new(30, frac(3, 1)),
            IntervalUnit::Year,
            Interval::new(364, 2, 3),
        ),
        (
            "add 1.5 months",
            Interval::new(1, 2, 3),
            IntervalAmount::new(1, frac(5, 1)),
            IntervalUnit::Month,
            Interval::new(2, 17, 3),
        ),
        (
            "add -2 weeks",
            Interval::new(1, 25, 3),
            IntervalAmount::new(-2, 0),
            IntervalUnit::Week,
            Interval::new(1, 11, 3),
        ),
        (
            "add 2.2 days",
            Interval::new(12, 15, 3),
            IntervalAmount::new(2, frac(2, 1)),
            IntervalUnit::Day,
            Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND),
        ),
        (
            "add 12.5 hours",
            Interval::new(1, 2, 3),
            IntervalAmount::new(12, frac(5, 1)),
            IntervalUnit::Hour,
            Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND),
        ),
        (
            "add -1.5 minutes",
            Interval::new(0, 0, -3),
            IntervalAmount::new(-1, frac(-5, 1)),
            IntervalUnit::Minute,
            Interval::new(0, 0, -90_000_000_000 - 3),
        ),
    ];

    for (label, start, amount, unit, expected) in cases {
        let result = start.add(amount, unit).unwrap();
        assert_eq!(result, expected, "{label}");
    }
}
2507
2508    #[test]
2509    fn string_to_timestamp_old() {
2510        parse_timestamp("1677-06-14T07:29:01.256")
2511            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2512            .unwrap_err();
2513    }
2514
2515    #[test]
2516    fn test_parse_decimal_with_parameter() {
2517        let tests = [
2518            ("0", 0i128),
2519            ("123.123", 123123i128),
2520            ("123.1234", 123123i128),
2521            ("123.1", 123100i128),
2522            ("123", 123000i128),
2523            ("-123.123", -123123i128),
2524            ("-123.1234", -123123i128),
2525            ("-123.1", -123100i128),
2526            ("-123", -123000i128),
2527            ("0.0000123", 0i128),
2528            ("12.", 12000i128),
2529            ("-12.", -12000i128),
2530            ("00.1", 100i128),
2531            ("-00.1", -100i128),
2532            ("12345678912345678.1234", 12345678912345678123i128),
2533            ("-12345678912345678.1234", -12345678912345678123i128),
2534            ("99999999999999999.999", 99999999999999999999i128),
2535            ("-99999999999999999.999", -99999999999999999999i128),
2536            (".123", 123i128),
2537            ("-.123", -123i128),
2538            ("123.", 123000i128),
2539            ("-123.", -123000i128),
2540        ];
2541        for (s, i) in tests {
2542            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2543            assert_eq!(i, result_128.unwrap());
2544            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2545            assert_eq!(i256::from_i128(i), result_256.unwrap());
2546        }
2547
2548        let e_notation_tests = [
2549            ("1.23e3", "1230.0", 2),
2550            ("5.6714e+2", "567.14", 4),
2551            ("5.6714e-2", "0.056714", 4),
2552            ("5.6714e-2", "0.056714", 3),
2553            ("5.6741214125e2", "567.41214125", 4),
2554            ("8.91E4", "89100.0", 2),
2555            ("3.14E+5", "314000.0", 2),
2556            ("2.718e0", "2.718", 2),
2557            ("9.999999e-1", "0.9999999", 4),
2558            ("1.23e+3", "1230", 2),
2559            ("1.234559e+3", "1234.559", 2),
2560            ("1.00E-10", "0.0000000001", 11),
2561            ("1.23e-4", "0.000123", 2),
2562            ("9.876e7", "98760000.0", 2),
2563            ("5.432E+8", "543200000.0", 10),
2564            ("1.234567e9", "1234567000.0", 2),
2565            ("1.234567e2", "123.45670000", 2),
2566            ("4749.3e-5", "0.047493", 10),
2567            ("4749.3e+5", "474930000", 10),
2568            ("4749.3e-5", "0.047493", 1),
2569            ("4749.3e+5", "474930000", 1),
2570            ("0E-8", "0", 10),
2571            ("0E+6", "0", 10),
2572            ("1E-8", "0.00000001", 10),
2573            ("12E+6", "12000000", 10),
2574            ("12E-6", "0.000012", 10),
2575            ("0.1e-6", "0.0000001", 10),
2576            ("0.1e+6", "100000", 10),
2577            ("0.12e-6", "0.00000012", 10),
2578            ("0.12e+6", "120000", 10),
2579            ("000000000001e0", "000000000001", 3),
2580            ("000001.1034567002e0", "000001.1034567002", 3),
2581            ("1.234e16", "12340000000000000", 0),
2582            ("123.4e16", "1234000000000000000", 0),
2583        ];
2584        for (e, d, scale) in e_notation_tests {
2585            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2586            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2587            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2588            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2589            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2590            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2591        }
2592        let can_not_parse_tests = [
2593            "123,123",
2594            ".",
2595            "123.123.123",
2596            "",
2597            "+",
2598            "-",
2599            "e",
2600            "1.3e+e3",
2601            "5.6714ee-2",
2602            "4.11ee-+4",
2603            "4.11e++4",
2604            "1.1e.12",
2605            "1.23e+3.",
2606            "1.23e+3.1",
2607        ];
2608        for s in can_not_parse_tests {
2609            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2610            assert_eq!(
2611                format!("Parser error: can't parse the string value {s} to decimal"),
2612                result_128.unwrap_err().to_string()
2613            );
2614            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2615            assert_eq!(
2616                format!("Parser error: can't parse the string value {s} to decimal"),
2617                result_256.unwrap_err().to_string()
2618            );
2619        }
2620        let overflow_parse_tests = [
2621            ("12345678", 3),
2622            ("1.2345678e7", 3),
2623            ("12345678.9", 3),
2624            ("1.23456789e+7", 3),
2625            ("99999999.99", 3),
2626            ("9.999999999e7", 3),
2627            ("12345678908765.123456", 3),
2628            ("123456789087651234.56e-4", 3),
2629            ("1234560000000", 0),
2630            ("1.23456e12", 0),
2631        ];
2632        for (s, scale) in overflow_parse_tests {
2633            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2634            let expected_128 = "Parser error: parse decimal overflow";
2635            let actual_128 = result_128.unwrap_err().to_string();
2636
2637            assert!(
2638                actual_128.contains(expected_128),
2639                "actual: '{actual_128}', expected: '{expected_128}'"
2640            );
2641
2642            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2643            let expected_256 = "Parser error: parse decimal overflow";
2644            let actual_256 = result_256.unwrap_err().to_string();
2645
2646            assert!(
2647                actual_256.contains(expected_256),
2648                "actual: '{actual_256}', expected: '{expected_256}'"
2649            );
2650        }
2651
2652        let edge_tests_128 = [
2653            (
2654                "99999999999999999999999999999999999999",
2655                99999999999999999999999999999999999999i128,
2656                0,
2657            ),
2658            (
2659                "999999999999999999999999999999999999.99",
2660                99999999999999999999999999999999999999i128,
2661                2,
2662            ),
2663            (
2664                "9999999999999999999999999.9999999999999",
2665                99999999999999999999999999999999999999i128,
2666                13,
2667            ),
2668            (
2669                "9999999999999999999999999",
2670                99999999999999999999999990000000000000i128,
2671                13,
2672            ),
2673            (
2674                "0.99999999999999999999999999999999999999",
2675                99999999999999999999999999999999999999i128,
2676                38,
2677            ),
2678            (
2679                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2680                0i128,
2681                15,
2682            ),
2683            (
2684                "1.016744e-320",
2685                0i128,
2686                15,
2687            ),
2688            (
2689                "-1e3",
2690                -1000000000i128,
2691                6,
2692            ),
2693            (
2694                "+1e3",
2695                1000000000i128,
2696                6,
2697            ),
2698            (
2699                "-1e31",
2700                -10000000000000000000000000000000000000i128,
2701                6,
2702            ),
2703        ];
2704        for (s, i, scale) in edge_tests_128 {
2705            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2706            assert_eq!(i, result_128.unwrap());
2707        }
2708        let edge_tests_256 = [
2709            (
2710                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2711                i256::from_string(
2712                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2713                )
2714                .unwrap(),
2715                0,
2716            ),
2717            (
2718                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2719                i256::from_string(
2720                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2721                )
2722                .unwrap(),
2723                4,
2724            ),
2725            (
2726                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2727                i256::from_string(
2728                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2729                )
2730                .unwrap(),
2731                26,
2732            ),
2733            (
2734                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2735                i256::from_string(
2736                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2737                )
2738                .unwrap(),
2739                26,
2740            ),
2741            (
2742                "99999999999999999999999999999999999999999999999999",
2743                i256::from_string(
2744                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2745                )
2746                .unwrap(),
2747                26,
2748            ),
2749            (
2750                "9.9999999999999999999999999999999999999999999999999e+49",
2751                i256::from_string(
2752                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2753                )
2754                .unwrap(),
2755                26,
2756            ),
2757        ];
2758        for (s, i, scale) in edge_tests_256 {
2759            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2760            assert_eq!(i, result.unwrap());
2761        }
2762    }
2763
2764    #[test]
2765    fn test_parse_empty() {
2766        assert_eq!(Int32Type::parse(""), None);
2767        assert_eq!(Int64Type::parse(""), None);
2768        assert_eq!(UInt32Type::parse(""), None);
2769        assert_eq!(UInt64Type::parse(""), None);
2770        assert_eq!(Float32Type::parse(""), None);
2771        assert_eq!(Float64Type::parse(""), None);
2772        assert_eq!(Int32Type::parse("+"), None);
2773        assert_eq!(Int64Type::parse("+"), None);
2774        assert_eq!(UInt32Type::parse("+"), None);
2775        assert_eq!(UInt64Type::parse("+"), None);
2776        assert_eq!(Float32Type::parse("+"), None);
2777        assert_eq!(Float64Type::parse("+"), None);
2778        assert_eq!(TimestampNanosecondType::parse(""), None);
2779        assert_eq!(Date32Type::parse(""), None);
2780    }
2781
2782    #[test]
2783    fn test_parse_interval_month_day_nano_config() {
2784        let interval = parse_interval_month_day_nano_config(
2785            "1",
2786            IntervalParseConfig::new(IntervalUnit::Second),
2787        )
2788        .unwrap();
2789        assert_eq!(interval.months, 0);
2790        assert_eq!(interval.days, 0);
2791        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2792    }
2793}
arrow_cast/parse.rs

arrow_cast/
parse.rs