Skip to main content

arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::ArrowNativeTypeOp;
22use arrow_array::timezone::Tz;
23use arrow_array::types::*;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Interprets the first `N` entries of `digits` as a decimal fraction of a second
/// and returns it in nanoseconds.
///
/// `O` is subtracted (wrapping) from every entry before it is used, so callers pass
/// `b'0'` for raw ASCII input and `0` for input that has already been normalised.
/// The accumulated value is scaled by `10^(9 - N)` so that, for example, three digits
/// are read as milliseconds.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &digit in &digits[..N] {
        value = value * 10 + u32::from(digit.wrapping_sub(O));
    }
    // Pad on the right to nine fractional digits
    value * 10_u32.pow((9 - N) as u32)
}
38
/// Helper for parsing RFC3339 timestamps
///
/// Holds a fixed-size, pre-processed view of at most 32 input bytes so that the
/// shape of a timestamp can be checked with bit tests instead of byte-by-byte
/// comparisons.
struct TimestampParser {
    /// The timestamp bytes to parse minus `b'0'`
    ///
    /// This makes interpretation as an integer inexpensive: an ASCII digit is stored
    /// as its numeric value `0..=9`, any other byte becomes a value of 10 or more.
    digits: [u8; 32],
    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
    ///
    /// Bit `i` describes `digits[i]`.
    mask: u32,
}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
/// Parses a time of day of the form `H:MM`, `HH:MM`, `H:MM:SS`, `HH:MM:SS` or
/// `HH:MM:SS.fraction`, optionally followed by ` AM` / ` PM` (any letter case).
///
/// Returns `None` when the input is shorter than four bytes, does not have one of
/// the shapes above, contains a non-digit where a digit is expected, or describes
/// an out-of-range time. With an AM/PM suffix the hour must be in `1..=12`.
/// Fractional digits beyond the ninth are ignored.
fn string_to_time(s: &str) -> Option<NaiveTime> {
    let bytes = s.as_bytes();
    // The shortest accepted input is `H:MM`
    if bytes.len() < 4 {
        return None;
    }

    // Split off a trailing AM/PM marker: `Some(true)` is AM, `Some(false)` is PM,
    // `None` means 24-hour time
    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
        _ => (None, bytes),
    };

    // Re-check the length now that the marker is gone
    if bytes.len() < 4 {
        return None;
    }

    // Raw ASCII for HHMMSS; fields that are not present keep the value `b'0'`
    let mut digits = [b'0'; 6];

    // Extract hour, which may be written with one or two characters
    let bytes = match (bytes[1], bytes[2]) {
        (b':', _) => {
            digits[1] = bytes[0];
            &bytes[2..]
        }
        (_, b':') => {
            digits[0] = bytes[0];
            digits[1] = bytes[1];
            &bytes[3..]
        }
        _ => return None,
    };

    if bytes.len() < 2 {
        return None; // Minutes required
    }

    // Extract minutes
    digits[2] = bytes[0];
    digits[3] = bytes[1];

    let nanoseconds = match bytes.get(2) {
        Some(b':') => {
            // `MM:SS` needs five bytes
            if bytes.len() < 5 {
                return None;
            }

            // Extract seconds
            digits[4] = bytes[3];
            digits[5] = bytes[4];

            // Extract sub-seconds if any
            match bytes.get(5) {
                Some(b'.') => {
                    let decimal = &bytes[6..];
                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
                        return None;
                    }
                    match decimal.len() {
                        // A decimal point must be followed by at least one digit
                        0 => return None,
                        1 => parse_nanos::<1, b'0'>(decimal),
                        2 => parse_nanos::<2, b'0'>(decimal),
                        3 => parse_nanos::<3, b'0'>(decimal),
                        4 => parse_nanos::<4, b'0'>(decimal),
                        5 => parse_nanos::<5, b'0'>(decimal),
                        6 => parse_nanos::<6, b'0'>(decimal),
                        7 => parse_nanos::<7, b'0'>(decimal),
                        8 => parse_nanos::<8, b'0'>(decimal),
                        // Only the first nine digits are used
                        _ => parse_nanos::<9, b'0'>(decimal),
                    }
                }
                // Anything else after the seconds is rejected
                Some(_) => return None,
                None => 0,
            }
        }
        // Anything other than `:` after the minutes is rejected
        Some(_) => return None,
        None => 0,
    };

    // Convert ASCII to numeric values; a non-digit ends up above 9 and is rejected
    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
    if digits.iter().any(|x| *x > 9) {
        return None;
    }

    let hour = match (digits[0] * 10 + digits[1], am) {
        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
        (_, Some(_)) => return None,         // 0 or 13+ is invalid on a 12-hour clock
        (h, None) => h,
    };

    // Handle leap second: second 60 becomes second 59 plus one extra second of nanoseconds
    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
        60 => (59, nanoseconds + 1_000_000_000),
        s => (s, nanoseconds),
    };

    // Range validation of the individual fields is left to chrono
    NaiveTime::from_hms_nano_opt(
        hour as _,
        (digits[2] * 10 + digits[3]) as _,
        second as _,
        nanoseconds,
    )
}
410
411/// Specialized parsing implementations to convert strings to Arrow types.
412///
413/// This is used by csv and json reader and can be used directly as well.
414///
415/// # Example
416///
417/// To parse a string to a [`Date32Type`]:
418///
419/// ```
420/// use arrow_cast::parse::Parser;
421/// use arrow_array::types::Date32Type;
422/// let date = Date32Type::parse("2021-01-01").unwrap();
423/// assert_eq!(date, 18628);
424/// ```
425///
426/// To parse a string to a [`TimestampNanosecondType`]:
427///
428/// ```
429/// use arrow_cast::parse::Parser;
430/// use arrow_array::types::TimestampNanosecondType;
431/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
432/// assert_eq!(ts, 1609459200123456789);
433/// ```
434pub trait Parser: ArrowPrimitiveType {
435    /// Parse a string to the native type
436    fn parse(string: &str) -> Option<Self::Native>;
437
438    /// Parse a string to the native type with a format string
439    ///
440    /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse)
441    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
442        Self::parse(string)
443    }
444}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
466macro_rules! parser_primitive {
467    ($t:ty) => {
468        impl Parser for $t {
469            fn parse(string: &str) -> Option<Self::Native> {
470                if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471                    return None;
472                }
473                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474                    string.as_bytes(),
475                ) {
476                    (Some(n), x) if x == string.len() => Some(n),
477                    _ => None,
478                }
479            }
480        }
481    };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497    fn parse(string: &str) -> Option<i64> {
498        string_to_timestamp_nanos(string).ok()
499    }
500}
501
502impl Parser for TimestampMicrosecondType {
503    fn parse(string: &str) -> Option<i64> {
504        let nanos = string_to_timestamp_nanos(string).ok();
505        nanos.map(|x| x / 1000)
506    }
507}
508
509impl Parser for TimestampMillisecondType {
510    fn parse(string: &str) -> Option<i64> {
511        let nanos = string_to_timestamp_nanos(string).ok();
512        nanos.map(|x| x / 1_000_000)
513    }
514}
515
516impl Parser for TimestampSecondType {
517    fn parse(string: &str) -> Option<i64> {
518        let nanos = string_to_timestamp_nanos(string).ok();
519        nanos.map(|x| x / 1_000_000_000)
520    }
521}
522
523impl Parser for Time64NanosecondType {
524    // Will truncate any fractions of a nanosecond
525    fn parse(string: &str) -> Option<Self::Native> {
526        string_to_time_nanoseconds(string)
527            .ok()
528            .or_else(|| string.parse::<Self::Native>().ok())
529    }
530
531    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532        let nt = NaiveTime::parse_from_str(string, format).ok()?;
533        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534    }
535}
536
537impl Parser for Time64MicrosecondType {
538    // Will truncate any fractions of a microsecond
539    fn parse(string: &str) -> Option<Self::Native> {
540        string_to_time_nanoseconds(string)
541            .ok()
542            .map(|nanos| nanos / 1_000)
543            .or_else(|| string.parse::<Self::Native>().ok())
544    }
545
546    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547        let nt = NaiveTime::parse_from_str(string, format).ok()?;
548        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549    }
550}
551
552impl Parser for Time32MillisecondType {
553    // Will truncate any fractions of a millisecond
554    fn parse(string: &str) -> Option<Self::Native> {
555        string_to_time_nanoseconds(string)
556            .ok()
557            .map(|nanos| (nanos / 1_000_000) as i32)
558            .or_else(|| string.parse::<Self::Native>().ok())
559    }
560
561    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562        let nt = NaiveTime::parse_from_str(string, format).ok()?;
563        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564    }
565}
566
567impl Parser for Time32SecondType {
568    // Will truncate any fractions of a second
569    fn parse(string: &str) -> Option<Self::Native> {
570        string_to_time_nanoseconds(string)
571            .ok()
572            .map(|nanos| (nanos / 1_000_000_000) as i32)
573            .or_else(|| string.parse::<Self::Native>().ok())
574    }
575
576    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577        let nt = NaiveTime::parse_from_str(string, format).ok()?;
578        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579    }
580}
581
/// Day number of 1970-01-01 when counting 0001-01-01 as day 1
///
/// Subtracted from `num_days_from_ce()` results in this file to obtain the
/// number of days since 1970-01-01.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a timestamp cannot be represented as `i64` nanoseconds
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
/// Parse the ISO 8601 signed extended-year form (`±YYYY[Y...]-MM-DD`) into
/// raw `(year, month, day)` components, without validating the calendar date.
///
/// The caller must have already verified that `string` begins with `+` or `-`;
/// the year must have at least 4 digits. The month and the day must each consist
/// of one or two ASCII digits and nothing else, so a sign (`+2020-+1-+1`) or
/// extra leading zeros (`+2020-001-01`) are rejected.
///
/// Returns `None` if the shape is malformed or any component fails to parse
/// numerically.
fn parse_extended_ymd(string: &str) -> Option<(i32, u32, u32)> {
    /// Parses a month or day field: one or two ASCII digits.
    ///
    /// `u32::from_str` alone is too lenient here because it accepts a leading `+`
    /// and any number of leading zeros.
    fn component(text: &str) -> Option<u32> {
        let well_formed =
            matches!(text.len(), 1 | 2) && text.bytes().all(|b| b.is_ascii_digit());
        if well_formed { text.parse().ok() } else { None }
    }

    debug_assert!(string.starts_with('+') || string.starts_with('-'));
    // Skip the sign and look for the hyphen that terminates the year digits.
    // Per ISO 8601 the unsigned year part must be at least 4 digits.
    let rest = &string[1..];
    let hyphen = rest.find('-')?;
    if hyphen < 4 {
        return None;
    }
    // The year substring is the sign and the digits (but not the separator),
    // e.g. for "+10999-12-31", hyphen is 5 and string[..6] is "+10999".
    let year: i32 = string[..hyphen + 1].parse().ok()?;
    // The remainder should begin with a '-' which we strip off, leaving the month-day part.
    let remainder = string[hyphen + 1..].strip_prefix('-')?;
    // Everything after the first '-' is the day; further separators make it fail to parse
    let (month, day) = remainder.split_once('-')?;
    Some((year, component(month)?, component(day)?))
}
613
614fn parse_date(string: &str) -> Option<NaiveDate> {
615    // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06"
616    //
617    // According to [ISO 8601], years have:
618    //  Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by
619    //  zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol.
620    //
621    // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
622    if string.starts_with('+') || string.starts_with('-') {
623        let (year, month, day) = parse_extended_ymd(string)?;
624        return NaiveDate::from_ymd_opt(year, month, day);
625    }
626
627    if string.len() > 10 {
628        // Try to parse as datetime and return just the date part
629        return string_to_datetime(&Utc, string)
630            .map(|dt| dt.date_naive())
631            .ok();
632    };
633    let mut digits = [0; 10];
634    let mut mask = 0;
635
636    // Treating all bytes the same way, helps LLVM vectorise this correctly
637    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
638        *o = i.wrapping_sub(b'0');
639        mask |= ((*o < 10) as u16) << idx
640    }
641
642    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
643
644    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
645    if digits[4] != HYPHEN {
646        let (year, month, day) = match (mask, string.len()) {
647            (0b11111111, 8) => (
648                digits[0] as u16 * 1000
649                    + digits[1] as u16 * 100
650                    + digits[2] as u16 * 10
651                    + digits[3] as u16,
652                digits[4] * 10 + digits[5],
653                digits[6] * 10 + digits[7],
654            ),
655            _ => return None,
656        };
657        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
658    }
659
660    let (month, day) = match mask {
661        0b1101101111 => {
662            if digits[7] != HYPHEN {
663                return None;
664            }
665            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
666        }
667        0b101101111 => {
668            if digits[7] != HYPHEN {
669                return None;
670            }
671            (digits[5] * 10 + digits[6], digits[8])
672        }
673        0b110101111 => {
674            if digits[6] != HYPHEN {
675                return None;
676            }
677            (digits[5], digits[7] * 10 + digits[8])
678        }
679        0b10101111 => {
680            if digits[6] != HYPHEN {
681                return None;
682            }
683            (digits[5], digits[7])
684        }
685        _ => return None,
686    };
687
688    let year =
689        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
690
691    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
692}
693
694/// Parse a date string into days since 1970-01-01, covering the full
695/// `Date32` range (years ≈ ±5,881,580) for the signed extended-year form.
696///
697/// The Gregorian calendar repeats exactly every 400 years (146,097 days), so
698/// we fold the year into `[0, 400)`, validate the folded date, and add
699/// `era * 146_097` to recover the absolute day count.
700///
701/// For all other inputs, behavior matches [`parse_date`].
702fn parse_date_to_days(string: &str) -> Option<i32> {
703    if string.starts_with('+') || string.starts_with('-') {
704        let (year, month, day) = parse_extended_ymd(string)?;
705        let y = year as i64;
706        let era = y.div_euclid(400);
707        let yoe = y.rem_euclid(400) as i32;
708        let nd = NaiveDate::from_ymd_opt(yoe, month, day)?;
709        let in_era = (nd.num_days_from_ce() - EPOCH_DAYS_FROM_CE) as i64;
710        return i32::try_from(era * 146_097 + in_era).ok();
711    }
712    parse_date(string).map(|nd| nd.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
713}
714
715impl Parser for Date32Type {
716    fn parse(string: &str) -> Option<i32> {
717        parse_date_to_days(string)
718    }
719
720    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
721        let date = NaiveDate::parse_from_str(string, format).ok()?;
722        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
723    }
724}
725
726impl Parser for Date64Type {
727    fn parse(string: &str) -> Option<i64> {
728        if string.len() <= 10 {
729            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
730            Some(datetime.and_utc().timestamp_millis())
731        } else {
732            let date_time = string_to_datetime(&Utc, string).ok()?;
733            Some(date_time.timestamp_millis())
734        }
735    }
736
737    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
738        use chrono::format::Fixed;
739        use chrono::format::StrftimeItems;
740        let fmt = StrftimeItems::new(format);
741        let has_zone = fmt.into_iter().any(|item| match item {
742            chrono::format::Item::Fixed(fixed_item) => matches!(
743                fixed_item,
744                Fixed::RFC2822
745                    | Fixed::RFC3339
746                    | Fixed::TimezoneName
747                    | Fixed::TimezoneOffsetColon
748                    | Fixed::TimezoneOffsetColonZ
749                    | Fixed::TimezoneOffset
750                    | Fixed::TimezoneOffsetZ
751            ),
752            _ => false,
753        });
754        if has_zone {
755            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
756            Some(date_time.timestamp_millis())
757        } else {
758            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
759            Some(date_time.and_utc().timestamp_millis())
760        }
761    }
762}
763
/// Finishes parsing a decimal written in e-notation (e.g. `1.23e-4`).
///
/// Continues from a partially parsed mantissa: consumes any remaining mantissa
/// digits, reads the exponent, and rescales the accumulated value to `scale`.
///
/// # Arguments
/// - `s`: the complete string being parsed
/// - `digits`: number of digits accumulated so far
/// - `fractionals`: number of fractional digits accumulated so far
/// - `result`: the value accumulated so far
/// - `index`: byte offset that, together with `fractionals`, locates where parsing resumes
/// - `precision`, `scale`: the target decimal precision and scale
///
/// # Errors
/// Returns [`ArrowError::ParseError`] if an unexpected character is found, if there
/// are no digits and no exponent at all, or if the rescaled value needs more digits
/// than `precision` allows.
fn parse_e_notation<T: DecimalType>(
    s: &str,
    mut digits: u16,
    mut fractionals: i16,
    mut result: T::Native,
    index: usize,
    precision: u16,
    scale: i16,
) -> Result<T::Native, ArrowError> {
    let mut exp: i16 = 0;
    let base = T::Native::usize_as(10);

    // e has a plus sign
    let mut pos_shift_direction: bool = true;

    // skip to the exponent index directly or just after any processed fractionals
    let mut bs = s.as_bytes().iter().skip(index + fractionals as usize);

    // This function is only called from `parse_decimal`, in which we skip parsing any fractionals
    // after we reach `scale` digits, not knowing ahead of time whether the decimal contains an
    // e-notation or not.
    // So once we do hit into an e-notation, and drop down into this function, we need to parse the
    // remaining unprocessed fractionals too, since otherwise we might lose precision.
    for b in bs.by_ref() {
        match b {
            b'0'..=b'9' => {
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                fractionals += 1;
                digits += 1;
            }
            b'e' | b'E' => {
                break;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        };
    }

    // parse the exponent itself: at most one leading sign, then decimal digits
    // NOTE(review): `exp` is an `i16` and is accumulated with unchecked `*=` / `+=`,
    // so a very long exponent (e.g. `1e99999`) overflows — confirm whether callers
    // bound the input or whether checked arithmetic is needed here
    let mut signed = false;
    for b in bs {
        match b {
            b'-' if !signed => {
                pos_shift_direction = false;
                signed = true;
            }
            b'+' if !signed => {
                pos_shift_direction = true;
                signed = true;
            }
            b if b.is_ascii_digit() => {
                exp *= 10;
                exp += (b - b'0') as i16;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        }
    }

    // Nothing numeric was found at all
    if digits == 0 && fractionals == 0 && exp == 0 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    if !pos_shift_direction {
        // exponent has a large negative sign
        // 1.12345e-30 => 0.0{29}12345, scale = 5
        // every digit is shifted past the last position kept by `scale`, so the value is zero
        if exp - (digits as i16 + scale) > 0 {
            return Ok(T::Native::usize_as(0));
        }
        exp *= -1;
    }

    // point offset: number of fractional digits once the exponent has been applied
    exp = fractionals - exp;
    // We have zeros on the left, we need to count them
    if !pos_shift_direction && exp > digits as i16 {
        digits = exp as u16;
    }
    // Number of numbers to be removed or added
    exp = scale - exp;

    if (digits as i16 + exp) as u16 > precision {
        return Err(ArrowError::ParseError(format!(
            "parse decimal overflow ({s})"
        )));
    }

    // Rescale: drop surplus fractional digits or pad with zeros
    if exp < 0 {
        result = result.div_wrapping(base.pow_wrapping(-exp as _));
    } else {
        result = result.mul_wrapping(base.pow_wrapping(exp as _));
    }

    Ok(result)
}
868
869/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
870/// Expected behavior:
871/// - The result value can't be out of bounds.
872/// - When parsing a decimal with scale 0, all fractional digits will be discarded. The final
873///   fractional digits may be a subset or a superset of the digits after the decimal point when
874///   e-notation is used.
875pub fn parse_decimal<T: DecimalType>(
876    s: &str,
877    precision: u8,
878    scale: i8,
879) -> Result<T::Native, ArrowError> {
880    let mut result = T::Native::usize_as(0);
881    let mut fractionals: i8 = 0;
882    let mut digits: u8 = 0;
883    let base = T::Native::usize_as(10);
884
885    let bs = s.as_bytes();
886
887    if !bs
888        .last()
889        .is_some_and(|b| b.is_ascii_digit() || (b == &b'.' && s.len() > 1))
890    {
891        // If the last character is not a digit (or a decimal point prefixed with some digits), then
892        // it's not a valid decimal.
893        return Err(ArrowError::ParseError(format!(
894            "can't parse the string value {s} to decimal"
895        )));
896    }
897
898    let (signed, negative) = match bs.first() {
899        Some(b'-') => (true, true),
900        Some(b'+') => (true, false),
901        _ => (false, false),
902    };
903
904    // Iterate over the raw input bytes, skipping the sign if any
905    let mut bs = bs.iter().enumerate().skip(signed as usize);
906
907    let mut is_e_notation = false;
908
909    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
910    // Thus, if we validate the precision correctly, we can skip overflow checks.
911    while let Some((index, b)) = bs.next() {
912        match b {
913            b'0'..=b'9' => {
914                if digits == 0 && *b == b'0' {
915                    // Ignore leading zeros.
916                    continue;
917                }
918                digits += 1;
919                result = result.mul_wrapping(base);
920                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
921            }
922            b'.' => {
923                let point_index = index;
924
925                for (_, b) in bs.by_ref() {
926                    if !b.is_ascii_digit() {
927                        if *b == b'e' || *b == b'E' {
928                            result = parse_e_notation::<T>(
929                                s,
930                                digits as u16,
931                                fractionals as i16,
932                                result,
933                                point_index + 1,
934                                precision as u16,
935                                scale as i16,
936                            )?;
937
938                            is_e_notation = true;
939
940                            break;
941                        }
942                        return Err(ArrowError::ParseError(format!(
943                            "can't parse the string value {s} to decimal"
944                        )));
945                    }
946                    if fractionals == scale {
947                        // We have processed all the digits that we need. All that
948                        // is left is to validate that the rest of the string contains
949                        // valid digits.
950                        continue;
951                    }
952                    fractionals += 1;
953                    digits += 1;
954                    result = result.mul_wrapping(base);
955                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
956                }
957
958                if is_e_notation {
959                    break;
960                }
961            }
962            b'e' | b'E' => {
963                result = parse_e_notation::<T>(
964                    s,
965                    digits as u16,
966                    fractionals as i16,
967                    result,
968                    index,
969                    precision as u16,
970                    scale as i16,
971                )?;
972
973                is_e_notation = true;
974
975                break;
976            }
977            _ => {
978                return Err(ArrowError::ParseError(format!(
979                    "can't parse the string value {s} to decimal"
980                )));
981            }
982        }
983    }
984
985    if !is_e_notation {
986        if fractionals < scale {
987            let exp = scale - fractionals;
988            if exp as u8 + digits > precision {
989                return Err(ArrowError::ParseError(format!(
990                    "parse decimal overflow ({s})"
991                )));
992            }
993            let mul = base.pow_wrapping(exp as _);
994            result = result.mul_wrapping(mul);
995        } else if digits > precision {
996            return Err(ArrowError::ParseError(format!(
997                "parse decimal overflow ({s})"
998            )));
999        }
1000    }
1001
1002    Ok(if negative {
1003        result.neg_wrapping()
1004    } else {
1005        result
1006    })
1007}
1008
1009/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
1010pub fn parse_interval_year_month(
1011    value: &str,
1012) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
1013    let config = IntervalParseConfig::new(IntervalUnit::Year);
1014    let interval = Interval::parse(value, &config)?;
1015
1016    let months = interval.to_year_months().map_err(|_| {
1017        ArrowError::CastError(format!(
1018            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
1019        ))
1020    })?;
1021
1022    Ok(IntervalYearMonthType::make_value(0, months))
1023}
1024
1025/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
1026pub fn parse_interval_day_time(
1027    value: &str,
1028) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1029    let config = IntervalParseConfig::new(IntervalUnit::Day);
1030    let interval = Interval::parse(value, &config)?;
1031
1032    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1033        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1034    )))?;
1035
1036    Ok(IntervalDayTimeType::make_value(days, millis))
1037}
1038
1039/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1040pub fn parse_interval_month_day_nano_config(
1041    value: &str,
1042    config: IntervalParseConfig,
1043) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1044    let interval = Interval::parse(value, &config)?;
1045
1046    let (months, days, nanos) = interval.to_month_day_nanos();
1047
1048    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1049}
1050
1051/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1052pub fn parse_interval_month_day_nano(
1053    value: &str,
1054) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1055    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1056}
1057
/// Nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second
const NANOS_PER_SECOND: i64 = NANOS_PER_MILLIS * 1_000;
/// Nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = NANOS_PER_SECOND * 60;
/// Nanoseconds in one hour
const NANOS_PER_HOUR: i64 = NANOS_PER_MINUTE * 60;
/// Nanoseconds in one day (only needed by the tests)
#[cfg(test)]
const NANOS_PER_DAY: i64 = NANOS_PER_HOUR * 24;
1064
1065/// Config to parse interval strings
1066///
1067/// Currently stores the `default_unit` to use if the string doesn't have one specified
1068#[derive(Debug, Clone)]
1069pub struct IntervalParseConfig {
1070    /// The default unit to use if none is specified
1071    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
1072    default_unit: IntervalUnit,
1073}
1074
1075impl IntervalParseConfig {
1076    /// Create a new [IntervalParseConfig] with the given default unit
1077    pub fn new(default_unit: IntervalUnit) -> Self {
1078        Self { default_unit }
1079    }
1080}
1081
/// The units an interval amount can be expressed in.
///
/// Every variant occupies its own bit, so a set of units can be tracked
/// in a single `u16` bitfield.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 1 << 0,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1113
1114/// Logic for parsing interval unit strings
1115///
1116/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1117/// for a list of unit names supported by PostgreSQL which we try to match here.
1118impl FromStr for IntervalUnit {
1119    type Err = ArrowError;
1120
1121    fn from_str(s: &str) -> Result<Self, ArrowError> {
1122        match s.to_lowercase().as_str() {
1123            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1124            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1125            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1126            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1127            "w" | "week" | "weeks" => Ok(Self::Week),
1128            "d" | "day" | "days" => Ok(Self::Day),
1129            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1130            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1131            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1132            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1133                Ok(Self::Millisecond)
1134            }
1135            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1136                Ok(Self::Microsecond)
1137            }
1138            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1139            _ => Err(ArrowError::InvalidArgumentError(format!(
1140                "Unknown interval type: {s}"
1141            ))),
1142        }
1143    }
1144}
1145
1146impl IntervalUnit {
1147    fn from_str_or_config(
1148        s: Option<&str>,
1149        config: &IntervalParseConfig,
1150    ) -> Result<Self, ArrowError> {
1151        match s {
1152            Some(s) => s.parse(),
1153            None => Ok(config.default_unit),
1154        }
1155    }
1156}
1157
/// The `(months, days, nanoseconds)` triple describing an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of fractional decimal digits kept for an interval amount.
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds
const INTERVAL_PRECISION: u32 = 15;

/// A numeric interval amount split into whole and fractional parts
#[derive(Debug, Clone, Copy, PartialEq)]
struct IntervalAmount {
    /// Whole-number part of the amount
    integer: i64,
    /// Fractional part of the amount, multiplied by 10^INTERVAL_PRECISION
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the two parts directly
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1178
1179impl FromStr for IntervalAmount {
1180    type Err = ArrowError;
1181
1182    fn from_str(s: &str) -> Result<Self, Self::Err> {
1183        match s.split_once('.') {
1184            Some((integer, frac))
1185                if frac.len() <= INTERVAL_PRECISION as usize
1186                    && !frac.is_empty()
1187                    && !frac.starts_with('-') =>
1188            {
1189                // integer will be "" for values like ".5"
1190                // and "-" for values like "-.5"
1191                let explicit_neg = integer.starts_with('-');
1192                let integer = if integer.is_empty() || integer == "-" {
1193                    Ok(0)
1194                } else {
1195                    integer.parse::<i64>().map_err(|_| {
1196                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1197                    })
1198                }?;
1199
1200                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1201                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1202                })?;
1203
1204                // scale fractional part by interval precision
1205                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1206
1207                // propagate the sign of the integer part to the fractional part
1208                let frac = if integer < 0 || explicit_neg {
1209                    -frac
1210                } else {
1211                    frac
1212                };
1213
1214                let result = Self { integer, frac };
1215
1216                Ok(result)
1217            }
1218            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1219                "Failed to parse {s} as interval amount"
1220            ))),
1221            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1222                Err(ArrowError::ParseError(format!(
1223                    "{s} exceeds the precision available for interval amount"
1224                )))
1225            }
1226            Some(_) | None => {
1227                let integer = s.parse::<i64>().map_err(|_| {
1228                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1229                })?;
1230
1231                let result = Self { integer, frac: 0 };
1232                Ok(result)
1233            }
1234        }
1235    }
1236}
1237
1238#[derive(Debug, Default, PartialEq)]
1239struct Interval {
1240    months: i32,
1241    days: i32,
1242    nanos: i64,
1243}
1244
1245impl Interval {
1246    fn new(months: i32, days: i32, nanos: i64) -> Self {
1247        Self {
1248            months,
1249            days,
1250            nanos,
1251        }
1252    }
1253
1254    fn to_year_months(&self) -> Result<i32, ArrowError> {
1255        match (self.months, self.days, self.nanos) {
1256            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1257            _ => Err(ArrowError::InvalidArgumentError(format!(
1258                "Unable to represent interval with days and nanos as year-months: {self:?}"
1259            ))),
1260        }
1261    }
1262
1263    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1264        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1265
1266        match self.nanos {
1267            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1268                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1269                    ArrowError::InvalidArgumentError(format!(
1270                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1271                        self.nanos
1272                    ))
1273                })?;
1274
1275                Ok((days, millis))
1276            }
1277            nanos => Err(ArrowError::InvalidArgumentError(format!(
1278                "Unable to represent {nanos} as milliseconds"
1279            ))),
1280        }
1281    }
1282
1283    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1284        (self.months, self.days, self.nanos)
1285    }
1286
1287    /// Parse string value in traditional Postgres format such as
1288    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1289    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1290        let components = parse_interval_components(value, config)?;
1291
1292        components
1293            .into_iter()
1294            .try_fold(Self::default(), |result, (amount, unit)| {
1295                result.add(amount, unit)
1296            })
1297    }
1298
1299    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1300    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1301    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1302    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1303    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1304    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1305    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1306        let result = match unit {
1307            IntervalUnit::Century => {
1308                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1309                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1310                let months = months_int
1311                    .add_checked(month_frac)?
1312                    .try_into()
1313                    .map_err(|_| {
1314                        ArrowError::ParseError(format!(
1315                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1316                            &amount.integer
1317                        ))
1318                    })?;
1319
1320                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1321            }
1322            IntervalUnit::Decade => {
1323                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1324
1325                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1326                let months = months_int
1327                    .add_checked(month_frac)?
1328                    .try_into()
1329                    .map_err(|_| {
1330                        ArrowError::ParseError(format!(
1331                            "Unable to represent {} decades as months in a signed 32-bit integer",
1332                            &amount.integer
1333                        ))
1334                    })?;
1335
1336                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1337            }
1338            IntervalUnit::Year => {
1339                let months_int = amount.integer.mul_checked(12)?;
1340                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1341                let months = months_int
1342                    .add_checked(month_frac)?
1343                    .try_into()
1344                    .map_err(|_| {
1345                        ArrowError::ParseError(format!(
1346                            "Unable to represent {} years as months in a signed 32-bit integer",
1347                            &amount.integer
1348                        ))
1349                    })?;
1350
1351                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1352            }
1353            IntervalUnit::Month => {
1354                let months = amount.integer.try_into().map_err(|_| {
1355                    ArrowError::ParseError(format!(
1356                        "Unable to represent {} months in a signed 32-bit integer",
1357                        &amount.integer
1358                    ))
1359                })?;
1360
1361                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1362                let days = days.try_into().map_err(|_| {
1363                    ArrowError::ParseError(format!(
1364                        "Unable to represent {} months as days in a signed 32-bit integer",
1365                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1366                    ))
1367                })?;
1368
1369                Self::new(
1370                    self.months.add_checked(months)?,
1371                    self.days.add_checked(days)?,
1372                    self.nanos,
1373                )
1374            }
1375            IntervalUnit::Week => {
1376                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1377                    ArrowError::ParseError(format!(
1378                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1379                        &amount.integer
1380                    ))
1381                })?;
1382
1383                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1384
1385                Self::new(
1386                    self.months,
1387                    self.days.add_checked(days)?,
1388                    self.nanos.add_checked(nanos)?,
1389                )
1390            }
1391            IntervalUnit::Day => {
1392                let days = amount.integer.try_into().map_err(|_| {
1393                    ArrowError::InvalidArgumentError(format!(
1394                        "Unable to represent {} days in a signed 32-bit integer",
1395                        amount.integer
1396                    ))
1397                })?;
1398
1399                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1400
1401                Self::new(
1402                    self.months,
1403                    self.days.add_checked(days)?,
1404                    self.nanos.add_checked(nanos)?,
1405                )
1406            }
1407            IntervalUnit::Hour => {
1408                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1409                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1410                let nanos = nanos_int.add_checked(nanos_frac)?;
1411
1412                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1413            }
1414            IntervalUnit::Minute => {
1415                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1416                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1417
1418                let nanos = nanos_int.add_checked(nanos_frac)?;
1419
1420                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1421            }
1422            IntervalUnit::Second => {
1423                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1424                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1425                let nanos = nanos_int.add_checked(nanos_frac)?;
1426
1427                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1428            }
1429            IntervalUnit::Millisecond => {
1430                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1431                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1432                let nanos = nanos_int.add_checked(nanos_frac)?;
1433
1434                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1435            }
1436            IntervalUnit::Microsecond => {
1437                let nanos_int = amount.integer.mul_checked(1_000)?;
1438                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1439                let nanos = nanos_int.add_checked(nanos_frac)?;
1440
1441                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1442            }
1443            IntervalUnit::Nanosecond => {
1444                let nanos_int = amount.integer;
1445                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1446                let nanos = nanos_int.add_checked(nanos_frac)?;
1447
1448                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1449            }
1450        };
1451
1452        Ok(result)
1453    }
1454}
1455
1456/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1457fn parse_interval_components(
1458    value: &str,
1459    config: &IntervalParseConfig,
1460) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1461    let raw_pairs = split_interval_components(value);
1462
1463    // parse amounts and units
1464    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1465        .iter()
1466        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1467        .collect()
1468    else {
1469        return Err(ArrowError::ParseError(format!(
1470            "Invalid input syntax for type interval: {value:?}"
1471        )));
1472    };
1473
1474    // collect parsed results
1475    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1476
1477    // duplicate units?
1478    let mut observed_interval_types = 0;
1479    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1480        if observed_interval_types & (*unit as u16) != 0 {
1481            return Err(ArrowError::ParseError(format!(
1482                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1483                value,
1484                raw_unit.unwrap_or_default(),
1485            )));
1486        }
1487
1488        observed_interval_types |= *unit as u16;
1489    }
1490
1491    let result = amounts.iter().copied().zip(units.iter().copied());
1492
1493    Ok(result.collect::<Vec<_>>())
1494}
1495
/// Break an interval string into raw `(amount, unit)` text pairs.
///
/// Pairs are separated by whitespace. Inside a pair the amount and unit can be written
/// together (`1y`) or as two words (`1 year`). A trailing amount with no unit yields `None`
/// for its unit.
///
/// This should match the behavior of PostgreSQL's interval parser.
fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
    let mut pairs = Vec::new();
    let mut tokens = value.split(char::is_whitespace);

    while let Some(token) = tokens.next() {
        match token.find(not_interval_amount) {
            // Amount and unit share one token: cut at the first non-numeric character.
            Some(boundary) => {
                let (amount, unit) = token.split_at(boundary);
                pairs.push((amount, Some(unit)));
            }
            // Purely numeric token: the unit, if any, is the following token.
            None => {
                let unit = tokens.next();
                pairs.push((token, unit));
                if unit.is_none() {
                    break;
                }
            }
        }
    }

    pairs
}

/// Returns `true` for characters that cannot appear in an interval's numeric amount
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1522
1523#[cfg(test)]
1524mod tests {
1525    use super::*;
1526    use arrow_array::temporal_conversions::date32_to_datetime;
1527    use arrow_buffer::i256;
1528
1529    #[test]
1530    fn test_parse_nanos() {
1531        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1532        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1533        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1534    }
1535
1536    #[test]
1537    fn string_to_timestamp_timezone() {
1538        // Explicit timezone
1539        assert_eq!(
1540            1599572549190855000,
1541            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1542        );
1543        assert_eq!(
1544            1599572549190855000,
1545            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1546        );
1547        assert_eq!(
1548            1599572549000000000,
1549            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1550        ); // no fractional part
1551        assert_eq!(
1552            1599590549190855000,
1553            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1554        );
1555    }
1556
1557    #[test]
1558    fn string_to_timestamp_timezone_space() {
1559        // Ensure space rather than T between time and date is accepted
1560        assert_eq!(
1561            1599572549190855000,
1562            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1563        );
1564        assert_eq!(
1565            1599572549190855000,
1566            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1567        );
1568        assert_eq!(
1569            1599572549000000000,
1570            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1571        ); // no fractional part
1572        assert_eq!(
1573            1599590549190855000,
1574            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1575        );
1576    }
1577
1578    #[test]
1579    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1580    fn string_to_timestamp_no_timezone() {
1581        // This test is designed to succeed in regardless of the local
1582        // timezone the test machine is running. Thus it is still
1583        // somewhat susceptible to bugs in the use of chrono
1584        let naive_datetime = NaiveDateTime::new(
1585            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1586            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1587        );
1588
1589        // Ensure both T and ' ' variants work
1590        assert_eq!(
1591            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1592            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1593        );
1594
1595        assert_eq!(
1596            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1597            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1598        );
1599
1600        // Also ensure that parsing timestamps with no fractional
1601        // second part works as well
1602        let datetime_whole_secs = NaiveDateTime::new(
1603            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1604            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1605        )
1606        .and_utc();
1607
1608        // Ensure both T and ' ' variants work
1609        assert_eq!(
1610            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1611            parse_timestamp("2020-09-08T13:42:29").unwrap()
1612        );
1613
1614        assert_eq!(
1615            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1616            parse_timestamp("2020-09-08 13:42:29").unwrap()
1617        );
1618
1619        // ensure without time work
1620        // no time, should be the nano second at
1621        // 2020-09-08 0:0:0
1622        let datetime_no_time = NaiveDateTime::new(
1623            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1624            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1625        )
1626        .and_utc();
1627
1628        assert_eq!(
1629            datetime_no_time.timestamp_nanos_opt().unwrap(),
1630            parse_timestamp("2020-09-08").unwrap()
1631        )
1632    }
1633
1634    #[test]
1635    fn string_to_timestamp_chrono() {
1636        let cases = [
1637            "2020-09-08T13:42:29Z",
1638            "1969-01-01T00:00:00.1Z",
1639            "2020-09-08T12:00:12.12345678+00:00",
1640            "2020-09-08T12:00:12+00:00",
1641            "2020-09-08T12:00:12.1+00:00",
1642            "2020-09-08T12:00:12.12+00:00",
1643            "2020-09-08T12:00:12.123+00:00",
1644            "2020-09-08T12:00:12.1234+00:00",
1645            "2020-09-08T12:00:12.12345+00:00",
1646            "2020-09-08T12:00:12.123456+00:00",
1647            "2020-09-08T12:00:12.1234567+00:00",
1648            "2020-09-08T12:00:12.12345678+00:00",
1649            "2020-09-08T12:00:12.123456789+00:00",
1650            "2020-09-08T12:00:12.12345678912z",
1651            "2020-09-08T12:00:12.123456789123Z",
1652            "2020-09-08T12:00:12.123456789123+02:00",
1653            "2020-09-08T12:00:12.12345678912345Z",
1654            "2020-09-08T12:00:12.1234567891234567+02:00",
1655            "2020-09-08T12:00:60Z",
1656            "2020-09-08T12:00:60.123Z",
1657            "2020-09-08T12:00:60.123456+02:00",
1658            "2020-09-08T12:00:60.1234567891234567+02:00",
1659            "2020-09-08T12:00:60.999999999+02:00",
1660            "2020-09-08t12:00:12.12345678+00:00",
1661            "2020-09-08t12:00:12+00:00",
1662            "2020-09-08t12:00:12Z",
1663        ];
1664
1665        for case in cases {
1666            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1667            let chrono_utc = chrono.with_timezone(&Utc);
1668
1669            let custom = string_to_datetime(&Utc, case).unwrap();
1670            assert_eq!(chrono_utc, custom)
1671        }
1672    }
1673
1674    #[test]
1675    fn string_to_timestamp_naive() {
1676        let cases = [
1677            "2018-11-13T17:11:10.011375885995",
1678            "2030-12-04T17:11:10.123",
1679            "2030-12-04T17:11:10.1234",
1680            "2030-12-04T17:11:10.123456",
1681        ];
1682        for case in cases {
1683            let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1684            let custom = string_to_datetime(&Utc, case).unwrap();
1685            assert_eq!(chrono, custom.naive_utc())
1686        }
1687    }
1688
1689    #[test]
1690    fn string_to_timestamp_invalid() {
1691        // Test parsing invalid formats
1692        let cases = [
1693            ("", "timestamp must contain at least 10 characters"),
1694            ("SS", "timestamp must contain at least 10 characters"),
1695            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1696            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1697            ("1997-01-31  09:26:56.123Z", "error parsing time"),
1698            ("1997:01:31T09:26:56.123Z", "error parsing date"),
1699            ("1997:1:31T09:26:56.123Z", "error parsing date"),
1700            ("1997-01-32T09:26:56.123Z", "error parsing date"),
1701            ("1997-13-32T09:26:56.123Z", "error parsing date"),
1702            ("1997-02-29T09:26:56.123Z", "error parsing date"),
1703            ("2015-02-30T17:35:20-08:00", "error parsing date"),
1704            ("1997-01-10T9:26:56.123Z", "error parsing time"),
1705            ("2015-01-20T25:35:20-08:00", "error parsing time"),
1706            ("1997-01-10T09:61:56.123Z", "error parsing time"),
1707            ("1997-01-10T09:61:90.123Z", "error parsing time"),
1708            ("1997-01-10T12:00:6.123Z", "error parsing time"),
1709            ("1997-01-31T092656.123Z", "error parsing time"),
1710            ("1997-01-10T12:00:06.", "error parsing time"),
1711            ("1997-01-10T12:00:06. ", "error parsing time"),
1712        ];
1713
1714        for (s, ctx) in cases {
1715            let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1716            let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1717            assert_eq!(actual, expected)
1718        }
1719    }
1720
1721    // Parse a timestamp to timestamp int with a useful human readable error message
1722    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1723        let result = string_to_timestamp_nanos(s);
1724        if let Err(e) = &result {
1725            eprintln!("Error parsing timestamp '{s}': {e:?}");
1726        }
1727        result
1728    }
1729
1730    #[test]
1731    fn string_without_timezone_to_timestamp() {
1732        // string without timezone should always output the same regardless the local or session timezone
1733
1734        let naive_datetime = NaiveDateTime::new(
1735            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1736            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1737        );
1738
1739        // Ensure both T and ' ' variants work
1740        assert_eq!(
1741            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1742            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1743        );
1744
1745        assert_eq!(
1746            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1747            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1748        );
1749
1750        let naive_datetime = NaiveDateTime::new(
1751            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1752            NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1753        );
1754
1755        // Ensure both T and ' ' variants work
1756        assert_eq!(
1757            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1758            parse_timestamp("2020-09-08T13:42:29").unwrap()
1759        );
1760
1761        assert_eq!(
1762            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1763            parse_timestamp("2020-09-08 13:42:29").unwrap()
1764        );
1765
1766        let tz: Tz = "+02:00".parse().unwrap();
1767        let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1768        let utc = date.naive_utc().to_string();
1769        assert_eq!(utc, "2020-09-08 11:42:29");
1770        let local = date.naive_local().to_string();
1771        assert_eq!(local, "2020-09-08 13:42:29");
1772
1773        let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1774        let utc = date.naive_utc().to_string();
1775        assert_eq!(utc, "2020-09-08 13:42:29");
1776        let local = date.naive_local().to_string();
1777        assert_eq!(local, "2020-09-08 15:42:29");
1778
1779        let dt =
1780            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1781        let local: Tz = "+08:00".parse().unwrap();
1782
1783        // Parsed as offset from UTC
1784        let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1785        assert_eq!(dt, date.naive_utc());
1786        assert_ne!(dt, date.naive_local());
1787
1788        // Parsed as offset from local
1789        let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1790        assert_eq!(dt, date.naive_local());
1791        assert_ne!(dt, date.naive_utc());
1792    }
1793
1794    #[test]
1795    fn parse_date32() {
1796        let cases = [
1797            "2020-09-08",
1798            "2020-9-8",
1799            "2020-09-8",
1800            "2020-9-08",
1801            "2020-12-1",
1802            "1690-2-5",
1803            "2020-09-08 01:02:03",
1804        ];
1805        for case in cases {
1806            let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1807            let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1808                .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1809                .unwrap();
1810            assert_eq!(v.date(), expected);
1811        }
1812
1813        let err_cases = [
1814            "",
1815            "80-01-01",
1816            "342",
1817            "Foo",
1818            "2020-09-08-03",
1819            "2020--04-03",
1820            "2020--",
1821            "2020-09-08 01",
1822            "2020-09-08 01:02",
1823            "2020-09-08 01-02-03",
1824            "2020-9-8 01:02:03",
1825            "2020-09-08 1:2:3",
1826        ];
1827        for case in err_cases {
1828            assert_eq!(Date32Type::parse(case), None);
1829        }
1830    }
1831
1832    #[test]
1833    fn parse_date32_extended_year() {
1834        // `Date32` covers any i32 days-from-epoch, verify we can parse it
1835        let cases: &[(&str, i32)] = &[
1836            ("+1970-01-01", 0),
1837            ("+2024-01-01", 19_723),
1838            ("-0001-01-01", -719_893),
1839            ("+29349-01-26", 10_000_000),
1840            ("+2739877-01-03", 1_000_000_000),
1841            // Extremes of the Date32 representable range.
1842            ("+5881580-07-11", i32::MAX),
1843            ("-5877641-06-23", i32::MIN),
1844        ];
1845        for (input, expected) in cases {
1846            assert_eq!(Date32Type::parse(input), Some(*expected), "input: {input}");
1847        }
1848
1849        // One past Date32::MAX / MIN overflows i32 days-from-epoch.
1850        assert_eq!(Date32Type::parse("+5881580-07-12"), None);
1851        assert_eq!(Date32Type::parse("-5877641-06-22"), None);
1852        // Invalid calendar dates still rejected regardless of year magnitude.
1853        assert_eq!(Date32Type::parse("+2739877-02-30"), None);
1854        assert_eq!(Date32Type::parse("+2739877-13-01"), None);
1855        assert_eq!(Date32Type::parse("-2739877-02-30"), None);
1856    }
1857
1858    #[test]
1859    fn parse_time64_nanos() {
1860        assert_eq!(
1861            Time64NanosecondType::parse("02:10:01.1234567899999999"),
1862            Some(7_801_123_456_789)
1863        );
1864        assert_eq!(
1865            Time64NanosecondType::parse("02:10:01.1234567"),
1866            Some(7_801_123_456_700)
1867        );
1868        assert_eq!(
1869            Time64NanosecondType::parse("2:10:01.1234567"),
1870            Some(7_801_123_456_700)
1871        );
1872        assert_eq!(
1873            Time64NanosecondType::parse("12:10:01.123456789 AM"),
1874            Some(601_123_456_789)
1875        );
1876        assert_eq!(
1877            Time64NanosecondType::parse("12:10:01.123456789 am"),
1878            Some(601_123_456_789)
1879        );
1880        assert_eq!(
1881            Time64NanosecondType::parse("2:10:01.12345678 PM"),
1882            Some(51_001_123_456_780)
1883        );
1884        assert_eq!(
1885            Time64NanosecondType::parse("2:10:01.12345678 pm"),
1886            Some(51_001_123_456_780)
1887        );
1888        assert_eq!(
1889            Time64NanosecondType::parse("02:10:01"),
1890            Some(7_801_000_000_000)
1891        );
1892        assert_eq!(
1893            Time64NanosecondType::parse("2:10:01"),
1894            Some(7_801_000_000_000)
1895        );
1896        assert_eq!(
1897            Time64NanosecondType::parse("12:10:01 AM"),
1898            Some(601_000_000_000)
1899        );
1900        assert_eq!(
1901            Time64NanosecondType::parse("12:10:01 am"),
1902            Some(601_000_000_000)
1903        );
1904        assert_eq!(
1905            Time64NanosecondType::parse("2:10:01 PM"),
1906            Some(51_001_000_000_000)
1907        );
1908        assert_eq!(
1909            Time64NanosecondType::parse("2:10:01 pm"),
1910            Some(51_001_000_000_000)
1911        );
1912        assert_eq!(
1913            Time64NanosecondType::parse("02:10"),
1914            Some(7_800_000_000_000)
1915        );
1916        assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1917        assert_eq!(
1918            Time64NanosecondType::parse("12:10 AM"),
1919            Some(600_000_000_000)
1920        );
1921        assert_eq!(
1922            Time64NanosecondType::parse("12:10 am"),
1923            Some(600_000_000_000)
1924        );
1925        assert_eq!(
1926            Time64NanosecondType::parse("2:10 PM"),
1927            Some(51_000_000_000_000)
1928        );
1929        assert_eq!(
1930            Time64NanosecondType::parse("2:10 pm"),
1931            Some(51_000_000_000_000)
1932        );
1933
1934        // parse directly as nanoseconds
1935        assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1936
1937        // leap second
1938        assert_eq!(
1939            Time64NanosecondType::parse("23:59:60"),
1940            Some(86_400_000_000_000)
1941        );
1942
1943        // custom format
1944        assert_eq!(
1945            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1946            Some(7_801_123_456_700)
1947        );
1948    }
1949
1950    #[test]
1951    fn parse_time64_micros() {
1952        // expected formats
1953        assert_eq!(
1954            Time64MicrosecondType::parse("02:10:01.1234"),
1955            Some(7_801_123_400)
1956        );
1957        assert_eq!(
1958            Time64MicrosecondType::parse("2:10:01.1234"),
1959            Some(7_801_123_400)
1960        );
1961        assert_eq!(
1962            Time64MicrosecondType::parse("12:10:01.123456 AM"),
1963            Some(601_123_456)
1964        );
1965        assert_eq!(
1966            Time64MicrosecondType::parse("12:10:01.123456 am"),
1967            Some(601_123_456)
1968        );
1969        assert_eq!(
1970            Time64MicrosecondType::parse("2:10:01.12345 PM"),
1971            Some(51_001_123_450)
1972        );
1973        assert_eq!(
1974            Time64MicrosecondType::parse("2:10:01.12345 pm"),
1975            Some(51_001_123_450)
1976        );
1977        assert_eq!(
1978            Time64MicrosecondType::parse("02:10:01"),
1979            Some(7_801_000_000)
1980        );
1981        assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1982        assert_eq!(
1983            Time64MicrosecondType::parse("12:10:01 AM"),
1984            Some(601_000_000)
1985        );
1986        assert_eq!(
1987            Time64MicrosecondType::parse("12:10:01 am"),
1988            Some(601_000_000)
1989        );
1990        assert_eq!(
1991            Time64MicrosecondType::parse("2:10:01 PM"),
1992            Some(51_001_000_000)
1993        );
1994        assert_eq!(
1995            Time64MicrosecondType::parse("2:10:01 pm"),
1996            Some(51_001_000_000)
1997        );
1998        assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1999        assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
2000        assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
2001        assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
2002        assert_eq!(
2003            Time64MicrosecondType::parse("2:10 PM"),
2004            Some(51_000_000_000)
2005        );
2006        assert_eq!(
2007            Time64MicrosecondType::parse("2:10 pm"),
2008            Some(51_000_000_000)
2009        );
2010
2011        // parse directly as microseconds
2012        assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
2013
2014        // leap second
2015        assert_eq!(
2016            Time64MicrosecondType::parse("23:59:60"),
2017            Some(86_400_000_000)
2018        );
2019
2020        // custom format
2021        assert_eq!(
2022            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
2023            Some(7_801_123_400)
2024        );
2025    }
2026
2027    #[test]
2028    fn parse_time32_millis() {
2029        // expected formats
2030        assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
2031        assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
2032        assert_eq!(
2033            Time32MillisecondType::parse("12:10:01.123 AM"),
2034            Some(601_123)
2035        );
2036        assert_eq!(
2037            Time32MillisecondType::parse("12:10:01.123 am"),
2038            Some(601_123)
2039        );
2040        assert_eq!(
2041            Time32MillisecondType::parse("2:10:01.12 PM"),
2042            Some(51_001_120)
2043        );
2044        assert_eq!(
2045            Time32MillisecondType::parse("2:10:01.12 pm"),
2046            Some(51_001_120)
2047        );
2048        assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2049        assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2050        assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2051        assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2052        assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2053        assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2054        assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2055        assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2056        assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2057        assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2058        assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2059        assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2060
2061        // parse directly as milliseconds
2062        assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2063
2064        // leap second
2065        assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2066
2067        // custom format
2068        assert_eq!(
2069            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2070            Some(7_801_100)
2071        );
2072    }
2073
2074    #[test]
2075    fn parse_time32_secs() {
2076        // expected formats
2077        assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2078        assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2079        assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2080        assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2081        assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2082        assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2083        assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2084        assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2085        assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2086        assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2087        assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2088        assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2089        assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2090
2091        // parse directly as seconds
2092        assert_eq!(Time32SecondType::parse("1"), Some(1));
2093
2094        // leap second
2095        assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2096
2097        // custom format
2098        assert_eq!(
2099            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2100            Some(7_801)
2101        );
2102    }
2103
2104    #[test]
2105    fn test_string_to_time_invalid() {
2106        let cases = [
2107            "25:00",
2108            "9:00:",
2109            "009:00",
2110            "09:0:00",
2111            "25:00:00",
2112            "13:00 AM",
2113            "13:00 PM",
2114            "12:00. AM",
2115            "09:0:00",
2116            "09:01:0",
2117            "09:01:1",
2118            "9:1:0",
2119            "09:01:0",
2120            "1:00.123",
2121            "1:00:00.123f",
2122            " 9:00:00",
2123            ":09:00",
2124            "T9:00:00",
2125            "AM",
2126        ];
2127        for case in cases {
2128            assert!(string_to_time(case).is_none(), "{case}");
2129        }
2130    }
2131
2132    #[test]
2133    fn test_string_to_time_chrono() {
2134        let cases = [
2135            ("1:00", "%H:%M"),
2136            ("12:00", "%H:%M"),
2137            ("13:00", "%H:%M"),
2138            ("24:00", "%H:%M"),
2139            ("1:00:00", "%H:%M:%S"),
2140            ("12:00:30", "%H:%M:%S"),
2141            ("13:00:59", "%H:%M:%S"),
2142            ("24:00:60", "%H:%M:%S"),
2143            ("09:00:00", "%H:%M:%S%.f"),
2144            ("0:00:30.123456", "%H:%M:%S%.f"),
2145            ("0:00 AM", "%I:%M %P"),
2146            ("1:00 AM", "%I:%M %P"),
2147            ("12:00 AM", "%I:%M %P"),
2148            ("13:00 AM", "%I:%M %P"),
2149            ("0:00 PM", "%I:%M %P"),
2150            ("1:00 PM", "%I:%M %P"),
2151            ("12:00 PM", "%I:%M %P"),
2152            ("13:00 PM", "%I:%M %P"),
2153            ("1:00 pM", "%I:%M %P"),
2154            ("1:00 Pm", "%I:%M %P"),
2155            ("1:00 aM", "%I:%M %P"),
2156            ("1:00 Am", "%I:%M %P"),
2157            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2158            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2159            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2160            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2161            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2162            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2163            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2164        ];
2165        for (s, format) in cases {
2166            let chrono = NaiveTime::parse_from_str(s, format).ok();
2167            let custom = string_to_time(s);
2168            assert_eq!(chrono, custom, "{s}");
2169        }
2170    }
2171
2172    #[test]
2173    fn test_parse_interval() {
2174        let config = IntervalParseConfig::new(IntervalUnit::Month);
2175
2176        assert_eq!(
2177            Interval::new(1i32, 0i32, 0i64),
2178            Interval::parse("1 month", &config).unwrap(),
2179        );
2180
2181        assert_eq!(
2182            Interval::new(2i32, 0i32, 0i64),
2183            Interval::parse("2 month", &config).unwrap(),
2184        );
2185
2186        assert_eq!(
2187            Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2188            Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2189        );
2190
2191        assert_eq!(
2192            Interval::new(0i32, 15i32, 0),
2193            Interval::parse("0.5 months", &config).unwrap(),
2194        );
2195
2196        assert_eq!(
2197            Interval::new(0i32, 15i32, 0),
2198            Interval::parse(".5 months", &config).unwrap(),
2199        );
2200
2201        assert_eq!(
2202            Interval::new(0i32, -15i32, 0),
2203            Interval::parse("-0.5 months", &config).unwrap(),
2204        );
2205
2206        assert_eq!(
2207            Interval::new(0i32, -15i32, 0),
2208            Interval::parse("-.5 months", &config).unwrap(),
2209        );
2210
2211        assert_eq!(
2212            Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2213            Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2214        );
2215
2216        assert_eq!(
2217            Interval::parse("1 centurys 1 month", &config)
2218                .unwrap_err()
2219                .to_string(),
2220            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2221        );
2222
2223        assert_eq!(
2224            Interval::new(37i32, 0i32, 0i64),
2225            Interval::parse("3 year 1 month", &config).unwrap(),
2226        );
2227
2228        assert_eq!(
2229            Interval::new(35i32, 0i32, 0i64),
2230            Interval::parse("3 year -1 month", &config).unwrap(),
2231        );
2232
2233        assert_eq!(
2234            Interval::new(-37i32, 0i32, 0i64),
2235            Interval::parse("-3 year -1 month", &config).unwrap(),
2236        );
2237
2238        assert_eq!(
2239            Interval::new(-35i32, 0i32, 0i64),
2240            Interval::parse("-3 year 1 month", &config).unwrap(),
2241        );
2242
2243        assert_eq!(
2244            Interval::new(0i32, 5i32, 0i64),
2245            Interval::parse("5 days", &config).unwrap(),
2246        );
2247
2248        assert_eq!(
2249            Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2250            Interval::parse("7 days 3 hours", &config).unwrap(),
2251        );
2252
2253        assert_eq!(
2254            Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2255            Interval::parse("7 days 5 minutes", &config).unwrap(),
2256        );
2257
2258        assert_eq!(
2259            Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2260            Interval::parse("7 days -5 minutes", &config).unwrap(),
2261        );
2262
2263        assert_eq!(
2264            Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2265            Interval::parse("-7 days 5 hours", &config).unwrap(),
2266        );
2267
2268        assert_eq!(
2269            Interval::new(
2270                0i32,
2271                -7i32,
2272                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2273            ),
2274            Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2275        );
2276
2277        assert_eq!(
2278            Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2279            Interval::parse("1 year 25 millisecond", &config).unwrap(),
2280        );
2281
2282        assert_eq!(
2283            Interval::new(
2284                12i32,
2285                1i32,
2286                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2287            ),
2288            Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2289        );
2290
2291        assert_eq!(
2292            Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2293            Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2294        );
2295
2296        assert_eq!(
2297            Interval::new(12i32, 1i32, 1000i64),
2298            Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2299        );
2300
2301        assert_eq!(
2302            Interval::new(12i32, 1i32, 1i64),
2303            Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2304        );
2305
2306        assert_eq!(
2307            Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2308            Interval::parse("1 month -1 second", &config).unwrap(),
2309        );
2310
2311        assert_eq!(
2312            Interval::new(
2313                -13i32,
2314                -8i32,
2315                -NANOS_PER_HOUR
2316                    - NANOS_PER_MINUTE
2317                    - NANOS_PER_SECOND
2318                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2319            ),
2320            Interval::parse(
2321                "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2322                &config
2323            )
2324            .unwrap(),
2325        );
2326
2327        // no units
2328        assert_eq!(
2329            Interval::new(1, 0, 0),
2330            Interval::parse("1", &config).unwrap()
2331        );
2332        assert_eq!(
2333            Interval::new(42, 0, 0),
2334            Interval::parse("42", &config).unwrap()
2335        );
2336        assert_eq!(
2337            Interval::new(0, 0, 42_000_000_000),
2338            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2339        );
2340
2341        // shorter units
2342        assert_eq!(
2343            Interval::new(1, 0, 0),
2344            Interval::parse("1 mon", &config).unwrap()
2345        );
2346        assert_eq!(
2347            Interval::new(1, 0, 0),
2348            Interval::parse("1 mons", &config).unwrap()
2349        );
2350        assert_eq!(
2351            Interval::new(0, 0, 1_000_000),
2352            Interval::parse("1 ms", &config).unwrap()
2353        );
2354        assert_eq!(
2355            Interval::new(0, 0, 1_000),
2356            Interval::parse("1 us", &config).unwrap()
2357        );
2358
2359        // no space
2360        assert_eq!(
2361            Interval::new(0, 0, 1_000),
2362            Interval::parse("1us", &config).unwrap()
2363        );
2364        assert_eq!(
2365            Interval::new(0, 0, NANOS_PER_SECOND),
2366            Interval::parse("1s", &config).unwrap()
2367        );
2368        assert_eq!(
2369            Interval::new(1, 2, 10_864_000_000_000),
2370            Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2371        );
2372
2373        assert_eq!(
2374            Interval::new(
2375                -13i32,
2376                -8i32,
2377                -NANOS_PER_HOUR
2378                    - NANOS_PER_MINUTE
2379                    - NANOS_PER_SECOND
2380                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2381            ),
2382            Interval::parse(
2383                "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2384                &config
2385            )
2386            .unwrap(),
2387        );
2388
2389        assert_eq!(
2390            Interval::parse("1h s", &config).unwrap_err().to_string(),
2391            r#"Parser error: Invalid input syntax for type interval: "1h s""#
2392        );
2393
2394        assert_eq!(
2395            Interval::parse("1XX", &config).unwrap_err().to_string(),
2396            r#"Parser error: Invalid input syntax for type interval: "1XX""#
2397        );
2398    }
2399
2400    #[test]
2401    fn test_duplicate_interval_type() {
2402        let config = IntervalParseConfig::new(IntervalUnit::Month);
2403
2404        let err = Interval::parse("1 month 1 second 1 second", &config)
2405            .expect_err("parsing interval should have failed");
2406        assert_eq!(
2407            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2408            format!("{err:?}")
2409        );
2410
2411        // test with singular and plural forms
2412        let err = Interval::parse("1 century 2 centuries", &config)
2413            .expect_err("parsing interval should have failed");
2414        assert_eq!(
2415            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2416            format!("{err:?}")
2417        );
2418    }
2419
2420    #[test]
2421    fn test_interval_amount_parsing() {
2422        // integer
2423        let result = IntervalAmount::from_str("123").unwrap();
2424        let expected = IntervalAmount::new(123, 0);
2425
2426        assert_eq!(result, expected);
2427
2428        // positive w/ fractional
2429        let result = IntervalAmount::from_str("0.3").unwrap();
2430        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2431
2432        assert_eq!(result, expected);
2433
2434        // negative w/ fractional
2435        let result = IntervalAmount::from_str("-3.5").unwrap();
2436        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2437
2438        assert_eq!(result, expected);
2439
2440        // invalid: missing fractional
2441        let result = IntervalAmount::from_str("3.");
2442        assert!(result.is_err());
2443
2444        // invalid: sign in fractional
2445        let result = IntervalAmount::from_str("3.-5");
2446        assert!(result.is_err());
2447    }
2448
2449    #[test]
2450    fn test_interval_precision() {
2451        let config = IntervalParseConfig::new(IntervalUnit::Month);
2452
2453        let result = Interval::parse("100000.1 days", &config).unwrap();
2454        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2455
2456        assert_eq!(result, expected);
2457    }
2458
2459    #[test]
2460    fn test_interval_addition() {
2461        // add 4.1 centuries
2462        let start = Interval::new(1, 2, 3);
2463        let expected = Interval::new(4921, 2, 3);
2464
2465        let result = start
2466            .add(
2467                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2468                IntervalUnit::Century,
2469            )
2470            .unwrap();
2471
2472        assert_eq!(result, expected);
2473
2474        // add 10.25 decades
2475        let start = Interval::new(1, 2, 3);
2476        let expected = Interval::new(1231, 2, 3);
2477
2478        let result = start
2479            .add(
2480                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2481                IntervalUnit::Decade,
2482            )
2483            .unwrap();
2484
2485        assert_eq!(result, expected);
2486
2487        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2488        let start = Interval::new(1, 2, 3);
2489        let expected = Interval::new(364, 2, 3);
2490
2491        let result = start
2492            .add(
2493                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2494                IntervalUnit::Year,
2495            )
2496            .unwrap();
2497
2498        assert_eq!(result, expected);
2499
2500        // add 1.5 months
2501        let start = Interval::new(1, 2, 3);
2502        let expected = Interval::new(2, 17, 3);
2503
2504        let result = start
2505            .add(
2506                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2507                IntervalUnit::Month,
2508            )
2509            .unwrap();
2510
2511        assert_eq!(result, expected);
2512
2513        // add -2 weeks
2514        let start = Interval::new(1, 25, 3);
2515        let expected = Interval::new(1, 11, 3);
2516
2517        let result = start
2518            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2519            .unwrap();
2520
2521        assert_eq!(result, expected);
2522
2523        // add 2.2 days
2524        let start = Interval::new(12, 15, 3);
2525        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2526
2527        let result = start
2528            .add(
2529                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2530                IntervalUnit::Day,
2531            )
2532            .unwrap();
2533
2534        assert_eq!(result, expected);
2535
2536        // add 12.5 hours
2537        let start = Interval::new(1, 2, 3);
2538        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2539
2540        let result = start
2541            .add(
2542                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2543                IntervalUnit::Hour,
2544            )
2545            .unwrap();
2546
2547        assert_eq!(result, expected);
2548
2549        // add -1.5 minutes
2550        let start = Interval::new(0, 0, -3);
2551        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2552
2553        let result = start
2554            .add(
2555                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2556                IntervalUnit::Minute,
2557            )
2558            .unwrap();
2559
2560        assert_eq!(result, expected);
2561    }
2562
2563    #[test]
2564    fn string_to_timestamp_old() {
2565        parse_timestamp("1677-06-14T07:29:01.256")
2566            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2567            .unwrap_err();
2568    }
2569
2570    #[test]
2571    fn test_parse_decimal_with_parameter() {
2572        let tests = [
2573            ("0", 0i128),
2574            ("123.123", 123123i128),
2575            ("123.1234", 123123i128),
2576            ("123.1", 123100i128),
2577            ("123", 123000i128),
2578            ("-123.123", -123123i128),
2579            ("-123.1234", -123123i128),
2580            ("-123.1", -123100i128),
2581            ("-123", -123000i128),
2582            ("0.0000123", 0i128),
2583            ("12.", 12000i128),
2584            ("-12.", -12000i128),
2585            ("00.1", 100i128),
2586            ("-00.1", -100i128),
2587            ("12345678912345678.1234", 12345678912345678123i128),
2588            ("-12345678912345678.1234", -12345678912345678123i128),
2589            ("99999999999999999.999", 99999999999999999999i128),
2590            ("-99999999999999999.999", -99999999999999999999i128),
2591            (".123", 123i128),
2592            ("-.123", -123i128),
2593            ("123.", 123000i128),
2594            ("-123.", -123000i128),
2595        ];
2596        for (s, i) in tests {
2597            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2598            assert_eq!(i, result_128.unwrap());
2599            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2600            assert_eq!(i256::from_i128(i), result_256.unwrap());
2601        }
2602
2603        let e_notation_tests = [
2604            ("1.23e3", "1230.0", 2),
2605            ("5.6714e+2", "567.14", 4),
2606            ("5.6714e-2", "0.056714", 4),
2607            ("5.6714e-2", "0.056714", 3),
2608            ("5.6741214125e2", "567.41214125", 4),
2609            ("8.91E4", "89100.0", 2),
2610            ("3.14E+5", "314000.0", 2),
2611            ("2.718e0", "2.718", 2),
2612            ("9.999999e-1", "0.9999999", 4),
2613            ("1.23e+3", "1230", 2),
2614            ("1.234559e+3", "1234.559", 2),
2615            ("1.00E-10", "0.0000000001", 11),
2616            ("1.23e-4", "0.000123", 2),
2617            ("9.876e7", "98760000.0", 2),
2618            ("5.432E+8", "543200000.0", 10),
2619            ("1.234567e9", "1234567000.0", 2),
2620            ("1.234567e2", "123.45670000", 2),
2621            ("4749.3e-5", "0.047493", 10),
2622            ("4749.3e+5", "474930000", 10),
2623            ("4749.3e-5", "0.047493", 1),
2624            ("4749.3e+5", "474930000", 1),
2625            ("0E-8", "0", 10),
2626            ("0E+6", "0", 10),
2627            ("1E-8", "0.00000001", 10),
2628            ("12E+6", "12000000", 10),
2629            ("12E-6", "0.000012", 10),
2630            ("0.1e-6", "0.0000001", 10),
2631            ("0.1e+6", "100000", 10),
2632            ("0.12e-6", "0.00000012", 10),
2633            ("0.12e+6", "120000", 10),
2634            ("000000000001e0", "000000000001", 3),
2635            ("000001.1034567002e0", "000001.1034567002", 3),
2636            ("1.234e16", "12340000000000000", 0),
2637            ("123.4e16", "1234000000000000000", 0),
2638        ];
2639        for (e, d, scale) in e_notation_tests {
2640            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2641            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2642            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2643            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2644            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2645            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2646        }
2647        let can_not_parse_tests = [
2648            "123,123",
2649            ".",
2650            "123.123.123",
2651            "",
2652            "+",
2653            "-",
2654            "e",
2655            "1.3e+e3",
2656            "5.6714ee-2",
2657            "4.11ee-+4",
2658            "4.11e++4",
2659            "1.1e.12",
2660            "1.23e+3.",
2661            "1.23e+3.1",
2662            "1e",
2663            "1e+",
2664            "1e-",
2665        ];
2666        for s in can_not_parse_tests {
2667            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2668            assert_eq!(
2669                format!("Parser error: can't parse the string value {s} to decimal"),
2670                result_128.unwrap_err().to_string()
2671            );
2672            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2673            assert_eq!(
2674                format!("Parser error: can't parse the string value {s} to decimal"),
2675                result_256.unwrap_err().to_string()
2676            );
2677        }
2678        let overflow_parse_tests = [
2679            ("12345678", 3),
2680            ("1.2345678e7", 3),
2681            ("12345678.9", 3),
2682            ("1.23456789e+7", 3),
2683            ("99999999.99", 3),
2684            ("9.999999999e7", 3),
2685            ("12345678908765.123456", 3),
2686            ("123456789087651234.56e-4", 3),
2687            ("1234560000000", 0),
2688            ("12345678900.0", 0),
2689            ("1.23456e12", 0),
2690        ];
2691        for (s, scale) in overflow_parse_tests {
2692            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2693            let expected_128 = "Parser error: parse decimal overflow";
2694            let actual_128 = result_128.unwrap_err().to_string();
2695
2696            assert!(
2697                actual_128.contains(expected_128),
2698                "actual: '{actual_128}', expected: '{expected_128}'"
2699            );
2700
2701            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2702            let expected_256 = "Parser error: parse decimal overflow";
2703            let actual_256 = result_256.unwrap_err().to_string();
2704
2705            assert!(
2706                actual_256.contains(expected_256),
2707                "actual: '{actual_256}', expected: '{expected_256}'"
2708            );
2709        }
2710
2711        let edge_tests_128 = [
2712            (
2713                "99999999999999999999999999999999999999",
2714                99999999999999999999999999999999999999i128,
2715                0,
2716            ),
2717            (
2718                "999999999999999999999999999999999999.99",
2719                99999999999999999999999999999999999999i128,
2720                2,
2721            ),
2722            (
2723                "9999999999999999999999999.9999999999999",
2724                99999999999999999999999999999999999999i128,
2725                13,
2726            ),
2727            (
2728                "9999999999999999999999999",
2729                99999999999999999999999990000000000000i128,
2730                13,
2731            ),
2732            (
2733                "0.99999999999999999999999999999999999999",
2734                99999999999999999999999999999999999999i128,
2735                38,
2736            ),
2737            (
2738                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2739                0i128,
2740                15,
2741            ),
2742            ("1.016744e-320", 0i128, 15),
2743            ("-1e3", -1000000000i128, 6),
2744            ("+1e3", 1000000000i128, 6),
2745            ("-1e31", -10000000000000000000000000000000000000i128, 6),
2746        ];
2747        for (s, i, scale) in edge_tests_128 {
2748            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2749            assert_eq!(i, result_128.unwrap());
2750        }
2751        let edge_tests_256 = [
2752            (
2753                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2754                i256::from_string(
2755                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2756                )
2757                .unwrap(),
2758                0,
2759            ),
2760            (
2761                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2762                i256::from_string(
2763                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2764                )
2765                .unwrap(),
2766                4,
2767            ),
2768            (
2769                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2770                i256::from_string(
2771                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2772                )
2773                .unwrap(),
2774                26,
2775            ),
2776            (
2777                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2778                i256::from_string(
2779                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2780                )
2781                .unwrap(),
2782                26,
2783            ),
2784            (
2785                "99999999999999999999999999999999999999999999999999",
2786                i256::from_string(
2787                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2788                )
2789                .unwrap(),
2790                26,
2791            ),
2792            (
2793                "9.9999999999999999999999999999999999999999999999999e+49",
2794                i256::from_string(
2795                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2796                )
2797                .unwrap(),
2798                26,
2799            ),
2800        ];
2801        for (s, i, scale) in edge_tests_256 {
2802            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2803            assert_eq!(i, result.unwrap());
2804        }
2805
2806        let zero_scale_tests = [
2807            (".123", 0, 3),
2808            ("0.123", 0, 3),
2809            ("1.0", 1, 3),
2810            ("1.2", 1, 3),
2811            ("1.00", 1, 3),
2812            ("1.23", 1, 3),
2813            ("1.000", 1, 3),
2814            ("1.123", 1, 3),
2815            ("123.0", 123, 3),
2816            ("123.4", 123, 3),
2817            ("123.00", 123, 3),
2818            ("123.45", 123, 3),
2819            ("123.000000000000000000004", 123, 3),
2820            ("0.123e2", 12, 3),
2821            ("0.123e4", 1230, 10),
2822            ("1.23e4", 12300, 10),
2823            ("12.3e4", 123000, 10),
2824            ("123e4", 1230000, 10),
2825            (
2826                "20000000000000000000000000000000000002.0",
2827                20000000000000000000000000000000000002,
2828                38,
2829            ),
2830        ];
2831        for (s, i, precision) in zero_scale_tests {
2832            let result_128 = parse_decimal::<Decimal128Type>(s, precision, 0).unwrap();
2833            assert_eq!(i, result_128);
2834        }
2835
2836        let can_not_parse_zero_scale = [".", "blag", "", "+", "-", "e"];
2837        for s in can_not_parse_zero_scale {
2838            let result_128 = parse_decimal::<Decimal128Type>(s, 5, 0);
2839            assert_eq!(
2840                format!("Parser error: can't parse the string value {s} to decimal"),
2841                result_128.unwrap_err().to_string(),
2842            );
2843        }
2844    }
2845
2846    #[test]
2847    fn test_parse_empty() {
2848        assert_eq!(Int32Type::parse(""), None);
2849        assert_eq!(Int64Type::parse(""), None);
2850        assert_eq!(UInt32Type::parse(""), None);
2851        assert_eq!(UInt64Type::parse(""), None);
2852        assert_eq!(Float32Type::parse(""), None);
2853        assert_eq!(Float64Type::parse(""), None);
2854        assert_eq!(Int32Type::parse("+"), None);
2855        assert_eq!(Int64Type::parse("+"), None);
2856        assert_eq!(UInt32Type::parse("+"), None);
2857        assert_eq!(UInt64Type::parse("+"), None);
2858        assert_eq!(Float32Type::parse("+"), None);
2859        assert_eq!(Float64Type::parse("+"), None);
2860        assert_eq!(TimestampNanosecondType::parse(""), None);
2861        assert_eq!(Date32Type::parse(""), None);
2862    }
2863
2864    #[test]
2865    fn test_parse_interval_month_day_nano_config() {
2866        let interval = parse_interval_month_day_nano_config(
2867            "1",
2868            IntervalParseConfig::new(IntervalUnit::Second),
2869        )
2870        .unwrap();
2871        assert_eq!(interval.months, 0);
2872        assert_eq!(interval.days, 0);
2873        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2874    }
2875}