Skip to main content

deep_time/time_parts/
from_str_iso.rs

1use crate::{DtErr, DtErrKind, Offset, Scale, TimeParts, an_err};
2
3impl TimeParts {
4    /// Generalized ISO / CCSDS ASCII Time Code parser (A or B variant).
5    /// - Parses e.g. **`+2000-01-01T17:00:00 -0500 [America/New_York] TAI`**.
6    /// - Only supports ASCII characters.
7    /// - If a time is included then some kind of date-time separator e.g. `T` is
8    ///   required.
9    /// - Supports both calendar (`%Y-%m-%d`) and day-of-year (`%Y-%j`) formats.
10    /// - Treats years digits literally as shown, for example `99-01-01` would be
11    ///   the year 99 AD not 1999.
12    /// - Supported **optional** components:
13    ///     - Time components after a date e.g. `T12:00:00`.
14    ///     - Offset after time components or directly after the date e.g. `+0200` or
15    ///       `2023-01-01+05:00`.
16    ///     - Timezone name, **requires square brackets** and requires `jiff-tz` feature,
17    ///       after time or offset e.g. `T12:00:00 [America/New_York]`.
18    ///     - Library time scale right on the end of the input, e.g. `TAI`.
19    /// - This function is considerably faster than all other string parsing methods if
20    ///   your date-time string is in the supported formats.
21    pub fn from_str_iso(input: &str) -> Result<Self, DtErr> {
22        let bytes = input.as_bytes();
23        let len_ = bytes.len();
24
25        let mut start = 0usize;
26        while start < len_ {
27            let b = bytes[start];
28            if b.is_ascii_digit()
29                || (matches!(b, b'+' | b'-')
30                    && start + 1 < len_
31                    && bytes[start + 1].is_ascii_digit())
32            {
33                break;
34            }
35            start += 1;
36        }
37
38        if start == len_ {
39            return Err(an_err!(
40                DtErrKind::ExpectedYear,
41                "year start (digit or +/- and digit)"
42            ));
43        }
44
45        let input = &input[start..];
46        let bytes = input.as_bytes();
47        let len_ = bytes.len();
48        let mut pos: usize = 0;
49        let mut tp = TimeParts::new_utc();
50
51        // Year (manual accumulation, optional sign)
52        let mut year: i64 = 0;
53        let negative_year = if pos < len_ && matches!(bytes[pos], b'+' | b'-') {
54            pos += 1;
55            bytes[pos] == b'-'
56        } else {
57            false
58        };
59
60        if bytes[pos].is_ascii_digit() {
61            while pos < len_ && bytes[pos].is_ascii_digit() {
62                year = year * 10 + (bytes[pos] - b'0') as i64;
63                pos += 1;
64            }
65        } else {
66            return Err(an_err!(
67                DtErrKind::ExpectedYear,
68                "year (digits after optional sign)"
69            ));
70        }
71
72        if negative_year {
73            year = -year;
74        }
75        tp.yr = Some(year);
76
77        // required separator after year
78        if pos < len_ {
79            pos += 1;
80        }
81
82        // DOY vs calendar detection, uses required datetime separator to detect
83        let is_doy = pos + 3 == len_ || (pos + 3 < len_ && !bytes[pos + 3].is_ascii_digit());
84
85        if is_doy {
86            // 3-digit day of year
87            let mut doy: u16 = 0;
88            // digit 1
89            if bytes[pos].is_ascii_digit() {
90                doy = doy * 10 + (bytes[pos] - b'0') as u16;
91                pos += 1;
92            } else {
93                return Err(an_err!(DtErrKind::ExpectedDayOfYear, "0/3 digits"));
94            }
95
96            // digit 2
97            if bytes[pos].is_ascii_digit() {
98                doy = doy * 10 + (bytes[pos] - b'0') as u16;
99                pos += 1;
100            } else {
101                return Err(an_err!(DtErrKind::ExpectedDayOfYear, "1/3 digits"));
102            }
103
104            // digit 3
105            if bytes[pos].is_ascii_digit() {
106                doy = doy * 10 + (bytes[pos] - b'0') as u16;
107                pos += 1;
108            } else {
109                return Err(an_err!(DtErrKind::ExpectedDayOfYear, "2/3 digits"));
110            }
111
112            tp.day_of_yr = Some(doy);
113        } else {
114            // 2-digit month
115            let mut mo: u8 = 0;
116            // digit 1
117            if bytes[pos].is_ascii_digit() {
118                mo = mo * 10 + (bytes[pos] - b'0');
119                pos += 1;
120            } else {
121                return Err(an_err!(DtErrKind::ExpectedMonth, "0/2 digits"));
122            }
123            // digit 2
124            if bytes[pos].is_ascii_digit() {
125                mo = mo * 10 + (bytes[pos] - b'0');
126                pos += 1;
127            } else {
128                return Err(an_err!(DtErrKind::ExpectedMonth, "1/2 digits"));
129            }
130
131            tp.mo = Some(mo);
132
133            // Optional separator after month
134            if pos < len_ && !bytes[pos].is_ascii_digit() {
135                pos += 1;
136            }
137
138            // 2-digit day
139            let mut day: u8 = 0;
140            // digit 1
141            if bytes[pos].is_ascii_digit() {
142                day = day * 10 + (bytes[pos] - b'0');
143                pos += 1;
144            } else {
145                return Err(an_err!(DtErrKind::ExpectedDay, "0/2 digits"));
146            }
147            // digit 2
148            if bytes[pos].is_ascii_digit() {
149                day = day * 10 + (bytes[pos] - b'0');
150                pos += 1;
151            } else {
152                return Err(an_err!(DtErrKind::ExpectedDay, "1/2 digits"));
153            }
154
155            tp.day = Some(day);
156        }
157
158        // required date-time separator
159        while pos < len_ && bytes[pos].is_ascii_whitespace() {
160            pos += 1;
161        }
162        if pos < len_ {
163            let c = bytes[pos];
164            // push past a T
165            if !c.is_ascii_digit() {
166                if pos + 1 < len_ && !matches!(c, b'+' | b'-') {
167                    if bytes[pos + 1].is_ascii_digit() {
168                        pos += 1;
169                    } else if bytes[pos + 1].is_ascii_whitespace() {
170                        pos += 1;
171                        while pos < len_ && bytes[pos].is_ascii_whitespace() {
172                            pos += 1;
173                        }
174                    }
175                }
176            }
177        }
178
179        // Optional time components
180        if pos < len_ && bytes[pos].is_ascii_digit() {
181            // Hour (2 digits)
182            let mut hr: u8 = 0;
183            // digit 1
184            if bytes[pos].is_ascii_digit() {
185                hr = hr * 10 + (bytes[pos] - b'0');
186                pos += 1;
187            } else {
188                return Err(an_err!(DtErrKind::ExpectedHour, "0/2 digits"));
189            }
190            // digit 2
191            if bytes[pos].is_ascii_digit() {
192                hr = hr * 10 + (bytes[pos] - b'0');
193                pos += 1;
194            } else {
195                return Err(an_err!(DtErrKind::ExpectedHour, "1/2 digits"));
196            }
197
198            tp.hr = hr;
199
200            'time: {
201                // only continue if it's not a + or - and not an alpha
202                if pos >= len_
203                    || bytes[pos].is_ascii_digit()
204                    || matches!(bytes[pos], b'+' | b'-')
205                    || bytes[pos].is_ascii_alphabetic()
206                {
207                    break 'time;
208                }
209                pos += 1;
210
211                // Minute (2 digits)
212                if pos + 2 > len_ {
213                    break 'time;
214                }
215                let mut min: u8 = 0;
216                // digit 1
217                if bytes[pos].is_ascii_digit() {
218                    min = min * 10 + (bytes[pos] - b'0');
219                    pos += 1;
220                } else {
221                    return Err(an_err!(DtErrKind::ExpectedMinute, "0/2 digits"));
222                }
223                // digit 2
224                if bytes[pos].is_ascii_digit() {
225                    min = min * 10 + (bytes[pos] - b'0');
226                    pos += 1;
227                } else {
228                    return Err(an_err!(DtErrKind::ExpectedMinute, "0/2 digits"));
229                }
230
231                tp.min = min;
232
233                // only continue if it's not a + or - and not an alpha
234                if pos >= len_
235                    || bytes[pos].is_ascii_digit()
236                    || matches!(bytes[pos], b'+' | b'-')
237                    || bytes[pos].is_ascii_alphabetic()
238                {
239                    break 'time;
240                }
241                pos += 1;
242
243                // Second (2 digits, if present)
244                if pos + 2 > len_ {
245                    break 'time;
246                }
247                let mut sec: u8 = 0;
248                // digit 1
249                if bytes[pos].is_ascii_digit() {
250                    sec = sec * 10 + (bytes[pos] - b'0');
251                    pos += 1;
252                } else {
253                    return Err(an_err!(DtErrKind::ExpectedSecond, "0/2 digits"));
254                }
255                // digit 2
256                if bytes[pos].is_ascii_digit() {
257                    sec = sec * 10 + (bytes[pos] - b'0');
258                    pos += 1;
259                } else {
260                    return Err(an_err!(DtErrKind::ExpectedSecond, "1/2 digits"));
261                }
262
263                tp.sec = sec;
264
265                // only continue if it's not a + or - and not an alpha
266                if pos >= len_
267                    || bytes[pos].is_ascii_digit()
268                    || matches!(bytes[pos], b'+' | b'-')
269                    || bytes[pos].is_ascii_alphabetic()
270                {
271                    break 'time;
272                }
273                pos += 1;
274
275                // Fractional seconds (with or without leading dot)
276                if pos < len_ {
277                    if bytes[pos] == b'.' {
278                        pos += 1;
279                    }
280
281                    if pos < len_ && bytes[pos].is_ascii_digit() {
282                        let mut attos: u64 = 0;
283                        let mut digits_seen: usize = 0;
284
285                        while pos < len_ && bytes[pos].is_ascii_digit() {
286                            if digits_seen < 18 {
287                                attos = attos * 10 + (bytes[pos] - b'0') as u64;
288                                digits_seen += 1;
289                            }
290                            // Ignore any digits beyond the first 18
291                            pos += 1;
292                        }
293
294                        if digits_seen > 0 {
295                            tp.attos = attos * 10u64.pow(18u32.saturating_sub(digits_seen as u32));
296                        }
297                    }
298                }
299            }
300            // Optional trailing Z/z
301            if pos < len_ && matches!(bytes[pos], b'Z' | b'z') {
302                pos += 1;
303            }
304        }
305
306        // Skip any whitespace
307        while pos < len_ && bytes[pos].is_ascii_whitespace() {
308            pos += 1;
309        }
310
311        // Optional offset
312        if pos < len_ && matches!(bytes[pos], b'+' | b'-') {
313            let sign: i64 = if bytes[pos] == b'+' { 1 } else { -1 };
314            pos += 1;
315
316            // Parse hours (up to 2 digits). "+05:30"/"+0530"
317            let mut hours: i64 = 0;
318            let mut h_digits = 0usize;
319            while pos < len_ && bytes[pos].is_ascii_digit() && h_digits < 2 {
320                hours = hours * 10 + (bytes[pos] - b'0') as i64;
321                pos += 1;
322                h_digits += 1;
323            }
324
325            if h_digits > 0 {
326                // Optional ':' separator before minutes
327                if pos < len_ && bytes[pos] == b':' {
328                    pos += 1;
329                }
330
331                // Parse minutes (up to 2 digits; optional)
332                let mut minutes: i64 = 0;
333                let mut m_digits = 0usize;
334                while pos < len_ && bytes[pos].is_ascii_digit() && m_digits < 2 {
335                    minutes = minutes * 10 + (bytes[pos] - b'0') as i64;
336                    pos += 1;
337                    m_digits += 1;
338                }
339
340                let total_sec_i64 = sign * (hours * 3600 + minutes * 60);
341                let total_seconds: i32 =
342                    total_sec_i64.clamp(i32::MIN as i64, i32::MAX as i64) as i32;
343                tp.offset = Some(Offset::Fixed(total_seconds));
344            }
345        }
346
347        // Skip any whitespace before IANA name or scale
348        while pos < len_ && bytes[pos].is_ascii_whitespace() {
349            pos += 1;
350        }
351
352        // Optional IANA timezone name in square brackets, e.g. [America/New_York]
353        // Must be explicitly wrapped in [] so we don't mistake a scale for a zone.
354        if pos < len_ && bytes[pos] == b'[' {
355            pos += 1; // skip '['
356
357            let name_start = pos;
358
359            while pos < len_ && bytes[pos] != b']' {
360                pos += 1;
361            }
362
363            if pos >= len_ {
364                return Err(an_err!(
365                    DtErrKind::InvalidTimezoneOffset,
366                    "unclosed IANA tz name (missing ']')"
367                ));
368            }
369
370            // pos is now at ']'
371            let iana_bytes = &bytes[name_start..pos];
372
373            let iana = core::str::from_utf8(iana_bytes).map_err(|_| {
374                an_err!(
375                    DtErrKind::InvalidBytes,
376                    "IANA tz name contains invalid UTF-8"
377                )
378            })?;
379
380            tp.set_iana_name(Some(iana));
381            pos += 1; // consume ']'
382        }
383
384        // Optional trailing scale (e.g. TAI, UTC)
385        if pos < len_ {
386            while pos < len_ && !bytes[pos].is_ascii_alphabetic() {
387                pos += 1;
388            }
389            if pos < len_ {
390                let end = {
391                    let mut i = pos;
392                    while i < len_ && bytes[i].is_ascii_alphabetic() {
393                        i += 1;
394                        if i - pos > 8 {
395                            break;
396                        }
397                    }
398                    i
399                };
400                if let Some(sc) = Scale::from_abbrev(&input[pos..end]) {
401                    tp.scale = sc;
402                    // pos += end - pos;
403                }
404            }
405        }
406
407        Ok(tp)
408    }
409}