Skip to main content

deep_time/time_parts/
from_str_iso.rs

1use crate::{DtErr, DtErrKind, Offset, STRTIME_SIZE, Scale, TimeParts, an_err};
2
3impl TimeParts {
4    /// Generalized ISO / CCSDS ASCII Time Code parser (A or B variant).
5    /// - Parses e.g. **`+2000-01-01T17:00:00 -0500 [America/New_York] TAI`**.
6    /// - Only supports ASCII characters.
7    /// - If a time is included then some kind of date-time separator e.g. `T` is
8    ///   required.
9    /// - Supports both calendar (`%Y-%m-%d`) and day-of-year (`%Y-%j`) formats.
10    /// - Treats years digits literally as shown, for example `99-01-01` would be
11    ///   the year 99 AD not 1999.
12    /// - Supported **optional** components:
13    ///     - Time components after a date e.g. `T12:00:00`.
14    ///     - Offset after time components or directly after the date e.g. `+0200` or
15    ///       `2023-01-01+05:00`.
16    ///     - Timezone name, **requires square brackets** and requires `jiff-tz` feature,
17    ///       after time or offset e.g. `T12:00:00 [America/New_York]`.
18    ///     - Library time scale right on the end of the input, e.g. `TAI`.
19    /// - This function is considerably faster than all other string parsing methods if
20    ///   your date-time string is in the supported formats.
21    pub fn from_str_iso(input: &str) -> Result<Self, DtErr> {
22        let bytes = input.as_bytes();
23        let len_ = bytes.len();
24        if len_ > STRTIME_SIZE {
25            return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", input));
26        }
27
28        let mut start = 0usize;
29        while start < len_ {
30            let b = bytes[start];
31            if b.is_ascii_digit()
32                || (matches!(b, b'+' | b'-')
33                    && start + 1 < len_
34                    && bytes[start + 1].is_ascii_digit())
35            {
36                break;
37            }
38            start += 1;
39        }
40
41        if start == len_ {
42            return Err(an_err!(
43                DtErrKind::ExpectedYear,
44                "year start (digit or +/- and digit)"
45            ));
46        }
47
48        let input = &input[start..];
49        let bytes = input.as_bytes();
50        let len_ = bytes.len();
51        let mut pos: usize = 0;
52        let mut tp = TimeParts::new_utc();
53
54        // Year (manual accumulation, optional sign)
55        let mut year: i64 = 0;
56        let negative_year = if pos < len_ && matches!(bytes[pos], b'+' | b'-') {
57            pos += 1;
58            bytes[pos] == b'-'
59        } else {
60            false
61        };
62
63        if bytes[pos].is_ascii_digit() {
64            while pos < len_ && bytes[pos].is_ascii_digit() {
65                year = year * 10 + (bytes[pos] - b'0') as i64;
66                pos += 1;
67            }
68        } else {
69            return Err(an_err!(
70                DtErrKind::ExpectedYear,
71                "year (digits after optional sign)"
72            ));
73        }
74
75        if negative_year {
76            year = -year;
77        }
78        tp.yr = Some(year);
79
80        // required separator after year
81        if pos < len_ {
82            pos += 1;
83        }
84
85        // DOY vs calendar detection, uses required datetime separator to detect
86        let is_doy = pos + 3 == len_ || (pos + 3 < len_ && !bytes[pos + 3].is_ascii_digit());
87
88        if is_doy {
89            // 3-digit day of year
90            let mut doy: u16 = 0;
91            // digit 1
92            if bytes[pos].is_ascii_digit() {
93                doy = doy * 10 + (bytes[pos] - b'0') as u16;
94                pos += 1;
95            } else {
96                return Err(an_err!(DtErrKind::ExpectedDayOfYear, "0/3 digits"));
97            }
98
99            // digit 2
100            if bytes[pos].is_ascii_digit() {
101                doy = doy * 10 + (bytes[pos] - b'0') as u16;
102                pos += 1;
103            } else {
104                return Err(an_err!(DtErrKind::ExpectedDayOfYear, "1/3 digits"));
105            }
106
107            // digit 3
108            if bytes[pos].is_ascii_digit() {
109                doy = doy * 10 + (bytes[pos] - b'0') as u16;
110                pos += 1;
111            } else {
112                return Err(an_err!(DtErrKind::ExpectedDayOfYear, "2/3 digits"));
113            }
114
115            tp.day_of_yr = Some(doy);
116        } else {
117            // 2-digit month
118            let mut mo: u8 = 0;
119            // digit 1
120            if bytes[pos].is_ascii_digit() {
121                mo = mo * 10 + (bytes[pos] - b'0');
122                pos += 1;
123            } else {
124                return Err(an_err!(DtErrKind::ExpectedMonth, "0/2 digits"));
125            }
126            // digit 2
127            if bytes[pos].is_ascii_digit() {
128                mo = mo * 10 + (bytes[pos] - b'0');
129                pos += 1;
130            } else {
131                return Err(an_err!(DtErrKind::ExpectedMonth, "1/2 digits"));
132            }
133
134            tp.mo = Some(mo);
135
136            // Optional separator after month
137            if pos < len_ && !bytes[pos].is_ascii_digit() {
138                pos += 1;
139            }
140
141            // 2-digit day
142            let mut day: u8 = 0;
143            // digit 1
144            if bytes[pos].is_ascii_digit() {
145                day = day * 10 + (bytes[pos] - b'0');
146                pos += 1;
147            } else {
148                return Err(an_err!(DtErrKind::ExpectedDay, "0/2 digits"));
149            }
150            // digit 2
151            if bytes[pos].is_ascii_digit() {
152                day = day * 10 + (bytes[pos] - b'0');
153                pos += 1;
154            } else {
155                return Err(an_err!(DtErrKind::ExpectedDay, "1/2 digits"));
156            }
157
158            tp.day = Some(day);
159        }
160
161        // required date-time separator
162        while pos < len_ && bytes[pos].is_ascii_whitespace() {
163            pos += 1;
164        }
165        if pos < len_ {
166            let c = bytes[pos];
167            // push past a T
168            if !c.is_ascii_digit() && pos + 1 < len_ && !matches!(c, b'+' | b'-') {
169                if bytes[pos + 1].is_ascii_digit() {
170                    pos += 1;
171                } else if bytes[pos + 1].is_ascii_whitespace() {
172                    pos += 1;
173                    while pos < len_ && bytes[pos].is_ascii_whitespace() {
174                        pos += 1;
175                    }
176                }
177            }
178        }
179
180        // Optional time components
181        if pos < len_ && bytes[pos].is_ascii_digit() {
182            // Hour (2 digits)
183            let mut hr: u8 = 0;
184            // digit 1
185            if bytes[pos].is_ascii_digit() {
186                hr = hr * 10 + (bytes[pos] - b'0');
187                pos += 1;
188            } else {
189                return Err(an_err!(DtErrKind::ExpectedHour, "0/2 digits"));
190            }
191            // digit 2
192            if bytes[pos].is_ascii_digit() {
193                hr = hr * 10 + (bytes[pos] - b'0');
194                pos += 1;
195            } else {
196                return Err(an_err!(DtErrKind::ExpectedHour, "1/2 digits"));
197            }
198
199            tp.hr = hr;
200
201            'time: {
202                // only continue if it's not a + or - and not an alpha
203                if pos >= len_
204                    || bytes[pos].is_ascii_digit()
205                    || matches!(bytes[pos], b'+' | b'-')
206                    || bytes[pos].is_ascii_alphabetic()
207                {
208                    break 'time;
209                }
210                pos += 1;
211
212                // Minute (2 digits)
213                if pos + 2 > len_ {
214                    break 'time;
215                }
216                let mut min: u8 = 0;
217                // digit 1
218                if bytes[pos].is_ascii_digit() {
219                    min = min * 10 + (bytes[pos] - b'0');
220                    pos += 1;
221                } else {
222                    return Err(an_err!(DtErrKind::ExpectedMinute, "0/2 digits"));
223                }
224                // digit 2
225                if bytes[pos].is_ascii_digit() {
226                    min = min * 10 + (bytes[pos] - b'0');
227                    pos += 1;
228                } else {
229                    return Err(an_err!(DtErrKind::ExpectedMinute, "0/2 digits"));
230                }
231
232                tp.min = min;
233
234                // only continue if it's not a + or - and not an alpha
235                if pos >= len_
236                    || bytes[pos].is_ascii_digit()
237                    || matches!(bytes[pos], b'+' | b'-')
238                    || bytes[pos].is_ascii_alphabetic()
239                {
240                    break 'time;
241                }
242                pos += 1;
243
244                // Second (2 digits, if present)
245                if pos + 2 > len_ {
246                    break 'time;
247                }
248                let mut sec: u8 = 0;
249                // digit 1
250                if bytes[pos].is_ascii_digit() {
251                    sec = sec * 10 + (bytes[pos] - b'0');
252                    pos += 1;
253                } else {
254                    return Err(an_err!(DtErrKind::ExpectedSecond, "0/2 digits"));
255                }
256                // digit 2
257                if bytes[pos].is_ascii_digit() {
258                    sec = sec * 10 + (bytes[pos] - b'0');
259                    pos += 1;
260                } else {
261                    return Err(an_err!(DtErrKind::ExpectedSecond, "1/2 digits"));
262                }
263
264                tp.sec = sec;
265
266                // only continue if it's not a + or - and not an alpha
267                if pos >= len_
268                    || bytes[pos].is_ascii_digit()
269                    || matches!(bytes[pos], b'+' | b'-')
270                    || bytes[pos].is_ascii_alphabetic()
271                {
272                    break 'time;
273                }
274                pos += 1;
275
276                // Fractional seconds (with or without leading dot)
277                if pos < len_ {
278                    if bytes[pos] == b'.' {
279                        pos += 1;
280                    }
281
282                    if pos < len_ && bytes[pos].is_ascii_digit() {
283                        let mut attos: u64 = 0;
284                        let mut digits_seen: usize = 0;
285
286                        while pos < len_ && bytes[pos].is_ascii_digit() {
287                            if digits_seen < 18 {
288                                attos = attos * 10 + (bytes[pos] - b'0') as u64;
289                                digits_seen += 1;
290                            }
291                            // Ignore any digits beyond the first 18
292                            pos += 1;
293                        }
294
295                        if digits_seen > 0 {
296                            tp.attos = attos * 10u64.pow(18u32.saturating_sub(digits_seen as u32));
297                        }
298                    }
299                }
300            }
301            // Optional trailing Z/z
302            if pos < len_ && matches!(bytes[pos], b'Z' | b'z') {
303                pos += 1;
304            }
305        }
306
307        // Skip any whitespace
308        while pos < len_ && bytes[pos].is_ascii_whitespace() {
309            pos += 1;
310        }
311
312        // Optional offset
313        if pos < len_ && matches!(bytes[pos], b'+' | b'-') {
314            let sign: i64 = if bytes[pos] == b'+' { 1 } else { -1 };
315            pos += 1;
316
317            // Parse hours (up to 2 digits). "+05:30"/"+0530"
318            let mut hours: i64 = 0;
319            let mut h_digits = 0usize;
320            while pos < len_ && bytes[pos].is_ascii_digit() && h_digits < 2 {
321                hours = hours * 10 + (bytes[pos] - b'0') as i64;
322                pos += 1;
323                h_digits += 1;
324            }
325
326            if h_digits > 0 {
327                // Optional ':' separator before minutes
328                if pos < len_ && bytes[pos] == b':' {
329                    pos += 1;
330                }
331
332                // Parse minutes (up to 2 digits; optional)
333                let mut minutes: i64 = 0;
334                let mut m_digits = 0usize;
335                while pos < len_ && bytes[pos].is_ascii_digit() && m_digits < 2 {
336                    minutes = minutes * 10 + (bytes[pos] - b'0') as i64;
337                    pos += 1;
338                    m_digits += 1;
339                }
340
341                let total_sec_i64 = sign * (hours * 3600 + minutes * 60);
342                let total_seconds: i32 =
343                    total_sec_i64.clamp(i32::MIN as i64, i32::MAX as i64) as i32;
344                tp.offset = Some(Offset::Fixed(total_seconds));
345            }
346        }
347
348        // Skip any whitespace before IANA name or scale
349        while pos < len_ && bytes[pos].is_ascii_whitespace() {
350            pos += 1;
351        }
352
353        // Optional IANA timezone name in square brackets, e.g. [America/New_York]
354        // Must be explicitly wrapped in [] so we don't mistake a scale for a zone.
355        if pos < len_ && bytes[pos] == b'[' {
356            pos += 1; // skip '['
357
358            let name_start = pos;
359
360            while pos < len_ && bytes[pos] != b']' {
361                pos += 1;
362            }
363
364            if pos >= len_ {
365                return Err(an_err!(
366                    DtErrKind::InvalidTimezoneOffset,
367                    "unclosed IANA tz name (missing ']')"
368                ));
369            }
370
371            // pos is now at ']'
372            let iana_bytes = &bytes[name_start..pos];
373
374            let iana = core::str::from_utf8(iana_bytes).map_err(|_| {
375                an_err!(
376                    DtErrKind::InvalidBytes,
377                    "IANA tz name contains invalid UTF-8"
378                )
379            })?;
380
381            tp.set_iana_name(Some(iana));
382            pos += 1; // consume ']'
383        }
384
385        // Optional trailing scale (e.g. TAI, UTC)
386        if pos < len_ {
387            while pos < len_ && !bytes[pos].is_ascii_alphabetic() {
388                pos += 1;
389            }
390            if pos < len_ {
391                let end = {
392                    let mut i = pos;
393                    while i < len_ && bytes[i].is_ascii_alphabetic() {
394                        i += 1;
395                        if i - pos > 8 {
396                            break;
397                        }
398                    }
399                    i
400                };
401                if let Some(sc) = Scale::from_abbrev(&input[pos..end]) {
402                    tp.scale = sc;
403                    // pos += end - pos;
404                }
405            }
406        }
407
408        Ok(tp)
409    }
410}