Skip to main content

timezone_data/
parse.rs

1//! TZif (RFC 8536) binary parser and the [`Zone`] type.
2//!
3//! Parsing records only the byte ranges of each TZif data block; individual
4//! records are decoded lazily by the accessor iterators. Nothing is copied out
5//! of the source bytes and nothing is allocated.
6
7use crate::error::Error;
8use crate::posix::{parse_posix_tz, year_of, PosixTz};
9
/// One local time type of a zone, such as `EST` or `EDT`.
///
/// The abbreviation borrows from the TZif bytes the zone was parsed from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ZoneType<'a> {
    /// Short name of this time type.
    pub abbrev: &'a str,
    /// Offset from UTC in seconds; positive values are east of UTC.
    pub offset: i32,
    /// Whether this time type is a daylight-saving one.
    pub is_dst: bool,
}
20
/// A stored point in time at which the zone switches to another time type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Transition {
    /// Unix timestamp from which the new time type applies.
    pub when: i64,
    /// Position of the new time type in the zone's [`types`](Zone::types).
    pub type_idx: usize,
    /// Set when the transition time is standard time rather than wall clock.
    pub is_std: bool,
    /// Set when the transition time is UT rather than local time.
    pub is_ut: bool,
}
33
/// One entry of the zone's leap-second table.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LeapSecond {
    /// Unix timestamp at which the leap second occurs.
    pub when: i64,
    /// Total correction, in seconds, accumulated up to and including this entry.
    pub correction: i32,
}
42
43/// A transition produced by [`Zone::transitions_for_range`].
44///
45/// Unlike [`Transition`], this carries the resolved [`ZoneType`] directly, since
46/// transitions generated from the POSIX extend rule may name a type that does
47/// not appear in the stored type table.
48#[derive(Debug, Clone, Copy, PartialEq, Eq)]
49pub struct RangeTransition<'a> {
50    /// Unix timestamp at which the transition takes effect.
51    pub when: i64,
52    /// The zone type in effect after the transition.
53    pub zone_type: ZoneType<'a>,
54}
55
56/// A parsed IANA timezone with all raw data exposed.
57///
58/// `Zone` borrows the TZif bytes it was parsed from; it is cheap to copy.
59#[derive(Debug, Clone, Copy)]
60pub struct Zone<'a> {
61    name: &'a str,
62    version: u8,
63    data: &'a [u8],
64    time_size: usize,
65    leap_size: usize,
66    timecnt: usize,
67    typecnt: usize,
68    leapcnt: usize,
69    trans_times: &'a [u8],
70    trans_types: &'a [u8],
71    ttinfo: &'a [u8],
72    abbrev: &'a [u8],
73    leap: &'a [u8],
74    isstd: &'a [u8],
75    isut: &'a [u8],
76    extend_raw: &'a str,
77    extend: Option<PosixTz<'a>>,
78}
79
/// Reads a big-endian `i32` from the first four bytes of `b`.
///
/// Panics if `b` holds fewer than four bytes.
fn be_i32(b: &[u8]) -> i32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&b[..4]);
    i32::from_be_bytes(raw)
}
83
/// Reads a big-endian `i64` from the first eight bytes of `b`.
///
/// Panics if `b` holds fewer than eight bytes.
fn be_i64(b: &[u8]) -> i64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&b[..8]);
    i64::from_be_bytes(raw)
}
87
/// Reads a big-endian `u32` from the first four bytes of `b`.
///
/// Panics if `b` holds fewer than four bytes.
fn be_u32(b: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&b[..4]);
    u32::from_be_bytes(raw)
}
91
/// Returns the string that starts at `idx` in `block` and runs up to the next
/// NUL byte, or to the end of `block` when no NUL follows.
///
/// Yields `""` when `idx` is past the end of `block` or when the bytes are not
/// valid UTF-8.
fn byte_string(block: &[u8], idx: usize) -> &str {
    let tail = match block.get(idx..) {
        Some(tail) => tail,
        None => return "",
    };
    // `split` always yields at least one (possibly empty) piece.
    let name = tail.split(|&byte| byte == 0).next().unwrap_or(tail);
    match core::str::from_utf8(name) {
        Ok(text) => text,
        Err(_) => "",
    }
}
98
99/// Decodes the `i`-th 6-byte ttinfo record into a [`ZoneType`].
100fn decode_type<'a>(ttinfo: &'a [u8], abbrev: &'a [u8], i: usize) -> ZoneType<'a> {
101    let r = &ttinfo[i * 6..i * 6 + 6];
102    ZoneType {
103        offset: be_i32(&r[0..4]),
104        is_dst: r[4] != 0,
105        abbrev: byte_string(abbrev, r[5] as usize),
106    }
107}
108
109/// Decodes the `i`-th transition time from `times`.
110fn time_at(times: &[u8], time_size: usize, i: usize) -> i64 {
111    let off = i * time_size;
112    if time_size == 8 {
113        be_i64(&times[off..off + 8])
114    } else {
115        be_i32(&times[off..off + 4]) as i64
116    }
117}
118
119/// Parses TZif-format binary data into a [`Zone`].
120pub fn parse<'a>(name: &'a str, data: &'a [u8]) -> Result<Zone<'a>, Error> {
121    if data.len() < 44 {
122        return Err(Error::BadData("file too short"));
123    }
124    if &data[..4] != b"TZif" {
125        return Err(Error::BadData("invalid magic number"));
126    }
127
128    let version = match data[4] {
129        0 => 1u8,
130        b'2' => 2,
131        b'3' => 3,
132        b'4' => 4,
133        _ => return Err(Error::BadData("unknown version byte")),
134    };
135
136    // Header counts: six big-endian u32 at offset 20.
137    let counts = |base: usize| -> Result<[usize; 6], Error> {
138        let h = data
139            .get(base..base + 24)
140            .ok_or(Error::BadData("header truncated"))?;
141        Ok([
142            be_u32(&h[0..4]) as usize,   // isutcnt
143            be_u32(&h[4..8]) as usize,   // isstdcnt
144            be_u32(&h[8..12]) as usize,  // leapcnt
145            be_u32(&h[12..16]) as usize, // timecnt
146            be_u32(&h[16..20]) as usize, // typecnt
147            be_u32(&h[20..24]) as usize, // charcnt
148        ])
149    };
150
151    let [mut isutcnt, mut isstdcnt, mut leapcnt, mut timecnt, mut typecnt, mut charcnt] =
152        counts(20)?;
153
154    if typecnt == 0 {
155        return Err(Error::BadData("no time types"));
156    }
157
158    // Size of the v1 data block, used to skip it for v2+ files.
159    let v1_data_size = timecnt * 4 // transition times (int32)
160        + timecnt          // transition type indices
161        + typecnt * 6      // ttinfo records
162        + charcnt          // abbreviation chars
163        + leapcnt * 8      // leap second records (v1: 4+4)
164        + isstdcnt         // std/wall indicators
165        + isutcnt; // UT/local indicators
166
167    if data.len() < 44 + v1_data_size {
168        return Err(Error::BadData("v1 data block truncated"));
169    }
170
171    let time_size;
172    let leap_size;
173    let data_off;
174
175    if version >= 2 {
176        // Skip the v1 data block and read the v2+ header.
177        let v2_hdr = 44 + v1_data_size;
178        let h = data
179            .get(v2_hdr..v2_hdr + 44)
180            .ok_or(Error::BadData("v2 header truncated"))?;
181        if &h[..4] != b"TZif" {
182            return Err(Error::BadData("v2 magic mismatch"));
183        }
184        [isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt] = counts(v2_hdr + 20)?;
185        if typecnt == 0 {
186            return Err(Error::BadData("no time types in v2 block"));
187        }
188        time_size = 8;
189        leap_size = 12;
190        data_off = v2_hdr + 44;
191    } else {
192        time_size = 4;
193        leap_size = 8;
194        data_off = 44;
195    }
196
197    let total_needed = timecnt * time_size
198        + timecnt
199        + typecnt * 6
200        + charcnt
201        + leapcnt * leap_size
202        + isstdcnt
203        + isutcnt;
204
205    let block = data
206        .get(data_off..data_off + total_needed)
207        .ok_or(Error::BadData("data block truncated"))?;
208
209    // Carve the data block into its constituent slices.
210    let mut p = 0;
211    let take = |p: &mut usize, n: usize| -> &[u8] {
212        let s = &block[*p..*p + n];
213        *p += n;
214        s
215    };
216    let trans_times = take(&mut p, timecnt * time_size);
217    let trans_types = take(&mut p, timecnt);
218    let ttinfo = take(&mut p, typecnt * 6);
219    let abbrev = take(&mut p, charcnt);
220    let leap = take(&mut p, leapcnt * leap_size);
221    let isstd = take(&mut p, isstdcnt);
222    let isut = take(&mut p, isutcnt);
223
224    // Validate transition type indices up front.
225    for &idx in trans_types {
226        if idx as usize >= typecnt {
227            return Err(Error::BadData("transition type index out of range"));
228        }
229    }
230
231    // POSIX TZ footer (v2+ only): "\n<rule>\n".
232    let mut extend_raw = "";
233    let mut extend = None;
234    if version >= 2 {
235        let footer = &data[data_off + total_needed..];
236        if footer.len() > 1 && footer[0] == b'\n' {
237            let rest = &footer[1..];
238            if let Some(nl) = rest.iter().position(|&b| b == b'\n') {
239                if let Ok(s) = core::str::from_utf8(&rest[..nl]) {
240                    extend_raw = s;
241                    if !s.is_empty() {
242                        extend = parse_posix_tz(s).ok();
243                    }
244                }
245            }
246        }
247    }
248
249    Ok(Zone {
250        name,
251        version,
252        data,
253        time_size,
254        leap_size,
255        timecnt,
256        typecnt,
257        leapcnt,
258        trans_times,
259        trans_types,
260        ttinfo,
261        abbrev,
262        leap,
263        isstd,
264        isut,
265        extend_raw,
266        extend,
267    })
268}
269
270impl<'a> Zone<'a> {
271    /// The IANA timezone name.
272    pub fn name(&self) -> &'a str {
273        self.name
274    }
275
276    /// The TZif format version (1, 2, 3, or 4).
277    pub fn version(&self) -> u8 {
278        self.version
279    }
280
281    /// The original TZif binary data this zone was parsed from.
282    pub fn raw_data(&self) -> &'a [u8] {
283        self.data
284    }
285
286    /// The parsed POSIX TZ rule for computing future transitions, if any.
287    pub fn extend(&self) -> Option<&PosixTz<'a>> {
288        self.extend.as_ref()
289    }
290
291    /// The raw POSIX TZ footer string (empty if none).
292    pub fn extend_raw(&self) -> &'a str {
293        self.extend_raw
294    }
295
296    /// The number of local time types.
297    pub fn type_count(&self) -> usize {
298        self.typecnt
299    }
300
301    /// Returns the `i`-th local time type. Panics if `i >= type_count()`.
302    pub fn type_at(&self, i: usize) -> ZoneType<'a> {
303        decode_type(self.ttinfo, self.abbrev, i)
304    }
305
306    /// Iterates over the zone's local time types.
307    pub fn types(&self) -> impl Iterator<Item = ZoneType<'a>> + 'a {
308        let ttinfo = self.ttinfo;
309        let abbrev = self.abbrev;
310        (0..self.typecnt).map(move |i| decode_type(ttinfo, abbrev, i))
311    }
312
313    /// Iterates over the stored transition records.
314    pub fn transitions(&self) -> impl Iterator<Item = Transition> + 'a {
315        let times = self.trans_times;
316        let types = self.trans_types;
317        let isstd = self.isstd;
318        let isut = self.isut;
319        let time_size = self.time_size;
320        (0..self.timecnt).map(move |i| Transition {
321            when: time_at(times, time_size, i),
322            type_idx: types[i] as usize,
323            is_std: isstd.get(i).is_some_and(|&b| b != 0),
324            is_ut: isut.get(i).is_some_and(|&b| b != 0),
325        })
326    }
327
328    /// Iterates over the leap-second records.
329    pub fn leap_seconds(&self) -> impl Iterator<Item = LeapSecond> + 'a {
330        let leap = self.leap;
331        let leap_size = self.leap_size;
332        let time_size = self.time_size;
333        (0..self.leapcnt).map(move |i| {
334            let off = i * leap_size;
335            let when = if time_size == 8 {
336                be_i64(&leap[off..off + 8])
337            } else {
338                be_i32(&leap[off..off + 4]) as i64
339            };
340            LeapSecond {
341                when,
342                correction: be_i32(&leap[off + time_size..off + time_size + 4]),
343            }
344        })
345    }
346
347    /// Returns the zone type in effect at the given Unix timestamp.
348    ///
349    /// Searches stored transitions and falls back to the POSIX TZ rule for
350    /// times after the last transition.
351    pub fn lookup(&self, unix: i64) -> ZoneType<'a> {
352        if self.timecnt == 0 {
353            if self.typecnt > 0 {
354                return self.type_at(0);
355            }
356            return ZoneType {
357                abbrev: "UTC",
358                offset: 0,
359                is_dst: false,
360            };
361        }
362
363        // Binary search: lo = number of transitions whose time is <= unix.
364        let (mut lo, mut hi) = (0usize, self.timecnt);
365        while lo < hi {
366            let mid = lo + (hi - lo) / 2;
367            if time_at(self.trans_times, self.time_size, mid) <= unix {
368                lo = mid + 1;
369            } else {
370                hi = mid;
371            }
372        }
373
374        if lo == 0 {
375            // Before the first transition: first non-DST type, else type 0.
376            for zt in self.types() {
377                if !zt.is_dst {
378                    return zt;
379                }
380            }
381            return self.type_at(0);
382        }
383
384        if lo == self.timecnt {
385            if let Some(ext) = &self.extend {
386                let (abbrev, offset, is_dst) = ext.lookup(unix);
387                return ZoneType {
388                    abbrev,
389                    offset,
390                    is_dst,
391                };
392            }
393        }
394
395        self.type_at(self.trans_types[lo - 1] as usize)
396    }
397
398    /// Returns transitions in the half-open interval `[start_unix, end_unix)`,
399    /// combining stored transitions with ones generated from the POSIX TZ
400    /// extend rule. The result is yielded in chronological order.
401    pub fn transitions_for_range(&self, start_unix: i64, end_unix: i64) -> RangeIter<'a> {
402        let last_stored = if self.timecnt > 0 {
403            time_at(self.trans_times, self.time_size, self.timecnt - 1)
404        } else {
405            i64::MIN
406        };
407        let generate = self.extend.map(|e| e.has_dst()).unwrap_or(false);
408        RangeIter {
409            zone: *self,
410            start_unix,
411            end_unix,
412            stored_idx: 0,
413            stored_done: false,
414            last_stored,
415            generate,
416            year: year_of(start_unix),
417            end_year: year_of(end_unix),
418            pending: [None, None],
419            pending_i: 0,
420        }
421    }
422}
423
424/// Iterator returned by [`Zone::transitions_for_range`].
425pub struct RangeIter<'a> {
426    zone: Zone<'a>,
427    start_unix: i64,
428    end_unix: i64,
429    stored_idx: usize,
430    stored_done: bool,
431    last_stored: i64,
432    generate: bool,
433    year: i32,
434    end_year: i32,
435    pending: [Option<RangeTransition<'a>>; 2],
436    pending_i: usize,
437}
438
439impl<'a> Iterator for RangeIter<'a> {
440    type Item = RangeTransition<'a>;
441
442    fn next(&mut self) -> Option<RangeTransition<'a>> {
443        // Phase 1: stored transitions in range.
444        if !self.stored_done {
445            let z = &self.zone;
446            while self.stored_idx < z.timecnt {
447                let i = self.stored_idx;
448                let when = time_at(z.trans_times, z.time_size, i);
449                if when >= self.end_unix {
450                    self.stored_done = true;
451                    break;
452                }
453                self.stored_idx += 1;
454                if when >= self.start_unix {
455                    return Some(RangeTransition {
456                        when,
457                        zone_type: z.type_at(z.trans_types[i] as usize),
458                    });
459                }
460            }
461            self.stored_done = true;
462        }
463
464        // Phase 2: transitions generated from the POSIX extend rule.
465        if !self.generate {
466            return None;
467        }
468        let ext = self.zone.extend.expect("generate implies extend");
469        loop {
470            // Drain any pending transitions buffered for the current year.
471            while self.pending_i < 2 {
472                let item = self.pending[self.pending_i].take();
473                self.pending_i += 1;
474                if let Some(t) = item {
475                    return Some(t);
476                }
477            }
478
479            if self.year > self.end_year {
480                return None;
481            }
482
483            // Compute this year's transitions, filter to range, sort, buffer.
484            let year = self.year;
485            self.year += 1;
486            self.pending = [None, None];
487            self.pending_i = 0;
488
489            if let Some((dst_start, dst_end)) = ext.transitions_for_year(year) {
490                let dst_type = ZoneType {
491                    abbrev: ext.dst_abbrev,
492                    offset: ext.dst_offset,
493                    is_dst: true,
494                };
495                let std_type = ZoneType {
496                    abbrev: ext.std_abbrev,
497                    offset: ext.std_offset,
498                    is_dst: false,
499                };
500                let mut buf: [Option<RangeTransition<'a>>; 2] = [None, None];
501                let mut n = 0;
502                if self.in_range(dst_start) {
503                    buf[n] = Some(RangeTransition {
504                        when: dst_start,
505                        zone_type: dst_type,
506                    });
507                    n += 1;
508                }
509                if self.in_range(dst_end) {
510                    buf[n] = Some(RangeTransition {
511                        when: dst_end,
512                        zone_type: std_type,
513                    });
514                    n += 1;
515                }
516                // Sort the (at most two) candidates chronologically.
517                if n == 2 && buf[0].as_ref().map(|t| t.when) > buf[1].as_ref().map(|t| t.when) {
518                    buf.swap(0, 1);
519                }
520                self.pending = buf;
521            }
522        }
523    }
524}
525
526impl RangeIter<'_> {
527    fn in_range(&self, when: i64) -> bool {
528        when >= self.start_unix && when < self.end_unix && when > self.last_stored
529    }
530}