Skip to main content

zdump_rs/
tzif.rs

1//! An independent, bounds-checked TZif reader (RFC 9636).
2//!
3//! This is a SECOND implementation, intentionally sharing no code with zic-rs's `tzif` module — its value
4//! as evidence is exactly that it was written separately, so agreement between the two is meaningful. It
5//! reads v1/v2/v3/v4 files; for v2+ it parses the 64-bit data block (the one modern readers use) and keeps
6//! the POSIX footer string for later (Phase 2) future-projection work.
7//!
8//! Scope (Phase 1): header + data block (transitions, transition-type indices, local-time types,
9//! leap-second records, the footer string). It does NOT interpret the footer, apply leap corrections to
10//! wall time, or validate to the full RFC predicate set — that is `rfc9636::validate` in zic-rs's lane.
11
12#![forbid(unsafe_code)]
13
/// A local-time type (`ttinfo`): UT offset, DST flag, and abbreviation.
///
/// One of these is built per `ttinfo` record in a data block; the abbreviation is resolved from the
/// block's designation table at parse time so consumers never deal with raw designation indices.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TtInfo {
    /// Offset from UT, read from the file as a big-endian signed 32-bit value.
    pub utoff: i32,
    /// `true` when the record's one-byte DST indicator was non-zero.
    pub is_dst: bool,
    /// Abbreviation text: the bytes of the designation table from the record's index up to (not
    /// including) the next NUL, decoded lossily as UTF-8.
    pub abbr: String,
}
21
/// A leap-second record (occurrence in UT, cumulative correction).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LeapSecond {
    /// When the leap second occurs. Stored as 32 bits in a v1 block and 64 bits in a v2+ block; both
    /// are widened to `i64` here.
    pub occur: i64,
    /// The correction value stored alongside the occurrence (signed 32-bit in the file).
    pub corr: i32,
}
28
/// A parsed TZif file (the data this witness reasons over).
///
/// For version 1 files the contents come from the only (32-bit) data block; for version 2+ files they
/// come from the second (64-bit) data block.
#[derive(Debug, Clone)]
pub struct Tzif {
    /// Format version decoded from the header's version byte.
    pub version: u8, // 1, 2, 3, or 4
    /// Transition instants, in file order (lookups binary-search this, so it is expected to be sorted).
    pub transitions: Vec<i64>,
    /// For each transition, the index into `types` that takes effect at it. The parser guarantees this
    /// has the same length as `transitions` and that every entry is a valid index into `types`.
    pub type_indices: Vec<u8>,
    /// The local-time types; never empty for a value produced by `parse`.
    pub types: Vec<TtInfo>,
    /// Leap-second records, in file order.
    pub leaps: Vec<LeapSecond>,
    /// The text following the 64-bit data block with surrounding newlines/NULs trimmed; `None` for v1
    /// files or when nothing but padding follows the block.
    pub footer: Option<String>,
}
39
/// A forward-only, big-endian reader over a byte slice.
///
/// Every read is bounds-checked: running off the end of the input (or overflowing the offset) produces
/// an `Err` describing the failure instead of panicking, and leaves the position untouched.
struct Cur<'a> {
    /// The complete input being read.
    b: &'a [u8],
    /// Offset of the next unread byte; always `<= b.len()`.
    p: usize,
}
impl<'a> Cur<'a> {
    /// Start reading `b` from its first byte.
    fn new(b: &'a [u8]) -> Self {
        Self { b, p: 0 }
    }

    /// Consume and return the next `n` bytes, or fail without advancing.
    fn take(&mut self, n: usize) -> Result<&'a [u8], String> {
        let input: &'a [u8] = self.b;
        let start = self.p;
        let stop = match start.checked_add(n) {
            Some(stop) => stop,
            None => return Err("offset overflow".into()),
        };
        match input.get(start..stop) {
            Some(chunk) => {
                self.p = stop;
                Ok(chunk)
            }
            None => Err(format!("truncated: need {n} bytes at {start}")),
        }
    }

    /// Consume exactly `N` bytes and return them as a fixed-size array.
    fn fixed<const N: usize>(&mut self) -> Result<[u8; N], String> {
        let mut out = [0u8; N];
        out.copy_from_slice(self.take(N)?);
        Ok(out)
    }

    /// Read a big-endian unsigned 32-bit integer.
    fn u32(&mut self) -> Result<u32, String> {
        self.fixed::<4>().map(u32::from_be_bytes)
    }

    /// Read a big-endian two's-complement signed 32-bit integer.
    fn i32(&mut self) -> Result<i32, String> {
        self.fixed::<4>().map(i32::from_be_bytes)
    }

    /// Read a big-endian two's-complement signed 64-bit integer.
    fn i64(&mut self) -> Result<i64, String> {
        self.fixed::<8>().map(i64::from_be_bytes)
    }

    /// Read a single byte.
    fn u8(&mut self) -> Result<u8, String> {
        self.fixed::<1>().map(|[byte]| byte)
    }
}
76
/// The six 32-bit count fields of a TZif header, declared in the order they are read from the file.
/// They determine how many records of each kind the data block that follows contains.
#[derive(Clone, Copy)]
struct Counts {
    /// Number of one-byte UT/local indicators (read past, not interpreted, by this reader).
    isutcnt: u32,
    /// Number of one-byte standard/wall indicators (read past, not interpreted, by this reader).
    isstdcnt: u32,
    /// Number of leap-second records.
    leapcnt: u32,
    /// Number of transition times, and equally of transition type indices.
    timecnt: u32,
    /// Number of local-time type (`ttinfo`) records; a block with zero is rejected.
    typecnt: u32,
    /// Size in bytes of the abbreviation (designation) table.
    charcnt: u32,
}
86
87fn read_header(c: &mut Cur) -> Result<(u8, Counts), String> {
88    let magic = c.take(4)?;
89    if magic != b"TZif" {
90        return Err("bad magic (not TZif)".into());
91    }
92    let ver = c.u8()?;
93    let version = match ver {
94        0 => 1,
95        b'2' => 2,
96        b'3' => 3,
97        b'4' => 4,
98        other => return Err(format!("unknown version byte {other:#x}")),
99    };
100    c.take(15)?; // reserved
101    let counts = Counts {
102        isutcnt: c.u32()?,
103        isstdcnt: c.u32()?,
104        leapcnt: c.u32()?,
105        timecnt: c.u32()?,
106        typecnt: c.u32()?,
107        charcnt: c.u32()?,
108    };
109    Ok((version, counts))
110}
111
/// The decoded contents of one TZif data block.
///
/// Holds only what this reader keeps: the standard/wall and UT/local indicator bytes are consumed but
/// not retained.
struct Block {
    /// Transition instants, widened to `i64` regardless of the on-disk width.
    transitions: Vec<i64>,
    /// One validated index into `types` per transition.
    type_indices: Vec<u8>,
    /// Local-time types with their abbreviations already resolved.
    types: Vec<TtInfo>,
    /// Leap-second records, in file order.
    leaps: Vec<LeapSecond>,
}
119
120/// Parse a TZif data block. `wide` selects 64-bit (`true`, v2+ block) vs 32-bit (`false`, v1 block)
121/// transition/leap-occurrence widths.
122fn read_block(c: &mut Cur, n: Counts, wide: bool) -> Result<Block, String> {
123    if n.typecnt == 0 {
124        return Err("typecnt == 0".into());
125    }
126    let mut transitions = Vec::with_capacity(n.timecnt as usize);
127    for _ in 0..n.timecnt {
128        transitions.push(if wide { c.i64()? } else { c.i32()? as i64 });
129    }
130    let mut type_indices = Vec::with_capacity(n.timecnt as usize);
131    for _ in 0..n.timecnt {
132        let ti = c.u8()?;
133        if ti as u32 >= n.typecnt {
134            return Err(format!(
135                "transition type index {ti} >= typecnt {}",
136                n.typecnt
137            ));
138        }
139        type_indices.push(ti);
140    }
141    // ttinfo: int32 utoff, u8 isdst, u8 desigidx
142    let mut raw_types = Vec::with_capacity(n.typecnt as usize);
143    for _ in 0..n.typecnt {
144        let utoff = c.i32()?;
145        let isdst = c.u8()? != 0;
146        let desigidx = c.u8()?;
147        if desigidx as u32 >= n.charcnt {
148            return Err(format!(
149                "designation index {desigidx} >= charcnt {}",
150                n.charcnt
151            ));
152        }
153        raw_types.push((utoff, isdst, desigidx));
154    }
155    let desig = c.take(n.charcnt as usize)?;
156    let mut types = Vec::with_capacity(n.typecnt as usize);
157    for (utoff, isdst, di) in raw_types {
158        let start = di as usize;
159        let end = desig[start..]
160            .iter()
161            .position(|&b| b == 0)
162            .map(|z| start + z)
163            .unwrap_or(desig.len());
164        let abbr = String::from_utf8_lossy(&desig[start..end]).into_owned();
165        types.push(TtInfo {
166            utoff,
167            is_dst: isdst,
168            abbr,
169        });
170    }
171    let mut leaps = Vec::with_capacity(n.leapcnt as usize);
172    for _ in 0..n.leapcnt {
173        let occur = if wide { c.i64()? } else { c.i32()? as i64 };
174        let corr = c.i32()?;
175        leaps.push(LeapSecond { occur, corr });
176    }
177    for _ in 0..n.isstdcnt {
178        c.u8()?;
179    }
180    for _ in 0..n.isutcnt {
181        c.u8()?;
182    }
183    Ok(Block {
184        transitions,
185        type_indices,
186        types,
187        leaps,
188    })
189}
190
191/// Parse a complete TZif file.
192pub fn parse(bytes: &[u8]) -> Result<Tzif, String> {
193    let mut c = Cur::new(bytes);
194    let (version, counts) = read_header(&mut c)?;
195    if version == 1 {
196        let b = read_block(&mut c, counts, false)?;
197        return Ok(Tzif {
198            version,
199            transitions: b.transitions,
200            type_indices: b.type_indices,
201            types: b.types,
202            leaps: b.leaps,
203            footer: None,
204        });
205    }
206    // v2+: skip the v1 block, then read the second header + 64-bit block + footer.
207    let _ = read_block(&mut c, counts, false)?;
208    let (v2, counts2) = read_header(&mut c)?;
209    if v2 != version {
210        return Err(format!("v1/v2 header version mismatch: {version} vs {v2}"));
211    }
212    let b = read_block(&mut c, counts2, true)?;
213    // footer: <newline> TZ-string <newline>
214    let footer = read_footer(&mut c);
215    Ok(Tzif {
216        version,
217        transitions: b.transitions,
218        type_indices: b.type_indices,
219        types: b.types,
220        leaps: b.leaps,
221        footer,
222    })
223}
224
225fn read_footer(c: &mut Cur) -> Option<String> {
226    let rest = &c.b[c.p.min(c.b.len())..];
227    let s = String::from_utf8_lossy(rest);
228    let s = s.trim_matches(|ch| ch == '\n' || ch == '\r' || ch == '\0');
229    if s.is_empty() {
230        None
231    } else {
232        Some(s.to_string())
233    }
234}
235
236impl Tzif {
237    /// The local-time type index in effect *before the first transition* (RFC 9636: the first non-DST
238    /// type, else type 0). Used when a probe instant precedes every recorded transition.
239    fn pre_first_type(&self) -> usize {
240        self.types.iter().position(|t| !t.is_dst).unwrap_or(0)
241    }
242
243    /// The type in effect at Unix instant `t`.
244    ///
245    /// Within the explicit transition table this is a binary-search lookup. **Beyond** the last explicit
246    /// transition (Phase 2) the POSIX footer governs: if it parses, the answer is projected from it (so
247    /// far-future instants now match reference `zdump` instead of being pinned to the last type). Without a
248    /// footer, the last type is used and `beyond_explicit` flags it.
249    pub fn observe(&self, t: i64) -> Observation {
250        if self.beyond_explicit(t) {
251            if let Some(f) = &self.footer {
252                if let Some(tz) = crate::posix::parse(f) {
253                    return tz.observe(t);
254                }
255            }
256        }
257        self.observe_explicit(t)
258    }
259
260    /// The explicit-table-only lookup (no footer projection) — used within the recorded range and as the
261    /// fallback when there is no parseable footer.
262    pub fn observe_explicit(&self, t: i64) -> Observation {
263        let idx = match self.transitions.binary_search(&t) {
264            Ok(i) => Some(i),      // a transition begins exactly at t
265            Err(0) => None,        // before the first transition
266            Err(i) => Some(i - 1), // last transition strictly before t
267        };
268        let ti = match idx {
269            Some(i) => self.type_indices[i] as usize,
270            None => self.pre_first_type(),
271        };
272        let tt = &self.types[ti];
273        Observation {
274            utoff: tt.utoff,
275            is_dst: tt.is_dst,
276            abbr: tt.abbr.clone(),
277        }
278    }
279
280    /// True when `t` is at or beyond the last explicit transition, so the answer is footer-governed (now
281    /// footer-projected in Phase 2 when a footer is present; the field still records the factual provenance
282    /// "this value came from the footer, not the explicit table").
283    pub fn beyond_explicit(&self, t: i64) -> bool {
284        match self.transitions.last() {
285            Some(&last) => t >= last,
286            None => true,
287        }
288    }
289
290    /// Every explicit transition with `lo <= at < hi`, each with the type in effect just before and at it
291    /// (the `zdump -v` analog, restricted to the explicit table). Phase 2.
292    pub fn transitions_in(&self, lo: i64, hi: i64) -> Vec<TransitionRow> {
293        let mut out = Vec::new();
294        for (i, &at) in self.transitions.iter().enumerate() {
295            if at < lo || at >= hi {
296                continue;
297            }
298            let before = if i == 0 {
299                let tt = &self.types[self.pre_first_type()];
300                Observation {
301                    utoff: tt.utoff,
302                    is_dst: tt.is_dst,
303                    abbr: tt.abbr.clone(),
304                }
305            } else {
306                let tt = &self.types[self.type_indices[i - 1] as usize];
307                Observation {
308                    utoff: tt.utoff,
309                    is_dst: tt.is_dst,
310                    abbr: tt.abbr.clone(),
311                }
312            };
313            let at_t = &self.types[self.type_indices[i] as usize];
314            let after = Observation {
315                utoff: at_t.utoff,
316                is_dst: at_t.is_dst,
317                abbr: at_t.abbr.clone(),
318            };
319            out.push(TransitionRow { at, before, after });
320        }
321        out
322    }
323}
324
/// One explicit transition: the instant, and the local-time type just before and at it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TransitionRow {
    /// The transition instant, exactly as stored in the transition table.
    pub at: i64,
    /// The type in effect immediately before `at` (the previous transition's type, or the
    /// pre-first-transition type for the first row of the table).
    pub before: Observation,
    /// The type that takes effect at `at`.
    pub after: Observation,
}
332
/// What the witness observed at one instant.
///
/// Carries the same three values as a [`TtInfo`], copied out so the result owns its data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Observation {
    /// UT offset of the local-time type in effect.
    pub utoff: i32,
    /// Whether that type is flagged as daylight-saving time.
    pub is_dst: bool,
    /// The type's abbreviation.
    pub abbr: String,
}
340
#[cfg(test)]
mod tests {
    use super::*;

    /// Begin a hand-built v1 file: magic, version byte 0, 15 reserved bytes, then the six big-endian
    /// counts in header order (isut, isstd, leap, time, type, char).
    fn v1_header(counts: [u32; 6]) -> Vec<u8> {
        let mut bytes = b"TZif".to_vec();
        bytes.push(0); // v1
        bytes.extend_from_slice(&[0u8; 15]);
        for count in counts {
            bytes.extend_from_slice(&count.to_be_bytes());
        }
        bytes
    }

    /// Append one ttinfo (utoff 0, isdst 0, desigidx 0) followed by the designation table "UTC\0".
    fn push_utc_type(bytes: &mut Vec<u8>) {
        bytes.extend_from_slice(&0i32.to_be_bytes());
        bytes.push(0);
        bytes.push(0);
        bytes.extend_from_slice(b"UTC\0");
    }

    // A minimal hand-built v1 TZif: one type (UTC, "UTC"), no transitions.
    fn utc_v1() -> Vec<u8> {
        let mut bytes = v1_header([0, 0, 0, 0, 1, 4]);
        push_utc_type(&mut bytes);
        bytes
    }

    #[test]
    fn parses_utc_and_observes() {
        let zone = parse(&utc_v1()).unwrap();
        assert_eq!(zone.types.len(), 1);
        let seen = zone.observe(1_767_225_600); // 2026-01-01Z
        assert_eq!(seen.utoff, 0);
        assert!(!seen.is_dst);
        assert_eq!(seen.abbr, "UTC");
    }

    #[test]
    fn rejects_truncated() {
        // Magic only (header cut short), and a wrong magic.
        assert!(parse(b"TZif").is_err());
        assert!(parse(b"XXXX").is_err());
    }

    #[test]
    fn rejects_oob_type_index() {
        // timecnt=1 transition pointing at type 5 with typecnt=1 -> must be rejected, not panic
        let mut bytes = v1_header([0, 0, 0, 1, 1, 4]);
        bytes.extend_from_slice(&0i32.to_be_bytes()); // transition time
        bytes.push(5); // bad type index
        push_utc_type(&mut bytes);
        assert!(parse(&bytes).is_err());
    }
}