tzcompile/tzif/
validate.rs

1//! Reading TZif back into semantics.
2//!
3//! This is the inverse of the writer, and it pulls double duty:
4//!
5//! * the writer's tests round-trip their own output through [`parse`] to prove the bytes
6//!   decode to what went in; and
7//! * the oracle (`compare::semantic`) decodes **reference `zic`** output with the very same
8//!   code, so a semantic diff compares like with like.
9//!
10//! We decode the v2+ block (the authoritative data in modern files) plus the footer. The
11//! v1 block is parsed only enough to know how many bytes to skip — in slim files it is the
12//! placeholder stub described in `tzif/mod.rs`, so its contents are not meaningful.
13
14use super::header::{Counts, MAGIC};
15use super::{LeapRecord, LocalTimeType, Transition};
16use crate::error::{Error, Result};
17
18/// The semantically-meaningful contents decoded from a TZif file.
19#[derive(Debug, Clone, PartialEq, Eq)]
20pub struct ParsedTzif {
21    pub version: u8,
22    pub types: Vec<LocalTimeType>,
23    pub transitions: Vec<Transition>,
24    /// Decoded leap-second records of the authoritative block (T11.3): (occurrence, cumulative
25    /// correction). Empty for ordinary zones.
26    pub leaps: Vec<LeapRecord>,
27    pub footer: String,
28    /// The raw count fields of the *authoritative* block's header (the v2+ block for a v2+
29    /// file; the sole block for a v1-only file). Exposed so the structural-parity inventory
30    /// (T8) can compare `timecnt`/`typecnt`/`charcnt`/`isutcnt`/`isstdcnt`/`leapcnt` against
31    /// reference `zic` without re-decoding the header.
32    pub counts: Counts,
33    /// Raw structural bytes the RFC-9636 validator needs but which `parse` otherwise collapses or skips
34    /// (T23.kani.3f.enforce): per-type `isdst`/`desigidx` octets, the designation table, and the std/UT
35    /// indicator arrays. `parse` stays memory-safe-*lenient* (it does not reject on these); the strict RFC
36    /// byte-validity rules are applied by `rfc9636::validate` over this. (`utoff` lives in [`types`].)
37    pub raw: RawStructural,
38}
39
40/// Raw structural fields of the authoritative block, captured for the RFC-9636 validator. *Memory-safe ≠
41/// format-valid:* `parse` does not reject on these byte values; `rfc9636::validate` applies the strict
42/// RFC 9636 §3.2 rules (`isdst ∈ {0,1}`, designation-index validity, indicator byte values + `isut⇒isstd`
43/// pairing — the predicates proven by T23.kani.3f.1–.4). Indexed parallel to `ParsedTzif::types`.
44#[derive(Debug, Clone, PartialEq, Eq, Default)]
45pub struct RawStructural {
46    pub isdst: Vec<u8>,
47    pub desigidx: Vec<u8>,
48    pub designation: Vec<u8>,
49    pub std_indicators: Vec<u8>,
50    pub ut_indicators: Vec<u8>,
51}
52
53/// A tiny cursor over the input with bounds-checked reads.
54struct Cursor<'a> {
55    buf: &'a [u8],
56    pos: usize,
57}
58
59impl<'a> Cursor<'a> {
60    fn new(buf: &'a [u8]) -> Self {
61        Cursor { buf, pos: 0 }
62    }
63
64    fn take(&mut self, n: usize) -> Result<&'a [u8]> {
65        let end = self
66            .pos
67            .checked_add(n)
68            .ok_or_else(|| Error::message("TZif length overflow"))?;
69        if end > self.buf.len() {
70            return Err(Error::message("TZif truncated"));
71        }
72        let s = &self.buf[self.pos..end];
73        self.pos = end;
74        Ok(s)
75    }
76
77    fn u32(&mut self) -> Result<u32> {
78        let b = self.take(4)?;
79        Ok(u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
80    }
81
82    /// Bytes not yet consumed. Used (T17.5) to bound an untrusted declared count against the data that
83    /// is *physically present* before allocating for it.
84    fn remaining(&self) -> usize {
85        self.buf.len().saturating_sub(self.pos)
86    }
87
88    /// Advance past `n` bytes (T17.5: checked — a count-driven skip, e.g. the v1 block, must not
89    /// overflow `pos` nor land past the buffer end; either is a typed `Err`, never a wrap or a silent
90    /// out-of-range cursor).
91    fn skip(&mut self, n: usize) -> Result<()> {
92        let end = self
93            .pos
94            .checked_add(n)
95            .ok_or_else(|| Error::message("TZif length overflow"))?;
96        if end > self.buf.len() {
97            return Err(Error::message(
98                "TZif truncated (a counted block extends past the input)",
99            ));
100        }
101        self.pos = end;
102        Ok(())
103    }
104}
105
106/// Read a 44-byte header, validating the magic, and return its version byte and counts.
107fn read_header(c: &mut Cursor<'_>) -> Result<(u8, Counts)> {
108    let magic = c.take(4)?;
109    if magic != MAGIC {
110        return Err(Error::message("bad TZif magic"));
111    }
112    let version = c.take(1)?[0];
113    let _reserved = c.take(15)?;
114    let counts = Counts {
115        isutcnt: c.u32()?,
116        isstdcnt: c.u32()?,
117        leapcnt: c.u32()?,
118        timecnt: c.u32()?,
119        typecnt: c.u32()?,
120        charcnt: c.u32()?,
121    };
122    Ok((version, counts))
123}
124
125/// Number of bytes a data block occupies for the given counts and `time_size`.
126///
127/// **T17.5 (CountArithmeticVerdict):** every term is `count × element-size`, where the counts are
128/// **untrusted `u32`s read straight from the header**. The arithmetic is fully **checked** — a header
129/// claiming `timecnt = u32::MAX` (× 8 + the other terms) overflows `usize` on a 32-bit target and is a
130/// large value on 64-bit; either way an overflow is a typed `Err`, never a wrap (which on 32-bit would
131/// under-compute the length and mis-slice) and never a panic (which `overflow-checks` would otherwise
132/// raise). *Counts from input are hostile until range-checked.*
133fn checked_block_len(counts: &Counts, time_size: usize) -> Result<usize> {
134    let oflow =
135        || Error::message("TZif count arithmetic overflow (declared counts are implausibly large)");
136    // Accumulate `count * size` terms with checked math.
137    let term = |count: u32, size: usize| count_mul(count, size).ok_or_else(oflow);
138    let mut total: usize = 0;
139    for t in [
140        term(counts.timecnt, time_size)?,     // transition times
141        term(counts.timecnt, 1)?,             // transition type indices
142        term(counts.typecnt, 6)?,             // ttinfo records
143        term(counts.charcnt, 1)?,             // designation table
144        term(counts.leapcnt, time_size + 4)?, // leap records
145        term(counts.isstdcnt, 1)?,            // std/wall indicators
146        term(counts.isutcnt, 1)?,             // ut/local indicators
147    ] {
148        total = total.checked_add(t).ok_or_else(oflow)?;
149    }
150    Ok(total)
151}
152
153/// `count (u32) * size (usize)` with overflow → `None` (T17.5).
154fn count_mul(count: u32, size: usize) -> Option<usize> {
155    (count as usize).checked_mul(size)
156}
157
158/// The position of the first transition type-index that is **out of range** for a `typecnt`-length
159/// local-time-type table, or `None` if every index is in range (RFC 9636 §3.2).
160///
161/// Pure, allocation-free, and format-free **by design** so it can be bounded-model-checked in isolation
162/// (`audits/kani`, T23.kani.3a) — the proof shows `None` ⟺ every index is `< typecnt`, which is exactly the
163/// precondition that makes the downstream `types[type_index]` indexing safe. The caller ([`read_block`])
164/// turns a `Some(pos)` into the typed error (the `format!` stays out of the proven helper).
165fn first_oob_type_index(type_indices: &[u8], typecnt: usize) -> Option<usize> {
166    type_indices.iter().position(|&i| (i as usize) >= typecnt)
167}
168
169/// Whether a designation (abbreviation) index is a **slice-safe** start offset into a `table_len`-byte
170/// abbreviation table: `idx <= table_len`, so `&table[idx..]` is an in-bounds (possibly empty) slice and
171/// [`read_cstr`] cannot panic on a hostile `ttinfo` designation index. Pure + alloc-free → BMC-checkable
172/// (`audits/kani`, T23.kani.3b proves exactly this).
173///
174/// **Precision boundary (memory-safe ≠ format-valid):** this is *Rust slice safety*, **not** RFC 9636
175/// designation-index *validity*. RFC 9636 §3.2 is stricter — a conformant `desigidx` must be `< charcnt`
176/// (not merely `<= charcnt`), with a NUL octet at or after it, and `charcnt` itself must be non-zero. The
177/// reader is intentionally memory-safe-*lenient* here (an out-of-range-but-in-bounds index yields an empty
178/// or unterminated abbreviation, never a panic); enforcing the stricter RFC structural rule is the RFC-9636
179/// validator's job (`tzif/rfc9636.rs`) and is currently a tracked gap (see `audits/claim-boundary-map.md`).
180fn abbr_index_slice_safe(idx: usize, table_len: usize) -> bool {
181    idx <= table_len
182}
183
184/// RFC 9636 §3.2 designation-index **validity** (stricter than the slice-safety of `abbr_index_slice_safe`):
185/// a conformant `ttinfo` designation index must be `< charcnt` (not merely `<= charcnt`), the designation
186/// table must be non-empty (`charcnt != 0`), and a NUL terminator must exist at or after `idx`. The reader is
187/// intentionally memory-safe-*lenient* here; this is the proven basis for the stricter RFC check (enforcement
188/// in `rfc9636::validate` — now **enforced** there, T23.kani.3f.enforce). Pure → BMC-checkable (T23.kani.3f.1).
189pub(crate) fn rfc_designation_index_valid(
190    idx: usize,
191    charcnt: usize,
192    has_nul_at_or_after: bool,
193) -> bool {
194    charcnt != 0 && idx < charcnt && has_nul_at_or_after
195}
196
197/// RFC 9636 §3.2: a `ttinfo` `isdst` octet is structurally valid iff it is 0 or 1. `parse` collapses it to a
198/// `bool` via `!= 0`, so the raw byte value is unchecked today — this is the proven basis for the stricter
199/// check (T23.kani.3f.2). Pure.
200pub(crate) fn isdst_byte_valid(isdst: u8) -> bool {
201    isdst <= 1
202}
203
204/// RFC 9636 §3.2 indicator-byte pairing: the std/wall (`isstd`) and UT/local (`isut`) octets must each be 0 or
205/// 1, and a UT indicator is only meaningful with a standard indicator: `isut == 1 ⇒ isstd == 1`. `parse` reads
206/// only the indicator *counts*, not these byte values — proven basis for the stricter check (T23.kani.3f.3).
207pub(crate) fn indicator_pair_valid(isut: u8, isstd: u8) -> bool {
208    isut <= 1 && isstd <= 1 && (isut == 0 || isstd == 1)
209}
210
211/// RFC 9636 §3.2: a `ttinfo` `utoff` (UT offset, seconds) must not be `-2^31` (`i32::MIN`) — the one value
212/// whose signed negation overflows, which would break offset arithmetic. Proven basis for the stricter check
213/// (T23.kani.3f.4). Pure.
214pub(crate) fn utoff_structural_valid(utoff: i32) -> bool {
215    utoff != i32::MIN
216}
217
218/// Decode a full data block (transitions + types) given its counts and time size.
219// The 4-tuple is a private decode result threaded straight into `parse`'s two call sites; a named struct
220// would be ceremony for one internal path, so the complexity lint is allowed here with this note.
221#[allow(clippy::type_complexity)]
222fn read_block(
223    c: &mut Cursor<'_>,
224    counts: &Counts,
225    time_size: usize,
226) -> Result<(
227    Vec<Transition>,
228    Vec<LocalTimeType>,
229    Vec<LeapRecord>,
230    RawStructural,
231)> {
232    // T17.5 — bound the *untrusted declared counts* against the data actually present BEFORE allocating
233    // for them. The block's full byte length (checked arithmetic) must fit in the remaining input; a
234    // header claiming `timecnt = 4e9` with a 100-byte file is rejected here, so `Vec::with_capacity(tc)`
235    // below can never be asked to reserve gigabytes from a tiny file (a real DoS otherwise — the
236    // allocation happens up front, before the per-element `take()` bounds checks would ever fire).
237    let needed = checked_block_len(counts, time_size)?;
238    if needed > c.remaining() {
239        return Err(Error::message(
240            "TZif truncated: declared counts require more bytes than the input contains",
241        ));
242    }
243
244    let tc = counts.timecnt as usize;
245    let ty = counts.typecnt as usize;
246
247    // 1. transition times. `with_capacity(tc)` is now safe: the check above proved the whole block —
248    // including `tc * time_size` bytes — fits in the remaining input, so `tc` is bounded by the file size.
249    let mut times = Vec::with_capacity(tc);
250    for _ in 0..tc {
251        let b = c.take(time_size)?;
252        let at = match time_size {
253            4 => i32::from_be_bytes([b[0], b[1], b[2], b[3]]) as i64,
254            8 => i64::from_be_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
255            _ => return Err(Error::message("bad time size")),
256        };
257        times.push(at);
258    }
259    // 2. transition type indices.
260    let idxs = c.take(tc)?.to_vec();
261    // 3. ttinfo records.
262    let mut raw_types = Vec::with_capacity(ty);
263    let mut raw_isdst = Vec::with_capacity(ty);
264    let mut raw_desigidx = Vec::with_capacity(ty);
265    for _ in 0..ty {
266        let b = c.take(6)?;
267        let utoff = i32::from_be_bytes([b[0], b[1], b[2], b[3]]);
268        let is_dst = b[4] != 0;
269        let desigidx = b[5] as usize;
270        // Capture the raw `isdst`/`desigidx` octets for the RFC-9636 validator (parse stays lenient:
271        // `is_dst` is `b[4] != 0`, so `isdst == 2` would be silently `true` without this).
272        raw_isdst.push(b[4]);
273        raw_desigidx.push(b[5]);
274        raw_types.push((utoff, is_dst, desigidx));
275    }
276    // 4. designation table.
277    let table = c.take(counts.charcnt as usize)?;
278    let designation = table.to_vec();
279    // 5. leap-second records — decoded (T11.3): (occurrence, cumulative correction).
280    let mut leaps = Vec::with_capacity(counts.leapcnt as usize);
281    for _ in 0..counts.leapcnt as usize {
282        let tb = c.take(time_size)?;
283        let trans = match time_size {
284            4 => i32::from_be_bytes([tb[0], tb[1], tb[2], tb[3]]) as i64,
285            8 => i64::from_be_bytes([tb[0], tb[1], tb[2], tb[3], tb[4], tb[5], tb[6], tb[7]]),
286            _ => return Err(Error::message("bad time size")),
287        };
288        let cb = c.take(4)?;
289        let corr = i32::from_be_bytes([cb[0], cb[1], cb[2], cb[3]]);
290        leaps.push(LeapRecord { trans, corr });
291    }
292    // 6/7. standard/wall + UT/local indicators — captured for the RFC-9636 validator (byte values + the
293    // `isut ⇒ isstd` pairing; `parse` itself does not enforce them — memory-safe ≠ format-valid).
294    let std_indicators = c.take(counts.isstdcnt as usize)?.to_vec();
295    let ut_indicators = c.take(counts.isutcnt as usize)?.to_vec();
296
297    // Resolve designation indices to strings (NUL-terminated from the index).
298    let types = raw_types
299        .into_iter()
300        .map(|(utoff, is_dst, idx)| {
301            let abbr = read_cstr(table, idx)?;
302            Ok(LocalTimeType {
303                utoff,
304                is_dst,
305                abbr,
306            })
307        })
308        .collect::<Result<Vec<_>>>()?;
309
310    // T17.1 bounds-guard: every transition's type index must point at a real local-time type.
311    // `parse` is the single choke point before any consumer (`compare::semantic::diff`,
312    // `compile::leap`) indexes `types[transition.type_index as usize]`, so enforcing the RFC 9636
313    // §3.2 invariant (transition type indices < `typecnt`) HERE turns a malformed/hostile TZif into
314    // a typed rejection instead of a latent out-of-bounds panic downstream. (`typecnt == 0` with any
315    // transition is caught by the same check, since no index can be `< 0`.) The decision is extracted
316    // into the pure [`first_oob_type_index`] so it can be bounded-model-checked alone (`audits/kani`,
317    // T23.kani.3a) without dragging the allocating parse path through the solver; the `format!` for the
318    // error stays here (out of the proven helper). Checked on the raw `idxs` before they are zipped.
319    if let Some(pos) = first_oob_type_index(&idxs, types.len()) {
320        return Err(Error::message(format!(
321            "TZif transition type index {} out of range (typecnt {})",
322            idxs[pos],
323            types.len()
324        )));
325    }
326
327    let transitions: Vec<Transition> = times
328        .into_iter()
329        .zip(idxs)
330        .map(|(at, type_index)| Transition { at, type_index })
331        .collect();
332
333    Ok((
334        transitions,
335        types,
336        leaps,
337        RawStructural {
338            isdst: raw_isdst,
339            desigidx: raw_desigidx,
340            designation,
341            std_indicators,
342            ut_indicators,
343        },
344    ))
345}
346
347/// Read a NUL-terminated abbreviation starting at `idx` within the designation table.
348fn read_cstr(table: &[u8], idx: usize) -> Result<String> {
349    if !abbr_index_slice_safe(idx, table.len()) {
350        return Err(Error::message("designation index out of range"));
351    }
352    let rest = &table[idx..];
353    let end = rest.iter().position(|&b| b == 0).unwrap_or(rest.len());
354    String::from_utf8(rest[..end].to_vec()).map_err(|_| Error::message("non-UTF-8 abbreviation"))
355}
356
357/// Parse a complete TZif file into its semantically-meaningful contents.
358pub fn parse(bytes: &[u8]) -> Result<ParsedTzif> {
359    let mut c = Cursor::new(bytes);
360    let (version, v1_counts) = read_header(&mut c)?;
361
362    if version == 0 {
363        // A v1-only file: the v1 block *is* the data; there is no v2 block or footer.
364        let (transitions, types, leaps, raw) = read_block(&mut c, &v1_counts, 4)?;
365        return Ok(ParsedTzif {
366            version,
367            types,
368            transitions,
369            leaps,
370            footer: String::new(),
371            counts: v1_counts,
372            raw,
373        });
374    }
375
376    // Skip the v1 stub block, then decode the authoritative v2+ block. T17.5: the skip distance is a
377    // checked count×size sum, and `skip` verifies it stays within the buffer — a header with implausible
378    // v1 counts is a typed `Err`, never a wrapped/overrun cursor.
379    c.skip(checked_block_len(&v1_counts, 4)?)?;
380    let (v2_version, v2_counts) = read_header(&mut c)?;
381    let (transitions, types, leaps, raw) = read_block(&mut c, &v2_counts, 8)?;
382
383    // Footer: `\n` <TZ> `\n` occupying the rest of the file.
384    let tail = &c.buf[c.pos..];
385    let footer = parse_footer(tail)?;
386
387    Ok(ParsedTzif {
388        version: v2_version,
389        types,
390        transitions,
391        leaps,
392        footer,
393        counts: v2_counts,
394        raw,
395    })
396}
397
398/// Extract the POSIX TZ string from the trailing `\n<TZ>\n` footer.
399fn parse_footer(tail: &[u8]) -> Result<String> {
400    if tail.is_empty() {
401        return Ok(String::new());
402    }
403    if tail[0] != b'\n' || tail[tail.len() - 1] != b'\n' {
404        return Err(Error::message("malformed TZif footer"));
405    }
406    let inner = &tail[1..tail.len() - 1];
407    String::from_utf8(inner.to_vec()).map_err(|_| Error::message("non-UTF-8 footer"))
408}
409
410#[cfg(test)]
411mod tests {
412    use super::*;
413    use crate::tzif::{write_bytes, TzifData};
414
415    #[test]
416    fn round_trip_fixed_offset() {
417        let data = TzifData::fixed(-18000, "EST", "EST5");
418        let bytes = write_bytes(&data).unwrap();
419        let parsed = parse(&bytes).unwrap();
420        assert_eq!(parsed.version, b'2');
421        assert_eq!(parsed.transitions.len(), 0);
422        assert_eq!(parsed.types.len(), 1);
423        assert_eq!(parsed.types[0].utoff, -18000);
424        assert!(!parsed.types[0].is_dst);
425        assert_eq!(parsed.types[0].abbr, "EST");
426        assert_eq!(parsed.footer, "EST5");
427    }
428
429    #[test]
430    fn round_trip_utc() {
431        let data = TzifData::fixed(0, "UTC", "UTC0");
432        let parsed = parse(&write_bytes(&data).unwrap()).unwrap();
433        assert_eq!(parsed.types[0].abbr, "UTC");
434        assert_eq!(parsed.footer, "UTC0");
435    }
436
437    /// Hand-build a minimal v1-only TZif (`version` byte `0x00` → the v1-block-is-data path) with
438    /// exactly one transition and one local-time type, where the transition's type index is
439    /// `trans_type_index`. With `typecnt == 1`, index `0` is the only valid value; anything `>= 1`
440    /// is out of range. Lets the regression test drive the exact byte shape that previously fed an
441    /// out-of-bounds index downstream.
442    fn v1_tzif_one_transition(trans_type_index: u8) -> Vec<u8> {
443        let mut b = Vec::new();
444        b.extend_from_slice(&MAGIC); // "TZif"
445        b.push(0); // version 0 → v1-only path
446        b.extend_from_slice(&[0u8; 15]); // reserved
447                                         // counts: isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt
448        for v in [0u32, 0, 0, 1, 1, 4] {
449            b.extend_from_slice(&v.to_be_bytes());
450        }
451        b.extend_from_slice(&0i32.to_be_bytes()); // 1 transition time (4 bytes, v1)
452        b.push(trans_type_index); // 1 transition type index
453        b.extend_from_slice(&0i32.to_be_bytes()); // ttinfo[0].utoff
454        b.push(0); // ttinfo[0].is_dst
455        b.push(0); // ttinfo[0].desigidx
456        b.extend_from_slice(b"UTC\0"); // designation table (charcnt = 4)
457        b
458    }
459
460    #[test]
461    fn valid_transition_type_index_parses() {
462        // Index 0 is in range (typecnt == 1) → parses cleanly.
463        let parsed = parse(&v1_tzif_one_transition(0)).unwrap();
464        assert_eq!(parsed.transitions.len(), 1);
465        assert_eq!(parsed.transitions[0].type_index, 0);
466        assert_eq!(parsed.types.len(), 1);
467    }
468
469    /// Hand-build a v1-only TZif header with arbitrary counts and a chosen body length (T17.5). Lets a
470    /// test declare an implausibly-large count with a tiny body.
471    fn v1_header_with_counts(
472        isutcnt: u32,
473        isstdcnt: u32,
474        leapcnt: u32,
475        timecnt: u32,
476        typecnt: u32,
477        charcnt: u32,
478        body_len: usize,
479    ) -> Vec<u8> {
480        let mut b = Vec::new();
481        b.extend_from_slice(&MAGIC);
482        b.push(0); // version 0 → v1-only path
483        b.extend_from_slice(&[0u8; 15]);
484        for v in [isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt] {
485            b.extend_from_slice(&v.to_be_bytes());
486        }
487        b.extend(std::iter::repeat(0u8).take(body_len));
488        b
489    }
490
491    #[test]
492    fn implausibly_large_declared_count_is_rejected_not_ooming() {
493        // T17.5: a header claiming a billion transitions with only a few body bytes must be a typed
494        // `Err` *before* any `Vec::with_capacity(timecnt)` — never a multi-gigabyte allocation/abort.
495        let bytes = v1_header_with_counts(0, 0, 0, 1_000_000_000, 1, 4, 8);
496        let err = parse(&bytes).unwrap_err();
497        assert!(
498            err.to_string()
499                .contains("more bytes than the input contains")
500                || err.to_string().contains("truncated"),
501            "expected a counts-exceed-input rejection, got: {err}"
502        );
503    }
504
505    #[test]
506    fn count_block_len_is_checked_arithmetic() {
507        // The block-length arithmetic is checked: a maximal `timecnt` does not wrap; on 64-bit it is a
508        // large-but-finite value that simply exceeds any real input (→ rejected), never a wrap that
509        // under-computes the length and mis-slices.
510        let needed = checked_block_len(
511            &Counts {
512                isutcnt: 0,
513                isstdcnt: 0,
514                leapcnt: 0,
515                timecnt: u32::MAX,
516                typecnt: 1,
517                charcnt: 4,
518            },
519            8,
520        )
521        .unwrap();
522        // u32::MAX transitions × (8 bytes time + 1 byte index) is ~38.6e9 — far past any real file.
523        assert!(needed > 30_000_000_000);
524    }
525
526    #[test]
527    fn out_of_range_transition_type_index_is_rejected_not_panic() {
528        // T17.1 regression: a transition referencing type index 1 when only type 0 exists must be a
529        // typed Err, never a panic. This is the exact shape that would otherwise reach
530        // `types[type_index]` in `compare::semantic::diff` / `compile::leap`.
531        let err = parse(&v1_tzif_one_transition(1)).unwrap_err();
532        assert!(
533            err.to_string().contains("type index 1 out of range"),
534            "expected an out-of-range rejection, got: {err}"
535        );
536        // A wildly out-of-range index (max u8) is equally rejected, not indexed.
537        assert!(parse(&v1_tzif_one_transition(255)).is_err());
538    }
539
540    #[test]
541    fn rfc_designation_index_validity_rules() {
542        assert!(rfc_designation_index_valid(0, 4, true));
543        assert!(!rfc_designation_index_valid(4, 4, true)); // idx == charcnt: slice-safe but NOT RFC-valid
544        assert!(!rfc_designation_index_valid(0, 0, true)); // empty designation table
545        assert!(!rfc_designation_index_valid(0, 4, false)); // no NUL terminator at/after idx
546        assert!(abbr_index_slice_safe(3, 4) && rfc_designation_index_valid(3, 4, true));
547        // RFC-valid ⇒ slice-safe
548    }
549
550    #[test]
551    fn isdst_byte_validity_rules() {
552        assert!(isdst_byte_valid(0) && isdst_byte_valid(1));
553        assert!(!isdst_byte_valid(2) && !isdst_byte_valid(255));
554    }
555
556    #[test]
557    fn indicator_pair_validity_rules() {
558        assert!(
559            indicator_pair_valid(0, 0) && indicator_pair_valid(0, 1) && indicator_pair_valid(1, 1)
560        );
561        assert!(!indicator_pair_valid(1, 0)); // UT indicator without its standard indicator
562        assert!(!indicator_pair_valid(2, 0) && !indicator_pair_valid(0, 2)); // byte not in {0,1}
563    }
564
565    #[test]
566    fn utoff_structural_validity_rules() {
567        assert!(
568            utoff_structural_valid(0)
569                && utoff_structural_valid(-18000)
570                && utoff_structural_valid(i32::MAX)
571        );
572        assert!(!utoff_structural_valid(i32::MIN));
573    }
574}
575
576/// T23.kani.3 — bounded proof that the TZif parser never panics (`audits/kani`).
577///
578/// Compiled **only** under `--cfg kani` (set by the Kani compiler), so it has zero effect on the normal
579/// `cargo build`/`test`/`clippy` gate — it is `cfg`-gated out everywhere else; `cargo fmt` still formats it.
580/// Over an arbitrary byte array up to a declared bound, [`parse`] must return `Ok`/`Err` **without panicking
581/// or exhibiting UB**. This is the *proving* complement to `audits/panic-analysis` (static census) and
582/// `audits/miri` (concrete-test execution): a bounded symbolic proof over the invariant itself.
583///
584/// **Bounded proof — what it does NOT establish:** semantic correctness, full RFC-9636 validity,
585/// reference-`zic` parity, inputs longer than the bound, resource-exhaustion freedom, or filesystem safety.
586#[cfg(kani)]
587mod kani_harness {
588    use super::{
589        abbr_index_slice_safe, checked_block_len, first_oob_type_index, indicator_pair_valid,
590        isdst_byte_valid, rfc_designation_index_valid, utoff_structural_valid, Counts, Cursor,
591    };
592
593    /// **T23.kani.3b — the abbreviation-index guard is SLICE-SAFE (memory safety, NOT RFC validity).** For
594    /// an arbitrary table and `idx`, when `abbr_index_slice_safe` accepts the index, `&table[idx..]` is an
595    /// in-bounds (possibly empty) slice — so `read_cstr` never panics on a hostile `ttinfo` designation
596    /// index; when it rejects, `idx > table.len()`. Alloc-free, loop-free → converges. **This proves Rust
597    /// slice safety only — NOT RFC 9636 §3.2 designation-index validity** (`idx < charcnt` + NUL at/after +
598    /// `charcnt != 0`), which is a separate, tracked gap (`audits/claim-boundary-map.md`).
599    #[kani::proof]
600    fn abbr_index_guard_prevents_oob_slice() {
601        let table: [u8; 8] = kani::any();
602        let idx: usize = kani::any();
603        if abbr_index_slice_safe(idx, table.len()) {
604            let _slice = &table[idx..]; // CBMC proves this slice index is in-bounds under the guard
605            assert!(idx <= table.len());
606        } else {
607            assert!(idx > table.len());
608        }
609    }
610
611    /// **T23.kani.3a — the transition type-index guard is sound (RFC 9636 §3.2 / the T17.1a OOB-index
612    /// guard, `RISK.COUNT.1`).** For an arbitrary index slice and any `typecnt`, `first_oob_type_index`
613    /// returns `None` **iff** every index is `< typecnt` — i.e. a `None` is exactly the precondition that
614    /// makes the downstream `types[type_index]` indexing in-bounds — and on `Some(p)` the reported index is
615    /// genuinely the first out-of-range one. Alloc-free + format-free → converges. This is the formal form
616    /// of what `audits/miri` only checks on concrete fixtures and what `parse` enforces at the choke point.
617    #[kani::proof]
618    #[kani::unwind(5)]
619    fn type_index_guard_is_sound() {
620        let idxs: [u8; 4] = kani::any();
621        let typecnt: usize = kani::any();
622        match first_oob_type_index(&idxs, typecnt) {
623            None => {
624                // soundness: no out-of-range index ⇒ every index is a valid offset into a
625                // `typecnt`-length type table, so the downstream `types[idx]` can never be OOB.
626                for &i in idxs.iter() {
627                    assert!((i as usize) < typecnt);
628                }
629            }
630            Some(p) => {
631                assert!(p < idxs.len()); // a real position
632                assert!((idxs[p] as usize) >= typecnt); // and it is genuinely out of range
633            }
634        }
635    }
636
637    /// **T23.kani.2 — `Cursor::take` never panics and preserves the bounds invariant.** For an arbitrary
638    /// buffer, start `pos ≤ len`, and requested `n`: `take` returns `Ok` only with `pos' = pos + n ≤ len`
639    /// (no wrap — `checked_add`) and never indexes out of bounds; on `Err` it leaves `pos` unchanged.
640    /// Alloc-free, loop-free → CBMC converges. (Risk: section-offset accumulation / truncation, T17.5.)
641    #[kani::proof]
642    fn take_never_panics_and_preserves_bounds() {
643        let buf: [u8; 8] = kani::any();
644        let pos: usize = kani::any();
645        kani::assume(pos <= buf.len());
646        let mut c = Cursor { buf: &buf, pos };
647        let n: usize = kani::any();
648        match c.take(n) {
649            Ok(s) => {
650                assert!(s.len() == n);
651                assert!(c.pos == pos + n);
652                assert!(c.pos <= buf.len());
653            }
654            Err(_) => assert!(c.pos == pos),
655        }
656    }
657
658    /// **T23.kani.2 — `Cursor::skip` never panics and never advances past the end** (the count-driven v1-block
659    /// skip, T17.5): `Ok` ⇒ `pos' = pos + n ≤ len` (no wrap); `Err` ⇒ `pos` unchanged.
660    #[kani::proof]
661    fn skip_never_panics_and_preserves_bounds() {
662        let buf: [u8; 8] = kani::any();
663        let pos: usize = kani::any();
664        kani::assume(pos <= buf.len());
665        let mut c = Cursor { buf: &buf, pos };
666        let n: usize = kani::any();
667        match c.skip(n) {
668            Ok(()) => assert!(c.pos == pos + n && c.pos <= buf.len()),
669            Err(_) => assert!(c.pos == pos),
670        }
671    }
672
673    /// **T23.kani.2 — the pre-allocation bound is sound.** `remaining()` is exactly the bytes left, and any
674    /// block whose length is `≤ remaining()` can always be skipped without truncation. This is the link
675    /// the T17.5 guard relies on: *`count × size ≤ remaining()` ⇒ those bytes are physically present*, so a
676    /// passing bound check never lets a later read run off the end.
677    #[kani::proof]
678    fn skip_within_remaining_cannot_truncate() {
679        let buf: [u8; 8] = kani::any();
680        let pos: usize = kani::any();
681        kani::assume(pos <= buf.len());
682        let mut c = Cursor { buf: &buf, pos };
683        let r = c.remaining();
684        assert!(pos + r == buf.len()); // `remaining` is exactly what is left (no panic, no wrap)
685        let n: usize = kani::any();
686        kani::assume(n <= r);
687        assert!(c.skip(n).is_ok()); // anything within `remaining` is skippable
688        assert!(c.pos <= buf.len());
689    }
690
691    /// **T23.kani.1 (converges) — the T17.5 count-arithmetic guard never panics/overflow-wraps.** For *all*
692    /// symbolic header counts and `time_size ∈ {4, 8}`, [`checked_block_len`] returns `Ok(total)` or a typed
693    /// overflow `Err` — never a panic, never a wraparound (the `overflow-checks` hazard), never a giant
694    /// allocation (it computes a length, it does not allocate). This is the *tightest* form of the danger
695    /// surface `reports/t17-count-arithmetic-verdict.md` describes, and it is alloc-free + format-light so
696    /// CBMC converges. **Does not prove:** the full parser, semantic correctness, or anything about reads.
697    #[kani::proof]
698    fn checked_block_len_never_panics() {
699        let counts = Counts {
700            isutcnt: kani::any(),
701            isstdcnt: kani::any(),
702            leapcnt: kani::any(),
703            timecnt: kani::any(),
704            typecnt: kani::any(),
705            charcnt: kani::any(),
706        };
707        let time_size: usize = if kani::any() { 4 } else { 8 };
708        let _ = checked_block_len(&counts, time_size);
709    }
710
711    /// **T23.kani.3f.1 — RFC designation-index validity is exact and strictly implies slice-safety.** For an
712    /// arbitrary `(idx, charcnt, has_nul)`, `rfc_designation_index_valid` accepts iff `charcnt != 0 && idx <
713    /// charcnt && has_nul`; and whenever it accepts, the index is also `abbr_index_slice_safe` — so RFC
714    /// validity is strictly stronger than the T23.kani.3b slice-safety (no RFC-valid index is ever OOB).
715    #[kani::proof]
716    fn rfc_designation_index_valid_is_exact_and_implies_slice_safe() {
717        let idx: usize = kani::any();
718        let charcnt: usize = kani::any();
719        let has_nul: bool = kani::any();
720        let got = rfc_designation_index_valid(idx, charcnt, has_nul);
721        assert_eq!(got, charcnt != 0 && idx < charcnt && has_nul);
722        if got {
723            assert!(abbr_index_slice_safe(idx, charcnt)); // RFC-valid ⇒ slice-safe (idx < charcnt ⇒ idx ≤ charcnt)
724        }
725    }
726
727    /// **T23.kani.3f.2 — `isdst` byte structural validity (RFC: octet ∈ {0,1}).** Total over all `u8`; an
728    /// accepted byte is exactly a Rust `bool` (0 or 1), the only values for which the lenient `!= 0` reading
729    /// loses no information.
730    #[kani::proof]
731    fn isdst_byte_valid_is_exact() {
732        let b: u8 = kani::any();
733        let got = isdst_byte_valid(b);
734        assert_eq!(got, b <= 1);
735        if got {
736            assert!(b == 0 || b == 1);
737        }
738    }
739
740    /// **T23.kani.3f.3 — indicator-byte pairing (RFC: each ∈ {0,1} and `isut == 1 ⇒ isstd == 1`).** Total over
741    /// all `(u8, u8)`; an accepted pair never carries a UT indicator without its standard indicator.
742    #[kani::proof]
743    fn indicator_pair_valid_is_exact() {
744        let isut: u8 = kani::any();
745        let isstd: u8 = kani::any();
746        let got = indicator_pair_valid(isut, isstd);
747        assert_eq!(got, isut <= 1 && isstd <= 1 && !(isut == 1 && isstd == 0));
748        if got && isut == 1 {
749            assert!(isstd == 1);
750        }
751    }
752
753    /// **T23.kani.3f.4 — `utoff` structural validity (RFC forbids `i32::MIN`).** Total over all `i32`; an
754    /// accepted offset can always be negated without overflow — the concrete safety reason the RFC excludes
755    /// `-2^31` (its signed negation is the one value that wraps).
756    #[kani::proof]
757    fn utoff_structural_valid_excludes_unnegatable_min() {
758        let u: i32 = kani::any();
759        let got = utoff_structural_valid(u);
760        assert_eq!(got, u != i32::MIN);
761        if got {
762            assert!(u.checked_neg().is_some()); // valid ⇒ negation cannot overflow
763        }
764    }
765}
tzcompile/tzif/validate.rs

tzcompile/tzif/
validate.rs