tzcompile/tzif/validate.rs
1//! Reading TZif back into semantics.
2//!
3//! This is the inverse of the writer, and it pulls double duty:
4//!
5//! * the writer's tests round-trip their own output through [`parse`] to prove the bytes
6//! decode to what went in; and
7//! * the oracle (`compare::semantic`) decodes **reference `zic`** output with the very same
8//! code, so a semantic diff compares like with like.
9//!
10//! We decode the v2+ block (the authoritative data in modern files) plus the footer. The
11//! v1 block is parsed only enough to know how many bytes to skip — in slim files it is the
12//! placeholder stub described in `tzif/mod.rs`, so its contents are not meaningful.
13
14use super::header::{Counts, MAGIC};
15use super::{LeapRecord, LocalTimeType, Transition};
16use crate::error::{Error, Result};
17
18/// The semantically-meaningful contents decoded from a TZif file.
19#[derive(Debug, Clone, PartialEq, Eq)]
20pub struct ParsedTzif {
21 pub version: u8,
22 pub types: Vec<LocalTimeType>,
23 pub transitions: Vec<Transition>,
24 /// Decoded leap-second records of the authoritative block (T11.3): (occurrence, cumulative
25 /// correction). Empty for ordinary zones.
26 pub leaps: Vec<LeapRecord>,
27 pub footer: String,
28 /// The raw count fields of the *authoritative* block's header (the v2+ block for a v2+
29 /// file; the sole block for a v1-only file). Exposed so the structural-parity inventory
30 /// (T8) can compare `timecnt`/`typecnt`/`charcnt`/`isutcnt`/`isstdcnt`/`leapcnt` against
31 /// reference `zic` without re-decoding the header.
32 pub counts: Counts,
33 /// Raw structural bytes the RFC-9636 validator needs but which `parse` otherwise collapses or skips
34 /// (T23.kani.3f.enforce): per-type `isdst`/`desigidx` octets, the designation table, and the std/UT
35 /// indicator arrays. `parse` stays memory-safe-*lenient* (it does not reject on these); the strict RFC
36 /// byte-validity rules are applied by `rfc9636::validate` over this. (`utoff` lives in [`types`].)
37 pub raw: RawStructural,
38}
39
40/// Raw structural fields of the authoritative block, captured for the RFC-9636 validator. *Memory-safe ≠
41/// format-valid:* `parse` does not reject on these byte values; `rfc9636::validate` applies the strict
42/// RFC 9636 §3.2 rules (`isdst ∈ {0,1}`, designation-index validity, indicator byte values + `isut⇒isstd`
43/// pairing — the predicates proven by T23.kani.3f.1–.4). Indexed parallel to `ParsedTzif::types`.
44#[derive(Debug, Clone, PartialEq, Eq, Default)]
45pub struct RawStructural {
46 pub isdst: Vec<u8>,
47 pub desigidx: Vec<u8>,
48 pub designation: Vec<u8>,
49 pub std_indicators: Vec<u8>,
50 pub ut_indicators: Vec<u8>,
51}
52
53/// A tiny cursor over the input with bounds-checked reads.
54struct Cursor<'a> {
55 buf: &'a [u8],
56 pos: usize,
57}
58
59impl<'a> Cursor<'a> {
60 fn new(buf: &'a [u8]) -> Self {
61 Cursor { buf, pos: 0 }
62 }
63
64 fn take(&mut self, n: usize) -> Result<&'a [u8]> {
65 let end = self
66 .pos
67 .checked_add(n)
68 .ok_or_else(|| Error::message("TZif length overflow"))?;
69 if end > self.buf.len() {
70 return Err(Error::message("TZif truncated"));
71 }
72 let s = &self.buf[self.pos..end];
73 self.pos = end;
74 Ok(s)
75 }
76
77 fn u32(&mut self) -> Result<u32> {
78 let b = self.take(4)?;
79 Ok(u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
80 }
81
82 /// Bytes not yet consumed. Used (T17.5) to bound an untrusted declared count against the data that
83 /// is *physically present* before allocating for it.
84 fn remaining(&self) -> usize {
85 self.buf.len().saturating_sub(self.pos)
86 }
87
88 /// Advance past `n` bytes (T17.5: checked — a count-driven skip, e.g. the v1 block, must not
89 /// overflow `pos` nor land past the buffer end; either is a typed `Err`, never a wrap or a silent
90 /// out-of-range cursor).
91 fn skip(&mut self, n: usize) -> Result<()> {
92 let end = self
93 .pos
94 .checked_add(n)
95 .ok_or_else(|| Error::message("TZif length overflow"))?;
96 if end > self.buf.len() {
97 return Err(Error::message(
98 "TZif truncated (a counted block extends past the input)",
99 ));
100 }
101 self.pos = end;
102 Ok(())
103 }
104}
105
106/// Read a 44-byte header, validating the magic, and return its version byte and counts.
107fn read_header(c: &mut Cursor<'_>) -> Result<(u8, Counts)> {
108 let magic = c.take(4)?;
109 if magic != MAGIC {
110 return Err(Error::message("bad TZif magic"));
111 }
112 let version = c.take(1)?[0];
113 let _reserved = c.take(15)?;
114 let counts = Counts {
115 isutcnt: c.u32()?,
116 isstdcnt: c.u32()?,
117 leapcnt: c.u32()?,
118 timecnt: c.u32()?,
119 typecnt: c.u32()?,
120 charcnt: c.u32()?,
121 };
122 Ok((version, counts))
123}
124
125/// Number of bytes a data block occupies for the given counts and `time_size`.
126///
127/// **T17.5 (CountArithmeticVerdict):** every term is `count × element-size`, where the counts are
128/// **untrusted `u32`s read straight from the header**. The arithmetic is fully **checked** — a header
129/// claiming `timecnt = u32::MAX` (× 8 + the other terms) overflows `usize` on a 32-bit target and is a
130/// large value on 64-bit; either way an overflow is a typed `Err`, never a wrap (which on 32-bit would
131/// under-compute the length and mis-slice) and never a panic (which `overflow-checks` would otherwise
132/// raise). *Counts from input are hostile until range-checked.*
133fn checked_block_len(counts: &Counts, time_size: usize) -> Result<usize> {
134 let oflow =
135 || Error::message("TZif count arithmetic overflow (declared counts are implausibly large)");
136 // Accumulate `count * size` terms with checked math.
137 let term = |count: u32, size: usize| count_mul(count, size).ok_or_else(oflow);
138 let mut total: usize = 0;
139 for t in [
140 term(counts.timecnt, time_size)?, // transition times
141 term(counts.timecnt, 1)?, // transition type indices
142 term(counts.typecnt, 6)?, // ttinfo records
143 term(counts.charcnt, 1)?, // designation table
144 term(counts.leapcnt, time_size + 4)?, // leap records
145 term(counts.isstdcnt, 1)?, // std/wall indicators
146 term(counts.isutcnt, 1)?, // ut/local indicators
147 ] {
148 total = total.checked_add(t).ok_or_else(oflow)?;
149 }
150 Ok(total)
151}
152
153/// `count (u32) * size (usize)` with overflow → `None` (T17.5).
154fn count_mul(count: u32, size: usize) -> Option<usize> {
155 (count as usize).checked_mul(size)
156}
157
158/// The position of the first transition type-index that is **out of range** for a `typecnt`-length
159/// local-time-type table, or `None` if every index is in range (RFC 9636 §3.2).
160///
161/// Pure, allocation-free, and format-free **by design** so it can be bounded-model-checked in isolation
162/// (`audits/kani`, T23.kani.3a) — the proof shows `None` ⟺ every index is `< typecnt`, which is exactly the
163/// precondition that makes the downstream `types[type_index]` indexing safe. The caller ([`read_block`])
164/// turns a `Some(pos)` into the typed error (the `format!` stays out of the proven helper).
165fn first_oob_type_index(type_indices: &[u8], typecnt: usize) -> Option<usize> {
166 type_indices.iter().position(|&i| (i as usize) >= typecnt)
167}
168
169/// Whether a designation (abbreviation) index is a **slice-safe** start offset into a `table_len`-byte
170/// abbreviation table: `idx <= table_len`, so `&table[idx..]` is an in-bounds (possibly empty) slice and
171/// [`read_cstr`] cannot panic on a hostile `ttinfo` designation index. Pure + alloc-free → BMC-checkable
172/// (`audits/kani`, T23.kani.3b proves exactly this).
173///
174/// **Precision boundary (memory-safe ≠ format-valid):** this is *Rust slice safety*, **not** RFC 9636
175/// designation-index *validity*. RFC 9636 §3.2 is stricter — a conformant `desigidx` must be `< charcnt`
176/// (not merely `<= charcnt`), with a NUL octet at or after it, and `charcnt` itself must be non-zero. The
177/// reader is intentionally memory-safe-*lenient* here (an out-of-range-but-in-bounds index yields an empty
178/// or unterminated abbreviation, never a panic); enforcing the stricter RFC structural rule is the RFC-9636
179/// validator's job (`tzif/rfc9636.rs`) and is currently a tracked gap (see `audits/claim-boundary-map.md`).
180fn abbr_index_slice_safe(idx: usize, table_len: usize) -> bool {
181 idx <= table_len
182}
183
184/// RFC 9636 §3.2 designation-index **validity** (stricter than the slice-safety of `abbr_index_slice_safe`):
185/// a conformant `ttinfo` designation index must be `< charcnt` (not merely `<= charcnt`), the designation
186/// table must be non-empty (`charcnt != 0`), and a NUL terminator must exist at or after `idx`. The reader is
187/// intentionally memory-safe-*lenient* here; this is the proven basis for the stricter RFC check (enforcement
188/// in `rfc9636::validate` — now **enforced** there, T23.kani.3f.enforce). Pure → BMC-checkable (T23.kani.3f.1).
189pub(crate) fn rfc_designation_index_valid(
190 idx: usize,
191 charcnt: usize,
192 has_nul_at_or_after: bool,
193) -> bool {
194 charcnt != 0 && idx < charcnt && has_nul_at_or_after
195}
196
197/// RFC 9636 §3.2: a `ttinfo` `isdst` octet is structurally valid iff it is 0 or 1. `parse` collapses it to a
198/// `bool` via `!= 0`, so the raw byte value is unchecked today — this is the proven basis for the stricter
199/// check (T23.kani.3f.2). Pure.
200pub(crate) fn isdst_byte_valid(isdst: u8) -> bool {
201 isdst <= 1
202}
203
204/// RFC 9636 §3.2 indicator-byte pairing: the std/wall (`isstd`) and UT/local (`isut`) octets must each be 0 or
205/// 1, and a UT indicator is only meaningful with a standard indicator: `isut == 1 ⇒ isstd == 1`. `parse` reads
206/// only the indicator *counts*, not these byte values — proven basis for the stricter check (T23.kani.3f.3).
207pub(crate) fn indicator_pair_valid(isut: u8, isstd: u8) -> bool {
208 isut <= 1 && isstd <= 1 && (isut == 0 || isstd == 1)
209}
210
211/// RFC 9636 §3.2: a `ttinfo` `utoff` (UT offset, seconds) must not be `-2^31` (`i32::MIN`) — the one value
212/// whose signed negation overflows, which would break offset arithmetic. Proven basis for the stricter check
213/// (T23.kani.3f.4). Pure.
214pub(crate) fn utoff_structural_valid(utoff: i32) -> bool {
215 utoff != i32::MIN
216}
217
218/// Decode a full data block (transitions + types) given its counts and time size.
219// The 4-tuple is a private decode result threaded straight into `parse`'s two call sites; a named struct
220// would be ceremony for one internal path, so the complexity lint is allowed here with this note.
221#[allow(clippy::type_complexity)]
222fn read_block(
223 c: &mut Cursor<'_>,
224 counts: &Counts,
225 time_size: usize,
226) -> Result<(
227 Vec<Transition>,
228 Vec<LocalTimeType>,
229 Vec<LeapRecord>,
230 RawStructural,
231)> {
232 // T17.5 — bound the *untrusted declared counts* against the data actually present BEFORE allocating
233 // for them. The block's full byte length (checked arithmetic) must fit in the remaining input; a
234 // header claiming `timecnt = 4e9` with a 100-byte file is rejected here, so `Vec::with_capacity(tc)`
235 // below can never be asked to reserve gigabytes from a tiny file (a real DoS otherwise — the
236 // allocation happens up front, before the per-element `take()` bounds checks would ever fire).
237 let needed = checked_block_len(counts, time_size)?;
238 if needed > c.remaining() {
239 return Err(Error::message(
240 "TZif truncated: declared counts require more bytes than the input contains",
241 ));
242 }
243
244 let tc = counts.timecnt as usize;
245 let ty = counts.typecnt as usize;
246
247 // 1. transition times. `with_capacity(tc)` is now safe: the check above proved the whole block —
248 // including `tc * time_size` bytes — fits in the remaining input, so `tc` is bounded by the file size.
249 let mut times = Vec::with_capacity(tc);
250 for _ in 0..tc {
251 let b = c.take(time_size)?;
252 let at = match time_size {
253 4 => i32::from_be_bytes([b[0], b[1], b[2], b[3]]) as i64,
254 8 => i64::from_be_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
255 _ => return Err(Error::message("bad time size")),
256 };
257 times.push(at);
258 }
259 // 2. transition type indices.
260 let idxs = c.take(tc)?.to_vec();
261 // 3. ttinfo records.
262 let mut raw_types = Vec::with_capacity(ty);
263 let mut raw_isdst = Vec::with_capacity(ty);
264 let mut raw_desigidx = Vec::with_capacity(ty);
265 for _ in 0..ty {
266 let b = c.take(6)?;
267 let utoff = i32::from_be_bytes([b[0], b[1], b[2], b[3]]);
268 let is_dst = b[4] != 0;
269 let desigidx = b[5] as usize;
270 // Capture the raw `isdst`/`desigidx` octets for the RFC-9636 validator (parse stays lenient:
271 // `is_dst` is `b[4] != 0`, so `isdst == 2` would be silently `true` without this).
272 raw_isdst.push(b[4]);
273 raw_desigidx.push(b[5]);
274 raw_types.push((utoff, is_dst, desigidx));
275 }
276 // 4. designation table.
277 let table = c.take(counts.charcnt as usize)?;
278 let designation = table.to_vec();
279 // 5. leap-second records — decoded (T11.3): (occurrence, cumulative correction).
280 let mut leaps = Vec::with_capacity(counts.leapcnt as usize);
281 for _ in 0..counts.leapcnt as usize {
282 let tb = c.take(time_size)?;
283 let trans = match time_size {
284 4 => i32::from_be_bytes([tb[0], tb[1], tb[2], tb[3]]) as i64,
285 8 => i64::from_be_bytes([tb[0], tb[1], tb[2], tb[3], tb[4], tb[5], tb[6], tb[7]]),
286 _ => return Err(Error::message("bad time size")),
287 };
288 let cb = c.take(4)?;
289 let corr = i32::from_be_bytes([cb[0], cb[1], cb[2], cb[3]]);
290 leaps.push(LeapRecord { trans, corr });
291 }
292 // 6/7. standard/wall + UT/local indicators — captured for the RFC-9636 validator (byte values + the
293 // `isut ⇒ isstd` pairing; `parse` itself does not enforce them — memory-safe ≠ format-valid).
294 let std_indicators = c.take(counts.isstdcnt as usize)?.to_vec();
295 let ut_indicators = c.take(counts.isutcnt as usize)?.to_vec();
296
297 // Resolve designation indices to strings (NUL-terminated from the index).
298 let types = raw_types
299 .into_iter()
300 .map(|(utoff, is_dst, idx)| {
301 let abbr = read_cstr(table, idx)?;
302 Ok(LocalTimeType {
303 utoff,
304 is_dst,
305 abbr,
306 })
307 })
308 .collect::<Result<Vec<_>>>()?;
309
310 // T17.1 bounds-guard: every transition's type index must point at a real local-time type.
311 // `parse` is the single choke point before any consumer (`compare::semantic::diff`,
312 // `compile::leap`) indexes `types[transition.type_index as usize]`, so enforcing the RFC 9636
313 // §3.2 invariant (transition type indices < `typecnt`) HERE turns a malformed/hostile TZif into
314 // a typed rejection instead of a latent out-of-bounds panic downstream. (`typecnt == 0` with any
315 // transition is caught by the same check, since no index can be `< 0`.) The decision is extracted
316 // into the pure [`first_oob_type_index`] so it can be bounded-model-checked alone (`audits/kani`,
317 // T23.kani.3a) without dragging the allocating parse path through the solver; the `format!` for the
318 // error stays here (out of the proven helper). Checked on the raw `idxs` before they are zipped.
319 if let Some(pos) = first_oob_type_index(&idxs, types.len()) {
320 return Err(Error::message(format!(
321 "TZif transition type index {} out of range (typecnt {})",
322 idxs[pos],
323 types.len()
324 )));
325 }
326
327 let transitions: Vec<Transition> = times
328 .into_iter()
329 .zip(idxs)
330 .map(|(at, type_index)| Transition { at, type_index })
331 .collect();
332
333 Ok((
334 transitions,
335 types,
336 leaps,
337 RawStructural {
338 isdst: raw_isdst,
339 desigidx: raw_desigidx,
340 designation,
341 std_indicators,
342 ut_indicators,
343 },
344 ))
345}
346
347/// Read a NUL-terminated abbreviation starting at `idx` within the designation table.
348fn read_cstr(table: &[u8], idx: usize) -> Result<String> {
349 if !abbr_index_slice_safe(idx, table.len()) {
350 return Err(Error::message("designation index out of range"));
351 }
352 let rest = &table[idx..];
353 let end = rest.iter().position(|&b| b == 0).unwrap_or(rest.len());
354 String::from_utf8(rest[..end].to_vec()).map_err(|_| Error::message("non-UTF-8 abbreviation"))
355}
356
357/// Parse a complete TZif file into its semantically-meaningful contents.
358pub fn parse(bytes: &[u8]) -> Result<ParsedTzif> {
359 let mut c = Cursor::new(bytes);
360 let (version, v1_counts) = read_header(&mut c)?;
361
362 if version == 0 {
363 // A v1-only file: the v1 block *is* the data; there is no v2 block or footer.
364 let (transitions, types, leaps, raw) = read_block(&mut c, &v1_counts, 4)?;
365 return Ok(ParsedTzif {
366 version,
367 types,
368 transitions,
369 leaps,
370 footer: String::new(),
371 counts: v1_counts,
372 raw,
373 });
374 }
375
376 // Skip the v1 stub block, then decode the authoritative v2+ block. T17.5: the skip distance is a
377 // checked count×size sum, and `skip` verifies it stays within the buffer — a header with implausible
378 // v1 counts is a typed `Err`, never a wrapped/overrun cursor.
379 c.skip(checked_block_len(&v1_counts, 4)?)?;
380 let (v2_version, v2_counts) = read_header(&mut c)?;
381 let (transitions, types, leaps, raw) = read_block(&mut c, &v2_counts, 8)?;
382
383 // Footer: `\n` <TZ> `\n` occupying the rest of the file.
384 let tail = &c.buf[c.pos..];
385 let footer = parse_footer(tail)?;
386
387 Ok(ParsedTzif {
388 version: v2_version,
389 types,
390 transitions,
391 leaps,
392 footer,
393 counts: v2_counts,
394 raw,
395 })
396}
397
398/// Extract the POSIX TZ string from the trailing `\n<TZ>\n` footer.
399fn parse_footer(tail: &[u8]) -> Result<String> {
400 if tail.is_empty() {
401 return Ok(String::new());
402 }
403 if tail[0] != b'\n' || tail[tail.len() - 1] != b'\n' {
404 return Err(Error::message("malformed TZif footer"));
405 }
406 let inner = &tail[1..tail.len() - 1];
407 String::from_utf8(inner.to_vec()).map_err(|_| Error::message("non-UTF-8 footer"))
408}
409
410#[cfg(test)]
411mod tests {
412 use super::*;
413 use crate::tzif::{write_bytes, TzifData};
414
415 #[test]
416 fn round_trip_fixed_offset() {
417 let data = TzifData::fixed(-18000, "EST", "EST5");
418 let bytes = write_bytes(&data).unwrap();
419 let parsed = parse(&bytes).unwrap();
420 assert_eq!(parsed.version, b'2');
421 assert_eq!(parsed.transitions.len(), 0);
422 assert_eq!(parsed.types.len(), 1);
423 assert_eq!(parsed.types[0].utoff, -18000);
424 assert!(!parsed.types[0].is_dst);
425 assert_eq!(parsed.types[0].abbr, "EST");
426 assert_eq!(parsed.footer, "EST5");
427 }
428
429 #[test]
430 fn round_trip_utc() {
431 let data = TzifData::fixed(0, "UTC", "UTC0");
432 let parsed = parse(&write_bytes(&data).unwrap()).unwrap();
433 assert_eq!(parsed.types[0].abbr, "UTC");
434 assert_eq!(parsed.footer, "UTC0");
435 }
436
437 /// Hand-build a minimal v1-only TZif (`version` byte `0x00` → the v1-block-is-data path) with
438 /// exactly one transition and one local-time type, where the transition's type index is
439 /// `trans_type_index`. With `typecnt == 1`, index `0` is the only valid value; anything `>= 1`
440 /// is out of range. Lets the regression test drive the exact byte shape that previously fed an
441 /// out-of-bounds index downstream.
442 fn v1_tzif_one_transition(trans_type_index: u8) -> Vec<u8> {
443 let mut b = Vec::new();
444 b.extend_from_slice(&MAGIC); // "TZif"
445 b.push(0); // version 0 → v1-only path
446 b.extend_from_slice(&[0u8; 15]); // reserved
447 // counts: isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt
448 for v in [0u32, 0, 0, 1, 1, 4] {
449 b.extend_from_slice(&v.to_be_bytes());
450 }
451 b.extend_from_slice(&0i32.to_be_bytes()); // 1 transition time (4 bytes, v1)
452 b.push(trans_type_index); // 1 transition type index
453 b.extend_from_slice(&0i32.to_be_bytes()); // ttinfo[0].utoff
454 b.push(0); // ttinfo[0].is_dst
455 b.push(0); // ttinfo[0].desigidx
456 b.extend_from_slice(b"UTC\0"); // designation table (charcnt = 4)
457 b
458 }
459
460 #[test]
461 fn valid_transition_type_index_parses() {
462 // Index 0 is in range (typecnt == 1) → parses cleanly.
463 let parsed = parse(&v1_tzif_one_transition(0)).unwrap();
464 assert_eq!(parsed.transitions.len(), 1);
465 assert_eq!(parsed.transitions[0].type_index, 0);
466 assert_eq!(parsed.types.len(), 1);
467 }
468
469 /// Hand-build a v1-only TZif header with arbitrary counts and a chosen body length (T17.5). Lets a
470 /// test declare an implausibly-large count with a tiny body.
471 fn v1_header_with_counts(
472 isutcnt: u32,
473 isstdcnt: u32,
474 leapcnt: u32,
475 timecnt: u32,
476 typecnt: u32,
477 charcnt: u32,
478 body_len: usize,
479 ) -> Vec<u8> {
480 let mut b = Vec::new();
481 b.extend_from_slice(&MAGIC);
482 b.push(0); // version 0 → v1-only path
483 b.extend_from_slice(&[0u8; 15]);
484 for v in [isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt] {
485 b.extend_from_slice(&v.to_be_bytes());
486 }
487 b.extend(std::iter::repeat(0u8).take(body_len));
488 b
489 }
490
491 #[test]
492 fn implausibly_large_declared_count_is_rejected_not_ooming() {
493 // T17.5: a header claiming a billion transitions with only a few body bytes must be a typed
494 // `Err` *before* any `Vec::with_capacity(timecnt)` — never a multi-gigabyte allocation/abort.
495 let bytes = v1_header_with_counts(0, 0, 0, 1_000_000_000, 1, 4, 8);
496 let err = parse(&bytes).unwrap_err();
497 assert!(
498 err.to_string()
499 .contains("more bytes than the input contains")
500 || err.to_string().contains("truncated"),
501 "expected a counts-exceed-input rejection, got: {err}"
502 );
503 }
504
505 #[test]
506 fn count_block_len_is_checked_arithmetic() {
507 // The block-length arithmetic is checked: a maximal `timecnt` does not wrap; on 64-bit it is a
508 // large-but-finite value that simply exceeds any real input (→ rejected), never a wrap that
509 // under-computes the length and mis-slices.
510 let needed = checked_block_len(
511 &Counts {
512 isutcnt: 0,
513 isstdcnt: 0,
514 leapcnt: 0,
515 timecnt: u32::MAX,
516 typecnt: 1,
517 charcnt: 4,
518 },
519 8,
520 )
521 .unwrap();
522 // u32::MAX transitions × (8 bytes time + 1 byte index) is ~38.6e9 — far past any real file.
523 assert!(needed > 30_000_000_000);
524 }
525
526 #[test]
527 fn out_of_range_transition_type_index_is_rejected_not_panic() {
528 // T17.1 regression: a transition referencing type index 1 when only type 0 exists must be a
529 // typed Err, never a panic. This is the exact shape that would otherwise reach
530 // `types[type_index]` in `compare::semantic::diff` / `compile::leap`.
531 let err = parse(&v1_tzif_one_transition(1)).unwrap_err();
532 assert!(
533 err.to_string().contains("type index 1 out of range"),
534 "expected an out-of-range rejection, got: {err}"
535 );
536 // A wildly out-of-range index (max u8) is equally rejected, not indexed.
537 assert!(parse(&v1_tzif_one_transition(255)).is_err());
538 }
539
540 #[test]
541 fn rfc_designation_index_validity_rules() {
542 assert!(rfc_designation_index_valid(0, 4, true));
543 assert!(!rfc_designation_index_valid(4, 4, true)); // idx == charcnt: slice-safe but NOT RFC-valid
544 assert!(!rfc_designation_index_valid(0, 0, true)); // empty designation table
545 assert!(!rfc_designation_index_valid(0, 4, false)); // no NUL terminator at/after idx
546 assert!(abbr_index_slice_safe(3, 4) && rfc_designation_index_valid(3, 4, true));
547 // RFC-valid ⇒ slice-safe
548 }
549
550 #[test]
551 fn isdst_byte_validity_rules() {
552 assert!(isdst_byte_valid(0) && isdst_byte_valid(1));
553 assert!(!isdst_byte_valid(2) && !isdst_byte_valid(255));
554 }
555
556 #[test]
557 fn indicator_pair_validity_rules() {
558 assert!(
559 indicator_pair_valid(0, 0) && indicator_pair_valid(0, 1) && indicator_pair_valid(1, 1)
560 );
561 assert!(!indicator_pair_valid(1, 0)); // UT indicator without its standard indicator
562 assert!(!indicator_pair_valid(2, 0) && !indicator_pair_valid(0, 2)); // byte not in {0,1}
563 }
564
565 #[test]
566 fn utoff_structural_validity_rules() {
567 assert!(
568 utoff_structural_valid(0)
569 && utoff_structural_valid(-18000)
570 && utoff_structural_valid(i32::MAX)
571 );
572 assert!(!utoff_structural_valid(i32::MIN));
573 }
574}
575
576/// T23.kani.3 — bounded proof that the TZif parser never panics (`audits/kani`).
577///
578/// Compiled **only** under `--cfg kani` (set by the Kani compiler), so it has zero effect on the normal
579/// `cargo build`/`test`/`clippy` gate — it is `cfg`-gated out everywhere else; `cargo fmt` still formats it.
580/// Over an arbitrary byte array up to a declared bound, [`parse`] must return `Ok`/`Err` **without panicking
581/// or exhibiting UB**. This is the *proving* complement to `audits/panic-analysis` (static census) and
582/// `audits/miri` (concrete-test execution): a bounded symbolic proof over the invariant itself.
583///
584/// **Bounded proof — what it does NOT establish:** semantic correctness, full RFC-9636 validity,
585/// reference-`zic` parity, inputs longer than the bound, resource-exhaustion freedom, or filesystem safety.
586#[cfg(kani)]
587mod kani_harness {
588 use super::{
589 abbr_index_slice_safe, checked_block_len, first_oob_type_index, indicator_pair_valid,
590 isdst_byte_valid, rfc_designation_index_valid, utoff_structural_valid, Counts, Cursor,
591 };
592
593 /// **T23.kani.3b — the abbreviation-index guard is SLICE-SAFE (memory safety, NOT RFC validity).** For
594 /// an arbitrary table and `idx`, when `abbr_index_slice_safe` accepts the index, `&table[idx..]` is an
595 /// in-bounds (possibly empty) slice — so `read_cstr` never panics on a hostile `ttinfo` designation
596 /// index; when it rejects, `idx > table.len()`. Alloc-free, loop-free → converges. **This proves Rust
597 /// slice safety only — NOT RFC 9636 §3.2 designation-index validity** (`idx < charcnt` + NUL at/after +
598 /// `charcnt != 0`), which is a separate, tracked gap (`audits/claim-boundary-map.md`).
599 #[kani::proof]
600 fn abbr_index_guard_prevents_oob_slice() {
601 let table: [u8; 8] = kani::any();
602 let idx: usize = kani::any();
603 if abbr_index_slice_safe(idx, table.len()) {
604 let _slice = &table[idx..]; // CBMC proves this slice index is in-bounds under the guard
605 assert!(idx <= table.len());
606 } else {
607 assert!(idx > table.len());
608 }
609 }
610
611 /// **T23.kani.3a — the transition type-index guard is sound (RFC 9636 §3.2 / the T17.1a OOB-index
612 /// guard, `RISK.COUNT.1`).** For an arbitrary index slice and any `typecnt`, `first_oob_type_index`
613 /// returns `None` **iff** every index is `< typecnt` — i.e. a `None` is exactly the precondition that
614 /// makes the downstream `types[type_index]` indexing in-bounds — and on `Some(p)` the reported index is
615 /// genuinely the first out-of-range one. Alloc-free + format-free → converges. This is the formal form
616 /// of what `audits/miri` only checks on concrete fixtures and what `parse` enforces at the choke point.
617 #[kani::proof]
618 #[kani::unwind(5)]
619 fn type_index_guard_is_sound() {
620 let idxs: [u8; 4] = kani::any();
621 let typecnt: usize = kani::any();
622 match first_oob_type_index(&idxs, typecnt) {
623 None => {
624 // soundness: no out-of-range index ⇒ every index is a valid offset into a
625 // `typecnt`-length type table, so the downstream `types[idx]` can never be OOB.
626 for &i in idxs.iter() {
627 assert!((i as usize) < typecnt);
628 }
629 }
630 Some(p) => {
631 assert!(p < idxs.len()); // a real position
632 assert!((idxs[p] as usize) >= typecnt); // and it is genuinely out of range
633 }
634 }
635 }
636
637 /// **T23.kani.2 — `Cursor::take` never panics and preserves the bounds invariant.** For an arbitrary
638 /// buffer, start `pos ≤ len`, and requested `n`: `take` returns `Ok` only with `pos' = pos + n ≤ len`
639 /// (no wrap — `checked_add`) and never indexes out of bounds; on `Err` it leaves `pos` unchanged.
640 /// Alloc-free, loop-free → CBMC converges. (Risk: section-offset accumulation / truncation, T17.5.)
641 #[kani::proof]
642 fn take_never_panics_and_preserves_bounds() {
643 let buf: [u8; 8] = kani::any();
644 let pos: usize = kani::any();
645 kani::assume(pos <= buf.len());
646 let mut c = Cursor { buf: &buf, pos };
647 let n: usize = kani::any();
648 match c.take(n) {
649 Ok(s) => {
650 assert!(s.len() == n);
651 assert!(c.pos == pos + n);
652 assert!(c.pos <= buf.len());
653 }
654 Err(_) => assert!(c.pos == pos),
655 }
656 }
657
658 /// **T23.kani.2 — `Cursor::skip` never panics and never advances past the end** (the count-driven v1-block
659 /// skip, T17.5): `Ok` ⇒ `pos' = pos + n ≤ len` (no wrap); `Err` ⇒ `pos` unchanged.
660 #[kani::proof]
661 fn skip_never_panics_and_preserves_bounds() {
662 let buf: [u8; 8] = kani::any();
663 let pos: usize = kani::any();
664 kani::assume(pos <= buf.len());
665 let mut c = Cursor { buf: &buf, pos };
666 let n: usize = kani::any();
667 match c.skip(n) {
668 Ok(()) => assert!(c.pos == pos + n && c.pos <= buf.len()),
669 Err(_) => assert!(c.pos == pos),
670 }
671 }
672
673 /// **T23.kani.2 — the pre-allocation bound is sound.** `remaining()` is exactly the bytes left, and any
674 /// block whose length is `≤ remaining()` can always be skipped without truncation. This is the link
675 /// the T17.5 guard relies on: *`count × size ≤ remaining()` ⇒ those bytes are physically present*, so a
676 /// passing bound check never lets a later read run off the end.
677 #[kani::proof]
678 fn skip_within_remaining_cannot_truncate() {
679 let buf: [u8; 8] = kani::any();
680 let pos: usize = kani::any();
681 kani::assume(pos <= buf.len());
682 let mut c = Cursor { buf: &buf, pos };
683 let r = c.remaining();
684 assert!(pos + r == buf.len()); // `remaining` is exactly what is left (no panic, no wrap)
685 let n: usize = kani::any();
686 kani::assume(n <= r);
687 assert!(c.skip(n).is_ok()); // anything within `remaining` is skippable
688 assert!(c.pos <= buf.len());
689 }
690
691 /// **T23.kani.1 (converges) — the T17.5 count-arithmetic guard never panics/overflow-wraps.** For *all*
692 /// symbolic header counts and `time_size ∈ {4, 8}`, [`checked_block_len`] returns `Ok(total)` or a typed
693 /// overflow `Err` — never a panic, never a wraparound (the `overflow-checks` hazard), never a giant
694 /// allocation (it computes a length, it does not allocate). This is the *tightest* form of the danger
695 /// surface `reports/t17-count-arithmetic-verdict.md` describes, and it is alloc-free + format-light so
696 /// CBMC converges. **Does not prove:** the full parser, semantic correctness, or anything about reads.
697 #[kani::proof]
698 fn checked_block_len_never_panics() {
699 let counts = Counts {
700 isutcnt: kani::any(),
701 isstdcnt: kani::any(),
702 leapcnt: kani::any(),
703 timecnt: kani::any(),
704 typecnt: kani::any(),
705 charcnt: kani::any(),
706 };
707 let time_size: usize = if kani::any() { 4 } else { 8 };
708 let _ = checked_block_len(&counts, time_size);
709 }
710
711 /// **T23.kani.3f.1 — RFC designation-index validity is exact and strictly implies slice-safety.** For an
712 /// arbitrary `(idx, charcnt, has_nul)`, `rfc_designation_index_valid` accepts iff `charcnt != 0 && idx <
713 /// charcnt && has_nul`; and whenever it accepts, the index is also `abbr_index_slice_safe` — so RFC
714 /// validity is strictly stronger than the T23.kani.3b slice-safety (no RFC-valid index is ever OOB).
715 #[kani::proof]
716 fn rfc_designation_index_valid_is_exact_and_implies_slice_safe() {
717 let idx: usize = kani::any();
718 let charcnt: usize = kani::any();
719 let has_nul: bool = kani::any();
720 let got = rfc_designation_index_valid(idx, charcnt, has_nul);
721 assert_eq!(got, charcnt != 0 && idx < charcnt && has_nul);
722 if got {
723 assert!(abbr_index_slice_safe(idx, charcnt)); // RFC-valid ⇒ slice-safe (idx < charcnt ⇒ idx ≤ charcnt)
724 }
725 }
726
727 /// **T23.kani.3f.2 — `isdst` byte structural validity (RFC: octet ∈ {0,1}).** Total over all `u8`; an
728 /// accepted byte is exactly a Rust `bool` (0 or 1), the only values for which the lenient `!= 0` reading
729 /// loses no information.
730 #[kani::proof]
731 fn isdst_byte_valid_is_exact() {
732 let b: u8 = kani::any();
733 let got = isdst_byte_valid(b);
734 assert_eq!(got, b <= 1);
735 if got {
736 assert!(b == 0 || b == 1);
737 }
738 }
739
740 /// **T23.kani.3f.3 — indicator-byte pairing (RFC: each ∈ {0,1} and `isut == 1 ⇒ isstd == 1`).** Total over
741 /// all `(u8, u8)`; an accepted pair never carries a UT indicator without its standard indicator.
742 #[kani::proof]
743 fn indicator_pair_valid_is_exact() {
744 let isut: u8 = kani::any();
745 let isstd: u8 = kani::any();
746 let got = indicator_pair_valid(isut, isstd);
747 assert_eq!(got, isut <= 1 && isstd <= 1 && !(isut == 1 && isstd == 0));
748 if got && isut == 1 {
749 assert!(isstd == 1);
750 }
751 }
752
753 /// **T23.kani.3f.4 — `utoff` structural validity (RFC forbids `i32::MIN`).** Total over all `i32`; an
754 /// accepted offset can always be negated without overflow — the concrete safety reason the RFC excludes
755 /// `-2^31` (its signed negation is the one value that wraps).
756 #[kani::proof]
757 fn utoff_structural_valid_excludes_unnegatable_min() {
758 let u: i32 = kani::any();
759 let got = utoff_structural_valid(u);
760 assert_eq!(got, u != i32::MIN);
761 if got {
762 assert!(u.checked_neg().is_some()); // valid ⇒ negation cannot overflow
763 }
764 }
765}