Skip to main content

tzcompile/
structural.rs

1//! `structural-report` — a measured **structural-parity inventory** of zic-rs TZif output
2//! against reference `zic` (campaign T8).
3//!
4//! This is a *separate axis* from the behaviour oracle. CORE.1 already establishes that every
5//! canonical zone **behaviour-matches** reference `zic`/`zdump` over `1900..2040` (the binding
6//! contract). This report answers a different question: *how do the emitted TZif bytes differ
7//! structurally* — which is exactly what a "drop-in file replacement" claim would need, and is
8//! deliberately **not** claimed by CORE.1.
9//!
10//! For each canonical zone we compile both ways (zic-rs in memory, reference `zic` into a temp
11//! tree), decode both with the shared [`crate::tzif::parse`], and classify the difference into a
12//! fixed taxonomy ([`ParityClass`]). The point is to *measure* the distance, not to chase byte
13//! parity: behaviour parity stays the contract, structural parity is reported honestly, and byte
14//! parity is only ever claimed where a reference blob is pinned (`fixtures/expected/`).
15//!
16//! Empirically (tzdata.zi 2026b vs tzcode 2026b): `isutcnt`/`isstdcnt`/`leapcnt`/`typecnt`/`version`
17//! /`footer` are at full parity across all 341 zones (version+footer 341/341 after T8-v3 pinned
18//! `zic.c`'s `compat >= 2013` version rule — see `compile::posix_footer::recurring`). The only
19//! remaining differences are `timecnt` (the documented slim/fat explicit-transition window) and two
20//! `charcnt` zones (zic shares abbreviation *suffixes* in the designation table; zic-rs stores them
21//! separately). All are `zdump`-equivalent.
22
23use std::collections::BTreeMap;
24use std::path::{Path, PathBuf};
25
26use crate::compare::reference_zic;
27use crate::error::{Error, Result};
28use crate::json::escape;
29use crate::model::Database;
30use crate::tzif::{self, ParsedTzif};
31
/// Value written to the top-level `"schema"` field by [`StructuralReport::to_json`].
const SCHEMA: &str = "zic-rs-structural-report-v3";

/// Cap on the number of example rows [`StructuralReport::to_text`] prints per class (and on the
/// number of "not compared" rows); anything beyond it is summarised as `(+N more)`.
const TEXT_EXAMPLES: usize = 8;
37
/// Header-level summary of one compiled TZif file, taken from its authoritative (v2+) block.
///
/// Two shapes are equal exactly when every field below is equal.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Shape {
    /// TZif version byte, kept as the raw ASCII character (e.g. `b'2'`, `b'3'`).
    pub version: u8,
    /// Count of explicit transitions (the dimension the slim/fat window changes).
    pub timecnt: u32,
    /// Count of entries in the local-time-type table.
    pub typecnt: u32,
    /// Size of the abbreviation designation table.
    pub charcnt: u32,
    /// Count of `ttisut` indicators.
    pub isutcnt: u32,
    /// Count of `ttisstd` indicators.
    pub isstdcnt: u32,
    /// Count of leap records.
    pub leapcnt: u32,
    /// The POSIX `TZ` footer string.
    pub footer: String,
}
50
51impl Shape {
52    /// Decode a [`Shape`] from a parsed TZif. `pub(crate)` so `release_diff` (T16.6) reuses the
53    /// exact same structural snapshot the structural-parity inventory (T8) uses — one source of truth.
54    pub(crate) fn of(p: &ParsedTzif) -> Self {
55        Shape {
56            version: p.version,
57            timecnt: p.counts.timecnt,
58            typecnt: p.counts.typecnt,
59            charcnt: p.counts.charcnt,
60            isutcnt: p.counts.isutcnt,
61            isstdcnt: p.counts.isstdcnt,
62            leapcnt: p.counts.leapcnt,
63            footer: p.footer.clone(),
64        }
65    }
66}
67
/// Taxonomy of structural differences. Every compared zone is assigned exactly one class: the
/// class of its single differing dimension when there is only one, otherwise a coarser catch-all.
///
/// The derived `Ord` follows declaration order, which is therefore the order in which classes
/// appear when they are used as sorted-map keys.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum ParityClass {
    /// The two outputs are byte-for-byte identical.
    ByteIdentical,
    /// The bytes differ, yet every decoded count, the version and the footer agree
    /// (type/abbreviation *ordering* or designation *packing* differs below the count level).
    StructurallyEquivalent,
    /// `timecnt` is the only difference — the documented slim/fat explicit-transition window.
    SlimFatTimecnt,
    /// `typecnt` is the only difference — local-time-type table count.
    TypeCount,
    /// `charcnt` is the only difference — designation-table packing (zic suffix-sharing).
    AbbrevTable,
    /// The TZif version byte is the only difference.
    Version,
    /// The POSIX `TZ` footer string is the only difference.
    Footer,
    /// Only `isutcnt` and/or `isstdcnt` differ — `ttisut`/`ttisstd` indicator policy.
    TtisStdUt,
    /// `leapcnt` is the only difference — leap-record count.
    Leap,
    /// Several dimensions differ at once — flagged for investigation.
    Mixed,
}

impl ParityClass {
    /// The stable, human-readable label for this class; the text report prints it and the JSON
    /// report uses it as a key/value.
    pub fn label(self) -> &'static str {
        match self {
            Self::ByteIdentical => "byte-identical",
            Self::StructurallyEquivalent => "structurally-equivalent",
            Self::SlimFatTimecnt => "slim/fat-timecnt",
            Self::TypeCount => "type-count",
            Self::AbbrevTable => "abbreviation-table",
            Self::Version => "version-byte",
            Self::Footer => "footer",
            Self::TtisStdUt => "ttisstd/ttisut",
            Self::Leap => "leap-count",
            Self::Mixed => "mixed/unexpected",
        }
    }
}
112
113/// The list of dimensions that differ between two shapes (stable order, for diagnostics).
114/// `pub(crate)` so `release_diff` (T16.6) classifies a two-release structural delta with the same
115/// dimension vocabulary the T8 inventory uses.
116pub(crate) fn differing_dims(a: &Shape, b: &Shape) -> Vec<&'static str> {
117    let mut d = Vec::new();
118    if a.version != b.version {
119        d.push("version");
120    }
121    if a.timecnt != b.timecnt {
122        d.push("timecnt");
123    }
124    if a.typecnt != b.typecnt {
125        d.push("typecnt");
126    }
127    if a.charcnt != b.charcnt {
128        d.push("charcnt");
129    }
130    if a.isutcnt != b.isutcnt {
131        d.push("isutcnt");
132    }
133    if a.isstdcnt != b.isstdcnt {
134        d.push("isstdcnt");
135    }
136    if a.leapcnt != b.leapcnt {
137        d.push("leapcnt");
138    }
139    if a.footer != b.footer {
140        d.push("footer");
141    }
142    d
143}
144
145/// Classify a single zone given its byte-identity and the differing dimensions.
146/// `pub(crate)` so `release_diff` (T16.6) reuses the identical single-class taxonomy.
147pub(crate) fn classify(byte_identical: bool, dims: &[&str]) -> ParityClass {
148    if byte_identical {
149        return ParityClass::ByteIdentical;
150    }
151    match dims {
152        [] => ParityClass::StructurallyEquivalent,
153        ["timecnt"] => ParityClass::SlimFatTimecnt,
154        ["typecnt"] => ParityClass::TypeCount,
155        ["charcnt"] => ParityClass::AbbrevTable,
156        ["version"] => ParityClass::Version,
157        ["footer"] => ParityClass::Footer,
158        ["isutcnt"] | ["isstdcnt"] | ["isutcnt", "isstdcnt"] => ParityClass::TtisStdUt,
159        ["leapcnt"] => ParityClass::Leap,
160        _ => ParityClass::Mixed,
161    }
162}
163
164/// One canonical zone's structural comparison.
165#[derive(Debug, Clone)]
166pub struct ZoneShape {
167    pub name: String,
168    pub class: ParityClass,
169    /// The differing dimensions (empty when byte-identical or structurally-equivalent).
170    pub diffs: Vec<&'static str>,
171    pub ours: Shape,
172    pub theirs: Shape,
173}
174
/// A zone that was left out of the comparison because one side failed to compile or decode.
#[derive(Clone, Debug)]
pub struct ZoneError {
    /// Zone name.
    pub name: String,
    /// Human-readable description of what failed and on which side.
    pub reason: String,
}
181
182/// The complete structural-parity inventory for one source file.
183#[derive(Debug)]
184pub struct StructuralReport {
185    pub tzdb_version: Option<String>,
186    pub reference_zic: String,
187    /// Per-zone comparisons, sorted by name.
188    pub zones: Vec<ZoneShape>,
189    /// Zones skipped because one side errored, sorted by name.
190    pub errors: Vec<ZoneError>,
191    /// Net extra explicit transitions zic-rs emits over reference `zic` (Σ over slim/fat zones;
192    /// negative would mean reference is fatter). A pure measure of the slim/fat gap.
193    pub timecnt_delta_total: i64,
194}
195
196impl StructuralReport {
197    /// Count of zones in each class, in the taxonomy's canonical order.
198    pub fn class_counts(&self) -> BTreeMap<ParityClass, usize> {
199        let mut m: BTreeMap<ParityClass, usize> = BTreeMap::new();
200        for z in &self.zones {
201            *m.entry(z.class).or_default() += 1;
202        }
203        m
204    }
205
206    pub fn zones_compared(&self) -> usize {
207        self.zones.len()
208    }
209
210    /// Zones whose behaviour-relevant emitted structure — the TZif **version byte** and the POSIX
211    /// **footer** — matches reference `zic` exactly. Computed directly (not by class) so a zone
212    /// that is `mixed` only on benign dimensions (slim/fat `timecnt` + `charcnt` packing) still
213    /// counts as version+footer-clean. This is the honest "structurally drop-in modulo slim/fat +
214    /// abbreviation packing" count.
215    pub fn version_footer_match(&self) -> usize {
216        self.zones
217            .iter()
218            .filter(|z| z.ours.version == z.theirs.version && z.ours.footer == z.theirs.footer)
219            .count()
220    }
221
222    pub fn to_text(&self) -> String {
223        let mut s = String::new();
224        s.push_str("zic-rs structural-parity inventory (campaign T8)\n");
225        s.push_str(
226            "  axis: TZif *structure* vs reference `zic` — SEPARATE from behaviour parity.\n",
227        );
228        s.push_str(
229            "  behaviour parity (CORE.1: 341/341 zdump-match over 1900..2040) is the contract;\n",
230        );
231        s.push_str("  byte parity is claimed only where a reference blob is pinned.\n\n");
232        if let Some(v) = &self.tzdb_version {
233            s.push_str(&format!("  tzdb release      : {v}\n"));
234        }
235        s.push_str(&format!("  reference zic     : {}\n", self.reference_zic));
236        s.push_str(&format!(
237            "  zones compared    : {}\n",
238            self.zones_compared()
239        ));
240        s.push_str(&format!(
241            "  version+footer ok : {} (structure drop-in modulo slim/fat + packing)\n",
242            self.version_footer_match()
243        ));
244        s.push_str(&format!(
245            "  net extra transitions (ours − ref, slim/fat): {}\n\n",
246            self.timecnt_delta_total
247        ));
248
249        s.push_str("  parity classes:\n");
250        let counts = self.class_counts();
251        for (class, n) in &counts {
252            s.push_str(&format!("    {:<24} {}\n", class.label(), n));
253            // Show examples for the classes a maintainer cares about (anything not the bulk
254            // byte-identical / structurally-equivalent / slim-fat rows).
255            if matches!(
256                class,
257                ParityClass::Version
258                    | ParityClass::Footer
259                    | ParityClass::AbbrevTable
260                    | ParityClass::TypeCount
261                    | ParityClass::TtisStdUt
262                    | ParityClass::Leap
263                    | ParityClass::Mixed
264            ) {
265                for (shown, z) in self.zones.iter().filter(|z| z.class == *class).enumerate() {
266                    if shown == TEXT_EXAMPLES {
267                        s.push_str(&format!(
268                            "        (+{} more)\n",
269                            counts[class] - TEXT_EXAMPLES
270                        ));
271                        break;
272                    }
273                    s.push_str(&format!(
274                        "        {}: [{}]  ours v{} tc={} cc={}  ref v{} tc={} cc={}\n",
275                        z.name,
276                        z.diffs.join(","),
277                        z.ours.version as char,
278                        z.ours.timecnt,
279                        z.ours.charcnt,
280                        z.theirs.version as char,
281                        z.theirs.timecnt,
282                        z.theirs.charcnt,
283                    ));
284                }
285            }
286        }
287
288        if !self.errors.is_empty() {
289            s.push_str(&format!("\n  not compared ({}):\n", self.errors.len()));
290            for e in self.errors.iter().take(TEXT_EXAMPLES) {
291                s.push_str(&format!("    {}: {}\n", e.name, e.reason));
292            }
293            if self.errors.len() > TEXT_EXAMPLES {
294                s.push_str(&format!(
295                    "    (+{} more)\n",
296                    self.errors.len() - TEXT_EXAMPLES
297                ));
298            }
299        }
300        // T12.6 — static provenance/capability statement (manifest schema + source-variant gate).
301        s.push_str(&crate::manifest::provenance_block_text());
302        s
303    }
304
305    pub fn to_json(&self) -> String {
306        let mut s = String::new();
307        s.push_str("{\n");
308        s.push_str(&format!("  \"schema\": {},\n", escape(SCHEMA)));
309        // T12.6 — static provenance/capability block (schema + source-variant pin-gate state +
310        // T15.2 `negative_capabilities`).
311        s.push_str(&crate::manifest::provenance_block_json());
312        // T15.2 — oracle availability, typed. `structural-report` compares against reference `zic`'s
313        // emitted bytes, so the oracle that backs its verdicts is `reference_zic`. Visible, never silent.
314        s.push_str(&format!(
315            "  \"oracle_mode\": {},\n",
316            crate::manifest::OracleMode::ReferenceZic.to_json_field()
317        ));
318        match &self.tzdb_version {
319            Some(v) => s.push_str(&format!("  \"tzdb_version\": {},\n", escape(v))),
320            None => s.push_str("  \"tzdb_version\": null,\n"),
321        }
322        s.push_str(&format!(
323            "  \"reference_zic\": {},\n",
324            escape(&self.reference_zic)
325        ));
326        s.push_str(&format!(
327            "  \"zones_compared\": {},\n",
328            self.zones_compared()
329        ));
330        s.push_str(&format!(
331            "  \"version_footer_match\": {},\n",
332            self.version_footer_match()
333        ));
334        s.push_str(&format!(
335            "  \"timecnt_delta_total\": {},\n",
336            self.timecnt_delta_total
337        ));
338        s.push_str("  \"class_counts\": {");
339        let mut first = true;
340        for (class, n) in &self.class_counts() {
341            s.push_str(if first { "\n" } else { ",\n" });
342            first = false;
343            s.push_str(&format!("    {}: {}", escape(class.label()), n));
344        }
345        s.push_str(if first { "}," } else { "\n  },\n" });
346        s.push('\n');
347        // Per-zone rows only for the classes worth auditing (omit the byte-identical /
348        // structurally-equivalent / slim-fat bulk to keep the JSON focused on differences).
349        s.push_str("  \"differences\": [");
350        let mut first = true;
351        for z in self.zones.iter().filter(|z| {
352            !matches!(
353                z.class,
354                ParityClass::ByteIdentical
355                    | ParityClass::StructurallyEquivalent
356                    | ParityClass::SlimFatTimecnt
357            )
358        }) {
359            s.push_str(if first { "\n" } else { ",\n" });
360            first = false;
361            let dims: Vec<String> = z.diffs.iter().map(|d| escape(d)).collect();
362            // The version byte is the ASCII digit ('2'/'3'); emit it as a quoted digit string.
363            s.push_str(&format!(
364                "    {{ \"zone\": {}, \"class\": {}, \"dims\": [{}], \"ours_version\": {}, \"ref_version\": {}, \"ours_timecnt\": {}, \"ref_timecnt\": {}, \"ours_charcnt\": {}, \"ref_charcnt\": {} }}",
365                escape(&z.name),
366                escape(z.class.label()),
367                dims.join(", "),
368                escape(&(z.ours.version as char).to_string()),
369                escape(&(z.theirs.version as char).to_string()),
370                z.ours.timecnt,
371                z.theirs.timecnt,
372                z.ours.charcnt,
373                z.theirs.charcnt,
374            ));
375        }
376        s.push_str(if first { "],\n" } else { "\n  ],\n" });
377        s.push_str(&format!("  \"errors\": {}\n", self.errors.len()));
378        s.push_str("}\n");
379        s
380    }
381}
382
383/// Build the structural-parity inventory for `db`.
384///
385/// `inputs` are the source files (passed to reference `zic`, which takes files not dirs);
386/// `reference_zic` is the reference compiler program; `work_dir` is a caller-controlled
387/// (absolute) scratch directory (typically a tempdir). `only` restricts to a single zone.
388pub fn build_structural_report(
389    db: &Database,
390    inputs: &[PathBuf],
391    reference_zic: &str,
392    work_dir: &Path,
393    only: Option<&str>,
394    tzdb_version: Option<String>,
395    emit_style: crate::EmitStyle,
396) -> Result<StructuralReport> {
397    // Compile every zone with reference `zic` once, into work_dir/ref.
398    let ref_root = work_dir.join("ref");
399    std::fs::create_dir_all(&ref_root).map_err(|e| Error::io(&ref_root, e))?;
400    reference_zic::compile_with_reference(reference_zic, inputs, &ref_root)?;
401
402    let names: Vec<String> = match only {
403        Some(z) => vec![z.to_string()],
404        None => {
405            let mut v: Vec<String> = db.zones.iter().map(|z| z.name.clone()).collect();
406            v.sort();
407            v
408        }
409    };
410
411    let mut zones = Vec::new();
412    let mut errors = Vec::new();
413    let mut timecnt_delta_total: i64 = 0;
414
415    for name in names {
416        let ours_bytes = match crate::compile_zone_to_bytes_styled(db, &name, emit_style) {
417            Ok(b) => b,
418            Err(e) => {
419                errors.push(ZoneError {
420                    name,
421                    reason: format!("ours: {e}"),
422                });
423                continue;
424            }
425        };
426        let ref_path = reference_zic::compiled_path(&ref_root, &name);
427        let theirs_bytes = match std::fs::read(&ref_path) {
428            Ok(b) => b,
429            Err(e) => {
430                errors.push(ZoneError {
431                    name,
432                    reason: format!("reference: {e}"),
433                });
434                continue;
435            }
436        };
437        let byte_identical = ours_bytes == theirs_bytes;
438        let ours = match tzif::parse(&ours_bytes) {
439            Ok(p) => Shape::of(&p),
440            Err(e) => {
441                errors.push(ZoneError {
442                    name,
443                    reason: format!("decode ours: {e}"),
444                });
445                continue;
446            }
447        };
448        let theirs = match tzif::parse(&theirs_bytes) {
449            Ok(p) => Shape::of(&p),
450            Err(e) => {
451                errors.push(ZoneError {
452                    name,
453                    reason: format!("decode reference: {e}"),
454                });
455                continue;
456            }
457        };
458        let diffs = differing_dims(&ours, &theirs);
459        let class = classify(byte_identical, &diffs);
460        timecnt_delta_total += ours.timecnt as i64 - theirs.timecnt as i64;
461        zones.push(ZoneShape {
462            name,
463            class,
464            diffs,
465            ours,
466            theirs,
467        });
468    }
469
470    Ok(StructuralReport {
471        tzdb_version,
472        reference_zic: reference_zic.to_string(),
473        zones,
474        errors,
475        timecnt_delta_total,
476    })
477}
478
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline fixture: three local-time types, no indicators, no leap records. Tests that need
    /// a different `typecnt`/`isutcnt`/`isstdcnt`/`leapcnt` override it with struct-update syntax.
    fn shape(version: u8, timecnt: u32, charcnt: u32, footer: &str) -> Shape {
        Shape {
            version,
            timecnt,
            typecnt: 3,
            charcnt,
            isutcnt: 0,
            isstdcnt: 0,
            leapcnt: 0,
            footer: footer.to_string(),
        }
    }

    #[test]
    fn byte_identical_dominates() {
        let a = shape(b'2', 100, 20, "EST5");
        let b = shape(b'2', 100, 20, "EST5");
        assert_eq!(
            classify(true, &differing_dims(&a, &b)),
            ParityClass::ByteIdentical
        );
        // Dominance proper: the flag wins even when dims are (nonsensically) non-empty.
        assert_eq!(
            classify(true, &["timecnt", "footer"]),
            ParityClass::ByteIdentical
        );
    }

    #[test]
    fn equal_decode_but_byte_diff_is_structurally_equivalent() {
        // Same counts/version/footer but bytes differ (e.g. type ordering) → equivalent.
        let a = shape(b'2', 100, 20, "EST5");
        let b = shape(b'2', 100, 20, "EST5");
        assert_eq!(
            classify(false, &differing_dims(&a, &b)),
            ParityClass::StructurallyEquivalent
        );
    }

    #[test]
    fn lone_timecnt_diff_is_slim_fat() {
        let a = shape(b'2', 236, 20, "EST5EDT,M3.2.0,M11.1.0");
        let b = shape(b'2', 175, 20, "EST5EDT,M3.2.0,M11.1.0");
        let d = differing_dims(&a, &b);
        assert_eq!(d, ["timecnt"]);
        assert_eq!(classify(false, &d), ParityClass::SlimFatTimecnt);
    }

    #[test]
    fn lone_charcnt_diff_is_abbrev_table() {
        // Adak/Ho_Chi_Minh shape: ours 4 bytes larger, everything else equal.
        let a = shape(b'2', 145, 37, "HST10");
        let b = shape(b'2', 145, 33, "HST10");
        assert_eq!(
            classify(false, &differing_dims(&a, &b)),
            ParityClass::AbbrevTable
        );
    }

    #[test]
    fn lone_version_diff_is_version() {
        // Santiago shape: ref v3, ours v2, identical footer.
        let a = shape(b'2', 100, 12, "<-04>4<-03>,M9.1.6/24,M4.1.6/24");
        let b = shape(b'3', 100, 12, "<-04>4<-03>,M9.1.6/24,M4.1.6/24");
        assert_eq!(
            classify(false, &differing_dims(&a, &b)),
            ParityClass::Version
        );
    }

    #[test]
    fn lone_footer_diff_is_footer() {
        let a = shape(b'2', 100, 20, "EST5");
        let b = shape(b'2', 100, 20, "EST5EDT,M3.2.0,M11.1.0");
        let d = differing_dims(&a, &b);
        assert_eq!(d, ["footer"]);
        assert_eq!(classify(false, &d), ParityClass::Footer);
    }

    #[test]
    fn lone_typecnt_diff_is_type_count() {
        let a = shape(b'2', 100, 20, "EST5");
        let b = Shape {
            typecnt: 4,
            ..a.clone()
        };
        let d = differing_dims(&a, &b);
        assert_eq!(d, ["typecnt"]);
        assert_eq!(classify(false, &d), ParityClass::TypeCount);
    }

    #[test]
    fn lone_leapcnt_diff_is_leap() {
        let a = shape(b'2', 100, 20, "EST5");
        let b = Shape {
            leapcnt: 27,
            ..a.clone()
        };
        let d = differing_dims(&a, &b);
        assert_eq!(d, ["leapcnt"]);
        assert_eq!(classify(false, &d), ParityClass::Leap);
    }

    #[test]
    fn indicator_count_diffs_are_ttis_std_ut() {
        let a = shape(b'2', 100, 20, "EST5");

        let ut_only = Shape {
            isutcnt: 3,
            ..a.clone()
        };
        assert_eq!(
            classify(false, &differing_dims(&a, &ut_only)),
            ParityClass::TtisStdUt
        );

        let std_only = Shape {
            isstdcnt: 3,
            ..a.clone()
        };
        assert_eq!(
            classify(false, &differing_dims(&a, &std_only)),
            ParityClass::TtisStdUt
        );

        // Both at once is still the indicator class, not `Mixed` — this relies on
        // `differing_dims` emitting `isutcnt` before `isstdcnt`.
        let both = Shape {
            isutcnt: 3,
            isstdcnt: 3,
            ..a.clone()
        };
        let d = differing_dims(&a, &both);
        assert_eq!(d, ["isutcnt", "isstdcnt"]);
        assert_eq!(classify(false, &d), ParityClass::TtisStdUt);
    }

    #[test]
    fn multiple_dims_is_mixed() {
        let a = shape(b'2', 100, 20, "EST5");
        let b = shape(b'3', 90, 18, "EST5EDT,M3.2.0,M11.1.0");
        assert_eq!(classify(false, &differing_dims(&a, &b)), ParityClass::Mixed);
    }

    #[test]
    fn unknown_single_dim_is_mixed() {
        assert_eq!(classify(false, &["bogus"]), ParityClass::Mixed);
    }

    #[test]
    fn dims_are_reported_in_stable_order() {
        let a = shape(b'2', 100, 20, "EST5");
        let b = Shape {
            version: b'3',
            timecnt: 90,
            typecnt: 4,
            charcnt: 18,
            isutcnt: 1,
            isstdcnt: 1,
            leapcnt: 27,
            footer: "EST5EDT,M3.2.0,M11.1.0".to_string(),
        };
        assert_eq!(
            differing_dims(&a, &b),
            [
                "version", "timecnt", "typecnt", "charcnt", "isutcnt", "isstdcnt", "leapcnt",
                "footer"
            ]
        );
        // Identical shapes report nothing.
        assert!(differing_dims(&a, &a).is_empty());
    }

    #[test]
    fn class_labels_are_distinct() {
        let all = [
            ParityClass::ByteIdentical,
            ParityClass::StructurallyEquivalent,
            ParityClass::SlimFatTimecnt,
            ParityClass::TypeCount,
            ParityClass::AbbrevTable,
            ParityClass::Version,
            ParityClass::Footer,
            ParityClass::TtisStdUt,
            ParityClass::Leap,
            ParityClass::Mixed,
        ];
        let labels: std::collections::BTreeSet<&str> = all.iter().map(|c| c.label()).collect();
        assert_eq!(labels.len(), all.len());
    }
}