Skip to main content

tzcompile/
manifest.rs

1//! The **alias/canonical manifest** (`alias-map.json`) — a producer-side artifact that
2//! records, for a compile run, which identifiers are *canonical zones* and which are *links*
3//! (aliases), with content hashes and an explicit account of where link materialisation
4//! duplicates bytes.
5//!
6//! ## Why this exists
7//!
8//! Downstream consumers/bundlers of timezone data care about *how* a bundle was produced, not
9//! only the file bytes. jiff#258 is the concrete motivation: it observed that concatenated
10//! zoneinfo appears to **duplicate** data for aliases (e.g. a release with 597 identifiers but
11//! only 339 non-alias zones) and asked for that to be documented. This manifest answers that
12//! directly: every identifier is tagged `zone` or `link`, links carry their target + the
13//! target's hash, and the summary reports `identifiers` / `canonical_zones` / `links` plus
14//! `duplicated_byte_links` (links we materialised as byte copies). See `docs/rust-ecosystem.md`
15//! and `docs/generated-data-contract.md`.
16//!
17//! ## Dependency-free by design
18//!
19//! Hashing uses the in-house [`crate::hash`] SHA-256; JSON is written by a tiny deterministic
20//! serializer here (no `serde`). The schema is small and fixed, and identifier strings are
21//! escaped properly, so this stays correct without a serialization framework — consistent
22//! with the crate's minimal-dependency ethos.
23
24use std::collections::BTreeMap;
25use std::path::Path;
26
27use crate::error::{Error, Result};
28use crate::hash::sha256_hex;
29use crate::{CompileReport, LinkMode};
30
/// Stable schema identifier for the alias map. [`AliasMap::to_json`] writes it as the top-level
/// `"schema"` value so consumers can version-gate on it.
pub const SCHEMA: &str = "zic-rs-alias-map-v1";
33
/// One identifier's entry in the alias map: either a canonical zone or a link (alias) to one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AliasEntry {
    /// A canonical zone: the SHA-256 of its compiled TZif file.
    Zone {
        /// Content hash of the zone's compiled output file, as produced by
        /// [`crate::hash::sha256_hex`].
        sha256: String,
    },
    /// A link (alias) to a canonical zone, with the target's hash and how it was materialised.
    Link {
        /// Identifier of the zone this alias points at. [`AliasMap::validate`] requires it to be
        /// present in the same map as a [`AliasEntry::Zone`].
        target: String,
        /// The hash recorded for `target`. [`AliasMap::validate`] requires it to equal that
        /// zone's `sha256`.
        target_sha256: String,
        /// How the link was materialised — the typed [`LinkMode`] (`Copy` = bytes duplicated; `Symlink`
        /// = no duplication), rendered as `"copy"`/`"symlink"` only at the JSON boundary (CONTRACT.TYPING:
        /// a finite-vocabulary claim-bearing field is owned by an enum, not a hand-emitted string).
        materialised: LinkMode,
    },
}
49
50impl AliasEntry {
51    /// The alias-map `"kind"` vocabulary (`"zone"` / `"link"`). **T17.2 (CONTRACT.TYPING):** the `kind`
52    /// field was previously a hand-emitted string literal inside the JSON format string; the variant is
53    /// the finite claim ("is this identifier a canonical zone or an alias?"), so the literal is owned
54    /// here and rendered through this accessor — a new `kind` value cannot enter the alias map as prose.
55    pub fn kind_str(&self) -> &'static str {
56        match self {
57            AliasEntry::Zone { .. } => "zone",
58            AliasEntry::Link { .. } => "link",
59        }
60    }
61}
62
/// The whole manifest: a deterministic (sorted) map of identifiers plus summary counts.
///
/// All fields are public, so a hand-assembled value can be inconsistent. [`AliasMap::validate`]
/// checks `identifiers`/`canonical_zones`/`links` against `entries`, and [`build`] runs it before
/// returning.
#[derive(Debug, Clone)]
pub struct AliasMap {
    /// Identifier → entry, ordered by name for deterministic output.
    pub entries: BTreeMap<String, AliasEntry>,
    /// Total number of identifiers; [`build`] sets it to `entries.len()`.
    pub identifiers: usize,
    /// Number of canonical zones; [`build`] sets it to the report's compiled-zone count.
    pub canonical_zones: usize,
    /// Number of links; [`build`] sets it to the report's written-link count.
    pub links: usize,
    /// Links materialised as byte copies (i.e. where the same TZif bytes exist under two
    /// names). This is the figure jiff#258 was asking to make visible.
    pub duplicated_byte_links: usize,
}
75
76/// Build an [`AliasMap`] from a finished [`CompileReport`], hashing each compiled file under
77/// `root`. Reads the just-written output files (cheap; they are small).
78pub fn build(report: &CompileReport, _root: &Path) -> Result<AliasMap> {
79    // Hash every canonical zone once, keyed by zone name, by reading its output file.
80    let mut zone_hash: BTreeMap<String, String> = BTreeMap::new();
81    for z in &report.zones_compiled {
82        let bytes = std::fs::read(&z.output_path).map_err(|e| Error::io(&z.output_path, e))?;
83        zone_hash.insert(z.name.clone(), sha256_hex(&bytes));
84    }
85
86    let mut entries: BTreeMap<String, AliasEntry> = BTreeMap::new();
87    for z in &report.zones_compiled {
88        entries.insert(
89            z.name.clone(),
90            AliasEntry::Zone {
91                sha256: zone_hash.get(&z.name).cloned().unwrap_or_default(),
92            },
93        );
94    }
95
96    let mut duplicated_byte_links = 0;
97    for l in &report.links_written {
98        // The typed materialisation policy (CONTRACT.TYPING owns the literal via `LinkMode::as_str()`).
99        let materialised = l.mode;
100        if materialised == LinkMode::Copy {
101            duplicated_byte_links += 1;
102        }
103        // `l.target` is the resolved canonical zone (see compile::plan), so its hash is known.
104        let target_sha256 = zone_hash.get(&l.target).cloned().unwrap_or_default();
105        entries.insert(
106            l.link_name.clone(),
107            AliasEntry::Link {
108                target: l.target.clone(),
109                target_sha256,
110                materialised,
111            },
112        );
113    }
114
115    let map = AliasMap {
116        identifiers: entries.len(),
117        canonical_zones: report.zones_compiled.len(),
118        links: report.links_written.len(),
119        duplicated_byte_links,
120        entries,
121    };
122    // T12.4c — fail closed if the produced map is not internally consistent (a link to a
123    // non-compiled zone, a hash that doesn't match its target, a self-link, or summary counts that
124    // disagree with the entries). In normal flow this always passes — `plan::run` only records a
125    // link whose resolved canonical zone was compiled — so this is a defensive invariant that turns
126    // any future regression into a hard error rather than a silently wrong artifact.
127    map.validate()?;
128    Ok(map)
129}
130
131impl AliasMap {
132    /// Validate the alias map's **internal consistency** (T12.4c). Guarantees that every `Link`
133    /// entry "corresponds to a materialised link/copy" of a real compiled zone:
134    ///
135    /// - every `Link`'s `target` is present in this map as a **`Zone`** entry (no dangling alias —
136    ///   *"missing target fails"*; a link that resolved to another link, or to nothing, never gets
137    ///   here because `build` records the *resolved canonical* zone);
138    /// - the `Link`'s recorded `target_sha256` is non-empty and **equals** that zone's hash (the
139    ///   alias genuinely names those exact bytes — the jiff#258 duplication is real, not asserted);
140    /// - no entry is a **self-link** (`name == target`) — `resolve_link_target` already rejects
141    ///   self-links/cycles upstream (they are *skipped* in `plan::run` and counted `failed` in the
142    ///   link profile), so this is the alias-map-level guard that keeps that coverage honest;
143    /// - the summary counts (`canonical_zones`/`links`/`identifiers`) agree with the entries.
144    ///
145    /// `build` calls this before returning, so any [`AliasMap`] handed out is already consistent.
146    pub fn validate(&self) -> Result<()> {
147        let (mut zones, mut links) = (0usize, 0usize);
148        for (name, entry) in &self.entries {
149            match entry {
150                AliasEntry::Zone { sha256 } => {
151                    zones += 1;
152                    if sha256.len() != 64 {
153                        return Err(Error::message(format!(
154                            "alias-map: zone {name:?} has a malformed content hash"
155                        )));
156                    }
157                }
158                AliasEntry::Link {
159                    target,
160                    target_sha256,
161                    ..
162                } => {
163                    links += 1;
164                    if name == target {
165                        return Err(Error::message(format!(
166                            "alias-map: {name:?} is a self-link"
167                        )));
168                    }
169                    match self.entries.get(target) {
170                        Some(AliasEntry::Zone { sha256 }) => {
171                            if target_sha256 != sha256 {
172                                return Err(Error::message(format!(
173                                    "alias-map: link {name:?} records a target hash that does not \
174                                     match its zone {target:?}"
175                                )));
176                            }
177                        }
178                        Some(AliasEntry::Link { .. }) => {
179                            return Err(Error::message(format!(
180                                "alias-map: link {name:?} targets another link {target:?}, not a \
181                                 canonical zone"
182                            )))
183                        }
184                        None => {
185                            return Err(Error::message(format!(
186                                "alias-map: link {name:?} targets {target:?}, which is not a \
187                                 compiled zone in this map"
188                            )))
189                        }
190                    }
191                }
192            }
193        }
194        if zones != self.canonical_zones || links != self.links || self.identifiers != zones + links
195        {
196            return Err(Error::message(
197                "alias-map: summary counts disagree with the entries".to_string(),
198            ));
199        }
200        Ok(())
201    }
202
203    /// Render the manifest as deterministic, pretty-printed JSON (2-space indent, keys in a
204    /// fixed order; entries sorted by identifier via the `BTreeMap`).
205    pub fn to_json(&self) -> String {
206        let mut s = String::new();
207        s.push_str("{\n");
208        s.push_str(&format!("  \"schema\": {},\n", json_str(SCHEMA)));
209        s.push_str("  \"zones\": {");
210        let mut first = true;
211        for (name, entry) in &self.entries {
212            s.push_str(if first { "\n" } else { ",\n" });
213            first = false;
214            // CONTRACT.TYPING (T17.2): the `kind` literal is owned by `AliasEntry::kind_str()`, not
215            // hand-typed in the format string.
216            let kind = entry.kind_str();
217            match entry {
218                AliasEntry::Zone { sha256 } => {
219                    s.push_str(&format!(
220                        "    {}: {{ \"kind\": {}, \"sha256\": {} }}",
221                        json_str(name),
222                        json_str(kind),
223                        json_str(sha256)
224                    ));
225                }
226                AliasEntry::Link {
227                    target,
228                    target_sha256,
229                    materialised,
230                } => {
231                    s.push_str(&format!(
232                        "    {}: {{ \"kind\": {}, \"target\": {}, \"target_sha256\": {}, \"materialised\": {} }}",
233                        json_str(name),
234                        json_str(kind),
235                        json_str(target),
236                        json_str(target_sha256),
237                        json_str(materialised.as_str())
238                    ));
239                }
240            }
241        }
242        s.push_str(if self.entries.is_empty() {
243            "},\n"
244        } else {
245            "\n  },\n"
246        });
247        s.push_str("  \"summary\": {\n");
248        s.push_str(&format!("    \"identifiers\": {},\n", self.identifiers));
249        s.push_str(&format!(
250            "    \"canonical_zones\": {},\n",
251            self.canonical_zones
252        ));
253        s.push_str(&format!("    \"links\": {},\n", self.links));
254        s.push_str(&format!(
255            "    \"duplicated_byte_links\": {}\n",
256            self.duplicated_byte_links
257        ));
258        s.push_str("  }\n");
259        s.push_str("}\n");
260        s
261    }
262
263    /// Write the manifest JSON to `path`.
264    pub fn write_to(&self, path: &Path) -> Result<()> {
265        std::fs::write(path, self.to_json()).map_err(|e| Error::io(path, e))
266    }
267}
268
269// ===========================================================================================
270// Compile-provenance manifest (`zic-rs-manifest.json`, schema `zic-rs-compile-manifest-v8`).
271// ===========================================================================================
272//
273// Records *how this output tree was produced* — the **build identity**: source provenance (path +
274// hash + kind), the tzdb version (detected vs claimed, reconciled, never silently stamped), the
275// real `build_profile` (emit_style / range / redundant_until / link_mode / output_tree + leap
276// source), the zones/links touched, and the oracle result. **It describes this invocation, never
277// the repo's general test status nor a capability claim**: undetected source-set axes are honest
278// `"unknown"` (never an aspirational `"supported"`/`"unsupported"`), and a plain `compile` run did
279// not invoke `compare`, so the oracle block is `not-run` even though the fixtures are verified
280// elsewhere by the test suite.
281
/// Stable schema identifier for the compile manifest (`zic-rs-manifest.json`).
///
/// Schema changelog, newest first (the oldest entry recorded here is v2). Each version is a
/// *consumer-gating* marker — bumped only for a genuine block addition/removal, never for an
/// in-session correction to an unreleased version:
///
/// - **v8** (T12.5d): added the `source_profile.dataform_evidence` axis — the *encoding* form
///   (`main`/`vanguard`/`rearguard`) as detected/claimed/status, **hash-backed** against the pinned
///   2026b generated `.zi` artifacts via `source_inputs` membership (the `.zi` files are compilable
///   sources, so this is category-correct — cf. `backzone`), plus two generated-artifact provenance
///   fields: `recipe_hash` (binds archive · `Makefile` · `ziguard.awk` · command · toolchain, raw
///   bytes) and `generated_from`. Never inferred from syntax/output/names/`PACKRATLIST`/`backzone`.
/// - **v7** (T12.5c): added the `source_profile.packratlist_evidence` axis (backzone *scope*:
///   `detected: subset_from_policy_input` only via an admitted `PACKRATLIST` *generation-policy*
///   input hash-matching the pinned 2026b `zone.tab` plus a present `backzone`, else `unknown`; claim
///   `full|subset|none`). `PACKRATLIST`/`zone.tab` is a generation-policy selector, **not** a `zic`
///   compile source, so detection never consults `source_inputs`. Also removed the now-contradictory
///   `build_profile.backzone` `"unknown"` stub (backzone is the `source_profile` axis since T12.5b).
/// - **v6** (T12.5b): added the `source_profile.backzone_evidence` axis (detected/claimed/status +
///   `evidence_sha256`), hash-anchored to the pinned reference `backzone` ([`REF_2026B_BACKZONE_SHA256`],
///   admitted in T12.5a.2) — source *membership*, never inferred.
/// - **v5** (T12.4d; corrected in T12.5a): added the `source_profile` block recording the `backward`
///   **evidence axis** (detected/claimed/status + `evidence_sha256`) — admitted only from hash-backed
///   source evidence or an explicit claim, never inferred from the alias/link surface; an extension
///   seam for the later `backzone`/`rearguard`/`vanguard` axes. The T12.5a correction removed a
///   contradictory `build_profile.backward` `"unknown"` stub that v5 first shipped (it was briefly
///   double-listed; `backward` is authoritatively the `source_profile` axis) — fixed *in place*, no
///   version bump, as v5 was never released/consumed. `rearguard`/`vanguard` remain `build_profile`
///   `"unknown"` placeholders pending their own evidence axes (T12.5d, reference-first).
/// - **v4** (T12.4b): added the `link_profile` block recording link/alias identity — counts
///   (`zones_compiled`/`links_selected`/`links_materialized`/`links_omitted`/`links_failed`),
///   `link_policy`, and stable hashes (`alias_map_sha256` + `selected`/`omitted_links_sha256`) that
///   bind the build to its alias map.
/// - **v3** (T12.3): added the `source_inputs` block recording the deterministic, **order-preserving**
///   input identity (logical names + per-file hashes + `aggregate_hash`); the structural `source_kind`
///   moved there (and `individual_files` → `multi_file`); `build_profile.source_set` retired
///   (superseded).
/// - **v2** (T12.2): `tzdb.version` split into `detected_version`/`claimed_version` (+
///   `version_status`); the stub `generation_options` block replaced by a real `build_profile`
///   recording *what this run actually used*.
pub const COMPILE_SCHEMA: &str = "zic-rs-compile-manifest-v8";
321
/// Version provenance of the compiled tzdata. **Detected facts and user claims are stored
/// separately** so the manifest never silently stamps a release. Which files were used, in what
/// order and with what hashes (*input identity*) is the job of [`SourceInputs`], not this block.
#[derive(Debug, Clone)]
pub struct TzdbProvenance {
    /// tzdb release **sniffed** from the source's `# version …` comment, if present.
    pub detected_version: Option<String>,
    /// tzdb release the **user asserted** (`--tzdb-version`), if any.
    pub claimed_version: Option<String>,
    /// The input path(s), joined for display. **Environment context, not identity** — these are
    /// machine-local paths (possibly absolute). The portable identity is [`SourceInputs`] (logical
    /// names + content hashes).
    pub source_path: String,
    /// SHA-256 over the concatenation of all source files in **sorted (canonicalized) path
    /// order** — an *order-independent* content identity ("are these the same bytes, however they
    /// were ordered?"). The *order-sensitive* identity is [`SourceInputs::aggregate_hash`].
    pub source_sha256: String,
}

impl TzdbProvenance {
    /// Reconcile the detected version with the claimed one and name the outcome — the guard
    /// against false conformance claims.
    ///
    /// Returns one of `"detected_matches_claim"`, `"detected_differs_from_claim"`,
    /// `"detected_only"`, `"claimed_only"` or `"unknown"` (neither side present).
    pub fn version_status(&self) -> &'static str {
        let detected = self.detected_version.as_deref();
        let claimed = self.claimed_version.as_deref();
        match (detected, claimed) {
            (None, None) => "unknown",
            (None, Some(_)) => "claimed_only",
            (Some(_), None) => "detected_only",
            (Some(found), Some(asserted)) => {
                if found == asserted {
                    "detected_matches_claim"
                } else {
                    "detected_differs_from_claim"
                }
            }
        }
    }
}
353
/// One input source file in the deterministic input list (T12.3). The portable identity is the
/// `logical_name` (basename — never a machine-local absolute path) plus the content `sha256`;
/// `order_index` records the file's position in the **input order** (part of the build identity).
#[derive(Debug, Clone)]
pub struct SourceFile {
    /// The file's basename (e.g. `"northamerica"`, `"tzdata.zi"`) — a portable label, not the
    /// absolute machine-local path. The real identity is `sha256` + `order_index`.
    pub logical_name: String,
    /// SHA-256 of this file's bytes alone — it does not depend on where the file sits in the
    /// input order (that is what `order_index` records).
    pub sha256: String,
    /// Byte length of this file.
    pub bytes: usize,
    /// 0-based position in the input order. **Source order is part of the build identity** — the
    /// manifest does not pretend two differently ordered inputs are the same.
    pub order_index: usize,
}
370
// NOTE(review): the last sentence of the doc below says the `DATAFORM` axes stay `"unknown"` in
// the build profile "until" T12.5d, while the v8 entry of the `COMPILE_SCHEMA` changelog says
// T12.5d added `source_profile.dataform_evidence`. The sentence may be stale — confirm and update.
/// The deterministic input source-set of *this run* (T12.3) — **input identity, not source
/// semantics**. Records which files were used, in what order, with what hashes, under what
/// structural `kind`. It deliberately does **not** infer source-set *membership*: those are
/// reconciled as hash-backed/claim-only evidence axes in [`SourceProfile`] (`backward` T12.4d,
/// `backzone` T12.5b, `PACKRATLIST` scope T12.5c), and the remaining `DATAFORM` encoding axes
/// (`rearguard`/`vanguard`) are recorded as `"unknown"` in the build profile until a pinned,
/// deterministic detector exists (T12.5d).
#[derive(Debug, Clone)]
pub struct SourceInputs {
    /// Structural input *form* (not membership), typed (T17.2): [`SourceInputKind`]. Multi-file means
    /// *source form* only — it never implies `backward`/`backzone` inclusion.
    pub kind: SourceInputKind,
    /// The input files in **input order** (directories expanded in sorted order, then in the order
    /// the paths were supplied) — never re-sorted, so the order is faithfully recorded.
    pub files: Vec<SourceFile>,
    /// SHA-256 over the input-ordered sequence of per-file hashes — an **order-sensitive** identity
    /// that changes if the same files are supplied in a different order (cf. the order-independent
    /// [`TzdbProvenance::source_sha256`]).
    pub aggregate_hash: String,
}
391
/// The leap-second source used by *this run* (T12.2). Describes the run, never the project's
/// capabilities — the mode is [`LeapSourceMode::None`] (rendered `"none"`) for an ordinary
/// compile, never `"unsupported"`.
#[derive(Debug, Clone)]
pub struct LeapSourceInfo {
    /// `None` (ordinary/`posix` profile) or `File` (the `right/` profile, `-L`) — typed (T17.2).
    pub mode: LeapSourceMode,
    /// SHA-256 of the leap-source file, when `mode` is [`LeapSourceMode::File`] and the path was
    /// available.
    pub sha256: Option<String>,
    // NOTE(review): presumably the number of leap-second entries read from the source — confirm.
    pub entry_count: usize,
    // NOTE(review): presumably whether the leap source carried an expiry record — confirm.
    pub expires: bool,
    // NOTE(review): presumably the count of rolling leap entries — confirm against the parser.
    pub rolling_entries: usize,
}
404
// NOTE(review): the doc below says the `rearguard`/`vanguard` axes are "recorded here" as
// `"unknown"`, but no field of this struct carries them, and the v8 entry of the `COMPILE_SCHEMA`
// changelog says T12.5d added `source_profile.dataform_evidence`. Confirm whether the placeholders
// are still emitted somewhere and refresh this paragraph.
/// The build-profile identity of *this run* (T12.2) — structured fields, never a vague label. Only
/// the `DATAFORM` encoding axes (`rearguard`/`vanguard`) are recorded here as `"unknown"` (no
/// deterministic detector yet — kept explicit rather than guessed; T12.5d). The source-*membership*
/// axes (`backward`, `backzone`, `PACKRATLIST` scope) moved to [`SourceProfile`] as reconciled
/// evidence axes (T12.4d/T12.5b/T12.5c) — they are never build_profile placeholders.
#[derive(Debug, Clone)]
pub struct BuildProfile {
    /// `Posix` (no leap table) or `Right` (leap table applied) — typed (T17.2).
    pub output_tree: OutputTree,
    /// The leap-second source this run used (mode, hash and entry counts) — see [`LeapSourceInfo`].
    pub leap_source: LeapSourceInfo,
    /// Semantic emission identity, the typed [`crate::EmitStyle`] (T17.2; was a re-stringified `String`).
    /// `--emit-style zic-slim` and `-b slim` map to the same value; rendered to its manifest literal at the
    /// JSON boundary by the module-private `emit_style_str` (the enum is the source of truth, not a copy).
    pub emit_style: crate::EmitStyle,
    /// `-r` range, as `(lo, hi)` raw `@`-instants (`None` = no truncation).
    pub range: Option<(Option<i64>, Option<i64>)>,
    /// `-R` redundant-tail bound (`@`-instant), if any.
    pub redundant_until: Option<i64>,
    /// Link materialisation policy of this run — the typed [`crate::LinkMode`] (T17.2; was a
    /// re-stringified `String`). Rendered `"copy"`/`"symlink"` at the boundary via [`crate::LinkMode::as_str`].
    pub link_mode: crate::LinkMode,
}
427
428// ── T17.2 (CONTRACT.TYPING) — the manifest's remaining finite claim-bearing vocabularies, born typed. ──
429//
430// Each of these was a free `String`/`&'static str` whose value came from a *closed* set but was emitted
431// as prose, so a future code path (or a careless edit) could leak an unintended value into the public
432// `zic-rs-compile-manifest-v8` JSON. Per the standing rule — *prose is the weakest guarantee; an
433// exhaustive `match` that won't compile if a variant is unclassified is the strongest* — they are now
434// enums owning their JSON literal via `as_str()`. The emitted strings are **byte-identical** to the
435// previous output (the manifest tests + the conformance golden pin them), so no schema bumps.
436
/// Which output tree *this run* produced (T12.2; typed at T17.2 — previously a `&'static str`).
/// `posix` means no leap table was applied; `right` means a leap table (`-L`) was applied. Readers
/// of a report or reproducer rely on this *output-identity* claim, so the enum — not a string
/// literal — owns it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputTree {
    /// No leap table (the default / `posix` profile).
    Posix,
    /// A leap table was applied (the `right/` profile).
    Right,
}

impl OutputTree {
    /// Returns the manifest literal for this tree: `"posix"` or `"right"`.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Right => "right",
            Self::Posix => "posix",
        }
    }
}
457
/// How *this run* sourced leap seconds (T12.2; typed at T17.2 — previously a `&'static str`). It
/// describes the run, not what the project can do: an ordinary compile is `None`, never an
/// aspirational `"unsupported"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LeapSourceMode {
    /// No leap source (ordinary / `posix`).
    None,
    /// A leap-seconds file was supplied (`-L`, the `right/` profile).
    File,
}

impl LeapSourceMode {
    /// Returns the manifest literal for this mode: `"none"` or `"file"`.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::File => "file",
            Self::None => "none",
        }
    }
}
477
/// The structural *form* of this run's input (T12.3; typed at T17.2 — previously a `String`).
/// **Form only**: it says nothing about source-set *membership* (`backward`/`backzone` are
/// reconciled evidence axes elsewhere).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SourceInputKind {
    /// Exactly one `.zi` file (e.g. the zishrunk `tzdata.zi`).
    TzdataZi,
    /// Two or more source files.
    MultiFile,
    /// Exactly one non-`.zi` file.
    SingleFile,
    /// No input files.
    Unknown,
}

impl SourceInputKind {
    /// All variants in a stable order, for the totality test.
    pub const ALL: [Self; 4] = [
        Self::TzdataZi,
        Self::MultiFile,
        Self::SingleFile,
        Self::Unknown,
    ];

    /// Returns the manifest literal for this input form (`"tzdata_zi"`, `"multi_file"`,
    /// `"single_file"` or `"unknown"`).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Unknown => "unknown",
            Self::SingleFile => "single_file",
            Self::MultiFile => "multi_file",
            Self::TzdataZi => "tzdata_zi",
        }
    }
}
511
/// The oracle **verdict** vocabulary of a manifest (T17.2 — previously a free `String`). A bare
/// `compile` never runs the oracle, so [`OracleVerdict::NotRun`] (rendered `"not-run"`, the legacy
/// literal) is the only value today. It is typed from the start so that a future verdict (e.g.
/// match / mismatch, should a manifest path ever run the oracle) cannot arrive as an unconstrained
/// string. This is separate from the *mode* axis ([`OracleMode`]): the mode says *which* oracle,
/// the verdict says *what it concluded*.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OracleVerdict {
    /// The oracle was not run for this invocation (the honest default for `compile`).
    NotRun,
}

impl OracleVerdict {
    /// Returns the manifest literal: `"not-run"` (hyphen kept from the pre-T17.2 string).
    pub fn as_str(self) -> &'static str {
        // Kept as an exhaustive `match` so a new variant must be given a literal to compile.
        match self {
            Self::NotRun => "not-run",
        }
    }
}
531
532/// Render the typed [`crate::EmitStyle`] to its manifest literal (T17.2). The manifest now stores the
533/// enum directly (no re-stringified copy that could drift); this boundary fn owns the literal.
534fn emit_style_str(s: crate::EmitStyle) -> &'static str {
535    match s {
536        crate::EmitStyle::Default => "default",
537        crate::EmitStyle::ZicSlim => "zic-slim",
538        crate::EmitStyle::ZicFat => "zic-fat",
539    }
540}
541
/// The link / alias identity of *this run* (T12.4b) — counts + stable hashes that bind the
/// build to its `alias-map.json`. **Links are output identifiers, not source-set evidence**: this
/// block never infers `backward`/`backzone` membership from the alias set (those are reconciled
/// evidence axes in [`SourceProfile`], admitted only from hash-backed evidence or an explicit
/// claim — T12.4d/T12.5b).
///
/// `selected` / `omitted` / `failed` are kept **distinct** (panel rule):
/// - **selected** — a parsed `Link` whose resolved canonical zone *is* in the compiled output set
///   (eligible and materialised). In zic-rs a selected link always materialises (a write failure
///   aborts the whole run), so `links_selected_count` and the db-link share of
///   `links_materialized_count` coincide — they differ only by install-policy links like
///   `localtime` (materialised but not a source `Link`).
/// - **omitted** — a *valid* parsed `Link` not materialised because selection/profile excluded its
///   target from the output set. A policy outcome, **not** an error.
/// - **failed** — a `Link` whose chain does not terminate at a real zone (missing target / cycle).
///   An error class, never folded into `omitted`. (A bare successful `compile` writes no manifest
///   if a link fatally fails, so this is normally 0; it is recorded for completeness.)
#[derive(Debug, Clone)]
pub struct LinkProfile {
    // NOTE(review): this is still a free `String`, whereas `BuildProfile::link_mode` carries the
    // typed `crate::LinkMode` (T17.2). Consider typing it the same way — confirm with the
    // callers that populate this field.
    /// `"copy"` or `"symlink"` — how links were materialised this run.
    pub link_policy: String,
    /// Canonical zones actually compiled this run.
    pub zones_compiled_count: usize,
    /// Parsed `Link`s eligible & materialised (resolved target in the compiled set).
    pub links_selected_count: usize,
    /// Links actually written to the output tree (includes install-policy links like `localtime`).
    pub links_materialized_count: usize,
    /// Valid links excluded by selection/profile (target not compiled). Policy, not error.
    pub links_omitted_count: usize,
    /// Links whose chain does not resolve to a real zone (missing/cycle/self). Error, not omission.
    pub links_failed_count: usize,
    /// SHA-256 of the **deterministic** `alias-map.json` serialization (sorted by identifier, fixed
    /// field order, LF, no timestamps) — binds this manifest to a specific alias map.
    pub alias_map_sha256: String,
    /// SHA-256 over the sorted selected-link names (LF-joined) — order-independent set identity.
    pub selected_links_sha256: String,
    /// SHA-256 over the sorted omitted-link names (LF-joined).
    pub omitted_links_sha256: String,
}
580
/// What the *admitted source evidence* mechanically proves about whether the `backward` source
/// participated in this build (T12.4d). **Bounded to an admitted artifact** — never a universal
/// claim and never inferred from the alias/link surface. Kept separate from [`BackwardClaim`],
/// which records what was merely asserted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackwardDetected {
    /// The admitted backward source's bytes are present among `source_inputs` (hash-backed).
    Present,
    /// The admitted backward source's bytes are **not** in the build (hash-backed). This is absence
    /// *of the admitted artifact*, not a proof that "no backward data exists anywhere".
    Absent,
    /// No backward source was admitted, so nothing is mechanically proven. The honest default.
    Unknown,
}
594
/// What the build/user/config **explicitly asserts** about `backward` membership (T12.4d) — a bare
/// claim, recorded separately from (and never trusted as) detection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackwardClaim {
    /// The claim asserts that `backward` is part of this build.
    Included,
    /// The claim asserts that `backward` is not part of this build.
    Excluded,
    /// No claim was made.
    None,
}
604
605/// The `backward` **evidence axis** (T12.4d) — mirrors T12.2's detected-vs-claimed version
606/// reconciliation. **The alias surface is output identity, not source provenance:** a build can
607/// expose legacy-looking aliases without proving the tzdb `backward` source participated, and the
608/// absence of such aliases does not prove `backward` was excluded. So `backward` is recorded as a
609/// reconciled evidence axis, **never a boolean**, and stays `Unknown` unless admitted by hash-backed
610/// evidence or an explicit claim. **Admission law — `backward` status may be admitted only from
611/// (1) hash-backed source evidence or (2) an explicit claim; it must NOT be inferred from alias
612/// count, alias names, output filenames, source filenames alone, link target names, selected/
613/// omitted/failed link counts, or legacy-looking identifiers.**
614#[derive(Debug, Clone)]
615pub struct BackwardEvidence {
616    pub detected: BackwardDetected,
617    pub claimed: BackwardClaim,
618    /// SHA-256 of the *admitted* backward source (present or absent), when one was admitted. This is
619    /// the hash whose presence/absence in `source_inputs` produced `detected`.
620    pub evidence_sha256: Option<String>,
621}
622
623impl BackwardEvidence {
624    /// Build the evidence axis from the admitted inputs (T12.4d). **This is the entire admission
625    /// law in code:** detection comes *only* from an admitted backward `source` file, hash-checked
626    /// against the build's `source_inputs`:
627    /// - admitted file's bytes are among `source_inputs` → `Present` (hash-backed);
628    /// - admitted file's bytes are **not** among them → `Absent` (hash-backed; bounded to *this*
629    ///   artifact — never a universal "no backward exists" claim);
630    /// - no file admitted → `Unknown` (we refuse to infer from alias counts, names, or filenames).
631    ///
632    /// The bare `claim` is recorded independently and never promoted to detection. Nothing here reads
633    /// the link/alias surface, so `backward` can never be inferred from it.
634    pub fn reconcile(source_inputs: &SourceInputs, args: &SourceVariantArgs) -> Result<Self> {
635        let claimed = match args.backward_claim {
636            Some(true) => BackwardClaim::Included,
637            Some(false) => BackwardClaim::Excluded,
638            None => BackwardClaim::None,
639        };
640        let (detected, evidence_sha256) = match &args.backward_source {
641            Some(path) => {
642                let bytes = std::fs::read(path).map_err(|e| Error::io(path, e))?;
643                let h = sha256_hex(&bytes);
644                let present = source_inputs.files.iter().any(|f| f.sha256 == h);
645                let d = if present {
646                    BackwardDetected::Present
647                } else {
648                    BackwardDetected::Absent
649                };
650                (d, Some(h))
651            }
652            None => (BackwardDetected::Unknown, None),
653        };
654        Ok(BackwardEvidence {
655            detected,
656            claimed,
657            evidence_sha256,
658        })
659    }
660
661    /// Reconcile detected vs claimed into a single status string (cf. [`TzdbProvenance::version_status`]).
662    /// Detection always outranks a bare claim for the agreement/conflict verdicts; an unverified claim
663    /// is explicitly labelled `*_unverified` so it can never be mistaken for a detected fact.
664    pub fn status(&self) -> &'static str {
665        use BackwardClaim as C;
666        use BackwardDetected as D;
667        match (self.detected, self.claimed) {
668            (D::Present, C::Included) | (D::Absent, C::Excluded) => "detected_matches_claim",
669            (D::Present, C::Excluded) | (D::Absent, C::Included) => "detected_contradicts_claim",
670            (D::Present, C::None) => "detected_present",
671            (D::Absent, C::None) => "detected_absent",
672            (D::Unknown, C::Included) => "claimed_present_unverified",
673            (D::Unknown, C::Excluded) => "claimed_absent_unverified",
674            (D::Unknown, C::None) => "unknown_no_evidence",
675        }
676    }
677
678    fn detected_str(&self) -> &'static str {
679        match self.detected {
680            BackwardDetected::Present => "present",
681            BackwardDetected::Absent => "absent",
682            BackwardDetected::Unknown => "unknown",
683        }
684    }
685
686    fn claimed_str(&self) -> &'static str {
687        match self.claimed {
688            BackwardClaim::Included => "included",
689            BackwardClaim::Excluded => "excluded",
690            BackwardClaim::None => "none",
691        }
692    }
693}
694
/// SHA-256 of the **pristine IANA tzdb 2026b `backzone` file** — admitted, signature-verified and
/// pinned in T12.5a.2 (`reports/t12_5a2-reference-admission.md`). The `backzone` evidence detector
/// (T12.5b) asks whether *this exact file* took part in a build's `source_inputs`. The check is
/// hash-backed and version-scoped to 2026b: a later release has a different hash and needs its own
/// admission.
pub const REF_2026B_BACKZONE_SHA256: &str =
    "63fb39adae0b0d8b2179629725a9dfb694c7a386b99750b636a017d896d28dfa";
701
/// Detection verdict for `backzone`/`PACKRATDATA` participation (T12.5b). **Presence is
/// hash-backed; absence is NOT asserted.** The canonical `backzone` file failing to appear among
/// `source_inputs` does *not* prove backzone data is absent (it can be merged into a concatenated
/// `.zi`), so non-presence is reported as `Unknown` — never as a false "absent".
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BackzoneDetected {
    /// Hash-backed: the pinned reference `backzone` file's bytes appear among `source_inputs`.
    Present,
    /// No hash-backed evidence either way: the canonical file was not seen, but it may have been
    /// merged, so nothing can be concluded.
    Unknown,
}
713
/// The explicit build/user assertion about `backzone` membership (T12.5b). Stored separately from
/// detection and never promoted to it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BackzoneClaim {
    /// `backzone` was claimed to be included.
    Included,
    /// `backzone` was claimed to be excluded.
    Excluded,
    /// No claim was made.
    None,
}
722
723/// The `backzone` / `PACKRATDATA` **source-membership evidence axis** (T12.5b) — mirrors
724/// `BackwardEvidence`, but detection is anchored to the *pinned reference release's* `backzone` hash
725/// ([`REF_2026B_BACKZONE_SHA256`]). **Source membership, hash-backed or claim-only, never inferred**
726/// from aliases, zone names, link counts, output byte shape, pre-1970 differences, or `DATAFORM`.
727/// **Scope:** whether `backzone` participated at all — the *subset-vs-all* (`PACKRATLIST`) distinction
728/// is T12.5c, and `DATAFORM` is T12.5d.
729#[derive(Debug, Clone)]
730pub struct BackzoneEvidence {
731    pub detected: BackzoneDetected,
732    pub claimed: BackzoneClaim,
733    /// The pinned reference `backzone` hash, when detected present.
734    pub evidence_sha256: Option<String>,
735}
736
737impl BackzoneEvidence {
738    /// Detect `backzone` participation by checking whether the pinned reference `backzone` hash
739    /// (`reference_backzone_sha256`) appears among `source_inputs`. Hash-backed, version-scoped. The
740    /// claim is recorded independently. **Does not read the link/alias surface — inference is
741    /// impossible by construction.** (`reference_backzone_sha256` is injected so the detector is unit-
742    /// testable without vendoring the large reference file; production passes [`REF_2026B_BACKZONE_SHA256`].)
743    pub fn reconcile(
744        source_inputs: &SourceInputs,
745        claim: Option<bool>,
746        reference_backzone_sha256: &str,
747    ) -> Self {
748        let claimed = match claim {
749            Some(true) => BackzoneClaim::Included,
750            Some(false) => BackzoneClaim::Excluded,
751            None => BackzoneClaim::None,
752        };
753        let present = source_inputs
754            .files
755            .iter()
756            .any(|f| f.sha256 == reference_backzone_sha256);
757        let (detected, evidence_sha256) = if present {
758            (
759                BackzoneDetected::Present,
760                Some(reference_backzone_sha256.to_string()),
761            )
762        } else {
763            (BackzoneDetected::Unknown, None)
764        };
765        BackzoneEvidence {
766            detected,
767            claimed,
768            evidence_sha256,
769        }
770    }
771
772    /// Reconcile detected vs claimed (cf. [`BackwardEvidence::status`]). No `detected_absent` —
773    /// absence is never asserted for `backzone` (see [`BackzoneDetected`]).
774    pub fn status(&self) -> &'static str {
775        use BackzoneClaim as C;
776        use BackzoneDetected as D;
777        match (self.detected, self.claimed) {
778            (D::Present, C::Included) => "detected_matches_claim",
779            (D::Present, C::Excluded) => "detected_contradicts_claim",
780            (D::Present, C::None) => "detected_present",
781            (D::Unknown, C::Included) => "claimed_present_unverified",
782            (D::Unknown, C::Excluded) => "claimed_absent_unverified",
783            (D::Unknown, C::None) => "unknown_no_evidence",
784        }
785    }
786
787    fn detected_str(&self) -> &'static str {
788        match self.detected {
789            BackzoneDetected::Present => "present",
790            BackzoneDetected::Unknown => "unknown",
791        }
792    }
793
794    fn claimed_str(&self) -> &'static str {
795        match self.claimed {
796            BackzoneClaim::Included => "included",
797            BackzoneClaim::Excluded => "excluded",
798            BackzoneClaim::None => "none",
799        }
800    }
801}
802
/// SHA-256 of the **pristine IANA tzdb 2026b `zone.tab`** (admitted + pinned in T12.5a.2) — the
/// canonical `PACKRATLIST` subset-selector. **Pinned for the test fixture / documentation only:**
/// the detector does *not* fire merely because `zone.tab` shows up among inputs. `zone.tab` is a
/// normal selection table, so its presence proves nothing about `PACKRATLIST`; see
/// [`PackratlistEvidence`].
pub const REF_2026B_ZONE_TAB_SHA256: &str =
    "4d8e389e5f4b0ec0466d5b14f42e5dfb0308c4376165fcf478339afd9ddcb00c";
809
/// What the admitted evidence proves about the **`backzone` *scope*** (`PACKRATLIST`) — T12.5c.
///
/// **Category boundary (the empirical finding):** `PACKRATLIST` is a *generation-policy* input,
/// **not a `zic` compile source** — its list (`zone.tab`) filters `backzone` at generation time
/// and is baked into the produced `.zi`. Scope is therefore **not recoverable from
/// `source_inputs` (compile inputs)**: reading `zone.tab` among inputs as evidence would be a
/// category error, and its absence proves nothing.
///
/// The only hash-backed detection is **`SubsetFromPolicyInput`**: an *explicitly admitted*
/// `PACKRATLIST` selector (`--packratlist-source`), hashed as a **policy input**, whose hash
/// **equals the pinned reference `zone.tab` hash**, alongside a present `backzone`. Everything
/// else is **`Unknown`**; `full`/`none` are claim-only.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackratlistDetected {
    /// A hash-backed `PACKRATLIST` **policy input** matching the pinned reference was admitted
    /// alongside a present `backzone` (bounded to that artifact — does **not** assert the
    /// generation step applied the filter).
    SubsetFromPolicyInput,
    /// No hash-backed scope evidence (no admitted policy input, an admitted input that does not
    /// match the pinned reference, or no backzone).
    Unknown,
}

/// What the build/user explicitly asserts about `backzone` scope (T12.5c). `--packratlist
/// {full|subset|none}`: `full` = all backzone (`PACKRATLIST` empty), `subset` = filtered
/// (`PACKRATLIST=zone.tab`), `none` = no backzone (`PACKRATDATA` empty).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackratlistClaim {
    /// Claimed: all of `backzone`.
    Full,
    /// Claimed: a filtered subset of `backzone`.
    Subset,
    /// Claimed: no `backzone` at all.
    None,
    /// No scope claim was made (or the claim string was not one of the recognised values).
    NotClaimed,
}

/// The `PACKRATLIST` **backzone-scope evidence axis** (T12.5c) — mirrors the other axes
/// (`detected`/`claimed`/`status`/`evidence_sha256`). **Subset is the only hash-backed detection**
/// (admitted subset-list matched the pinned reference + backzone present); never inferred from
/// output zone counts, alias counts, filenames, `zone.tab`/`zone1970.tab` presence alone, link
/// counts, pre-1970 differences, or global-tz-like output shape.
#[derive(Debug, Clone)]
pub struct PackratlistEvidence {
    /// What the hash-backed evidence established.
    pub detected: PackratlistDetected,
    /// What was explicitly asserted, kept separate from `detected`.
    pub claimed: PackratlistClaim,
    /// The admitted `PACKRATLIST`-source hash, when a subset was hash-detected. Because detection
    /// requires equality with the reference hash, this is also the reference `zone.tab` hash.
    pub evidence_sha256: Option<String>,
}

impl PackratlistEvidence {
    /// Build the scope axis from a **generation-policy input** (T12.5c).
    ///
    /// **Category boundary (the empirical finding):** `PACKRATLIST` is a *generation-time* selector
    /// (the Makefile filters `backzone` *before* `zic`), and its list (`zone.tab`) is **not a
    /// compilable `zic` source**. It does not belong in `source_inputs`, and detection is never
    /// keyed off `source_inputs` membership — this function does not even receive them.
    ///
    /// Parameters:
    /// - `claim`: the bare `--packratlist` assertion. `"full"`, `"subset"` and `"none"` map to the
    ///   matching [`PackratlistClaim`] variant; `None` or any other string is `NotClaimed`.
    /// - `admitted_policy_input_sha256`: SHA-256 of a `--packratlist-source` selector the caller
    ///   explicitly admitted (`None` = none admitted).
    /// - `reference_zone_tab_sha256`: the pinned reference `zone.tab` hash that the admitted
    ///   selector must equal for detection to fire.
    /// - `backzone_present`: whether `backzone` was detected present (a subset list is meaningless
    ///   without backzone data).
    ///
    /// `SubsetFromPolicyInput` is reported only when **all** of these hold: a selector was
    /// admitted, its hash equals `reference_zone_tab_sha256`, and `backzone_present` is true. An
    /// arbitrary admitted file, a different release's table, or no backzone all yield `Unknown`;
    /// `full`/`none` are claim-only. Nothing here reads compile inputs, the link/alias surface, or
    /// output shape.
    ///
    /// **Bounded meaning:** an admitted hash-backed selector is stronger than a bare claim, but
    /// does **not** prove the generation step actually applied the filter.
    pub fn reconcile(
        claim: Option<&str>,
        admitted_policy_input_sha256: Option<&str>,
        reference_zone_tab_sha256: &str,
        backzone_present: bool,
    ) -> Self {
        let claimed = match claim {
            Some("full") => PackratlistClaim::Full,
            Some("subset") => PackratlistClaim::Subset,
            Some("none") => PackratlistClaim::None,
            _ => PackratlistClaim::NotClaimed,
        };
        // Version-scoped + category-correct: the admitted policy input must BE the pinned
        // reference `zone.tab` (hash equality) and `backzone` must be present.
        let detected_subset =
            backzone_present && admitted_policy_input_sha256 == Some(reference_zone_tab_sha256);
        let (detected, evidence_sha256) = if detected_subset {
            (
                PackratlistDetected::SubsetFromPolicyInput,
                Some(reference_zone_tab_sha256.to_string()),
            )
        } else {
            (PackratlistDetected::Unknown, None)
        };
        PackratlistEvidence {
            detected,
            claimed,
            evidence_sha256,
        }
    }

    /// Reconcile detected vs claimed (cf. [`BackzoneEvidence::status`]). A bare claim with no
    /// hash-backed detection is `claimed_*_not_hash_backed` — never promoted to detection.
    pub fn status(&self) -> &'static str {
        use PackratlistClaim as C;
        use PackratlistDetected as D;
        // Both enums are `Copy`; match by value like the sibling evidence axes.
        match (self.detected, self.claimed) {
            (D::SubsetFromPolicyInput, C::Subset) => "detected_matches_claim",
            (D::SubsetFromPolicyInput, C::Full) | (D::SubsetFromPolicyInput, C::None) => {
                "detected_contradicts_claim"
            }
            (D::SubsetFromPolicyInput, C::NotClaimed) => "detected_subset_from_policy_input",
            (D::Unknown, C::Full) => "claimed_full_not_hash_backed",
            (D::Unknown, C::Subset) => "claimed_subset_not_hash_backed",
            (D::Unknown, C::None) => "claimed_none_not_hash_backed",
            (D::Unknown, C::NotClaimed) => "unknown_no_evidence",
        }
    }

    /// Lower-case label for `detected`.
    fn detected_str(&self) -> &'static str {
        match self.detected {
            PackratlistDetected::SubsetFromPolicyInput => "subset_from_policy_input",
            PackratlistDetected::Unknown => "unknown",
        }
    }

    /// Lower-case label for `claimed`.
    fn claimed_str(&self) -> &'static str {
        match self.claimed {
            PackratlistClaim::Full => "full",
            PackratlistClaim::Subset => "subset",
            PackratlistClaim::None => "none",
            PackratlistClaim::NotClaimed => "not_claimed",
        }
    }
}
931
932// ---------------------------------------------------------------------------------------------
933// DATAFORM (`main`/`vanguard`/`rearguard`) — the *encoding* evidence axis (T12.5d).
934//
935// **Category (the clean mental model):** `backzone` = source-*membership* evidence; `zone.tab` =
936// generation-*policy* evidence; `vanguard.zi`/`main.zi`/`rearguard.zi` = **generated-artifact**
937// evidence; `DATAFORM` = the upstream *encoding-policy* those artifacts realise. Crucially, unlike
938// `PACKRATLIST`'s `zone.tab` (a non-compilable policy table), the three `.zi` artifacts **are
939// compilable `zic` sources** — so DATAFORM detection is *category-correct* from `source_inputs`
940// membership (it mirrors `backzone`, not `packratlist`): if you compiled `vanguard.zi`, its bytes
941// are a `source_input`, and that is the only honest hash-backed signal of the encoding form.
942//
943// **Central law:** DATAFORM is admitted **only** by a hash-backed match against the pinned 2026b
944// generated artifacts, or by an explicit claim — **never** by inspecting source syntax (mainline
945// 2026b already uses negative `SAVE`, so "negative SAVE ⇒ vanguard" is provably wrong), output
946// shape, zone names, filenames, `PACKRATLIST`/`backzone`, or diagnostic behaviour. `ziguard.awk` is
947// **not** treated as a general converter (it targets *current* tzdata, is neither idempotent nor
948// reversible); the `.zi` witnesses are recorded as *generated reference artifacts* with a
949// `recipe_hash`, not as something zic-rs can reproduce or transform.
950
/// SHA-256 of the pinned 2026b complete-distribution archive (`tzdb-2026b.tar.lz`), admitted and
/// signature-verified in T12.5a.2. It is a `recipe_hash` input: it transitively binds every
/// shipped file (`Makefile`, `ziguard.awk`, the region sources) consumed by the DATAFORM
/// generation.
pub const REF_2026B_ARCHIVE_SHA256: &str =
    "ffad46a04c8d1624197056630af475a35f3556d0887f028ac1bd33b7d47dc653";

/// SHA-256 of the pinned 2026b `Makefile`, which holds the `DATAFORM`/`ziguard.awk` generation
/// rules. A `recipe_hash` input.
pub const REF_2026B_MAKEFILE_SHA256: &str =
    "0b4588ea467c969b23fc48335e91eb63f403574b4aac69380b84a00373c7e81d";

/// SHA-256 of the pinned 2026b `ziguard.awk` (the DATAFORM transform). A `recipe_hash` input. It
/// is pinned explicitly, even though it also ships inside the archive, so that the recipe binding
/// stays legible.
pub const REF_2026B_ZIGUARD_AWK_SHA256: &str =
    "e4600a2360b692242d6da76666411ece8ada76b61e6f8fb69cec79592b261785";

/// The exact `make` invocation that produced the pinned DATAFORM `.zi` artifacts. A `recipe_hash`
/// input: a different command is a different recipe identity.
pub const REF_2026B_DATAFORM_COMMAND: &str = "make vanguard.zi main.zi rearguard.zi";

/// The toolchain that ran the DATAFORM generation. It is recorded because the `.zi` witnesses are
/// *derived* — a different awk could in principle produce different output. A `recipe_hash` input.
pub const REF_2026B_DATAFORM_TOOLCHAIN: &str = "GNU Make 4.4.1; GNU Awk 5.4.0";

/// Short, stable tag naming the release the pinned DATAFORM artifacts were generated from. It is
/// stamped into `generated_from` when a form is detected.
pub const REF_2026B_DATAFORM_GENERATED_FROM: &str = "tzdb-2026b";
978
/// SHA-256 of the pinned 2026b `main.zi` — the generated artifact for the default `DATAFORM=main`
/// (T12.5a.2).
pub const REF_2026B_MAIN_ZI_SHA256: &str =
    "e0225823ae0c3a99a016a4afd7e3c48cfd948132b65fbaa596a47c53ae45e4e1";

/// SHA-256 of the pinned 2026b `vanguard.zi` — the generated artifact for `DATAFORM=vanguard`.
pub const REF_2026B_VANGUARD_ZI_SHA256: &str =
    "49e16da4a6252a2e432fc1f68bf6daac9a6f73507dde3e3bdbcbbf78e86727ce";

/// SHA-256 of the pinned 2026b `rearguard.zi` — the generated artifact for `DATAFORM=rearguard`.
pub const REF_2026B_REARGUARD_ZI_SHA256: &str =
    "91c4f362a6bb297efd3cd35bce6b62367a4c00a9721a773bae0cbb0d1bf9fe23";
990
991/// Compute the **`recipe_hash`** that binds the *generation provenance* of the pinned DATAFORM `.zi`
992/// artifacts (T12.5d). It is a SHA-256 over a deterministic, labelled, newline-joined record of the
993/// recipe inputs — the archive hash, the `Makefile` hash, the `ziguard.awk` hash, the generation
994/// command, and the toolchain — **hashed as raw UTF-8 bytes, never line-ending-normalized** (a
995/// transformed copy with different newline bytes is a *different* artifact and must hash differently).
996/// The produced artifact itself is bound separately via the evidence axis's `evidence_sha256`; this
997/// value answers "by what recipe was that artifact generated", so a generated artifact is never just
998/// "hash matched" — it is "hash matched, and here is the recorded, reproducible recipe".
999pub fn dataform_recipe_hash(
1000    archive_sha256: &str,
1001    makefile_sha256: &str,
1002    ziguard_awk_sha256: &str,
1003    command: &str,
1004    toolchain: &str,
1005) -> String {
1006    let recipe = format!(
1007        "archive_sha256={archive_sha256}\nmakefile_sha256={makefile_sha256}\n\
1008         ziguard_awk_sha256={ziguard_awk_sha256}\ncommand={command}\ntoolchain={toolchain}\n"
1009    );
1010    crate::hash::sha256_hex(recipe.as_bytes())
1011}
1012
/// The pinned-release DATAFORM reference handed to [`DataformEvidence::reconcile`]. It is injected
/// so the detector can be unit-tested without vendoring the large `.zi` files. Production
/// assembles it from the `REF_2026B_*` consts plus the computed [`dataform_recipe_hash`].
#[derive(Clone, Copy, Debug)]
pub struct DataformReference<'a> {
    /// Hash of the pinned `main` artifact.
    pub main_sha256: &'a str,
    /// Hash of the pinned `vanguard` artifact.
    pub vanguard_sha256: &'a str,
    /// Hash of the pinned `rearguard` artifact.
    pub rearguard_sha256: &'a str,
    /// The shared generation recipe hash; stamped into the evidence when a form is detected.
    pub recipe_hash: &'a str,
    /// The release tag the artifacts were generated from (e.g. `"tzdb-2026b"`).
    pub generated_from: &'a str,
}
1026
/// The **encoding form** that the admitted source bytes match (T12.5d). The verdict is hash-backed
/// against the pinned generated artifacts and is **never** inferred from syntax, output, or names.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DataformDetected {
    /// A source input matched the pinned `main` artifact hash.
    Main,
    /// A source input matched the pinned `vanguard` artifact hash.
    Vanguard,
    /// A source input matched the pinned `rearguard` artifact hash.
    Rearguard,
    /// No admitted source matched a pinned DATAFORM artifact hash (e.g. a zishrunk `tzdata.zi`, a
    /// concatenated build, or a different release), so the encoding form is not hash-recoverable.
    Unknown,
}
1038
/// The explicit build/user assertion about the encoding form (`--dataform
/// {main|vanguard|rearguard}`). It is stored separately from detection and never promoted to it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DataformClaim {
    /// Claimed `main`.
    Main,
    /// Claimed `vanguard`.
    Vanguard,
    /// Claimed `rearguard`.
    Rearguard,
    /// No claim was made.
    None,
}
1048
1049/// The `DATAFORM` **encoding evidence axis** (T12.5d). Mirrors the other axes
1050/// (`detected`/`claimed`/`status`/`evidence_sha256`) and adds two provenance fields specific to a
1051/// *generated* artifact: `recipe_hash` (how it was produced) and `generated_from` (which release).
1052/// **Encoding evidence — hash-backed or claim-only, never inferred** from negative `SAVE` or any
1053/// other syntax resemblance, output shape, zone names, source filenames, `PACKRATLIST`, `backzone`,
1054/// or diagnostics.
1055#[derive(Debug, Clone)]
1056pub struct DataformEvidence {
1057    pub detected: DataformDetected,
1058    pub claimed: DataformClaim,
1059    /// The matched pinned artifact hash, when a form was detected.
1060    pub evidence_sha256: Option<String>,
1061    /// The generation `recipe_hash` of the matched artifact, when detected (see [`dataform_recipe_hash`]).
1062    pub recipe_hash: Option<String>,
1063    /// The release the matched artifact was generated from (e.g. `"tzdb-2026b"`), when detected.
1064    pub generated_from: Option<String>,
1065}
1066
1067impl DataformEvidence {
1068    /// Detect the encoding form by checking whether any `source_input`'s hash equals one of the
1069    /// pinned DATAFORM artifact hashes (`reference`). Category-correct: the `.zi` artifacts are
1070    /// compilable `zic` sources, so `source_inputs` membership is the honest signal (cf. `backzone`).
1071    /// The claim is recorded independently. **Reads only file hashes — never source syntax, output
1072    /// shape, names, or the link/alias surface, so inference is impossible by construction.** When a
1073    /// form is detected, the shared `recipe_hash`/`generated_from` are stamped so the artifact carries
1074    /// its generation provenance, not merely a matched hash.
1075    pub fn reconcile(
1076        source_inputs: &SourceInputs,
1077        claim: Option<&str>,
1078        reference: &DataformReference,
1079    ) -> Self {
1080        let claimed = match claim {
1081            Some("main") => DataformClaim::Main,
1082            Some("vanguard") => DataformClaim::Vanguard,
1083            Some("rearguard") => DataformClaim::Rearguard,
1084            _ => DataformClaim::None,
1085        };
1086        // First admitted source whose hash matches a pinned artifact wins; the three reference hashes
1087        // are distinct, so at most one form can match a given file.
1088        let mut detected = DataformDetected::Unknown;
1089        let mut evidence_sha256 = None;
1090        for f in &source_inputs.files {
1091            if f.sha256 == reference.main_sha256 {
1092                detected = DataformDetected::Main;
1093            } else if f.sha256 == reference.vanguard_sha256 {
1094                detected = DataformDetected::Vanguard;
1095            } else if f.sha256 == reference.rearguard_sha256 {
1096                detected = DataformDetected::Rearguard;
1097            } else {
1098                continue;
1099            }
1100            evidence_sha256 = Some(f.sha256.clone());
1101            break;
1102        }
1103        let (recipe_hash, generated_from) = if evidence_sha256.is_some() {
1104            (
1105                Some(reference.recipe_hash.to_string()),
1106                Some(reference.generated_from.to_string()),
1107            )
1108        } else {
1109            (None, None)
1110        };
1111        DataformEvidence {
1112            detected,
1113            claimed,
1114            evidence_sha256,
1115            recipe_hash,
1116            generated_from,
1117        }
1118    }
1119
1120    /// Reconcile detected vs claimed. A bare claim with no hash-backed detection is `claim_only` —
1121    /// never promoted to detection.
1122    pub fn status(&self) -> &'static str {
1123        use DataformClaim as C;
1124        use DataformDetected as D;
1125        let claim_form = match self.claimed {
1126            C::Main => Some(D::Main),
1127            C::Vanguard => Some(D::Vanguard),
1128            C::Rearguard => Some(D::Rearguard),
1129            C::None => None,
1130        };
1131        match (self.detected, claim_form) {
1132            (D::Unknown, None) => "unknown_no_evidence",
1133            (D::Unknown, Some(_)) => "claim_only",
1134            (_, None) => "detected_only",
1135            (d, Some(c)) if d == c => "detected_matches_claim",
1136            (_, Some(_)) => "detected_contradicts_claim",
1137        }
1138    }
1139
1140    fn detected_str(&self) -> &'static str {
1141        match self.detected {
1142            DataformDetected::Main => "main",
1143            DataformDetected::Vanguard => "vanguard",
1144            DataformDetected::Rearguard => "rearguard",
1145            DataformDetected::Unknown => "unknown",
1146        }
1147    }
1148
1149    fn claimed_str(&self) -> &'static str {
1150        match self.claimed {
1151            DataformClaim::Main => "main",
1152            DataformClaim::Vanguard => "vanguard",
1153            DataformClaim::Rearguard => "rearguard",
1154            DataformClaim::None => "none",
1155        }
1156    }
1157}
1158
1159/// The **source profile** block (T12.4d; extended T12.5b/c/d) — a deliberate extension seam. It
1160/// carries the `backward` (T12.4d), `backzone` (T12.5b), `packratlist` backzone-scope (T12.5c), and
1161/// `dataform` encoding (T12.5d) evidence axes, each detected/claimed/status, hash-backed or
1162/// claim-only, never inferred.
1163#[derive(Debug, Clone)]
1164pub struct SourceProfile {
1165    pub backward: BackwardEvidence,
1166    pub backzone: BackzoneEvidence,
1167    pub packratlist: PackratlistEvidence,
1168    pub dataform: DataformEvidence,
1169}
1170
/// Caller-supplied inputs for the source-variant evidence axes: `backward` (T12.4d), `backzone`
/// (T12.5b), `PACKRATLIST` scope (T12.5c) and `DATAFORM` encoding (T12.5d).
///
/// **Provenance-only** — these never influence compilation, link materialisation, or the alias
/// map; they only feed the manifest's `source_profile`. The bare claims come from
/// `--backward`/`--backzone`/`--packratlist`/`--dataform`; `--backward-source` and
/// `--packratlist-source` admit files as hash-backed evidence.
#[derive(Debug, Clone, Default)]
pub struct SourceVariantArgs {
    /// `backward` claim: `Some(true)` = claimed included, `Some(false)` = excluded, `None` = no
    /// claim.
    pub backward_claim: Option<bool>,
    /// A file the caller asserts is the `backward` source; detection verifies whether its *bytes*
    /// participated in this build (it does **not** assert semantic identity as the IANA
    /// `backward`).
    pub backward_source: Option<std::path::PathBuf>,
    /// `backzone` (`PACKRATDATA`) claim: `Some(true)` = claimed included, `Some(false)` =
    /// excluded, `None` = no claim. Detection is hash-anchored to the pinned reference release
    /// (T12.5b); this is the *claim* side only.
    pub backzone_claim: Option<bool>,
    /// `backzone` *scope* (`PACKRATLIST`) claim (T12.5c): `"full"` / `"subset"` / `"none"` (else
    /// no claim). The bare `--packratlist` assertion; never promoted to detection.
    pub packratlist_claim: Option<String>,
    /// A file the caller explicitly admits as the `PACKRATLIST` selector (`--packratlist-source`,
    /// T12.5c). It is a *generation-policy* input, not a compile input:
    /// [`PackratlistEvidence::reconcile`] takes the SHA-256 of an admitted selector and reports a
    /// subset only when that hash equals the pinned reference `zone.tab` hash **and** `backzone`
    /// is present. Mere `zone.tab` presence among inputs is **not** admission and never triggers
    /// a subset detection.
    // NOTE(review): the hashing of this path happens outside this struct — confirm at the call
    // site that the digest handed to `PackratlistEvidence::reconcile` comes from this file.
    pub packratlist_source: Option<std::path::PathBuf>,
    /// `DATAFORM` *encoding* claim (T12.5d): `"main"` / `"vanguard"` / `"rearguard"` (else no
    /// claim). The bare `--dataform` assertion; never promoted to detection. Detection is
    /// hash-backed against the pinned 2026b `.zi` artifacts via `source_inputs` membership — there
    /// is intentionally **no** `--dataform-source`: the `.zi` artifacts *are* compile inputs, so
    /// admitting one you did not compile would assert provenance for bytes the build never used.
    pub dataform_claim: Option<String>,
}
1200
1201// ===========================================================================================
1202// Provenance capability statement (T12.6) — a STATIC, run-independent description of the manifest
1203// schema this build emits and the **source-variant reference-pin gate** state. Surfaced read-only in
1204// `support-report`/`structural-report` so an operator/packager sees the trust boundary without
1205// reading manifest internals. It is deliberately NOT a per-run profile: a report run is not a
1206// configured output compile, so it has no honest `build_profile`/`link_profile`/`backward_evidence`
1207// of its own — those live in `compile --manifest` and are pointed to, never fabricated here.
1208// ===========================================================================================
1209
/// Current state of the source-variant reference-pin gate, which T12.5a.1 created and T12.5a.2
/// lifted. The value `"lifted_for_2026b"` records that the pristine IANA tzdb 2026b reference set
/// was fetched, **signature-verified**, and SHA-256-pinned (see
/// `reports/t12_5a2-reference-admission.md`), which unblocks T12.5b–d **for that pinned reference
/// only**. The status is version-scoped: a later release re-opens the gate until it has its own
/// admission. This constant is the single source of truth for the reports' provenance block.
/// *(Admission is not implementation — see [`SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED`].)*
pub const SOURCE_VARIANT_GATE_STATUS: &str = "lifted_for_2026b";
1217
1218/// **Which oracle backed a report's verdicts** (T15.2 — CONTRACT.TYPING). The single owner type for the
1219/// oracle-mode vocabulary: as of **T15.2a**, [`OracleResult::mode`](OracleResult) is this enum too (no
1220/// claim-bearing path emits the vocabulary as a free string). Reports render [`mode_str`](Self::mode_str)
1221/// (canonical snake_case); the `zic-rs-compile-manifest-v8` `oracle.mode` field renders
1222/// [`manifest_str`](Self::manifest_str), a **boundary-only compatibility shim** that preserves the one
1223/// legacy value (`"not-run"`) the manifest has ever emitted — *removal plan:* canonicalize to `mode_str`
1224/// at the next manifest major bump (a drift test pins that the shim diverges for that one value only).
1225/// The rule it enforces: **oracle *absence* is visible** — a report renders `Unavailable(reason)` (→
1226/// `skipped_with_reason`), never silence, so a verdict can never *silently* weaken when reference tools
1227/// are missing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OracleMode {
    /// By design, no oracle was consulted (for example `support-report`, which measures
    /// compile-coverage rather than behaviour).
    NotRun,
    /// The oracle was the bytes emitted by reference `zic` (for example `structural-report`).
    ReferenceZic,
    /// The oracle was the behaviour decoded by reference `zdump` (the zdump mode of `compare`).
    ReferenceZdump,
    /// The oracle was a structural comparison of decoded TZif (the structural mode of `compare`).
    StructuralDecode,
    /// The oracle tool that was required could not be used, so the verdict was skipped; the payload
    /// is the reason.
    Unavailable(String),
}
1241
1242impl OracleMode {
1243    /// The stable snake_case discriminant.
1244    pub fn mode_str(&self) -> &'static str {
1245        match self {
1246            OracleMode::NotRun => "not_run",
1247            OracleMode::ReferenceZic => "reference_zic",
1248            OracleMode::ReferenceZdump => "reference_zdump",
1249            OracleMode::StructuralDecode => "structural_decode",
1250            OracleMode::Unavailable(_) => "unavailable",
1251        }
1252    }
1253
1254    /// The reason an oracle was skipped, when (and only when) it was [`Unavailable`](Self::Unavailable).
1255    pub fn skipped_with_reason(&self) -> Option<&str> {
1256        match self {
1257            OracleMode::Unavailable(reason) => Some(reason.as_str()),
1258            _ => None,
1259        }
1260    }
1261
1262    /// The **`zic-rs-compile-manifest-v8` boundary** rendering (T15.2a compatibility shim). The manifest
1263    /// path only ever holds [`NotRun`](Self::NotRun) and has historically emitted `"not-run"`
1264    /// (hyphenated); that one value is preserved here for back-compat. Every other variant has no legacy
1265    /// manifest form (they never appeared there), so this is identical to [`mode_str`](Self::mode_str) for
1266    /// them — i.e. the shim diverges for exactly one value, which a drift test pins. Removal plan:
1267    /// canonicalize to `mode_str` at the next manifest major bump.
1268    pub fn manifest_str(&self) -> &'static str {
1269        match self {
1270            OracleMode::NotRun => "not-run",
1271            other => other.mode_str(),
1272        }
1273    }
1274
1275    /// Render as the report's `oracle_mode` object: `{ "mode": …, "skipped_with_reason": …|null }`.
1276    /// Absence is always visible — `skipped_with_reason` is non-null exactly when the oracle was missing.
1277    pub fn to_json_field(&self) -> String {
1278        let reason = match self.skipped_with_reason() {
1279            Some(r) => json_str(r),
1280            None => "null".to_string(),
1281        };
1282        format!(
1283            "{{ \"mode\": {}, \"skipped_with_reason\": {} }}",
1284            json_str(self.mode_str()),
1285            reason
1286        )
1287    }
1288}
1289
1290/// A **non-claim**, made a first-class machine-visible contract (T15.2). Advertised restraint is
1291/// engineering, not decoration: each variant renders a stable snake_case string **and** names the
1292/// guard/test/receipt that *enforces* the boundary (`enforced_by`). "We don't claim X" is exactly where
1293/// infrastructure tools get sloppy — this makes each non-claim auditable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NegativeCapability {
    DoesNotClaimAllIanaReleasesWithoutAdmission,
    DoesNotClaimArbitraryTzifRoundtrip,
    DoesNotClaimFullToctouResistance,
    DoesNotClaimFutureCivilTimeAuthority,
    /// Leap seconds are emitted as **discrete** TZif leap-second records; leap *smearing* is never
    /// implemented by zic-rs.
    DoesNotClaimLeapSmearSemantics,
    /// How range truncation (`-r`) interacts with leap-expiry has **no semantic witness**, so no
    /// parity is claimed for it (a `Rolling` leap under `-r` is a hard error, not a parity claim).
    DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness,
    DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext,
    DoesNotClaimTzifValidatorAsSecuritySandbox,
    DoesNotClaimUnadmittedVendorParity,
    DoesNotCurateTimeOrDefineDisplayNames,
    /// On disk TZif is big-endian, and zic-rs writes it big-endian whatever the host endianness is.
    DoesNotDependOnHostEndianness,
    DoesNotInferDataformFromContent,
    DoesNotInferSourceVariantFromOutputShape,
    DoesNotRequireManifestToReadTzif,
    /// Vendor-oracle *receipts* are admitted by the core repo; QEMU/VM labs are neither run nor
    /// shipped there (T16.5).
    DoesNotShipOrOperateVendorQemuLabsInCoreRepo,
    DoesNotTreatManifestAsTzifSemantics,
}

impl NegativeCapability {
    /// Stable snake_case id for this non-claim; reports carry it in their `capability` field.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::DoesNotClaimAllIanaReleasesWithoutAdmission => {
                "does_not_claim_all_iana_releases_without_admission"
            }
            Self::DoesNotClaimArbitraryTzifRoundtrip => "does_not_claim_arbitrary_tzif_roundtrip",
            Self::DoesNotClaimFullToctouResistance => "does_not_claim_full_toctou_resistance",
            Self::DoesNotClaimFutureCivilTimeAuthority => {
                "does_not_claim_future_civil_time_authority"
            }
            Self::DoesNotClaimLeapSmearSemantics => "does_not_claim_leap_smear_semantics",
            Self::DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness => {
                "does_not_claim_range_truncation_leap_expiry_interaction_parity_without_witness"
            }
            Self::DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext => {
                "does_not_claim_report_authenticity_without_signature_or_reproducible_context"
            }
            Self::DoesNotClaimTzifValidatorAsSecuritySandbox => {
                "does_not_claim_tzif_validator_as_security_sandbox"
            }
            Self::DoesNotClaimUnadmittedVendorParity => "does_not_claim_unadmitted_vendor_parity",
            Self::DoesNotCurateTimeOrDefineDisplayNames => {
                "does_not_curate_time_or_define_display_names"
            }
            Self::DoesNotDependOnHostEndianness => "does_not_depend_on_host_endianness",
            Self::DoesNotInferDataformFromContent => "does_not_infer_dataform_from_content",
            Self::DoesNotInferSourceVariantFromOutputShape => {
                "does_not_infer_source_variant_from_output_shape"
            }
            Self::DoesNotRequireManifestToReadTzif => "does_not_require_manifest_to_read_tzif",
            Self::DoesNotShipOrOperateVendorQemuLabsInCoreRepo => {
                "does_not_ship_or_operate_vendor_qemu_labs_in_core_repo"
            }
            Self::DoesNotTreatManifestAsTzifSemantics => {
                "does_not_treat_manifest_as_tzif_semantics"
            }
        }
    }

    /// Names the guard, test, or receipt that **enforces** this non-claim. Every arm returns a
    /// non-empty string: T15.2 forbids a decorative non-claim with no enforcing reference.
    pub fn enforced_by(self) -> &'static str {
        match self {
            Self::DoesNotClaimAllIanaReleasesWithoutAdmission =>
                "T12.5a.3 release-admission matrix (only 2026b admitted)",
            Self::DoesNotClaimArbitraryTzifRoundtrip =>
                "T15.4 tzif/rfc9636 (a validator/reader is not a round-trip preservation claim)",
            Self::DoesNotClaimFullToctouResistance =>
                "T14.6 hostile-output-tree ledger (RequiresOpenatStyleHardening)",
            Self::DoesNotClaimFutureCivilTimeAuthority =>
                "docs/tzdb-governance.md + RFC 9557 (tzdb predicts; named-tz rules change; not a legal oracle)",
            Self::DoesNotClaimLeapSmearSemantics =>
                "T11 emits discrete TZif leap-second records (compile::apply_leaps / LeapRecord); no smearing path exists",
            Self::DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness =>
                "T11.4 — Rolling-leap-under-`-r` is a hard error (compile/leap.rs); the -r×leap-expiry interaction has no semantic witness",
            Self::DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext =>
                "T15.5 ConformanceStatus.report_provenance (default unsigned_local_report — not an attestation)",
            Self::DoesNotClaimTzifValidatorAsSecuritySandbox =>
                "T15.4 tzif/rfc9636 non-claim (bounds-safe, but not a hardened sandbox for hostile binaries)",
            Self::DoesNotClaimUnadmittedVendorParity =>
                "T13 reference-platform diagnostic matrix (only upstream_iana_2026b admitted)",
            Self::DoesNotCurateTimeOrDefineDisplayNames =>
                "docs/tzdb-governance.md (IANA/CLDR boundary; not zic-rs's role)",
            Self::DoesNotDependOnHostEndianness =>
                "tzif/header.rs + data writers emit big-endian fixed-width fields (to_be_bytes); byte-identical Etc/UTC fixture pins it",
            Self::DoesNotInferDataformFromContent =>
                "T12.5d test (negative-SAVE is not vanguard; hash-backed only)",
            Self::DoesNotInferSourceVariantFromOutputShape =>
                "T12.5 source_variants_not_inferred_* tests",
            Self::DoesNotRequireManifestToReadTzif =>
                "RFC 9636 (a TZif reader needs only the emitted bytes; manifest is a sidecar)",
            Self::DoesNotShipOrOperateVendorQemuLabsInCoreRepo =>
                "T16.5 vendor_oracle — core defines/admits receipts only; no VM images/QEMU orchestration vendored",
            Self::DoesNotTreatManifestAsTzifSemantics =>
                "reports/t12-close-receipt.md §5 (manifest is provenance, not TZif semantics)",
        }
    }
}
1411
1412/// The **evidence category** of an artifact — the T12 doctrine spine, made a typed report field (T15.3).
1413/// This is the typed guardrail the `zone.tab`-is-policy-not-compile error (T12.5c) earned: a claim-bearing
1414/// artifact must declare which category it belongs to, so input/policy/reference/generated/output kinds
1415/// can never be silently conflated. **The rule: no claim-bearing artifact enters a report without a
1416/// category owner.** (`semantic_witness` and `structural_validation` are distinct *output-evidence*
1417/// categories — a semantic witness proves selected behaviour under an oracle, NOT RFC 9636 structural
1418/// validity, which is `structural_validation` / T15.4.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtifactCategory {
    CompileInput,
    PolicyInput,
    ReferenceInput,
    GeneratedArtifact,
    OutputArtifact,
    DiagnosticArtifact,
    SemanticWitnessArtifact,
    StructuralValidationArtifact,
    /// Prose that does not compile but serves as *policy* evidence (e.g. `theory.html`, Makefile
    /// knobs, NEWS).
    PolicyProse,
    /// Evidence taken from release notes (e.g. tzdb NEWS entries) when reviewing a release delta.
    ReleaseNoteEvidence,
}

impl ArtifactCategory {
    /// Stable snake_case name of the category, as rendered in reports.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::CompileInput => "compile_input",
            Self::PolicyInput => "policy_input",
            Self::ReferenceInput => "reference_input",
            Self::GeneratedArtifact => "generated_artifact",
            Self::OutputArtifact => "output_artifact",
            Self::DiagnosticArtifact => "diagnostic_artifact",
            Self::SemanticWitnessArtifact => "semantic_witness_artifact",
            Self::StructuralValidationArtifact => "structural_validation_artifact",
            Self::PolicyProse => "policy_prose",
            Self::ReleaseNoteEvidence => "release_note_evidence",
        }
    }
}
1453
1454/// The **report kind** — so a reader never confuses a compile-coverage `support-report` with a
1455/// structural validation or a behaviour witness (each proves a different claim). (T15.5)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReportKind {
    /// Rendered as `"support"`.
    Support,
    /// Rendered as `"structural"`.
    Structural,
    /// Rendered as `"manifest"`.
    Manifest,
    /// Rendered as `"semantic_witness"`.
    SemanticWitness,
    /// Rendered as `"tzif_validation"`.
    TzifValidation,
}

impl ReportKind {
    /// Snake_case name of the report kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Support => "support",
            Self::Structural => "structural",
            Self::Manifest => "manifest",
            Self::SemanticWitness => "semantic_witness",
            Self::TzifValidation => "tzif_validation",
        }
    }
}
1476
1477/// A **bounded** conformance level (T15.5). It reflects *scope, not ambition* — there is deliberately no
1478/// `compatible` / `conformant: true`. A standalone `support-report` establishes compile-coverage over an
1479/// admitted release; the behaviour / structural / diagnostic axes are *separate surfaces* (this points to
1480/// them, it does not roll their results into a single global verdict).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConformanceLevel {
    NotEvaluated,
    /// The level a `support-report` establishes on its own: the zones of the admitted release
    /// compile, and **no oracle ran here**.
    ReleaseAdmittedCompileCoverage,
    StructurallyValidatedOnly,
    SemanticWitnessedOnly,
    KnownDivergencePresent,
    OracleUnavailable,
}

impl ConformanceLevel {
    /// Snake_case name of the level.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NotEvaluated => "not_evaluated",
            Self::ReleaseAdmittedCompileCoverage => "release_admitted_compile_coverage",
            Self::StructurallyValidatedOnly => "structurally_validated_only",
            Self::SemanticWitnessedOnly => "semantic_witnessed_only",
            Self::KnownDivergencePresent => "known_divergence_present",
            Self::OracleUnavailable => "oracle_unavailable",
        }
    }
}
1504
1505/// Whether the workspace that produced the report was clean (T15.5). Honest by default: without a git
1506/// tree (this project ships from an archive, not a checked-out repo, and has **no `build.rs`** to capture
1507/// VCS state) this is `Unknown` — never fabricated as clean.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkspaceProvenance {
    /// The report was produced from a git tree known to be clean.
    CleanGitTree,
    /// The report was produced from a git tree known to be dirty.
    DirtyGitTree,
    /// The report was produced from a source archive.
    SourceArchive,
    /// Workspace state was not established; it is never reported as clean by default.
    Unknown,
}
1515
1516/// The authenticity status of the report artifact itself (T15.5 — *a public report is a claim surface,
1517/// not an unexamined trust root*). Default is an unsigned local report: useful, but not an attestation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReportProvenance {
    /// A local report with no signature: useful, but not an attestation.
    UnsignedLocalReport,
    /// A report marked as a reproducible CI artifact.
    ReproducibleCiArtifact,
    /// A report marked as a signed release artifact.
    SignedReleaseArtifact,
}
1524
1525/// Which tool build produced the output (T15.5). `rustc`/`git_commit`/full target-triple are **honestly
1526/// `unknown`** here because the project deliberately has no `build.rs` to capture them — disclosed, not
1527/// faked. `zic_rs_version` is the crate version; `target` is an `arch-os` approximation; `profile` is
1528/// debug/release.
#[derive(Debug, Clone)]
pub struct CompilerIdentity {
    /// Version of the zic-rs crate.
    pub zic_rs_version: &'static str,
    /// `rustc` version, when one was made available; `None` means unknown.
    pub rustc: Option<&'static str>,
    /// An `arch-os` approximation of the target, not a full target triple.
    pub target: String,
    /// Build profile: `"debug"` or `"release"`.
    pub profile: &'static str,
    /// Git commit, when one was made available; `None` means unknown.
    pub git_commit: Option<&'static str>,
}
1537
1538impl CompilerIdentity {
1539    pub fn capture() -> Self {
1540        CompilerIdentity {
1541            zic_rs_version: env!("CARGO_PKG_VERSION"),
1542            // No `build.rs` → these are not captured at build time; honestly `None`, never invented.
1543            rustc: option_env!("ZIC_RS_RUSTC_VERSION"),
1544            target: format!("{}-{}", std::env::consts::ARCH, std::env::consts::OS),
1545            profile: if cfg!(debug_assertions) {
1546                "debug"
1547            } else {
1548                "release"
1549            },
1550            git_commit: option_env!("ZIC_RS_GIT_COMMIT"),
1551        }
1552    }
1553}
1554
1555/// The release-admission pin gate as a **type** (T15.5-remainder) rather than the bare
1556/// `SOURCE_VARIANT_GATE_STATUS` string. It renders the *same* literal at the JSON boundary (so no schema
1557/// churn), but the vocabulary is now exhaustive and totality-tested — a drift test pins
1558/// `current().as_str() == SOURCE_VARIANT_GATE_STATUS`. `Open` = no release admitted; `LiftedFor2026b` =
1559/// the single 2026b release is admitted (signature-verified + hash-pinned, per T12.5a.2).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferencePinGate {
    /// No release is admitted; rendered as `"open"`.
    Open,
    /// The single 2026b release is admitted; rendered as `"lifted_for_2026b"`.
    LiftedFor2026b,
}

impl ReferencePinGate {
    /// Literal written at the JSON boundary for this gate state.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Open => "open",
            Self::LiftedFor2026b => "lifted_for_2026b",
        }
    }

    /// Gate state shipped by this build. It is meant to stay in step with
    /// `SOURCE_VARIANT_GATE_STATUS`.
    pub fn current() -> Self {
        Self::LiftedFor2026b
    }
}
1578
1579/// **Where an admitted reference came from** (T16.3) — the "*which* reference?" question's *location*
1580/// half. The central rule: **only a `VersionedArchive` (a release tarball you can re-fetch and re-pin)
1581/// can back a *sealed* release claim**; the others support exploration/diagnosis but not a sealed claim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceLocatorKind {
    /// A release archive that is pinned and can be fetched again (e.g. the T12.5a.2
    /// `tzdb-2026b.tar.lz`). This is the sealed-claim grade.
    VersionedArchive,
    /// Whichever `zic`/`zdump` currently sits on `PATH`; it can change at any time, so it is for
    /// exploration only.
    LiveCurrentDirectory,
    /// A locally cached copy of the bytes, whose integrity depends on how it was pinned.
    LocalCachedCopy,
    /// A distribution's source package: a patch-stack over upstream with its own provenance.
    DistroSourcePackage,
    /// The provenance has not been established.
    Unknown,
}

impl ReferenceLocatorKind {
    /// Snake_case name of the locator kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::VersionedArchive => "versioned_archive",
            Self::LiveCurrentDirectory => "live_current_directory",
            Self::LocalCachedCopy => "local_cached_copy",
            Self::DistroSourcePackage => "distro_source_package",
            Self::Unknown => "unknown",
        }
    }
}
1607
1608/// **How an admitted reference is trusted** (T16.3) — the "*which* reference?" question's *trust* half,
1609/// kept precise so a reader knows *what kind* of trust they are getting. Crucially `HashOnly` proves
1610/// **integrity** (the bytes are what we pinned) but **not authenticity** (who produced them); it is never
1611/// rendered as "signature verified". `FingerprintAnchored` (the T12.5a.2 model — an OpenPGP signature
1612/// verified against a published key *fingerprint*) is **not** the weaker `WebOfTrustValidated`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SignatureTrustModel {
    /// An OpenPGP signature checked against a published key fingerprint: authenticity and
    /// integrity.
    FingerprintAnchored,
    /// Trust established through a web-of-trust path; weaker than fingerprint-anchored, and not
    /// claimed unless it is real.
    WebOfTrustValidated,
    /// Trust established through an OS/platform keyring.
    PlatformKeyring,
    /// Nothing but a content hash — **integrity, not authenticity**; never presented as "signature
    /// verified".
    HashOnly,
    /// The material is explicitly unsigned.
    Unsigned,
    /// The trust model has not been established.
    Unknown,
}

impl SignatureTrustModel {
    /// Snake_case name of the trust model.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::FingerprintAnchored => "fingerprint_anchored",
            Self::WebOfTrustValidated => "web_of_trust_validated",
            Self::PlatformKeyring => "platform_keyring",
            Self::HashOnly => "hash_only",
            Self::Unsigned => "unsigned",
            Self::Unknown => "unknown",
        }
    }

    /// Reports whether this trust model **pins integrity**, i.e. the bytes are the expected ones.
    /// `HashOnly` counts (integrity without authenticity); `Unsigned` and `Unknown` do not.
    /// Authenticity is a *separate* axis — see `FingerprintAnchored`.
    pub fn pins_integrity(self) -> bool {
        match self {
            Self::FingerprintAnchored
            | Self::WebOfTrustValidated
            | Self::PlatformKeyring
            | Self::HashOnly => true,
            Self::Unsigned | Self::Unknown => false,
        }
    }
}
1653
1654/// A reference's admission evidence (T16.3): *where it came from* × *how it is trusted*. The sealed-claim
1655/// rule is enforced here, not in prose: a claim may be *sealed* (re-verifiable, release-grade) **only** if
1656/// the locator is a `VersionedArchive` **and** the trust model pins integrity.
1657#[derive(Debug, Clone, Copy)]
1658pub struct ReferenceAdmission {
1659    pub locator: ReferenceLocatorKind,
1660    pub trust: SignatureTrustModel,
1661}
1662
1663impl ReferenceAdmission {
1664    /// Only a versioned archive with integrity-pinned trust can back a sealed release claim. A live
1665    /// PATH binary, a distro package, or any unsigned/unknown-trust material is exploration-grade only.
1666    pub fn supports_sealed_claim(&self) -> bool {
1667        matches!(self.locator, ReferenceLocatorKind::VersionedArchive)
1668            && self.trust.pins_integrity()
1669    }
1670    pub fn to_json(&self) -> String {
1671        format!(
1672            "{{ \"locator\": {}, \"signature_trust\": {}, \"supports_sealed_claim\": {} }}",
1673            json_str(self.locator.as_str()),
1674            json_str(self.trust.as_str()),
1675            self.supports_sealed_claim()
1676        )
1677    }
1678}
1679
1680/// The T12.5a.2 admitted 2026b reference: a **versioned archive** (`tzdb-2026b.tar.lz`), OpenPGP signature
1681/// verified against the published tz key **fingerprint** + SHA-256 hash-pinned. The one reference today
1682/// that backs a *sealed* claim. (Distinct from the *live* PATH `zic` a report's oracle runs against.)
1683pub const ADMITTED_2026B_REFERENCE: ReferenceAdmission = ReferenceAdmission {
1684    locator: ReferenceLocatorKind::VersionedArchive,
1685    trust: SignatureTrustModel::FingerprintAnchored,
1686};
1687
1688/// The dimension a claim is portable **along** (T15.5-remainder) — i.e. what it stays true *under*. A
1689/// claim is never "globally true": it is true *for* a declared release / oracle / platform / profile /
1690/// fixture set, or it is a general project policy. This makes "true where?" a typed field, not prose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClaimPortability {
    /// Rendered as `"release_specific"`.
    ReleaseSpecific,
    /// Rendered as `"oracle_specific"`.
    OracleSpecific,
    /// Rendered as `"platform_specific"`.
    PlatformSpecific,
    /// Rendered as `"profile_specific"`.
    ProfileSpecific,
    /// Rendered as `"fixture_specific"`.
    FixtureSpecific,
    /// Rendered as `"general_project_policy"`.
    GeneralProjectPolicy,
}

impl ClaimPortability {
    /// Snake_case name of the portability dimension.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::ReleaseSpecific => "release_specific",
            Self::OracleSpecific => "oracle_specific",
            Self::PlatformSpecific => "platform_specific",
            Self::ProfileSpecific => "profile_specific",
            Self::FixtureSpecific => "fixture_specific",
            Self::GeneralProjectPolicy => "general_project_policy",
        }
    }
}
1713
1714/// The **kind of authority** a claim's evidence carries (T15.5-remainder) — orthogonal to whether the
1715/// claim is true; it says *what backs it*, so a reviewer can tell a normative-spec citation from an
1716/// implementation observation from project doctrine. (`NormativeSpec` = RFC 9636 TZif format;
1717/// `PolicyGuidance` = BCP 175 / tzdb *process*, not format — the two never blur.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceAuthorityKind {
    /// Rendered as `"normative_spec"`.
    NormativeSpec,
    /// Rendered as `"implementation_observation"`.
    ImplementationObservation,
    /// Rendered as `"manpage_documentation"`.
    ManpageDocumentation,
    /// Rendered as `"policy_guidance"`.
    PolicyGuidance,
    /// Rendered as `"release_note"`.
    ReleaseNote,
    /// Rendered as `"empirical_fixture"`.
    EmpiricalFixture,
    /// Rendered as `"project_doctrine"`.
    ProjectDoctrine,
}

impl EvidenceAuthorityKind {
    /// Snake_case name of the authority kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NormativeSpec => "normative_spec",
            Self::ImplementationObservation => "implementation_observation",
            Self::ManpageDocumentation => "manpage_documentation",
            Self::PolicyGuidance => "policy_guidance",
            Self::ReleaseNote => "release_note",
            Self::EmpiricalFixture => "empirical_fixture",
            Self::ProjectDoctrine => "project_doctrine",
        }
    }
}
1742
1743/// What a report's claim **proves**, does **not** prove, and **depends on** (T15.5-remainder) — the
1744/// compact, machine-readable form of the non-claim doctrine, attached to the rollup. Static for a given
1745/// report kind (the boundary is fixed by what the surface actually measures).
#[derive(Debug, Clone, Copy)]
pub struct ClaimBoundary {
    /// What the claim establishes.
    pub proves: &'static str,
    /// What the claim explicitly leaves unestablished.
    pub does_not_prove: &'static str,
    /// What the claim relies on.
    pub depends_on: &'static str,
}
1752
/// The separate senses of "valid" that the conformance engine keeps **impossible to blur**
/// (T15.5-remainder / T15.close). The list is emitted as a report field so that "valid" can never be
/// read as one global verdict. Each entry has the shape `<sense>: <what it is> — <what it is not, or
/// what it is kept separate from>`; most spell the second half as `NOT …`. The live behaviour claim
/// (CORE.1) comes last and is deliberately held apart from structural / reader / release-admission
/// validity.
pub const VALID_DISAMBIGUATION: &[&str] = &[
    "structurally_valid: RFC 9636 byte-format integrity (tzif-validate) — NOT behaviour or semantics",
    "semantically_witness_matching: offset/is_dst/abbr match zdump for the declared witness set — NOT all instants",
    "modern_reader_compatible: no v4/legacy reader hazards — NOT semantic correctness",
    "future_projection_matching: the POSIX footer projects like reference — separate from footer parseability",
    "release_admitted: the source release is signature-verified + hash-pinned — NOT all IANA releases",
    "compile_covered: the admitted release's zones compile — NOT behaviour-matched",
    "behaviour_matched: CORE.1 341/341 vs reference zic/zdump over 1900..2040 — the live claim, separate from all above",
];
1766
1767/// The one-line, **machine-readable** conformance rollup (T15.5) — the claim *envelope*, so a reviewer
1768/// gets the scope in one scan without reconstructing the whole ladder. It is a pointer-rich summary, not
1769/// a global pass/fail: it names the admitted release, the bounded level for *this* report kind, the
1770/// available proof surfaces, the report's own provenance, and a `declared_scope_hash` that changes
1771/// whenever any scope element changes. T15.5-remainder added the typed claim-shape axes
1772/// (`reference_pin_gate` · `claim_portability` · `evidence_authority` · `claim_boundary`) and the
1773/// `valid_disambiguation`, so the *shape* of the claim is as machine-readable as its result.
1774#[derive(Debug, Clone)]
1775pub struct ConformanceStatus {
1776    pub report_kind: ReportKind,
1777    pub level: ConformanceLevel,
1778    pub workspace: WorkspaceProvenance,
1779    pub report_provenance: ReportProvenance,
1780    pub compiler: CompilerIdentity,
1781    pub reference_pin_gate: ReferencePinGate,
1782    pub claim_portability: ClaimPortability,
1783    pub evidence_authority: EvidenceAuthorityKind,
1784    pub claim_boundary: ClaimBoundary,
1785}
1786
1787impl ConformanceStatus {
1788    /// The rollup for a `support-report` invocation (compile-coverage over the admitted release).
1789    pub fn support() -> Self {
1790        ConformanceStatus {
1791            report_kind: ReportKind::Support,
1792            level: ConformanceLevel::ReleaseAdmittedCompileCoverage,
1793            // No git tree / no build.rs here → honest Unknown.
1794            workspace: WorkspaceProvenance::Unknown,
1795            report_provenance: ReportProvenance::UnsignedLocalReport,
1796            compiler: CompilerIdentity::capture(),
1797            reference_pin_gate: ReferencePinGate::current(),
1798            // support-report's claim is about the admitted *release*; it is an observation of zic-rs's own
1799            // compile, not a normative-spec or oracle claim.
1800            claim_portability: ClaimPortability::ReleaseSpecific,
1801            evidence_authority: EvidenceAuthorityKind::ImplementationObservation,
1802            claim_boundary: ClaimBoundary {
1803                proves: "the admitted release's zones compile (compile-coverage), each accounted in exactly one bucket",
1804                does_not_prove: "behaviour / structural / reader-compatibility parity — those are separate surfaces (semantic-report · structural-report · tzif-validate)",
1805                depends_on: "the signature-verified + hash-pinned 2026b reference set and this zic-rs build",
1806            },
1807        }
1808    }
1809
1810    /// `declared_scope_hash` — a SHA-256 over the **claim envelope**: admitted-release gate · manifest +
1811    /// report schema versions · the sorted negative-capability ids · the CORE.1 claim string. If any
1812    /// scope element changes, the hash changes — a compact identifier reviewers can pin a claim to.
1813    pub fn declared_scope_hash(&self) -> String {
1814        let mut envelope = String::new();
1815        envelope.push_str(SOURCE_VARIANT_GATE_STATUS);
1816        envelope.push('|');
1817        envelope.push_str(COMPILE_SCHEMA);
1818        envelope.push_str("|zic-rs-support-report-v4|zic-rs-structural-report-v3");
1819        envelope.push_str("|zic-rs-semantic-report-v1|zic-rs-tzif-validation-v1|");
1820        for nc in NEGATIVE_CAPABILITIES {
1821            envelope.push_str(nc.as_str());
1822            envelope.push(',');
1823        }
1824        envelope.push_str("|CORE.1=341/341@1900..2040;0mismatch;0failclosed");
1825        crate::hash::sha256_hex(envelope.as_bytes())
1826    }
1827
1828    /// Render the `conformance_status` block (a comma-terminated object for insertion into a report).
1829    pub fn to_json_block(&self) -> String {
1830        let opt = |o: Option<&str>| match o {
1831            Some(v) => json_str(v),
1832            None => "null".to_string(),
1833        };
1834        // The valid-disambiguation array, rendered from the static const so the senses stay single-sourced.
1835        let mut valid_disambig = String::from("[");
1836        for (i, sense) in VALID_DISAMBIGUATION.iter().enumerate() {
1837            if i > 0 {
1838                valid_disambig.push_str(", ");
1839            }
1840            valid_disambig.push_str(&json_str(sense));
1841        }
1842        valid_disambig.push(']');
1843        format!(
1844            "  \"conformance_status\": {{\n\
1845             \"report_kind\": {}, \"conformance_level\": {}, \"declared_scope_hash\": {}, \
1846             \"admitted_release_gate\": {}, \"workspace_provenance\": {}, \"report_provenance\": {}, \
1847             \"claim_portability\": {}, \"evidence_authority\": {}, \
1848             \"claim_boundary\": {{ \"proves\": {}, \"does_not_prove\": {}, \"depends_on\": {} }}, \
1849             \"valid_disambiguation\": {}, \
1850             \"core1_claim\": {}, \
1851             \"available_surfaces\": [\"support-report\", \"structural-report\", \"semantic-report\", \
1852             \"tzif-validation\", \"compile-manifest\"], \
1853             \"compiler_identity\": {{ \"zic_rs_version\": {}, \"rustc\": {}, \"target\": {}, \
1854             \"profile\": {}, \"git_commit\": {} }} }},\n",
1855            json_str(self.report_kind.as_str()),
1856            json_str(self.level.as_str()),
1857            json_str(&self.declared_scope_hash()),
1858            json_str(self.reference_pin_gate.as_str()),
1859            json_str(match self.workspace {
1860                WorkspaceProvenance::CleanGitTree => "clean_git_tree",
1861                WorkspaceProvenance::DirtyGitTree => "dirty_git_tree",
1862                WorkspaceProvenance::SourceArchive => "source_archive",
1863                WorkspaceProvenance::Unknown => "unknown",
1864            }),
1865            json_str(match self.report_provenance {
1866                ReportProvenance::UnsignedLocalReport => "unsigned_local_report",
1867                ReportProvenance::ReproducibleCiArtifact => "reproducible_ci_artifact",
1868                ReportProvenance::SignedReleaseArtifact => "signed_release_artifact",
1869            }),
1870            json_str(self.claim_portability.as_str()),
1871            json_str(self.evidence_authority.as_str()),
1872            json_str(self.claim_boundary.proves),
1873            json_str(self.claim_boundary.does_not_prove),
1874            json_str(self.claim_boundary.depends_on),
1875            valid_disambig,
1876            json_str(
1877                "341/341 canonical zones behaviour-match reference zic/zdump over 1900..2040 \
1878                 (0 mismatch, 0 fail-closed)"
1879            ),
1880            json_str(self.compiler.zic_rs_version),
1881            opt(self.compiler.rustc),
1882            json_str(&self.compiler.target),
1883            json_str(self.compiler.profile),
1884            opt(self.compiler.git_commit),
1885        )
1886    }
1887}
1888
1889/// The canonical, **sorted-by-`as_str()`** non-claims list surfaced in every report's provenance block.
1890/// Sorted so the emitted JSON array is deterministic; the order is asserted by a test.
1891pub const NEGATIVE_CAPABILITIES: &[NegativeCapability] = &[
1892    NegativeCapability::DoesNotClaimAllIanaReleasesWithoutAdmission,
1893    NegativeCapability::DoesNotClaimArbitraryTzifRoundtrip,
1894    NegativeCapability::DoesNotClaimFullToctouResistance,
1895    NegativeCapability::DoesNotClaimFutureCivilTimeAuthority,
1896    NegativeCapability::DoesNotClaimLeapSmearSemantics,
1897    NegativeCapability::DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness,
1898    NegativeCapability::DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext,
1899    NegativeCapability::DoesNotClaimTzifValidatorAsSecuritySandbox,
1900    NegativeCapability::DoesNotClaimUnadmittedVendorParity,
1901    NegativeCapability::DoesNotCurateTimeOrDefineDisplayNames,
1902    NegativeCapability::DoesNotDependOnHostEndianness,
1903    NegativeCapability::DoesNotInferDataformFromContent,
1904    NegativeCapability::DoesNotInferSourceVariantFromOutputShape,
1905    NegativeCapability::DoesNotRequireManifestToReadTzif,
1906    NegativeCapability::DoesNotShipOrOperateVendorQemuLabsInCoreRepo,
1907    NegativeCapability::DoesNotTreatManifestAsTzifSemantics,
1908];
1909
/// `true` once any backzone / PACKRATLIST / DATAFORM / rearguard / vanguard *behaviour* is implemented.
///
/// Currently **false**: lifting the gate (T12.5a.2) only *admitted the reference*; it did not
/// implement source-variant behaviour. Both provenance blocks report this flag verbatim.
pub const SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED: bool = false;
1913
/// The substeps the reference-pin gate still blocks.
///
/// Empty ever since T12.5a.2 lifted the gate for 2026b: T12.5b–d are unblocked. Being unblocked is
/// not the same as behaviour being implemented — see [`SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED`].
pub const SOURCE_VARIANT_BLOCKED_SUBSTEPS: &[&str] = &[];
1917
/// The required upstream tzdb reference files that remain **unpinned**.
///
/// Empty ever since T12.5a.2 admitted and SHA-256-pinned the complete 2026b set; the hashes are
/// recorded in `reports/t12_5a2-reference-admission.md`.
pub const SOURCE_VARIANT_UNPINNED_FILES: &[&str] = &[];
1921
1922/// The provenance/capability statement as a deterministic JSON object block (key `"provenance"`),
1923/// 2-space-indented and **comma-terminated** for insertion right after a report's `"schema"` line.
1924/// Shared by both reports so the trust state is identical and single-sourced.
1925pub fn provenance_block_json() -> String {
1926    let arr = |items: &[&str]| -> String {
1927        let inner: Vec<String> = items.iter().map(|i| json_str(i)).collect();
1928        format!("[{}]", inner.join(", "))
1929    };
1930    let mut s = String::new();
1931    s.push_str("  \"provenance\": {\n");
1932    s.push_str(&format!(
1933        "    \"manifest_schema\": {},\n",
1934        json_str(COMPILE_SCHEMA)
1935    ));
1936    s.push_str(
1937        "    \"per_run_profile\": \"see `compile --manifest`: build_profile / source_inputs / \
1938         link_profile / source_profile.backward_evidence\",\n",
1939    );
1940    s.push_str(&format!(
1941        "    \"source_variant_reference_pin_gate\": {},\n",
1942        json_str(SOURCE_VARIANT_GATE_STATUS)
1943    ));
1944    s.push_str(&format!(
1945        "    \"blocked_substeps\": {},\n",
1946        arr(SOURCE_VARIANT_BLOCKED_SUBSTEPS)
1947    ));
1948    s.push_str(&format!(
1949        "    \"unpinned_required_files\": {},\n",
1950        arr(SOURCE_VARIANT_UNPINNED_FILES)
1951    ));
1952    s.push_str(&format!(
1953        "    \"source_variant_behavior_implemented\": {},\n",
1954        SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED
1955    ));
1956    s.push_str(
1957        "    \"note\": \"tzdb 2026b reference set admitted + signature-verified + SHA-256-pinned \
1958         (reports/t12_5a2-reference-admission.md); T12.5b–d source-variant **evidence axes** are \
1959         implemented for that pinned reference, while source-variant **behaviour** remains not \
1960         implemented or claimed. No backzone/PACKRATLIST/DATAFORM/rearguard/vanguard behaviour is \
1961         claimed; never inferred from aliases, filenames, link counts, or output byte shape.\",\n",
1962    );
1963    // T15.2 — `negative_capabilities`: the project's non-claims as a first-class, machine-visible array,
1964    // each tied to the guard/test/receipt that enforces it (never decorative). Sorted + deterministic.
1965    s.push_str("    \"negative_capabilities\": [");
1966    for (i, nc) in NEGATIVE_CAPABILITIES.iter().enumerate() {
1967        s.push_str(if i == 0 { "\n" } else { ",\n" });
1968        s.push_str(&format!(
1969            "      {{ \"capability\": {}, \"enforced_by\": {} }}",
1970            json_str(nc.as_str()),
1971            json_str(nc.enforced_by())
1972        ));
1973    }
1974    s.push_str("\n    ]\n");
1975    s.push_str("  },\n");
1976    s
1977}
1978
1979/// The provenance/capability statement as a human-readable text block, appended to a report's text
1980/// output. Mirrors [`provenance_block_json`].
1981pub fn provenance_block_text() -> String {
1982    let mut s = String::new();
1983    s.push_str("\nprovenance / capability:\n");
1984    s.push_str(&format!(
1985        "  manifest schema: {COMPILE_SCHEMA}  (per-run build/source/link/backward profile: see \
1986         `compile --manifest`)\n"
1987    ));
1988    s.push_str(&format!(
1989        "  source-variant reference-pin gate: {SOURCE_VARIANT_GATE_STATUS}  (tzdb 2026b admitted + \
1990         signature-verified + SHA-256-pinned — reports/t12_5a2-reference-admission.md)\n"
1991    ));
1992    s.push_str(&format!(
1993        "  source-variant behaviour: {} — T12.5b–d unblocked for the pinned reference but not yet \
1994         implemented; backzone/PACKRATLIST/DATAFORM/rearguard/vanguard never inferred from \
1995         aliases/filenames/link counts/output shape\n",
1996        if SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED {
1997            "implemented"
1998        } else {
1999            "NOT implemented or claimed"
2000        }
2001    ));
2002    s.push_str("  negative capabilities (non-claims, each enforced):\n");
2003    for nc in NEGATIVE_CAPABILITIES {
2004        s.push_str(&format!("    - {}  ({})\n", nc.as_str(), nc.enforced_by()));
2005    }
2006    s
2007}
2008
2009/// The oracle result for *this* invocation. A bare `compile` never runs the oracle, so it is
2010/// recorded as `not-run` — the manifest must not infer success from the repo's test suite.
2011#[derive(Debug, Clone)]
2012pub struct OracleResult {
2013    /// The oracle mode, **typed** (T15.2a — was a free `String`). Rendered at the manifest boundary via
2014    /// [`OracleMode::manifest_str`]. The companion `result` is the verdict vocabulary (a separate axis).
2015    pub mode: OracleMode,
2016    pub horizon: Option<String>,
2017    /// The oracle verdict, typed (T17.2 — was a free `String`). Distinct from `mode`: *what the oracle
2018    /// concluded*, not *which* oracle. Rendered via [`OracleVerdict::as_str`].
2019    pub result: OracleVerdict,
2020}
2021
2022impl OracleResult {
2023    /// The honest default for a `compile` invocation: the oracle was not run.
2024    pub fn not_run() -> Self {
2025        OracleResult {
2026            mode: OracleMode::NotRun,
2027            horizon: None,
2028            result: OracleVerdict::NotRun,
2029        }
2030    }
2031}
2032
2033/// The full compile-provenance manifest.
2034#[derive(Debug, Clone)]
2035pub struct CompileManifest {
2036    pub zic_rs_version: String,
2037    pub tzdb: TzdbProvenance,
2038    pub source_inputs: SourceInputs,
2039    pub build_profile: BuildProfile,
2040    pub link_profile: LinkProfile,
2041    pub source_profile: SourceProfile,
2042    pub zones_requested: Vec<String>,
2043    pub zones_compiled: Vec<String>,
2044    pub links_materialized: Vec<String>,
2045    pub unsupported_zones: Vec<String>,
2046    pub oracle: OracleResult,
2047}
2048
2049/// Render the `build_profile` block — the structured output identity of *this run* (T12.2). Fields
2050/// describe what was actually used; only the `DATAFORM` encoding axes `rearguard`/`vanguard` are
2051/// `"unknown"` here (no deterministic detector yet — kept explicit, never guessed or claimed; T12.5d).
2052/// Source-membership (`backward`/`backzone`/`PACKRATLIST`) lives in the `source_profile` evidence
2053/// axes, not here.
2054fn build_profile_json(p: &BuildProfile) -> String {
2055    let opt_at = |v: Option<i64>| match v {
2056        Some(n) => format!("\"@{n}\""),
2057        None => "null".to_string(),
2058    };
2059
2060    let mut s = String::new();
2061    s.push_str("  \"build_profile\": {\n");
2062    s.push_str(&format!(
2063        "    \"output_tree\": {},\n",
2064        json_str(p.output_tree.as_str())
2065    ));
2066    // leap_source: describes the run, never capabilities.
2067    s.push_str("    \"leap_source\": {\n");
2068    s.push_str(&format!(
2069        "      \"mode\": {},\n",
2070        json_str(p.leap_source.mode.as_str())
2071    ));
2072    match &p.leap_source.sha256 {
2073        Some(h) => s.push_str(&format!("      \"sha256\": {},\n", json_str(h))),
2074        None => s.push_str("      \"sha256\": null,\n"),
2075    }
2076    s.push_str(&format!(
2077        "      \"entry_count\": {},\n",
2078        p.leap_source.entry_count
2079    ));
2080    s.push_str(&format!("      \"expires\": {},\n", p.leap_source.expires));
2081    s.push_str(&format!(
2082        "      \"rolling_entries\": {}\n",
2083        p.leap_source.rolling_entries
2084    ));
2085    s.push_str("    },\n");
2086    s.push_str(&format!(
2087        "    \"emit_style\": {},\n",
2088        json_str(emit_style_str(p.emit_style))
2089    ));
2090    match p.range {
2091        Some((lo, hi)) => s.push_str(&format!(
2092            "    \"range\": {{ \"lo\": {}, \"hi\": {} }},\n",
2093            opt_at(lo),
2094            opt_at(hi)
2095        )),
2096        None => s.push_str("    \"range\": null,\n"),
2097    }
2098    s.push_str(&format!(
2099        "    \"redundant_until\": {},\n",
2100        opt_at(p.redundant_until)
2101    ));
2102    // `link_mode` is the last `build_profile` field: as of T12.5d there are **no** source-variant
2103    // placeholders here. Every source-variant axis (`backward` T12.4d, `backzone` T12.5b, `PACKRATLIST`
2104    // T12.5c, `DATAFORM`=`main`/`vanguard`/`rearguard` T12.5d) is an authoritative `source_profile`
2105    // evidence axis; carrying an `"unknown"` copy here too would be a contradiction (`"unknown"` vs a
2106    // real detected/claimed status). The arc that removed the `backward` and `backzone` stubs ends here
2107    // by removing the last `rearguard`/`vanguard` stubs — `build_profile` now describes only *how this
2108    // run emitted* (tree/leap/emit/range/links), not source-set membership or encoding.
2109    s.push_str(&format!(
2110        "    \"link_mode\": {}\n",
2111        json_str(p.link_mode.as_str())
2112    ));
2113    s.push_str("  },\n");
2114    s
2115}
2116
2117/// Render the `source_inputs` block — the deterministic *input identity* of this run (T12.3): the
2118/// structural `kind`, the **input-ordered** file list (logical name + content hash + size +
2119/// `order_index`), and the order-sensitive `aggregate_hash`. Portable: logical names, never
2120/// machine-local absolute paths.
2121fn source_inputs_json(si: &SourceInputs) -> String {
2122    let mut s = String::new();
2123    s.push_str("  \"source_inputs\": {\n");
2124    s.push_str(&format!("    \"kind\": {},\n", json_str(si.kind.as_str())));
2125    s.push_str("    \"files\": [");
2126    for (i, f) in si.files.iter().enumerate() {
2127        s.push_str(if i == 0 { "\n" } else { ",\n" });
2128        s.push_str(&format!(
2129            "      {{ \"order_index\": {}, \"logical_name\": {}, \"sha256\": {}, \"bytes\": {} }}",
2130            f.order_index,
2131            json_str(&f.logical_name),
2132            json_str(&f.sha256),
2133            f.bytes
2134        ));
2135    }
2136    s.push_str(if si.files.is_empty() {
2137        "],\n"
2138    } else {
2139        "\n    ],\n"
2140    });
2141    s.push_str(&format!(
2142        "    \"aggregate_hash\": {}\n",
2143        json_str(&si.aggregate_hash)
2144    ));
2145    s.push_str("  },\n");
2146    s
2147}
2148
2149/// Render the `link_profile` block — link/alias identity (T12.4b): counts, policy, and the stable
2150/// hashes that bind the build to its `alias-map.json`. Never asserts source-set membership.
2151fn link_profile_json(lp: &LinkProfile) -> String {
2152    let mut s = String::new();
2153    s.push_str("  \"link_profile\": {\n");
2154    s.push_str(&format!(
2155        "    \"link_policy\": {},\n",
2156        json_str(&lp.link_policy)
2157    ));
2158    s.push_str(&format!(
2159        "    \"zones_compiled_count\": {},\n",
2160        lp.zones_compiled_count
2161    ));
2162    s.push_str(&format!(
2163        "    \"links_selected_count\": {},\n",
2164        lp.links_selected_count
2165    ));
2166    s.push_str(&format!(
2167        "    \"links_materialized_count\": {},\n",
2168        lp.links_materialized_count
2169    ));
2170    s.push_str(&format!(
2171        "    \"links_omitted_count\": {},\n",
2172        lp.links_omitted_count
2173    ));
2174    s.push_str(&format!(
2175        "    \"links_failed_count\": {},\n",
2176        lp.links_failed_count
2177    ));
2178    s.push_str(&format!(
2179        "    \"alias_map_sha256\": {},\n",
2180        json_str(&lp.alias_map_sha256)
2181    ));
2182    s.push_str(&format!(
2183        "    \"selected_links_sha256\": {},\n",
2184        json_str(&lp.selected_links_sha256)
2185    ));
2186    s.push_str(&format!(
2187        "    \"omitted_links_sha256\": {}\n",
2188        json_str(&lp.omitted_links_sha256)
2189    ));
2190    s.push_str("  },\n");
2191    s
2192}
2193
2194/// Render the `source_profile` block — the source-evidence axes (T12.4d `backward`, T12.5b `backzone`,
2195/// T12.5c `packratlist` backzone-scope); an extension seam for `DATAFORM` later. Records detected vs
2196/// claimed vs reconciled `status` + the admitted `evidence_sha256` — never a boolean, never inferred.
2197fn source_profile_json(sp: &SourceProfile) -> String {
2198    // Both axes share the {detected, claimed, status, evidence_sha256} shape; render with one helper.
2199    let axis =
2200        |key: &str, detected: &str, claimed: &str, status: &str, ev: &Option<String>| -> String {
2201            let mut a = String::new();
2202            a.push_str(&format!("    {}: {{\n", json_str(key)));
2203            a.push_str(&format!("      \"detected\": {},\n", json_str(detected)));
2204            a.push_str(&format!("      \"claimed\": {},\n", json_str(claimed)));
2205            a.push_str(&format!("      \"status\": {},\n", json_str(status)));
2206            match ev {
2207                Some(h) => a.push_str(&format!("      \"evidence_sha256\": {}\n", json_str(h))),
2208                None => a.push_str("      \"evidence_sha256\": null\n"),
2209            }
2210            a.push_str("    }");
2211            a
2212        };
2213    let b = &sp.backward;
2214    let z = &sp.backzone;
2215    let mut s = String::new();
2216    s.push_str("  \"source_profile\": {\n");
2217    s.push_str(&axis(
2218        "backward_evidence",
2219        b.detected_str(),
2220        b.claimed_str(),
2221        b.status(),
2222        &b.evidence_sha256,
2223    ));
2224    s.push_str(",\n");
2225    s.push_str(&axis(
2226        "backzone_evidence",
2227        z.detected_str(),
2228        z.claimed_str(),
2229        z.status(),
2230        &z.evidence_sha256,
2231    ));
2232    s.push_str(",\n");
2233    let pl = &sp.packratlist;
2234    s.push_str(&axis(
2235        "packratlist_evidence",
2236        pl.detected_str(),
2237        pl.claimed_str(),
2238        pl.status(),
2239        &pl.evidence_sha256,
2240    ));
2241    s.push_str(",\n");
2242    // `dataform_evidence` shares the 4 standard fields but adds two generated-artifact provenance
2243    // fields (`recipe_hash`, `generated_from`), so it is rendered directly rather than via `axis`.
2244    let df = &sp.dataform;
2245    let opt = |v: &Option<String>| match v {
2246        Some(h) => json_str(h),
2247        None => "null".to_string(),
2248    };
2249    s.push_str("    \"dataform_evidence\": {\n");
2250    s.push_str(&format!(
2251        "      \"detected\": {},\n",
2252        json_str(df.detected_str())
2253    ));
2254    s.push_str(&format!(
2255        "      \"claimed\": {},\n",
2256        json_str(df.claimed_str())
2257    ));
2258    s.push_str(&format!("      \"status\": {},\n", json_str(df.status())));
2259    s.push_str(&format!(
2260        "      \"evidence_sha256\": {},\n",
2261        opt(&df.evidence_sha256)
2262    ));
2263    s.push_str(&format!(
2264        "      \"recipe_hash\": {},\n",
2265        opt(&df.recipe_hash)
2266    ));
2267    s.push_str(&format!(
2268        "      \"generated_from\": {}\n",
2269        opt(&df.generated_from)
2270    ));
2271    s.push_str("    }\n");
2272    s.push_str("  },\n");
2273    s
2274}
2275
2276impl CompileManifest {
2277    /// Render deterministic, pretty-printed JSON.
2278    pub fn to_json(&self) -> String {
2279        let arr = |items: &[String]| -> String {
2280            if items.is_empty() {
2281                "[]".to_string()
2282            } else {
2283                let inner: Vec<String> = items.iter().map(|i| json_str(i)).collect();
2284                format!("[{}]", inner.join(", "))
2285            }
2286        };
2287
2288        let mut s = String::new();
2289        s.push_str("{\n");
2290        s.push_str(&format!("  \"schema\": {},\n", json_str(COMPILE_SCHEMA)));
2291        s.push_str(&format!(
2292            "  \"zic_rs_version\": {},\n",
2293            json_str(&self.zic_rs_version)
2294        ));
2295        let opt_str = |v: &Option<String>| match v {
2296            Some(x) => json_str(x),
2297            None => "null".to_string(),
2298        };
2299        s.push_str("  \"tzdb\": {\n");
2300        s.push_str(&format!(
2301            "    \"detected_version\": {},\n",
2302            opt_str(&self.tzdb.detected_version)
2303        ));
2304        s.push_str(&format!(
2305            "    \"claimed_version\": {},\n",
2306            opt_str(&self.tzdb.claimed_version)
2307        ));
2308        s.push_str(&format!(
2309            "    \"version_status\": {},\n",
2310            json_str(self.tzdb.version_status())
2311        ));
2312        s.push_str(&format!(
2313            "    \"source_path\": {},\n",
2314            json_str(&self.tzdb.source_path)
2315        ));
2316        s.push_str(&format!(
2317            "    \"source_sha256\": {}\n",
2318            json_str(&self.tzdb.source_sha256)
2319        ));
2320        s.push_str("  },\n");
2321        s.push_str(&source_inputs_json(&self.source_inputs));
2322        s.push_str(&build_profile_json(&self.build_profile));
2323        s.push_str(&link_profile_json(&self.link_profile));
2324        s.push_str(&source_profile_json(&self.source_profile));
2325        s.push_str("  \"compile\": {\n");
2326        s.push_str(&format!(
2327            "    \"zones_requested\": {},\n",
2328            arr(&self.zones_requested)
2329        ));
2330        s.push_str(&format!(
2331            "    \"zones_compiled\": {},\n",
2332            arr(&self.zones_compiled)
2333        ));
2334        s.push_str(&format!(
2335            "    \"links_materialized\": {},\n",
2336            arr(&self.links_materialized)
2337        ));
2338        s.push_str(&format!(
2339            "    \"unsupported_zones\": {}\n",
2340            arr(&self.unsupported_zones)
2341        ));
2342        s.push_str("  },\n");
2343        s.push_str("  \"oracle\": {\n");
2344        s.push_str(&format!(
2345            "    \"mode\": {},\n",
2346            json_str(self.oracle.mode.manifest_str())
2347        ));
2348        match &self.oracle.horizon {
2349            Some(h) => s.push_str(&format!("    \"horizon\": {},\n", json_str(h))),
2350            None => s.push_str("    \"horizon\": null,\n"),
2351        }
2352        s.push_str(&format!(
2353            "    \"result\": {}\n",
2354            json_str(self.oracle.result.as_str())
2355        ));
2356        s.push_str("  }\n");
2357        s.push_str("}\n");
2358        s
2359    }
2360
2361    /// Write the manifest JSON to `path`.
2362    pub fn write_to(&self, path: &Path) -> Result<()> {
2363        std::fs::write(path, self.to_json()).map_err(|e| Error::io(path, e))
2364    }
2365}
2366
2367/// Build a [`CompileManifest`] from the run's inputs and report.
2368///
2369/// `requested` is the resolved list of identifiers the user asked for; `source_files` are the
2370/// expanded input files **in input order** (directories already expanded sorted by the caller).
2371/// The oracle is recorded as `not-run` because `compile` does not invoke `compare` — see the
2372/// module note.
2373///
2374/// Two complementary hashes are computed: `tzdb.source_sha256` over the source bytes in *sorted*
2375/// (canonicalized) order — an order-independent content identity — and
2376/// `source_inputs.aggregate_hash` over the *input-ordered* per-file hashes — an order-sensitive
2377/// identity. **Input order is part of the build identity**; the manifest records it faithfully.
2378///
2379/// This assembles the build identity from eight genuinely distinct provenance inputs (the requested
2380/// selection, the input file set, the compile report, the run config, the link database, the claimed
2381/// tzdb version, the leap-source path, and the source-variant claims). They do not naturally collapse
2382/// into a meaningful sub-struct — bundling would relocate the count, not reduce the complexity — so we
2383/// keep them explicit and silence the arity lint.
2384#[allow(clippy::too_many_arguments)]
2385pub fn build_compile_manifest(
2386    requested: &[String],
2387    source_files: &[std::path::PathBuf],
2388    report: &CompileReport,
2389    config: &crate::CompileConfig,
2390    db: &crate::model::Database,
2391    claimed_version: Option<&str>,
2392    leap_path: Option<&std::path::Path>,
2393    variants: &SourceVariantArgs,
2394) -> Result<CompileManifest> {
2395    // Per-file identity in INPUT ORDER (never re-sorted) — the order is part of the build identity.
2396    let mut input_files: Vec<SourceFile> = Vec::with_capacity(source_files.len());
2397    for (order_index, f) in source_files.iter().enumerate() {
2398        let bytes = std::fs::read(f).map_err(|e| Error::io(f, e))?;
2399        input_files.push(SourceFile {
2400            // Logical name = basename: a portable label, never the machine-local absolute path.
2401            logical_name: f
2402                .file_name()
2403                .map(|n| n.to_string_lossy().into_owned())
2404                .unwrap_or_else(|| f.display().to_string()),
2405            sha256: sha256_hex(&bytes),
2406            bytes: bytes.len(),
2407            order_index,
2408        });
2409    }
2410    // Order-sensitive aggregate identity: hash the input-ordered sequence of per-file hashes (a
2411    // newline separator so reordering two files always changes the digest).
2412    let aggregate_seed = input_files
2413        .iter()
2414        .map(|f| f.sha256.as_str())
2415        .collect::<Vec<_>>()
2416        .join("\n");
2417    let aggregate_hash = sha256_hex(aggregate_seed.as_bytes());
2418
2419    // Structural input *form* only — never a guess at source-set membership (backward/backzone).
2420    let kind = match source_files.len() {
2421        0 => SourceInputKind::Unknown,
2422        1 if source_files[0].extension().and_then(|e| e.to_str()) == Some("zi") => {
2423            SourceInputKind::TzdataZi
2424        }
2425        1 => SourceInputKind::SingleFile,
2426        _ => SourceInputKind::MultiFile,
2427    };
2428
2429    // Order-independent content hash + display path + version detection: read in SORTED path order
2430    // so this digest is invariant to argument ordering (the explicitly-canonicalized companion to
2431    // `aggregate_hash`).
2432    let mut sorted: Vec<&std::path::PathBuf> = source_files.iter().collect();
2433    sorted.sort();
2434    let mut all = Vec::new();
2435    for f in &sorted {
2436        all.extend(std::fs::read(f).map_err(|e| Error::io(f, e))?);
2437    }
2438    let source_sha256 = sha256_hex(&all);
2439    let source_path = sorted
2440        .iter()
2441        .map(|p| p.display().to_string())
2442        .collect::<Vec<_>>()
2443        .join(", ");
2444    let detected_version = crate::report::sniff_tzdb_version(&all);
2445
2446    let source_inputs = SourceInputs {
2447        kind,
2448        files: input_files,
2449        aggregate_hash,
2450    };
2451
2452    // Build-profile identity — what this run actually used (semantic, not argv).
2453    let leap_source = match &config.leaps {
2454        None => LeapSourceInfo {
2455            mode: LeapSourceMode::None,
2456            sha256: None,
2457            entry_count: 0,
2458            expires: false,
2459            rolling_entries: 0,
2460        },
2461        Some(table) => LeapSourceInfo {
2462            mode: LeapSourceMode::File,
2463            sha256: match leap_path {
2464                Some(p) => Some(sha256_hex(&std::fs::read(p).map_err(|e| Error::io(p, e))?)),
2465                None => None,
2466            },
2467            entry_count: table.entries.len(),
2468            expires: table.expires.is_some(),
2469            rolling_entries: table.entries.iter().filter(|e| e.rolling).count(),
2470        },
2471    };
2472    let build_profile = BuildProfile {
2473        output_tree: if config.leaps.is_some() {
2474            OutputTree::Right
2475        } else {
2476            OutputTree::Posix
2477        },
2478        leap_source,
2479        // T17.2: store the typed enums directly (the source of truth), rendered at the JSON boundary —
2480        // no re-stringified copy that could drift from `config`.
2481        emit_style: config.emit_style,
2482        range: config.range.map(|r| (r.lo, r.hi)),
2483        redundant_until: config.redundant_until,
2484        link_mode: config.link_mode,
2485    };
2486
2487    let zones_compiled: Vec<String> = report
2488        .zones_compiled
2489        .iter()
2490        .map(|z| z.name.clone())
2491        .collect();
2492    let links_materialized: Vec<String> = report
2493        .links_written
2494        .iter()
2495        .map(|l| l.link_name.clone())
2496        .collect();
2497
2498    // A requested identifier is "satisfied" if it was compiled as a canonical zone or written
2499    // as a link; anything else requested is reported as unsupported/skipped — honestly.
2500    let unsupported_zones: Vec<String> = requested
2501        .iter()
2502        .filter(|r| !zones_compiled.contains(r) && !links_materialized.contains(r))
2503        .cloned()
2504        .collect();
2505
2506    // Link / alias identity (T12.4b). We classify against the *full parsed* link set (`db.links`),
2507    // NOT `report.links_written` — the report only knows what was *materialised*, but the manifest
2508    // also wants what was *omitted* (and *failed*), which only the db's complete link list reveals.
2509    // For each link we resolve its chain exactly as the compile path does (`plan::run` → the link
2510    // loop), then bucket by whether the resolved canonical zone landed in the compiled output set:
2511    //   - selected: target compiled (eligible & — since a write failure aborts the whole run —
2512    //     materialised);
2513    //   - omitted: target NOT compiled, i.e. excluded by the zone *selection* (a policy outcome);
2514    //   - failed: the chain does not resolve to a real zone (missing target / cycle / self-link) —
2515    //     an *error* class, deliberately never folded into "omitted".
2516    // **No source-set membership (`backward`/`backzone`) is inferred from these links** — they are
2517    // output identifiers, not evidence of which source file produced them (see T12.4a inventory).
2518    let mut selected_links: Vec<String> = Vec::new();
2519    let mut omitted_links: Vec<String> = Vec::new();
2520    let mut links_failed_count = 0usize;
2521    for link in &db.links {
2522        match crate::resolve_link_target(db, &link.link_name) {
2523            Ok(canonical) if zones_compiled.iter().any(|z| z == canonical) => {
2524                selected_links.push(link.link_name.clone())
2525            }
2526            Ok(_) => omitted_links.push(link.link_name.clone()),
2527            Err(_) => links_failed_count += 1, // dangling / cycle — never folded into "omitted"
2528        }
2529    }
2530    // Sort for a stable set hash; `dedup` because `zic` allows two `Link` lines with the same name
2531    // (last wins — see `make_links`), so the parsed db may legitimately carry a duplicate name we
2532    // must not double-count. (Sort-then-dedup removes only *adjacent* equals, hence the sort first.)
2533    selected_links.sort();
2534    selected_links.dedup();
2535    omitted_links.sort();
2536    omitted_links.dedup();
2537    // Order-independent *set* identity: names are already sorted+deduped, LF-joined then hashed.
2538    // The empty set hashes to `sha256("")` (a fixed, well-known digest) — that is intentional and
2539    // stable; do not special-case it to "" or a sentinel, or two empty-set runs would stop matching.
2540    let hash_names = |names: &[String]| sha256_hex(names.join("\n").as_bytes());
2541    // `alias-map.json` is serialized deterministically (sorted by identifier, fixed field order,
2542    // LF, no timestamps — see `AliasMap::to_json`), so hashing its bytes is a stable cross-machine
2543    // identity that binds this manifest to a specific alias map. `build` re-reads the just-written
2544    // output files to hash them; at manifest time (a successful compile) they are all on disk.
2545    let alias_map_sha256 = sha256_hex(build(report, &config.output_dir)?.to_json().as_bytes());
2546    let link_profile = LinkProfile {
2547        link_policy: match config.link_mode {
2548            crate::LinkMode::Copy => "copy",
2549            crate::LinkMode::Symlink => "symlink",
2550        }
2551        .to_string(),
2552        zones_compiled_count: zones_compiled.len(),
2553        links_selected_count: selected_links.len(),
2554        links_materialized_count: report.links_written.len(),
2555        links_omitted_count: omitted_links.len(),
2556        links_failed_count,
2557        alias_map_sha256,
2558        selected_links_sha256: hash_names(&selected_links),
2559        omitted_links_sha256: hash_names(&omitted_links),
2560    };
2561
2562    // Source-evidence axes — reconciled against the *admitted* source inputs only (hash-backed
2563    // detection or explicit claim), never inferred from the link profile above. `backward` (T12.4d)
2564    // verifies an admitted file's participation; `backzone` (T12.5b) checks whether the pinned
    // reference `backzone` (T12.5a.2) participated, anchored to its release hash; `packratlist`
    // (T12.5c) is a **generation-policy** axis — detection comes ONLY from an admitted
2567    // policy input (`--packratlist-source`) whose hash equals the pinned 2026b `zone.tab`, never from
2568    // `source_inputs` (compile inputs); `zone.tab` is not a compilable `zic` source.
2569    let backzone = BackzoneEvidence::reconcile(
2570        &source_inputs,
2571        variants.backzone_claim,
2572        REF_2026B_BACKZONE_SHA256,
2573    );
2574    let backzone_present = backzone.detected == BackzoneDetected::Present;
2575    let packratlist_policy_sha = match &variants.packratlist_source {
2576        Some(p) => Some(sha256_hex(&std::fs::read(p).map_err(|e| Error::io(p, e))?)),
2577        None => None,
2578    };
2579    // `dataform` (T12.5d) — the *encoding* axis. Detection is hash-backed against the pinned 2026b
2580    // `.zi` artifacts via `source_inputs` membership (category-correct: the `.zi` files are compile
2581    // sources). The `recipe_hash` binds the generation provenance of those pinned artifacts.
2582    let dataform_recipe = dataform_recipe_hash(
2583        REF_2026B_ARCHIVE_SHA256,
2584        REF_2026B_MAKEFILE_SHA256,
2585        REF_2026B_ZIGUARD_AWK_SHA256,
2586        REF_2026B_DATAFORM_COMMAND,
2587        REF_2026B_DATAFORM_TOOLCHAIN,
2588    );
2589    let dataform_reference = DataformReference {
2590        main_sha256: REF_2026B_MAIN_ZI_SHA256,
2591        vanguard_sha256: REF_2026B_VANGUARD_ZI_SHA256,
2592        rearguard_sha256: REF_2026B_REARGUARD_ZI_SHA256,
2593        recipe_hash: &dataform_recipe,
2594        generated_from: REF_2026B_DATAFORM_GENERATED_FROM,
2595    };
2596    let source_profile = SourceProfile {
2597        backward: BackwardEvidence::reconcile(&source_inputs, variants)?,
2598        packratlist: PackratlistEvidence::reconcile(
2599            variants.packratlist_claim.as_deref(),
2600            packratlist_policy_sha.as_deref(),
2601            REF_2026B_ZONE_TAB_SHA256,
2602            backzone_present,
2603        ),
2604        dataform: DataformEvidence::reconcile(
2605            &source_inputs,
2606            variants.dataform_claim.as_deref(),
2607            &dataform_reference,
2608        ),
2609        backzone,
2610    };
2611
2612    Ok(CompileManifest {
2613        zic_rs_version: env!("CARGO_PKG_VERSION").to_string(),
2614        tzdb: TzdbProvenance {
2615            detected_version,
2616            claimed_version: claimed_version.map(str::to_string),
2617            source_path,
2618            source_sha256,
2619        },
2620        source_inputs,
2621        build_profile,
2622        link_profile,
2623        source_profile,
2624        zones_requested: requested.to_vec(),
2625        zones_compiled,
2626        links_materialized,
2627        unsupported_zones,
2628        oracle: OracleResult::not_run(),
2629    })
2630}
2631
2632// JSON string escaping is shared with the other deterministic emitters (`report`); the single
2633// implementation lives in `crate::json`. Identifier strings are already restricted by the
2634// output-tree validator, but we escape defensively — a name can contain a backslash, which JSON
2635// requires escaped. The `json_str` alias keeps this module's call sites unchanged.
2636use crate::json::escape as json_str;
2637
#[cfg(test)]
mod tests {
    use super::*;

    // ── T17.2 CONTRACT.TYPING: totality of the newly-typed manifest vocabularies ──
    // Every enum renders its own JSON literal through `as_str()`. The tests below pin those
    // literals verbatim (so the `zic-rs-compile-manifest-v8` schema does not bump) and check that
    // each vocabulary stays closed.

    /// `SourceInputKind::ALL` renders to the pinned literals, in order, with no duplicate and no
    /// empty label.
    #[test]
    fn source_input_kind_totality_and_literals() {
        use std::collections::BTreeSet;

        let literals: Vec<&str> = SourceInputKind::ALL
            .iter()
            .map(|kind| kind.as_str())
            .collect();

        // Pinned spelling and order.
        assert_eq!(
            literals,
            ["tzdata_zi", "multi_file", "single_file", "unknown"]
        );

        // Totality: as many distinct labels as variants, none of them empty.
        let distinct: BTreeSet<&str> = literals.iter().copied().collect();
        assert_eq!(distinct.len(), SourceInputKind::ALL.len());
        assert!(!literals.iter().any(|label| label.is_empty()));
    }

    /// Literal spellings of the output-tree, leap-source-mode and oracle-verdict vocabularies.
    #[test]
    fn output_tree_leap_mode_oracle_verdict_literals() {
        for (tree, literal) in [(OutputTree::Posix, "posix"), (OutputTree::Right, "right")] {
            assert_eq!(tree.as_str(), literal);
        }
        for (mode, literal) in [(LeapSourceMode::None, "none"), (LeapSourceMode::File, "file")] {
            assert_eq!(mode.as_str(), literal);
        }
        // The hyphen is carried over from the pre-T17.2 free-string literal.
        assert_eq!(OracleVerdict::NotRun.as_str(), "not-run");
    }

    /// The manifest holds the typed `EmitStyle`; `emit_style_str` owns the literal emitted at the
    /// JSON boundary, and those literals must not change.
    #[test]
    fn emit_style_boundary_literals_unchanged() {
        let pinned = [
            (crate::EmitStyle::Default, "default"),
            (crate::EmitStyle::ZicSlim, "zic-slim"),
            (crate::EmitStyle::ZicFat, "zic-fat"),
        ];
        for (style, literal) in pinned {
            assert_eq!(emit_style_str(style), literal);
        }
    }

    /// `kind_str` tags a `Zone` entry as "zone" and a `Link` entry as "link".
    #[test]
    fn alias_entry_kind_str() {
        let zone_entry = AliasEntry::Zone { sha256: "x".into() };
        assert_eq!(zone_entry.kind_str(), "zone");

        let link_entry = AliasEntry::Link {
            target: "t".into(),
            target_sha256: "y".into(),
            materialised: crate::LinkMode::Copy,
        };
        assert_eq!(link_entry.kind_str(), "link");
    }

    /// `json_str` wraps its input in double quotes and escapes backslashes and embedded quotes.
    #[test]
    fn json_escaping() {
        let cases = [
            ("Europe/London", "\"Europe/London\""),
            ("a\\b", "\"a\\\\b\""),
            ("a\"b", "\"a\\\"b\""),
        ];
        for (raw, quoted) in cases {
            assert_eq!(json_str(raw), quoted);
        }
    }

    /// An alias map with no entries still serializes the schema tag, an empty `zones` object and
    /// the zeroed identifier count.
    #[test]
    fn empty_map_is_valid_json_shape() {
        let empty = AliasMap {
            entries: BTreeMap::new(),
            identifiers: 0,
            canonical_zones: 0,
            links: 0,
            duplicated_byte_links: 0,
        };
        let rendered = empty.to_json();

        let expected_fragments = [
            "\"schema\": \"zic-rs-alias-map-v1\"",
            "\"zones\": {}",
            "\"identifiers\": 0",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment));
        }
    }
}