tzcompile/manifest.rs
1//! The **alias/canonical manifest** (`alias-map.json`) — a producer-side artifact that
2//! records, for a compile run, which identifiers are *canonical zones* and which are *links*
3//! (aliases), with content hashes and an explicit account of where link materialisation
4//! duplicates bytes.
5//!
6//! ## Why this exists
7//!
8//! Downstream consumers/bundlers of timezone data care about *how* a bundle was produced, not
9//! only the file bytes. jiff#258 is the concrete motivation: it observed that concatenated
10//! zoneinfo appears to **duplicate** data for aliases (e.g. a release with 597 identifiers but
11//! only 339 non-alias zones) and asked for that to be documented. This manifest answers that
12//! directly: every identifier is tagged `zone` or `link`, links carry their target + the
13//! target's hash, and the summary reports `identifiers` / `canonical_zones` / `links` plus
14//! `duplicated_byte_links` (links we materialised as byte copies). See `docs/rust-ecosystem.md`
15//! and `docs/generated-data-contract.md`.
16//!
17//! ## Dependency-free by design
18//!
19//! Hashing uses the in-house [`crate::hash`] SHA-256; JSON is written by a tiny deterministic
20//! serializer here (no `serde`). The schema is small and fixed, and identifier strings are
21//! escaped properly, so this stays correct without a serialization framework — consistent
22//! with the crate's minimal-dependency ethos.
23
24use std::collections::BTreeMap;
25use std::path::Path;
26
27use crate::error::{Error, Result};
28use crate::hash::sha256_hex;
29use crate::{CompileReport, LinkMode};
30
/// Schema tag written into every `alias-map.json`. It is stable for a given layout, so a
/// consumer can match on it to version-gate its parsing.
pub const SCHEMA: &str = "zic-rs-alias-map-v1";
33
34/// One identifier's entry in the alias map.
35#[derive(Debug, Clone, PartialEq, Eq)]
36pub enum AliasEntry {
37 /// A canonical zone: the SHA-256 of its compiled TZif file.
38 Zone { sha256: String },
39 /// A link (alias) to a canonical zone, with the target's hash and how it was materialised.
40 Link {
41 target: String,
42 target_sha256: String,
43 /// How the link was materialised — the typed [`LinkMode`] (`Copy` = bytes duplicated; `Symlink`
44 /// = no duplication), rendered as `"copy"`/`"symlink"` only at the JSON boundary (CONTRACT.TYPING:
45 /// a finite-vocabulary claim-bearing field is owned by an enum, not a hand-emitted string).
46 materialised: LinkMode,
47 },
48}
49
50impl AliasEntry {
51 /// The alias-map `"kind"` vocabulary (`"zone"` / `"link"`). **T17.2 (CONTRACT.TYPING):** the `kind`
52 /// field was previously a hand-emitted string literal inside the JSON format string; the variant is
53 /// the finite claim ("is this identifier a canonical zone or an alias?"), so the literal is owned
54 /// here and rendered through this accessor — a new `kind` value cannot enter the alias map as prose.
55 pub fn kind_str(&self) -> &'static str {
56 match self {
57 AliasEntry::Zone { .. } => "zone",
58 AliasEntry::Link { .. } => "link",
59 }
60 }
61}
62
63/// The whole manifest: a deterministic (sorted) map of identifiers plus summary counts.
64#[derive(Debug, Clone)]
65pub struct AliasMap {
66 /// Identifier → entry, ordered by name for deterministic output.
67 pub entries: BTreeMap<String, AliasEntry>,
68 pub identifiers: usize,
69 pub canonical_zones: usize,
70 pub links: usize,
71 /// Links materialised as byte copies (i.e. where the same TZif bytes exist under two
72 /// names). This is the figure jiff#258 was asking to make visible.
73 pub duplicated_byte_links: usize,
74}
75
76/// Build an [`AliasMap`] from a finished [`CompileReport`], hashing each compiled file under
77/// `root`. Reads the just-written output files (cheap; they are small).
78pub fn build(report: &CompileReport, _root: &Path) -> Result<AliasMap> {
79 // Hash every canonical zone once, keyed by zone name, by reading its output file.
80 let mut zone_hash: BTreeMap<String, String> = BTreeMap::new();
81 for z in &report.zones_compiled {
82 let bytes = std::fs::read(&z.output_path).map_err(|e| Error::io(&z.output_path, e))?;
83 zone_hash.insert(z.name.clone(), sha256_hex(&bytes));
84 }
85
86 let mut entries: BTreeMap<String, AliasEntry> = BTreeMap::new();
87 for z in &report.zones_compiled {
88 entries.insert(
89 z.name.clone(),
90 AliasEntry::Zone {
91 sha256: zone_hash.get(&z.name).cloned().unwrap_or_default(),
92 },
93 );
94 }
95
96 let mut duplicated_byte_links = 0;
97 for l in &report.links_written {
98 // The typed materialisation policy (CONTRACT.TYPING owns the literal via `LinkMode::as_str()`).
99 let materialised = l.mode;
100 if materialised == LinkMode::Copy {
101 duplicated_byte_links += 1;
102 }
103 // `l.target` is the resolved canonical zone (see compile::plan), so its hash is known.
104 let target_sha256 = zone_hash.get(&l.target).cloned().unwrap_or_default();
105 entries.insert(
106 l.link_name.clone(),
107 AliasEntry::Link {
108 target: l.target.clone(),
109 target_sha256,
110 materialised,
111 },
112 );
113 }
114
115 let map = AliasMap {
116 identifiers: entries.len(),
117 canonical_zones: report.zones_compiled.len(),
118 links: report.links_written.len(),
119 duplicated_byte_links,
120 entries,
121 };
122 // T12.4c — fail closed if the produced map is not internally consistent (a link to a
123 // non-compiled zone, a hash that doesn't match its target, a self-link, or summary counts that
124 // disagree with the entries). In normal flow this always passes — `plan::run` only records a
125 // link whose resolved canonical zone was compiled — so this is a defensive invariant that turns
126 // any future regression into a hard error rather than a silently wrong artifact.
127 map.validate()?;
128 Ok(map)
129}
130
131impl AliasMap {
132 /// Validate the alias map's **internal consistency** (T12.4c). Guarantees that every `Link`
133 /// entry "corresponds to a materialised link/copy" of a real compiled zone:
134 ///
135 /// - every `Link`'s `target` is present in this map as a **`Zone`** entry (no dangling alias —
136 /// *"missing target fails"*; a link that resolved to another link, or to nothing, never gets
137 /// here because `build` records the *resolved canonical* zone);
138 /// - the `Link`'s recorded `target_sha256` is non-empty and **equals** that zone's hash (the
139 /// alias genuinely names those exact bytes — the jiff#258 duplication is real, not asserted);
140 /// - no entry is a **self-link** (`name == target`) — `resolve_link_target` already rejects
141 /// self-links/cycles upstream (they are *skipped* in `plan::run` and counted `failed` in the
142 /// link profile), so this is the alias-map-level guard that keeps that coverage honest;
143 /// - the summary counts (`canonical_zones`/`links`/`identifiers`) agree with the entries.
144 ///
145 /// `build` calls this before returning, so any [`AliasMap`] handed out is already consistent.
146 pub fn validate(&self) -> Result<()> {
147 let (mut zones, mut links) = (0usize, 0usize);
148 for (name, entry) in &self.entries {
149 match entry {
150 AliasEntry::Zone { sha256 } => {
151 zones += 1;
152 if sha256.len() != 64 {
153 return Err(Error::message(format!(
154 "alias-map: zone {name:?} has a malformed content hash"
155 )));
156 }
157 }
158 AliasEntry::Link {
159 target,
160 target_sha256,
161 ..
162 } => {
163 links += 1;
164 if name == target {
165 return Err(Error::message(format!(
166 "alias-map: {name:?} is a self-link"
167 )));
168 }
169 match self.entries.get(target) {
170 Some(AliasEntry::Zone { sha256 }) => {
171 if target_sha256 != sha256 {
172 return Err(Error::message(format!(
173 "alias-map: link {name:?} records a target hash that does not \
174 match its zone {target:?}"
175 )));
176 }
177 }
178 Some(AliasEntry::Link { .. }) => {
179 return Err(Error::message(format!(
180 "alias-map: link {name:?} targets another link {target:?}, not a \
181 canonical zone"
182 )))
183 }
184 None => {
185 return Err(Error::message(format!(
186 "alias-map: link {name:?} targets {target:?}, which is not a \
187 compiled zone in this map"
188 )))
189 }
190 }
191 }
192 }
193 }
194 if zones != self.canonical_zones || links != self.links || self.identifiers != zones + links
195 {
196 return Err(Error::message(
197 "alias-map: summary counts disagree with the entries".to_string(),
198 ));
199 }
200 Ok(())
201 }
202
203 /// Render the manifest as deterministic, pretty-printed JSON (2-space indent, keys in a
204 /// fixed order; entries sorted by identifier via the `BTreeMap`).
205 pub fn to_json(&self) -> String {
206 let mut s = String::new();
207 s.push_str("{\n");
208 s.push_str(&format!(" \"schema\": {},\n", json_str(SCHEMA)));
209 s.push_str(" \"zones\": {");
210 let mut first = true;
211 for (name, entry) in &self.entries {
212 s.push_str(if first { "\n" } else { ",\n" });
213 first = false;
214 // CONTRACT.TYPING (T17.2): the `kind` literal is owned by `AliasEntry::kind_str()`, not
215 // hand-typed in the format string.
216 let kind = entry.kind_str();
217 match entry {
218 AliasEntry::Zone { sha256 } => {
219 s.push_str(&format!(
220 " {}: {{ \"kind\": {}, \"sha256\": {} }}",
221 json_str(name),
222 json_str(kind),
223 json_str(sha256)
224 ));
225 }
226 AliasEntry::Link {
227 target,
228 target_sha256,
229 materialised,
230 } => {
231 s.push_str(&format!(
232 " {}: {{ \"kind\": {}, \"target\": {}, \"target_sha256\": {}, \"materialised\": {} }}",
233 json_str(name),
234 json_str(kind),
235 json_str(target),
236 json_str(target_sha256),
237 json_str(materialised.as_str())
238 ));
239 }
240 }
241 }
242 s.push_str(if self.entries.is_empty() {
243 "},\n"
244 } else {
245 "\n },\n"
246 });
247 s.push_str(" \"summary\": {\n");
248 s.push_str(&format!(" \"identifiers\": {},\n", self.identifiers));
249 s.push_str(&format!(
250 " \"canonical_zones\": {},\n",
251 self.canonical_zones
252 ));
253 s.push_str(&format!(" \"links\": {},\n", self.links));
254 s.push_str(&format!(
255 " \"duplicated_byte_links\": {}\n",
256 self.duplicated_byte_links
257 ));
258 s.push_str(" }\n");
259 s.push_str("}\n");
260 s
261 }
262
263 /// Write the manifest JSON to `path`.
264 pub fn write_to(&self, path: &Path) -> Result<()> {
265 std::fs::write(path, self.to_json()).map_err(|e| Error::io(path, e))
266 }
267}
268
269// ===========================================================================================
270// Compile-provenance manifest (`zic-rs-manifest.json`, schema `zic-rs-compile-manifest-v8`).
271// ===========================================================================================
272//
273// Records *how this output tree was produced* — the **build identity**: source provenance (path +
274// hash + kind), the tzdb version (detected vs claimed, reconciled, never silently stamped), the
275// real `build_profile` (emit_style / range / redundant_until / link_mode / output_tree + leap
276// source), the zones/links touched, and the oracle result. **It describes this invocation, never
277// the repo's general test status nor a capability claim**: undetected source-set axes are honest
278// `"unknown"` (never an aspirational `"supported"`/`"unsupported"`), and a plain `compile` run did
279// not invoke `compare`, so the oracle block is `not-run` even though the fixtures are verified
280// elsewhere by the test suite.
281
/// Schema tag embedded in the compile manifest. Every version listed below is a
/// *consumer-gating* marker: the number is bumped only when a block is genuinely added or
/// removed, never for an in-session correction to a version that has not been released.
/// Changelog, newest first:
///
/// - **v8** (T12.5d): introduced the `source_profile.dataform_evidence` axis — the *encoding* form
///   (`main`/`vanguard`/`rearguard`) as detected/claimed/status, **hash-backed** against the
///   pinned 2026b generated `.zi` artifacts via `source_inputs` membership (the `.zi` files are
///   compilable sources, so this is category-correct — cf. `backzone`). Two generated-artifact
///   provenance fields came with it: `recipe_hash` (binds archive · `Makefile` · `ziguard.awk` ·
///   command · toolchain, raw bytes) and `generated_from`. Never inferred from
///   syntax/output/names/`PACKRATLIST`/`backzone`.
/// - **v7** (T12.5c): introduced the `source_profile.packratlist_evidence` axis (backzone
///   *scope*). `detected` is `subset_from_policy_input` only via an admitted `PACKRATLIST`
///   *generation-policy* input that hash-matches the pinned 2026b `zone.tab`, together with a
///   present `backzone`; otherwise it is `unknown`. The claim vocabulary is `full|subset|none`.
///   `PACKRATLIST`/`zone.tab` is a generation-policy selector, **not** a `zic` compile source, so
///   detection never consults `source_inputs`. This version also removed the now-contradictory
///   `build_profile.backzone` `"unknown"` stub (backzone is the `source_profile` axis since
///   T12.5b).
/// - **v6** (T12.5b): introduced the `source_profile.backzone_evidence` axis
///   (detected/claimed/status + `evidence_sha256`), hash-anchored to the pinned reference
///   `backzone` ([`REF_2026B_BACKZONE_SHA256`], admitted in T12.5a.2) — source *membership*,
///   never inferred.
/// - **v5** (T12.4d; corrected in T12.5a): introduced the `source_profile` block, recording the
///   `backward` **evidence axis** (detected/claimed/status + `evidence_sha256`). The axis is
///   admitted only from hash-backed source evidence or an explicit claim, never inferred from the
///   alias/link surface; it is the extension seam for the later `backzone`/`rearguard`/`vanguard`
///   axes. The T12.5a correction removed a contradictory `build_profile.backward` `"unknown"`
///   stub that v5 first shipped (it was briefly double-listed; `backward` is authoritatively the
///   `source_profile` axis). That was fixed *in place*, with no version bump, because v5 was
///   never released/consumed. `rearguard`/`vanguard` remain `build_profile` `"unknown"`
///   placeholders pending their own evidence axes (T12.5d, reference-first).
/// - **v4** (T12.4b): introduced the `link_profile` block recording link/alias identity — counts
///   (`zones_compiled`/`links_selected`/`links_materialized`/`links_omitted`/`links_failed`),
///   `link_policy`, and stable hashes (`alias_map_sha256` + `selected`/`omitted_links_sha256`)
///   that bind the build to its alias map.
/// - **v3** (T12.3): introduced the `source_inputs` block recording the deterministic,
///   **order-preserving** input identity (logical names + per-file hashes + `aggregate_hash`).
///   The structural `source_kind` moved there (and `individual_files` became `multi_file`);
///   `build_profile.source_set` was retired (superseded).
/// - **v2** (T12.2): `tzdb.version` was split into `detected_version`/`claimed_version` (+
///   `version_status`), and the stub `generation_options` block was replaced by a real
///   `build_profile` recording *what this run actually used*.
pub const COMPILE_SCHEMA: &str = "zic-rs-compile-manifest-v8";
321
/// Version provenance of the tzdata that was compiled. **Detected facts and user claims are kept
/// in separate fields** so the manifest never silently stamps a release. The *input identity*
/// (which files, in which order, with which hashes) is recorded by [`SourceInputs`], not here.
#[derive(Debug, Clone)]
pub struct TzdbProvenance {
    /// The tzdb release **sniffed** from the source's `# version …` comment, when one was found.
    pub detected_version: Option<String>,
    /// The tzdb release the **user asserted** via `--tzdb-version`, when given.
    pub claimed_version: Option<String>,
    /// The input path(s), joined for display. **Environment context, not identity**: these are
    /// machine-local (possibly absolute) paths. The portable identity is [`SourceInputs`]
    /// (logical names + content hashes).
    pub source_path: String,
    /// SHA-256 over the concatenation of all source files taken in **sorted (canonicalized) path
    /// order** — an *order-independent* content identity ("same bytes, however they were
    /// ordered?"). The *order-sensitive* counterpart is [`SourceInputs::aggregate_hash`].
    pub source_sha256: String,
}

impl TzdbProvenance {
    /// Reconcile the detected version with the claimed one into a single status literal, so a
    /// bare claim can never pass for a detected fact.
    ///
    /// Returns `"detected_matches_claim"` or `"detected_differs_from_claim"` when both are
    /// present, `"detected_only"` / `"claimed_only"` when exactly one is, and `"unknown"` when
    /// neither is.
    pub fn version_status(&self) -> &'static str {
        let detected = self.detected_version.as_deref();
        let claimed = self.claimed_version.as_deref();
        match (detected, claimed) {
            (None, None) => "unknown",
            (None, Some(_)) => "claimed_only",
            (Some(_), None) => "detected_only",
            (Some(found), Some(asserted)) => {
                if found == asserted {
                    "detected_matches_claim"
                } else {
                    "detected_differs_from_claim"
                }
            }
        }
    }
}
353
/// A single source file in the deterministic input list (T12.3). Its portable identity is the
/// `logical_name` (a basename, never a machine-local absolute path) together with the content
/// `sha256`; `order_index` records where the file sat in the **input order**, which is itself
/// part of the build identity.
#[derive(Debug, Clone)]
pub struct SourceFile {
    /// Basename of the file (e.g. `"northamerica"`, `"tzdata.zi"`). A portable label only — the
    /// real identity is `sha256` + `order_index`.
    pub logical_name: String,
    /// SHA-256 of this file's bytes (per file, so independent of input order).
    pub sha256: String,
    /// Length of this file in bytes.
    pub bytes: usize,
    /// 0-based position in the input order. **Source order is part of the build identity**: two
    /// differently ordered inputs are not treated as the same.
    pub order_index: usize,
}
370
371/// The deterministic input source-set of *this run* (T12.3) — **input identity, not source
372/// semantics**. Records which files were used, in what order, with what hashes, under what
373/// structural `kind`. It deliberately does **not** infer source-set *membership*: those are
374/// reconciled as hash-backed/claim-only evidence axes in [`SourceProfile`] (`backward` T12.4d,
375/// `backzone` T12.5b, `PACKRATLIST` scope T12.5c), and the remaining `DATAFORM` encoding axes
376/// (`rearguard`/`vanguard`) are recorded as `"unknown"` in the build profile until a pinned,
377/// deterministic detector exists (T12.5d).
378#[derive(Debug, Clone)]
379pub struct SourceInputs {
380 /// Structural input *form* (not membership), typed (T17.2): [`SourceInputKind`]. Multi-file means
381 /// *source form* only — it never implies `backward`/`backzone` inclusion.
382 pub kind: SourceInputKind,
383 /// The input files in **input order** (directories expanded in sorted order, then in the order
384 /// the paths were supplied) — never re-sorted, so the order is faithfully recorded.
385 pub files: Vec<SourceFile>,
386 /// SHA-256 over the input-ordered sequence of per-file hashes — an **order-sensitive** identity
387 /// that changes if the same files are supplied in a different order (cf. the order-independent
388 /// [`TzdbProvenance::source_sha256`]).
389 pub aggregate_hash: String,
390}
391
392/// The leap-second source used by *this run* (T12.2). Describes the run, never the project's
393/// capabilities — `mode: "none"` for an ordinary compile, never `"unsupported"`.
394#[derive(Debug, Clone)]
395pub struct LeapSourceInfo {
396 /// `None` (ordinary/`posix` profile) or `File` (the `right/` profile, `-L`) — typed (T17.2).
397 pub mode: LeapSourceMode,
398 /// SHA-256 of the leap-source file, when `mode == "file"` and the path was available.
399 pub sha256: Option<String>,
400 pub entry_count: usize,
401 pub expires: bool,
402 pub rolling_entries: usize,
403}
404
405/// The build-profile identity of *this run* (T12.2) — structured fields, never a vague label. Only
406/// the `DATAFORM` encoding axes (`rearguard`/`vanguard`) are recorded here as `"unknown"` (no
407/// deterministic detector yet — kept explicit rather than guessed; T12.5d). The source-*membership*
408/// axes (`backward`, `backzone`, `PACKRATLIST` scope) moved to [`SourceProfile`] as reconciled
409/// evidence axes (T12.4d/T12.5b/T12.5c) — they are never build_profile placeholders.
410#[derive(Debug, Clone)]
411pub struct BuildProfile {
412 /// `Posix` (no leap table) or `Right` (leap table applied) — typed (T17.2).
413 pub output_tree: OutputTree,
414 pub leap_source: LeapSourceInfo,
415 /// Semantic emission identity, the typed [`crate::EmitStyle`] (T17.2; was a re-stringified `String`).
416 /// `--emit-style zic-slim` and `-b slim` map to the same value; rendered to its manifest literal at the
417 /// JSON boundary by the module-private `emit_style_str` (the enum is the source of truth, not a copy).
418 pub emit_style: crate::EmitStyle,
419 /// `-r` range, as `(lo, hi)` raw `@`-instants (`None` = no truncation).
420 pub range: Option<(Option<i64>, Option<i64>)>,
421 /// `-R` redundant-tail bound (`@`-instant), if any.
422 pub redundant_until: Option<i64>,
423 /// Link materialisation policy of this run — the typed [`crate::LinkMode`] (T17.2; was a
424 /// re-stringified `String`). Rendered `"copy"`/`"symlink"` at the boundary via [`crate::LinkMode::as_str`].
425 pub link_mode: crate::LinkMode,
426}
427
428// ── T17.2 (CONTRACT.TYPING) — the manifest's remaining finite claim-bearing vocabularies, born typed. ──
429//
430// Each of these was a free `String`/`&'static str` whose value came from a *closed* set but was emitted
431// as prose, so a future code path (or a careless edit) could leak an unintended value into the public
432// `zic-rs-compile-manifest-v8` JSON. Per the standing rule — *prose is the weakest guarantee; an
433// exhaustive `match` that won't compile if a variant is unclassified is the strongest* — they are now
434// enums owning their JSON literal via `as_str()`. The emitted strings are **byte-identical** to the
435// previous output (the manifest tests + the conformance golden pin them), so no schema bumps.
436
/// Which output-tree profile *this run* produced (T12.2; typed at T17.2, formerly a
/// `&'static str`). `posix` means no leap table was applied; `right` means a leap table (`-L`)
/// was. A reproducer or report reader relies on this *output-identity* claim, so the enum owns
/// the literal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputTree {
    /// The default `posix` profile: no leap table.
    Posix,
    /// The `right/` profile: a leap table was applied.
    Right,
}

impl OutputTree {
    /// Returns the manifest literal for this profile: `"posix"` or `"right"`.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Posix => "posix",
            Self::Right => "right",
        }
    }
}
457
/// Which leap source *this run* used (T12.2; typed at T17.2, formerly a `&'static str`). It
/// describes the run, never the project's capabilities: an ordinary compile is `None`, never an
/// aspirational `"unsupported"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LeapSourceMode {
    /// Ordinary / `posix`: no leap source.
    None,
    /// The `right/` profile: a leap-seconds file was supplied with `-L`.
    File,
}

impl LeapSourceMode {
    /// Returns the manifest literal for this mode: `"none"` or `"file"`.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::File => "file",
        }
    }
}
477
/// The structural input *form* of *this run* (T12.3; typed at T17.2, formerly a `String`).
/// **Form only**: it never implies source-set *membership* (`backward`/`backzone` are reconciled
/// evidence axes elsewhere).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SourceInputKind {
    /// A single `.zi` file (e.g. the zishrunk `tzdata.zi`).
    TzdataZi,
    /// At least two source files.
    MultiFile,
    /// A single file that is not a `.zi` file.
    SingleFile,
    /// No input files at all.
    Unknown,
}

impl SourceInputKind {
    /// Returns the manifest literal for this input form.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::TzdataZi => "tzdata_zi",
            Self::MultiFile => "multi_file",
            Self::SingleFile => "single_file",
            Self::Unknown => "unknown",
        }
    }

    /// All variants in a stable order, for the totality test.
    pub const ALL: [SourceInputKind; 4] = [
        Self::TzdataZi,
        Self::MultiFile,
        Self::SingleFile,
        Self::Unknown,
    ];
}
511
/// The oracle **verdict** vocabulary of a manifest (T17.2, formerly a free `String`). A bare
/// `compile` never runs the oracle, so [`OracleVerdict::NotRun`] is the only value today; it
/// renders as `"not-run"`, the legacy literal. The type exists so that a future verdict (e.g.
/// match / mismatch, should a manifest path ever run the oracle) cannot arrive as an
/// unconstrained string. It is distinct from the *mode* axis ([`OracleMode`]), which says
/// *which* oracle, not *what it concluded*.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OracleVerdict {
    /// The oracle did not run for this invocation — the honest default for `compile`.
    NotRun,
}

impl OracleVerdict {
    /// Returns the manifest literal: `"not-run"` (the hyphen is kept from the pre-T17.2 string).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NotRun => "not-run",
        }
    }
}
531
532/// Render the typed [`crate::EmitStyle`] to its manifest literal (T17.2). The manifest now stores the
533/// enum directly (no re-stringified copy that could drift); this boundary fn owns the literal.
534fn emit_style_str(s: crate::EmitStyle) -> &'static str {
535 match s {
536 crate::EmitStyle::Default => "default",
537 crate::EmitStyle::ZicSlim => "zic-slim",
538 crate::EmitStyle::ZicFat => "zic-fat",
539 }
540}
541
/// The link / alias identity of *this run* (T12.4b): counts plus stable hashes that bind the
/// build to its `alias-map.json`. **Links are output identifiers, not source-set evidence** —
/// nothing in this block is used to infer `backward`/`backzone` membership from the alias set.
///
/// The three outcomes `selected` / `omitted` / `failed` are kept **distinct** (panel rule):
/// - **selected** — a parsed `Link` whose resolved canonical zone *is* in the compiled output
///   set (eligible and materialised). In zic-rs a selected link always materialises (a write
///   failure aborts the whole run), so `links_selected_count` coincides with the db-link share of
///   `links_materialized_count`; the two differ only by install-policy links such as
///   `localtime`, which are materialised but are not a source `Link`.
/// - **omitted** — a *valid* parsed `Link` that was not materialised because selection/profile
///   excluded its target from the output set. A policy outcome, **not** an error.
/// - **failed** — a `Link` whose chain does not end at a real zone (missing target / cycle). An
///   error class, never folded into `omitted`. (A bare successful `compile` writes no manifest
///   if a link fatally fails, so this is normally 0; it is recorded for completeness.)
#[derive(Debug, Clone)]
pub struct LinkProfile {
    /// How links were materialised this run: `"copy"` or `"symlink"`.
    pub link_policy: String,
    /// Number of canonical zones actually compiled this run.
    pub zones_compiled_count: usize,
    /// Number of parsed `Link`s that were eligible and materialised (resolved target in the
    /// compiled set).
    pub links_selected_count: usize,
    /// Number of links actually written to the output tree, including install-policy links such
    /// as `localtime`.
    pub links_materialized_count: usize,
    /// Number of valid links excluded by selection/profile (target not compiled). Policy, not
    /// error.
    pub links_omitted_count: usize,
    /// Number of links whose chain does not resolve to a real zone (missing/cycle/self). Error,
    /// not omission.
    pub links_failed_count: usize,
    /// SHA-256 of the **deterministic** `alias-map.json` serialization (sorted by identifier,
    /// fixed field order, LF, no timestamps). Binds this manifest to one specific alias map.
    pub alias_map_sha256: String,
    /// SHA-256 over the sorted, LF-joined selected-link names — an order-independent set
    /// identity.
    pub selected_links_sha256: String,
    /// SHA-256 over the sorted, LF-joined omitted-link names.
    pub omitted_links_sha256: String,
}
580
/// What the *admitted source evidence* mechanically proves about whether the `backward` source
/// took part in this build (T12.4d). The verdict is **bounded to an admitted artifact**: it is
/// never a universal claim and never inferred from the alias/link surface.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackwardDetected {
    /// Hash-backed: the admitted backward source's bytes are among `source_inputs`.
    Present,
    /// Hash-backed: the admitted backward source's bytes are **not** in the build. This is
    /// absence *of the admitted artifact*, not proof that "no backward data exists anywhere".
    Absent,
    /// No backward source was admitted, so nothing is mechanically proven. The honest default.
    Unknown,
}
594
/// What the build/user/config **explicitly asserts** about `backward` membership (T12.4d). This
/// is a bare claim: it is recorded separately from detection and is never trusted as detection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackwardClaim {
    /// The claim is that `backward` was included.
    Included,
    /// The claim is that `backward` was excluded.
    Excluded,
    /// Nothing was claimed.
    None,
}
604
605/// The `backward` **evidence axis** (T12.4d) — mirrors T12.2's detected-vs-claimed version
606/// reconciliation. **The alias surface is output identity, not source provenance:** a build can
607/// expose legacy-looking aliases without proving the tzdb `backward` source participated, and the
608/// absence of such aliases does not prove `backward` was excluded. So `backward` is recorded as a
609/// reconciled evidence axis, **never a boolean**, and stays `Unknown` unless admitted by hash-backed
610/// evidence or an explicit claim. **Admission law — `backward` status may be admitted only from
611/// (1) hash-backed source evidence or (2) an explicit claim; it must NOT be inferred from alias
612/// count, alias names, output filenames, source filenames alone, link target names, selected/
613/// omitted/failed link counts, or legacy-looking identifiers.**
614#[derive(Debug, Clone)]
615pub struct BackwardEvidence {
616 pub detected: BackwardDetected,
617 pub claimed: BackwardClaim,
618 /// SHA-256 of the *admitted* backward source (present or absent), when one was admitted. This is
619 /// the hash whose presence/absence in `source_inputs` produced `detected`.
620 pub evidence_sha256: Option<String>,
621}
622
623impl BackwardEvidence {
624 /// Build the evidence axis from the admitted inputs (T12.4d). **This is the entire admission
625 /// law in code:** detection comes *only* from an admitted backward `source` file, hash-checked
626 /// against the build's `source_inputs`:
627 /// - admitted file's bytes are among `source_inputs` → `Present` (hash-backed);
628 /// - admitted file's bytes are **not** among them → `Absent` (hash-backed; bounded to *this*
629 /// artifact — never a universal "no backward exists" claim);
630 /// - no file admitted → `Unknown` (we refuse to infer from alias counts, names, or filenames).
631 ///
632 /// The bare `claim` is recorded independently and never promoted to detection. Nothing here reads
633 /// the link/alias surface, so `backward` can never be inferred from it.
634 pub fn reconcile(source_inputs: &SourceInputs, args: &SourceVariantArgs) -> Result<Self> {
635 let claimed = match args.backward_claim {
636 Some(true) => BackwardClaim::Included,
637 Some(false) => BackwardClaim::Excluded,
638 None => BackwardClaim::None,
639 };
640 let (detected, evidence_sha256) = match &args.backward_source {
641 Some(path) => {
642 let bytes = std::fs::read(path).map_err(|e| Error::io(path, e))?;
643 let h = sha256_hex(&bytes);
644 let present = source_inputs.files.iter().any(|f| f.sha256 == h);
645 let d = if present {
646 BackwardDetected::Present
647 } else {
648 BackwardDetected::Absent
649 };
650 (d, Some(h))
651 }
652 None => (BackwardDetected::Unknown, None),
653 };
654 Ok(BackwardEvidence {
655 detected,
656 claimed,
657 evidence_sha256,
658 })
659 }
660
661 /// Reconcile detected vs claimed into a single status string (cf. [`TzdbProvenance::version_status`]).
662 /// Detection always outranks a bare claim for the agreement/conflict verdicts; an unverified claim
663 /// is explicitly labelled `*_unverified` so it can never be mistaken for a detected fact.
664 pub fn status(&self) -> &'static str {
665 use BackwardClaim as C;
666 use BackwardDetected as D;
667 match (self.detected, self.claimed) {
668 (D::Present, C::Included) | (D::Absent, C::Excluded) => "detected_matches_claim",
669 (D::Present, C::Excluded) | (D::Absent, C::Included) => "detected_contradicts_claim",
670 (D::Present, C::None) => "detected_present",
671 (D::Absent, C::None) => "detected_absent",
672 (D::Unknown, C::Included) => "claimed_present_unverified",
673 (D::Unknown, C::Excluded) => "claimed_absent_unverified",
674 (D::Unknown, C::None) => "unknown_no_evidence",
675 }
676 }
677
678 fn detected_str(&self) -> &'static str {
679 match self.detected {
680 BackwardDetected::Present => "present",
681 BackwardDetected::Absent => "absent",
682 BackwardDetected::Unknown => "unknown",
683 }
684 }
685
686 fn claimed_str(&self) -> &'static str {
687 match self.claimed {
688 BackwardClaim::Included => "included",
689 BackwardClaim::Excluded => "excluded",
690 BackwardClaim::None => "none",
691 }
692 }
693}
694
/// SHA-256 of the **pristine IANA tzdb 2026b `backzone` file**, admitted + signature-verified +
/// pinned in T12.5a.2 (`reports/t12_5a2-reference-admission.md`). The `backzone` evidence detector
/// (T12.5b) checks whether *this exact file* participated in a build's `source_inputs` — hash-backed,
/// version-scoped to 2026b (a later release has a different hash and needs its own admission).
///
/// Stored as 64 lowercase hex characters. [`BackzoneEvidence::reconcile`] compares its reference
/// argument against each input's `sha256` by plain equality, so the spelling here must match the
/// form in which input hashes are recorded.
pub const REF_2026B_BACKZONE_SHA256: &str =
    "63fb39adae0b0d8b2179629725a9dfb694c7a386b99750b636a017d896d28dfa";
701
/// What the admitted source evidence proves about `backzone`/`PACKRATDATA` participation (T12.5b).
/// **Presence is hash-backed; absence is NOT asserted** — the canonical `backzone` file not appearing
/// among `source_inputs` does *not* prove backzone data is absent (it can be merged into a
/// concatenated `.zi`), so non-presence is `Unknown`, never a false "absent".
///
/// Deliberately two-valued: there is no `Absent` variant, so an absence verdict is unrepresentable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackzoneDetected {
    /// The pinned reference `backzone` file's bytes are present among `source_inputs` (hash-backed).
    /// Rendered as `"present"` by `BackzoneEvidence::detected_str`.
    Present,
    /// No hash-backed evidence either way (canonical file not seen; may be merged — cannot conclude).
    /// Rendered as `"unknown"` by `BackzoneEvidence::detected_str`.
    Unknown,
}
713
/// What the build/user explicitly asserts about `backzone` membership (T12.5b) — recorded separately
/// from detection, never promoted to it.
///
/// [`BackzoneEvidence::reconcile`] derives this from an `Option<bool>` claim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackzoneClaim {
    /// The claim was `Some(true)`: `backzone` is asserted to be included. Rendered as `"included"`.
    Included,
    /// The claim was `Some(false)`: `backzone` is asserted to be excluded. Rendered as `"excluded"`.
    Excluded,
    /// No claim was made (`None`). Rendered as `"none"`.
    None,
}
722
/// The `backzone` / `PACKRATDATA` **source-membership evidence axis** (T12.5b) — mirrors
/// `BackwardEvidence`, but detection is anchored to the *pinned reference release's* `backzone` hash
/// ([`REF_2026B_BACKZONE_SHA256`]). **Source membership, hash-backed or claim-only, never inferred**
/// from aliases, zone names, link counts, output byte shape, pre-1970 differences, or `DATAFORM`.
/// **Scope:** whether `backzone` participated at all — the *subset-vs-all* (`PACKRATLIST`) distinction
/// is T12.5c, and `DATAFORM` is T12.5d.
#[derive(Debug, Clone)]
pub struct BackzoneEvidence {
    /// Hash-backed detection verdict: `Present` when the reference hash was found among the inputs,
    /// otherwise `Unknown` (absence is never asserted).
    pub detected: BackzoneDetected,
    /// The caller's bare claim, recorded independently of `detected`.
    pub claimed: BackzoneClaim,
    /// The pinned reference `backzone` hash, when detected present. `None` when `detected` is
    /// `Unknown`.
    pub evidence_sha256: Option<String>,
}
736
737impl BackzoneEvidence {
738 /// Detect `backzone` participation by checking whether the pinned reference `backzone` hash
739 /// (`reference_backzone_sha256`) appears among `source_inputs`. Hash-backed, version-scoped. The
740 /// claim is recorded independently. **Does not read the link/alias surface — inference is
741 /// impossible by construction.** (`reference_backzone_sha256` is injected so the detector is unit-
742 /// testable without vendoring the large reference file; production passes [`REF_2026B_BACKZONE_SHA256`].)
743 pub fn reconcile(
744 source_inputs: &SourceInputs,
745 claim: Option<bool>,
746 reference_backzone_sha256: &str,
747 ) -> Self {
748 let claimed = match claim {
749 Some(true) => BackzoneClaim::Included,
750 Some(false) => BackzoneClaim::Excluded,
751 None => BackzoneClaim::None,
752 };
753 let present = source_inputs
754 .files
755 .iter()
756 .any(|f| f.sha256 == reference_backzone_sha256);
757 let (detected, evidence_sha256) = if present {
758 (
759 BackzoneDetected::Present,
760 Some(reference_backzone_sha256.to_string()),
761 )
762 } else {
763 (BackzoneDetected::Unknown, None)
764 };
765 BackzoneEvidence {
766 detected,
767 claimed,
768 evidence_sha256,
769 }
770 }
771
772 /// Reconcile detected vs claimed (cf. [`BackwardEvidence::status`]). No `detected_absent` —
773 /// absence is never asserted for `backzone` (see [`BackzoneDetected`]).
774 pub fn status(&self) -> &'static str {
775 use BackzoneClaim as C;
776 use BackzoneDetected as D;
777 match (self.detected, self.claimed) {
778 (D::Present, C::Included) => "detected_matches_claim",
779 (D::Present, C::Excluded) => "detected_contradicts_claim",
780 (D::Present, C::None) => "detected_present",
781 (D::Unknown, C::Included) => "claimed_present_unverified",
782 (D::Unknown, C::Excluded) => "claimed_absent_unverified",
783 (D::Unknown, C::None) => "unknown_no_evidence",
784 }
785 }
786
787 fn detected_str(&self) -> &'static str {
788 match self.detected {
789 BackzoneDetected::Present => "present",
790 BackzoneDetected::Unknown => "unknown",
791 }
792 }
793
794 fn claimed_str(&self) -> &'static str {
795 match self.claimed {
796 BackzoneClaim::Included => "included",
797 BackzoneClaim::Excluded => "excluded",
798 BackzoneClaim::None => "none",
799 }
800 }
801}
802
/// SHA-256 of the **pristine IANA tzdb 2026b `zone.tab`** (admitted + pinned in T12.5a.2). It is the
/// canonical `PACKRATLIST` subset-selector. **Pinned for the test fixture / documentation only** — the
/// detector does *not* trigger on `zone.tab` merely appearing among inputs (it is a normal selection
/// table whose presence proves nothing about `PACKRATLIST`); see [`PackratlistEvidence`].
///
/// NOTE(review): [`PackratlistEvidence::reconcile`] compares the admitted policy-input hash against
/// its `reference_zone_tab_sha256` argument; presumably production passes this constant there, which
/// would make it more than a fixture/documentation pin — confirm at the call site.
pub const REF_2026B_ZONE_TAB_SHA256: &str =
    "4d8e389e5f4b0ec0466d5b14f42e5dfb0308c4376165fcf478339afd9ddcb00c";
809
/// What the admitted evidence proves about the **`backzone` *scope*** (`PACKRATLIST`) — T12.5c.
/// **Category boundary (the empirical finding):** `PACKRATLIST` is a *generation-policy* input, **not a
/// `zic` compile source** — its list (`zone.tab`) filters `backzone` at generation time and is baked
/// into the produced `.zi`. So scope is **not recoverable from `source_inputs` (compile inputs)**:
/// `zone.tab` appearing among inputs would be a category error to read as evidence, and absence proves
/// nothing. The only hash-backed detection is **`SubsetFromPolicyInput`** — an *explicitly admitted*
/// `PACKRATLIST` selector (`--packratlist-source`) hashed as a **policy input**, alongside a present
/// `backzone`. Everything else is **`Unknown`**; `full`/`none` are claim-only.
///
/// [`PackratlistEvidence::reconcile`] additionally requires the admitted selector's hash to equal
/// the reference `zone.tab` hash it is given; any other admitted file yields `Unknown`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackratlistDetected {
    /// A hash-backed `PACKRATLIST` **policy input** was admitted alongside a present `backzone`
    /// (bounded to that artifact — does **not** assert the generation step applied the filter).
    /// Rendered as `"subset_from_policy_input"`.
    SubsetFromPolicyInput,
    /// No hash-backed scope evidence (no admitted policy input, an admitted input whose hash is not
    /// the reference one, or no backzone). Rendered as `"unknown"`.
    Unknown,
}
826
/// What the build/user explicitly asserts about `backzone` scope (T12.5c). `--packratlist
/// {full|subset|none}`: `full` = all backzone (`PACKRATLIST` empty), `subset` = filtered
/// (`PACKRATLIST=zone.tab`), `none` = no backzone (`PACKRATDATA` empty).
///
/// Note that `None` here is a *claim* ("no backzone"); the absence of any claim is `NotClaimed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackratlistClaim {
    /// Claimed `"full"`. Rendered as `"full"`.
    Full,
    /// Claimed `"subset"`. Rendered as `"subset"`.
    Subset,
    /// Claimed `"none"`. Rendered as `"none"`.
    None,
    /// No claim was supplied, or the supplied string was not one of the three recognised values.
    /// Rendered as `"not_claimed"`.
    NotClaimed,
}
837
/// The `PACKRATLIST` **backzone-scope evidence axis** (T12.5c) — mirrors the other axes
/// (`detected`/`claimed`/`status`/`evidence_sha256`). **Subset is the only hash-backed detection**
/// (admitted subset-list participated + backzone present); never inferred from output zone counts,
/// alias counts, filenames, `zone.tab`/`zone1970.tab` presence alone, link counts, pre-1970
/// differences, or global-tz-like output shape.
#[derive(Debug, Clone)]
pub struct PackratlistEvidence {
    /// Hash-backed detection verdict: `SubsetFromPolicyInput` or `Unknown`.
    pub detected: PackratlistDetected,
    /// The caller's bare scope claim, recorded independently of `detected`.
    pub claimed: PackratlistClaim,
    /// The admitted `PACKRATLIST`-source hash, when a subset was hash-detected. Because detection
    /// requires a match, this equals the reference `zone.tab` hash given to
    /// [`PackratlistEvidence::reconcile`]; `None` when `detected` is `Unknown`.
    pub evidence_sha256: Option<String>,
}
850
851impl PackratlistEvidence {
852 /// Build the scope axis from a **generation-policy input** (T12.5c). **Category boundary (the
853 /// empirical finding):** `PACKRATLIST` is a *generation-time* selector (the Makefile filters
854 /// `backzone` *before* `zic`), and its list (`zone.tab`) is **not a compilable `zic` source** — so
855 /// it does **not** belong in `source_inputs` (the compile inputs) and detection must **never** be
856 /// keyed off `source_inputs` membership (that would be a category error). Instead, detection is
857 /// keyed off an **explicitly admitted policy input**: `admitted_policy_input_sha256` is the
858 /// SHA-256 of a `--packratlist-source` selector the caller supplied (`None` = none admitted).
859 /// `Subset` (from a policy input) only when such a selector is admitted **and** `backzone` is
860 /// present (a subset list is meaningless without backzone data). Everything else is `Unknown`;
861 /// `full`/`none` are claim-only. Nothing here reads compile inputs, the link/alias surface, or
862 /// output shape. **Bounded meaning:** an admitted hash-backed selector is stronger than a bare
863 /// claim, but does **not** prove the generation step actually applied the filter.
864 pub fn reconcile(
865 claim: Option<&str>,
866 admitted_policy_input_sha256: Option<&str>,
867 reference_zone_tab_sha256: &str,
868 backzone_present: bool,
869 ) -> Self {
870 let claimed = match claim {
871 Some("full") => PackratlistClaim::Full,
872 Some("subset") => PackratlistClaim::Subset,
873 Some("none") => PackratlistClaim::None,
874 _ => PackratlistClaim::NotClaimed,
875 };
876 // Version-scoped + category-correct: `SubsetFromPolicyInput` only when the admitted policy
877 // input **is the pinned reference `zone.tab`** (hash match) AND `backzone` is present. An
878 // arbitrary admitted file, a different release's table, or no backzone → `Unknown`. We never
879 // look at `source_inputs` (compile inputs) — `zone.tab` is a generation-policy input.
880 let detected_subset =
881 backzone_present && admitted_policy_input_sha256 == Some(reference_zone_tab_sha256);
882 let (detected, evidence_sha256) = if detected_subset {
883 (
884 PackratlistDetected::SubsetFromPolicyInput,
885 Some(reference_zone_tab_sha256.to_string()),
886 )
887 } else {
888 (PackratlistDetected::Unknown, None)
889 };
890 PackratlistEvidence {
891 detected,
892 claimed,
893 evidence_sha256,
894 }
895 }
896
897 /// Reconcile detected vs claimed (cf. [`BackzoneEvidence::status`]). A bare claim with no admitted
898 /// policy input is `claimed_*_not_hash_backed` — never promoted to detection.
899 pub fn status(&self) -> &'static str {
900 use PackratlistClaim as C;
901 use PackratlistDetected as D;
902 match (self.detected, &self.claimed) {
903 (D::SubsetFromPolicyInput, C::Subset) => "detected_matches_claim",
904 (D::SubsetFromPolicyInput, C::Full) | (D::SubsetFromPolicyInput, C::None) => {
905 "detected_contradicts_claim"
906 }
907 (D::SubsetFromPolicyInput, C::NotClaimed) => "detected_subset_from_policy_input",
908 (D::Unknown, C::Full) => "claimed_full_not_hash_backed",
909 (D::Unknown, C::Subset) => "claimed_subset_not_hash_backed",
910 (D::Unknown, C::None) => "claimed_none_not_hash_backed",
911 (D::Unknown, C::NotClaimed) => "unknown_no_evidence",
912 }
913 }
914
915 fn detected_str(&self) -> &'static str {
916 match self.detected {
917 PackratlistDetected::SubsetFromPolicyInput => "subset_from_policy_input",
918 PackratlistDetected::Unknown => "unknown",
919 }
920 }
921
922 fn claimed_str(&self) -> &'static str {
923 match self.claimed {
924 PackratlistClaim::Full => "full",
925 PackratlistClaim::Subset => "subset",
926 PackratlistClaim::None => "none",
927 PackratlistClaim::NotClaimed => "not_claimed",
928 }
929 }
930}
931
932// ---------------------------------------------------------------------------------------------
933// DATAFORM (`main`/`vanguard`/`rearguard`) — the *encoding* evidence axis (T12.5d).
934//
935// **Category (the clean mental model):** `backzone` = source-*membership* evidence; `zone.tab` =
936// generation-*policy* evidence; `vanguard.zi`/`main.zi`/`rearguard.zi` = **generated-artifact**
937// evidence; `DATAFORM` = the upstream *encoding-policy* those artifacts realise. Crucially, unlike
938// `PACKRATLIST`'s `zone.tab` (a non-compilable policy table), the three `.zi` artifacts **are
939// compilable `zic` sources** — so DATAFORM detection is *category-correct* from `source_inputs`
940// membership (it mirrors `backzone`, not `packratlist`): if you compiled `vanguard.zi`, its bytes
941// are a `source_input`, and that is the only honest hash-backed signal of the encoding form.
942//
943// **Central law:** DATAFORM is admitted **only** by a hash-backed match against the pinned 2026b
944// generated artifacts, or by an explicit claim — **never** by inspecting source syntax (mainline
945// 2026b already uses negative `SAVE`, so "negative SAVE ⇒ vanguard" is provably wrong), output
946// shape, zone names, filenames, `PACKRATLIST`/`backzone`, or diagnostic behaviour. `ziguard.awk` is
947// **not** treated as a general converter (it targets *current* tzdata, is neither idempotent nor
948// reversible); the `.zi` witnesses are recorded as *generated reference artifacts* with a
949// `recipe_hash`, not as something zic-rs can reproduce or transform.
950
/// SHA-256 of the pinned 2026b complete-distribution archive (`tzdb-2026b.tar.lz`), admitted +
/// signature-verified in T12.5a.2. A `recipe_hash` input — it transitively binds every shipped file
/// (`Makefile`, `ziguard.awk`, the region sources) that the DATAFORM generation consumed.
/// Intended for the `archive_sha256` parameter of [`dataform_recipe_hash`].
pub const REF_2026B_ARCHIVE_SHA256: &str =
    "ffad46a04c8d1624197056630af475a35f3556d0887f028ac1bd33b7d47dc653";

/// SHA-256 of the pinned 2026b `Makefile` (the `DATAFORM`/`ziguard.awk` generation rules). A
/// `recipe_hash` input. Intended for the `makefile_sha256` parameter of [`dataform_recipe_hash`].
pub const REF_2026B_MAKEFILE_SHA256: &str =
    "0b4588ea467c969b23fc48335e91eb63f403574b4aac69380b84a00373c7e81d";

/// SHA-256 of the pinned 2026b `ziguard.awk` (the DATAFORM transform). A `recipe_hash` input —
/// pinned explicitly even though it ships inside the archive, so the recipe binding is legible.
/// Intended for the `ziguard_awk_sha256` parameter of [`dataform_recipe_hash`].
pub const REF_2026B_ZIGUARD_AWK_SHA256: &str =
    "e4600a2360b692242d6da76666411ece8ada76b61e6f8fb69cec79592b261785";

/// The exact `make` invocation that generated the pinned DATAFORM `.zi` artifacts. A `recipe_hash`
/// input — changing the command changes the recipe identity. The string is hashed verbatim, so even
/// a whitespace change here produces a different `recipe_hash`.
pub const REF_2026B_DATAFORM_COMMAND: &str = "make vanguard.zi main.zi rearguard.zi";

/// The toolchain that ran the DATAFORM generation (recorded because the `.zi` witnesses are
/// *derived* — a different awk could in principle differ). A `recipe_hash` input, hashed verbatim.
pub const REF_2026B_DATAFORM_TOOLCHAIN: &str = "GNU Make 4.4.1; GNU Awk 5.4.0";

/// A short, stable tag for the release the pinned DATAFORM artifacts were generated from. Stamped
/// into `generated_from` when a form is detected. It is *not* one of the [`dataform_recipe_hash`]
/// inputs.
pub const REF_2026B_DATAFORM_GENERATED_FROM: &str = "tzdb-2026b";
978
/// SHA-256 of the pinned 2026b `main.zi` (the default `DATAFORM=main` generated artifact, T12.5a.2).
/// A `source_input` with this hash is what [`DataformEvidence::reconcile`] reports as `Main` when
/// this value is supplied as [`DataformReference::main_sha256`].
pub const REF_2026B_MAIN_ZI_SHA256: &str =
    "e0225823ae0c3a99a016a4afd7e3c48cfd948132b65fbaa596a47c53ae45e4e1";

/// SHA-256 of the pinned 2026b `vanguard.zi` (`DATAFORM=vanguard`). Reported as `Vanguard` when
/// supplied as [`DataformReference::vanguard_sha256`].
pub const REF_2026B_VANGUARD_ZI_SHA256: &str =
    "49e16da4a6252a2e432fc1f68bf6daac9a6f73507dde3e3bdbcbbf78e86727ce";

/// SHA-256 of the pinned 2026b `rearguard.zi` (`DATAFORM=rearguard`). Reported as `Rearguard` when
/// supplied as [`DataformReference::rearguard_sha256`].
pub const REF_2026B_REARGUARD_ZI_SHA256: &str =
    "91c4f362a6bb297efd3cd35bce6b62367a4c00a9721a773bae0cbb0d1bf9fe23";
990
991/// Compute the **`recipe_hash`** that binds the *generation provenance* of the pinned DATAFORM `.zi`
992/// artifacts (T12.5d). It is a SHA-256 over a deterministic, labelled, newline-joined record of the
993/// recipe inputs — the archive hash, the `Makefile` hash, the `ziguard.awk` hash, the generation
994/// command, and the toolchain — **hashed as raw UTF-8 bytes, never line-ending-normalized** (a
995/// transformed copy with different newline bytes is a *different* artifact and must hash differently).
996/// The produced artifact itself is bound separately via the evidence axis's `evidence_sha256`; this
997/// value answers "by what recipe was that artifact generated", so a generated artifact is never just
998/// "hash matched" — it is "hash matched, and here is the recorded, reproducible recipe".
999pub fn dataform_recipe_hash(
1000 archive_sha256: &str,
1001 makefile_sha256: &str,
1002 ziguard_awk_sha256: &str,
1003 command: &str,
1004 toolchain: &str,
1005) -> String {
1006 let recipe = format!(
1007 "archive_sha256={archive_sha256}\nmakefile_sha256={makefile_sha256}\n\
1008 ziguard_awk_sha256={ziguard_awk_sha256}\ncommand={command}\ntoolchain={toolchain}\n"
1009 );
1010 crate::hash::sha256_hex(recipe.as_bytes())
1011}
1012
/// The pinned-release DATAFORM reference, injected into [`DataformEvidence::reconcile`] so the
/// detector is unit-testable without vendoring the large `.zi` files. Production builds this from the
/// `REF_2026B_*` consts plus the computed [`dataform_recipe_hash`].
///
/// All fields are borrowed strings, so the value is cheap to copy and must not outlive its sources.
#[derive(Debug, Clone, Copy)]
pub struct DataformReference<'a> {
    /// Hash that identifies a `source_input` as the `main` form.
    pub main_sha256: &'a str,
    /// Hash that identifies a `source_input` as the `vanguard` form.
    pub vanguard_sha256: &'a str,
    /// Hash that identifies a `source_input` as the `rearguard` form.
    pub rearguard_sha256: &'a str,
    /// The shared generation recipe hash, stamped into the evidence when a form is detected.
    pub recipe_hash: &'a str,
    /// The release tag the artifacts were generated from (e.g. `"tzdb-2026b"`).
    pub generated_from: &'a str,
}
1026
/// Which **encoding form** the admitted source bytes match (T12.5d) — hash-backed against the pinned
/// generated artifacts, **never** inferred from syntax/output/names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataformDetected {
    /// A source input's hash equals the reference `main` artifact hash. Rendered as `"main"`.
    Main,
    /// A source input's hash equals the reference `vanguard` artifact hash. Rendered as `"vanguard"`.
    Vanguard,
    /// A source input's hash equals the reference `rearguard` artifact hash. Rendered as
    /// `"rearguard"`.
    Rearguard,
    /// No admitted source matched a pinned DATAFORM artifact hash (e.g. a zishrunk `tzdata.zi`, a
    /// concatenated build, or a different release) — the encoding form is not hash-recoverable.
    /// Rendered as `"unknown"`.
    Unknown,
}
1038
/// What the build/user explicitly asserts about the encoding form (`--dataform
/// {main|vanguard|rearguard}`), recorded separately from detection and never promoted to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataformClaim {
    /// Claimed `"main"`. Rendered as `"main"`.
    Main,
    /// Claimed `"vanguard"`. Rendered as `"vanguard"`.
    Vanguard,
    /// Claimed `"rearguard"`. Rendered as `"rearguard"`.
    Rearguard,
    /// No claim was supplied, or the supplied string was not one of the three recognised forms.
    /// Rendered as `"none"`.
    None,
}
1048
/// The `DATAFORM` **encoding evidence axis** (T12.5d). Mirrors the other axes
/// (`detected`/`claimed`/`status`/`evidence_sha256`) and adds two provenance fields specific to a
/// *generated* artifact: `recipe_hash` (how it was produced) and `generated_from` (which release).
/// **Encoding evidence — hash-backed or claim-only, never inferred** from negative `SAVE` or any
/// other syntax resemblance, output shape, zone names, source filenames, `PACKRATLIST`, `backzone`,
/// or diagnostics.
///
/// The three `Option` fields are populated together: all `Some` when a form was detected, all `None`
/// when `detected` is `Unknown`.
#[derive(Debug, Clone)]
pub struct DataformEvidence {
    /// Hash-backed detection verdict, or `Unknown` when no source input matched a reference hash.
    pub detected: DataformDetected,
    /// The caller's bare encoding claim, recorded independently of `detected`.
    pub claimed: DataformClaim,
    /// The matched pinned artifact hash, when a form was detected.
    pub evidence_sha256: Option<String>,
    /// The generation `recipe_hash` of the matched artifact, when detected (see [`dataform_recipe_hash`]).
    pub recipe_hash: Option<String>,
    /// The release the matched artifact was generated from (e.g. `"tzdb-2026b"`), when detected.
    pub generated_from: Option<String>,
}
1066
1067impl DataformEvidence {
1068 /// Detect the encoding form by checking whether any `source_input`'s hash equals one of the
1069 /// pinned DATAFORM artifact hashes (`reference`). Category-correct: the `.zi` artifacts are
1070 /// compilable `zic` sources, so `source_inputs` membership is the honest signal (cf. `backzone`).
1071 /// The claim is recorded independently. **Reads only file hashes — never source syntax, output
1072 /// shape, names, or the link/alias surface, so inference is impossible by construction.** When a
1073 /// form is detected, the shared `recipe_hash`/`generated_from` are stamped so the artifact carries
1074 /// its generation provenance, not merely a matched hash.
1075 pub fn reconcile(
1076 source_inputs: &SourceInputs,
1077 claim: Option<&str>,
1078 reference: &DataformReference,
1079 ) -> Self {
1080 let claimed = match claim {
1081 Some("main") => DataformClaim::Main,
1082 Some("vanguard") => DataformClaim::Vanguard,
1083 Some("rearguard") => DataformClaim::Rearguard,
1084 _ => DataformClaim::None,
1085 };
1086 // First admitted source whose hash matches a pinned artifact wins; the three reference hashes
1087 // are distinct, so at most one form can match a given file.
1088 let mut detected = DataformDetected::Unknown;
1089 let mut evidence_sha256 = None;
1090 for f in &source_inputs.files {
1091 if f.sha256 == reference.main_sha256 {
1092 detected = DataformDetected::Main;
1093 } else if f.sha256 == reference.vanguard_sha256 {
1094 detected = DataformDetected::Vanguard;
1095 } else if f.sha256 == reference.rearguard_sha256 {
1096 detected = DataformDetected::Rearguard;
1097 } else {
1098 continue;
1099 }
1100 evidence_sha256 = Some(f.sha256.clone());
1101 break;
1102 }
1103 let (recipe_hash, generated_from) = if evidence_sha256.is_some() {
1104 (
1105 Some(reference.recipe_hash.to_string()),
1106 Some(reference.generated_from.to_string()),
1107 )
1108 } else {
1109 (None, None)
1110 };
1111 DataformEvidence {
1112 detected,
1113 claimed,
1114 evidence_sha256,
1115 recipe_hash,
1116 generated_from,
1117 }
1118 }
1119
1120 /// Reconcile detected vs claimed. A bare claim with no hash-backed detection is `claim_only` —
1121 /// never promoted to detection.
1122 pub fn status(&self) -> &'static str {
1123 use DataformClaim as C;
1124 use DataformDetected as D;
1125 let claim_form = match self.claimed {
1126 C::Main => Some(D::Main),
1127 C::Vanguard => Some(D::Vanguard),
1128 C::Rearguard => Some(D::Rearguard),
1129 C::None => None,
1130 };
1131 match (self.detected, claim_form) {
1132 (D::Unknown, None) => "unknown_no_evidence",
1133 (D::Unknown, Some(_)) => "claim_only",
1134 (_, None) => "detected_only",
1135 (d, Some(c)) if d == c => "detected_matches_claim",
1136 (_, Some(_)) => "detected_contradicts_claim",
1137 }
1138 }
1139
1140 fn detected_str(&self) -> &'static str {
1141 match self.detected {
1142 DataformDetected::Main => "main",
1143 DataformDetected::Vanguard => "vanguard",
1144 DataformDetected::Rearguard => "rearguard",
1145 DataformDetected::Unknown => "unknown",
1146 }
1147 }
1148
1149 fn claimed_str(&self) -> &'static str {
1150 match self.claimed {
1151 DataformClaim::Main => "main",
1152 DataformClaim::Vanguard => "vanguard",
1153 DataformClaim::Rearguard => "rearguard",
1154 DataformClaim::None => "none",
1155 }
1156 }
1157}
1158
/// The **source profile** block (T12.4d; extended T12.5b/c/d) — a deliberate extension seam. It
/// carries the `backward` (T12.4d), `backzone` (T12.5b), `packratlist` backzone-scope (T12.5c), and
/// `dataform` encoding (T12.5d) evidence axes, each detected/claimed/status, hash-backed or
/// claim-only, never inferred.
#[derive(Debug, Clone)]
pub struct SourceProfile {
    /// The `backward` source-membership axis (T12.4d).
    pub backward: BackwardEvidence,
    /// The `backzone` / `PACKRATDATA` source-membership axis (T12.5b).
    pub backzone: BackzoneEvidence,
    /// The `PACKRATLIST` backzone-scope axis (T12.5c).
    pub packratlist: PackratlistEvidence,
    /// The `DATAFORM` encoding axis (T12.5d).
    pub dataform: DataformEvidence,
}
1170
/// Caller-supplied inputs for the source-variant evidence axes (T12.4d `backward`; T12.5b `backzone`;
/// T12.5c `packratlist`; T12.5d `dataform`). **Provenance-only** — these never influence compilation,
/// link materialisation, or the alias map; they only feed the manifest's `source_profile`. The bare
/// claims come from `--backward`/`--backzone`/`--packratlist`/`--dataform`; `--backward-source` and
/// `--packratlist-source` admit files whose bytes are hash-checked.
///
/// `Default` yields "no claims, nothing admitted" (every field `None`).
#[derive(Debug, Clone, Default)]
pub struct SourceVariantArgs {
    /// `backward` claim: `Some(true)` = claimed included, `Some(false)` = excluded, `None` = no claim.
    pub backward_claim: Option<bool>,
    /// A file the caller asserts is the `backward` source; detection verifies whether its *bytes*
    /// participated in this build (it does **not** assert semantic identity as the IANA `backward`).
    pub backward_source: Option<std::path::PathBuf>,
    /// `backzone` (`PACKRATDATA`) claim: `Some(true)` = claimed included, `Some(false)` = excluded,
    /// `None` = no claim. Detection is hash-anchored to the pinned reference release (T12.5b); this is
    /// the *claim* side only. (`PACKRATLIST` subset selection → T12.5c; `DATAFORM` → T12.5d.)
    pub backzone_claim: Option<bool>,
    /// `backzone` *scope* (`PACKRATLIST`) claim (T12.5c): `"full"` / `"subset"` / `"none"` (else no
    /// claim). The bare `--packratlist` assertion; never promoted to detection.
    pub packratlist_claim: Option<String>,
    /// A file the caller explicitly admits as the `PACKRATLIST` subset source (T12.5c). Mere
    /// `zone.tab` presence among compile inputs is **not** admission and never triggers a detection.
    /// NOTE(review): presumably the caller hashes this file and passes the digest to
    /// [`PackratlistEvidence::reconcile`], which reports `SubsetFromPolicyInput` only when that
    /// digest equals the reference `zone.tab` hash and `backzone` is present — confirm at the call
    /// site.
    pub packratlist_source: Option<std::path::PathBuf>,
    /// `DATAFORM` *encoding* claim (T12.5d): `"main"` / `"vanguard"` / `"rearguard"` (else no claim).
    /// The bare `--dataform` assertion; never promoted to detection. Detection is hash-backed against
    /// the pinned 2026b `.zi` artifacts via `source_inputs` membership — there is intentionally **no**
    /// `--dataform-source`: the `.zi` artifacts *are* compile inputs, so admitting one you did not
    /// compile would assert provenance for bytes the build never used.
    pub dataform_claim: Option<String>,
}
1200
1201// ===========================================================================================
1202// Provenance capability statement (T12.6) — a STATIC, run-independent description of the manifest
1203// schema this build emits and the **source-variant reference-pin gate** state. Surfaced read-only in
1204// `support-report`/`structural-report` so an operator/packager sees the trust boundary without
1205// reading manifest internals. It is deliberately NOT a per-run profile: a report run is not a
1206// configured output compile, so it has no honest `build_profile`/`link_profile`/`backward_evidence`
1207// of its own — those live in `compile --manifest` and are pointed to, never fabricated here.
1208// ===========================================================================================
1209
/// Status of the source-variant reference-pin gate (T12.5a.1 created it; T12.5a.2 lifted it).
///
/// `"lifted_for_2026b"` — the pristine IANA tzdb 2026b reference set was fetched, **signature-verified**,
/// and SHA-256-pinned (`reports/t12_5a2-reference-admission.md`), so T12.5b–d are unblocked **for that
/// pinned reference only**. Version-scoped: a later release re-opens the gate until its own admission.
///
/// Single source of truth for the reports' provenance block. *(Admission ≠ implementation — see
/// [`SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED`].)*
pub const SOURCE_VARIANT_GATE_STATUS: &str = "lifted_for_2026b";
1217
/// **Which oracle backed a report's verdicts** (T15.2 — CONTRACT.TYPING). The single owner type for the
/// oracle-mode vocabulary: as of **T15.2a**, [`OracleResult::mode`](OracleResult) is this enum too (no
/// claim-bearing path emits the vocabulary as a free string). Reports render [`mode_str`](Self::mode_str)
/// (canonical snake_case); the `zic-rs-compile-manifest-v8` `oracle.mode` field renders
/// [`manifest_str`](Self::manifest_str), a **boundary-only compatibility shim** that preserves the one
/// legacy value (`"not-run"`) the manifest has ever emitted — *removal plan:* canonicalize to `mode_str`
/// at the next manifest major bump (a drift test pins that the shim diverges for that one value only).
/// The rule it enforces: **oracle *absence* is visible** — a report renders `Unavailable(reason)` (→
/// `skipped_with_reason`), never silence, so a verdict can never *silently* weaken when reference tools
/// are missing.
///
/// Not `Copy`: the `Unavailable` variant owns its reason string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OracleMode {
    /// No oracle was consulted by design (e.g. `support-report` is compile-coverage, not behaviour).
    /// `mode_str` → `"not_run"`; `manifest_str` → `"not-run"` (the one legacy divergence).
    NotRun,
    /// Reference `zic`'s emitted bytes were the oracle (e.g. `structural-report`).
    /// `mode_str` → `"reference_zic"`.
    ReferenceZic,
    /// Reference `zdump`'s decoded behaviour was the oracle (the `compare` zdump mode).
    /// `mode_str` → `"reference_zdump"`.
    ReferenceZdump,
    /// A decoded-TZif structural comparison (the `compare` structural mode).
    /// `mode_str` → `"structural_decode"`.
    StructuralDecode,
    /// The required oracle tool was unavailable; the verdict was skipped, with this reason.
    /// `mode_str` → `"unavailable"`; the reason is exposed by `skipped_with_reason`.
    Unavailable(String),
}
1241
1242impl OracleMode {
1243 /// The stable snake_case discriminant.
1244 pub fn mode_str(&self) -> &'static str {
1245 match self {
1246 OracleMode::NotRun => "not_run",
1247 OracleMode::ReferenceZic => "reference_zic",
1248 OracleMode::ReferenceZdump => "reference_zdump",
1249 OracleMode::StructuralDecode => "structural_decode",
1250 OracleMode::Unavailable(_) => "unavailable",
1251 }
1252 }
1253
1254 /// The reason an oracle was skipped, when (and only when) it was [`Unavailable`](Self::Unavailable).
1255 pub fn skipped_with_reason(&self) -> Option<&str> {
1256 match self {
1257 OracleMode::Unavailable(reason) => Some(reason.as_str()),
1258 _ => None,
1259 }
1260 }
1261
1262 /// The **`zic-rs-compile-manifest-v8` boundary** rendering (T15.2a compatibility shim). The manifest
1263 /// path only ever holds [`NotRun`](Self::NotRun) and has historically emitted `"not-run"`
1264 /// (hyphenated); that one value is preserved here for back-compat. Every other variant has no legacy
1265 /// manifest form (they never appeared there), so this is identical to [`mode_str`](Self::mode_str) for
1266 /// them — i.e. the shim diverges for exactly one value, which a drift test pins. Removal plan:
1267 /// canonicalize to `mode_str` at the next manifest major bump.
1268 pub fn manifest_str(&self) -> &'static str {
1269 match self {
1270 OracleMode::NotRun => "not-run",
1271 other => other.mode_str(),
1272 }
1273 }
1274
1275 /// Render as the report's `oracle_mode` object: `{ "mode": …, "skipped_with_reason": …|null }`.
1276 /// Absence is always visible — `skipped_with_reason` is non-null exactly when the oracle was missing.
1277 pub fn to_json_field(&self) -> String {
1278 let reason = match self.skipped_with_reason() {
1279 Some(r) => json_str(r),
1280 None => "null".to_string(),
1281 };
1282 format!(
1283 "{{ \"mode\": {}, \"skipped_with_reason\": {} }}",
1284 json_str(self.mode_str()),
1285 reason
1286 )
1287 }
1288}
1289
/// A **non-claim**, made a first-class machine-visible contract (T15.2). Advertised restraint is
/// engineering, not decoration: each variant renders a stable snake_case string **and** names the
/// guard/test/receipt that *enforces* the boundary (`enforced_by`). "We don't claim X" is exactly where
/// infrastructure tools get sloppy — this makes each non-claim auditable.
///
/// [`as_str`](Self::as_str) yields the identifier for a variant, e.g.
/// `DoesNotClaimLeapSmearSemantics` → `"does_not_claim_leap_smear_semantics"`.
///
/// NOTE(review): the variants are currently declared in alphabetical order — presumably
/// intentional; confirm before inserting a new one out of order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NegativeCapability {
    DoesNotClaimAllIanaReleasesWithoutAdmission,
    DoesNotClaimArbitraryTzifRoundtrip,
    DoesNotClaimFullToctouResistance,
    DoesNotClaimFutureCivilTimeAuthority,
    /// zic-rs emits **discrete** TZif leap-second records; it never implements leap *smearing*.
    DoesNotClaimLeapSmearSemantics,
    /// The interaction of range truncation (`-r`) with leap-expiry has **no semantic witness** and is
    /// not claimed (the `Rolling`-leap-under-`-r` case is a hard error, not a parity claim).
    DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness,
    DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext,
    DoesNotClaimTzifValidatorAsSecuritySandbox,
    DoesNotClaimUnadmittedVendorParity,
    DoesNotCurateTimeOrDefineDisplayNames,
    /// TZif is big-endian on disk; zic-rs writes it big-endian regardless of host endianness.
    DoesNotDependOnHostEndianness,
    DoesNotInferDataformFromContent,
    DoesNotInferSourceVariantFromOutputShape,
    DoesNotRequireManifestToReadTzif,
    /// The core repo admits vendor-oracle *receipts*; it does not run/ship QEMU/VM labs (T16.5).
    DoesNotShipOrOperateVendorQemuLabsInCoreRepo,
    DoesNotTreatManifestAsTzifSemantics,
}
1318
1319impl NegativeCapability {
1320 /// The stable snake_case identifier (the report's `capability` field).
1321 pub fn as_str(self) -> &'static str {
1322 use NegativeCapability::*;
1323 match self {
1324 DoesNotClaimAllIanaReleasesWithoutAdmission => {
1325 "does_not_claim_all_iana_releases_without_admission"
1326 }
1327 DoesNotClaimArbitraryTzifRoundtrip => "does_not_claim_arbitrary_tzif_roundtrip",
1328 DoesNotClaimFullToctouResistance => "does_not_claim_full_toctou_resistance",
1329 DoesNotClaimFutureCivilTimeAuthority => "does_not_claim_future_civil_time_authority",
1330 DoesNotClaimLeapSmearSemantics => "does_not_claim_leap_smear_semantics",
1331 DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness => {
1332 "does_not_claim_range_truncation_leap_expiry_interaction_parity_without_witness"
1333 }
1334 DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext => {
1335 "does_not_claim_report_authenticity_without_signature_or_reproducible_context"
1336 }
1337 DoesNotClaimTzifValidatorAsSecuritySandbox => {
1338 "does_not_claim_tzif_validator_as_security_sandbox"
1339 }
1340 DoesNotClaimUnadmittedVendorParity => "does_not_claim_unadmitted_vendor_parity",
1341 DoesNotCurateTimeOrDefineDisplayNames => "does_not_curate_time_or_define_display_names",
1342 DoesNotDependOnHostEndianness => "does_not_depend_on_host_endianness",
1343 DoesNotInferDataformFromContent => "does_not_infer_dataform_from_content",
1344 DoesNotInferSourceVariantFromOutputShape => {
1345 "does_not_infer_source_variant_from_output_shape"
1346 }
1347 DoesNotRequireManifestToReadTzif => "does_not_require_manifest_to_read_tzif",
1348 DoesNotShipOrOperateVendorQemuLabsInCoreRepo => {
1349 "does_not_ship_or_operate_vendor_qemu_labs_in_core_repo"
1350 }
1351 DoesNotTreatManifestAsTzifSemantics => "does_not_treat_manifest_as_tzif_semantics",
1352 }
1353 }
1354
1355 /// The guard/test/receipt that **enforces** this non-claim (never empty — a non-claim without an
1356 /// enforcing reference would be decorative, which T15.2 forbids).
1357 pub fn enforced_by(self) -> &'static str {
1358 use NegativeCapability::*;
1359 match self {
1360 DoesNotClaimAllIanaReleasesWithoutAdmission => {
1361 "T12.5a.3 release-admission matrix (only 2026b admitted)"
1362 }
1363 DoesNotClaimArbitraryTzifRoundtrip => {
1364 "T15.4 tzif/rfc9636 (a validator/reader is not a round-trip preservation claim)"
1365 }
1366 DoesNotClaimFullToctouResistance => {
1367 "T14.6 hostile-output-tree ledger (RequiresOpenatStyleHardening)"
1368 }
1369 DoesNotClaimFutureCivilTimeAuthority => {
1370 "docs/tzdb-governance.md + RFC 9557 (tzdb predicts; named-tz rules change; not a legal oracle)"
1371 }
1372 DoesNotClaimLeapSmearSemantics => {
1373 "T11 emits discrete TZif leap-second records (compile::apply_leaps / LeapRecord); no smearing path exists"
1374 }
1375 DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness => {
1376 "T11.4 — Rolling-leap-under-`-r` is a hard error (compile/leap.rs); the -r×leap-expiry interaction has no semantic witness"
1377 }
1378 DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext => {
1379 "T15.5 ConformanceStatus.report_provenance (default unsigned_local_report — not an attestation)"
1380 }
1381 DoesNotClaimTzifValidatorAsSecuritySandbox => {
1382 "T15.4 tzif/rfc9636 non-claim (bounds-safe, but not a hardened sandbox for hostile binaries)"
1383 }
1384 DoesNotClaimUnadmittedVendorParity => {
1385 "T13 reference-platform diagnostic matrix (only upstream_iana_2026b admitted)"
1386 }
1387 DoesNotCurateTimeOrDefineDisplayNames => {
1388 "docs/tzdb-governance.md (IANA/CLDR boundary; not zic-rs's role)"
1389 }
1390 DoesNotDependOnHostEndianness => {
1391 "tzif/header.rs + data writers emit big-endian fixed-width fields (to_be_bytes); byte-identical Etc/UTC fixture pins it"
1392 }
1393 DoesNotInferDataformFromContent => {
1394 "T12.5d test (negative-SAVE is not vanguard; hash-backed only)"
1395 }
1396 DoesNotInferSourceVariantFromOutputShape => {
1397 "T12.5 source_variants_not_inferred_* tests"
1398 }
1399 DoesNotRequireManifestToReadTzif => {
1400 "RFC 9636 (a TZif reader needs only the emitted bytes; manifest is a sidecar)"
1401 }
1402 DoesNotShipOrOperateVendorQemuLabsInCoreRepo => {
1403 "T16.5 vendor_oracle — core defines/admits receipts only; no VM images/QEMU orchestration vendored"
1404 }
1405 DoesNotTreatManifestAsTzifSemantics => {
1406 "reports/t12-close-receipt.md §5 (manifest is provenance, not TZif semantics)"
1407 }
1408 }
1409 }
1410}
1411
/// The **evidence category** of an artifact — the T12 doctrine spine as a typed report field (T15.3).
///
/// This is the typed guardrail earned by the `zone.tab`-is-policy-not-compile error (T12.5c): a
/// claim-bearing artifact must declare its category, so input/policy/reference/generated/output
/// kinds can never be silently conflated. **The rule: no claim-bearing artifact enters a report
/// without a category owner.** (`semantic_witness` and `structural_validation` are distinct
/// *output-evidence* categories — a semantic witness proves selected behaviour under an oracle,
/// NOT RFC 9636 structural validity, which is `structural_validation` / T15.4.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtifactCategory {
    CompileInput,
    PolicyInput,
    ReferenceInput,
    GeneratedArtifact,
    OutputArtifact,
    DiagnosticArtifact,
    SemanticWitnessArtifact,
    StructuralValidationArtifact,
    /// Non-compiling prose that is *policy* evidence (e.g. `theory.html` / Makefile knobs / NEWS).
    PolicyProse,
    /// Release-note evidence (e.g. tzdb NEWS entries) consulted for release-delta review.
    ReleaseNoteEvidence,
}

impl ArtifactCategory {
    /// The stable snake_case identifier rendered in reports (one fixed string per variant).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::CompileInput => "compile_input",
            Self::PolicyInput => "policy_input",
            Self::ReferenceInput => "reference_input",
            Self::GeneratedArtifact => "generated_artifact",
            Self::OutputArtifact => "output_artifact",
            Self::DiagnosticArtifact => "diagnostic_artifact",
            Self::SemanticWitnessArtifact => "semantic_witness_artifact",
            Self::StructuralValidationArtifact => "structural_validation_artifact",
            Self::PolicyProse => "policy_prose",
            Self::ReleaseNoteEvidence => "release_note_evidence",
        }
    }
}
1453
/// The **report kind** (T15.5) — keeps a reader from confusing a compile-coverage
/// `support-report` with a structural validation or a behaviour witness; each proves a different
/// claim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReportKind {
    Support,
    Structural,
    Manifest,
    SemanticWitness,
    TzifValidation,
}

impl ReportKind {
    /// The stable snake_case identifier for this report kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Support => "support",
            Self::Structural => "structural",
            Self::Manifest => "manifest",
            Self::SemanticWitness => "semantic_witness",
            Self::TzifValidation => "tzif_validation",
        }
    }
}
1476
/// A **bounded** conformance level (T15.5).
///
/// It reflects *scope, not ambition* — there is deliberately no `compatible` /
/// `conformant: true`. A standalone `support-report` establishes compile-coverage over an admitted
/// release; the behaviour / structural / diagnostic axes are *separate surfaces* (this points to
/// them; it does not roll their results into a single global verdict).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConformanceLevel {
    NotEvaluated,
    /// What `support-report` alone establishes: the admitted release's zones compile;
    /// **no oracle ran here**.
    ReleaseAdmittedCompileCoverage,
    StructurallyValidatedOnly,
    SemanticWitnessedOnly,
    KnownDivergencePresent,
    OracleUnavailable,
}

impl ConformanceLevel {
    /// The stable snake_case identifier for this level.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NotEvaluated => "not_evaluated",
            Self::ReleaseAdmittedCompileCoverage => "release_admitted_compile_coverage",
            Self::StructurallyValidatedOnly => "structurally_validated_only",
            Self::SemanticWitnessedOnly => "semantic_witnessed_only",
            Self::KnownDivergencePresent => "known_divergence_present",
            Self::OracleUnavailable => "oracle_unavailable",
        }
    }
}
1504
/// Whether the workspace that produced the report was clean (T15.5). Honest by default: without a git
/// tree (this project ships from an archive, not a checked-out repo, and has **no `build.rs`** to capture
/// VCS state) this is `Unknown` — never fabricated as clean.
///
/// This enum has no `as_str`; its snake_case rendering lives in
/// `ConformanceStatus::to_json_block` (the `workspace_provenance` field).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkspaceProvenance {
    /// Rendered as `clean_git_tree`.
    CleanGitTree,
    /// Rendered as `dirty_git_tree`.
    DirtyGitTree,
    /// Rendered as `source_archive`.
    SourceArchive,
    /// Rendered as `unknown`; the value `ConformanceStatus::support` records.
    Unknown,
}
1515
/// The authenticity status of the report artifact itself (T15.5 — *a public report is a claim surface,
/// not an unexamined trust root*). Default is an unsigned local report: useful, but not an attestation.
///
/// This enum has no `as_str`; its snake_case rendering lives in
/// `ConformanceStatus::to_json_block` (the `report_provenance` field).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReportProvenance {
    /// Rendered as `unsigned_local_report`; the value `ConformanceStatus::support` records.
    UnsignedLocalReport,
    /// Rendered as `reproducible_ci_artifact`.
    ReproducibleCiArtifact,
    /// Rendered as `signed_release_artifact`.
    SignedReleaseArtifact,
}
1524
1525/// Which tool build produced the output (T15.5). `rustc`/`git_commit`/full target-triple are **honestly
1526/// `unknown`** here because the project deliberately has no `build.rs` to capture them — disclosed, not
1527/// faked. `zic_rs_version` is the crate version; `target` is an `arch-os` approximation; `profile` is
1528/// debug/release.
1529#[derive(Debug, Clone)]
1530pub struct CompilerIdentity {
1531 pub zic_rs_version: &'static str,
1532 pub rustc: Option<&'static str>,
1533 pub target: String,
1534 pub profile: &'static str,
1535 pub git_commit: Option<&'static str>,
1536}
1537
1538impl CompilerIdentity {
1539 pub fn capture() -> Self {
1540 CompilerIdentity {
1541 zic_rs_version: env!("CARGO_PKG_VERSION"),
1542 // No `build.rs` → these are not captured at build time; honestly `None`, never invented.
1543 rustc: option_env!("ZIC_RS_RUSTC_VERSION"),
1544 target: format!("{}-{}", std::env::consts::ARCH, std::env::consts::OS),
1545 profile: if cfg!(debug_assertions) {
1546 "debug"
1547 } else {
1548 "release"
1549 },
1550 git_commit: option_env!("ZIC_RS_GIT_COMMIT"),
1551 }
1552 }
1553}
1554
/// The release-admission pin gate as a **type** (T15.5-remainder) instead of the bare
/// `SOURCE_VARIANT_GATE_STATUS` string.
///
/// It renders the *same* literal at the JSON boundary (so no schema churn), but the vocabulary is
/// now exhaustive and totality-tested — a drift test pins
/// `current().as_str() == SOURCE_VARIANT_GATE_STATUS`. `Open` = no release admitted;
/// `LiftedFor2026b` = the single 2026b release is admitted (signature-verified + hash-pinned, per
/// T12.5a.2).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferencePinGate {
    Open,
    LiftedFor2026b,
}

impl ReferencePinGate {
    /// The stable snake_case identifier for this gate state.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Open => "open",
            Self::LiftedFor2026b => "lifted_for_2026b",
        }
    }

    /// The gate state as currently shipped — single-sourced against `SOURCE_VARIANT_GATE_STATUS`.
    pub fn current() -> Self {
        Self::LiftedFor2026b
    }
}
1578
/// **Where an admitted reference came from** (T16.3) — the *location* half of the "*which*
/// reference?" question.
///
/// The central rule: **only a `VersionedArchive` (a release tarball you can re-fetch and re-pin)
/// can back a *sealed* release claim**; the other locators support exploration/diagnosis but not
/// a sealed claim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceLocatorKind {
    /// A pinned, re-fetchable release archive (e.g. the T12.5a.2 `tzdb-2026b.tar.lz`).
    /// Sealed-claim grade.
    VersionedArchive,
    /// Whatever `zic`/`zdump` is on `PATH` right now — moves under your feet; exploration only.
    LiveCurrentDirectory,
    /// A local cached copy of bytes (integrity depends on how it was pinned).
    LocalCachedCopy,
    /// A distribution's source package (a patch-stack over upstream; distinct provenance).
    DistroSourcePackage,
    /// Provenance not established.
    Unknown,
}

impl ReferenceLocatorKind {
    /// The stable snake_case identifier for this locator kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::VersionedArchive => "versioned_archive",
            Self::LiveCurrentDirectory => "live_current_directory",
            Self::LocalCachedCopy => "local_cached_copy",
            Self::DistroSourcePackage => "distro_source_package",
            Self::Unknown => "unknown",
        }
    }
}
1607
/// **How an admitted reference is trusted** (T16.3) — the *trust* half of the "*which*
/// reference?" question, kept precise so a reader knows *what kind* of trust they are getting.
///
/// Crucially `HashOnly` proves **integrity** (the bytes are what we pinned) but **not
/// authenticity** (who produced them); it is never rendered as "signature verified".
/// `FingerprintAnchored` (the T12.5a.2 model — an OpenPGP signature verified against a published
/// key *fingerprint*) is **not** the weaker `WebOfTrustValidated`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SignatureTrustModel {
    /// OpenPGP signature verified against a published key fingerprint (authenticity + integrity).
    FingerprintAnchored,
    /// Trust via a web-of-trust path (weaker than fingerprint-anchored; not claimed unless real).
    WebOfTrustValidated,
    /// Trust via an OS/platform keyring.
    PlatformKeyring,
    /// A content hash only — **integrity, not authenticity**; never "signature verified".
    HashOnly,
    /// Explicitly unsigned.
    Unsigned,
    /// Trust model not established.
    Unknown,
}

impl SignatureTrustModel {
    /// The stable snake_case identifier for this trust model.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::FingerprintAnchored => "fingerprint_anchored",
            Self::WebOfTrustValidated => "web_of_trust_validated",
            Self::PlatformKeyring => "platform_keyring",
            Self::HashOnly => "hash_only",
            Self::Unsigned => "unsigned",
            Self::Unknown => "unknown",
        }
    }

    /// Whether this trust model **pins integrity** (the bytes are what we expect).
    ///
    /// `HashOnly` qualifies (integrity without authenticity); `Unsigned` / `Unknown` do not.
    /// Authenticity is a *separate* axis — see `FingerprintAnchored`. Both outcomes are spelled
    /// out per variant, so a new variant must be classified explicitly.
    pub fn pins_integrity(self) -> bool {
        match self {
            Self::FingerprintAnchored
            | Self::WebOfTrustValidated
            | Self::PlatformKeyring
            | Self::HashOnly => true,
            Self::Unsigned | Self::Unknown => false,
        }
    }
}
1653
1654/// A reference's admission evidence (T16.3): *where it came from* × *how it is trusted*. The sealed-claim
1655/// rule is enforced here, not in prose: a claim may be *sealed* (re-verifiable, release-grade) **only** if
1656/// the locator is a `VersionedArchive` **and** the trust model pins integrity.
1657#[derive(Debug, Clone, Copy)]
1658pub struct ReferenceAdmission {
1659 pub locator: ReferenceLocatorKind,
1660 pub trust: SignatureTrustModel,
1661}
1662
1663impl ReferenceAdmission {
1664 /// Only a versioned archive with integrity-pinned trust can back a sealed release claim. A live
1665 /// PATH binary, a distro package, or any unsigned/unknown-trust material is exploration-grade only.
1666 pub fn supports_sealed_claim(&self) -> bool {
1667 matches!(self.locator, ReferenceLocatorKind::VersionedArchive)
1668 && self.trust.pins_integrity()
1669 }
1670 pub fn to_json(&self) -> String {
1671 format!(
1672 "{{ \"locator\": {}, \"signature_trust\": {}, \"supports_sealed_claim\": {} }}",
1673 json_str(self.locator.as_str()),
1674 json_str(self.trust.as_str()),
1675 self.supports_sealed_claim()
1676 )
1677 }
1678}
1679
/// The T12.5a.2 admitted 2026b reference: a **versioned archive** (`tzdb-2026b.tar.lz`), OpenPGP signature
/// verified against the published tz key **fingerprint** + SHA-256 hash-pinned. The one reference today
/// that backs a *sealed* claim. (Distinct from the *live* PATH `zic` a report's oracle runs against.)
///
/// With this locator/trust pair, `ReferenceAdmission::supports_sealed_claim` returns `true`:
/// the locator is `VersionedArchive` and `FingerprintAnchored` pins integrity.
pub const ADMITTED_2026B_REFERENCE: ReferenceAdmission = ReferenceAdmission {
    locator: ReferenceLocatorKind::VersionedArchive,
    trust: SignatureTrustModel::FingerprintAnchored,
};
1687
/// The dimension a claim is portable **along** (T15.5-remainder) — i.e. what it stays true
/// *under*.
///
/// A claim is never "globally true": it is true *for* a declared release / oracle / platform /
/// profile / fixture set, or it is a general project policy. This makes "true where?" a typed
/// field, not prose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClaimPortability {
    ReleaseSpecific,
    OracleSpecific,
    PlatformSpecific,
    ProfileSpecific,
    FixtureSpecific,
    GeneralProjectPolicy,
}

impl ClaimPortability {
    /// The stable snake_case identifier for this portability axis.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::ReleaseSpecific => "release_specific",
            Self::OracleSpecific => "oracle_specific",
            Self::PlatformSpecific => "platform_specific",
            Self::ProfileSpecific => "profile_specific",
            Self::FixtureSpecific => "fixture_specific",
            Self::GeneralProjectPolicy => "general_project_policy",
        }
    }
}
1713
/// The **kind of authority** a claim's evidence carries (T15.5-remainder).
///
/// Orthogonal to whether the claim is true: it says *what backs it*, so a reviewer can tell a
/// normative-spec citation from an implementation observation from project doctrine.
/// (`NormativeSpec` = RFC 9636 TZif format; `PolicyGuidance` = BCP 175 / tzdb *process*, not
/// format — the two never blur.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceAuthorityKind {
    NormativeSpec,
    ImplementationObservation,
    ManpageDocumentation,
    PolicyGuidance,
    ReleaseNote,
    EmpiricalFixture,
    ProjectDoctrine,
}

impl EvidenceAuthorityKind {
    /// The stable snake_case identifier for this authority kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NormativeSpec => "normative_spec",
            Self::ImplementationObservation => "implementation_observation",
            Self::ManpageDocumentation => "manpage_documentation",
            Self::PolicyGuidance => "policy_guidance",
            Self::ReleaseNote => "release_note",
            Self::EmpiricalFixture => "empirical_fixture",
            Self::ProjectDoctrine => "project_doctrine",
        }
    }
}
1742
/// What a report's claim **proves**, does **not** prove, and **depends on** (T15.5-remainder) — the
/// compact, machine-readable form of the non-claim doctrine, attached to the rollup. Static for a given
/// report kind (the boundary is fixed by what the surface actually measures).
///
/// All three fields are `&'static str` prose; `ConformanceStatus::to_json_block` emits them under
/// the `claim_boundary` object.
#[derive(Debug, Clone, Copy)]
pub struct ClaimBoundary {
    /// What the claim establishes (emitted as `proves`).
    pub proves: &'static str,
    /// What the claim explicitly leaves unestablished (emitted as `does_not_prove`).
    pub does_not_prove: &'static str,
    /// What the claim relies on (emitted as `depends_on`).
    pub depends_on: &'static str,
}
1752
/// The distinct senses of "valid" the conformance engine keeps **impossible to blur** (T15.5-remainder /
/// T15.close). Emitted as a report field so "valid" can never be read as a single global verdict: each
/// entry is `<sense>: <what it is> — NOT <what it is not>`. The live behaviour claim (CORE.1) is the last
/// entry and is deliberately separate from structural / reader / release-admission validity.
///
/// `ConformanceStatus::to_json_block` renders these entries, in this order, as the
/// `valid_disambiguation` JSON array.
pub const VALID_DISAMBIGUATION: &[&str] = &[
    "structurally_valid: RFC 9636 byte-format integrity (tzif-validate) — NOT behaviour or semantics",
    "semantically_witness_matching: offset/is_dst/abbr match zdump for the declared witness set — NOT all instants",
    "modern_reader_compatible: no v4/legacy reader hazards — NOT semantic correctness",
    "future_projection_matching: the POSIX footer projects like reference — separate from footer parseability",
    "release_admitted: the source release is signature-verified + hash-pinned — NOT all IANA releases",
    "compile_covered: the admitted release's zones compile — NOT behaviour-matched",
    "behaviour_matched: CORE.1 341/341 vs reference zic/zdump over 1900..2040 — the live claim, separate from all above",
];
1766
/// The one-line, **machine-readable** conformance rollup (T15.5) — the claim *envelope*, so a reviewer
/// gets the scope in one scan without reconstructing the whole ladder. It is a pointer-rich summary, not
/// a global pass/fail: it names the admitted release, the bounded level for *this* report kind, the
/// available proof surfaces, the report's own provenance, and a `declared_scope_hash` that changes
/// whenever any scope element changes. T15.5-remainder added the typed claim-shape axes
/// (`reference_pin_gate` · `claim_portability` · `evidence_authority` · `claim_boundary`) and the
/// `valid_disambiguation`, so the *shape* of the claim is as machine-readable as its result.
#[derive(Debug, Clone)]
pub struct ConformanceStatus {
    /// Which report surface this rollup describes (emitted as `report_kind`).
    pub report_kind: ReportKind,
    /// The bounded level for this report kind (emitted as `conformance_level`).
    pub level: ConformanceLevel,
    /// State of the producing workspace (emitted as `workspace_provenance`).
    pub workspace: WorkspaceProvenance,
    /// Authenticity status of the report artifact (emitted as `report_provenance`).
    pub report_provenance: ReportProvenance,
    /// The tool build that produced the output (emitted as the `compiler_identity` object).
    pub compiler: CompilerIdentity,
    /// The release-admission gate state (emitted as `admitted_release_gate`).
    pub reference_pin_gate: ReferencePinGate,
    /// The dimension the claim is portable along (emitted as `claim_portability`).
    pub claim_portability: ClaimPortability,
    /// The kind of authority backing the claim (emitted as `evidence_authority`).
    pub evidence_authority: EvidenceAuthorityKind,
    /// What the claim proves / does not prove / depends on (emitted as `claim_boundary`).
    pub claim_boundary: ClaimBoundary,
}
1786
1787impl ConformanceStatus {
1788 /// The rollup for a `support-report` invocation (compile-coverage over the admitted release).
1789 pub fn support() -> Self {
1790 ConformanceStatus {
1791 report_kind: ReportKind::Support,
1792 level: ConformanceLevel::ReleaseAdmittedCompileCoverage,
1793 // No git tree / no build.rs here → honest Unknown.
1794 workspace: WorkspaceProvenance::Unknown,
1795 report_provenance: ReportProvenance::UnsignedLocalReport,
1796 compiler: CompilerIdentity::capture(),
1797 reference_pin_gate: ReferencePinGate::current(),
1798 // support-report's claim is about the admitted *release*; it is an observation of zic-rs's own
1799 // compile, not a normative-spec or oracle claim.
1800 claim_portability: ClaimPortability::ReleaseSpecific,
1801 evidence_authority: EvidenceAuthorityKind::ImplementationObservation,
1802 claim_boundary: ClaimBoundary {
1803 proves: "the admitted release's zones compile (compile-coverage), each accounted in exactly one bucket",
1804 does_not_prove: "behaviour / structural / reader-compatibility parity — those are separate surfaces (semantic-report · structural-report · tzif-validate)",
1805 depends_on: "the signature-verified + hash-pinned 2026b reference set and this zic-rs build",
1806 },
1807 }
1808 }
1809
1810 /// `declared_scope_hash` — a SHA-256 over the **claim envelope**: admitted-release gate · manifest +
1811 /// report schema versions · the sorted negative-capability ids · the CORE.1 claim string. If any
1812 /// scope element changes, the hash changes — a compact identifier reviewers can pin a claim to.
1813 pub fn declared_scope_hash(&self) -> String {
1814 let mut envelope = String::new();
1815 envelope.push_str(SOURCE_VARIANT_GATE_STATUS);
1816 envelope.push('|');
1817 envelope.push_str(COMPILE_SCHEMA);
1818 envelope.push_str("|zic-rs-support-report-v4|zic-rs-structural-report-v3");
1819 envelope.push_str("|zic-rs-semantic-report-v1|zic-rs-tzif-validation-v1|");
1820 for nc in NEGATIVE_CAPABILITIES {
1821 envelope.push_str(nc.as_str());
1822 envelope.push(',');
1823 }
1824 envelope.push_str("|CORE.1=341/341@1900..2040;0mismatch;0failclosed");
1825 crate::hash::sha256_hex(envelope.as_bytes())
1826 }
1827
1828 /// Render the `conformance_status` block (a comma-terminated object for insertion into a report).
1829 pub fn to_json_block(&self) -> String {
1830 let opt = |o: Option<&str>| match o {
1831 Some(v) => json_str(v),
1832 None => "null".to_string(),
1833 };
1834 // The valid-disambiguation array, rendered from the static const so the senses stay single-sourced.
1835 let mut valid_disambig = String::from("[");
1836 for (i, sense) in VALID_DISAMBIGUATION.iter().enumerate() {
1837 if i > 0 {
1838 valid_disambig.push_str(", ");
1839 }
1840 valid_disambig.push_str(&json_str(sense));
1841 }
1842 valid_disambig.push(']');
1843 format!(
1844 " \"conformance_status\": {{\n\
1845 \"report_kind\": {}, \"conformance_level\": {}, \"declared_scope_hash\": {}, \
1846 \"admitted_release_gate\": {}, \"workspace_provenance\": {}, \"report_provenance\": {}, \
1847 \"claim_portability\": {}, \"evidence_authority\": {}, \
1848 \"claim_boundary\": {{ \"proves\": {}, \"does_not_prove\": {}, \"depends_on\": {} }}, \
1849 \"valid_disambiguation\": {}, \
1850 \"core1_claim\": {}, \
1851 \"available_surfaces\": [\"support-report\", \"structural-report\", \"semantic-report\", \
1852 \"tzif-validation\", \"compile-manifest\"], \
1853 \"compiler_identity\": {{ \"zic_rs_version\": {}, \"rustc\": {}, \"target\": {}, \
1854 \"profile\": {}, \"git_commit\": {} }} }},\n",
1855 json_str(self.report_kind.as_str()),
1856 json_str(self.level.as_str()),
1857 json_str(&self.declared_scope_hash()),
1858 json_str(self.reference_pin_gate.as_str()),
1859 json_str(match self.workspace {
1860 WorkspaceProvenance::CleanGitTree => "clean_git_tree",
1861 WorkspaceProvenance::DirtyGitTree => "dirty_git_tree",
1862 WorkspaceProvenance::SourceArchive => "source_archive",
1863 WorkspaceProvenance::Unknown => "unknown",
1864 }),
1865 json_str(match self.report_provenance {
1866 ReportProvenance::UnsignedLocalReport => "unsigned_local_report",
1867 ReportProvenance::ReproducibleCiArtifact => "reproducible_ci_artifact",
1868 ReportProvenance::SignedReleaseArtifact => "signed_release_artifact",
1869 }),
1870 json_str(self.claim_portability.as_str()),
1871 json_str(self.evidence_authority.as_str()),
1872 json_str(self.claim_boundary.proves),
1873 json_str(self.claim_boundary.does_not_prove),
1874 json_str(self.claim_boundary.depends_on),
1875 valid_disambig,
1876 json_str(
1877 "341/341 canonical zones behaviour-match reference zic/zdump over 1900..2040 \
1878 (0 mismatch, 0 fail-closed)"
1879 ),
1880 json_str(self.compiler.zic_rs_version),
1881 opt(self.compiler.rustc),
1882 json_str(&self.compiler.target),
1883 json_str(self.compiler.profile),
1884 opt(self.compiler.git_commit),
1885 )
1886 }
1887}
1888
/// The canonical, **sorted-by-`as_str()`** non-claims list surfaced in every report's provenance block.
/// Sorted so the emitted JSON array is deterministic; the order is asserted by a test.
///
/// Order matters beyond presentation: `ConformanceStatus::declared_scope_hash` folds the ids in
/// this order into the hashed envelope, and both provenance renderers iterate it as written.
pub const NEGATIVE_CAPABILITIES: &[NegativeCapability] = &[
    NegativeCapability::DoesNotClaimAllIanaReleasesWithoutAdmission,
    NegativeCapability::DoesNotClaimArbitraryTzifRoundtrip,
    NegativeCapability::DoesNotClaimFullToctouResistance,
    NegativeCapability::DoesNotClaimFutureCivilTimeAuthority,
    NegativeCapability::DoesNotClaimLeapSmearSemantics,
    NegativeCapability::DoesNotClaimRangeTruncationLeapExpiryInteractionParityWithoutWitness,
    NegativeCapability::DoesNotClaimReportAuthenticityWithoutSignatureOrReproducibleContext,
    NegativeCapability::DoesNotClaimTzifValidatorAsSecuritySandbox,
    NegativeCapability::DoesNotClaimUnadmittedVendorParity,
    NegativeCapability::DoesNotCurateTimeOrDefineDisplayNames,
    NegativeCapability::DoesNotDependOnHostEndianness,
    NegativeCapability::DoesNotInferDataformFromContent,
    NegativeCapability::DoesNotInferSourceVariantFromOutputShape,
    NegativeCapability::DoesNotRequireManifestToReadTzif,
    NegativeCapability::DoesNotShipOrOperateVendorQemuLabsInCoreRepo,
    NegativeCapability::DoesNotTreatManifestAsTzifSemantics,
];
1909
/// Whether any backzone/PACKRATLIST/DATAFORM/rearguard/vanguard *behaviour* is implemented. Still
/// **false** — the gate lift (T12.5a.2) only *admitted the reference*; implementation begins at T12.5b.
///
/// Emitted as `source_variant_behavior_implemented` by `provenance_block_json`, and selects the
/// "implemented" / "NOT implemented or claimed" wording in `provenance_block_text`.
pub const SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED: bool = false;

/// Substeps still blocked by the gate. Empty since T12.5a.2 lifted it for 2026b — T12.5b–d are
/// unblocked (but not yet implemented; see [`SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED`]).
///
/// Emitted as the `blocked_substeps` array by `provenance_block_json`.
pub const SOURCE_VARIANT_BLOCKED_SUBSTEPS: &[&str] = &[];

/// Required upstream tzdb reference files still **unpinned**. Empty since T12.5a.2 admitted +
/// SHA-256-pinned the full 2026b set (hashes in `reports/t12_5a2-reference-admission.md`).
///
/// Emitted as the `unpinned_required_files` array by `provenance_block_json`.
pub const SOURCE_VARIANT_UNPINNED_FILES: &[&str] = &[];
1921
1922/// The provenance/capability statement as a deterministic JSON object block (key `"provenance"`),
1923/// 2-space-indented and **comma-terminated** for insertion right after a report's `"schema"` line.
1924/// Shared by both reports so the trust state is identical and single-sourced.
1925pub fn provenance_block_json() -> String {
1926 let arr = |items: &[&str]| -> String {
1927 let inner: Vec<String> = items.iter().map(|i| json_str(i)).collect();
1928 format!("[{}]", inner.join(", "))
1929 };
1930 let mut s = String::new();
1931 s.push_str(" \"provenance\": {\n");
1932 s.push_str(&format!(
1933 " \"manifest_schema\": {},\n",
1934 json_str(COMPILE_SCHEMA)
1935 ));
1936 s.push_str(
1937 " \"per_run_profile\": \"see `compile --manifest`: build_profile / source_inputs / \
1938 link_profile / source_profile.backward_evidence\",\n",
1939 );
1940 s.push_str(&format!(
1941 " \"source_variant_reference_pin_gate\": {},\n",
1942 json_str(SOURCE_VARIANT_GATE_STATUS)
1943 ));
1944 s.push_str(&format!(
1945 " \"blocked_substeps\": {},\n",
1946 arr(SOURCE_VARIANT_BLOCKED_SUBSTEPS)
1947 ));
1948 s.push_str(&format!(
1949 " \"unpinned_required_files\": {},\n",
1950 arr(SOURCE_VARIANT_UNPINNED_FILES)
1951 ));
1952 s.push_str(&format!(
1953 " \"source_variant_behavior_implemented\": {},\n",
1954 SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED
1955 ));
1956 s.push_str(
1957 " \"note\": \"tzdb 2026b reference set admitted + signature-verified + SHA-256-pinned \
1958 (reports/t12_5a2-reference-admission.md); T12.5b–d source-variant **evidence axes** are \
1959 implemented for that pinned reference, while source-variant **behaviour** remains not \
1960 implemented or claimed. No backzone/PACKRATLIST/DATAFORM/rearguard/vanguard behaviour is \
1961 claimed; never inferred from aliases, filenames, link counts, or output byte shape.\",\n",
1962 );
1963 // T15.2 — `negative_capabilities`: the project's non-claims as a first-class, machine-visible array,
1964 // each tied to the guard/test/receipt that enforces it (never decorative). Sorted + deterministic.
1965 s.push_str(" \"negative_capabilities\": [");
1966 for (i, nc) in NEGATIVE_CAPABILITIES.iter().enumerate() {
1967 s.push_str(if i == 0 { "\n" } else { ",\n" });
1968 s.push_str(&format!(
1969 " {{ \"capability\": {}, \"enforced_by\": {} }}",
1970 json_str(nc.as_str()),
1971 json_str(nc.enforced_by())
1972 ));
1973 }
1974 s.push_str("\n ]\n");
1975 s.push_str(" },\n");
1976 s
1977}
1978
1979/// The provenance/capability statement as a human-readable text block, appended to a report's text
1980/// output. Mirrors [`provenance_block_json`].
1981pub fn provenance_block_text() -> String {
1982 let mut s = String::new();
1983 s.push_str("\nprovenance / capability:\n");
1984 s.push_str(&format!(
1985 " manifest schema: {COMPILE_SCHEMA} (per-run build/source/link/backward profile: see \
1986 `compile --manifest`)\n"
1987 ));
1988 s.push_str(&format!(
1989 " source-variant reference-pin gate: {SOURCE_VARIANT_GATE_STATUS} (tzdb 2026b admitted + \
1990 signature-verified + SHA-256-pinned — reports/t12_5a2-reference-admission.md)\n"
1991 ));
1992 s.push_str(&format!(
1993 " source-variant behaviour: {} — T12.5b–d unblocked for the pinned reference but not yet \
1994 implemented; backzone/PACKRATLIST/DATAFORM/rearguard/vanguard never inferred from \
1995 aliases/filenames/link counts/output shape\n",
1996 if SOURCE_VARIANT_BEHAVIOR_IMPLEMENTED {
1997 "implemented"
1998 } else {
1999 "NOT implemented or claimed"
2000 }
2001 ));
2002 s.push_str(" negative capabilities (non-claims, each enforced):\n");
2003 for nc in NEGATIVE_CAPABILITIES {
2004 s.push_str(&format!(" - {} ({})\n", nc.as_str(), nc.enforced_by()));
2005 }
2006 s
2007}
2008
/// The oracle result for *this* invocation. A bare `compile` never runs the oracle, so it is
/// recorded as `not-run` — the manifest must not infer success from the repo's test suite.
#[derive(Debug, Clone)]
pub struct OracleResult {
    /// The oracle mode, **typed** (T15.2a — was a free `String`). Rendered at the manifest boundary via
    /// [`OracleMode::manifest_str`]. The companion `result` is the verdict vocabulary (a separate axis).
    pub mode: OracleMode,
    /// Optional horizon, rendered as the manifest's `oracle.horizon` field (JSON `null` when
    /// absent). NOTE(review): presumably the comparison horizon of the oracle run — confirm.
    pub horizon: Option<String>,
    /// The oracle verdict, typed (T17.2 — was a free `String`). Distinct from `mode`: *what the oracle
    /// concluded*, not *which* oracle. Rendered via [`OracleVerdict::as_str`].
    pub result: OracleVerdict,
}
2021
2022impl OracleResult {
2023 /// The honest default for a `compile` invocation: the oracle was not run.
2024 pub fn not_run() -> Self {
2025 OracleResult {
2026 mode: OracleMode::NotRun,
2027 horizon: None,
2028 result: OracleVerdict::NotRun,
2029 }
2030 }
2031}
2032
/// The full compile-provenance manifest: everything [`CompileManifest::to_json`] renders for one
/// compile run.
#[derive(Debug, Clone)]
pub struct CompileManifest {
    /// Version of this crate, rendered as the top-level `zic_rs_version`.
    pub zic_rs_version: String,
    /// Detected/claimed tzdb version plus source path and hash, rendered as the `tzdb` object.
    pub tzdb: TzdbProvenance,
    /// Input identity of the run, rendered as the `source_inputs` block.
    pub source_inputs: SourceInputs,
    /// Output identity of the run, rendered as the `build_profile` block.
    pub build_profile: BuildProfile,
    /// Link/alias identity, rendered as the `link_profile` block.
    pub link_profile: LinkProfile,
    /// Source-evidence axes, rendered as the `source_profile` block.
    pub source_profile: SourceProfile,
    /// Identifiers the user asked for, rendered as `compile.zones_requested`.
    pub zones_requested: Vec<String>,
    /// Names of the zones that were compiled, rendered as `compile.zones_compiled`.
    pub zones_compiled: Vec<String>,
    /// Names of the links that were written, rendered as `compile.links_materialized`.
    pub links_materialized: Vec<String>,
    /// Requested identifiers that were neither compiled nor written as a link, rendered as
    /// `compile.unsupported_zones`.
    pub unsupported_zones: Vec<String>,
    /// Oracle outcome for this invocation, rendered as the `oracle` object.
    pub oracle: OracleResult,
}
2048
2049/// Render the `build_profile` block — the structured output identity of *this run* (T12.2). Fields
2050/// describe what was actually used; only the `DATAFORM` encoding axes `rearguard`/`vanguard` are
2051/// `"unknown"` here (no deterministic detector yet — kept explicit, never guessed or claimed; T12.5d).
2052/// Source-membership (`backward`/`backzone`/`PACKRATLIST`) lives in the `source_profile` evidence
2053/// axes, not here.
2054fn build_profile_json(p: &BuildProfile) -> String {
2055 let opt_at = |v: Option<i64>| match v {
2056 Some(n) => format!("\"@{n}\""),
2057 None => "null".to_string(),
2058 };
2059
2060 let mut s = String::new();
2061 s.push_str(" \"build_profile\": {\n");
2062 s.push_str(&format!(
2063 " \"output_tree\": {},\n",
2064 json_str(p.output_tree.as_str())
2065 ));
2066 // leap_source: describes the run, never capabilities.
2067 s.push_str(" \"leap_source\": {\n");
2068 s.push_str(&format!(
2069 " \"mode\": {},\n",
2070 json_str(p.leap_source.mode.as_str())
2071 ));
2072 match &p.leap_source.sha256 {
2073 Some(h) => s.push_str(&format!(" \"sha256\": {},\n", json_str(h))),
2074 None => s.push_str(" \"sha256\": null,\n"),
2075 }
2076 s.push_str(&format!(
2077 " \"entry_count\": {},\n",
2078 p.leap_source.entry_count
2079 ));
2080 s.push_str(&format!(" \"expires\": {},\n", p.leap_source.expires));
2081 s.push_str(&format!(
2082 " \"rolling_entries\": {}\n",
2083 p.leap_source.rolling_entries
2084 ));
2085 s.push_str(" },\n");
2086 s.push_str(&format!(
2087 " \"emit_style\": {},\n",
2088 json_str(emit_style_str(p.emit_style))
2089 ));
2090 match p.range {
2091 Some((lo, hi)) => s.push_str(&format!(
2092 " \"range\": {{ \"lo\": {}, \"hi\": {} }},\n",
2093 opt_at(lo),
2094 opt_at(hi)
2095 )),
2096 None => s.push_str(" \"range\": null,\n"),
2097 }
2098 s.push_str(&format!(
2099 " \"redundant_until\": {},\n",
2100 opt_at(p.redundant_until)
2101 ));
2102 // `link_mode` is the last `build_profile` field: as of T12.5d there are **no** source-variant
2103 // placeholders here. Every source-variant axis (`backward` T12.4d, `backzone` T12.5b, `PACKRATLIST`
2104 // T12.5c, `DATAFORM`=`main`/`vanguard`/`rearguard` T12.5d) is an authoritative `source_profile`
2105 // evidence axis; carrying an `"unknown"` copy here too would be a contradiction (`"unknown"` vs a
2106 // real detected/claimed status). The arc that removed the `backward` and `backzone` stubs ends here
2107 // by removing the last `rearguard`/`vanguard` stubs — `build_profile` now describes only *how this
2108 // run emitted* (tree/leap/emit/range/links), not source-set membership or encoding.
2109 s.push_str(&format!(
2110 " \"link_mode\": {}\n",
2111 json_str(p.link_mode.as_str())
2112 ));
2113 s.push_str(" },\n");
2114 s
2115}
2116
2117/// Render the `source_inputs` block — the deterministic *input identity* of this run (T12.3): the
2118/// structural `kind`, the **input-ordered** file list (logical name + content hash + size +
2119/// `order_index`), and the order-sensitive `aggregate_hash`. Portable: logical names, never
2120/// machine-local absolute paths.
2121fn source_inputs_json(si: &SourceInputs) -> String {
2122 let mut s = String::new();
2123 s.push_str(" \"source_inputs\": {\n");
2124 s.push_str(&format!(" \"kind\": {},\n", json_str(si.kind.as_str())));
2125 s.push_str(" \"files\": [");
2126 for (i, f) in si.files.iter().enumerate() {
2127 s.push_str(if i == 0 { "\n" } else { ",\n" });
2128 s.push_str(&format!(
2129 " {{ \"order_index\": {}, \"logical_name\": {}, \"sha256\": {}, \"bytes\": {} }}",
2130 f.order_index,
2131 json_str(&f.logical_name),
2132 json_str(&f.sha256),
2133 f.bytes
2134 ));
2135 }
2136 s.push_str(if si.files.is_empty() {
2137 "],\n"
2138 } else {
2139 "\n ],\n"
2140 });
2141 s.push_str(&format!(
2142 " \"aggregate_hash\": {}\n",
2143 json_str(&si.aggregate_hash)
2144 ));
2145 s.push_str(" },\n");
2146 s
2147}
2148
2149/// Render the `link_profile` block — link/alias identity (T12.4b): counts, policy, and the stable
2150/// hashes that bind the build to its `alias-map.json`. Never asserts source-set membership.
2151fn link_profile_json(lp: &LinkProfile) -> String {
2152 let mut s = String::new();
2153 s.push_str(" \"link_profile\": {\n");
2154 s.push_str(&format!(
2155 " \"link_policy\": {},\n",
2156 json_str(&lp.link_policy)
2157 ));
2158 s.push_str(&format!(
2159 " \"zones_compiled_count\": {},\n",
2160 lp.zones_compiled_count
2161 ));
2162 s.push_str(&format!(
2163 " \"links_selected_count\": {},\n",
2164 lp.links_selected_count
2165 ));
2166 s.push_str(&format!(
2167 " \"links_materialized_count\": {},\n",
2168 lp.links_materialized_count
2169 ));
2170 s.push_str(&format!(
2171 " \"links_omitted_count\": {},\n",
2172 lp.links_omitted_count
2173 ));
2174 s.push_str(&format!(
2175 " \"links_failed_count\": {},\n",
2176 lp.links_failed_count
2177 ));
2178 s.push_str(&format!(
2179 " \"alias_map_sha256\": {},\n",
2180 json_str(&lp.alias_map_sha256)
2181 ));
2182 s.push_str(&format!(
2183 " \"selected_links_sha256\": {},\n",
2184 json_str(&lp.selected_links_sha256)
2185 ));
2186 s.push_str(&format!(
2187 " \"omitted_links_sha256\": {}\n",
2188 json_str(&lp.omitted_links_sha256)
2189 ));
2190 s.push_str(" },\n");
2191 s
2192}
2193
2194/// Render the `source_profile` block — the source-evidence axes (T12.4d `backward`, T12.5b `backzone`,
2195/// T12.5c `packratlist` backzone-scope); an extension seam for `DATAFORM` later. Records detected vs
2196/// claimed vs reconciled `status` + the admitted `evidence_sha256` — never a boolean, never inferred.
2197fn source_profile_json(sp: &SourceProfile) -> String {
2198 // Both axes share the {detected, claimed, status, evidence_sha256} shape; render with one helper.
2199 let axis =
2200 |key: &str, detected: &str, claimed: &str, status: &str, ev: &Option<String>| -> String {
2201 let mut a = String::new();
2202 a.push_str(&format!(" {}: {{\n", json_str(key)));
2203 a.push_str(&format!(" \"detected\": {},\n", json_str(detected)));
2204 a.push_str(&format!(" \"claimed\": {},\n", json_str(claimed)));
2205 a.push_str(&format!(" \"status\": {},\n", json_str(status)));
2206 match ev {
2207 Some(h) => a.push_str(&format!(" \"evidence_sha256\": {}\n", json_str(h))),
2208 None => a.push_str(" \"evidence_sha256\": null\n"),
2209 }
2210 a.push_str(" }");
2211 a
2212 };
2213 let b = &sp.backward;
2214 let z = &sp.backzone;
2215 let mut s = String::new();
2216 s.push_str(" \"source_profile\": {\n");
2217 s.push_str(&axis(
2218 "backward_evidence",
2219 b.detected_str(),
2220 b.claimed_str(),
2221 b.status(),
2222 &b.evidence_sha256,
2223 ));
2224 s.push_str(",\n");
2225 s.push_str(&axis(
2226 "backzone_evidence",
2227 z.detected_str(),
2228 z.claimed_str(),
2229 z.status(),
2230 &z.evidence_sha256,
2231 ));
2232 s.push_str(",\n");
2233 let pl = &sp.packratlist;
2234 s.push_str(&axis(
2235 "packratlist_evidence",
2236 pl.detected_str(),
2237 pl.claimed_str(),
2238 pl.status(),
2239 &pl.evidence_sha256,
2240 ));
2241 s.push_str(",\n");
2242 // `dataform_evidence` shares the 4 standard fields but adds two generated-artifact provenance
2243 // fields (`recipe_hash`, `generated_from`), so it is rendered directly rather than via `axis`.
2244 let df = &sp.dataform;
2245 let opt = |v: &Option<String>| match v {
2246 Some(h) => json_str(h),
2247 None => "null".to_string(),
2248 };
2249 s.push_str(" \"dataform_evidence\": {\n");
2250 s.push_str(&format!(
2251 " \"detected\": {},\n",
2252 json_str(df.detected_str())
2253 ));
2254 s.push_str(&format!(
2255 " \"claimed\": {},\n",
2256 json_str(df.claimed_str())
2257 ));
2258 s.push_str(&format!(" \"status\": {},\n", json_str(df.status())));
2259 s.push_str(&format!(
2260 " \"evidence_sha256\": {},\n",
2261 opt(&df.evidence_sha256)
2262 ));
2263 s.push_str(&format!(
2264 " \"recipe_hash\": {},\n",
2265 opt(&df.recipe_hash)
2266 ));
2267 s.push_str(&format!(
2268 " \"generated_from\": {}\n",
2269 opt(&df.generated_from)
2270 ));
2271 s.push_str(" }\n");
2272 s.push_str(" },\n");
2273 s
2274}
2275
2276impl CompileManifest {
2277 /// Render deterministic, pretty-printed JSON.
2278 pub fn to_json(&self) -> String {
2279 let arr = |items: &[String]| -> String {
2280 if items.is_empty() {
2281 "[]".to_string()
2282 } else {
2283 let inner: Vec<String> = items.iter().map(|i| json_str(i)).collect();
2284 format!("[{}]", inner.join(", "))
2285 }
2286 };
2287
2288 let mut s = String::new();
2289 s.push_str("{\n");
2290 s.push_str(&format!(" \"schema\": {},\n", json_str(COMPILE_SCHEMA)));
2291 s.push_str(&format!(
2292 " \"zic_rs_version\": {},\n",
2293 json_str(&self.zic_rs_version)
2294 ));
2295 let opt_str = |v: &Option<String>| match v {
2296 Some(x) => json_str(x),
2297 None => "null".to_string(),
2298 };
2299 s.push_str(" \"tzdb\": {\n");
2300 s.push_str(&format!(
2301 " \"detected_version\": {},\n",
2302 opt_str(&self.tzdb.detected_version)
2303 ));
2304 s.push_str(&format!(
2305 " \"claimed_version\": {},\n",
2306 opt_str(&self.tzdb.claimed_version)
2307 ));
2308 s.push_str(&format!(
2309 " \"version_status\": {},\n",
2310 json_str(self.tzdb.version_status())
2311 ));
2312 s.push_str(&format!(
2313 " \"source_path\": {},\n",
2314 json_str(&self.tzdb.source_path)
2315 ));
2316 s.push_str(&format!(
2317 " \"source_sha256\": {}\n",
2318 json_str(&self.tzdb.source_sha256)
2319 ));
2320 s.push_str(" },\n");
2321 s.push_str(&source_inputs_json(&self.source_inputs));
2322 s.push_str(&build_profile_json(&self.build_profile));
2323 s.push_str(&link_profile_json(&self.link_profile));
2324 s.push_str(&source_profile_json(&self.source_profile));
2325 s.push_str(" \"compile\": {\n");
2326 s.push_str(&format!(
2327 " \"zones_requested\": {},\n",
2328 arr(&self.zones_requested)
2329 ));
2330 s.push_str(&format!(
2331 " \"zones_compiled\": {},\n",
2332 arr(&self.zones_compiled)
2333 ));
2334 s.push_str(&format!(
2335 " \"links_materialized\": {},\n",
2336 arr(&self.links_materialized)
2337 ));
2338 s.push_str(&format!(
2339 " \"unsupported_zones\": {}\n",
2340 arr(&self.unsupported_zones)
2341 ));
2342 s.push_str(" },\n");
2343 s.push_str(" \"oracle\": {\n");
2344 s.push_str(&format!(
2345 " \"mode\": {},\n",
2346 json_str(self.oracle.mode.manifest_str())
2347 ));
2348 match &self.oracle.horizon {
2349 Some(h) => s.push_str(&format!(" \"horizon\": {},\n", json_str(h))),
2350 None => s.push_str(" \"horizon\": null,\n"),
2351 }
2352 s.push_str(&format!(
2353 " \"result\": {}\n",
2354 json_str(self.oracle.result.as_str())
2355 ));
2356 s.push_str(" }\n");
2357 s.push_str("}\n");
2358 s
2359 }
2360
2361 /// Write the manifest JSON to `path`.
2362 pub fn write_to(&self, path: &Path) -> Result<()> {
2363 std::fs::write(path, self.to_json()).map_err(|e| Error::io(path, e))
2364 }
2365}
2366
2367/// Build a [`CompileManifest`] from the run's inputs and report.
2368///
2369/// `requested` is the resolved list of identifiers the user asked for; `source_files` are the
2370/// expanded input files **in input order** (directories already expanded sorted by the caller).
2371/// The oracle is recorded as `not-run` because `compile` does not invoke `compare` — see the
2372/// module note.
2373///
2374/// Two complementary hashes are computed: `tzdb.source_sha256` over the source bytes in *sorted*
2375/// (canonicalized) order — an order-independent content identity — and
2376/// `source_inputs.aggregate_hash` over the *input-ordered* per-file hashes — an order-sensitive
2377/// identity. **Input order is part of the build identity**; the manifest records it faithfully.
2378///
2379/// This assembles the build identity from eight genuinely distinct provenance inputs (the requested
2380/// selection, the input file set, the compile report, the run config, the link database, the claimed
2381/// tzdb version, the leap-source path, and the source-variant claims). They do not naturally collapse
2382/// into a meaningful sub-struct — bundling would relocate the count, not reduce the complexity — so we
2383/// keep them explicit and silence the arity lint.
2384#[allow(clippy::too_many_arguments)]
2385pub fn build_compile_manifest(
2386 requested: &[String],
2387 source_files: &[std::path::PathBuf],
2388 report: &CompileReport,
2389 config: &crate::CompileConfig,
2390 db: &crate::model::Database,
2391 claimed_version: Option<&str>,
2392 leap_path: Option<&std::path::Path>,
2393 variants: &SourceVariantArgs,
2394) -> Result<CompileManifest> {
2395 // Per-file identity in INPUT ORDER (never re-sorted) — the order is part of the build identity.
2396 let mut input_files: Vec<SourceFile> = Vec::with_capacity(source_files.len());
2397 for (order_index, f) in source_files.iter().enumerate() {
2398 let bytes = std::fs::read(f).map_err(|e| Error::io(f, e))?;
2399 input_files.push(SourceFile {
2400 // Logical name = basename: a portable label, never the machine-local absolute path.
2401 logical_name: f
2402 .file_name()
2403 .map(|n| n.to_string_lossy().into_owned())
2404 .unwrap_or_else(|| f.display().to_string()),
2405 sha256: sha256_hex(&bytes),
2406 bytes: bytes.len(),
2407 order_index,
2408 });
2409 }
2410 // Order-sensitive aggregate identity: hash the input-ordered sequence of per-file hashes (a
2411 // newline separator so reordering two files always changes the digest).
2412 let aggregate_seed = input_files
2413 .iter()
2414 .map(|f| f.sha256.as_str())
2415 .collect::<Vec<_>>()
2416 .join("\n");
2417 let aggregate_hash = sha256_hex(aggregate_seed.as_bytes());
2418
2419 // Structural input *form* only — never a guess at source-set membership (backward/backzone).
2420 let kind = match source_files.len() {
2421 0 => SourceInputKind::Unknown,
2422 1 if source_files[0].extension().and_then(|e| e.to_str()) == Some("zi") => {
2423 SourceInputKind::TzdataZi
2424 }
2425 1 => SourceInputKind::SingleFile,
2426 _ => SourceInputKind::MultiFile,
2427 };
2428
2429 // Order-independent content hash + display path + version detection: read in SORTED path order
2430 // so this digest is invariant to argument ordering (the explicitly-canonicalized companion to
2431 // `aggregate_hash`).
2432 let mut sorted: Vec<&std::path::PathBuf> = source_files.iter().collect();
2433 sorted.sort();
2434 let mut all = Vec::new();
2435 for f in &sorted {
2436 all.extend(std::fs::read(f).map_err(|e| Error::io(f, e))?);
2437 }
2438 let source_sha256 = sha256_hex(&all);
2439 let source_path = sorted
2440 .iter()
2441 .map(|p| p.display().to_string())
2442 .collect::<Vec<_>>()
2443 .join(", ");
2444 let detected_version = crate::report::sniff_tzdb_version(&all);
2445
2446 let source_inputs = SourceInputs {
2447 kind,
2448 files: input_files,
2449 aggregate_hash,
2450 };
2451
2452 // Build-profile identity — what this run actually used (semantic, not argv).
2453 let leap_source = match &config.leaps {
2454 None => LeapSourceInfo {
2455 mode: LeapSourceMode::None,
2456 sha256: None,
2457 entry_count: 0,
2458 expires: false,
2459 rolling_entries: 0,
2460 },
2461 Some(table) => LeapSourceInfo {
2462 mode: LeapSourceMode::File,
2463 sha256: match leap_path {
2464 Some(p) => Some(sha256_hex(&std::fs::read(p).map_err(|e| Error::io(p, e))?)),
2465 None => None,
2466 },
2467 entry_count: table.entries.len(),
2468 expires: table.expires.is_some(),
2469 rolling_entries: table.entries.iter().filter(|e| e.rolling).count(),
2470 },
2471 };
2472 let build_profile = BuildProfile {
2473 output_tree: if config.leaps.is_some() {
2474 OutputTree::Right
2475 } else {
2476 OutputTree::Posix
2477 },
2478 leap_source,
2479 // T17.2: store the typed enums directly (the source of truth), rendered at the JSON boundary —
2480 // no re-stringified copy that could drift from `config`.
2481 emit_style: config.emit_style,
2482 range: config.range.map(|r| (r.lo, r.hi)),
2483 redundant_until: config.redundant_until,
2484 link_mode: config.link_mode,
2485 };
2486
2487 let zones_compiled: Vec<String> = report
2488 .zones_compiled
2489 .iter()
2490 .map(|z| z.name.clone())
2491 .collect();
2492 let links_materialized: Vec<String> = report
2493 .links_written
2494 .iter()
2495 .map(|l| l.link_name.clone())
2496 .collect();
2497
2498 // A requested identifier is "satisfied" if it was compiled as a canonical zone or written
2499 // as a link; anything else requested is reported as unsupported/skipped — honestly.
2500 let unsupported_zones: Vec<String> = requested
2501 .iter()
2502 .filter(|r| !zones_compiled.contains(r) && !links_materialized.contains(r))
2503 .cloned()
2504 .collect();
2505
2506 // Link / alias identity (T12.4b). We classify against the *full parsed* link set (`db.links`),
2507 // NOT `report.links_written` — the report only knows what was *materialised*, but the manifest
2508 // also wants what was *omitted* (and *failed*), which only the db's complete link list reveals.
2509 // For each link we resolve its chain exactly as the compile path does (`plan::run` → the link
2510 // loop), then bucket by whether the resolved canonical zone landed in the compiled output set:
2511 // - selected: target compiled (eligible & — since a write failure aborts the whole run —
2512 // materialised);
2513 // - omitted: target NOT compiled, i.e. excluded by the zone *selection* (a policy outcome);
2514 // - failed: the chain does not resolve to a real zone (missing target / cycle / self-link) —
2515 // an *error* class, deliberately never folded into "omitted".
2516 // **No source-set membership (`backward`/`backzone`) is inferred from these links** — they are
2517 // output identifiers, not evidence of which source file produced them (see T12.4a inventory).
2518 let mut selected_links: Vec<String> = Vec::new();
2519 let mut omitted_links: Vec<String> = Vec::new();
2520 let mut links_failed_count = 0usize;
2521 for link in &db.links {
2522 match crate::resolve_link_target(db, &link.link_name) {
2523 Ok(canonical) if zones_compiled.iter().any(|z| z == canonical) => {
2524 selected_links.push(link.link_name.clone())
2525 }
2526 Ok(_) => omitted_links.push(link.link_name.clone()),
2527 Err(_) => links_failed_count += 1, // dangling / cycle — never folded into "omitted"
2528 }
2529 }
2530 // Sort for a stable set hash; `dedup` because `zic` allows two `Link` lines with the same name
2531 // (last wins — see `make_links`), so the parsed db may legitimately carry a duplicate name we
2532 // must not double-count. (Sort-then-dedup removes only *adjacent* equals, hence the sort first.)
2533 selected_links.sort();
2534 selected_links.dedup();
2535 omitted_links.sort();
2536 omitted_links.dedup();
2537 // Order-independent *set* identity: names are already sorted+deduped, LF-joined then hashed.
2538 // The empty set hashes to `sha256("")` (a fixed, well-known digest) — that is intentional and
2539 // stable; do not special-case it to "" or a sentinel, or two empty-set runs would stop matching.
2540 let hash_names = |names: &[String]| sha256_hex(names.join("\n").as_bytes());
2541 // `alias-map.json` is serialized deterministically (sorted by identifier, fixed field order,
2542 // LF, no timestamps — see `AliasMap::to_json`), so hashing its bytes is a stable cross-machine
2543 // identity that binds this manifest to a specific alias map. `build` re-reads the just-written
2544 // output files to hash them; at manifest time (a successful compile) they are all on disk.
2545 let alias_map_sha256 = sha256_hex(build(report, &config.output_dir)?.to_json().as_bytes());
2546 let link_profile = LinkProfile {
2547 link_policy: match config.link_mode {
2548 crate::LinkMode::Copy => "copy",
2549 crate::LinkMode::Symlink => "symlink",
2550 }
2551 .to_string(),
2552 zones_compiled_count: zones_compiled.len(),
2553 links_selected_count: selected_links.len(),
2554 links_materialized_count: report.links_written.len(),
2555 links_omitted_count: omitted_links.len(),
2556 links_failed_count,
2557 alias_map_sha256,
2558 selected_links_sha256: hash_names(&selected_links),
2559 omitted_links_sha256: hash_names(&omitted_links),
2560 };
2561
2562 // Source-evidence axes — reconciled against the *admitted* source inputs only (hash-backed
2563 // detection or explicit claim), never inferred from the link profile above. `backward` (T12.4d)
2564 // verifies an admitted file's participation; `backzone` (T12.5b) checks whether the pinned
2565 // reference `backzone` (T12.5a.2) participated, anchored to its release hash; `packratlist`
2566 // (T12.5c) `packratlist` is a **generation-policy** axis — detection comes ONLY from an admitted
2567 // policy input (`--packratlist-source`) whose hash equals the pinned 2026b `zone.tab`, never from
2568 // `source_inputs` (compile inputs); `zone.tab` is not a compilable `zic` source.
2569 let backzone = BackzoneEvidence::reconcile(
2570 &source_inputs,
2571 variants.backzone_claim,
2572 REF_2026B_BACKZONE_SHA256,
2573 );
2574 let backzone_present = backzone.detected == BackzoneDetected::Present;
2575 let packratlist_policy_sha = match &variants.packratlist_source {
2576 Some(p) => Some(sha256_hex(&std::fs::read(p).map_err(|e| Error::io(p, e))?)),
2577 None => None,
2578 };
2579 // `dataform` (T12.5d) — the *encoding* axis. Detection is hash-backed against the pinned 2026b
2580 // `.zi` artifacts via `source_inputs` membership (category-correct: the `.zi` files are compile
2581 // sources). The `recipe_hash` binds the generation provenance of those pinned artifacts.
2582 let dataform_recipe = dataform_recipe_hash(
2583 REF_2026B_ARCHIVE_SHA256,
2584 REF_2026B_MAKEFILE_SHA256,
2585 REF_2026B_ZIGUARD_AWK_SHA256,
2586 REF_2026B_DATAFORM_COMMAND,
2587 REF_2026B_DATAFORM_TOOLCHAIN,
2588 );
2589 let dataform_reference = DataformReference {
2590 main_sha256: REF_2026B_MAIN_ZI_SHA256,
2591 vanguard_sha256: REF_2026B_VANGUARD_ZI_SHA256,
2592 rearguard_sha256: REF_2026B_REARGUARD_ZI_SHA256,
2593 recipe_hash: &dataform_recipe,
2594 generated_from: REF_2026B_DATAFORM_GENERATED_FROM,
2595 };
2596 let source_profile = SourceProfile {
2597 backward: BackwardEvidence::reconcile(&source_inputs, variants)?,
2598 packratlist: PackratlistEvidence::reconcile(
2599 variants.packratlist_claim.as_deref(),
2600 packratlist_policy_sha.as_deref(),
2601 REF_2026B_ZONE_TAB_SHA256,
2602 backzone_present,
2603 ),
2604 dataform: DataformEvidence::reconcile(
2605 &source_inputs,
2606 variants.dataform_claim.as_deref(),
2607 &dataform_reference,
2608 ),
2609 backzone,
2610 };
2611
2612 Ok(CompileManifest {
2613 zic_rs_version: env!("CARGO_PKG_VERSION").to_string(),
2614 tzdb: TzdbProvenance {
2615 detected_version,
2616 claimed_version: claimed_version.map(str::to_string),
2617 source_path,
2618 source_sha256,
2619 },
2620 source_inputs,
2621 build_profile,
2622 link_profile,
2623 source_profile,
2624 zones_requested: requested.to_vec(),
2625 zones_compiled,
2626 links_materialized,
2627 unsupported_zones,
2628 oracle: OracleResult::not_run(),
2629 })
2630}
2631
2632// JSON string escaping is shared with the other deterministic emitters (`report`); the single
2633// implementation lives in `crate::json`. Identifier strings are already restricted by the
2634// output-tree validator, but we escape defensively — a name can contain a backslash, which JSON
2635// requires escaped. The `json_str` alias keeps this module's call sites unchanged.
2636use crate::json::escape as json_str;
2637
#[cfg(test)]
mod tests {
    use super::*;

    // ── T17.2 CONTRACT.TYPING: totality of the newly-typed manifest vocabularies ──
    // Every enum owns its JSON literal through `as_str()`. The tests below pin the exact literals
    // (so the `zic-rs-compile-manifest-v8` schema does not bump) and check the vocabularies stay
    // closed.

    #[test]
    fn source_input_kind_totality_and_literals() {
        let labels: Vec<&str> = SourceInputKind::ALL.iter().map(|k| k.as_str()).collect();
        // The literals are pinned, in declaration order.
        assert_eq!(
            labels,
            ["tzdata_zi", "multi_file", "single_file", "unknown"]
        );
        // Totality: no two variants share a label, and no label is empty.
        let distinct: std::collections::BTreeSet<&str> = labels.iter().copied().collect();
        assert_eq!(distinct.len(), SourceInputKind::ALL.len());
        assert!(!labels.iter().any(|l| l.is_empty()));
    }

    #[test]
    fn output_tree_leap_mode_oracle_verdict_literals() {
        assert_eq!(OutputTree::Posix.as_str(), "posix");
        assert_eq!(OutputTree::Right.as_str(), "right");
        assert_eq!(LeapSourceMode::None.as_str(), "none");
        assert_eq!(LeapSourceMode::File.as_str(), "file");
        // The hyphen carries over from the free-string literal used before T17.2.
        assert_eq!(OracleVerdict::NotRun.as_str(), "not-run");
    }

    #[test]
    fn emit_style_boundary_literals_unchanged() {
        // The manifest keeps the typed `EmitStyle`; `emit_style_str` owns the boundary literal.
        assert_eq!(emit_style_str(crate::EmitStyle::Default), "default");
        assert_eq!(emit_style_str(crate::EmitStyle::ZicSlim), "zic-slim");
        assert_eq!(emit_style_str(crate::EmitStyle::ZicFat), "zic-fat");
    }

    #[test]
    fn alias_entry_kind_str() {
        let zone = AliasEntry::Zone { sha256: "x".into() };
        assert_eq!(zone.kind_str(), "zone");

        let link = AliasEntry::Link {
            target: "t".into(),
            target_sha256: "y".into(),
            materialised: crate::LinkMode::Copy,
        };
        assert_eq!(link.kind_str(), "link");
    }

    #[test]
    fn json_escaping() {
        // Plain identifiers are only quoted; backslashes and quotes are escaped.
        assert_eq!(json_str("Europe/London"), "\"Europe/London\"");
        assert_eq!(json_str("a\\b"), "\"a\\\\b\"");
        assert_eq!(json_str("a\"b"), "\"a\\\"b\"");
    }

    #[test]
    fn empty_map_is_valid_json_shape() {
        let rendered = AliasMap {
            entries: BTreeMap::new(),
            identifiers: 0,
            canonical_zones: 0,
            links: 0,
            duplicated_byte_links: 0,
        }
        .to_json();
        assert!(rendered.contains("\"schema\": \"zic-rs-alias-map-v1\""));
        assert!(rendered.contains("\"zones\": {}"));
        assert!(rendered.contains("\"identifiers\": 0"));
    }
}