Skip to main content

tzcompile/
report.rs

1//! `support-report` — an honest **frontier map** of which identifiers in a tzdata source file
2//! zic-rs can compile, and *why* the rest cannot.
3//!
4//! This turns "a partial compiler" into a transparent, machine-generated inventory: load a whole
5//! source file (the installed `/usr/share/zoneinfo/tzdata.zi` is the headline target — zic-rs
6//! parses the zishrink form directly), attempt to compile **every** canonical zone, and bucket
7//! the outcome. Links are accounted separately (production systems use aliases, not just
8//! canonical zones — a current-offset table does not describe a zone's historical data).
9//!
10//! ## What "supported" means here (no overclaiming)
11//!
12//! A zone is **compile-supported** if [`compile_zone`] returns `Ok` — i.e.
13//! zic-rs produces a valid TZif file for it. That is *not* a claim of behavioural correctness:
14//! the binding correctness contract is reference `zic`/`zdump` (see `compare`), which this report
15//! does not run. So the report answers "can I compile it?", not "is every transition right?".
16//! Every zone lands in exactly one bucket (supported, or a named unsupported reason); the
17//! accounting is exact (`supported + Σ unsupported == zones parsed`) and a catch-all `other`
18//! bucket keeps the raw diagnostic so nothing is ever silently dropped.
19
20use std::collections::BTreeMap;
21
22use crate::diagnostics::DiagnosticCode;
23use crate::json::escape;
24use crate::model::Database;
25use crate::{compile_zone, resolve_link_target, Error};
26
/// Value written to the top-level `"schema"` field of the JSON report, so consumers can tell
/// which layout they are reading.
const SCHEMA: &str = "zic-rs-support-report-v4";
29
/// Maximum number of zone names printed under each bucket by the text renderer.
///
/// Purely a display limit: whatever is left out is announced as `(+N more)`, and the JSON form
/// always carries the complete list.
const TEXT_EXAMPLES: usize = 6;
34
/// The zones that failed for one particular reason, together with a sample diagnostic.
///
/// Keeping a raw message per bucket means any bucket — the catch-all `other` one in
/// particular — can be understood from the report alone, without re-running the compiler.
#[derive(Debug, Default)]
pub struct Bucket {
    /// Names of the canonical zones that landed in this bucket.
    pub zones: Vec<String>,
    /// A representative raw diagnostic message for this bucket (empty until one is recorded).
    pub example_message: String,
}
42
/// Outcome of resolving every `Link` identifier, kept apart from the canonical-zone tally.
///
/// Each link name is recorded in exactly one of the four lists.
#[derive(Debug, Default)]
pub struct LinkAccounting {
    /// Links that resolve to a canonical zone which compiles.
    pub to_supported: Vec<String>,
    /// Links that resolve, but whose canonical zone does *not* compile.
    pub to_unsupported: Vec<String>,
    /// Links caught in a loop (for instance `B -> A -> B`).
    pub cycles: Vec<String>,
    /// Links pointing at a name that is neither a zone nor another link.
    pub missing: Vec<String>,
}
55
56/// The complete frontier map for one source file.
57#[derive(Debug)]
58pub struct SupportReport {
59    /// tzdb release as declared by a leading `# version …` comment in the source, if any.
60    pub tzdb_version: Option<String>,
61    pub zones_parsed: usize,
62    pub links_parsed: usize,
63    /// Canonical zones that compile, sorted.
64    pub supported_zones: Vec<String>,
65    /// Unsupported canonical zones, keyed by a stable bucket label (sorted).
66    pub unsupported: BTreeMap<String, Bucket>,
67    pub links: LinkAccounting,
68}
69
70/// Build the frontier map for `db`. `tzdb_version` is the best-effort release string the caller
71/// sniffed from the source header (`None` if absent).
72pub fn build_support_report(db: &Database, tzdb_version: Option<String>) -> SupportReport {
73    let mut supported_zones: Vec<String> = Vec::new();
74    let mut unsupported: BTreeMap<String, Bucket> = BTreeMap::new();
75
76    for zone in &db.zones {
77        match compile_zone(db, &zone.name) {
78            Ok(_) => supported_zones.push(zone.name.clone()),
79            Err(e) => {
80                let (label, message) = classify(&e);
81                let bucket = unsupported.entry(label).or_default();
82                bucket.zones.push(zone.name.clone());
83                if bucket.example_message.is_empty() {
84                    bucket.example_message = message;
85                }
86            }
87        }
88    }
89    supported_zones.sort();
90    for b in unsupported.values_mut() {
91        b.zones.sort();
92    }
93
94    // Links resolve independently of *compile* support; classify each.
95    let supported_set: std::collections::BTreeSet<&str> =
96        supported_zones.iter().map(String::as_str).collect();
97    let mut links = LinkAccounting::default();
98    for link in &db.links {
99        match resolve_link_target(db, &link.link_name) {
100            Ok(canonical) => {
101                if supported_set.contains(canonical) {
102                    links.to_supported.push(link.link_name.clone());
103                } else {
104                    links.to_unsupported.push(link.link_name.clone());
105                }
106            }
107            Err(e) => {
108                // `resolve_link_target` distinguishes cycle vs missing by message.
109                if e.to_string().contains("cycle") {
110                    links.cycles.push(link.link_name.clone());
111                } else {
112                    links.missing.push(link.link_name.clone());
113                }
114            }
115        }
116    }
117    links.to_supported.sort();
118    links.to_unsupported.sort();
119    links.cycles.sort();
120    links.missing.sort();
121
122    SupportReport {
123        tzdb_version,
124        zones_parsed: db.zones.len(),
125        links_parsed: db.links.len(),
126        supported_zones,
127        unsupported,
128        links,
129    }
130}
131
132/// Map a compile error to a `(bucket_label, raw_message)`. The label is stable: it keys on the
133/// `ZIC0xx` diagnostic code, and for the broad `ZIC001_UNSUPPORTED_DIRECTIVE` it sub-classifies
134/// into a small fixed reason set matched from the message. Anything unrecognised falls into an
135/// explicit `other` reason **and** carries its raw message — we never silently drop a failure.
136fn classify(e: &Error) -> (String, String) {
137    let Some(diag) = e.diagnostic() else {
138        // Non-diagnostic errors (I/O, config, internal) — keep the whole message as the label
139        // tail so they are visible and never silently merged with real unsupported constructs.
140        let m = e.to_string();
141        return (format!("error: {m}"), m);
142    };
143    let code = diag.code.as_str();
144    let msg = diag.message.clone();
145    if diag.code == DiagnosticCode::UnsupportedDirective {
146        let m = &diag.message;
147        let reason = if m.contains("negative inline SAVE") {
148            "inline-save: negative SAVE"
149        } else if m.contains("inline-save FORMAT") {
150            "inline-save: %s or STD/DST slash FORMAT"
151        } else if m.contains("rule context") {
152            "no-rules era: %s or STD/DST slash FORMAT"
153        } else if m.contains("not POSIX-expressible") {
154            "recurring footer: non-POSIX day form"
155        } else if m.contains("unknown rule set") {
156            "unknown rule set"
157        } else {
158            "other (see message)"
159        };
160        (format!("{code}: {reason}"), msg)
161    } else {
162        // Other codes are already specific enough to be their own bucket (e.g. ZIC009
163        // too-many-transitions, ZIC010 leap seconds).
164        (code.to_string(), msg)
165    }
166}
167
/// Look up the deep `zic` semantic law behind an unsupported/fail-closed **bucket label**
/// (the audit map lives in [`docs/zic-deep-semantics.md`](../docs/zic-deep-semantics.md)).
///
/// The match is by substring on the reason part of the label, so a `ZIC0xx_…:` code prefix
/// does not get in the way. Entries are tried in order and the first hit wins. A label that
/// matches nothing yields `None` — no law is invented for a reason that has not been pinned.
///
/// This function is meant to be the single source of truth shared by `--explain-buckets` and
/// that document, so the two cannot drift apart.
pub fn deep_semantic(label: &str) -> Option<&'static str> {
    // (substrings that select the law, text of the law), in priority order.
    const LAWS: [(&[&str], &str); 3] = [
        (
            &["negative SAVE"],
            "law 7 — SAVE is signed state (negative SAVE is valid; Ireland). Implement as \
             first-class signed SAVE, not a per-zone exception.",
        ),
        (
            &["non-POSIX day form"],
            "law 10 — ON day forms can leave the nominal month (e.g. `Sun>=31`); a recurring such \
             form is not POSIX-footer-expressible, so an exact footer cannot be synthesised.",
        ),
        (
            &["STD/DST slash", "%s or STD/DST"],
            "law 9 — `%s`, `%z`, and `STD/DST` slash are three distinct FORMAT paths; the slash/`%s` \
             forms on this era are not yet pinned against reference `zic`.",
        ),
    ];

    LAWS.iter()
        .find(|(needles, _)| needles.iter().any(|needle| label.contains(*needle)))
        .map(|&(_, law)| law)
}
191
192impl SupportReport {
193    /// Total identifiers (canonical zones + links) in the source.
194    pub fn identifiers(&self) -> usize {
195        self.zones_parsed + self.links_parsed
196    }
197
198    /// Identifiers that ultimately reach a compile-supported zone (supported zones + links to
199    /// supported zones).
200    pub fn supported_identifiers(&self) -> usize {
201        self.supported_zones.len() + self.links.to_supported.len()
202    }
203
204    /// The number of unsupported zones accounted across all buckets.
205    pub fn unsupported_zone_count(&self) -> usize {
206        self.unsupported.values().map(|b| b.zones.len()).sum()
207    }
208
209    /// Accounting invariant: every parsed zone is in exactly one place.
210    pub fn is_fully_accounted(&self) -> bool {
211        self.supported_zones.len() + self.unsupported_zone_count() == self.zones_parsed
212    }
213
214    /// The largest unsupported bucket — the "biggest unlock" pointer — as `(label, count)`.
215    pub fn largest_bucket(&self) -> Option<(&str, usize)> {
216        self.unsupported
217            .iter()
218            .map(|(k, b)| (k.as_str(), b.zones.len()))
219            .max_by_key(|(_, n)| *n)
220    }
221
222    /// Deterministic human-readable report.
223    pub fn to_text(&self) -> String {
224        self.render_text(false)
225    }
226
227    /// As [`Self::to_text`], plus a `↳ deep law:` line under each unsupported bucket mapping it to
228    /// the `zic` semantic it represents (the [`deep_semantic`] audit map — see
229    /// `docs/zic-deep-semantics.md`). Used by `support-report --explain-buckets`.
230    pub fn to_text_explained(&self) -> String {
231        self.render_text(true)
232    }
233
234    fn render_text(&self, explain: bool) -> String {
235        let mut s = String::new();
236        let version = self.tzdb_version.as_deref().unwrap_or("unknown");
237        s.push_str(&format!(
238            "zic-rs support report — tzdb release: {version}\n"
239        ));
240        s.push_str(
241            "(reports COMPILE support — a valid TZif is produced; behavioural correctness is a\n\
242             separate question answered by the reference `zic`/`zdump` oracle, not this report.)\n\n",
243        );
244        s.push_str(&format!("identifiers:      {}\n", self.identifiers()));
245        s.push_str(&format!(
246            "  canonical zones:  {} parsed, {} compile-supported\n",
247            self.zones_parsed,
248            self.supported_zones.len()
249        ));
250        s.push_str(&format!(
251            "  links:            {} parsed ({} → supported, {} → unsupported, {} cycle, {} missing)\n",
252            self.links_parsed,
253            self.links.to_supported.len(),
254            self.links.to_unsupported.len(),
255            self.links.cycles.len(),
256            self.links.missing.len(),
257        ));
258        s.push_str(&format!(
259            "  total supported:  {} / {} identifiers\n\n",
260            self.supported_identifiers(),
261            self.identifiers()
262        ));
263
264        if self.unsupported.is_empty() {
265            s.push_str("unsupported zones: none\n");
266        } else {
267            s.push_str(&format!(
268                "unsupported zones ({} across {} buckets):\n",
269                self.unsupported_zone_count(),
270                self.unsupported.len()
271            ));
272            for (label, bucket) in &self.unsupported {
273                s.push_str(&format!("  [{}] {}\n", bucket.zones.len(), label));
274                if explain {
275                    match deep_semantic(label) {
276                        Some(law) => s.push_str(&format!("      ↳ deep law: {law}\n")),
277                        None => s.push_str("      ↳ deep law: (not yet mapped)\n"),
278                    }
279                }
280                let shown = bucket.zones.len().min(TEXT_EXAMPLES);
281                for z in &bucket.zones[..shown] {
282                    s.push_str(&format!("      {z}\n"));
283                }
284                if bucket.zones.len() > shown {
285                    s.push_str(&format!("      (+{} more)\n", bucket.zones.len() - shown));
286                }
287            }
288            if let Some((label, n)) = self.largest_bucket() {
289                s.push_str(&format!(
290                    "\nbiggest unlock: the `{label}` bucket ({n} zones) — addressing it admits the most zones.\n"
291                ));
292            }
293        }
294        // Accounting check, surfaced (honesty: prove nothing was dropped).
295        s.push_str(&format!(
296            "\naccounting: {} supported + {} unsupported == {} zones parsed [{}]\n",
297            self.supported_zones.len(),
298            self.unsupported_zone_count(),
299            self.zones_parsed,
300            if self.is_fully_accounted() {
301                "OK"
302            } else {
303                "MISMATCH"
304            },
305        ));
306        // T12.6 — the static provenance/capability statement (manifest schema + source-variant
307        // reference-pin gate), so an operator sees the trust boundary without reading the manifest.
308        s.push_str(&crate::manifest::provenance_block_text());
309        s
310    }
311
312    /// Deterministic JSON (hand-rolled, shared escaper — no serde). Full lists, no elision.
313    pub fn to_json(&self) -> String {
314        let arr = |names: &[String]| -> String {
315            let items: Vec<String> = names.iter().map(|n| escape(n)).collect();
316            format!("[{}]", items.join(", "))
317        };
318        let mut s = String::new();
319        s.push_str("{\n");
320        s.push_str(&format!("  \"schema\": {},\n", escape(SCHEMA)));
321        // T12.6 — static provenance/capability block (schema + source-variant pin-gate state +
322        // T15.2 `negative_capabilities`).
323        s.push_str(&crate::manifest::provenance_block_json());
324        // T15.2 — oracle availability, typed. `support-report` is compile-coverage: it consults **no**
325        // oracle, so this is honestly `not_run` (it makes no behaviour-verified claim). Visible, never
326        // silent — see `docs/zic-conformance-engine.md`.
327        s.push_str(&format!(
328            "  \"oracle_mode\": {},\n",
329            crate::manifest::OracleMode::NotRun.to_json_field()
330        ));
331        // T15.5 — the one-line conformance rollup: the claim *envelope* (report kind · bounded level ·
332        // declared_scope_hash · compiler/workspace/report provenance · available proof surfaces), so a
333        // reviewer reads the scope in one scan. A *report is itself a claim surface*, hence its provenance.
334        s.push_str(&crate::manifest::ConformanceStatus::support().to_json_block());
335        match &self.tzdb_version {
336            Some(v) => s.push_str(&format!("  \"tzdb_version\": {},\n", escape(v))),
337            None => s.push_str("  \"tzdb_version\": null,\n"),
338        }
339        s.push_str(&format!("  \"zones_parsed\": {},\n", self.zones_parsed));
340        s.push_str(&format!("  \"links_parsed\": {},\n", self.links_parsed));
341        s.push_str(&format!(
342            "  \"supported_identifiers\": {},\n",
343            self.supported_identifiers()
344        ));
345        s.push_str(&format!(
346            "  \"fully_accounted\": {},\n",
347            self.is_fully_accounted()
348        ));
349        s.push_str(&format!(
350            "  \"supported_zones\": {},\n",
351            arr(&self.supported_zones)
352        ));
353        s.push_str("  \"unsupported\": {");
354        let mut first = true;
355        for (label, bucket) in &self.unsupported {
356            s.push_str(if first { "\n" } else { ",\n" });
357            first = false;
358            let law = match deep_semantic(label) {
359                Some(l) => escape(l),
360                None => "null".to_string(),
361            };
362            s.push_str(&format!(
363                "    {}: {{ \"count\": {}, \"deep_semantic\": {}, \"example_message\": {}, \"zones\": {} }}",
364                escape(label),
365                bucket.zones.len(),
366                law,
367                escape(&bucket.example_message),
368                arr(&bucket.zones),
369            ));
370        }
371        s.push_str(if self.unsupported.is_empty() {
372            "},\n"
373        } else {
374            "\n  },\n"
375        });
376        s.push_str("  \"links\": {\n");
377        s.push_str(&format!(
378            "    \"to_supported\": {},\n",
379            arr(&self.links.to_supported)
380        ));
381        s.push_str(&format!(
382            "    \"to_unsupported\": {},\n",
383            arr(&self.links.to_unsupported)
384        ));
385        s.push_str(&format!("    \"cycles\": {},\n", arr(&self.links.cycles)));
386        s.push_str(&format!("    \"missing\": {}\n", arr(&self.links.missing)));
387        s.push_str("  }\n");
388        s.push_str("}\n");
389        s
390    }
391}
392
/// Best-effort tzdb release sniff over the raw source bytes.
///
/// Looks at the first 40 lines of `bytes` for a comment of the form `# version <X>` (the form
/// `zic`'s single-file output and the IANA `version` file use) and returns the token `<X>`,
/// e.g. `2026b`. The raw source is read here because the lexer strips comments.
///
/// Returns `None` when no such line is found within the header window.
///
/// Details of the scan:
/// * Each line is decoded on its own, so a byte that is not valid UTF-8 elsewhere in the file
///   does not prevent the header from being read; an undecodable line is simply skipped.
/// * Leading whitespace before `#` and before `version` is ignored, and any whitespace (not
///   only a single space) may separate `version` from the token.
/// * A `# version` comment with no token after it is not a match; the scan moves on to the
///   next line instead of giving up.
pub fn sniff_tzdb_version(bytes: &[u8]) -> Option<String> {
    /// How many leading lines are examined before giving up.
    const HEADER_LINES: usize = 40;

    bytes
        .split(|&byte| byte == b'\n')
        .take(HEADER_LINES)
        .filter_map(|raw| std::str::from_utf8(raw).ok())
        .find_map(|line| {
            let comment = line.trim_start().strip_prefix('#')?.trim_start();
            let after = comment.strip_prefix("version")?;
            // Require a separator so that words such as `versions` are not mistaken for it.
            if !after.starts_with(char::is_whitespace) {
                return None;
            }
            // `split_whitespace` also discards a trailing `\r` from CRLF line endings.
            after.split_whitespace().next().map(str::to_string)
        })
}