Skip to main content

tzcompile/
release_diff.rs

1//! `release-diff` (T16.6a) — diff two IANA tzdb releases per identifier.
2//!
3//! Given two `tzdata.zi` source releases (OLD, NEW), this compiles each identifier in **both** and
4//! classifies how it changed. It keeps two axes strictly separate — exactly as the rest of the project
5//! does (CORE.1 behaviour vs T8 structural parity are never collapsed):
6//!
7//! * **structural** — always available: decode each side's TZif and compare the [`Shape`] (version /
8//!   `timecnt` / `typecnt` / `charcnt` / `isutcnt` / `isstdcnt` / `leapcnt` / footer), reusing the T8
9//!   taxonomy ([`ParityClass`]).
10//! * **behavioural** — only when a `zdump` oracle is available: dump each side over the declared horizon
11//!   and report whether behaviour changed in the **past** window (years `[lo, split-1]`), the **future**
12//!   window (years `[split, hi]`), or both. `split` is an **exclusive seam** (T17.3): the split year
13//!   belongs to the future window, so a change in the split year is `behavior_future`, never
14//!   double-counted as both. The split is a **declared year**, never host-`now` (determinism: the same
15//!   inputs always yield the same diff, independent of when or where it runs). Oracle absence is
16//!   surfaced (`oracle_mode = unavailable` + reason), never silently treated as "no change".
17//!
18//! Non-claims: a diff is scoped to the **declared horizon + split**, not all-time; the behaviour axis
19//! requires the oracle; and it says nothing about *why* a zone changed (that is IANA's NEWS, not ours).
20//! Identifiers zic-rs cannot compile (out of its declared subset) are reported as errors, never guessed.
21
22use std::collections::{BTreeMap, BTreeSet};
23use std::path::PathBuf;
24
25use crate::error::{Error, Result};
26use crate::manifest::OracleMode;
27use crate::model::Database;
28use crate::structural::{classify, differing_dims, ParityClass, Shape};
29
/// Classification of how a single identifier moved between the OLD and NEW releases.
///
/// Each identifier receives exactly one kind. [`build_release_diff`] picks it by checking, in order:
/// presence, link involvement, byte identity, a leap-only delta, and finally behaviour.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReleaseChangeKind {
    /// The compiled bytes are identical on both sides, or the identifier is a link whose target did
    /// not move.
    Unchanged,
    /// The identifier exists only in NEW.
    Added,
    /// The identifier exists only in OLD.
    Removed,
    /// A link is involved and something about it moved: a zone became a link (or vice versa), or the
    /// link now points at a different target.
    LinkChanged,
    /// The sole differing structural dimension is `leapcnt` (the leap-second table).
    LeapOnly,
    /// The bytes differ, yet `zdump` reports **identical** behaviour across the whole horizon
    /// (a footer / version / encoding-only change). Can only be asserted with the oracle.
    MetadataOnly,
    /// `zdump` output differs in the past window only (years `[lo, split-1]`).
    BehaviorPast,
    /// `zdump` output differs in the future window only (years `[split, hi]`; the split year counts
    /// as future).
    BehaviorFuture,
    /// `zdump` output differs in the past window and in the future window.
    BehaviorPastAndFuture,
    /// The bytes differ, but behaviour was not assessed for this identifier (no usable `zdump`
    /// oracle, or the oracle failed on this row). The structural delta is still recorded. This
    /// means "not checked", never "no change".
    BehaviourUnassessed,
}

impl ReleaseChangeKind {
    /// All ten variants in declaration order — used for totality tests and summary tabulation.
    pub const ALL: [Self; 10] = [
        Self::Unchanged,
        Self::Added,
        Self::Removed,
        Self::LinkChanged,
        Self::LeapOnly,
        Self::MetadataOnly,
        Self::BehaviorPast,
        Self::BehaviorFuture,
        Self::BehaviorPastAndFuture,
        Self::BehaviourUnassessed,
    ];

    /// The stable snake_case label shared by the text and JSON renderings.
    ///
    /// Note the labels mirror the variant spelling: the three window kinds use `behavior_…` while
    /// the unassessed kind uses `behaviour_…`.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Unchanged => "unchanged",
            Self::Added => "added",
            Self::Removed => "removed",
            Self::LinkChanged => "link_changed",
            Self::LeapOnly => "leap_only",
            Self::MetadataOnly => "metadata_only",
            Self::BehaviorPast => "behavior_past",
            Self::BehaviorFuture => "behavior_future",
            Self::BehaviorPastAndFuture => "behavior_past_and_future",
            Self::BehaviourUnassessed => "behaviour_unassessed",
        }
    }
}
89
/// The per-window behavioural delta (counts of differing `zdump` lines), present only when the oracle ran.
///
/// Both counts being zero means the two releases dumped identically over the assessed windows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BehaviourDelta {
    /// Number of differing `zdump` lines in the past window (the years before the split year).
    pub past_diffs: usize,
    /// Number of differing `zdump` lines in the future window (the split year onwards).
    pub future_diffs: usize,
}
96
/// How far a behaviour-oracle failure reaches (T17.3 — previously every failure took the single
/// "flip the whole run unavailable" path).
///
/// The two scopes must stay distinct: if the tool cannot be resolved at all, the outage is
/// **global** and the whole behaviour axis is unavailable; if the tool resolved but assessing *one*
/// identifier failed (bad path/data for that zone), the failure is **row-scoped** and must **not**
/// poison the rest of the run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OracleFailureScope {
    /// `zdump` itself could not be resolved or run, so no identifier can be assessed.
    GlobalToolUnavailable,
    /// `zdump` resolved, but this one identifier could not be assessed; the others still can.
    RowOrIdentifierFailure,
}

impl OracleFailureScope {
    /// The stable snake_case label for this scope.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::GlobalToolUnavailable => "global_tool_unavailable",
            Self::RowOrIdentifierFailure => "row_or_identifier_failure",
        }
    }
}
118
/// One identifier's release-to-release comparison.
///
/// Only `name` and `change_kind` are always meaningful; the remaining fields are filled in for the
/// change kinds they apply to and are left `None` / empty otherwise.
#[derive(Debug, Clone)]
pub struct DiffRow {
    /// The identifier (zone or link name) this row describes.
    pub name: String,
    /// The single classification assigned to this identifier.
    pub change_kind: ReleaseChangeKind,
    /// Structural classification (present when both sides compiled to a zone and bytes differ).
    pub parity_class: Option<ParityClass>,
    /// The differing structural dimensions (stable order).
    pub diffs: Vec<&'static str>,
    /// Behavioural delta (present only when the `zdump` oracle ran for this row).
    pub behaviour: Option<BehaviourDelta>,
    /// Link target change `(old_target, new_target)` when this identifier is/was a link.
    /// A side that is not a link is `None` in its slot.
    pub link_change: Option<(Option<String>, Option<String>)>,
    /// T17.3: when the oracle resolved but failed *for this identifier* (an
    /// [`OracleFailureScope::RowOrIdentifierFailure`]), the reason — so a row-scoped behaviour failure is
    /// recorded on the row that hit it, while the rest of the run stays assessed. `None` otherwise.
    /// Additive to `zic-rs-release-diff-v1` (only emitted when present) — no schema bump.
    pub behaviour_error: Option<String>,
}
138
/// An identifier that could not be compared: one side failed to compile within zic-rs's subset, or
/// the compiled TZif of one side could not be decoded for the structural comparison.
#[derive(Debug, Clone)]
pub struct DiffError {
    /// The identifier that could not be compared.
    pub name: String,
    /// Human-readable cause. Compile failures are prefixed with the failing side (`OLD: ` / `NEW: `).
    pub reason: String,
}
145
/// Options for a release diff.
#[derive(Debug, Clone)]
pub struct ReleaseDiffOptions {
    /// Behaviour horizon in **years** `(lo, hi)`.
    pub horizon: (i32, i32),
    /// The past/future split **year** (declared, deterministic — never host-`now`).
    /// The split year itself belongs to the future window.
    pub split: i32,
    /// Restrict to a single identifier (exact name match); `None` compares every identifier.
    pub zone_filter: Option<String>,
    /// The `zdump` program for the behaviour axis. `None` ⇒ behaviour not assessed.
    /// A program that cannot be resolved is reported as an unavailable oracle, not as an error.
    pub zdump_program: Option<String>,
}
158
/// The complete release diff.
#[derive(Debug)]
pub struct ReleaseDiffReport {
    /// Whether the behaviour axis had a usable `zdump` oracle, or why it did not.
    pub oracle_mode: OracleMode,
    /// The behaviour horizon `(lo, hi)` in years, echoed from the options the diff was built with.
    pub horizon: (i32, i32),
    /// The past/future split year, echoed from the options the diff was built with.
    pub split: i32,
    /// One row per identifier that could be classified.
    pub rows: Vec<DiffRow>,
    /// Identifiers that could not be compared, with the reason.
    pub errors: Vec<DiffError>,
}
168
169impl ReleaseDiffReport {
170    /// Count of rows per change kind, in the enum's canonical order.
171    pub fn kind_counts(&self) -> BTreeMap<&'static str, usize> {
172        let mut m: BTreeMap<&'static str, usize> = BTreeMap::new();
173        for k in ReleaseChangeKind::ALL {
174            m.insert(k.as_str(), 0);
175        }
176        for r in &self.rows {
177            *m.entry(r.change_kind.as_str()).or_default() += 1;
178        }
179        m
180    }
181}
182
183/// Build the link-name → target map for a database (last-wins, matching `zic`'s `make_links` dedup).
184fn link_map(db: &Database) -> BTreeMap<String, String> {
185    let mut m = BTreeMap::new();
186    for l in &db.links {
187        m.insert(l.link_name.clone(), l.target.clone());
188    }
189    m
190}
191
192/// Run `zdump` over `[lo, hi]` on a freshly-written copy of `bytes`, returning normalised lines.
193fn dump(
194    program: &str,
195    root: &std::path::Path,
196    name: &str,
197    bytes: &[u8],
198    lo: i32,
199    hi: i32,
200) -> Result<Vec<String>> {
201    let path = crate::fs::output_tree::write_zone_file(root, name, bytes, true, false)?;
202    crate::compare::zdump::run(program, &path, lo, hi)
203}
204
205/// Compute the behaviour delta for one zone across the two releases. Returns `Err` if the oracle could
206/// not run (so the caller can flip to "unavailable" and stop attempting it).
207fn behaviour_delta(
208    program: &str,
209    name: &str,
210    old_bytes: &[u8],
211    new_bytes: &[u8],
212    opts: &ReleaseDiffOptions,
213    work: &std::path::Path,
214) -> Result<BehaviourDelta> {
215    let (lo, hi) = opts.horizon;
216    let split = opts.split;
217    let old_root = work.join("old");
218    let new_root = work.join("new");
219    // T17.3 — `split` is an **exclusive seam**: past = years `[lo, split-1]`, future = years
220    // `[split, hi]`, so a change in the split year is attributed to the **future** window and **never
221    // double-counted** into both. `zdump -c` year bounds are inclusive, hence `split - 1` for the past
222    // upper bound. Each window is skipped (no diff) when it is empty (`split <= lo` ⇒ no past window;
223    // `split > hi` ⇒ no future window), so a degenerate split never asks zdump for an inverted range.
224    let past = if split > lo {
225        crate::compare::zdump::diff(
226            &dump(program, &old_root, name, old_bytes, lo, split - 1)?,
227            &dump(program, &new_root, name, new_bytes, lo, split - 1)?,
228        )
229    } else {
230        Vec::new()
231    };
232    let future = if split <= hi {
233        crate::compare::zdump::diff(
234            &dump(program, &old_root, name, old_bytes, split, hi)?,
235            &dump(program, &new_root, name, new_bytes, split, hi)?,
236        )
237    } else {
238        Vec::new()
239    };
240    Ok(BehaviourDelta {
241        past_diffs: past.len(),
242        future_diffs: future.len(),
243    })
244}
245
/// Diff two parsed releases into a [`ReleaseDiffReport`].
///
/// Every identifier (each zone or link name on either side, optionally narrowed by
/// `opts.zone_filter`) is visited in sorted order and ends up in exactly one place: a [`DiffRow`] in
/// `rows`, or a [`DiffError`] in `errors` when one side fails to compile or its TZif cannot be
/// decoded. Classification precedence is presence → link → byte-identity → leap → behaviour.
///
/// # Errors
///
/// Returns `Err` only when the scratch directory cannot be created. Per-identifier compile, decode
/// and oracle failures are recorded in the report instead of aborting the run.
pub fn build_release_diff(
    old_db: &Database,
    new_db: &Database,
    opts: &ReleaseDiffOptions,
) -> Result<ReleaseDiffReport> {
    // Zone-name sets and link maps for O(log n) membership checks inside the loop.
    let old_zones: BTreeSet<&str> = old_db.zones.iter().map(|z| z.name.as_str()).collect();
    let new_zones: BTreeSet<&str> = new_db.zones.iter().map(|z| z.name.as_str()).collect();
    let old_links = link_map(old_db);
    let new_links = link_map(new_db);

    // The identifier universe: every zone + link name on either side (sorted, deterministic).
    let mut names: BTreeSet<String> = BTreeSet::new();
    for z in &old_db.zones {
        names.insert(z.name.clone());
    }
    for z in &new_db.zones {
        names.insert(z.name.clone());
    }
    for k in old_links.keys().chain(new_links.keys()) {
        names.insert(k.clone());
    }
    // An exact-match filter; a name present on neither side simply leaves the universe empty.
    if let Some(only) = &opts.zone_filter {
        names.retain(|n| n == only);
    }

    // Scratch tree for the behaviour axis (absolute, auto-cleaned). Created even if the oracle is off
    // (cheap); only written to when zdump actually runs.
    let work = tempfile::Builder::new()
        .prefix("zic-rs-release-diff-")
        .tempdir()
        .map_err(|e| Error::io(PathBuf::from("<tempdir>"), e))?;

    let mut rows = Vec::new();
    let mut errors = Vec::new();
    // T17.3 — distinguish global tool outage from per-identifier failure (OracleFailureScope). Probe the
    // tool ONCE up front: if it cannot be resolved, the behaviour axis is GlobalToolUnavailable for the
    // whole run; if it resolves, a later per-row failure is a RowOrIdentifierFailure recorded on *that*
    // row only — it must not poison the rest. `oracle_unavailable` is `Some(reason)` only when the axis
    // was not requested or the tool could not be resolved; it is never set by a per-row failure.
    let oracle_unavailable: Option<String> = match &opts.zdump_program {
        None => Some("behaviour axis not requested (pass --reference-zdump to assess)".into()),
        Some(prog) => {
            if crate::doctor::resolve(prog).is_some() {
                None
            } else {
                Some(format!(
                    "{}: zdump program {prog:?} could not be resolved on PATH or as an explicit path",
                    OracleFailureScope::GlobalToolUnavailable.as_str()
                ))
            }
        }
    };

    for name in &names {
        // "Present" means the name exists on that side as either a zone or a link.
        let in_old = old_zones.contains(name.as_str()) || old_links.contains_key(name);
        let in_new = new_zones.contains(name.as_str()) || new_links.contains_key(name);
        let old_link = old_links.get(name);
        let new_link = new_links.get(name);
        let old_zone = old_zones.contains(name.as_str());
        let new_zone = new_zones.contains(name.as_str());

        // 1. presence
        if !in_old && in_new {
            rows.push(simple_row(name, ReleaseChangeKind::Added));
            continue;
        }
        if in_old && !in_new {
            rows.push(simple_row(name, ReleaseChangeKind::Removed));
            continue;
        }
        // 2. link involvement (a link on either side, or a zone↔link flip)
        if old_link.is_some() || new_link.is_some() {
            // Unchanged only when it is a pure link on both sides with the same target; a name that
            // is also declared as a zone on either side counts as a link change.
            let unchanged_link = old_link.is_some()
                && new_link.is_some()
                && old_link == new_link
                && !old_zone
                && !new_zone;
            let kind = if unchanged_link {
                ReleaseChangeKind::Unchanged
            } else {
                ReleaseChangeKind::LinkChanged
            };
            let mut row = simple_row(name, kind);
            row.link_change = Some((old_link.cloned(), new_link.cloned()));
            rows.push(row);
            continue;
        }
        // 3. both are zones → compile both and diff
        debug_assert!(old_zone && new_zone);
        // A compile failure on either side is reported (tagged OLD/NEW) and the identifier skipped.
        let old_bytes = match crate::compile_zone_to_bytes(old_db, name) {
            Ok(b) => b,
            Err(e) => {
                errors.push(DiffError {
                    name: name.clone(),
                    reason: format!("OLD: {e}"),
                });
                continue;
            }
        };
        let new_bytes = match crate::compile_zone_to_bytes(new_db, name) {
            Ok(b) => b,
            Err(e) => {
                errors.push(DiffError {
                    name: name.clone(),
                    reason: format!("NEW: {e}"),
                });
                continue;
            }
        };
        // Byte-identical output needs no structural or behavioural work.
        if old_bytes == new_bytes {
            rows.push(simple_row(name, ReleaseChangeKind::Unchanged));
            continue;
        }
        // bytes differ → structural classification
        let (op, np) = match (
            crate::tzif::validate::parse(&old_bytes),
            crate::tzif::validate::parse(&new_bytes),
        ) {
            (Ok(o), Ok(n)) => (o, n),
            // Either side failing to decode is an error row; the message does not say which side.
            _ => {
                errors.push(DiffError {
                    name: name.clone(),
                    reason: "could not decode compiled TZif on one side".into(),
                });
                continue;
            }
        };
        let dims = differing_dims(&Shape::of(&op), &Shape::of(&np));
        let parity = classify(false, &dims);

        // leap-only short-circuits (independent of behaviour).
        if dims.as_slice() == ["leapcnt"] {
            let mut row = simple_row(name, ReleaseChangeKind::LeapOnly);
            row.parity_class = Some(parity);
            row.diffs = dims;
            rows.push(row);
            continue;
        }

        // behaviour axis. The oracle was probed up front, so a failure HERE is row-scoped
        // (OracleFailureScope::RowOrIdentifierFailure): record it on this row and keep assessing the
        // rest — never flip the whole run unavailable (that is reserved for the global probe above).
        let mut behaviour = None;
        let mut behaviour_error = None;
        // The oracle runs only when a program was supplied AND the up-front probe resolved it.
        let kind = if let (Some(program), None) = (&opts.zdump_program, &oracle_unavailable) {
            match behaviour_delta(program, name, &old_bytes, &new_bytes, opts, work.path()) {
                Ok(d) => {
                    behaviour = Some(d);
                    // (past differs, future differs) → kind; no diffs at all means metadata-only.
                    match (d.past_diffs > 0, d.future_diffs > 0) {
                        (true, true) => ReleaseChangeKind::BehaviorPastAndFuture,
                        (true, false) => ReleaseChangeKind::BehaviorPast,
                        (false, true) => ReleaseChangeKind::BehaviorFuture,
                        (false, false) => ReleaseChangeKind::MetadataOnly,
                    }
                }
                Err(e) => {
                    behaviour_error = Some(format!(
                        "{}: {e}",
                        OracleFailureScope::RowOrIdentifierFailure.as_str()
                    ));
                    ReleaseChangeKind::BehaviourUnassessed
                }
            }
        } else {
            ReleaseChangeKind::BehaviourUnassessed
        };
        let mut row = simple_row(name, kind);
        row.parity_class = Some(parity);
        row.diffs = dims;
        row.behaviour = behaviour;
        row.behaviour_error = behaviour_error;
        rows.push(row);
    }

    // Run-level oracle status. The `(None, None)` arm cannot occur given how `oracle_unavailable` is
    // built above (no program always yields a reason); it is there to keep the match exhaustive.
    let oracle_mode = match (&opts.zdump_program, oracle_unavailable) {
        (Some(_), None) => OracleMode::ReferenceZdump,
        (_, Some(reason)) => OracleMode::Unavailable(reason),
        (None, None) => OracleMode::Unavailable("behaviour axis not requested".into()),
    };

    Ok(ReleaseDiffReport {
        oracle_mode,
        horizon: opts.horizon,
        split: opts.split,
        rows,
        errors,
    })
}
434
435fn simple_row(name: &str, kind: ReleaseChangeKind) -> DiffRow {
436    DiffRow {
437        name: name.to_string(),
438        change_kind: kind,
439        parity_class: None,
440        diffs: Vec::new(),
441        behaviour: None,
442        link_change: None,
443        behaviour_error: None,
444    }
445}
446
/// The schema id (versioned, immutable) written as the `schema` field of the JSON report.
pub const SCHEMA: &str = "zic-rs-release-diff-v1";
449
450impl ReleaseDiffReport {
451    /// Render the report as deterministic JSON (`zic-rs-release-diff-v1`).
452    pub fn to_json(&self) -> String {
453        use crate::json::escape;
454        let mut s = String::new();
455        s.push_str("{\n");
456        s.push_str(&format!("  \"schema\": {},\n", escape(SCHEMA)));
457        s.push_str(&crate::manifest::provenance_block_json());
458        s.push_str(&format!(
459            "  \"oracle_mode\": {},\n",
460            self.oracle_mode.to_json_field()
461        ));
462        s.push_str(&format!(
463            "  \"horizon\": {{ \"lo\": {}, \"hi\": {} }},\n",
464            self.horizon.0, self.horizon.1
465        ));
466        s.push_str(&format!("  \"split\": {},\n", self.split));
467        s.push_str(
468            "  \"non_claim\": \"a release-diff is scoped to the declared horizon + split, not all-time; \
469             the behaviour axis requires a zdump oracle (absence ⇒ behaviour_unassessed, never 'no change'); \
470             it does not state WHY a zone changed (that is IANA NEWS); identifiers outside zic-rs's compile \
471             subset are reported as errors, never guessed\",\n",
472        );
473        // summary counts
474        let counts = self.kind_counts();
475        s.push_str("  \"summary\": {");
476        let mut first = true;
477        for (k, v) in &counts {
478            s.push_str(if first { "\n" } else { ",\n" });
479            first = false;
480            s.push_str(&format!("    {}: {}", escape(k), v));
481        }
482        s.push_str("\n  },\n");
483        // rows
484        s.push_str("  \"identifiers\": [");
485        for (i, r) in self.rows.iter().enumerate() {
486            s.push_str(if i == 0 { "\n" } else { ",\n" });
487            s.push_str(&row_json(r));
488        }
489        s.push_str(if self.rows.is_empty() {
490            "],\n"
491        } else {
492            "\n  ],\n"
493        });
494        // errors
495        s.push_str("  \"errors\": [");
496        for (i, e) in self.errors.iter().enumerate() {
497            s.push_str(if i == 0 { "\n" } else { ",\n" });
498            s.push_str(&format!(
499                "    {{ \"name\": {}, \"reason\": {} }}",
500                escape(&e.name),
501                escape(&e.reason)
502            ));
503        }
504        s.push_str(if self.errors.is_empty() {
505            "]\n"
506        } else {
507            "\n  ]\n"
508        });
509        s.push_str("}\n");
510        s
511    }
512}
513
514fn row_json(r: &DiffRow) -> String {
515    use crate::json::escape;
516    let mut s = String::new();
517    s.push_str(&format!(
518        "    {{ \"name\": {}, \"change_kind\": {}",
519        escape(&r.name),
520        escape(r.change_kind.as_str())
521    ));
522    if let Some(p) = r.parity_class {
523        let dims: Vec<String> = r.diffs.iter().map(|d| escape(d)).collect();
524        s.push_str(&format!(
525            ", \"structural\": {{ \"parity_class\": {}, \"differing\": [{}] }}",
526            escape(p.label()),
527            dims.join(", ")
528        ));
529    }
530    if let Some(b) = r.behaviour {
531        s.push_str(&format!(
532            ", \"behaviour\": {{ \"past_diffs\": {}, \"future_diffs\": {} }}",
533            b.past_diffs, b.future_diffs
534        ));
535    }
536    if let Some((old_t, new_t)) = &r.link_change {
537        let f = |o: &Option<String>| {
538            o.as_ref()
539                .map(|t| escape(t))
540                .unwrap_or_else(|| "null".into())
541        };
542        s.push_str(&format!(
543            ", \"link\": {{ \"old_target\": {}, \"new_target\": {} }}",
544            f(old_t),
545            f(new_t)
546        ));
547    }
548    // T17.3: a row-scoped behaviour-oracle failure, emitted only when present (additive).
549    if let Some(reason) = &r.behaviour_error {
550        s.push_str(&format!(", \"behaviour_error\": {}", escape(reason)));
551    }
552    s.push_str(" }");
553    s
554}