Skip to main content

tzcompile/
aux_tables.rs

1//! T16.4 — **auxiliary-table validator** (`zone.tab` / `zone1970.tab` / `zonenow.tab` / `iso3166.tab`)
2//! plus a **bounded** install-ecology status.
3//!
4//! **The central category law (the reusable T12.5c lesson):** these `.tab` files are **policy / index /
5//! reference** artifacts — they are **not compile inputs** and **not semantic-output witnesses**. This
6//! validator checks only **table structural admissibility**:
7//!
8//! > A valid zone-table row proves the row is *structurally well-formed*. It does **not** prove the named
9//! > zone was compiled, semantically witnessed, historically equivalent, or present in an installed tree.
10//!
11//! Three further doctrine lines (T16.4):
12//! - *Auxiliary tables are policy/index artifacts with **table-specific** invariants; they are not compile
13//!   inputs, not semantic witnesses, and **not one-row-per-country maps*** — a country legitimately spans
14//!   many rows (the bug this validator caught: `US` ≈ 29 rows in `zone.tab`).
15//! - *Auxiliary-table validation is **release-ecology evidence, not reference-oracle identity*** — it is a
16//!   separate `zic-rs-aux-table-validation-v1` report surface, never emitted as `oracle_identity` evidence.
17//! - **A validator must always name the universe it validates against:** this one resolves **no** zone
18//!   names (structural-only), so "unknown zone name" is deliberately not evaluated (see [`ZoneUniverse`]).
19//!
20//! It is bounds-safe (malformed bytes → typed findings, never a panic) and reads no files itself — the
21//! caller supplies bytes, exactly like the rest of the no-host library core.
22
23use crate::json::escape;
24use crate::manifest::ArtifactCategory;
25use std::collections::BTreeSet;
26
27/// Which auxiliary table this is (T16.4). A **finite** vocabulary — each table has a *different* column
28/// shape and a *different* evidence role; they are never conflated.
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub enum ZoneTableKind {
31    /// Legacy country→zone index (`CC \t coord \t TZ [\t comment]`). A country may appear in **many**
32    /// rows (one per represented zone — e.g. `US` ≈ 29); it is **not** a one-row-per-country map.
33    ZoneTab,
34    /// Post-1970 country/location table (`CC[,CC…] \t coord \t TZ [\t comment]`).
35    Zone1970Tab,
36    /// **Now/future-agreement** table (`CC \t coord \t TZ [\t comment]`; `XX` allowed) — *not* all-history.
37    ZonenowTab,
38    /// ISO-3166 country-code reference (`CC \t name`) — a code reference, not a zone table.
39    Iso3166Tab,
40}
41
42impl ZoneTableKind {
43    pub fn as_str(self) -> &'static str {
44        match self {
45            ZoneTableKind::ZoneTab => "zone_tab",
46            ZoneTableKind::Zone1970Tab => "zone1970_tab",
47            ZoneTableKind::ZonenowTab => "zonenow_tab",
48            ZoneTableKind::Iso3166Tab => "iso3166_tab",
49        }
50    }
51
52    /// The evidence category — **never `compile_input`**. Zone tables are generation/selection *policy*;
53    /// `iso3166.tab` is a *reference* code table. (The typed form of the `zone.tab`-is-not-compile law.)
54    pub fn artifact_category(self) -> ArtifactCategory {
55        match self {
56            ZoneTableKind::Iso3166Tab => ArtifactCategory::ReferenceInput,
57            _ => ArtifactCategory::PolicyInput,
58        }
59    }
60
61    /// What the table *covers* — so `zonenow.tab` is never overread as all-history equivalence.
62    pub fn coverage(self) -> &'static str {
63        match self {
64            ZoneTableKind::ZoneTab => "country_zone_index_all_eras",
65            ZoneTableKind::Zone1970Tab => "post_1970_country_location",
66            ZoneTableKind::ZonenowTab => "now_future_agreement_only",
67            ZoneTableKind::Iso3166Tab => "country_code_reference",
68        }
69    }
70}
71
/// One typed structural finding (T16.4). Findings are identified by variant, not by message wording,
/// in the same spirit as the diagnostic classes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZoneTableFinding {
    /// The table bytes are not valid UTF-8.
    NonUtf8,
    /// The table contains no data rows.
    EmptyTable,
    /// A row has the wrong number of tab-separated fields for its table kind.
    InvalidColumnCount,
    /// A country-code field is malformed (or fails cross-validation, when that is enabled).
    InvalidCountryCode,
    /// The coordinate field does not match the expected sign/digit layout.
    InvalidCoordinateFormat,
    /// The zone-name (zone tables) or country-name (`iso3166.tab`) field is empty.
    EmptyNameField,
    /// A **semantic** duplicate: the same identity tuple repeats — `(country-code-set, coordinates,
    /// zone-name)` for zone tables, or the country *code* for `iso3166.tab`. **Comments are excluded**
    /// from row identity. This is NOT a repeated country *code* across rows — a country legitimately
    /// spans many rows in `zone.tab`/`zone1970.tab` (e.g. `US` ≈ 29); only an identical identity tuple
    /// is suspect.
    DuplicateSemanticRow,
    /// A country code repeated *within a single* `zone1970.tab` comma-list (set-semantics, not string).
    DuplicateCodeInRow,
}

impl ZoneTableFinding {
    /// Stable snake_case code for this finding.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NonUtf8 => "non_utf8",
            Self::EmptyTable => "empty_table",
            Self::InvalidColumnCount => "invalid_column_count",
            Self::InvalidCountryCode => "invalid_country_code",
            Self::InvalidCoordinateFormat => "invalid_coordinate_format",
            Self::EmptyNameField => "empty_name_field",
            Self::DuplicateSemanticRow => "duplicate_semantic_row",
            Self::DuplicateCodeInRow => "duplicate_code_in_row",
        }
    }
}
105
/// **The universe a table validator resolves *names* against** (T16.4 — the panel's central rule: *a
/// validator must always name the universe it validates against*). The aux-table validator in this
/// module checks **row structure only** and resolves zone names against **no** universe, so "unknown
/// zone name" is deliberately *not evaluated* (it would be meaningless without a declared universe).
/// Resolving names against admitted-source / compiled-output / reference-distribution is tracked, not
/// done here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZoneUniverse {
    /// Names are **not** resolved — structural row admissibility only (the current honest state).
    NotResolvedStructuralOnly,
    AdmittedSourceDefinitions,
    CompiledOutputTree,
    ReferenceDistributionTable,
    SourcePlusBackwardLinks,
    Unknown,
}

impl ZoneUniverse {
    /// Stable snake_case identifier for this universe.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NotResolvedStructuralOnly => "not_resolved_structural_only",
            Self::AdmittedSourceDefinitions => "admitted_source_definitions",
            Self::CompiledOutputTree => "compiled_output_tree",
            Self::ReferenceDistributionTable => "reference_distribution_table",
            Self::SourcePlusBackwardLinks => "source_plus_backward_links",
            Self::Unknown => "unknown",
        }
    }
}
134
/// **Which authority zone-table country codes are cross-validated against** (T16.4). The validator
/// cross-checks `zone.tab` and `zone1970.tab` codes; the safe authority is the **same admitted
/// release's** `iso3166.tab` — never a host/system ISO list (which drifts across releases).
/// `NotCrossValidated` is reported when no `iso3166.tab` code set was supplied, or when the table kind
/// is not cross-checked at all (codes are still shape-checked).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CountryCodeAuthority {
    SameAdmittedReleaseIso3166Tab,
    ExternalIsoRegistry,
    HostLibrary,
    NotCrossValidated,
}

impl CountryCodeAuthority {
    /// Stable snake_case identifier for this authority.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::SameAdmittedReleaseIso3166Tab => "same_admitted_release_iso3166_tab",
            Self::ExternalIsoRegistry => "external_iso_registry",
            Self::HostLibrary => "host_library",
            Self::NotCrossValidated => "not_cross_validated",
        }
    }
}
158
/// The table-structural verdict (T16.4). Deliberately **not** a bare `valid: true` flag — `Conformant`
/// means only *structurally admissible as a table*, nothing more (see the module non-claim).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZoneTableStructuralVerdict {
    Conformant,
    Violation,
}

impl ZoneTableStructuralVerdict {
    /// Stable lowercase identifier for this verdict.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Conformant => "conformant",
            Self::Violation => "violation",
        }
    }
}
175
176/// The validation of one table.
177#[derive(Debug, Clone)]
178pub struct AuxTableValidation {
179    pub kind: ZoneTableKind,
180    pub verdict: ZoneTableStructuralVerdict,
181    pub rows_checked: usize,
182    /// Which authority country codes were cross-validated against (T16.4).
183    pub country_code_authority: CountryCodeAuthority,
184    /// Typed findings with their 1-based data-row line (bounded to the first few, to avoid spam).
185    pub findings: Vec<(usize, ZoneTableFinding)>,
186}
187
188impl AuxTableValidation {
189    fn to_json(&self) -> String {
190        let mut findings = String::from("[");
191        for (i, (line, f)) in self.findings.iter().enumerate() {
192            if i > 0 {
193                findings.push_str(", ");
194            }
195            findings.push_str(&format!(
196                "{{ \"line\": {}, \"finding\": {} }}",
197                line,
198                escape(f.as_str())
199            ));
200        }
201        findings.push(']');
202        format!(
203            "{{ \"kind\": {}, \"artifact_category\": {}, \"coverage\": {}, \"verdict\": {}, \
204             \"rows_checked\": {}, \"country_code_authority\": {}, \"findings\": {} }}",
205            escape(self.kind.as_str()),
206            escape(self.kind.artifact_category().as_str()),
207            escape(self.kind.coverage()),
208            escape(self.verdict.as_str()),
209            self.rows_checked,
210            escape(self.country_code_authority.as_str()),
211            findings
212        )
213    }
214}
215
/// True when `s` is exactly two bytes long and both are uppercase ASCII letters (`A`–`Z`).
fn is_country_code(s: &str) -> bool {
    matches!(s.as_bytes(), [first, second] if first.is_ascii_uppercase() && second.is_ascii_uppercase())
}
220
/// An ISO-6709-style coordinate as used by the zone tables: either `±DDMM±DDDMM` (minute precision)
/// or `±DDMMSS±DDDMMSS` (second precision).
///
/// Latitude and longitude must share the **same** precision — a 4-digit latitude pairs only with a
/// 5-digit longitude, and a 6-digit latitude only with a 7-digit longitude. Mixed forms such as
/// `±DDMM±DDDMMSS` are rejected. This is a *syntax* check only: degree/minute/second ranges are not
/// inspected.
///
/// Returns `false` for anything else, including the empty string and non-ASCII input.
fn is_coordinate(s: &str) -> bool {
    // The leading latitude sign is mandatory.
    let body = match s.strip_prefix(['+', '-']) {
        Some(rest) => rest,
        None => return false,
    };
    // The next sign starts the longitude. Both signs are ASCII, so slicing at `at` and `at + 1`
    // always lands on a char boundary.
    let (lat_digits, lon_digits) = match body.find(['+', '-']) {
        Some(at) => (&body[..at], &body[at + 1..]),
        None => return false,
    };
    let all_digits = |part: &str| part.bytes().all(|c| c.is_ascii_digit());
    // Only the two matched-precision layouts are admissible.
    let precision_ok = matches!((lat_digits.len(), lon_digits.len()), (4, 5) | (6, 7));
    precision_ok && all_digits(lat_digits) && all_digits(lon_digits)
}
242
243/// Validate one auxiliary table from raw bytes (bounds-safe; never panics). `iso3166_codes`, when
244/// supplied, cross-validates the country-code columns of `zone1970.tab`. **`zonenow.tab` allows `XX`** (its
245/// placeholder convention) so country codes there are not cross-checked.
246pub fn validate_zone_table(
247    kind: ZoneTableKind,
248    bytes: &[u8],
249    iso3166_codes: Option<&BTreeSet<String>>,
250) -> AuxTableValidation {
251    let mut findings: Vec<(usize, ZoneTableFinding)> = Vec::new();
252    let push = |findings: &mut Vec<(usize, ZoneTableFinding)>, line: usize, f: ZoneTableFinding| {
253        if findings.len() < 32 {
254            findings.push((line, f));
255        }
256    };
257
258    // Which authority did we cross-validate country codes against? `zone.tab` + `zone1970.tab` are
259    // cross-checked against a supplied **same-release** `iso3166.tab` (never a host/world ISO list).
260    // `zonenow.tab` uses the `XX` placeholder so it is not cross-validated; `iso3166.tab` *is* the source.
261    let cross_validates = matches!(kind, ZoneTableKind::ZoneTab | ZoneTableKind::Zone1970Tab);
262    let country_code_authority = if cross_validates && iso3166_codes.is_some() {
263        CountryCodeAuthority::SameAdmittedReleaseIso3166Tab
264    } else {
265        CountryCodeAuthority::NotCrossValidated
266    };
267
268    let text = match std::str::from_utf8(bytes) {
269        Ok(t) => t,
270        Err(_) => {
271            return AuxTableValidation {
272                kind,
273                verdict: ZoneTableStructuralVerdict::Violation,
274                rows_checked: 0,
275                country_code_authority,
276                findings: vec![(0, ZoneTableFinding::NonUtf8)],
277            };
278        }
279    };
280
281    let mut rows_checked = 0usize;
282    // **Semantic-row** duplicate detection: the *identity tuple* (not the whole line incl. comment, and
283    // NOT the country code alone — a country legitimately spans many rows). Zone tables key on
284    // `(cc-set, coord, zone-name)`; `iso3166.tab` keys on the country code (codes are a unique reference).
285    let mut seen_identity: BTreeSet<String> = BTreeSet::new();
286    for (idx, raw) in text.lines().enumerate() {
287        let line = idx + 1;
288        // Comments + blank lines are not data rows (tz tables use `#`).
289        if raw.is_empty() || raw.starts_with('#') {
290            continue;
291        }
292        rows_checked += 1;
293        let fields: Vec<&str> = raw.split('\t').collect();
294
295        match kind {
296            ZoneTableKind::Iso3166Tab => {
297                // `CC \t name` — exactly 2 fields; code 2-upper; name non-empty; code unique.
298                if fields.len() != 2 {
299                    push(&mut findings, line, ZoneTableFinding::InvalidColumnCount);
300                    continue;
301                }
302                if !is_country_code(fields[0]) {
303                    push(&mut findings, line, ZoneTableFinding::InvalidCountryCode);
304                } else if !seen_identity.insert(fields[0].to_string()) {
305                    push(&mut findings, line, ZoneTableFinding::DuplicateSemanticRow);
306                }
307                if fields[1].is_empty() {
308                    push(&mut findings, line, ZoneTableFinding::EmptyNameField);
309                }
310            }
311            ZoneTableKind::ZoneTab | ZoneTableKind::Zone1970Tab | ZoneTableKind::ZonenowTab => {
312                // `CC[,CC…] \t coord \t TZ [\t comment]` — at least 3 fields.
313                if fields.len() < 3 {
314                    push(&mut findings, line, ZoneTableFinding::InvalidColumnCount);
315                    continue;
316                }
317                // Country code(s): a comma-separated list for zone1970; single for zone.tab; `XX` allowed
318                // for zonenow (its placeholder convention) so we do not cross-check those.
319                let codes = fields[0];
320                let code_ok = codes.split(',').all(|c| {
321                    is_country_code(c) || (kind == ZoneTableKind::ZonenowTab && c == "XX")
322                });
323                if !code_ok {
324                    push(&mut findings, line, ZoneTableFinding::InvalidCountryCode);
325                } else {
326                    // Set semantics for the comma-list (zone1970): a code repeated within the row is a
327                    // finding. (zone.tab is single-code, so this never fires there.)
328                    if kind == ZoneTableKind::Zone1970Tab {
329                        let mut in_row: BTreeSet<&str> = BTreeSet::new();
330                        for c in codes.split(',') {
331                            if !in_row.insert(c) {
332                                push(&mut findings, line, ZoneTableFinding::DuplicateCodeInRow);
333                            }
334                        }
335                    }
336                    // Cross-validate each code against the same-release ISO-3166 authority — for BOTH
337                    // `zone.tab` and `zone1970.tab` (zonenow's `XX` is excluded by `cross_validates`).
338                    if cross_validates {
339                        if let Some(set) = iso3166_codes {
340                            if !codes.split(',').all(|c| set.contains(c)) {
341                                push(&mut findings, line, ZoneTableFinding::InvalidCountryCode);
342                            }
343                        }
344                    }
345                }
346                if !is_coordinate(fields[1]) {
347                    push(
348                        &mut findings,
349                        line,
350                        ZoneTableFinding::InvalidCoordinateFormat,
351                    );
352                }
353                // Zone-name field must be non-empty.
354                if fields[2].is_empty() {
355                    push(&mut findings, line, ZoneTableFinding::EmptyNameField);
356                }
357                // Semantic-row identity = (cc-set, coord, zone-name), comments excluded. A country
358                // spanning many rows is legal; an identical identity tuple is the real duplicate.
359                let identity = format!("{}\t{}\t{}", fields[0], fields[1], fields[2]);
360                if !seen_identity.insert(identity) {
361                    push(&mut findings, line, ZoneTableFinding::DuplicateSemanticRow);
362                }
363            }
364        }
365    }
366
367    if rows_checked == 0 {
368        push(&mut findings, 0, ZoneTableFinding::EmptyTable);
369    }
370    let verdict = if findings.is_empty() {
371        ZoneTableStructuralVerdict::Conformant
372    } else {
373        ZoneTableStructuralVerdict::Violation
374    };
375    AuxTableValidation {
376        kind,
377        verdict,
378        rows_checked,
379        country_code_authority,
380        findings,
381    }
382}
383
384/// Parse the set of country codes from an `iso3166.tab` (first field of each data row) — used to
385/// cross-validate `zone1970.tab`. Bounds-safe; non-UTF-8 → empty set.
386pub fn iso3166_codes(bytes: &[u8]) -> BTreeSet<String> {
387    let mut set = BTreeSet::new();
388    if let Ok(text) = std::str::from_utf8(bytes) {
389        for raw in text.lines() {
390            if raw.is_empty() || raw.starts_with('#') {
391                continue;
392            }
393            if let Some(code) = raw.split('\t').next() {
394                if is_country_code(code) {
395                    set.insert(code.to_string());
396                }
397            }
398        }
399    }
400    set
401}
402
/// **Bounded** install-ecology status (T16.4) — kept deliberately narrow so it never implies packager
/// parity. zic-rs writes a **compile output tree under an explicit `--out`** and nothing more: it
/// claims no reference install layout, no `posix`/`right` REDO layout, no `localtime`/`posixrules`
/// completeness, and no runtime tzfile-refresh. The status is a typed inventory value, not a parity
/// claim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InstallEcologyStatus {
    /// Nothing about install ecology is claimed.
    NotClaimed,
    /// Only inventoried (named in the T16.1 inventory), not executed.
    InventoryOnly,
    /// zic-rs materialises only a compile output tree under `--out` — the current, honest state.
    CompileOutputTreeOnly,
}

impl InstallEcologyStatus {
    /// Stable snake_case identifier for this status.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::NotClaimed => "not_claimed",
            Self::InventoryOnly => "inventory_only",
            Self::CompileOutputTreeOnly => "compile_output_tree_only",
        }
    }

    /// The current shipped status: a compile output tree only — no install-layout parity is claimed.
    pub fn current() -> Self {
        Self::CompileOutputTreeOnly
    }
}
430
431/// The aux-table validation report (T16.4) — schema `zic-rs-aux-table-validation-v1`. A **separate** proof
432/// surface from compile/semantic/structural: it asserts table *structural admissibility* only.
433#[derive(Debug, Clone)]
434pub struct AuxTableValidationReport {
435    pub tables: Vec<AuxTableValidation>,
436    pub install_ecology: InstallEcologyStatus,
437}
438
439impl AuxTableValidationReport {
440    pub fn to_json(&self) -> String {
441        let mut tables = String::from("[");
442        for (i, t) in self.tables.iter().enumerate() {
443            if i > 0 {
444                tables.push_str(", ");
445            }
446            tables.push_str(&t.to_json());
447        }
448        tables.push(']');
449        format!(
450            "{{\n  \"schema\": \"zic-rs-aux-table-validation-v1\",\n  \
451             \"non_claim\": \"a conformant table row proves table structural admissibility only — NOT \
452             that the named zone was compiled, semantically witnessed, historically equivalent, or \
453             installed; coordinate syntax does NOT claim geodetic accuracy; a public-domain notice is not \
454             provenance; release identity is never inferred from table comments\",\n  \
455             \"zone_universe\": {},\n  \"table_diagnostic_code_space\": \"separate_table_codes\",\n  \
456             \"coordinate_verdict\": \"syntax_only_geodetic_truth_not_claimed\",\n  \
457             \"table_comment_disposition\": \"ignored_for_validation\",\n  \
458             \"install_ecology_status\": {},\n  \"tables\": {}\n}}\n",
459            // The validator resolves NO zone names — structural admissibility only (name the universe!).
460            escape(ZoneUniverse::NotResolvedStructuralOnly.as_str()),
461            escape(self.install_ecology.as_str()),
462            tables
463        )
464    }
465}