tzcompile/lib.rs
1//! `tzcompile` — a memory-safe Rust implementation of (a declared subset of) the IANA
2//! timezone compiler `zic`.
3//!
4//! # What this crate is
5//!
6//! It compiles IANA *tzdata source text* (`Zone` / `Rule` / `Link` lines) into binary
7//! **TZif** files as specified by [RFC 9636] / `tzfile(5)`. It is library-first: the CLI
8//! in the `zic-rs` binary is a thin shell over the API exposed here.
9//!
10//! # Compiler pipeline
11//!
12//! ```text
13//! source text -> lexer -> parser -> Database -> compile -> TzifData -> TZif bytes
14//! (source::) (model records) (compile::) (tzif::)
15//! ```
16//!
17//! The companion [`compare`] module is an *oracle*: it can run the reference C `zic` and
18//! diff its output against ours, semantically (the contract) and byte-for-byte (a stretch
19//! goal for the simplest zones). It is the only place that ever executes an external
20//! `zic`; the compile path never shells out.
21//!
22//! # Scope (current declared subset)
23//!
24//! `zic-rs` grows strictly by fixture class, so the supported surface is exactly what the
25//! checked-in fixtures (and their reference-`zic`/`zdump` oracle results) prove. At present
26//! that is: fixed-offset zones and `Link`s; finite named-`Rule` DST sets
27//! (`lastSun`/`Sun>=N`/`Sun<=N`, `AT` suffixes `w`/`s`/`u`, `FORMAT` `%s`/slash/`%z`);
28//! recurring (`TO=maximum`) rule sets summarised by an exact POSIX-TZ footer; and multi-era
29//! `UNTIL` continuations (including the final-recurring-era anchor). Everything outside the
30//! proven subset — e.g. inline-saving eras with a `%s`/slash `FORMAT` or a negative save,
31//! `24:00`/negative compiled times, leap seconds — is **rejected with an explicit diagnostic**
32//! rather than approximated (the fail-closed doctrine). (Accepted breadth: inline-save eras with
33//! a literal/`%z` `FORMAT`; both mixed finite+recurring final-era shapes — effectively
34//! recurring-only as in Europe/London, and genuinely mixed-in-era as in America/New_York; and
35//! the obsolete `FROM=minimum`, coerced to 1900 like reference `zic`.) The authoritative,
36//! always-current lists live in `docs/supported-syntax.md` and `docs/unsupported-syntax.md`;
37//! the milestone history is in `docs/roadmap.md`.
38//!
39//! [RFC 9636]: https://www.rfc-editor.org/rfc/rfc9636
40
41#![forbid(unsafe_code)]
42#![warn(missing_debug_implementations)]
43
44pub mod aux_tables;
45pub mod cli;
46pub mod compare;
47pub mod compile;
48pub mod diagnostics;
49pub mod doctor;
50pub mod error;
51pub mod fs;
52pub mod hash;
53pub(crate) mod json;
54pub mod limits;
55pub mod manifest;
56pub mod model;
57pub mod release_diff;
58pub mod report;
59pub mod semantic_witness;
60pub mod size_report;
61pub mod source;
62pub mod structural;
63pub mod tzif;
64pub mod vendor_oracle;
65
66pub use diagnostics::{
67 Diagnostic, DiagnosticCode, DiagnosticLayer, DiagnosticSpanPrecision, DiagnosticVerbosity,
68 Severity,
69};
70pub use error::{Error, Result};
71
72use std::path::{Path, PathBuf};
73
/// Strategy for materialising `Link` records in the output tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LinkMode {
    /// Write the target file's bytes under the link name. Portable, and the default.
    #[default]
    Copy,
    /// Write a relative symbolic link pointing at the target (smaller; Unix-only semantics).
    Symlink,
}

impl LinkMode {
    /// Boundary spelling of the mode as used in the alias-map JSON: `"copy"` or `"symlink"`.
    ///
    /// The literal lives on the type (CONTRACT.TYPING) so that the alias-map's `materialised`
    /// field is a typed `LinkMode` rather than a hand-emitted string that could drift from the
    /// actual materialisation policy.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Copy => "copy",
            Self::Symlink => "symlink",
        }
    }
}
95
/// Explicit-transition **emission style** (campaign T8-slim).
///
/// This is purely an *emission policy*: it never changes behaviour — every mode is
/// `zdump`-equivalent because the POSIX footer governs the tail identically. The only thing it
/// varies is *how many explicit transitions* are written before the footer takes over.
///
/// * [`EmitStyle::Default`] — zic-rs's behaviour-matched output, with explicit transitions
///   expanded through `RECUR_HI`. It is the CORE.1-gated default and this enum **never** alters it.
/// * [`EmitStyle::ZicSlim`] — reproduces reference `zic`'s slim (`-b slim`) output: the
///   footer-governed recurring tail is dropped and explicit transitions are kept only up to the
///   first footer-governed (recurring) transition (`zic.c`'s `keep_at_max = TZstarttime`). Byte
///   parity is claimed in this mode only, never for the default.
/// * [`EmitStyle::ZicFat`] — reference `zic`'s fat (`-b fat`) policy. Because zic-rs's default
///   emission is already fat-style, this currently aliases [`EmitStyle::Default`]; it stays a
///   distinct variant so the CLI surface matches `zic` and a future redundant-tail refinement has
///   a home.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum EmitStyle {
    /// The behaviour-matched default (fat-ish, CORE.1-gated). Never altered.
    #[default]
    Default,
    /// Reference `zic` slim: the footer-governed recurring tail is truncated.
    ZicSlim,
    /// Reference `zic` fat (currently the same as [`EmitStyle::Default`]).
    ZicFat,
}
120
121/// Full emission options: an [`EmitStyle`] plus the optional `-R` redundant-tail bound (T10.3).
122///
123/// `redundant_until` is reference `zic`'s `-R @hi`. It is meaningful **only with
124/// [`EmitStyle::ZicSlim`]**: it keeps the otherwise-droppable, footer-governed explicit transitions
125/// out to this UT instant (inclusive), so old readers that ignore the POSIX footer still see correct
126/// timestamps up to that point. It is **not** fat mode (`-b`) and **not** range truncation (`-r`).
127/// With the fat-style default it is a no-op (everything is kept already). [`EmitStyle`] converts into
128/// `EmitOptions` with `redundant_until: None`, so callers that don't use `-R` are unchanged.
129#[derive(Debug, Clone, Copy, PartialEq, Eq)]
130pub struct EmitOptions {
131 pub style: EmitStyle,
132 pub redundant_until: Option<i64>,
133 /// `-r '[@lo][/@hi]'` range truncation (T10.4d). A **distinct** input from
134 /// [`Self::redundant_until`] — they both inform what is emitted but are never merged: `-R` widens
135 /// what slim keeps, `-r` reshapes the representable interval (and introduces the `-00`
136 /// "local time unspecified" boundary type(s)). `None` = no truncation.
137 pub range: Option<RangeSpec>,
138}
139
140impl From<EmitStyle> for EmitOptions {
141 fn from(style: EmitStyle) -> Self {
142 EmitOptions {
143 style,
144 redundant_until: None,
145 range: None,
146 }
147 }
148}
149
150impl Default for EmitOptions {
151 fn default() -> Self {
152 EmitStyle::Default.into()
153 }
154}
155
/// Bounds for reference `zic`'s `-r '[@lo][/@hi]'` range truncation (T10.4), held as **raw
/// parsed** Unix-second instants. A `None` side is unbounded (`zic`'s `min_time`/`max_time`
/// default).
///
/// **T10.4b (parse-only):** the value is currently parsed and validated but **not yet applied**;
/// supplying one fails closed instead of silently producing un-truncated output (range truncation
/// lands in T10.4d). Once applied, truncation introduces a leading (and, if the end is cut, a
/// trailing) **`-00` "local time unspecified"** type, so it is *not* a plain transition filter.
/// `-r` is also a *different* input to the clamp than `-R` ([`CompileConfig::redundant_until`]);
/// the two do not merge. The `hi -= 1` / `limitrange` resolution is deferred to T10.4d, so the
/// stored `hi` is the raw `@hi`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RangeSpec {
    /// Raw `@lo` instant; `None` leaves the lower side unbounded.
    pub lo: Option<i64>,
    /// Raw `@hi` instant (not yet decremented); `None` leaves the upper side unbounded.
    pub hi: Option<i64>,
}
170
/// What to do about source constructs that this version does not implement.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum UnsupportedPolicy {
    /// Fail closed: abort the entire compilation. This is the safe default.
    #[default]
    Error,
    /// Report a diagnostic, skip the zone that triggered it, and carry on with the rest.
    WarnAndSkipZone,
}
180
/// Which zones (and their links) a run should compile.
#[derive(Debug, Clone)]
pub enum ZoneSelection {
    /// Exactly one zone, named explicitly.
    One(String),
    /// An explicit list of zone names.
    Many(Vec<String>),
    /// All zones the compiler currently supports; zones it cannot handle are reported.
    AllSupported,
}
191
192/// Fully-resolved configuration for a compile run.
193///
194/// Built by the CLI but usable directly from the library.
195#[derive(Debug, Clone)]
196pub struct CompileConfig {
197 /// Source files and/or directories to read tzdata from.
198 pub input_paths: Vec<PathBuf>,
199 /// Root of the output tree. Required — there is no implicit system default.
200 pub output_dir: PathBuf,
201 /// Which zones to compile.
202 pub zones: ZoneSelection,
203 /// How to write `Link`s.
204 pub link_mode: LinkMode,
205 /// Overwrite existing output files.
206 pub overwrite: bool,
207 /// What to do when a zone uses unsupported syntax.
208 pub unsupported_policy: UnsupportedPolicy,
209 /// Hard cap on transitions emitted per zone (defence against pathological input).
210 pub transition_limit: usize,
211 /// Explicit-transition emission style (T8-slim). [`EmitStyle::Default`] is behaviour-matched
212 /// and CORE.1-gated; `zic-slim`/`zic-fat` are opt-in structural-parity modes.
213 pub emit_style: EmitStyle,
214 /// Do **not** create the `--out` tree (reference `zic`'s `-D`): if set, the output directory must
215 /// already exist. Default `false` — zic-rs creates the (explicit, never system-default) `--out`
216 /// root. The output-safety boundary is unchanged either way (T9.3).
217 pub no_create_dirs: bool,
218 /// Optional `localtime` install-policy target (reference `zic`'s `-l <zone>`): after a successful
219 /// compile, create a `localtime` entry in the output tree linking the named (already-compiled)
220 /// zone. `None` (default) writes no such link. Opt-in install policy — it **never** affects the
221 /// canonical-zone behaviour sweep (CORE.1) and is materialised in phase 2 with the same
222 /// no-partial-install guarantee as every other output file (T9.4).
223 pub localtime: Option<String>,
224 /// Name of the `localtime` link within the output tree (reference `zic`'s `-t`, default
225 /// `"localtime"`). Constrained to a **safe relative name under `--out`** — unlike reference `zic`,
226 /// zic-rs will not write the link to an arbitrary/absolute/system path (e.g. `/etc/localtime`);
227 /// that is an *intentional safer divergence* (bucket 3). Ignored when [`Self::localtime`] is
228 /// `None`.
229 pub localtime_name: Option<String>,
230 /// Optional file permission bits (reference `zic`'s `-m <mode>`), as parsed **octal** (e.g. `644`
231 /// → `0o644`). Applied to the created regular files (compiled TZif zones and **copied** link
232 /// files) after they are written; **not** applied to symlink entries (it would follow the link).
233 /// `None` (default) leaves the process umask in effect. **Unix-only** install metadata: a value on
234 /// a non-Unix platform is a config error caught *before any write*. Never affects the compiled
235 /// bytes or the canonical-zone behaviour sweep (T9.5). Note: zic-rs accepts the **octal** subset of
236 /// `zic`'s `-m`; symbolic chmod expressions (`u+rwx`) are not parsed.
237 pub file_mode: Option<u32>,
238 /// Optional `-R @hi` redundant-tail bound (T10.3): under [`EmitStyle::ZicSlim`], keep the
239 /// otherwise-droppable footer-governed explicit transitions out to this UT instant (inclusive),
240 /// for readers that ignore the POSIX footer. `None` (default) = pure slim. A no-op with the
241 /// fat-style default (everything is already kept). Independent of [`Self::emit_style`]'s slim/fat
242 /// choice — it only *widens* what slim keeps; it never changes behaviour (the kept transitions are
243 /// redundant with the footer) and never the TZif version.
244 pub redundant_until: Option<i64>,
245 /// Optional `-r '[@lo][/@hi]'` range-truncation bounds (T10.4). **Parse-only for now** — a value
246 /// is validated but **not yet applied**; the compile fails closed (rather than silently emitting
247 /// un-truncated output) until T10.4d lands the clamp + the `-00` placeholder semantics.
248 pub range: Option<RangeSpec>,
249 /// Optional parsed leap-second table (reference `zic`'s `-L leapseconds`) — the **`right/` build
250 /// profile** (T11.6). When `Some`, the leap table is applied to **every** compiled zone via
251 /// [`compile::apply_leaps`]. `None` (default) is the ordinary/`posix` profile: no leap table, and
252 /// ordinary canonical-zone conformance is unchanged. Opt-in; never the default.
253 pub leaps: Option<model::LeapTable>,
254}
255
/// A sensible, safe default for the per-zone transition cap — presumably the value used for
/// [`CompileConfig::transition_limit`] when the caller does not pick one (confirm against the
/// CLI). There is deliberately no default output directory; that is always explicit.
pub const DEFAULT_TRANSITION_LIMIT: usize = 100_000;
258
/// One zone's outcome, as recorded in the compile report.
#[derive(Debug, Clone)]
pub struct ZoneReport {
    /// Name of the zone this record describes.
    pub name: String,
    /// Path recorded for the zone's output (presumably the written TZif file — confirm against
    /// the code that fills the report).
    pub output_path: PathBuf,
    /// TZif version number recorded for the zone.
    pub tzif_version: u8,
    /// Number of transitions recorded for the zone.
    pub transition_count: usize,
}
267
268/// Per-link outcome record for the compile report.
269#[derive(Debug, Clone)]
270pub struct LinkReport {
271 pub link_name: String,
272 pub target: String,
273 pub mode: LinkMode,
274}
275
276/// Structured, deterministic summary of a compile run.
277#[derive(Debug, Default, Clone)]
278pub struct CompileReport {
279 pub zones_compiled: Vec<ZoneReport>,
280 pub links_written: Vec<LinkReport>,
281 pub diagnostics: Vec<Diagnostic>,
282}
283
284/// Read every tzdata source file referenced by `paths` and parse it into a
285/// [`Database`](model::Database).
286///
287/// Directories are read non-recursively for files ending in `.zi`/`.tab`-free tzdata
288/// (we accept any regular file when given explicitly). Order is deterministic: paths in
289/// the order given, files within a directory sorted by name.
290pub fn load_database(paths: &[PathBuf]) -> Result<model::Database> {
291 // T17.1b: bound input-driven resource use (bucket-3 safer divergence — reference `zic` caps none
292 // of these; the defaults sit far above any real tzdb, so legitimate input is never rejected).
293 let limits = limits::ResourceLimits::default();
294 let files = collect_source_files(paths)?;
295 let mut db = model::Database::default();
296 for f in &files {
297 let bytes = std::fs::read(f).map_err(|e| Error::io(f, e))?;
298 limits.check_source_bytes(bytes.len(), f)?;
299 source::parse_into(&bytes, f, &mut db)?;
300 }
301 limits.enforce(&db)?;
302 Ok(db)
303}
304
305/// Expand input paths into a deterministic, flat list of source files.
306///
307/// Directories are read non-recursively and their regular files included in name order;
308/// explicitly-named files are taken as given. This is shared by [`load_database`] and the
309/// `compare` oracle, so our parser and the reference `zic` are fed exactly the same files.
310pub fn collect_source_files(paths: &[PathBuf]) -> Result<Vec<PathBuf>> {
311 let mut files: Vec<PathBuf> = Vec::new();
312 for p in paths {
313 if p.is_dir() {
314 let mut entries: Vec<PathBuf> = std::fs::read_dir(p)
315 .map_err(|e| Error::io(p, e))?
316 .filter_map(|e| e.ok().map(|e| e.path()))
317 .filter(|p| p.is_file())
318 .collect();
319 entries.sort();
320 files.extend(entries);
321 } else {
322 files.push(p.clone());
323 }
324 }
325 Ok(files)
326}
327
/// Compile one zone (by name) from a parsed database into in-memory TZif data.
///
/// This is the semantic heart of the compiler; it does not touch the filesystem. The call is
/// forwarded unchanged to `compile::compile_zone`; use [`compile_zone_styled`] to choose a
/// non-default emission policy.
///
/// # Errors
///
/// Returns whatever error `compile::compile_zone` reports for `name`.
pub fn compile_zone(db: &model::Database, name: &str) -> Result<tzif::TzifData> {
    compile::compile_zone(db, name)
}
334
335/// Compile one zone with an explicit emission policy. Accepts a bare [`EmitStyle`] *or* a full
336/// [`EmitOptions`] (style + `-R` redundant-tail bound). [`compile_zone`] is exactly this with
337/// [`EmitStyle::Default`]; the default output is byte-for-byte unchanged from before T8-slim.
338pub fn compile_zone_styled(
339 db: &model::Database,
340 name: &str,
341 opts: impl Into<EmitOptions>,
342) -> Result<tzif::TzifData> {
343 compile::compile_zone_styled(db, name, opts.into())
344}
345
346/// Convenience: compile a zone straight to TZif bytes (default emission style).
347pub fn compile_zone_to_bytes(db: &model::Database, name: &str) -> Result<Vec<u8>> {
348 let data = compile_zone(db, name)?;
349 tzif::write_bytes(&data)
350}
351
352/// Convenience: compile a zone straight to TZif bytes with an explicit emission policy
353/// ([`EmitStyle`] or [`EmitOptions`]).
354pub fn compile_zone_to_bytes_styled(
355 db: &model::Database,
356 name: &str,
357 opts: impl Into<EmitOptions>,
358) -> Result<Vec<u8>> {
359 let data = compile_zone_styled(db, name, opts.into())?;
360 tzif::write_bytes(&data)
361}
362
363/// Resolve a link target name to the underlying canonical zone name, following link chains.
364///
365/// Distinguishes the two failure modes a downstream consumer cares about: a **cycle**
366/// (`A → B → A`, which would otherwise loop forever) versus a **missing target** (the chain
367/// ends at a name that is neither a zone nor a link). Both return a clear message; the cycle
368/// case reports the path it detected so the offending `Link` lines are easy to find.
369pub fn resolve_link_target<'a>(db: &'a model::Database, name: &'a str) -> Result<&'a str> {
370 let mut current = name;
371 // Track visited link names to detect a cycle precisely (rather than bounding hops).
372 let mut visited: Vec<&'a str> = Vec::new();
373 loop {
374 if db.zones.iter().any(|z| z.name == current) {
375 return Ok(current);
376 }
377 // T17.1b: bound a pathological *acyclic* chain (the cycle check below catches loops, but a
378 // straight chain of millions of links would still run — and cost O(n²) via `visited.contains`).
379 if visited.len() >= limits::DEFAULT_LINK_CHAIN_DEPTH_MAX {
380 return Err(Error::config(format!(
381 "link chain for {name:?} exceeds the zic-rs depth limit of {} hops",
382 limits::DEFAULT_LINK_CHAIN_DEPTH_MAX
383 )));
384 }
385 if visited.contains(¤t) {
386 visited.push(current);
387 return Err(Error::message(format!(
388 "link chain for {name:?} forms a cycle: {}",
389 visited.join(" -> ")
390 )));
391 }
392 match db.links.iter().find(|l| l.link_name == current) {
393 Some(l) => {
394 visited.push(current);
395 current = &l.target;
396 }
397 None => {
398 return Err(Error::message(format!(
399 "link target {current:?} does not name a zone or link (resolving {name:?})"
400 )))
401 }
402 }
403 }
404}
405
/// True when `candidate` is strictly contained within `root` after normalising `.`/`..`.
///
/// Thin public forwarder to `fs::output_tree::is_contained`, the check used by the output tree;
/// it is exposed here so tests can exercise it directly.
pub fn is_contained(root: &Path, candidate: &Path) -> bool {
    fs::output_tree::is_contained(root, candidate)
}