Skip to main content

tzcompile/
size_report.rs

1//! `size-report` (T21.2) — a **read-only** footprint report over a produced output tree (`--out <dir>`),
2//! for the container / embedded image builder (see `docs/container-embedded-builder.md`).
3//!
4//! It answers *"how big is this timezone bundle, what is in it, and is it reproducible?"* — it **never
5//! compiles, never writes, never admits**. It walks the tree, classifies each entry **structurally** (a
6//! regular file that parses as TZif · a symlink alias · any other file), tallies the footprint, and
7//! computes a **deterministic `bundle_hash`** (sorted (relative-path, content-hash) pairs → one SHA-256,
8//! independent of readdir order).
9//!
10//! **Honest boundary (non-claims):** size-report reports the **tree as it is on disk**. It does **not**
11//! distinguish a real zone from a *copied* link (copy-mode produces a byte-identical TZif file — telling
12//! them apart needs the `alias-map.json`, a future cross-reference), and it makes **no** runtime/reader
13//! approval claim (the reader-compatibility gauntlet is T21/T22). A symlink is counted as a link; a
14//! regular file that parses as TZif is `tzif_files` (zone-or-copied-link); everything else is `other_files`.
15
16use std::path::{Path, PathBuf};
17
18use crate::error::{Error, Result};
19use crate::json::escape;
20use crate::manifest::CompilerIdentity;
21
/// The schema id for the JSON form; [`SizeReport::to_json`] emits it as the `"schema"` field.
/// `v1` — first cut; additive optional fields would not bump it
/// (`docs/schema-compatibility-policy.md`).
pub const SCHEMA: &str = "zic-rs-size-report-v1";
25
/// Options for [`run_size_report`].
///
/// Currently a single field; kept as a struct so the entry point takes one options value.
#[derive(Debug)]
pub struct SizeReportOptions {
    /// The output tree to measure (an existing directory produced by `compile --out`).
    /// [`run_size_report`] rejects it with a config error when it is not a directory.
    pub out: PathBuf,
}
32
/// One TZif file's footprint, for the largest-file pick.
#[derive(Debug, Clone)]
pub struct TzifEntry {
    /// Path relative to the measured root, joined with `/` separators (see `rel_path`).
    pub rel: String,
    /// Size of the file's contents in bytes.
    pub bytes: u64,
}
39
/// The size report: tallies and a bundle hash for one output tree, as produced by
/// [`run_size_report`] and rendered by [`SizeReport::to_json`] / [`SizeReport::to_text`].
#[derive(Debug)]
pub struct SizeReport {
    /// The measured root directory, as given in the options (lossily converted to UTF-8).
    pub root: String,
    /// Regular files that parse as TZif (a zone *or* a copy-mode link — not distinguished here).
    pub tzif_files: u64,
    /// Symlink entries (alias links in symlink-mode). Symlinks are never followed.
    pub symlink_links: u64,
    /// Regular files that are **not** valid TZif (e.g. `manifest.json`, `alias-map.json`).
    pub other_files: u64,
    /// Total bytes of all regular files (the on-disk bundle size). Symlinks contribute nothing.
    pub total_bytes: u64,
    /// Total bytes of the TZif files only (the timezone payload).
    pub total_tzif_bytes: u64,
    /// The largest TZif file (relative path, bytes), if any; `None` when the tree has no TZif file.
    pub largest_tzif: Option<TzifEntry>,
    /// Count of TZif files at version v1 / v2 / v3 / v4 (indices 0 through 3, in that order).
    /// A TZif file whose version byte is none of these is counted in `tzif_files` but in no bucket.
    pub version_histogram: [u64; 4],
    /// TZif files carrying a non-empty POSIX-TZ footer.
    pub footer_present: u64,
    /// Deterministic bundle hash: SHA-256 over the sorted `relpath\0content-hash` lines of **every**
    /// regular file + `relpath\0symlink:<target>` for each symlink (so link structure is captured).
    /// The sorted lines are joined with `\n` before hashing.
    /// Same tree → same hash, regardless of traversal order.
    pub bundle_hash: String,
    /// Identity of the compiler build that produced this report (`CompilerIdentity::capture()`).
    pub compiler: CompilerIdentity,
}
66
67/// Walk `out` (read-only) and build the report. Errors only if `out` is not a readable directory — the
68/// builder asked to measure a tree that is not there (`Error::config`, exit 1); a *present but odd* tree
69/// is reported, never an error.
70pub fn run_size_report(opts: &SizeReportOptions) -> Result<SizeReport> {
71    let root = &opts.out;
72    if !root.is_dir() {
73        return Err(Error::config(format!(
74            "size-report: --out {} is not a readable directory",
75            root.display()
76        )));
77    }
78
79    let mut tzif_files = 0u64;
80    let mut symlink_links = 0u64;
81    let mut other_files = 0u64;
82    let mut total_bytes = 0u64;
83    let mut total_tzif_bytes = 0u64;
84    let mut largest_tzif: Option<TzifEntry> = None;
85    let mut version_histogram = [0u64; 4];
86    let mut footer_present = 0u64;
87    // (relpath, hash-or-symlink-marker) pairs for **every** entry — sorted before hashing for
88    // order-independence. We include non-TZif files (e.g. `manifest.json`) and symlink targets too, so
89    // `bundle_hash` witnesses the *entire* tree on disk, not just the timezone payload: any change to any
90    // bundled file (or a retargeted alias) changes the hash, which is what a reproducible-image builder needs.
91    let mut hash_lines: Vec<String> = Vec::new();
92
93    // Iterative DFS over the output tree (explicit stack, not recursion: bounded stack memory on a deep
94    // tree, and no `walkdir` dependency — the core stays lean). Traversal order does not matter: the report
95    // tallies are commutative and `hash_lines` is sorted before hashing.
96    let mut stack: Vec<PathBuf> = vec![root.clone()];
97    while let Some(dir) = stack.pop() {
98        let entries = std::fs::read_dir(&dir).map_err(|e| {
99            Error::config(format!("size-report: cannot read {}: {e}", dir.display()))
100        })?;
101        for entry in entries {
102            let entry =
103                entry.map_err(|e| Error::config(format!("size-report: dir entry error: {e}")))?;
104            let path = entry.path();
105            // `symlink_metadata` does NOT follow the link — so a symlink is classified as a link, never
106            // silently followed into its target's bytes.
107            let meta = std::fs::symlink_metadata(&path)
108                .map_err(|e| Error::config(format!("size-report: stat {}: {e}", path.display())))?;
109            let rel = rel_path(root, &path);
110            // Classification order is deliberate: **symlink first** (a symlink to a directory must NOT be
111            // recursed into, and a symlink to a TZif must NOT be counted as a second zone — it is an alias);
112            // then directories (recurse); then regular files (classify as TZif vs other).
113            if meta.file_type().is_symlink() {
114                symlink_links += 1;
115                let target = std::fs::read_link(&path)
116                    .map(|t| t.to_string_lossy().into_owned())
117                    .unwrap_or_default();
118                hash_lines.push(format!("{rel}\0symlink:{target}"));
119            } else if meta.is_dir() {
120                stack.push(path);
121            } else if meta.is_file() {
122                let bytes = std::fs::read(&path).map_err(|e| {
123                    Error::config(format!("size-report: read {}: {e}", path.display()))
124                })?;
125                let len = bytes.len() as u64;
126                total_bytes += len;
127                hash_lines.push(format!("{rel}\0{}", crate::hash::sha256_hex(&bytes)));
128                match crate::tzif::validate::parse(&bytes) {
129                    Ok(parsed) => {
130                        tzif_files += 1;
131                        total_tzif_bytes += len;
132                        if let Some(idx) = version_index(parsed.version) {
133                            version_histogram[idx] += 1;
134                        }
135                        if !parsed.footer.is_empty() {
136                            footer_present += 1;
137                        }
138                        // MSRV-safe (`Option::is_none_or` is 1.82+; the floor is 1.74).
139                        let is_larger = match &largest_tzif {
140                            Some(e) => len > e.bytes,
141                            None => true,
142                        };
143                        if is_larger {
144                            largest_tzif = Some(TzifEntry {
145                                rel: rel.clone(),
146                                bytes: len,
147                            });
148                        }
149                    }
150                    // A regular file that is not valid TZif (e.g. `manifest.json`, `alias-map.json`, or a
151                    // copy-mode link that happens not to parse) — counted + hashed above, but not a zone.
152                    Err(_) => other_files += 1,
153                }
154            }
155            // anything else (fifo/socket/device) is ignored — a zoneinfo tree never contains them.
156        }
157    }
158
159    hash_lines.sort();
160    let bundle_hash = crate::hash::sha256_hex(hash_lines.join("\n").as_bytes());
161
162    Ok(SizeReport {
163        root: root.to_string_lossy().into_owned(),
164        tzif_files,
165        symlink_links,
166        other_files,
167        total_bytes,
168        total_tzif_bytes,
169        largest_tzif,
170        version_histogram,
171        footer_present,
172        bundle_hash,
173        compiler: CompilerIdentity::capture(),
174    })
175}
176
/// Translate a TZif header version byte into its histogram slot.
///
/// The recognised bytes are NUL (v1 → 0), `'2'` (v2 → 1), `'3'` (v3 → 2) and `'4'` (v4 → 3);
/// any other byte yields `None`.
fn version_index(version: u8) -> Option<usize> {
    // Slot `i` of the histogram corresponds to the byte at position `i` of this table.
    const VERSION_BYTES: [u8; 4] = [0, b'2', b'3', b'4'];
    VERSION_BYTES.iter().position(|&known| known == version)
}
187
/// Express `path` relative to `root`, joining its components with `/` so the result is the same
/// on every platform. When `path` does not start with `root`, the whole path is returned instead
/// (lossily converted to UTF-8).
fn rel_path(root: &Path, path: &Path) -> String {
    match path.strip_prefix(root) {
        Ok(tail) => {
            let mut joined = String::new();
            for (i, part) in tail.components().enumerate() {
                if i > 0 {
                    joined.push('/');
                }
                joined.push_str(&part.as_os_str().to_string_lossy());
            }
            joined
        }
        Err(_) => path.to_string_lossy().into_owned(),
    }
}
201
202impl SizeReport {
203    /// Render as deterministic JSON (`zic-rs-size-report-v1`).
204    pub fn to_json(&self) -> String {
205        let mut s = String::new();
206        s.push_str("{\n");
207        s.push_str(&format!("  \"schema\": {},\n", escape(SCHEMA)));
208        s.push_str(&crate::manifest::provenance_block_json());
209        s.push_str(
210            "  \"non_claim\": \"size-report measures the output tree ON DISK (read-only). It does NOT \
211             distinguish a zone from a copy-mode link (a copied link is a byte-identical TZif; telling them \
212             apart needs alias-map.json), and makes NO runtime/reader approval claim (the reader gauntlet \
213             is future). bundle_hash is deterministic over the tree, not a signed attestation.\",\n",
214        );
215        s.push_str(&format!("  \"root\": {},\n", escape(&self.root)));
216        s.push_str(&format!("  \"tzif_files\": {},\n", self.tzif_files));
217        s.push_str(&format!("  \"symlink_links\": {},\n", self.symlink_links));
218        s.push_str(&format!("  \"other_files\": {},\n", self.other_files));
219        s.push_str(&format!("  \"total_bytes\": {},\n", self.total_bytes));
220        s.push_str(&format!(
221            "  \"total_tzif_bytes\": {},\n",
222            self.total_tzif_bytes
223        ));
224        match &self.largest_tzif {
225            Some(e) => s.push_str(&format!(
226                "  \"largest_tzif\": {{ \"path\": {}, \"bytes\": {} }},\n",
227                escape(&e.rel),
228                e.bytes
229            )),
230            None => s.push_str("  \"largest_tzif\": null,\n"),
231        }
232        let h = &self.version_histogram;
233        s.push_str(&format!(
234            "  \"version_histogram\": {{ \"v1\": {}, \"v2\": {}, \"v3\": {}, \"v4\": {} }},\n",
235            h[0], h[1], h[2], h[3]
236        ));
237        s.push_str(&format!("  \"footer_present\": {},\n", self.footer_present));
238        s.push_str(&format!(
239            "  \"bundle_hash\": {},\n",
240            escape(&self.bundle_hash)
241        ));
242        let c = &self.compiler;
243        let opt = |o: Option<&str>| o.map(escape).unwrap_or_else(|| "null".into());
244        s.push_str(&format!(
245            "  \"compiler_identity\": {{ \"zic_rs_version\": {}, \"rustc\": {}, \"target\": {}, \
246             \"profile\": {}, \"git_commit\": {} }}\n",
247            escape(c.zic_rs_version),
248            opt(c.rustc),
249            escape(&c.target),
250            escape(c.profile),
251            opt(c.git_commit),
252        ));
253        s.push_str("}\n");
254        s
255    }
256
257    /// Render as a short human-readable summary.
258    pub fn to_text(&self) -> String {
259        let h = &self.version_histogram;
260        let largest = self
261            .largest_tzif
262            .as_ref()
263            .map(|e| format!("{} ({} bytes)", e.rel, e.bytes))
264            .unwrap_or_else(|| "(none)".into());
265        format!(
266            "size-report for {root}\n\
267             TZif files:      {tzif} ({tzif_bytes} bytes)\n\
268             symlink links:   {links}\n\
269             other files:     {other}\n\
270             total on disk:   {total} bytes\n\
271             versions:        v1={v1} v2={v2} v3={v3} v4={v4}\n\
272             footer present:  {footer}\n\
273             largest TZif:    {largest}\n\
274             bundle_hash:     {hash}\n",
275            root = self.root,
276            tzif = self.tzif_files,
277            tzif_bytes = self.total_tzif_bytes,
278            links = self.symlink_links,
279            other = self.other_files,
280            total = self.total_bytes,
281            v1 = h[0],
282            v2 = h[1],
283            v3 = h[2],
284            v4 = h[3],
285            footer = self.footer_present,
286            largest = largest,
287            hash = self.bundle_hash,
288        )
289    }
290}