Skip to main content

module_info/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Embed metadata into ELF binaries as `.note.package` sections so it
5//! **survives crashes**, visible to `coredumpctl`, `readelf -n`, and any
6//! other consumer of the [systemd package-metadata
7//! format](https://uapi-group.org/specifications/specs/package_metadata_for_executable_files/).
8//! The crate's main feature is crash-dump preservation: when your process dies,
9//! the version of code that crashed is recoverable from the core dump without external
10//! symbol files or build-system context.
11//!
12//! Runtime read-back via the [`get_module_info!`] macro is a *convenience
13//! accessor*, useful while the process is still alive but not the reason
14//! the crate exists.
15//!
16//! Consumers call [`generate_project_metadata_and_linker_script`] from
17//! `build.rs` to generate the linker script and Cargo directives. At
18//! runtime, metadata fields can be read via [`get_module_info!`] (returns
19//! `ModuleInfoResult<String>` for a single field, or a `HashMap` of all
20//! readable fields when called with no arguments). On non-Linux platforms
21//! the crate exposes no-op stubs so cross-platform builds still compile;
22//! runtime accessors return `ModuleInfoError::NotAvailable`.
23//!
24//! See the README and the `examples/` directory for an end-to-end integration.
25//!
26//! # Limitations
27//!
28//! **`rlib` consumers read the host binary's metadata, not their own.**
29//! When a downstream library's `build.rs` calls
30//! [`generate_project_metadata_and_linker_script`], the resulting
31//! `cargo:rustc-link-arg=-T<linker_script>.ld` directive is attached to that
32//! library's own build and does not propagate to the final executable's link
33//! command, so the library's linker script never runs at the link step that
34//! produces the binary. Meanwhile, every [`get_module_info!`] call inside
35//! the library expands to an `extern "C" { static module_info_*: u8; }`
36//! declaration. At the final link those undefined references resolve
37//! against the executable's linker script, which defines a single set of
38//! `module_info_*` symbols pointing at the executable's `.note.package`
39//! payload, so library code reading them gets the executable's values. The
40//! same applies to anything statically linked into a Rust executable:
41//! `rlib`, `staticlib` linked via `#[link(kind = "static")]`, or in-tree
42//! workspace libraries.
43//!
44//! **`staticlib` consumed by an outer (non-cargo) build can embed its own
45//! metadata.** Set `EmbedOptions::emit_cargo_link_arg` to `false`; the
46//! crate then writes `linker_script.ld` to `out_dir` without emitting the
47//! `cargo:rustc-link-arg` directive. The outer build (Make, CMake, MSBuild,
48//! …) passes that script to its own linker, at which point the
49//! `module_info_*` symbols are defined by the staticlib's linker script and
50//! the staticlib reads its own metadata. See "Option B" in the README for
51//! the full flow.
52//!
53//! **`cdylib` shared libraries loaded via `dlopen` are not affected.** A
54//! `cdylib` runs its own link step and applies its own linker script, so
55//! the `module_info_*` symbols inside the resulting `.so` are local to it
56//! (not exported in the dynamic symbol table). Code inside the library
57//! reads its own metadata correctly, even when the host process's main
58//! executable also embeds `.note.package`. To consume a `cdylib`'s metadata
59//! at runtime, expose an `extern "C"` accessor and call it via `dlopen`;
60//! see `examples/sample_elf_bin_with_lib` for the full pattern. Reading a
61//! library file's metadata without loading it (e.g. for crash triage) is
62//! always possible by parsing the ELF note section from the `.so` on disk.
63//!
64//! **Little-endian targets only.** The ELF note header is serialized with
65//! `u32::to_le_bytes` at `build.rs` time. Supported targets today are
66//! `x86_64-unknown-linux-gnu`, `aarch64-unknown-linux-gnu`, and
67//! `i686-unknown-linux-gnu` (all little-endian). Cross-compiling for a
68//! big-endian Linux target (s390x, powerpc-be, mips-be) will silently emit
69//! a byte-swapped note section that `readelf -n` and `systemd-coredump`
70//! cannot parse. Adding big-endian support would mean selecting `to_le_bytes`
71//! vs `to_be_bytes` from `CARGO_CFG_TARGET_ENDIAN`.
72
73mod error;
74mod fields;
75// `#[macro_use]` makes the non-exported build-time helpers (note!/error!/
76// warn!/debug!) visible to sibling modules without exporting them.
77#[macro_use]
78mod macros;
79use cfg_if::cfg_if;
80pub use error::{ModuleInfoError, ModuleInfoResult};
81pub use fields::ModuleInfoField;
82
83cfg_if! {
84    if #[cfg(target_os = "linux")] {
85        use std::{env, path::{Path, PathBuf}};
86
87        mod constants;
88        mod metadata;
89        mod note_section;
90        mod utils;
91
92        pub use metadata::PackageMetadata;
93
94        pub(crate) use constants::*;
95    }
96}
97
98cfg_if! {
99    if #[cfg(all(feature = "embed-module-info", target_os = "linux"))] {
100        /// Static symbol that marks the beginning of our custom note section
101        ///
102        /// This empty array is placed in the .note.package section and serves as an anchor
103        /// for the linker script to place our metadata properly.
104        #[link_section = ".note.package"]
105        #[no_mangle]
106        #[used]
107        #[doc(hidden)]
108        pub static PACKAGE_NOTE_SECTION: [u8; 0] = [];
109
110        /// Force the `module_info` rlib to be linked into the consuming binary so the
111        /// `.note.package` section is emitted with ELF type `SHT_NOTE`.
112        ///
113        /// # Why this is needed
114        ///
115        /// The note data is produced by the linker script that `build.rs` generates.
116        /// GNU ld assigns `SHT_NOTE` to the output `.note.package` only when an
117        /// input object file contributes a same-named input section already typed
118        /// `SHT_NOTE`; this crate provides exactly that input section through the
119        /// `#[link_section = ".note.package"]` static `PACKAGE_NOTE_SECTION`.
120        /// Without a source-level reference to this crate, cargo/rustc drops the
121        /// `module_info` rlib from the final link, no `SHT_NOTE` input section is
122        /// present, and ld synthesizes the output section from the script's
123        /// `BYTE(...)` directives alone, which yields `SHT_PROGBITS`. The bytes
124        /// are present, but tools like `readelf -n` and `systemd-coredump` filter
125        /// by section type and ignore it.
126        ///
127        /// Invoking `module_info::embed!()` at the crate root creates a `#[used]`
128        /// reference to [`PACKAGE_NOTE_SECTION`], which forces the rlib to link and
129        /// restores the correct section type.
130        ///
131        /// # When to use it
132        ///
133        /// Use `embed!()` when the consuming crate does **not** call `get_module_info!`
134        /// or reference any other `module_info` item at runtime (pure build-time
135        /// embedding). When the consuming crate already calls
136        /// `module_info::get_module_info!(...)` or imports any item from the crate,
137        /// this macro is unnecessary; the rlib is already linked.
138        ///
139        /// # Example
140        ///
141        /// ```ignore
142        /// // Top of src/main.rs or src/lib.rs:
143        /// module_info::embed!();
144        ///
145        /// fn main() {
146        ///     // No other module_info references needed for the .note.package
147        ///     // section to end up in the binary with SHT_NOTE type.
148        /// }
149        /// ```
150        #[macro_export]
151        macro_rules! embed {
152            () => {
153                #[allow(dead_code)]
154                const _: () = {
155                    #[used]
156                    static __MODULE_INFO_FORCE_LINK: &'static [u8; 0] =
157                        &$crate::PACKAGE_NOTE_SECTION;
158                };
159            };
160        }
161    } else if #[cfg(all(feature = "embed-module-info", not(target_os = "linux")))] {
162        /// No-op stub of `embed!` for non-Linux targets. Present so
163        /// cross-platform builds compile without `#[cfg]` guards at each call site.
164        #[macro_export]
165        macro_rules! embed {
166            () => {};
167        }
168    } else {
169        /// No-op stub of `embed!` for feature-off builds (the
170        /// `embed-module-info` feature is disabled). Present so a consumer that
171        /// uses `module_info` only for `get_version()` / `get_module_version()`
172        /// can still call `module_info::embed!()` in their crate root without a
173        /// feature-gated `#[cfg]` guard. The macro expands to nothing because
174        /// there is no note section to anchor when the feature is off.
175        #[macro_export]
176        macro_rules! embed {
177            () => {};
178        }
179    }
180}
181
/// Options controlling how [`embed_package_metadata`] writes artifacts and
/// whether it emits cargo link-arg directives.
///
/// `EmbedOptions::default()` preserves the original zero-config behavior:
/// write to `$OUT_DIR` and emit `cargo:rustc-link-arg=-T<linker_script.ld>`.
/// Override when the crate is a static library whose final link happens later
/// in the outer build system.
///
/// # Non-exhaustive
///
/// This struct is `#[non_exhaustive]` so new options can land without a
/// SemVer break. Code outside this crate cannot use struct-literal syntax
/// (not even with `..Default::default()`); start from
/// `EmbedOptions::default()` and assign the fields you want to change, as in
/// the example below.
///
/// # Example
/// ```rust,no_run
/// # use module_info::EmbedOptions;
/// // Static-library flow: write the linker script to a directory the outer
/// // build system knows about, so it can pass the script to the final linker.
/// // In practice `out_dir` comes from an env var the outer build sets, or a
/// // subdirectory of `OUT_DIR`; here we use `env::temp_dir()` as a portable
/// // placeholder. `EmbedOptions` is `#[non_exhaustive]`, so construct via
/// // `Default` and assign fields rather than using struct-literal syntax.
/// let mut opts = EmbedOptions::default();
/// opts.out_dir = Some(std::env::temp_dir().join("module_info_linker"));
/// opts.emit_cargo_link_arg = false;
/// ```
#[cfg(target_os = "linux")]
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct EmbedOptions {
    /// Directory where the generated artifacts are written:
    /// `linker_script.ld`, the `linker_script_body.ld.inc` fragment,
    /// `note.package.bin`, and `module_info.json`. The directory is created
    /// if it does not exist. When `None`, the `OUT_DIR` environment variable
    /// is used (the normal cargo build-script case).
    pub out_dir: Option<PathBuf>,

    /// When `true`, emit `cargo:rustc-link-arg=-T<path-to-linker_script.ld>`
    /// on stdout so cargo passes the script to the final link step.
    ///
    /// Set to `false` when the current crate is a static library whose final
    /// link happens later in the outer build system. Have that system pass
    /// the linker script to its own linker.
    pub emit_cargo_link_arg: bool,
}
225
226#[cfg(target_os = "linux")]
227impl Default for EmbedOptions {
228    fn default() -> Self {
229        Self {
230            out_dir: None,
231            emit_cargo_link_arg: true,
232        }
233    }
234}
235
/// Artifacts written by [`embed_package_metadata`].
///
/// Returned so consumers can log, inspect, or pass paths to a later build
/// step (for the static-library flow, typically `linker_script_path`).
///
/// All paths are located inside the output directory that was used for the
/// call, so they are absolute whenever that directory is (always the case
/// for cargo's `OUT_DIR`).
///
/// # Non-exhaustive
///
/// `#[non_exhaustive]`. Constructed by the crate, not by consumers.
#[cfg(target_os = "linux")]
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct EmbedArtifacts {
    /// Path to the generated linker script (`linker_script.ld`).
    pub linker_script_path: PathBuf,
    /// Path to the raw `.note.package` binary dump.
    pub note_bin_path: PathBuf,
    /// Path to the embedded JSON metadata (`module_info.json`). One
    /// key:value pair per line; the file holds the same text as the `json`
    /// field of this struct.
    pub json_path: PathBuf,
    /// JSON string written to `module_info.json` and embedded as the note
    /// section's descriptor. One key:value pair per line (not strictly
    /// "compact"); the runtime scan in `extract_module_info` tolerates the
    /// embedded newlines.
    pub json: String,
    /// Byte-encoded linker script body that produced `linker_script.ld`.
    /// The same text (behind a one-line header comment) is also written to
    /// `linker_script_body.ld.inc` in the output directory.
    pub linker_script_body: String,
}
264
/// Convenience struct-literal view over [`PackageMetadata`] with field names
/// shaped like the JSON keys rather than the internal Rust snake_case names.
///
/// `Info` exists so call sites can read the same way the embedded JSON reads:
/// `r#type`, `moduleVersion`, `osVersion` instead of `module_type`,
/// `module_version`, `os_version`. It's deliberately **not** `#[non_exhaustive]`:
/// struct-literal construction is the whole point. Pass it to [`new`] to build
/// the note artifacts in one call; the example under *Forward compatibility*
/// below shows the full pattern.
///
/// # Forward compatibility
///
/// **Always terminate the struct literal with `..Default::default()`.** Unlike
/// [`PackageMetadata`] (which is `#[non_exhaustive]` and forbids struct-literal
/// construction from outside the crate, forcing consumers into the
/// field-assignment pattern that is intrinsically forward-compatible), `Info`
/// permits a fully-exhaustive literal. That means a minor release of this
/// crate that adds a new field will break any `Info { … }` call site that
/// listed every field by name. The `..Default::default()` terminator is how
/// consumers buy forward compatibility: new fields fall back to their
/// `Default` value (empty string / disabled) instead of failing to compile.
/// This is the *only* reason `Info` is safe to add fields to in minor
/// releases. Omit the terminator and the crate can no longer do that
/// without breaking you.
///
/// ```rust,no_run
/// # use module_info::Info;
/// let _ = module_info::new(Info {
///     binary: "my_tool".into(),
///     name: "my_tool".into(),
///     maintainer: "team@contoso.com".into(),
///     version: "1.2.3".into(),
///     moduleVersion: "1.2.3.4".into(),
///     os: "linux".into(),
///     osVersion: "22.04".into(),
///     r#type: "agent".into(),
///     hash: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".into(),
///     ..Default::default()
/// });
/// ```
///
/// Under the hood `new` converts this to a [`PackageMetadata`] and calls
/// [`embed_package_metadata`] with [`EmbedOptions::default()`].
///
/// # No auto-detection on this path
///
/// Every field in the `Info` literal ships verbatim. `os`/`osVersion` are
/// **not** read from `/etc/os-release`, and `repo`/`branch`/`hash` are
/// **not** read from git. The caller owns every value. If you want the
/// `/etc/os-release` + git auto-detection that the zero-config entry point
/// provides, reach for [`PackageMetadata::from_cargo_toml`] instead,
/// mutate the fields you want to override, and pass the result to
/// [`embed_package_metadata`].
///
/// # Disabling fields
///
/// Seven keys are required at validation time:
/// `binary`, `version`, `moduleVersion`, `name`, `maintainer`, `os`, and
/// `osVersion`. The rest (`r#type`, `repo`, `branch`, `hash`, `copyright`)
/// may be left as the empty string (the `Default` value);
/// `..Default::default()` in the literal above is the idiomatic way to
/// opt out. The embedded JSON still carries every key (the
/// `.note.package` layout is fixed), but the value ships as `""`, which
/// downstream tooling can treat as "disabled."
///
/// # `r#type` tradeoff
///
/// The JSON key is `type`, which collides with Rust's `type` keyword. We use
/// the raw-identifier form `r#type` rather than a `#[serde(rename = "type")]`
/// alias on a differently-named field (say, `module_type`), because the
/// latter would require call sites to remember the rename when constructing
/// the struct literal, re-creating the original mismatch this type is meant
/// to solve. `r#type` is ugly but pays off once: downstream construction
/// reads `r#type: "agent".into()` and the JSON reads `"type":"agent"`.
#[cfg(target_os = "linux")]
#[allow(non_snake_case)] // JSON-key-shaped field names (moduleVersion, osVersion) are intentional.
#[derive(Debug, Clone, Default)]
pub struct Info {
    /// Binary name (matches JSON key `binary`). Required.
    pub binary: String,
    /// Crate version, supplied by the caller; nothing is read from
    /// Cargo.toml on this path (matches JSON key `version`). Required.
    pub version: String,
    /// Full 4-part module version, e.g. `1.2.3.4`; each part must fit in a
    /// `u16` (matches JSON key `moduleVersion`). Required.
    pub moduleVersion: String,
    /// Maintainer contact information (matches JSON key `maintainer`).
    /// Required.
    pub maintainer: String,
    /// Package name (matches JSON key `name`). Required.
    pub name: String,
    /// Module type: agent, library, executable, etc. (matches JSON key `type`).
    pub r#type: String,
    /// Git repository name (matches JSON key `repo`).
    pub repo: String,
    /// Git branch name (matches JSON key `branch`).
    pub branch: String,
    /// Git commit hash (matches JSON key `hash`).
    pub hash: String,
    /// Copyright information (matches JSON key `copyright`).
    pub copyright: String,
    /// Operating system name (matches JSON key `os`). Required.
    pub os: String,
    /// Operating system version (matches JSON key `osVersion`). Required.
    pub osVersion: String,
}
367
368#[cfg(target_os = "linux")]
369impl From<Info> for PackageMetadata {
370    fn from(info: Info) -> Self {
371        PackageMetadata {
372            binary: info.binary,
373            version: info.version,
374            module_version: info.moduleVersion,
375            maintainer: info.maintainer,
376            name: info.name,
377            module_type: info.r#type,
378            repo: info.repo,
379            branch: info.branch,
380            hash: info.hash,
381            copyright: info.copyright,
382            os: info.os,
383            os_version: info.osVersion,
384        }
385    }
386}
387
388/// One-call entry point: convert [`Info`] → [`PackageMetadata`] and embed via
389/// [`embed_package_metadata`] with [`EmbedOptions::default()`].
390///
391/// Use this from `build.rs` when you want to supply metadata programmatically
392/// without touching `Cargo.toml` and don't need to override any
393/// [`EmbedOptions`] (custom `out_dir`, suppressed `cargo:rustc-link-arg`, …).
394/// For those cases, convert `Info` to `PackageMetadata` with `.into()` and
395/// call [`embed_package_metadata`] directly.
396///
397/// # Errors
398/// Propagates everything [`embed_package_metadata`] can return, plus
399/// `ModuleInfoError::MalformedJson` if `moduleVersion` is not four
400/// dot-separated numeric parts that each fit in a `u16`.
401#[cfg(target_os = "linux")]
402#[must_use = "new returns EmbedArtifacts; discarding it hides both the written paths and any I/O errors"]
403pub fn new(info: Info) -> ModuleInfoResult<EmbedArtifacts> {
404    embed_package_metadata(&info.into(), &EmbedOptions::default())
405}
406
407/// Validate that `module_version` is exactly four dot-separated numeric parts,
408/// each of which fits in a `u16` (0..=65535).
409///
410/// This mirrors the Windows `VS_FIXEDFILEINFO::FILEVERSION` shape (four
411/// `WORD`-sized components) that Windows-style crash consumers expect to
412/// parse. An out-of-range value silently truncating on the consumer side
413/// would be worse than failing the build, so we enforce the range at embed
414/// time.
415#[cfg(target_os = "linux")]
416fn validate_module_version(module_version: &str) -> ModuleInfoResult<()> {
417    let parts: Vec<&str> = module_version.split('.').collect();
418    if parts.len() != 4 {
419        return Err(ModuleInfoError::MalformedJson(format!(
420            "moduleVersion must have exactly 4 dot-separated parts, got {} in {module_version:?}",
421            parts.len()
422        )));
423    }
424    for (i, part) in parts.iter().enumerate() {
425        if part.is_empty() {
426            return Err(ModuleInfoError::MalformedJson(format!(
427                "moduleVersion part {i} is empty in {module_version:?}"
428            )));
429        }
430        if part.parse::<u16>().is_err() {
431            return Err(ModuleInfoError::MalformedJson(format!(
432                "moduleVersion part {i} ({part:?}) must be a non-negative integer \
433                 that fits in 16 bits (0..=65535) in {module_version:?}"
434            )));
435        }
436    }
437    Ok(())
438}
439
440/// Embed a [`PackageMetadata`] value into ELF note artifacts on disk.
441///
442/// Consumers that want to supply metadata programmatically (e.g. from
443/// `build.rs` without editing `Cargo.toml`) or suppress the
444/// `cargo:rustc-link-arg` directive (e.g. a static library whose final link
445/// happens in a later build step) call this directly; the zero-config
446/// [`generate_project_metadata_and_linker_script`] is a thin wrapper over
447/// this function with the default options.
448///
449/// # Errors
450/// Returns `ModuleInfoError::MetadataTooLarge` if the serialized JSON exceeds
451/// the 1 KiB `.note.package` payload limit, or `ModuleInfoError::MalformedJson`
452/// if a required field is missing. `IoError` on filesystem failures.
453#[cfg(target_os = "linux")]
454#[must_use = "embed_package_metadata returns EmbedArtifacts; discarding it hides both the written paths and any I/O errors"]
455pub fn embed_package_metadata(
456    md: &PackageMetadata,
457    opts: &EmbedOptions,
458) -> ModuleInfoResult<EmbedArtifacts> {
459    // Emit rerun directives *before* any failure path. Emitting any `cargo:`
460    // directive opts out of cargo's default "rerun on any file change", so
461    // without these the build script wouldn't re-run when Cargo.toml, git
462    // HEAD, or env vars change. Stamped metadata would silently go stale.
463    emit_rerun_if_directives();
464
465    let (compact_json, linker_script_body) = metadata::render_note_payloads(md)?;
466
467    validate_embedded_json(&compact_json)?;
468
469    note!();
470    note!("-- Module Info --");
471    emit_metadata_notes(&compact_json);
472
473    let out_dir: PathBuf = match &opts.out_dir {
474        Some(p) => p.clone(),
475        None => PathBuf::from(env::var("OUT_DIR")?),
476    };
477    debug!("OUT_DIR: {}", out_dir.display());
478
479    std::fs::create_dir_all(&out_dir)?;
480    // `.ld.inc` signals include fragment (no SECTIONS/INSERT wrapper;
481    // inlined inside linker_script.ld).
482    let linker_script_body_path = out_dir.join("linker_script_body.ld.inc");
483    debug!(
484        "Writing linker script body to: {}",
485        linker_script_body_path.display()
486    );
487    // Header comment + trim of leading blank line prevents the standalone file
488    // from looking like a truncated linker script.
489    let linker_script_body_on_disk = format!(
490        "/* Linker-script fragment. Inlined inside linker_script.ld; not a standalone script. */\n{}",
491        linker_script_body.trim_start_matches('\n')
492    );
493    std::fs::write(
494        &linker_script_body_path,
495        linker_script_body_on_disk.as_bytes(),
496    )?;
497
498    let json_path = out_dir.join("module_info.json");
499    debug!("Writing module info to: {}", json_path.display());
500    std::fs::write(&json_path, compact_json.as_bytes())?;
501
502    // Descriptor must include the same NUL padding the linker script emits
503    // after the JSON (see `render_note_payloads`); otherwise `descsz` covers
504    // only JSON bytes while the section includes padding, and `readelf -n`
505    // warns "Corrupt note: only N bytes remain".
506    let padding = NOTE_ALIGN - (compact_json.len() % NOTE_ALIGN);
507    let mut descriptor = String::with_capacity(compact_json.len() + padding);
508    descriptor.push_str(&compact_json);
509    for _ in 0..padding {
510        descriptor.push('\0');
511    }
512
513    let note = note_section::NoteSection::new(
514        N_TYPE,
515        OWNER,
516        &descriptor,
517        &linker_script_body,
518        NOTE_ALIGN,
519    )?;
520    debug!(
521        "Created note section with {} bytes of data",
522        note.note_section.len()
523    );
524
525    // Strip the leading `.` so the dump isn't a dotfile hidden by default.
526    let note_bin_path = out_dir.join(format!("{}.bin", NOTE_SECTION_NAME.trim_start_matches('.')));
527    debug!("Saving binary note section to: {}", note_bin_path.display());
528    note.save_section(&note_bin_path)?;
529
530    debug!("Saving linker script...");
531    let linker_script_path = note.save_linker_script(&out_dir)?;
532    debug!("Linker script saved to: {}", linker_script_path.display());
533
534    match link_arg_directive(&linker_script_path, opts.emit_cargo_link_arg) {
535        Some(d) => {
536            debug!("Adding cargo directive: {}", d);
537            println!("{d}");
538        }
539        None => {
540            debug!(
541                "emit_cargo_link_arg=false: caller will pass {} to the final linker",
542                linker_script_path.display()
543            );
544        }
545    }
546
547    Ok(EmbedArtifacts {
548        linker_script_path,
549        note_bin_path,
550        json_path,
551        json: compact_json,
552        linker_script_body,
553    })
554}
555
556/// Validate the serialized metadata JSON: size limit, object shape, required fields.
557#[cfg(target_os = "linux")]
558fn validate_embedded_json(desc_json: &str) -> ModuleInfoResult<()> {
559    if desc_json.len() > constants::MAX_JSON_SIZE {
560        return Err(ModuleInfoError::MetadataTooLarge(format!(
561            "Metadata size {} exceeds limit of {} bytes",
562            desc_json.len(),
563            constants::MAX_JSON_SIZE
564        )));
565    }
566
567    let value: serde_json::Value = serde_json::from_str(desc_json)
568        .map_err(|e| ModuleInfoError::MalformedJson(e.to_string()))?;
569
570    if !value.is_object() {
571        return Err(ModuleInfoError::MalformedJson(
572            "Metadata must be a JSON object".to_string(),
573        ));
574    }
575
576    for field in constants::REQUIRED_JSON_KEYS {
577        // `PackageMetadata` derives `Serialize` with no skip_if, so every key
578        // is always present in the JSON; a bare `is_none()` check here would
579        // pass a `PackageMetadata::default()` value through untouched. Treat
580        // both "missing key" and "empty string value" as missing so a
581        // Default-constructed `PackageMetadata` with a forgotten required
582        // field fails the build instead of silently embedding `""`.
583        let present_and_nonempty = value
584            .get(field)
585            .and_then(|v| v.as_str())
586            .map(|s| !s.is_empty())
587            .unwrap_or(false);
588        if !present_and_nonempty {
589            return Err(ModuleInfoError::MalformedJson(format!(
590                "Required field '{field}' is missing or empty"
591            )));
592        }
593    }
594
595    // `moduleVersion` is a required key and the loop above has already
596    // rejected any payload where it's missing, non-string, or empty. Fetch
597    // it unconditionally here; an `if let Some(...) = ...` arm would silently
598    // no-op if a future refactor ever split the required-keys check from the
599    // presence check, letting malformed payloads slip through a code path
600    // that is supposed to be the range guardrail. Using `.ok_or_else` makes
601    // the dependency on the loop above load-bearing and visible.
602    let mv = value
603        .get("moduleVersion")
604        .and_then(|v| v.as_str())
605        .ok_or_else(|| {
606            ModuleInfoError::MalformedJson(
607                "moduleVersion must be a non-empty string by this point (required-keys check above enforces it)"
608                    .to_string(),
609            )
610        })?;
611    validate_module_version(mv)?;
612
613    Ok(())
614}
615
616/// Print metadata key/value pairs as cargo `note!` lines in a stable order.
617#[cfg(target_os = "linux")]
618fn emit_metadata_notes(desc_json: &str) {
619    // Presentation step only; validation already ran. Log via `debug!` so
620    // `MODULE_INFO_DEBUG=true` reveals why the notes pane is empty on the
621    // impossible case of a parse failure slipping through.
622    let map = match serde_json::from_str::<serde_json::Value>(desc_json) {
623        Ok(serde_json::Value::Object(map)) => map,
624        Ok(other) => {
625            debug!("emit_metadata_notes: expected a JSON object, got {}", other);
626            return;
627        }
628        Err(e) => {
629            debug!("emit_metadata_notes: JSON parse failed: {}", e);
630            return;
631        }
632    };
633
634    // Walk `ModuleInfoField::ALL` for stable order. No extra-keys fallback
635    // needed: `PackageMetadata` is `#[non_exhaustive]`, so the key set is
636    // always exactly `ModuleInfoField::ALL`.
637    for field in ModuleInfoField::ALL {
638        let key = field.to_key();
639        if let Some(value) = map.get(key) {
640            match value.as_str() {
641                Some(s) => note!("{}: {}", key, s),
642                None => note!("{}: {}", key, value.to_string()),
643            }
644        }
645    }
646}
647
/// Build the `cargo:rustc-link-arg=-T<path>` directive for the generated
/// linker script.
///
/// Returns `None` when `emit` is `false`, i.e. the caller opted out through
/// `EmbedOptions::emit_cargo_link_arg = false`. The directive is returned
/// rather than printed so tests can check the gating without capturing
/// stdout.
#[cfg(target_os = "linux")]
fn link_arg_directive(linker_script_path: &Path, emit: bool) -> Option<String> {
    if !emit {
        return None;
    }
    let script = linker_script_path.display();
    Some(format!("cargo:rustc-link-arg=-T{script}"))
}
662
/// Emit `cargo:rerun-if-changed` / `cargo:rerun-if-env-changed` directives
/// covering the inputs this crate reads.
///
/// Cargo's default behavior is "rerun build.rs on any file change in the
/// crate directory." Emitting *any* `cargo:` directive (we emit
/// `rustc-link-arg` further down) flips cargo into explicit-only mode,
/// after which it reruns only when a path/env we name here changes. So we
/// have to list every input, or builds silently reuse stale stamped metadata.
///
/// What we cover:
/// - `Cargo.toml` - `[package]` version/name + `[package.metadata.module_info]`
/// - `build.rs` - the caller's build script itself
/// - git `HEAD`, `refs/`, and `packed-refs` - so branch switches and new
///   commits retrigger the git-derived fields (`branch`, `hash`, `repo`).
///   The repository is found by walking up from the working directory to
///   the nearest `.git`, so a crate inside a workspace or monorepo is
///   covered too; `.git` pointer files (worktrees, submodules) are followed.
/// - `/etc/os-release` - so a distro upgrade retriggers `os` / `osVersion`
/// - `MODULE_INFO_DEBUG` - the crate's own debug knob
/// - `CARGO_PKG_*` env vars - Cargo sets these from Cargo.toml so they're
///   technically redundant with `rerun-if-changed=Cargo.toml`, but listing
///   them is cheap and removes a foot-gun if a caller ever sets them
///   externally.
///
/// Paths that do not exist are **not** emitted. Cargo treats a watched path
/// it cannot stat as permanently stale and reruns the build script on every
/// build, so naming e.g. an absent `packed-refs` would defeat incremental
/// builds rather than act as a no-op.
///
/// What we *don't* cover: caller-custom env vars (e.g. `BUILD_BUILDNUMBER`
/// named via `[package.metadata.module_info].version_env_var_name`). The
/// zero-config path emits those itself in `collect_package_metadata` because
/// only that path knows the names. Builder-API consumers that read arbitrary
/// env vars must emit their own `cargo:rerun-if-env-changed=<name>` for
/// each, the crate can't guess.
#[cfg(target_os = "linux")]
fn emit_rerun_if_directives() {
    // Relative paths resolve against the build script's working directory,
    // which cargo sets to the package root.
    for path in ["Cargo.toml", "build.rs", "/etc/os-release"] {
        emit_rerun_if_changed_when_present(Path::new(path));
    }

    if let Some((git_dir, common_dir)) = locate_git_dirs() {
        for path in [
            git_dir.join("HEAD"),
            common_dir.join("refs"),
            common_dir.join("packed-refs"),
        ] {
            emit_rerun_if_changed_when_present(&path);
        }
    }

    // Env vars the crate itself reads. Custom ones named in Cargo.toml are
    // handled in `collect_package_metadata`.
    for env_var in ["MODULE_INFO_DEBUG", "CARGO_PKG_NAME", "CARGO_PKG_VERSION"] {
        println!("cargo:rerun-if-env-changed={env_var}");
    }
}

/// Print `cargo:rerun-if-changed=<path>` only when `path` currently exists.
#[cfg(target_os = "linux")]
fn emit_rerun_if_changed_when_present(path: &Path) {
    if path.exists() {
        println!("cargo:rerun-if-changed={}", path.display());
    }
}

/// Find the git directory for the working directory and the directory that
/// holds its refs, returned as `(git_dir, common_dir)`.
///
/// Walks up to the nearest `.git`. A `.git` *file* (`gitdir: <path>`, used by
/// worktrees and submodules) is followed to the real git directory, and a
/// `commondir` file inside it redirects the refs location. Returns `None`
/// when no repository is found or a pointer file cannot be read.
#[cfg(target_os = "linux")]
fn locate_git_dirs() -> Option<(PathBuf, PathBuf)> {
    let cwd = env::current_dir().ok()?;
    let dot_git = cwd
        .ancestors()
        .map(|dir| dir.join(".git"))
        .find(|candidate| candidate.exists())?;

    let git_dir = if dot_git.is_dir() {
        dot_git
    } else {
        let pointer = std::fs::read_to_string(&dot_git).ok()?;
        let target = pointer.trim().strip_prefix("gitdir:")?.trim();
        // A relative target is relative to the directory holding the file;
        // `join` keeps an absolute target as-is.
        dot_git.parent()?.join(target)
    };

    // In a linked worktree HEAD is per-worktree but refs live in the shared
    // repository named by `commondir` (relative to the git directory).
    let common_dir = match std::fs::read_to_string(git_dir.join("commondir")) {
        Ok(relative) => git_dir.join(relative.trim()),
        Err(_) => git_dir.clone(),
    };

    Some((git_dir, common_dir))
}
713
714/// Zero-configuration build-script entry point.
715///
716/// Reads metadata from `Cargo.toml`, env overrides, git, and OS release info,
717/// then embeds it via [`embed_package_metadata`] with
718/// [`EmbedOptions::default()`]. Reach for [`embed_package_metadata`] directly
719/// when you need to supply metadata programmatically or suppress the
720/// `cargo:rustc-link-arg` directive.
721///
722/// # IMPORTANT
723/// Only call from `build.rs`. Cargo sets `OUT_DIR` and related variables for
724/// build scripts; outside that context the call will fail.
725///
726/// # Example
727/// ```rust,no_run
728/// // In build.rs
729/// fn main() -> Result<(), Box<dyn std::error::Error>> {
730///     module_info::generate_project_metadata_and_linker_script()?;
731///     Ok(())
732/// }
733/// ```
734///
735/// # Errors
736/// Returns an error if metadata generation or file operations fail.
737#[cfg(target_os = "linux")]
738#[must_use = "build.rs must propagate errors from this function, otherwise a missing linker script will silently break the ELF note section"]
739pub fn generate_project_metadata_and_linker_script() -> Result<(), Box<dyn std::error::Error>> {
740    let md = PackageMetadata::from_cargo_toml().map_err(|e| {
741        error!("Failed to get project metadata: {}", e);
742        e
743    })?;
744    // Named binding (not `let _`) so `#[must_use]` keeps firing on future
745    // signature changes; paths below are useful in build logs.
746    let artifacts = embed_package_metadata(&md, &EmbedOptions::default())?;
747    debug!(
748        "Wrote linker script: {}",
749        artifacts.linker_script_path.display()
750    );
751    Ok(())
752}
753
/// Non-Linux stand-in for the zero-configuration build-script entry point.
///
/// Does no work and always returns `Ok(())`, so a `build.rs` that calls this
/// function unconditionally still builds on targets where no `.note.package`
/// section is generated.
///
/// # Errors
/// Never returns an error on this platform; the `Result` return type only
/// mirrors the Linux signature.
#[cfg(not(target_os = "linux"))]
#[must_use = "build.rs must propagate errors from this function, otherwise a missing linker script will silently break the ELF note section"]
pub fn generate_project_metadata_and_linker_script() -> Result<(), Box<dyn std::error::Error>> {
    Ok(())
}
759
/// Prints every module-info field to stdout, one `key: value` line per field.
///
/// The fields are read through `get_module_info!()`. Fields missing from the
/// returned map are printed as `<unavailable>`. Handy for debugging or for a
/// `--version`-style dump in command-line tools.
///
/// # Examples
///
/// Basic usage with simple error handling:
/// ```rust,no_run
/// if module_info::print_module_info().is_ok() {
///     println!("Module info displayed successfully");
/// }
/// ```
///
/// Error handling:
/// ```rust,no_run
/// use module_info::{print_module_info, ModuleInfoError};
///
/// match print_module_info() {
///     Ok(_) => println!("Module info displayed successfully"),
///     Err(ModuleInfoError::NotAvailable(msg)) => eprintln!("Module info not available: {}", msg),
///     Err(e) => eprintln!("Failed to display module info: {}", e),
/// }
/// ```
///
/// # Errors
///
/// - Any error produced by `get_module_info!()` is returned unchanged.
/// - `ModuleInfoError::NotAvailable` if any of the seven required
///   identity-plus-platform fields (`binary`, `version`, `moduleVersion`,
///   `name`, `maintainer`, `os`, `osVersion`) is missing or empty, which
///   suggests the metadata is missing or corrupted. Nothing is printed in
///   that case.
/// - `ModuleInfoError::NotAvailable` on platforms where module info isn't
///   supported (returned by the stub variant of this function).
///
/// # Note
/// This implementation is compiled only when the "embed-module-info" feature
/// is enabled *and* the target OS is Linux. In every other configuration a
/// stub with the same signature returns `NotAvailable`, so cross-platform
/// callers compile unchanged.
#[cfg(all(feature = "embed-module-info", target_os = "linux"))]
#[must_use = "print_module_info returns a Result indicating whether the embedded note section was readable; ignoring it will hide missing-metadata errors"]
pub fn print_module_info() -> ModuleInfoResult<()> {
    // The zero-argument macro form yields the map of all readable fields;
    // a failure there propagates via `?`.
    let fields = get_module_info!()?;

    // Optional fields may legitimately be empty (see README "Disabling
    // fields"), so only the required keys are validated.
    let mut absent: Vec<&str> = Vec::new();
    for required in constants::REQUIRED_JSON_KEYS.iter().copied() {
        let populated = matches!(fields.get(required), Some(value) if !value.is_empty());
        if !populated {
            absent.push(required);
        }
    }
    if !absent.is_empty() {
        let listed = absent.join(", ");
        return Err(ModuleInfoError::NotAvailable(format!(
            "Module info appears to be missing or corrupted: required field(s) missing or empty: {}",
            listed
        )));
    }

    for field in ModuleInfoField::ALL {
        let label = field.to_key();
        if let Some(value) = fields.get(label) {
            println!("{label}: {value}");
        } else {
            println!("{label}: <unavailable>");
        }
    }
    Ok(())
}
833
834/// Non-Linux stub: the embedded note section only exists on Linux, so there's
835/// nothing to read. Returns `NotAvailable` with a platform-specific message,
836/// matching the non-Linux `get_module_info!` macro so cross-platform callers
837/// don't need their own `#[cfg]` gate.
838#[cfg(any(not(feature = "embed-module-info"), not(target_os = "linux")))]
839#[must_use = "print_module_info returns a Result indicating whether the embedded note section was readable; ignoring it will hide missing-metadata errors"]
840pub fn print_module_info() -> ModuleInfoResult<()> {
841    Err(ModuleInfoError::NotAvailable(
842        "Module info is only available on Linux platforms with the embed-module-info feature enabled.".to_string(),
843    ))
844}
845
/// Returns the embedded `version` field (from `Cargo.toml`'s `package.version`
/// or `version_env_var_name`) as a `String`.
///
/// Thin wrapper around `get_module_info!(ModuleInfoField::Version)`; the
/// macro's result is returned unchanged. See the crate-level "Limitations"
/// section for shared-library symbol-resolution caveats.
///
/// This function is compiled only when the `embed-module-info` feature is
/// enabled; without the feature it does not exist.
///
/// # Errors
///
/// Returns `ModuleInfoError::NotAvailable` on non-Linux targets. Returns
/// `NullPointer`, `Utf8Error`, or `MalformedJson` if the note section is
/// missing or corrupt.
#[cfg(feature = "embed-module-info")]
#[must_use = "get_version returns the embedded version string; discarding it hides missing-metadata errors"]
pub fn get_version() -> ModuleInfoResult<String> {
    get_module_info!(ModuleInfoField::Version)
}
863
/// Returns the embedded `moduleVersion` field (a 4-part identifier typically
/// produced by the build pipeline; see `module_version_env_var_name` in
/// `Cargo.toml`'s `[package.metadata.module_info]`).
///
/// Thin wrapper around `get_module_info!(ModuleInfoField::ModuleVersion)`,
/// compiled only when the `embed-module-info` feature is enabled.
///
/// See [`get_version`] for symbol-resolution and error semantics.
///
/// # Errors
///
/// Returns whatever error the single-field form of `get_module_info!`
/// produces; the failure modes are the ones documented on [`get_version`],
/// which forwards to the same macro.
#[cfg(feature = "embed-module-info")]
#[must_use = "get_module_version returns the embedded 4-part module version; discarding it hides missing-metadata errors"]
pub fn get_module_version() -> ModuleInfoResult<String> {
    get_module_info!(ModuleInfoField::ModuleVersion)
}
874
/// Extract a single module-info field from a linker-script-placed symbol.
///
/// Scans forward from `ptr` for a JSON string value (`"..."`) and returns the
/// bytes between the first two `"` characters as an owned `String`. The scan
/// stops at the closing quote, at the first NUL byte, or after
/// `MAX_JSON_SIZE + NOTE_ALIGN` bytes, whichever comes first.
///
/// Prefer the [`get_module_info!`] macro: it declares the matching extern
/// static and forwards its address here, so the caller never holds a raw
/// pointer.
///
/// # Safety
/// `ptr` must point to a valid, properly aligned, null-terminated byte
/// sequence inside the read-only `.note.package` payload (i.e. the address
/// of one of the `module_info_*` symbols emitted by the linker script). The
/// memory must remain valid for the duration of the call. Passing any other
/// pointer is undefined behavior. The internal scan is bounded by
/// `MAX_JSON_SIZE + NOTE_ALIGN`, so a missing/corrupted section produces
/// `MalformedJson` rather than reading off the end.
///
/// # Errors
/// - `ModuleInfoError::NullPointer` if `ptr` is null
/// - `ModuleInfoError::Utf8Error` if the bytes are not valid UTF-8
/// - `ModuleInfoError::MalformedJson` if the section is missing/stripped or
///   the value is not surrounded by `"` characters
/// - `ModuleInfoError::NotAvailable` on non-Linux targets
///
/// # Example
/// ```rust,no_run
/// use module_info::{get_module_info, ModuleInfoField, ModuleInfoResult};
/// let binary: ModuleInfoResult<String> = get_module_info!(ModuleInfoField::Binary);
/// ```
///
/// Available only when the `embed-module-info` feature is enabled on Linux.
#[cfg(all(feature = "embed-module-info", target_os = "linux"))]
#[must_use = "extract_module_info returns the parsed field value; discarding it defeats the point of calling it"]
pub unsafe fn extract_module_info(ptr: *const u8) -> ModuleInfoResult<String> {
    // Upper bound on the scan. A healthy read stops at the closing quote
    // after O(value_len) bytes; the cap only matters for a
    // stripped/missing/corrupted section. `MAX_JSON_SIZE + NOTE_ALIGN` is
    // the largest JSON body plus its `1..=NOTE_ALIGN` NUL padding, i.e. the
    // distance the earlier scan-to-NUL implementation could walk, kept as
    // the worst-case safety margin.
    const MAX_NOTE_VALUE_LEN: usize = constants::MAX_JSON_SIZE + constants::NOTE_ALIGN;

    if ptr.is_null() {
        return Err(ModuleInfoError::NullPointer);
    }

    // Offset of the first value byte (one past the opening quote), once seen.
    let mut value_start: Option<usize> = None;
    for offset in 0..MAX_NOTE_VALUE_LEN {
        // SAFETY: Caller (via `get_module_info!`) passes the address of an
        // `extern "C" static: u8` placed by the linker script inside the
        // `.note.package` payload (read-only for program lifetime, never
        // mutated). `offset` stays below `MAX_NOTE_VALUE_LEN`, and the loop
        // returns at the first NUL or closing quote.
        let current = unsafe { *ptr.add(offset) };
        match (current, value_start) {
            // NUL inside the value means the section is truncated or
            // malformed (sanitization strips embedded NULs from every
            // embedded value at build time). The two messages separate a
            // stripped/zeroed region from a payload that lost its trailing
            // `"`, which makes triage easier.
            (0, None) => {
                return Err(ModuleInfoError::MalformedJson(
                    "Unexpected NUL before opening quote of JSON value".to_string(),
                ));
            }
            (0, Some(_)) => {
                return Err(ModuleInfoError::MalformedJson(
                    "Unexpected NUL before closing quote of JSON value".to_string(),
                ));
            }
            (b'"', None) => value_start = Some(offset + 1),
            (b'"', Some(start)) => {
                // Both quotes found. Sanitization strips `"` and `\` from
                // values at embed time, so the raw bytes between the quotes
                // are the value (no JSON escapes to undo).
                let len = offset - start;
                // SAFETY: bytes `start..offset` were all read by earlier
                // iterations of this loop, so the range lies within the
                // memory the caller vouched for.
                let raw = unsafe { std::slice::from_raw_parts(ptr.add(start), len) };
                let text = std::str::from_utf8(raw)?;
                return Ok(text.to_owned());
            }
            _ => {}
        }
    }

    // Cap reached without a complete value. Distinguish "no opening quote
    // ever seen" (section absent, stripped, or zeroed) from "opening found
    // but closing quote missing" (truncation mid-value); both imply a
    // build-vs-runtime mismatch, but the cause differs.
    let detail = match value_start {
        None => "no opening quote found",
        Some(_) => "opening quote found but no closing quote",
    };
    Err(ModuleInfoError::MalformedJson(format!(
        "{detail} within {MAX_NOTE_VALUE_LEN} bytes; \
         .note.package section is missing, stripped, or corrupted"
    )))
}
983
/// Non-Linux stub of [`extract_module_info`].
///
/// The ELF `.note.package` section this would read only exists on Linux, so
/// there is nothing to extract and the call always fails with
/// `ModuleInfoError::NotAvailable`.
///
/// # Safety
/// No safety requirements on this platform: the pointer is never
/// dereferenced. The `unsafe` qualifier is kept only so the signature matches
/// the Linux implementation, letting cross-platform callers use a single
/// call site.
///
/// # Errors
/// Always returns `ModuleInfoError::NotAvailable`.
#[cfg(all(feature = "embed-module-info", not(target_os = "linux")))]
#[must_use = "extract_module_info returns the parsed field value; discarding it defeats the point of calling it"]
pub unsafe fn extract_module_info(_ptr: *const u8) -> ModuleInfoResult<String> {
    let reason = String::from(
        "Extract module info is only available on Linux platforms with embed-module-info feature.",
    );
    Err(ModuleInfoError::NotAvailable(reason))
}
1001
1002#[cfg(all(test, target_os = "linux"))]
1003mod tests {
1004    use std::{error::Error, fs::File, io::Read, path::Path};
1005
1006    use tempfile::NamedTempFile;
1007
1008    use super::*;
1009
    /// Return type for tests that propagate failures with `?`.
    ///
    /// Returning `Result<(), Box<dyn Error>>` lets a test use `?` where it
    /// would otherwise call `.expect(...)`, which keeps the test module free
    /// of the workspace-wide `clippy::disallowed_methods` ban on `expect`.
    type TestResult = Result<(), Box<dyn Error>>;
1014
1015    /// Test-only helper: returns true when `git --version` runs cleanly on
1016    /// the test host. Tests that depend on a real git checkout (branch/hash
1017    /// lookup, repo-name parsing) skip gracefully when this returns false so
1018    /// the suite stays green in stripped-down CI images. Lives inside the
1019    /// tests module rather than in `utils.rs` so `#[cfg(test)]` doesn't have
1020    /// to be scattered across production files.
1021    fn git_is_available() -> bool {
1022        match std::process::Command::new("git")
1023            .arg("--version")
1024            .stdin(std::process::Stdio::null())
1025            .output()
1026        {
1027            Ok(output) => output.status.success(),
1028            Err(_) => false,
1029        }
1030    }
1031
1032    #[cfg(feature = "embed-module-info")]
1033    #[test]
1034    #[allow(clippy::unnecessary_cast)]
1035    fn test_extract_module_info() -> TestResult {
1036        let test_str = "\"test_value\"";
1037        let c_str = std::ffi::CString::new(test_str)?;
1038        let ptr = c_str.as_ptr() as *const u8;
1039        // SAFETY: This is safe because we're creating a valid null-terminated C string
1040        // using std::ffi::CString which guarantees that the pointer is valid and properly
1041        // null-terminated for the duration of this function call
1042        let value = unsafe { extract_module_info(ptr) }?;
1043        assert_eq!(value, "test_value");
1044        Ok(())
1045    }
1046
1047    /// Lock in the early-exit refactor: a value followed by a long
1048    /// non-NUL trailer must still return only the bytes between the
1049    /// quotes. Pre-refactor the function walked all the way to NUL; the
1050    /// new implementation should never read past the closing quote, so
1051    /// the trailer never affects the parsed value.
1052    #[cfg(feature = "embed-module-info")]
1053    #[test]
1054    fn extract_module_info_stops_at_closing_quote() -> TestResult {
1055        // `"hello",\n"version":..." then NUL: a snapshot of how the
1056        // bytes look in a real `.note.package` payload between fields.
1057        let bytes: Vec<u8> = b"\"hello\",\n\"version\":\"1.2.3\"\0".to_vec();
1058        // SAFETY: the vec lives for the duration of the `unsafe` block
1059        // and is NUL-terminated within MAX_NOTE_VALUE_LEN.
1060        let value = unsafe { extract_module_info(bytes.as_ptr()) }?;
1061        assert_eq!(
1062            value, "hello",
1063            "scan must stop at the closing quote, not walk past into the next field"
1064        );
1065        Ok(())
1066    }
1067
1068    /// A NUL byte before any quote (e.g., the section was stripped or
1069    /// the symbol resolved against zeroed memory) must surface as
1070    /// `MalformedJson`, not silently return an empty string.
1071    #[cfg(feature = "embed-module-info")]
1072    #[test]
1073    fn extract_module_info_rejects_leading_nul() {
1074        let bytes: [u8; 4] = [0, 0, 0, 0];
1075        match unsafe { extract_module_info(bytes.as_ptr()) } {
1076            Err(ModuleInfoError::MalformedJson(msg)) => assert!(
1077                msg.contains("NUL"),
1078                "error must mention the NUL trigger: {msg}"
1079            ),
1080            other => panic!("expected MalformedJson(...NUL...), got {other:?}"),
1081        }
1082    }
1083
1084    /// A buffer with an opening quote but no closing quote within the
1085    /// cap should report the cap-hit diagnostic, not a generic "missing
1086    /// quote" error. This is the path that fires when the section was
1087    /// stripped from the binary at link time.
1088    #[cfg(feature = "embed-module-info")]
1089    #[test]
1090    fn extract_module_info_reports_cap_on_runaway_scan() {
1091        // 2 KB of `'a'` bytes (well over MAX_JSON_SIZE = 1024 +
1092        // NOTE_ALIGN = 4) with one opening quote at byte 0 and no
1093        // closing quote anywhere in the cap.
1094        let mut bytes = vec![b'a'; 2048];
1095        bytes[0] = b'"';
1096        match unsafe { extract_module_info(bytes.as_ptr()) } {
1097            Err(ModuleInfoError::MalformedJson(msg)) => assert!(
1098                msg.contains("missing, stripped, or corrupted"),
1099                "cap-hit error must keep the diagnostic phrasing: {msg}"
1100            ),
1101            other => panic!("expected MalformedJson(...corrupted...), got {other:?}"),
1102        }
1103    }
1104
1105    #[test]
1106    fn test_align_len() {
1107        assert_eq!(utils::align_len(5, NOTE_ALIGN), 8);
1108        assert_eq!(utils::align_len(8, NOTE_ALIGN), 8);
1109        assert_eq!(utils::align_len(9, NOTE_ALIGN), 12);
1110    }
1111
1112    /// Locks in the saturating-overflow contract for `align_len`: when
1113    /// `len + (align - 1)` would overflow `u32`, the function must
1114    /// saturate to `u32::MAX` (so downstream size checks notice) rather
1115    /// than wrap to a value below `len` (which the older naive
1116    /// implementation did, silently corrupting the `.note.package`
1117    /// layout). Without this test the `None => u32::MAX` arm is dead
1118    /// code per llvm-cov.
1119    #[test]
1120    fn align_len_saturates_on_u32_overflow() {
1121        // u32::MAX + 3 (mask for align=4) overflows; must saturate.
1122        assert_eq!(utils::align_len(u32::MAX, 4), u32::MAX);
1123        // u32::MAX is already aligned to 1, so the add doesn't overflow
1124        // there; pick a value where the carry actually fires.
1125        assert_eq!(utils::align_len(u32::MAX - 1, 4), u32::MAX);
1126    }
1127
1128    /// `NoteSection::new` rejects any owner string whose name+NUL
1129    /// length is below 4 bytes. The check is a guard against a swapped
1130    /// or empty owner accidentally producing a malformed note in
1131    /// release-mode build scripts (where `debug_assert!` would no-op).
1132    /// Without this test the `n_namesz < 4` arm is dead code per
1133    /// llvm-cov.
1134    #[test]
1135    fn note_section_rejects_short_owner() {
1136        use crate::note_section::NoteSection;
1137        // Empty owner: namesz = 0 + 1 (NUL) = 1, < 4.
1138        // `NoteSection` doesn't impl `Debug`, so `.expect_err(...)` is
1139        // unavailable and we have to match explicitly.
1140        match NoteSection::new(N_TYPE, "", "desc", "", NOTE_ALIGN) {
1141            Err(ModuleInfoError::Other(boxed)) => assert!(
1142                boxed.to_string().contains("n_namesz"),
1143                "diagnostic must name the field: {boxed}"
1144            ),
1145            Err(other) => panic!("expected Other(...n_namesz...), got {other:?}"),
1146            Ok(_) => panic!("empty owner must be rejected"),
1147        }
1148        // Two-byte owner: namesz = 2 + 1 = 3, still < 4.
1149        match NoteSection::new(N_TYPE, "AB", "desc", "", NOTE_ALIGN) {
1150            Err(ModuleInfoError::Other(_)) => {}
1151            Err(other) => panic!("expected Other(_), got {other:?}"),
1152            Ok(_) => panic!("two-byte owner must be rejected"),
1153        }
1154    }
1155
1156    /// `validate_embedded_json` rejects payloads larger than
1157    /// `MAX_JSON_SIZE`. The cap exists because the `.note.package`
1158    /// payload limit is documented as 1 KiB; without this test the
1159    /// `MetadataTooLarge` arm of `embed_package_metadata`'s call to
1160    /// `validate_embedded_json` is dead code per llvm-cov.
1161    #[test]
1162    fn validate_embedded_json_rejects_oversized_payload() {
1163        // Build a JSON payload over MAX_JSON_SIZE by stuffing a single
1164        // string field. The shape doesn't have to be valid metadata;
1165        // the size check fires first.
1166        let big_value = "x".repeat(constants::MAX_JSON_SIZE + 16);
1167        let json = format!(r#"{{"binary":"{big_value}"}}"#);
1168        let err = validate_embedded_json(&json)
1169            .expect_err("payloads over MAX_JSON_SIZE must be rejected");
1170        match err {
1171            ModuleInfoError::MetadataTooLarge(msg) => assert!(
1172                msg.contains("exceeds limit"),
1173                "diagnostic must mention the cap: {msg}"
1174            ),
1175            other => panic!("expected MetadataTooLarge, got {other:?}"),
1176        }
1177    }
1178
1179    /// `validate_embedded_json` rejects non-object JSON shapes. The
1180    /// `is_object()` branch fires for arrays / scalars / etc.; without
1181    /// a focused test this arm is uncovered per llvm-cov even though
1182    /// the runtime risk (someone hand-crafting a bad payload through
1183    /// the builder API) is real.
1184    #[test]
1185    fn validate_embedded_json_rejects_non_object_shapes() {
1186        for bad in ["[]", "null", "42", r#""string""#] {
1187            let err = validate_embedded_json(bad).expect_err("non-object JSON must be rejected");
1188            assert!(
1189                matches!(err, ModuleInfoError::MalformedJson(_)),
1190                "expected MalformedJson for {bad:?}"
1191            );
1192        }
1193    }
1194
1195    /// `module_info::new(Info { … })` is the one-call entry point. The
1196    /// existing `info_embed_round_trip_writes_artifacts` test goes
1197    /// directly through `embed_package_metadata` instead of through
1198    /// `new`, so the `new` body itself stays uncovered. Exercise it
1199    /// here. The function reads `OUT_DIR` (because `EmbedOptions`
1200    /// defaults to `out_dir = None`), so set a temp directory before
1201    /// the call and restore the prior value after.
1202    ///
1203    /// This is the *only* test that touches the process-global env;
1204    /// keeping it self-contained avoids racing the rest of the suite,
1205    /// which uses explicit `out_dir` overrides.
1206    #[cfg(feature = "embed-module-info")]
1207    #[test]
1208    fn new_one_call_entry_point_writes_artifacts() -> TestResult {
1209        use std::sync::Mutex;
1210        // Single global lock around `OUT_DIR` mutation: every test that
1211        // touches process-global env must serialize, otherwise parallel
1212        // test execution will see stale values. We're the only mutator
1213        // today, but the lock makes that contract explicit.
1214        static ENV_LOCK: Mutex<()> = Mutex::new(());
1215        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1216
1217        let tmp = tempfile::tempdir()?;
1218        let prior = std::env::var_os("OUT_DIR");
1219        // SAFETY: `set_var`/`remove_var` are `unsafe` on Rust 1.80+ but
1220        // safe on the MSRV (1.74). Use the safe API; CI's stable cell
1221        // will warn but not fail because the deprecation is `unsafe`,
1222        // not a compile error.
1223        std::env::set_var("OUT_DIR", tmp.path());
1224        let result = new(Info {
1225            binary: "one_call_test".into(),
1226            name: "one_call_test".into(),
1227            version: "1.0.0".into(),
1228            moduleVersion: "1.0.0.0".into(),
1229            maintainer: "team@contoso.com".into(),
1230            os: "linux".into(),
1231            osVersion: "test".into(),
1232            ..Default::default()
1233        });
1234        match prior {
1235            Some(p) => std::env::set_var("OUT_DIR", p),
1236            None => std::env::remove_var("OUT_DIR"),
1237        }
1238
1239        let artifacts = result?;
1240        // The artifacts must land under the OUT_DIR we set.
1241        assert!(artifacts.linker_script_path.starts_with(tmp.path()));
1242        assert!(artifacts.json_path.exists());
1243        let parsed: serde_json::Value = serde_json::from_str(&artifacts.json)?;
1244        assert_eq!(parsed["binary"], "one_call_test");
1245        Ok(())
1246    }
1247
1248    #[test]
1249    fn test_get_distro_info() -> TestResult {
1250        use crate::utils::get_distro_info;
1251        let distro_info = get_distro_info()?;
1252        assert!(!distro_info.0.is_empty());
1253        assert!(!distro_info.1.is_empty());
1254        Ok(())
1255    }
1256
1257    /// The binary note section assembled by `NoteSection::new` must be 4-byte
1258    /// aligned in total length. The ELF spec requires it, and a misaligned
1259    /// section silently corrupts subsequent note entries. `NoteSection`
1260    /// handles this via `align_len` on the owner and desc blocks. This test
1261    /// exercises desc lengths that stress every residue class mod 4 so a
1262    /// future refactor that drops the
1263    /// alignment padding on one of the blocks is caught immediately.
1264    #[test]
1265    fn note_section_is_4byte_aligned_for_every_residue() {
1266        use crate::note_section::NoteSection;
1267        for desc_len in [0usize, 1, 2, 3, 4, 5, 7, 8, 17, 100, 1023] {
1268            let desc = "x".repeat(desc_len);
1269            let note = match NoteSection::new(N_TYPE, OWNER, &desc, "", NOTE_ALIGN) {
1270                Ok(n) => n,
1271                Err(e) => panic!("NoteSection::new failed for desc_len={desc_len}: {e}"),
1272            };
1273            assert_eq!(
1274                note.note_section.len() % NOTE_ALIGN,
1275                0,
1276                "note section must be 4-byte aligned (desc_len={desc_len}, got {})",
1277                note.note_section.len()
1278            );
1279        }
1280    }
1281
1282    #[test]
1283    fn test_project_metadata() {
1284        if !git_is_available() {
1285            println!("Skipping test_project_metadata because git cli is not available");
1286            return;
1287        }
1288
1289        use crate::metadata::project_metadata;
1290        let result = project_metadata();
1291
1292        assert!(
1293            result.is_ok(),
1294            "Project metadata should be created successfully: {:?}",
1295            result.err()
1296        );
1297
1298        if let Ok(res) = result {
1299            let metadata = res.0;
1300            assert!(
1301                metadata.contains("\"binary\":"),
1302                "JSON should contain binary field"
1303            );
1304            assert!(
1305                metadata.contains("\"moduleVersion\":"),
1306                "JSON should contain moduleVersion field"
1307            );
1308            assert!(
1309                metadata.contains("\"version\":"),
1310                "JSON should contain version field"
1311            );
1312            assert!(
1313                metadata.contains("\"maintainer\":"),
1314                "JSON should contain maintainer field"
1315            );
1316            assert!(
1317                metadata.contains("\"name\":"),
1318                "JSON should contain name field"
1319            );
1320            assert!(
1321                metadata.contains("\"type\":"),
1322                "JSON should contain type field"
1323            );
1324
1325            assert!(
1326                metadata.contains("\"repo\":") || metadata.contains("\"Unknown\""),
1327                "JSON should contain repo field or fallback"
1328            );
1329            assert!(
1330                metadata.contains("\"branch\":")
1331                    || metadata.contains("\"main\"")
1332                    || metadata.contains("\"unknown\""),
1333                "JSON should contain branch field or fallback"
1334            );
1335            assert!(
1336                metadata.contains("\"hash\":") || metadata.contains("\"unknown\""),
1337                "JSON should contain hash field or fallback"
1338            );
1339
1340            // Other required fields
1341            assert!(
1342                metadata.contains("\"copyright\":"),
1343                "JSON should contain copyright field"
1344            );
1345            assert!(metadata.contains("\"os\":"), "JSON should contain os field");
1346            assert!(
1347                metadata.contains("\"osVersion\":"),
1348                "JSON should contain osVersion field"
1349            );
1350        }
1351    }
1352
1353    /// Exercises the production Cargo.toml-reading path end-to-end against
1354    /// this crate's own manifest. The assertions are intentionally fork-safe:
1355    /// an external fork may change `copyright` (and must), but the contract
1356    /// that Cargo.toml values round-trip through `from_cargo_toml` and
1357    /// populate the expected fields stays fixed.
1358    #[test]
1359    fn test_package_metadata_from_cargo_toml() -> TestResult {
1360        let md = PackageMetadata::from_cargo_toml()?;
1361
1362        assert_eq!(md.name, "module-info");
1363        assert_eq!(md.binary, "module-info");
1364
1365        // Version is formatted to 3 numeric parts by `format_version_parts`.
1366        let parts: Vec<&str> = md.version.split('.').collect();
1367        assert_eq!(
1368            parts.len(),
1369            3,
1370            "version should have three dot-separated parts, got {:?}",
1371            md.version
1372        );
1373        for part in &parts {
1374            assert!(
1375                part.chars().all(|c| c.is_ascii_digit()),
1376                "version part {part:?} must be numeric"
1377            );
1378        }
1379
1380        // `copyright` comes from `[package.metadata.module_info].copyright`
1381        // in this crate's own Cargo.toml. Forks will legitimately set their
1382        // own value, so the contract we lock in is "non-empty and not the
1383        // `Unknown` fallback that triggers when the key is missing",
1384        // nothing organization-specific.
1385        assert!(
1386            !md.copyright.is_empty() && md.copyright != "Unknown",
1387            "copyright must come from Cargo.toml, not the Unknown fallback; got {:?}",
1388            md.copyright
1389        );
1390        Ok(())
1391    }
1392
1393    #[test]
1394    fn test_get_git_info() -> TestResult {
1395        if !git_is_available() {
1396            println!("Skipping test_get_git_info because git is not available");
1397            return Ok(());
1398        }
1399
1400        use crate::utils::get_git_info;
1401        let git_info = get_git_info()?;
1402
1403        // Just verify we get back something for the repo name
1404        // Don't assert exact values since they can change
1405        // Verify we get back non-empty values
1406        assert!(!git_info.0.is_empty(), "Branch name should not be empty"); // branch
1407        assert!(!git_info.1.is_empty(), "Commit hash should not be empty"); // hash
1408        assert!(
1409            !git_info.2.is_empty(),
1410            "Repository name should not be empty"
1411        ); // repo name
1412
1413        // In a git repo this returns the parsed remote name; outside one
1414        // (e.g. testing from a published tarball) it falls back to the
1415        // "unknown" sentinel. Either is valid here.
1416        assert!(git_info.2 == "unknown" || !git_info.2.is_empty());
1417
1418        println!(
1419            "Git Info - Branch: {}, Hash: {}, Repo: {}",
1420            git_info.0, git_info.1, git_info.2
1421        );
1422        Ok(())
1423    }
1424
1425    #[test]
1426    fn test_json_key_value_parse() -> TestResult {
1427        let json_input = r#"{
1428"binary": "sample_crashing_process",
1429"moduleVersion": "0.1.0.0",
1430"version": "0.1.0",
1431"maintainer": "Maintainer contact/UUID etc",
1432"name": "sample_crashing_process",
1433"type": "agent",
1434"repo": "Module_Info",
1435"branch": "main",
1436"hash": "76930c41aa16e31bb1e565b12c4285cde1939af3",
1437"copyright": "Microsoft",
1438"os": "Ubuntu",
1439"osVersion": "20.04"
1440}
1441"#;
1442
1443        let parsed: serde_json::Value = serde_json::from_str(json_input)?;
1444        assert_eq!(parsed["binary"], "sample_crashing_process");
1445        assert_eq!(parsed["moduleVersion"], "0.1.0.0");
1446        assert_eq!(parsed["version"], "0.1.0");
1447        assert_eq!(parsed["maintainer"], "Maintainer contact/UUID etc");
1448        assert_eq!(parsed["name"], "sample_crashing_process");
1449        assert_eq!(parsed["type"], "agent");
1450        assert_eq!(parsed["repo"], "Module_Info");
1451        assert_eq!(parsed["branch"], "main");
1452        assert_eq!(parsed["hash"], "76930c41aa16e31bb1e565b12c4285cde1939af3");
1453        assert_eq!(parsed["copyright"], "Microsoft");
1454        assert_eq!(parsed["os"], "Ubuntu");
1455        assert_eq!(parsed["osVersion"], "20.04");
1456        Ok(())
1457    }
1458
1459    #[test]
1460    fn test_get_project_path() {
1461        use crate::utils::get_project_path;
1462        let project_path = get_project_path();
1463        assert!(project_path.exists());
1464    }
1465
1466    #[test]
1467    fn test_get_cargo_toml_content() -> TestResult {
1468        use crate::utils::get_cargo_toml_content;
1469        let cargo_toml = get_cargo_toml_content()?;
1470        assert!(cargo_toml.get("package").is_some());
1471        Ok(())
1472    }
1473
1474    #[test]
1475    fn test_save_section() -> TestResult {
1476        // Create a temporary file
1477        let temp_file = NamedTempFile::new()?;
1478        let file_path = temp_file.path().to_path_buf();
1479
1480        // Create sample section data
1481        let desc_json = r#"{"binary":"test","version":"1.0.0"}"#;
1482        let linker_script_body = "BYTE(0x01); BYTE(0x02);";
1483
1484        // Create a note section
1485        use crate::note_section::NoteSection;
1486        let note = NoteSection::new(N_TYPE, OWNER, desc_json, linker_script_body, NOTE_ALIGN)?;
1487
1488        // Save the section to the temporary file
1489        note.save_section(&file_path)?;
1490
1491        // Read the file back
1492        let mut file = File::open(&file_path)?;
1493        let mut buffer = Vec::new();
1494        file.read_to_end(&mut buffer)?;
1495
1496        // Verify the content
1497        assert!(!buffer.is_empty());
1498        assert_eq!(buffer.len(), note.note_section.len());
1499        assert_eq!(buffer, note.note_section);
1500
1501        // Check that the file contains expected ELF note header values
1502        // The first 12 bytes should be the ELF note header (n_namesz, n_descsz, n_type)
1503        assert!(buffer.len() >= 12);
1504
1505        // Check for the owner string "FDO" followed by null terminator
1506        let owner_offset = 12; // After the header
1507        let owner_bytes = OWNER.as_bytes();
1508        let owner_slice = buffer
1509            .get(owner_offset..owner_offset + owner_bytes.len())
1510            .ok_or("owner slice is out of bounds")?;
1511        assert_eq!(owner_slice, owner_bytes);
1512
1513        // Ensure the N_TYPE value is present in the header (little endian)
1514        let n_type_bytes = N_TYPE.to_le_bytes();
1515        let n_type_slice = buffer.get(8..12).ok_or("n_type slice is out of bounds")?;
1516        assert_eq!(n_type_slice, &n_type_bytes);
1517        Ok(())
1518    }
1519
1520    /// `PackageMetadata` is public and implements `Default` so callers can
1521    /// use `..Default::default()` in struct-literal construction. This is
1522    /// the forward-compatible pattern recommended for build.rs consumers
1523    /// that supply metadata programmatically.
1524    #[test]
1525    fn test_package_metadata_default_construction() {
1526        let md = PackageMetadata {
1527            binary: "my_tool".into(),
1528            name: "my_tool".into(),
1529            version: "1.2.3".into(),
1530            module_version: "1.2.3.4".into(),
1531            maintainer: "team@contoso.com".into(),
1532            hash: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".into(),
1533            ..Default::default()
1534        };
1535
1536        // Fields we set round-trip.
1537        assert_eq!(md.binary, "my_tool");
1538        assert_eq!(md.version, "1.2.3");
1539        assert_eq!(md.module_version, "1.2.3.4");
1540        // Fields we didn't set come from `Default`: empty strings.
1541        assert_eq!(md.module_type, "");
1542        assert_eq!(md.repo, "");
1543        assert_eq!(md.os, "");
1544    }
1545
1546    /// `embed_package_metadata` with a caller-supplied `out_dir` and
1547    /// `emit_cargo_link_arg = false` must write all three artifacts
1548    /// (linker script, note bin, JSON) into the specified directory.
1549    /// This is the static-library flow: the outer build system handles
1550    /// the final link, so we write artifacts to a known location and
1551    /// skip the `cargo:rustc-link-arg` directive.
1552    #[cfg(feature = "embed-module-info")]
1553    #[test]
1554    fn test_embed_package_metadata_custom_out_dir_no_link_arg() -> TestResult {
1555        let tmp = tempfile::tempdir()?;
1556        let md = PackageMetadata {
1557            binary: "test_binary".into(),
1558            name: "test_binary".into(),
1559            version: "1.2.3".into(),
1560            module_version: "1.2.3.4".into(),
1561            maintainer: "team@contoso.com".into(),
1562            hash: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".into(),
1563            module_type: "agent".into(),
1564            repo: "test_repo".into(),
1565            branch: "main".into(),
1566            copyright: "Test".into(),
1567            os: "Ubuntu".into(),
1568            os_version: "22.04".into(),
1569            ..Default::default()
1570        };
1571
1572        let opts = EmbedOptions {
1573            out_dir: Some(tmp.path().to_path_buf()),
1574            emit_cargo_link_arg: false,
1575            ..Default::default()
1576        };
1577
1578        let artifacts = embed_package_metadata(&md, &opts)?;
1579
1580        // All three artifact paths must live under the custom out_dir.
1581        assert!(artifacts.linker_script_path.starts_with(tmp.path()));
1582        assert!(artifacts.note_bin_path.starts_with(tmp.path()));
1583        assert!(artifacts.json_path.starts_with(tmp.path()));
1584
1585        // And the files must actually exist on disk.
1586        assert!(artifacts.linker_script_path.exists());
1587        assert!(artifacts.note_bin_path.exists());
1588        assert!(artifacts.json_path.exists());
1589
1590        // And the returned JSON is parseable and contains the supplied values.
1591        let parsed: serde_json::Value = serde_json::from_str(&artifacts.json)?;
1592        assert_eq!(parsed["binary"], "test_binary");
1593        assert_eq!(parsed["version"], "1.2.3");
1594        assert_eq!(parsed["moduleVersion"], "1.2.3.4");
1595        Ok(())
1596    }
1597
1598    /// `embed_package_metadata` must reject `PackageMetadata` values whose
1599    /// serialized JSON lacks a required field. Required fields are a safety
1600    /// guardrail so consumers do not accidentally emit a note section that
1601    /// `print_module_info` / `get_module_info!` cannot parse. Since
1602    /// `PackageMetadata` always serializes every field, "missing" in practice
1603    /// means "empty string". Leave a required field as `Default::default()`
1604    /// and the validator must reject it.
1605    #[cfg(feature = "embed-module-info")]
1606    #[test]
1607    fn test_embed_package_metadata_rejects_empty_required_field() -> TestResult {
1608        let tmp = tempfile::tempdir()?;
1609        // `osVersion` is required; leaving it at `Default::default()` ("")
1610        // exercises the validator's empty-string rejection path, and its
1611        // `#[serde(rename = "osVersion")]` mapping is the same one the runtime
1612        // map consumers see.
1613        let md = PackageMetadata {
1614            binary: "b".into(),
1615            name: "n".into(),
1616            version: "1.0.0".into(),
1617            module_version: "1.0.0.0".into(),
1618            maintainer: "m".into(),
1619            os: "linux".into(),
1620            // os_version omitted on purpose; `..Default::default()` gives "".
1621            ..Default::default()
1622        };
1623        let opts = EmbedOptions {
1624            out_dir: Some(tmp.path().to_path_buf()),
1625            emit_cargo_link_arg: false,
1626            ..Default::default()
1627        };
1628        let err = embed_package_metadata(&md, &opts)
1629            .expect_err("embed must reject PackageMetadata with empty required field");
1630        match err {
1631            ModuleInfoError::MalformedJson(msg) => {
1632                assert!(
1633                    msg.contains("osVersion"),
1634                    "error must name the empty required field: {msg}"
1635                );
1636            }
1637            other => panic!("expected MalformedJson, got {other:?}"),
1638        }
1639        Ok(())
1640    }
1641
1642    /// Direct test for the required-field guardrail: feed JSON missing a
1643    /// required field and confirm it's rejected with a `MalformedJson` error.
1644    #[test]
1645    fn test_validate_embedded_json_rejects_missing_required_fields() {
1646        // Missing "maintainer" (one of the seven required identity-plus-
1647        // platform keys that stays required even when optional fields like
1648        // `hash`/`repo`/`branch` are deliberately left empty).
1649        let bad_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.0.0.0","name":"n"}"#;
1650        let err =
1651            validate_embedded_json(bad_json).expect_err("missing required field must be rejected");
1652        match err {
1653            ModuleInfoError::MalformedJson(msg) => {
1654                assert!(
1655                    msg.contains("maintainer"),
1656                    "error must name the missing field: {msg}"
1657                );
1658            }
1659            other => panic!("expected MalformedJson, got {other:?}"),
1660        }
1661    }
1662
1663    /// Direct test for the empty-string half of the required-field guardrail.
1664    /// `PackageMetadata::default()` fields serialize as `""`; we treat that
1665    /// as "missing" too for the required identity keys, so consumers can't
1666    /// silently ship a note section with an empty `binary` or `maintainer`.
1667    /// Non-required fields (hash/repo/branch/type/copyright) are *allowed*
1668    /// to be empty; that's the documented "disable" knob.
1669    #[test]
1670    fn test_validate_embedded_json_rejects_empty_required_fields() {
1671        // "maintainer" present but empty.
1672        let bad_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.0.0.0","name":"n","maintainer":""}"#;
1673        let err =
1674            validate_embedded_json(bad_json).expect_err("empty required field must be rejected");
1675        match err {
1676            ModuleInfoError::MalformedJson(msg) => {
1677                assert!(
1678                    msg.contains("maintainer"),
1679                    "error must name the empty field: {msg}"
1680                );
1681            }
1682            other => panic!("expected MalformedJson, got {other:?}"),
1683        }
1684    }
1685
1686    /// Complement to the rejection tests: a payload that supplies the five
1687    /// required identity keys but leaves every optional field empty must
1688    /// pass validation. This pins the "disabled field = empty string"
1689    /// contract against accidental regressions (e.g., re-adding `hash` to
1690    /// `REQUIRED_JSON_KEYS`).
1691    #[test]
1692    fn test_validate_embedded_json_accepts_empty_optional_fields() {
1693        let ok_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.0.0.0","name":"n","maintainer":"m","type":"","repo":"","branch":"","hash":"","copyright":"","os":"linux","osVersion":"1"}"#;
1694        if let Err(e) = validate_embedded_json(ok_json) {
1695            panic!("optional fields may be empty; only the identity keys are required. got {e:?}");
1696        }
1697    }
1698
1699    /// `EmbedOptions::default()` pins the zero-config behavior:
1700    /// `out_dir = None` (use `$OUT_DIR`) and `emit_cargo_link_arg = true` so
1701    /// plain build.rs consumers don't have to set any options.
1702    #[test]
1703    fn test_embed_options_default_preserves_bc_behavior() {
1704        let opts = EmbedOptions::default();
1705        assert!(opts.out_dir.is_none());
1706        assert!(opts.emit_cargo_link_arg);
1707    }
1708
1709    /// The linker script body must always carry at least one `BYTE(0x00);`
1710    /// NUL terminator, regardless of the JSON byte-length mod 4. Without it,
1711    /// `extract_module_info` at runtime would scan past the end of
1712    /// `.note.package` looking for the sentinel: harmless in practice
1713    /// (read-only mapped memory) but a latent SIGSEGV risk when the section
1714    /// sits at a segment boundary. This test constructs a `PackageMetadata`
1715    /// specifically shaped so the total payload byte-count is a multiple
1716    /// of 4, which is the tricky case the original `padding_needed = (... % 4)`
1717    /// formula got wrong (it computed 0 and emitted no padding).
1718    #[test]
1719    fn render_note_payloads_always_emits_nul_padding() -> TestResult {
1720        // Any well-formed PackageMetadata works; we just need the payload.
1721        // 4-aligned input isn't easy to construct deliberately since the
1722        // JSON shape mixes fixed keys with variable values, so we assert
1723        // the stronger "always emits NUL padding" invariant across every
1724        // permutation of field lengths we can reach with a 2-character probe.
1725        for suffix_len in 0..=4 {
1726            let suffix = "x".repeat(suffix_len);
1727            let md = PackageMetadata {
1728                binary: format!("b{suffix}"),
1729                name: format!("n{suffix}"),
1730                version: "1.0.0".into(),
1731                module_version: "1.0.0.0".into(),
1732                maintainer: "m".into(),
1733                os: "linux".into(),
1734                os_version: "22.04".into(),
1735                ..Default::default()
1736            };
1737            let (_json, linker_script_body) = crate::metadata::render_note_payloads(&md)?;
1738            assert!(
1739                linker_script_body.contains("BYTE(0x00);"),
1740                "linker script must contain a BYTE(0x00) even when the payload is 4-aligned (suffix_len={suffix_len})"
1741            );
1742        }
1743        Ok(())
1744    }
1745
1746    /// `link_arg_directive` is the single branch that decides whether
1747    /// `cargo:rustc-link-arg=-T<path>` is emitted. Asserting both arms here
1748    /// locks in the "emit_cargo_link_arg=false means no directive" contract
1749    /// that static-library flows depend on.
1750    #[test]
1751    fn link_arg_directive_gates_on_flag() {
1752        let p = Path::new("/tmp/linker_script.ld");
1753        match link_arg_directive(p, true) {
1754            Some(d) => assert_eq!(d, "cargo:rustc-link-arg=-T/tmp/linker_script.ld"),
1755            None => panic!("emit_cargo_link_arg=true must produce a directive"),
1756        }
1757        assert!(
1758            link_arg_directive(p, false).is_none(),
1759            "emit_cargo_link_arg=false must suppress the directive"
1760        );
1761    }
1762
1763    /// Drift guard: every key in `REQUIRED_JSON_KEYS` must appear in
1764    /// `ModuleInfoField::ALL.to_key()`. If someone adds a required field
1765    /// without extending the enum (or vice versa), this test fails before
1766    /// the divergence reaches a consumer.
1767    #[test]
1768    fn required_keys_are_subset_of_module_info_fields() {
1769        let known: std::collections::HashSet<&str> =
1770            ModuleInfoField::ALL.iter().map(|f| f.to_key()).collect();
1771        for key in constants::REQUIRED_JSON_KEYS {
1772            assert!(
1773                known.contains(key),
1774                "REQUIRED_JSON_KEYS contains {key:?} which is not in ModuleInfoField::ALL"
1775            );
1776        }
1777    }
1778
1779    /// `Info` must be constructible from a struct literal (that's the whole
1780    /// point of the type), and `From<Info> for PackageMetadata` must carry
1781    /// every field across with the JSON-key-shaped name on the `Info` side and
1782    /// the snake_case name on the `PackageMetadata` side.
1783    #[test]
1784    fn info_struct_literal_and_conversion() {
1785        let info = Info {
1786            binary: "b".into(),
1787            version: "1.2.3".into(),
1788            moduleVersion: "1.2.3.4".into(),
1789            maintainer: "m".into(),
1790            name: "n".into(),
1791            r#type: "agent".into(),
1792            repo: "r".into(),
1793            branch: "br".into(),
1794            hash: "h".into(),
1795            copyright: "c".into(),
1796            os: "o".into(),
1797            osVersion: "ov".into(),
1798        };
1799        let md: PackageMetadata = info.into();
1800        assert_eq!(md.binary, "b");
1801        assert_eq!(md.version, "1.2.3");
1802        assert_eq!(md.module_version, "1.2.3.4");
1803        assert_eq!(md.maintainer, "m");
1804        assert_eq!(md.name, "n");
1805        assert_eq!(md.module_type, "agent");
1806        assert_eq!(md.repo, "r");
1807        assert_eq!(md.branch, "br");
1808        assert_eq!(md.hash, "h");
1809        assert_eq!(md.copyright, "c");
1810        assert_eq!(md.os, "o");
1811        assert_eq!(md.os_version, "ov");
1812    }
1813
1814    /// `Info::default()` plus `..Default::default()` struct-literal syntax is
1815    /// the forward-compatible pattern consumers should use. Unlike
1816    /// `PackageMetadata`, `Info` is intentionally not `#[non_exhaustive]`, so
1817    /// both full struct literals and `..Default::default()` must compile and
1818    /// produce empty strings for unassigned fields.
1819    #[test]
1820    fn info_default_fills_missing_fields_with_empty_strings() {
1821        let info = Info {
1822            binary: "b".into(),
1823            moduleVersion: "1.2.3.4".into(),
1824            ..Default::default()
1825        };
1826        assert_eq!(info.binary, "b");
1827        assert_eq!(info.moduleVersion, "1.2.3.4");
1828        assert_eq!(info.version, "");
1829        assert_eq!(info.r#type, "");
1830        assert_eq!(info.osVersion, "");
1831    }
1832
1833    /// `Info` → `PackageMetadata` → `embed_package_metadata` is the path
1834    /// `new(Info { .. })` takes internally (`new` is just two lines: convert
1835    /// and dispatch). Exercise it end-to-end with an explicit `out_dir` so
1836    /// the test doesn't have to mutate `OUT_DIR` on the shared process
1837    /// environment: `std::env::set_var` is `unsafe fn` on Rust 1.80+ and
1838    /// racy when tests run in parallel. The actual `new` function is so
1839    /// thin that the conversion test and this embed test together cover
1840    /// everything it does.
1841    #[cfg(feature = "embed-module-info")]
1842    #[test]
1843    fn info_embed_round_trip_writes_artifacts() -> TestResult {
1844        let tmp = tempfile::tempdir()?;
1845        let md: PackageMetadata = Info {
1846            binary: "b".into(),
1847            name: "n".into(),
1848            version: "1.2.3".into(),
1849            moduleVersion: "1.2.3.4".into(),
1850            maintainer: "m".into(),
1851            r#type: "agent".into(),
1852            hash: "deadbeef".into(),
1853            os: "linux".into(),
1854            osVersion: "22.04".into(),
1855            ..Default::default()
1856        }
1857        .into();
1858
1859        let opts = EmbedOptions {
1860            out_dir: Some(tmp.path().to_path_buf()),
1861            emit_cargo_link_arg: false,
1862            ..Default::default()
1863        };
1864        let artifacts = embed_package_metadata(&md, &opts)?;
1865
1866        assert!(artifacts.linker_script_path.starts_with(tmp.path()));
1867        assert!(artifacts.json_path.exists());
1868        let parsed: serde_json::Value = serde_json::from_str(&artifacts.json)?;
1869        assert_eq!(parsed["moduleVersion"], "1.2.3.4");
1870        assert_eq!(parsed["type"], "agent");
1871        Ok(())
1872    }
1873
1874    /// `validate_module_version` accepts the full u16 range on every part.
1875    #[test]
1876    fn validate_module_version_accepts_valid_values() -> TestResult {
1877        for v in ["0.0.0.0", "1.2.3.4", "65535.65535.65535.65535", "10.0.0.1"] {
1878            validate_module_version(v)?;
1879        }
1880        Ok(())
1881    }
1882
1883    /// Wrong number of dot-separated parts must fail loudly, not silently
1884    /// pad or truncate.
1885    #[test]
1886    fn validate_module_version_rejects_wrong_part_count() {
1887        for v in ["", "1", "1.2", "1.2.3", "1.2.3.4.5"] {
1888            let err = validate_module_version(v).expect_err("wrong part count must be rejected");
1889            match err {
1890                ModuleInfoError::MalformedJson(msg) => {
1891                    assert!(
1892                        msg.contains("exactly 4"),
1893                        "error must explain the 4-part rule: {msg}"
1894                    );
1895                }
1896                other => panic!("expected MalformedJson, got {other:?}"),
1897            }
1898        }
1899    }
1900
1901    /// A u16 overflows at 65536, and consumers parsing the 4-WORD
1902    /// VS_FIXEDFILEINFO shape would truncate, so reject at embed time.
1903    #[test]
1904    fn validate_module_version_rejects_overflow() {
1905        // 65536 = u16::MAX + 1, on each of the four positions.
1906        for v in [
1907            "65536.0.0.0",
1908            "0.65536.0.0",
1909            "0.0.65536.0",
1910            "0.0.0.65536",
1911            "99999.1.2.3",
1912        ] {
1913            let err = validate_module_version(v).expect_err("u16 overflow must be rejected");
1914            match err {
1915                ModuleInfoError::MalformedJson(msg) => {
1916                    assert!(
1917                        msg.contains("16 bits"),
1918                        "error must mention the u16 constraint: {msg}"
1919                    );
1920                }
1921                other => panic!("expected MalformedJson, got {other:?}"),
1922            }
1923        }
1924    }
1925
1926    /// Negative numbers and non-numeric text never fit a u16, and would
1927    /// silently turn into `0` under lossy casts, so reject them up front.
1928    #[test]
1929    fn validate_module_version_rejects_non_numeric() {
1930        for v in ["-1.0.0.0", "a.b.c.d", "1.2.x.4", "1.2.3.4a", "v1.2.3.4"] {
1931            validate_module_version(v).expect_err("non-numeric parts must be rejected");
1932        }
1933    }
1934
1935    /// Empty component between dots is rejected explicitly (not just
1936    /// `parse::<u16>()` fallout) so the error message names the position.
1937    #[test]
1938    fn validate_module_version_rejects_empty_part() {
1939        for v in ["1.2.3.", "1..3.4", "..1.2", "1.2..4"] {
1940            let err = validate_module_version(v).expect_err("empty part must be rejected");
1941            if let ModuleInfoError::MalformedJson(msg) = err {
1942                // Either the part-count check or the empty-part check can
1943                // fire first depending on the shape; both are acceptable.
1944                assert!(
1945                    msg.contains("empty") || msg.contains("exactly 4"),
1946                    "unexpected error message: {msg}"
1947                );
1948            } else {
1949                panic!("expected MalformedJson");
1950            }
1951        }
1952    }
1953
1954    /// `validate_embedded_json` must enforce the u16 constraint on
1955    /// `moduleVersion`, not just the presence check, so the guardrail
1956    /// applies to every path into `embed_package_metadata`.
1957    #[test]
1958    fn validate_embedded_json_rejects_bad_module_version() {
1959        let bad_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.2.3.99999","name":"n","maintainer":"m","os":"linux","osVersion":"22.04"}"#;
1960        let err = validate_embedded_json(bad_json)
1961            .expect_err("out-of-range moduleVersion must be rejected");
1962        match err {
1963            ModuleInfoError::MalformedJson(msg) => {
1964                assert!(
1965                    msg.contains("moduleVersion"),
1966                    "error must name the field: {msg}"
1967                );
1968            }
1969            other => panic!("expected MalformedJson, got {other:?}"),
1970        }
1971    }
1972
1973    /// Drift guard: `PackageMetadata::field_value` covers every variant in
1974    /// `ModuleInfoField::ALL`, and every produced value matches the struct
1975    /// field serde serializes for the same JSON key. Catches the case where
1976    /// a new enum variant lands but `field_value` / the struct isn't
1977    /// extended.
1978    #[test]
1979    fn package_metadata_field_value_covers_all_variants() -> TestResult {
1980        let md = PackageMetadata {
1981            binary: "bv".into(),
1982            version: "vv".into(),
1983            module_version: "mv".into(),
1984            maintainer: "mn".into(),
1985            name: "nv".into(),
1986            module_type: "tv".into(),
1987            repo: "rv".into(),
1988            branch: "bn".into(),
1989            hash: "hv".into(),
1990            copyright: "cv".into(),
1991            os: "ov".into(),
1992            os_version: "ov2".into(),
1993        };
1994
1995        let json: serde_json::Value = serde_json::from_str(&serde_json::to_string(&md)?)?;
1996        for field in ModuleInfoField::ALL {
1997            let from_method = md.field_value(*field);
1998            let from_json = json
1999                .get(field.to_key())
2000                .and_then(|v| v.as_str())
2001                .unwrap_or_else(|| panic!("JSON missing key for {field:?}"));
2002            assert_eq!(
2003                from_method, from_json,
2004                "field_value and serde output disagree for {field:?}"
2005            );
2006        }
2007        Ok(())
2008    }
2009}