module_info/lib.rs
1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Embed metadata into ELF binaries as `.note.package` sections so it
5//! **survives crashes**, visible to `coredumpctl`, `readelf -n`, and any
6//! other consumer of the [systemd package-metadata
7//! format](https://uapi-group.org/specifications/specs/package_metadata_for_executable_files/).
8//! The crate's main feature is crash-dump preservation: when your process dies,
9//! the version of code that crashed is recoverable from the core dump without external
10//! symbol files or build-system context.
11//!
12//! Runtime read-back via the [`get_module_info!`] macro is a *convenience
13//! accessor*, useful while the process is still alive but not the reason
14//! the crate exists.
15//!
16//! Consumers call [`generate_project_metadata_and_linker_script`] from
17//! `build.rs` to generate the linker script and Cargo directives. At
18//! runtime, metadata fields can be read via [`get_module_info!`] (returns
19//! `ModuleInfoResult<String>` for a single field, or a `HashMap` of all
20//! readable fields when called with no arguments). On non-Linux platforms
21//! the crate exposes no-op stubs so cross-platform builds still compile;
22//! runtime accessors return `ModuleInfoError::NotAvailable`.
23//!
24//! See the README and the `examples/` directory for an end-to-end integration.
25//!
26//! # Limitations
27//!
28//! **`rlib` consumers read the host binary's metadata, not their own.**
29//! When a downstream library's `build.rs` calls
30//! [`generate_project_metadata_and_linker_script`], the resulting
//! `cargo:rustc-link-arg=-T<path-to-linker_script.ld>` directive is attached to that
32//! library's own build and does not propagate to the final executable's link
33//! command, so the library's linker script never runs at the link step that
34//! produces the binary. Meanwhile, every [`get_module_info!`] call inside
35//! the library expands to an `extern "C" { static module_info_*: u8; }`
36//! declaration. At the final link those undefined references resolve
37//! against the executable's linker script, which defines a single set of
38//! `module_info_*` symbols pointing at the executable's `.note.package`
39//! payload, so library code reading them gets the executable's values. The
40//! same applies to anything statically linked into a Rust executable:
41//! `rlib`, `staticlib` linked via `#[link(kind = "static")]`, or in-tree
42//! workspace libraries.
43//!
44//! **`staticlib` consumed by an outer (non-cargo) build can embed its own
45//! metadata.** Set `EmbedOptions::emit_cargo_link_arg` to `false`; the
46//! crate then writes `linker_script.ld` to `out_dir` without emitting the
47//! `cargo:rustc-link-arg` directive. The outer build (Make, CMake, MSBuild,
48//! …) passes that script to its own linker, at which point the
49//! `module_info_*` symbols are defined by the staticlib's linker script and
50//! the staticlib reads its own metadata. See "Option B" in the README for
51//! the full flow.
52//!
53//! **`cdylib` shared libraries loaded via `dlopen` are not affected.** A
54//! `cdylib` runs its own link step and applies its own linker script, so
55//! the `module_info_*` symbols inside the resulting `.so` are local to it
56//! (not exported in the dynamic symbol table). Code inside the library
57//! reads its own metadata correctly, even when the host process's main
58//! executable also embeds `.note.package`. To consume a `cdylib`'s metadata
59//! at runtime, expose an `extern "C"` accessor and call it via `dlopen`;
60//! see `examples/sample_elf_bin_with_lib` for the full pattern. Reading a
61//! library file's metadata without loading it (e.g. for crash triage) is
62//! always possible by parsing the ELF note section from the `.so` on disk.
63//!
64//! **Little-endian targets only.** The ELF note header is serialized with
65//! `u32::to_le_bytes` at `build.rs` time. Supported targets today are
66//! `x86_64-unknown-linux-gnu`, `aarch64-unknown-linux-gnu`, and
67//! `i686-unknown-linux-gnu` (all little-endian). Cross-compiling for a
68//! big-endian Linux target (s390x, powerpc-be, mips-be) will silently emit
69//! a byte-swapped note section that `readelf -n` and `systemd-coredump`
70//! cannot parse. Adding big-endian support would mean selecting `to_le_bytes`
71//! vs `to_be_bytes` from `CARGO_CFG_TARGET_ENDIAN`.
72
73mod error;
74mod fields;
75// `#[macro_use]` makes the non-exported build-time helpers (note!/error!/
76// warn!/debug!) visible to sibling modules without exporting them.
77#[macro_use]
78mod macros;
79use cfg_if::cfg_if;
80pub use error::{ModuleInfoError, ModuleInfoResult};
81pub use fields::ModuleInfoField;
82
83cfg_if! {
84 if #[cfg(target_os = "linux")] {
85 use std::{env, path::{Path, PathBuf}};
86
87 mod constants;
88 mod metadata;
89 mod note_section;
90 mod utils;
91
92 pub use metadata::PackageMetadata;
93
94 pub(crate) use constants::*;
95 }
96}
97
98cfg_if! {
99 if #[cfg(all(feature = "embed-module-info", target_os = "linux"))] {
100 /// Static symbol that marks the beginning of our custom note section
101 ///
102 /// This empty array is placed in the .note.package section and serves as an anchor
103 /// for the linker script to place our metadata properly.
104 #[link_section = ".note.package"]
105 #[no_mangle]
106 #[used]
107 #[doc(hidden)]
108 pub static PACKAGE_NOTE_SECTION: [u8; 0] = [];
109
110 /// Force the `module_info` rlib to be linked into the consuming binary so the
111 /// `.note.package` section is emitted with ELF type `SHT_NOTE`.
112 ///
113 /// # Why this is needed
114 ///
115 /// The note data is produced by the linker script that `build.rs` generates.
116 /// GNU ld assigns `SHT_NOTE` to the output `.note.package` only when an
117 /// input object file contributes a same-named input section already typed
118 /// `SHT_NOTE`; this crate provides exactly that input section through the
119 /// `#[link_section = ".note.package"]` static `PACKAGE_NOTE_SECTION`.
120 /// Without a source-level reference to this crate, cargo/rustc drops the
121 /// `module_info` rlib from the final link, no `SHT_NOTE` input section is
122 /// present, and ld synthesizes the output section from the script's
123 /// `BYTE(...)` directives alone, which yields `SHT_PROGBITS`. The bytes
124 /// are present, but tools like `readelf -n` and `systemd-coredump` filter
125 /// by section type and ignore it.
126 ///
127 /// Invoking `module_info::embed!()` at the crate root creates a `#[used]`
128 /// reference to [`PACKAGE_NOTE_SECTION`], which forces the rlib to link and
129 /// restores the correct section type.
130 ///
131 /// # When to use it
132 ///
133 /// Use `embed!()` when the consuming crate does **not** call `get_module_info!`
134 /// or reference any other `module_info` item at runtime (pure build-time
135 /// embedding). When the consuming crate already calls
136 /// `module_info::get_module_info!(...)` or imports any item from the crate,
137 /// this macro is unnecessary; the rlib is already linked.
138 ///
139 /// # Example
140 ///
141 /// ```ignore
142 /// // Top of src/main.rs or src/lib.rs:
143 /// module_info::embed!();
144 ///
145 /// fn main() {
146 /// // No other module_info references needed for the .note.package
147 /// // section to end up in the binary with SHT_NOTE type.
148 /// }
149 /// ```
150 #[macro_export]
151 macro_rules! embed {
152 () => {
153 #[allow(dead_code)]
154 const _: () = {
155 #[used]
156 static __MODULE_INFO_FORCE_LINK: &'static [u8; 0] =
157 &$crate::PACKAGE_NOTE_SECTION;
158 };
159 };
160 }
161 } else if #[cfg(all(feature = "embed-module-info", not(target_os = "linux")))] {
162 /// No-op stub of `embed!` for non-Linux targets. Present so
163 /// cross-platform builds compile without `#[cfg]` guards at each call site.
164 #[macro_export]
165 macro_rules! embed {
166 () => {};
167 }
168 } else {
169 /// No-op stub of `embed!` for feature-off builds (the
170 /// `embed-module-info` feature is disabled). Present so a consumer that
171 /// uses `module_info` only for `get_version()` / `get_module_version()`
172 /// can still call `module_info::embed!()` in their crate root without a
173 /// feature-gated `#[cfg]` guard. The macro expands to nothing because
174 /// there is no note section to anchor when the feature is off.
175 #[macro_export]
176 macro_rules! embed {
177 () => {};
178 }
179 }
180}
181
/// Options controlling how [`embed_package_metadata`] writes artifacts and
/// whether it emits cargo link-arg directives.
///
/// `EmbedOptions::default()` preserves the original zero-config behavior:
/// write to `$OUT_DIR` and emit `cargo:rustc-link-arg=-T<linker_script.ld>`.
/// Override when the crate is a static library whose final link happens later
/// in the outer build system.
///
/// # Non-exhaustive
///
/// This struct is `#[non_exhaustive]` so new options can land without a
/// SemVer break. The flip side is that code outside this crate cannot build
/// it with a struct literal, not even one that ends in
/// `..Default::default()`. Start from `EmbedOptions::default()` and assign the
/// fields you want to change, as in the example below.
///
/// # Example
/// ```rust,no_run
/// # use module_info::EmbedOptions;
/// // Static-library flow: write the linker script to a directory the outer
/// // build system knows about, so it can pass the script to the final linker.
/// // In practice `out_dir` comes from an env var the outer build sets, or a
/// // subdirectory of `OUT_DIR`; here we use `env::temp_dir()` as a portable
/// // placeholder.
/// let mut opts = EmbedOptions::default();
/// opts.out_dir = Some(std::env::temp_dir().join("module_info_linker"));
/// opts.emit_cargo_link_arg = false;
/// ```
#[cfg(target_os = "linux")]
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct EmbedOptions {
    /// Directory where `linker_script.ld`, `note.package.bin`, and
    /// `module_info.json` are written. When `None`, the `OUT_DIR` environment
    /// variable is used (the normal cargo build-script case).
    pub out_dir: Option<PathBuf>,

    /// When `true`, emit `cargo:rustc-link-arg=-T<path-to-linker_script.ld>`
    /// on stdout so cargo passes the script to the final link step.
    ///
    /// Set to `false` when the current crate is a static library whose final
    /// link happens later in the outer build system. Have that system pass
    /// the linker script to its own linker.
    pub emit_cargo_link_arg: bool,
}

#[cfg(target_os = "linux")]
impl Default for EmbedOptions {
    /// Zero-config defaults: no explicit output directory (fall back to
    /// `OUT_DIR`) and the cargo link-arg directive enabled.
    fn default() -> Self {
        Self {
            out_dir: None,
            emit_cargo_link_arg: true,
        }
    }
}
235
/// Artifacts written by [`embed_package_metadata`].
///
/// Returned so consumers can log, inspect, or pass paths to a later build
/// step (for the static-library flow, typically `linker_script_path`).
///
/// # Paths
///
/// The paths are built by joining file names onto the output directory, so
/// they are absolute whenever that directory is absolute (always the case
/// for cargo's `OUT_DIR`). A relative `EmbedOptions::out_dir` is not
/// canonicalized and therefore yields relative paths here.
///
/// # Non-exhaustive
///
/// `#[non_exhaustive]`. Constructed by the crate, not by consumers.
#[cfg(target_os = "linux")]
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct EmbedArtifacts {
    /// Path to the generated linker script (`linker_script.ld`), as reported
    /// by the step that saved it into the output directory.
    pub linker_script_path: PathBuf,
    /// Path to the raw `.note.package` binary dump inside the output
    /// directory.
    pub note_bin_path: PathBuf,
    /// Path to the embedded JSON metadata (`module_info.json`) inside the
    /// output directory. One key:value pair per line; the file holds the same
    /// text as the `json` field.
    pub json_path: PathBuf,
    /// JSON string written to `module_info.json` and embedded as the note
    /// section's descriptor. One key:value pair per line (not strictly
    /// "compact"); the runtime scan in `extract_module_info` tolerates the
    /// embedded newlines.
    pub json: String,
    /// Byte-encoded linker script body that produced `linker_script.ld`.
    pub linker_script_body: String,
}
264
/// Struct-literal-friendly view over [`PackageMetadata`] whose field names
/// mirror the JSON keys instead of the internal Rust snake_case names.
///
/// With `Info`, a call site reads the way the embedded JSON reads:
/// `r#type`, `moduleVersion`, `osVersion` rather than `module_type`,
/// `module_version`, `os_version`. It is deliberately **not**
/// `#[non_exhaustive]`, because struct-literal construction is the whole
/// point. Hand it to [`new`] to build the note artifacts in one call:
///
/// ```rust,no_run
/// # use module_info::Info;
/// let _ = module_info::new(Info {
///     binary: "my_tool".into(),
///     name: "my_tool".into(),
///     maintainer: "team@contoso.com".into(),
///     version: "1.2.3".into(),
///     moduleVersion: "1.2.3.4".into(),
///     os: "linux".into(),
///     osVersion: "22.04".into(),
///     r#type: "agent".into(),
///     hash: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".into(),
///     ..Default::default()
/// });
/// ```
///
/// Under the hood `new` converts this to a [`PackageMetadata`] and calls
/// [`embed_package_metadata`] with [`EmbedOptions::default()`].
///
/// # Forward compatibility
///
/// **Always terminate the struct literal with `..Default::default()`.**
/// [`PackageMetadata`] is `#[non_exhaustive]`, which forbids struct-literal
/// construction from outside the crate and pushes consumers into the
/// field-assignment pattern that is forward-compatible by construction.
/// `Info` has no such guard: it permits a fully exhaustive literal, so a minor
/// release of this crate that adds a field would break any `Info { … }` call
/// site that named every field. The `..Default::default()` terminator is how
/// consumers buy forward compatibility: new fields fall back to their
/// `Default` value (empty string / disabled) instead of failing to compile.
/// That is the *only* reason `Info` is safe to extend in minor releases. Omit
/// the terminator and the crate can no longer add fields without breaking
/// you.
///
/// # No auto-detection on this path
///
/// Every field in the `Info` literal ships verbatim. `os`/`osVersion` are
/// **not** read from `/etc/os-release`, and `repo`/`branch`/`hash` are
/// **not** read from git. The caller owns every value. If you want the
/// `/etc/os-release` + git auto-detection that the zero-config entry point
/// provides, reach for [`PackageMetadata::from_cargo_toml`] instead,
/// mutate the fields you want to override, and pass the result to
/// [`embed_package_metadata`].
///
/// # Disabling fields
///
/// Seven keys are required at validation time:
/// `binary`, `version`, `moduleVersion`, `name`, `maintainer`, `os`, and
/// `osVersion`. The rest (`r#type`, `repo`, `branch`, `hash`, `copyright`)
/// may be left as the empty string (the `Default` value);
/// `..Default::default()` in the literal above is the idiomatic way to
/// opt out. The embedded JSON still carries every key (the
/// `.note.package` layout is fixed), but the value ships as `""`, which
/// downstream tooling can treat as "disabled."
///
/// # `r#type` tradeoff
///
/// The JSON key is `type`, which collides with Rust's `type` keyword. We use
/// the raw-identifier form `r#type` rather than a `#[serde(rename = "type")]`
/// alias on a differently-named field (say, `module_type`), because the
/// latter would require call sites to remember the rename when constructing
/// the struct literal, re-creating the original mismatch this type is meant
/// to solve. `r#type` is ugly but pays off once: downstream construction
/// reads `r#type: "agent".into()` and the JSON reads `"type":"agent"`.
#[cfg(target_os = "linux")]
#[allow(non_snake_case)] // JSON-key-shaped field names (moduleVersion, osVersion) are intentional.
#[derive(Debug, Clone, Default)]
pub struct Info {
    /// Binary name (JSON key `binary`).
    pub binary: String,
    /// Crate version from Cargo.toml (JSON key `version`).
    pub version: String,
    /// Full 4-part module version (JSON key `moduleVersion`).
    pub moduleVersion: String,
    /// Maintainer contact information (JSON key `maintainer`).
    pub maintainer: String,
    /// Package name (JSON key `name`).
    pub name: String,
    /// Module type: agent, library, executable, etc. (JSON key `type`).
    pub r#type: String,
    /// Git repository name (JSON key `repo`).
    pub repo: String,
    /// Git branch name (JSON key `branch`).
    pub branch: String,
    /// Git commit hash (JSON key `hash`).
    pub hash: String,
    /// Copyright information (JSON key `copyright`).
    pub copyright: String,
    /// Operating system name (JSON key `os`).
    pub os: String,
    /// Operating system version (JSON key `osVersion`).
    pub osVersion: String,
}
367
368#[cfg(target_os = "linux")]
369impl From<Info> for PackageMetadata {
370 fn from(info: Info) -> Self {
371 PackageMetadata {
372 binary: info.binary,
373 version: info.version,
374 module_version: info.moduleVersion,
375 maintainer: info.maintainer,
376 name: info.name,
377 module_type: info.r#type,
378 repo: info.repo,
379 branch: info.branch,
380 hash: info.hash,
381 copyright: info.copyright,
382 os: info.os,
383 os_version: info.osVersion,
384 }
385 }
386}
387
388/// One-call entry point: convert [`Info`] → [`PackageMetadata`] and embed via
389/// [`embed_package_metadata`] with [`EmbedOptions::default()`].
390///
391/// Use this from `build.rs` when you want to supply metadata programmatically
392/// without touching `Cargo.toml` and don't need to override any
393/// [`EmbedOptions`] (custom `out_dir`, suppressed `cargo:rustc-link-arg`, …).
394/// For those cases, convert `Info` to `PackageMetadata` with `.into()` and
395/// call [`embed_package_metadata`] directly.
396///
397/// # Errors
398/// Propagates everything [`embed_package_metadata`] can return, plus
399/// `ModuleInfoError::MalformedJson` if `moduleVersion` is not four
400/// dot-separated numeric parts that each fit in a `u16`.
401#[cfg(target_os = "linux")]
402#[must_use = "new returns EmbedArtifacts; discarding it hides both the written paths and any I/O errors"]
403pub fn new(info: Info) -> ModuleInfoResult<EmbedArtifacts> {
404 embed_package_metadata(&info.into(), &EmbedOptions::default())
405}
406
407/// Validate that `module_version` is exactly four dot-separated numeric parts,
408/// each of which fits in a `u16` (0..=65535).
409///
410/// This mirrors the Windows `VS_FIXEDFILEINFO::FILEVERSION` shape (four
411/// `WORD`-sized components) that Windows-style crash consumers expect to
412/// parse. An out-of-range value silently truncating on the consumer side
413/// would be worse than failing the build, so we enforce the range at embed
414/// time.
415#[cfg(target_os = "linux")]
416fn validate_module_version(module_version: &str) -> ModuleInfoResult<()> {
417 let parts: Vec<&str> = module_version.split('.').collect();
418 if parts.len() != 4 {
419 return Err(ModuleInfoError::MalformedJson(format!(
420 "moduleVersion must have exactly 4 dot-separated parts, got {} in {module_version:?}",
421 parts.len()
422 )));
423 }
424 for (i, part) in parts.iter().enumerate() {
425 if part.is_empty() {
426 return Err(ModuleInfoError::MalformedJson(format!(
427 "moduleVersion part {i} is empty in {module_version:?}"
428 )));
429 }
430 if part.parse::<u16>().is_err() {
431 return Err(ModuleInfoError::MalformedJson(format!(
432 "moduleVersion part {i} ({part:?}) must be a non-negative integer \
433 that fits in 16 bits (0..=65535) in {module_version:?}"
434 )));
435 }
436 }
437 Ok(())
438}
439
440/// Embed a [`PackageMetadata`] value into ELF note artifacts on disk.
441///
442/// Consumers that want to supply metadata programmatically (e.g. from
443/// `build.rs` without editing `Cargo.toml`) or suppress the
444/// `cargo:rustc-link-arg` directive (e.g. a static library whose final link
445/// happens in a later build step) call this directly; the zero-config
446/// [`generate_project_metadata_and_linker_script`] is a thin wrapper over
447/// this function with the default options.
448///
449/// # Errors
450/// Returns `ModuleInfoError::MetadataTooLarge` if the serialized JSON exceeds
451/// the 1 KiB `.note.package` payload limit, or `ModuleInfoError::MalformedJson`
452/// if a required field is missing. `IoError` on filesystem failures.
453#[cfg(target_os = "linux")]
454#[must_use = "embed_package_metadata returns EmbedArtifacts; discarding it hides both the written paths and any I/O errors"]
455pub fn embed_package_metadata(
456 md: &PackageMetadata,
457 opts: &EmbedOptions,
458) -> ModuleInfoResult<EmbedArtifacts> {
459 // Emit rerun directives *before* any failure path. Emitting any `cargo:`
460 // directive opts out of cargo's default "rerun on any file change", so
461 // without these the build script wouldn't re-run when Cargo.toml, git
462 // HEAD, or env vars change. Stamped metadata would silently go stale.
463 emit_rerun_if_directives();
464
465 let (compact_json, linker_script_body) = metadata::render_note_payloads(md)?;
466
467 validate_embedded_json(&compact_json)?;
468
469 note!();
470 note!("-- Module Info --");
471 emit_metadata_notes(&compact_json);
472
473 let out_dir: PathBuf = match &opts.out_dir {
474 Some(p) => p.clone(),
475 None => PathBuf::from(env::var("OUT_DIR")?),
476 };
477 debug!("OUT_DIR: {}", out_dir.display());
478
479 std::fs::create_dir_all(&out_dir)?;
480 // `.ld.inc` signals include fragment (no SECTIONS/INSERT wrapper;
481 // inlined inside linker_script.ld).
482 let linker_script_body_path = out_dir.join("linker_script_body.ld.inc");
483 debug!(
484 "Writing linker script body to: {}",
485 linker_script_body_path.display()
486 );
487 // Header comment + trim of leading blank line prevents the standalone file
488 // from looking like a truncated linker script.
489 let linker_script_body_on_disk = format!(
490 "/* Linker-script fragment. Inlined inside linker_script.ld; not a standalone script. */\n{}",
491 linker_script_body.trim_start_matches('\n')
492 );
493 std::fs::write(
494 &linker_script_body_path,
495 linker_script_body_on_disk.as_bytes(),
496 )?;
497
498 let json_path = out_dir.join("module_info.json");
499 debug!("Writing module info to: {}", json_path.display());
500 std::fs::write(&json_path, compact_json.as_bytes())?;
501
502 // Descriptor must include the same NUL padding the linker script emits
503 // after the JSON (see `render_note_payloads`); otherwise `descsz` covers
504 // only JSON bytes while the section includes padding, and `readelf -n`
505 // warns "Corrupt note: only N bytes remain".
506 let padding = NOTE_ALIGN - (compact_json.len() % NOTE_ALIGN);
507 let mut descriptor = String::with_capacity(compact_json.len() + padding);
508 descriptor.push_str(&compact_json);
509 for _ in 0..padding {
510 descriptor.push('\0');
511 }
512
513 let note = note_section::NoteSection::new(
514 N_TYPE,
515 OWNER,
516 &descriptor,
517 &linker_script_body,
518 NOTE_ALIGN,
519 )?;
520 debug!(
521 "Created note section with {} bytes of data",
522 note.note_section.len()
523 );
524
525 // Strip the leading `.` so the dump isn't a dotfile hidden by default.
526 let note_bin_path = out_dir.join(format!("{}.bin", NOTE_SECTION_NAME.trim_start_matches('.')));
527 debug!("Saving binary note section to: {}", note_bin_path.display());
528 note.save_section(¬e_bin_path)?;
529
530 debug!("Saving linker script...");
531 let linker_script_path = note.save_linker_script(&out_dir)?;
532 debug!("Linker script saved to: {}", linker_script_path.display());
533
534 match link_arg_directive(&linker_script_path, opts.emit_cargo_link_arg) {
535 Some(d) => {
536 debug!("Adding cargo directive: {}", d);
537 println!("{d}");
538 }
539 None => {
540 debug!(
541 "emit_cargo_link_arg=false: caller will pass {} to the final linker",
542 linker_script_path.display()
543 );
544 }
545 }
546
547 Ok(EmbedArtifacts {
548 linker_script_path,
549 note_bin_path,
550 json_path,
551 json: compact_json,
552 linker_script_body,
553 })
554}
555
556/// Validate the serialized metadata JSON: size limit, object shape, required fields.
557#[cfg(target_os = "linux")]
558fn validate_embedded_json(desc_json: &str) -> ModuleInfoResult<()> {
559 if desc_json.len() > constants::MAX_JSON_SIZE {
560 return Err(ModuleInfoError::MetadataTooLarge(format!(
561 "Metadata size {} exceeds limit of {} bytes",
562 desc_json.len(),
563 constants::MAX_JSON_SIZE
564 )));
565 }
566
567 let value: serde_json::Value = serde_json::from_str(desc_json)
568 .map_err(|e| ModuleInfoError::MalformedJson(e.to_string()))?;
569
570 if !value.is_object() {
571 return Err(ModuleInfoError::MalformedJson(
572 "Metadata must be a JSON object".to_string(),
573 ));
574 }
575
576 for field in constants::REQUIRED_JSON_KEYS {
577 // `PackageMetadata` derives `Serialize` with no skip_if, so every key
578 // is always present in the JSON; a bare `is_none()` check here would
579 // pass a `PackageMetadata::default()` value through untouched. Treat
580 // both "missing key" and "empty string value" as missing so a
581 // Default-constructed `PackageMetadata` with a forgotten required
582 // field fails the build instead of silently embedding `""`.
583 let present_and_nonempty = value
584 .get(field)
585 .and_then(|v| v.as_str())
586 .map(|s| !s.is_empty())
587 .unwrap_or(false);
588 if !present_and_nonempty {
589 return Err(ModuleInfoError::MalformedJson(format!(
590 "Required field '{field}' is missing or empty"
591 )));
592 }
593 }
594
595 // `moduleVersion` is a required key and the loop above has already
596 // rejected any payload where it's missing, non-string, or empty. Fetch
597 // it unconditionally here; an `if let Some(...) = ...` arm would silently
598 // no-op if a future refactor ever split the required-keys check from the
599 // presence check, letting malformed payloads slip through a code path
600 // that is supposed to be the range guardrail. Using `.ok_or_else` makes
601 // the dependency on the loop above load-bearing and visible.
602 let mv = value
603 .get("moduleVersion")
604 .and_then(|v| v.as_str())
605 .ok_or_else(|| {
606 ModuleInfoError::MalformedJson(
607 "moduleVersion must be a non-empty string by this point (required-keys check above enforces it)"
608 .to_string(),
609 )
610 })?;
611 validate_module_version(mv)?;
612
613 Ok(())
614}
615
616/// Print metadata key/value pairs as cargo `note!` lines in a stable order.
617#[cfg(target_os = "linux")]
618fn emit_metadata_notes(desc_json: &str) {
619 // Presentation step only; validation already ran. Log via `debug!` so
620 // `MODULE_INFO_DEBUG=true` reveals why the notes pane is empty on the
621 // impossible case of a parse failure slipping through.
622 let map = match serde_json::from_str::<serde_json::Value>(desc_json) {
623 Ok(serde_json::Value::Object(map)) => map,
624 Ok(other) => {
625 debug!("emit_metadata_notes: expected a JSON object, got {}", other);
626 return;
627 }
628 Err(e) => {
629 debug!("emit_metadata_notes: JSON parse failed: {}", e);
630 return;
631 }
632 };
633
634 // Walk `ModuleInfoField::ALL` for stable order. No extra-keys fallback
635 // needed: `PackageMetadata` is `#[non_exhaustive]`, so the key set is
636 // always exactly `ModuleInfoField::ALL`.
637 for field in ModuleInfoField::ALL {
638 let key = field.to_key();
639 if let Some(value) = map.get(key) {
640 match value.as_str() {
641 Some(s) => note!("{}: {}", key, s),
642 None => note!("{}: {}", key, value.to_string()),
643 }
644 }
645 }
646}
647
/// Build the `cargo:rustc-link-arg=-T<path>` directive for the given linker
/// script, or return `None` when `emit` is `false` (the caller opted out via
/// `EmbedOptions::emit_cargo_link_arg = false`).
///
/// Kept as a free function that returns the text instead of printing it, so
/// tests can observe the gating without capturing stdout.
#[cfg(target_os = "linux")]
fn link_arg_directive(linker_script_path: &Path, emit: bool) -> Option<String> {
    if !emit {
        return None;
    }
    let script = linker_script_path.display();
    Some(format!("cargo:rustc-link-arg=-T{script}"))
}
662
/// Emit `cargo:rerun-if-changed` / `cargo:rerun-if-env-changed` directives
/// covering the inputs this crate reads.
///
/// Cargo's default behavior is "rerun build.rs on any file change in the
/// crate directory." Emitting *any* `cargo:` directive (we emit
/// `rustc-link-arg` further down) flips cargo into explicit-only mode,
/// after which it reruns only when a path/env we name here changes. So we
/// have to list every input, or builds silently reuse stale stamped metadata.
///
/// What we cover:
/// - `Cargo.toml` - `[package]` version/name + `[package.metadata.module_info]`
/// - `build.rs` - the caller's build script itself
/// - `.git/HEAD`, `.git/refs`, `.git/packed-refs` - so branch switches and
///   new commits retrigger the git-derived fields (`branch`, `hash`, `repo`)
/// - `/etc/os-release` - so a distro upgrade retriggers `os` / `osVersion`
/// - `MODULE_INFO_DEBUG` - the crate's own debug knob
/// - `CARGO_PKG_NAME`, `CARGO_PKG_VERSION` - Cargo sets these from
///   Cargo.toml so they're technically redundant with
///   `rerun-if-changed=Cargo.toml`, but listing them is cheap and removes a
///   foot-gun if a caller ever sets them externally.
///
/// What we *don't* cover: caller-custom env vars (e.g. `BUILD_BUILDNUMBER`
/// named via `[package.metadata.module_info].version_env_var_name`). The
/// zero-config path emits those itself in `collect_package_metadata` because
/// only that path knows the names. Builder-API consumers that read arbitrary
/// env vars must emit their own `cargo:rerun-if-env-changed=<name>` for
/// each; the crate can't guess them.
#[cfg(target_os = "linux")]
fn emit_rerun_if_directives() {
    // Paths the crate reads during build. The relative ones are written with
    // forward slashes and no existence check is made before printing, so a
    // directive is emitted even for a path that is absent (e.g. no `.git`
    // when building a tarballed source tree).
    //
    // NOTE(review): Cargo is believed to treat a `rerun-if-changed` path that
    // does not exist as always out of date, which would rerun the build
    // script on every build (e.g. when `.git/packed-refs` is absent, or when
    // the package directory is not the repository root) — confirm against
    // the Cargo build-script documentation before relying on these being a
    // silent no-op.
    const WATCHED_PATHS: [&str; 6] = [
        "Cargo.toml",
        "build.rs",
        ".git/HEAD",
        ".git/refs",
        ".git/packed-refs",
        "/etc/os-release",
    ];

    // Env vars the crate itself reads. Custom ones named in Cargo.toml are
    // handled in `collect_package_metadata`.
    const WATCHED_ENV_VARS: [&str; 3] = ["MODULE_INFO_DEBUG", "CARGO_PKG_NAME", "CARGO_PKG_VERSION"];

    for path in WATCHED_PATHS.iter() {
        println!("cargo:rerun-if-changed={path}");
    }

    for env_var in WATCHED_ENV_VARS.iter() {
        println!("cargo:rerun-if-env-changed={env_var}");
    }
}
713
714/// Zero-configuration build-script entry point.
715///
716/// Reads metadata from `Cargo.toml`, env overrides, git, and OS release info,
717/// then embeds it via [`embed_package_metadata`] with
718/// [`EmbedOptions::default()`]. Reach for [`embed_package_metadata`] directly
719/// when you need to supply metadata programmatically or suppress the
720/// `cargo:rustc-link-arg` directive.
721///
722/// # IMPORTANT
723/// Only call from `build.rs`. Cargo sets `OUT_DIR` and related variables for
724/// build scripts; outside that context the call will fail.
725///
726/// # Example
727/// ```rust,no_run
728/// // In build.rs
729/// fn main() -> Result<(), Box<dyn std::error::Error>> {
730/// module_info::generate_project_metadata_and_linker_script()?;
731/// Ok(())
732/// }
733/// ```
734///
735/// # Errors
736/// Returns an error if metadata generation or file operations fail.
737#[cfg(target_os = "linux")]
738#[must_use = "build.rs must propagate errors from this function, otherwise a missing linker script will silently break the ELF note section"]
739pub fn generate_project_metadata_and_linker_script() -> Result<(), Box<dyn std::error::Error>> {
740 let md = PackageMetadata::from_cargo_toml().map_err(|e| {
741 error!("Failed to get project metadata: {}", e);
742 e
743 })?;
744 // Named binding (not `let _`) so `#[must_use]` keeps firing on future
745 // signature changes; paths below are useful in build logs.
746 let artifacts = embed_package_metadata(&md, &EmbedOptions::default())?;
747 debug!(
748 "Wrote linker script: {}",
749 artifacts.linker_script_path.display()
750 );
751 Ok(())
752}
753
/// Non-Linux stub of the zero-configuration build-script entry point.
///
/// Does nothing and always returns `Ok(())`, so a `build.rs` that calls this
/// function compiles and runs unchanged on every target.
///
/// # Errors
/// Never returns an error on this platform; the `Result` return type is kept
/// so the signature matches the Linux implementation.
#[cfg(not(target_os = "linux"))]
#[must_use = "build.rs must propagate errors from this function, otherwise a missing linker script will silently break the ELF note section"]
pub fn generate_project_metadata_and_linker_script() -> Result<(), Box<dyn std::error::Error>> {
    Ok(())
}
759
/// Prints every available module-info field to stdout, one `key: value`
/// line per field.
///
/// Handy for debugging, or for showing version information in a
/// command-line tool.
///
/// # Examples
///
/// Basic usage with simple error handling:
/// ```rust,no_run
/// if module_info::print_module_info().is_ok() {
///     println!("Module info displayed successfully");
/// }
/// ```
///
/// Error handling:
/// ```rust,no_run
/// use module_info::{print_module_info, ModuleInfoError};
///
/// match print_module_info() {
///     Ok(_) => println!("Module info displayed successfully"),
///     Err(ModuleInfoError::NotAvailable(msg)) => eprintln!("Module info not available: {}", msg),
///     Err(e) => eprintln!("Failed to display module info: {}", e),
/// }
/// ```
///
/// # Errors
///
/// Returns `ModuleInfoError::NotAvailable` when:
/// - any of the seven required identity-plus-platform fields (`binary`,
///   `version`, `moduleVersion`, `name`, `maintainer`, `os`, `osVersion`) is
///   missing or empty, which suggests the metadata is missing or corrupted;
/// - the crate was built for a non-Linux platform, where module info is not
///   supported.
///
/// # Note
/// The real implementation is compiled only when the "embed-module-info"
/// feature is enabled *and* the target OS is Linux. In every other
/// configuration a stub with the same signature returns `NotAvailable`,
/// matching the non-Linux `get_module_info!` macro behavior so
/// cross-platform callers compile unchanged.
#[cfg(all(feature = "embed-module-info", target_os = "linux"))]
#[must_use = "print_module_info returns a Result indicating whether the embedded note section was readable; ignoring it will hide missing-metadata errors"]
pub fn print_module_info() -> ModuleInfoResult<()> {
    // The no-argument form of `get_module_info!` takes care of the
    // extern-static declarations and the per-field extraction, and yields a
    // map of the fields that could be read. Any error propagates via `?`.
    let info = get_module_info!()?;

    // Only required keys are validated: optional fields may legitimately be
    // empty (see README "Disabling fields").
    let mut missing: Vec<&str> = Vec::new();
    for key in constants::REQUIRED_JSON_KEYS.iter().copied() {
        let populated = info.get(key).is_some_and(|value| !value.is_empty());
        if !populated {
            missing.push(key);
        }
    }
    if !missing.is_empty() {
        return Err(ModuleInfoError::NotAvailable(format!(
            "Module info appears to be missing or corrupted: required field(s) missing or empty: {}",
            missing.join(", ")
        )));
    }

    // Walk the canonical field list so the output order is stable; fields
    // absent from the map are shown with a placeholder.
    for field in ModuleInfoField::ALL {
        let key = field.to_key();
        if let Some(value) = info.get(key) {
            println!("{key}: {value}");
        } else {
            println!("{key}: <unavailable>");
        }
    }
    Ok(())
}
833
834/// Non-Linux stub: the embedded note section only exists on Linux, so there's
835/// nothing to read. Returns `NotAvailable` with a platform-specific message,
836/// matching the non-Linux `get_module_info!` macro so cross-platform callers
837/// don't need their own `#[cfg]` gate.
838#[cfg(any(not(feature = "embed-module-info"), not(target_os = "linux")))]
839#[must_use = "print_module_info returns a Result indicating whether the embedded note section was readable; ignoring it will hide missing-metadata errors"]
840pub fn print_module_info() -> ModuleInfoResult<()> {
841 Err(ModuleInfoError::NotAvailable(
842 "Module info is only available on Linux platforms with the embed-module-info feature enabled.".to_string(),
843 ))
844}
845
/// Returns the embedded `version` field (from `Cargo.toml`'s `package.version`
/// or `version_env_var_name`) as a `String`.
///
/// Thin wrapper around `get_module_info!(ModuleInfoField::Version)`. See the
/// crate-level "Limitations" section for shared-library symbol-resolution
/// caveats.
///
/// This function is only compiled when the `embed-module-info` feature is
/// enabled; without the feature it does not exist, so callers that must
/// build in both configurations need their own `#[cfg(feature = ...)]` gate.
///
/// # Errors
///
/// Returns `ModuleInfoError::NotAvailable` on non-Linux targets. Returns
/// `NullPointer`, `Utf8Error`, or `MalformedJson` if the note section is
/// missing or corrupt.
#[cfg(feature = "embed-module-info")]
#[must_use = "get_version returns the embedded version string; discarding it hides missing-metadata errors"]
pub fn get_version() -> ModuleInfoResult<String> {
    get_module_info!(ModuleInfoField::Version)
}
863
/// Returns the embedded `moduleVersion` field as a `String`.
///
/// The value is a 4-part identifier typically produced by the build pipeline;
/// see `module_version_env_var_name` in `Cargo.toml`'s
/// `[package.metadata.module_info]`.
///
/// Thin wrapper around `get_module_info!(ModuleInfoField::ModuleVersion)`,
/// with the same symbol-resolution caveats as [`get_version`].
///
/// # Errors
///
/// Whatever `get_module_info!` reports for a single field, which is the same
/// set of errors as [`get_version`].
#[cfg(feature = "embed-module-info")]
#[must_use = "get_module_version returns the embedded 4-part module version; discarding it hides missing-metadata errors"]
pub fn get_module_version() -> ModuleInfoResult<String> {
    get_module_info!(ModuleInfoField::ModuleVersion)
}
874
/// Extract a single module-info field from a linker-script-placed symbol.
///
/// Scans forward from `ptr` for a JSON string value (`"..."`) and returns the
/// bytes between the first two `"` characters as an owned `String`.
///
/// Prefer the [`get_module_info!`] macro: it declares the matching extern
/// static and forwards its address here, so the caller never holds a raw
/// pointer.
///
/// # Safety
/// `ptr` must point to a valid, properly aligned, null-terminated byte
/// sequence inside the read-only `.note.package` payload (i.e. the address
/// of one of the `module_info_*` symbols emitted by the linker script). The
/// memory must remain valid for the duration of the call. Passing any other
/// pointer is undefined behavior. The internal scan is bounded by
/// `MAX_JSON_SIZE + NOTE_ALIGN`, so a missing/corrupted section produces
/// `MalformedJson` rather than reading off the end.
///
/// # Errors
/// - `ModuleInfoError::NullPointer` if `ptr` is null
/// - `ModuleInfoError::Utf8Error` if the bytes are not valid UTF-8
/// - `ModuleInfoError::MalformedJson` if the section is missing/stripped or
///   the value is not surrounded by `"` characters
/// - `ModuleInfoError::NotAvailable` on non-Linux targets
///
/// # Example
/// ```rust,no_run
/// use module_info::{get_module_info, ModuleInfoField, ModuleInfoResult};
/// let binary: ModuleInfoResult<String> = get_module_info!(ModuleInfoField::Binary);
/// ```
///
/// Available only when the `embed-module-info` feature is enabled on Linux.
#[cfg(all(feature = "embed-module-info", target_os = "linux"))]
#[must_use = "extract_module_info returns the parsed field value; discarding it defeats the point of calling it"]
pub unsafe fn extract_module_info(ptr: *const u8) -> ModuleInfoResult<String> {
    // Hard cap on how far the scan may walk. It covers the JSON body (up to
    // `MAX_JSON_SIZE`) plus the trailing padding (up to `NOTE_ALIGN`), i.e.
    // the distance a scan all the way to the terminating NUL could need. A
    // healthy read stops at the closing quote long before this; the cap only
    // matters for stripped, missing, or corrupted sections.
    const MAX_NOTE_VALUE_LEN: usize = constants::MAX_JSON_SIZE + constants::NOTE_ALIGN;

    if ptr.is_null() {
        return Err(ModuleInfoError::NullPointer);
    }

    // Single forward pass: remember where the opening quote was seen and
    // return as soon as the closing quote shows up, so the cost is
    // proportional to the value length, not to the whole payload.
    let mut open_quote: Option<usize> = None;
    for offset in 0..MAX_NOTE_VALUE_LEN {
        // SAFETY: per this function's contract, `ptr` addresses a readable,
        // NUL-terminated byte sequence that outlives the call. The scan
        // returns at the first NUL or at the closing quote and never goes
        // beyond `MAX_NOTE_VALUE_LEN` bytes.
        let byte = unsafe { *ptr.add(offset) };
        match (byte, open_quote) {
            // Sanitization strips embedded NULs from every value at build
            // time, so a NUL here means truncation or corruption. The two
            // messages separate "nothing but zeroes" from "value lost its
            // closing quote" to make triage easier.
            (0, None) => {
                return Err(ModuleInfoError::MalformedJson(
                    "Unexpected NUL before opening quote of JSON value".to_string(),
                ));
            }
            (0, Some(_)) => {
                return Err(ModuleInfoError::MalformedJson(
                    "Unexpected NUL before closing quote of JSON value".to_string(),
                ));
            }
            (b'"', None) => open_quote = Some(offset),
            (b'"', Some(open)) => {
                // Both quotes located; the value is everything strictly
                // between them. Sanitization strips `"` and `\` from values
                // at embed time, so there are no JSON escapes to undo.
                let len = offset - open - 1;
                // SAFETY: every byte in `open + 1 .. offset` was already
                // read by this loop, so the range lies inside the memory the
                // caller guarantees to be valid and immutable.
                let bytes = unsafe { std::slice::from_raw_parts(ptr.add(open + 1), len) };
                let value = std::str::from_utf8(bytes)?;
                return Ok(value.to_string());
            }
            _ => {}
        }
    }

    // The cap was reached without seeing both quotes. Report which quote is
    // missing: no opening quote points at an absent, stripped, or zeroed
    // section, while a missing closing quote points at truncation mid-value.
    let detail = match open_quote {
        None => "no opening quote found",
        Some(_) => "opening quote found but no closing quote",
    };
    Err(ModuleInfoError::MalformedJson(format!(
        "{detail} within {MAX_NOTE_VALUE_LEN} bytes; \
         .note.package section is missing, stripped, or corrupted"
    )))
}
983
/// Non-Linux stub of [`extract_module_info`].
///
/// The ELF `.note.package` section only exists on Linux, so there is nothing
/// to extract on this platform.
///
/// # Safety
/// No safety requirements on this platform: the pointer is never dereferenced
/// (the function returns without touching it). The `unsafe` qualifier is kept
/// only so the signature matches the Linux implementation, letting
/// cross-platform callers use a single call site.
///
/// # Errors
/// Always returns `ModuleInfoError::NotAvailable`.
#[cfg(all(feature = "embed-module-info", not(target_os = "linux")))]
#[must_use = "extract_module_info returns the parsed field value; discarding it defeats the point of calling it"]
pub unsafe fn extract_module_info(_ptr: *const u8) -> ModuleInfoResult<String> {
    let reason =
        "Extract module info is only available on Linux platforms with embed-module-info feature.";
    Err(ModuleInfoError::NotAvailable(reason.to_string()))
}
1001
1002#[cfg(all(test, target_os = "linux"))]
1003mod tests {
1004 use std::{error::Error, fs::File, io::Read, path::Path};
1005
1006 use tempfile::NamedTempFile;
1007
1008 use super::*;
1009
1010 /// Shorthand for tests that propagate with `?`. `Result<(), Box<dyn Error>>`
1011 /// lets us replace `.expect(...)` with `?` and keeps the test module free
1012 /// of the workspace-wide `clippy::disallowed_methods` ban on `expect`.
1013 type TestResult = Result<(), Box<dyn Error>>;
1014
1015 /// Test-only helper: returns true when `git --version` runs cleanly on
1016 /// the test host. Tests that depend on a real git checkout (branch/hash
1017 /// lookup, repo-name parsing) skip gracefully when this returns false so
1018 /// the suite stays green in stripped-down CI images. Lives inside the
1019 /// tests module rather than in `utils.rs` so `#[cfg(test)]` doesn't have
1020 /// to be scattered across production files.
1021 fn git_is_available() -> bool {
1022 match std::process::Command::new("git")
1023 .arg("--version")
1024 .stdin(std::process::Stdio::null())
1025 .output()
1026 {
1027 Ok(output) => output.status.success(),
1028 Err(_) => false,
1029 }
1030 }
1031
1032 #[cfg(feature = "embed-module-info")]
1033 #[test]
1034 #[allow(clippy::unnecessary_cast)]
1035 fn test_extract_module_info() -> TestResult {
1036 let test_str = "\"test_value\"";
1037 let c_str = std::ffi::CString::new(test_str)?;
1038 let ptr = c_str.as_ptr() as *const u8;
1039 // SAFETY: This is safe because we're creating a valid null-terminated C string
1040 // using std::ffi::CString which guarantees that the pointer is valid and properly
1041 // null-terminated for the duration of this function call
1042 let value = unsafe { extract_module_info(ptr) }?;
1043 assert_eq!(value, "test_value");
1044 Ok(())
1045 }
1046
1047 /// Lock in the early-exit refactor: a value followed by a long
1048 /// non-NUL trailer must still return only the bytes between the
1049 /// quotes. Pre-refactor the function walked all the way to NUL; the
1050 /// new implementation should never read past the closing quote, so
1051 /// the trailer never affects the parsed value.
1052 #[cfg(feature = "embed-module-info")]
1053 #[test]
1054 fn extract_module_info_stops_at_closing_quote() -> TestResult {
1055 // `"hello",\n"version":..." then NUL: a snapshot of how the
1056 // bytes look in a real `.note.package` payload between fields.
1057 let bytes: Vec<u8> = b"\"hello\",\n\"version\":\"1.2.3\"\0".to_vec();
1058 // SAFETY: the vec lives for the duration of the `unsafe` block
1059 // and is NUL-terminated within MAX_NOTE_VALUE_LEN.
1060 let value = unsafe { extract_module_info(bytes.as_ptr()) }?;
1061 assert_eq!(
1062 value, "hello",
1063 "scan must stop at the closing quote, not walk past into the next field"
1064 );
1065 Ok(())
1066 }
1067
1068 /// A NUL byte before any quote (e.g., the section was stripped or
1069 /// the symbol resolved against zeroed memory) must surface as
1070 /// `MalformedJson`, not silently return an empty string.
1071 #[cfg(feature = "embed-module-info")]
1072 #[test]
1073 fn extract_module_info_rejects_leading_nul() {
1074 let bytes: [u8; 4] = [0, 0, 0, 0];
1075 match unsafe { extract_module_info(bytes.as_ptr()) } {
1076 Err(ModuleInfoError::MalformedJson(msg)) => assert!(
1077 msg.contains("NUL"),
1078 "error must mention the NUL trigger: {msg}"
1079 ),
1080 other => panic!("expected MalformedJson(...NUL...), got {other:?}"),
1081 }
1082 }
1083
1084 /// A buffer with an opening quote but no closing quote within the
1085 /// cap should report the cap-hit diagnostic, not a generic "missing
1086 /// quote" error. This is the path that fires when the section was
1087 /// stripped from the binary at link time.
1088 #[cfg(feature = "embed-module-info")]
1089 #[test]
1090 fn extract_module_info_reports_cap_on_runaway_scan() {
1091 // 2 KB of `'a'` bytes (well over MAX_JSON_SIZE = 1024 +
1092 // NOTE_ALIGN = 4) with one opening quote at byte 0 and no
1093 // closing quote anywhere in the cap.
1094 let mut bytes = vec![b'a'; 2048];
1095 bytes[0] = b'"';
1096 match unsafe { extract_module_info(bytes.as_ptr()) } {
1097 Err(ModuleInfoError::MalformedJson(msg)) => assert!(
1098 msg.contains("missing, stripped, or corrupted"),
1099 "cap-hit error must keep the diagnostic phrasing: {msg}"
1100 ),
1101 other => panic!("expected MalformedJson(...corrupted...), got {other:?}"),
1102 }
1103 }
1104
1105 #[test]
1106 fn test_align_len() {
1107 assert_eq!(utils::align_len(5, NOTE_ALIGN), 8);
1108 assert_eq!(utils::align_len(8, NOTE_ALIGN), 8);
1109 assert_eq!(utils::align_len(9, NOTE_ALIGN), 12);
1110 }
1111
1112 /// Locks in the saturating-overflow contract for `align_len`: when
1113 /// `len + (align - 1)` would overflow `u32`, the function must
1114 /// saturate to `u32::MAX` (so downstream size checks notice) rather
1115 /// than wrap to a value below `len` (which the older naive
1116 /// implementation did, silently corrupting the `.note.package`
1117 /// layout). Without this test the `None => u32::MAX` arm is dead
1118 /// code per llvm-cov.
1119 #[test]
1120 fn align_len_saturates_on_u32_overflow() {
1121 // u32::MAX + 3 (mask for align=4) overflows; must saturate.
1122 assert_eq!(utils::align_len(u32::MAX, 4), u32::MAX);
1123 // u32::MAX is already aligned to 1, so the add doesn't overflow
1124 // there; pick a value where the carry actually fires.
1125 assert_eq!(utils::align_len(u32::MAX - 1, 4), u32::MAX);
1126 }
1127
1128 /// `NoteSection::new` rejects any owner string whose name+NUL
1129 /// length is below 4 bytes. The check is a guard against a swapped
1130 /// or empty owner accidentally producing a malformed note in
1131 /// release-mode build scripts (where `debug_assert!` would no-op).
1132 /// Without this test the `n_namesz < 4` arm is dead code per
1133 /// llvm-cov.
1134 #[test]
1135 fn note_section_rejects_short_owner() {
1136 use crate::note_section::NoteSection;
1137 // Empty owner: namesz = 0 + 1 (NUL) = 1, < 4.
1138 // `NoteSection` doesn't impl `Debug`, so `.expect_err(...)` is
1139 // unavailable and we have to match explicitly.
1140 match NoteSection::new(N_TYPE, "", "desc", "", NOTE_ALIGN) {
1141 Err(ModuleInfoError::Other(boxed)) => assert!(
1142 boxed.to_string().contains("n_namesz"),
1143 "diagnostic must name the field: {boxed}"
1144 ),
1145 Err(other) => panic!("expected Other(...n_namesz...), got {other:?}"),
1146 Ok(_) => panic!("empty owner must be rejected"),
1147 }
1148 // Two-byte owner: namesz = 2 + 1 = 3, still < 4.
1149 match NoteSection::new(N_TYPE, "AB", "desc", "", NOTE_ALIGN) {
1150 Err(ModuleInfoError::Other(_)) => {}
1151 Err(other) => panic!("expected Other(_), got {other:?}"),
1152 Ok(_) => panic!("two-byte owner must be rejected"),
1153 }
1154 }
1155
1156 /// `validate_embedded_json` rejects payloads larger than
1157 /// `MAX_JSON_SIZE`. The cap exists because the `.note.package`
1158 /// payload limit is documented as 1 KiB; without this test the
1159 /// `MetadataTooLarge` arm of `embed_package_metadata`'s call to
1160 /// `validate_embedded_json` is dead code per llvm-cov.
1161 #[test]
1162 fn validate_embedded_json_rejects_oversized_payload() {
1163 // Build a JSON payload over MAX_JSON_SIZE by stuffing a single
1164 // string field. The shape doesn't have to be valid metadata;
1165 // the size check fires first.
1166 let big_value = "x".repeat(constants::MAX_JSON_SIZE + 16);
1167 let json = format!(r#"{{"binary":"{big_value}"}}"#);
1168 let err = validate_embedded_json(&json)
1169 .expect_err("payloads over MAX_JSON_SIZE must be rejected");
1170 match err {
1171 ModuleInfoError::MetadataTooLarge(msg) => assert!(
1172 msg.contains("exceeds limit"),
1173 "diagnostic must mention the cap: {msg}"
1174 ),
1175 other => panic!("expected MetadataTooLarge, got {other:?}"),
1176 }
1177 }
1178
1179 /// `validate_embedded_json` rejects non-object JSON shapes. The
1180 /// `is_object()` branch fires for arrays / scalars / etc.; without
1181 /// a focused test this arm is uncovered per llvm-cov even though
1182 /// the runtime risk (someone hand-crafting a bad payload through
1183 /// the builder API) is real.
1184 #[test]
1185 fn validate_embedded_json_rejects_non_object_shapes() {
1186 for bad in ["[]", "null", "42", r#""string""#] {
1187 let err = validate_embedded_json(bad).expect_err("non-object JSON must be rejected");
1188 assert!(
1189 matches!(err, ModuleInfoError::MalformedJson(_)),
1190 "expected MalformedJson for {bad:?}"
1191 );
1192 }
1193 }
1194
1195 /// `module_info::new(Info { … })` is the one-call entry point. The
1196 /// existing `info_embed_round_trip_writes_artifacts` test goes
1197 /// directly through `embed_package_metadata` instead of through
1198 /// `new`, so the `new` body itself stays uncovered. Exercise it
1199 /// here. The function reads `OUT_DIR` (because `EmbedOptions`
1200 /// defaults to `out_dir = None`), so set a temp directory before
1201 /// the call and restore the prior value after.
1202 ///
1203 /// This is the *only* test that touches the process-global env;
1204 /// keeping it self-contained avoids racing the rest of the suite,
1205 /// which uses explicit `out_dir` overrides.
1206 #[cfg(feature = "embed-module-info")]
1207 #[test]
1208 fn new_one_call_entry_point_writes_artifacts() -> TestResult {
1209 use std::sync::Mutex;
1210 // Single global lock around `OUT_DIR` mutation: every test that
1211 // touches process-global env must serialize, otherwise parallel
1212 // test execution will see stale values. We're the only mutator
1213 // today, but the lock makes that contract explicit.
1214 static ENV_LOCK: Mutex<()> = Mutex::new(());
1215 let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1216
1217 let tmp = tempfile::tempdir()?;
1218 let prior = std::env::var_os("OUT_DIR");
1219 // SAFETY: `set_var`/`remove_var` are `unsafe` on Rust 1.80+ but
1220 // safe on the MSRV (1.74). Use the safe API; CI's stable cell
1221 // will warn but not fail because the deprecation is `unsafe`,
1222 // not a compile error.
1223 std::env::set_var("OUT_DIR", tmp.path());
1224 let result = new(Info {
1225 binary: "one_call_test".into(),
1226 name: "one_call_test".into(),
1227 version: "1.0.0".into(),
1228 moduleVersion: "1.0.0.0".into(),
1229 maintainer: "team@contoso.com".into(),
1230 os: "linux".into(),
1231 osVersion: "test".into(),
1232 ..Default::default()
1233 });
1234 match prior {
1235 Some(p) => std::env::set_var("OUT_DIR", p),
1236 None => std::env::remove_var("OUT_DIR"),
1237 }
1238
1239 let artifacts = result?;
1240 // The artifacts must land under the OUT_DIR we set.
1241 assert!(artifacts.linker_script_path.starts_with(tmp.path()));
1242 assert!(artifacts.json_path.exists());
1243 let parsed: serde_json::Value = serde_json::from_str(&artifacts.json)?;
1244 assert_eq!(parsed["binary"], "one_call_test");
1245 Ok(())
1246 }
1247
1248 #[test]
1249 fn test_get_distro_info() -> TestResult {
1250 use crate::utils::get_distro_info;
1251 let distro_info = get_distro_info()?;
1252 assert!(!distro_info.0.is_empty());
1253 assert!(!distro_info.1.is_empty());
1254 Ok(())
1255 }
1256
1257 /// The binary note section assembled by `NoteSection::new` must be 4-byte
1258 /// aligned in total length. The ELF spec requires it, and a misaligned
1259 /// section silently corrupts subsequent note entries. `NoteSection`
1260 /// handles this via `align_len` on the owner and desc blocks. This test
1261 /// exercises desc lengths that stress every residue class mod 4 so a
1262 /// future refactor that drops the
1263 /// alignment padding on one of the blocks is caught immediately.
1264 #[test]
1265 fn note_section_is_4byte_aligned_for_every_residue() {
1266 use crate::note_section::NoteSection;
1267 for desc_len in [0usize, 1, 2, 3, 4, 5, 7, 8, 17, 100, 1023] {
1268 let desc = "x".repeat(desc_len);
1269 let note = match NoteSection::new(N_TYPE, OWNER, &desc, "", NOTE_ALIGN) {
1270 Ok(n) => n,
1271 Err(e) => panic!("NoteSection::new failed for desc_len={desc_len}: {e}"),
1272 };
1273 assert_eq!(
1274 note.note_section.len() % NOTE_ALIGN,
1275 0,
1276 "note section must be 4-byte aligned (desc_len={desc_len}, got {})",
1277 note.note_section.len()
1278 );
1279 }
1280 }
1281
1282 #[test]
1283 fn test_project_metadata() {
1284 if !git_is_available() {
1285 println!("Skipping test_project_metadata because git cli is not available");
1286 return;
1287 }
1288
1289 use crate::metadata::project_metadata;
1290 let result = project_metadata();
1291
1292 assert!(
1293 result.is_ok(),
1294 "Project metadata should be created successfully: {:?}",
1295 result.err()
1296 );
1297
1298 if let Ok(res) = result {
1299 let metadata = res.0;
1300 assert!(
1301 metadata.contains("\"binary\":"),
1302 "JSON should contain binary field"
1303 );
1304 assert!(
1305 metadata.contains("\"moduleVersion\":"),
1306 "JSON should contain moduleVersion field"
1307 );
1308 assert!(
1309 metadata.contains("\"version\":"),
1310 "JSON should contain version field"
1311 );
1312 assert!(
1313 metadata.contains("\"maintainer\":"),
1314 "JSON should contain maintainer field"
1315 );
1316 assert!(
1317 metadata.contains("\"name\":"),
1318 "JSON should contain name field"
1319 );
1320 assert!(
1321 metadata.contains("\"type\":"),
1322 "JSON should contain type field"
1323 );
1324
1325 assert!(
1326 metadata.contains("\"repo\":") || metadata.contains("\"Unknown\""),
1327 "JSON should contain repo field or fallback"
1328 );
1329 assert!(
1330 metadata.contains("\"branch\":")
1331 || metadata.contains("\"main\"")
1332 || metadata.contains("\"unknown\""),
1333 "JSON should contain branch field or fallback"
1334 );
1335 assert!(
1336 metadata.contains("\"hash\":") || metadata.contains("\"unknown\""),
1337 "JSON should contain hash field or fallback"
1338 );
1339
1340 // Other required fields
1341 assert!(
1342 metadata.contains("\"copyright\":"),
1343 "JSON should contain copyright field"
1344 );
1345 assert!(metadata.contains("\"os\":"), "JSON should contain os field");
1346 assert!(
1347 metadata.contains("\"osVersion\":"),
1348 "JSON should contain osVersion field"
1349 );
1350 }
1351 }
1352
1353 /// Exercises the production Cargo.toml-reading path end-to-end against
1354 /// this crate's own manifest. The assertions are intentionally fork-safe:
1355 /// an external fork may change `copyright` (and must), but the contract
1356 /// that Cargo.toml values round-trip through `from_cargo_toml` and
1357 /// populate the expected fields stays fixed.
1358 #[test]
1359 fn test_package_metadata_from_cargo_toml() -> TestResult {
1360 let md = PackageMetadata::from_cargo_toml()?;
1361
1362 assert_eq!(md.name, "module-info");
1363 assert_eq!(md.binary, "module-info");
1364
1365 // Version is formatted to 3 numeric parts by `format_version_parts`.
1366 let parts: Vec<&str> = md.version.split('.').collect();
1367 assert_eq!(
1368 parts.len(),
1369 3,
1370 "version should have three dot-separated parts, got {:?}",
1371 md.version
1372 );
1373 for part in &parts {
1374 assert!(
1375 part.chars().all(|c| c.is_ascii_digit()),
1376 "version part {part:?} must be numeric"
1377 );
1378 }
1379
1380 // `copyright` comes from `[package.metadata.module_info].copyright`
1381 // in this crate's own Cargo.toml. Forks will legitimately set their
1382 // own value, so the contract we lock in is "non-empty and not the
1383 // `Unknown` fallback that triggers when the key is missing",
1384 // nothing organization-specific.
1385 assert!(
1386 !md.copyright.is_empty() && md.copyright != "Unknown",
1387 "copyright must come from Cargo.toml, not the Unknown fallback; got {:?}",
1388 md.copyright
1389 );
1390 Ok(())
1391 }
1392
1393 #[test]
1394 fn test_get_git_info() -> TestResult {
1395 if !git_is_available() {
1396 println!("Skipping test_get_git_info because git is not available");
1397 return Ok(());
1398 }
1399
1400 use crate::utils::get_git_info;
1401 let git_info = get_git_info()?;
1402
1403 // Just verify we get back something for the repo name
1404 // Don't assert exact values since they can change
1405 // Verify we get back non-empty values
1406 assert!(!git_info.0.is_empty(), "Branch name should not be empty"); // branch
1407 assert!(!git_info.1.is_empty(), "Commit hash should not be empty"); // hash
1408 assert!(
1409 !git_info.2.is_empty(),
1410 "Repository name should not be empty"
1411 ); // repo name
1412
1413 // In a git repo this returns the parsed remote name; outside one
1414 // (e.g. testing from a published tarball) it falls back to the
1415 // "unknown" sentinel. Either is valid here.
1416 assert!(git_info.2 == "unknown" || !git_info.2.is_empty());
1417
1418 println!(
1419 "Git Info - Branch: {}, Hash: {}, Repo: {}",
1420 git_info.0, git_info.1, git_info.2
1421 );
1422 Ok(())
1423 }
1424
1425 #[test]
1426 fn test_json_key_value_parse() -> TestResult {
1427 let json_input = r#"{
1428"binary": "sample_crashing_process",
1429"moduleVersion": "0.1.0.0",
1430"version": "0.1.0",
1431"maintainer": "Maintainer contact/UUID etc",
1432"name": "sample_crashing_process",
1433"type": "agent",
1434"repo": "Module_Info",
1435"branch": "main",
1436"hash": "76930c41aa16e31bb1e565b12c4285cde1939af3",
1437"copyright": "Microsoft",
1438"os": "Ubuntu",
1439"osVersion": "20.04"
1440}
1441"#;
1442
1443 let parsed: serde_json::Value = serde_json::from_str(json_input)?;
1444 assert_eq!(parsed["binary"], "sample_crashing_process");
1445 assert_eq!(parsed["moduleVersion"], "0.1.0.0");
1446 assert_eq!(parsed["version"], "0.1.0");
1447 assert_eq!(parsed["maintainer"], "Maintainer contact/UUID etc");
1448 assert_eq!(parsed["name"], "sample_crashing_process");
1449 assert_eq!(parsed["type"], "agent");
1450 assert_eq!(parsed["repo"], "Module_Info");
1451 assert_eq!(parsed["branch"], "main");
1452 assert_eq!(parsed["hash"], "76930c41aa16e31bb1e565b12c4285cde1939af3");
1453 assert_eq!(parsed["copyright"], "Microsoft");
1454 assert_eq!(parsed["os"], "Ubuntu");
1455 assert_eq!(parsed["osVersion"], "20.04");
1456 Ok(())
1457 }
1458
1459 #[test]
1460 fn test_get_project_path() {
1461 use crate::utils::get_project_path;
1462 let project_path = get_project_path();
1463 assert!(project_path.exists());
1464 }
1465
1466 #[test]
1467 fn test_get_cargo_toml_content() -> TestResult {
1468 use crate::utils::get_cargo_toml_content;
1469 let cargo_toml = get_cargo_toml_content()?;
1470 assert!(cargo_toml.get("package").is_some());
1471 Ok(())
1472 }
1473
/// Writes a `NoteSection` to a temporary file via `save_section` and checks
/// that the bytes on disk equal the in-memory `note_section` buffer, then
/// spot-checks the owner name and the `n_type` word of the serialized note.
#[test]
fn test_save_section() -> TestResult {
    // Size of the fixed note header: n_namesz, n_descsz, n_type (4 bytes each).
    let header_len = 12;

    // Scratch file the section is written into.
    let scratch = NamedTempFile::new()?;
    let target = scratch.path().to_path_buf();

    // Build the note from small sample inputs and persist it.
    let note = crate::note_section::NoteSection::new(
        N_TYPE,
        OWNER,
        r#"{"binary":"test","version":"1.0.0"}"#,
        "BYTE(0x01); BYTE(0x02);",
        NOTE_ALIGN,
    )?;
    note.save_section(&target)?;

    // Slurp the file back into memory.
    let mut on_disk = Vec::new();
    let mut reader = File::open(&target)?;
    reader.read_to_end(&mut on_disk)?;

    // The file must be a byte-for-byte copy of the in-memory section.
    assert!(!on_disk.is_empty());
    assert_eq!(on_disk.len(), note.note_section.len());
    assert_eq!(on_disk, note.note_section);

    // There must be room for the whole header.
    assert!(on_disk.len() >= header_len);

    // The owner name starts immediately after the header.
    let expected_owner = OWNER.as_bytes();
    let owner_end = header_len + expected_owner.len();
    let actual_owner = on_disk
        .get(header_len..owner_end)
        .ok_or("owner slice is out of bounds")?;
    assert_eq!(actual_owner, expected_owner);

    // The third header word is n_type, stored little-endian.
    let actual_n_type = on_disk.get(8..12).ok_or("n_type slice is out of bounds")?;
    assert_eq!(actual_n_type, &N_TYPE.to_le_bytes());
    Ok(())
}
1519
/// `PackageMetadata` can be built with struct-update syntax
/// (`..Default::default()`): explicitly assigned fields keep their values
/// and every unassigned field falls back to the empty string. This is the
/// forward-compatible construction pattern for build.rs consumers that
/// supply metadata programmatically.
#[test]
fn test_package_metadata_default_construction() {
    let metadata = PackageMetadata {
        hash: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".into(),
        maintainer: "team@contoso.com".into(),
        module_version: "1.2.3.4".into(),
        version: "1.2.3".into(),
        name: "my_tool".into(),
        binary: "my_tool".into(),
        ..Default::default()
    };

    // Explicitly assigned fields are preserved.
    assert_eq!(metadata.binary, "my_tool");
    assert_eq!(metadata.version, "1.2.3");
    assert_eq!(metadata.module_version, "1.2.3.4");

    // Unassigned fields take the `Default` value, i.e. "".
    assert_eq!(metadata.module_type, "");
    assert_eq!(metadata.repo, "");
    assert_eq!(metadata.os, "");
}
1545
/// Static-library flow: with a caller-chosen `out_dir` and
/// `emit_cargo_link_arg = false`, `embed_package_metadata` must place the
/// linker script, the note binary and the JSON file inside that directory,
/// and the JSON it returns must carry the supplied values. The outer build
/// system owns the final link in this flow, which is why the
/// `cargo:rustc-link-arg` directive is switched off.
#[cfg(feature = "embed-module-info")]
#[test]
fn test_embed_package_metadata_custom_out_dir_no_link_arg() -> TestResult {
    let scratch = tempfile::tempdir()?;
    let out_dir = scratch.path();

    let metadata = PackageMetadata {
        binary: "test_binary".into(),
        name: "test_binary".into(),
        version: "1.2.3".into(),
        module_version: "1.2.3.4".into(),
        maintainer: "team@contoso.com".into(),
        hash: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".into(),
        module_type: "agent".into(),
        repo: "test_repo".into(),
        branch: "main".into(),
        copyright: "Test".into(),
        os: "Ubuntu".into(),
        os_version: "22.04".into(),
        ..Default::default()
    };
    let options = EmbedOptions {
        out_dir: Some(out_dir.to_path_buf()),
        emit_cargo_link_arg: false,
        ..Default::default()
    };

    let artifacts = embed_package_metadata(&metadata, &options)?;

    // Every reported artifact path is rooted in the requested directory.
    assert!(artifacts.linker_script_path.starts_with(out_dir));
    assert!(artifacts.note_bin_path.starts_with(out_dir));
    assert!(artifacts.json_path.starts_with(out_dir));

    // The artifacts were really written, not just named.
    assert!(artifacts.linker_script_path.exists());
    assert!(artifacts.note_bin_path.exists());
    assert!(artifacts.json_path.exists());

    // The returned JSON parses and echoes the values we passed in.
    let parsed: serde_json::Value = serde_json::from_str(&artifacts.json)?;
    for (key, expected) in [
        ("binary", "test_binary"),
        ("version", "1.2.3"),
        ("moduleVersion", "1.2.3.4"),
    ] {
        assert_eq!(parsed[key], expected);
    }
    Ok(())
}
1597
/// `embed_package_metadata` must refuse metadata in which a required field
/// is empty. Here `os_version` is left at its `Default` value (""), and the
/// resulting `MalformedJson` error has to name the field by its JSON key,
/// `osVersion`.
#[cfg(feature = "embed-module-info")]
#[test]
fn test_embed_package_metadata_rejects_empty_required_field() -> TestResult {
    let scratch = tempfile::tempdir()?;
    let options = EmbedOptions {
        out_dir: Some(scratch.path().to_path_buf()),
        emit_cargo_link_arg: false,
        ..Default::default()
    };
    // `os_version` is intentionally absent from this literal, so the
    // struct-update syntax fills it with the empty string.
    let metadata = PackageMetadata {
        binary: "b".into(),
        name: "n".into(),
        version: "1.0.0".into(),
        module_version: "1.0.0.0".into(),
        maintainer: "m".into(),
        os: "linux".into(),
        ..Default::default()
    };

    let err = embed_package_metadata(&metadata, &options)
        .expect_err("embed must reject PackageMetadata with empty required field");
    if let ModuleInfoError::MalformedJson(msg) = &err {
        assert!(
            msg.contains("osVersion"),
            "error must name the empty required field: {msg}"
        );
    } else {
        panic!("expected MalformedJson, got {err:?}");
    }
    Ok(())
}
1641
/// Direct test for the required-field guardrail: feed JSON that lacks
/// exactly one required key and confirm it is rejected with a
/// `MalformedJson` error naming that key.
#[test]
fn test_validate_embedded_json_rejects_missing_required_fields() {
    // Only "maintainer" is absent. Every other required key, including the
    // platform keys `os` / `osVersion`, is supplied so the rejection can
    // only be attributed to the key under test. Optional fields such as
    // `hash`/`repo`/`branch` are simply left out.
    let bad_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.0.0.0","name":"n","os":"linux","osVersion":"22.04"}"#;
    let err =
        validate_embedded_json(bad_json).expect_err("missing required field must be rejected");
    if let ModuleInfoError::MalformedJson(msg) = &err {
        assert!(
            msg.contains("maintainer"),
            "error must name the missing field: {msg}"
        );
    } else {
        panic!("expected MalformedJson, got {err:?}");
    }
}
1662
/// Direct test for the empty-string half of the required-field guardrail.
/// A required key that is present but set to `""` is treated like a missing
/// one, so consumers cannot silently ship a note section with an empty
/// `binary` or `maintainer`. Non-required fields
/// (hash/repo/branch/type/copyright) are *allowed* to be empty; that is the
/// documented "disable" knob.
#[test]
fn test_validate_embedded_json_rejects_empty_required_fields() {
    // "maintainer" is present but empty. All other required keys, including
    // `os` / `osVersion`, carry real values so the empty string is the only
    // thing the validator can object to.
    let bad_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.0.0.0","name":"n","maintainer":"","os":"linux","osVersion":"22.04"}"#;
    let err =
        validate_embedded_json(bad_json).expect_err("empty required field must be rejected");
    if let ModuleInfoError::MalformedJson(msg) = &err {
        assert!(
            msg.contains("maintainer"),
            "error must name the empty field: {msg}"
        );
    } else {
        panic!("expected MalformedJson, got {err:?}");
    }
}
1685
/// Counterpart to the rejection tests: a payload that fills in the required
/// keys (`binary`, `version`, `moduleVersion`, `name`, `maintainer`, `os`,
/// `osVersion`) while leaving `type`, `repo`, `branch`, `hash` and
/// `copyright` empty must validate. This pins the "disabled field = empty
/// string" contract against regressions such as re-adding `hash` to
/// `REQUIRED_JSON_KEYS`.
#[test]
fn test_validate_embedded_json_accepts_empty_optional_fields() {
    let ok_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.0.0.0","name":"n","maintainer":"m","type":"","repo":"","branch":"","hash":"","copyright":"","os":"linux","osVersion":"1"}"#;
    match validate_embedded_json(ok_json) {
        Ok(_) => {}
        Err(e) => {
            panic!("optional fields may be empty; only the identity keys are required. got {e:?}")
        }
    }
}
1698
/// Pins the zero-configuration defaults of `EmbedOptions`: no explicit
/// `out_dir` (so `$OUT_DIR` is used) and `emit_cargo_link_arg` switched on,
/// which lets plain build.rs consumers skip option handling entirely.
#[test]
fn test_embed_options_default_preserves_bc_behavior() {
    let EmbedOptions {
        out_dir,
        emit_cargo_link_arg,
        ..
    } = EmbedOptions::default();
    assert!(out_dir.is_none());
    assert!(emit_cargo_link_arg);
}
1708
/// The linker script body must always carry at least one `BYTE(0x00);`
/// NUL terminator, whatever the JSON byte length is modulo 4. The tricky
/// case is a payload whose length is already a multiple of 4: a padding
/// formula of the shape `len % 4` yields 0 there and would emit no
/// terminator at all.
///
/// Hitting a 4-aligned length by hand is awkward because the JSON mixes
/// fixed keys with variable values, so the test sweeps the length instead
/// and asserts the stronger "always emits NUL padding" invariant at every
/// step.
#[test]
fn render_note_payloads_always_emits_nul_padding() -> TestResult {
    // Grow exactly ONE field by one ASCII byte per iteration. Assuming the
    // rendered JSON embeds `binary` verbatim, its length then takes five
    // consecutive values and therefore visits every residue modulo 4,
    // including the 4-aligned one. Growing two fields in lock-step would
    // move the length in steps of 2 and reach only two of the four residues.
    for suffix_len in 0..=4 {
        let md = PackageMetadata {
            binary: format!("b{}", "x".repeat(suffix_len)),
            name: "n".into(),
            version: "1.0.0".into(),
            module_version: "1.0.0.0".into(),
            maintainer: "m".into(),
            os: "linux".into(),
            os_version: "22.04".into(),
            ..Default::default()
        };
        let (_json, linker_script_body) = crate::metadata::render_note_payloads(&md)?;
        assert!(
            linker_script_body.contains("BYTE(0x00);"),
            "linker script must contain a BYTE(0x00) even when the payload is 4-aligned (suffix_len={suffix_len})"
        );
    }
    Ok(())
}
1745
/// `link_arg_directive` decides whether a `cargo:rustc-link-arg=-T<path>`
/// line is produced. Both outcomes are asserted so the
/// "emit_cargo_link_arg=false means no directive" contract, which
/// static-library flows rely on, cannot regress unnoticed.
#[test]
fn link_arg_directive_gates_on_flag() {
    let script = Path::new("/tmp/linker_script.ld");

    // Flag on: a directive pointing at the script is returned.
    let directive = link_arg_directive(script, true)
        .expect("emit_cargo_link_arg=true must produce a directive");
    assert_eq!(directive, "cargo:rustc-link-arg=-T/tmp/linker_script.ld");

    // Flag off: nothing is returned.
    let suppressed = link_arg_directive(script, false);
    assert!(
        suppressed.is_none(),
        "emit_cargo_link_arg=false must suppress the directive"
    );
}
1762
/// Drift guard: each entry of `constants::REQUIRED_JSON_KEYS` has to be the
/// `to_key()` of some variant listed in `ModuleInfoField::ALL`. Adding a
/// required key without a matching enum variant (or removing the variant)
/// trips this test before the mismatch reaches a consumer.
#[test]
fn required_keys_are_subset_of_module_info_fields() {
    // Collect the JSON key of every known field.
    let mut known = std::collections::HashSet::<&str>::new();
    for field in ModuleInfoField::ALL.iter() {
        known.insert(field.to_key());
    }

    for key in constants::REQUIRED_JSON_KEYS {
        let is_known = known.contains(key);
        assert!(
            is_known,
            "REQUIRED_JSON_KEYS contains {key:?} which is not in ModuleInfoField::ALL"
        );
    }
}
1778
/// `Info` must be constructible from a complete struct literal, and
/// converting it into `PackageMetadata` must carry every field across:
/// JSON-key-shaped names on the `Info` side (`moduleVersion`, `type`,
/// `osVersion`) map onto the snake_case names on the `PackageMetadata` side
/// (`module_version`, `module_type`, `os_version`).
#[test]
fn info_struct_literal_and_conversion() {
    let source = Info {
        binary: "b".into(),
        version: "1.2.3".into(),
        moduleVersion: "1.2.3.4".into(),
        maintainer: "m".into(),
        name: "n".into(),
        r#type: "agent".into(),
        repo: "r".into(),
        branch: "br".into(),
        hash: "h".into(),
        copyright: "c".into(),
        os: "o".into(),
        osVersion: "ov".into(),
    };

    let converted = PackageMetadata::from(source);

    // Fields whose names are identical on both sides.
    assert_eq!(converted.binary, "b");
    assert_eq!(converted.version, "1.2.3");
    // Renamed: moduleVersion -> module_version.
    assert_eq!(converted.module_version, "1.2.3.4");
    assert_eq!(converted.maintainer, "m");
    assert_eq!(converted.name, "n");
    // Renamed: type -> module_type.
    assert_eq!(converted.module_type, "agent");
    assert_eq!(converted.repo, "r");
    assert_eq!(converted.branch, "br");
    assert_eq!(converted.hash, "h");
    assert_eq!(converted.copyright, "c");
    assert_eq!(converted.os, "o");
    // Renamed: osVersion -> os_version.
    assert_eq!(converted.os_version, "ov");
}
1813
/// Struct-update syntax on `Info` (`..Default::default()`) is the
/// forward-compatible construction pattern for consumers. `Info` is
/// intentionally not `#[non_exhaustive]`, so partial literals like the one
/// below must compile alongside full literals, and the fields they leave
/// out must come back as empty strings.
#[test]
fn info_default_fills_missing_fields_with_empty_strings() {
    let partial = Info {
        moduleVersion: "1.2.3.4".into(),
        binary: "b".into(),
        ..Default::default()
    };

    // Assigned fields keep their values.
    assert_eq!(partial.binary, "b");
    assert_eq!(partial.moduleVersion, "1.2.3.4");

    // Everything else is defaulted to "".
    assert_eq!(partial.version, "");
    assert_eq!(partial.r#type, "");
    assert_eq!(partial.osVersion, "");
}
1832
/// End-to-end check of the `Info` -> `PackageMetadata` ->
/// `embed_package_metadata` path, which is the route `new(Info { .. })` is
/// described as taking (convert, then dispatch). An explicit `out_dir` is
/// passed so the test never has to mutate `OUT_DIR` in the shared process
/// environment, which would be racy when tests run in parallel. Together
/// with the conversion test this covers what the thin `new` wrapper does.
#[cfg(feature = "embed-module-info")]
#[test]
fn info_embed_round_trip_writes_artifacts() -> TestResult {
    let scratch = tempfile::tempdir()?;
    let out_dir = scratch.path();

    let info = Info {
        binary: "b".into(),
        name: "n".into(),
        version: "1.2.3".into(),
        moduleVersion: "1.2.3.4".into(),
        maintainer: "m".into(),
        r#type: "agent".into(),
        hash: "deadbeef".into(),
        os: "linux".into(),
        osVersion: "22.04".into(),
        ..Default::default()
    };
    let metadata: PackageMetadata = info.into();

    let options = EmbedOptions {
        out_dir: Some(out_dir.to_path_buf()),
        emit_cargo_link_arg: false,
        ..Default::default()
    };
    let artifacts = embed_package_metadata(&metadata, &options)?;

    // Artifacts land in the requested directory and the JSON file exists.
    assert!(artifacts.linker_script_path.starts_with(out_dir));
    assert!(artifacts.json_path.exists());

    // Fields renamed during the conversion show up under their JSON keys.
    let parsed: serde_json::Value = serde_json::from_str(&artifacts.json)?;
    for (key, expected) in [("moduleVersion", "1.2.3.4"), ("type", "agent")] {
        assert_eq!(parsed[key], expected);
    }
    Ok(())
}
1873
/// `validate_module_version` accepts four-part versions whose components
/// span the whole u16 range, from `0` up to `65535`.
#[test]
fn validate_module_version_accepts_valid_values() -> TestResult {
    let accepted = ["0.0.0.0", "1.2.3.4", "65535.65535.65535.65535", "10.0.0.1"];
    for candidate in accepted {
        validate_module_version(candidate)?;
    }
    Ok(())
}
1882
/// A version with anything other than four dot-separated parts must be
/// rejected outright (no silent padding or truncation), and the
/// `MalformedJson` message must spell out the "exactly 4" rule.
#[test]
fn validate_module_version_rejects_wrong_part_count() {
    let rejected = ["", "1", "1.2", "1.2.3", "1.2.3.4.5"];
    for candidate in rejected {
        let err =
            validate_module_version(candidate).expect_err("wrong part count must be rejected");
        if let ModuleInfoError::MalformedJson(msg) = &err {
            assert!(
                msg.contains("exactly 4"),
                "error must explain the 4-part rule: {msg}"
            );
        } else {
            panic!("expected MalformedJson, got {err:?}");
        }
    }
}
1900
/// Components above `u16::MAX` must be rejected at embed time; the stated
/// rationale is that consumers parsing the 4-WORD VS_FIXEDFILEINFO shape
/// would otherwise truncate them. The `MalformedJson` message has to
/// mention the 16-bit limit.
#[test]
fn validate_module_version_rejects_overflow() {
    // 65536 (= u16::MAX + 1) in each of the four positions, plus a larger
    // out-of-range value in the first position.
    let rejected = [
        "65536.0.0.0",
        "0.65536.0.0",
        "0.0.65536.0",
        "0.0.0.65536",
        "99999.1.2.3",
    ];
    for candidate in rejected {
        let err = validate_module_version(candidate).expect_err("u16 overflow must be rejected");
        if let ModuleInfoError::MalformedJson(msg) = &err {
            assert!(
                msg.contains("16 bits"),
                "error must mention the u16 constraint: {msg}"
            );
        } else {
            panic!("expected MalformedJson, got {err:?}");
        }
    }
}
1925
/// Components that are negative or contain non-digit text cannot be
/// represented as a u16 and must be rejected up front rather than being
/// coerced to some fallback value.
#[test]
fn validate_module_version_rejects_non_numeric() {
    let rejected = ["-1.0.0.0", "a.b.c.d", "1.2.x.4", "1.2.3.4a", "v1.2.3.4"];
    for candidate in rejected {
        let outcome = validate_module_version(candidate);
        outcome.expect_err("non-numeric parts must be rejected");
    }
}
1934
/// An empty component between (or after) dots must be rejected with a
/// `MalformedJson` error. Depending on the input shape either the
/// part-count check or the empty-part check may fire first, so both
/// messages are accepted.
#[test]
fn validate_module_version_rejects_empty_part() {
    let rejected = ["1.2.3.", "1..3.4", "..1.2", "1.2..4"];
    for candidate in rejected {
        let err = validate_module_version(candidate).expect_err("empty part must be rejected");
        match err {
            ModuleInfoError::MalformedJson(msg) => {
                let names_the_problem = msg.contains("empty") || msg.contains("exactly 4");
                assert!(names_the_problem, "unexpected error message: {msg}");
            }
            _ => panic!("expected MalformedJson"),
        }
    }
}
1953
/// `validate_embedded_json` must apply the u16 range check to
/// `moduleVersion` in addition to the presence check, so the guardrail
/// covers every path into `embed_package_metadata`. The payload below is
/// complete except that the last version component (99999) is out of range.
#[test]
fn validate_embedded_json_rejects_bad_module_version() {
    let bad_json = r#"{"binary":"b","version":"1.0.0","moduleVersion":"1.2.3.99999","name":"n","maintainer":"m","os":"linux","osVersion":"22.04"}"#;
    let err = validate_embedded_json(bad_json)
        .expect_err("out-of-range moduleVersion must be rejected");
    if let ModuleInfoError::MalformedJson(msg) = &err {
        assert!(
            msg.contains("moduleVersion"),
            "error must name the field: {msg}"
        );
    } else {
        panic!("expected MalformedJson, got {err:?}");
    }
}
1972
/// Drift guard: for every variant in `ModuleInfoField::ALL`,
/// `PackageMetadata::field_value` must return the same string that serde
/// serializes under that variant's JSON key. This catches a new enum
/// variant landing without `field_value` or the struct being extended.
#[test]
fn package_metadata_field_value_covers_all_variants() -> TestResult {
    // Every field gets a distinct value so a crossed mapping cannot pass.
    // The literal deliberately lists all fields without `..Default::default()`.
    let md = PackageMetadata {
        binary: "bv".into(),
        version: "vv".into(),
        module_version: "mv".into(),
        maintainer: "mn".into(),
        name: "nv".into(),
        module_type: "tv".into(),
        repo: "rv".into(),
        branch: "bn".into(),
        hash: "hv".into(),
        copyright: "cv".into(),
        os: "ov".into(),
        os_version: "ov2".into(),
    };

    // Serialize and re-parse to obtain the view a JSON consumer would see.
    let serialized = serde_json::to_string(&md)?;
    let json: serde_json::Value = serde_json::from_str(&serialized)?;

    for field in ModuleInfoField::ALL.iter().copied() {
        let from_method = md.field_value(field);
        let from_json = match json.get(field.to_key()).and_then(|v| v.as_str()) {
            Some(text) => text,
            None => panic!("JSON missing key for {field:?}"),
        };
        assert_eq!(
            from_method, from_json,
            "field_value and serde output disagree for {field:?}"
        );
    }
    Ok(())
}
2009}