Skip to main content

host_identity/sources/
linux.rs

1//! Linux identity sources: `/etc/machine-id`, D-Bus machine-id, SMBIOS/DMI,
2//! and the opt-in glibc `/etc/hostid` binary file.
3//!
4//! # Identity scope
5//!
6//! These sources live at two distinct scopes:
7//!
8//! - `MachineIdFile`, `DbusMachineIdFile`, and `LinuxHostIdFile` are
9//!   **per-host-OS**: written once when the OS is provisioned (or, for
10//!   `/etc/hostid`, by `sethostid(2)` / `zgenhostid` / the image build)
11//!   and tied to the install.
12//! - `DmiProductUuid` is **per-instance**: the SMBIOS system UUID is
13//!   assigned by the hypervisor (on VMs) or the OEM (on bare metal)
14//!   and identifies the hardware/VM, not the OS install.
15//!
16//! In container deployments the distinction collapses: none of these
17//! sources is isolated per container, so a process inside a container
18//! reads the same value every sibling container on that host reads.
19//! `/sys/devices/virtual/dmi/id/product_uuid` isn't namespaced at all
20//! — the container sees the underlying VM's SMBIOS UUID directly.
21//! Red Hat container images go further and bind-mount the host's
22//! `/etc/machine-id` into the container, so even the "file" path
23//! leaks host identity into the container. See Docker community
24//! discussion of [host `machine-id` visibility in containers](https://forums.docker.com/t/host-machine-id-visible-from-containers/100533)
25//! and the sysbox issue [open sys/devices/virtual/dmi/id/product_uuid](https://github.com/nestybox/sysbox/issues/405)
26//! for the non-namespaced sysfs path.
27//!
28//! `ContainerId` (and, in pods, `KubernetesPodUid`) must sit above
29//! these sources in any chain that wants per-container identity; the
30//! default chains do this for you. See `docs/algorithm.md` →
31//! "Identity scope" for the full discussion.
32//!
33//! Authoritative references:
34//!
35//! - [`machine-id(5)`](https://www.freedesktop.org/software/systemd/man/latest/machine-id.html)
36//!   — systemd-managed per-host identifier, initialised once on first boot.
37//!   The `uninitialized` sentinel is specified there as the marker for the
38//!   early-boot window before the ID has been written.
39//! - [D-Bus specification, UUIDs](https://dbus.freedesktop.org/doc/dbus-specification.html#uuids)
40//!   — defines `/var/lib/dbus/machine-id` as the interoperable machine UUID.
41//!   On systemd systems this is a symlink to `/etc/machine-id`.
42//! - [`sysfs-class-dmi(5)` / kernel sysfs-firmware-dmi-tables](https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-firmware-dmi-tables)
43//!   and the SMBIOS specification from the
44//!   [DMTF](https://www.dmtf.org/dsp/DSP0134) — `/sys/class/dmi/id/product_uuid`
45//!   exposes the SMBIOS system UUID (type 1 "UUID" field). Readable by root
46//!   only on most distributions; this crate swallows `PermissionDenied` to
47//!   let unprivileged callers fall through to other sources.
48//! - GNU coreutils [`hostid(1)`](https://www.gnu.org/software/coreutils/hostid),
49//!   Linux [`gethostid(3)`](https://man7.org/linux/man-pages/man3/gethostid.3.html),
50//!   and [`sethostid(2)`](https://man7.org/linux/man-pages/man2/sethostid.2.html)
51//!   — document `/etc/hostid` as four raw bytes in native byte order.
52//!   `LinuxHostIdFile` reads the file directly; see its rustdoc for why the
53//!   `gethostid(3)` fallback (fabricated from `gethostname()`) is not used.
54
55use std::io::Read;
56use std::path::{Path, PathBuf};
57
58use crate::error::Error;
59use crate::source::{Probe, Source, SourceKind};
60use crate::sources::util::{NormalizeOutcome, classify, read_capped};
61
// Generates a path-backed identity source.
//
// Arguments: the type name, the `SourceKind` it reports, the standard
// path (a string literal, also spliced into the rustdoc of `new()`), and
// the rustdoc text for the generated type.
//
// The expansion contains the struct, its constructors and path accessor,
// a `Default` impl, and a `Source` impl whose `probe()` goes through
// `read_machine_id_file`.
macro_rules! file_source {
    ($name:ident, $kind:expr, $default:expr, $doc:literal) => {
        #[doc = $doc]
        #[derive(Debug, Clone)]
        pub struct $name {
            path: PathBuf,
        }

        impl $name {
            #[doc = concat!("Read from the standard path (`", $default, "`).")]
            #[must_use]
            pub fn new() -> Self {
                Self::at($default)
            }

            /// Read from a caller-supplied path. Useful for tests and unusual
            /// image layouts.
            #[must_use]
            pub fn at(path: impl Into<PathBuf>) -> Self {
                let path = path.into();
                Self { path }
            }

            /// The configured path.
            #[must_use]
            pub fn path(&self) -> &Path {
                self.path.as_path()
            }
        }

        impl Default for $name {
            /// Same as `new()`: the standard path.
            fn default() -> Self {
                Self::new()
            }
        }

        impl Source for $name {
            fn kind(&self) -> SourceKind {
                $kind
            }

            fn probe(&self) -> Result<Option<Probe>, Error> {
                read_machine_id_file($kind, self.path())
            }
        }
    };
}
109
// Defines `MachineIdFile` via `file_source!`: kind `SourceKind::MachineId`,
// standard path `/etc/machine-id`. The final argument is the rustdoc that
// the macro attaches to the generated struct.
110file_source!(
111    MachineIdFile,
112    SourceKind::MachineId,
113    "/etc/machine-id",
114    "`/etc/machine-id` — the systemd-managed primary host identifier on modern Linux.\n\n\
115     # Known-duplicate filtering\n\n\
116     A non-trivial fraction of Linux installs ship or end up with machine-id\n\
117     values that are identical across many machines (Whonix's deliberate\n\
118     anti-fingerprinting constant; official container images that bake a\n\
119     single hex value into the filesystem layer; synthetic all-same-nibble\n\
120     values from broken image builds). Returning one of those would produce\n\
121     a silently non-unique identity shared by every host that inherits it,\n\
122     so this source additionally rejects, by returning `Ok(None)` with a\n\
123     `log::debug!` entry:\n\n\
124     - A curated list of public, citable shared values (`MACHINE_ID_DENYLIST`).\n\
125     - Any 32-hex-digit value whose nibbles are all the same character\n\
126       (`00…0`, `11…1`, `aa…a`, etc.). The systemd spec forbids all-zero\n\
127       machine-ids outright; the rest are only ever seen on synthetic or\n\
128       corrupt images.\n\n\
129     Anything not matching the filter passes through unchanged — the intent\n\
130     is to reject *known* garbage, not to gate on machine-id shape. A false\n\
131     positive here drops a legitimate host from identity resolution, so a\n\
132     missing entry is strictly preferable to an over-broad rule."
133);
134
// Defines `DbusMachineIdFile` via `file_source!`: kind
// `SourceKind::DbusMachineId`, standard path `/var/lib/dbus/machine-id`.
// The macro routes its probe through `read_machine_id_file`, the same
// helper `MachineIdFile` uses.
135file_source!(
136    DbusMachineIdFile,
137    SourceKind::DbusMachineId,
138    "/var/lib/dbus/machine-id",
139    "`/var/lib/dbus/machine-id` — D-Bus machine ID. Often a symlink to `/etc/machine-id` \
140     but present on its own on some minimal images. Shares the same \
141     known-duplicate filter as [`MachineIdFile`]."
142);
143
/// SMBIOS system UUID, read from `/sys/class/dmi/id/product_uuid`.
///
/// The value differs per physical or virtual machine, so it tells apart
/// cloned VMs that share a machine-id. On most distributions the file is
/// readable by root only.
///
/// # Vendor-placeholder filtering
///
/// Firmware frequently ships default UUIDs that are stable *per model*
/// rather than per machine. Using one as an identity would silently give
/// every box with the same mainboard the same ID, so the following are
/// rejected (the probe returns `Ok(None)` and emits a `log::debug!`
/// entry):
///
/// - `00000000-0000-0000-0000-000000000000` (all-zero)
/// - `ffffffff-ffff-ffff-ffff-ffffffffffff` (all-F, case-insensitive)
/// - Any UUID whose 32 hex nibbles are one repeated character
///   (`11111111-…`, `aaaaaaaa-…`, etc.)
/// - A conservative curated list of well-known vendor placeholders
///   (e.g. `03000200-0400-0500-0006-000700080009`), sourced from
///   [fwupd](https://github.com/fwupd/fwupd) and `dmidecode`.
///
/// Every other value passes through untouched: the filter exists to drop
/// *known* garbage, not to validate UUID shape.
#[derive(Debug, Clone)]
pub struct DmiProductUuid {
    path: PathBuf,
}

impl DmiProductUuid {
    /// Read from the standard path (`/sys/class/dmi/id/product_uuid`).
    #[must_use]
    pub fn new() -> Self {
        Self::at("/sys/class/dmi/id/product_uuid")
    }

    /// Read from a caller-supplied path. Useful for tests and unusual
    /// image layouts.
    #[must_use]
    pub fn at(path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        Self { path }
    }

    /// The configured path.
    #[must_use]
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }
}

impl Default for DmiProductUuid {
    /// Same as [`DmiProductUuid::new`]: the standard sysfs path.
    fn default() -> Self {
        Self::new()
    }
}
199
200impl Source for DmiProductUuid {
201    fn kind(&self) -> SourceKind {
202        SourceKind::Dmi
203    }
204    fn probe(&self) -> Result<Option<Probe>, Error> {
205        read_dmi_file(&self.path)
206    }
207}
208
/// Machine-id values known to be shared by many hosts, all in lowercase.
///
/// Every entry is a public, citable constant that any host booted from
/// the same image or install reads back verbatim; hashing it would make
/// the `HostId`s of unrelated machines collide without warning. The list
/// is intentionally conservative: an absent entry merely lets the value
/// through, which is a milder failure than a false positive that drops a
/// legitimate host.
///
/// Container-image values may change whenever upstream rebuilds the
/// image, so each one is annotated with its source image and the date it
/// was observed, letting a later maintainer re-scan and prune stale
/// entries.
const MACHINE_ID_DENYLIST: &[&str] = &[
    // Whonix / Kicksecure: deliberate anti-fingerprinting constant that is
    // identical on every install.
    // https://www.whonix.org/wiki/Protocol-Leak-Protection_and_Fingerprinting-Protection
    "b08dfa6083e7567a1921a715000001fb",
    // docker.io/library/oraclelinux:9, observed 2026-04-19.
    "d495c4b7bb8244639186ef65305fd685",
    // docker.io/library/oraclelinux:8, observed 2026-04-19.
    "e28a15f597cd4693bb61f1f3e8447cbd",
    // jrei/systemd-debian:latest, a popular systemd-enabled base for
    // Ansible/Molecule testing; observed 2026-04-19.
    "4c010dc413ad444698de6ee4677331b9",
    // jrei/systemd-ubuntu:latest, observed 2026-04-19.
    "a7570853ab864bbbbfc8c54b14eeaf8f",
    // geerlingguy/docker-ubuntu2204-ansible:latest, observed 2026-04-19.
    "5b4bb40898b2416087b6224f176978fb",
    // geerlingguy/docker-debian12-ansible:latest, observed 2026-04-19.
    "3948e4ca87b64871b31c9a49920b9834",
    // geerlingguy/docker-rockylinux9-ansible:latest, observed 2026-04-19.
    "835aa90928e143e3ae09efcd0c5cb118",
];
240
241/// Return `true` if `value` is a known-duplicate machine-id that should be
242/// rejected rather than used as an identity.
243fn is_machine_id_garbage(value: &str) -> bool {
244    let lower = value.to_ascii_lowercase();
245    MACHINE_ID_DENYLIST.contains(&lower.as_str()) || is_all_same_nibble_hex32(&lower)
246}
247
/// Return `true` when `value` is exactly 32 bytes long and every byte is
/// the same ASCII hex digit.
///
/// This catches the all-zero machine-id that systemd forbids as well as
/// the synthetic repeated-digit values (`1…1`, `a…a`, …) found on broken
/// images. The comparison is byte-exact, so callers wanting
/// case-insensitive matching must normalise case first.
///
/// Intentionally kept separate from [`is_all_same_nibble_uuid`]: that
/// predicate tolerates hyphens (SMBIOS/DMI UUID format), whereas a
/// machine-id is specified as 32 hex digits with no separators, so any
/// hyphen makes this one return `false`.
fn is_all_same_nibble_hex32(value: &str) -> bool {
    if value.len() != 32 {
        return false;
    }
    match value.as_bytes().split_first() {
        Some((head, tail)) => head.is_ascii_hexdigit() && tail.iter().all(|b| b == head),
        None => false,
    }
}
260
261fn read_machine_id_file(kind: SourceKind, path: &Path) -> Result<Option<Probe>, Error> {
262    match read_id_file(kind, path)? {
263        Some(probe) if is_machine_id_garbage(probe.value()) => {
264            log::debug!(
265                "host-identity: {kind:?} value {} matches a known-duplicate machine-id; \
266                 falling through",
267                probe.value()
268            );
269            Ok(None)
270        }
271        other => Ok(other),
272    }
273}
274
/// Vendor-placeholder SMBIOS UUIDs, all in lowercase.
///
/// Collected from the quirks list of fwupd's UEFI plugin and from
/// `dmidecode` field notes. The list is intentionally short: a value
/// that is not listed simply passes through, which is the milder failure
/// mode.
const DMI_PLACEHOLDER_UUIDS: &[&str] = &[
    // Golden default from Supermicro / AMI firmware, seen on a wide range
    // of boards.
    "03000200-0400-0500-0006-000700080009",
];
283
284/// Return `true` if `value` looks like SMBIOS vendor-default garbage that
285/// should be rejected rather than used as an identity.
286fn is_dmi_garbage(value: &str) -> bool {
287    let lower = value.to_ascii_lowercase();
288    if DMI_PLACEHOLDER_UUIDS.iter().any(|p| *p == lower) {
289        return true;
290    }
291    is_all_same_nibble_uuid(&lower)
292}
293
/// Return `true` when `value`, with every `-` removed, consists of
/// exactly 32 characters that are all the same ASCII hex digit.
///
/// This covers the canonical hyphenated 8-4-4-4-12 all-zero and all-F
/// UUIDs as well as `11111111-…`, `aaaaaaaa-…`, and so on. Hyphens are
/// dropped wherever they occur, so their position is not validated.
///
/// The 32-character requirement keeps short non-UUID values such as
/// `"abc"` from matching. The comparison is character-exact, so callers
/// wanting case-insensitive matching must normalise case first.
///
/// Intentionally kept separate from [`is_all_same_nibble_hex32`]: that
/// predicate demands 32 hex digits with no separators (machine-id
/// format), while this one tolerates hyphens (SMBIOS/DMI format).
fn is_all_same_nibble_uuid(value: &str) -> bool {
    let mut nibbles = value.chars().filter(|c| *c != '-');
    let lead = match nibbles.next() {
        Some(c) if c.is_ascii_hexdigit() => c,
        _ => return false,
    };
    // `lead` is already consumed, so 31 further matching characters are
    // needed to reach 32. `all` stops at the first mismatch.
    let mut following = 0usize;
    let uniform = nibbles.all(|c| {
        following += 1;
        c == lead
    });
    uniform && following == 31
}
322
323fn read_dmi_file(path: &Path) -> Result<Option<Probe>, Error> {
324    match read_id_file(SourceKind::Dmi, path)? {
325        Some(probe) if is_dmi_garbage(probe.value()) => {
326            log::debug!(
327                "host-identity: DMI product_uuid {} matches a known vendor-placeholder; \
328                 falling through",
329                probe.value()
330            );
331            Ok(None)
332        }
333        other => Ok(other),
334    }
335}
336
337// Open `path` and classify the common benign failures (file absent,
338// permission denied) as `Ok(None)` so the resolver can fall through
339// to the next source. Every other I/O error is surfaced with full
340// path context so the operator can diagnose it.
341fn open_id_file(kind: SourceKind, path: &Path) -> Result<Option<std::fs::File>, Error> {
342    match std::fs::File::open(path) {
343        Ok(file) => Ok(Some(file)),
344        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
345        Err(err) if err.kind() == std::io::ErrorKind::PermissionDenied => {
346            log::debug!(
347                "host-identity: permission denied reading {}",
348                path.display()
349            );
350            Ok(None)
351        }
352        Err(source) => Err(Error::Io {
353            source_kind: kind,
354            path: PathBuf::from(path),
355            source,
356        }),
357    }
358}
359
360fn read_id_file(kind: SourceKind, path: &Path) -> Result<Option<Probe>, Error> {
361    match read_capped(path) {
362        Ok(content) => match classify(&content) {
363            NormalizeOutcome::Usable(value) => Ok(Some(Probe::new(kind, value))),
364            NormalizeOutcome::Sentinel => Err(Error::Uninitialized {
365                source_kind: kind,
366                path: PathBuf::from(path),
367            }),
368            NormalizeOutcome::Empty => Ok(None),
369        },
370        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
371        Err(err) if err.kind() == std::io::ErrorKind::PermissionDenied => {
372            log::debug!(
373                "host-identity: permission denied reading {}",
374                path.display()
375            );
376            Ok(None)
377        }
378        Err(source) => Err(Error::Io {
379            source_kind: kind,
380            path: PathBuf::from(path),
381            source,
382        }),
383    }
384}
385
/// The glibc binary hostid file, `/etc/hostid`: four bytes consumed by
/// [`gethostid(3)`](https://man7.org/linux/man-pages/man3/gethostid.3.html)
/// and [`hostid(1)`](https://www.gnu.org/software/coreutils/hostid).
///
/// This source is opt-in: it is **not** included in
/// [`crate::sources::default_chain`] or
/// [`crate::sources::network_default_chain`]. Stock Linux distributions
/// do not ship the file (nothing has run `sethostid`), so probing it by
/// default would charge every caller a syscall for an almost guaranteed
/// miss. It is offered as a constructible type so that operators who know
/// the file exists (`OpenZFS` hosts, minimal non-systemd images, Red Hat
/// containers that bind-mount `machine-id` but not `hostid`) can add it
/// to a chain themselves.
///
/// # File format
///
/// glibc writes the hostid as four raw bytes in native byte order
/// ([`sethostid(2)`](https://man7.org/linux/man-pages/man2/sethostid.2.html)).
/// The bytes are decoded with `u32::from_ne_bytes(...)` and rendered as
/// eight lowercase hex digits, matching what `hostid(1)` prints.
///
/// # Why we don't call `gethostid(3)`
///
/// If `/etc/hostid` does not exist, glibc invents a value from
/// `gethostname()` → IPv4 lookup. Such a value is neither stable nor
/// unique, and letting it through as an identity would be actively
/// harmful. Reading the file directly avoids that: a missing file yields
/// `Ok(None)` and the resolver moves on.
///
/// # Probe behaviour
///
/// - File absent / `PermissionDenied` → `Ok(None)`.
/// - File size ≠ 4 bytes → `Ok(None)` with a `log::debug!` entry
///   (defensive: sheared reads, FreeBSD text-UUID `/etc/hostid`
///   mistakenly placed on Linux).
/// - Value `0x00000000` or `0xffffffff` → `Ok(None)` with a
///   `log::debug!` entry (unset or known-garbage sentinels).
/// - Other I/O error → `Err(Error::Io)`.
/// - Otherwise → `Ok(Some(Probe::new(SourceKind::LinuxHostId, "<hex>")))`.
#[derive(Debug, Clone)]
pub struct LinuxHostIdFile {
    path: PathBuf,
}

impl LinuxHostIdFile {
    /// Read from the standard path (`/etc/hostid`).
    #[must_use]
    pub fn new() -> Self {
        Self::at("/etc/hostid")
    }

    /// Read from a caller-supplied path. Useful for tests and unusual
    /// image layouts.
    #[must_use]
    pub fn at(path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        Self { path }
    }

    /// The configured path.
    #[must_use]
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }
}

impl Default for LinuxHostIdFile {
    /// Same as [`LinuxHostIdFile::new`]: the standard path.
    fn default() -> Self {
        Self::new()
    }
}
456
457impl Source for LinuxHostIdFile {
458    fn kind(&self) -> SourceKind {
459        SourceKind::LinuxHostId
460    }
461    fn probe(&self) -> Result<Option<Probe>, Error> {
462        read_linux_hostid(&self.path)
463    }
464}
465
466fn read_linux_hostid(path: &Path) -> Result<Option<Probe>, Error> {
467    let Some(file) = open_id_file(SourceKind::LinuxHostId, path)? else {
468        return Ok(None);
469    };
470    // Read up to five bytes so a file whose size is 4 fills the buffer
471    // exactly while a larger file (FreeBSD text UUID etc.) overshoots
472    // and is rejected.
473    let mut buf = Vec::with_capacity(5);
474    file.take(5)
475        .read_to_end(&mut buf)
476        .map_err(|source| Error::Io {
477            source_kind: SourceKind::LinuxHostId,
478            path: PathBuf::from(path),
479            source,
480        })?;
481    let Ok(bytes): Result<[u8; 4], _> = buf.as_slice().try_into() else {
482        log::debug!(
483            "host-identity: /etc/hostid at {} is {} bytes, expected 4; falling through",
484            path.display(),
485            buf.len(),
486        );
487        return Ok(None);
488    };
489    let value = u32::from_ne_bytes(bytes);
490    if value == 0 || value == u32::MAX {
491        log::debug!(
492            "host-identity: /etc/hostid at {} is {value:#010x} (unset/sentinel); falling through",
493            path.display()
494        );
495        return Ok(None);
496    }
497    Ok(Some(Probe::new(
498        SourceKind::LinuxHostId,
499        format!("{value:08x}"),
500    )))
501}
502
/// Best-effort check for running inside a container.
///
/// Follows the checks agent-go performs: the process is considered
/// containerised when `/.dockerenv` exists, or when `/proc/1/cgroup` can
/// be read and one of its segments (split on `/`, `:`, `-`, `.`, `_` and
/// newlines) equals a known runtime marker. An unreadable cgroup file
/// counts as "no marker found".
///
/// Consumed by [`crate::HostId::in_container`] for provenance only. It
/// has no influence on source selection; that is the resolver's job, so
/// add or remove [`crate::sources::ContainerId`] to change behaviour.
#[must_use]
pub(crate) fn in_container() -> bool {
    const MARKERS: &[&str] = &["docker", "kubepods", "containerd", "podman", "lxc", "crio"];
    const SEPARATORS: [char; 6] = ['/', ':', '-', '.', '_', '\n'];

    // Cheapest signal first; the cgroup file is read only when this misses.
    if Path::new("/.dockerenv").exists() {
        return true;
    }
    match std::fs::read_to_string("/proc/1/cgroup") {
        Ok(cgroup) => cgroup
            .split(SEPARATORS)
            .any(|segment| MARKERS.contains(&segment)),
        Err(_) => false,
    }
}
519
520#[cfg(test)]
521mod tests {
522    use super::*;
523    use std::io::Write;
524    use tempfile::{NamedTempFile, TempDir};
525
526    #[test]
527    fn machine_id_file_rejects_uninitialized_sentinel() {
528        let mut f = NamedTempFile::new().unwrap();
529        writeln!(f, "uninitialized").unwrap();
530        let err = read_id_file(SourceKind::MachineId, f.path()).expect_err("sentinel must error");
531        match err {
532            Error::Uninitialized { path, source_kind } => {
533                assert_eq!(path, f.path());
534                assert_eq!(source_kind, SourceKind::MachineId);
535            }
536            other => panic!("expected Uninitialized, got {other:?}"),
537        }
538    }
539
540    #[test]
541    fn machine_id_file_accepts_normal_value() {
542        let mut f = NamedTempFile::new().unwrap();
543        writeln!(f, "abc123").unwrap();
544        let probe = read_id_file(SourceKind::MachineId, f.path())
545            .unwrap()
546            .unwrap();
547        assert_eq!(probe.value(), "abc123");
548    }
549
550    #[test]
551    fn machine_id_file_missing_is_none() {
552        let dir = TempDir::new().unwrap();
553        let missing = dir.path().join("definitely-not-there");
554        let probe = read_id_file(SourceKind::MachineId, &missing).unwrap();
555        assert!(probe.is_none());
556    }
557
558    #[test]
559    fn machine_id_file_empty_is_none() {
560        let f = NamedTempFile::new().unwrap();
561        let probe = read_id_file(SourceKind::MachineId, f.path()).unwrap();
562        assert!(probe.is_none());
563    }
564
565    #[test]
566    fn machine_id_file_whitespace_only_is_none() {
567        let mut f = NamedTempFile::new().unwrap();
568        write!(f, "   \n\t ").unwrap();
569        let probe = read_id_file(SourceKind::MachineId, f.path()).unwrap();
570        assert!(probe.is_none());
571    }
572
573    #[test]
574    fn machine_id_file_reports_io_error_for_directory() {
575        // read_to_string on a directory hits the generic IO arm and must
576        // surface as Error::Io carrying the path.
577        let dir = TempDir::new().unwrap();
578        let err = read_id_file(SourceKind::MachineId, dir.path())
579            .expect_err("reading a directory must error");
580        match err {
581            Error::Io { path, .. } => assert_eq!(path, dir.path()),
582            other => panic!("expected Io, got {other:?}"),
583        }
584    }
585
586    #[cfg(unix)]
587    #[test]
588    fn machine_id_file_permission_denied_is_none() {
589        use std::os::unix::fs::PermissionsExt;
590        use std::path::{Path, PathBuf};
591
592        /// Restores the file's readable permissions on drop so a panic
593        /// mid-test can't leave the tempfile unreadable (which would
594        /// break tempfile cleanup).
595        struct PermGuard(PathBuf);
596        impl Drop for PermGuard {
597            fn drop(&mut self) {
598                let _ = std::fs::set_permissions(&self.0, std::fs::Permissions::from_mode(0o600));
599            }
600        }
601
602        // Skip when running as root — chmod 0o000 doesn't deny root.
603        if nix_is_root() {
604            return;
605        }
606
607        let mut f = NamedTempFile::new().unwrap();
608        writeln!(f, "abc123").unwrap();
609        let path: &Path = f.path();
610        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o000)).unwrap();
611        let _guard = PermGuard(path.to_path_buf());
612
613        let probe = read_id_file(SourceKind::MachineId, path)
614            .expect("permission denied should be swallowed to Ok(None)");
615        assert!(probe.is_none());
616    }
617
618    fn machine_id_probe(kind: SourceKind, body: &str) -> Option<Probe> {
619        let mut f = NamedTempFile::new().unwrap();
620        write!(f, "{body}").unwrap();
621        read_machine_id_file(kind, f.path()).unwrap()
622    }
623
624    #[test]
625    fn machine_id_rejects_whonix_constant() {
626        // Removing this entry from MACHINE_ID_DENYLIST must fail this test.
627        assert!(
628            machine_id_probe(SourceKind::MachineId, "b08dfa6083e7567a1921a715000001fb\n").is_none()
629        );
630    }
631
632    #[test]
633    fn machine_id_rejects_whonix_constant_uppercase() {
634        assert!(
635            machine_id_probe(SourceKind::MachineId, "B08DFA6083E7567A1921A715000001FB\n").is_none()
636        );
637    }
638
639    #[test]
640    fn machine_id_rejects_oraclelinux_9_constant() {
641        assert!(
642            machine_id_probe(SourceKind::MachineId, "d495c4b7bb8244639186ef65305fd685\n").is_none()
643        );
644    }
645
646    #[test]
647    fn machine_id_rejects_oraclelinux_8_constant() {
648        assert!(
649            machine_id_probe(SourceKind::MachineId, "e28a15f597cd4693bb61f1f3e8447cbd\n").is_none()
650        );
651    }
652
653    #[test]
654    fn machine_id_rejects_jrei_systemd_debian_constant() {
655        assert!(
656            machine_id_probe(SourceKind::MachineId, "4c010dc413ad444698de6ee4677331b9\n").is_none()
657        );
658    }
659
660    #[test]
661    fn machine_id_rejects_jrei_systemd_ubuntu_constant() {
662        assert!(
663            machine_id_probe(SourceKind::MachineId, "a7570853ab864bbbbfc8c54b14eeaf8f\n").is_none()
664        );
665    }
666
667    #[test]
668    fn machine_id_rejects_geerlingguy_ansible_ubuntu_constant() {
669        assert!(
670            machine_id_probe(SourceKind::MachineId, "5b4bb40898b2416087b6224f176978fb\n").is_none()
671        );
672    }
673
674    #[test]
675    fn machine_id_rejects_geerlingguy_ansible_debian_constant() {
676        assert!(
677            machine_id_probe(SourceKind::MachineId, "3948e4ca87b64871b31c9a49920b9834\n").is_none()
678        );
679    }
680
681    #[test]
682    fn machine_id_rejects_geerlingguy_ansible_rocky_constant() {
683        assert!(
684            machine_id_probe(SourceKind::MachineId, "835aa90928e143e3ae09efcd0c5cb118\n").is_none()
685        );
686    }
687
688    #[test]
689    fn machine_id_rejects_all_zero_hex32() {
690        assert!(machine_id_probe(SourceKind::MachineId, &"0".repeat(32)).is_none());
691    }
692
693    #[test]
694    fn machine_id_rejects_all_same_nibble_hex32() {
695        assert!(machine_id_probe(SourceKind::MachineId, &"a".repeat(32)).is_none());
696        assert!(machine_id_probe(SourceKind::MachineId, &"F".repeat(32)).is_none());
697    }
698
699    #[test]
700    fn machine_id_accepts_plausible_real_value() {
701        let probe =
702            machine_id_probe(SourceKind::MachineId, "4c4c4544003957108052b4c04f384833\n").unwrap();
703        assert_eq!(probe.value(), "4c4c4544003957108052b4c04f384833");
704    }
705
706    #[test]
707    fn machine_id_filter_trims_whitespace_before_matching() {
708        // Confirms the filter composes with classify's trim.
709        assert!(
710            machine_id_probe(
711                SourceKind::MachineId,
712                "  b08dfa6083e7567a1921a715000001fb  \n\t"
713            )
714            .is_none()
715        );
716    }
717
718    #[test]
719    fn dbus_machine_id_rejects_whonix_constant() {
720        // Confirms the filter is wired into DbusMachineIdFile too.
721        assert!(
722            machine_id_probe(
723                SourceKind::DbusMachineId,
724                "b08dfa6083e7567a1921a715000001fb\n"
725            )
726            .is_none()
727        );
728    }
729
730    #[test]
731    fn read_id_file_does_not_apply_machine_id_filter() {
732        // Architectural negative control: the machine-id deny-list and
733        // hex32 check live only in the read_machine_id_file wrapper,
734        // never in the shared read_id_file helper — so a future source
735        // wired through read_id_file doesn't silently inherit the
736        // filter. Feeding an all-zero hex32 (which read_machine_id_file
737        // would reject) through read_id_file must pass through.
738        let mut f = NamedTempFile::new().unwrap();
739        write!(f, "{}", "0".repeat(32)).unwrap();
740        let probe = read_id_file(SourceKind::MachineId, f.path())
741            .unwrap()
742            .unwrap();
743        assert_eq!(probe.value(), "0".repeat(32));
744    }
745
746    #[test]
747    fn machine_id_file_probe_applies_filter() {
748        // End-to-end: MachineIdFile's Source::probe() (via the
749        // file_source! macro) must route through read_machine_id_file.
750        // Guards against regressions pointing the macro back at
751        // read_id_file.
752        let mut f = NamedTempFile::new().unwrap();
753        writeln!(f, "b08dfa6083e7567a1921a715000001fb").unwrap();
754        let probe = MachineIdFile::at(f.path()).probe().unwrap();
755        assert!(probe.is_none());
756    }
757
758    #[test]
759    fn dbus_machine_id_file_probe_applies_filter() {
760        let mut f = NamedTempFile::new().unwrap();
761        writeln!(f, "b08dfa6083e7567a1921a715000001fb").unwrap();
762        let probe = DbusMachineIdFile::at(f.path()).probe().unwrap();
763        assert!(probe.is_none());
764    }
765
766    #[test]
767    fn is_all_same_nibble_hex32_rejects_short_values() {
768        // Gate at exactly 32 chars so short non-hex32 strings pass through.
769        assert!(!is_all_same_nibble_hex32("aaa"));
770        assert!(!is_all_same_nibble_hex32(""));
771        assert!(!is_all_same_nibble_hex32(&"a".repeat(31)));
772        assert!(!is_all_same_nibble_hex32(&"a".repeat(33)));
773    }
774
775    #[test]
776    fn is_all_same_nibble_hex32_rejects_non_hex() {
777        assert!(!is_all_same_nibble_hex32(&"z".repeat(32)));
778    }
779
780    fn dmi_tempfile(body: &str) -> NamedTempFile {
781        let mut f = NamedTempFile::new().unwrap();
782        write!(f, "{body}").unwrap();
783        f
784    }
785
786    fn dmi_probe(body: &str) -> Option<Probe> {
787        let f = dmi_tempfile(body);
788        read_dmi_file(f.path()).unwrap()
789    }
790
791    #[test]
792    fn dmi_rejects_all_zero_uuid() {
793        assert!(dmi_probe("00000000-0000-0000-0000-000000000000\n").is_none());
794    }
795
796    #[test]
797    fn dmi_rejects_all_f_uuid_lower() {
798        assert!(dmi_probe("ffffffff-ffff-ffff-ffff-ffffffffffff\n").is_none());
799    }
800
801    #[test]
802    fn dmi_rejects_all_f_uuid_upper() {
803        assert!(dmi_probe("FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF\n").is_none());
804    }
805
806    #[test]
807    fn dmi_rejects_all_same_nibble_1() {
808        assert!(dmi_probe("11111111-1111-1111-1111-111111111111\n").is_none());
809    }
810
811    #[test]
812    fn dmi_rejects_all_same_nibble_a() {
813        assert!(dmi_probe("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa\n").is_none());
814    }
815
816    #[test]
817    fn dmi_rejects_supermicro_ami_placeholder() {
818        // Removing this entry from DMI_PLACEHOLDER_UUIDS must fail this
819        // test — deliberate regression coverage.
820        assert!(dmi_probe("03000200-0400-0500-0006-000700080009\n").is_none());
821    }
822
823    #[test]
824    fn dmi_rejects_supermicro_ami_placeholder_uppercase() {
825        assert!(
826            dmi_probe(
827                "03000200-0400-0500-0006-000700080009"
828                    .to_ascii_uppercase()
829                    .as_str()
830            )
831            .is_none()
832        );
833    }
834
835    #[test]
836    fn dmi_rejects_garbage_with_trailing_whitespace() {
837        // Confirms the filter composes with classify's trim.
838        assert!(dmi_probe("  00000000-0000-0000-0000-000000000000  \n\t").is_none());
839    }
840
841    #[test]
842    fn dmi_accepts_plausible_real_uuid() {
843        let probe = dmi_probe("4c4c4544-0039-5710-8052-b4c04f384833\n").unwrap();
844        assert_eq!(probe.value(), "4c4c4544-0039-5710-8052-b4c04f384833");
845    }
846
847    #[test]
848    fn dmi_accepts_non_uuid_shape() {
849        // The 32-hex-digit gate in is_all_same_nibble_uuid must not
850        // false-positively reject short non-UUID values.
851        let probe = dmi_probe("abcdef\n").unwrap();
852        assert_eq!(probe.value(), "abcdef");
853    }
854
855    #[test]
856    fn machine_id_file_accepts_hyphenated_all_zero_uuid() {
857        // The machine-id filter's hex32 predicate deliberately requires
858        // exactly 32 hex digits with no hyphens (per the systemd
859        // machine-id format). A hyphenated all-zero UUID is not a valid
860        // machine-id shape but must not be rejected here — it would be
861        // the caller's job to write a correctly-shaped file.
862        let probe = machine_id_probe(
863            SourceKind::MachineId,
864            "00000000-0000-0000-0000-000000000000\n",
865        )
866        .unwrap();
867        assert_eq!(probe.value(), "00000000-0000-0000-0000-000000000000");
868    }
869
870    fn write_hostid(bytes: &[u8]) -> NamedTempFile {
871        let mut f = NamedTempFile::new().unwrap();
872        f.write_all(bytes).unwrap();
873        f
874    }
875
876    #[test]
877    fn linux_hostid_reads_native_endian_bytes() {
878        // `hostid(1)` prints `u32::from_ne_bytes(file_bytes)` formatted
879        // as 8-digit lowercase hex. Mirror that contract for both
880        // endiannesses so the test is honest on BE targets too.
881        let file_bytes = [0x8f, 0x8f, 0x98, 0x4f];
882        let expected = format!("{:08x}", u32::from_ne_bytes(file_bytes));
883        let f = write_hostid(&file_bytes);
884        let probe = read_linux_hostid(f.path()).unwrap().unwrap();
885        assert_eq!(probe.kind(), SourceKind::LinuxHostId);
886        assert_eq!(probe.value(), expected);
887    }
888
889    #[test]
890    fn linux_hostid_pads_small_values_to_eight_hex_digits() {
891        // Pin the `{:08x}` width specifier: a small value like 0x42
892        // must render as "00000042", matching `hostid(1)`'s `%08x`.
893        // Build the file bytes from the target-native u32 so the test
894        // is honest on both endiannesses.
895        let file_bytes = 0x0000_0042_u32.to_ne_bytes();
896        let f = write_hostid(&file_bytes);
897        let probe = read_linux_hostid(f.path()).unwrap().unwrap();
898        assert_eq!(probe.value(), "00000042");
899    }
900
901    #[test]
902    fn linux_hostid_missing_is_none() {
903        let dir = TempDir::new().unwrap();
904        let missing = dir.path().join("absent");
905        assert!(read_linux_hostid(&missing).unwrap().is_none());
906    }
907
908    #[test]
909    fn linux_hostid_wrong_size_too_small_is_none() {
910        let f = write_hostid(&[0x01, 0x02, 0x03]);
911        assert!(read_linux_hostid(f.path()).unwrap().is_none());
912    }
913
914    #[test]
915    fn linux_hostid_wrong_size_too_large_is_none() {
916        // FreeBSD ships a text UUID at /etc/hostid — longer than 4
917        // bytes. Defensive short-circuit so a FreeBSD file mistakenly
918        // placed on Linux falls through.
919        let f = write_hostid(b"4f988f8f-0000-0000-0000-000000000000\n");
920        assert!(read_linux_hostid(f.path()).unwrap().is_none());
921    }
922
923    #[test]
924    fn linux_hostid_empty_is_none() {
925        let f = write_hostid(&[]);
926        assert!(read_linux_hostid(f.path()).unwrap().is_none());
927    }
928
929    #[test]
930    fn linux_hostid_rejects_all_zero() {
931        let f = write_hostid(&[0, 0, 0, 0]);
932        assert!(read_linux_hostid(f.path()).unwrap().is_none());
933    }
934
935    #[test]
936    fn linux_hostid_rejects_all_ff() {
937        let f = write_hostid(&[0xff, 0xff, 0xff, 0xff]);
938        assert!(read_linux_hostid(f.path()).unwrap().is_none());
939    }
940
941    #[test]
942    fn linux_hostid_reports_io_error_for_directory() {
943        let dir = TempDir::new().unwrap();
944        let err = read_linux_hostid(dir.path())
945            .expect_err("reading a directory must surface as Error::Io");
946        match err {
947            Error::Io {
948                path, source_kind, ..
949            } => {
950                assert_eq!(path, dir.path());
951                assert_eq!(source_kind, SourceKind::LinuxHostId);
952            }
953            other => panic!("expected Io, got {other:?}"),
954        }
955    }
956
957    #[cfg(unix)]
958    #[test]
959    fn linux_hostid_permission_denied_is_none() {
960        use std::os::unix::fs::PermissionsExt;
961        use std::path::PathBuf;
962
963        struct PermGuard(PathBuf);
964        impl Drop for PermGuard {
965            fn drop(&mut self) {
966                let _ = std::fs::set_permissions(&self.0, std::fs::Permissions::from_mode(0o600));
967            }
968        }
969
970        if nix_is_root() {
971            return;
972        }
973        let f = write_hostid(&[0x01, 0x02, 0x03, 0x04]);
974        std::fs::set_permissions(f.path(), std::fs::Permissions::from_mode(0o000)).unwrap();
975        let _guard = PermGuard(f.path().to_path_buf());
976        assert!(read_linux_hostid(f.path()).unwrap().is_none());
977    }
978
979    #[cfg(unix)]
980    fn nix_is_root() -> bool {
981        // Avoid pulling in a new dep — `id -u` via libc would also work,
982        // but `/proc/self/status` is trivial. See
983        // [`effective_uid_from_status`] for the parsing contract.
984        std::fs::read_to_string("/proc/self/status")
985            .ok()
986            .is_some_and(|s| effective_uid_from_status(&s) == Some("0"))
987    }
988
989    /// Extract the effective UID from the `Uid:` line of
990    /// `/proc/self/status`. `proc(5)` documents the line as
991    /// `Uid:\t<real>\t<effective>\t<saved-set>\t<filesystem>`; we need
992    /// the *effective* UID because that is what determines whether the
993    /// `chmod 000` in `permission_denied_is_recoverable` actually bars
994    /// the test process from reading the file. Returns `None` when the
995    /// line is missing or malformed.
996    #[cfg(unix)]
997    fn effective_uid_from_status(status: &str) -> Option<&str> {
998        status
999            .lines()
1000            .find_map(|l| l.strip_prefix("Uid:")?.split_whitespace().nth(1))
1001    }
1002
1003    #[cfg(unix)]
1004    #[test]
1005    fn effective_uid_from_status_extracts_second_field() {
1006        // Real != effective: the helper must track effective so callers
1007        // like `nix_is_root` can correctly skip permission-denied tests
1008        // on a setuid-dropped runner.
1009        let status = "\
1010Name:\tbash
1011Uid:\t1000\t0\t1000\t1000
1012Gid:\t1000\t1000\t1000\t1000
1013";
1014        assert_eq!(effective_uid_from_status(status), Some("0"));
1015    }
1016
1017    #[cfg(unix)]
1018    #[test]
1019    fn effective_uid_from_status_handles_common_shapes() {
1020        // Typical developer laptop: real == effective == saved == fs.
1021        assert_eq!(
1022            effective_uid_from_status("Uid:\t1000\t1000\t1000\t1000\n"),
1023            Some("1000"),
1024        );
1025        // Root-run CI (all zeroes).
1026        assert_eq!(effective_uid_from_status("Uid:\t0\t0\t0\t0\n"), Some("0"),);
1027        // Missing `Uid:` line — caller must fall back to "not root".
1028        assert_eq!(effective_uid_from_status("Name:\tthing\n"), None);
1029        // Present but truncated (one field) — treat as malformed.
1030        assert_eq!(effective_uid_from_status("Uid:\t1000\n"), None);
1031        // Zero fields after `Uid:` — still malformed.
1032        assert_eq!(effective_uid_from_status("Uid:\n"), None);
1033        // Leading whitespace on the line — `strip_prefix` is strict on
1034        // purpose, and real `/proc/self/status` never emits it.
1035        assert_eq!(effective_uid_from_status(" Uid:\t0\t0\t0\t0\n"), None);
1036    }
1037}