host_identity_cli/
lib.rs

1//! `host-identity` — command-line interface for the `host-identity` crate.
2//! Binary was renamed from `hostid` to avoid colliding with coreutils
3//! `hostid(1)`; see `crates/host-identity-cli/Cargo.toml` for the
4//! `[[bin]]` name and the rationale.
5//!
6//! This crate also exposes a small library surface so build tooling
7//! (the workspace `xtask` that generates man pages) can reuse the
8//! exact `clap::Command` definition the binary ships with. End users
9//! should depend on the [`host-identity`] library directly.
10//!
11//! [`host-identity`]: https://crates.io/crates/host-identity
12
13use std::ffi::OsStr;
14use std::fmt;
15use std::io::{self, Write};
16use std::path::PathBuf;
17use std::process::ExitCode;
18
19use anyhow::{Context, Result, anyhow};
20use clap::{Parser, Subcommand, ValueEnum};
21use host_identity::ids::{resolver_from_ids, source_ids};
22use host_identity::sources::{AppSpecific, FileOverride};
23use host_identity::{
24    HostId, ResolveOutcome, Resolver, Source, SourceKind, UnknownSourceError, Wrap,
25};
26use serde::Serialize;
27
/// Name of the environment variable that, when set to a non-empty
/// path, makes the CLI prepend a [`FileOverride`] to the front of the
/// resolver chain. Takes precedence over `HOST_IDENTITY`.
///
/// An empty value is treated the same as an unset variable (see
/// `file_override_from_env_value`).
const HOST_IDENTITY_FILE_ENV: &str = "HOST_IDENTITY_FILE";
32
// HTTP transport used by the cloud-metadata / Kubernetes sources. Only
// compiled with the `network` feature; this file references
// `transport::UreqTransport` and `transport::DEFAULT_NETWORK_TIMEOUT`.
#[cfg(feature = "network")]
mod transport;
35
/// Version of this CLI crate, taken from `CARGO_PKG_VERSION` at compile
/// time. Re-exported so the workspace `xtask` can stamp the man page
/// footer with the CLI crate's version rather than its own.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
39
/// Long description wired into the top-level command through
/// `long_about` on [`Cli`].
///
/// The trailing backslashes are string-literal line continuations, so
/// each paragraph is a single logical line in the emitted text; the
/// blank lines are the paragraph breaks.
const LONG_ABOUT: &str = "\
Resolve a stable, collision-resistant host UUID across platforms, container \
runtimes, cloud providers, and Kubernetes.

host-identity walks a platform-appropriate chain of identity sources (env override, \
/etc/machine-id, DMI, cloud metadata, Kubernetes pod UID, …) and returns the \
first one that produces a credible identifier. Cloned-VM sentinels, empty \
files, and systemd's literal `uninitialized` string are rejected rather than \
silently hashed into a shared ID.

Two environment variables pin identity explicitly when the automatic chain \
gets it wrong. HOST_IDENTITY_FILE names a file whose contents are used as \
the host identifier and takes precedence over every other source, including \
HOST_IDENTITY. HOST_IDENTITY supplies the identifier inline and is consulted \
next. Both work with the default chain and with explicit --sources.

By default the chain uses only local sources. Pass --network to pull in \
cloud-metadata and Kubernetes probes, which require an HTTP client and a \
binary built with the `network` feature.";
59
/// Usage examples wired into the top-level command through
/// `after_long_help` on [`Cli`].
///
/// Kept as one literal so the exact text the binary ships with is also
/// what tooling that reuses the `clap::Command` definition sees.
const EXAMPLES: &str = "\
EXAMPLES:
    Print the host UUID using the default local source chain:
        host-identity

    Include cloud-metadata and Kubernetes sources:
        host-identity resolve --network

    Build a custom chain from explicit source identifiers:
        host-identity resolve --sources env-override,machine-id,dmi

    Derive a per-app UUID that doesn't leak the raw machine key:
        host-identity resolve --app-id com.example.telemetry

    Emit machine-readable output:
        host-identity resolve --format json
        host-identity audit --format json

    Pin identity via environment override:
        HOST_IDENTITY=11111111-2222-3333-4444-555555555555 host-identity

    Pin identity via a file (takes precedence over HOST_IDENTITY):
        HOST_IDENTITY_FILE=/etc/host-identity host-identity

    List every source identifier compiled into this binary:
        host-identity sources
";
87
/// Top-level command-line interface for the `host-identity` binary.
// NOTE: `about`, `long_about` and `after_long_help` are set explicitly
// below, so the user-facing description comes from those values (and
// from `LONG_ABOUT` / `EXAMPLES`), not from the doc comment above.
#[derive(Parser)]
#[command(
    name = "host-identity",
    version,
    author,
    about = "Resolve a stable host UUID across platforms, clouds, and Kubernetes",
    long_about = LONG_ABOUT,
    after_long_help = EXAMPLES,
    args_conflicts_with_subcommands = true,
)]
pub struct Cli {
    // Optional subcommand; `run` treats `None` as an implicit `resolve`
    // driven by the flattened top-level flags below.
    #[command(subcommand)]
    command: Option<Command>,

    /// Top-level flags apply only when no subcommand is given (they are
    /// shorthand for `host-identity resolve ...`).
    #[command(flatten)]
    resolve: ResolveArgs,
}
108
// Subcommands of the binary, dispatched in `run`. The `///` comments on
// the variants and fields are consumed by clap's derive as help text,
// so edit them as user-facing strings rather than as internal notes.
#[derive(Subcommand)]
enum Command {
    /// Resolve the host identity and print it (default).
    Resolve(ResolveArgs),
    /// Walk every source without short-circuiting and report each outcome.
    Audit(AuditArgs),
    /// List every source identifier compiled into this binary.
    Sources {
        /// Emit JSON instead of one identifier per line.
        #[arg(long)]
        json: bool,
    },
}
122
// Flags shared by the top-level invocation, `resolve`, and (through
// `AuditArgs`) `audit`. The `///` comments and `long_help` strings are
// user-facing help text consumed by clap's derive; internal notes in
// this struct therefore use plain `//` comments.
//
// Cross-field rules that the attributes below do not express (timeout
// requires `--network`, non-empty `--app-id`, no empty `--sources`
// token) are enforced in `validate_resolve_args`.
#[derive(Parser, Clone, Default)]
struct ResolveArgs {
    /// Output format.
    #[arg(long, value_enum, default_value_t = Format::Plain)]
    format: Format,

    /// How the raw identifier is turned into a UUID.
    #[arg(
        long,
        value_enum,
        default_value_t = WrapArg::V5,
        long_help = "\
How the raw identifier returned by the winning source is turned into a UUID.

  v5           UUID v5 (SHA-1) under this crate's private namespace (default).
               Deterministic: the same raw input always produces the same
               UUID. Rehashes the raw value even when the source already
               yields a UUID (DMI product_uuid, macOS IOPlatformUUID,
               Windows MachineGuid, SMBIOS), so two tools that share a raw
               source cannot emit colliding IDs unless they also share this
               crate's namespace.

  v3           UUID v3 (MD5) under the nil namespace. Use ONLY for interop
               with existing pipelines that already produced IDs this way —
               notably the legacy Go derivation `uuid.NewMD5(uuid.Nil, raw)`.
               Prefer v5 for new deployments; RFC 9562 recommends v5 over v3.

  passthrough  Parse the raw value directly as a UUID, with no hashing.
               Use when the source already yields a UUID string and you
               want that exact UUID to survive unchanged — e.g. to match
               an ID another tool on the same host already emits. Fails
               with an error when the raw value is not a parseable UUID
               (machine-id, container IDs, Kubernetes pod UIDs all
               qualify; arbitrary strings from HOST_IDENTITY do not).

Pick v5 unless you have a concrete interop requirement.",
    )]
    wrap: WrapArg,

    /// Comma-separated source identifiers to build a custom chain
    /// (see `host-identity sources`). Combine with `--network` to include
    /// cloud-metadata sources in the chain.
    // An empty vector selects the default chain (see `base_resolver`).
    #[arg(long, value_delimiter = ',')]
    sources: Vec<String>,

    /// Enable cloud-metadata and Kubernetes sources by supplying an HTTP
    /// transport. Without `--sources` this adds them to the default chain;
    /// with `--sources` it lets identifiers like `aws-imds` resolve.
    /// Requires the binary to be built with the `network` feature.
    #[arg(long)]
    network: bool,

    /// Per-request timeout, in milliseconds, for cloud-metadata and
    /// Kubernetes HTTP probes. Only meaningful with `--network`. Off-cloud
    /// hosts never answer these endpoints, so this directly bounds the
    /// time spent waiting before falling through to the next source.
    // `range(1..)` rejects a zero timeout at parse time; `None` falls
    // back to the transport's default in `build_transport`.
    #[arg(long, value_name = "MS", value_parser = clap::value_parser!(u64).range(1..))]
    network_timeout_ms: Option<u64>,

    /// Wrap every source with an HMAC-SHA256 per-app derivation keyed on
    /// the inner source value. Emits a per-app UUID; the inner raw value
    /// never leaves the process.
    #[arg(
        long,
        value_name = "APP_ID",
        long_help = "\
Wrap every source in the chain with an HMAC-SHA256 per-app derivation \
keyed on the inner source value. When set, the resolver emits a per-app \
UUID and the inner source's raw value never leaves the process.

APP_ID is a UTF-8 byte string — reverse-DNS identifiers like \
`com.example.telemetry` are idiomatic, but any stable bytes work. It is \
NOT secret: privacy comes from not leaking the inner raw value, not from \
APP_ID being hidden. The derived value is an identifier, not key material. \
Callers needing a non-UTF-8 APP_ID must use the library API.

Effect on the chain:
  * Every source is wrapped, including the HOST_IDENTITY env override,
    HOST_IDENTITY_FILE, cloud-metadata, and Kubernetes sources.
  * Source labels in `--format json` and `audit` output become
    `app-specific:<inner>` (e.g. `app-specific:machine-id`).

Interaction with --wrap:
  * v5 (default)     re-hashes the AppSpecific UUID under this crate's
                     private namespace — per-app-unique AND
                     namespace-separated from other tools that re-hash
                     the same AppSpecific output.
  * passthrough      round-trips the AppSpecific UUID unchanged — the
                     \"byte-exact AppSpecific\" mode.
  * v3               works, but v5 is preferred.

Wrapping a source whose raw value is already public (cloud instance IDs, \
Kubernetes pod UIDs readable via the API server) adds no privacy — the \
input was not secret to begin with. Use this flag when you need to keep \
a local machine key (machine-id, DMI, IoPlatformUuid, MachineGuid, \
hostid, SMBIOS) out of your telemetry."
    )]
    app_id: Option<String>,
}
222
// Arguments of the `audit` subcommand. It currently accepts exactly the
// `resolve` flag set; `run` hands the inner `ResolveArgs` to `run_audit`.
#[derive(Parser, Clone, Default)]
struct AuditArgs {
    #[command(flatten)]
    resolve: ResolveArgs,
}
228
// Output format selected with `--format`. `Plain` is both the `Default`
// and the clap default. How each variant renders is decided in
// `print_host_id` (resolve) and `render_audit` (audit).
#[derive(ValueEnum, Clone, Copy, Default)]
enum Format {
    #[default]
    Plain,
    Summary,
    Json,
}
236
// CLI-facing mirror of the library's `Wrap`, selected with `--wrap`.
// It is also serialized (lowercased via `rename_all`) as the `wrap`
// field of the JSON reports, so renaming a variant changes the JSON
// output. Converted to `Wrap` by the `From<WrapArg>` impl.
#[derive(ValueEnum, Serialize, Clone, Copy, Default)]
#[serde(rename_all = "lowercase")]
enum WrapArg {
    #[default]
    V5,
    V3,
    Passthrough,
}
245
246impl From<WrapArg> for Wrap {
247    fn from(w: WrapArg) -> Self {
248        match w {
249            WrapArg::V5 => Wrap::UuidV5Namespaced,
250            WrapArg::V3 => Wrap::UuidV3Nil,
251            WrapArg::Passthrough => Wrap::Passthrough,
252        }
253    }
254}
255
/// Process exit status for a bad invocation (usage error).
///
/// Scripts can branch on `Usage` (2) vs. `Runtime` (1) to distinguish
/// a bad invocation from a host where no source produced an identity.
/// The runtime status is not a separate constant: `CliError::exit_code`
/// returns `ExitCode::FAILURE` for it.
const EXIT_USAGE: u8 = 2;
260
/// Failure of a CLI command, classified by how the process should exit.
///
/// Both variants carry the underlying `anyhow::Error` that `run` prints
/// to stderr; the variant only selects the exit status (see
/// `CliError::exit_code`).
#[derive(Debug)]
enum CliError {
    /// The invocation itself was invalid (bad flag combination, unknown
    /// source identifier, missing build feature); exits with `EXIT_USAGE`.
    Usage(anyhow::Error),
    /// The invocation was valid but the command failed while running
    /// (no source produced an identity, output could not be written);
    /// exits with `ExitCode::FAILURE`.
    Runtime(anyhow::Error),
}
267
268impl CliError {
269    fn exit_code(&self) -> ExitCode {
270        match self {
271            Self::Usage(_) => ExitCode::from(EXIT_USAGE),
272            Self::Runtime(_) => ExitCode::FAILURE,
273        }
274    }
275    fn into_inner(self) -> anyhow::Error {
276        match self {
277            Self::Usage(e) | Self::Runtime(e) => e,
278        }
279    }
280}
281
282fn usage<T>(msg: anyhow::Error) -> Result<T, CliError> {
283    Err(CliError::Usage(msg))
284}
285
286fn runtime_err<E: Into<anyhow::Error>>(e: E) -> CliError {
287    CliError::Runtime(e.into())
288}
289
290fn runtime<T>(msg: anyhow::Error) -> Result<T, CliError> {
291    Err(CliError::Runtime(msg))
292}
293
294/// Parse argv and run the CLI, returning the process exit code.
295#[must_use]
296pub fn run() -> ExitCode {
297    let cli = Cli::parse();
298    let result = match cli.command {
299        Some(Command::Resolve(args)) => run_resolve(&args),
300        Some(Command::Audit(args)) => run_audit(&args.resolve),
301        Some(Command::Sources { json }) => run_sources(json),
302        None => run_resolve(&cli.resolve),
303    };
304    match result {
305        Ok(()) => ExitCode::SUCCESS,
306        Err(err) => {
307            let code = err.exit_code();
308            eprintln!("host-identity: {:#}", err.into_inner());
309            code
310        }
311    }
312}
313
/// Write `bytes` to stdout and flush, treating `BrokenPipe` as success.
///
/// A reader that closes the pipe early (`host-identity audit | head`)
/// is not a failure of this program, so that one error kind is
/// swallowed; every other I/O error is returned to the caller.
fn write_and_flush(bytes: &[u8]) -> io::Result<()> {
    let mut out = io::stdout().lock();
    let outcome = out.write_all(bytes).and_then(|()| out.flush());
    match outcome {
        Err(err) if err.kind() != io::ErrorKind::BrokenPipe => Err(err),
        _ => Ok(()),
    }
}
325
326fn build_resolver(args: &ResolveArgs) -> Result<Resolver, CliError> {
327    validate_resolve_args(args)?;
328    let wrap = Wrap::from(args.wrap);
329    let base = base_resolver(args)?.with_wrap(wrap);
330    let with_override = prepend_file_override(base);
331    Ok(apply_app_specific(
332        with_override,
333        args.app_id.as_deref(),
334        wrap,
335    ))
336}
337
338fn validate_resolve_args(args: &ResolveArgs) -> Result<(), CliError> {
339    if args.network_timeout_ms.is_some() && !args.network {
340        return usage(anyhow!("`--network-timeout-ms` requires `--network`"));
341    }
342    if matches!(args.app_id.as_deref(), Some("")) {
343        return usage(anyhow!("`--app-id` must not be empty"));
344    }
345    // A stray comma in `--sources foo,,bar` (or a leading/trailing
346    // comma) lets clap's `value_delimiter` admit an empty token. Reject
347    // it here with a message that names the flag — otherwise the empty
348    // id reaches `resolver_from_ids` and surfaces as
349    // `unknown source identifier: ``` (empty backticks).
350    if args.sources.iter().any(String::is_empty) {
351        return usage(anyhow!("`--sources` contains an empty identifier"));
352    }
353    Ok(())
354}
355
356fn base_resolver(args: &ResolveArgs) -> Result<Resolver, CliError> {
357    match (args.sources.is_empty(), args.network) {
358        (true, false) => Ok(Resolver::with_defaults()),
359        (true, true) => network_defaults(args.network_timeout_ms).map_err(CliError::Usage),
360        (false, false) => {
361            resolver_from_ids(&args.sources).map_err(|e| CliError::Usage(map_unknown(e)))
362        }
363        (false, true) => resolver_from_ids_network(&args.sources, args.network_timeout_ms)
364            .map_err(CliError::Usage),
365    }
366}
367
368fn prepend_file_override(resolver: Resolver) -> Resolver {
369    match host_identity_file_override() {
370        Some(file) => resolver.prepend(file),
371        None => resolver,
372    }
373}
374
375fn apply_app_specific(resolver: Resolver, app_id: Option<&str>, wrap: Wrap) -> Resolver {
376    let Some(app_id) = app_id else {
377        return resolver;
378    };
379    let id_bytes = app_id.as_bytes();
380    let wrapped: Vec<Box<dyn Source>> = resolver
381        .into_boxed_sources()
382        .into_iter()
383        .map(|s| Box::new(AppSpecific::new(s, id_bytes)) as Box<dyn Source>)
384        .collect();
385    Resolver::new().with_boxed_sources(wrapped).with_wrap(wrap)
386}
387
388/// Read `HOST_IDENTITY_FILE` from the process environment and, if set
389/// to a non-empty path, return a [`FileOverride`] for it. The override
390/// is prepended by [`build_resolver`] so it outranks every other source,
391/// matching the documented precedence in `LONG_ABOUT`.
392fn host_identity_file_override() -> Option<FileOverride> {
393    file_override_from_env_value(std::env::var_os(HOST_IDENTITY_FILE_ENV).as_deref())
394}
395
396/// Pure helper: construct a [`FileOverride`] from a raw env-var value.
397/// Returns `None` when the value is absent or empty. A set-but-empty
398/// value is treated the same as unset so a script clearing the
399/// variable (`HOST_IDENTITY_FILE=`) disables the override rather than
400/// silently turning into `FileOverride::new("")` (which would probe a
401/// relative empty path).
402fn file_override_from_env_value(value: Option<&OsStr>) -> Option<FileOverride> {
403    let raw = value?;
404    if raw.is_empty() {
405        return None;
406    }
407    Some(FileOverride::new(PathBuf::from(raw)))
408}
409
410#[cfg(feature = "network")]
411#[allow(clippy::unnecessary_wraps)]
412fn network_defaults(timeout_ms: Option<u64>) -> Result<Resolver> {
413    Ok(Resolver::with_network_defaults(build_transport(timeout_ms)))
414}
415
416#[cfg(not(feature = "network"))]
417fn network_defaults(_timeout_ms: Option<u64>) -> Result<Resolver> {
418    Err(network_feature_disabled())
419}
420
421#[cfg(feature = "network")]
422fn resolver_from_ids_network(ids: &[String], timeout_ms: Option<u64>) -> Result<Resolver> {
423    host_identity::ids::resolver_from_ids_with_transport(ids, build_transport(timeout_ms))
424        .map_err(map_unknown)
425}
426
427#[cfg(not(feature = "network"))]
428fn resolver_from_ids_network(_ids: &[String], _timeout_ms: Option<u64>) -> Result<Resolver> {
429    Err(network_feature_disabled())
430}
431
/// Create the HTTP transport for network sources, using `timeout_ms`
/// (milliseconds) as the timeout when given and
/// `transport::DEFAULT_NETWORK_TIMEOUT` otherwise.
#[cfg(feature = "network")]
fn build_transport(timeout_ms: Option<u64>) -> transport::UreqTransport {
    let timeout = match timeout_ms {
        Some(ms) => std::time::Duration::from_millis(ms),
        None => transport::DEFAULT_NETWORK_TIMEOUT,
    };
    transport::UreqTransport::with_timeout(timeout)
}
440
441#[cfg(not(feature = "network"))]
442fn network_feature_disabled() -> anyhow::Error {
443    anyhow!("this build has no `network` feature; rebuild with `--features network`")
444}
445
446fn map_unknown(err: UnknownSourceError) -> anyhow::Error {
447    match err {
448        UnknownSourceError::Unknown(id) => anyhow!("unknown source identifier: `{id}`"),
449        UnknownSourceError::RequiresPath(id) => anyhow!(
450            "source `{id}` requires a caller-supplied path and cannot be built from an identifier",
451        ),
452        UnknownSourceError::RequiresTransport(id) => {
453            anyhow!("source `{id}` is a cloud source; pass `--network` to supply an HTTP transport")
454        }
455        UnknownSourceError::FeatureDisabled(id, feat) => anyhow!(
456            "source `{id}` requires the `{feat}` feature, which isn't enabled in this build",
457        ),
458    }
459}
460
461fn run_resolve(args: &ResolveArgs) -> Result<(), CliError> {
462    let resolver = build_resolver(args)?;
463    let id = resolver
464        .resolve()
465        .context("no source produced a host identity")
466        .map_err(CliError::Runtime)?;
467    print_host_id(&id, args.format, args.wrap).map_err(CliError::Runtime)
468}
469
470fn run_audit(args: &ResolveArgs) -> Result<(), CliError> {
471    let resolver = build_resolver(args)?;
472    let outcomes = resolver.resolve_all();
473    let mut buf = Vec::new();
474    render_audit(&mut buf, args, &outcomes).map_err(CliError::Runtime)?;
475    write_and_flush(&buf).map_err(runtime_err)?;
476
477    // Exit non-zero (runtime) when every outcome errored or skipped —
478    // nothing to show for the walk, matching `run_resolve`'s contract.
479    if !outcomes
480        .iter()
481        .any(|o| matches!(o, ResolveOutcome::Found(_)))
482    {
483        return runtime(anyhow!("no source produced a host identity"));
484    }
485    Ok(())
486}
487
488fn render_audit(
489    buf: &mut Vec<u8>,
490    args: &ResolveArgs,
491    outcomes: &[ResolveOutcome],
492) -> anyhow::Result<()> {
493    match args.format {
494        Format::Json => render_audit_json(buf, args.wrap, outcomes),
495        Format::Plain => render_audit_plain(buf, outcomes),
496        Format::Summary => render_audit_summary(buf, outcomes),
497    }
498}
499
500fn render_audit_json(
501    buf: &mut Vec<u8>,
502    wrap: WrapArg,
503    outcomes: &[ResolveOutcome],
504) -> anyhow::Result<()> {
505    let report = AuditReport {
506        wrap,
507        entries: outcomes.iter().map(AuditEntry::from).collect(),
508    };
509    serde_json::to_writer_pretty(&mut *buf, &report)?;
510    buf.push(b'\n');
511    Ok(())
512}
513
514fn render_audit_plain(buf: &mut Vec<u8>, outcomes: &[ResolveOutcome]) -> anyhow::Result<()> {
515    for (i, outcome) in outcomes.iter().enumerate() {
516        let kind = outcome.source();
517        write!(buf, "{i:>2}. {kind:<28} -> ")?;
518        match outcome {
519            ResolveOutcome::Found(id) => writeln!(buf, "{}", id.summary())?,
520            ResolveOutcome::Skipped(_) => writeln!(buf, "(skipped)")?,
521            ResolveOutcome::Errored(_, err) => writeln!(buf, "ERROR {}", one_line(err))?,
522        }
523    }
524    Ok(())
525}
526
527/// One compact line per outcome, mirroring `resolve --format summary`'s
528/// `source:uuid` shape. `Skipped` and `Errored` outcomes emit
529/// `source:skipped` / `source:ERROR <msg>`. Note: some source labels
530/// themselves contain a colon (e.g. `AppSpecific` renders as
531/// `app-specific:<inner>`), and error text may contain arbitrary
532/// characters, so consumers that want to recover the uuid should
533/// `rsplit_once(':')` — UUIDs never contain a colon.
534fn render_audit_summary(buf: &mut Vec<u8>, outcomes: &[ResolveOutcome]) -> anyhow::Result<()> {
535    for outcome in outcomes {
536        match outcome {
537            ResolveOutcome::Found(id) => writeln!(buf, "{}", id.summary())?,
538            ResolveOutcome::Skipped(kind) => writeln!(buf, "{kind}:skipped")?,
539            ResolveOutcome::Errored(kind, err) => {
540                writeln!(buf, "{kind}:ERROR {}", one_line(err))?;
541            }
542        }
543    }
544    Ok(())
545}
546
/// Render `err` with every `\n` and `\r` replaced by a space.
///
/// The audit plain and summary formats promise one line per outcome; a
/// multi-line error message would otherwise break scripts that parse
/// `audit` stdout line by line. Each line-break character becomes
/// exactly one space (so `\r\n` yields two), and text without line
/// breaks is returned byte-identical.
fn one_line(err: &impl fmt::Display) -> String {
    err.to_string()
        .chars()
        .map(|c| if matches!(c, '\n' | '\r') { ' ' } else { c })
        .collect()
}
556
557fn run_sources(json: bool) -> Result<(), CliError> {
558    let ids = available_source_ids();
559    let mut buf = Vec::new();
560    if json {
561        let entries: Vec<SourceEntry> = ids
562            .iter()
563            .map(|id| SourceEntry {
564                id,
565                description: describe_id(id),
566            })
567            .collect();
568        serde_json::to_writer_pretty(&mut buf, &entries).map_err(runtime_err)?;
569        buf.push(b'\n');
570    } else {
571        // Source identifiers are ASCII; char count == byte count. Use
572        // `chars().count()` anyway so a future non-ASCII label doesn't
573        // silently desync the padding width.
574        let width = ids
575            .iter()
576            .map(|id| id.chars().count())
577            .max()
578            .unwrap_or_default();
579        for id in &ids {
580            writeln!(buf, "{id:<width$}  {}", describe_id(id), width = width)
581                .map_err(runtime_err)?;
582        }
583    }
584    write_and_flush(&buf).map_err(runtime_err)
585}
586
587fn describe_id(id: &str) -> &'static str {
588    SourceKind::from_id(id).map_or("", SourceKind::describe)
589}
590
/// One element of the JSON array printed by `host-identity sources
/// --json`. Field names are part of the output, so renaming one
/// changes what consumers see.
#[derive(Serialize)]
struct SourceEntry {
    /// Source identifier, as accepted by `--sources`.
    id: &'static str,
    /// Description from `describe_id`; empty when none is available.
    description: &'static str,
}
596
597fn print_host_id(id: &HostId, format: Format, wrap: WrapArg) -> Result<()> {
598    let mut buf = Vec::new();
599    match format {
600        Format::Plain => writeln!(buf, "{id}")?,
601        Format::Summary => writeln!(buf, "{}", id.summary())?,
602        Format::Json => {
603            let out = HostIdReport {
604                wrap,
605                host_id: HostIdJson {
606                    uuid: id.as_uuid().to_string(),
607                    source: id.source().as_str(),
608                    in_container: id.in_container(),
609                },
610            };
611            serde_json::to_writer_pretty(&mut buf, &out)?;
612            buf.push(b'\n');
613        }
614    }
615    write_and_flush(&buf)?;
616    Ok(())
617}
618
/// Top-level JSON document printed by `resolve --format json` (built
/// in `print_host_id`). Field names and their order are part of the
/// output consumed by scripts.
#[derive(Serialize)]
struct HostIdReport {
    /// Wrap mode that was in effect, serialized in lowercase.
    wrap: WrapArg,
    /// The resolved identity.
    host_id: HostIdJson,
}
624
/// JSON view of a resolved `HostId`, nested under `host_id` in
/// `HostIdReport`.
#[derive(Serialize)]
struct HostIdJson {
    /// String form of `HostId::as_uuid`.
    uuid: String,
    /// Label of the source that produced the identity.
    source: &'static str,
    /// Value of `HostId::in_container`.
    in_container: bool,
}
631
/// Top-level JSON document printed by `audit --format json` (built in
/// `render_audit_json`).
#[derive(Serialize)]
struct AuditReport {
    /// Wrap mode that was in effect, serialized in lowercase.
    wrap: WrapArg,
    /// One entry per resolver outcome, in the order they were returned.
    entries: Vec<AuditEntry>,
}
637
/// Outcome category of one audited source. Serialized in lowercase
/// (`found`, `skipped`, `errored`) and mirrors the three
/// `ResolveOutcome` variants handled in `AuditEntry::from`.
#[derive(Serialize, Clone, Copy)]
#[serde(rename_all = "lowercase")]
enum AuditStatus {
    Found,
    Skipped,
    Errored,
}
645
/// One element of `AuditReport::entries`. Optional fields carry no
/// `skip_serializing_if`, so an absent value is serialized as `null`
/// rather than omitted.
#[derive(Serialize)]
struct AuditEntry {
    /// Label of the audited source.
    source: &'static str,
    /// What happened when the source was probed.
    status: AuditStatus,
    /// Resolved UUID; set only for `Found`.
    uuid: Option<String>,
    /// Error message; set only for `Errored`.
    error: Option<String>,
    /// Container flag of the identity; set only for `Found`.
    in_container: Option<bool>,
}
654
655impl From<&ResolveOutcome> for AuditEntry {
656    fn from(o: &ResolveOutcome) -> Self {
657        let source = o.source().as_str();
658        match o {
659            ResolveOutcome::Found(id) => Self {
660                source,
661                status: AuditStatus::Found,
662                uuid: Some(id.as_uuid().to_string()),
663                error: None,
664                in_container: Some(id.in_container()),
665            },
666            ResolveOutcome::Skipped(_) => Self {
667                source,
668                status: AuditStatus::Skipped,
669                uuid: None,
670                error: None,
671                in_container: None,
672            },
673            ResolveOutcome::Errored(_, err) => Self {
674                source,
675                status: AuditStatus::Errored,
676                uuid: None,
677                error: Some(err.to_string()),
678                in_container: None,
679            },
680        }
681    }
682}
683
684fn available_source_ids() -> Vec<&'static str> {
685    let mut ids = vec![
686        source_ids::ENV_OVERRIDE,
687        source_ids::FILE_OVERRIDE,
688        source_ids::MACHINE_ID,
689        source_ids::DBUS_MACHINE_ID,
690        source_ids::DMI,
691        source_ids::LINUX_HOSTID,
692        source_ids::IO_PLATFORM_UUID,
693        source_ids::WINDOWS_MACHINE_GUID,
694        source_ids::FREEBSD_HOSTID,
695        source_ids::KENV_SMBIOS,
696        source_ids::BSD_KERN_HOSTID,
697        source_ids::ILLUMOS_HOSTID,
698    ];
699    #[cfg(feature = "container")]
700    {
701        ids.push(source_ids::CONTAINER);
702        ids.push(source_ids::LXC);
703    }
704    #[cfg(feature = "network")]
705    {
706        ids.extend_from_slice(&[
707            source_ids::AWS_IMDS,
708            source_ids::GCP_METADATA,
709            source_ids::AZURE_IMDS,
710            source_ids::DIGITAL_OCEAN_METADATA,
711            source_ids::HETZNER_METADATA,
712            source_ids::OCI_METADATA,
713            source_ids::OPENSTACK_METADATA,
714            source_ids::KUBERNETES_POD_UID,
715            source_ids::KUBERNETES_SERVICE_ACCOUNT,
716            source_ids::KUBERNETES_DOWNWARD_API,
717        ]);
718    }
719    ids.sort_unstable();
720    ids
721}
722
723#[cfg(test)]
724mod tests {
725    use super::*;
726
727    #[test]
728    fn wrap_arg_maps_every_variant_to_library_wrap() {
729        assert!(matches!(Wrap::from(WrapArg::V5), Wrap::UuidV5Namespaced));
730        assert!(matches!(Wrap::from(WrapArg::V3), Wrap::UuidV3Nil));
731        assert!(matches!(
732            Wrap::from(WrapArg::Passthrough),
733            Wrap::Passthrough
734        ));
735    }
736
737    #[test]
738    fn available_source_ids_is_sorted_and_deduplicated() {
739        let ids = available_source_ids();
740        assert!(
741            ids.windows(2).all(|w| w[0] < w[1]),
742            "ids must be strictly sorted"
743        );
744        assert!(ids.contains(&source_ids::MACHINE_ID));
745        assert!(ids.contains(&source_ids::DMI));
746    }
747
748    #[test]
749    #[cfg(feature = "container")]
750    fn available_source_ids_includes_container_when_feature_enabled() {
751        assert!(available_source_ids().contains(&source_ids::CONTAINER));
752        assert!(available_source_ids().contains(&source_ids::LXC));
753    }
754
755    #[test]
756    fn build_resolver_defaults_when_no_flags_given() {
757        let args = ResolveArgs::default();
758        let resolver = build_resolver(&args).expect("defaults build");
759        assert!(
760            resolver
761                .source_kinds()
762                .contains(&host_identity::SourceKind::EnvOverride),
763            "default chain must include env-override",
764        );
765    }
766
767    #[test]
768    fn build_resolver_uses_ids_chain_when_sources_set() {
769        let args = ResolveArgs {
770            sources: vec!["env-override".into(), "machine-id".into()],
771            ..Default::default()
772        };
773        let resolver = build_resolver(&args).expect("ids build");
774        let kinds = resolver.source_kinds();
775        assert_eq!(kinds.len(), 2);
776        assert_eq!(kinds[0], host_identity::SourceKind::EnvOverride);
777        assert_eq!(kinds[1], host_identity::SourceKind::MachineId);
778    }
779
780    #[test]
781    fn build_resolver_rejects_unknown_source_id() {
782        let args = ResolveArgs {
783            sources: vec!["definitely-not-a-source".into()],
784            ..Default::default()
785        };
786        let err = build_resolver(&args).expect_err("unknown id must fail");
787        assert!(
788            err.into_inner()
789                .to_string()
790                .contains("unknown source identifier")
791        );
792    }
793
794    #[test]
795    #[cfg(feature = "network")]
796    fn build_resolver_network_defaults_includes_cloud_sources() {
797        let args = ResolveArgs {
798            network: true,
799            ..Default::default()
800        };
801        let resolver = build_resolver(&args).expect("network defaults build");
802        assert!(
803            resolver
804                .source_kinds()
805                .contains(&host_identity::SourceKind::AwsImds),
806            "--network should add cloud sources to the default chain",
807        );
808    }
809
810    #[test]
811    #[cfg(feature = "network")]
812    fn build_resolver_network_plus_ids_resolves_cloud_identifiers() {
813        let args = ResolveArgs {
814            sources: vec!["aws-imds".into()],
815            network: true,
816            ..Default::default()
817        };
818        let resolver = build_resolver(&args).expect("network + ids build");
819        assert_eq!(
820            resolver.source_kinds(),
821            vec![host_identity::SourceKind::AwsImds]
822        );
823    }
824
825    #[test]
826    #[cfg(not(feature = "network"))]
827    fn build_resolver_network_without_feature_errors() {
828        let args = ResolveArgs {
829            network: true,
830            ..Default::default()
831        };
832        let err = build_resolver(&args).expect_err("--network must fail without feature");
833        assert!(err.into_inner().to_string().contains("`network` feature"));
834    }
835
836    #[test]
837    fn build_resolver_rejects_network_timeout_without_network() {
838        let args = ResolveArgs {
839            network_timeout_ms: Some(500),
840            ..Default::default()
841        };
842        let err = build_resolver(&args).expect_err("must reject timeout without --network");
843        assert!(
844            err.into_inner()
845                .to_string()
846                .contains("requires `--network`")
847        );
848    }
849
850    #[test]
851    fn map_unknown_formats_each_variant_distinctly() {
852        let cases = [
853            (
854                UnknownSourceError::Unknown("weird".to_owned()),
855                "unknown source identifier",
856            ),
857            (
858                UnknownSourceError::RequiresPath("file-override"),
859                "caller-supplied path",
860            ),
861            (
862                UnknownSourceError::RequiresTransport("aws-imds"),
863                "pass `--network`",
864            ),
865            (
866                UnknownSourceError::FeatureDisabled("aws-imds", "aws"),
867                "isn't enabled in this build",
868            ),
869        ];
870        for (err, expected_fragment) in cases {
871            let msg = map_unknown(err).to_string();
872            assert!(
873                msg.contains(expected_fragment),
874                "message {msg:?} missing fragment {expected_fragment:?}",
875            );
876        }
877    }
878
879    #[test]
880    fn file_override_from_env_value_handles_absent_empty_and_set() {
881        assert!(file_override_from_env_value(None).is_none());
882        assert!(file_override_from_env_value(Some(OsStr::new(""))).is_none());
883        let fo = file_override_from_env_value(Some(OsStr::new("/tmp/host-id")))
884            .expect("non-empty value must yield a FileOverride");
885        assert_eq!(fo.path(), std::path::Path::new("/tmp/host-id"));
886    }
887
888    #[test]
889    fn host_id_json_schema_is_stable() {
890        // Pins the `--format json` schema for `host-identity resolve`. Any field
891        // rename or case change breaks downstream script parsers; this
892        // snapshot catches that at test time.
893        let sample = HostIdReport {
894            wrap: WrapArg::V5,
895            host_id: HostIdJson {
896                uuid: "11111111-2222-3333-4444-555555555555".to_owned(),
897                source: "machine-id",
898                in_container: false,
899            },
900        };
901        let json = serde_json::to_value(&sample).unwrap();
902        let obj = json.as_object().unwrap();
903        assert_eq!(obj.len(), 2);
904        assert_eq!(obj["wrap"], "v5");
905        let inner = obj["host_id"].as_object().unwrap();
906        assert_eq!(inner.len(), 3);
907        assert_eq!(inner["uuid"], "11111111-2222-3333-4444-555555555555");
908        assert_eq!(inner["source"], "machine-id");
909        assert_eq!(inner["in_container"], false);
910    }
911
912    #[test]
913    fn wrap_arg_serializes_to_lowercase_flag_string() {
914        // The `wrap` field in JSON output must match the CLI flag values
915        // verbatim so saved output round-trips back through `--wrap`.
916        for (variant, expected) in [
917            (WrapArg::V5, "v5"),
918            (WrapArg::V3, "v3"),
919            (WrapArg::Passthrough, "passthrough"),
920        ] {
921            assert_eq!(serde_json::to_value(variant).unwrap(), expected);
922        }
923    }
924
925    #[test]
926    fn audit_entry_schema_is_stable_for_every_status() {
927        let outcomes = mixed_outcomes();
928        let report = AuditReport {
929            wrap: WrapArg::V5,
930            entries: outcomes.iter().map(AuditEntry::from).collect(),
931        };
932        let json = serde_json::to_value(&report).unwrap();
933        let envelope = json.as_object().unwrap();
934        assert_eq!(envelope.len(), 2);
935        assert_eq!(envelope["wrap"], "v5");
936        let arr = envelope["entries"].as_array().unwrap();
937        assert_eq!(arr.len(), 3);
938        assert_eq!(arr[0]["status"], "found");
939        assert!(arr[0]["uuid"].is_string());
940        assert_eq!(arr[0]["error"], serde_json::Value::Null);
941        assert_eq!(arr[1]["status"], "errored");
942        assert!(arr[1]["error"].as_str().unwrap().contains("synthetic"));
943        assert_eq!(arr[1]["uuid"], serde_json::Value::Null);
944        assert_eq!(arr[2]["status"], "skipped");
945        // Every entry shares the same key set.
946        for entry in arr {
947            let keys: Vec<_> = entry.as_object().unwrap().keys().collect();
948            assert_eq!(keys.len(), 5);
949        }
950    }
951
952    #[test]
953    #[cfg(feature = "network")]
954    fn available_source_ids_includes_every_cloud_and_k8s_source() {
955        let ids = available_source_ids();
956        for id in [
957            source_ids::AWS_IMDS,
958            source_ids::GCP_METADATA,
959            source_ids::AZURE_IMDS,
960            source_ids::DIGITAL_OCEAN_METADATA,
961            source_ids::HETZNER_METADATA,
962            source_ids::OCI_METADATA,
963            source_ids::OPENSTACK_METADATA,
964            source_ids::KUBERNETES_POD_UID,
965            source_ids::KUBERNETES_SERVICE_ACCOUNT,
966            source_ids::KUBERNETES_DOWNWARD_API,
967        ] {
968            assert!(ids.contains(&id), "missing {id}");
969        }
970    }
971
972    #[test]
973    fn build_resolver_with_app_id_wraps_every_source() {
974        let args = ResolveArgs {
975            sources: vec!["env-override".into(), "machine-id".into()],
976            app_id: Some("com.example.a".into()),
977            ..Default::default()
978        };
979        let resolver = build_resolver(&args).expect("app-id build");
980        let kinds = resolver.source_kinds();
981        assert_eq!(kinds.len(), 2);
982        for kind in &kinds {
983            let label = kind.as_str();
984            assert!(
985                label.starts_with("app-specific:"),
986                "expected wrapped label, got {label:?}",
987            );
988        }
989    }
990
991    #[test]
992    fn build_resolver_with_empty_app_id_errors_usage() {
993        let args = ResolveArgs {
994            app_id: Some(String::new()),
995            ..Default::default()
996        };
997        let err = build_resolver(&args).expect_err("empty app-id must fail");
998        assert!(matches!(err, CliError::Usage(_)));
999        assert!(err.into_inner().to_string().contains("must not be empty"));
1000    }
1001
1002    #[test]
1003    fn validate_resolve_args_rejects_timeout_without_network() {
1004        let args = ResolveArgs {
1005            network_timeout_ms: Some(500),
1006            network: false,
1007            ..Default::default()
1008        };
1009        let err = validate_resolve_args(&args).expect_err("timeout without network must fail");
1010        assert!(matches!(err, CliError::Usage(_)));
1011        assert!(
1012            err.into_inner()
1013                .to_string()
1014                .contains("`--network-timeout-ms` requires `--network`")
1015        );
1016    }
1017
1018    #[test]
1019    fn validate_resolve_args_accepts_timeout_with_network() {
1020        let args = ResolveArgs {
1021            network_timeout_ms: Some(500),
1022            network: true,
1023            ..Default::default()
1024        };
1025        validate_resolve_args(&args).expect("timeout with network must validate");
1026    }
1027
1028    #[test]
1029    fn validate_resolve_args_accepts_default() {
1030        validate_resolve_args(&ResolveArgs::default()).expect("default args must validate");
1031    }
1032
1033    #[test]
1034    fn validate_resolve_args_rejects_empty_source_identifier_in_every_position() {
1035        // Regression for #21. A stray comma anywhere in `--sources`
1036        // produces an empty token; previously this surfaced downstream
1037        // as `unknown source identifier: ``` with empty backticks.
1038        let cases: &[&[&str]] = &[
1039            &[""],                      // `--sources ""`
1040            &["", "machine-id"],        // `--sources ,machine-id`
1041            &["machine-id", ""],        // `--sources machine-id,`
1042            &["machine-id", "", "dmi"], // `--sources machine-id,,dmi`
1043            &["", ""],                  // `--sources ,`
1044        ];
1045        for ids in cases {
1046            let args = ResolveArgs {
1047                sources: ids.iter().map(|&s| s.to_string()).collect(),
1048                ..Default::default()
1049            };
1050            let Err(CliError::Usage(err)) = validate_resolve_args(&args) else {
1051                panic!("empty id {ids:?} must fail as a usage error");
1052            };
1053            let msg = err.to_string();
1054            assert!(
1055                msg.contains("`--sources`") && msg.contains("empty identifier"),
1056                "error should name the flag and describe the problem for {ids:?}: {msg}",
1057            );
1058        }
1059    }
1060
1061    #[test]
1062    fn clap_parser_emits_empty_token_that_validation_catches() {
1063        // Guard against a future refactor of the `#[arg(... value_delimiter = ',')]`
1064        // attribute silently changing parse behaviour so empty tokens no
1065        // longer reach `validate_resolve_args`. If that ever happens this
1066        // test fails loudly instead of the validator becoming dead code.
1067        let cli = Cli::try_parse_from(["host-identity", "resolve", "--sources", "machine-id,,dmi"])
1068            .expect("clap must parse a doubled-comma source list");
1069        let Some(Command::Resolve(resolve)) = cli.command else {
1070            panic!("expected Resolve subcommand");
1071        };
1072        assert_eq!(
1073            resolve.sources,
1074            vec!["machine-id".to_owned(), String::new(), "dmi".to_owned()],
1075        );
1076        let Err(CliError::Usage(_)) = validate_resolve_args(&resolve) else {
1077            panic!("empty id must fail as a usage error");
1078        };
1079    }
1080
1081    #[test]
1082    fn validate_resolve_args_accepts_non_empty_app_id() {
1083        let args = ResolveArgs {
1084            app_id: Some("com.example.telemetry".into()),
1085            ..Default::default()
1086        };
1087        validate_resolve_args(&args).expect("non-empty app-id must validate");
1088    }
1089
1090    #[test]
1091    fn apply_app_specific_none_is_identity() {
1092        let resolver = Resolver::new()
1093            .push(host_identity::sources::EnvOverride::new("A"))
1094            .push(host_identity::sources::EnvOverride::new("B"))
1095            .with_wrap(Wrap::Passthrough);
1096        let before = resolver.source_kinds();
1097        let after = apply_app_specific(resolver, None, Wrap::Passthrough).source_kinds();
1098        assert_eq!(before, after);
1099        for kind in after {
1100            assert!(
1101                !kind.as_str().starts_with("app-specific:"),
1102                "None app-id must not wrap; got {kind:?}",
1103            );
1104        }
1105    }
1106
1107    /// Shared fixture for the audit render tests: a three-source chain
1108    /// that yields exactly one `Found`, one `Errored`, and one
1109    /// `Skipped` outcome, in that order.
1110    fn mixed_outcomes() -> Vec<ResolveOutcome> {
1111        use host_identity::sources::FnSource;
1112        let found_src = FnSource::new(SourceKind::custom("ok"), || Ok(Some("raw".into())));
1113        let err_src = FnSource::new(SourceKind::custom("bad"), || {
1114            Err(host_identity::Error::Platform {
1115                source_kind: SourceKind::custom("bad"),
1116                reason: "synthetic".into(),
1117            })
1118        });
1119        let skip_src = FnSource::new(SourceKind::custom("skip"), || Ok(None));
1120        Resolver::new()
1121            .push(found_src)
1122            .push(err_src)
1123            .push(skip_src)
1124            .resolve_all()
1125    }
1126
1127    #[test]
1128    fn render_audit_plain_formats_mixed_outcomes() {
1129        let outcomes = mixed_outcomes();
1130        let mut buf = Vec::new();
1131        render_audit_plain(&mut buf, &outcomes).expect("render");
1132        let text = String::from_utf8(buf).expect("utf-8");
1133        let lines: Vec<&str> = text.lines().collect();
1134        assert_eq!(lines.len(), 3);
1135        let arrow = lines[0].find(" -> ").expect("first line has arrow");
1136        for line in &lines {
1137            assert_eq!(
1138                line.find(" -> "),
1139                Some(arrow),
1140                "kind column should align across lines: {line:?}",
1141            );
1142        }
1143        assert!(lines[0].starts_with(" 0. ok "), "got: {:?}", lines[0]);
1144        assert!(lines[1].starts_with(" 1. bad "), "got: {:?}", lines[1]);
1145        assert!(lines[1].contains(" -> ERROR "));
1146        assert!(lines[1].contains("synthetic"));
1147        assert!(lines[2].starts_with(" 2. skip"), "got: {:?}", lines[2]);
1148        assert!(lines[2].ends_with(" -> (skipped)"), "got: {:?}", lines[2]);
1149    }
1150
1151    #[test]
1152    fn render_audit_summary_produces_one_compact_line_per_outcome() {
1153        let outcomes = mixed_outcomes();
1154        let mut buf = Vec::new();
1155        render_audit_summary(&mut buf, &outcomes).expect("render");
1156        let text = String::from_utf8(buf).expect("utf-8");
1157        let lines: Vec<&str> = text.lines().collect();
1158        assert_eq!(lines.len(), 3);
1159        assert!(
1160            lines[0].starts_with("ok:"),
1161            "found line should lead with source:uuid, got: {:?}",
1162            lines[0]
1163        );
1164        let uuid_tail = lines[0].strip_prefix("ok:").expect("ok: prefix");
1165        assert_eq!(uuid_tail.len(), 36, "uuid tail: {uuid_tail:?}");
1166        // `Error::Platform` renders as `{source_kind}: {reason}`, so the
1167        // source label appears twice — once as the line's leading column
1168        // and once inside the error text. Matches `render_audit_plain`'s
1169        // `ERROR {err}` tail.
1170        assert_eq!(lines[1], "bad:ERROR bad: synthetic");
1171        assert_eq!(lines[2], "skip:skipped");
1172    }
1173
1174    #[test]
1175    fn render_audit_summary_differs_from_plain() {
1176        let outcomes = mixed_outcomes();
1177        let mut plain = Vec::new();
1178        let mut summary = Vec::new();
1179        render_audit_plain(&mut plain, &outcomes).expect("plain");
1180        render_audit_summary(&mut summary, &outcomes).expect("summary");
1181        assert_ne!(
1182            plain, summary,
1183            "audit plain and summary must not collapse to identical output",
1184        );
1185    }
1186
1187    #[test]
1188    fn render_audit_summary_wraps_app_specific_label_into_three_colons() {
1189        // `AppSpecific` renders as `app-specific:<inner>`, so a Found
1190        // outcome under `--app-id` yields `app-specific:<inner>:<uuid>`
1191        // — three colons. The docstring tells callers to
1192        // `rsplit_once(':')` to recover the uuid; this test pins that
1193        // the output actually has that shape so the guidance stays
1194        // accurate.
1195        use host_identity::sources::{AppSpecific, FnSource};
1196        let inner = FnSource::new(SourceKind::custom("machine-id"), || {
1197            Ok(Some("11111111-2222-3333-4444-555555555555".into()))
1198        });
1199        let wrapped = AppSpecific::new(inner, b"com.example");
1200        let outcomes = Resolver::new().push(wrapped).resolve_all();
1201        let mut buf = Vec::new();
1202        render_audit_summary(&mut buf, &outcomes).expect("render");
1203        let line = String::from_utf8(buf)
1204            .expect("utf-8")
1205            .trim_end_matches('\n')
1206            .to_owned();
1207        assert!(
1208            line.starts_with("app-specific:machine-id:"),
1209            "expected three-colon shape, got: {line:?}",
1210        );
1211        let (label, uuid) = line
1212            .rsplit_once(':')
1213            .expect("rsplit_once must split a uuid tail off the label");
1214        assert_eq!(label, "app-specific:machine-id");
1215        assert_eq!(uuid.len(), 36, "uuid tail should be 36 chars: {uuid:?}");
1216    }
1217
1218    #[test]
1219    fn render_audit_summary_flattens_multi_line_errors_to_one_line() {
1220        // Regression for test-gap #T2: a `Source` impl that returned an
1221        // error with embedded newlines would otherwise break the
1222        // "one line per outcome" contract. `one_line` replaces \n/\r
1223        // with spaces so downstream line-based parsers stay correct.
1224        use host_identity::sources::FnSource;
1225        let src = FnSource::new(SourceKind::custom("bad"), || {
1226            Err(host_identity::Error::Platform {
1227                source_kind: SourceKind::custom("bad"),
1228                reason: "first\nsecond\r\nthird".to_owned(),
1229            })
1230        });
1231        let outcomes = Resolver::new().push(src).resolve_all();
1232        let mut summary = Vec::new();
1233        render_audit_summary(&mut summary, &outcomes).expect("summary");
1234        let mut plain = Vec::new();
1235        render_audit_plain(&mut plain, &outcomes).expect("plain");
1236        let summary_text = String::from_utf8(summary).expect("utf-8");
1237        let plain_text = String::from_utf8(plain).expect("utf-8");
1238        // Exactly one trailing newline per outcome — no interior ones.
1239        assert_eq!(
1240            summary_text.matches('\n').count(),
1241            1,
1242            "summary: {summary_text:?}"
1243        );
1244        assert_eq!(plain_text.matches('\n').count(), 1, "plain: {plain_text:?}");
1245        assert!(summary_text.contains("first second  third"));
1246        assert!(plain_text.contains("first second  third"));
1247    }
1248}
host_identity_cli/lib.rs

host_identity_cli/
lib.rs