Skip to main content

host_identity/sources/
container.rs

1//! Container runtime identity.
2//!
3//! Extracts the container ID from `/proc/self/mountinfo`. Matches agent-go's
4//! patterns so the extracted ID is wire-compatible with existing telemetry
5//! pipelines:
6//!
7//! - `/docker/<64-hex>` (Docker)
8//! - `:<64-hex>` (Kubernetes CRI via containerd)
9//! - `/system.slice/crio-<64-hex>.scope` (CRI-O systemd scope units)
10//! - `containers/<64-hex>` (Podman / CRI-O)
11//! - `sandboxes/<64-hex>` (containerd)
12//!
13//! Authoritative references:
14//!
15//! - [OCI Runtime Specification](https://github.com/opencontainers/runtime-spec/blob/main/spec.md)
16//!   — defines the container ID as an opaque but unique handle assigned by
17//!   the runtime; the 64-char lowercase hex shape is a runtime-level
18//!   convention (Docker / containerd / CRI-O) rather than a spec
19//!   requirement.
20//! - Linux [`proc_pid_mountinfo(5)`](https://man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html)
21//!   — documents the per-process mountinfo format this source parses.
22//! - [`cgroups(7)`](https://man7.org/linux/man-pages/man7/cgroups.7.html)
23//!   — documents the cgroup hierarchy names that container runtimes embed
24//!   in their mount paths (`/docker/...`, `kubepods-...`, `crio-...`).
25//!
26//! # Identity scope
27//!
28//! `ContainerId` is **per-container** scope. A process running on a
29//! bare host (no container runtime) sees no match in `mountinfo` and
30//! the source returns `Ok(None)` — the resolver falls through to the
31//! host-scope sources beneath it. A process running in a container
32//! returns the container's runtime-assigned ID, distinct from every
33//! sibling container on the same host. Placing `ContainerId` above
34//! the per-instance cloud sources and the per-host-OS sources is
35//! what prevents every container on one host from colliding onto the
36//! host's identity; the default chains do this for you. See
37//! `docs/algorithm.md` → "Identity scope".
38
39use std::io::{BufRead, BufReader, Read};
40use std::path::{Path, PathBuf};
41
42use crate::error::Error;
43use crate::source::{Probe, Source, SourceKind};
44
/// Mountinfo file consulted by [`ContainerId::new`]: the calling process's
/// own view of its mount table.
const DEFAULT_MOUNTINFO_PATH: &str = "/proc/self/mountinfo";
46
/// Ceiling, in bytes, on how much of the mountinfo file is consumed.
///
/// Hosts with hundreds of mounts stay well under 1 MiB, so a 2 MiB cap
/// never truncates a realistic file. What it does is bound the memory a
/// corrupt or adversarial procfs can make the line-by-line reader buffer
/// (for instance by presenting one enormous "line").
const MAX_MOUNTINFO_BYTES: u64 = 2 << 20; // 2 MiB
52
/// Identity source that reports the container ID found in a mountinfo file.
///
/// The value only remembers *where* to look; the file is read when the
/// source is probed.
#[derive(Clone, Debug)]
pub struct ContainerId {
    /// Location of the mountinfo file to scan.
    mountinfo_path: PathBuf,
}
58
59impl ContainerId {
60    /// Read from the standard `/proc/self/mountinfo` path.
61    #[must_use]
62    pub fn new() -> Self {
63        Self {
64            mountinfo_path: PathBuf::from(DEFAULT_MOUNTINFO_PATH),
65        }
66    }
67
68    /// Read from a caller-supplied mountinfo path (useful for tests and
69    /// alternate procfs mount points).
70    #[must_use]
71    pub fn at(path: impl Into<PathBuf>) -> Self {
72        Self {
73            mountinfo_path: path.into(),
74        }
75    }
76}
77
78impl Default for ContainerId {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl Source for ContainerId {
85    fn kind(&self) -> SourceKind {
86        SourceKind::Container
87    }
88
89    fn probe(&self) -> Result<Option<Probe>, Error> {
90        Ok(extract_container_id(&self.mountinfo_path)
91            .map(|id| Probe::new(SourceKind::Container, id)))
92    }
93}
94
95fn extract_container_id(path: &Path) -> Option<String> {
96    let file = match std::fs::File::open(path) {
97        Ok(f) => f,
98        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return None,
99        Err(err) => {
100            log::debug!(
101                "host-identity: container: reading {}: {err}",
102                path.display()
103            );
104            return None;
105        }
106    };
107    BufReader::new(file.take(MAX_MOUNTINFO_BYTES))
108        .lines()
109        .map_while(Result::ok)
110        .find_map(|line| {
111            line.split_ascii_whitespace()
112                .find_map(container_id_from_word)
113        })
114}
115
/// Substrings that identify a mountinfo word as belonging to a container
/// runtime.
///
/// A 64-hex run is only accepted as a container ID when the word that
/// holds it also contains at least one of these tokens. This filters out
/// unrelated paths that merely happen to embed a 64-hex component, such as
/// an overlay `lowerdir=/var/lib/foo/<64hex>/data` created by another tool.
const RUNTIME_TOKENS: &[&str] = &[
    "docker",
    "kubepods",
    "containerd",
    "crio",
    "containers",
    "libpod",
    "sandboxes",
];
129
130fn word_has_runtime_token(word: &str) -> bool {
131    RUNTIME_TOKENS.iter().any(|tok| word.contains(tok))
132}
133
134fn container_id_from_word(word: &str) -> Option<String> {
135    if !word_has_runtime_token(word) {
136        return None;
137    }
138    let bytes = word.as_bytes();
139    bytes.windows(64).enumerate().find_map(|(start, run)| {
140        if !is_hex_run(run) || !matches_surrounding(&bytes[..start], &bytes[start + 64..]) {
141            return None;
142        }
143        let id = std::str::from_utf8(run).expect("ascii hex is valid utf-8");
144        Some(id.to_owned())
145    })
146}
147
/// Returns `true` when every byte is a lowercase hexadecimal digit
/// (`0-9` or `a-f`). Uppercase digits are rejected; an empty slice is
/// vacuously accepted.
fn is_hex_run(bytes: &[u8]) -> bool {
    bytes
        .iter()
        .all(|&byte| byte.is_ascii_digit() || (b'a'..=b'f').contains(&byte))
}
151
/// Decides whether the bytes around a 64-hex run mark it as a container ID.
///
/// `prefix` is everything in the word before the run and `suffix` is
/// everything after it. Three shapes are accepted:
///
/// - end-anchored `/<hex>$` or `:<hex>$`;
/// - end-anchored `/.+-<hex>.scope$`;
/// - un-anchored `containers/<hex>` or `sandboxes/<hex>`.
fn matches_surrounding(prefix: &[u8], suffix: &[u8]) -> bool {
    // `/<hex>$` and `:<hex>$`: nothing may follow the run.
    let end_anchored = suffix.is_empty() && matches!(prefix.last(), Some(b'/' | b':'));

    // `/.+-<hex>.scope$`: drop the trailing `-`, then require a `/` that is
    // followed by at least one more byte, i.e. a `/` anywhere except in the
    // final position of what remains.
    let scope_unit = suffix == b".scope"
        && match prefix.split_last() {
            Some((b'-', stem)) => match stem.split_last() {
                Some((_, head)) => head.contains(&b'/'),
                None => false,
            },
            _ => false,
        };

    // `containers/<hex>` and `sandboxes/<hex>`: anything may follow.
    let runtime_dir = prefix.ends_with(b"containers/") || prefix.ends_with(b"sandboxes/");

    end_anchored || scope_unit || runtime_dir
}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 64-character candidate ID by repeating `fill`.
    fn id_of(fill: &str) -> String {
        fill.repeat(64)
    }

    #[test]
    fn docker_pattern_matches() {
        let id = id_of("a");
        let word = format!("/docker/{id}");
        assert_eq!(container_id_from_word(&word), Some(id));
    }

    #[test]
    fn rejects_short_hex() {
        assert_eq!(container_id_from_word("/docker/abc"), None);
    }

    #[test]
    fn scope_pattern_rejects_non_hex_tail() {
        let not_hex = id_of("z");
        let word = format!("/crio-{not_hex}.scope");
        assert_eq!(container_id_from_word(&word), None);
    }

    #[test]
    fn extract_container_id_reads_mountinfo_file() {
        use std::io::Write;

        let id = id_of("b");
        let contents = format!(
            "1 2 0:0 / /host rw,relatime - overlay overlay rw,lowerdir=/var/lib/docker/containers/{id}/hostname\n"
        );
        let mut mountinfo = tempfile::NamedTempFile::new().unwrap();
        mountinfo.write_all(contents.as_bytes()).unwrap();
        assert_eq!(extract_container_id(mountinfo.path()), Some(id));
    }

    #[test]
    fn extract_container_id_empty_file_is_none() {
        let mountinfo = tempfile::NamedTempFile::new().unwrap();
        assert_eq!(extract_container_id(mountinfo.path()), None);
    }

    #[test]
    fn colon_pattern_matches() {
        let id = id_of("c");
        let word = format!("docker://sha256:{id}");
        assert_eq!(container_id_from_word(&word), Some(id));
    }

    #[test]
    fn scope_pattern_matches() {
        let id = id_of("d");
        let word = format!("/system.slice/crio-{id}.scope");
        assert_eq!(container_id_from_word(&word), Some(id));
    }

    #[test]
    fn sandboxes_pattern_matches() {
        let id = id_of("e");
        let word = format!("/run/containerd/sandboxes/{id}/rootfs");
        assert_eq!(container_id_from_word(&word), Some(id));
    }

    #[test]
    fn scope_pattern_requires_slash_before_dash() {
        let id = id_of("f");
        // The word has no `/` at all ahead of the trailing `-`.
        let word = format!("crio-{id}.scope");
        assert_eq!(container_id_from_word(&word), None);
    }

    #[test]
    fn scope_pattern_requires_char_between_slash_and_dash() {
        let id = id_of("0");
        // The `/` sits directly before the `-`; the `.+` of the original
        // regex demands at least one character in between.
        let word = format!("/-{id}.scope");
        assert_eq!(container_id_from_word(&word), None);
    }

    #[test]
    fn bare_hex_without_delimiter_is_rejected() {
        let id = id_of("1");
        assert_eq!(container_id_from_word(&id), None);
    }

    #[test]
    fn incidental_hex_path_without_runtime_token_is_rejected() {
        // An overlay `lowerdir=` written by an unrelated tool: the 64-hex
        // run is a `/<64hex>/` path component, but nothing in the word
        // names a container runtime.
        let id = id_of("2");
        let word = format!("lowerdir=/var/lib/foo/{id}/data");
        assert_eq!(container_id_from_word(&word), None);
    }
}