Skip to main content

rustinel_core/
safety.rs

1//! Hardening primitives that make rustinel safe to run against *fully
2//! untrusted* inputs (lockfiles, manifests, source trees, advisory databases,
3//! registry caches).
4//!
5//! rustinel is a supply-chain tool, so it must never become a supply-chain
6//! attack vector itself. Every value that originates from analyzed data is
7//! treated as hostile:
8//!
9//! - **No code execution.** The core never runs `build.rs`, never compiles, and
10//!   never spawns processes. (The CLI's `advisory update` shells out to `git`
11//!   with a fixed argument vector and no shell interpolation.)
12//! - **No attacker-controlled network.** The optional metadata lookup (in the
13//!   CLI) fetches the crates.io sparse index over HTTPS with a *fixed* host and a
14//!   validated crate-name path; no request target is ever derived from analyzed
15//!   data, which removes SSRF as a class of bug.
16//! - **Bounded I/O.** Every file read is size-capped; directory walks are depth-
17//!   and entry-bounded; symlinks are never followed during traversal.
18//! - **Validated identifiers.** Crate names/versions are validated before they
19//!   are ever used to build a filesystem path or an index lookup, blocking path
20//!   traversal and separator injection.
21
22use std::fs::File;
23use std::io::Read;
24use std::path::{Component, Path};
25
/// Maximum bytes read from a single source/manifest file (8 MiB).
pub const MAX_SOURCE_FILE_BYTES: u64 = 8 * 1024 * 1024;
/// Maximum bytes read from a single advisory document (1 MiB).
pub const MAX_ADVISORY_FILE_BYTES: u64 = 1024 * 1024;
/// Maximum directory recursion depth for any walk.
pub const MAX_DIR_DEPTH: usize = 32;
/// Maximum number of filesystem entries visited in a single walk.
pub const MAX_DIR_ENTRIES: usize = 200_000;
/// Maximum length, in bytes, accepted for a crate name.
pub const MAX_NAME_LEN: usize = 64;
/// Maximum length, in bytes, accepted for a version token.
pub const MAX_VERSION_LEN: usize = 64;
37
38/// Validate a Cargo crate name for safe use in filesystem paths and index
39/// lookups. Conservative allowlist: ASCII alphanumerics plus `-` and `_`.
40///
41/// This rejects path separators, `..`, NUL, whitespace, URL metacharacters and
42/// any non-ASCII — i.e. everything an attacker would need for traversal or
43/// request smuggling.
44pub fn is_safe_crate_name(name: &str) -> bool {
45    !name.is_empty()
46        && name.len() <= MAX_NAME_LEN
47        && name
48            .bytes()
49            .all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_')
50}
51
52/// Validate a version token for safe use in a filesystem path. Allows the
53/// semver character set (alnum, `.`, `+`, `-`, `_`) and nothing else.
54pub fn is_safe_version(version: &str) -> bool {
55    !version.is_empty()
56        && version.len() <= MAX_VERSION_LEN
57        && version
58            .bytes()
59            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'+' | b'-' | b'_'))
60        // Defense in depth: reject anything that could be a parent reference.
61        && version != ".."
62        && !version.contains("..")
63}
64
/// Check that `segment` is a single path component that can be joined onto a
/// trusted base directory.
///
/// A segment passes only if it is 1 to 255 bytes long, contains none of `/`,
/// `\` or NUL, and is not one of the special components `.` or `..`.
pub fn is_safe_path_segment(segment: &str) -> bool {
    let length_ok = (1..=255).contains(&segment.len());
    // Either separator style is refused regardless of the host platform.
    let has_forbidden_char = segment.chars().any(|c| matches!(c, '/' | '\\' | '\0'));
    let is_dot_reference = segment == "." || segment == "..";
    length_ok && !has_forbidden_char && !is_dot_reference
}
76
/// Report whether the resolved form of `child` lies inside the resolved form of
/// `base` (a path counts as being inside itself).
///
/// Both paths are canonicalized before the prefix comparison. The check fails
/// closed: if either path cannot be canonicalized, the answer is `false`.
pub fn is_contained_within(base: &Path, child: &Path) -> bool {
    base.canonicalize()
        .and_then(|resolved_base| {
            child
                .canonicalize()
                .map(|resolved_child| resolved_child.starts_with(&resolved_base))
        })
        // Any canonicalization failure means "not contained".
        .unwrap_or(false)
}
85
/// Cheap lexical pre-check for use before a join: returns `true` when none of
/// the components of `path` is a `..` (parent directory) component.
///
/// The path is only inspected textually; the filesystem is not consulted.
pub fn has_no_parent_components(path: &Path) -> bool {
    path.components().all(|part| part != Component::ParentDir)
}
91
/// Read the regular file at `path` into a `String`, never consuming more than
/// `max_bytes` bytes.
///
/// Returns `None` (never an error, so callers can degrade gracefully) when:
/// - the file cannot be opened or its metadata cannot be queried,
/// - the opened handle does not refer to a regular file,
/// - the reported size exceeds `max_bytes`, or
/// - the read itself fails.
///
/// The metadata is taken from the already-open handle, and the read goes
/// through a reader limited to `max_bytes`, so a file that grows after the size
/// check still cannot push more than `max_bytes` into memory.
///
/// Callers should additionally skip symlinks during directory traversal; this
/// function only guards the read itself.
pub fn read_file_capped(path: &Path, max_bytes: u64) -> Option<String> {
    let handle = File::open(path).ok()?;
    let info = handle.metadata().ok()?;
    let acceptable = info.is_file() && info.len() <= max_bytes;
    if !acceptable {
        return None;
    }
    // The limited reader bounds the read independently of the size reported
    // above (defense against TOCTOU growth).
    let mut raw = Vec::new();
    handle.take(max_bytes).read_to_end(&mut raw).ok()?;
    // Lossy decoding on purpose: the scanners do ASCII substring matching, so a
    // stray non-UTF-8 byte must not remove the whole file from analysis (that
    // would be a trivial evasion gap). TOML callers still fail to parse
    // malformed input.
    let text = String::from_utf8_lossy(&raw).into_owned();
    Some(text)
}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a scratch-file path in the system temp directory whose name
    /// includes this process's id, so concurrent test processes sharing the
    /// same temp directory cannot clobber or delete each other's fixtures.
    fn scratch_path(stem: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!("rustinel_safety_{}_{}", std::process::id(), stem))
    }

    #[test]
    fn crate_name_allows_normal() {
        assert!(is_safe_crate_name("serde"));
        assert!(is_safe_crate_name("openssl-sys"));
        assert!(is_safe_crate_name("wasm_bindgen"));
        assert!(is_safe_crate_name("a1"));
    }

    #[test]
    fn crate_name_rejects_traversal_and_injection() {
        assert!(!is_safe_crate_name(""));
        assert!(!is_safe_crate_name(".."));
        assert!(!is_safe_crate_name("../etc"));
        assert!(!is_safe_crate_name("foo/bar"));
        assert!(!is_safe_crate_name("foo\\bar"));
        assert!(!is_safe_crate_name("foo bar"));
        assert!(!is_safe_crate_name("foo\0"));
        assert!(!is_safe_crate_name("a/../../b"));
        assert!(!is_safe_crate_name("café")); // non-ASCII
        assert!(!is_safe_crate_name(&"a".repeat(65)));
        // URL/host-confusion attempts
        assert!(!is_safe_crate_name("evil.com"));
        assert!(!is_safe_crate_name("crate@host"));
    }

    #[test]
    fn version_validation() {
        assert!(is_safe_version("1.0.0"));
        assert!(is_safe_version("0.9.99"));
        assert!(is_safe_version("1.0.0+spec-1.1.0"));
        assert!(!is_safe_version("../1.0.0"));
        assert!(!is_safe_version("1.0.0/.."));
        assert!(!is_safe_version(".."));
        assert!(!is_safe_version("1 0"));
        assert!(!is_safe_version(""));
    }

    #[test]
    fn path_segment_validation() {
        assert!(is_safe_path_segment("serde-1.0.0"));
        assert!(!is_safe_path_segment(".."));
        assert!(!is_safe_path_segment("a/b"));
        assert!(!is_safe_path_segment(""));
    }

    #[test]
    fn containment_blocks_escape() {
        let base = std::env::temp_dir();
        assert!(is_contained_within(&base, &base));
        // A sibling/parent path must not be considered contained.
        assert!(!is_contained_within(&base, std::path::Path::new("/")));
    }

    #[test]
    fn no_parent_components_detects_dotdot() {
        assert!(has_no_parent_components(Path::new("a/b/c")));
        assert!(!has_no_parent_components(Path::new("a/../b")));
    }

    #[test]
    fn read_cap_rejects_oversize() {
        let path = scratch_path("big.txt");
        std::fs::write(&path, vec![b'a'; 1024]).unwrap();
        let within_cap = read_file_capped(&path, 4096);
        let over_cap = read_file_capped(&path, 512);
        // Clean up before asserting so a failing assertion cannot leak the file.
        let _ = std::fs::remove_file(&path);
        assert!(within_cap.is_some());
        assert!(over_cap.is_none());
    }

    #[test]
    fn read_cap_decodes_non_utf8_lossily() {
        // A single non-UTF-8 byte must NOT drop the whole file (an evasion gap) —
        // the ASCII fingerprints the scanners look for must still survive.
        let path = scratch_path("nonutf8.rs");
        let mut bytes = b"fn x(){ reqwest::get(\"https://x.workers.dev\");".to_vec();
        bytes.push(0xFF);
        bytes.extend_from_slice(b" }");
        std::fs::write(&path, &bytes).unwrap();
        let got = read_file_capped(&path, 4096);
        // Clean up before asserting so a failing assertion cannot leak the file.
        let _ = std::fs::remove_file(&path);
        let got = got.expect("file must not be dropped");
        assert!(got.contains(".workers.dev"));
    }
}