Skip to main content

rustinel_core/
lockfile.rs

1use crate::errors::RustinelError;
2use serde::{Deserialize, Serialize};
3use std::collections::BTreeMap;
4use std::path::{Path, PathBuf};
5
/// A fully-qualified package identity: `name@version` plus its source registry.
///
/// The derived `PartialOrd`/`Ord` compare the fields in declaration order
/// (`name`, then `version`, then `source`). All three are plain strings, so
/// versions order lexicographically rather than by semver precedence.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct PackageId {
    /// Crate name.
    pub name: String,
    /// Package version, kept in its string form.
    pub version: String,
    /// Source the package was resolved from, or `None` when the lockfile
    /// entry carries no `source` (a local/workspace crate).
    pub source: Option<String>,
}
13
14impl std::fmt::Display for PackageId {
15    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16        write!(f, "{}@{}", self.name, self.version)
17    }
18}
19
/// Canonical source string for packages resolved from the default crates.io
/// registry. Cargo normalises crates.io to this `registry+` form in `Cargo.lock`
/// regardless of the git/sparse fetch protocol, so it is the value seen in
/// practice. [`CRATES_IO_SPARSE`] is accepted as well for robustness; both are
/// the exact strings [`PackageId::is_crates_io`] compares against.
pub const CRATES_IO_REGISTRY: &str = "registry+https://github.com/rust-lang/crates.io-index";
25
/// Sparse-index source string for crates.io. Not normally written to
/// `Cargo.lock` (cargo uses [`CRATES_IO_REGISTRY`]), but accepted defensively
/// by [`PackageId::is_crates_io`]. The comparison there is an exact string
/// match, so the trailing `/` is significant.
pub const CRATES_IO_SPARSE: &str = "sparse+https://index.crates.io/";
29
30impl PackageId {
31    /// A package with no `source` is a local/workspace crate, not a registry dep.
32    pub fn is_local(&self) -> bool {
33        self.source.is_none()
34    }
35
36    /// True only for packages sourced from the default crates.io registry.
37    ///
38    /// RustSec advisories are keyed to crates.io, so a git, path, or
39    /// alternate-registry crate that merely shares a name with an advised
40    /// crate must not be matched against it (matches cargo-audit behaviour).
41    pub fn is_crates_io(&self) -> bool {
42        matches!(
43            self.source.as_deref(),
44            Some(CRATES_IO_REGISTRY) | Some(CRATES_IO_SPARSE)
45        )
46    }
47}
48
/// One package entry of a lockfile, in this crate's own parser-independent
/// representation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Package {
    /// Identity (name, version, source) of the package.
    pub id: PackageId,
    /// Checksum recorded for the package, as a string; `None` when the entry
    /// has no checksum.
    pub checksum: Option<String>,
    /// Dependencies of this package. When built by `parse_lockfile_str` these
    /// are dependency *names* only (no version or source).
    pub dependencies: Vec<String>,
}
55
/// In-memory model of a parsed `Cargo.lock`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileModel {
    /// Path the lockfile was associated with by the caller of the parser.
    pub path: PathBuf,
    /// Lockfile *format* version (the top-level `version = N`), or `None`
    /// when the file does not declare one.
    pub version: Option<u32>,
    /// All packages in the lockfile. When built by `parse_lockfile_str` they
    /// are sorted by [`PackageId`] for deterministic output.
    pub packages: Vec<Package>,
}
62
63impl LockfileModel {
64    /// Group packages by crate name (sorted). Used to detect duplicate versions.
65    pub fn by_name(&self) -> BTreeMap<&str, Vec<&Package>> {
66        let mut out: BTreeMap<&str, Vec<&Package>> = BTreeMap::new();
67        for package in &self.packages {
68            out.entry(&package.id.name).or_default().push(package);
69        }
70        out
71    }
72
73    /// Registry (non-local) packages only.
74    pub fn registry_packages(&self) -> impl Iterator<Item = &Package> {
75        self.packages.iter().filter(|p| !p.id.is_local())
76    }
77}
78
79/// Parse a `Cargo.lock` from disk.
80pub fn parse_lockfile(path: &Path) -> Result<LockfileModel, RustinelError> {
81    let content = std::fs::read_to_string(path).map_err(|e| RustinelError::io(path, e))?;
82    parse_lockfile_str(path.to_path_buf(), &content)
83}
84
85/// Parse a `Cargo.lock` via the `cargo-lock` crate, converting both its `Err`
86/// and any **panic** it raises on malformed input into a clean error string.
87///
88/// `cargo-lock` v11 panics on some hostile lockfiles — e.g. a `checksum` that is
89/// 64 *bytes* but not 64 ASCII chars makes it byte-slice across a UTF-8 char
90/// boundary (`checksum.rs:48`, found by the fuzz harness). rustinel parses
91/// untrusted lockfiles, so a dependency panic must never crash us. Only the
92/// `cargo-lock` call is wrapped in `catch_unwind` (our own mapping code stays
93/// outside the closure, so genuine bugs there remain observable), and the panic
94/// hook is silenced for the duration so the output stays clean. Lockfile parsing
95/// is single-threaded, so the temporary global hook swap cannot race another
96/// thread's panic.
97fn parse_cargo_lock(content: &str) -> Result<cargo_lock::Lockfile, String> {
98    use std::panic::{catch_unwind, AssertUnwindSafe};
99    let prev = std::panic::take_hook();
100    std::panic::set_hook(Box::new(|_| {}));
101    let result = catch_unwind(AssertUnwindSafe(|| content.parse::<cargo_lock::Lockfile>()));
102    std::panic::set_hook(prev);
103    match result {
104        Ok(Ok(lockfile)) => Ok(lockfile),
105        Ok(Err(e)) => Err(e.to_string()),
106        Err(_) => Err("the lockfile parser rejected this input (guarded panic)".to_string()),
107    }
108}
109
110/// Parse a `Cargo.lock` from an in-memory string.
111///
112/// Backed by the upstream [`cargo_lock`] crate so the full lockfile grammar
113/// (v1–v4, git/path/registry sources, alternate registries) is handled
114/// correctly. We map its model into our own [`LockfileModel`] so the rest of the
115/// analysis is decoupled from the parser implementation. The top-level
116/// `version = N` field is read separately because it is the lockfile's *format*
117/// version, which we surface verbatim. Parsing is panic-guarded (see
118/// [`parse_cargo_lock`]) because the input is untrusted.
119pub fn parse_lockfile_str(path: PathBuf, content: &str) -> Result<LockfileModel, RustinelError> {
120    let version = extract_top_version(content);
121
122    let parsed: cargo_lock::Lockfile = parse_cargo_lock(content)
123        .map_err(|msg| RustinelError::lockfile_parse(path.clone(), msg))?;
124
125    let mut packages: Vec<Package> = parsed
126        .packages
127        .iter()
128        .map(|p| Package {
129            id: PackageId {
130                name: p.name.as_str().to_string(),
131                version: p.version.to_string(),
132                source: p.source.as_ref().map(|s| s.to_string()),
133            },
134            checksum: p.checksum.as_ref().map(|c| c.to_string()),
135            dependencies: p
136                .dependencies
137                .iter()
138                .map(|d| d.name.as_str().to_string())
139                .collect(),
140        })
141        .collect();
142
143    // Deterministic ordering regardless of lockfile layout.
144    packages.sort_by(|a, b| a.id.cmp(&b.id));
145
146    Ok(LockfileModel {
147        path,
148        version,
149        packages,
150    })
151}
152
/// Extract the top-level `version = N` (lockfile format version).
///
/// In TOML, top-level keys can only appear before the first table header, so
/// the scan stops at the first line that opens *any* table: `[[package]]`, but
/// also a legacy `[root]`, `[metadata]`, or `[[patch.unused]]`. A `version` key
/// after such a header belongs to that table and is never the format version.
///
/// Any spacing around `=` is tolerated, as is a trailing `# comment` and a
/// quoted value. Returns `None` for v1 lockfiles that omit the field, and when
/// the first top-level `version` value is not an unsigned integer.
fn extract_top_version(content: &str) -> Option<u32> {
    for raw in content.lines() {
        let line = raw.trim();

        // A table header ends the top-level key section.
        if line.starts_with('[') {
            break;
        }

        // Match the key `version` exactly: what follows must be `=` once
        // leading whitespace is skipped (so `versions = 1` is not a match).
        let value = line
            .strip_prefix("version")
            .and_then(|rest| rest.trim_start().strip_prefix('='));

        if let Some(value) = value {
            // Drop a trailing TOML comment; the value is an integer, so a
            // `#` can never be part of it.
            let value = value.split('#').next().unwrap_or(value);
            return value.trim().trim_matches('"').parse::<u32>().ok();
        }
    }
    None
}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `text` as though it had been read from `Cargo.lock`.
    fn parse(text: &str) -> Result<LockfileModel, RustinelError> {
        parse_lockfile_str(PathBuf::from("Cargo.lock"), text)
    }

    /// Find the package called `name`, panicking when it is not present.
    fn package_named<'a>(lockfile: &'a LockfileModel, name: &str) -> &'a Package {
        lockfile
            .packages
            .iter()
            .find(|p| p.id.name == name)
            .unwrap()
    }

    #[test]
    fn parses_simple_lockfile() {
        let lock_text = r#"
version = 3

[[package]]
name = "serde"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
"#;
        let lockfile = parse(lock_text).unwrap();
        assert_eq!(lockfile.version, Some(3));
        assert_eq!(lockfile.packages.len(), 1);

        let serde = &lockfile.packages[0];
        assert_eq!(serde.id.name, "serde");
        assert_eq!(
            serde.checksum.as_deref(),
            Some("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
        );
        assert!(!serde.id.is_local());
    }

    #[test]
    fn parses_dependencies_block() {
        // Every dependency listed must resolve to a package in the same
        // lockfile (cargo-lock checks this), hence the three extra entries.
        let lock_text = r#"
version = 3

[[package]]
name = "itoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

[[package]]
name = "ryu"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

[[package]]
name = "serde"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

[[package]]
name = "serde_json"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
dependencies = [
 "itoa",
 "ryu",
 "serde",
]
"#;
        let lockfile = parse(lock_text).unwrap();
        assert_eq!(lockfile.packages.len(), 4);
        let serde_json = package_named(&lockfile, "serde_json");
        assert_eq!(serde_json.dependencies, vec!["itoa", "ryu", "serde"]);
    }

    #[test]
    fn local_workspace_crate_has_no_source() {
        let lock_text = r#"
version = 3

[[package]]
name = "my-app"
version = "0.1.0"
dependencies = [
 "serde",
]

[[package]]
name = "serde"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
"#;
        let lockfile = parse(lock_text).unwrap();
        let workspace_crate = package_named(&lockfile, "my-app");
        assert!(workspace_crate.id.is_local());
        assert_eq!(lockfile.registry_packages().count(), 1);
    }

    #[test]
    fn empty_lockfile_is_ok() {
        let lockfile = parse("version = 4\n").unwrap();
        assert!(lockfile.packages.is_empty());
        assert_eq!(lockfile.version, Some(4));
    }

    #[test]
    fn version_field_tolerates_nonstandard_spacing() {
        let cases = [
            // Spacing around `=` must not matter for the format version.
            ("version = 3\n", Some(3)),
            ("version=3\n", Some(3)),
            ("version  =  3\n", Some(3)),
            ("version =\t4\n", Some(4)),
            // Once inside the first [[package]], `version` is a crate version.
            ("[[package]]\nversion = \"9.9.9\"\n", None),
            // Some other key is simply not the version.
            ("name = \"x\"\n", None),
        ];
        for (text, expected) in cases {
            assert_eq!(extract_top_version(text), expected);
        }
    }

    #[test]
    fn malformed_package_block_errors() {
        // The package entry has a name but no version.
        let lock_text = "[[package]]\nname = \"x\"\n";
        let failure = parse(lock_text).unwrap_err();
        assert!(matches!(failure, RustinelError::LockfileParse { .. }));
    }

    #[test]
    fn ordering_is_deterministic() {
        let lock_text = r#"
[[package]]
name = "zzz"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

[[package]]
name = "aaa"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
"#;
        let lockfile = parse(lock_text).unwrap();
        let (first, second) = (&lockfile.packages[0], &lockfile.packages[1]);
        assert_eq!(first.id.name, "aaa");
        assert_eq!(second.id.name, "zzz");
    }

    #[test]
    fn malformed_utf8_checksum_does_not_panic() {
        // Regression test for a cargo-lock v11 panic found by the fuzz harness.
        // A `checksum` of 64 *bytes* that holds a 2-byte UTF-8 char at an odd
        // byte offset makes cargo-lock slice across a char boundary
        // (`checksum.rs:48`) and panic. Lockfiles are untrusted input, so the
        // outcome has to be an ordinary Err rather than a process crash.
        let bad = format!("{}\u{021C}{}", "a".repeat(61), "a"); // 61 + 2 + 1 = 64 bytes
        assert_eq!(
            bad.len(),
            64,
            "must be 64 bytes to pass cargo-lock's length gate"
        );
        let lock_text = format!(
            "version = 3\n\n[[package]]\nname = \"x\"\nversion = \"1.0.0\"\n\
             source = \"registry+https://github.com/rust-lang/crates.io-index\"\n\
             checksum = \"{bad}\"\n"
        );
        let outcome = parse(&lock_text);
        assert!(
            outcome.is_err(),
            "a malformed-checksum lockfile must be a clean Err, not a panic"
        );
    }
}