Skip to main content

provenant/parsers/
cargo_lock.rs

//! Parser for Cargo.lock lockfiles.
//!
//! Extracts resolved dependency information including exact versions and
//! checksums from Rust Cargo.lock files.
//!
//! # Supported Formats
//! - Cargo.lock (lockfile)
//!
//! # Key Features
//! - Exact version resolution from lockfile
//! - Direct vs transitive dependency tracking (`is_direct`)
//! - Checksum extraction for verification
//! - Package URL (purl) generation
//! - Dependency graph with source tracking (crates.io, git, path)
//!
//! # Implementation Notes
//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
//! - Direct dependencies determined from root package's dependency list
//! - Uses TOML parsing for structured data extraction
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
22use crate::parser_warn as warn;
23use packageurl::PackageUrl;
24use serde_json::json;
25use std::collections::{HashMap, hash_map::Entry};
26use std::fs::File;
27use std::io::Read;
28use std::path::Path;
29use toml::Value;
30
31use super::PackageParser;
32
33/// Rust Cargo.lock lockfile parser.
34///
35/// Extracts pinned dependency versions with checksums from Cargo-managed Rust projects.
36pub struct CargoLockParser;
37
38impl PackageParser for CargoLockParser {
39    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
40
41    fn is_match(path: &Path) -> bool {
42        path.file_name()
43            .and_then(|name| name.to_str())
44            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
45    }
46
47    fn extract_packages(path: &Path) -> Vec<PackageData> {
48        let content = match read_cargo_lock(path) {
49            Ok(content) => content,
50            Err(e) => {
51                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
52                return vec![default_package_data()];
53            }
54        };
55
56        let packages = match content.get("package").and_then(|v| v.as_array()) {
57            Some(pkgs) => pkgs,
58            None => {
59                warn!("No 'package' array found in Cargo.lock at {:?}", path);
60                return vec![default_package_data()];
61            }
62        };
63
64        let root_package = select_root_package(packages);
65
66        let name = root_package
67            .and_then(|p| p.get("name"))
68            .and_then(|v| v.as_str())
69            .map(String::from);
70
71        let version = root_package
72            .and_then(|p| p.get("version"))
73            .and_then(|v| v.as_str())
74            .map(String::from);
75
76        let checksum = root_package
77            .and_then(|p| p.get("checksum"))
78            .and_then(|v| v.as_str())
79            .map(String::from);
80
81        let (sha256, extra_data) = match checksum.as_deref() {
82            Some(h) if h.len() == 64 && Sha256Digest::from_hex(h).is_ok() => {
83                (Sha256Digest::from_hex(h).ok(), None)
84            }
85            Some(h) if hex::decode(h).is_ok() => {
86                let mut map = HashMap::new();
87                map.insert("checksum".to_string(), json!(h));
88                (None, Some(map))
89            }
90            _ => (None, None),
91        };
92
93        let dependencies = extract_all_dependencies(packages, root_package);
94
95        let purl = match (&name, &version) {
96            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
97                p.with_version(v.as_str()).ok()?;
98                Some(p.to_string())
99            }),
100            _ => None,
101        };
102
103        let api_data_url = match (&name, &version) {
104            (Some(n), Some(v)) => Some(format!("https://crates.io/api/v1/crates/{}/{}", n, v)),
105            (Some(n), None) => Some(format!("https://crates.io/api/v1/crates/{}", n)),
106            _ => None,
107        };
108
109        vec![PackageData {
110            package_type: Some(Self::PACKAGE_TYPE),
111            namespace: None,
112            name,
113            version,
114            qualifiers: None,
115            subpath: None,
116            primary_language: None,
117            description: None,
118            release_date: None,
119            parties: Vec::new(),
120            keywords: Vec::new(),
121            homepage_url: None,
122            download_url: None,
123            size: None,
124            sha1: None,
125            md5: None,
126            sha256,
127            sha512: None,
128            bug_tracking_url: None,
129            code_view_url: None,
130            vcs_url: None,
131            copyright: None,
132            holder: None,
133            declared_license_expression: None,
134            declared_license_expression_spdx: None,
135            license_detections: Vec::new(),
136            other_license_expression: None,
137            other_license_expression_spdx: None,
138            other_license_detections: Vec::new(),
139            extracted_license_statement: None,
140            notice_text: None,
141            source_packages: Vec::new(),
142            file_references: Vec::new(),
143            is_private: false,
144            is_virtual: false,
145            extra_data,
146            dependencies,
147            repository_homepage_url: None,
148            repository_download_url: None,
149            api_data_url,
150            datasource_id: Some(DatasourceId::CargoLock),
151            purl,
152        }]
153    }
154}
155
156fn read_cargo_lock(path: &Path) -> Result<Value, String> {
157    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
158    let mut content = String::new();
159    file.read_to_string(&mut content)
160        .map_err(|e| format!("Failed to read file: {}", e))?;
161    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
162}
163
164fn select_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
165    packages
166        .iter()
167        .filter_map(|package| package.as_table())
168        .find(|table| table.get("source").is_none())
169        .or_else(|| packages.first().and_then(|package| package.as_table()))
170}
171
172fn extract_all_dependencies(
173    packages: &[Value],
174    root_package: Option<&toml::map::Map<String, Value>>,
175) -> Vec<Dependency> {
176    let mut all_dependencies: HashMap<CargoDependencyKey, Dependency> = HashMap::new();
177
178    let package_versions = build_package_versions(packages);
179    let package_provenance = build_package_provenance(packages);
180    let root_package_key = root_package.and_then(package_key_from_table);
181    for package in packages {
182        if let Some(pkg_table) = package.as_table() {
183            let is_root_package = package_key_from_table(pkg_table)
184                .zip(root_package_key)
185                .is_some_and(|(package_key, root_key)| package_key == root_key);
186
187            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
188                for dep in deps {
189                    if let Some(dep_str) = dep.as_str() {
190                        let parsed_dependency = parse_dependency_string(dep_str);
191                        let name = parsed_dependency.name;
192                        let resolved_version = if parsed_dependency.version.is_empty() {
193                            package_versions
194                                .get(name)
195                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
196                                .unwrap_or("")
197                        } else {
198                            parsed_dependency.version
199                        };
200
201                        if !name.is_empty() {
202                            let purl = if resolved_version.is_empty() {
203                                PackageUrl::new("cargo", name).ok().map(|p| p.to_string())
204                            } else {
205                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
206                                    p.with_version(resolved_version).ok()?;
207                                    Some(p.to_string())
208                                })
209                            };
210
211                            let extra_data = build_dependency_extra_data(
212                                name,
213                                resolved_version,
214                                parsed_dependency.source,
215                                &package_provenance,
216                            );
217
218                            let dependency = Dependency {
219                                purl,
220                                extracted_requirement: if resolved_version.is_empty() {
221                                    None
222                                } else {
223                                    Some(resolved_version.to_string())
224                                },
225                                scope: None,
226                                is_runtime: None,
227                                is_optional: None,
228                                is_pinned: Some(true),
229                                is_direct: Some(is_root_package),
230                                resolved_package: None,
231                                extra_data,
232                            };
233
234                            let key = CargoDependencyKey::from_dependency(&dependency);
235                            match all_dependencies.entry(key) {
236                                Entry::Vacant(entry) => {
237                                    entry.insert(dependency);
238                                }
239                                Entry::Occupied(mut entry) => {
240                                    if is_root_package {
241                                        entry.get_mut().is_direct = Some(true);
242                                    }
243                                }
244                            }
245                        }
246                    }
247                }
248            }
249        }
250    }
251
252    for package in packages.iter().filter_map(|package| package.as_table()) {
253        let Some((name, version)) = package_key_from_table(package) else {
254            continue;
255        };
256
257        let is_root_package = package_key_from_table(package)
258            .zip(root_package_key)
259            .is_some_and(|(package_key, root_key)| package_key == root_key);
260        if package.get("source").is_some() {
261            continue;
262        }
263
264        if is_root_package {
265            continue;
266        }
267
268        let Some(mut purl) = PackageUrl::new("cargo", name).ok() else {
269            continue;
270        };
271        if purl.with_version(version).is_err() {
272            continue;
273        }
274
275        let dependency = Dependency {
276            purl: Some(purl.to_string()),
277            extracted_requirement: Some(version.to_string()),
278            scope: None,
279            is_runtime: None,
280            is_optional: None,
281            is_pinned: Some(true),
282            is_direct: Some(true),
283            resolved_package: None,
284            extra_data: build_dependency_extra_data(name, version, None, &package_provenance),
285        };
286
287        let key = CargoDependencyKey::from_dependency(&dependency);
288        match all_dependencies.entry(key) {
289            Entry::Vacant(entry) => {
290                entry.insert(dependency);
291            }
292            Entry::Occupied(mut entry) => {
293                entry.get_mut().is_direct = Some(true);
294            }
295        }
296    }
297
298    let mut dependencies: Vec<_> = all_dependencies.into_values().collect();
299    dependencies.sort_by(|left, right| {
300        left.purl
301            .as_deref()
302            .cmp(&right.purl.as_deref())
303            .then_with(|| {
304                left.extracted_requirement
305                    .as_deref()
306                    .cmp(&right.extracted_requirement.as_deref())
307            })
308    });
309    dependencies
310}
311
312#[derive(Hash, PartialEq, Eq)]
313struct CargoDependencyKey {
314    purl: Option<String>,
315    extracted_requirement: Option<String>,
316    source: Option<String>,
317}
318
319impl CargoDependencyKey {
320    fn from_dependency(dependency: &Dependency) -> Self {
321        let source = dependency
322            .extra_data
323            .as_ref()
324            .and_then(|extra_data| extra_data.get("source"))
325            .and_then(|value| value.as_str())
326            .map(ToOwned::to_owned);
327
328        Self {
329            purl: dependency.purl.clone(),
330            extracted_requirement: dependency.extracted_requirement.clone(),
331            source,
332        }
333    }
334}
335
336fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
337    packages
338        .iter()
339        .filter_map(|package| package.as_table())
340        .filter_map(|table| {
341            Some((
342                table.get("name")?.as_str()?,
343                table.get("version")?.as_str()?,
344            ))
345        })
346        .fold(HashMap::new(), |mut acc, (name, version)| {
347            acc.entry(name).or_default().push(version);
348            acc
349        })
350}
351
352fn build_package_provenance<'a>(
353    packages: &'a [Value],
354) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
355    packages
356        .iter()
357        .filter_map(|package| package.as_table())
358        .filter_map(|table| {
359            Some((
360                (
361                    table.get("name")?.as_str()?,
362                    table.get("version")?.as_str()?,
363                ),
364                DependencyProvenance {
365                    source: table.get("source").and_then(|value| value.as_str()),
366                    checksum: table.get("checksum").and_then(|value| value.as_str()),
367                },
368            ))
369        })
370        .fold(HashMap::new(), |mut acc, (key, provenance)| {
371            acc.entry(key).or_default().push(provenance);
372            acc
373        })
374}
375
376fn build_dependency_extra_data(
377    name: &str,
378    resolved_version: &str,
379    source_hint: Option<&str>,
380    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
381) -> Option<HashMap<String, serde_json::Value>> {
382    let mut extra_data = HashMap::new();
383
384    if !resolved_version.is_empty()
385        && let Some(provenance) = package_provenance
386            .get(&(name, resolved_version))
387            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
388    {
389        if let Some(source) = provenance.source {
390            extra_data.insert("source".to_string(), json!(source));
391        }
392        if let Some(checksum) = provenance.checksum {
393            extra_data.insert("checksum".to_string(), json!(checksum));
394        }
395    }
396
397    if !extra_data.contains_key("source")
398        && let Some(source) = source_hint
399    {
400        extra_data.insert("source".to_string(), json!(source));
401    }
402
403    if extra_data.is_empty() {
404        None
405    } else {
406        Some(extra_data)
407    }
408}
409
410fn select_dependency_provenance<'a>(
411    candidates: &'a [DependencyProvenance<'a>],
412    source_hint: Option<&str>,
413) -> Option<DependencyProvenance<'a>> {
414    if let Some(source_hint) = source_hint {
415        return candidates
416            .iter()
417            .copied()
418            .find(|candidate| candidate.source == Some(source_hint));
419    }
420
421    (candidates.len() == 1).then_some(candidates[0])
422}
423
424fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
425    Some((
426        table.get("name")?.as_str()?,
427        table.get("version")?.as_str()?,
428    ))
429}
430
/// Splits one entry of a package's `dependencies` array into its parts.
///
/// The accepted shape is `name [version] [(source)]`:
/// - everything from the first `" ("` onwards is the source part; it is
///   reported only when the entry ends with `)`, with the parentheses removed;
/// - the text before it is split on whitespace, yielding the name and then
///   the version. Missing pieces are returned as empty strings.
fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
    let trimmed = dep_str.trim();

    let (without_source, source) = match trimmed.split_once(" (") {
        Some((head, tail)) => (head, tail.strip_suffix(')')),
        None => (trimmed, None),
    };

    let mut tokens = without_source.split_whitespace();
    let name = tokens.next().unwrap_or_default();
    let version = tokens.next().unwrap_or_default();

    ParsedDependency {
        name,
        version,
        source,
    }
}

/// Borrowed view of one parsed `dependencies` entry.
#[derive(Clone, Copy)]
struct ParsedDependency<'a> {
    // Crate name; empty when the entry was blank.
    name: &'a str,
    // Version token; empty when the entry did not carry one.
    version: &'a str,
    // Text between the parentheses, when a well-formed `(source)` suffix exists.
    source: Option<&'a str>,
}
458
/// Where a locked package came from, as recorded on its lockfile entry.
#[derive(Clone, Copy)]
struct DependencyProvenance<'a> {
    /// Value of the entry's `source` key, when present and a string.
    source: Option<&'a str>,
    /// Value of the entry's `checksum` key, when present and a string.
    checksum: Option<&'a str>,
}
464
465fn default_package_data() -> PackageData {
466    PackageData {
467        package_type: Some(CargoLockParser::PACKAGE_TYPE),
468        datasource_id: Some(DatasourceId::CargoLock),
469        ..Default::default()
470    }
471}
472
// Registers this parser's metadata through the crate-level `register_parser!`
// macro. The arguments are, in order: a human-readable description, the path
// globs for lockfiles, the string "cargo", the string "Rust", and an optional
// documentation URL.
// NOTE(review): the meaning of each argument is inferred from its value here;
// confirm against the macro definition.
crate::register_parser!(
    "Rust Cargo.lock lockfile",
    &["**/Cargo.lock", "**/cargo.lock"],
    "cargo",
    "Rust",
    Some("https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html"),
);