Skip to main content

provenant/parsers/
cargo_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Cargo.lock lockfiles.
5//!
6//! Extracts resolved dependency information including exact versions and
7//! checksums from Rust Cargo.lock files.
8//!
9//! # Supported Formats
10//! - Cargo.lock (lockfile)
11//!
12//! # Key Features
13//! - Exact version resolution from lockfile
14//! - Direct vs transitive dependency tracking (`is_direct`)
15//! - Checksum extraction for verification
16//! - Package URL (purl) generation
17//! - Dependency graph with source tracking (crates.io, git, path)
18//!
19//! # Implementation Notes
20//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
//! - Direct dependencies determined from the root package's dependency list;
//!   non-root packages without a `source` entry are also recorded as direct
22//! - Uses TOML parsing for structured data extraction
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use packageurl::PackageUrl;
28use serde_json::json;
29use std::collections::{HashMap, hash_map::Entry};
30use std::path::Path;
31use toml::Value;
32
33use super::PackageParser;
34
/// Parser for Rust `Cargo.lock` lockfiles.
///
/// Stateless marker type: all behaviour lives in its [`PackageParser`]
/// implementation, which reports the root package together with the pinned
/// dependencies (and checksums, where present) recorded in the lockfile.
pub struct CargoLockParser;
39
40impl PackageParser for CargoLockParser {
41    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
42
43    fn is_match(path: &Path) -> bool {
44        path.file_name()
45            .and_then(|name| name.to_str())
46            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_cargo_lock(path) {
51            Ok(content) => content,
52            Err(e) => {
53                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let packages = match content.get("package").and_then(|v| v.as_array()) {
59            Some(pkgs) => pkgs,
60            None => {
61                warn!("No 'package' array found in Cargo.lock at {:?}", path);
62                return vec![default_package_data()];
63            }
64        };
65
66        let root_package = select_root_package(packages);
67
68        let name = root_package
69            .and_then(|p| p.get("name"))
70            .and_then(|v| v.as_str())
71            .map(|s| truncate_field(s.to_string()));
72
73        let version = root_package
74            .and_then(|p| p.get("version"))
75            .and_then(|v| v.as_str())
76            .map(|s| truncate_field(s.to_string()));
77
78        let checksum = root_package
79            .and_then(|p| p.get("checksum"))
80            .and_then(|v| v.as_str())
81            .map(|s| truncate_field(s.to_string()));
82
83        let (sha256, extra_data) = match checksum.as_deref() {
84            Some(h) if h.len() == 64 && Sha256Digest::from_hex(h).is_ok() => {
85                (Sha256Digest::from_hex(h).ok(), None)
86            }
87            Some(h) if hex::decode(h).is_ok() => {
88                let mut map = HashMap::new();
89                map.insert("checksum".to_string(), json!(h));
90                (None, Some(map))
91            }
92            _ => (None, None),
93        };
94
95        let dependencies = extract_all_dependencies(packages, root_package);
96
97        let purl = match (&name, &version) {
98            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
99                p.with_version(v.as_str()).ok()?;
100                Some(truncate_field(p.to_string()))
101            }),
102            _ => None,
103        };
104
105        let api_data_url = match (&name, &version) {
106            (Some(n), Some(v)) => Some(truncate_field(format!(
107                "https://crates.io/api/v1/crates/{}/{}",
108                n, v
109            ))),
110            (Some(n), None) => Some(truncate_field(format!(
111                "https://crates.io/api/v1/crates/{}",
112                n
113            ))),
114            _ => None,
115        };
116
117        vec![PackageData {
118            package_type: Some(Self::PACKAGE_TYPE),
119            namespace: None,
120            name,
121            version,
122            qualifiers: None,
123            subpath: None,
124            primary_language: None,
125            description: None,
126            release_date: None,
127            parties: Vec::new(),
128            keywords: Vec::new(),
129            homepage_url: None,
130            download_url: None,
131            size: None,
132            sha1: None,
133            md5: None,
134            sha256,
135            sha512: None,
136            bug_tracking_url: None,
137            code_view_url: None,
138            vcs_url: None,
139            copyright: None,
140            holder: None,
141            declared_license_expression: None,
142            declared_license_expression_spdx: None,
143            license_detections: Vec::new(),
144            other_license_expression: None,
145            other_license_expression_spdx: None,
146            other_license_detections: Vec::new(),
147            extracted_license_statement: None,
148            notice_text: None,
149            source_packages: Vec::new(),
150            file_references: Vec::new(),
151            is_private: false,
152            is_virtual: false,
153            extra_data,
154            dependencies,
155            repository_homepage_url: None,
156            repository_download_url: None,
157            api_data_url,
158            datasource_id: Some(DatasourceId::CargoLock),
159            purl,
160        }]
161    }
162}
163
164fn read_cargo_lock(path: &Path) -> Result<Value, String> {
165    let content =
166        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
167    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
168}
169
170fn select_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
171    packages
172        .iter()
173        .filter_map(|package| package.as_table())
174        .find(|table| table.get("source").is_none())
175        .or_else(|| packages.first().and_then(|package| package.as_table()))
176}
177
178fn extract_all_dependencies(
179    packages: &[Value],
180    root_package: Option<&toml::map::Map<String, Value>>,
181) -> Vec<Dependency> {
182    let mut all_dependencies: HashMap<CargoDependencyKey, Dependency> = HashMap::new();
183
184    let package_versions = build_package_versions(packages);
185    let package_provenance = build_package_provenance(packages);
186    let root_package_key = root_package.and_then(package_key_from_table);
187    for package in packages.iter().take(MAX_ITERATION_COUNT) {
188        if let Some(pkg_table) = package.as_table() {
189            let is_root_package = package_key_from_table(pkg_table)
190                .zip(root_package_key)
191                .is_some_and(|(package_key, root_key)| package_key == root_key);
192
193            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
194                for dep in deps.iter().take(MAX_ITERATION_COUNT) {
195                    if let Some(dep_str) = dep.as_str() {
196                        let parsed_dependency = parse_dependency_string(dep_str);
197                        let name = parsed_dependency.name;
198                        let resolved_version = if parsed_dependency.version.is_empty() {
199                            package_versions
200                                .get(name)
201                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
202                                .unwrap_or("")
203                        } else {
204                            parsed_dependency.version
205                        };
206
207                        if !name.is_empty() {
208                            let purl = if resolved_version.is_empty() {
209                                PackageUrl::new("cargo", name)
210                                    .ok()
211                                    .map(|p| truncate_field(p.to_string()))
212                            } else {
213                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
214                                    p.with_version(resolved_version).ok()?;
215                                    Some(truncate_field(p.to_string()))
216                                })
217                            };
218
219                            let extra_data = build_dependency_extra_data(
220                                name,
221                                resolved_version,
222                                parsed_dependency.source,
223                                &package_provenance,
224                            );
225
226                            let dependency = Dependency {
227                                purl,
228                                extracted_requirement: if resolved_version.is_empty() {
229                                    None
230                                } else {
231                                    Some(truncate_field(resolved_version.to_string()))
232                                },
233                                scope: None,
234                                is_runtime: None,
235                                is_optional: None,
236                                is_pinned: Some(true),
237                                is_direct: Some(is_root_package),
238                                resolved_package: None,
239                                extra_data,
240                            };
241
242                            let key = CargoDependencyKey::from_dependency(&dependency);
243                            match all_dependencies.entry(key) {
244                                Entry::Vacant(entry) => {
245                                    entry.insert(dependency);
246                                }
247                                Entry::Occupied(mut entry) => {
248                                    if is_root_package {
249                                        entry.get_mut().is_direct = Some(true);
250                                    }
251                                }
252                            }
253                        }
254                    }
255                }
256            }
257        }
258    }
259
260    for package in packages
261        .iter()
262        .take(MAX_ITERATION_COUNT)
263        .filter_map(|package| package.as_table())
264    {
265        let Some((name, version)) = package_key_from_table(package) else {
266            continue;
267        };
268
269        let is_root_package = package_key_from_table(package)
270            .zip(root_package_key)
271            .is_some_and(|(package_key, root_key)| package_key == root_key);
272        if package.get("source").is_some() {
273            continue;
274        }
275
276        if is_root_package {
277            continue;
278        }
279
280        let Some(mut purl) = PackageUrl::new("cargo", name).ok() else {
281            continue;
282        };
283        if purl.with_version(version).is_err() {
284            continue;
285        }
286
287        let dependency = Dependency {
288            purl: Some(truncate_field(purl.to_string())),
289            extracted_requirement: Some(truncate_field(version.to_string())),
290            scope: None,
291            is_runtime: None,
292            is_optional: None,
293            is_pinned: Some(true),
294            is_direct: Some(true),
295            resolved_package: None,
296            extra_data: build_dependency_extra_data(name, version, None, &package_provenance),
297        };
298
299        let key = CargoDependencyKey::from_dependency(&dependency);
300        match all_dependencies.entry(key) {
301            Entry::Vacant(entry) => {
302                entry.insert(dependency);
303            }
304            Entry::Occupied(mut entry) => {
305                entry.get_mut().is_direct = Some(true);
306            }
307        }
308    }
309
310    let mut dependencies: Vec<_> = all_dependencies.into_values().collect();
311    dependencies.sort_by(|left, right| {
312        left.purl
313            .as_deref()
314            .cmp(&right.purl.as_deref())
315            .then_with(|| {
316                left.extracted_requirement
317                    .as_deref()
318                    .cmp(&right.extracted_requirement.as_deref())
319            })
320    });
321    dependencies
322}
323
/// Identity used to deduplicate dependencies collected from the lockfile.
///
/// Two dependencies are treated as the same entry when all three fields match.
#[derive(PartialEq, Eq, Hash)]
struct CargoDependencyKey {
    /// Package URL of the dependency, if one was built.
    purl: Option<String>,
    /// Version string recorded as the dependency's extracted requirement.
    extracted_requirement: Option<String>,
    /// Value of the `source` entry in the dependency's extra data, if any.
    source: Option<String>,
}
330
331impl CargoDependencyKey {
332    fn from_dependency(dependency: &Dependency) -> Self {
333        let source = dependency
334            .extra_data
335            .as_ref()
336            .and_then(|extra_data| extra_data.get("source"))
337            .and_then(|value| value.as_str())
338            .map(ToOwned::to_owned);
339
340        Self {
341            purl: dependency.purl.clone(),
342            extracted_requirement: dependency.extracted_requirement.clone(),
343            source,
344        }
345    }
346}
347
348fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
349    packages
350        .iter()
351        .filter_map(|package| package.as_table())
352        .filter_map(|table| {
353            Some((
354                table.get("name")?.as_str()?,
355                table.get("version")?.as_str()?,
356            ))
357        })
358        .fold(HashMap::new(), |mut acc, (name, version)| {
359            acc.entry(name).or_default().push(version);
360            acc
361        })
362}
363
364fn build_package_provenance<'a>(
365    packages: &'a [Value],
366) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
367    packages
368        .iter()
369        .filter_map(|package| package.as_table())
370        .filter_map(|table| {
371            Some((
372                (
373                    table.get("name")?.as_str()?,
374                    table.get("version")?.as_str()?,
375                ),
376                DependencyProvenance {
377                    source: table.get("source").and_then(|value| value.as_str()),
378                    checksum: table.get("checksum").and_then(|value| value.as_str()),
379                },
380            ))
381        })
382        .fold(HashMap::new(), |mut acc, (key, provenance)| {
383            acc.entry(key).or_default().push(provenance);
384            acc
385        })
386}
387
388fn build_dependency_extra_data(
389    name: &str,
390    resolved_version: &str,
391    source_hint: Option<&str>,
392    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
393) -> Option<HashMap<String, serde_json::Value>> {
394    let mut extra_data = HashMap::new();
395
396    if !resolved_version.is_empty()
397        && let Some(provenance) = package_provenance
398            .get(&(name, resolved_version))
399            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
400    {
401        if let Some(source) = provenance.source {
402            extra_data.insert(
403                "source".to_string(),
404                json!(truncate_field(source.to_string())),
405            );
406        }
407        if let Some(checksum) = provenance.checksum {
408            extra_data.insert(
409                "checksum".to_string(),
410                json!(truncate_field(checksum.to_string())),
411            );
412        }
413    }
414
415    if !extra_data.contains_key("source")
416        && let Some(source) = source_hint
417    {
418        extra_data.insert(
419            "source".to_string(),
420            json!(truncate_field(source.to_string())),
421        );
422    }
423
424    if extra_data.is_empty() {
425        None
426    } else {
427        Some(extra_data)
428    }
429}
430
431fn select_dependency_provenance<'a>(
432    candidates: &'a [DependencyProvenance<'a>],
433    source_hint: Option<&str>,
434) -> Option<DependencyProvenance<'a>> {
435    if let Some(source_hint) = source_hint {
436        return candidates
437            .iter()
438            .copied()
439            .find(|candidate| candidate.source == Some(source_hint));
440    }
441
442    (candidates.len() == 1).then_some(candidates[0])
443}
444
445fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
446    Some((
447        table.get("name")?.as_str()?,
448        table.get("version")?.as_str()?,
449    ))
450}
451
452fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
453    let trimmed = dep_str.trim();
454    let source = trimmed
455        .find(" (")
456        .and_then(|source_start| trimmed[source_start + 2..].strip_suffix(')'));
457    let without_source = trimmed
458        .find(" (")
459        .map(|source_start| &trimmed[..source_start])
460        .unwrap_or(trimmed);
461
462    let mut parts = without_source.split_whitespace();
463    let name = parts.next().unwrap_or("");
464    let version = parts.next().unwrap_or("");
465
466    ParsedDependency {
467        name,
468        version,
469        source,
470    }
471}
472
/// Borrowed pieces of one entry in a package's `dependencies` array.
#[derive(Copy, Clone)]
struct ParsedDependency<'a> {
    /// Crate name; empty when the entry had no name.
    name: &'a str,
    /// Version text; empty when the entry did not include one.
    version: &'a str,
    /// Text inside the trailing parentheses, when present and closed.
    source: Option<&'a str>,
}
479
/// Where a locked package came from, as recorded on its lockfile entry.
#[derive(Copy, Clone)]
struct DependencyProvenance<'a> {
    /// The entry's `source` string, if it has one.
    source: Option<&'a str>,
    /// The entry's `checksum` string, if it has one.
    checksum: Option<&'a str>,
}
485
486fn default_package_data() -> PackageData {
487    PackageData {
488        package_type: Some(CargoLockParser::PACKAGE_TYPE),
489        datasource_id: Some(DatasourceId::CargoLock),
490        ..Default::default()
491    }
492}
493
// Parser registration metadata: description, glob patterns, package type,
// primary language, and a documentation URL.
// NOTE(review): both `Cargo.lock` and `cargo.lock` are listed here, while
// `CargoLockParser::is_match` compares the file name case-insensitively;
// confirm against the `register_parser!` definition whether these globs need
// to cover other casings.
crate::register_parser!(
    "Rust Cargo.lock lockfile",
    &["**/Cargo.lock", "**/cargo.lock"],
    "cargo",
    "Rust",
    Some("https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html"),
);