Skip to main content

provenant/parsers/
cargo_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Cargo.lock lockfiles.
5//!
6//! Extracts resolved dependency information including exact versions and
7//! checksums from Rust Cargo.lock files.
8//!
9//! # Supported Formats
10//! - Cargo.lock (lockfile)
11//!
12//! # Key Features
13//! - Exact version resolution from lockfile
14//! - Direct vs transitive dependency tracking (`is_direct`)
15//! - Checksum extraction for verification
16//! - Package URL (purl) generation
17//! - Dependency graph with source tracking (crates.io, git, path)
18//!
19//! # Implementation Notes
20//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
//! - Direct dependencies are determined from the root package's dependency list; every package without a `source` entry is additionally reported as a direct dependency
22//! - Uses TOML parsing for structured data extraction
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use packageurl::PackageUrl;
28use serde_json::json;
29use std::collections::{HashMap, hash_map::Entry};
30use std::path::Path;
31use toml::Value;
32
33use super::PackageParser;
34
/// Rust Cargo.lock lockfile parser.
///
/// Extracts pinned dependency versions with checksums from Cargo-managed Rust projects.
///
/// This is a stateless unit type; all behavior lives in its [`PackageParser`]
/// implementation.
pub struct CargoLockParser;
39
40impl PackageParser for CargoLockParser {
41    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
42
43    fn is_match(path: &Path) -> bool {
44        path.file_name()
45            .and_then(|name| name.to_str())
46            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_cargo_lock(path) {
51            Ok(content) => content,
52            Err(e) => {
53                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let packages = match content.get("package").and_then(|v| v.as_array()) {
59            Some(pkgs) => pkgs,
60            None => {
61                warn!("No 'package' array found in Cargo.lock at {:?}", path);
62                return vec![default_package_data()];
63            }
64        };
65
66        let root_package = select_root_package(packages);
67
68        let name = root_package
69            .and_then(|p| p.get("name"))
70            .and_then(|v| v.as_str())
71            .map(|s| truncate_field(s.to_string()));
72
73        let version = root_package
74            .and_then(|p| p.get("version"))
75            .and_then(|v| v.as_str())
76            .map(|s| truncate_field(s.to_string()));
77
78        let checksum = root_package
79            .and_then(|p| p.get("checksum"))
80            .and_then(|v| v.as_str())
81            .map(|s| truncate_field(s.to_string()));
82
83        let (sha256, extra_data) = match checksum.as_deref() {
84            Some(h) if h.len() == 64 && Sha256Digest::from_hex(h).is_ok() => {
85                (Sha256Digest::from_hex(h).ok(), None)
86            }
87            Some(h) if hex::decode(h).is_ok() => {
88                let mut map = HashMap::new();
89                map.insert("checksum".to_string(), json!(h));
90                (None, Some(map))
91            }
92            _ => (None, None),
93        };
94
95        let dependencies = extract_all_dependencies(packages, root_package);
96
97        let purl = match (&name, &version) {
98            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
99                p.with_version(v.as_str()).ok()?;
100                Some(truncate_field(p.to_string()))
101            }),
102            _ => None,
103        };
104
105        let api_data_url = match (&name, &version) {
106            (Some(n), Some(v)) => Some(truncate_field(format!(
107                "https://crates.io/api/v1/crates/{}/{}",
108                n, v
109            ))),
110            (Some(n), None) => Some(truncate_field(format!(
111                "https://crates.io/api/v1/crates/{}",
112                n
113            ))),
114            _ => None,
115        };
116
117        vec![PackageData {
118            package_type: Some(Self::PACKAGE_TYPE),
119            namespace: None,
120            name,
121            version,
122            qualifiers: None,
123            subpath: None,
124            primary_language: None,
125            description: None,
126            release_date: None,
127            parties: Vec::new(),
128            keywords: Vec::new(),
129            homepage_url: None,
130            download_url: None,
131            size: None,
132            sha1: None,
133            md5: None,
134            sha256,
135            sha512: None,
136            bug_tracking_url: None,
137            code_view_url: None,
138            vcs_url: None,
139            copyright: None,
140            holder: None,
141            declared_license_expression: None,
142            declared_license_expression_spdx: None,
143            license_detections: Vec::new(),
144            other_license_expression: None,
145            other_license_expression_spdx: None,
146            other_license_detections: Vec::new(),
147            extracted_license_statement: None,
148            notice_text: None,
149            source_packages: Vec::new(),
150            file_references: Vec::new(),
151            is_private: false,
152            is_virtual: false,
153            extra_data,
154            dependencies,
155            repository_homepage_url: None,
156            repository_download_url: None,
157            api_data_url,
158            datasource_id: Some(DatasourceId::CargoLock),
159            purl,
160        }]
161    }
162}
163
164fn read_cargo_lock(path: &Path) -> Result<Value, String> {
165    let content =
166        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
167    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
168}
169
170fn select_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
171    packages
172        .iter()
173        .filter_map(|package| package.as_table())
174        .find(|table| table.get("source").is_none())
175        .or_else(|| packages.first().and_then(|package| package.as_table()))
176}
177
178fn extract_all_dependencies(
179    packages: &[Value],
180    root_package: Option<&toml::map::Map<String, Value>>,
181) -> Vec<Dependency> {
182    let mut all_dependencies: HashMap<CargoDependencyKey, Dependency> = HashMap::new();
183
184    let package_versions = build_package_versions(packages);
185    let package_provenance = build_package_provenance(packages);
186    let root_package_key = root_package.and_then(package_key_from_table);
187    for package in packages.iter().take(MAX_ITERATION_COUNT) {
188        if let Some(pkg_table) = package.as_table() {
189            let is_root_package = package_key_from_table(pkg_table)
190                .zip(root_package_key)
191                .is_some_and(|(package_key, root_key)| package_key == root_key);
192
193            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
194                for dep in deps.iter().take(MAX_ITERATION_COUNT) {
195                    if let Some(dep_str) = dep.as_str() {
196                        let parsed_dependency = parse_dependency_string(dep_str);
197                        let name = parsed_dependency.name;
198                        let resolved_version = if parsed_dependency.version.is_empty() {
199                            package_versions
200                                .get(name)
201                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
202                                .unwrap_or("")
203                        } else {
204                            parsed_dependency.version
205                        };
206
207                        if !name.is_empty() {
208                            let purl = if resolved_version.is_empty() {
209                                PackageUrl::new("cargo", name)
210                                    .ok()
211                                    .map(|p| truncate_field(p.to_string()))
212                            } else {
213                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
214                                    p.with_version(resolved_version).ok()?;
215                                    Some(truncate_field(p.to_string()))
216                                })
217                            };
218
219                            let extra_data = build_dependency_extra_data(
220                                name,
221                                resolved_version,
222                                parsed_dependency.source,
223                                &package_provenance,
224                            );
225
226                            let dependency = Dependency {
227                                purl,
228                                extracted_requirement: if resolved_version.is_empty() {
229                                    None
230                                } else {
231                                    Some(truncate_field(resolved_version.to_string()))
232                                },
233                                scope: None,
234                                is_runtime: None,
235                                is_optional: None,
236                                is_pinned: Some(true),
237                                is_direct: Some(is_root_package),
238                                resolved_package: None,
239                                extra_data,
240                            };
241
242                            let key = CargoDependencyKey::from_dependency(&dependency);
243                            match all_dependencies.entry(key) {
244                                Entry::Vacant(entry) => {
245                                    entry.insert(dependency);
246                                }
247                                Entry::Occupied(mut entry) => {
248                                    if is_root_package {
249                                        entry.get_mut().is_direct = Some(true);
250                                    }
251                                }
252                            }
253                        }
254                    }
255                }
256            }
257        }
258    }
259
260    for package in packages
261        .iter()
262        .take(MAX_ITERATION_COUNT)
263        .filter_map(|package| package.as_table())
264    {
265        let Some((name, version)) = package_key_from_table(package) else {
266            continue;
267        };
268
269        if package.get("source").is_some() {
270            continue;
271        }
272
273        let Some(mut purl) = PackageUrl::new("cargo", name).ok() else {
274            continue;
275        };
276        if purl.with_version(version).is_err() {
277            continue;
278        }
279
280        let dependency = Dependency {
281            purl: Some(truncate_field(purl.to_string())),
282            extracted_requirement: Some(truncate_field(version.to_string())),
283            scope: None,
284            is_runtime: None,
285            is_optional: None,
286            is_pinned: Some(true),
287            is_direct: Some(true),
288            resolved_package: None,
289            extra_data: build_dependency_extra_data(name, version, None, &package_provenance),
290        };
291
292        let key = CargoDependencyKey::from_dependency(&dependency);
293        match all_dependencies.entry(key) {
294            Entry::Vacant(entry) => {
295                entry.insert(dependency);
296            }
297            Entry::Occupied(mut entry) => {
298                entry.get_mut().is_direct = Some(true);
299            }
300        }
301    }
302
303    let mut dependencies: Vec<_> = all_dependencies.into_values().collect();
304    dependencies.sort_by(|left, right| {
305        left.purl
306            .as_deref()
307            .cmp(&right.purl.as_deref())
308            .then_with(|| {
309                left.extracted_requirement
310                    .as_deref()
311                    .cmp(&right.extracted_requirement.as_deref())
312            })
313    });
314    dependencies
315}
316
317#[derive(Hash, PartialEq, Eq)]
318struct CargoDependencyKey {
319    purl: Option<String>,
320    extracted_requirement: Option<String>,
321    source: Option<String>,
322}
323
324impl CargoDependencyKey {
325    fn from_dependency(dependency: &Dependency) -> Self {
326        let source = dependency
327            .extra_data
328            .as_ref()
329            .and_then(|extra_data| extra_data.get("source"))
330            .and_then(|value| value.as_str())
331            .map(ToOwned::to_owned);
332
333        Self {
334            purl: dependency.purl.clone(),
335            extracted_requirement: dependency.extracted_requirement.clone(),
336            source,
337        }
338    }
339}
340
341fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
342    packages
343        .iter()
344        .filter_map(|package| package.as_table())
345        .filter_map(|table| {
346            Some((
347                table.get("name")?.as_str()?,
348                table.get("version")?.as_str()?,
349            ))
350        })
351        .fold(HashMap::new(), |mut acc, (name, version)| {
352            acc.entry(name).or_default().push(version);
353            acc
354        })
355}
356
357fn build_package_provenance<'a>(
358    packages: &'a [Value],
359) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
360    packages
361        .iter()
362        .filter_map(|package| package.as_table())
363        .filter_map(|table| {
364            Some((
365                (
366                    table.get("name")?.as_str()?,
367                    table.get("version")?.as_str()?,
368                ),
369                DependencyProvenance {
370                    source: table.get("source").and_then(|value| value.as_str()),
371                    checksum: table.get("checksum").and_then(|value| value.as_str()),
372                },
373            ))
374        })
375        .fold(HashMap::new(), |mut acc, (key, provenance)| {
376            acc.entry(key).or_default().push(provenance);
377            acc
378        })
379}
380
381fn build_dependency_extra_data(
382    name: &str,
383    resolved_version: &str,
384    source_hint: Option<&str>,
385    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
386) -> Option<HashMap<String, serde_json::Value>> {
387    let mut extra_data = HashMap::new();
388
389    if !resolved_version.is_empty()
390        && let Some(provenance) = package_provenance
391            .get(&(name, resolved_version))
392            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
393    {
394        if let Some(source) = provenance.source {
395            extra_data.insert(
396                "source".to_string(),
397                json!(truncate_field(source.to_string())),
398            );
399        }
400        if let Some(checksum) = provenance.checksum {
401            extra_data.insert(
402                "checksum".to_string(),
403                json!(truncate_field(checksum.to_string())),
404            );
405        }
406    }
407
408    if !extra_data.contains_key("source")
409        && let Some(source) = source_hint
410    {
411        extra_data.insert(
412            "source".to_string(),
413            json!(truncate_field(source.to_string())),
414        );
415    }
416
417    if extra_data.is_empty() {
418        None
419    } else {
420        Some(extra_data)
421    }
422}
423
424fn select_dependency_provenance<'a>(
425    candidates: &'a [DependencyProvenance<'a>],
426    source_hint: Option<&str>,
427) -> Option<DependencyProvenance<'a>> {
428    if let Some(source_hint) = source_hint {
429        return candidates
430            .iter()
431            .copied()
432            .find(|candidate| candidate.source == Some(source_hint));
433    }
434
435    (candidates.len() == 1).then_some(candidates[0])
436}
437
438fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
439    Some((
440        table.get("name")?.as_str()?,
441        table.get("version")?.as_str()?,
442    ))
443}
444
445fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
446    let trimmed = dep_str.trim();
447    let source = trimmed
448        .find(" (")
449        .and_then(|source_start| trimmed[source_start + 2..].strip_suffix(')'));
450    let without_source = trimmed
451        .find(" (")
452        .map(|source_start| &trimmed[..source_start])
453        .unwrap_or(trimmed);
454
455    let mut parts = without_source.split_whitespace();
456    let name = parts.next().unwrap_or("");
457    let version = parts.next().unwrap_or("");
458
459    ParsedDependency {
460        name,
461        version,
462        source,
463    }
464}
465
/// Borrowed pieces of one `dependencies` entry, as produced by
/// `parse_dependency_string`.
#[derive(Clone, Copy)]
struct ParsedDependency<'a> {
    /// First whitespace-separated token; `""` when the entry is empty.
    name: &'a str,
    /// Second whitespace-separated token; `""` when no version was written.
    version: &'a str,
    /// Text inside the trailing `(...)`, when present and properly closed.
    source: Option<&'a str>,
}
472
/// Origin information copied from a `[[package]]` table, borrowed from the
/// parsed lockfile.
#[derive(Clone, Copy)]
struct DependencyProvenance<'a> {
    /// The package's `source` string, if the table has one.
    source: Option<&'a str>,
    /// The package's `checksum` string, if the table has one.
    checksum: Option<&'a str>,
}
478
479fn default_package_data() -> PackageData {
480    PackageData {
481        package_type: Some(CargoLockParser::PACKAGE_TYPE),
482        datasource_id: Some(DatasourceId::CargoLock),
483        ..Default::default()
484    }
485}
486
// Registers this parser via the crate's `register_parser!` macro.
// NOTE(review): the argument meanings below are inferred from their values
// (description, path glob patterns, package type, language, documentation
// URL) — confirm against the macro definition.
// The globs list only `Cargo.lock` and `cargo.lock`, whereas `is_match`
// compares the file name ASCII case-insensitively.
crate::register_parser!(
    "Rust Cargo.lock lockfile",
    &["**/Cargo.lock", "**/cargo.lock"],
    "cargo",
    "Rust",
    Some("https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html"),
);