Skip to main content

provenant/parsers/
cargo_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Cargo.lock lockfiles.
5//!
6//! Extracts resolved dependency information including exact versions and
7//! checksums from Rust Cargo.lock files.
8//!
9//! # Supported Formats
10//! - Cargo.lock (lockfile)
11//!
12//! # Key Features
13//! - Exact version resolution from lockfile
14//! - Direct vs transitive dependency tracking (`is_direct`)
15//! - Checksum extraction for verification
16//! - Package URL (purl) generation
17//! - Dependency graph with source tracking (crates.io, git, path)
18//!
19//! # Implementation Notes
20//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
21//! - Direct dependencies determined from root package's dependency list
22//! - Uses TOML parsing for structured data extraction
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use packageurl::PackageUrl;
28use serde_json::json;
29use std::collections::{HashMap, HashSet, hash_map::Entry};
30use std::path::Path;
31use toml::Value;
32
33use super::PackageParser;
34
35/// Rust Cargo.lock lockfile parser.
36///
37/// Extracts pinned dependency versions with checksums from Cargo-managed Rust projects.
38pub struct CargoLockParser;
39
40impl PackageParser for CargoLockParser {
41    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
42
43    fn is_match(path: &Path) -> bool {
44        path.file_name()
45            .and_then(|name| name.to_str())
46            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_cargo_lock(path) {
51            Ok(content) => content,
52            Err(e) => {
53                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let packages = match content.get("package").and_then(|v| v.as_array()) {
59            Some(pkgs) => pkgs,
60            None => {
61                warn!("No 'package' array found in Cargo.lock at {:?}", path);
62                return vec![default_package_data()];
63            }
64        };
65
66        let identity_package = select_identity_package(packages);
67        let dependency_root_package = select_dependency_root_package(packages);
68
69        let name = identity_package
70            .and_then(|p| p.get("name"))
71            .and_then(|v| v.as_str())
72            .map(|s| truncate_field(s.to_string()));
73
74        let version = identity_package
75            .and_then(|p| p.get("version"))
76            .and_then(|v| v.as_str())
77            .map(|s| truncate_field(s.to_string()));
78
79        let checksum = identity_package
80            .and_then(|p| p.get("checksum"))
81            .and_then(|v| v.as_str())
82            .map(|s| truncate_field(s.to_string()));
83
84        let (sha256, extra_data) = match checksum.as_deref() {
85            Some(h) if h.len() == 64 && Sha256Digest::from_hex(h).is_ok() => {
86                (Sha256Digest::from_hex(h).ok(), None)
87            }
88            Some(h) if hex::decode(h).is_ok() => {
89                let mut map = HashMap::new();
90                map.insert("checksum".to_string(), json!(h));
91                (None, Some(map))
92            }
93            _ => (None, None),
94        };
95
96        let dependencies = extract_all_dependencies(packages, dependency_root_package);
97
98        let purl = match (&name, &version) {
99            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
100                p.with_version(v.as_str()).ok()?;
101                Some(truncate_field(p.to_string()))
102            }),
103            _ => None,
104        };
105
106        let api_data_url = match (&name, &version) {
107            (Some(n), Some(v)) => Some(truncate_field(format!(
108                "https://crates.io/api/v1/crates/{}/{}",
109                n, v
110            ))),
111            (Some(n), None) => Some(truncate_field(format!(
112                "https://crates.io/api/v1/crates/{}",
113                n
114            ))),
115            _ => None,
116        };
117
118        vec![PackageData {
119            package_type: Some(Self::PACKAGE_TYPE),
120            namespace: None,
121            name,
122            version,
123            qualifiers: None,
124            subpath: None,
125            primary_language: None,
126            description: None,
127            release_date: None,
128            parties: Vec::new(),
129            keywords: Vec::new(),
130            homepage_url: None,
131            download_url: None,
132            size: None,
133            sha1: None,
134            md5: None,
135            sha256,
136            sha512: None,
137            bug_tracking_url: None,
138            code_view_url: None,
139            vcs_url: None,
140            copyright: None,
141            holder: None,
142            declared_license_expression: None,
143            declared_license_expression_spdx: None,
144            license_detections: Vec::new(),
145            other_license_expression: None,
146            other_license_expression_spdx: None,
147            other_license_detections: Vec::new(),
148            extracted_license_statement: None,
149            notice_text: None,
150            source_packages: Vec::new(),
151            file_references: Vec::new(),
152            is_private: false,
153            is_virtual: false,
154            extra_data,
155            dependencies,
156            repository_homepage_url: None,
157            repository_download_url: None,
158            api_data_url,
159            datasource_id: Some(DatasourceId::CargoLock),
160            purl,
161        }]
162    }
163
164    fn metadata() -> Vec<super::metadata::ParserMetadata> {
165        vec![super::metadata::ParserMetadata {
166            description: "Rust Cargo.lock lockfile",
167            file_patterns: &["**/Cargo.lock", "**/cargo.lock"],
168            package_type: "cargo",
169            primary_language: "Rust",
170            documentation_url: Some(
171                "https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html",
172            ),
173        }]
174    }
175}
176
177fn read_cargo_lock(path: &Path) -> Result<Value, String> {
178    let content =
179        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
180    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
181}
182
183fn select_dependency_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
184    packages
185        .iter()
186        .filter_map(|package| package.as_table())
187        .find(|table| table.get("source").is_none())
188        .or_else(|| packages.first().and_then(|package| package.as_table()))
189}
190
191fn select_identity_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
192    let local_packages: Vec<_> = packages
193        .iter()
194        .filter_map(|package| package.as_table())
195        .filter(|table| table.get("source").is_none())
196        .collect();
197
198    match local_packages.as_slice() {
199        [] => packages.first().and_then(|package| package.as_table()),
200        [only] => Some(*only),
201        _ => select_unique_root_like_local_package(&local_packages),
202    }
203}
204
205fn select_unique_root_like_local_package<'a>(
206    local_packages: &[&'a toml::map::Map<String, Value>],
207) -> Option<&'a toml::map::Map<String, Value>> {
208    let local_keys: HashSet<(String, String)> = local_packages
209        .iter()
210        .filter_map(|table| package_key_from_table(table))
211        .map(|(name, version)| (name.to_string(), version.to_string()))
212        .collect();
213
214    let referenced_local_keys: HashSet<(String, String)> = local_packages
215        .iter()
216        .flat_map(|table| {
217            table
218                .get("dependencies")
219                .and_then(Value::as_array)
220                .into_iter()
221                .flatten()
222                .filter_map(Value::as_str)
223                .filter_map(|dep| {
224                    let parsed = parse_dependency_string(dep);
225                    (!parsed.name.is_empty() && !parsed.version.is_empty())
226                        .then(|| (parsed.name.to_string(), parsed.version.to_string()))
227                })
228                .filter(|key| local_keys.contains(key))
229                .collect::<Vec<_>>()
230        })
231        .collect();
232
233    let root_candidates: Vec<_> = local_packages
234        .iter()
235        .copied()
236        .filter(|table| {
237            package_key_from_table(table).is_some_and(|(name, version)| {
238                !referenced_local_keys.contains(&(name.to_string(), version.to_string()))
239            })
240        })
241        .collect();
242
243    match root_candidates.as_slice() {
244        [only] => Some(*only),
245        _ => None,
246    }
247}
248
249fn extract_all_dependencies(
250    packages: &[Value],
251    root_package: Option<&toml::map::Map<String, Value>>,
252) -> Vec<Dependency> {
253    let mut all_dependencies: HashMap<CargoDependencyKey, Dependency> = HashMap::new();
254
255    let package_versions = build_package_versions(packages);
256    let package_provenance = build_package_provenance(packages);
257    let root_package_key = root_package.and_then(package_key_from_table);
258    for package in packages.iter().take(MAX_ITERATION_COUNT) {
259        if let Some(pkg_table) = package.as_table() {
260            let is_root_package = package_key_from_table(pkg_table)
261                .zip(root_package_key)
262                .is_some_and(|(package_key, root_key)| package_key == root_key);
263
264            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
265                for dep in deps.iter().take(MAX_ITERATION_COUNT) {
266                    if let Some(dep_str) = dep.as_str() {
267                        let parsed_dependency = parse_dependency_string(dep_str);
268                        let name = parsed_dependency.name;
269                        let resolved_version = if parsed_dependency.version.is_empty() {
270                            package_versions
271                                .get(name)
272                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
273                                .unwrap_or("")
274                        } else {
275                            parsed_dependency.version
276                        };
277
278                        if !name.is_empty() {
279                            let purl = if resolved_version.is_empty() {
280                                PackageUrl::new("cargo", name)
281                                    .ok()
282                                    .map(|p| truncate_field(p.to_string()))
283                            } else {
284                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
285                                    p.with_version(resolved_version).ok()?;
286                                    Some(truncate_field(p.to_string()))
287                                })
288                            };
289
290                            let extra_data = build_dependency_extra_data(
291                                name,
292                                resolved_version,
293                                parsed_dependency.source,
294                                &package_provenance,
295                            );
296
297                            let dependency = Dependency {
298                                purl,
299                                extracted_requirement: if resolved_version.is_empty() {
300                                    None
301                                } else {
302                                    Some(truncate_field(resolved_version.to_string()))
303                                },
304                                scope: None,
305                                is_runtime: None,
306                                is_optional: None,
307                                is_pinned: Some(true),
308                                is_direct: Some(is_root_package),
309                                resolved_package: None,
310                                extra_data,
311                            };
312
313                            let key = CargoDependencyKey::from_dependency(&dependency);
314                            match all_dependencies.entry(key) {
315                                Entry::Vacant(entry) => {
316                                    entry.insert(dependency);
317                                }
318                                Entry::Occupied(mut entry) => {
319                                    if is_root_package {
320                                        entry.get_mut().is_direct = Some(true);
321                                    }
322                                }
323                            }
324                        }
325                    }
326                }
327            }
328        }
329    }
330
331    for package in packages
332        .iter()
333        .take(MAX_ITERATION_COUNT)
334        .filter_map(|package| package.as_table())
335    {
336        let Some((name, version)) = package_key_from_table(package) else {
337            continue;
338        };
339
340        if package.get("source").is_some() {
341            continue;
342        }
343
344        let Some(mut purl) = PackageUrl::new("cargo", name).ok() else {
345            continue;
346        };
347        if purl.with_version(version).is_err() {
348            continue;
349        }
350
351        let dependency = Dependency {
352            purl: Some(truncate_field(purl.to_string())),
353            extracted_requirement: Some(truncate_field(version.to_string())),
354            scope: None,
355            is_runtime: None,
356            is_optional: None,
357            is_pinned: Some(true),
358            is_direct: Some(true),
359            resolved_package: None,
360            extra_data: build_dependency_extra_data(name, version, None, &package_provenance),
361        };
362
363        let key = CargoDependencyKey::from_dependency(&dependency);
364        match all_dependencies.entry(key) {
365            Entry::Vacant(entry) => {
366                entry.insert(dependency);
367            }
368            Entry::Occupied(mut entry) => {
369                entry.get_mut().is_direct = Some(true);
370            }
371        }
372    }
373
374    let mut dependencies: Vec<_> = all_dependencies.into_values().collect();
375    dependencies.sort_by(|left, right| {
376        left.purl
377            .as_deref()
378            .cmp(&right.purl.as_deref())
379            .then_with(|| {
380                left.extracted_requirement
381                    .as_deref()
382                    .cmp(&right.extracted_requirement.as_deref())
383            })
384    });
385    dependencies
386}
387
388#[derive(Hash, PartialEq, Eq)]
389struct CargoDependencyKey {
390    purl: Option<String>,
391    extracted_requirement: Option<String>,
392    source: Option<String>,
393}
394
395impl CargoDependencyKey {
396    fn from_dependency(dependency: &Dependency) -> Self {
397        let source = dependency
398            .extra_data
399            .as_ref()
400            .and_then(|extra_data| extra_data.get("source"))
401            .and_then(|value| value.as_str())
402            .map(ToOwned::to_owned);
403
404        Self {
405            purl: dependency.purl.clone(),
406            extracted_requirement: dependency.extracted_requirement.clone(),
407            source,
408        }
409    }
410}
411
412fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
413    packages
414        .iter()
415        .filter_map(|package| package.as_table())
416        .filter_map(|table| {
417            Some((
418                table.get("name")?.as_str()?,
419                table.get("version")?.as_str()?,
420            ))
421        })
422        .fold(HashMap::new(), |mut acc, (name, version)| {
423            acc.entry(name).or_default().push(version);
424            acc
425        })
426}
427
428fn build_package_provenance<'a>(
429    packages: &'a [Value],
430) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
431    packages
432        .iter()
433        .filter_map(|package| package.as_table())
434        .filter_map(|table| {
435            Some((
436                (
437                    table.get("name")?.as_str()?,
438                    table.get("version")?.as_str()?,
439                ),
440                DependencyProvenance {
441                    source: table.get("source").and_then(|value| value.as_str()),
442                    checksum: table.get("checksum").and_then(|value| value.as_str()),
443                },
444            ))
445        })
446        .fold(HashMap::new(), |mut acc, (key, provenance)| {
447            acc.entry(key).or_default().push(provenance);
448            acc
449        })
450}
451
452fn build_dependency_extra_data(
453    name: &str,
454    resolved_version: &str,
455    source_hint: Option<&str>,
456    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
457) -> Option<HashMap<String, serde_json::Value>> {
458    let mut extra_data = HashMap::new();
459
460    if !resolved_version.is_empty()
461        && let Some(provenance) = package_provenance
462            .get(&(name, resolved_version))
463            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
464    {
465        if let Some(source) = provenance.source {
466            extra_data.insert(
467                "source".to_string(),
468                json!(truncate_field(source.to_string())),
469            );
470        }
471        if let Some(checksum) = provenance.checksum {
472            extra_data.insert(
473                "checksum".to_string(),
474                json!(truncate_field(checksum.to_string())),
475            );
476        }
477    }
478
479    if !extra_data.contains_key("source")
480        && let Some(source) = source_hint
481    {
482        extra_data.insert(
483            "source".to_string(),
484            json!(truncate_field(source.to_string())),
485        );
486    }
487
488    if extra_data.is_empty() {
489        None
490    } else {
491        Some(extra_data)
492    }
493}
494
495fn select_dependency_provenance<'a>(
496    candidates: &'a [DependencyProvenance<'a>],
497    source_hint: Option<&str>,
498) -> Option<DependencyProvenance<'a>> {
499    if let Some(source_hint) = source_hint {
500        return candidates
501            .iter()
502            .copied()
503            .find(|candidate| candidate.source == Some(source_hint));
504    }
505
506    (candidates.len() == 1).then_some(candidates[0])
507}
508
509fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
510    Some((
511        table.get("name")?.as_str()?,
512        table.get("version")?.as_str()?,
513    ))
514}
515
516fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
517    let trimmed = dep_str.trim();
518    let source = trimmed
519        .find(" (")
520        .and_then(|source_start| trimmed[source_start + 2..].strip_suffix(')'));
521    let without_source = trimmed
522        .find(" (")
523        .map(|source_start| &trimmed[..source_start])
524        .unwrap_or(trimmed);
525
526    let mut parts = without_source.split_whitespace();
527    let name = parts.next().unwrap_or("");
528    let version = parts.next().unwrap_or("");
529
530    ParsedDependency {
531        name,
532        version,
533        source,
534    }
535}
536
/// Borrowed view of one dependency string from a lockfile `dependencies` array.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ParsedDependency<'a> {
    /// Package name; empty when the string contained no name.
    name: &'a str,
    /// Version as written in the string; empty when it was omitted.
    version: &'a str,
    /// Text inside the trailing parentheses, when present.
    source: Option<&'a str>,
}
543
/// Where a locked package came from, as recorded on its lockfile entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DependencyProvenance<'a> {
    /// Value of the package's `source` key, when it is a string.
    source: Option<&'a str>,
    /// Value of the package's `checksum` key, when it is a string.
    checksum: Option<&'a str>,
}
549
/// Builds the fallback `PackageData` that `extract_packages` returns when the
/// lockfile cannot be read or parsed, or has no `package` array.
///
/// Only the package type and datasource id are set; every other field takes
/// its `Default` value.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(CargoLockParser::PACKAGE_TYPE),
        datasource_id: Some(DatasourceId::CargoLock),
        ..Default::default()
    }
}
556}