Skip to main content

provenant/parsers/
cargo_lock.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0

//! Parser for Cargo.lock lockfiles.
//!
//! Extracts resolved dependency information including exact versions and
//! checksums from Rust Cargo.lock files.
//!
//! # Supported Formats
//! - Cargo.lock (lockfile)
//!
//! # Key Features
//! - Exact version resolution from lockfile
//! - Direct vs transitive dependency tracking (`is_direct`)
//! - Checksum extraction for verification
//! - Package URL (purl) generation
//! - Dependency graph with source tracking (crates.io, git, path)
//!
//! # Implementation Notes
//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
//! - Direct dependencies are the entries listed by the dependency root package; in addition,
//!   every package without a `source` key is itself reported as a direct dependency
//! - Uses TOML parsing for structured data extraction

24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use packageurl::PackageUrl;
28use serde_json::json;
29use std::collections::{HashMap, HashSet, hash_map::Entry};
30use std::path::Path;
31use toml::Value;
32
33use super::PackageParser;
34
35/// Rust Cargo.lock lockfile parser.
36///
37/// Extracts pinned dependency versions with checksums from Cargo-managed Rust projects.
38pub struct CargoLockParser;
39
40impl PackageParser for CargoLockParser {
41    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
42
43    fn is_match(path: &Path) -> bool {
44        path.file_name()
45            .and_then(|name| name.to_str())
46            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_cargo_lock(path) {
51            Ok(content) => content,
52            Err(e) => {
53                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let packages = match content.get("package").and_then(|v| v.as_array()) {
59            Some(pkgs) => pkgs,
60            None => {
61                warn!("No 'package' array found in Cargo.lock at {:?}", path);
62                return vec![default_package_data()];
63            }
64        };
65
66        let identity_package = select_identity_package(packages);
67        let dependency_root_package = select_dependency_root_package(packages);
68
69        let name = identity_package
70            .and_then(|p| p.get("name"))
71            .and_then(|v| v.as_str())
72            .map(|s| truncate_field(s.to_string()));
73
74        let version = identity_package
75            .and_then(|p| p.get("version"))
76            .and_then(|v| v.as_str())
77            .map(|s| truncate_field(s.to_string()));
78
79        let checksum = identity_package
80            .and_then(|p| p.get("checksum"))
81            .and_then(|v| v.as_str())
82            .map(|s| truncate_field(s.to_string()));
83
84        let (sha256, extra_data) = match checksum.as_deref() {
85            Some(h) if h.len() == 64 && Sha256Digest::from_hex(h).is_ok() => {
86                (Sha256Digest::from_hex(h).ok(), None)
87            }
88            Some(h) if hex::decode(h).is_ok() => {
89                let mut map = HashMap::new();
90                map.insert("checksum".to_string(), json!(h));
91                (None, Some(map))
92            }
93            _ => (None, None),
94        };
95
96        let dependencies = extract_all_dependencies(packages, dependency_root_package);
97
98        let purl = match (&name, &version) {
99            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
100                p.with_version(v.as_str()).ok()?;
101                Some(truncate_field(p.to_string()))
102            }),
103            _ => None,
104        };
105
106        let api_data_url = match (&name, &version) {
107            (Some(n), Some(v)) => Some(truncate_field(format!(
108                "https://crates.io/api/v1/crates/{}/{}",
109                n, v
110            ))),
111            (Some(n), None) => Some(truncate_field(format!(
112                "https://crates.io/api/v1/crates/{}",
113                n
114            ))),
115            _ => None,
116        };
117
118        vec![PackageData {
119            package_type: Some(Self::PACKAGE_TYPE),
120            namespace: None,
121            name,
122            version,
123            qualifiers: None,
124            subpath: None,
125            primary_language: None,
126            description: None,
127            release_date: None,
128            parties: Vec::new(),
129            keywords: Vec::new(),
130            homepage_url: None,
131            download_url: None,
132            size: None,
133            sha1: None,
134            md5: None,
135            sha256,
136            sha512: None,
137            bug_tracking_url: None,
138            code_view_url: None,
139            vcs_url: None,
140            copyright: None,
141            holder: None,
142            declared_license_expression: None,
143            declared_license_expression_spdx: None,
144            license_detections: Vec::new(),
145            other_license_expression: None,
146            other_license_expression_spdx: None,
147            other_license_detections: Vec::new(),
148            extracted_license_statement: None,
149            notice_text: None,
150            source_packages: Vec::new(),
151            file_references: Vec::new(),
152            is_private: false,
153            is_virtual: false,
154            extra_data,
155            dependencies,
156            repository_homepage_url: None,
157            repository_download_url: None,
158            api_data_url,
159            datasource_id: Some(DatasourceId::CargoLock),
160            purl,
161        }]
162    }
163}
164
165fn read_cargo_lock(path: &Path) -> Result<Value, String> {
166    let content =
167        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
168    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
169}
170
171fn select_dependency_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
172    packages
173        .iter()
174        .filter_map(|package| package.as_table())
175        .find(|table| table.get("source").is_none())
176        .or_else(|| packages.first().and_then(|package| package.as_table()))
177}
178
179fn select_identity_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
180    let local_packages: Vec<_> = packages
181        .iter()
182        .filter_map(|package| package.as_table())
183        .filter(|table| table.get("source").is_none())
184        .collect();
185
186    match local_packages.as_slice() {
187        [] => packages.first().and_then(|package| package.as_table()),
188        [only] => Some(*only),
189        _ => select_unique_root_like_local_package(&local_packages),
190    }
191}
192
193fn select_unique_root_like_local_package<'a>(
194    local_packages: &[&'a toml::map::Map<String, Value>],
195) -> Option<&'a toml::map::Map<String, Value>> {
196    let local_keys: HashSet<(String, String)> = local_packages
197        .iter()
198        .filter_map(|table| package_key_from_table(table))
199        .map(|(name, version)| (name.to_string(), version.to_string()))
200        .collect();
201
202    let referenced_local_keys: HashSet<(String, String)> = local_packages
203        .iter()
204        .flat_map(|table| {
205            table
206                .get("dependencies")
207                .and_then(Value::as_array)
208                .into_iter()
209                .flatten()
210                .filter_map(Value::as_str)
211                .filter_map(|dep| {
212                    let parsed = parse_dependency_string(dep);
213                    (!parsed.name.is_empty() && !parsed.version.is_empty())
214                        .then(|| (parsed.name.to_string(), parsed.version.to_string()))
215                })
216                .filter(|key| local_keys.contains(key))
217                .collect::<Vec<_>>()
218        })
219        .collect();
220
221    let root_candidates: Vec<_> = local_packages
222        .iter()
223        .copied()
224        .filter(|table| {
225            package_key_from_table(table).is_some_and(|(name, version)| {
226                !referenced_local_keys.contains(&(name.to_string(), version.to_string()))
227            })
228        })
229        .collect();
230
231    match root_candidates.as_slice() {
232        [only] => Some(*only),
233        _ => None,
234    }
235}
236
237fn extract_all_dependencies(
238    packages: &[Value],
239    root_package: Option<&toml::map::Map<String, Value>>,
240) -> Vec<Dependency> {
241    let mut all_dependencies: HashMap<CargoDependencyKey, Dependency> = HashMap::new();
242
243    let package_versions = build_package_versions(packages);
244    let package_provenance = build_package_provenance(packages);
245    let root_package_key = root_package.and_then(package_key_from_table);
246    for package in packages.iter().take(MAX_ITERATION_COUNT) {
247        if let Some(pkg_table) = package.as_table() {
248            let is_root_package = package_key_from_table(pkg_table)
249                .zip(root_package_key)
250                .is_some_and(|(package_key, root_key)| package_key == root_key);
251
252            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
253                for dep in deps.iter().take(MAX_ITERATION_COUNT) {
254                    if let Some(dep_str) = dep.as_str() {
255                        let parsed_dependency = parse_dependency_string(dep_str);
256                        let name = parsed_dependency.name;
257                        let resolved_version = if parsed_dependency.version.is_empty() {
258                            package_versions
259                                .get(name)
260                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
261                                .unwrap_or("")
262                        } else {
263                            parsed_dependency.version
264                        };
265
266                        if !name.is_empty() {
267                            let purl = if resolved_version.is_empty() {
268                                PackageUrl::new("cargo", name)
269                                    .ok()
270                                    .map(|p| truncate_field(p.to_string()))
271                            } else {
272                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
273                                    p.with_version(resolved_version).ok()?;
274                                    Some(truncate_field(p.to_string()))
275                                })
276                            };
277
278                            let extra_data = build_dependency_extra_data(
279                                name,
280                                resolved_version,
281                                parsed_dependency.source,
282                                &package_provenance,
283                            );
284
285                            let dependency = Dependency {
286                                purl,
287                                extracted_requirement: if resolved_version.is_empty() {
288                                    None
289                                } else {
290                                    Some(truncate_field(resolved_version.to_string()))
291                                },
292                                scope: None,
293                                is_runtime: None,
294                                is_optional: None,
295                                is_pinned: Some(true),
296                                is_direct: Some(is_root_package),
297                                resolved_package: None,
298                                extra_data,
299                            };
300
301                            let key = CargoDependencyKey::from_dependency(&dependency);
302                            match all_dependencies.entry(key) {
303                                Entry::Vacant(entry) => {
304                                    entry.insert(dependency);
305                                }
306                                Entry::Occupied(mut entry) => {
307                                    if is_root_package {
308                                        entry.get_mut().is_direct = Some(true);
309                                    }
310                                }
311                            }
312                        }
313                    }
314                }
315            }
316        }
317    }
318
319    for package in packages
320        .iter()
321        .take(MAX_ITERATION_COUNT)
322        .filter_map(|package| package.as_table())
323    {
324        let Some((name, version)) = package_key_from_table(package) else {
325            continue;
326        };
327
328        if package.get("source").is_some() {
329            continue;
330        }
331
332        let Some(mut purl) = PackageUrl::new("cargo", name).ok() else {
333            continue;
334        };
335        if purl.with_version(version).is_err() {
336            continue;
337        }
338
339        let dependency = Dependency {
340            purl: Some(truncate_field(purl.to_string())),
341            extracted_requirement: Some(truncate_field(version.to_string())),
342            scope: None,
343            is_runtime: None,
344            is_optional: None,
345            is_pinned: Some(true),
346            is_direct: Some(true),
347            resolved_package: None,
348            extra_data: build_dependency_extra_data(name, version, None, &package_provenance),
349        };
350
351        let key = CargoDependencyKey::from_dependency(&dependency);
352        match all_dependencies.entry(key) {
353            Entry::Vacant(entry) => {
354                entry.insert(dependency);
355            }
356            Entry::Occupied(mut entry) => {
357                entry.get_mut().is_direct = Some(true);
358            }
359        }
360    }
361
362    let mut dependencies: Vec<_> = all_dependencies.into_values().collect();
363    dependencies.sort_by(|left, right| {
364        left.purl
365            .as_deref()
366            .cmp(&right.purl.as_deref())
367            .then_with(|| {
368                left.extracted_requirement
369                    .as_deref()
370                    .cmp(&right.extracted_requirement.as_deref())
371            })
372    });
373    dependencies
374}
375
376#[derive(Hash, PartialEq, Eq)]
377struct CargoDependencyKey {
378    purl: Option<String>,
379    extracted_requirement: Option<String>,
380    source: Option<String>,
381}
382
383impl CargoDependencyKey {
384    fn from_dependency(dependency: &Dependency) -> Self {
385        let source = dependency
386            .extra_data
387            .as_ref()
388            .and_then(|extra_data| extra_data.get("source"))
389            .and_then(|value| value.as_str())
390            .map(ToOwned::to_owned);
391
392        Self {
393            purl: dependency.purl.clone(),
394            extracted_requirement: dependency.extracted_requirement.clone(),
395            source,
396        }
397    }
398}
399
400fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
401    packages
402        .iter()
403        .filter_map(|package| package.as_table())
404        .filter_map(|table| {
405            Some((
406                table.get("name")?.as_str()?,
407                table.get("version")?.as_str()?,
408            ))
409        })
410        .fold(HashMap::new(), |mut acc, (name, version)| {
411            acc.entry(name).or_default().push(version);
412            acc
413        })
414}
415
416fn build_package_provenance<'a>(
417    packages: &'a [Value],
418) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
419    packages
420        .iter()
421        .filter_map(|package| package.as_table())
422        .filter_map(|table| {
423            Some((
424                (
425                    table.get("name")?.as_str()?,
426                    table.get("version")?.as_str()?,
427                ),
428                DependencyProvenance {
429                    source: table.get("source").and_then(|value| value.as_str()),
430                    checksum: table.get("checksum").and_then(|value| value.as_str()),
431                },
432            ))
433        })
434        .fold(HashMap::new(), |mut acc, (key, provenance)| {
435            acc.entry(key).or_default().push(provenance);
436            acc
437        })
438}
439
440fn build_dependency_extra_data(
441    name: &str,
442    resolved_version: &str,
443    source_hint: Option<&str>,
444    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
445) -> Option<HashMap<String, serde_json::Value>> {
446    let mut extra_data = HashMap::new();
447
448    if !resolved_version.is_empty()
449        && let Some(provenance) = package_provenance
450            .get(&(name, resolved_version))
451            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
452    {
453        if let Some(source) = provenance.source {
454            extra_data.insert(
455                "source".to_string(),
456                json!(truncate_field(source.to_string())),
457            );
458        }
459        if let Some(checksum) = provenance.checksum {
460            extra_data.insert(
461                "checksum".to_string(),
462                json!(truncate_field(checksum.to_string())),
463            );
464        }
465    }
466
467    if !extra_data.contains_key("source")
468        && let Some(source) = source_hint
469    {
470        extra_data.insert(
471            "source".to_string(),
472            json!(truncate_field(source.to_string())),
473        );
474    }
475
476    if extra_data.is_empty() {
477        None
478    } else {
479        Some(extra_data)
480    }
481}
482
483fn select_dependency_provenance<'a>(
484    candidates: &'a [DependencyProvenance<'a>],
485    source_hint: Option<&str>,
486) -> Option<DependencyProvenance<'a>> {
487    if let Some(source_hint) = source_hint {
488        return candidates
489            .iter()
490            .copied()
491            .find(|candidate| candidate.source == Some(source_hint));
492    }
493
494    (candidates.len() == 1).then_some(candidates[0])
495}
496
497fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
498    Some((
499        table.get("name")?.as_str()?,
500        table.get("version")?.as_str()?,
501    ))
502}
503
504fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
505    let trimmed = dep_str.trim();
506    let source = trimmed
507        .find(" (")
508        .and_then(|source_start| trimmed[source_start + 2..].strip_suffix(')'));
509    let without_source = trimmed
510        .find(" (")
511        .map(|source_start| &trimmed[..source_start])
512        .unwrap_or(trimmed);
513
514    let mut parts = without_source.split_whitespace();
515    let name = parts.next().unwrap_or("");
516    let version = parts.next().unwrap_or("");
517
518    ParsedDependency {
519        name,
520        version,
521        source,
522    }
523}
524
/// Borrowed parts of a single `dependencies` entry, as produced by `parse_dependency_string`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ParsedDependency<'a> {
    /// Package name; empty when the entry has no tokens.
    name: &'a str,
    /// Version token; empty when the entry gives only a name.
    version: &'a str,
    /// Text inside the trailing parentheses, when present and closed.
    source: Option<&'a str>,
}
531
/// Where a locked package came from: the `source` and `checksum` strings of its package table,
/// borrowed from the parsed document.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DependencyProvenance<'a> {
    /// The package's `source` value, when present and a string.
    source: Option<&'a str>,
    /// The package's `checksum` value, when present and a string.
    checksum: Option<&'a str>,
}
537
538fn default_package_data() -> PackageData {
539    PackageData {
540        package_type: Some(CargoLockParser::PACKAGE_TYPE),
541        datasource_id: Some(DatasourceId::CargoLock),
542        ..Default::default()
543    }
544}
545
546crate::register_parser!(
547    "Rust Cargo.lock lockfile",
548    &["**/Cargo.lock", "**/cargo.lock"],
549    "cargo",
550    "Rust",
551    Some("https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html"),
552);