Skip to main content

provenant/parsers/
cargo_lock.rs

//! Parser for Cargo.lock lockfiles.
//!
//! Extracts resolved dependency information including exact versions and
//! checksums from Rust Cargo.lock files.
//!
//! # Supported Formats
//! - Cargo.lock (lockfile)
//!
//! # Key Features
//! - Exact version resolution from lockfile
//! - Direct vs transitive dependency tracking (`is_direct`)
//! - Checksum extraction for verification
//! - Package URL (purl) generation
//! - Dependency graph with source tracking (crates.io, git, path)
//!
//! # Implementation Notes
//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
//! - Direct dependencies determined from root package's dependency list
//! - Uses TOML parsing for structured data extraction
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
22use crate::parser_warn as warn;
23use packageurl::PackageUrl;
24use serde_json::json;
25use std::collections::{HashMap, hash_map::Entry};
26use std::fs::File;
27use std::io::Read;
28use std::path::Path;
29use toml::Value;
30
31use super::PackageParser;
32
/// Parser for Rust `Cargo.lock` lockfiles.
///
/// A stateless marker type: all behaviour lives in its `PackageParser`
/// implementation, which reports the pinned packages of a Cargo-managed
/// project together with their checksums.
pub struct CargoLockParser;
37
38impl PackageParser for CargoLockParser {
39    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
40
41    fn is_match(path: &Path) -> bool {
42        path.file_name()
43            .and_then(|name| name.to_str())
44            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
45    }
46
47    fn extract_packages(path: &Path) -> Vec<PackageData> {
48        let content = match read_cargo_lock(path) {
49            Ok(content) => content,
50            Err(e) => {
51                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
52                return vec![default_package_data()];
53            }
54        };
55
56        let packages = match content.get("package").and_then(|v| v.as_array()) {
57            Some(pkgs) => pkgs,
58            None => {
59                warn!("No 'package' array found in Cargo.lock at {:?}", path);
60                return vec![default_package_data()];
61            }
62        };
63
64        let root_package = select_root_package(packages);
65
66        let name = root_package
67            .and_then(|p| p.get("name"))
68            .and_then(|v| v.as_str())
69            .map(String::from);
70
71        let version = root_package
72            .and_then(|p| p.get("version"))
73            .and_then(|v| v.as_str())
74            .map(String::from);
75
76        let checksum = root_package
77            .and_then(|p| p.get("checksum"))
78            .and_then(|v| v.as_str())
79            .map(String::from);
80
81        let dependencies = extract_all_dependencies(packages, root_package);
82
83        let purl = match (&name, &version) {
84            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
85                p.with_version(v.as_str()).ok()?;
86                Some(p.to_string())
87            }),
88            _ => None,
89        };
90
91        let api_data_url = match (&name, &version) {
92            (Some(n), Some(v)) => Some(format!("https://crates.io/api/v1/crates/{}/{}", n, v)),
93            (Some(n), None) => Some(format!("https://crates.io/api/v1/crates/{}", n)),
94            _ => None,
95        };
96
97        vec![PackageData {
98            package_type: Some(Self::PACKAGE_TYPE),
99            namespace: None,
100            name,
101            version,
102            qualifiers: None,
103            subpath: None,
104            primary_language: None,
105            description: None,
106            release_date: None,
107            parties: Vec::new(),
108            keywords: Vec::new(),
109            homepage_url: None,
110            download_url: None,
111            size: None,
112            sha1: None,
113            md5: None,
114            sha256: checksum,
115            sha512: None,
116            bug_tracking_url: None,
117            code_view_url: None,
118            vcs_url: None,
119            copyright: None,
120            holder: None,
121            declared_license_expression: None,
122            declared_license_expression_spdx: None,
123            license_detections: Vec::new(),
124            other_license_expression: None,
125            other_license_expression_spdx: None,
126            other_license_detections: Vec::new(),
127            extracted_license_statement: None,
128            notice_text: None,
129            source_packages: Vec::new(),
130            file_references: Vec::new(),
131            is_private: false,
132            is_virtual: false,
133            extra_data: None,
134            dependencies,
135            repository_homepage_url: None,
136            repository_download_url: None,
137            api_data_url,
138            datasource_id: Some(DatasourceId::CargoLock),
139            purl,
140        }]
141    }
142}
143
144fn read_cargo_lock(path: &Path) -> Result<Value, String> {
145    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
146    let mut content = String::new();
147    file.read_to_string(&mut content)
148        .map_err(|e| format!("Failed to read file: {}", e))?;
149    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
150}
151
152fn select_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
153    packages
154        .iter()
155        .filter_map(|package| package.as_table())
156        .find(|table| table.get("source").is_none())
157        .or_else(|| packages.first().and_then(|package| package.as_table()))
158}
159
160fn extract_all_dependencies(
161    packages: &[Value],
162    root_package: Option<&toml::map::Map<String, Value>>,
163) -> Vec<Dependency> {
164    let mut all_dependencies: HashMap<CargoDependencyKey, Dependency> = HashMap::new();
165
166    let package_versions = build_package_versions(packages);
167    let package_provenance = build_package_provenance(packages);
168    let root_package_key = root_package.and_then(package_key_from_table);
169
170    for package in packages {
171        if let Some(pkg_table) = package.as_table() {
172            let is_root_package = package_key_from_table(pkg_table)
173                .zip(root_package_key)
174                .is_some_and(|(package_key, root_key)| package_key == root_key);
175
176            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
177                for dep in deps {
178                    if let Some(dep_str) = dep.as_str() {
179                        let parsed_dependency = parse_dependency_string(dep_str);
180                        let name = parsed_dependency.name;
181                        let resolved_version = if parsed_dependency.version.is_empty() {
182                            package_versions
183                                .get(name)
184                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
185                                .unwrap_or("")
186                        } else {
187                            parsed_dependency.version
188                        };
189
190                        if !name.is_empty() {
191                            let purl = if resolved_version.is_empty() {
192                                PackageUrl::new("cargo", name).ok().map(|p| p.to_string())
193                            } else {
194                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
195                                    p.with_version(resolved_version).ok()?;
196                                    Some(p.to_string())
197                                })
198                            };
199
200                            let extra_data = build_dependency_extra_data(
201                                name,
202                                resolved_version,
203                                parsed_dependency.source,
204                                &package_provenance,
205                            );
206
207                            let dependency = Dependency {
208                                purl,
209                                extracted_requirement: if resolved_version.is_empty() {
210                                    None
211                                } else {
212                                    Some(resolved_version.to_string())
213                                },
214                                scope: Some("dependencies".to_string()),
215                                is_runtime: Some(true),
216                                is_optional: Some(false),
217                                is_pinned: Some(true),
218                                is_direct: Some(is_root_package),
219                                resolved_package: None,
220                                extra_data,
221                            };
222
223                            let key = CargoDependencyKey::from_dependency(&dependency);
224                            match all_dependencies.entry(key) {
225                                Entry::Vacant(entry) => {
226                                    entry.insert(dependency);
227                                }
228                                Entry::Occupied(mut entry) => {
229                                    if is_root_package {
230                                        entry.get_mut().is_direct = Some(true);
231                                    }
232                                }
233                            }
234                        }
235                    }
236                }
237            }
238        }
239    }
240
241    let mut dependencies: Vec<_> = all_dependencies.into_values().collect();
242    dependencies.sort_by(|left, right| {
243        left.purl
244            .as_deref()
245            .cmp(&right.purl.as_deref())
246            .then_with(|| {
247                left.extracted_requirement
248                    .as_deref()
249                    .cmp(&right.extracted_requirement.as_deref())
250            })
251    });
252    dependencies
253}
254
/// Identity of a dependency entry, used to de-duplicate entries.
///
/// Two entries are the same dependency when their purl, extracted
/// requirement and source all agree.
#[derive(PartialEq, Eq, Hash)]
struct CargoDependencyKey {
    /// Package URL of the dependency, when one was produced.
    purl: Option<String>,
    /// Requirement string recorded on the dependency, if any.
    extracted_requirement: Option<String>,
    /// Source string recorded for the dependency, if any.
    source: Option<String>,
}
261
262impl CargoDependencyKey {
263    fn from_dependency(dependency: &Dependency) -> Self {
264        let source = dependency
265            .extra_data
266            .as_ref()
267            .and_then(|extra_data| extra_data.get("source"))
268            .and_then(|value| value.as_str())
269            .map(ToOwned::to_owned);
270
271        Self {
272            purl: dependency.purl.clone(),
273            extracted_requirement: dependency.extracted_requirement.clone(),
274            source,
275        }
276    }
277}
278
279fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
280    packages
281        .iter()
282        .filter_map(|package| package.as_table())
283        .filter_map(|table| {
284            Some((
285                table.get("name")?.as_str()?,
286                table.get("version")?.as_str()?,
287            ))
288        })
289        .fold(HashMap::new(), |mut acc, (name, version)| {
290            acc.entry(name).or_default().push(version);
291            acc
292        })
293}
294
295fn build_package_provenance<'a>(
296    packages: &'a [Value],
297) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
298    packages
299        .iter()
300        .filter_map(|package| package.as_table())
301        .filter_map(|table| {
302            Some((
303                (
304                    table.get("name")?.as_str()?,
305                    table.get("version")?.as_str()?,
306                ),
307                DependencyProvenance {
308                    source: table.get("source").and_then(|value| value.as_str()),
309                    checksum: table.get("checksum").and_then(|value| value.as_str()),
310                },
311            ))
312        })
313        .fold(HashMap::new(), |mut acc, (key, provenance)| {
314            acc.entry(key).or_default().push(provenance);
315            acc
316        })
317}
318
319fn build_dependency_extra_data(
320    name: &str,
321    resolved_version: &str,
322    source_hint: Option<&str>,
323    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
324) -> Option<HashMap<String, serde_json::Value>> {
325    let mut extra_data = HashMap::new();
326
327    if !resolved_version.is_empty()
328        && let Some(provenance) = package_provenance
329            .get(&(name, resolved_version))
330            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
331    {
332        if let Some(source) = provenance.source {
333            extra_data.insert("source".to_string(), json!(source));
334        }
335        if let Some(checksum) = provenance.checksum {
336            extra_data.insert("checksum".to_string(), json!(checksum));
337        }
338    }
339
340    if !extra_data.contains_key("source")
341        && let Some(source) = source_hint
342    {
343        extra_data.insert("source".to_string(), json!(source));
344    }
345
346    if extra_data.is_empty() {
347        None
348    } else {
349        Some(extra_data)
350    }
351}
352
353fn select_dependency_provenance<'a>(
354    candidates: &'a [DependencyProvenance<'a>],
355    source_hint: Option<&str>,
356) -> Option<DependencyProvenance<'a>> {
357    if let Some(source_hint) = source_hint {
358        return candidates
359            .iter()
360            .copied()
361            .find(|candidate| candidate.source == Some(source_hint));
362    }
363
364    (candidates.len() == 1).then_some(candidates[0])
365}
366
367fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
368    Some((
369        table.get("name")?.as_str()?,
370        table.get("version")?.as_str()?,
371    ))
372}
373
374fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
375    let trimmed = dep_str.trim();
376    let source = trimmed
377        .find(" (")
378        .and_then(|source_start| trimmed[source_start + 2..].strip_suffix(')'));
379    let without_source = trimmed
380        .find(" (")
381        .map(|source_start| &trimmed[..source_start])
382        .unwrap_or(trimmed);
383
384    let mut parts = without_source.split_whitespace();
385    let name = parts.next().unwrap_or("");
386    let version = parts.next().unwrap_or("");
387
388    ParsedDependency {
389        name,
390        version,
391        source,
392    }
393}
394
/// Borrowed pieces of one dependency string.
#[derive(Copy, Clone)]
struct ParsedDependency<'a> {
    /// Dependency name; empty when the string had no tokens.
    name: &'a str,
    /// Version token; empty when the string did not include one.
    version: &'a str,
    /// Text of the trailing parenthesised source, when present.
    source: Option<&'a str>,
}
401
/// Borrowed origin information for one package entry.
#[derive(Copy, Clone)]
struct DependencyProvenance<'a> {
    /// Value of the entry's `source` field, if it is a string.
    source: Option<&'a str>,
    /// Value of the entry's `checksum` field, if it is a string.
    checksum: Option<&'a str>,
}
407
408fn default_package_data() -> PackageData {
409    PackageData {
410        package_type: Some(CargoLockParser::PACKAGE_TYPE),
411        datasource_id: Some(DatasourceId::CargoLock),
412        ..Default::default()
413    }
414}
415
416crate::register_parser!(
417    "Rust Cargo.lock lockfile",
418    &["**/Cargo.lock", "**/cargo.lock"],
419    "cargo",
420    "Rust",
421    Some("https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html"),
422);