Skip to main content

provenant/parsers/
cargo_lock.rs

1//! Parser for Cargo.lock lockfiles.
2//!
3//! Extracts resolved dependency information including exact versions and
4//! checksums from Rust Cargo.lock files.
5//!
6//! # Supported Formats
7//! - Cargo.lock (lockfile)
8//!
9//! # Key Features
10//! - Exact version resolution from lockfile
11//! - Direct vs transitive dependency tracking (`is_direct`)
12//! - Checksum extraction for verification
13//! - Package URL (purl) generation
14//! - Dependency graph with source tracking (crates.io, git, path)
15//!
16//! # Implementation Notes
17//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
18//! - Direct dependencies determined from root package's dependency list
19//! - Uses TOML parsing for structured data extraction
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
22use log::warn;
23use packageurl::PackageUrl;
24use serde_json::json;
25use std::collections::HashMap;
26use std::fs::File;
27use std::io::Read;
28use std::path::Path;
29use toml::Value;
30
31use super::PackageParser;
32
/// Rust Cargo.lock lockfile parser.
///
/// Extracts pinned dependency versions with checksums from Cargo-managed Rust projects.
///
/// This is a stateless unit struct: it carries no data, and all behavior lives
/// in its `PackageParser` implementation, which recognizes files whose name is
/// `Cargo.lock` (compared ASCII case-insensitively).
pub struct CargoLockParser;
37
38impl PackageParser for CargoLockParser {
39    const PACKAGE_TYPE: PackageType = PackageType::Cargo;
40
41    fn is_match(path: &Path) -> bool {
42        path.file_name()
43            .and_then(|name| name.to_str())
44            .is_some_and(|name| name.eq_ignore_ascii_case("cargo.lock"))
45    }
46
47    fn extract_packages(path: &Path) -> Vec<PackageData> {
48        let content = match read_cargo_lock(path) {
49            Ok(content) => content,
50            Err(e) => {
51                warn!("Failed to read or parse Cargo.lock at {:?}: {}", path, e);
52                return vec![default_package_data()];
53            }
54        };
55
56        let packages = match content.get("package").and_then(|v| v.as_array()) {
57            Some(pkgs) => pkgs,
58            None => {
59                warn!("No 'package' array found in Cargo.lock at {:?}", path);
60                return vec![default_package_data()];
61            }
62        };
63
64        let root_package = select_root_package(packages);
65
66        let name = root_package
67            .and_then(|p| p.get("name"))
68            .and_then(|v| v.as_str())
69            .map(String::from);
70
71        let version = root_package
72            .and_then(|p| p.get("version"))
73            .and_then(|v| v.as_str())
74            .map(String::from);
75
76        let checksum = root_package
77            .and_then(|p| p.get("checksum"))
78            .and_then(|v| v.as_str())
79            .map(String::from);
80
81        let dependencies = extract_all_dependencies(packages, root_package);
82
83        let purl = match (&name, &version) {
84            (Some(n), Some(v)) => PackageUrl::new("cargo", n).ok().and_then(|mut p| {
85                p.with_version(v.as_str()).ok()?;
86                Some(p.to_string())
87            }),
88            _ => None,
89        };
90
91        let api_data_url = match (&name, &version) {
92            (Some(n), Some(v)) => Some(format!("https://crates.io/api/v1/crates/{}/{}", n, v)),
93            (Some(n), None) => Some(format!("https://crates.io/api/v1/crates/{}", n)),
94            _ => None,
95        };
96
97        vec![PackageData {
98            package_type: Some(Self::PACKAGE_TYPE),
99            namespace: None,
100            name,
101            version,
102            qualifiers: None,
103            subpath: None,
104            primary_language: None,
105            description: None,
106            release_date: None,
107            parties: Vec::new(),
108            keywords: Vec::new(),
109            homepage_url: None,
110            download_url: None,
111            size: None,
112            sha1: None,
113            md5: None,
114            sha256: checksum,
115            sha512: None,
116            bug_tracking_url: None,
117            code_view_url: None,
118            vcs_url: None,
119            copyright: None,
120            holder: None,
121            declared_license_expression: None,
122            declared_license_expression_spdx: None,
123            license_detections: Vec::new(),
124            other_license_expression: None,
125            other_license_expression_spdx: None,
126            other_license_detections: Vec::new(),
127            extracted_license_statement: None,
128            notice_text: None,
129            source_packages: Vec::new(),
130            file_references: Vec::new(),
131            is_private: false,
132            is_virtual: false,
133            extra_data: None,
134            dependencies,
135            repository_homepage_url: None,
136            repository_download_url: None,
137            api_data_url,
138            datasource_id: Some(DatasourceId::CargoLock),
139            purl,
140        }]
141    }
142}
143
144fn read_cargo_lock(path: &Path) -> Result<Value, String> {
145    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
146    let mut content = String::new();
147    file.read_to_string(&mut content)
148        .map_err(|e| format!("Failed to read file: {}", e))?;
149    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
150}
151
152fn select_root_package(packages: &[Value]) -> Option<&toml::map::Map<String, Value>> {
153    packages
154        .iter()
155        .filter_map(|package| package.as_table())
156        .find(|table| table.get("source").is_none())
157        .or_else(|| packages.first().and_then(|package| package.as_table()))
158}
159
160fn extract_all_dependencies(
161    packages: &[Value],
162    root_package: Option<&toml::map::Map<String, Value>>,
163) -> Vec<Dependency> {
164    let mut all_dependencies = Vec::new();
165
166    let package_versions = build_package_versions(packages);
167    let package_provenance = build_package_provenance(packages);
168    let root_package_key = root_package.and_then(package_key_from_table);
169
170    for package in packages {
171        if let Some(pkg_table) = package.as_table() {
172            let is_root_package = package_key_from_table(pkg_table)
173                .zip(root_package_key)
174                .is_some_and(|(package_key, root_key)| package_key == root_key);
175
176            if let Some(deps) = pkg_table.get("dependencies").and_then(|v| v.as_array()) {
177                for dep in deps {
178                    if let Some(dep_str) = dep.as_str() {
179                        let parsed_dependency = parse_dependency_string(dep_str);
180                        let name = parsed_dependency.name;
181                        let resolved_version = if parsed_dependency.version.is_empty() {
182                            package_versions
183                                .get(name)
184                                .and_then(|versions| (versions.len() == 1).then_some(versions[0]))
185                                .unwrap_or("")
186                        } else {
187                            parsed_dependency.version
188                        };
189
190                        if !name.is_empty() {
191                            let purl = if resolved_version.is_empty() {
192                                PackageUrl::new("cargo", name).ok().map(|p| p.to_string())
193                            } else {
194                                PackageUrl::new("cargo", name).ok().and_then(|mut p| {
195                                    p.with_version(resolved_version).ok()?;
196                                    Some(p.to_string())
197                                })
198                            };
199
200                            let extra_data = build_dependency_extra_data(
201                                name,
202                                resolved_version,
203                                parsed_dependency.source,
204                                &package_provenance,
205                            );
206
207                            all_dependencies.push(Dependency {
208                                purl,
209                                extracted_requirement: if resolved_version.is_empty() {
210                                    None
211                                } else {
212                                    Some(resolved_version.to_string())
213                                },
214                                scope: Some("dependencies".to_string()),
215                                is_runtime: Some(true),
216                                is_optional: Some(false),
217                                is_pinned: Some(true),
218                                is_direct: Some(is_root_package),
219                                resolved_package: None,
220                                extra_data,
221                            });
222                        }
223                    }
224                }
225            }
226        }
227    }
228
229    all_dependencies
230}
231
232fn build_package_versions(packages: &[Value]) -> HashMap<&str, Vec<&str>> {
233    packages
234        .iter()
235        .filter_map(|package| package.as_table())
236        .filter_map(|table| {
237            Some((
238                table.get("name")?.as_str()?,
239                table.get("version")?.as_str()?,
240            ))
241        })
242        .fold(HashMap::new(), |mut acc, (name, version)| {
243            acc.entry(name).or_default().push(version);
244            acc
245        })
246}
247
248fn build_package_provenance<'a>(
249    packages: &'a [Value],
250) -> HashMap<(&'a str, &'a str), Vec<DependencyProvenance<'a>>> {
251    packages
252        .iter()
253        .filter_map(|package| package.as_table())
254        .filter_map(|table| {
255            Some((
256                (
257                    table.get("name")?.as_str()?,
258                    table.get("version")?.as_str()?,
259                ),
260                DependencyProvenance {
261                    source: table.get("source").and_then(|value| value.as_str()),
262                    checksum: table.get("checksum").and_then(|value| value.as_str()),
263                },
264            ))
265        })
266        .fold(HashMap::new(), |mut acc, (key, provenance)| {
267            acc.entry(key).or_default().push(provenance);
268            acc
269        })
270}
271
272fn build_dependency_extra_data(
273    name: &str,
274    resolved_version: &str,
275    source_hint: Option<&str>,
276    package_provenance: &HashMap<(&str, &str), Vec<DependencyProvenance<'_>>>,
277) -> Option<HashMap<String, serde_json::Value>> {
278    let mut extra_data = HashMap::new();
279
280    if !resolved_version.is_empty()
281        && let Some(provenance) = package_provenance
282            .get(&(name, resolved_version))
283            .and_then(|candidates| select_dependency_provenance(candidates, source_hint))
284    {
285        if let Some(source) = provenance.source {
286            extra_data.insert("source".to_string(), json!(source));
287        }
288        if let Some(checksum) = provenance.checksum {
289            extra_data.insert("checksum".to_string(), json!(checksum));
290        }
291    }
292
293    if !extra_data.contains_key("source")
294        && let Some(source) = source_hint
295    {
296        extra_data.insert("source".to_string(), json!(source));
297    }
298
299    if extra_data.is_empty() {
300        None
301    } else {
302        Some(extra_data)
303    }
304}
305
306fn select_dependency_provenance<'a>(
307    candidates: &'a [DependencyProvenance<'a>],
308    source_hint: Option<&str>,
309) -> Option<DependencyProvenance<'a>> {
310    if let Some(source_hint) = source_hint {
311        return candidates
312            .iter()
313            .copied()
314            .find(|candidate| candidate.source == Some(source_hint));
315    }
316
317    (candidates.len() == 1).then_some(candidates[0])
318}
319
320fn package_key_from_table(table: &toml::map::Map<String, Value>) -> Option<(&str, &str)> {
321    Some((
322        table.get("name")?.as_str()?,
323        table.get("version")?.as_str()?,
324    ))
325}
326
/// Splits a lockfile dependency string of the form `name [version] [(source)]`
/// into its parts.
///
/// Surrounding whitespace is ignored. Everything after the first `" ("` is
/// treated as the source and is kept only when it ends with `)`; the text
/// before it supplies the whitespace-separated name and version. Missing name
/// or version come back as empty strings.
fn parse_dependency_string(dep_str: &str) -> ParsedDependency<'_> {
    let spec = dep_str.trim();

    let (head, source) = match spec.split_once(" (") {
        Some((before, after)) => (before, after.strip_suffix(')')),
        None => (spec, None),
    };

    let mut fields = head.split_whitespace();
    let name = match fields.next() {
        Some(first) => first,
        None => "",
    };
    let version = match fields.next() {
        Some(second) => second,
        None => "",
    };

    ParsedDependency {
        name,
        version,
        source,
    }
}

/// Borrowed pieces of one dependency string, as produced by
/// `parse_dependency_string`.
#[derive(Clone, Copy)]
struct ParsedDependency<'a> {
    /// Crate name; empty when the string had no tokens.
    name: &'a str,
    /// Version token; empty when the string did not include one.
    version: &'a str,
    /// Text between `" ("` and the closing `)`, if that suffix was present.
    source: Option<&'a str>,
}
354
/// Source and checksum recorded for a single entry of the lockfile's
/// `package` array.
///
/// Both fields borrow from the parsed TOML document and are `None` when the
/// entry has no string value under the corresponding key.
#[derive(Clone, Copy)]
struct DependencyProvenance<'a> {
    /// Value of the entry's `source` key, if present.
    source: Option<&'a str>,
    /// Value of the entry's `checksum` key, if present.
    checksum: Option<&'a str>,
}
360
361fn default_package_data() -> PackageData {
362    PackageData {
363        package_type: Some(CargoLockParser::PACKAGE_TYPE),
364        datasource_id: Some(DatasourceId::CargoLock),
365        ..Default::default()
366    }
367}
368
// Declares this parser's metadata through the crate's `register_parser!` macro.
// NOTE(review): the arguments look like (description, path globs, package type,
// primary language, documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "Rust Cargo.lock lockfile",
    &["**/Cargo.lock", "**/cargo.lock"],
    "cargo",
    "Rust",
    Some("https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html"),
);