1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
22use crate::parser_warn as warn;
23use crate::parsers::utils::{
24 MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, split_name_email, truncate_field,
25};
26use packageurl::PackageUrl;
27use std::path::Path;
28use toml::Value;
29
30use super::PackageParser;
31use super::license_normalization::{
32 DeclaredLicenseMatchMetadata, build_declared_license_data, empty_declared_license_data,
33 normalize_spdx_expression,
34};
35
// Keys looked up in the parsed `Cargo.toml` document by the functions in this file.

// Top-level table holding the package metadata.
const FIELD_PACKAGE: &str = "package";
// Keys read from inside the `package` table.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSE_FILE: &str = "license-file";
const FIELD_AUTHORS: &str = "authors";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_HOMEPAGE: &str = "homepage";
// Top-level dependency tables. The `*_LEGACY` constants are the underscore
// spellings, which are read in addition to the hyphenated ones.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";
const FIELD_DEV_DEPENDENCIES_LEGACY: &str = "dev_dependencies";
const FIELD_BUILD_DEPENDENCIES: &str = "build-dependencies";
const FIELD_BUILD_DEPENDENCIES_LEGACY: &str = "build_dependencies";
// Further keys read from inside the `package` table.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_CATEGORIES: &str = "categories";
const FIELD_RUST_VERSION: &str = "rust-version";
const FIELD_EDITION: &str = "edition";
const FIELD_README: &str = "readme";
const FIELD_PUBLISH: &str = "publish";
56
/// Stateless parser for `Cargo.toml` manifests; all behaviour lives in its
/// [`PackageParser`] implementation.
pub struct CargoParser;
62
63impl PackageParser for CargoParser {
64 const PACKAGE_TYPE: PackageType = PackageType::Cargo;
65
66 fn extract_packages(path: &Path) -> Vec<PackageData> {
67 let toml_content = match read_cargo_toml(path) {
68 Ok(content) => content,
69 Err(_) => return Vec::new(),
70 };
71
72 let package = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table());
73
74 let name = package
75 .and_then(|p| p.get(FIELD_NAME))
76 .and_then(|v| v.as_str())
77 .map(|s| truncate_field(s.to_string()));
78
79 let version = package
80 .and_then(|p| p.get(FIELD_VERSION))
81 .and_then(|v| v.as_str())
82 .map(|s| truncate_field(s.to_string()));
83
84 let raw_license = package
85 .and_then(|p| p.get(FIELD_LICENSE))
86 .and_then(|v| v.as_str())
87 .map(|s| truncate_field(s.to_string()));
88 let file_references = extract_file_references(&toml_content);
89 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
90 raw_license
91 .as_deref()
92 .and_then(normalize_spdx_expression)
93 .map(|normalized| {
94 build_declared_license_data(
95 normalized,
96 DeclaredLicenseMatchMetadata::single_line(
97 raw_license.as_deref().unwrap_or_default(),
98 ),
99 )
100 })
101 .unwrap_or_else(empty_declared_license_data);
102
103 let extracted_license_statement = raw_license.clone();
104
105 let dependencies = extract_dependencies_for_scopes(&toml_content, &[FIELD_DEPENDENCIES]);
106 let dev_dependencies = extract_dependencies_for_scopes(
107 &toml_content,
108 &[FIELD_DEV_DEPENDENCIES, FIELD_DEV_DEPENDENCIES_LEGACY],
109 );
110 let build_dependencies = extract_dependencies_for_scopes(
111 &toml_content,
112 &[FIELD_BUILD_DEPENDENCIES, FIELD_BUILD_DEPENDENCIES_LEGACY],
113 );
114
115 let purl = create_package_url(&name, &version);
116
117 let homepage_url = package
118 .and_then(|p| p.get(FIELD_HOMEPAGE))
119 .and_then(|v| v.as_str())
120 .map(|s| truncate_field(s.to_string()))
121 .or_else(|| {
122 name.as_ref()
123 .map(|n| format!("https://crates.io/crates/{}", n))
124 });
125
126 let repository_url = package
127 .and_then(|p| p.get(FIELD_REPOSITORY))
128 .and_then(|v| v.as_str())
129 .map(|s| truncate_field(s.to_string()));
130 let download_url = None;
131
132 let api_data_url = generate_cargo_api_url(&name, &version);
133
134 let repository_homepage_url = name
135 .as_ref()
136 .map(|n| format!("https://crates.io/crates/{}", n));
137
138 let repository_download_url = match (&name, &version) {
139 (Some(n), Some(v)) => Some(format!(
140 "https://crates.io/api/v1/crates/{}/{}/download",
141 n, v
142 )),
143 _ => None,
144 };
145
146 let description = package
147 .and_then(|p| p.get(FIELD_DESCRIPTION))
148 .and_then(|v| v.as_str())
149 .map(|s| truncate_field(s.trim().to_string()));
150
151 let keywords = extract_keywords_and_categories(&toml_content);
152
153 let extra_data = extract_extra_data(&toml_content);
154 let is_private = package
155 .and_then(|p| p.get(FIELD_PUBLISH))
156 .is_some_and(|value| matches!(value, Value::Boolean(false)));
157 vec![PackageData {
158 package_type: Some(Self::PACKAGE_TYPE),
159 namespace: None,
160 name,
161 version,
162 qualifiers: None,
163 subpath: None,
164 primary_language: Some("Rust".to_string()),
165 description,
166 release_date: None,
167 parties: extract_parties(&toml_content),
168 keywords,
169 homepage_url,
170 download_url,
171 size: None,
172 sha1: None,
173 md5: None,
174 sha256: None,
175 sha512: None,
176 bug_tracking_url: None,
177 code_view_url: None,
178 vcs_url: repository_url,
179 copyright: None,
180 holder: None,
181 declared_license_expression,
182 declared_license_expression_spdx,
183 license_detections,
184 other_license_expression: None,
185 other_license_expression_spdx: None,
186 other_license_detections: Vec::new(),
187 extracted_license_statement,
188 notice_text: None,
189 source_packages: Vec::new(),
190 file_references,
191 is_private,
192 is_virtual: false,
193 extra_data,
194 dependencies: [dependencies, dev_dependencies, build_dependencies].concat(),
195 repository_homepage_url,
196 repository_download_url,
197 api_data_url,
198 datasource_id: Some(DatasourceId::CargoToml),
199 purl,
200 }]
201 }
202
203 fn is_match(path: &Path) -> bool {
204 path.file_name()
205 .and_then(|name| name.to_str())
206 .is_some_and(|name| name.eq_ignore_ascii_case("cargo.toml"))
207 }
208}
209
210fn read_cargo_toml(path: &Path) -> Result<Value, String> {
212 let content =
213 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
214
215 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
216}
217
/// Builds the crates.io API URL for a crate, or `None` when the name is unknown.
///
/// The version is accepted for signature compatibility but does not influence the URL.
fn generate_cargo_api_url(name: &Option<String>, _version: &Option<String>) -> Option<String> {
    const REGISTRY: &str = "https://crates.io/api/v1/crates";

    let crate_name = name.as_deref()?;
    Some([REGISTRY, crate_name].join("/"))
}
222
223fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
224 name.as_ref().and_then(|name| {
225 let mut package_url = match PackageUrl::new(CargoParser::PACKAGE_TYPE.as_str(), name) {
226 Ok(p) => p,
227 Err(e) => {
228 warn!(
229 "Failed to create PackageUrl for cargo package '{}': {}",
230 name, e
231 );
232 return None;
233 }
234 };
235
236 if let Some(v) = version
237 && let Err(e) = package_url.with_version(v)
238 {
239 warn!(
240 "Failed to set version '{}' for cargo package '{}': {}",
241 v, name, e
242 );
243 return None;
244 }
245
246 Some(package_url.to_string())
247 })
248}
249
250fn extract_parties(toml_content: &Value) -> Vec<Party> {
252 let mut parties = Vec::new();
253
254 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table())
255 && let Some(authors) = package.get(FIELD_AUTHORS).and_then(|v| v.as_array())
256 {
257 for author in authors.iter().take(MAX_ITERATION_COUNT) {
258 if let Some(author_str) = author.as_str() {
259 let (name, email) = split_name_email(author_str);
260 parties.push(Party {
261 r#type: None,
262 role: Some("author".to_string()),
263 name,
264 email,
265 url: None,
266 organization: None,
267 organization_url: None,
268 timezone: None,
269 });
270 }
271 }
272 if authors.len() > MAX_ITERATION_COUNT {
273 warn!(
274 "Authors array has {} entries, capping at MAX_ITERATION_COUNT ({})",
275 authors.len(),
276 MAX_ITERATION_COUNT
277 );
278 }
279 }
280
281 parties
282}
283
/// Reports whether a dependency requirement string counts as a pinned version.
///
/// A requirement is considered pinned when, after trimming, it is non-empty,
/// contains none of the characters `^ ~ > < * =`, and has at least two dots
/// (for example `1.2.3`).
fn is_cargo_version_pinned(version_str: &str) -> bool {
    let requirement = version_str.trim();

    let has_range_marker = requirement
        .chars()
        .any(|c| matches!(c, '^' | '~' | '>' | '<' | '*' | '='));

    if requirement.is_empty() || has_range_marker {
        return false;
    }

    let dot_count = requirement.chars().filter(|&c| c == '.').count();
    dot_count >= 2
}
314
315fn extract_dependencies(toml_content: &Value, scope: &str) -> Vec<Dependency> {
316 use serde_json::json;
317
318 let mut dependencies = Vec::new();
319
320 let is_runtime = !scope.ends_with("dev-dependencies") && !scope.ends_with("build-dependencies");
322
323 if let Some(deps_table) = toml_content.get(scope).and_then(|v| v.as_table()) {
324 if deps_table.len() > MAX_ITERATION_COUNT {
325 warn!(
326 "Dependency table '{}' has {} entries, capping at MAX_ITERATION_COUNT ({})",
327 scope,
328 deps_table.len(),
329 MAX_ITERATION_COUNT
330 );
331 }
332 for (name, value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
333 let (extracted_requirement, is_optional, extra_data_map, is_pinned) = match value {
334 Value::String(version_str) => {
335 let pinned = is_cargo_version_pinned(version_str);
337 (
338 Some(version_str.to_string()),
339 false,
340 std::collections::HashMap::new(),
341 pinned,
342 )
343 }
344 Value::Table(table) => {
345 let version = table
347 .get("version")
348 .and_then(|v| v.as_str())
349 .map(String::from);
350
351 let pinned = version.as_ref().is_some_and(|v| is_cargo_version_pinned(v));
352
353 let is_optional = table
354 .get("optional")
355 .and_then(|v| v.as_bool())
356 .unwrap_or(false);
357
358 let mut extra_data = std::collections::HashMap::new();
359
360 for (key, val) in table {
362 match key.as_str() {
363 "version" => {
364 if let Some(v) = val.as_str() {
366 extra_data.insert("version".to_string(), json!(v));
367 }
368 }
369 "features" => {
370 if let Some(features_array) = val.as_array() {
372 let features: Vec<String> = features_array
373 .iter()
374 .filter_map(|f| f.as_str().map(String::from))
375 .collect();
376 extra_data.insert("features".to_string(), json!(features));
377 }
378 }
379 "optional" => {
380 }
382 _ => {
383 if let Some(s) = val.as_str() {
385 extra_data.insert(key.clone(), json!(s));
386 } else if let Some(b) = val.as_bool() {
387 extra_data.insert(key.clone(), json!(b));
388 } else if let Some(i) = val.as_integer() {
389 extra_data.insert(key.clone(), json!(i));
390 }
391 }
392 }
393 }
394
395 (version, is_optional, extra_data, pinned)
396 }
397 _ => {
398 continue;
400 }
401 };
402
403 if extracted_requirement.is_some() || !extra_data_map.is_empty() {
405 let purl = match PackageUrl::new(CargoParser::PACKAGE_TYPE.as_str(), name) {
406 Ok(p) => p.to_string(),
407 Err(e) => {
408 warn!(
409 "Failed to create PackageUrl for cargo dependency '{}': {}",
410 name, e
411 );
412 continue; }
414 };
415
416 dependencies.push(Dependency {
417 purl: Some(purl),
418 extracted_requirement,
419 scope: Some(scope.to_string()),
420 is_runtime: Some(is_runtime),
421 is_optional: Some(is_optional),
422 is_pinned: Some(is_pinned),
423 is_direct: Some(true),
424 resolved_package: None,
425 extra_data: if extra_data_map.is_empty() {
426 None
427 } else {
428 Some(extra_data_map)
429 },
430 });
431 }
432 }
433 }
434
435 dependencies
436}
437
438fn extract_dependencies_for_scopes(toml_content: &Value, scopes: &[&str]) -> Vec<Dependency> {
439 scopes
440 .iter()
441 .flat_map(|scope| extract_dependencies(toml_content, scope))
442 .collect()
443}
444
445fn extract_keywords_and_categories(toml_content: &Value) -> Vec<String> {
447 let mut keywords = Vec::new();
448
449 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table()) {
450 if let Some(kw_array) = package.get(FIELD_KEYWORDS).and_then(|v| v.as_array()) {
451 if kw_array.len() > MAX_ITERATION_COUNT {
452 warn!(
453 "Keywords array has {} entries, capping at MAX_ITERATION_COUNT ({})",
454 kw_array.len(),
455 MAX_ITERATION_COUNT
456 );
457 }
458 for kw in kw_array.iter().take(MAX_ITERATION_COUNT) {
459 if let Some(kw_str) = kw.as_str() {
460 keywords.push(truncate_field(kw_str.to_string()));
461 }
462 }
463 }
464
465 if let Some(cat_array) = package.get(FIELD_CATEGORIES).and_then(|v| v.as_array()) {
466 if cat_array.len() > MAX_ITERATION_COUNT {
467 warn!(
468 "Categories array has {} entries, capping at MAX_ITERATION_COUNT ({})",
469 cat_array.len(),
470 MAX_ITERATION_COUNT
471 );
472 }
473 for cat in cat_array.iter().take(MAX_ITERATION_COUNT) {
474 if let Some(cat_str) = cat.as_str() {
475 keywords.push(truncate_field(cat_str.to_string()));
476 }
477 }
478 }
479 }
480
481 keywords
482}
483
484fn extract_file_references(toml_content: &Value) -> Vec<FileReference> {
485 let mut file_references = Vec::new();
486
487 if let Some(package) = toml_content
488 .get(FIELD_PACKAGE)
489 .and_then(|value| value.as_table())
490 {
491 for path in [
492 package
493 .get(FIELD_LICENSE_FILE)
494 .and_then(|value| value.as_str()),
495 package.get(FIELD_README).and_then(|value| value.as_str()),
496 ]
497 .into_iter()
498 .flatten()
499 {
500 if file_references
501 .iter()
502 .any(|reference: &FileReference| reference.path == path)
503 {
504 continue;
505 }
506
507 file_references.push(FileReference {
508 path: path.to_string(),
509 size: None,
510 sha1: None,
511 md5: None,
512 sha256: None,
513 sha512: None,
514 extra_data: None,
515 });
516 }
517 }
518
519 file_references
520}
521
522fn toml_to_json(value: &toml::Value, guard: &mut RecursionGuard<()>) -> serde_json::Value {
523 if guard.descend() {
524 warn!("TOML nesting depth exceeded, returning Null");
525 return serde_json::Value::Null;
526 }
527 let result = match value {
528 toml::Value::String(s) => serde_json::json!(s),
529 toml::Value::Integer(i) => serde_json::json!(i),
530 toml::Value::Float(f) => serde_json::json!(f),
531 toml::Value::Boolean(b) => serde_json::json!(b),
532 toml::Value::Array(a) => {
533 serde_json::Value::Array(a.iter().map(|v| toml_to_json(v, guard)).collect())
534 }
535 toml::Value::Table(t) => {
536 let map: serde_json::Map<String, serde_json::Value> = t
537 .iter()
538 .map(|(k, v)| (k.clone(), toml_to_json(v, guard)))
539 .collect();
540 serde_json::Value::Object(map)
541 }
542 toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
543 };
544 guard.ascend();
545 result
546}
547
548fn extract_extra_data(
550 toml_content: &Value,
551) -> Option<std::collections::HashMap<String, serde_json::Value>> {
552 use serde_json::json;
553 let mut extra_data = std::collections::HashMap::new();
554
555 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table()) {
556 if package.len() > MAX_ITERATION_COUNT {
557 warn!(
558 "Package table has {} entries, exceeding MAX_ITERATION_COUNT ({})",
559 package.len(),
560 MAX_ITERATION_COUNT
561 );
562 }
563 if let Some(rust_version_value) = package.get(FIELD_RUST_VERSION) {
564 if let Some(rust_version_str) = rust_version_value.as_str() {
565 extra_data.insert("rust_version".to_string(), json!(rust_version_str));
566 } else if rust_version_value
567 .as_table()
568 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
569 {
570 extra_data.insert("rust-version".to_string(), json!("workspace"));
571 }
572 }
573
574 if let Some(edition_value) = package.get(FIELD_EDITION) {
576 if let Some(edition_str) = edition_value.as_str() {
577 extra_data.insert("rust_edition".to_string(), json!(edition_str));
578 } else if edition_value
579 .as_table()
580 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
581 {
582 extra_data.insert("edition".to_string(), json!("workspace"));
583 }
584 }
585
586 if let Some(documentation) = package.get("documentation").and_then(|v| v.as_str()) {
588 extra_data.insert("documentation_url".to_string(), json!(documentation));
589 }
590
591 if let Some(license_file) = package.get(FIELD_LICENSE_FILE).and_then(|v| v.as_str()) {
593 extra_data.insert("license_file".to_string(), json!(license_file));
594 }
595
596 if let Some(readme_value) = package.get(FIELD_README) {
597 if let Some(readme_file) = readme_value.as_str() {
598 extra_data.insert("readme_file".to_string(), json!(readme_file));
599 } else if let Some(readme_enabled) = readme_value.as_bool() {
600 extra_data.insert("readme".to_string(), json!(readme_enabled));
601 } else if readme_value
602 .as_table()
603 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
604 {
605 extra_data.insert("readme".to_string(), json!("workspace"));
606 }
607 }
608
609 if let Some(publish_value) = package.get(FIELD_PUBLISH) {
610 extra_data.insert(
611 "publish".to_string(),
612 toml_to_json(publish_value, &mut RecursionGuard::depth_only()),
613 );
614 }
615
616 if let Some(version_value) = package.get(FIELD_VERSION)
619 && version_value
620 .as_table()
621 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
622 {
623 extra_data.insert("version".to_string(), json!("workspace"));
624 }
625
626 if let Some(license_value) = package.get(FIELD_LICENSE)
628 && license_value
629 .as_table()
630 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
631 {
632 extra_data.insert("license".to_string(), json!("workspace"));
633 }
634
635 if let Some(homepage_value) = package.get(FIELD_HOMEPAGE)
637 && homepage_value
638 .as_table()
639 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
640 {
641 extra_data.insert("homepage".to_string(), json!("workspace"));
642 }
643
644 if let Some(repository_value) = package.get(FIELD_REPOSITORY)
646 && repository_value
647 .as_table()
648 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
649 {
650 extra_data.insert("repository".to_string(), json!("workspace"));
651 }
652
653 if let Some(categories_value) = package.get(FIELD_CATEGORIES)
655 && categories_value
656 .as_table()
657 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
658 {
659 extra_data.insert("categories".to_string(), json!("workspace"));
660 }
661
662 if let Some(authors_value) = package.get(FIELD_AUTHORS)
664 && authors_value
665 .as_table()
666 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
667 {
668 extra_data.insert("authors".to_string(), json!("workspace"));
669 }
670 }
671
672 if let Some(workspace_value) = toml_content.get("workspace") {
674 extra_data.insert(
675 "workspace".to_string(),
676 toml_to_json(workspace_value, &mut RecursionGuard::depth_only()),
677 );
678 }
679
680 if extra_data.is_empty() {
681 None
682 } else {
683 Some(extra_data)
684 }
685}
686
// Invokes the crate's `register_parser!` macro for this parser. The arguments
// appear to be: a description, the path globs it applies to, the package type
// ("cargo"), the primary language and a documentation URL.
// NOTE(review): argument meanings are inferred from the values; confirm against
// the macro definition.
crate::register_parser!(
    "Rust Cargo.toml manifest",
    &["**/Cargo.toml", "**/cargo.toml"],
    "cargo",
    "Rust",
    Some("https://doc.rust-lang.org/cargo/reference/manifest.html"),
);