1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{
27 MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, split_name_email, truncate_field,
28};
29use packageurl::PackageUrl;
30use std::path::Path;
31use toml::Value;
32
33use super::PackageParser;
34use super::license_normalization::{
35 DeclaredLicenseMatchMetadata, build_declared_license_data, empty_declared_license_data,
36 normalize_spdx_expression,
37};
38
// Keys looked up in the parsed manifest.

// Table at the document root that holds the package metadata.
const FIELD_PACKAGE: &str = "package";

// Keys read from inside the `[package]` table.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSE_FILE: &str = "license-file";
const FIELD_AUTHORS: &str = "authors";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_HOMEPAGE: &str = "homepage";

// Dependency tables read from the document root. The `*_LEGACY` constants are
// the underscore spellings of the hyphenated table names.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";
const FIELD_DEV_DEPENDENCIES_LEGACY: &str = "dev_dependencies";
const FIELD_BUILD_DEPENDENCIES: &str = "build-dependencies";
const FIELD_BUILD_DEPENDENCIES_LEGACY: &str = "build_dependencies";

// Further keys read from inside the `[package]` table.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_CATEGORIES: &str = "categories";
const FIELD_RUST_VERSION: &str = "rust-version";
const FIELD_EDITION: &str = "edition";
const FIELD_README: &str = "readme";
const FIELD_PUBLISH: &str = "publish";
59
60pub struct CargoParser;
65
66impl PackageParser for CargoParser {
67 const PACKAGE_TYPE: PackageType = PackageType::Cargo;
68
69 fn extract_packages(path: &Path) -> Vec<PackageData> {
70 let toml_content = match read_cargo_toml(path) {
71 Ok(content) => content,
72 Err(_) => return Vec::new(),
73 };
74
75 let package = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table());
76
77 let name = package
78 .and_then(|p| p.get(FIELD_NAME))
79 .and_then(|v| v.as_str())
80 .map(|s| truncate_field(s.to_string()));
81
82 let version = package
83 .and_then(|p| p.get(FIELD_VERSION))
84 .and_then(|v| v.as_str())
85 .map(|s| truncate_field(s.to_string()));
86
87 let raw_license = package
88 .and_then(|p| p.get(FIELD_LICENSE))
89 .and_then(|v| v.as_str())
90 .map(|s| truncate_field(s.to_string()));
91 let file_references = extract_file_references(&toml_content);
92 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
93 raw_license
94 .as_deref()
95 .and_then(normalize_spdx_expression)
96 .map(|normalized| {
97 build_declared_license_data(
98 normalized,
99 DeclaredLicenseMatchMetadata::single_line(
100 raw_license.as_deref().unwrap_or_default(),
101 ),
102 )
103 })
104 .unwrap_or_else(empty_declared_license_data);
105
106 let extracted_license_statement = raw_license.clone();
107
108 let dependencies = extract_dependencies_for_scopes(&toml_content, &[FIELD_DEPENDENCIES]);
109 let dev_dependencies = extract_dependencies_for_scopes(
110 &toml_content,
111 &[FIELD_DEV_DEPENDENCIES, FIELD_DEV_DEPENDENCIES_LEGACY],
112 );
113 let build_dependencies = extract_dependencies_for_scopes(
114 &toml_content,
115 &[FIELD_BUILD_DEPENDENCIES, FIELD_BUILD_DEPENDENCIES_LEGACY],
116 );
117
118 let purl = create_package_url(&name, &version);
119
120 let homepage_url = package
121 .and_then(|p| p.get(FIELD_HOMEPAGE))
122 .and_then(|v| v.as_str())
123 .map(|s| truncate_field(s.to_string()))
124 .or_else(|| {
125 name.as_ref()
126 .map(|n| format!("https://crates.io/crates/{}", n))
127 });
128
129 let repository_url = package
130 .and_then(|p| p.get(FIELD_REPOSITORY))
131 .and_then(|v| v.as_str())
132 .map(|s| truncate_field(s.to_string()));
133 let download_url = None;
134
135 let api_data_url = generate_cargo_api_url(&name, &version);
136
137 let repository_homepage_url = name
138 .as_ref()
139 .map(|n| format!("https://crates.io/crates/{}", n));
140
141 let repository_download_url = match (&name, &version) {
142 (Some(n), Some(v)) => Some(format!(
143 "https://crates.io/api/v1/crates/{}/{}/download",
144 n, v
145 )),
146 _ => None,
147 };
148
149 let description = package
150 .and_then(|p| p.get(FIELD_DESCRIPTION))
151 .and_then(|v| v.as_str())
152 .map(|s| truncate_field(s.trim().to_string()));
153
154 let keywords = extract_keywords_and_categories(&toml_content);
155
156 let extra_data = extract_extra_data(&toml_content);
157 let is_private = package
158 .and_then(|p| p.get(FIELD_PUBLISH))
159 .is_some_and(|value| matches!(value, Value::Boolean(false)));
160 vec![PackageData {
161 package_type: Some(Self::PACKAGE_TYPE),
162 namespace: None,
163 name,
164 version,
165 qualifiers: None,
166 subpath: None,
167 primary_language: Some("Rust".to_string()),
168 description,
169 release_date: None,
170 parties: extract_parties(&toml_content),
171 keywords,
172 homepage_url,
173 download_url,
174 size: None,
175 sha1: None,
176 md5: None,
177 sha256: None,
178 sha512: None,
179 bug_tracking_url: None,
180 code_view_url: None,
181 vcs_url: repository_url,
182 copyright: None,
183 holder: None,
184 declared_license_expression,
185 declared_license_expression_spdx,
186 license_detections,
187 other_license_expression: None,
188 other_license_expression_spdx: None,
189 other_license_detections: Vec::new(),
190 extracted_license_statement,
191 notice_text: None,
192 source_packages: Vec::new(),
193 file_references,
194 is_private,
195 is_virtual: false,
196 extra_data,
197 dependencies: [dependencies, dev_dependencies, build_dependencies].concat(),
198 repository_homepage_url,
199 repository_download_url,
200 api_data_url,
201 datasource_id: Some(DatasourceId::CargoToml),
202 purl,
203 }]
204 }
205
206 fn is_match(path: &Path) -> bool {
207 path.file_name()
208 .and_then(|name| name.to_str())
209 .is_some_and(|name| name.eq_ignore_ascii_case("cargo.toml"))
210 }
211
212 fn metadata() -> Vec<super::metadata::ParserMetadata> {
213 vec![super::metadata::ParserMetadata {
214 description: "Rust Cargo.toml manifest",
215 file_patterns: &["**/Cargo.toml", "**/cargo.toml"],
216 package_type: "cargo",
217 primary_language: "Rust",
218 documentation_url: Some("https://doc.rust-lang.org/cargo/reference/manifest.html"),
219 }]
220 }
221}
222
223fn read_cargo_toml(path: &Path) -> Result<Value, String> {
225 let content =
226 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
227
228 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
229}
230
/// Builds the crates.io API URL for a crate.
///
/// Returns `None` when `name` is `None`. The version does not take part in
/// the URL; the parameter is accepted but unused.
fn generate_cargo_api_url(name: &Option<String>, _version: &Option<String>) -> Option<String> {
    const REGISTRY: &str = "https://crates.io/api/v1/crates";

    let crate_name = name.as_deref()?;
    Some(format!("{}/{}", REGISTRY, crate_name))
}
235
236fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
237 name.as_ref().and_then(|name| {
238 let mut package_url = match PackageUrl::new(CargoParser::PACKAGE_TYPE.as_str(), name) {
239 Ok(p) => p,
240 Err(e) => {
241 warn!(
242 "Failed to create PackageUrl for cargo package '{}': {}",
243 name, e
244 );
245 return None;
246 }
247 };
248
249 if let Some(v) = version
250 && let Err(e) = package_url.with_version(v)
251 {
252 warn!(
253 "Failed to set version '{}' for cargo package '{}': {}",
254 v, name, e
255 );
256 return None;
257 }
258
259 Some(package_url.to_string())
260 })
261}
262
263fn extract_parties(toml_content: &Value) -> Vec<Party> {
265 let mut parties = Vec::new();
266
267 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table())
268 && let Some(authors) = package.get(FIELD_AUTHORS).and_then(|v| v.as_array())
269 {
270 for author in authors.iter().take(MAX_ITERATION_COUNT) {
271 if let Some(author_str) = author.as_str() {
272 let (name, email) = split_name_email(author_str);
273 parties.push(Party {
274 r#type: None,
275 role: Some("author".to_string()),
276 name,
277 email,
278 url: None,
279 organization: None,
280 organization_url: None,
281 timezone: None,
282 });
283 }
284 }
285 if authors.len() > MAX_ITERATION_COUNT {
286 warn!(
287 "Authors array has {} entries, capping at MAX_ITERATION_COUNT ({})",
288 authors.len(),
289 MAX_ITERATION_COUNT
290 );
291 }
292 }
293
294 parties
295}
296
/// Reports whether a dependency requirement string is treated as pinned.
///
/// A requirement counts as pinned when, after trimming whitespace, it
/// contains none of the characters `^ ~ > < * =` and has at least two dots
/// (so `1.2.3` is pinned while `1.2`, `^1.2.3` and `=1.2.3` are not). An
/// empty or whitespace-only string is never pinned.
fn is_cargo_version_pinned(version_str: &str) -> bool {
    let requirement = version_str.trim();

    let has_operator = requirement
        .chars()
        .any(|symbol| matches!(symbol, '^' | '~' | '>' | '<' | '*' | '='));
    if has_operator {
        return false;
    }

    // An empty requirement has no dots, so it falls out here as well.
    let dot_count = requirement.chars().filter(|&symbol| symbol == '.').count();
    dot_count >= 2
}
327
328fn extract_dependencies(toml_content: &Value, scope: &str) -> Vec<Dependency> {
329 use serde_json::json;
330
331 let mut dependencies = Vec::new();
332
333 let is_runtime = !scope.ends_with("dev-dependencies") && !scope.ends_with("build-dependencies");
335
336 if let Some(deps_table) = toml_content.get(scope).and_then(|v| v.as_table()) {
337 if deps_table.len() > MAX_ITERATION_COUNT {
338 warn!(
339 "Dependency table '{}' has {} entries, capping at MAX_ITERATION_COUNT ({})",
340 scope,
341 deps_table.len(),
342 MAX_ITERATION_COUNT
343 );
344 }
345 for (name, value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
346 let (extracted_requirement, is_optional, extra_data_map, is_pinned) = match value {
347 Value::String(version_str) => {
348 let pinned = is_cargo_version_pinned(version_str);
350 (
351 Some(version_str.to_string()),
352 false,
353 std::collections::HashMap::new(),
354 pinned,
355 )
356 }
357 Value::Table(table) => {
358 let version = table
360 .get("version")
361 .and_then(|v| v.as_str())
362 .map(String::from);
363
364 let pinned = version.as_ref().is_some_and(|v| is_cargo_version_pinned(v));
365
366 let is_optional = table
367 .get("optional")
368 .and_then(|v| v.as_bool())
369 .unwrap_or(false);
370
371 let mut extra_data = std::collections::HashMap::new();
372
373 for (key, val) in table {
375 match key.as_str() {
376 "version" => {
377 if let Some(v) = val.as_str() {
379 extra_data.insert("version".to_string(), json!(v));
380 }
381 }
382 "features" => {
383 if let Some(features_array) = val.as_array() {
385 let features: Vec<String> = features_array
386 .iter()
387 .filter_map(|f| f.as_str().map(String::from))
388 .collect();
389 extra_data.insert("features".to_string(), json!(features));
390 }
391 }
392 "optional" => {
393 }
395 _ => {
396 if let Some(s) = val.as_str() {
398 extra_data.insert(key.clone(), json!(s));
399 } else if let Some(b) = val.as_bool() {
400 extra_data.insert(key.clone(), json!(b));
401 } else if let Some(i) = val.as_integer() {
402 extra_data.insert(key.clone(), json!(i));
403 }
404 }
405 }
406 }
407
408 (version, is_optional, extra_data, pinned)
409 }
410 _ => {
411 continue;
413 }
414 };
415
416 if extracted_requirement.is_some() || !extra_data_map.is_empty() {
418 let purl = match PackageUrl::new(CargoParser::PACKAGE_TYPE.as_str(), name) {
419 Ok(p) => p.to_string(),
420 Err(e) => {
421 warn!(
422 "Failed to create PackageUrl for cargo dependency '{}': {}",
423 name, e
424 );
425 continue; }
427 };
428
429 dependencies.push(Dependency {
430 purl: Some(purl),
431 extracted_requirement,
432 scope: Some(scope.to_string()),
433 is_runtime: Some(is_runtime),
434 is_optional: Some(is_optional),
435 is_pinned: Some(is_pinned),
436 is_direct: Some(true),
437 resolved_package: None,
438 extra_data: if extra_data_map.is_empty() {
439 None
440 } else {
441 Some(extra_data_map)
442 },
443 });
444 }
445 }
446 }
447
448 dependencies
449}
450
451fn extract_dependencies_for_scopes(toml_content: &Value, scopes: &[&str]) -> Vec<Dependency> {
452 scopes
453 .iter()
454 .flat_map(|scope| extract_dependencies(toml_content, scope))
455 .collect()
456}
457
458fn extract_keywords_and_categories(toml_content: &Value) -> Vec<String> {
460 let mut keywords = Vec::new();
461
462 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table()) {
463 if let Some(kw_array) = package.get(FIELD_KEYWORDS).and_then(|v| v.as_array()) {
464 if kw_array.len() > MAX_ITERATION_COUNT {
465 warn!(
466 "Keywords array has {} entries, capping at MAX_ITERATION_COUNT ({})",
467 kw_array.len(),
468 MAX_ITERATION_COUNT
469 );
470 }
471 for kw in kw_array.iter().take(MAX_ITERATION_COUNT) {
472 if let Some(kw_str) = kw.as_str() {
473 keywords.push(truncate_field(kw_str.to_string()));
474 }
475 }
476 }
477
478 if let Some(cat_array) = package.get(FIELD_CATEGORIES).and_then(|v| v.as_array()) {
479 if cat_array.len() > MAX_ITERATION_COUNT {
480 warn!(
481 "Categories array has {} entries, capping at MAX_ITERATION_COUNT ({})",
482 cat_array.len(),
483 MAX_ITERATION_COUNT
484 );
485 }
486 for cat in cat_array.iter().take(MAX_ITERATION_COUNT) {
487 if let Some(cat_str) = cat.as_str() {
488 keywords.push(truncate_field(cat_str.to_string()));
489 }
490 }
491 }
492 }
493
494 keywords
495}
496
497fn extract_file_references(toml_content: &Value) -> Vec<FileReference> {
498 let mut file_references = Vec::new();
499
500 if let Some(package) = toml_content
501 .get(FIELD_PACKAGE)
502 .and_then(|value| value.as_table())
503 {
504 for path in [
505 package
506 .get(FIELD_LICENSE_FILE)
507 .and_then(|value| value.as_str()),
508 package.get(FIELD_README).and_then(|value| value.as_str()),
509 ]
510 .into_iter()
511 .flatten()
512 {
513 if file_references
514 .iter()
515 .any(|reference: &FileReference| reference.path == path)
516 {
517 continue;
518 }
519
520 file_references.push(FileReference {
521 path: path.to_string(),
522 size: None,
523 sha1: None,
524 md5: None,
525 sha256: None,
526 sha512: None,
527 extra_data: None,
528 });
529 }
530 }
531
532 file_references
533}
534
535fn toml_to_json(value: &toml::Value, guard: &mut RecursionGuard<()>) -> serde_json::Value {
536 if guard.descend() {
537 warn!("TOML nesting depth exceeded, returning Null");
538 return serde_json::Value::Null;
539 }
540 let result = match value {
541 toml::Value::String(s) => serde_json::json!(s),
542 toml::Value::Integer(i) => serde_json::json!(i),
543 toml::Value::Float(f) => serde_json::json!(f),
544 toml::Value::Boolean(b) => serde_json::json!(b),
545 toml::Value::Array(a) => {
546 serde_json::Value::Array(a.iter().map(|v| toml_to_json(v, guard)).collect())
547 }
548 toml::Value::Table(t) => {
549 let map: serde_json::Map<String, serde_json::Value> = t
550 .iter()
551 .map(|(k, v)| (k.clone(), toml_to_json(v, guard)))
552 .collect();
553 serde_json::Value::Object(map)
554 }
555 toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
556 };
557 guard.ascend();
558 result
559}
560
561fn extract_extra_data(
563 toml_content: &Value,
564) -> Option<std::collections::HashMap<String, serde_json::Value>> {
565 use serde_json::json;
566 let mut extra_data = std::collections::HashMap::new();
567
568 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table()) {
569 if package.len() > MAX_ITERATION_COUNT {
570 warn!(
571 "Package table has {} entries, exceeding MAX_ITERATION_COUNT ({})",
572 package.len(),
573 MAX_ITERATION_COUNT
574 );
575 }
576 if let Some(rust_version_value) = package.get(FIELD_RUST_VERSION) {
577 if let Some(rust_version_str) = rust_version_value.as_str() {
578 extra_data.insert("rust_version".to_string(), json!(rust_version_str));
579 } else if rust_version_value
580 .as_table()
581 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
582 {
583 extra_data.insert("rust-version".to_string(), json!("workspace"));
584 }
585 }
586
587 if let Some(edition_value) = package.get(FIELD_EDITION) {
589 if let Some(edition_str) = edition_value.as_str() {
590 extra_data.insert("rust_edition".to_string(), json!(edition_str));
591 } else if edition_value
592 .as_table()
593 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
594 {
595 extra_data.insert("edition".to_string(), json!("workspace"));
596 }
597 }
598
599 if let Some(documentation) = package.get("documentation").and_then(|v| v.as_str()) {
601 extra_data.insert("documentation_url".to_string(), json!(documentation));
602 }
603
604 if let Some(license_file) = package.get(FIELD_LICENSE_FILE).and_then(|v| v.as_str()) {
606 extra_data.insert("license_file".to_string(), json!(license_file));
607 }
608
609 if let Some(readme_value) = package.get(FIELD_README) {
610 if let Some(readme_file) = readme_value.as_str() {
611 extra_data.insert("readme_file".to_string(), json!(readme_file));
612 } else if let Some(readme_enabled) = readme_value.as_bool() {
613 extra_data.insert("readme".to_string(), json!(readme_enabled));
614 } else if readme_value
615 .as_table()
616 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
617 {
618 extra_data.insert("readme".to_string(), json!("workspace"));
619 }
620 }
621
622 if let Some(publish_value) = package.get(FIELD_PUBLISH) {
623 extra_data.insert(
624 "publish".to_string(),
625 toml_to_json(publish_value, &mut RecursionGuard::depth_only()),
626 );
627 }
628
629 if let Some(version_value) = package.get(FIELD_VERSION)
632 && version_value
633 .as_table()
634 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
635 {
636 extra_data.insert("version".to_string(), json!("workspace"));
637 }
638
639 if let Some(license_value) = package.get(FIELD_LICENSE)
641 && license_value
642 .as_table()
643 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
644 {
645 extra_data.insert("license".to_string(), json!("workspace"));
646 }
647
648 if let Some(homepage_value) = package.get(FIELD_HOMEPAGE)
650 && homepage_value
651 .as_table()
652 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
653 {
654 extra_data.insert("homepage".to_string(), json!("workspace"));
655 }
656
657 if let Some(repository_value) = package.get(FIELD_REPOSITORY)
659 && repository_value
660 .as_table()
661 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
662 {
663 extra_data.insert("repository".to_string(), json!("workspace"));
664 }
665
666 if let Some(categories_value) = package.get(FIELD_CATEGORIES)
668 && categories_value
669 .as_table()
670 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
671 {
672 extra_data.insert("categories".to_string(), json!("workspace"));
673 }
674
675 if let Some(authors_value) = package.get(FIELD_AUTHORS)
677 && authors_value
678 .as_table()
679 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
680 {
681 extra_data.insert("authors".to_string(), json!("workspace"));
682 }
683 }
684
685 if let Some(workspace_value) = toml_content.get("workspace") {
687 extra_data.insert(
688 "workspace".to_string(),
689 toml_to_json(workspace_value, &mut RecursionGuard::depth_only()),
690 );
691 }
692
693 if extra_data.is_empty() {
694 None
695 } else {
696 Some(extra_data)
697 }
698}