1use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
22use crate::parsers::utils::split_name_email;
23use log::warn;
24use packageurl::PackageUrl;
25use std::fs::File;
26use std::io::Read;
27use std::path::Path;
28use toml::Value;
29
30use super::PackageParser;
31use super::license_normalization::normalize_spdx_declared_license;
32
// Key of the top-level table that holds the package metadata.
const FIELD_PACKAGE: &str = "package";
// Keys looked up inside the `package` table.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSE_FILE: &str = "license-file";
const FIELD_AUTHORS: &str = "authors";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_HOMEPAGE: &str = "homepage";
// Keys of the top-level dependency tables; each is passed to `extract_dependencies`
// as its `scope` argument.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";
const FIELD_BUILD_DEPENDENCIES: &str = "build-dependencies";
// Further keys looked up inside the `package` table.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_CATEGORIES: &str = "categories";
const FIELD_RUST_VERSION: &str = "rust-version";
const FIELD_EDITION: &str = "edition";
const FIELD_README: &str = "readme";
const FIELD_PUBLISH: &str = "publish";
51
/// Parser for Rust `Cargo.toml` manifests.
///
/// The type carries no state; its behaviour comes from the [`PackageParser`]
/// implementation below.
pub struct CargoParser;
57
58impl PackageParser for CargoParser {
59 const PACKAGE_TYPE: PackageType = PackageType::Cargo;
60
61 fn extract_packages(path: &Path) -> Vec<PackageData> {
62 let toml_content = match read_cargo_toml(path) {
63 Ok(content) => content,
64 Err(e) => {
65 warn!("Failed to read or parse Cargo.toml at {:?}: {}", path, e);
66 return vec![default_package_data()];
67 }
68 };
69
70 let package = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table());
71
72 let name = package
73 .and_then(|p| p.get(FIELD_NAME))
74 .and_then(|v| v.as_str())
75 .map(String::from);
76
77 let version = package
78 .and_then(|p| p.get(FIELD_VERSION))
79 .and_then(|v| v.as_str())
80 .map(String::from);
81
82 let raw_license = package
83 .and_then(|p| p.get(FIELD_LICENSE))
84 .and_then(|v| v.as_str())
85 .map(String::from);
86 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
87 normalize_spdx_declared_license(raw_license.as_deref());
88
89 let extracted_license_statement = raw_license.clone();
90
91 let dependencies = extract_dependencies(&toml_content, FIELD_DEPENDENCIES);
92 let dev_dependencies = extract_dependencies(&toml_content, FIELD_DEV_DEPENDENCIES);
93 let build_dependencies = extract_dependencies(&toml_content, FIELD_BUILD_DEPENDENCIES);
94
95 let purl = create_package_url(&name, &version);
96
97 let homepage_url = package
98 .and_then(|p| p.get(FIELD_HOMEPAGE))
99 .and_then(|v| v.as_str())
100 .map(String::from)
101 .or_else(|| {
102 name.as_ref()
103 .map(|n| format!("https://crates.io/crates/{}", n))
104 });
105
106 let repository_url = package
107 .and_then(|p| p.get(FIELD_REPOSITORY))
108 .and_then(|v| v.as_str())
109 .map(String::from);
110 let download_url = None;
111
112 let api_data_url = generate_cargo_api_url(&name, &version);
113
114 let repository_homepage_url = name
115 .as_ref()
116 .map(|n| format!("https://crates.io/crates/{}", n));
117
118 let repository_download_url = match (&name, &version) {
119 (Some(n), Some(v)) => Some(format!(
120 "https://crates.io/api/v1/crates/{}/{}/download",
121 n, v
122 )),
123 _ => None,
124 };
125
126 let description = package
127 .and_then(|p| p.get(FIELD_DESCRIPTION))
128 .and_then(|v| v.as_str())
129 .map(|s| s.trim().to_string());
130
131 let keywords = extract_keywords_and_categories(&toml_content);
132
133 let extra_data = extract_extra_data(&toml_content);
134 let file_references = extract_file_references(&toml_content);
135
136 vec![PackageData {
137 package_type: Some(Self::PACKAGE_TYPE),
138 namespace: None,
139 name,
140 version,
141 qualifiers: None,
142 subpath: None,
143 primary_language: Some("Rust".to_string()),
144 description,
145 release_date: None,
146 parties: extract_parties(&toml_content),
147 keywords,
148 homepage_url,
149 download_url,
150 size: None,
151 sha1: None,
152 md5: None,
153 sha256: None,
154 sha512: None,
155 bug_tracking_url: None,
156 code_view_url: None,
157 vcs_url: repository_url,
158 copyright: None,
159 holder: None,
160 declared_license_expression,
161 declared_license_expression_spdx,
162 license_detections,
163 other_license_expression: None,
164 other_license_expression_spdx: None,
165 other_license_detections: Vec::new(),
166 extracted_license_statement,
167 notice_text: None,
168 source_packages: Vec::new(),
169 file_references,
170 is_private: false,
171 is_virtual: false,
172 extra_data,
173 dependencies: [dependencies, dev_dependencies, build_dependencies].concat(),
174 repository_homepage_url,
175 repository_download_url,
176 api_data_url,
177 datasource_id: Some(DatasourceId::CargoToml),
178 purl,
179 }]
180 }
181
182 fn is_match(path: &Path) -> bool {
183 path.file_name()
184 .and_then(|name| name.to_str())
185 .is_some_and(|name| name.eq_ignore_ascii_case("cargo.toml"))
186 }
187}
188
189fn read_cargo_toml(path: &Path) -> Result<Value, String> {
191 let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
192 let mut content = String::new();
193 file.read_to_string(&mut content)
194 .map_err(|e| format!("Error reading file: {}", e))?;
195
196 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
197}
198
/// Builds the crates.io API URL for a crate.
///
/// Returns `None` when `name` is `None`. The version argument is accepted but
/// not used: the URL is derived from the name alone.
fn generate_cargo_api_url(name: &Option<String>, _version: &Option<String>) -> Option<String> {
    const REGISTRY: &str = "https://crates.io/api/v1/crates";

    let crate_name = name.as_ref()?;
    Some(format!("{}/{}", REGISTRY, crate_name))
}
203
204fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
205 name.as_ref().and_then(|name| {
206 let mut package_url = match PackageUrl::new(CargoParser::PACKAGE_TYPE.as_str(), name) {
207 Ok(p) => p,
208 Err(e) => {
209 warn!(
210 "Failed to create PackageUrl for cargo package '{}': {}",
211 name, e
212 );
213 return None;
214 }
215 };
216
217 if let Some(v) = version
218 && let Err(e) = package_url.with_version(v)
219 {
220 warn!(
221 "Failed to set version '{}' for cargo package '{}': {}",
222 v, name, e
223 );
224 return None;
225 }
226
227 Some(package_url.to_string())
228 })
229}
230
231fn extract_parties(toml_content: &Value) -> Vec<Party> {
233 let mut parties = Vec::new();
234
235 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table())
236 && let Some(authors) = package.get(FIELD_AUTHORS).and_then(|v| v.as_array())
237 {
238 for author in authors {
239 if let Some(author_str) = author.as_str() {
240 let (name, email) = split_name_email(author_str);
241 parties.push(Party {
242 r#type: None,
243 role: Some("author".to_string()),
244 name,
245 email,
246 url: None,
247 organization: None,
248 organization_url: None,
249 timezone: None,
250 });
251 }
252 }
253 }
254
255 parties
256}
257
/// Heuristic for whether a dependency requirement names one exact version.
///
/// After trimming surrounding whitespace, the requirement counts as pinned when
/// it is non-empty, contains none of `^ ~ > < * =`, and has at least two dots
/// (i.e. looks like a full `major.minor.patch`).
///
/// NOTE(review): Cargo itself treats a bare `1.2.3` as a caret requirement and
/// `=1.2.3` as the exact one; this function classifies them the other way
/// round. Confirm that this is the intended convention before changing it.
fn is_cargo_version_pinned(version_str: &str) -> bool {
    let requirement = version_str.trim();

    let has_range_operator = requirement.contains(['^', '~', '>', '<', '*', '=']);
    let dot_count = requirement.chars().filter(|&c| c == '.').count();

    !requirement.is_empty() && !has_range_operator && dot_count >= 2
}
288
289fn extract_dependencies(toml_content: &Value, scope: &str) -> Vec<Dependency> {
290 use serde_json::json;
291
292 let mut dependencies = Vec::new();
293
294 let is_runtime = !scope.ends_with("dev-dependencies") && !scope.ends_with("build-dependencies");
296
297 if let Some(deps_table) = toml_content.get(scope).and_then(|v| v.as_table()) {
298 for (name, value) in deps_table {
299 let (extracted_requirement, is_optional, extra_data_map, is_pinned) = match value {
300 Value::String(version_str) => {
301 let pinned = is_cargo_version_pinned(version_str);
303 (
304 Some(version_str.to_string()),
305 false,
306 std::collections::HashMap::new(),
307 pinned,
308 )
309 }
310 Value::Table(table) => {
311 let version = table
313 .get("version")
314 .and_then(|v| v.as_str())
315 .map(String::from);
316
317 let pinned = version.as_ref().is_some_and(|v| is_cargo_version_pinned(v));
318
319 let is_optional = table
320 .get("optional")
321 .and_then(|v| v.as_bool())
322 .unwrap_or(false);
323
324 let mut extra_data = std::collections::HashMap::new();
325
326 for (key, val) in table {
328 match key.as_str() {
329 "version" => {
330 if let Some(v) = val.as_str() {
332 extra_data.insert("version".to_string(), json!(v));
333 }
334 }
335 "features" => {
336 if let Some(features_array) = val.as_array() {
338 let features: Vec<String> = features_array
339 .iter()
340 .filter_map(|f| f.as_str().map(String::from))
341 .collect();
342 extra_data.insert("features".to_string(), json!(features));
343 }
344 }
345 "optional" => {
346 }
348 _ => {
349 if let Some(s) = val.as_str() {
351 extra_data.insert(key.clone(), json!(s));
352 } else if let Some(b) = val.as_bool() {
353 extra_data.insert(key.clone(), json!(b));
354 } else if let Some(i) = val.as_integer() {
355 extra_data.insert(key.clone(), json!(i));
356 }
357 }
358 }
359 }
360
361 (version, is_optional, extra_data, pinned)
362 }
363 _ => {
364 continue;
366 }
367 };
368
369 if extracted_requirement.is_some() || !extra_data_map.is_empty() {
371 let purl = match PackageUrl::new(CargoParser::PACKAGE_TYPE.as_str(), name) {
372 Ok(p) => p.to_string(),
373 Err(e) => {
374 warn!(
375 "Failed to create PackageUrl for cargo dependency '{}': {}",
376 name, e
377 );
378 continue; }
380 };
381
382 dependencies.push(Dependency {
383 purl: Some(purl),
384 extracted_requirement,
385 scope: Some(scope.to_string()),
386 is_runtime: Some(is_runtime),
387 is_optional: Some(is_optional),
388 is_pinned: Some(is_pinned),
389 is_direct: Some(true),
390 resolved_package: None,
391 extra_data: if extra_data_map.is_empty() {
392 None
393 } else {
394 Some(extra_data_map)
395 },
396 });
397 }
398 }
399 }
400
401 dependencies
402}
403
404fn extract_keywords_and_categories(toml_content: &Value) -> Vec<String> {
406 let mut keywords = Vec::new();
407
408 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table()) {
409 if let Some(kw_array) = package.get(FIELD_KEYWORDS).and_then(|v| v.as_array()) {
411 for kw in kw_array {
412 if let Some(kw_str) = kw.as_str() {
413 keywords.push(kw_str.to_string());
414 }
415 }
416 }
417
418 if let Some(cat_array) = package.get(FIELD_CATEGORIES).and_then(|v| v.as_array()) {
420 for cat in cat_array {
421 if let Some(cat_str) = cat.as_str() {
422 keywords.push(cat_str.to_string());
423 }
424 }
425 }
426 }
427
428 keywords
429}
430
431fn extract_file_references(toml_content: &Value) -> Vec<FileReference> {
432 let mut file_references = Vec::new();
433
434 if let Some(package) = toml_content
435 .get(FIELD_PACKAGE)
436 .and_then(|value| value.as_table())
437 {
438 for path in [
439 package
440 .get(FIELD_LICENSE_FILE)
441 .and_then(|value| value.as_str()),
442 package.get(FIELD_README).and_then(|value| value.as_str()),
443 ]
444 .into_iter()
445 .flatten()
446 {
447 if file_references
448 .iter()
449 .any(|reference: &FileReference| reference.path == path)
450 {
451 continue;
452 }
453
454 file_references.push(FileReference {
455 path: path.to_string(),
456 size: None,
457 sha1: None,
458 md5: None,
459 sha256: None,
460 sha512: None,
461 extra_data: None,
462 });
463 }
464 }
465
466 file_references
467}
468
469fn toml_to_json(value: &toml::Value) -> serde_json::Value {
471 match value {
472 toml::Value::String(s) => serde_json::json!(s),
473 toml::Value::Integer(i) => serde_json::json!(i),
474 toml::Value::Float(f) => serde_json::json!(f),
475 toml::Value::Boolean(b) => serde_json::json!(b),
476 toml::Value::Array(a) => serde_json::Value::Array(a.iter().map(toml_to_json).collect()),
477 toml::Value::Table(t) => {
478 let map: serde_json::Map<String, serde_json::Value> = t
479 .iter()
480 .map(|(k, v)| (k.clone(), toml_to_json(v)))
481 .collect();
482 serde_json::Value::Object(map)
483 }
484 toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
485 }
486}
487
488fn extract_extra_data(
490 toml_content: &Value,
491) -> Option<std::collections::HashMap<String, serde_json::Value>> {
492 use serde_json::json;
493 let mut extra_data = std::collections::HashMap::new();
494
495 if let Some(package) = toml_content.get(FIELD_PACKAGE).and_then(|v| v.as_table()) {
496 if let Some(rust_version_value) = package.get(FIELD_RUST_VERSION) {
498 if let Some(rust_version_str) = rust_version_value.as_str() {
499 extra_data.insert("rust_version".to_string(), json!(rust_version_str));
500 } else if rust_version_value
501 .as_table()
502 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
503 {
504 extra_data.insert("rust-version".to_string(), json!("workspace"));
505 }
506 }
507
508 if let Some(edition_value) = package.get(FIELD_EDITION) {
510 if let Some(edition_str) = edition_value.as_str() {
511 extra_data.insert("rust_edition".to_string(), json!(edition_str));
512 } else if edition_value
513 .as_table()
514 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
515 {
516 extra_data.insert("edition".to_string(), json!("workspace"));
517 }
518 }
519
520 if let Some(documentation) = package.get("documentation").and_then(|v| v.as_str()) {
522 extra_data.insert("documentation_url".to_string(), json!(documentation));
523 }
524
525 if let Some(license_file) = package.get(FIELD_LICENSE_FILE).and_then(|v| v.as_str()) {
527 extra_data.insert("license_file".to_string(), json!(license_file));
528 }
529
530 if let Some(readme_value) = package.get(FIELD_README) {
531 if let Some(readme_file) = readme_value.as_str() {
532 extra_data.insert("readme_file".to_string(), json!(readme_file));
533 } else if let Some(readme_enabled) = readme_value.as_bool() {
534 extra_data.insert("readme".to_string(), json!(readme_enabled));
535 } else if readme_value
536 .as_table()
537 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
538 {
539 extra_data.insert("readme".to_string(), json!("workspace"));
540 }
541 }
542
543 if let Some(publish_value) = package.get(FIELD_PUBLISH) {
544 extra_data.insert("publish".to_string(), toml_to_json(publish_value));
545 }
546
547 if let Some(version_value) = package.get(FIELD_VERSION)
550 && version_value
551 .as_table()
552 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
553 {
554 extra_data.insert("version".to_string(), json!("workspace"));
555 }
556
557 if let Some(license_value) = package.get(FIELD_LICENSE)
559 && license_value
560 .as_table()
561 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
562 {
563 extra_data.insert("license".to_string(), json!("workspace"));
564 }
565
566 if let Some(homepage_value) = package.get(FIELD_HOMEPAGE)
568 && homepage_value
569 .as_table()
570 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
571 {
572 extra_data.insert("homepage".to_string(), json!("workspace"));
573 }
574
575 if let Some(repository_value) = package.get(FIELD_REPOSITORY)
577 && repository_value
578 .as_table()
579 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
580 {
581 extra_data.insert("repository".to_string(), json!("workspace"));
582 }
583
584 if let Some(categories_value) = package.get(FIELD_CATEGORIES)
586 && categories_value
587 .as_table()
588 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
589 {
590 extra_data.insert("categories".to_string(), json!("workspace"));
591 }
592
593 if let Some(authors_value) = package.get(FIELD_AUTHORS)
595 && authors_value
596 .as_table()
597 .is_some_and(|t| t.get("workspace") == Some(&toml::Value::Boolean(true)))
598 {
599 extra_data.insert("authors".to_string(), json!("workspace"));
600 }
601 }
602
603 if let Some(workspace_value) = toml_content.get("workspace") {
605 extra_data.insert("workspace".to_string(), toml_to_json(workspace_value));
606 }
607
608 if extra_data.is_empty() {
609 None
610 } else {
611 Some(extra_data)
612 }
613}
614
615fn default_package_data() -> PackageData {
616 PackageData {
617 package_type: Some(CargoParser::PACKAGE_TYPE),
618 datasource_id: Some(DatasourceId::CargoToml),
619 ..Default::default()
620 }
621}
622
// Registers this parser through the crate-level `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; judging by the values, the
// arguments appear to be a description, the path globs to match, the package
// type, the primary language and a documentation URL — confirm against the
// macro definition.
crate::register_parser!(
    "Rust Cargo.toml manifest",
    &["**/Cargo.toml", "**/cargo.toml"],
    "cargo",
    "Rust",
    Some("https://doc.rust-lang.org/cargo/reference/manifest.html"),
);