1use std::path::Path;
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use serde_json::Value as JsonValue;
32use yaml_serde::Value as YamlValue;
33
34use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
35use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
36
37use super::PackageParser;
38use super::license_normalization::{
39 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
40 combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
41 normalize_spdx_expression,
42};
43
// Top-level keys looked up in the parsed META.json / META.yml documents.

/// Distribution name; read by both the JSON and the YAML parser.
const FIELD_NAME: &str = "name";
/// Distribution version; may hold a string or a number.
const FIELD_VERSION: &str = "version";
/// Short summary that is mapped onto the package description.
const FIELD_ABSTRACT: &str = "abstract";
/// Description consulted by the META.yml parser as a fallback for `abstract`.
const FIELD_DESCRIPTION: &str = "description";
/// License declaration; handled as a single string or a list of strings.
const FIELD_LICENSE: &str = "license";
/// List of author strings, each optionally carrying an email in `<...>`.
const FIELD_AUTHOR: &str = "author";
/// Map holding the homepage, repository and bugtracker entries.
const FIELD_RESOURCES: &str = "resources";
/// META.json map of per-phase prerequisites (runtime, build, test, configure).
const FIELD_PREREQS: &str = "prereqs";
/// META.yml map of runtime requirements.
const FIELD_REQUIRES: &str = "requires";
/// META.yml map of build requirements.
const FIELD_BUILD_REQUIRES: &str = "build_requires";
/// META.yml map of test requirements.
const FIELD_TEST_REQUIRES: &str = "test_requires";
/// META.yml map of configure requirements.
const FIELD_CONFIGURE_REQUIRES: &str = "configure_requires";
56
57pub struct CpanMetaJsonParser;
62
63impl PackageParser for CpanMetaJsonParser {
64 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
65
66 fn is_match(path: &Path) -> bool {
67 path.file_name().is_some_and(|name| name == "META.json")
68 }
69
70 fn extract_packages(path: &Path) -> Vec<PackageData> {
71 let json = match read_and_parse_json(path) {
72 Ok(json) => json,
73 Err(e) => {
74 warn!("Failed to parse META.json at {:?}: {}", path, e);
75 return vec![default_package_data(DatasourceId::CpanMetaJson)];
76 }
77 };
78
79 let name = json
80 .get(FIELD_NAME)
81 .and_then(|v| v.as_str())
82 .map(|s| truncate_field(s.to_string()));
83
84 let version = extract_version_from_json(&json);
85
86 let description = json
87 .get(FIELD_ABSTRACT)
88 .and_then(|v| v.as_str())
89 .map(|s| truncate_field(s.to_string()));
90
91 let extracted_license_statement = extract_license_from_json(&json);
92 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
93 normalize_cpan_declared_license(
94 json.get(FIELD_LICENSE),
95 extracted_license_statement.as_deref(),
96 );
97 let declared_license_expression = declared_license_expression.map(truncate_field);
98 let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
99 let parties = extract_parties_from_json(&json);
100 let dependencies = extract_dependencies_from_json(&json);
101 let (homepage_url, vcs_url, code_view_url, bug_tracking_url) =
102 extract_resources_from_json(&json);
103
104 vec![PackageData {
105 package_type: Some(Self::PACKAGE_TYPE),
106 name,
107 version,
108 description,
109 declared_license_expression,
110 declared_license_expression_spdx,
111 license_detections,
112 extracted_license_statement,
113 parties,
114 dependencies,
115 homepage_url,
116 vcs_url,
117 code_view_url,
118 bug_tracking_url,
119 primary_language: Some("Perl".to_string()),
120 datasource_id: Some(DatasourceId::CpanMetaJson),
121 ..Default::default()
122 }]
123 }
124
125 fn metadata() -> Vec<super::metadata::ParserMetadata> {
126 vec![super::metadata::ParserMetadata {
127 description: "CPAN Perl META.json",
128 file_patterns: &["**/META.json"],
129 package_type: "cpan",
130 primary_language: "Perl",
131 documentation_url: Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
132 }]
133 }
134}
135
136pub struct CpanMetaYmlParser;
140
141impl PackageParser for CpanMetaYmlParser {
142 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
143
144 fn is_match(path: &Path) -> bool {
145 path.file_name().is_some_and(|name| name == "META.yml")
146 }
147
148 fn extract_packages(path: &Path) -> Vec<PackageData> {
149 let yaml = match read_and_parse_yaml(path) {
150 Ok(yaml) => yaml,
151 Err(e) => {
152 warn!("Failed to parse META.yml at {:?}: {}", path, e);
153 return vec![default_package_data(DatasourceId::CpanMetaYml)];
154 }
155 };
156
157 let name = yaml
158 .get(FIELD_NAME)
159 .and_then(|v| v.as_str())
160 .map(|s| truncate_field(s.to_string()));
161
162 let version = extract_version_from_yaml(&yaml);
163
164 let description = yaml
165 .get(FIELD_ABSTRACT)
166 .or_else(|| yaml.get(FIELD_DESCRIPTION))
167 .and_then(|v| v.as_str())
168 .map(|s| truncate_field(s.to_string()));
169
170 let extracted_license_statement = extract_license_from_yaml(&yaml);
171 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
172 normalize_cpan_declared_license(
173 yaml.get(YamlValue::String(FIELD_LICENSE.to_string())),
174 extracted_license_statement.as_deref(),
175 );
176 let declared_license_expression = declared_license_expression.map(truncate_field);
177 let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
178 let parties = extract_parties_from_yaml(&yaml);
179 let dependencies = extract_dependencies_from_yaml(&yaml);
180 let (homepage_url, vcs_url, bug_tracking_url) = extract_resources_from_yaml(&yaml);
181
182 vec![PackageData {
183 package_type: Some(Self::PACKAGE_TYPE),
184 name,
185 version,
186 description,
187 declared_license_expression,
188 declared_license_expression_spdx,
189 license_detections,
190 extracted_license_statement,
191 parties,
192 dependencies,
193 homepage_url,
194 vcs_url,
195 bug_tracking_url,
196 primary_language: Some("Perl".to_string()),
197 datasource_id: Some(DatasourceId::CpanMetaYml),
198 ..Default::default()
199 }]
200 }
201
202 fn metadata() -> Vec<super::metadata::ParserMetadata> {
203 vec![super::metadata::ParserMetadata {
204 description: "CPAN Perl META.yml",
205 file_patterns: &["**/META.yml"],
206 package_type: "cpan",
207 primary_language: "Perl",
208 documentation_url: Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
209 }]
210 }
211}
212
213pub struct CpanManifestParser;
217
218impl PackageParser for CpanManifestParser {
219 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
220
221 fn is_match(path: &Path) -> bool {
222 path.file_name().is_some_and(|name| name == "MANIFEST")
223 }
224
225 fn extract_packages(path: &Path) -> Vec<PackageData> {
226 let content = match read_file_to_string(path, None) {
227 Ok(content) => content,
228 Err(e) => {
229 warn!("Failed to read MANIFEST at {:?}: {}", path, e);
230 return vec![default_package_data(DatasourceId::CpanManifest)];
231 }
232 };
233
234 let file_references = content
235 .lines()
236 .take(MAX_ITERATION_COUNT)
237 .filter(|line| !line.trim().is_empty())
238 .filter(|line| !line.trim().starts_with('#'))
239 .map(|line| {
240 let path = line.split_whitespace().next().unwrap_or(line);
241 FileReference {
242 path: truncate_field(path.to_string()),
243 size: None,
244 sha1: None,
245 md5: None,
246 sha256: None,
247 sha512: None,
248 extra_data: None,
249 }
250 })
251 .collect();
252
253 vec![PackageData {
254 package_type: Some(Self::PACKAGE_TYPE),
255 file_references,
256 primary_language: Some("Perl".to_string()),
257 datasource_id: Some(DatasourceId::CpanManifest),
258 ..Default::default()
259 }]
260 }
261
262 fn metadata() -> Vec<super::metadata::ParserMetadata> {
263 vec![super::metadata::ParserMetadata {
264 description: "CPAN Perl MANIFEST",
265 file_patterns: &["**/MANIFEST"],
266 package_type: "cpan",
267 primary_language: "Perl",
268 documentation_url: Some("https://metacpan.org/pod/Module::Manifest"),
269 }]
270 }
271}
272
273fn default_package_data(datasource_id: DatasourceId) -> PackageData {
274 PackageData {
275 package_type: Some(CpanMetaJsonParser::PACKAGE_TYPE),
276 primary_language: Some("Perl".to_string()),
277 datasource_id: Some(datasource_id),
278 ..Default::default()
279 }
280}
281
282fn read_and_parse_json(path: &Path) -> Result<serde_json::Map<String, JsonValue>, String> {
283 let content =
284 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
285 let json: JsonValue =
286 serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
287 json.as_object()
288 .cloned()
289 .ok_or_else(|| "Root JSON is not an object".to_string())
290}
291
292fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
293 let content =
294 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
295 let yaml: YamlValue =
296 yaml_serde::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
297 yaml.as_mapping()
298 .cloned()
299 .ok_or_else(|| "Root YAML is not a mapping".to_string())
300}
301
302fn extract_version_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
303 json.get(FIELD_VERSION).and_then(|v| match v {
304 JsonValue::String(s) => Some(truncate_field(s.clone())),
305 JsonValue::Number(n) => Some(truncate_field(n.to_string())),
306 _ => None,
307 })
308}
309
310fn extract_version_from_yaml(yaml: &yaml_serde::Mapping) -> Option<String> {
311 yaml.get(YamlValue::String(FIELD_VERSION.to_string()))
312 .and_then(|v| match v {
313 YamlValue::String(s) => Some(truncate_field(s.clone())),
314 YamlValue::Number(n) => Some(truncate_field(n.to_string())),
315 _ => None,
316 })
317}
318
319fn extract_license_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
320 json.get(FIELD_LICENSE).and_then(|v| match v {
321 JsonValue::String(s) => Some(truncate_field(s.clone())),
322 JsonValue::Array(arr) => {
323 let licenses: Vec<String> = arr
324 .iter()
325 .take(MAX_ITERATION_COUNT)
326 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
327 .collect();
328 if licenses.is_empty() {
329 None
330 } else {
331 Some(truncate_field(licenses.join(" AND ")))
332 }
333 }
334 _ => None,
335 })
336}
337
338fn extract_license_from_yaml(yaml: &yaml_serde::Mapping) -> Option<String> {
339 yaml.get(YamlValue::String(FIELD_LICENSE.to_string()))
340 .and_then(|v| match v {
341 YamlValue::String(s) => Some(truncate_field(s.clone())),
342 YamlValue::Sequence(arr) => {
343 let licenses: Vec<String> = arr
344 .iter()
345 .take(MAX_ITERATION_COUNT)
346 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
347 .collect();
348 if licenses.is_empty() {
349 None
350 } else {
351 Some(truncate_field(licenses.join(" AND ")))
352 }
353 }
354 _ => None,
355 })
356}
357
358fn normalize_cpan_declared_license(
359 raw_license: Option<&impl LicenseValueAdapter>,
360 extracted_license_statement: Option<&str>,
361) -> (
362 Option<String>,
363 Option<String>,
364 Vec<crate::models::LicenseDetection>,
365) {
366 let Some(raw_license) = raw_license else {
367 return empty_declared_license_data();
368 };
369 let normalized = raw_license
370 .license_values()
371 .into_iter()
372 .map(|value| normalize_cpan_license_value(&value))
373 .collect::<Option<Vec<_>>>();
374
375 if let Some(normalized) = normalized
376 && let Some(combined) = combine_normalized_licenses(normalized, " AND ")
377 {
378 return build_declared_license_data(
379 combined,
380 DeclaredLicenseMatchMetadata::single_line(
381 extracted_license_statement.unwrap_or_default(),
382 ),
383 );
384 }
385
386 empty_declared_license_data()
387}
388
/// Uniform view over the JSON and YAML value types for reading the license
/// entries stored under the `license` key.
trait LicenseValueAdapter {
    /// Returns the license strings held by this value; values that are neither
    /// a string nor a list yield an empty vector in both implementations.
    fn license_values(&self) -> Vec<String>;
}
392
393impl LicenseValueAdapter for JsonValue {
394 fn license_values(&self) -> Vec<String> {
395 match self {
396 JsonValue::String(value) => vec![truncate_field(value.trim().to_string())],
397 JsonValue::Array(values) => values
398 .iter()
399 .take(MAX_ITERATION_COUNT)
400 .filter_map(|value| value.as_str())
401 .map(str::trim)
402 .filter(|value| !value.is_empty())
403 .map(|s| truncate_field(s.to_string()))
404 .collect(),
405 _ => Vec::new(),
406 }
407 }
408}
409
410impl LicenseValueAdapter for YamlValue {
411 fn license_values(&self) -> Vec<String> {
412 match self {
413 YamlValue::String(value) => vec![truncate_field(value.trim().to_string())],
414 YamlValue::Sequence(values) => values
415 .iter()
416 .take(MAX_ITERATION_COUNT)
417 .filter_map(|value| value.as_str())
418 .map(str::trim)
419 .filter(|value| !value.is_empty())
420 .map(|s| truncate_field(s.to_string()))
421 .collect(),
422 _ => Vec::new(),
423 }
424 }
425}
426
427fn normalize_cpan_license_value(value: &str) -> Option<NormalizedDeclaredLicense> {
428 match value.trim() {
429 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
430 "gpl-1.0-plus OR artistic-perl-1.0",
431 "GPL-1.0-or-later OR Artistic-1.0-Perl",
432 )),
433 "artistic_2" => Some(NormalizedDeclaredLicense::new(
434 "artistic-2.0",
435 "Artistic-2.0",
436 )),
437 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
438 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
439 }
440}
441
442fn extract_parties_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Party> {
443 json.get(FIELD_AUTHOR)
444 .and_then(|v| v.as_array())
445 .map_or_else(Vec::new, |authors| {
446 authors
447 .iter()
448 .take(MAX_ITERATION_COUNT)
449 .filter_map(|author| {
450 author.as_str().map(|s| {
451 let (name, email) = parse_author_string(s);
452 Party {
453 r#type: Some("person".to_string()),
454 role: Some("author".to_string()),
455 name,
456 email,
457 url: None,
458 organization: None,
459 organization_url: None,
460 timezone: None,
461 }
462 })
463 })
464 .collect()
465 })
466}
467
468fn extract_parties_from_yaml(yaml: &yaml_serde::Mapping) -> Vec<Party> {
469 yaml.get(YamlValue::String(FIELD_AUTHOR.to_string()))
470 .and_then(|v| v.as_sequence())
471 .map_or_else(Vec::new, |authors| {
472 authors
473 .iter()
474 .take(MAX_ITERATION_COUNT)
475 .filter_map(|author| {
476 author.as_str().map(|s| {
477 let (name, email) = parse_author_string(s);
478 Party {
479 r#type: Some("person".to_string()),
480 role: Some("author".to_string()),
481 name,
482 email,
483 url: None,
484 organization: None,
485 organization_url: None,
486 timezone: None,
487 }
488 })
489 })
490 .collect()
491 })
492}
493
494fn parse_author_string(author_str: &str) -> (Option<String>, Option<String>) {
495 if let Some(email_start) = author_str.find('<')
496 && let Some(email_end) = author_str.find('>')
497 && email_start < email_end
498 {
499 let name = author_str[..email_start].trim();
500 let email = author_str[email_start + 1..email_end].trim();
501 return (
502 if name.is_empty() {
503 None
504 } else {
505 Some(truncate_field(name.to_string()))
506 },
507 if email.is_empty() {
508 None
509 } else {
510 Some(truncate_field(email.to_string()))
511 },
512 );
513 }
514 let trimmed = author_str.trim();
515 (
516 if trimmed.is_empty() {
517 None
518 } else {
519 Some(truncate_field(trimmed.to_string()))
520 },
521 None,
522 )
523}
524
525fn extract_resources_from_json(
526 json: &serde_json::Map<String, JsonValue>,
527) -> (
528 Option<String>,
529 Option<String>,
530 Option<String>,
531 Option<String>,
532) {
533 let resources = match json.get(FIELD_RESOURCES).and_then(|v| v.as_object()) {
534 Some(r) => r,
535 None => return (None, None, None, None),
536 };
537
538 let homepage_url = resources
539 .get("homepage")
540 .and_then(|v| v.as_str())
541 .map(|s| truncate_field(s.to_string()));
542
543 let vcs_url = resources.get("repository").and_then(|v| match v {
544 JsonValue::String(s) => Some(truncate_field(s.clone())),
545 JsonValue::Object(obj) => obj
546 .get("url")
547 .and_then(|u| u.as_str())
548 .map(|s| truncate_field(s.to_string())),
549 _ => None,
550 });
551
552 let code_view_url = resources
553 .get("repository")
554 .and_then(|v| v.as_object())
555 .and_then(|obj| {
556 obj.get("web")
557 .and_then(|u| u.as_str())
558 .map(|s| truncate_field(s.to_string()))
559 });
560
561 let bug_tracking_url = resources.get("bugtracker").and_then(|v| match v {
562 JsonValue::String(s) => Some(truncate_field(s.clone())),
563 JsonValue::Object(obj) => obj
564 .get("web")
565 .and_then(|u| u.as_str())
566 .map(|s| truncate_field(s.to_string())),
567 _ => None,
568 });
569
570 (homepage_url, vcs_url, code_view_url, bug_tracking_url)
571}
572
573fn extract_resources_from_yaml(
574 yaml: &yaml_serde::Mapping,
575) -> (Option<String>, Option<String>, Option<String>) {
576 let resources = match yaml
577 .get(YamlValue::String(FIELD_RESOURCES.to_string()))
578 .and_then(|v| v.as_mapping())
579 {
580 Some(r) => r,
581 None => return (None, None, None),
582 };
583
584 let homepage_url = resources
585 .get(YamlValue::String("homepage".to_string()))
586 .and_then(|v| v.as_str())
587 .map(|s| truncate_field(s.to_string()));
588
589 let vcs_url = resources
590 .get(YamlValue::String("repository".to_string()))
591 .and_then(|v| v.as_str())
592 .map(|s| truncate_field(s.to_string()));
593
594 let bug_tracking_url = resources
595 .get(YamlValue::String("bugtracker".to_string()))
596 .and_then(|v| v.as_str())
597 .map(|s| truncate_field(s.to_string()));
598
599 (homepage_url, vcs_url, bug_tracking_url)
600}
601
602fn extract_dependencies_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Dependency> {
603 let mut dependencies = Vec::new();
604
605 let prereqs = match json.get(FIELD_PREREQS).and_then(|v| v.as_object()) {
606 Some(p) => p,
607 None => return dependencies,
608 };
609
610 if let Some(runtime) = prereqs.get("runtime").and_then(|v| v.as_object())
612 && let Some(requires) = runtime.get("requires").and_then(|v| v.as_object())
613 {
614 dependencies.extend(extract_dependency_group(requires, "runtime", true, false));
615 }
616
617 if let Some(build) = prereqs.get("build").and_then(|v| v.as_object())
619 && let Some(requires) = build.get("requires").and_then(|v| v.as_object())
620 {
621 dependencies.extend(extract_dependency_group(requires, "build", false, false));
622 }
623
624 if let Some(test) = prereqs.get("test").and_then(|v| v.as_object())
626 && let Some(requires) = test.get("requires").and_then(|v| v.as_object())
627 {
628 dependencies.extend(extract_dependency_group(requires, "test", false, false));
629 }
630
631 if let Some(configure) = prereqs.get("configure").and_then(|v| v.as_object())
633 && let Some(requires) = configure.get("requires").and_then(|v| v.as_object())
634 {
635 dependencies.extend(extract_dependency_group(
636 requires,
637 "configure",
638 false,
639 false,
640 ));
641 }
642
643 dependencies
644}
645
646fn extract_dependencies_from_yaml(yaml: &yaml_serde::Mapping) -> Vec<Dependency> {
647 let mut dependencies = Vec::new();
648
649 if let Some(requires) = yaml
651 .get(YamlValue::String(FIELD_REQUIRES.to_string()))
652 .and_then(|v| v.as_mapping())
653 {
654 dependencies.extend(extract_yaml_dependency_group(
655 requires, "runtime", true, false,
656 ));
657 }
658
659 if let Some(build_requires) = yaml
660 .get(YamlValue::String(FIELD_BUILD_REQUIRES.to_string()))
661 .and_then(|v| v.as_mapping())
662 {
663 dependencies.extend(extract_yaml_dependency_group(
664 build_requires,
665 "build",
666 false,
667 false,
668 ));
669 }
670
671 if let Some(test_requires) = yaml
672 .get(YamlValue::String(FIELD_TEST_REQUIRES.to_string()))
673 .and_then(|v| v.as_mapping())
674 {
675 dependencies.extend(extract_yaml_dependency_group(
676 test_requires,
677 "test",
678 false,
679 false,
680 ));
681 }
682
683 if let Some(configure_requires) = yaml
684 .get(YamlValue::String(FIELD_CONFIGURE_REQUIRES.to_string()))
685 .and_then(|v| v.as_mapping())
686 {
687 dependencies.extend(extract_yaml_dependency_group(
688 configure_requires,
689 "configure",
690 false,
691 false,
692 ));
693 }
694
695 dependencies
696}
697
698fn extract_dependency_group(
699 deps: &serde_json::Map<String, JsonValue>,
700 scope: &str,
701 is_runtime: bool,
702 is_optional: bool,
703) -> Vec<Dependency> {
704 deps.iter()
705 .take(MAX_ITERATION_COUNT)
706 .filter_map(|(name, version)| {
707 if name == "perl" {
708 return None;
709 }
710
711 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
712 let purl = purl.map(truncate_field);
713
714 let extracted_requirement = match version {
715 JsonValue::String(s) => Some(truncate_field(s.clone())),
716 JsonValue::Number(n) => Some(truncate_field(n.to_string())),
717 _ => None,
718 };
719
720 Some(Dependency {
721 purl,
722 extracted_requirement,
723 scope: Some(truncate_field(scope.to_string())),
724 is_runtime: Some(is_runtime),
725 is_optional: Some(is_optional),
726 is_pinned: None,
727 is_direct: Some(true),
728 resolved_package: None,
729 extra_data: None,
730 })
731 })
732 .collect()
733}
734
735fn extract_yaml_dependency_group(
736 deps: &yaml_serde::Mapping,
737 scope: &str,
738 is_runtime: bool,
739 is_optional: bool,
740) -> Vec<Dependency> {
741 deps.iter()
742 .take(MAX_ITERATION_COUNT)
743 .filter_map(|(key, value)| {
744 let name = key.as_str()?;
745
746 if name == "perl" {
747 return None;
748 }
749
750 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
751 let purl = purl.map(truncate_field);
752
753 let extracted_requirement = match value {
754 YamlValue::String(s) => Some(truncate_field(s.clone())),
755 YamlValue::Number(n) => Some(truncate_field(n.to_string())),
756 _ => None,
757 };
758
759 Some(Dependency {
760 purl,
761 extracted_requirement,
762 scope: Some(truncate_field(scope.to_string())),
763 is_runtime: Some(is_runtime),
764 is_optional: Some(is_optional),
765 is_pinned: None,
766 is_direct: Some(true),
767 resolved_package: None,
768 extra_data: None,
769 })
770 })
771 .collect()
772}