1use std::path::Path;
25
26use crate::parser_warn as warn;
27use packageurl::PackageUrl;
28use serde_json::Value as JsonValue;
29use yaml_serde::Value as YamlValue;
30
31use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
32use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
33
34use super::PackageParser;
35use super::license_normalization::{
36 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
37 combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
38 normalize_spdx_expression,
39};
40
// Keys looked up in the parsed META.json / META.yml documents by the
// extraction functions in this file.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_ABSTRACT: &str = "abstract";
// Only consulted by the META.yml parser, as a fallback when `abstract` is absent.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHOR: &str = "author";
const FIELD_RESOURCES: &str = "resources";
// META.json keeps its dependencies nested under this key.
const FIELD_PREREQS: &str = "prereqs";
// META.yml keeps its dependencies in these flat top-level mappings.
const FIELD_REQUIRES: &str = "requires";
const FIELD_BUILD_REQUIRES: &str = "build_requires";
const FIELD_TEST_REQUIRES: &str = "test_requires";
const FIELD_CONFIGURE_REQUIRES: &str = "configure_requires";
53
54pub struct CpanMetaJsonParser;
59
60impl PackageParser for CpanMetaJsonParser {
61 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
62
63 fn is_match(path: &Path) -> bool {
64 path.file_name().is_some_and(|name| name == "META.json")
65 }
66
67 fn extract_packages(path: &Path) -> Vec<PackageData> {
68 let json = match read_and_parse_json(path) {
69 Ok(json) => json,
70 Err(e) => {
71 warn!("Failed to parse META.json at {:?}: {}", path, e);
72 return vec![default_package_data(DatasourceId::CpanMetaJson)];
73 }
74 };
75
76 let name = json
77 .get(FIELD_NAME)
78 .and_then(|v| v.as_str())
79 .map(|s| truncate_field(s.to_string()));
80
81 let version = extract_version_from_json(&json);
82
83 let description = json
84 .get(FIELD_ABSTRACT)
85 .and_then(|v| v.as_str())
86 .map(|s| truncate_field(s.to_string()));
87
88 let extracted_license_statement = extract_license_from_json(&json);
89 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
90 normalize_cpan_declared_license(
91 json.get(FIELD_LICENSE),
92 extracted_license_statement.as_deref(),
93 );
94 let declared_license_expression = declared_license_expression.map(truncate_field);
95 let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
96 let parties = extract_parties_from_json(&json);
97 let dependencies = extract_dependencies_from_json(&json);
98 let (homepage_url, vcs_url, code_view_url, bug_tracking_url) =
99 extract_resources_from_json(&json);
100
101 vec![PackageData {
102 package_type: Some(Self::PACKAGE_TYPE),
103 name,
104 version,
105 description,
106 declared_license_expression,
107 declared_license_expression_spdx,
108 license_detections,
109 extracted_license_statement,
110 parties,
111 dependencies,
112 homepage_url,
113 vcs_url,
114 code_view_url,
115 bug_tracking_url,
116 primary_language: Some("Perl".to_string()),
117 datasource_id: Some(DatasourceId::CpanMetaJson),
118 ..Default::default()
119 }]
120 }
121}
122
123pub struct CpanMetaYmlParser;
127
128impl PackageParser for CpanMetaYmlParser {
129 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
130
131 fn is_match(path: &Path) -> bool {
132 path.file_name().is_some_and(|name| name == "META.yml")
133 }
134
135 fn extract_packages(path: &Path) -> Vec<PackageData> {
136 let yaml = match read_and_parse_yaml(path) {
137 Ok(yaml) => yaml,
138 Err(e) => {
139 warn!("Failed to parse META.yml at {:?}: {}", path, e);
140 return vec![default_package_data(DatasourceId::CpanMetaYml)];
141 }
142 };
143
144 let name = yaml
145 .get(FIELD_NAME)
146 .and_then(|v| v.as_str())
147 .map(|s| truncate_field(s.to_string()));
148
149 let version = extract_version_from_yaml(&yaml);
150
151 let description = yaml
152 .get(FIELD_ABSTRACT)
153 .or_else(|| yaml.get(FIELD_DESCRIPTION))
154 .and_then(|v| v.as_str())
155 .map(|s| truncate_field(s.to_string()));
156
157 let extracted_license_statement = extract_license_from_yaml(&yaml);
158 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
159 normalize_cpan_declared_license(
160 yaml.get(YamlValue::String(FIELD_LICENSE.to_string())),
161 extracted_license_statement.as_deref(),
162 );
163 let declared_license_expression = declared_license_expression.map(truncate_field);
164 let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
165 let parties = extract_parties_from_yaml(&yaml);
166 let dependencies = extract_dependencies_from_yaml(&yaml);
167 let (homepage_url, vcs_url, bug_tracking_url) = extract_resources_from_yaml(&yaml);
168
169 vec![PackageData {
170 package_type: Some(Self::PACKAGE_TYPE),
171 name,
172 version,
173 description,
174 declared_license_expression,
175 declared_license_expression_spdx,
176 license_detections,
177 extracted_license_statement,
178 parties,
179 dependencies,
180 homepage_url,
181 vcs_url,
182 bug_tracking_url,
183 primary_language: Some("Perl".to_string()),
184 datasource_id: Some(DatasourceId::CpanMetaYml),
185 ..Default::default()
186 }]
187 }
188}
189
190pub struct CpanManifestParser;
194
195impl PackageParser for CpanManifestParser {
196 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
197
198 fn is_match(path: &Path) -> bool {
199 path.file_name().is_some_and(|name| name == "MANIFEST")
200 }
201
202 fn extract_packages(path: &Path) -> Vec<PackageData> {
203 let content = match read_file_to_string(path, None) {
204 Ok(content) => content,
205 Err(e) => {
206 warn!("Failed to read MANIFEST at {:?}: {}", path, e);
207 return vec![default_package_data(DatasourceId::CpanManifest)];
208 }
209 };
210
211 let file_references = content
212 .lines()
213 .take(MAX_ITERATION_COUNT)
214 .filter(|line| !line.trim().is_empty())
215 .filter(|line| !line.trim().starts_with('#'))
216 .map(|line| {
217 let path = line.split_whitespace().next().unwrap_or(line);
218 FileReference {
219 path: truncate_field(path.to_string()),
220 size: None,
221 sha1: None,
222 md5: None,
223 sha256: None,
224 sha512: None,
225 extra_data: None,
226 }
227 })
228 .collect();
229
230 vec![PackageData {
231 package_type: Some(Self::PACKAGE_TYPE),
232 file_references,
233 primary_language: Some("Perl".to_string()),
234 datasource_id: Some(DatasourceId::CpanManifest),
235 ..Default::default()
236 }]
237 }
238}
239
240fn default_package_data(datasource_id: DatasourceId) -> PackageData {
241 PackageData {
242 package_type: Some(CpanMetaJsonParser::PACKAGE_TYPE),
243 primary_language: Some("Perl".to_string()),
244 datasource_id: Some(datasource_id),
245 ..Default::default()
246 }
247}
248
249fn read_and_parse_json(path: &Path) -> Result<serde_json::Map<String, JsonValue>, String> {
250 let content =
251 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
252 let json: JsonValue =
253 serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
254 json.as_object()
255 .cloned()
256 .ok_or_else(|| "Root JSON is not an object".to_string())
257}
258
259fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
260 let content =
261 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
262 let yaml: YamlValue =
263 yaml_serde::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
264 yaml.as_mapping()
265 .cloned()
266 .ok_or_else(|| "Root YAML is not a mapping".to_string())
267}
268
269fn extract_version_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
270 json.get(FIELD_VERSION).and_then(|v| match v {
271 JsonValue::String(s) => Some(truncate_field(s.clone())),
272 JsonValue::Number(n) => Some(truncate_field(n.to_string())),
273 _ => None,
274 })
275}
276
277fn extract_version_from_yaml(yaml: &yaml_serde::Mapping) -> Option<String> {
278 yaml.get(YamlValue::String(FIELD_VERSION.to_string()))
279 .and_then(|v| match v {
280 YamlValue::String(s) => Some(truncate_field(s.clone())),
281 YamlValue::Number(n) => Some(truncate_field(n.to_string())),
282 _ => None,
283 })
284}
285
286fn extract_license_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
287 json.get(FIELD_LICENSE).and_then(|v| match v {
288 JsonValue::String(s) => Some(truncate_field(s.clone())),
289 JsonValue::Array(arr) => {
290 let licenses: Vec<String> = arr
291 .iter()
292 .take(MAX_ITERATION_COUNT)
293 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
294 .collect();
295 if licenses.is_empty() {
296 None
297 } else {
298 Some(truncate_field(licenses.join(" AND ")))
299 }
300 }
301 _ => None,
302 })
303}
304
305fn extract_license_from_yaml(yaml: &yaml_serde::Mapping) -> Option<String> {
306 yaml.get(YamlValue::String(FIELD_LICENSE.to_string()))
307 .and_then(|v| match v {
308 YamlValue::String(s) => Some(truncate_field(s.clone())),
309 YamlValue::Sequence(arr) => {
310 let licenses: Vec<String> = arr
311 .iter()
312 .take(MAX_ITERATION_COUNT)
313 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
314 .collect();
315 if licenses.is_empty() {
316 None
317 } else {
318 Some(truncate_field(licenses.join(" AND ")))
319 }
320 }
321 _ => None,
322 })
323}
324
325fn normalize_cpan_declared_license(
326 raw_license: Option<&impl LicenseValueAdapter>,
327 extracted_license_statement: Option<&str>,
328) -> (
329 Option<String>,
330 Option<String>,
331 Vec<crate::models::LicenseDetection>,
332) {
333 let Some(raw_license) = raw_license else {
334 return empty_declared_license_data();
335 };
336 let normalized = raw_license
337 .license_values()
338 .into_iter()
339 .map(|value| normalize_cpan_license_value(&value))
340 .collect::<Option<Vec<_>>>();
341
342 if let Some(normalized) = normalized
343 && let Some(combined) = combine_normalized_licenses(normalized, " AND ")
344 {
345 return build_declared_license_data(
346 combined,
347 DeclaredLicenseMatchMetadata::single_line(
348 extracted_license_statement.unwrap_or_default(),
349 ),
350 );
351 }
352
353 empty_declared_license_data()
354}
355
/// Common view over the raw `license` value of a metadata document, so that
/// `normalize_cpan_declared_license` can accept either a JSON or a YAML value.
trait LicenseValueAdapter {
    /// Returns the individual license strings carried by this value.
    fn license_values(&self) -> Vec<String>;
}
359
360impl LicenseValueAdapter for JsonValue {
361 fn license_values(&self) -> Vec<String> {
362 match self {
363 JsonValue::String(value) => vec![truncate_field(value.trim().to_string())],
364 JsonValue::Array(values) => values
365 .iter()
366 .take(MAX_ITERATION_COUNT)
367 .filter_map(|value| value.as_str())
368 .map(str::trim)
369 .filter(|value| !value.is_empty())
370 .map(|s| truncate_field(s.to_string()))
371 .collect(),
372 _ => Vec::new(),
373 }
374 }
375}
376
377impl LicenseValueAdapter for YamlValue {
378 fn license_values(&self) -> Vec<String> {
379 match self {
380 YamlValue::String(value) => vec![truncate_field(value.trim().to_string())],
381 YamlValue::Sequence(values) => values
382 .iter()
383 .take(MAX_ITERATION_COUNT)
384 .filter_map(|value| value.as_str())
385 .map(str::trim)
386 .filter(|value| !value.is_empty())
387 .map(|s| truncate_field(s.to_string()))
388 .collect(),
389 _ => Vec::new(),
390 }
391 }
392}
393
394fn normalize_cpan_license_value(value: &str) -> Option<NormalizedDeclaredLicense> {
395 match value.trim() {
396 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
397 "gpl-1.0-plus OR artistic-perl-1.0",
398 "GPL-1.0-or-later OR Artistic-1.0-Perl",
399 )),
400 "artistic_2" => Some(NormalizedDeclaredLicense::new(
401 "artistic-2.0",
402 "Artistic-2.0",
403 )),
404 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
405 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
406 }
407}
408
409fn extract_parties_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Party> {
410 json.get(FIELD_AUTHOR)
411 .and_then(|v| v.as_array())
412 .map_or_else(Vec::new, |authors| {
413 authors
414 .iter()
415 .take(MAX_ITERATION_COUNT)
416 .filter_map(|author| {
417 author.as_str().map(|s| {
418 let (name, email) = parse_author_string(s);
419 Party {
420 r#type: Some("person".to_string()),
421 role: Some("author".to_string()),
422 name,
423 email,
424 url: None,
425 organization: None,
426 organization_url: None,
427 timezone: None,
428 }
429 })
430 })
431 .collect()
432 })
433}
434
435fn extract_parties_from_yaml(yaml: &yaml_serde::Mapping) -> Vec<Party> {
436 yaml.get(YamlValue::String(FIELD_AUTHOR.to_string()))
437 .and_then(|v| v.as_sequence())
438 .map_or_else(Vec::new, |authors| {
439 authors
440 .iter()
441 .take(MAX_ITERATION_COUNT)
442 .filter_map(|author| {
443 author.as_str().map(|s| {
444 let (name, email) = parse_author_string(s);
445 Party {
446 r#type: Some("person".to_string()),
447 role: Some("author".to_string()),
448 name,
449 email,
450 url: None,
451 organization: None,
452 organization_url: None,
453 timezone: None,
454 }
455 })
456 })
457 .collect()
458 })
459}
460
461fn parse_author_string(author_str: &str) -> (Option<String>, Option<String>) {
462 if let Some(email_start) = author_str.find('<')
463 && let Some(email_end) = author_str.find('>')
464 && email_start < email_end
465 {
466 let name = author_str[..email_start].trim();
467 let email = author_str[email_start + 1..email_end].trim();
468 return (
469 if name.is_empty() {
470 None
471 } else {
472 Some(truncate_field(name.to_string()))
473 },
474 if email.is_empty() {
475 None
476 } else {
477 Some(truncate_field(email.to_string()))
478 },
479 );
480 }
481 let trimmed = author_str.trim();
482 (
483 if trimmed.is_empty() {
484 None
485 } else {
486 Some(truncate_field(trimmed.to_string()))
487 },
488 None,
489 )
490}
491
492fn extract_resources_from_json(
493 json: &serde_json::Map<String, JsonValue>,
494) -> (
495 Option<String>,
496 Option<String>,
497 Option<String>,
498 Option<String>,
499) {
500 let resources = match json.get(FIELD_RESOURCES).and_then(|v| v.as_object()) {
501 Some(r) => r,
502 None => return (None, None, None, None),
503 };
504
505 let homepage_url = resources
506 .get("homepage")
507 .and_then(|v| v.as_str())
508 .map(|s| truncate_field(s.to_string()));
509
510 let vcs_url = resources.get("repository").and_then(|v| match v {
511 JsonValue::String(s) => Some(truncate_field(s.clone())),
512 JsonValue::Object(obj) => obj
513 .get("url")
514 .and_then(|u| u.as_str())
515 .map(|s| truncate_field(s.to_string())),
516 _ => None,
517 });
518
519 let code_view_url = resources
520 .get("repository")
521 .and_then(|v| v.as_object())
522 .and_then(|obj| {
523 obj.get("web")
524 .and_then(|u| u.as_str())
525 .map(|s| truncate_field(s.to_string()))
526 });
527
528 let bug_tracking_url = resources.get("bugtracker").and_then(|v| match v {
529 JsonValue::String(s) => Some(truncate_field(s.clone())),
530 JsonValue::Object(obj) => obj
531 .get("web")
532 .and_then(|u| u.as_str())
533 .map(|s| truncate_field(s.to_string())),
534 _ => None,
535 });
536
537 (homepage_url, vcs_url, code_view_url, bug_tracking_url)
538}
539
540fn extract_resources_from_yaml(
541 yaml: &yaml_serde::Mapping,
542) -> (Option<String>, Option<String>, Option<String>) {
543 let resources = match yaml
544 .get(YamlValue::String(FIELD_RESOURCES.to_string()))
545 .and_then(|v| v.as_mapping())
546 {
547 Some(r) => r,
548 None => return (None, None, None),
549 };
550
551 let homepage_url = resources
552 .get(YamlValue::String("homepage".to_string()))
553 .and_then(|v| v.as_str())
554 .map(|s| truncate_field(s.to_string()));
555
556 let vcs_url = resources
557 .get(YamlValue::String("repository".to_string()))
558 .and_then(|v| v.as_str())
559 .map(|s| truncate_field(s.to_string()));
560
561 let bug_tracking_url = resources
562 .get(YamlValue::String("bugtracker".to_string()))
563 .and_then(|v| v.as_str())
564 .map(|s| truncate_field(s.to_string()));
565
566 (homepage_url, vcs_url, bug_tracking_url)
567}
568
569fn extract_dependencies_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Dependency> {
570 let mut dependencies = Vec::new();
571
572 let prereqs = match json.get(FIELD_PREREQS).and_then(|v| v.as_object()) {
573 Some(p) => p,
574 None => return dependencies,
575 };
576
577 if let Some(runtime) = prereqs.get("runtime").and_then(|v| v.as_object())
579 && let Some(requires) = runtime.get("requires").and_then(|v| v.as_object())
580 {
581 dependencies.extend(extract_dependency_group(requires, "runtime", true, false));
582 }
583
584 if let Some(build) = prereqs.get("build").and_then(|v| v.as_object())
586 && let Some(requires) = build.get("requires").and_then(|v| v.as_object())
587 {
588 dependencies.extend(extract_dependency_group(requires, "build", false, false));
589 }
590
591 if let Some(test) = prereqs.get("test").and_then(|v| v.as_object())
593 && let Some(requires) = test.get("requires").and_then(|v| v.as_object())
594 {
595 dependencies.extend(extract_dependency_group(requires, "test", false, false));
596 }
597
598 if let Some(configure) = prereqs.get("configure").and_then(|v| v.as_object())
600 && let Some(requires) = configure.get("requires").and_then(|v| v.as_object())
601 {
602 dependencies.extend(extract_dependency_group(
603 requires,
604 "configure",
605 false,
606 false,
607 ));
608 }
609
610 dependencies
611}
612
613fn extract_dependencies_from_yaml(yaml: &yaml_serde::Mapping) -> Vec<Dependency> {
614 let mut dependencies = Vec::new();
615
616 if let Some(requires) = yaml
618 .get(YamlValue::String(FIELD_REQUIRES.to_string()))
619 .and_then(|v| v.as_mapping())
620 {
621 dependencies.extend(extract_yaml_dependency_group(
622 requires, "runtime", true, false,
623 ));
624 }
625
626 if let Some(build_requires) = yaml
627 .get(YamlValue::String(FIELD_BUILD_REQUIRES.to_string()))
628 .and_then(|v| v.as_mapping())
629 {
630 dependencies.extend(extract_yaml_dependency_group(
631 build_requires,
632 "build",
633 false,
634 false,
635 ));
636 }
637
638 if let Some(test_requires) = yaml
639 .get(YamlValue::String(FIELD_TEST_REQUIRES.to_string()))
640 .and_then(|v| v.as_mapping())
641 {
642 dependencies.extend(extract_yaml_dependency_group(
643 test_requires,
644 "test",
645 false,
646 false,
647 ));
648 }
649
650 if let Some(configure_requires) = yaml
651 .get(YamlValue::String(FIELD_CONFIGURE_REQUIRES.to_string()))
652 .and_then(|v| v.as_mapping())
653 {
654 dependencies.extend(extract_yaml_dependency_group(
655 configure_requires,
656 "configure",
657 false,
658 false,
659 ));
660 }
661
662 dependencies
663}
664
665fn extract_dependency_group(
666 deps: &serde_json::Map<String, JsonValue>,
667 scope: &str,
668 is_runtime: bool,
669 is_optional: bool,
670) -> Vec<Dependency> {
671 deps.iter()
672 .take(MAX_ITERATION_COUNT)
673 .filter_map(|(name, version)| {
674 if name == "perl" {
675 return None;
676 }
677
678 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
679 let purl = purl.map(truncate_field);
680
681 let extracted_requirement = match version {
682 JsonValue::String(s) => Some(truncate_field(s.clone())),
683 JsonValue::Number(n) => Some(truncate_field(n.to_string())),
684 _ => None,
685 };
686
687 Some(Dependency {
688 purl,
689 extracted_requirement,
690 scope: Some(truncate_field(scope.to_string())),
691 is_runtime: Some(is_runtime),
692 is_optional: Some(is_optional),
693 is_pinned: None,
694 is_direct: Some(true),
695 resolved_package: None,
696 extra_data: None,
697 })
698 })
699 .collect()
700}
701
702fn extract_yaml_dependency_group(
703 deps: &yaml_serde::Mapping,
704 scope: &str,
705 is_runtime: bool,
706 is_optional: bool,
707) -> Vec<Dependency> {
708 deps.iter()
709 .take(MAX_ITERATION_COUNT)
710 .filter_map(|(key, value)| {
711 let name = key.as_str()?;
712
713 if name == "perl" {
714 return None;
715 }
716
717 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
718 let purl = purl.map(truncate_field);
719
720 let extracted_requirement = match value {
721 YamlValue::String(s) => Some(truncate_field(s.clone())),
722 YamlValue::Number(n) => Some(truncate_field(n.to_string())),
723 _ => None,
724 };
725
726 Some(Dependency {
727 purl,
728 extracted_requirement,
729 scope: Some(truncate_field(scope.to_string())),
730 is_runtime: Some(is_runtime),
731 is_optional: Some(is_optional),
732 is_pinned: None,
733 is_direct: Some(true),
734 resolved_package: None,
735 extra_data: None,
736 })
737 })
738 .collect()
739}
740
// Registry entries for the three parsers in this file.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.

// META.json metadata, handled by `CpanMetaJsonParser`.
crate::register_parser!(
    "CPAN Perl META.json",
    &["**/META.json"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
);

// META.yml metadata, handled by `CpanMetaYmlParser`.
crate::register_parser!(
    "CPAN Perl META.yml",
    &["**/META.yml"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
);

// MANIFEST file listings, handled by `CpanManifestParser`.
crate::register_parser!(
    "CPAN Perl MANIFEST",
    &["**/MANIFEST"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/Module::Manifest"),
);