1use std::path::Path;
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use serde_json::Value as JsonValue;
32use yaml_serde::Value as YamlValue;
33
34use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
35use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
36
37use super::PackageParser;
38use super::license_normalization::{
39 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
40 combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
41 normalize_spdx_expression,
42};
43
// Metadata keys looked up by the META.json / META.yml extractors in this file.

/// Key of the distribution name (read by both the JSON and YAML parsers).
44const FIELD_NAME: &str = "name";
/// Key of the distribution version; string and numeric values are accepted.
45const FIELD_VERSION: &str = "version";
/// Key of the short summary that is reported as the package description.
46const FIELD_ABSTRACT: &str = "abstract";
/// Key consulted by the META.yml parser as an alternative source for the description.
47const FIELD_DESCRIPTION: &str = "description";
/// Key of the declared license (a single string or a list of strings).
48const FIELD_LICENSE: &str = "license";
/// Key of the author list.
49const FIELD_AUTHOR: &str = "author";
/// Key of the map holding homepage / repository / bugtracker links.
50const FIELD_RESOURCES: &str = "resources";
/// META.json key of the per-phase prerequisite map.
51const FIELD_PREREQS: &str = "prereqs";
/// META.yml key whose entries are reported with the "runtime" scope.
52const FIELD_REQUIRES: &str = "requires";
/// META.yml key whose entries are reported with the "build" scope.
53const FIELD_BUILD_REQUIRES: &str = "build_requires";
/// META.yml key whose entries are reported with the "test" scope.
54const FIELD_TEST_REQUIRES: &str = "test_requires";
/// META.yml key whose entries are reported with the "configure" scope.
55const FIELD_CONFIGURE_REQUIRES: &str = "configure_requires";
56
57pub struct CpanMetaJsonParser;
62
63impl PackageParser for CpanMetaJsonParser {
64 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
65
66 fn is_match(path: &Path) -> bool {
67 path.file_name().is_some_and(|name| name == "META.json")
68 }
69
70 fn extract_packages(path: &Path) -> Vec<PackageData> {
71 let json = match read_and_parse_json(path) {
72 Ok(json) => json,
73 Err(e) => {
74 warn!("Failed to parse META.json at {:?}: {}", path, e);
75 return vec![default_package_data(DatasourceId::CpanMetaJson)];
76 }
77 };
78
79 let name = json
80 .get(FIELD_NAME)
81 .and_then(|v| v.as_str())
82 .map(|s| truncate_field(s.to_string()));
83
84 let version = extract_version_from_json(&json);
85
86 let description = json
87 .get(FIELD_ABSTRACT)
88 .and_then(|v| v.as_str())
89 .map(|s| truncate_field(s.to_string()));
90
91 let extracted_license_statement = extract_license_from_json(&json);
92 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
93 normalize_cpan_declared_license(
94 json.get(FIELD_LICENSE),
95 extracted_license_statement.as_deref(),
96 );
97 let declared_license_expression = declared_license_expression.map(truncate_field);
98 let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
99 let parties = extract_parties_from_json(&json);
100 let dependencies = extract_dependencies_from_json(&json);
101 let (homepage_url, vcs_url, code_view_url, bug_tracking_url) =
102 extract_resources_from_json(&json);
103
104 vec![PackageData {
105 package_type: Some(Self::PACKAGE_TYPE),
106 name,
107 version,
108 description,
109 declared_license_expression,
110 declared_license_expression_spdx,
111 license_detections,
112 extracted_license_statement,
113 parties,
114 dependencies,
115 homepage_url,
116 vcs_url,
117 code_view_url,
118 bug_tracking_url,
119 primary_language: Some("Perl".to_string()),
120 datasource_id: Some(DatasourceId::CpanMetaJson),
121 ..Default::default()
122 }]
123 }
124}
125
126pub struct CpanMetaYmlParser;
130
131impl PackageParser for CpanMetaYmlParser {
132 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
133
134 fn is_match(path: &Path) -> bool {
135 path.file_name().is_some_and(|name| name == "META.yml")
136 }
137
138 fn extract_packages(path: &Path) -> Vec<PackageData> {
139 let yaml = match read_and_parse_yaml(path) {
140 Ok(yaml) => yaml,
141 Err(e) => {
142 warn!("Failed to parse META.yml at {:?}: {}", path, e);
143 return vec![default_package_data(DatasourceId::CpanMetaYml)];
144 }
145 };
146
147 let name = yaml
148 .get(FIELD_NAME)
149 .and_then(|v| v.as_str())
150 .map(|s| truncate_field(s.to_string()));
151
152 let version = extract_version_from_yaml(&yaml);
153
154 let description = yaml
155 .get(FIELD_ABSTRACT)
156 .or_else(|| yaml.get(FIELD_DESCRIPTION))
157 .and_then(|v| v.as_str())
158 .map(|s| truncate_field(s.to_string()));
159
160 let extracted_license_statement = extract_license_from_yaml(&yaml);
161 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
162 normalize_cpan_declared_license(
163 yaml.get(YamlValue::String(FIELD_LICENSE.to_string())),
164 extracted_license_statement.as_deref(),
165 );
166 let declared_license_expression = declared_license_expression.map(truncate_field);
167 let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
168 let parties = extract_parties_from_yaml(&yaml);
169 let dependencies = extract_dependencies_from_yaml(&yaml);
170 let (homepage_url, vcs_url, bug_tracking_url) = extract_resources_from_yaml(&yaml);
171
172 vec![PackageData {
173 package_type: Some(Self::PACKAGE_TYPE),
174 name,
175 version,
176 description,
177 declared_license_expression,
178 declared_license_expression_spdx,
179 license_detections,
180 extracted_license_statement,
181 parties,
182 dependencies,
183 homepage_url,
184 vcs_url,
185 bug_tracking_url,
186 primary_language: Some("Perl".to_string()),
187 datasource_id: Some(DatasourceId::CpanMetaYml),
188 ..Default::default()
189 }]
190 }
191}
192
193pub struct CpanManifestParser;
197
198impl PackageParser for CpanManifestParser {
199 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
200
201 fn is_match(path: &Path) -> bool {
202 path.file_name().is_some_and(|name| name == "MANIFEST")
203 }
204
205 fn extract_packages(path: &Path) -> Vec<PackageData> {
206 let content = match read_file_to_string(path, None) {
207 Ok(content) => content,
208 Err(e) => {
209 warn!("Failed to read MANIFEST at {:?}: {}", path, e);
210 return vec![default_package_data(DatasourceId::CpanManifest)];
211 }
212 };
213
214 let file_references = content
215 .lines()
216 .take(MAX_ITERATION_COUNT)
217 .filter(|line| !line.trim().is_empty())
218 .filter(|line| !line.trim().starts_with('#'))
219 .map(|line| {
220 let path = line.split_whitespace().next().unwrap_or(line);
221 FileReference {
222 path: truncate_field(path.to_string()),
223 size: None,
224 sha1: None,
225 md5: None,
226 sha256: None,
227 sha512: None,
228 extra_data: None,
229 }
230 })
231 .collect();
232
233 vec![PackageData {
234 package_type: Some(Self::PACKAGE_TYPE),
235 file_references,
236 primary_language: Some("Perl".to_string()),
237 datasource_id: Some(DatasourceId::CpanManifest),
238 ..Default::default()
239 }]
240 }
241}
242
243fn default_package_data(datasource_id: DatasourceId) -> PackageData {
244 PackageData {
245 package_type: Some(CpanMetaJsonParser::PACKAGE_TYPE),
246 primary_language: Some("Perl".to_string()),
247 datasource_id: Some(datasource_id),
248 ..Default::default()
249 }
250}
251
252fn read_and_parse_json(path: &Path) -> Result<serde_json::Map<String, JsonValue>, String> {
253 let content =
254 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
255 let json: JsonValue =
256 serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
257 json.as_object()
258 .cloned()
259 .ok_or_else(|| "Root JSON is not an object".to_string())
260}
261
262fn read_and_parse_yaml(path: &Path) -> Result<yaml_serde::Mapping, String> {
263 let content =
264 read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
265 let yaml: YamlValue =
266 yaml_serde::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
267 yaml.as_mapping()
268 .cloned()
269 .ok_or_else(|| "Root YAML is not a mapping".to_string())
270}
271
272fn extract_version_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
273 json.get(FIELD_VERSION).and_then(|v| match v {
274 JsonValue::String(s) => Some(truncate_field(s.clone())),
275 JsonValue::Number(n) => Some(truncate_field(n.to_string())),
276 _ => None,
277 })
278}
279
280fn extract_version_from_yaml(yaml: &yaml_serde::Mapping) -> Option<String> {
281 yaml.get(YamlValue::String(FIELD_VERSION.to_string()))
282 .and_then(|v| match v {
283 YamlValue::String(s) => Some(truncate_field(s.clone())),
284 YamlValue::Number(n) => Some(truncate_field(n.to_string())),
285 _ => None,
286 })
287}
288
289fn extract_license_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
290 json.get(FIELD_LICENSE).and_then(|v| match v {
291 JsonValue::String(s) => Some(truncate_field(s.clone())),
292 JsonValue::Array(arr) => {
293 let licenses: Vec<String> = arr
294 .iter()
295 .take(MAX_ITERATION_COUNT)
296 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
297 .collect();
298 if licenses.is_empty() {
299 None
300 } else {
301 Some(truncate_field(licenses.join(" AND ")))
302 }
303 }
304 _ => None,
305 })
306}
307
308fn extract_license_from_yaml(yaml: &yaml_serde::Mapping) -> Option<String> {
309 yaml.get(YamlValue::String(FIELD_LICENSE.to_string()))
310 .and_then(|v| match v {
311 YamlValue::String(s) => Some(truncate_field(s.clone())),
312 YamlValue::Sequence(arr) => {
313 let licenses: Vec<String> = arr
314 .iter()
315 .take(MAX_ITERATION_COUNT)
316 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
317 .collect();
318 if licenses.is_empty() {
319 None
320 } else {
321 Some(truncate_field(licenses.join(" AND ")))
322 }
323 }
324 _ => None,
325 })
326}
327
328fn normalize_cpan_declared_license(
329 raw_license: Option<&impl LicenseValueAdapter>,
330 extracted_license_statement: Option<&str>,
331) -> (
332 Option<String>,
333 Option<String>,
334 Vec<crate::models::LicenseDetection>,
335) {
336 let Some(raw_license) = raw_license else {
337 return empty_declared_license_data();
338 };
339 let normalized = raw_license
340 .license_values()
341 .into_iter()
342 .map(|value| normalize_cpan_license_value(&value))
343 .collect::<Option<Vec<_>>>();
344
345 if let Some(normalized) = normalized
346 && let Some(combined) = combine_normalized_licenses(normalized, " AND ")
347 {
348 return build_declared_license_data(
349 combined,
350 DeclaredLicenseMatchMetadata::single_line(
351 extracted_license_statement.unwrap_or_default(),
352 ),
353 );
354 }
355
356 empty_declared_license_data()
357}
358
359trait LicenseValueAdapter {
360 fn license_values(&self) -> Vec<String>;
361}
362
363impl LicenseValueAdapter for JsonValue {
364 fn license_values(&self) -> Vec<String> {
365 match self {
366 JsonValue::String(value) => vec![truncate_field(value.trim().to_string())],
367 JsonValue::Array(values) => values
368 .iter()
369 .take(MAX_ITERATION_COUNT)
370 .filter_map(|value| value.as_str())
371 .map(str::trim)
372 .filter(|value| !value.is_empty())
373 .map(|s| truncate_field(s.to_string()))
374 .collect(),
375 _ => Vec::new(),
376 }
377 }
378}
379
380impl LicenseValueAdapter for YamlValue {
381 fn license_values(&self) -> Vec<String> {
382 match self {
383 YamlValue::String(value) => vec![truncate_field(value.trim().to_string())],
384 YamlValue::Sequence(values) => values
385 .iter()
386 .take(MAX_ITERATION_COUNT)
387 .filter_map(|value| value.as_str())
388 .map(str::trim)
389 .filter(|value| !value.is_empty())
390 .map(|s| truncate_field(s.to_string()))
391 .collect(),
392 _ => Vec::new(),
393 }
394 }
395}
396
397fn normalize_cpan_license_value(value: &str) -> Option<NormalizedDeclaredLicense> {
398 match value.trim() {
399 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
400 "gpl-1.0-plus OR artistic-perl-1.0",
401 "GPL-1.0-or-later OR Artistic-1.0-Perl",
402 )),
403 "artistic_2" => Some(NormalizedDeclaredLicense::new(
404 "artistic-2.0",
405 "Artistic-2.0",
406 )),
407 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
408 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
409 }
410}
411
412fn extract_parties_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Party> {
413 json.get(FIELD_AUTHOR)
414 .and_then(|v| v.as_array())
415 .map_or_else(Vec::new, |authors| {
416 authors
417 .iter()
418 .take(MAX_ITERATION_COUNT)
419 .filter_map(|author| {
420 author.as_str().map(|s| {
421 let (name, email) = parse_author_string(s);
422 Party {
423 r#type: Some("person".to_string()),
424 role: Some("author".to_string()),
425 name,
426 email,
427 url: None,
428 organization: None,
429 organization_url: None,
430 timezone: None,
431 }
432 })
433 })
434 .collect()
435 })
436}
437
438fn extract_parties_from_yaml(yaml: &yaml_serde::Mapping) -> Vec<Party> {
439 yaml.get(YamlValue::String(FIELD_AUTHOR.to_string()))
440 .and_then(|v| v.as_sequence())
441 .map_or_else(Vec::new, |authors| {
442 authors
443 .iter()
444 .take(MAX_ITERATION_COUNT)
445 .filter_map(|author| {
446 author.as_str().map(|s| {
447 let (name, email) = parse_author_string(s);
448 Party {
449 r#type: Some("person".to_string()),
450 role: Some("author".to_string()),
451 name,
452 email,
453 url: None,
454 organization: None,
455 organization_url: None,
456 timezone: None,
457 }
458 })
459 })
460 .collect()
461 })
462}
463
464fn parse_author_string(author_str: &str) -> (Option<String>, Option<String>) {
465 if let Some(email_start) = author_str.find('<')
466 && let Some(email_end) = author_str.find('>')
467 && email_start < email_end
468 {
469 let name = author_str[..email_start].trim();
470 let email = author_str[email_start + 1..email_end].trim();
471 return (
472 if name.is_empty() {
473 None
474 } else {
475 Some(truncate_field(name.to_string()))
476 },
477 if email.is_empty() {
478 None
479 } else {
480 Some(truncate_field(email.to_string()))
481 },
482 );
483 }
484 let trimmed = author_str.trim();
485 (
486 if trimmed.is_empty() {
487 None
488 } else {
489 Some(truncate_field(trimmed.to_string()))
490 },
491 None,
492 )
493}
494
495fn extract_resources_from_json(
496 json: &serde_json::Map<String, JsonValue>,
497) -> (
498 Option<String>,
499 Option<String>,
500 Option<String>,
501 Option<String>,
502) {
503 let resources = match json.get(FIELD_RESOURCES).and_then(|v| v.as_object()) {
504 Some(r) => r,
505 None => return (None, None, None, None),
506 };
507
508 let homepage_url = resources
509 .get("homepage")
510 .and_then(|v| v.as_str())
511 .map(|s| truncate_field(s.to_string()));
512
513 let vcs_url = resources.get("repository").and_then(|v| match v {
514 JsonValue::String(s) => Some(truncate_field(s.clone())),
515 JsonValue::Object(obj) => obj
516 .get("url")
517 .and_then(|u| u.as_str())
518 .map(|s| truncate_field(s.to_string())),
519 _ => None,
520 });
521
522 let code_view_url = resources
523 .get("repository")
524 .and_then(|v| v.as_object())
525 .and_then(|obj| {
526 obj.get("web")
527 .and_then(|u| u.as_str())
528 .map(|s| truncate_field(s.to_string()))
529 });
530
531 let bug_tracking_url = resources.get("bugtracker").and_then(|v| match v {
532 JsonValue::String(s) => Some(truncate_field(s.clone())),
533 JsonValue::Object(obj) => obj
534 .get("web")
535 .and_then(|u| u.as_str())
536 .map(|s| truncate_field(s.to_string())),
537 _ => None,
538 });
539
540 (homepage_url, vcs_url, code_view_url, bug_tracking_url)
541}
542
543fn extract_resources_from_yaml(
544 yaml: &yaml_serde::Mapping,
545) -> (Option<String>, Option<String>, Option<String>) {
546 let resources = match yaml
547 .get(YamlValue::String(FIELD_RESOURCES.to_string()))
548 .and_then(|v| v.as_mapping())
549 {
550 Some(r) => r,
551 None => return (None, None, None),
552 };
553
554 let homepage_url = resources
555 .get(YamlValue::String("homepage".to_string()))
556 .and_then(|v| v.as_str())
557 .map(|s| truncate_field(s.to_string()));
558
559 let vcs_url = resources
560 .get(YamlValue::String("repository".to_string()))
561 .and_then(|v| v.as_str())
562 .map(|s| truncate_field(s.to_string()));
563
564 let bug_tracking_url = resources
565 .get(YamlValue::String("bugtracker".to_string()))
566 .and_then(|v| v.as_str())
567 .map(|s| truncate_field(s.to_string()));
568
569 (homepage_url, vcs_url, bug_tracking_url)
570}
571
572fn extract_dependencies_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Dependency> {
573 let mut dependencies = Vec::new();
574
575 let prereqs = match json.get(FIELD_PREREQS).and_then(|v| v.as_object()) {
576 Some(p) => p,
577 None => return dependencies,
578 };
579
580 if let Some(runtime) = prereqs.get("runtime").and_then(|v| v.as_object())
582 && let Some(requires) = runtime.get("requires").and_then(|v| v.as_object())
583 {
584 dependencies.extend(extract_dependency_group(requires, "runtime", true, false));
585 }
586
587 if let Some(build) = prereqs.get("build").and_then(|v| v.as_object())
589 && let Some(requires) = build.get("requires").and_then(|v| v.as_object())
590 {
591 dependencies.extend(extract_dependency_group(requires, "build", false, false));
592 }
593
594 if let Some(test) = prereqs.get("test").and_then(|v| v.as_object())
596 && let Some(requires) = test.get("requires").and_then(|v| v.as_object())
597 {
598 dependencies.extend(extract_dependency_group(requires, "test", false, false));
599 }
600
601 if let Some(configure) = prereqs.get("configure").and_then(|v| v.as_object())
603 && let Some(requires) = configure.get("requires").and_then(|v| v.as_object())
604 {
605 dependencies.extend(extract_dependency_group(
606 requires,
607 "configure",
608 false,
609 false,
610 ));
611 }
612
613 dependencies
614}
615
616fn extract_dependencies_from_yaml(yaml: &yaml_serde::Mapping) -> Vec<Dependency> {
617 let mut dependencies = Vec::new();
618
619 if let Some(requires) = yaml
621 .get(YamlValue::String(FIELD_REQUIRES.to_string()))
622 .and_then(|v| v.as_mapping())
623 {
624 dependencies.extend(extract_yaml_dependency_group(
625 requires, "runtime", true, false,
626 ));
627 }
628
629 if let Some(build_requires) = yaml
630 .get(YamlValue::String(FIELD_BUILD_REQUIRES.to_string()))
631 .and_then(|v| v.as_mapping())
632 {
633 dependencies.extend(extract_yaml_dependency_group(
634 build_requires,
635 "build",
636 false,
637 false,
638 ));
639 }
640
641 if let Some(test_requires) = yaml
642 .get(YamlValue::String(FIELD_TEST_REQUIRES.to_string()))
643 .and_then(|v| v.as_mapping())
644 {
645 dependencies.extend(extract_yaml_dependency_group(
646 test_requires,
647 "test",
648 false,
649 false,
650 ));
651 }
652
653 if let Some(configure_requires) = yaml
654 .get(YamlValue::String(FIELD_CONFIGURE_REQUIRES.to_string()))
655 .and_then(|v| v.as_mapping())
656 {
657 dependencies.extend(extract_yaml_dependency_group(
658 configure_requires,
659 "configure",
660 false,
661 false,
662 ));
663 }
664
665 dependencies
666}
667
668fn extract_dependency_group(
669 deps: &serde_json::Map<String, JsonValue>,
670 scope: &str,
671 is_runtime: bool,
672 is_optional: bool,
673) -> Vec<Dependency> {
674 deps.iter()
675 .take(MAX_ITERATION_COUNT)
676 .filter_map(|(name, version)| {
677 if name == "perl" {
678 return None;
679 }
680
681 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
682 let purl = purl.map(truncate_field);
683
684 let extracted_requirement = match version {
685 JsonValue::String(s) => Some(truncate_field(s.clone())),
686 JsonValue::Number(n) => Some(truncate_field(n.to_string())),
687 _ => None,
688 };
689
690 Some(Dependency {
691 purl,
692 extracted_requirement,
693 scope: Some(truncate_field(scope.to_string())),
694 is_runtime: Some(is_runtime),
695 is_optional: Some(is_optional),
696 is_pinned: None,
697 is_direct: Some(true),
698 resolved_package: None,
699 extra_data: None,
700 })
701 })
702 .collect()
703}
704
705fn extract_yaml_dependency_group(
706 deps: &yaml_serde::Mapping,
707 scope: &str,
708 is_runtime: bool,
709 is_optional: bool,
710) -> Vec<Dependency> {
711 deps.iter()
712 .take(MAX_ITERATION_COUNT)
713 .filter_map(|(key, value)| {
714 let name = key.as_str()?;
715
716 if name == "perl" {
717 return None;
718 }
719
720 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
721 let purl = purl.map(truncate_field);
722
723 let extracted_requirement = match value {
724 YamlValue::String(s) => Some(truncate_field(s.clone())),
725 YamlValue::Number(n) => Some(truncate_field(n.to_string())),
726 _ => None,
727 };
728
729 Some(Dependency {
730 purl,
731 extracted_requirement,
732 scope: Some(truncate_field(scope.to_string())),
733 is_runtime: Some(is_runtime),
734 is_optional: Some(is_optional),
735 is_pinned: None,
736 is_direct: Some(true),
737 resolved_package: None,
738 extra_data: None,
739 })
740 })
741 .collect()
742}
743
// Registration entry for the META.json parser. The arguments look like
// (description, path globs, package type, primary language, documentation
// URL) — TODO confirm against the `register_parser!` definition.
744crate::register_parser!(
745 "CPAN Perl META.json",
746 &["**/META.json"],
747 "cpan",
748 "Perl",
749 Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
750);
751
// Registration entry for the META.yml parser. The arguments look like
// (description, path globs, package type, primary language, documentation
// URL) — TODO confirm against the `register_parser!` definition.
752crate::register_parser!(
753 "CPAN Perl META.yml",
754 &["**/META.yml"],
755 "cpan",
756 "Perl",
757 Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
758);
759
// Registration entry for the MANIFEST parser. The arguments look like
// (description, path globs, package type, primary language, documentation
// URL) — TODO confirm against the `register_parser!` definition.
760crate::register_parser!(
761 "CPAN Perl MANIFEST",
762 &["**/MANIFEST"],
763 "cpan",
764 "Perl",
765 Some("https://metacpan.org/pod/Module::Manifest"),
766);