1use std::fs;
25use std::path::Path;
26
27use crate::parser_warn as warn;
28use packageurl::PackageUrl;
29use serde_json::Value as JsonValue;
30use serde_yaml::Value as YamlValue;
31
32use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
33
34use super::PackageParser;
35use super::license_normalization::{
36 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
37 combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
38 normalize_spdx_expression,
39};
40
// Top-level metadata keys looked up in the parsed documents.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_ABSTRACT: &str = "abstract";
// Only consulted by the META.yml parser, as a fallback for `abstract`.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHOR: &str = "author";
const FIELD_RESOURCES: &str = "resources";

// Dependency sections: META.json nests everything under `prereqs`, while
// META.yml is read from the flat `*requires` keys below.
const FIELD_PREREQS: &str = "prereqs";
const FIELD_REQUIRES: &str = "requires";
const FIELD_BUILD_REQUIRES: &str = "build_requires";
const FIELD_TEST_REQUIRES: &str = "test_requires";
const FIELD_CONFIGURE_REQUIRES: &str = "configure_requires";
53
54pub struct CpanMetaJsonParser;
59
60impl PackageParser for CpanMetaJsonParser {
61 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
62
63 fn is_match(path: &Path) -> bool {
64 path.file_name().is_some_and(|name| name == "META.json")
65 }
66
67 fn extract_packages(path: &Path) -> Vec<PackageData> {
68 let json = match read_and_parse_json(path) {
69 Ok(json) => json,
70 Err(e) => {
71 warn!("Failed to parse META.json at {:?}: {}", path, e);
72 return vec![default_package_data(DatasourceId::CpanMetaJson)];
73 }
74 };
75
76 let name = json
77 .get(FIELD_NAME)
78 .and_then(|v| v.as_str())
79 .map(String::from);
80
81 let version = extract_version_from_json(&json);
82
83 let description = json
84 .get(FIELD_ABSTRACT)
85 .and_then(|v| v.as_str())
86 .map(String::from);
87
88 let extracted_license_statement = extract_license_from_json(&json);
89 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
90 normalize_cpan_declared_license(
91 json.get(FIELD_LICENSE),
92 extracted_license_statement.as_deref(),
93 );
94 let parties = extract_parties_from_json(&json);
95 let dependencies = extract_dependencies_from_json(&json);
96 let (homepage_url, vcs_url, code_view_url, bug_tracking_url) =
97 extract_resources_from_json(&json);
98
99 vec![PackageData {
100 package_type: Some(Self::PACKAGE_TYPE),
101 name,
102 version,
103 description,
104 declared_license_expression,
105 declared_license_expression_spdx,
106 license_detections,
107 extracted_license_statement,
108 parties,
109 dependencies,
110 homepage_url,
111 vcs_url,
112 code_view_url,
113 bug_tracking_url,
114 primary_language: Some("Perl".to_string()),
115 datasource_id: Some(DatasourceId::CpanMetaJson),
116 ..Default::default()
117 }]
118 }
119}
120
121pub struct CpanMetaYmlParser;
125
126impl PackageParser for CpanMetaYmlParser {
127 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
128
129 fn is_match(path: &Path) -> bool {
130 path.file_name().is_some_and(|name| name == "META.yml")
131 }
132
133 fn extract_packages(path: &Path) -> Vec<PackageData> {
134 let yaml = match read_and_parse_yaml(path) {
135 Ok(yaml) => yaml,
136 Err(e) => {
137 warn!("Failed to parse META.yml at {:?}: {}", path, e);
138 return vec![default_package_data(DatasourceId::CpanMetaYml)];
139 }
140 };
141
142 let name = yaml
143 .get(FIELD_NAME)
144 .and_then(|v| v.as_str())
145 .map(String::from);
146
147 let version = extract_version_from_yaml(&yaml);
148
149 let description = yaml
150 .get(FIELD_ABSTRACT)
151 .or_else(|| yaml.get(FIELD_DESCRIPTION))
152 .and_then(|v| v.as_str())
153 .map(String::from);
154
155 let extracted_license_statement = extract_license_from_yaml(&yaml);
156 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
157 normalize_cpan_declared_license(
158 yaml.get(YamlValue::String(FIELD_LICENSE.to_string())),
159 extracted_license_statement.as_deref(),
160 );
161 let parties = extract_parties_from_yaml(&yaml);
162 let dependencies = extract_dependencies_from_yaml(&yaml);
163 let (homepage_url, vcs_url, bug_tracking_url) = extract_resources_from_yaml(&yaml);
164
165 vec![PackageData {
166 package_type: Some(Self::PACKAGE_TYPE),
167 name,
168 version,
169 description,
170 declared_license_expression,
171 declared_license_expression_spdx,
172 license_detections,
173 extracted_license_statement,
174 parties,
175 dependencies,
176 homepage_url,
177 vcs_url,
178 bug_tracking_url,
179 primary_language: Some("Perl".to_string()),
180 datasource_id: Some(DatasourceId::CpanMetaYml),
181 ..Default::default()
182 }]
183 }
184}
185
186pub struct CpanManifestParser;
190
191impl PackageParser for CpanManifestParser {
192 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
193
194 fn is_match(path: &Path) -> bool {
195 path.file_name().is_some_and(|name| name == "MANIFEST")
196 }
197
198 fn extract_packages(path: &Path) -> Vec<PackageData> {
199 let content = match fs::read_to_string(path) {
200 Ok(content) => content,
201 Err(e) => {
202 warn!("Failed to read MANIFEST at {:?}: {}", path, e);
203 return vec![default_package_data(DatasourceId::CpanManifest)];
204 }
205 };
206
207 let file_references = content
208 .lines()
209 .filter(|line| !line.trim().is_empty())
210 .filter(|line| !line.trim().starts_with('#'))
211 .map(|line| {
212 let path = line.split_whitespace().next().unwrap_or(line);
214 FileReference {
215 path: path.to_string(),
216 size: None,
217 sha1: None,
218 md5: None,
219 sha256: None,
220 sha512: None,
221 extra_data: None,
222 }
223 })
224 .collect();
225
226 vec![PackageData {
227 package_type: Some(Self::PACKAGE_TYPE),
228 file_references,
229 primary_language: Some("Perl".to_string()),
230 datasource_id: Some(DatasourceId::CpanManifest),
231 ..Default::default()
232 }]
233 }
234}
235
236fn default_package_data(datasource_id: DatasourceId) -> PackageData {
237 PackageData {
238 package_type: Some(CpanMetaJsonParser::PACKAGE_TYPE),
239 primary_language: Some("Perl".to_string()),
240 datasource_id: Some(datasource_id),
241 ..Default::default()
242 }
243}
244
245fn read_and_parse_json(path: &Path) -> Result<serde_json::Map<String, JsonValue>, String> {
246 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
247 let json: JsonValue =
248 serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
249 json.as_object()
250 .cloned()
251 .ok_or_else(|| "Root JSON is not an object".to_string())
252}
253
254fn read_and_parse_yaml(path: &Path) -> Result<serde_yaml::Mapping, String> {
255 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
256 let yaml: YamlValue =
257 serde_yaml::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
258 yaml.as_mapping()
259 .cloned()
260 .ok_or_else(|| "Root YAML is not a mapping".to_string())
261}
262
263fn extract_version_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
264 json.get(FIELD_VERSION).and_then(|v| match v {
265 JsonValue::String(s) => Some(s.clone()),
266 JsonValue::Number(n) => Some(n.to_string()),
267 _ => None,
268 })
269}
270
271fn extract_version_from_yaml(yaml: &serde_yaml::Mapping) -> Option<String> {
272 yaml.get(YamlValue::String(FIELD_VERSION.to_string()))
273 .and_then(|v| match v {
274 YamlValue::String(s) => Some(s.clone()),
275 YamlValue::Number(n) => Some(n.to_string()),
276 _ => None,
277 })
278}
279
280fn extract_license_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
281 json.get(FIELD_LICENSE).and_then(|v| match v {
282 JsonValue::String(s) => Some(s.clone()),
283 JsonValue::Array(arr) => {
284 let licenses: Vec<String> = arr
285 .iter()
286 .filter_map(|item| item.as_str().map(String::from))
287 .collect();
288 if licenses.is_empty() {
289 None
290 } else {
291 Some(licenses.join(" AND "))
292 }
293 }
294 _ => None,
295 })
296}
297
298fn extract_license_from_yaml(yaml: &serde_yaml::Mapping) -> Option<String> {
299 yaml.get(YamlValue::String(FIELD_LICENSE.to_string()))
300 .and_then(|v| match v {
301 YamlValue::String(s) => Some(s.clone()),
302 YamlValue::Sequence(arr) => {
303 let licenses: Vec<String> = arr
304 .iter()
305 .filter_map(|item| item.as_str().map(String::from))
306 .collect();
307 if licenses.is_empty() {
308 None
309 } else {
310 Some(licenses.join(" AND "))
311 }
312 }
313 _ => None,
314 })
315}
316
317fn normalize_cpan_declared_license(
318 raw_license: Option<&impl LicenseValueAdapter>,
319 extracted_license_statement: Option<&str>,
320) -> (
321 Option<String>,
322 Option<String>,
323 Vec<crate::models::LicenseDetection>,
324) {
325 let Some(raw_license) = raw_license else {
326 return empty_declared_license_data();
327 };
328 let normalized = raw_license
329 .license_values()
330 .into_iter()
331 .map(|value| normalize_cpan_license_value(&value))
332 .collect::<Option<Vec<_>>>();
333
334 if let Some(normalized) = normalized
335 && let Some(combined) = combine_normalized_licenses(normalized, " AND ")
336 {
337 return build_declared_license_data(
338 combined,
339 DeclaredLicenseMatchMetadata::single_line(
340 extracted_license_statement.unwrap_or_default(),
341 ),
342 );
343 }
344
345 empty_declared_license_data()
346}
347
348trait LicenseValueAdapter {
349 fn license_values(&self) -> Vec<String>;
350}
351
352impl LicenseValueAdapter for JsonValue {
353 fn license_values(&self) -> Vec<String> {
354 match self {
355 JsonValue::String(value) => vec![value.trim().to_string()],
356 JsonValue::Array(values) => values
357 .iter()
358 .filter_map(|value| value.as_str())
359 .map(str::trim)
360 .filter(|value| !value.is_empty())
361 .map(ToOwned::to_owned)
362 .collect(),
363 _ => Vec::new(),
364 }
365 }
366}
367
368impl LicenseValueAdapter for YamlValue {
369 fn license_values(&self) -> Vec<String> {
370 match self {
371 YamlValue::String(value) => vec![value.trim().to_string()],
372 YamlValue::Sequence(values) => values
373 .iter()
374 .filter_map(|value| value.as_str())
375 .map(str::trim)
376 .filter(|value| !value.is_empty())
377 .map(ToOwned::to_owned)
378 .collect(),
379 _ => Vec::new(),
380 }
381 }
382}
383
384fn normalize_cpan_license_value(value: &str) -> Option<NormalizedDeclaredLicense> {
385 match value.trim() {
386 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
387 "gpl-1.0-plus OR artistic-perl-1.0",
388 "GPL-1.0-or-later OR Artistic-1.0-Perl",
389 )),
390 "artistic_2" => Some(NormalizedDeclaredLicense::new(
391 "artistic-2.0",
392 "Artistic-2.0",
393 )),
394 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
395 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
396 }
397}
398
399fn extract_parties_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Party> {
400 json.get(FIELD_AUTHOR)
401 .and_then(|v| v.as_array())
402 .map_or_else(Vec::new, |authors| {
403 authors
404 .iter()
405 .filter_map(|author| {
406 author.as_str().map(|s| {
407 let (name, email) = parse_author_string(s);
408 Party {
409 r#type: Some("person".to_string()),
410 role: Some("author".to_string()),
411 name,
412 email,
413 url: None,
414 organization: None,
415 organization_url: None,
416 timezone: None,
417 }
418 })
419 })
420 .collect()
421 })
422}
423
424fn extract_parties_from_yaml(yaml: &serde_yaml::Mapping) -> Vec<Party> {
425 yaml.get(YamlValue::String(FIELD_AUTHOR.to_string()))
426 .and_then(|v| v.as_sequence())
427 .map_or_else(Vec::new, |authors| {
428 authors
429 .iter()
430 .filter_map(|author| {
431 author.as_str().map(|s| {
432 let (name, email) = parse_author_string(s);
433 Party {
434 r#type: Some("person".to_string()),
435 role: Some("author".to_string()),
436 name,
437 email,
438 url: None,
439 organization: None,
440 organization_url: None,
441 timezone: None,
442 }
443 })
444 })
445 .collect()
446 })
447}
448
/// Splits an author entry of the form `Name <email>` into `(name, email)`.
///
/// The email is the text between the first `<` and the first `>` that
/// follows it, so a stray `>` earlier in the string no longer prevents the
/// email from being recognized. Both parts are trimmed, and a part that is
/// empty after trimming becomes `None`.
///
/// When no such `<...>` pair exists, the whole trimmed input is returned as
/// the name and the email is `None`.
fn parse_author_string(author_str: &str) -> (Option<String>, Option<String>) {
    /// Trims `text` and maps the empty result to `None`.
    fn non_empty(text: &str) -> Option<String> {
        let text = text.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    }

    if let Some((name, rest)) = author_str.split_once('<') {
        // Only a `>` located after the `<` can close the email.
        if let Some((email, _)) = rest.split_once('>') {
            return (non_empty(name), non_empty(email));
        }
    }

    (Some(author_str.trim().to_string()), None)
}
473
474fn extract_resources_from_json(
475 json: &serde_json::Map<String, JsonValue>,
476) -> (
477 Option<String>,
478 Option<String>,
479 Option<String>,
480 Option<String>,
481) {
482 let resources = match json.get(FIELD_RESOURCES).and_then(|v| v.as_object()) {
483 Some(r) => r,
484 None => return (None, None, None, None),
485 };
486
487 let homepage_url = resources
488 .get("homepage")
489 .and_then(|v| v.as_str())
490 .map(String::from);
491
492 let vcs_url = resources.get("repository").and_then(|v| match v {
493 JsonValue::String(s) => Some(s.clone()),
494 JsonValue::Object(obj) => obj.get("url").and_then(|u| u.as_str()).map(String::from),
495 _ => None,
496 });
497
498 let code_view_url = resources
499 .get("repository")
500 .and_then(|v| v.as_object())
501 .and_then(|obj| obj.get("web").and_then(|u| u.as_str()).map(String::from));
502
503 let bug_tracking_url = resources.get("bugtracker").and_then(|v| match v {
504 JsonValue::String(s) => Some(s.clone()),
505 JsonValue::Object(obj) => obj.get("web").and_then(|u| u.as_str()).map(String::from),
506 _ => None,
507 });
508
509 (homepage_url, vcs_url, code_view_url, bug_tracking_url)
510}
511
512fn extract_resources_from_yaml(
513 yaml: &serde_yaml::Mapping,
514) -> (Option<String>, Option<String>, Option<String>) {
515 let resources = match yaml
516 .get(YamlValue::String(FIELD_RESOURCES.to_string()))
517 .and_then(|v| v.as_mapping())
518 {
519 Some(r) => r,
520 None => return (None, None, None),
521 };
522
523 let homepage_url = resources
524 .get(YamlValue::String("homepage".to_string()))
525 .and_then(|v| v.as_str())
526 .map(String::from);
527
528 let vcs_url = resources
529 .get(YamlValue::String("repository".to_string()))
530 .and_then(|v| v.as_str())
531 .map(String::from);
532
533 let bug_tracking_url = resources
534 .get(YamlValue::String("bugtracker".to_string()))
535 .and_then(|v| v.as_str())
536 .map(String::from);
537
538 (homepage_url, vcs_url, bug_tracking_url)
539}
540
541fn extract_dependencies_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Dependency> {
542 let mut dependencies = Vec::new();
543
544 let prereqs = match json.get(FIELD_PREREQS).and_then(|v| v.as_object()) {
545 Some(p) => p,
546 None => return dependencies,
547 };
548
549 if let Some(runtime) = prereqs.get("runtime").and_then(|v| v.as_object())
551 && let Some(requires) = runtime.get("requires").and_then(|v| v.as_object())
552 {
553 dependencies.extend(extract_dependency_group(requires, "runtime", true, false));
554 }
555
556 if let Some(build) = prereqs.get("build").and_then(|v| v.as_object())
558 && let Some(requires) = build.get("requires").and_then(|v| v.as_object())
559 {
560 dependencies.extend(extract_dependency_group(requires, "build", false, false));
561 }
562
563 if let Some(test) = prereqs.get("test").and_then(|v| v.as_object())
565 && let Some(requires) = test.get("requires").and_then(|v| v.as_object())
566 {
567 dependencies.extend(extract_dependency_group(requires, "test", false, false));
568 }
569
570 if let Some(configure) = prereqs.get("configure").and_then(|v| v.as_object())
572 && let Some(requires) = configure.get("requires").and_then(|v| v.as_object())
573 {
574 dependencies.extend(extract_dependency_group(
575 requires,
576 "configure",
577 false,
578 false,
579 ));
580 }
581
582 dependencies
583}
584
585fn extract_dependencies_from_yaml(yaml: &serde_yaml::Mapping) -> Vec<Dependency> {
586 let mut dependencies = Vec::new();
587
588 if let Some(requires) = yaml
590 .get(YamlValue::String(FIELD_REQUIRES.to_string()))
591 .and_then(|v| v.as_mapping())
592 {
593 dependencies.extend(extract_yaml_dependency_group(
594 requires, "runtime", true, false,
595 ));
596 }
597
598 if let Some(build_requires) = yaml
599 .get(YamlValue::String(FIELD_BUILD_REQUIRES.to_string()))
600 .and_then(|v| v.as_mapping())
601 {
602 dependencies.extend(extract_yaml_dependency_group(
603 build_requires,
604 "build",
605 false,
606 false,
607 ));
608 }
609
610 if let Some(test_requires) = yaml
611 .get(YamlValue::String(FIELD_TEST_REQUIRES.to_string()))
612 .and_then(|v| v.as_mapping())
613 {
614 dependencies.extend(extract_yaml_dependency_group(
615 test_requires,
616 "test",
617 false,
618 false,
619 ));
620 }
621
622 if let Some(configure_requires) = yaml
623 .get(YamlValue::String(FIELD_CONFIGURE_REQUIRES.to_string()))
624 .and_then(|v| v.as_mapping())
625 {
626 dependencies.extend(extract_yaml_dependency_group(
627 configure_requires,
628 "configure",
629 false,
630 false,
631 ));
632 }
633
634 dependencies
635}
636
637fn extract_dependency_group(
638 deps: &serde_json::Map<String, JsonValue>,
639 scope: &str,
640 is_runtime: bool,
641 is_optional: bool,
642) -> Vec<Dependency> {
643 deps.iter()
644 .filter_map(|(name, version)| {
645 if name == "perl" {
647 return None;
648 }
649
650 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
651
652 let extracted_requirement = match version {
653 JsonValue::String(s) => Some(s.clone()),
654 JsonValue::Number(n) => Some(n.to_string()),
655 _ => None,
656 };
657
658 Some(Dependency {
659 purl,
660 extracted_requirement,
661 scope: Some(scope.to_string()),
662 is_runtime: Some(is_runtime),
663 is_optional: Some(is_optional),
664 is_pinned: None,
665 is_direct: Some(true),
666 resolved_package: None,
667 extra_data: None,
668 })
669 })
670 .collect()
671}
672
673fn extract_yaml_dependency_group(
674 deps: &serde_yaml::Mapping,
675 scope: &str,
676 is_runtime: bool,
677 is_optional: bool,
678) -> Vec<Dependency> {
679 deps.iter()
680 .filter_map(|(key, value)| {
681 let name = key.as_str()?;
682
683 if name == "perl" {
685 return None;
686 }
687
688 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
689
690 let extracted_requirement = match value {
691 YamlValue::String(s) => Some(s.clone()),
692 YamlValue::Number(n) => Some(n.to_string()),
693 _ => None,
694 };
695
696 Some(Dependency {
697 purl,
698 extracted_requirement,
699 scope: Some(scope.to_string()),
700 is_runtime: Some(is_runtime),
701 is_optional: Some(is_optional),
702 is_pinned: None,
703 is_direct: Some(true),
704 resolved_package: None,
705 extra_data: None,
706 })
707 })
708 .collect()
709}
710
711crate::register_parser!(
712 "CPAN Perl META.json",
713 &["**/META.json"],
714 "cpan",
715 "Perl",
716 Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
717);
718
719crate::register_parser!(
720 "CPAN Perl META.yml",
721 &["**/META.yml"],
722 "cpan",
723 "Perl",
724 Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
725);
726
727crate::register_parser!(
728 "CPAN Perl MANIFEST",
729 &["**/MANIFEST"],
730 "cpan",
731 "Perl",
732 Some("https://metacpan.org/pod/Module::Manifest"),
733);