1use std::fs;
25use std::path::Path;
26
27use log::warn;
28use packageurl::PackageUrl;
29use serde_json::Value as JsonValue;
30use serde_yaml::Value as YamlValue;
31
32use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType, Party};
33
34use super::PackageParser;
35
// Top-level keys looked up in CPAN metadata documents (META.json and/or META.yml).
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_ABSTRACT: &str = "abstract";
// Only consulted by the META.yml parser, as a fallback when `abstract` is absent.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHOR: &str = "author";
const FIELD_RESOURCES: &str = "resources";
// META.json keeps its dependencies nested under this key.
const FIELD_PREREQS: &str = "prereqs";
// META.yml keeps its dependencies in these flat top-level mappings.
const FIELD_REQUIRES: &str = "requires";
const FIELD_BUILD_REQUIRES: &str = "build_requires";
const FIELD_TEST_REQUIRES: &str = "test_requires";
const FIELD_CONFIGURE_REQUIRES: &str = "configure_requires";
48
49pub struct CpanMetaJsonParser;
54
55impl PackageParser for CpanMetaJsonParser {
56 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
57
58 fn is_match(path: &Path) -> bool {
59 path.file_name().is_some_and(|name| name == "META.json")
60 }
61
62 fn extract_packages(path: &Path) -> Vec<PackageData> {
63 let json = match read_and_parse_json(path) {
64 Ok(json) => json,
65 Err(e) => {
66 warn!("Failed to parse META.json at {:?}: {}", path, e);
67 return vec![default_package_data(DatasourceId::CpanMetaJson)];
68 }
69 };
70
71 let name = json
72 .get(FIELD_NAME)
73 .and_then(|v| v.as_str())
74 .map(String::from);
75
76 let version = extract_version_from_json(&json);
77
78 let description = json
79 .get(FIELD_ABSTRACT)
80 .and_then(|v| v.as_str())
81 .map(String::from);
82
83 let extracted_license_statement = extract_license_from_json(&json);
84 let parties = extract_parties_from_json(&json);
85 let dependencies = extract_dependencies_from_json(&json);
86 let (homepage_url, vcs_url, code_view_url, bug_tracking_url) =
87 extract_resources_from_json(&json);
88
89 vec![PackageData {
90 package_type: Some(Self::PACKAGE_TYPE),
91 name,
92 version,
93 description,
94 extracted_license_statement,
95 parties,
96 dependencies,
97 homepage_url,
98 vcs_url,
99 code_view_url,
100 bug_tracking_url,
101 primary_language: Some("Perl".to_string()),
102 datasource_id: Some(DatasourceId::CpanMetaJson),
103 ..Default::default()
104 }]
105 }
106}
107
108pub struct CpanMetaYmlParser;
112
113impl PackageParser for CpanMetaYmlParser {
114 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
115
116 fn is_match(path: &Path) -> bool {
117 path.file_name().is_some_and(|name| name == "META.yml")
118 }
119
120 fn extract_packages(path: &Path) -> Vec<PackageData> {
121 let yaml = match read_and_parse_yaml(path) {
122 Ok(yaml) => yaml,
123 Err(e) => {
124 warn!("Failed to parse META.yml at {:?}: {}", path, e);
125 return vec![default_package_data(DatasourceId::CpanMetaYml)];
126 }
127 };
128
129 let name = yaml
130 .get(FIELD_NAME)
131 .and_then(|v| v.as_str())
132 .map(String::from);
133
134 let version = extract_version_from_yaml(&yaml);
135
136 let description = yaml
137 .get(FIELD_ABSTRACT)
138 .or_else(|| yaml.get(FIELD_DESCRIPTION))
139 .and_then(|v| v.as_str())
140 .map(String::from);
141
142 let extracted_license_statement = extract_license_from_yaml(&yaml);
143 let parties = extract_parties_from_yaml(&yaml);
144 let dependencies = extract_dependencies_from_yaml(&yaml);
145 let (homepage_url, vcs_url, bug_tracking_url) = extract_resources_from_yaml(&yaml);
146
147 vec![PackageData {
148 package_type: Some(Self::PACKAGE_TYPE),
149 name,
150 version,
151 description,
152 extracted_license_statement,
153 parties,
154 dependencies,
155 homepage_url,
156 vcs_url,
157 bug_tracking_url,
158 primary_language: Some("Perl".to_string()),
159 datasource_id: Some(DatasourceId::CpanMetaYml),
160 ..Default::default()
161 }]
162 }
163}
164
165pub struct CpanManifestParser;
169
170impl PackageParser for CpanManifestParser {
171 const PACKAGE_TYPE: PackageType = PackageType::Cpan;
172
173 fn is_match(path: &Path) -> bool {
174 path.file_name().is_some_and(|name| name == "MANIFEST")
175 }
176
177 fn extract_packages(path: &Path) -> Vec<PackageData> {
178 let content = match fs::read_to_string(path) {
179 Ok(content) => content,
180 Err(e) => {
181 warn!("Failed to read MANIFEST at {:?}: {}", path, e);
182 return vec![default_package_data(DatasourceId::CpanManifest)];
183 }
184 };
185
186 let file_references = content
187 .lines()
188 .filter(|line| !line.trim().is_empty())
189 .filter(|line| !line.trim().starts_with('#'))
190 .map(|line| {
191 let path = line.split_whitespace().next().unwrap_or(line);
193 FileReference {
194 path: path.to_string(),
195 size: None,
196 sha1: None,
197 md5: None,
198 sha256: None,
199 sha512: None,
200 extra_data: None,
201 }
202 })
203 .collect();
204
205 vec![PackageData {
206 package_type: Some(Self::PACKAGE_TYPE),
207 file_references,
208 primary_language: Some("Perl".to_string()),
209 datasource_id: Some(DatasourceId::CpanManifest),
210 ..Default::default()
211 }]
212 }
213}
214
215fn default_package_data(datasource_id: DatasourceId) -> PackageData {
216 PackageData {
217 package_type: Some(CpanMetaJsonParser::PACKAGE_TYPE),
218 primary_language: Some("Perl".to_string()),
219 datasource_id: Some(datasource_id),
220 ..Default::default()
221 }
222}
223
224fn read_and_parse_json(path: &Path) -> Result<serde_json::Map<String, JsonValue>, String> {
225 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
226 let json: JsonValue =
227 serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
228 json.as_object()
229 .cloned()
230 .ok_or_else(|| "Root JSON is not an object".to_string())
231}
232
233fn read_and_parse_yaml(path: &Path) -> Result<serde_yaml::Mapping, String> {
234 let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
235 let yaml: YamlValue =
236 serde_yaml::from_str(&content).map_err(|e| format!("Failed to parse YAML: {}", e))?;
237 yaml.as_mapping()
238 .cloned()
239 .ok_or_else(|| "Root YAML is not a mapping".to_string())
240}
241
242fn extract_version_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
243 json.get(FIELD_VERSION).and_then(|v| match v {
244 JsonValue::String(s) => Some(s.clone()),
245 JsonValue::Number(n) => Some(n.to_string()),
246 _ => None,
247 })
248}
249
250fn extract_version_from_yaml(yaml: &serde_yaml::Mapping) -> Option<String> {
251 yaml.get(YamlValue::String(FIELD_VERSION.to_string()))
252 .and_then(|v| match v {
253 YamlValue::String(s) => Some(s.clone()),
254 YamlValue::Number(n) => Some(n.to_string()),
255 _ => None,
256 })
257}
258
259fn extract_license_from_json(json: &serde_json::Map<String, JsonValue>) -> Option<String> {
260 json.get(FIELD_LICENSE).and_then(|v| match v {
261 JsonValue::String(s) => Some(s.clone()),
262 JsonValue::Array(arr) => {
263 let licenses: Vec<String> = arr
264 .iter()
265 .filter_map(|item| item.as_str().map(String::from))
266 .collect();
267 if licenses.is_empty() {
268 None
269 } else {
270 Some(licenses.join(" AND "))
271 }
272 }
273 _ => None,
274 })
275}
276
277fn extract_license_from_yaml(yaml: &serde_yaml::Mapping) -> Option<String> {
278 yaml.get(YamlValue::String(FIELD_LICENSE.to_string()))
279 .and_then(|v| match v {
280 YamlValue::String(s) => Some(s.clone()),
281 YamlValue::Sequence(arr) => {
282 let licenses: Vec<String> = arr
283 .iter()
284 .filter_map(|item| item.as_str().map(String::from))
285 .collect();
286 if licenses.is_empty() {
287 None
288 } else {
289 Some(licenses.join(" AND "))
290 }
291 }
292 _ => None,
293 })
294}
295
296fn extract_parties_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Party> {
297 json.get(FIELD_AUTHOR)
298 .and_then(|v| v.as_array())
299 .map_or_else(Vec::new, |authors| {
300 authors
301 .iter()
302 .filter_map(|author| {
303 author.as_str().map(|s| {
304 let (name, email) = parse_author_string(s);
305 Party {
306 r#type: Some("person".to_string()),
307 role: Some("author".to_string()),
308 name,
309 email,
310 url: None,
311 organization: None,
312 organization_url: None,
313 timezone: None,
314 }
315 })
316 })
317 .collect()
318 })
319}
320
321fn extract_parties_from_yaml(yaml: &serde_yaml::Mapping) -> Vec<Party> {
322 yaml.get(YamlValue::String(FIELD_AUTHOR.to_string()))
323 .and_then(|v| v.as_sequence())
324 .map_or_else(Vec::new, |authors| {
325 authors
326 .iter()
327 .filter_map(|author| {
328 author.as_str().map(|s| {
329 let (name, email) = parse_author_string(s);
330 Party {
331 r#type: Some("person".to_string()),
332 role: Some("author".to_string()),
333 name,
334 email,
335 url: None,
336 organization: None,
337 organization_url: None,
338 timezone: None,
339 }
340 })
341 })
342 .collect()
343 })
344}
345
/// Splits an author entry of the form `Name <email>` into `(name, email)`.
///
/// The email is the text between the first `<` and the first `>` that follows
/// it; the name is everything before that `<`. Both parts are trimmed, and an
/// empty part is reported as `None`. When no `<...>` pair is present, the whole
/// trimmed input is returned as the name (or `None` if it is blank) with no email.
fn parse_author_string(author_str: &str) -> (Option<String>, Option<String>) {
    /// Trims `text` and maps an empty result to `None`.
    fn non_empty(text: &str) -> Option<String> {
        let trimmed = text.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_string())
        }
    }

    if let Some((name, rest)) = author_str.split_once('<') {
        // Look for the closing bracket only after the opening one, so a stray
        // `>` earlier in the name does not hide a well-formed `<email>`.
        if let Some((email, _)) = rest.split_once('>') {
            return (non_empty(name), non_empty(email));
        }
    }

    (non_empty(author_str), None)
}
370
371fn extract_resources_from_json(
372 json: &serde_json::Map<String, JsonValue>,
373) -> (
374 Option<String>,
375 Option<String>,
376 Option<String>,
377 Option<String>,
378) {
379 let resources = match json.get(FIELD_RESOURCES).and_then(|v| v.as_object()) {
380 Some(r) => r,
381 None => return (None, None, None, None),
382 };
383
384 let homepage_url = resources
385 .get("homepage")
386 .and_then(|v| v.as_str())
387 .map(String::from);
388
389 let vcs_url = resources.get("repository").and_then(|v| match v {
390 JsonValue::String(s) => Some(s.clone()),
391 JsonValue::Object(obj) => obj.get("url").and_then(|u| u.as_str()).map(String::from),
392 _ => None,
393 });
394
395 let code_view_url = resources
396 .get("repository")
397 .and_then(|v| v.as_object())
398 .and_then(|obj| obj.get("web").and_then(|u| u.as_str()).map(String::from));
399
400 let bug_tracking_url = resources.get("bugtracker").and_then(|v| match v {
401 JsonValue::String(s) => Some(s.clone()),
402 JsonValue::Object(obj) => obj.get("web").and_then(|u| u.as_str()).map(String::from),
403 _ => None,
404 });
405
406 (homepage_url, vcs_url, code_view_url, bug_tracking_url)
407}
408
409fn extract_resources_from_yaml(
410 yaml: &serde_yaml::Mapping,
411) -> (Option<String>, Option<String>, Option<String>) {
412 let resources = match yaml
413 .get(YamlValue::String(FIELD_RESOURCES.to_string()))
414 .and_then(|v| v.as_mapping())
415 {
416 Some(r) => r,
417 None => return (None, None, None),
418 };
419
420 let homepage_url = resources
421 .get(YamlValue::String("homepage".to_string()))
422 .and_then(|v| v.as_str())
423 .map(String::from);
424
425 let vcs_url = resources
426 .get(YamlValue::String("repository".to_string()))
427 .and_then(|v| v.as_str())
428 .map(String::from);
429
430 let bug_tracking_url = resources
431 .get(YamlValue::String("bugtracker".to_string()))
432 .and_then(|v| v.as_str())
433 .map(String::from);
434
435 (homepage_url, vcs_url, bug_tracking_url)
436}
437
438fn extract_dependencies_from_json(json: &serde_json::Map<String, JsonValue>) -> Vec<Dependency> {
439 let mut dependencies = Vec::new();
440
441 let prereqs = match json.get(FIELD_PREREQS).and_then(|v| v.as_object()) {
442 Some(p) => p,
443 None => return dependencies,
444 };
445
446 if let Some(runtime) = prereqs.get("runtime").and_then(|v| v.as_object())
448 && let Some(requires) = runtime.get("requires").and_then(|v| v.as_object())
449 {
450 dependencies.extend(extract_dependency_group(requires, "runtime", true, false));
451 }
452
453 if let Some(build) = prereqs.get("build").and_then(|v| v.as_object())
455 && let Some(requires) = build.get("requires").and_then(|v| v.as_object())
456 {
457 dependencies.extend(extract_dependency_group(requires, "build", false, false));
458 }
459
460 if let Some(test) = prereqs.get("test").and_then(|v| v.as_object())
462 && let Some(requires) = test.get("requires").and_then(|v| v.as_object())
463 {
464 dependencies.extend(extract_dependency_group(requires, "test", false, false));
465 }
466
467 if let Some(configure) = prereqs.get("configure").and_then(|v| v.as_object())
469 && let Some(requires) = configure.get("requires").and_then(|v| v.as_object())
470 {
471 dependencies.extend(extract_dependency_group(
472 requires,
473 "configure",
474 false,
475 false,
476 ));
477 }
478
479 dependencies
480}
481
482fn extract_dependencies_from_yaml(yaml: &serde_yaml::Mapping) -> Vec<Dependency> {
483 let mut dependencies = Vec::new();
484
485 if let Some(requires) = yaml
487 .get(YamlValue::String(FIELD_REQUIRES.to_string()))
488 .and_then(|v| v.as_mapping())
489 {
490 dependencies.extend(extract_yaml_dependency_group(
491 requires, "runtime", true, false,
492 ));
493 }
494
495 if let Some(build_requires) = yaml
496 .get(YamlValue::String(FIELD_BUILD_REQUIRES.to_string()))
497 .and_then(|v| v.as_mapping())
498 {
499 dependencies.extend(extract_yaml_dependency_group(
500 build_requires,
501 "build",
502 false,
503 false,
504 ));
505 }
506
507 if let Some(test_requires) = yaml
508 .get(YamlValue::String(FIELD_TEST_REQUIRES.to_string()))
509 .and_then(|v| v.as_mapping())
510 {
511 dependencies.extend(extract_yaml_dependency_group(
512 test_requires,
513 "test",
514 false,
515 false,
516 ));
517 }
518
519 if let Some(configure_requires) = yaml
520 .get(YamlValue::String(FIELD_CONFIGURE_REQUIRES.to_string()))
521 .and_then(|v| v.as_mapping())
522 {
523 dependencies.extend(extract_yaml_dependency_group(
524 configure_requires,
525 "configure",
526 false,
527 false,
528 ));
529 }
530
531 dependencies
532}
533
534fn extract_dependency_group(
535 deps: &serde_json::Map<String, JsonValue>,
536 scope: &str,
537 is_runtime: bool,
538 is_optional: bool,
539) -> Vec<Dependency> {
540 deps.iter()
541 .filter_map(|(name, version)| {
542 if name == "perl" {
544 return None;
545 }
546
547 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
548
549 let extracted_requirement = match version {
550 JsonValue::String(s) => Some(s.clone()),
551 JsonValue::Number(n) => Some(n.to_string()),
552 _ => None,
553 };
554
555 Some(Dependency {
556 purl,
557 extracted_requirement,
558 scope: Some(scope.to_string()),
559 is_runtime: Some(is_runtime),
560 is_optional: Some(is_optional),
561 is_pinned: None,
562 is_direct: Some(true),
563 resolved_package: None,
564 extra_data: None,
565 })
566 })
567 .collect()
568}
569
570fn extract_yaml_dependency_group(
571 deps: &serde_yaml::Mapping,
572 scope: &str,
573 is_runtime: bool,
574 is_optional: bool,
575) -> Vec<Dependency> {
576 deps.iter()
577 .filter_map(|(key, value)| {
578 let name = key.as_str()?;
579
580 if name == "perl" {
582 return None;
583 }
584
585 let purl = PackageUrl::new("cpan", name).ok().map(|p| p.to_string());
586
587 let extracted_requirement = match value {
588 YamlValue::String(s) => Some(s.clone()),
589 YamlValue::Number(n) => Some(n.to_string()),
590 _ => None,
591 };
592
593 Some(Dependency {
594 purl,
595 extracted_requirement,
596 scope: Some(scope.to_string()),
597 is_runtime: Some(is_runtime),
598 is_optional: Some(is_optional),
599 is_pinned: None,
600 is_direct: Some(true),
601 resolved_package: None,
602 extra_data: None,
603 })
604 })
605 .collect()
606}
607
// Parser registrations, one per CPAN file kind handled in this file.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition, which is not visible here.
crate::register_parser!(
    "CPAN Perl META.json",
    &["**/META.json"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
);

crate::register_parser!(
    "CPAN Perl META.yml",
    &["**/META.yml"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/CPAN::Meta::Spec"),
);

crate::register_parser!(
    "CPAN Perl MANIFEST",
    &["**/MANIFEST"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/Module::Manifest"),
);