1use std::collections::HashMap;
16use std::path::Path;
17use std::sync::LazyLock;
18
19use crate::parser_warn as warn;
20use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
21use packageurl::PackageUrl;
22use regex::Regex;
23use serde_json::json;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26
27use super::PackageParser;
28use super::license_normalization::{
29 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
30 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
31};
32
/// Matches the opening of a `WriteMakefile(` or `WriteMakefile1(` call, up to and including
/// the opening parenthesis.
static RE_WRITEMAKEFILE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"WriteMakefile1?\s*\(").unwrap());
/// Matches a line-anchored `KEY => value` pair whose key consists of uppercase letters and
/// underscores. Group 1 is the key; groups 2-5 hold the value for the single-quoted,
/// double-quoted, `q{...}` and `q(...)` forms respectively.
static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
        .unwrap()
});
/// Matches `KEY => { ... }`. Group 1 is the key, group 2 the body up to the first `}`
/// (nested braces are therefore not supported).
static RE_HASH_BLOCK: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").unwrap());
/// Matches `AUTHOR => [ ... ]`. Group 1 is the array body up to the first `]`.
static RE_AUTHOR_ARRAY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").unwrap());
/// Matches a quoted string; group 1 is its content, which may contain neither quote character.
static RE_QUOTED_STRING: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"['"]([^'"]*)['"']"#).unwrap());
/// Matches a quoted module name followed by `=>` and a version. Group 1 is the module name;
/// groups 2-4 hold the version for the single-quoted, double-quoted and bare-digits forms.
static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#).unwrap()
});
/// Matches a line-anchored `$VERSION = '...'` assignment, optionally prefixed with `our` and
/// optionally package-qualified. Group 1 (single-quoted) or group 2 (double-quoted) is the
/// version text.
static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
    )
    .unwrap()
});

/// Package type reported for everything this parser produces.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;
/// Size limit in bytes (1 MiB) for files referenced via `VERSION_FROM` / `ABSTRACT_FROM`;
/// larger files are not read.
const MAX_METADATA_FILE_SIZE: u64 = 1024 * 1024;
57
58pub struct CpanMakefilePlParser;
59
60impl PackageParser for CpanMakefilePlParser {
61 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
62
63 fn is_match(path: &Path) -> bool {
64 path.file_name().is_some_and(|name| name == "Makefile.PL")
65 }
66
67 fn extract_packages(path: &Path) -> Vec<PackageData> {
68 let content = match read_file_to_string(path, None) {
69 Ok(c) => c,
70 Err(e) => {
71 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
72 return vec![PackageData {
73 package_type: Some(PACKAGE_TYPE),
74 primary_language: Some("Perl".to_string()),
75 datasource_id: Some(DatasourceId::CpanMakefile),
76 ..Default::default()
77 }];
78 }
79 };
80
81 vec![parse_makefile_pl_with_base(&content, path.parent())]
82 }
83}
84
/// Test-only convenience wrapper: parses `content` without a base directory, so
/// `VERSION_FROM` / `ABSTRACT_FROM` references are never resolved from disk.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    parse_makefile_pl_with_base(content, None)
}
89
90pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
91 let makefile_block = extract_writemakefile_block(content);
93 if makefile_block.is_empty() {
94 return default_package_data();
95 }
96
97 let fields = parse_hash_fields(&makefile_block);
98
99 let name = fields.get("NAME").map(|n| truncate_field(n.to_string()));
100 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
101
102 let version = fields
103 .get("VERSION")
104 .map(|v| truncate_field(v.to_string()))
105 .or_else(|| resolved_metadata.version.clone());
106 let description = fields
107 .get("ABSTRACT")
108 .map(|d| truncate_field(d.to_string()))
109 .or_else(|| resolved_metadata.abstract_text.clone());
110 let extracted_license_statement = fields.get("LICENSE").map(|l| truncate_field(l.to_string()));
111 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
112 extracted_license_statement
113 .as_deref()
114 .and_then(normalize_cpan_makefile_license)
115 .map(|normalized| {
116 build_declared_license_data(
117 normalized,
118 DeclaredLicenseMatchMetadata::single_line(
119 extracted_license_statement.as_deref().unwrap_or_default(),
120 ),
121 )
122 })
123 .unwrap_or_else(empty_declared_license_data);
124
125 let parties = parse_author(&fields);
126 let dependencies = parse_dependencies(&fields);
127
128 let mut extra_data = HashMap::new();
129 if let Some(min_perl) = fields.get("MIN_PERL_VERSION") {
130 extra_data.insert(
131 "MIN_PERL_VERSION".to_string(),
132 json!(truncate_field(min_perl.to_string())),
133 );
134 }
135 if let Some(version_from) = fields.get("VERSION_FROM") {
136 extra_data.insert(
137 "VERSION_FROM".to_string(),
138 json!(truncate_field(version_from.to_string())),
139 );
140 }
141 if let Some(abstract_from) = fields.get("ABSTRACT_FROM") {
142 extra_data.insert(
143 "ABSTRACT_FROM".to_string(),
144 json!(truncate_field(abstract_from.to_string())),
145 );
146 }
147
148 let purl = name.as_ref().and_then(|n| {
150 let purl_name = n.replace("::", "-");
151 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
152 if let Some(v) = &version {
153 let _ = p.with_version(v).ok();
154 }
155 p.to_string()
156 })
157 });
158
159 PackageData {
160 package_type: Some(PACKAGE_TYPE),
161 namespace: Some("cpan".to_string()),
162 name,
163 version,
164 description,
165 declared_license_expression,
166 declared_license_expression_spdx,
167 license_detections,
168 extracted_license_statement,
169 parties,
170 dependencies,
171 extra_data: if extra_data.is_empty() {
172 None
173 } else {
174 Some(extra_data)
175 },
176 purl,
177 datasource_id: Some(DatasourceId::CpanMakefile),
178 primary_language: Some("Perl".to_string()),
179 ..Default::default()
180 }
181}
182
/// Metadata recovered from files that the `Makefile.PL` points at rather than states inline.
#[derive(Default)]
struct ResolvedMetadata {
    /// Version extracted from the file named by `VERSION_FROM`, if any.
    version: Option<String>,
    /// Abstract extracted from the file named by `ABSTRACT_FROM`, if any.
    abstract_text: Option<String>,
}
188
/// Returns the minimal record used when nothing could be parsed: only the package type,
/// primary language and datasource id are set; every other field keeps its default.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(PACKAGE_TYPE),
        primary_language: Some("Perl".to_string()),
        datasource_id: Some(DatasourceId::CpanMakefile),
        ..Default::default()
    }
}
197
198fn normalize_cpan_makefile_license(value: &str) -> Option<NormalizedDeclaredLicense> {
199 match value.trim() {
200 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
201 "gpl-1.0-plus OR artistic-perl-1.0",
202 "GPL-1.0-or-later OR Artistic-1.0-Perl",
203 )),
204 "artistic_2" => Some(NormalizedDeclaredLicense::new(
205 "artistic-2.0",
206 "Artistic-2.0",
207 )),
208 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
209 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
210 }
211}
212
213fn resolve_referenced_metadata(
214 fields: &HashMap<String, String>,
215 base_dir: Option<&Path>,
216) -> ResolvedMetadata {
217 let Some(base_dir) = base_dir else {
218 return ResolvedMetadata::default();
219 };
220
221 let mut resolved = ResolvedMetadata::default();
222 let mut cache: HashMap<String, Option<String>> = HashMap::new();
223
224 if let Some(version_from) = fields.get("VERSION_FROM")
225 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
226 {
227 resolved.version = extract_version_from_module_content(content);
228 }
229
230 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
231 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
232 {
233 resolved.abstract_text = extract_abstract_from_module_content(content);
234 }
235
236 resolved
237}
238
239fn load_referenced_metadata_file<'a>(
240 base_dir: &Path,
241 relative_path: &str,
242 cache: &'a mut HashMap<String, Option<String>>,
243) -> Option<&'a String> {
244 let entry = cache
245 .entry(relative_path.to_string())
246 .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
247 entry.as_ref()
248}
249
250fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
251 let ref_path = Path::new(relative_path);
252 if ref_path.is_absolute() {
253 return None;
254 }
255
256 let base_dir = base_dir.canonicalize().ok()?;
257 let candidate = base_dir.join(ref_path);
258 let canonical_candidate = candidate.canonicalize().ok()?;
259 if !canonical_candidate.starts_with(&base_dir) {
260 return None;
261 }
262
263 let metadata = std::fs::metadata(&canonical_candidate).ok()?;
264 if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
265 return None;
266 }
267
268 read_file_to_string(&canonical_candidate, None).ok()
269}
270
271fn extract_version_from_module_content(content: &str) -> Option<String> {
272 RE_VERSION_ASSIGNMENT
273 .captures(content)
274 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
275 .map(|m| m.as_str().trim().to_string())
276 .map(truncate_field)
277 .filter(|value| !value.is_empty())
278}
279
280fn extract_abstract_from_module_content(content: &str) -> Option<String> {
281 let mut in_name_section = false;
282
283 for line in content.lines() {
284 let trimmed = line.trim();
285 if trimmed == "=head1 NAME" {
286 in_name_section = true;
287 continue;
288 }
289
290 if in_name_section {
291 if trimmed.starts_with('=') {
292 break;
293 }
294 if trimmed.is_empty() {
295 continue;
296 }
297
298 if let Some((_, abstract_text)) = trimmed.split_once(" - ") {
299 let abstract_text = abstract_text.trim();
300 if !abstract_text.is_empty() {
301 return Some(truncate_field(abstract_text.to_string()));
302 }
303 }
304 }
305 }
306
307 None
308}
309
310fn extract_writemakefile_block(content: &str) -> String {
311 let start_match = match RE_WRITEMAKEFILE.find(content) {
312 Some(m) => m,
313 None => return String::new(),
314 };
315
316 let start_pos = start_match.end();
317 let content_from_start = &content[start_pos..];
318
319 let mut depth = 1;
321 let mut end_pos = 0;
322 let chars: Vec<char> = content_from_start.chars().collect();
323
324 for (i, &ch) in chars.iter().enumerate() {
325 if i >= MAX_ITERATION_COUNT {
326 break;
327 }
328 match ch {
329 '(' => depth += 1,
330 ')' => {
331 depth -= 1;
332 if depth == 0 {
333 end_pos = i;
334 break;
335 }
336 }
337 _ => {}
338 }
339 }
340
341 if end_pos > 0 {
342 content_from_start[..end_pos].to_string()
343 } else {
344 String::new()
345 }
346}
347
348fn parse_hash_fields(content: &str) -> HashMap<String, String> {
349 let mut fields = HashMap::new();
350
351 for cap in RE_SIMPLE_KV
352 .captures_iter(content)
353 .take(MAX_ITERATION_COUNT)
354 {
355 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
356 let value = cap
357 .get(2)
358 .or_else(|| cap.get(3))
359 .or_else(|| cap.get(4))
360 .or_else(|| cap.get(5))
361 .map(|m| m.as_str().to_string());
362
363 if let Some(v) = value {
364 fields.insert(key, v);
365 }
366 }
367
368 parse_hash_dependencies(content, &mut fields);
370
371 parse_author_array(content, &mut fields);
373
374 fields
375}
376
377fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
378 for cap in RE_HASH_BLOCK
379 .captures_iter(content)
380 .take(MAX_ITERATION_COUNT)
381 {
382 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("");
383 let hash_content = cap.get(2).map(|m| m.as_str()).unwrap_or("");
384
385 if matches!(
388 key,
389 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
390 ) {
391 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
392 }
393 }
394}
395
396fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
397 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
398 let array_content = cap.get(1).map(|m| m.as_str()).unwrap_or("");
399
400 let authors: Vec<String> = RE_QUOTED_STRING
401 .captures_iter(array_content)
402 .take(MAX_ITERATION_COUNT)
403 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
404 .collect();
405
406 if !authors.is_empty() {
407 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
409 }
410 }
411}
412
413fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
414 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
416 return authors_str
417 .split("||")
418 .filter_map(|author_str| {
419 if author_str.trim().is_empty() {
420 return None;
421 }
422 let (name, email) = parse_author_string(author_str);
423 Some(Party {
424 role: Some("author".to_string()),
425 name,
426 email,
427 r#type: Some("person".to_string()),
428 url: None,
429 organization: None,
430 organization_url: None,
431 timezone: None,
432 })
433 })
434 .collect();
435 }
436
437 if let Some(author_str) = fields.get("AUTHOR") {
438 let (name, email) = parse_author_string(author_str);
439 return vec![Party {
440 role: Some("author".to_string()),
441 name,
442 email,
443 r#type: Some("person".to_string()),
444 url: None,
445 organization: None,
446 organization_url: None,
447 timezone: None,
448 }];
449 }
450
451 Vec::new()
452}
453
454fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
455 if let Some(start) = s.find('<')
456 && let Some(end) = s.find('>')
457 && start < end
458 {
459 let name = s[..start].trim();
460 let email = s[start + 1..end].trim();
461 return (
462 if name.is_empty() {
463 None
464 } else {
465 Some(truncate_field(name.to_string()))
466 },
467 if email.is_empty() {
468 None
469 } else {
470 Some(truncate_field(email.to_string()))
471 },
472 );
473 }
474 (Some(truncate_field(s.trim().to_string())), None)
475}
476
477fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
478 let mut dependencies = Vec::new();
479
480 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
482 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
483 }
484
485 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
487 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
488 }
489
490 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
492 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
493 }
494
495 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
497 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
498 }
499
500 dependencies
501}
502
503fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
504 let mut deps = Vec::new();
505
506 for cap in RE_DEP_PAIR
507 .captures_iter(hash_content)
508 .take(MAX_ITERATION_COUNT)
509 {
510 let module_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
511
512 if module_name == "perl" {
514 continue;
515 }
516
517 let version = cap
518 .get(2)
519 .or_else(|| cap.get(3))
520 .or_else(|| cap.get(4))
521 .map(|m| m.as_str());
522
523 let extracted_requirement = match version {
524 Some("0") | Some("") | None => None,
525 Some(v) => Some(truncate_field(v.to_string())),
526 };
527
528 let purl = PackageUrl::new("cpan", module_name)
529 .ok()
530 .map(|p| p.to_string());
531
532 deps.push(Dependency {
533 purl,
534 extracted_requirement,
535 scope: Some(truncate_field(scope.to_string())),
536 is_runtime: Some(is_runtime),
537 is_optional: Some(false),
538 is_pinned: None,
539 is_direct: Some(true),
540 resolved_package: None,
541 extra_data: None,
542 });
543 }
544
545 deps
546}
547
// Registers this parser with the crate-wide parser registry.
// NOTE(review): the arguments look like description, path patterns, package type, primary
// language and documentation URL — confirm against the `register_parser!` definition.
crate::register_parser!(
    "CPAN Perl Makefile.PL",
    &["*/Makefile.PL"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
);