1use std::collections::HashMap;
19use std::path::Path;
20use std::sync::LazyLock;
21
22use crate::parser_warn as warn;
23use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
24use packageurl::PackageUrl;
25use regex::Regex;
26use serde_json::json;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
29
30use super::PackageParser;
31use super::license_normalization::{
32 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
33 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
34};
35
36static RE_WRITEMAKEFILE: LazyLock<Regex> = LazyLock::new(|| {
37 Regex::new(r"WriteMakefile1?\s*\(").expect("valid regex: WriteMakefile call pattern")
38});
39static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
40 Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
41 .expect("valid regex: simple key=>value pattern")
42});
43static RE_HASH_BLOCK: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").expect("valid regex: hash block pattern")
45});
46static RE_AUTHOR_ARRAY: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").expect("valid regex: AUTHOR array pattern")
48});
49static RE_QUOTED_STRING: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(r#"['"]([^'"]*)['"']"#).expect("valid regex: quoted string pattern")
51});
52static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
53 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#)
54 .expect("valid regex: dependency pair pattern")
55});
56static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
57 Regex::new(
58 r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
59 )
60 .expect("valid regex: VERSION assignment pattern")
61});
62
63const PACKAGE_TYPE: PackageType = PackageType::Cpan;
64const MAX_METADATA_FILE_SIZE: u64 = 1024 * 1024;
65
66pub struct CpanMakefilePlParser;
67
68impl PackageParser for CpanMakefilePlParser {
69 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
70
71 fn is_match(path: &Path) -> bool {
72 path.file_name().is_some_and(|name| name == "Makefile.PL")
73 }
74
75 fn extract_packages(path: &Path) -> Vec<PackageData> {
76 let content = match read_file_to_string(path, None) {
77 Ok(c) => c,
78 Err(e) => {
79 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
80 return vec![PackageData {
81 package_type: Some(PACKAGE_TYPE),
82 primary_language: Some("Perl".to_string()),
83 datasource_id: Some(DatasourceId::CpanMakefile),
84 ..Default::default()
85 }];
86 }
87 };
88
89 vec![parse_makefile_pl_with_base(&content, path.parent())]
90 }
91}
92
/// Test-only convenience wrapper: parses `content` with no base directory, so
/// `VERSION_FROM` / `ABSTRACT_FROM` references are never read from disk.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_makefile_pl_with_base(content, no_base_dir)
}
97
98pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
99 let makefile_block = extract_writemakefile_block(content);
101 if makefile_block.is_empty() {
102 return default_package_data();
103 }
104
105 let fields = parse_hash_fields(&makefile_block);
106
107 let name = fields.get("NAME").and_then(|n| sanitize_scalar_field(n));
108 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
109
110 let version = fields
111 .get("VERSION")
112 .and_then(|v| sanitize_scalar_field(v))
113 .or_else(|| resolved_metadata.version.clone());
114 let description = fields
115 .get("ABSTRACT")
116 .and_then(|d| sanitize_scalar_field(d))
117 .or_else(|| resolved_metadata.abstract_text.clone());
118 let extracted_license_statement = fields.get("LICENSE").and_then(|l| sanitize_scalar_field(l));
119 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
120 extracted_license_statement
121 .as_deref()
122 .and_then(normalize_cpan_makefile_license)
123 .map(|normalized| {
124 build_declared_license_data(
125 normalized,
126 DeclaredLicenseMatchMetadata::single_line(
127 extracted_license_statement.as_deref().unwrap_or_default(),
128 ),
129 )
130 })
131 .unwrap_or_else(empty_declared_license_data);
132
133 let parties = parse_author(&fields);
134 let dependencies = parse_dependencies(&fields);
135
136 let mut extra_data = HashMap::new();
137 if let Some(min_perl) = fields
138 .get("MIN_PERL_VERSION")
139 .and_then(|value| sanitize_scalar_field(value))
140 {
141 extra_data.insert("MIN_PERL_VERSION".to_string(), json!(min_perl));
142 }
143 if let Some(version_from) = fields
144 .get("VERSION_FROM")
145 .and_then(|value| sanitize_scalar_field(value))
146 {
147 extra_data.insert("VERSION_FROM".to_string(), json!(version_from));
148 }
149 if let Some(abstract_from) = fields
150 .get("ABSTRACT_FROM")
151 .and_then(|value| sanitize_scalar_field(value))
152 {
153 extra_data.insert("ABSTRACT_FROM".to_string(), json!(abstract_from));
154 }
155
156 let purl = name.as_ref().and_then(|n| {
158 let purl_name = n.replace("::", "-");
159 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
160 if let Some(v) = &version {
161 let _ = p.with_version(v).ok();
162 }
163 p.to_string()
164 })
165 });
166
167 PackageData {
168 package_type: Some(PACKAGE_TYPE),
169 namespace: Some("cpan".to_string()),
170 name,
171 version,
172 description,
173 declared_license_expression,
174 declared_license_expression_spdx,
175 license_detections,
176 extracted_license_statement,
177 parties,
178 dependencies,
179 extra_data: if extra_data.is_empty() {
180 None
181 } else {
182 Some(extra_data)
183 },
184 purl,
185 datasource_id: Some(DatasourceId::CpanMakefile),
186 primary_language: Some("Perl".to_string()),
187 ..Default::default()
188 }
189}
190
/// Values recovered from the module files named by `VERSION_FROM` and
/// `ABSTRACT_FROM`; a field is `None` when nothing could be extracted for it.
#[derive(Default)]
struct ResolvedMetadata {
    /// Version string extracted from the `VERSION_FROM` file.
    version: Option<String>,
    /// Abstract extracted from the `ABSTRACT_FROM` file.
    abstract_text: Option<String>,
}
196
197fn default_package_data() -> PackageData {
198 PackageData {
199 package_type: Some(PACKAGE_TYPE),
200 primary_language: Some("Perl".to_string()),
201 datasource_id: Some(DatasourceId::CpanMakefile),
202 ..Default::default()
203 }
204}
205
206fn normalize_cpan_makefile_license(value: &str) -> Option<NormalizedDeclaredLicense> {
207 match value.trim() {
208 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
209 "gpl-1.0-plus OR artistic-perl-1.0",
210 "GPL-1.0-or-later OR Artistic-1.0-Perl",
211 )),
212 "artistic_2" => Some(NormalizedDeclaredLicense::new(
213 "artistic-2.0",
214 "Artistic-2.0",
215 )),
216 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
217 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
218 }
219}
220
221fn sanitize_scalar_field(value: &str) -> Option<String> {
222 let trimmed = value.trim();
223 if trimmed.is_empty() || looks_like_unresolved_template_value(trimmed) {
224 return None;
225 }
226
227 Some(truncate_field(trimmed.to_string()))
228}
229
/// Reports whether `value` still contains template markup or is one of the
/// known boilerplate placeholders.
///
/// Surrounding whitespace is ignored. Placeholders are compared
/// case-insensitively (ASCII only) against the whole value.
fn looks_like_unresolved_template_value(value: &str) -> bool {
    // Any one of these substrings is enough to flag the value.
    const MARKERS: [&str; 7] = ["[%", "%]", "<%", "%>", "${{", "[d2%", "%2d]"];
    // Whole-value boilerplate left behind by project scaffolding.
    const PLACEHOLDERS: [&str; 3] = [
        "YOUR NAME",
        "YOUR APPLICATION ABSTRACT",
        "YOUREMAIL@EXAMPLE.COM",
    ];

    let candidate = value.trim();

    if MARKERS.iter().any(|marker| candidate.contains(*marker)) {
        return true;
    }
    // Double braces only count when both the opener and the closer are present.
    if candidate.contains("{{") && candidate.contains("}}") {
        return true;
    }

    PLACEHOLDERS
        .iter()
        .any(|placeholder| candidate.eq_ignore_ascii_case(placeholder))
}
247
248fn resolve_referenced_metadata(
249 fields: &HashMap<String, String>,
250 base_dir: Option<&Path>,
251) -> ResolvedMetadata {
252 let Some(base_dir) = base_dir else {
253 return ResolvedMetadata::default();
254 };
255
256 let mut resolved = ResolvedMetadata::default();
257 let mut cache: HashMap<String, Option<String>> = HashMap::new();
258
259 if let Some(version_from) = fields.get("VERSION_FROM")
260 && !looks_like_unresolved_template_value(version_from)
261 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
262 {
263 resolved.version = extract_version_from_module_content(content);
264 }
265
266 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
267 && !looks_like_unresolved_template_value(abstract_from)
268 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
269 {
270 resolved.abstract_text = extract_abstract_from_module_content(content);
271 }
272
273 resolved
274}
275
276fn load_referenced_metadata_file<'a>(
277 base_dir: &Path,
278 relative_path: &str,
279 cache: &'a mut HashMap<String, Option<String>>,
280) -> Option<&'a String> {
281 let entry = cache
282 .entry(relative_path.to_string())
283 .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
284 entry.as_ref()
285}
286
287fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
288 let ref_path = Path::new(relative_path);
289 if ref_path.is_absolute() {
290 return None;
291 }
292
293 let base_dir = base_dir.canonicalize().ok()?;
294 let candidate = base_dir.join(ref_path);
295 let canonical_candidate = candidate.canonicalize().ok()?;
296 if !canonical_candidate.starts_with(&base_dir) {
297 return None;
298 }
299
300 let metadata = std::fs::metadata(&canonical_candidate).ok()?;
301 if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
302 return None;
303 }
304
305 read_file_to_string(&canonical_candidate, None).ok()
306}
307
308fn extract_version_from_module_content(content: &str) -> Option<String> {
309 RE_VERSION_ASSIGNMENT
310 .captures(content)
311 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
312 .map(|m| m.as_str().trim().to_string())
313 .map(truncate_field)
314 .filter(|value| !value.is_empty())
315}
316
317fn extract_abstract_from_module_content(content: &str) -> Option<String> {
318 let mut in_name_section = false;
319
320 for line in content.lines() {
321 let trimmed = line.trim();
322 if trimmed == "=head1 NAME" {
323 in_name_section = true;
324 continue;
325 }
326
327 if in_name_section {
328 if trimmed.starts_with('=') {
329 break;
330 }
331 if trimmed.is_empty() {
332 continue;
333 }
334
335 if let Some((_, abstract_text)) = trimmed.split_once(" - ") {
336 let abstract_text = abstract_text.trim();
337 if !abstract_text.is_empty() {
338 return Some(truncate_field(abstract_text.to_string()));
339 }
340 }
341 }
342 }
343
344 None
345}
346
347fn extract_writemakefile_block(content: &str) -> String {
348 let start_match = match RE_WRITEMAKEFILE.find(content) {
349 Some(m) => m,
350 None => return String::new(),
351 };
352
353 let start_pos = start_match.end();
354 let content_from_start = &content[start_pos..];
355
356 let mut depth = 1;
358 let mut end_pos = 0;
359 let chars: Vec<char> = content_from_start.chars().collect();
360
361 for (i, &ch) in chars.iter().enumerate() {
362 if i >= MAX_ITERATION_COUNT {
363 break;
364 }
365 match ch {
366 '(' => depth += 1,
367 ')' => {
368 depth -= 1;
369 if depth == 0 {
370 end_pos = i;
371 break;
372 }
373 }
374 _ => {}
375 }
376 }
377
378 if end_pos > 0 {
379 content_from_start[..end_pos].to_string()
380 } else {
381 String::new()
382 }
383}
384
385fn parse_hash_fields(content: &str) -> HashMap<String, String> {
386 let mut fields = HashMap::new();
387
388 for cap in RE_SIMPLE_KV
389 .captures_iter(content)
390 .take(MAX_ITERATION_COUNT)
391 {
392 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
393 let value = cap
394 .get(2)
395 .or_else(|| cap.get(3))
396 .or_else(|| cap.get(4))
397 .or_else(|| cap.get(5))
398 .map(|m| m.as_str().to_string());
399
400 if let Some(v) = value {
401 fields.insert(key, v);
402 }
403 }
404
405 parse_hash_dependencies(content, &mut fields);
407
408 parse_author_array(content, &mut fields);
410
411 fields
412}
413
414fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
415 for cap in RE_HASH_BLOCK
416 .captures_iter(content)
417 .take(MAX_ITERATION_COUNT)
418 {
419 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("");
420 let hash_content = cap.get(2).map(|m| m.as_str()).unwrap_or("");
421
422 if matches!(
425 key,
426 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
427 ) {
428 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
429 }
430 }
431}
432
433fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
434 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
435 let array_content = cap.get(1).map(|m| m.as_str()).unwrap_or("");
436
437 let authors: Vec<String> = RE_QUOTED_STRING
438 .captures_iter(array_content)
439 .take(MAX_ITERATION_COUNT)
440 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
441 .collect();
442
443 if !authors.is_empty() {
444 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
446 }
447 }
448}
449
450fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
451 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
453 return authors_str
454 .split("||")
455 .filter_map(|author_str| {
456 if author_str.trim().is_empty() {
457 return None;
458 }
459 let (name, email) = parse_author_string(author_str);
460 build_author_party(name, email)
461 })
462 .collect();
463 }
464
465 if let Some(author_str) = fields.get("AUTHOR") {
466 let (name, email) = parse_author_string(author_str);
467 return build_author_party(name, email).into_iter().collect();
468 }
469
470 Vec::new()
471}
472
473fn build_author_party(name: Option<String>, email: Option<String>) -> Option<Party> {
474 if name.is_none() && email.is_none() {
475 return None;
476 }
477
478 Some(Party {
479 role: Some("author".to_string()),
480 name,
481 email,
482 r#type: Some("person".to_string()),
483 url: None,
484 organization: None,
485 organization_url: None,
486 timezone: None,
487 })
488}
489
490fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
491 if let Some(start) = s.find('<')
492 && let Some(end) = s.find('>')
493 && start < end
494 {
495 let name = s[..start].trim();
496 let email = s[start + 1..end].trim();
497 return (sanitize_scalar_field(name), sanitize_scalar_field(email));
498 }
499 (sanitize_scalar_field(s), None)
500}
501
502fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
503 let mut dependencies = Vec::new();
504
505 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
507 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
508 }
509
510 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
512 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
513 }
514
515 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
517 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
518 }
519
520 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
522 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
523 }
524
525 dependencies
526}
527
528fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
529 let mut deps = Vec::new();
530
531 for cap in RE_DEP_PAIR
532 .captures_iter(hash_content)
533 .take(MAX_ITERATION_COUNT)
534 {
535 let module_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
536
537 if module_name == "perl" {
539 continue;
540 }
541
542 let version = cap
543 .get(2)
544 .or_else(|| cap.get(3))
545 .or_else(|| cap.get(4))
546 .map(|m| m.as_str());
547
548 let extracted_requirement = match version {
549 Some("0") | Some("") | None => None,
550 Some(v) => Some(truncate_field(v.to_string())),
551 };
552
553 let purl = PackageUrl::new("cpan", module_name)
554 .ok()
555 .map(|p| p.to_string());
556
557 deps.push(Dependency {
558 purl,
559 extracted_requirement,
560 scope: Some(truncate_field(scope.to_string())),
561 is_runtime: Some(is_runtime),
562 is_optional: Some(false),
563 is_pinned: None,
564 is_direct: Some(true),
565 resolved_package: None,
566 extra_data: None,
567 });
568 }
569
570 deps
571}
572
// Registers this parser's descriptive metadata. The arguments appear to be, in
// order: display name, path patterns, package type, primary language and
// documentation URL — TODO confirm against the `register_parser!` definition.
crate::register_parser!(
    "CPAN Perl Makefile.PL",
    &["*/Makefile.PL"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
);