1use std::collections::HashMap;
19use std::path::Path;
20use std::sync::LazyLock;
21
22use crate::parser_warn as warn;
23use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
24use packageurl::PackageUrl;
25use regex::Regex;
26use serde_json::json;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
29
30use super::PackageParser;
31use super::license_normalization::{
32 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
33 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
34};
35
36static RE_WRITEMAKEFILE: LazyLock<Regex> = LazyLock::new(|| {
37 Regex::new(r"WriteMakefile1?\s*\(").expect("valid regex: WriteMakefile call pattern")
38});
39static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
40 Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
41 .expect("valid regex: simple key=>value pattern")
42});
43static RE_HASH_BLOCK: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").expect("valid regex: hash block pattern")
45});
46static RE_AUTHOR_ARRAY: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").expect("valid regex: AUTHOR array pattern")
48});
49static RE_QUOTED_STRING: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(r#"['"]([^'"]*)['"']"#).expect("valid regex: quoted string pattern")
51});
52static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
53 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#)
54 .expect("valid regex: dependency pair pattern")
55});
56static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
57 Regex::new(
58 r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
59 )
60 .expect("valid regex: VERSION assignment pattern")
61});
62
63const PACKAGE_TYPE: PackageType = PackageType::Cpan;
64const MAX_METADATA_FILE_SIZE: u64 = 1024 * 1024;
65
66pub struct CpanMakefilePlParser;
67
68impl PackageParser for CpanMakefilePlParser {
69 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
70
71 fn is_match(path: &Path) -> bool {
72 path.file_name().is_some_and(|name| name == "Makefile.PL")
73 }
74
75 fn extract_packages(path: &Path) -> Vec<PackageData> {
76 let content = match read_file_to_string(path, None) {
77 Ok(c) => c,
78 Err(e) => {
79 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
80 return vec![PackageData {
81 package_type: Some(PACKAGE_TYPE),
82 primary_language: Some("Perl".to_string()),
83 datasource_id: Some(DatasourceId::CpanMakefile),
84 ..Default::default()
85 }];
86 }
87 };
88
89 vec![parse_makefile_pl_with_base(&content, path.parent())]
90 }
91
92 fn metadata() -> Vec<super::metadata::ParserMetadata> {
93 vec![super::metadata::ParserMetadata {
94 description: "CPAN Perl Makefile.PL",
95 file_patterns: &["*/Makefile.PL"],
96 package_type: "cpan",
97 primary_language: "Perl",
98 documentation_url: Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
99 }]
100 }
101}
102
/// Test-only entry point: parses `content` with no base directory, so files named
/// by `VERSION_FROM` / `ABSTRACT_FROM` are never looked up.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_makefile_pl_with_base(content, no_base_dir)
}
107
108pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
109 let makefile_block = extract_writemakefile_block(content);
111 if makefile_block.is_empty() {
112 return default_package_data();
113 }
114
115 let fields = parse_hash_fields(&makefile_block);
116
117 let name = fields.get("NAME").and_then(|n| sanitize_scalar_field(n));
118 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
119
120 let version = fields
121 .get("VERSION")
122 .and_then(|v| sanitize_scalar_field(v))
123 .or_else(|| resolved_metadata.version.clone());
124 let description = fields
125 .get("ABSTRACT")
126 .and_then(|d| sanitize_scalar_field(d))
127 .or_else(|| resolved_metadata.abstract_text.clone());
128 let extracted_license_statement = fields.get("LICENSE").and_then(|l| sanitize_scalar_field(l));
129 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
130 extracted_license_statement
131 .as_deref()
132 .and_then(normalize_cpan_makefile_license)
133 .map(|normalized| {
134 build_declared_license_data(
135 normalized,
136 DeclaredLicenseMatchMetadata::single_line(
137 extracted_license_statement.as_deref().unwrap_or_default(),
138 ),
139 )
140 })
141 .unwrap_or_else(empty_declared_license_data);
142
143 let parties = parse_author(&fields);
144 let dependencies = parse_dependencies(&fields);
145
146 let mut extra_data = HashMap::new();
147 if let Some(min_perl) = fields
148 .get("MIN_PERL_VERSION")
149 .and_then(|value| sanitize_scalar_field(value))
150 {
151 extra_data.insert("MIN_PERL_VERSION".to_string(), json!(min_perl));
152 }
153 if let Some(version_from) = fields
154 .get("VERSION_FROM")
155 .and_then(|value| sanitize_scalar_field(value))
156 {
157 extra_data.insert("VERSION_FROM".to_string(), json!(version_from));
158 }
159 if let Some(abstract_from) = fields
160 .get("ABSTRACT_FROM")
161 .and_then(|value| sanitize_scalar_field(value))
162 {
163 extra_data.insert("ABSTRACT_FROM".to_string(), json!(abstract_from));
164 }
165
166 let purl = name.as_ref().and_then(|n| {
168 let purl_name = n.replace("::", "-");
169 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
170 if let Some(v) = &version {
171 let _ = p.with_version(v).ok();
172 }
173 p.to_string()
174 })
175 });
176
177 PackageData {
178 package_type: Some(PACKAGE_TYPE),
179 namespace: Some("cpan".to_string()),
180 name,
181 version,
182 description,
183 declared_license_expression,
184 declared_license_expression_spdx,
185 license_detections,
186 extracted_license_statement,
187 parties,
188 dependencies,
189 extra_data: if extra_data.is_empty() {
190 None
191 } else {
192 Some(extra_data)
193 },
194 purl,
195 datasource_id: Some(DatasourceId::CpanMakefile),
196 primary_language: Some("Perl".to_string()),
197 ..Default::default()
198 }
199}
200
/// Values recovered from files that the `Makefile.PL` points at rather than from
/// the `Makefile.PL` itself. Both fields default to `None`.
#[derive(Default)]
struct ResolvedMetadata {
    /// Version read from the file named by `VERSION_FROM`, if any.
    version: Option<String>,
    /// Abstract read from the file named by `ABSTRACT_FROM`, if any.
    abstract_text: Option<String>,
}
206
207fn default_package_data() -> PackageData {
208 PackageData {
209 package_type: Some(PACKAGE_TYPE),
210 primary_language: Some("Perl".to_string()),
211 datasource_id: Some(DatasourceId::CpanMakefile),
212 ..Default::default()
213 }
214}
215
216fn normalize_cpan_makefile_license(value: &str) -> Option<NormalizedDeclaredLicense> {
217 match value.trim() {
218 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
219 "gpl-1.0-plus OR artistic-perl-1.0",
220 "GPL-1.0-or-later OR Artistic-1.0-Perl",
221 )),
222 "artistic_2" => Some(NormalizedDeclaredLicense::new(
223 "artistic-2.0",
224 "Artistic-2.0",
225 )),
226 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
227 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
228 }
229}
230
231fn sanitize_scalar_field(value: &str) -> Option<String> {
232 let trimmed = value.trim();
233 if trimmed.is_empty() || looks_like_unresolved_template_value(trimmed) {
234 return None;
235 }
236
237 Some(truncate_field(trimmed.to_string()))
238}
239
/// Reports whether `value` looks like template residue or boilerplate placeholder
/// text rather than real metadata.
///
/// A value is flagged when, after trimming, it contains any of the template
/// delimiters below, contains both `{{` and `}}`, or equals one of the known
/// placeholder phrases (ASCII case-insensitive).
fn looks_like_unresolved_template_value(value: &str) -> bool {
    // Delimiters that flag the value on their own.
    const MARKERS: [&str; 7] = ["[%", "%]", "<%", "%>", "${{", "[d2%", "%2d]"];
    // Scaffold defaults that were never filled in.
    const PLACEHOLDERS: [&str; 3] = [
        "YOUR NAME",
        "YOUR APPLICATION ABSTRACT",
        "YOUREMAIL@EXAMPLE.COM",
    ];

    let candidate = value.trim();

    if MARKERS.iter().any(|marker| candidate.contains(*marker)) {
        return true;
    }
    // Double braces only count when both the opening and closing pair are present.
    if candidate.contains("{{") && candidate.contains("}}") {
        return true;
    }

    PLACEHOLDERS
        .iter()
        .any(|placeholder| candidate.eq_ignore_ascii_case(placeholder))
}
257
258fn resolve_referenced_metadata(
259 fields: &HashMap<String, String>,
260 base_dir: Option<&Path>,
261) -> ResolvedMetadata {
262 let Some(base_dir) = base_dir else {
263 return ResolvedMetadata::default();
264 };
265
266 let mut resolved = ResolvedMetadata::default();
267 let mut cache: HashMap<String, Option<String>> = HashMap::new();
268
269 if let Some(version_from) = fields.get("VERSION_FROM")
270 && !looks_like_unresolved_template_value(version_from)
271 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
272 {
273 resolved.version = extract_version_from_module_content(content);
274 }
275
276 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
277 && !looks_like_unresolved_template_value(abstract_from)
278 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
279 {
280 resolved.abstract_text = extract_abstract_from_module_content(content);
281 }
282
283 resolved
284}
285
286fn load_referenced_metadata_file<'a>(
287 base_dir: &Path,
288 relative_path: &str,
289 cache: &'a mut HashMap<String, Option<String>>,
290) -> Option<&'a String> {
291 let entry = cache
292 .entry(relative_path.to_string())
293 .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
294 entry.as_ref()
295}
296
297fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
298 let ref_path = Path::new(relative_path);
299 if ref_path.is_absolute() {
300 return None;
301 }
302
303 let base_dir = base_dir.canonicalize().ok()?;
304 let candidate = base_dir.join(ref_path);
305 let canonical_candidate = candidate.canonicalize().ok()?;
306 if !canonical_candidate.starts_with(&base_dir) {
307 return None;
308 }
309
310 let metadata = std::fs::metadata(&canonical_candidate).ok()?;
311 if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
312 return None;
313 }
314
315 read_file_to_string(&canonical_candidate, None).ok()
316}
317
318fn extract_version_from_module_content(content: &str) -> Option<String> {
319 RE_VERSION_ASSIGNMENT
320 .captures(content)
321 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
322 .map(|m| m.as_str().trim().to_string())
323 .map(truncate_field)
324 .filter(|value| !value.is_empty())
325}
326
327fn extract_abstract_from_module_content(content: &str) -> Option<String> {
328 let mut in_name_section = false;
329
330 for line in content.lines() {
331 let trimmed = line.trim();
332 if trimmed == "=head1 NAME" {
333 in_name_section = true;
334 continue;
335 }
336
337 if in_name_section {
338 if trimmed.starts_with('=') {
339 break;
340 }
341 if trimmed.is_empty() {
342 continue;
343 }
344
345 if let Some((_, abstract_text)) = trimmed.split_once(" - ") {
346 let abstract_text = abstract_text.trim();
347 if !abstract_text.is_empty() {
348 return Some(truncate_field(abstract_text.to_string()));
349 }
350 }
351 }
352 }
353
354 None
355}
356
357fn extract_writemakefile_block(content: &str) -> String {
358 let start_match = match RE_WRITEMAKEFILE.find(content) {
359 Some(m) => m,
360 None => return String::new(),
361 };
362
363 let start_pos = start_match.end();
364 let content_from_start = &content[start_pos..];
365
366 let mut depth = 1;
368 let mut end_pos = 0;
369 let chars: Vec<char> = content_from_start.chars().collect();
370
371 for (i, &ch) in chars.iter().enumerate() {
372 if i >= MAX_ITERATION_COUNT {
373 break;
374 }
375 match ch {
376 '(' => depth += 1,
377 ')' => {
378 depth -= 1;
379 if depth == 0 {
380 end_pos = i;
381 break;
382 }
383 }
384 _ => {}
385 }
386 }
387
388 if end_pos > 0 {
389 content_from_start[..end_pos].to_string()
390 } else {
391 String::new()
392 }
393}
394
395fn parse_hash_fields(content: &str) -> HashMap<String, String> {
396 let mut fields = HashMap::new();
397
398 for cap in RE_SIMPLE_KV
399 .captures_iter(content)
400 .take(MAX_ITERATION_COUNT)
401 {
402 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
403 let value = cap
404 .get(2)
405 .or_else(|| cap.get(3))
406 .or_else(|| cap.get(4))
407 .or_else(|| cap.get(5))
408 .map(|m| m.as_str().to_string());
409
410 if let Some(v) = value {
411 fields.insert(key, v);
412 }
413 }
414
415 parse_hash_dependencies(content, &mut fields);
417
418 parse_author_array(content, &mut fields);
420
421 fields
422}
423
424fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
425 for cap in RE_HASH_BLOCK
426 .captures_iter(content)
427 .take(MAX_ITERATION_COUNT)
428 {
429 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("");
430 let hash_content = cap.get(2).map(|m| m.as_str()).unwrap_or("");
431
432 if matches!(
435 key,
436 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
437 ) {
438 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
439 }
440 }
441}
442
443fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
444 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
445 let array_content = cap.get(1).map(|m| m.as_str()).unwrap_or("");
446
447 let authors: Vec<String> = RE_QUOTED_STRING
448 .captures_iter(array_content)
449 .take(MAX_ITERATION_COUNT)
450 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
451 .collect();
452
453 if !authors.is_empty() {
454 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
456 }
457 }
458}
459
460fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
461 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
463 return authors_str
464 .split("||")
465 .filter_map(|author_str| {
466 if author_str.trim().is_empty() {
467 return None;
468 }
469 let (name, email) = parse_author_string(author_str);
470 build_author_party(name, email)
471 })
472 .collect();
473 }
474
475 if let Some(author_str) = fields.get("AUTHOR") {
476 let (name, email) = parse_author_string(author_str);
477 return build_author_party(name, email).into_iter().collect();
478 }
479
480 Vec::new()
481}
482
483fn build_author_party(name: Option<String>, email: Option<String>) -> Option<Party> {
484 if name.is_none() && email.is_none() {
485 return None;
486 }
487
488 Some(Party {
489 role: Some("author".to_string()),
490 name,
491 email,
492 r#type: Some("person".to_string()),
493 url: None,
494 organization: None,
495 organization_url: None,
496 timezone: None,
497 })
498}
499
500fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
501 if let Some(start) = s.find('<')
502 && let Some(end) = s.find('>')
503 && start < end
504 {
505 let name = s[..start].trim();
506 let email = s[start + 1..end].trim();
507 return (sanitize_scalar_field(name), sanitize_scalar_field(email));
508 }
509 (sanitize_scalar_field(s), None)
510}
511
512fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
513 let mut dependencies = Vec::new();
514
515 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
517 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
518 }
519
520 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
522 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
523 }
524
525 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
527 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
528 }
529
530 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
532 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
533 }
534
535 dependencies
536}
537
538fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
539 let mut deps = Vec::new();
540
541 for cap in RE_DEP_PAIR
542 .captures_iter(hash_content)
543 .take(MAX_ITERATION_COUNT)
544 {
545 let module_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
546
547 if module_name == "perl" {
549 continue;
550 }
551
552 let version = cap
553 .get(2)
554 .or_else(|| cap.get(3))
555 .or_else(|| cap.get(4))
556 .map(|m| m.as_str());
557
558 let extracted_requirement = match version {
559 Some("0") | Some("") | None => None,
560 Some(v) => Some(truncate_field(v.to_string())),
561 };
562
563 let purl = PackageUrl::new("cpan", module_name)
564 .ok()
565 .map(|p| p.to_string());
566
567 deps.push(Dependency {
568 purl,
569 extracted_requirement,
570 scope: Some(truncate_field(scope.to_string())),
571 is_runtime: Some(is_runtime),
572 is_optional: Some(false),
573 is_pinned: None,
574 is_direct: Some(true),
575 resolved_package: None,
576 extra_data: None,
577 });
578 }
579
580 deps
581}