1use std::collections::HashMap;
16use std::path::Path;
17use std::sync::LazyLock;
18
19use crate::parser_warn as warn;
20use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
21use packageurl::PackageUrl;
22use regex::Regex;
23use serde_json::json;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26
27use super::PackageParser;
28use super::license_normalization::{
29 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
30 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
31};
32
33static RE_WRITEMAKEFILE: LazyLock<Regex> = LazyLock::new(|| {
34 Regex::new(r"WriteMakefile1?\s*\(").expect("valid regex: WriteMakefile call pattern")
35});
36static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
37 Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
38 .expect("valid regex: simple key=>value pattern")
39});
40static RE_HASH_BLOCK: LazyLock<Regex> = LazyLock::new(|| {
41 Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").expect("valid regex: hash block pattern")
42});
43static RE_AUTHOR_ARRAY: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").expect("valid regex: AUTHOR array pattern")
45});
46static RE_QUOTED_STRING: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r#"['"]([^'"]*)['"']"#).expect("valid regex: quoted string pattern")
48});
49static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#)
51 .expect("valid regex: dependency pair pattern")
52});
53static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
54 Regex::new(
55 r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
56 )
57 .expect("valid regex: VERSION assignment pattern")
58});
59
60const PACKAGE_TYPE: PackageType = PackageType::Cpan;
/// Largest referenced metadata file, in bytes, that will be read (1 MiB).
const MAX_METADATA_FILE_SIZE: u64 = 1 << 20;
62
63pub struct CpanMakefilePlParser;
64
65impl PackageParser for CpanMakefilePlParser {
66 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
67
68 fn is_match(path: &Path) -> bool {
69 path.file_name().is_some_and(|name| name == "Makefile.PL")
70 }
71
72 fn extract_packages(path: &Path) -> Vec<PackageData> {
73 let content = match read_file_to_string(path, None) {
74 Ok(c) => c,
75 Err(e) => {
76 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
77 return vec![PackageData {
78 package_type: Some(PACKAGE_TYPE),
79 primary_language: Some("Perl".to_string()),
80 datasource_id: Some(DatasourceId::CpanMakefile),
81 ..Default::default()
82 }];
83 }
84 };
85
86 vec![parse_makefile_pl_with_base(&content, path.parent())]
87 }
88}
89
/// Test-only convenience wrapper: parses `content` with no base directory, so
/// `VERSION_FROM` / `ABSTRACT_FROM` references are never followed.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_makefile_pl_with_base(content, no_base_dir)
}
94
95pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
96 let makefile_block = extract_writemakefile_block(content);
98 if makefile_block.is_empty() {
99 return default_package_data();
100 }
101
102 let fields = parse_hash_fields(&makefile_block);
103
104 let name = fields.get("NAME").and_then(|n| sanitize_scalar_field(n));
105 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
106
107 let version = fields
108 .get("VERSION")
109 .and_then(|v| sanitize_scalar_field(v))
110 .or_else(|| resolved_metadata.version.clone());
111 let description = fields
112 .get("ABSTRACT")
113 .and_then(|d| sanitize_scalar_field(d))
114 .or_else(|| resolved_metadata.abstract_text.clone());
115 let extracted_license_statement = fields.get("LICENSE").and_then(|l| sanitize_scalar_field(l));
116 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
117 extracted_license_statement
118 .as_deref()
119 .and_then(normalize_cpan_makefile_license)
120 .map(|normalized| {
121 build_declared_license_data(
122 normalized,
123 DeclaredLicenseMatchMetadata::single_line(
124 extracted_license_statement.as_deref().unwrap_or_default(),
125 ),
126 )
127 })
128 .unwrap_or_else(empty_declared_license_data);
129
130 let parties = parse_author(&fields);
131 let dependencies = parse_dependencies(&fields);
132
133 let mut extra_data = HashMap::new();
134 if let Some(min_perl) = fields
135 .get("MIN_PERL_VERSION")
136 .and_then(|value| sanitize_scalar_field(value))
137 {
138 extra_data.insert("MIN_PERL_VERSION".to_string(), json!(min_perl));
139 }
140 if let Some(version_from) = fields
141 .get("VERSION_FROM")
142 .and_then(|value| sanitize_scalar_field(value))
143 {
144 extra_data.insert("VERSION_FROM".to_string(), json!(version_from));
145 }
146 if let Some(abstract_from) = fields
147 .get("ABSTRACT_FROM")
148 .and_then(|value| sanitize_scalar_field(value))
149 {
150 extra_data.insert("ABSTRACT_FROM".to_string(), json!(abstract_from));
151 }
152
153 let purl = name.as_ref().and_then(|n| {
155 let purl_name = n.replace("::", "-");
156 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
157 if let Some(v) = &version {
158 let _ = p.with_version(v).ok();
159 }
160 p.to_string()
161 })
162 });
163
164 PackageData {
165 package_type: Some(PACKAGE_TYPE),
166 namespace: Some("cpan".to_string()),
167 name,
168 version,
169 description,
170 declared_license_expression,
171 declared_license_expression_spdx,
172 license_detections,
173 extracted_license_statement,
174 parties,
175 dependencies,
176 extra_data: if extra_data.is_empty() {
177 None
178 } else {
179 Some(extra_data)
180 },
181 purl,
182 datasource_id: Some(DatasourceId::CpanMakefile),
183 primary_language: Some("Perl".to_string()),
184 ..Default::default()
185 }
186}
187
/// Values recovered from the files named by `VERSION_FROM` / `ABSTRACT_FROM`.
///
/// Both fields default to `None`.
#[derive(Default)]
struct ResolvedMetadata {
    /// Version string found in the `VERSION_FROM` file, if any.
    version: Option<String>,
    /// Abstract text found in the `ABSTRACT_FROM` file, if any.
    abstract_text: Option<String>,
}
193
194fn default_package_data() -> PackageData {
195 PackageData {
196 package_type: Some(PACKAGE_TYPE),
197 primary_language: Some("Perl".to_string()),
198 datasource_id: Some(DatasourceId::CpanMakefile),
199 ..Default::default()
200 }
201}
202
203fn normalize_cpan_makefile_license(value: &str) -> Option<NormalizedDeclaredLicense> {
204 match value.trim() {
205 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
206 "gpl-1.0-plus OR artistic-perl-1.0",
207 "GPL-1.0-or-later OR Artistic-1.0-Perl",
208 )),
209 "artistic_2" => Some(NormalizedDeclaredLicense::new(
210 "artistic-2.0",
211 "Artistic-2.0",
212 )),
213 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
214 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
215 }
216}
217
218fn sanitize_scalar_field(value: &str) -> Option<String> {
219 let trimmed = value.trim();
220 if trimmed.is_empty() || looks_like_unresolved_template_value(trimmed) {
221 return None;
222 }
223
224 Some(truncate_field(trimmed.to_string()))
225}
226
/// Reports whether `value` still contains template markup or is a known boilerplate
/// placeholder rather than real metadata.
///
/// The check is made on the trimmed value. Placeholder phrases are compared
/// ASCII-case-insensitively against the whole value; template markers are matched as
/// substrings.
fn looks_like_unresolved_template_value(value: &str) -> bool {
    // Any single one of these substrings marks the value as templated.
    const TEMPLATE_MARKERS: [&str; 7] = ["[%", "%]", "<%", "%>", "${{", "[d2%", "%2d]"];
    // Whole-value boilerplate placeholders.
    const PLACEHOLDERS: [&str; 3] = [
        "YOUR NAME",
        "YOUR APPLICATION ABSTRACT",
        "YOUREMAIL@EXAMPLE.COM",
    ];

    let candidate = value.trim();

    if TEMPLATE_MARKERS
        .iter()
        .any(|marker| candidate.contains(*marker))
    {
        return true;
    }

    // Double braces only count when both the opening and closing pair are present.
    if candidate.contains("{{") && candidate.contains("}}") {
        return true;
    }

    PLACEHOLDERS
        .iter()
        .any(|placeholder| candidate.eq_ignore_ascii_case(placeholder))
}
244
245fn resolve_referenced_metadata(
246 fields: &HashMap<String, String>,
247 base_dir: Option<&Path>,
248) -> ResolvedMetadata {
249 let Some(base_dir) = base_dir else {
250 return ResolvedMetadata::default();
251 };
252
253 let mut resolved = ResolvedMetadata::default();
254 let mut cache: HashMap<String, Option<String>> = HashMap::new();
255
256 if let Some(version_from) = fields.get("VERSION_FROM")
257 && !looks_like_unresolved_template_value(version_from)
258 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
259 {
260 resolved.version = extract_version_from_module_content(content);
261 }
262
263 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
264 && !looks_like_unresolved_template_value(abstract_from)
265 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
266 {
267 resolved.abstract_text = extract_abstract_from_module_content(content);
268 }
269
270 resolved
271}
272
273fn load_referenced_metadata_file<'a>(
274 base_dir: &Path,
275 relative_path: &str,
276 cache: &'a mut HashMap<String, Option<String>>,
277) -> Option<&'a String> {
278 let entry = cache
279 .entry(relative_path.to_string())
280 .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
281 entry.as_ref()
282}
283
284fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
285 let ref_path = Path::new(relative_path);
286 if ref_path.is_absolute() {
287 return None;
288 }
289
290 let base_dir = base_dir.canonicalize().ok()?;
291 let candidate = base_dir.join(ref_path);
292 let canonical_candidate = candidate.canonicalize().ok()?;
293 if !canonical_candidate.starts_with(&base_dir) {
294 return None;
295 }
296
297 let metadata = std::fs::metadata(&canonical_candidate).ok()?;
298 if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
299 return None;
300 }
301
302 read_file_to_string(&canonical_candidate, None).ok()
303}
304
305fn extract_version_from_module_content(content: &str) -> Option<String> {
306 RE_VERSION_ASSIGNMENT
307 .captures(content)
308 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
309 .map(|m| m.as_str().trim().to_string())
310 .map(truncate_field)
311 .filter(|value| !value.is_empty())
312}
313
314fn extract_abstract_from_module_content(content: &str) -> Option<String> {
315 let mut in_name_section = false;
316
317 for line in content.lines() {
318 let trimmed = line.trim();
319 if trimmed == "=head1 NAME" {
320 in_name_section = true;
321 continue;
322 }
323
324 if in_name_section {
325 if trimmed.starts_with('=') {
326 break;
327 }
328 if trimmed.is_empty() {
329 continue;
330 }
331
332 if let Some((_, abstract_text)) = trimmed.split_once(" - ") {
333 let abstract_text = abstract_text.trim();
334 if !abstract_text.is_empty() {
335 return Some(truncate_field(abstract_text.to_string()));
336 }
337 }
338 }
339 }
340
341 None
342}
343
344fn extract_writemakefile_block(content: &str) -> String {
345 let start_match = match RE_WRITEMAKEFILE.find(content) {
346 Some(m) => m,
347 None => return String::new(),
348 };
349
350 let start_pos = start_match.end();
351 let content_from_start = &content[start_pos..];
352
353 let mut depth = 1;
355 let mut end_pos = 0;
356 let chars: Vec<char> = content_from_start.chars().collect();
357
358 for (i, &ch) in chars.iter().enumerate() {
359 if i >= MAX_ITERATION_COUNT {
360 break;
361 }
362 match ch {
363 '(' => depth += 1,
364 ')' => {
365 depth -= 1;
366 if depth == 0 {
367 end_pos = i;
368 break;
369 }
370 }
371 _ => {}
372 }
373 }
374
375 if end_pos > 0 {
376 content_from_start[..end_pos].to_string()
377 } else {
378 String::new()
379 }
380}
381
382fn parse_hash_fields(content: &str) -> HashMap<String, String> {
383 let mut fields = HashMap::new();
384
385 for cap in RE_SIMPLE_KV
386 .captures_iter(content)
387 .take(MAX_ITERATION_COUNT)
388 {
389 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
390 let value = cap
391 .get(2)
392 .or_else(|| cap.get(3))
393 .or_else(|| cap.get(4))
394 .or_else(|| cap.get(5))
395 .map(|m| m.as_str().to_string());
396
397 if let Some(v) = value {
398 fields.insert(key, v);
399 }
400 }
401
402 parse_hash_dependencies(content, &mut fields);
404
405 parse_author_array(content, &mut fields);
407
408 fields
409}
410
411fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
412 for cap in RE_HASH_BLOCK
413 .captures_iter(content)
414 .take(MAX_ITERATION_COUNT)
415 {
416 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("");
417 let hash_content = cap.get(2).map(|m| m.as_str()).unwrap_or("");
418
419 if matches!(
422 key,
423 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
424 ) {
425 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
426 }
427 }
428}
429
430fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
431 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
432 let array_content = cap.get(1).map(|m| m.as_str()).unwrap_or("");
433
434 let authors: Vec<String> = RE_QUOTED_STRING
435 .captures_iter(array_content)
436 .take(MAX_ITERATION_COUNT)
437 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
438 .collect();
439
440 if !authors.is_empty() {
441 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
443 }
444 }
445}
446
447fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
448 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
450 return authors_str
451 .split("||")
452 .filter_map(|author_str| {
453 if author_str.trim().is_empty() {
454 return None;
455 }
456 let (name, email) = parse_author_string(author_str);
457 build_author_party(name, email)
458 })
459 .collect();
460 }
461
462 if let Some(author_str) = fields.get("AUTHOR") {
463 let (name, email) = parse_author_string(author_str);
464 return build_author_party(name, email).into_iter().collect();
465 }
466
467 Vec::new()
468}
469
470fn build_author_party(name: Option<String>, email: Option<String>) -> Option<Party> {
471 if name.is_none() && email.is_none() {
472 return None;
473 }
474
475 Some(Party {
476 role: Some("author".to_string()),
477 name,
478 email,
479 r#type: Some("person".to_string()),
480 url: None,
481 organization: None,
482 organization_url: None,
483 timezone: None,
484 })
485}
486
487fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
488 if let Some(start) = s.find('<')
489 && let Some(end) = s.find('>')
490 && start < end
491 {
492 let name = s[..start].trim();
493 let email = s[start + 1..end].trim();
494 return (sanitize_scalar_field(name), sanitize_scalar_field(email));
495 }
496 (sanitize_scalar_field(s), None)
497}
498
499fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
500 let mut dependencies = Vec::new();
501
502 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
504 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
505 }
506
507 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
509 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
510 }
511
512 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
514 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
515 }
516
517 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
519 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
520 }
521
522 dependencies
523}
524
525fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
526 let mut deps = Vec::new();
527
528 for cap in RE_DEP_PAIR
529 .captures_iter(hash_content)
530 .take(MAX_ITERATION_COUNT)
531 {
532 let module_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
533
534 if module_name == "perl" {
536 continue;
537 }
538
539 let version = cap
540 .get(2)
541 .or_else(|| cap.get(3))
542 .or_else(|| cap.get(4))
543 .map(|m| m.as_str());
544
545 let extracted_requirement = match version {
546 Some("0") | Some("") | None => None,
547 Some(v) => Some(truncate_field(v.to_string())),
548 };
549
550 let purl = PackageUrl::new("cpan", module_name)
551 .ok()
552 .map(|p| p.to_string());
553
554 deps.push(Dependency {
555 purl,
556 extracted_requirement,
557 scope: Some(truncate_field(scope.to_string())),
558 is_runtime: Some(is_runtime),
559 is_optional: Some(false),
560 is_pinned: None,
561 is_direct: Some(true),
562 resolved_package: None,
563 extra_data: None,
564 });
565 }
566
567 deps
568}
569
570crate::register_parser!(
571 "CPAN Perl Makefile.PL",
572 &["*/Makefile.PL"],
573 "cpan",
574 "Perl",
575 Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
576);