1use std::collections::HashMap;
16use std::path::Path;
17use std::sync::LazyLock;
18
19use crate::parser_warn as warn;
20use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
21use packageurl::PackageUrl;
22use regex::Regex;
23use serde_json::json;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26
27use super::PackageParser;
28use super::license_normalization::{
29 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
30 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
31};
32
/// Locates the opening of a `WriteMakefile(` or `WriteMakefile1(` call.
///
/// The match ends just after the opening parenthesis, so `Match::end()` is the
/// byte offset of the first character of the argument list.
static RE_WRITEMAKEFILE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"WriteMakefile1?\s*\(").expect("valid regex: WriteMakefile call pattern")
});
/// Matches a scalar `KEY => value` pair whose key (`[A-Z_]+`) is the first
/// non-whitespace token on a line.
///
/// Capture 1 is the key. Exactly one of captures 2-5 holds the value, depending
/// on the quoting form: `'...'`, `"..."`, `q{...}` or `q(...)`.
static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
        .expect("valid regex: simple key=>value pattern")
});
/// Matches `KEY => { ... }` where the body contains no `}`.
///
/// Capture 1 is the key, capture 2 the raw text between the braces. Because the
/// body is `[^}]*`, a nested hash ends the match at its first closing brace.
static RE_HASH_BLOCK: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").expect("valid regex: hash block pattern")
});
/// Matches `AUTHOR => [ ... ]`; capture 1 is the raw text between the brackets.
static RE_AUTHOR_ARRAY: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").expect("valid regex: AUTHOR array pattern")
});
/// Matches a quoted string; capture 1 is the text between the quotes.
///
/// The opening and closing quote are each "either `'` or `"`" and need not be
/// the same kind, and the body excludes both quote characters. A value such as
/// `"O'Brien"` is therefore cut at the apostrophe.
static RE_QUOTED_STRING: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"['"]([^'"]*)['"']"#).expect("valid regex: quoted string pattern")
});
49static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#)
51 .expect("valid regex: dependency pair pattern")
52});
/// Matches a quoted `$VERSION` assignment at the start of a line in a Perl module.
///
/// Accepts an optional leading `our` and an optional package qualifier
/// (`$Foo::Bar::VERSION`). Capture 1 holds a single-quoted value, capture 2 a
/// double-quoted one; both must be non-empty. Unquoted numeric assignments
/// (`$VERSION = 1.23;`) are not matched by this pattern.
static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
    )
    .expect("valid regex: VERSION assignment pattern")
});
59
/// Package type stamped on every `PackageData` produced by this parser.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;
/// Upper bound (1 MiB) on the size of a file referenced through `VERSION_FROM`
/// or `ABSTRACT_FROM`; larger files are not read.
const MAX_METADATA_FILE_SIZE: u64 = 1024 * 1024;
62
/// Parser for Perl `Makefile.PL` files that call `WriteMakefile(...)`.
pub struct CpanMakefilePlParser;
64
65impl PackageParser for CpanMakefilePlParser {
66 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
67
68 fn is_match(path: &Path) -> bool {
69 path.file_name().is_some_and(|name| name == "Makefile.PL")
70 }
71
72 fn extract_packages(path: &Path) -> Vec<PackageData> {
73 let content = match read_file_to_string(path, None) {
74 Ok(c) => c,
75 Err(e) => {
76 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
77 return vec![PackageData {
78 package_type: Some(PACKAGE_TYPE),
79 primary_language: Some("Perl".to_string()),
80 datasource_id: Some(DatasourceId::CpanMakefile),
81 ..Default::default()
82 }];
83 }
84 };
85
86 vec![parse_makefile_pl_with_base(&content, path.parent())]
87 }
88}
89
/// Test-only convenience wrapper: parses `content` with no base directory, so
/// `VERSION_FROM` / `ABSTRACT_FROM` references are never resolved from disk.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    parse_makefile_pl_with_base(content, None)
}
94
95pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
96 let makefile_block = extract_writemakefile_block(content);
98 if makefile_block.is_empty() {
99 return default_package_data();
100 }
101
102 let fields = parse_hash_fields(&makefile_block);
103
104 let name = fields.get("NAME").map(|n| truncate_field(n.to_string()));
105 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
106
107 let version = fields
108 .get("VERSION")
109 .map(|v| truncate_field(v.to_string()))
110 .or_else(|| resolved_metadata.version.clone());
111 let description = fields
112 .get("ABSTRACT")
113 .map(|d| truncate_field(d.to_string()))
114 .or_else(|| resolved_metadata.abstract_text.clone());
115 let extracted_license_statement = fields.get("LICENSE").map(|l| truncate_field(l.to_string()));
116 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
117 extracted_license_statement
118 .as_deref()
119 .and_then(normalize_cpan_makefile_license)
120 .map(|normalized| {
121 build_declared_license_data(
122 normalized,
123 DeclaredLicenseMatchMetadata::single_line(
124 extracted_license_statement.as_deref().unwrap_or_default(),
125 ),
126 )
127 })
128 .unwrap_or_else(empty_declared_license_data);
129
130 let parties = parse_author(&fields);
131 let dependencies = parse_dependencies(&fields);
132
133 let mut extra_data = HashMap::new();
134 if let Some(min_perl) = fields.get("MIN_PERL_VERSION") {
135 extra_data.insert(
136 "MIN_PERL_VERSION".to_string(),
137 json!(truncate_field(min_perl.to_string())),
138 );
139 }
140 if let Some(version_from) = fields.get("VERSION_FROM") {
141 extra_data.insert(
142 "VERSION_FROM".to_string(),
143 json!(truncate_field(version_from.to_string())),
144 );
145 }
146 if let Some(abstract_from) = fields.get("ABSTRACT_FROM") {
147 extra_data.insert(
148 "ABSTRACT_FROM".to_string(),
149 json!(truncate_field(abstract_from.to_string())),
150 );
151 }
152
153 let purl = name.as_ref().and_then(|n| {
155 let purl_name = n.replace("::", "-");
156 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
157 if let Some(v) = &version {
158 let _ = p.with_version(v).ok();
159 }
160 p.to_string()
161 })
162 });
163
164 PackageData {
165 package_type: Some(PACKAGE_TYPE),
166 namespace: Some("cpan".to_string()),
167 name,
168 version,
169 description,
170 declared_license_expression,
171 declared_license_expression_spdx,
172 license_detections,
173 extracted_license_statement,
174 parties,
175 dependencies,
176 extra_data: if extra_data.is_empty() {
177 None
178 } else {
179 Some(extra_data)
180 },
181 purl,
182 datasource_id: Some(DatasourceId::CpanMakefile),
183 primary_language: Some("Perl".to_string()),
184 ..Default::default()
185 }
186}
187
/// Metadata recovered from files referenced by `VERSION_FROM` / `ABSTRACT_FROM`.
#[derive(Default)]
struct ResolvedMetadata {
    // Value of the `$VERSION` assignment found in the `VERSION_FROM` file, if any.
    version: Option<String>,
    // Abstract taken from the `=head1 NAME` section of the `ABSTRACT_FROM` file, if any.
    abstract_text: Option<String>,
}
193
194fn default_package_data() -> PackageData {
195 PackageData {
196 package_type: Some(PACKAGE_TYPE),
197 primary_language: Some("Perl".to_string()),
198 datasource_id: Some(DatasourceId::CpanMakefile),
199 ..Default::default()
200 }
201}
202
203fn normalize_cpan_makefile_license(value: &str) -> Option<NormalizedDeclaredLicense> {
204 match value.trim() {
205 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
206 "gpl-1.0-plus OR artistic-perl-1.0",
207 "GPL-1.0-or-later OR Artistic-1.0-Perl",
208 )),
209 "artistic_2" => Some(NormalizedDeclaredLicense::new(
210 "artistic-2.0",
211 "Artistic-2.0",
212 )),
213 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
214 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
215 }
216}
217
218fn resolve_referenced_metadata(
219 fields: &HashMap<String, String>,
220 base_dir: Option<&Path>,
221) -> ResolvedMetadata {
222 let Some(base_dir) = base_dir else {
223 return ResolvedMetadata::default();
224 };
225
226 let mut resolved = ResolvedMetadata::default();
227 let mut cache: HashMap<String, Option<String>> = HashMap::new();
228
229 if let Some(version_from) = fields.get("VERSION_FROM")
230 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
231 {
232 resolved.version = extract_version_from_module_content(content);
233 }
234
235 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
236 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
237 {
238 resolved.abstract_text = extract_abstract_from_module_content(content);
239 }
240
241 resolved
242}
243
244fn load_referenced_metadata_file<'a>(
245 base_dir: &Path,
246 relative_path: &str,
247 cache: &'a mut HashMap<String, Option<String>>,
248) -> Option<&'a String> {
249 let entry = cache
250 .entry(relative_path.to_string())
251 .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
252 entry.as_ref()
253}
254
255fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
256 let ref_path = Path::new(relative_path);
257 if ref_path.is_absolute() {
258 return None;
259 }
260
261 let base_dir = base_dir.canonicalize().ok()?;
262 let candidate = base_dir.join(ref_path);
263 let canonical_candidate = candidate.canonicalize().ok()?;
264 if !canonical_candidate.starts_with(&base_dir) {
265 return None;
266 }
267
268 let metadata = std::fs::metadata(&canonical_candidate).ok()?;
269 if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
270 return None;
271 }
272
273 read_file_to_string(&canonical_candidate, None).ok()
274}
275
276fn extract_version_from_module_content(content: &str) -> Option<String> {
277 RE_VERSION_ASSIGNMENT
278 .captures(content)
279 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
280 .map(|m| m.as_str().trim().to_string())
281 .map(truncate_field)
282 .filter(|value| !value.is_empty())
283}
284
285fn extract_abstract_from_module_content(content: &str) -> Option<String> {
286 let mut in_name_section = false;
287
288 for line in content.lines() {
289 let trimmed = line.trim();
290 if trimmed == "=head1 NAME" {
291 in_name_section = true;
292 continue;
293 }
294
295 if in_name_section {
296 if trimmed.starts_with('=') {
297 break;
298 }
299 if trimmed.is_empty() {
300 continue;
301 }
302
303 if let Some((_, abstract_text)) = trimmed.split_once(" - ") {
304 let abstract_text = abstract_text.trim();
305 if !abstract_text.is_empty() {
306 return Some(truncate_field(abstract_text.to_string()));
307 }
308 }
309 }
310 }
311
312 None
313}
314
315fn extract_writemakefile_block(content: &str) -> String {
316 let start_match = match RE_WRITEMAKEFILE.find(content) {
317 Some(m) => m,
318 None => return String::new(),
319 };
320
321 let start_pos = start_match.end();
322 let content_from_start = &content[start_pos..];
323
324 let mut depth = 1;
326 let mut end_pos = 0;
327 let chars: Vec<char> = content_from_start.chars().collect();
328
329 for (i, &ch) in chars.iter().enumerate() {
330 if i >= MAX_ITERATION_COUNT {
331 break;
332 }
333 match ch {
334 '(' => depth += 1,
335 ')' => {
336 depth -= 1;
337 if depth == 0 {
338 end_pos = i;
339 break;
340 }
341 }
342 _ => {}
343 }
344 }
345
346 if end_pos > 0 {
347 content_from_start[..end_pos].to_string()
348 } else {
349 String::new()
350 }
351}
352
353fn parse_hash_fields(content: &str) -> HashMap<String, String> {
354 let mut fields = HashMap::new();
355
356 for cap in RE_SIMPLE_KV
357 .captures_iter(content)
358 .take(MAX_ITERATION_COUNT)
359 {
360 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
361 let value = cap
362 .get(2)
363 .or_else(|| cap.get(3))
364 .or_else(|| cap.get(4))
365 .or_else(|| cap.get(5))
366 .map(|m| m.as_str().to_string());
367
368 if let Some(v) = value {
369 fields.insert(key, v);
370 }
371 }
372
373 parse_hash_dependencies(content, &mut fields);
375
376 parse_author_array(content, &mut fields);
378
379 fields
380}
381
382fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
383 for cap in RE_HASH_BLOCK
384 .captures_iter(content)
385 .take(MAX_ITERATION_COUNT)
386 {
387 let key = cap.get(1).map(|m| m.as_str()).unwrap_or("");
388 let hash_content = cap.get(2).map(|m| m.as_str()).unwrap_or("");
389
390 if matches!(
393 key,
394 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
395 ) {
396 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
397 }
398 }
399}
400
401fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
402 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
403 let array_content = cap.get(1).map(|m| m.as_str()).unwrap_or("");
404
405 let authors: Vec<String> = RE_QUOTED_STRING
406 .captures_iter(array_content)
407 .take(MAX_ITERATION_COUNT)
408 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
409 .collect();
410
411 if !authors.is_empty() {
412 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
414 }
415 }
416}
417
418fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
419 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
421 return authors_str
422 .split("||")
423 .filter_map(|author_str| {
424 if author_str.trim().is_empty() {
425 return None;
426 }
427 let (name, email) = parse_author_string(author_str);
428 Some(Party {
429 role: Some("author".to_string()),
430 name,
431 email,
432 r#type: Some("person".to_string()),
433 url: None,
434 organization: None,
435 organization_url: None,
436 timezone: None,
437 })
438 })
439 .collect();
440 }
441
442 if let Some(author_str) = fields.get("AUTHOR") {
443 let (name, email) = parse_author_string(author_str);
444 return vec![Party {
445 role: Some("author".to_string()),
446 name,
447 email,
448 r#type: Some("person".to_string()),
449 url: None,
450 organization: None,
451 organization_url: None,
452 timezone: None,
453 }];
454 }
455
456 Vec::new()
457}
458
459fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
460 if let Some(start) = s.find('<')
461 && let Some(end) = s.find('>')
462 && start < end
463 {
464 let name = s[..start].trim();
465 let email = s[start + 1..end].trim();
466 return (
467 if name.is_empty() {
468 None
469 } else {
470 Some(truncate_field(name.to_string()))
471 },
472 if email.is_empty() {
473 None
474 } else {
475 Some(truncate_field(email.to_string()))
476 },
477 );
478 }
479 (Some(truncate_field(s.trim().to_string())), None)
480}
481
482fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
483 let mut dependencies = Vec::new();
484
485 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
487 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
488 }
489
490 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
492 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
493 }
494
495 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
497 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
498 }
499
500 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
502 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
503 }
504
505 dependencies
506}
507
508fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
509 let mut deps = Vec::new();
510
511 for cap in RE_DEP_PAIR
512 .captures_iter(hash_content)
513 .take(MAX_ITERATION_COUNT)
514 {
515 let module_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
516
517 if module_name == "perl" {
519 continue;
520 }
521
522 let version = cap
523 .get(2)
524 .or_else(|| cap.get(3))
525 .or_else(|| cap.get(4))
526 .map(|m| m.as_str());
527
528 let extracted_requirement = match version {
529 Some("0") | Some("") | None => None,
530 Some(v) => Some(truncate_field(v.to_string())),
531 };
532
533 let purl = PackageUrl::new("cpan", module_name)
534 .ok()
535 .map(|p| p.to_string());
536
537 deps.push(Dependency {
538 purl,
539 extracted_requirement,
540 scope: Some(truncate_field(scope.to_string())),
541 is_runtime: Some(is_runtime),
542 is_optional: Some(false),
543 is_pinned: None,
544 is_direct: Some(true),
545 resolved_package: None,
546 extra_data: None,
547 });
548 }
549
550 deps
551}
552
// Parser registration. The arguments appear to be, in order: display name,
// path glob(s), package type, primary language and documentation URL.
// NOTE(review): meanings are inferred from the values; confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "CPAN Perl Makefile.PL",
    &["*/Makefile.PL"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
);