1use std::collections::HashMap;
16use std::fs;
17use std::path::Path;
18use std::sync::LazyLock;
19
20use crate::parser_warn as warn;
21use packageurl::PackageUrl;
22use regex::Regex;
23use serde_json::json;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26
27use super::PackageParser;
28use super::license_normalization::{
29 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
30 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
31};
32
/// Matches the start of a `WriteMakefile(` or `WriteMakefile1(` call, up to and
/// including the opening parenthesis (whitespace before `(` is allowed).
static RE_WRITEMAKEFILE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"WriteMakefile1?\s*\(").unwrap());
/// Matches a line-leading `KEY => value` pair where `KEY` is upper-case letters
/// and underscores and the value is a scalar string literal.
///
/// Exactly one of the value groups participates in a match:
/// group 2 = `'...'`, group 3 = `"..."`, group 4 = `q{...}`, group 5 = `q(...)`.
static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
        .unwrap()
});
/// Matches `KEY => { ... }` and captures the key (group 1) and the text between
/// the braces (group 2). The body stops at the first `}`, so nested hashes are
/// not captured in full.
static RE_HASH_BLOCK: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").unwrap());
/// Matches `AUTHOR => [ ... ]` and captures the text between the brackets.
static RE_AUTHOR_ARRAY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").unwrap());
/// Matches a single- or double-quoted string and captures its contents.
/// The contents may not contain either quote character.
static RE_QUOTED_STRING: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"['"]([^'"]*)['"']"#).unwrap());
45static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
46 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#).unwrap()
47});
/// Matches a line-leading Perl `$VERSION = '...'` / `$VERSION = "..."` assignment,
/// optionally prefixed with `our` and optionally package-qualified
/// (e.g. `$Foo::Bar::VERSION`).
///
/// Group 1 holds a single-quoted value, group 2 a double-quoted value; unquoted
/// values are not matched.
static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
    )
    .unwrap()
});
54
/// Package type reported for everything produced by this parser.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;
/// Upper bound, in bytes (1 MiB), on the size of a file referenced through
/// `VERSION_FROM` / `ABSTRACT_FROM` that will be read.
const MAX_METADATA_FILE_SIZE: u64 = 1024 * 1024;
57
/// Parser for Perl `Makefile.PL` build scripts (see the `PackageParser` impl).
pub struct CpanMakefilePlParser;
59
60impl PackageParser for CpanMakefilePlParser {
61 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
62
63 fn is_match(path: &Path) -> bool {
64 path.file_name().is_some_and(|name| name == "Makefile.PL")
65 }
66
67 fn extract_packages(path: &Path) -> Vec<PackageData> {
68 let content = match fs::read_to_string(path) {
69 Ok(c) => c,
70 Err(e) => {
71 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
72 return vec![PackageData {
73 package_type: Some(PACKAGE_TYPE),
74 primary_language: Some("Perl".to_string()),
75 datasource_id: Some(DatasourceId::CpanMakefile),
76 ..Default::default()
77 }];
78 }
79 };
80
81 vec![parse_makefile_pl_with_base(&content, path.parent())]
82 }
83}
84
/// Test-only convenience wrapper: parses `Makefile.PL` text without a base
/// directory, so `VERSION_FROM` / `ABSTRACT_FROM` references are not resolved.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    parse_makefile_pl_with_base(content, None)
}
89
90pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
91 let makefile_block = extract_writemakefile_block(content);
93 if makefile_block.is_empty() {
94 return default_package_data();
95 }
96
97 let fields = parse_hash_fields(&makefile_block);
98
99 let name = fields.get("NAME").map(|n| n.to_string());
100 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
101
102 let version = fields
103 .get("VERSION")
104 .map(|v| v.to_string())
105 .or_else(|| resolved_metadata.version.clone());
106 let description = fields
107 .get("ABSTRACT")
108 .map(|d| d.to_string())
109 .or_else(|| resolved_metadata.abstract_text.clone());
110 let extracted_license_statement = fields.get("LICENSE").map(|l| l.to_string());
111 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
112 extracted_license_statement
113 .as_deref()
114 .and_then(normalize_cpan_makefile_license)
115 .map(|normalized| {
116 build_declared_license_data(
117 normalized,
118 DeclaredLicenseMatchMetadata::single_line(
119 extracted_license_statement.as_deref().unwrap_or_default(),
120 ),
121 )
122 })
123 .unwrap_or_else(empty_declared_license_data);
124
125 let parties = parse_author(&fields);
126 let dependencies = parse_dependencies(&fields);
127
128 let mut extra_data = HashMap::new();
129 if let Some(min_perl) = fields.get("MIN_PERL_VERSION") {
130 extra_data.insert("MIN_PERL_VERSION".to_string(), json!(min_perl));
131 }
132 if let Some(version_from) = fields.get("VERSION_FROM") {
133 extra_data.insert("VERSION_FROM".to_string(), json!(version_from));
134 }
135 if let Some(abstract_from) = fields.get("ABSTRACT_FROM") {
136 extra_data.insert("ABSTRACT_FROM".to_string(), json!(abstract_from));
137 }
138
139 let purl = name.as_ref().and_then(|n| {
141 let purl_name = n.replace("::", "-");
142 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
143 if let Some(v) = &version {
144 let _ = p.with_version(v).ok();
145 }
146 p.to_string()
147 })
148 });
149
150 PackageData {
151 package_type: Some(PACKAGE_TYPE),
152 namespace: Some("cpan".to_string()),
153 name,
154 version,
155 description,
156 declared_license_expression,
157 declared_license_expression_spdx,
158 license_detections,
159 extracted_license_statement,
160 parties,
161 dependencies,
162 extra_data: if extra_data.is_empty() {
163 None
164 } else {
165 Some(extra_data)
166 },
167 purl,
168 datasource_id: Some(DatasourceId::CpanMakefile),
169 primary_language: Some("Perl".to_string()),
170 ..Default::default()
171 }
172}
173
/// Values recovered from files referenced by `VERSION_FROM` / `ABSTRACT_FROM`.
/// Both fields default to `None`.
#[derive(Default)]
struct ResolvedMetadata {
    /// Version string found in the `VERSION_FROM` file, if any.
    version: Option<String>,
    /// Abstract text found in the `ABSTRACT_FROM` file, if any.
    abstract_text: Option<String>,
}
179
/// Returns the minimal placeholder `PackageData`: only the package type,
/// primary language ("Perl") and datasource id are set; everything else is
/// left at its `Default` value.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(PACKAGE_TYPE),
        primary_language: Some("Perl".to_string()),
        datasource_id: Some(DatasourceId::CpanMakefile),
        ..Default::default()
    }
}
188
189fn normalize_cpan_makefile_license(value: &str) -> Option<NormalizedDeclaredLicense> {
190 match value.trim() {
191 "perl_5" | "Perl_5" => Some(NormalizedDeclaredLicense::new(
192 "gpl-1.0-plus OR artistic-perl-1.0",
193 "GPL-1.0-or-later OR Artistic-1.0-Perl",
194 )),
195 "artistic_2" => Some(NormalizedDeclaredLicense::new(
196 "artistic-2.0",
197 "Artistic-2.0",
198 )),
199 "apache_2_0" => Some(NormalizedDeclaredLicense::new("apache-2.0", "Apache-2.0")),
200 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
201 }
202}
203
204fn resolve_referenced_metadata(
205 fields: &HashMap<String, String>,
206 base_dir: Option<&Path>,
207) -> ResolvedMetadata {
208 let Some(base_dir) = base_dir else {
209 return ResolvedMetadata::default();
210 };
211
212 let mut resolved = ResolvedMetadata::default();
213 let mut cache: HashMap<String, Option<String>> = HashMap::new();
214
215 if let Some(version_from) = fields.get("VERSION_FROM")
216 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
217 {
218 resolved.version = extract_version_from_module_content(content);
219 }
220
221 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
222 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
223 {
224 resolved.abstract_text = extract_abstract_from_module_content(content);
225 }
226
227 resolved
228}
229
/// Returns the contents of `relative_path` (resolved against `base_dir`),
/// reading the file at most once per distinct `relative_path`.
///
/// The result of `read_safe_metadata_file` is stored in `cache` keyed by the
/// relative path string, including failures (`None`), so a file that could not
/// be read is not retried.
fn load_referenced_metadata_file<'a>(
    base_dir: &Path,
    relative_path: &str,
    cache: &'a mut HashMap<String, Option<String>>,
) -> Option<&'a String> {
    let entry = cache
        .entry(relative_path.to_string())
        .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
    entry.as_ref()
}
240
241fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
242 let ref_path = Path::new(relative_path);
243 if ref_path.is_absolute() {
244 return None;
245 }
246
247 let base_dir = base_dir.canonicalize().ok()?;
248 let candidate = base_dir.join(ref_path);
249 let canonical_candidate = candidate.canonicalize().ok()?;
250 if !canonical_candidate.starts_with(&base_dir) {
251 return None;
252 }
253
254 let metadata = fs::metadata(&canonical_candidate).ok()?;
255 if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
256 return None;
257 }
258
259 fs::read_to_string(canonical_candidate).ok()
260}
261
262fn extract_version_from_module_content(content: &str) -> Option<String> {
263 RE_VERSION_ASSIGNMENT
264 .captures(content)
265 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
266 .map(|m| m.as_str().trim().to_string())
267 .filter(|value| !value.is_empty())
268}
269
/// Extracts the abstract from the POD `=head1 NAME` section of a module.
///
/// Within that section, the first line of the form `Name - text` supplies the
/// abstract (the part after the first `" - "`, trimmed). Scanning stops at the
/// next POD directive (a line starting with `=`). Returns `None` when there is
/// no NAME section or no such line in it.
fn extract_abstract_from_module_content(content: &str) -> Option<String> {
    const NAME_HEADING: &str = "=head1 NAME";

    content
        .lines()
        .map(str::trim)
        // Ignore everything up to and including the NAME heading.
        .skip_while(|line| *line != NAME_HEADING)
        .skip(1)
        // Any other POD directive ends the section; a repeated NAME heading
        // does not.
        .take_while(|line| *line == NAME_HEADING || !line.starts_with('='))
        .filter_map(|line| line.split_once(" - "))
        .map(|(_, summary)| summary.trim())
        .find(|summary| !summary.is_empty())
        .map(str::to_string)
}
299
300fn extract_writemakefile_block(content: &str) -> String {
301 let start_match = match RE_WRITEMAKEFILE.find(content) {
302 Some(m) => m,
303 None => return String::new(),
304 };
305
306 let start_pos = start_match.end();
307 let content_from_start = &content[start_pos..];
308
309 let mut depth = 1;
311 let mut end_pos = 0;
312 let chars: Vec<char> = content_from_start.chars().collect();
313
314 for (i, &ch) in chars.iter().enumerate() {
315 match ch {
316 '(' => depth += 1,
317 ')' => {
318 depth -= 1;
319 if depth == 0 {
320 end_pos = i;
321 break;
322 }
323 }
324 _ => {}
325 }
326 }
327
328 if end_pos > 0 {
329 content_from_start[..end_pos].to_string()
330 } else {
331 String::new()
332 }
333}
334
335fn parse_hash_fields(content: &str) -> HashMap<String, String> {
336 let mut fields = HashMap::new();
337
338 for cap in RE_SIMPLE_KV.captures_iter(content) {
339 let key = cap
340 .get(1)
341 .expect("group 1 always exists")
342 .as_str()
343 .to_string();
344 let value = cap
345 .get(2)
346 .or_else(|| cap.get(3))
347 .or_else(|| cap.get(4))
348 .or_else(|| cap.get(5))
349 .map(|m| m.as_str().to_string());
350
351 if let Some(v) = value {
352 fields.insert(key, v);
353 }
354 }
355
356 parse_hash_dependencies(content, &mut fields);
358
359 parse_author_array(content, &mut fields);
361
362 fields
363}
364
365fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
366 for cap in RE_HASH_BLOCK.captures_iter(content) {
367 let key = cap.get(1).expect("group 1 always exists").as_str();
368 let hash_content = cap.get(2).expect("group 2 always exists").as_str();
369
370 if matches!(
373 key,
374 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
375 ) {
376 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
377 }
378 }
379}
380
381fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
382 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
383 let array_content = cap.get(1).expect("group 1 always exists").as_str();
384
385 let authors: Vec<String> = RE_QUOTED_STRING
386 .captures_iter(array_content)
387 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
388 .collect();
389
390 if !authors.is_empty() {
391 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
393 }
394 }
395}
396
397fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
398 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
400 return authors_str
401 .split("||")
402 .filter_map(|author_str| {
403 if author_str.trim().is_empty() {
404 return None;
405 }
406 let (name, email) = parse_author_string(author_str);
407 Some(Party {
408 role: Some("author".to_string()),
409 name,
410 email,
411 r#type: Some("person".to_string()),
412 url: None,
413 organization: None,
414 organization_url: None,
415 timezone: None,
416 })
417 })
418 .collect();
419 }
420
421 if let Some(author_str) = fields.get("AUTHOR") {
423 let (name, email) = parse_author_string(author_str);
424 return vec![Party {
425 role: Some("author".to_string()),
426 name,
427 email,
428 r#type: Some("person".to_string()),
429 url: None,
430 organization: None,
431 organization_url: None,
432 timezone: None,
433 }];
434 }
435
436 Vec::new()
437}
438
/// Splits an author string of the form `Name <email>` into `(name, email)`.
///
/// The email is the text between the first `<` and the first `>` that follows
/// it, so a stray `>` earlier in the string does not prevent the email from
/// being found. Either part is `None` when it is blank after trimming. When
/// the string has no `<...>` pair, the whole trimmed string is returned as the
/// name and the email is `None`.
fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
    // Trimmed, owned copy of `text`, or `None` if nothing is left.
    fn non_blank(text: &str) -> Option<String> {
        let text = text.trim();
        (!text.is_empty()).then(|| text.to_string())
    }

    let bracketed = s.split_once('<').and_then(|(name, rest)| {
        // Only look for the closing bracket after the opening one.
        rest.split_once('>').map(|(email, _)| (name, email))
    });

    match bracketed {
        Some((name, email)) => (non_blank(name), non_blank(email)),
        None => (Some(s.trim().to_string()), None),
    }
}
463
464fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
465 let mut dependencies = Vec::new();
466
467 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
469 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
470 }
471
472 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
474 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
475 }
476
477 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
479 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
480 }
481
482 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
484 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
485 }
486
487 dependencies
488}
489
490fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
491 let mut deps = Vec::new();
492
493 for cap in RE_DEP_PAIR.captures_iter(hash_content) {
494 let module_name = cap.get(1).expect("group 1 always exists").as_str();
495
496 if module_name == "perl" {
498 continue;
499 }
500
501 let version = cap
502 .get(2)
503 .or_else(|| cap.get(3))
504 .or_else(|| cap.get(4))
505 .map(|m| m.as_str());
506
507 let extracted_requirement = match version {
508 Some("0") | Some("") | None => None,
509 Some(v) => Some(v.to_string()),
510 };
511
512 let purl = PackageUrl::new("cpan", module_name)
513 .ok()
514 .map(|p| p.to_string());
515
516 deps.push(Dependency {
517 purl,
518 extracted_requirement,
519 scope: Some(scope.to_string()),
520 is_runtime: Some(is_runtime),
521 is_optional: Some(false),
522 is_pinned: None,
523 is_direct: Some(true),
524 resolved_package: None,
525 extra_data: None,
526 });
527 }
528
529 deps
530}
531
// Registers descriptive metadata for this parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the macro
// definition, which is not visible in this file.
crate::register_parser!(
    "CPAN Perl Makefile.PL",
    &["*/Makefile.PL"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
);