1use std::collections::HashMap;
16use std::fs;
17use std::path::Path;
18use std::sync::LazyLock;
19
20use log::warn;
21use packageurl::PackageUrl;
22use regex::Regex;
23use serde_json::json;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26
27use super::PackageParser;
28
29static RE_WRITEMAKEFILE: LazyLock<Regex> =
30 LazyLock::new(|| Regex::new(r"WriteMakefile1?\s*\(").unwrap());
31static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
32 Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
33 .unwrap()
34});
35static RE_HASH_BLOCK: LazyLock<Regex> =
36 LazyLock::new(|| Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").unwrap());
37static RE_AUTHOR_ARRAY: LazyLock<Regex> =
38 LazyLock::new(|| Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").unwrap());
39static RE_QUOTED_STRING: LazyLock<Regex> =
40 LazyLock::new(|| Regex::new(r#"['"]([^'"]*)['"']"#).unwrap());
41static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
42 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#).unwrap()
43});
44static RE_VERSION_ASSIGNMENT: LazyLock<Regex> = LazyLock::new(|| {
45 Regex::new(
46 r#"(?m)^\s*(?:our\s+)?\$(?:[A-Za-z_][\w:]*::)?VERSION\s*=\s*(?:'([^']+)'|"([^"]+)")"#,
47 )
48 .unwrap()
49});
50
51const PACKAGE_TYPE: PackageType = PackageType::Cpan;
52const MAX_METADATA_FILE_SIZE: u64 = 1024 * 1024;
53
54pub struct CpanMakefilePlParser;
55
56impl PackageParser for CpanMakefilePlParser {
57 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
58
59 fn is_match(path: &Path) -> bool {
60 path.file_name().is_some_and(|name| name == "Makefile.PL")
61 }
62
63 fn extract_packages(path: &Path) -> Vec<PackageData> {
64 let content = match fs::read_to_string(path) {
65 Ok(c) => c,
66 Err(e) => {
67 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
68 return vec![PackageData {
69 package_type: Some(PACKAGE_TYPE),
70 primary_language: Some("Perl".to_string()),
71 datasource_id: Some(DatasourceId::CpanMakefile),
72 ..Default::default()
73 }];
74 }
75 };
76
77 vec![parse_makefile_pl_with_base(&content, path.parent())]
78 }
79}
80
/// Test-only convenience wrapper: parses Makefile.PL content with no base
/// directory, so VERSION_FROM / ABSTRACT_FROM references are never resolved
/// against the filesystem.
#[cfg(test)]
pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
    parse_makefile_pl_with_base(content, None)
}
85
86pub(crate) fn parse_makefile_pl_with_base(content: &str, base_dir: Option<&Path>) -> PackageData {
87 let makefile_block = extract_writemakefile_block(content);
89 if makefile_block.is_empty() {
90 return default_package_data();
91 }
92
93 let fields = parse_hash_fields(&makefile_block);
94
95 let name = fields.get("NAME").map(|n| n.to_string());
96 let resolved_metadata = resolve_referenced_metadata(&fields, base_dir);
97
98 let version = fields
99 .get("VERSION")
100 .map(|v| v.to_string())
101 .or_else(|| resolved_metadata.version.clone());
102 let description = fields
103 .get("ABSTRACT")
104 .map(|d| d.to_string())
105 .or_else(|| resolved_metadata.abstract_text.clone());
106 let extracted_license_statement = fields.get("LICENSE").map(|l| l.to_string());
107
108 let parties = parse_author(&fields);
109 let dependencies = parse_dependencies(&fields);
110
111 let mut extra_data = HashMap::new();
112 if let Some(min_perl) = fields.get("MIN_PERL_VERSION") {
113 extra_data.insert("MIN_PERL_VERSION".to_string(), json!(min_perl));
114 }
115 if let Some(version_from) = fields.get("VERSION_FROM") {
116 extra_data.insert("VERSION_FROM".to_string(), json!(version_from));
117 }
118 if let Some(abstract_from) = fields.get("ABSTRACT_FROM") {
119 extra_data.insert("ABSTRACT_FROM".to_string(), json!(abstract_from));
120 }
121
122 let purl = name.as_ref().and_then(|n| {
124 let purl_name = n.replace("::", "-");
125 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
126 if let Some(v) = &version {
127 let _ = p.with_version(v).ok();
128 }
129 p.to_string()
130 })
131 });
132
133 PackageData {
134 package_type: Some(PACKAGE_TYPE),
135 namespace: Some("cpan".to_string()),
136 name,
137 version,
138 description,
139 extracted_license_statement,
140 parties,
141 dependencies,
142 extra_data: if extra_data.is_empty() {
143 None
144 } else {
145 Some(extra_data)
146 },
147 purl,
148 datasource_id: Some(DatasourceId::CpanMakefile),
149 primary_language: Some("Perl".to_string()),
150 ..Default::default()
151 }
152}
153
/// Metadata recovered from module files referenced by VERSION_FROM /
/// ABSTRACT_FROM entries in the WriteMakefile call.
#[derive(Default)]
struct ResolvedMetadata {
    /// Version parsed from a `$VERSION = ...` assignment, if any.
    version: Option<String>,
    /// Abstract parsed from the POD `=head1 NAME` section, if any.
    abstract_text: Option<String>,
}
159
/// Returns a minimal `PackageData` carrying only the fixed CPAN/Perl
/// identification fields; used when no metadata could be extracted.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(PACKAGE_TYPE),
        primary_language: Some("Perl".to_string()),
        datasource_id: Some(DatasourceId::CpanMakefile),
        ..Default::default()
    }
}
168
169fn resolve_referenced_metadata(
170 fields: &HashMap<String, String>,
171 base_dir: Option<&Path>,
172) -> ResolvedMetadata {
173 let Some(base_dir) = base_dir else {
174 return ResolvedMetadata::default();
175 };
176
177 let mut resolved = ResolvedMetadata::default();
178 let mut cache: HashMap<String, Option<String>> = HashMap::new();
179
180 if let Some(version_from) = fields.get("VERSION_FROM")
181 && let Some(content) = load_referenced_metadata_file(base_dir, version_from, &mut cache)
182 {
183 resolved.version = extract_version_from_module_content(content);
184 }
185
186 if let Some(abstract_from) = fields.get("ABSTRACT_FROM")
187 && let Some(content) = load_referenced_metadata_file(base_dir, abstract_from, &mut cache)
188 {
189 resolved.abstract_text = extract_abstract_from_module_content(content);
190 }
191
192 resolved
193}
194
/// Loads a metadata file referenced from Makefile.PL, memoizing the outcome
/// (including failures, stored as `None`) so each path is read at most once.
///
/// Returns a reference into the cache, or `None` when the file could not be
/// read safely.
fn load_referenced_metadata_file<'a>(
    base_dir: &Path,
    relative_path: &str,
    cache: &'a mut HashMap<String, Option<String>>,
) -> Option<&'a String> {
    // entry() does a single lookup; the read only happens on a cache miss.
    let entry = cache
        .entry(relative_path.to_string())
        .or_insert_with(|| read_safe_metadata_file(base_dir, relative_path));
    entry.as_ref()
}
205
/// Reads a file referenced from Makefile.PL while guarding against path
/// traversal: the reference must be relative, and after canonicalization the
/// resolved path must still live under `base_dir`. Missing files, non-regular
/// files, and files larger than `MAX_METADATA_FILE_SIZE` are rejected.
fn read_safe_metadata_file(base_dir: &Path, relative_path: &str) -> Option<String> {
    let ref_path = Path::new(relative_path);
    // An absolute reference could point anywhere on the filesystem; refuse it.
    if ref_path.is_absolute() {
        return None;
    }

    // Canonicalize both sides so `..` components cannot escape base_dir.
    let base_dir = base_dir.canonicalize().ok()?;
    let candidate = base_dir.join(ref_path);
    let canonical_candidate = candidate.canonicalize().ok()?;
    if !canonical_candidate.starts_with(&base_dir) {
        return None;
    }

    // Size cap keeps a pathological reference from making us slurp a huge file.
    let metadata = fs::metadata(&canonical_candidate).ok()?;
    if !metadata.is_file() || metadata.len() > MAX_METADATA_FILE_SIZE {
        return None;
    }

    fs::read_to_string(canonical_candidate).ok()
}
226
227fn extract_version_from_module_content(content: &str) -> Option<String> {
228 RE_VERSION_ASSIGNMENT
229 .captures(content)
230 .and_then(|caps| caps.get(1).or_else(|| caps.get(2)))
231 .map(|m| m.as_str().trim().to_string())
232 .filter(|value| !value.is_empty())
233}
234
/// Extracts the one-line abstract from a module's POD `=head1 NAME` section,
/// which conventionally reads `Module::Name - abstract text`.
///
/// Returns `None` when there is no NAME section, the section ends (next POD
/// directive) before a ` - ` line is seen, or the text after ` - ` is empty.
fn extract_abstract_from_module_content(content: &str) -> Option<String> {
    let mut lines = content.lines().map(str::trim);

    // Skip everything up to (and including) the "=head1 NAME" heading.
    lines.find(|line| *line == "=head1 NAME")?;

    for line in lines {
        if line.starts_with('=') {
            // The next POD directive ends the NAME section.
            return None;
        }
        if line.is_empty() {
            continue;
        }
        if let Some((_, rest)) = line.split_once(" - ") {
            let rest = rest.trim();
            if !rest.is_empty() {
                return Some(rest.to_string());
            }
        }
    }

    None
}
264
265fn extract_writemakefile_block(content: &str) -> String {
266 let start_match = match RE_WRITEMAKEFILE.find(content) {
267 Some(m) => m,
268 None => return String::new(),
269 };
270
271 let start_pos = start_match.end();
272 let content_from_start = &content[start_pos..];
273
274 let mut depth = 1;
276 let mut end_pos = 0;
277 let chars: Vec<char> = content_from_start.chars().collect();
278
279 for (i, &ch) in chars.iter().enumerate() {
280 match ch {
281 '(' => depth += 1,
282 ')' => {
283 depth -= 1;
284 if depth == 0 {
285 end_pos = i;
286 break;
287 }
288 }
289 _ => {}
290 }
291 }
292
293 if end_pos > 0 {
294 content_from_start[..end_pos].to_string()
295 } else {
296 String::new()
297 }
298}
299
300fn parse_hash_fields(content: &str) -> HashMap<String, String> {
301 let mut fields = HashMap::new();
302
303 for cap in RE_SIMPLE_KV.captures_iter(content) {
304 let key = cap
305 .get(1)
306 .expect("group 1 always exists")
307 .as_str()
308 .to_string();
309 let value = cap
310 .get(2)
311 .or_else(|| cap.get(3))
312 .or_else(|| cap.get(4))
313 .or_else(|| cap.get(5))
314 .map(|m| m.as_str().to_string());
315
316 if let Some(v) = value {
317 fields.insert(key, v);
318 }
319 }
320
321 parse_hash_dependencies(content, &mut fields);
323
324 parse_author_array(content, &mut fields);
326
327 fields
328}
329
330fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
331 for cap in RE_HASH_BLOCK.captures_iter(content) {
332 let key = cap.get(1).expect("group 1 always exists").as_str();
333 let hash_content = cap.get(2).expect("group 2 always exists").as_str();
334
335 if matches!(
338 key,
339 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
340 ) {
341 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
342 }
343 }
344}
345
346fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
347 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
348 let array_content = cap.get(1).expect("group 1 always exists").as_str();
349
350 let authors: Vec<String> = RE_QUOTED_STRING
351 .captures_iter(array_content)
352 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
353 .collect();
354
355 if !authors.is_empty() {
356 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
358 }
359 }
360}
361
362fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
363 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
365 return authors_str
366 .split("||")
367 .filter_map(|author_str| {
368 if author_str.trim().is_empty() {
369 return None;
370 }
371 let (name, email) = parse_author_string(author_str);
372 Some(Party {
373 role: Some("author".to_string()),
374 name,
375 email,
376 r#type: Some("person".to_string()),
377 url: None,
378 organization: None,
379 organization_url: None,
380 timezone: None,
381 })
382 })
383 .collect();
384 }
385
386 if let Some(author_str) = fields.get("AUTHOR") {
388 let (name, email) = parse_author_string(author_str);
389 return vec![Party {
390 role: Some("author".to_string()),
391 name,
392 email,
393 r#type: Some("person".to_string()),
394 url: None,
395 organization: None,
396 organization_url: None,
397 timezone: None,
398 }];
399 }
400
401 Vec::new()
402}
403
/// Splits an author string of the form `Name <email>` into its parts.
///
/// When no well-formed `<...>` bracket pair is present, the whole trimmed
/// string is returned as the name with no email. Empty name or email parts
/// become `None`.
fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
    if let Some((start, end)) = s.find('<').zip(s.find('>')) {
        if start < end {
            // Trim, then map an empty string to None.
            let non_empty = |text: &str| {
                let text = text.trim();
                (!text.is_empty()).then(|| text.to_string())
            };
            return (non_empty(&s[..start]), non_empty(&s[start + 1..end]));
        }
    }
    (Some(s.trim().to_string()), None)
}
428
429fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
430 let mut dependencies = Vec::new();
431
432 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
434 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
435 }
436
437 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
439 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
440 }
441
442 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
444 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
445 }
446
447 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
449 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
450 }
451
452 dependencies
453}
454
455fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
456 let mut deps = Vec::new();
457
458 for cap in RE_DEP_PAIR.captures_iter(hash_content) {
459 let module_name = cap.get(1).expect("group 1 always exists").as_str();
460
461 if module_name == "perl" {
463 continue;
464 }
465
466 let version = cap
467 .get(2)
468 .or_else(|| cap.get(3))
469 .or_else(|| cap.get(4))
470 .map(|m| m.as_str());
471
472 let extracted_requirement = match version {
473 Some("0") | Some("") | None => None,
474 Some(v) => Some(v.to_string()),
475 };
476
477 let purl = PackageUrl::new("cpan", module_name)
478 .ok()
479 .map(|p| p.to_string());
480
481 deps.push(Dependency {
482 purl,
483 extracted_requirement,
484 scope: Some(scope.to_string()),
485 is_runtime: Some(is_runtime),
486 is_optional: Some(false),
487 is_pinned: None,
488 is_direct: Some(true),
489 resolved_package: None,
490 extra_data: None,
491 });
492 }
493
494 deps
495}
496
// Registers this parser with the crate's parser registry. Arguments as
// written: display name, path glob(s) handled, package type string, primary
// language, and a documentation URL.
crate::register_parser!(
    "CPAN Perl Makefile.PL",
    &["*/Makefile.PL"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
);