1use std::collections::HashMap;
16use std::fs;
17use std::path::Path;
18use std::sync::LazyLock;
19
20use log::warn;
21use packageurl::PackageUrl;
22use regex::Regex;
23use serde_json::json;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26
27use super::PackageParser;
28
29static RE_WRITEMAKEFILE: LazyLock<Regex> =
30 LazyLock::new(|| Regex::new(r"WriteMakefile1?\s*\(").unwrap());
31static RE_SIMPLE_KV: LazyLock<Regex> = LazyLock::new(|| {
32 Regex::new(r#"(?m)^\s*([A-Z_]+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|q\{([^}]*)\}|q\(([^)]*)\))"#)
33 .unwrap()
34});
35static RE_HASH_BLOCK: LazyLock<Regex> =
36 LazyLock::new(|| Regex::new(r"([A-Z_]+)\s*=>\s*\{([^}]*)\}").unwrap());
37static RE_AUTHOR_ARRAY: LazyLock<Regex> =
38 LazyLock::new(|| Regex::new(r"AUTHOR\s*=>\s*\[([^\]]*)\]").unwrap());
39static RE_QUOTED_STRING: LazyLock<Regex> =
40 LazyLock::new(|| Regex::new(r#"['"]([^'"]*)['"']"#).unwrap());
41static RE_DEP_PAIR: LazyLock<Regex> = LazyLock::new(|| {
42 Regex::new(r#"['"]([^'"]+)['"]\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))"#).unwrap()
43});
44
// Every package produced by this parser is a CPAN package.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;

/// Parser for Perl `Makefile.PL` build scripts (ExtUtils::MakeMaker).
pub struct CpanMakefilePlParser;
48
49impl PackageParser for CpanMakefilePlParser {
50 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
51
52 fn is_match(path: &Path) -> bool {
53 path.file_name().is_some_and(|name| name == "Makefile.PL")
54 }
55
56 fn extract_packages(path: &Path) -> Vec<PackageData> {
57 let content = match fs::read_to_string(path) {
58 Ok(c) => c,
59 Err(e) => {
60 warn!("Failed to read Makefile.PL file {:?}: {}", path, e);
61 return vec![PackageData {
62 package_type: Some(PACKAGE_TYPE),
63 primary_language: Some("Perl".to_string()),
64 datasource_id: Some(DatasourceId::CpanMakefile),
65 ..Default::default()
66 }];
67 }
68 };
69
70 vec![parse_makefile_pl(&content)]
71 }
72}
73
74pub(crate) fn parse_makefile_pl(content: &str) -> PackageData {
75 let makefile_block = extract_writemakefile_block(content);
77 if makefile_block.is_empty() {
78 return default_package_data();
79 }
80
81 let fields = parse_hash_fields(&makefile_block);
82
83 let name = fields.get("NAME").map(|n| n.to_string());
84 let version = fields.get("VERSION").map(|v| v.to_string());
85 let description = fields.get("ABSTRACT").map(|d| d.to_string());
86 let extracted_license_statement = fields.get("LICENSE").map(|l| l.to_string());
87
88 let parties = parse_author(&fields);
89 let dependencies = parse_dependencies(&fields);
90
91 let mut extra_data = HashMap::new();
92 if let Some(min_perl) = fields.get("MIN_PERL_VERSION") {
93 extra_data.insert("MIN_PERL_VERSION".to_string(), json!(min_perl));
94 }
95 if let Some(version_from) = fields.get("VERSION_FROM") {
96 extra_data.insert("VERSION_FROM".to_string(), json!(version_from));
97 }
98 if let Some(abstract_from) = fields.get("ABSTRACT_FROM") {
99 extra_data.insert("ABSTRACT_FROM".to_string(), json!(abstract_from));
100 }
101
102 let purl = name.as_ref().and_then(|n| {
104 let purl_name = n.replace("::", "-");
105 PackageUrl::new("cpan", &purl_name).ok().map(|mut p| {
106 if let Some(v) = &version {
107 let _ = p.with_version(v).ok();
108 }
109 p.to_string()
110 })
111 });
112
113 PackageData {
114 package_type: Some(PACKAGE_TYPE),
115 namespace: Some("cpan".to_string()),
116 name,
117 version,
118 description,
119 extracted_license_statement,
120 parties,
121 dependencies,
122 extra_data: if extra_data.is_empty() {
123 None
124 } else {
125 Some(extra_data)
126 },
127 purl,
128 datasource_id: Some(DatasourceId::CpanMakefile),
129 primary_language: Some("Perl".to_string()),
130 ..Default::default()
131 }
132}
133
134fn default_package_data() -> PackageData {
135 PackageData {
136 package_type: Some(PACKAGE_TYPE),
137 primary_language: Some("Perl".to_string()),
138 datasource_id: Some(DatasourceId::CpanMakefile),
139 ..Default::default()
140 }
141}
142
143fn extract_writemakefile_block(content: &str) -> String {
144 let start_match = match RE_WRITEMAKEFILE.find(content) {
145 Some(m) => m,
146 None => return String::new(),
147 };
148
149 let start_pos = start_match.end();
150 let content_from_start = &content[start_pos..];
151
152 let mut depth = 1;
154 let mut end_pos = 0;
155 let chars: Vec<char> = content_from_start.chars().collect();
156
157 for (i, &ch) in chars.iter().enumerate() {
158 match ch {
159 '(' => depth += 1,
160 ')' => {
161 depth -= 1;
162 if depth == 0 {
163 end_pos = i;
164 break;
165 }
166 }
167 _ => {}
168 }
169 }
170
171 if end_pos > 0 {
172 content_from_start[..end_pos].to_string()
173 } else {
174 String::new()
175 }
176}
177
178fn parse_hash_fields(content: &str) -> HashMap<String, String> {
179 let mut fields = HashMap::new();
180
181 for cap in RE_SIMPLE_KV.captures_iter(content) {
182 let key = cap
183 .get(1)
184 .expect("group 1 always exists")
185 .as_str()
186 .to_string();
187 let value = cap
188 .get(2)
189 .or_else(|| cap.get(3))
190 .or_else(|| cap.get(4))
191 .or_else(|| cap.get(5))
192 .map(|m| m.as_str().to_string());
193
194 if let Some(v) = value {
195 fields.insert(key, v);
196 }
197 }
198
199 parse_hash_dependencies(content, &mut fields);
201
202 parse_author_array(content, &mut fields);
204
205 fields
206}
207
208fn parse_hash_dependencies(content: &str, fields: &mut HashMap<String, String>) {
209 for cap in RE_HASH_BLOCK.captures_iter(content) {
210 let key = cap.get(1).expect("group 1 always exists").as_str();
211 let hash_content = cap.get(2).expect("group 2 always exists").as_str();
212
213 if matches!(
216 key,
217 "PREREQ_PM" | "BUILD_REQUIRES" | "TEST_REQUIRES" | "CONFIGURE_REQUIRES"
218 ) {
219 fields.insert(format!("_HASH_{}", key), hash_content.to_string());
220 }
221 }
222}
223
224fn parse_author_array(content: &str, fields: &mut HashMap<String, String>) {
225 if let Some(cap) = RE_AUTHOR_ARRAY.captures(content) {
226 let array_content = cap.get(1).expect("group 1 always exists").as_str();
227
228 let authors: Vec<String> = RE_QUOTED_STRING
229 .captures_iter(array_content)
230 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
231 .collect();
232
233 if !authors.is_empty() {
234 fields.insert("_ARRAY_AUTHOR".to_string(), authors.join("||"));
236 }
237 }
238}
239
240fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
241 if let Some(authors_str) = fields.get("_ARRAY_AUTHOR") {
243 return authors_str
244 .split("||")
245 .filter_map(|author_str| {
246 if author_str.trim().is_empty() {
247 return None;
248 }
249 let (name, email) = parse_author_string(author_str);
250 Some(Party {
251 role: Some("author".to_string()),
252 name,
253 email,
254 r#type: Some("person".to_string()),
255 url: None,
256 organization: None,
257 organization_url: None,
258 timezone: None,
259 })
260 })
261 .collect();
262 }
263
264 if let Some(author_str) = fields.get("AUTHOR") {
266 let (name, email) = parse_author_string(author_str);
267 return vec![Party {
268 role: Some("author".to_string()),
269 name,
270 email,
271 r#type: Some("person".to_string()),
272 url: None,
273 organization: None,
274 organization_url: None,
275 timezone: None,
276 }];
277 }
278
279 Vec::new()
280}
281
/// Splits an author string of the form `Name <email>` into its parts.
///
/// When a `<...>` section is present (with `<` before `>`), the trimmed
/// name and email are returned, each `None` if empty after trimming.
/// Otherwise the whole trimmed string is treated as the name.
fn parse_author_string(s: &str) -> (Option<String>, Option<String>) {
    // Positions of the angle brackets, kept only when properly ordered.
    let brackets = s.find('<').zip(s.find('>')).filter(|(open, close)| open < close);

    match brackets {
        Some((open, close)) => {
            let non_empty = |part: &str| {
                let part = part.trim();
                (!part.is_empty()).then(|| part.to_string())
            };
            (non_empty(&s[..open]), non_empty(&s[open + 1..close]))
        }
        None => (Some(s.trim().to_string()), None),
    }
}
306
307fn parse_dependencies(fields: &HashMap<String, String>) -> Vec<Dependency> {
308 let mut dependencies = Vec::new();
309
310 if let Some(hash_content) = fields.get("_HASH_PREREQ_PM") {
312 dependencies.extend(extract_deps_from_hash(hash_content, "runtime", true));
313 }
314
315 if let Some(hash_content) = fields.get("_HASH_BUILD_REQUIRES") {
317 dependencies.extend(extract_deps_from_hash(hash_content, "build", false));
318 }
319
320 if let Some(hash_content) = fields.get("_HASH_TEST_REQUIRES") {
322 dependencies.extend(extract_deps_from_hash(hash_content, "test", false));
323 }
324
325 if let Some(hash_content) = fields.get("_HASH_CONFIGURE_REQUIRES") {
327 dependencies.extend(extract_deps_from_hash(hash_content, "configure", false));
328 }
329
330 dependencies
331}
332
333fn extract_deps_from_hash(hash_content: &str, scope: &str, is_runtime: bool) -> Vec<Dependency> {
334 let mut deps = Vec::new();
335
336 for cap in RE_DEP_PAIR.captures_iter(hash_content) {
337 let module_name = cap.get(1).expect("group 1 always exists").as_str();
338
339 if module_name == "perl" {
341 continue;
342 }
343
344 let version = cap
345 .get(2)
346 .or_else(|| cap.get(3))
347 .or_else(|| cap.get(4))
348 .map(|m| m.as_str());
349
350 let extracted_requirement = match version {
351 Some("0") | Some("") | None => None,
352 Some(v) => Some(v.to_string()),
353 };
354
355 let purl = PackageUrl::new("cpan", module_name)
356 .ok()
357 .map(|p| p.to_string());
358
359 deps.push(Dependency {
360 purl,
361 extracted_requirement,
362 scope: Some(scope.to_string()),
363 is_runtime: Some(is_runtime),
364 is_optional: Some(false),
365 is_pinned: None,
366 is_direct: Some(true),
367 resolved_package: None,
368 extra_data: None,
369 });
370 }
371
372 deps
373}
374
// Registers this parser with the crate-wide parser registry: display
// name, path globs it claims, default package type, primary language,
// and an informational URL for the datasource.
crate::register_parser!(
    "CPAN Perl Makefile.PL",
    &["*/Makefile.PL"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/ExtUtils::MakeMaker"),
);