1use std::collections::HashMap;
15use std::path::Path;
16
17use crate::parser_warn as warn;
18use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
19use serde_json::json;
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22
23use super::PackageParser;
24use super::license_normalization::{
25 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
26 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
27};
28
29const PACKAGE_TYPE: PackageType = PackageType::Cpan;
30
31pub struct CpanDistIniParser;
32
33impl PackageParser for CpanDistIniParser {
34 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
35
36 fn is_match(path: &Path) -> bool {
37 path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
38 }
39
40 fn extract_packages(path: &Path) -> Vec<PackageData> {
41 let content = match read_file_to_string(path, None) {
42 Ok(c) => c,
43 Err(e) => {
44 warn!("Failed to read dist.ini file {:?}: {}", path, e);
45 return vec![PackageData {
46 package_type: Some(PACKAGE_TYPE),
47 primary_language: Some("Perl".to_string()),
48 datasource_id: Some(DatasourceId::CpanDistIni),
49 ..Default::default()
50 }];
51 }
52 };
53
54 vec![parse_dist_ini(&content)]
55 }
56}
57
58pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
59 let (root_fields, sections) = parse_ini_structure(content);
60
61 let name = root_fields
62 .get("name")
63 .map(|s| truncate_field(s.replace('-', "::")));
64 let version = root_fields.get("version").cloned().map(truncate_field);
65 let description = root_fields.get("abstract").cloned().map(truncate_field);
66 let extracted_license_statement = root_fields.get("license").cloned().map(truncate_field);
67 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
68 extracted_license_statement
69 .as_deref()
70 .and_then(normalize_cpan_dist_ini_license)
71 .map(|normalized| {
72 build_declared_license_data(
73 normalized,
74 DeclaredLicenseMatchMetadata::single_line(
75 extracted_license_statement.as_deref().unwrap_or_default(),
76 ),
77 )
78 })
79 .unwrap_or_else(empty_declared_license_data);
80 let copyright_holder = root_fields
81 .get("copyright_holder")
82 .cloned()
83 .map(truncate_field);
84
85 let parties = parse_author(&root_fields);
86 let dependencies = parse_dependencies(§ions);
87
88 let mut extra_data = HashMap::new();
89 if let Some(holder) = copyright_holder {
90 extra_data.insert("copyright_holder".to_string(), json!(holder));
91 }
92 if let Some(year) = root_fields.get("copyright_year") {
93 extra_data.insert("copyright_year".to_string(), json!(year));
94 }
95
96 PackageData {
97 package_type: Some(PACKAGE_TYPE),
98 namespace: Some("cpan".to_string()),
99 name,
100 version,
101 description,
102 declared_license_expression,
103 declared_license_expression_spdx,
104 license_detections,
105 extracted_license_statement,
106 parties,
107 dependencies,
108 extra_data: if extra_data.is_empty() {
109 None
110 } else {
111 Some(extra_data)
112 },
113 datasource_id: Some(DatasourceId::CpanDistIni),
114 primary_language: Some("Perl".to_string()),
115 ..Default::default()
116 }
117}
118
119fn normalize_cpan_dist_ini_license(value: &str) -> Option<NormalizedDeclaredLicense> {
120 match value.trim() {
121 "Perl_5" => Some(NormalizedDeclaredLicense::new(
122 "gpl-1.0-plus OR artistic-perl-1.0",
123 "GPL-1.0-or-later OR Artistic-1.0-Perl",
124 )),
125 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
126 }
127}
128
129fn parse_ini_structure(
130 content: &str,
131) -> (
132 HashMap<String, String>,
133 HashMap<String, HashMap<String, String>>,
134) {
135 let mut root_fields = HashMap::new();
136 let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
137 let mut current_section: Option<String> = None;
138
139 for line in content.lines().take(MAX_ITERATION_COUNT) {
140 let line = line.trim();
141
142 if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
143 continue;
144 }
145
146 if line.starts_with('[') && line.ends_with(']') {
147 current_section = Some(line[1..line.len() - 1].to_string());
148 continue;
149 }
150
151 if let Some((key, value)) = line.split_once('=') {
152 let key = key.trim().to_string();
153 let value = truncate_field(value.trim().to_string());
154
155 if let Some(section_name) = ¤t_section {
156 sections
157 .entry(section_name.clone())
158 .or_default()
159 .insert(key, value);
160 } else {
161 root_fields.insert(key, value);
162 }
163 }
164 }
165
166 (root_fields, sections)
167}
168
169fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
170 fields
171 .get("author")
172 .map(|author_str| {
173 if let Some((name, email)) = parse_author_string(author_str) {
174 vec![Party {
175 role: Some("author".to_string()),
176 name: Some(name),
177 email: Some(email),
178 r#type: None,
179 url: None,
180 organization: None,
181 organization_url: None,
182 timezone: None,
183 }]
184 } else {
185 vec![Party {
186 role: Some("author".to_string()),
187 name: Some(truncate_field(author_str.clone())),
188 r#type: None,
189 email: None,
190 url: None,
191 organization: None,
192 organization_url: None,
193 timezone: None,
194 }]
195 }
196 })
197 .unwrap_or_default()
198}
199
200fn parse_author_string(s: &str) -> Option<(String, String)> {
201 if let Some(start) = s.find('<')
202 && let Some(end) = s.find('>')
203 {
204 let name = truncate_field(s[..start].trim().to_string());
205 let email = truncate_field(s[start + 1..end].trim().to_string());
206 return Some((name, email));
207 }
208 None
209}
210
211fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
212 let mut dependencies = Vec::new();
213
214 let mut sorted_sections: Vec<_> = sections.iter().collect();
215 sorted_sections.sort_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
216
217 for (section_name, fields) in sorted_sections.iter().take(MAX_ITERATION_COUNT) {
218 let Some(scope) = classify_prereq_scope(section_name) else {
219 continue;
220 };
221
222 let mut sorted_fields: Vec<_> = fields.iter().collect();
223 sorted_fields.sort_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
224
225 for (module_name, version_req) in sorted_fields.iter().take(MAX_ITERATION_COUNT) {
226 let purl = truncate_field(format!("pkg:cpan/{}", module_name));
227 let extracted_requirement = if version_req.as_str() == "0" || version_req.is_empty() {
228 None
229 } else {
230 Some(truncate_field(version_req.to_string()))
231 };
232
233 dependencies.push(Dependency {
234 purl: Some(purl),
235 scope: Some(scope.clone()),
236 extracted_requirement,
237 is_runtime: Some(scope == "runtime"),
238 is_optional: Some(false),
239 is_pinned: None,
240 is_direct: None,
241 resolved_package: None,
242 extra_data: None,
243 });
244 }
245 }
246
247 dependencies
248}
249
/// Derives a dependency scope from a `dist.ini` section name.
///
/// Returns `None` unless the name starts with `Prereq`. Otherwise the first
/// marker found in the name, checked in the order `Test`, `Build`,
/// `Configure`, selects `test`, `build` or `configure`; with no marker the
/// scope is `runtime`.
fn classify_prereq_scope(section_name: &str) -> Option<String> {
    // (substring to look for, resulting scope), in priority order.
    const PHASE_MARKERS: [(&str, &str); 3] = [
        ("Test", "test"),
        ("Build", "build"),
        ("Configure", "configure"),
    ];

    if !section_name.starts_with("Prereq") {
        return None;
    }

    let scope = PHASE_MARKERS
        .iter()
        .find(|(marker, _)| section_name.contains(*marker))
        .map_or("runtime", |(_, scope)| *scope);

    Some(scope.to_string())
}
265
266crate::register_parser!(
267 "CPAN Perl dist.ini",
268 &["*/dist.ini"],
269 "cpan",
270 "Perl",
271 Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
272);