1use std::collections::HashMap;
18use std::path::Path;
19
20use crate::parser_warn as warn;
21use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
22use serde_json::json;
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25
26use super::PackageParser;
27use super::license_normalization::{
28 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
29 empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
30};
31
32const PACKAGE_TYPE: PackageType = PackageType::Cpan;
33
34pub struct CpanDistIniParser;
35
36impl PackageParser for CpanDistIniParser {
37 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
38
39 fn is_match(path: &Path) -> bool {
40 path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
41 }
42
43 fn extract_packages(path: &Path) -> Vec<PackageData> {
44 let content = match read_file_to_string(path, None) {
45 Ok(c) => c,
46 Err(e) => {
47 warn!("Failed to read dist.ini file {:?}: {}", path, e);
48 return vec![PackageData {
49 package_type: Some(PACKAGE_TYPE),
50 primary_language: Some("Perl".to_string()),
51 datasource_id: Some(DatasourceId::CpanDistIni),
52 ..Default::default()
53 }];
54 }
55 };
56
57 vec![parse_dist_ini(&content)]
58 }
59}
60
61pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
62 let (root_fields, sections) = parse_ini_structure(content);
63
64 let name = root_fields
65 .get("name")
66 .map(|s| truncate_field(s.replace('-', "::")));
67 let version = root_fields.get("version").cloned().map(truncate_field);
68 let description = root_fields.get("abstract").cloned().map(truncate_field);
69 let extracted_license_statement = root_fields.get("license").cloned().map(truncate_field);
70 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
71 extracted_license_statement
72 .as_deref()
73 .and_then(normalize_cpan_dist_ini_license)
74 .map(|normalized| {
75 build_declared_license_data(
76 normalized,
77 DeclaredLicenseMatchMetadata::single_line(
78 extracted_license_statement.as_deref().unwrap_or_default(),
79 ),
80 )
81 })
82 .unwrap_or_else(empty_declared_license_data);
83 let copyright_holder = root_fields
84 .get("copyright_holder")
85 .cloned()
86 .map(truncate_field);
87
88 let parties = parse_author(&root_fields);
89 let dependencies = parse_dependencies(§ions);
90
91 let mut extra_data = HashMap::new();
92 if let Some(holder) = copyright_holder {
93 extra_data.insert("copyright_holder".to_string(), json!(holder));
94 }
95 if let Some(year) = root_fields.get("copyright_year") {
96 extra_data.insert("copyright_year".to_string(), json!(year));
97 }
98
99 PackageData {
100 package_type: Some(PACKAGE_TYPE),
101 namespace: Some("cpan".to_string()),
102 name,
103 version,
104 description,
105 declared_license_expression,
106 declared_license_expression_spdx,
107 license_detections,
108 extracted_license_statement,
109 parties,
110 dependencies,
111 extra_data: if extra_data.is_empty() {
112 None
113 } else {
114 Some(extra_data)
115 },
116 datasource_id: Some(DatasourceId::CpanDistIni),
117 primary_language: Some("Perl".to_string()),
118 ..Default::default()
119 }
120}
121
122fn normalize_cpan_dist_ini_license(value: &str) -> Option<NormalizedDeclaredLicense> {
123 match value.trim() {
124 "Perl_5" => Some(NormalizedDeclaredLicense::new(
125 "gpl-1.0-plus OR artistic-perl-1.0",
126 "GPL-1.0-or-later OR Artistic-1.0-Perl",
127 )),
128 other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
129 }
130}
131
132fn parse_ini_structure(
133 content: &str,
134) -> (
135 HashMap<String, String>,
136 HashMap<String, HashMap<String, String>>,
137) {
138 let mut root_fields = HashMap::new();
139 let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
140 let mut current_section: Option<String> = None;
141
142 for line in content.lines().take(MAX_ITERATION_COUNT) {
143 let line = line.trim();
144
145 if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
146 continue;
147 }
148
149 if line.starts_with('[') && line.ends_with(']') {
150 current_section = Some(line[1..line.len() - 1].to_string());
151 continue;
152 }
153
154 if let Some((key, value)) = line.split_once('=') {
155 let key = key.trim().to_string();
156 let value = truncate_field(value.trim().to_string());
157
158 if let Some(section_name) = ¤t_section {
159 sections
160 .entry(section_name.clone())
161 .or_default()
162 .insert(key, value);
163 } else {
164 root_fields.insert(key, value);
165 }
166 }
167 }
168
169 (root_fields, sections)
170}
171
172fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
173 fields
174 .get("author")
175 .map(|author_str| {
176 if let Some((name, email)) = parse_author_string(author_str) {
177 vec![Party {
178 role: Some("author".to_string()),
179 name: Some(name),
180 email: Some(email),
181 r#type: None,
182 url: None,
183 organization: None,
184 organization_url: None,
185 timezone: None,
186 }]
187 } else {
188 vec![Party {
189 role: Some("author".to_string()),
190 name: Some(truncate_field(author_str.clone())),
191 r#type: None,
192 email: None,
193 url: None,
194 organization: None,
195 organization_url: None,
196 timezone: None,
197 }]
198 }
199 })
200 .unwrap_or_default()
201}
202
203fn parse_author_string(s: &str) -> Option<(String, String)> {
204 if let Some(start) = s.find('<')
205 && let Some(end) = s.find('>')
206 {
207 let name = truncate_field(s[..start].trim().to_string());
208 let email = truncate_field(s[start + 1..end].trim().to_string());
209 return Some((name, email));
210 }
211 None
212}
213
214fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
215 let mut dependencies = Vec::new();
216
217 let mut sorted_sections: Vec<_> = sections.iter().collect();
218 sorted_sections.sort_by_key(|(left_name, _)| *left_name);
219
220 for (section_name, fields) in sorted_sections.iter().take(MAX_ITERATION_COUNT) {
221 let Some(scope) = classify_prereq_scope(section_name) else {
222 continue;
223 };
224
225 let mut sorted_fields: Vec<_> = fields.iter().collect();
226 sorted_fields.sort_by_key(|(left_name, _)| *left_name);
227
228 for (module_name, version_req) in sorted_fields.iter().take(MAX_ITERATION_COUNT) {
229 let purl = truncate_field(format!("pkg:cpan/{}", module_name));
230 let extracted_requirement = if version_req.as_str() == "0" || version_req.is_empty() {
231 None
232 } else {
233 Some(truncate_field(version_req.to_string()))
234 };
235
236 dependencies.push(Dependency {
237 purl: Some(purl),
238 scope: Some(scope.clone()),
239 extracted_requirement,
240 is_runtime: Some(scope == "runtime"),
241 is_optional: Some(false),
242 is_pinned: None,
243 is_direct: None,
244 resolved_package: None,
245 extra_data: None,
246 });
247 }
248 }
249
250 dependencies
251}
252
/// Derives a dependency scope from a `dist.ini` section name.
///
/// Returns `None` unless the name starts with `Prereq`. Otherwise the first
/// marker found in the name decides the scope, checked in this order:
/// `Test` → `test`, `Build` → `build`, `Configure` → `configure`. A name
/// containing none of them yields `runtime`.
fn classify_prereq_scope(section_name: &str) -> Option<String> {
    // Ordered: an earlier marker wins when a name contains several.
    const PHASE_MARKERS: [(&str, &str); 3] = [
        ("Test", "test"),
        ("Build", "build"),
        ("Configure", "configure"),
    ];

    if !section_name.starts_with("Prereq") {
        return None;
    }

    let scope = PHASE_MARKERS
        .iter()
        .find(|&&(marker, _)| section_name.contains(marker))
        .map_or("runtime", |&(_, scope)| scope);

    Some(scope.to_string())
}
268
// Registers metadata for the dist.ini parser. The arguments are presumably
// (description, path patterns, package type, primary language, documentation
// URL) — confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "CPAN Perl dist.ini",
    &["*/dist.ini"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
);