Skip to main content

provenant/parsers/
cpan_dist_ini.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for CPAN dist.ini files.
5//!
6//! Extracts Perl package metadata from `dist.ini` files used by Dist::Zilla.
7//!
8//! # Supported Formats
9//! - `dist.ini` - CPAN Dist::Zilla configuration
10//!
11//! # Implementation Notes
12//! - Format: INI-style configuration file
13//! - Spec: https://metacpan.org/pod/Dist::Zilla::Tutorial
//! - Extracts: name, version, author, license, copyright_holder, copyright_year, abstract
//! - Dependencies are read from `[Prereq…]` sections (this goes beyond the Python original, which has no dist.ini parser)
16
17use std::collections::HashMap;
18use std::path::Path;
19
20use crate::parser_warn as warn;
21use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
22use serde_json::json;
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25
26use super::PackageParser;
27use super::license_normalization::{
28    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
29    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
30};
31
// Package type stamped on every `PackageData` produced in this file.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;
33
34pub struct CpanDistIniParser;
35
36impl PackageParser for CpanDistIniParser {
37    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
38
39    fn is_match(path: &Path) -> bool {
40        path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
41    }
42
43    fn extract_packages(path: &Path) -> Vec<PackageData> {
44        let content = match read_file_to_string(path, None) {
45            Ok(c) => c,
46            Err(e) => {
47                warn!("Failed to read dist.ini file {:?}: {}", path, e);
48                return vec![PackageData {
49                    package_type: Some(PACKAGE_TYPE),
50                    primary_language: Some("Perl".to_string()),
51                    datasource_id: Some(DatasourceId::CpanDistIni),
52                    ..Default::default()
53                }];
54            }
55        };
56
57        vec![parse_dist_ini(&content)]
58    }
59}
60
61pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
62    let (root_fields, sections) = parse_ini_structure(content);
63
64    let name = root_fields
65        .get("name")
66        .map(|s| truncate_field(s.replace('-', "::")));
67    let version = root_fields.get("version").cloned().map(truncate_field);
68    let description = root_fields.get("abstract").cloned().map(truncate_field);
69    let extracted_license_statement = root_fields.get("license").cloned().map(truncate_field);
70    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
71        extracted_license_statement
72            .as_deref()
73            .and_then(normalize_cpan_dist_ini_license)
74            .map(|normalized| {
75                build_declared_license_data(
76                    normalized,
77                    DeclaredLicenseMatchMetadata::single_line(
78                        extracted_license_statement.as_deref().unwrap_or_default(),
79                    ),
80                )
81            })
82            .unwrap_or_else(empty_declared_license_data);
83    let copyright_holder = root_fields
84        .get("copyright_holder")
85        .cloned()
86        .map(truncate_field);
87
88    let parties = parse_author(&root_fields);
89    let dependencies = parse_dependencies(&sections);
90
91    let mut extra_data = HashMap::new();
92    if let Some(holder) = copyright_holder {
93        extra_data.insert("copyright_holder".to_string(), json!(holder));
94    }
95    if let Some(year) = root_fields.get("copyright_year") {
96        extra_data.insert("copyright_year".to_string(), json!(year));
97    }
98
99    PackageData {
100        package_type: Some(PACKAGE_TYPE),
101        namespace: Some("cpan".to_string()),
102        name,
103        version,
104        description,
105        declared_license_expression,
106        declared_license_expression_spdx,
107        license_detections,
108        extracted_license_statement,
109        parties,
110        dependencies,
111        extra_data: if extra_data.is_empty() {
112            None
113        } else {
114            Some(extra_data)
115        },
116        datasource_id: Some(DatasourceId::CpanDistIni),
117        primary_language: Some("Perl".to_string()),
118        ..Default::default()
119    }
120}
121
122fn normalize_cpan_dist_ini_license(value: &str) -> Option<NormalizedDeclaredLicense> {
123    match value.trim() {
124        "Perl_5" => Some(NormalizedDeclaredLicense::new(
125            "gpl-1.0-plus OR artistic-perl-1.0",
126            "GPL-1.0-or-later OR Artistic-1.0-Perl",
127        )),
128        other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
129    }
130}
131
132fn parse_ini_structure(
133    content: &str,
134) -> (
135    HashMap<String, String>,
136    HashMap<String, HashMap<String, String>>,
137) {
138    let mut root_fields = HashMap::new();
139    let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
140    let mut current_section: Option<String> = None;
141
142    for line in content.lines().take(MAX_ITERATION_COUNT) {
143        let line = line.trim();
144
145        if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
146            continue;
147        }
148
149        if line.starts_with('[') && line.ends_with(']') {
150            current_section = Some(line[1..line.len() - 1].to_string());
151            continue;
152        }
153
154        if let Some((key, value)) = line.split_once('=') {
155            let key = key.trim().to_string();
156            let value = truncate_field(value.trim().to_string());
157
158            if let Some(section_name) = &current_section {
159                sections
160                    .entry(section_name.clone())
161                    .or_default()
162                    .insert(key, value);
163            } else {
164                root_fields.insert(key, value);
165            }
166        }
167    }
168
169    (root_fields, sections)
170}
171
172fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
173    fields
174        .get("author")
175        .map(|author_str| {
176            if let Some((name, email)) = parse_author_string(author_str) {
177                vec![Party {
178                    role: Some("author".to_string()),
179                    name: Some(name),
180                    email: Some(email),
181                    r#type: None,
182                    url: None,
183                    organization: None,
184                    organization_url: None,
185                    timezone: None,
186                }]
187            } else {
188                vec![Party {
189                    role: Some("author".to_string()),
190                    name: Some(truncate_field(author_str.clone())),
191                    r#type: None,
192                    email: None,
193                    url: None,
194                    organization: None,
195                    organization_url: None,
196                    timezone: None,
197                }]
198            }
199        })
200        .unwrap_or_default()
201}
202
203fn parse_author_string(s: &str) -> Option<(String, String)> {
204    if let Some(start) = s.find('<')
205        && let Some(end) = s.find('>')
206    {
207        let name = truncate_field(s[..start].trim().to_string());
208        let email = truncate_field(s[start + 1..end].trim().to_string());
209        return Some((name, email));
210    }
211    None
212}
213
214fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
215    let mut dependencies = Vec::new();
216
217    let mut sorted_sections: Vec<_> = sections.iter().collect();
218    sorted_sections.sort_by_key(|(left_name, _)| *left_name);
219
220    for (section_name, fields) in sorted_sections.iter().take(MAX_ITERATION_COUNT) {
221        let Some(scope) = classify_prereq_scope(section_name) else {
222            continue;
223        };
224
225        let mut sorted_fields: Vec<_> = fields.iter().collect();
226        sorted_fields.sort_by_key(|(left_name, _)| *left_name);
227
228        for (module_name, version_req) in sorted_fields.iter().take(MAX_ITERATION_COUNT) {
229            let purl = truncate_field(format!("pkg:cpan/{}", module_name));
230            let extracted_requirement = if version_req.as_str() == "0" || version_req.is_empty() {
231                None
232            } else {
233                Some(truncate_field(version_req.to_string()))
234            };
235
236            dependencies.push(Dependency {
237                purl: Some(purl),
238                scope: Some(scope.clone()),
239                extracted_requirement,
240                is_runtime: Some(scope == "runtime"),
241                is_optional: Some(false),
242                is_pinned: None,
243                is_direct: None,
244                resolved_package: None,
245                extra_data: None,
246            });
247        }
248    }
249
250    dependencies
251}
252
/// Derives a dependency scope from an INI section name.
///
/// Returns `None` for sections whose name does not start with `Prereq`.
/// Otherwise the first phase marker found in the name decides the scope, in
/// this order: `Test` → `"test"`, `Build` → `"build"`, `Configure` →
/// `"configure"`, `Develop` → `"develop"`. A name with none of these markers
/// (e.g. plain `Prereqs`) is `"runtime"`.
fn classify_prereq_scope(section_name: &str) -> Option<String> {
    // Checked in order; the first marker contained in the name wins.
    const PHASE_MARKERS: [(&str, &str); 4] = [
        ("Test", "test"),
        ("Build", "build"),
        ("Configure", "configure"),
        ("Develop", "develop"),
    ];

    if !section_name.starts_with("Prereq") {
        return None;
    }

    let scope = PHASE_MARKERS
        .iter()
        .find(|&&(marker, _)| section_name.contains(marker))
        .map_or("runtime", |&(_, scope)| scope);

    Some(scope.to_string())
}
268
// Registers this parser through the crate's `register_parser!` macro.
// NOTE(review): the arguments look like description, path globs, package type,
// primary language and documentation URL — confirm against the macro definition.
crate::register_parser!(
    "CPAN Perl dist.ini",
    &["*/dist.ini"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
);