Skip to main content

provenant/parsers/
cpan_dist_ini.rs

1//! Parser for CPAN dist.ini files.
2//!
3//! Extracts Perl package metadata from `dist.ini` files used by Dist::Zilla.
4//!
5//! # Supported Formats
6//! - `dist.ini` - CPAN Dist::Zilla configuration
7//!
8//! # Implementation Notes
9//! - Format: INI-style configuration file
10//! - Spec: https://metacpan.org/pod/Dist::Zilla::Tutorial
11//! - Extracts: name, version, author, license, copyright_holder, abstract
//! - Dependencies from `[Prereqs]` sections (goes beyond the Python implementation, which has no parser for them)
13
14use std::collections::HashMap;
15use std::fs;
16use std::path::Path;
17
18use log::warn;
19use serde_json::json;
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22
23use super::PackageParser;
24use super::license_normalization::{
25    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
26    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
27};
28
/// Package type attached to every `PackageData` produced by this parser.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;

/// Parser for Dist::Zilla `dist.ini` files; all behavior lives in its
/// `PackageParser` implementation.
pub struct CpanDistIniParser;
32
33impl PackageParser for CpanDistIniParser {
34    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
35
36    fn is_match(path: &Path) -> bool {
37        path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
38    }
39
40    fn extract_packages(path: &Path) -> Vec<PackageData> {
41        let content = match fs::read_to_string(path) {
42            Ok(c) => c,
43            Err(e) => {
44                warn!("Failed to read dist.ini file {:?}: {}", path, e);
45                return vec![PackageData {
46                    package_type: Some(PACKAGE_TYPE),
47                    primary_language: Some("Perl".to_string()),
48                    datasource_id: Some(DatasourceId::CpanDistIni),
49                    ..Default::default()
50                }];
51            }
52        };
53
54        vec![parse_dist_ini(&content)]
55    }
56}
57
58pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
59    let (root_fields, sections) = parse_ini_structure(content);
60
61    let name = root_fields.get("name").map(|s| s.replace('-', "::"));
62    let version = root_fields.get("version").cloned();
63    let description = root_fields.get("abstract").cloned();
64    let extracted_license_statement = root_fields.get("license").cloned();
65    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
66        extracted_license_statement
67            .as_deref()
68            .and_then(normalize_cpan_dist_ini_license)
69            .map(|normalized| {
70                build_declared_license_data(
71                    normalized,
72                    DeclaredLicenseMatchMetadata::single_line(
73                        extracted_license_statement.as_deref().unwrap_or_default(),
74                    ),
75                )
76            })
77            .unwrap_or_else(empty_declared_license_data);
78    let copyright_holder = root_fields.get("copyright_holder").cloned();
79
80    let parties = parse_author(&root_fields);
81    let dependencies = parse_dependencies(&sections);
82
83    let mut extra_data = HashMap::new();
84    if let Some(holder) = copyright_holder {
85        extra_data.insert("copyright_holder".to_string(), json!(holder));
86    }
87    if let Some(year) = root_fields.get("copyright_year") {
88        extra_data.insert("copyright_year".to_string(), json!(year));
89    }
90
91    PackageData {
92        package_type: Some(PACKAGE_TYPE),
93        namespace: Some("cpan".to_string()),
94        name,
95        version,
96        description,
97        declared_license_expression,
98        declared_license_expression_spdx,
99        license_detections,
100        extracted_license_statement,
101        parties,
102        dependencies,
103        extra_data: if extra_data.is_empty() {
104            None
105        } else {
106            Some(extra_data)
107        },
108        datasource_id: Some(DatasourceId::CpanDistIni),
109        primary_language: Some("Perl".to_string()),
110        ..Default::default()
111    }
112}
113
114fn normalize_cpan_dist_ini_license(value: &str) -> Option<NormalizedDeclaredLicense> {
115    match value.trim() {
116        "Perl_5" => Some(NormalizedDeclaredLicense::new(
117            "gpl-1.0-plus OR artistic-perl-1.0",
118            "GPL-1.0-or-later OR Artistic-1.0-Perl",
119        )),
120        other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
121    }
122}
123
/// Removes a trailing inline comment from an already-trimmed INI line.
///
/// A `;` starts an inline comment only when it is preceded by whitespace, so
/// values such as `a;b` are left untouched. The returned slice has no trailing
/// whitespace.
fn strip_inline_comment(line: &str) -> &str {
    let mut prev_is_space = false;
    for (idx, ch) in line.char_indices() {
        if ch == ';' && prev_is_space {
            return line[..idx].trim_end();
        }
        prev_is_space = ch.is_whitespace();
    }
    line
}

/// Splits `dist.ini` text into root-level fields and per-section fields.
///
/// Returns `(root_fields, sections)`: `root_fields` holds `key = value` pairs
/// that appear before the first `[section]` header, and `sections` maps each
/// section name to the pairs that follow its header.
///
/// Parsing rules:
/// - Blank lines and lines starting with `;` or `#` are ignored.
/// - Inline comments (whitespace followed by `;`) are stripped, so
///   `[Prereqs] ; note` is still recognized as a section header and
///   `name = Foo ; note` yields the value `Foo`.
/// - Whitespace just inside the brackets of a section header is trimmed.
/// - Keys and values are trimmed; the line is split at the first `=`.
/// - Lines without `=` that are not headers are skipped.
/// - A repeated key within the same scope keeps the last value, and a section
///   is only recorded once it has at least one key.
fn parse_ini_structure(
    content: &str,
) -> (
    HashMap<String, String>,
    HashMap<String, HashMap<String, String>>,
) {
    let mut root_fields = HashMap::new();
    let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
    let mut current_section: Option<String> = None;

    for raw_line in content.lines() {
        let line = strip_inline_comment(raw_line.trim());

        if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
            continue;
        }

        if let Some(header) = line
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'))
        {
            current_section = Some(header.trim().to_string());
            continue;
        }

        if let Some((key, value)) = line.split_once('=') {
            let key = key.trim().to_string();
            let value = value.trim().to_string();

            match &current_section {
                Some(section_name) => {
                    sections
                        .entry(section_name.clone())
                        .or_default()
                        .insert(key, value);
                }
                None => {
                    root_fields.insert(key, value);
                }
            }
        }
    }

    (root_fields, sections)
}
163
164fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
165    fields
166        .get("author")
167        .map(|author_str| {
168            if let Some((name, email)) = parse_author_string(author_str) {
169                vec![Party {
170                    role: Some("author".to_string()),
171                    name: Some(name),
172                    email: Some(email),
173                    r#type: None,
174                    url: None,
175                    organization: None,
176                    organization_url: None,
177                    timezone: None,
178                }]
179            } else {
180                vec![Party {
181                    role: Some("author".to_string()),
182                    name: Some(author_str.clone()),
183                    r#type: None,
184                    email: None,
185                    url: None,
186                    organization: None,
187                    organization_url: None,
188                    timezone: None,
189                }]
190            }
191        })
192        .unwrap_or_default()
193}
194
/// Splits an author string of the form `Name <email>` into `(name, email)`.
///
/// Both parts are trimmed. Returns `None` when the string has no `<`, or no
/// `>` after that `<`.
///
/// The closing `>` is searched for only after the opening `<`. Searching from
/// the start of the string would let an earlier `>` (as in `"a > b <x@y>"`)
/// produce an inverted range and panic when slicing.
fn parse_author_string(s: &str) -> Option<(String, String)> {
    let start = s.find('<')?;
    // `s[start..]` begins with '<', so the relative index is at least 1 and
    // `start + 1..end` is always a valid (possibly empty) range.
    let end = start + s[start..].find('>')?;

    let name = s[..start].trim().to_string();
    let email = s[start + 1..end].trim().to_string();
    Some((name, email))
}
205
206fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
207    let mut dependencies = Vec::new();
208
209    let mut sorted_sections: Vec<_> = sections.iter().collect();
210    sorted_sections.sort_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
211
212    for (section_name, fields) in sorted_sections {
213        let Some(scope) = classify_prereq_scope(section_name) else {
214            continue;
215        };
216
217        let mut sorted_fields: Vec<_> = fields.iter().collect();
218        sorted_fields.sort_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
219
220        for (module_name, version_req) in sorted_fields {
221            let purl = format!("pkg:cpan/{}", module_name);
222            let extracted_requirement = if version_req == "0" || version_req.is_empty() {
223                None
224            } else {
225                Some(version_req.clone())
226            };
227
228            dependencies.push(Dependency {
229                purl: Some(purl),
230                scope: Some(scope.clone()),
231                extracted_requirement,
232                is_runtime: Some(scope == "runtime"),
233                is_optional: Some(false),
234                is_pinned: None,
235                is_direct: None,
236                resolved_package: None,
237                extra_data: None,
238            });
239        }
240    }
241
242    dependencies
243}
244
/// Derives a dependency scope from a `dist.ini` section name.
///
/// Returns `None` for sections whose name does not start with `Prereq`.
/// Otherwise the first phase marker found in the name decides the scope, in
/// this priority order: `Test` → `test`, `Build` → `build`,
/// `Configure` → `configure`, `Develop` → `develop`. A prerequisite section
/// without any marker (for example plain `Prereqs` or
/// `Prereqs / RuntimeRequires`) is `runtime`.
fn classify_prereq_scope(section_name: &str) -> Option<String> {
    // Each marker also covers the longer `<Phase>Requires` spelling, so a
    // single substring test per phase is sufficient.
    const PHASE_MARKERS: [(&str, &str); 4] = [
        ("Test", "test"),
        ("Build", "build"),
        ("Configure", "configure"),
        ("Develop", "develop"),
    ];

    if !section_name.starts_with("Prereq") {
        return None;
    }

    for &(marker, scope) in PHASE_MARKERS.iter() {
        if section_name.contains(marker) {
            return Some(scope.to_string());
        }
    }

    Some("runtime".to_string())
}
260
// Registration entry for this parser. The arguments appear to be, in order:
// a human-readable description, the path patterns handled, the package type,
// the primary language, and an optional documentation URL.
// NOTE(review): argument meanings are inferred from their values — confirm
// against the `register_parser!` macro definition.
crate::register_parser!(
    "CPAN Perl dist.ini",
    &["*/dist.ini"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
);