Skip to main content

provenant/parsers/
cpan_dist_ini.rs

//! Parser for CPAN `dist.ini` files.
//!
//! Extracts Perl package metadata from `dist.ini` files used by Dist::Zilla.
//!
//! # Supported Formats
//! - `dist.ini` - CPAN Dist::Zilla configuration
//!
//! # Implementation Notes
//! - Format: INI-style configuration file
//! - Spec: <https://metacpan.org/pod/Dist::Zilla::Tutorial>
//! - Extracts: name, version, author, license, copyright_holder, copyright_year, abstract
//! - Dependencies are read from sections whose name starts with `Prereq` (this goes
//!   beyond the Python implementation, which has no such parser)
13
14use std::collections::HashMap;
15use std::path::Path;
16
17use crate::parser_warn as warn;
18use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
19use serde_json::json;
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22
23use super::PackageParser;
24use super::license_normalization::{
25    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
26    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
27};
28
/// Package type attached to every `PackageData` this module produces.
const PACKAGE_TYPE: PackageType = PackageType::Cpan;

/// Stateless marker type that carries the `PackageParser` implementation for
/// Dist::Zilla `dist.ini` files.
pub struct CpanDistIniParser;
32
33impl PackageParser for CpanDistIniParser {
34    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
35
36    fn is_match(path: &Path) -> bool {
37        path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
38    }
39
40    fn extract_packages(path: &Path) -> Vec<PackageData> {
41        let content = match read_file_to_string(path, None) {
42            Ok(c) => c,
43            Err(e) => {
44                warn!("Failed to read dist.ini file {:?}: {}", path, e);
45                return vec![PackageData {
46                    package_type: Some(PACKAGE_TYPE),
47                    primary_language: Some("Perl".to_string()),
48                    datasource_id: Some(DatasourceId::CpanDistIni),
49                    ..Default::default()
50                }];
51            }
52        };
53
54        vec![parse_dist_ini(&content)]
55    }
56}
57
58pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
59    let (root_fields, sections) = parse_ini_structure(content);
60
61    let name = root_fields
62        .get("name")
63        .map(|s| truncate_field(s.replace('-', "::")));
64    let version = root_fields.get("version").cloned().map(truncate_field);
65    let description = root_fields.get("abstract").cloned().map(truncate_field);
66    let extracted_license_statement = root_fields.get("license").cloned().map(truncate_field);
67    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
68        extracted_license_statement
69            .as_deref()
70            .and_then(normalize_cpan_dist_ini_license)
71            .map(|normalized| {
72                build_declared_license_data(
73                    normalized,
74                    DeclaredLicenseMatchMetadata::single_line(
75                        extracted_license_statement.as_deref().unwrap_or_default(),
76                    ),
77                )
78            })
79            .unwrap_or_else(empty_declared_license_data);
80    let copyright_holder = root_fields
81        .get("copyright_holder")
82        .cloned()
83        .map(truncate_field);
84
85    let parties = parse_author(&root_fields);
86    let dependencies = parse_dependencies(&sections);
87
88    let mut extra_data = HashMap::new();
89    if let Some(holder) = copyright_holder {
90        extra_data.insert("copyright_holder".to_string(), json!(holder));
91    }
92    if let Some(year) = root_fields.get("copyright_year") {
93        extra_data.insert("copyright_year".to_string(), json!(year));
94    }
95
96    PackageData {
97        package_type: Some(PACKAGE_TYPE),
98        namespace: Some("cpan".to_string()),
99        name,
100        version,
101        description,
102        declared_license_expression,
103        declared_license_expression_spdx,
104        license_detections,
105        extracted_license_statement,
106        parties,
107        dependencies,
108        extra_data: if extra_data.is_empty() {
109            None
110        } else {
111            Some(extra_data)
112        },
113        datasource_id: Some(DatasourceId::CpanDistIni),
114        primary_language: Some("Perl".to_string()),
115        ..Default::default()
116    }
117}
118
119fn normalize_cpan_dist_ini_license(value: &str) -> Option<NormalizedDeclaredLicense> {
120    match value.trim() {
121        "Perl_5" => Some(NormalizedDeclaredLicense::new(
122            "gpl-1.0-plus OR artistic-perl-1.0",
123            "GPL-1.0-or-later OR Artistic-1.0-Perl",
124        )),
125        other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
126    }
127}
128
129fn parse_ini_structure(
130    content: &str,
131) -> (
132    HashMap<String, String>,
133    HashMap<String, HashMap<String, String>>,
134) {
135    let mut root_fields = HashMap::new();
136    let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
137    let mut current_section: Option<String> = None;
138
139    for line in content.lines().take(MAX_ITERATION_COUNT) {
140        let line = line.trim();
141
142        if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
143            continue;
144        }
145
146        if line.starts_with('[') && line.ends_with(']') {
147            current_section = Some(line[1..line.len() - 1].to_string());
148            continue;
149        }
150
151        if let Some((key, value)) = line.split_once('=') {
152            let key = key.trim().to_string();
153            let value = truncate_field(value.trim().to_string());
154
155            if let Some(section_name) = &current_section {
156                sections
157                    .entry(section_name.clone())
158                    .or_default()
159                    .insert(key, value);
160            } else {
161                root_fields.insert(key, value);
162            }
163        }
164    }
165
166    (root_fields, sections)
167}
168
169fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
170    fields
171        .get("author")
172        .map(|author_str| {
173            if let Some((name, email)) = parse_author_string(author_str) {
174                vec![Party {
175                    role: Some("author".to_string()),
176                    name: Some(name),
177                    email: Some(email),
178                    r#type: None,
179                    url: None,
180                    organization: None,
181                    organization_url: None,
182                    timezone: None,
183                }]
184            } else {
185                vec![Party {
186                    role: Some("author".to_string()),
187                    name: Some(truncate_field(author_str.clone())),
188                    r#type: None,
189                    email: None,
190                    url: None,
191                    organization: None,
192                    organization_url: None,
193                    timezone: None,
194                }]
195            }
196        })
197        .unwrap_or_default()
198}
199
200fn parse_author_string(s: &str) -> Option<(String, String)> {
201    if let Some(start) = s.find('<')
202        && let Some(end) = s.find('>')
203    {
204        let name = truncate_field(s[..start].trim().to_string());
205        let email = truncate_field(s[start + 1..end].trim().to_string());
206        return Some((name, email));
207    }
208    None
209}
210
211fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
212    let mut dependencies = Vec::new();
213
214    let mut sorted_sections: Vec<_> = sections.iter().collect();
215    sorted_sections.sort_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
216
217    for (section_name, fields) in sorted_sections.iter().take(MAX_ITERATION_COUNT) {
218        let Some(scope) = classify_prereq_scope(section_name) else {
219            continue;
220        };
221
222        let mut sorted_fields: Vec<_> = fields.iter().collect();
223        sorted_fields.sort_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
224
225        for (module_name, version_req) in sorted_fields.iter().take(MAX_ITERATION_COUNT) {
226            let purl = truncate_field(format!("pkg:cpan/{}", module_name));
227            let extracted_requirement = if version_req.as_str() == "0" || version_req.is_empty() {
228                None
229            } else {
230                Some(truncate_field(version_req.to_string()))
231            };
232
233            dependencies.push(Dependency {
234                purl: Some(purl),
235                scope: Some(scope.clone()),
236                extracted_requirement,
237                is_runtime: Some(scope == "runtime"),
238                is_optional: Some(false),
239                is_pinned: None,
240                is_direct: None,
241                resolved_package: None,
242                extra_data: None,
243            });
244        }
245    }
246
247    dependencies
248}
249
/// Derives a dependency scope from a `dist.ini` section name.
///
/// Returns `None` unless the name starts with `Prereq`. Otherwise the scope is
/// chosen from the first marker found anywhere in the name, checked in this
/// order: `Test` → `"test"`, `Build` → `"build"`, `Configure` →
/// `"configure"`, `Develop` → `"develop"`. A name containing none of them
/// (e.g. plain `Prereqs`) yields `"runtime"`.
///
/// `Develop` is checked last so that every name that previously mapped to
/// `test`, `build` or `configure` keeps its scope; it only stops
/// `DevelopRequires`-style sections from being reported as runtime
/// dependencies.
fn classify_prereq_scope(section_name: &str) -> Option<String> {
    // (marker in the section name, resulting scope), in priority order.
    const PHASE_MARKERS: [(&str, &str); 4] = [
        ("Test", "test"),
        ("Build", "build"),
        ("Configure", "configure"),
        ("Develop", "develop"),
    ];

    if !section_name.starts_with("Prereq") {
        return None;
    }

    let scope = PHASE_MARKERS
        .iter()
        .find(|&&(marker, _)| section_name.contains(marker))
        .map_or("runtime", |&(_, scope)| scope);

    Some(scope.to_string())
}
265
// Invokes the crate-level `register_parser!` macro for this parser.
// NOTE(review): the argument meanings (description, path patterns, package type,
// primary language, documentation URL) are inferred from the values — confirm
// against the macro definition.
crate::register_parser!(
    "CPAN Perl dist.ini",
    &["*/dist.ini"],
    "cpan",
    "Perl",
    Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
);