Skip to main content

provenant/parsers/
cpan_dist_ini.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for CPAN dist.ini files.
5//!
6//! Extracts Perl package metadata from `dist.ini` files used by Dist::Zilla.
7//!
8//! # Supported Formats
9//! - `dist.ini` - CPAN Dist::Zilla configuration
10//!
11//! # Implementation Notes
12//! - Format: INI-style configuration file
13//! - Spec: https://metacpan.org/pod/Dist::Zilla::Tutorial
14//! - Extracts: name, version, author, license, copyright_holder, abstract
//! - Dependencies are extracted from `[Prereq*]` sections (this goes beyond the Python implementation, which has no parser for this format)
16
17use std::collections::HashMap;
18use std::path::Path;
19
20use crate::parser_warn as warn;
21use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
22use serde_json::json;
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25
26use super::PackageParser;
27use super::license_normalization::{
28    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
29    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_expression,
30};
31
32const PACKAGE_TYPE: PackageType = PackageType::Cpan;
33
34pub struct CpanDistIniParser;
35
36impl PackageParser for CpanDistIniParser {
37    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
38
39    fn is_match(path: &Path) -> bool {
40        path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
41    }
42
43    fn extract_packages(path: &Path) -> Vec<PackageData> {
44        let content = match read_file_to_string(path, None) {
45            Ok(c) => c,
46            Err(e) => {
47                warn!("Failed to read dist.ini file {:?}: {}", path, e);
48                return vec![PackageData {
49                    package_type: Some(PACKAGE_TYPE),
50                    primary_language: Some("Perl".to_string()),
51                    datasource_id: Some(DatasourceId::CpanDistIni),
52                    ..Default::default()
53                }];
54            }
55        };
56
57        vec![parse_dist_ini(&content)]
58    }
59
60    fn metadata() -> Vec<super::metadata::ParserMetadata> {
61        vec![super::metadata::ParserMetadata {
62            description: "CPAN Perl dist.ini",
63            file_patterns: &["*/dist.ini"],
64            package_type: "cpan",
65            primary_language: "Perl",
66            documentation_url: Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
67        }]
68    }
69}
70
71pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
72    let (root_fields, sections) = parse_ini_structure(content);
73
74    let name = root_fields
75        .get("name")
76        .map(|s| truncate_field(s.replace('-', "::")));
77    let version = root_fields.get("version").cloned().map(truncate_field);
78    let description = root_fields.get("abstract").cloned().map(truncate_field);
79    let extracted_license_statement = root_fields.get("license").cloned().map(truncate_field);
80    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
81        extracted_license_statement
82            .as_deref()
83            .and_then(normalize_cpan_dist_ini_license)
84            .map(|normalized| {
85                build_declared_license_data(
86                    normalized,
87                    DeclaredLicenseMatchMetadata::single_line(
88                        extracted_license_statement.as_deref().unwrap_or_default(),
89                    ),
90                )
91            })
92            .unwrap_or_else(empty_declared_license_data);
93    let copyright_holder = root_fields
94        .get("copyright_holder")
95        .cloned()
96        .map(truncate_field);
97
98    let parties = parse_author(&root_fields);
99    let dependencies = parse_dependencies(&sections);
100
101    let mut extra_data = HashMap::new();
102    if let Some(holder) = copyright_holder {
103        extra_data.insert("copyright_holder".to_string(), json!(holder));
104    }
105    if let Some(year) = root_fields.get("copyright_year") {
106        extra_data.insert("copyright_year".to_string(), json!(year));
107    }
108
109    PackageData {
110        package_type: Some(PACKAGE_TYPE),
111        namespace: Some("cpan".to_string()),
112        name,
113        version,
114        description,
115        declared_license_expression,
116        declared_license_expression_spdx,
117        license_detections,
118        extracted_license_statement,
119        parties,
120        dependencies,
121        extra_data: if extra_data.is_empty() {
122            None
123        } else {
124            Some(extra_data)
125        },
126        datasource_id: Some(DatasourceId::CpanDistIni),
127        primary_language: Some("Perl".to_string()),
128        ..Default::default()
129    }
130}
131
132fn normalize_cpan_dist_ini_license(value: &str) -> Option<NormalizedDeclaredLicense> {
133    match value.trim() {
134        "Perl_5" => Some(NormalizedDeclaredLicense::new(
135            "gpl-1.0-plus OR artistic-perl-1.0",
136            "GPL-1.0-or-later OR Artistic-1.0-Perl",
137        )),
138        other => normalize_spdx_expression(other).or_else(|| normalize_declared_license_key(other)),
139    }
140}
141
142fn parse_ini_structure(
143    content: &str,
144) -> (
145    HashMap<String, String>,
146    HashMap<String, HashMap<String, String>>,
147) {
148    let mut root_fields = HashMap::new();
149    let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
150    let mut current_section: Option<String> = None;
151
152    for line in content.lines().take(MAX_ITERATION_COUNT) {
153        let line = line.trim();
154
155        if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
156            continue;
157        }
158
159        if line.starts_with('[') && line.ends_with(']') {
160            current_section = Some(line[1..line.len() - 1].to_string());
161            continue;
162        }
163
164        if let Some((key, value)) = line.split_once('=') {
165            let key = key.trim().to_string();
166            let value = truncate_field(value.trim().to_string());
167
168            if let Some(section_name) = &current_section {
169                sections
170                    .entry(section_name.clone())
171                    .or_default()
172                    .insert(key, value);
173            } else {
174                root_fields.insert(key, value);
175            }
176        }
177    }
178
179    (root_fields, sections)
180}
181
182fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
183    fields
184        .get("author")
185        .map(|author_str| {
186            if let Some((name, email)) = parse_author_string(author_str) {
187                vec![Party {
188                    role: Some("author".to_string()),
189                    name: Some(name),
190                    email: Some(email),
191                    r#type: None,
192                    url: None,
193                    organization: None,
194                    organization_url: None,
195                    timezone: None,
196                }]
197            } else {
198                vec![Party {
199                    role: Some("author".to_string()),
200                    name: Some(truncate_field(author_str.clone())),
201                    r#type: None,
202                    email: None,
203                    url: None,
204                    organization: None,
205                    organization_url: None,
206                    timezone: None,
207                }]
208            }
209        })
210        .unwrap_or_default()
211}
212
213fn parse_author_string(s: &str) -> Option<(String, String)> {
214    if let Some(start) = s.find('<')
215        && let Some(end) = s.find('>')
216    {
217        let name = truncate_field(s[..start].trim().to_string());
218        let email = truncate_field(s[start + 1..end].trim().to_string());
219        return Some((name, email));
220    }
221    None
222}
223
224fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
225    let mut dependencies = Vec::new();
226
227    let mut sorted_sections: Vec<_> = sections.iter().collect();
228    sorted_sections.sort_by_key(|(left_name, _)| *left_name);
229
230    for (section_name, fields) in sorted_sections.iter().take(MAX_ITERATION_COUNT) {
231        let Some(scope) = classify_prereq_scope(section_name) else {
232            continue;
233        };
234
235        let mut sorted_fields: Vec<_> = fields.iter().collect();
236        sorted_fields.sort_by_key(|(left_name, _)| *left_name);
237
238        for (module_name, version_req) in sorted_fields.iter().take(MAX_ITERATION_COUNT) {
239            let purl = truncate_field(format!("pkg:cpan/{}", module_name));
240            let extracted_requirement = if version_req.as_str() == "0" || version_req.is_empty() {
241                None
242            } else {
243                Some(truncate_field(version_req.to_string()))
244            };
245
246            dependencies.push(Dependency {
247                purl: Some(purl),
248                scope: Some(scope.clone()),
249                extracted_requirement,
250                is_runtime: Some(scope == "runtime"),
251                is_optional: Some(false),
252                is_pinned: None,
253                is_direct: None,
254                resolved_package: None,
255                extra_data: None,
256            });
257        }
258    }
259
260    dependencies
261}
262
/// Maps a `dist.ini` section name to a dependency scope.
///
/// Returns `None` unless the name starts with `Prereq`. Otherwise the first
/// marker found in the name decides the scope, checked in this order: `Test`,
/// `Build`, `Configure`, `Develop`. A name with no marker is `runtime`.
///
/// `Develop` is recognized so that sections such as
/// `Prereqs / DevelopRequires` are no longer reported as runtime dependencies.
fn classify_prereq_scope(section_name: &str) -> Option<String> {
    // (substring to look for, scope to report); order matters on multiple hits.
    const PHASE_MARKERS: [(&str, &str); 4] = [
        ("Test", "test"),
        ("Build", "build"),
        ("Configure", "configure"),
        ("Develop", "develop"),
    ];

    if !section_name.starts_with("Prereq") {
        return None;
    }

    for (marker, scope) in PHASE_MARKERS {
        if section_name.contains(marker) {
            return Some(scope.to_string());
        }
    }

    Some("runtime".to_string())
}