Skip to main content

provenant/parsers/
cpan_dist_ini.rs

1//! Parser for CPAN dist.ini files.
2//!
3//! Extracts Perl package metadata from `dist.ini` files used by Dist::Zilla.
4//!
5//! # Supported Formats
6//! - `dist.ini` - CPAN Dist::Zilla configuration
7//!
8//! # Implementation Notes
9//! - Format: INI-style configuration file
10//! - Spec: https://metacpan.org/pod/Dist::Zilla::Tutorial
11//! - Extracts: name, version, author, license, copyright_holder, abstract
12//! - Dependencies from `[Prereq*]` sections (goes beyond the Python implementation, which has no parser)
13
14use std::collections::HashMap;
15use std::fs;
16use std::path::Path;
17
18use log::warn;
19use serde_json::json;
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22
23use super::PackageParser;
24
25const PACKAGE_TYPE: PackageType = PackageType::Cpan;
26
27pub struct CpanDistIniParser;
28
29impl PackageParser for CpanDistIniParser {
30    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
31
32    fn is_match(path: &Path) -> bool {
33        path.to_str().is_some_and(|p| p.ends_with("/dist.ini"))
34    }
35
36    fn extract_packages(path: &Path) -> Vec<PackageData> {
37        let content = match fs::read_to_string(path) {
38            Ok(c) => c,
39            Err(e) => {
40                warn!("Failed to read dist.ini file {:?}: {}", path, e);
41                return vec![PackageData {
42                    package_type: Some(PACKAGE_TYPE),
43                    primary_language: Some("Perl".to_string()),
44                    datasource_id: Some(DatasourceId::CpanDistIni),
45                    ..Default::default()
46                }];
47            }
48        };
49
50        vec![parse_dist_ini(&content)]
51    }
52}
53
54pub(crate) fn parse_dist_ini(content: &str) -> PackageData {
55    let (root_fields, sections) = parse_ini_structure(content);
56
57    let name = root_fields.get("name").map(|s| s.replace('-', "::"));
58    let version = root_fields.get("version").cloned();
59    let description = root_fields.get("abstract").cloned();
60    let declared_license_expression = root_fields.get("license").cloned();
61    let copyright_holder = root_fields.get("copyright_holder").cloned();
62
63    let parties = parse_author(&root_fields);
64    let dependencies = parse_dependencies(&sections);
65
66    let mut extra_data = HashMap::new();
67    if let Some(holder) = copyright_holder {
68        extra_data.insert("copyright_holder".to_string(), json!(holder));
69    }
70    if let Some(year) = root_fields.get("copyright_year") {
71        extra_data.insert("copyright_year".to_string(), json!(year));
72    }
73
74    PackageData {
75        package_type: Some(PACKAGE_TYPE),
76        namespace: Some("cpan".to_string()),
77        name,
78        version,
79        description,
80        declared_license_expression,
81        parties,
82        dependencies,
83        extra_data: if extra_data.is_empty() {
84            None
85        } else {
86            Some(extra_data)
87        },
88        datasource_id: Some(DatasourceId::CpanDistIni),
89        primary_language: Some("Perl".to_string()),
90        ..Default::default()
91    }
92}
93
/// Splits `dist.ini` text into root-level fields and per-section fields.
///
/// Returns `(root_fields, sections)`:
/// - `root_fields` holds every `key = value` pair that appears before the
///   first `[section]` header.
/// - `sections` maps each section name (the text between `[` and `]`, with
///   surrounding whitespace removed) to the `key = value` pairs below it.
///
/// Parsing rules:
/// - Blank lines and lines starting with `;` or `#` are skipped.
/// - A trailing inline comment (whitespace followed by `;`) is removed before
///   the line is interpreted, so `[Prereqs] ; note` is still a header and
///   `version = 1.0 ; note` yields the value `1.0`.
/// - Keys and values are trimmed; lines without `=` or with an empty key are
///   ignored.
/// - When a key repeats within the same scope, the last value wins.
fn parse_ini_structure(
    content: &str,
) -> (
    HashMap<String, String>,
    HashMap<String, HashMap<String, String>>,
) {
    let mut root_fields = HashMap::new();
    let mut sections: HashMap<String, HashMap<String, String>> = HashMap::new();
    let mut current_section: Option<String> = None;

    for raw_line in content.lines() {
        let line = raw_line.trim();

        if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
            continue;
        }

        let line = strip_inline_comment(line);

        if let Some(header) = line
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'))
        {
            // Trim so that `[ Prereqs ]` and `[Prereqs]` name the same section.
            current_section = Some(header.trim().to_string());
            continue;
        }

        if let Some((key, value)) = line.split_once('=') {
            let key = key.trim();
            if key.is_empty() {
                // `= value` carries no usable name.
                continue;
            }
            let value = value.trim().to_string();

            match &current_section {
                Some(section_name) => {
                    sections
                        .entry(section_name.clone())
                        .or_default()
                        .insert(key.to_string(), value);
                }
                None => {
                    root_fields.insert(key.to_string(), value);
                }
            }
        }
    }

    (root_fields, sections)
}

/// Removes a trailing inline comment: the first `;` that directly follows a
/// whitespace character, together with everything after it and the
/// whitespace before it. Lines without such a `;` are returned unchanged.
fn strip_inline_comment(line: &str) -> &str {
    let mut previous_is_whitespace = false;
    for (idx, ch) in line.char_indices() {
        if ch == ';' && previous_is_whitespace {
            return line[..idx].trim_end();
        }
        previous_is_whitespace = ch.is_whitespace();
    }
    line
}
133
134fn parse_author(fields: &HashMap<String, String>) -> Vec<Party> {
135    fields
136        .get("author")
137        .map(|author_str| {
138            if let Some((name, email)) = parse_author_string(author_str) {
139                vec![Party {
140                    role: Some("author".to_string()),
141                    name: Some(name),
142                    email: Some(email),
143                    r#type: None,
144                    url: None,
145                    organization: None,
146                    organization_url: None,
147                    timezone: None,
148                }]
149            } else {
150                vec![Party {
151                    role: Some("author".to_string()),
152                    name: Some(author_str.clone()),
153                    r#type: None,
154                    email: None,
155                    url: None,
156                    organization: None,
157                    organization_url: None,
158                    timezone: None,
159                }]
160            }
161        })
162        .unwrap_or_default()
163}
164
/// Splits an author string of the form `Name <email>` into `(name, email)`.
///
/// The name is everything before the first `<`; the e-mail is the text
/// between that `<` and the next `>` after it. Both parts are trimmed.
/// Returns `None` when there is no `<`, or no `>` following it.
///
/// The closing bracket is searched for only after the opening one, so a stray
/// `>` earlier in the string cannot produce an inverted slice range (which
/// would panic).
fn parse_author_string(s: &str) -> Option<(String, String)> {
    let (name, rest) = s.split_once('<')?;
    let (email, _) = rest.split_once('>')?;
    Some((name.trim().to_string(), email.trim().to_string()))
}
175
176fn parse_dependencies(sections: &HashMap<String, HashMap<String, String>>) -> Vec<Dependency> {
177    let mut dependencies = Vec::new();
178
179    for (section_name, fields) in sections {
180        let scope = if section_name.starts_with("Prereq") {
181            if section_name.contains("TestRequires") || section_name.contains("Test") {
182                Some("test".to_string())
183            } else if section_name.contains("BuildRequires") || section_name.contains("Build") {
184                Some("build".to_string())
185            } else {
186                Some("runtime".to_string())
187            }
188        } else {
189            continue;
190        };
191
192        for (module_name, version_req) in fields {
193            let purl = format!("pkg:cpan/{}", module_name);
194            let extracted_requirement = if version_req == "0" || version_req.is_empty() {
195                None
196            } else {
197                Some(version_req.clone())
198            };
199
200            dependencies.push(Dependency {
201                purl: Some(purl),
202                scope: scope.clone(),
203                extracted_requirement,
204                is_runtime: Some(scope.as_deref() == Some("runtime")),
205                is_optional: Some(false),
206                is_pinned: None,
207                is_direct: None,
208                resolved_package: None,
209                extra_data: None,
210            });
211        }
212    }
213
214    dependencies
215}
216
// Registers this parser with the crate-level parser registry.
// NOTE(review): the macro definition is not visible from this file; judging by
// the values, the arguments appear to be a description, the matched path
// patterns, the package type, the primary language and a documentation URL —
// confirm against `register_parser!`.
217crate::register_parser!(
218    "CPAN Perl dist.ini",
219    &["*/dist.ini"],
220    "cpan",
221    "Perl",
222    Some("https://metacpan.org/pod/Dist::Zilla::Tutorial"),
223);