Skip to main content

provenant/parsers/
rpm_specfile.rs

1//! Parser for RPM .spec files.
2//!
3//! Extracts package metadata from RPM specfiles, which define how RPM packages
4//! are built. This is a beyond-parity implementation - the Python version is
5//! a complete stub with "TODO: implement me!!" comments.
6//!
7//! # Supported Formats
8//! - *.spec (RPM specfiles)
9//!
10//! # Key Features
11//! - Preamble tag extraction (Name, Version, Release, Summary, License, etc.)
12//! - Dependency extraction (BuildRequires, Requires, Provides)
13//! - %description section parsing
14//! - Basic macro expansion (%{name}, %{version}, %{release})
15//! - %define and %global macro definitions
16//! - Conditional macro handling (%{?dist})
17//! - Multi-line dependency lists (comma-separated)
18//! - Scoped Requires (Requires(post), Requires(preun), etc.)
19//!
20//! # Implementation Notes
21//! - Parses only the preamble (before %prep, %build, etc. sections)
22//! - Tags are case-insensitive per RPM spec format
23//! - Simple macro expansion for common patterns
24//! - BuildRequires dependencies have is_runtime=false, scope="build"
25//! - Runtime Requires dependencies have is_runtime=true, scope="runtime"
26//! - datasource_id is "rpm_specfile"
27
28use std::collections::HashMap;
29use std::path::Path;
30use std::sync::LazyLock;
31
32use log::warn;
33use packageurl::PackageUrl;
34use regex::Regex;
35
36use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
37use crate::parsers::utils::{read_file_to_string, split_name_email};
38
39use super::PackageParser;
40
41static RE_CONDITIONAL_MACRO: LazyLock<Regex> =
42    LazyLock::new(|| Regex::new(r"%\{\?[^}]+\}").unwrap());
43
44const PACKAGE_TYPE: PackageType = PackageType::Rpm;
45
46/// Parser for RPM specfiles
47pub struct RpmSpecfileParser;
48
49impl PackageParser for RpmSpecfileParser {
50    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
51
52    fn is_match(path: &Path) -> bool {
53        path.extension()
54            .and_then(|e| e.to_str())
55            .is_some_and(|ext| ext.eq_ignore_ascii_case("spec"))
56    }
57
58    fn extract_packages(path: &Path) -> Vec<PackageData> {
59        let content = match read_file_to_string(path) {
60            Ok(c) => c,
61            Err(e) => {
62                warn!("Failed to read RPM specfile {:?}: {}", path, e);
63                return vec![PackageData {
64                    package_type: Some(PACKAGE_TYPE),
65                    datasource_id: Some(DatasourceId::RpmSpecfile),
66                    ..Default::default()
67                }];
68            }
69        };
70
71        vec![parse_specfile(&content)]
72    }
73}
74
75fn parse_specfile(content: &str) -> PackageData {
76    let mut tags: HashMap<String, String> = HashMap::new();
77    let mut macros: HashMap<String, String> = HashMap::new();
78    let mut build_requires: Vec<String> = Vec::new();
79    let mut requires: Vec<(String, Option<String>)> = Vec::new(); // (requirement, scope)
80    let mut provides: Vec<String> = Vec::new();
81    let mut description: Option<String> = None;
82
83    let lines: Vec<&str> = content.lines().collect();
84    let mut i = 0;
85
86    // Parse preamble (everything before % sections)
87    while i < lines.len() {
88        let line = lines[i].trim();
89
90        // Stop at first section marker (%, but not %define/%global)
91        if line.starts_with('%') && !line.starts_with("%define") && !line.starts_with("%global") {
92            break;
93        }
94
95        // Skip empty lines and comments
96        if line.is_empty() || line.starts_with('#') {
97            i += 1;
98            continue;
99        }
100
101        // Parse %define and %global macros
102        if let Some(stripped) = line
103            .strip_prefix("%define")
104            .or(line.strip_prefix("%global"))
105        {
106            let parts: Vec<&str> = stripped.trim().splitn(2, char::is_whitespace).collect();
107            if parts.len() == 2 {
108                macros.insert(parts[0].to_string(), parts[1].trim().to_string());
109            }
110            i += 1;
111            continue;
112        }
113
114        // Parse Tag: Value lines
115        if let Some(colon_pos) = line.find(':') {
116            let tag = line[..colon_pos].trim().to_lowercase();
117            let value = line[colon_pos + 1..].trim().to_string();
118
119            match tag.as_str() {
120                "buildrequires" => {
121                    // BuildRequires can be comma-separated
122                    for dep in value.split(',') {
123                        let dep = dep.trim();
124                        if !dep.is_empty() {
125                            build_requires.push(dep.to_string());
126                        }
127                    }
128                }
129                t if t.starts_with("requires") => {
130                    // Parse Requires, Requires(post), Requires(preun), etc.
131                    let scope = if let Some(start) = t.find('(') {
132                        if let Some(end) = t.find(')') {
133                            Some(t[start + 1..end].to_string())
134                        } else {
135                            Some("runtime".to_string())
136                        }
137                    } else {
138                        Some("runtime".to_string())
139                    };
140
141                    for dep in value.split(',') {
142                        let dep = dep.trim();
143                        if !dep.is_empty() {
144                            requires.push((dep.to_string(), scope.clone()));
145                        }
146                    }
147                }
148                "provides" => {
149                    for prov in value.split(',') {
150                        let prov = prov.trim();
151                        if !prov.is_empty() {
152                            provides.push(prov.to_string());
153                        }
154                    }
155                }
156                _ => {
157                    tags.insert(tag, value);
158                }
159            }
160        }
161
162        i += 1;
163    }
164
165    // Now parse %description section if present
166    while i < lines.len() {
167        let line = lines[i].trim();
168
169        if line.starts_with("%description") {
170            i += 1;
171            let mut desc_lines = Vec::new();
172
173            // Collect lines until next % section
174            while i < lines.len() {
175                let desc_line = lines[i];
176                let trimmed = desc_line.trim();
177
178                // Stop at next section
179                if trimmed.starts_with('%') {
180                    break;
181                }
182
183                // Don't include empty lines at start
184                if !desc_lines.is_empty() || !trimmed.is_empty() {
185                    desc_lines.push(desc_line);
186                }
187
188                i += 1;
189            }
190
191            // Trim trailing empty lines
192            while desc_lines.last().is_some_and(|l| l.trim().is_empty()) {
193                desc_lines.pop();
194            }
195
196            if !desc_lines.is_empty() {
197                description = Some(desc_lines.join("\n"));
198            }
199
200            break;
201        }
202
203        i += 1;
204    }
205
206    // Extract basic metadata from tags
207    let name = tags.get("name").cloned();
208    let version = tags.get("version").cloned();
209    let release = tags.get("release").cloned();
210
211    // Store name and version in macros for expansion
212    if let Some(ref n) = name {
213        macros.insert("name".to_string(), n.clone());
214    }
215    if let Some(ref v) = version {
216        macros.insert("version".to_string(), v.clone());
217    }
218    if let Some(ref r) = release {
219        macros.insert("release".to_string(), r.clone());
220    }
221
222    // Expand macros in all tag values
223    let mut expanded_tags: HashMap<String, String> = HashMap::new();
224    for (tag, value) in tags.iter() {
225        expanded_tags.insert(tag.clone(), expand_macros(value, &macros));
226    }
227
228    // Get expanded values
229    let name = expanded_tags.get("name").cloned();
230    let version = expanded_tags.get("version").cloned();
231    let release = expanded_tags.get("release").cloned();
232    let summary = expanded_tags.get("summary").cloned();
233    let license = expanded_tags.get("license").cloned();
234    let url = expanded_tags.get("url").cloned();
235    let group = expanded_tags.get("group").cloned();
236    let epoch = expanded_tags.get("epoch").cloned();
237    let packager = expanded_tags.get("packager").cloned();
238
239    let download_url = expanded_tags
240        .get("source")
241        .or_else(|| expanded_tags.get("source0"))
242        .cloned();
243
244    // Create parties
245    let mut parties = Vec::new();
246    if let Some(pkg) = packager {
247        let (name_opt, email_opt) = split_name_email(&pkg);
248        parties.push(Party {
249            r#type: None,
250            role: Some("packager".to_string()),
251            name: name_opt,
252            email: email_opt,
253            url: None,
254            organization: None,
255            organization_url: None,
256            timezone: None,
257        });
258    }
259
260    // Create dependencies
261    let mut dependencies = Vec::new();
262
263    for dep_str in build_requires {
264        let dep_name = extract_dep_name(&dep_str);
265        let purl = build_rpm_purl(&dep_name, None);
266
267        dependencies.push(Dependency {
268            purl,
269            extracted_requirement: Some(dep_str),
270            scope: Some("build".to_string()),
271            is_runtime: Some(false),
272            is_optional: Some(false),
273            is_direct: Some(true),
274            is_pinned: None,
275            resolved_package: None,
276            extra_data: None,
277        });
278    }
279
280    for (dep_str, scope) in requires {
281        let dep_name = extract_dep_name(&dep_str);
282        let purl = build_rpm_purl(&dep_name, None);
283
284        dependencies.push(Dependency {
285            purl,
286            extracted_requirement: Some(dep_str),
287            scope,
288            is_runtime: Some(true),
289            is_optional: Some(false),
290            is_direct: Some(true),
291            is_pinned: None,
292            resolved_package: None,
293            extra_data: None,
294        });
295    }
296
297    // Build PURL
298    let purl = name
299        .as_ref()
300        .and_then(|n| build_rpm_purl(n, version.as_deref()));
301
302    // Build extra_data for non-standard fields
303    let mut extra_data = HashMap::new();
304    if let Some(r) = release {
305        extra_data.insert("release".to_string(), serde_json::Value::String(r));
306    }
307    if let Some(e) = epoch {
308        extra_data.insert("epoch".to_string(), serde_json::Value::String(e));
309    }
310    if let Some(g) = group {
311        extra_data.insert("group".to_string(), serde_json::Value::String(g));
312    }
313    if !provides.is_empty() {
314        let provides_json: Vec<serde_json::Value> = provides
315            .into_iter()
316            .map(serde_json::Value::String)
317            .collect();
318        extra_data.insert(
319            "provides".to_string(),
320            serde_json::Value::Array(provides_json),
321        );
322    }
323
324    let extra_data_opt = if extra_data.is_empty() {
325        None
326    } else {
327        Some(extra_data)
328    };
329
330    // Use %description if available, otherwise use Summary
331    let description_text = description.or(summary);
332
333    PackageData {
334        datasource_id: Some(DatasourceId::RpmSpecfile),
335        package_type: Some(PACKAGE_TYPE),
336        namespace: None, // RPM namespace is optional
337        name,
338        version,
339        description: description_text,
340        homepage_url: url,
341        download_url,
342        extracted_license_statement: license,
343        parties,
344        dependencies,
345        purl,
346        extra_data: extra_data_opt,
347        ..Default::default()
348    }
349}
350
351/// Expands simple macros in a string (%{name}, %{version}, %{release}, %{?dist})
352fn expand_macros(s: &str, macros: &HashMap<String, String>) -> String {
353    let mut result = s.to_string();
354
355    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
356
357    // Expand simple macros %{macro}
358    for (key, value) in macros {
359        let pattern = format!("%{{{}}}", key);
360        result = result.replace(&pattern, value);
361    }
362
363    result
364}
365
/// Returns the package-name portion of a dependency string.
///
/// Everything before the first comparison character (`>`, `<` or `=`) is kept
/// and trimmed; a string without any such character is returned trimmed.
fn extract_dep_name(dep: &str) -> String {
    let name_end = dep
        .find(|c: char| matches!(c, '>' | '<' | '='))
        .unwrap_or(dep.len());

    dep[..name_end].trim().to_string()
}
373
374/// Builds a package URL for RPM packages
375fn build_rpm_purl(name: &str, version: Option<&str>) -> Option<String> {
376    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
377
378    if let Some(ver) = version {
379        purl.with_version(ver).ok()?;
380    }
381
382    Some(purl.to_string())
383}
384
// Registers metadata for this parser through the crate's `register_parser!` macro.
// NOTE(review): the positional arguments appear to be a human-readable
// description, the path glob patterns, the package type, the primary language
// (empty here), and an optional documentation URL — confirm against the macro
// definition.
crate::register_parser!(
    "RPM specfile",
    &["**/*.spec"],
    "rpm",
    "",
    Some("https://rpm-software-management.github.io/rpm/manual/spec.html"),
);