Skip to main content

provenant/parsers/
rpm_specfile.rs

//! Parser for RPM .spec files.
//!
//! Extracts package metadata from RPM specfiles, which define how RPM packages
//! are built. This is a beyond-parity implementation: the Python version is
//! a complete stub with "TODO: implement me!!" comments.
//!
//! # Supported Formats
//! - *.spec (RPM specfiles)
//!
//! # Key Features
//! - Preamble tag extraction (Name, Version, Release, Summary, License, etc.)
//! - Dependency extraction (BuildRequires, Requires, Provides)
//! - %description section parsing
//! - Basic macro expansion (%{name}, %{version}, %{release})
//! - %define and %global macro definitions
//! - Conditional macro references (%{?dist}) are removed rather than evaluated
//! - Comma-separated dependency lists, accumulated across repeated tag lines
//! - Scoped Requires (Requires(post), Requires(preun), etc.)
//!
//! # Implementation Notes
//! - Parses only the preamble (before %prep, %build, etc. sections) and the
//!   first %description section
//! - Tags are case-insensitive per RPM spec format
//! - Simple macro expansion for common patterns
//! - BuildRequires dependencies have is_runtime=false, scope="build"
//! - Runtime Requires dependencies have is_runtime=true, scope="runtime"
//! - datasource_id is "rpm_specfile"
27
28use std::collections::HashMap;
29use std::path::Path;
30use std::sync::LazyLock;
31
32use crate::parser_warn as warn;
33use packageurl::PackageUrl;
34use regex::Regex;
35
36use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
37use crate::parsers::utils::{
38    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
39};
40
41use super::PackageParser;
42
43static RE_CONDITIONAL_MACRO: LazyLock<Regex> = LazyLock::new(|| {
44    Regex::new(r"%\{\?[^}]+\}").expect("valid regex: %{?...} pattern is a compile-time constant")
45});
46
47const PACKAGE_TYPE: PackageType = PackageType::Rpm;
48
49/// Parser for RPM specfiles
50pub struct RpmSpecfileParser;
51
52impl PackageParser for RpmSpecfileParser {
53    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
54
55    fn is_match(path: &Path) -> bool {
56        path.extension()
57            .and_then(|e| e.to_str())
58            .is_some_and(|ext| ext.eq_ignore_ascii_case("spec"))
59    }
60
61    fn extract_packages(path: &Path) -> Vec<PackageData> {
62        let content = match read_file_to_string(path, None) {
63            Ok(c) => c,
64            Err(e) => {
65                warn!("Failed to read RPM specfile {:?}: {}", path, e);
66                return vec![PackageData {
67                    package_type: Some(PACKAGE_TYPE),
68                    datasource_id: Some(DatasourceId::RpmSpecfile),
69                    ..Default::default()
70                }];
71            }
72        };
73
74        vec![parse_specfile(&content)]
75    }
76}
77
78fn parse_specfile(content: &str) -> PackageData {
79    let mut tags: HashMap<String, String> = HashMap::new();
80    let mut macros: HashMap<String, String> = HashMap::new();
81    let mut build_requires: Vec<String> = Vec::new();
82    let mut requires: Vec<(String, Option<String>)> = Vec::new(); // (requirement, scope)
83    let mut provides: Vec<String> = Vec::new();
84    let mut description: Option<String> = None;
85
86    let lines: Vec<&str> = content.lines().collect();
87    let mut i = 0;
88    let mut iterations: usize = 0;
89
90    while i < lines.len() {
91        iterations += 1;
92        if iterations > MAX_ITERATION_COUNT {
93            warn!(
94                "RPM specfile preamble iteration limit ({}) exceeded",
95                MAX_ITERATION_COUNT
96            );
97            break;
98        }
99        let line = lines[i].trim();
100
101        // Stop at first section marker (%, but not %define/%global)
102        if line.starts_with('%') && !line.starts_with("%define") && !line.starts_with("%global") {
103            if is_conditional_preamble_directive(line) {
104                i += 1;
105                continue;
106            }
107            break;
108        }
109
110        // Skip empty lines and comments
111        if line.is_empty() || line.starts_with('#') {
112            i += 1;
113            continue;
114        }
115
116        // Parse %define and %global macros
117        if let Some(stripped) = line
118            .strip_prefix("%define")
119            .or(line.strip_prefix("%global"))
120        {
121            let parts: Vec<&str> = stripped.trim().splitn(2, char::is_whitespace).collect();
122            if parts.len() == 2 {
123                macros.insert(
124                    parts[0].to_string(),
125                    truncate_field(parts[1].trim().to_string()),
126                );
127            }
128            i += 1;
129            continue;
130        }
131
132        // Parse Tag: Value lines
133        if let Some(colon_pos) = line.find(':') {
134            let tag = line[..colon_pos].trim().to_lowercase();
135            let value = line[colon_pos + 1..].trim().to_string();
136
137            match tag.as_str() {
138                "buildrequires" => {
139                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
140                        let dep = dep.trim();
141                        if !dep.is_empty() {
142                            build_requires.push(dep.to_string());
143                        }
144                    }
145                }
146                t if t.starts_with("requires") => {
147                    // Parse Requires, Requires(post), Requires(preun), etc.
148                    let scope = if let Some(start) = t.find('(') {
149                        if let Some(end) = t.find(')') {
150                            Some(t[start + 1..end].to_string())
151                        } else {
152                            Some("runtime".to_string())
153                        }
154                    } else {
155                        Some("runtime".to_string())
156                    };
157
158                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
159                        let dep = dep.trim();
160                        if !dep.is_empty() {
161                            requires.push((dep.to_string(), scope.clone()));
162                        }
163                    }
164                }
165                "provides" => {
166                    for prov in value.split(',').take(MAX_ITERATION_COUNT) {
167                        let prov = prov.trim();
168                        if !prov.is_empty() {
169                            provides.push(prov.to_string());
170                        }
171                    }
172                }
173                _ => {
174                    tags.insert(tag, value);
175                }
176            }
177        }
178
179        i += 1;
180    }
181
182    // Now parse %description section if present
183    let mut desc_iterations: usize = 0;
184    while i < lines.len() {
185        desc_iterations += 1;
186        if desc_iterations > MAX_ITERATION_COUNT {
187            warn!(
188                "RPM specfile description search iteration limit ({}) exceeded",
189                MAX_ITERATION_COUNT
190            );
191            break;
192        }
193        let line = lines[i].trim();
194
195        if line.starts_with("%description") {
196            i += 1;
197            let mut desc_lines = Vec::new();
198
199            while i < lines.len() {
200                desc_iterations += 1;
201                if desc_iterations > MAX_ITERATION_COUNT {
202                    warn!(
203                        "RPM specfile description iteration limit ({}) exceeded",
204                        MAX_ITERATION_COUNT
205                    );
206                    break;
207                }
208                let desc_line = lines[i];
209                let trimmed = desc_line.trim();
210
211                // Stop at next section
212                if trimmed.starts_with('%') {
213                    break;
214                }
215
216                // Don't include empty lines at start
217                if !desc_lines.is_empty() || !trimmed.is_empty() {
218                    desc_lines.push(desc_line);
219                }
220
221                i += 1;
222            }
223
224            // Trim trailing empty lines
225            while desc_lines.last().is_some_and(|l| l.trim().is_empty()) {
226                desc_lines.pop();
227            }
228
229            if !desc_lines.is_empty() {
230                description = Some(desc_lines.join("\n"));
231            }
232
233            break;
234        }
235
236        i += 1;
237    }
238
239    // Extract basic metadata from tags
240    let name = tags.get("name").cloned();
241    let version = tags.get("version").cloned();
242    let release = tags.get("release").cloned();
243
244    // Store name and version in macros for expansion
245    if let Some(ref n) = name {
246        macros.insert("name".to_string(), n.clone());
247    }
248    if let Some(ref v) = version {
249        macros.insert("version".to_string(), v.clone());
250    }
251    if let Some(ref r) = release {
252        macros.insert("release".to_string(), r.clone());
253    }
254
255    // Expand macros in all tag values
256    let mut expanded_tags: HashMap<String, String> = HashMap::new();
257    for (tag, value) in tags.iter() {
258        expanded_tags.insert(tag.clone(), truncate_field(expand_macros(value, &macros)));
259    }
260
261    // Get expanded values
262    let name = expanded_tags.get("name").cloned();
263    let version = expanded_tags.get("version").cloned();
264    let release = expanded_tags.get("release").cloned();
265    let summary = expanded_tags.get("summary").cloned();
266    let license = expanded_tags.get("license").cloned();
267    let url = expanded_tags.get("url").cloned();
268    let group = expanded_tags.get("group").cloned();
269    let epoch = expanded_tags.get("epoch").cloned();
270    let packager = expanded_tags.get("packager").cloned();
271
272    let download_url = expanded_tags
273        .get("source")
274        .or_else(|| expanded_tags.get("source0"))
275        .cloned()
276        .map(truncate_field);
277
278    // Create parties
279    let mut parties = Vec::new();
280    if let Some(pkg) = packager {
281        let (name_opt, email_opt) = split_name_email(&pkg);
282        parties.push(Party {
283            r#type: None,
284            role: Some("packager".to_string()),
285            name: name_opt,
286            email: email_opt,
287            url: None,
288            organization: None,
289            organization_url: None,
290            timezone: None,
291        });
292    }
293
294    // Create dependencies
295    let mut dependencies = Vec::new();
296
297    for dep_str in build_requires.into_iter().take(MAX_ITERATION_COUNT) {
298        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
299        let dep_name = extract_dep_name(&dep_str);
300        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
301
302        dependencies.push(Dependency {
303            purl,
304            extracted_requirement: Some(dep_str),
305            scope: Some("build".to_string()),
306            is_runtime: Some(false),
307            is_optional: Some(false),
308            is_direct: Some(true),
309            is_pinned: None,
310            resolved_package: None,
311            extra_data: None,
312        });
313    }
314
315    for (dep_str, scope) in requires.into_iter().take(MAX_ITERATION_COUNT) {
316        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
317        let dep_name = extract_dep_name(&dep_str);
318        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
319
320        dependencies.push(Dependency {
321            purl,
322            extracted_requirement: Some(dep_str),
323            scope,
324            is_runtime: Some(true),
325            is_optional: Some(false),
326            is_direct: Some(true),
327            is_pinned: None,
328            resolved_package: None,
329            extra_data: None,
330        });
331    }
332
333    // Build PURL
334    let purl = name
335        .as_ref()
336        .and_then(|n| build_rpm_purl(n, version.as_deref()))
337        .map(truncate_field);
338
339    // Build extra_data for non-standard fields
340    let mut extra_data = HashMap::new();
341    if let Some(r) = release {
342        extra_data.insert("release".to_string(), serde_json::Value::String(r));
343    }
344    if let Some(e) = epoch {
345        extra_data.insert("epoch".to_string(), serde_json::Value::String(e));
346    }
347    if let Some(g) = group {
348        extra_data.insert("group".to_string(), serde_json::Value::String(g));
349    }
350    if !provides.is_empty() {
351        let provides_json: Vec<serde_json::Value> = provides
352            .into_iter()
353            .take(MAX_ITERATION_COUNT)
354            .map(|prov| serde_json::Value::String(truncate_field(expand_macros(&prov, &macros))))
355            .collect();
356        extra_data.insert(
357            "provides".to_string(),
358            serde_json::Value::Array(provides_json),
359        );
360    }
361
362    let extra_data_opt = if extra_data.is_empty() {
363        None
364    } else {
365        Some(extra_data)
366    };
367
368    // Use %description if available, otherwise use Summary
369    let description_text = description.map(truncate_field).or(summary);
370
371    PackageData {
372        datasource_id: Some(DatasourceId::RpmSpecfile),
373        package_type: Some(PACKAGE_TYPE),
374        namespace: None, // RPM namespace is optional
375        name,
376        version,
377        description: description_text,
378        homepage_url: url,
379        download_url,
380        extracted_license_statement: license,
381        parties,
382        dependencies,
383        purl,
384        extra_data: extra_data_opt,
385        ..Default::default()
386    }
387}
388
/// Reports whether `line` begins with one of the conditional directives
/// (`%if`, `%ifarch`, `%ifnarch`, `%ifos`, `%ifnos`, `%elif`, `%else`, `%endif`).
///
/// The comparison is a plain prefix test on `line` as given; callers are
/// expected to pass an already-trimmed line.
fn is_conditional_preamble_directive(line: &str) -> bool {
    const CONDITIONAL_DIRECTIVES: [&str; 8] = [
        "%if", "%ifarch", "%ifnarch", "%ifos", "%ifnos", "%elif", "%else", "%endif",
    ];

    for prefix in CONDITIONAL_DIRECTIVES {
        if line.starts_with(prefix) {
            return true;
        }
    }
    false
}
396
397/// Expands simple macros in a string (%{name}, %{version}, %{release}, %{?dist})
398fn expand_macros(s: &str, macros: &HashMap<String, String>) -> String {
399    let mut result = s.to_string();
400
401    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
402
403    // Expand simple macros %{macro}
404    for (key, value) in macros {
405        let pattern = format!("%{{{}}}", key);
406        result = result.replace(&pattern, value);
407    }
408
409    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
410
411    result
412}
413
414/// Extracts the package name from a dependency string (removes version constraints)
415fn extract_dep_name(dep: &str) -> String {
416    let parts: Vec<&str> = dep.split(&['>', '<', '='][..]).map(|s| s.trim()).collect();
417
418    truncate_field(parts[0].to_string())
419}
420
421/// Builds a package URL for RPM packages
422fn build_rpm_purl(name: &str, version: Option<&str>) -> Option<String> {
423    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
424
425    if let Some(ver) = version {
426        purl.with_version(ver).ok()?;
427    }
428
429    Some(purl.to_string())
430}
431
// Registers this parser's metadata with the crate.
// NOTE(review): the arguments look like a display name, the path globs to
// match, the package type, an (empty) primary-language field and a
// documentation URL — confirm against the `register_parser!` definition.
crate::register_parser!(
    "RPM specfile",
    &["**/*.spec"],
    "rpm",
    "",
    Some("https://rpm-software-management.github.io/rpm/manual/spec.html"),
);