Skip to main content

provenant/parsers/
rpm_specfile.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM .spec files.
5//!
6//! Extracts package metadata from RPM specfiles, which define how RPM packages
7//! are built. This is a beyond-parity implementation - the Python version is
8//! a complete stub with "TODO: implement me!!" comments.
9//!
10//! # Supported Formats
11//! - *.spec (RPM specfiles)
12//!
13//! # Key Features
14//! - Preamble tag extraction (Name, Version, Release, Summary, License, etc.)
15//! - Dependency extraction (BuildRequires, Requires, Provides)
16//! - %description section parsing
17//! - Basic macro expansion (%{name}, %{version}, %{release})
18//! - %define and %global macro definitions
19//! - Conditional macro handling (%{?dist})
20//! - Multi-line dependency lists (comma-separated)
21//! - Scoped Requires (Requires(post), Requires(preun), etc.)
22//!
23//! # Implementation Notes
24//! - Parses only the preamble (before %prep, %build, etc. sections)
25//! - Tags are case-insensitive per RPM spec format
26//! - Simple macro expansion for common patterns
27//! - BuildRequires dependencies have is_runtime=false, scope="build"
28//! - Runtime Requires dependencies have is_runtime=true, scope="runtime"
29//! - datasource_id is "rpm_specfile"
30
31use std::collections::HashMap;
32use std::path::Path;
33use std::sync::LazyLock;
34
35use crate::parser_warn as warn;
36use packageurl::PackageUrl;
37use regex::Regex;
38
39use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
40use crate::parsers::utils::{
41    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
42};
43
44use super::PackageParser;
45
46static RE_CONDITIONAL_MACRO: LazyLock<Regex> = LazyLock::new(|| {
47    Regex::new(r"%\{\?[^}]+\}").expect("valid regex: %{?...} pattern is a compile-time constant")
48});
49
50const PACKAGE_TYPE: PackageType = PackageType::Rpm;
51
52/// Parser for RPM specfiles
53pub struct RpmSpecfileParser;
54
55impl PackageParser for RpmSpecfileParser {
56    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
57
58    fn is_match(path: &Path) -> bool {
59        path.extension()
60            .and_then(|e| e.to_str())
61            .is_some_and(|ext| ext.eq_ignore_ascii_case("spec"))
62    }
63
64    fn extract_packages(path: &Path) -> Vec<PackageData> {
65        let content = match read_file_to_string(path, None) {
66            Ok(c) => c,
67            Err(e) => {
68                warn!("Failed to read RPM specfile {:?}: {}", path, e);
69                return vec![PackageData {
70                    package_type: Some(PACKAGE_TYPE),
71                    datasource_id: Some(DatasourceId::RpmSpecfile),
72                    ..Default::default()
73                }];
74            }
75        };
76
77        vec![parse_specfile(&content)]
78    }
79}
80
81fn parse_specfile(content: &str) -> PackageData {
82    let mut tags: HashMap<String, String> = HashMap::new();
83    let mut macros: HashMap<String, String> = HashMap::new();
84    let mut build_requires: Vec<String> = Vec::new();
85    let mut requires: Vec<(String, Option<String>)> = Vec::new(); // (requirement, scope)
86    let mut provides: Vec<String> = Vec::new();
87    let mut description: Option<String> = None;
88
89    let lines: Vec<&str> = content.lines().collect();
90    let mut i = 0;
91    let mut iterations: usize = 0;
92
93    while i < lines.len() {
94        iterations += 1;
95        if iterations > MAX_ITERATION_COUNT {
96            warn!(
97                "RPM specfile preamble iteration limit ({}) exceeded",
98                MAX_ITERATION_COUNT
99            );
100            break;
101        }
102        let line = lines[i].trim();
103
104        // Stop at first section marker (%, but not %define/%global)
105        if line.starts_with('%') && !line.starts_with("%define") && !line.starts_with("%global") {
106            if is_conditional_preamble_directive(line) {
107                i += 1;
108                continue;
109            }
110            break;
111        }
112
113        // Skip empty lines and comments
114        if line.is_empty() || line.starts_with('#') {
115            i += 1;
116            continue;
117        }
118
119        // Parse %define and %global macros
120        if let Some(stripped) = line
121            .strip_prefix("%define")
122            .or(line.strip_prefix("%global"))
123        {
124            let parts: Vec<&str> = stripped.trim().splitn(2, char::is_whitespace).collect();
125            if parts.len() == 2 {
126                macros.insert(
127                    parts[0].to_string(),
128                    truncate_field(parts[1].trim().to_string()),
129                );
130            }
131            i += 1;
132            continue;
133        }
134
135        // Parse Tag: Value lines
136        if let Some(colon_pos) = line.find(':') {
137            let tag = line[..colon_pos].trim().to_lowercase();
138            let value = line[colon_pos + 1..].trim().to_string();
139
140            match tag.as_str() {
141                "buildrequires" => {
142                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
143                        let dep = dep.trim();
144                        if !dep.is_empty() {
145                            build_requires.push(dep.to_string());
146                        }
147                    }
148                }
149                t if t.starts_with("requires") => {
150                    // Parse Requires, Requires(post), Requires(preun), etc.
151                    let scope = if let Some(start) = t.find('(') {
152                        if let Some(end) = t.find(')') {
153                            Some(t[start + 1..end].to_string())
154                        } else {
155                            Some("runtime".to_string())
156                        }
157                    } else {
158                        Some("runtime".to_string())
159                    };
160
161                    for dep in value.split(',').take(MAX_ITERATION_COUNT) {
162                        let dep = dep.trim();
163                        if !dep.is_empty() {
164                            requires.push((dep.to_string(), scope.clone()));
165                        }
166                    }
167                }
168                "provides" => {
169                    for prov in value.split(',').take(MAX_ITERATION_COUNT) {
170                        let prov = prov.trim();
171                        if !prov.is_empty() {
172                            provides.push(prov.to_string());
173                        }
174                    }
175                }
176                _ => {
177                    tags.insert(tag, value);
178                }
179            }
180        }
181
182        i += 1;
183    }
184
185    // Now parse %description section if present
186    let mut desc_iterations: usize = 0;
187    while i < lines.len() {
188        desc_iterations += 1;
189        if desc_iterations > MAX_ITERATION_COUNT {
190            warn!(
191                "RPM specfile description search iteration limit ({}) exceeded",
192                MAX_ITERATION_COUNT
193            );
194            break;
195        }
196        let line = lines[i].trim();
197
198        if line.starts_with("%description") {
199            i += 1;
200            let mut desc_lines = Vec::new();
201
202            while i < lines.len() {
203                desc_iterations += 1;
204                if desc_iterations > MAX_ITERATION_COUNT {
205                    warn!(
206                        "RPM specfile description iteration limit ({}) exceeded",
207                        MAX_ITERATION_COUNT
208                    );
209                    break;
210                }
211                let desc_line = lines[i];
212                let trimmed = desc_line.trim();
213
214                // Stop at next section
215                if trimmed.starts_with('%') {
216                    break;
217                }
218
219                // Don't include empty lines at start
220                if !desc_lines.is_empty() || !trimmed.is_empty() {
221                    desc_lines.push(desc_line);
222                }
223
224                i += 1;
225            }
226
227            // Trim trailing empty lines
228            while desc_lines.last().is_some_and(|l| l.trim().is_empty()) {
229                desc_lines.pop();
230            }
231
232            if !desc_lines.is_empty() {
233                description = Some(desc_lines.join("\n"));
234            }
235
236            break;
237        }
238
239        i += 1;
240    }
241
242    // Extract basic metadata from tags
243    let name = tags.get("name").cloned();
244    let version = tags.get("version").cloned();
245    let release = tags.get("release").cloned();
246
247    // Store name and version in macros for expansion
248    if let Some(ref n) = name {
249        macros.insert("name".to_string(), n.clone());
250    }
251    if let Some(ref v) = version {
252        macros.insert("version".to_string(), v.clone());
253    }
254    if let Some(ref r) = release {
255        macros.insert("release".to_string(), r.clone());
256    }
257
258    // Expand macros in all tag values
259    let mut expanded_tags: HashMap<String, String> = HashMap::new();
260    for (tag, value) in tags.iter() {
261        expanded_tags.insert(tag.clone(), truncate_field(expand_macros(value, &macros)));
262    }
263
264    // Get expanded values
265    let name = expanded_tags.get("name").cloned();
266    let version = expanded_tags.get("version").cloned();
267    let release = expanded_tags.get("release").cloned();
268    let summary = expanded_tags.get("summary").cloned();
269    let license = expanded_tags.get("license").cloned();
270    let url = expanded_tags.get("url").cloned();
271    let group = expanded_tags.get("group").cloned();
272    let epoch = expanded_tags.get("epoch").cloned();
273    let packager = expanded_tags.get("packager").cloned();
274
275    let download_url = expanded_tags
276        .get("source")
277        .or_else(|| expanded_tags.get("source0"))
278        .cloned()
279        .map(truncate_field);
280
281    // Create parties
282    let mut parties = Vec::new();
283    if let Some(pkg) = packager {
284        let (name_opt, email_opt) = split_name_email(&pkg);
285        parties.push(Party {
286            r#type: None,
287            role: Some("packager".to_string()),
288            name: name_opt,
289            email: email_opt,
290            url: None,
291            organization: None,
292            organization_url: None,
293            timezone: None,
294        });
295    }
296
297    // Create dependencies
298    let mut dependencies = Vec::new();
299
300    for dep_str in build_requires.into_iter().take(MAX_ITERATION_COUNT) {
301        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
302        let dep_name = extract_dep_name(&dep_str);
303        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
304
305        dependencies.push(Dependency {
306            purl,
307            extracted_requirement: Some(dep_str),
308            scope: Some("build".to_string()),
309            is_runtime: Some(false),
310            is_optional: Some(false),
311            is_direct: Some(true),
312            is_pinned: None,
313            resolved_package: None,
314            extra_data: None,
315        });
316    }
317
318    for (dep_str, scope) in requires.into_iter().take(MAX_ITERATION_COUNT) {
319        let dep_str = truncate_field(expand_macros(&dep_str, &macros));
320        let dep_name = extract_dep_name(&dep_str);
321        let purl = build_rpm_purl(&dep_name, None).map(truncate_field);
322
323        dependencies.push(Dependency {
324            purl,
325            extracted_requirement: Some(dep_str),
326            scope,
327            is_runtime: Some(true),
328            is_optional: Some(false),
329            is_direct: Some(true),
330            is_pinned: None,
331            resolved_package: None,
332            extra_data: None,
333        });
334    }
335
336    // Build PURL
337    let purl = name
338        .as_ref()
339        .and_then(|n| build_rpm_purl(n, version.as_deref()))
340        .map(truncate_field);
341
342    // Build extra_data for non-standard fields
343    let mut extra_data = HashMap::new();
344    if let Some(r) = release {
345        extra_data.insert("release".to_string(), serde_json::Value::String(r));
346    }
347    if let Some(e) = epoch {
348        extra_data.insert("epoch".to_string(), serde_json::Value::String(e));
349    }
350    if let Some(g) = group {
351        extra_data.insert("group".to_string(), serde_json::Value::String(g));
352    }
353    if !provides.is_empty() {
354        let provides_json: Vec<serde_json::Value> = provides
355            .into_iter()
356            .take(MAX_ITERATION_COUNT)
357            .map(|prov| serde_json::Value::String(truncate_field(expand_macros(&prov, &macros))))
358            .collect();
359        extra_data.insert(
360            "provides".to_string(),
361            serde_json::Value::Array(provides_json),
362        );
363    }
364
365    let extra_data_opt = if extra_data.is_empty() {
366        None
367    } else {
368        Some(extra_data)
369    };
370
371    // Use %description if available, otherwise use Summary
372    let description_text = description.map(truncate_field).or(summary);
373
374    PackageData {
375        datasource_id: Some(DatasourceId::RpmSpecfile),
376        package_type: Some(PACKAGE_TYPE),
377        namespace: None, // RPM namespace is optional
378        name,
379        version,
380        description: description_text,
381        homepage_url: url,
382        download_url,
383        extracted_license_statement: license,
384        parties,
385        dependencies,
386        purl,
387        extra_data: extra_data_opt,
388        ..Default::default()
389    }
390}
391
/// Reports whether `line` begins with one of the spec conditional directives
/// (`%if`, `%ifarch`, `%ifnarch`, `%ifos`, `%ifnos`, `%elif`, `%else`, `%endif`).
///
/// This is a plain prefix test on the line exactly as given: the caller is
/// expected to have trimmed leading whitespace already.
fn is_conditional_preamble_directive(line: &str) -> bool {
    const DIRECTIVES: [&str; 8] = [
        "%if", "%ifarch", "%ifnarch", "%ifos", "%ifnos", "%elif", "%else", "%endif",
    ];

    for directive in &DIRECTIVES {
        if line.starts_with(*directive) {
            return true;
        }
    }
    false
}
399
400/// Expands simple macros in a string (%{name}, %{version}, %{release}, %{?dist})
401fn expand_macros(s: &str, macros: &HashMap<String, String>) -> String {
402    let mut result = s.to_string();
403
404    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
405
406    // Expand simple macros %{macro}
407    for (key, value) in macros {
408        let pattern = format!("%{{{}}}", key);
409        result = result.replace(&pattern, value);
410    }
411
412    result = RE_CONDITIONAL_MACRO.replace_all(&result, "").to_string();
413
414    result
415}
416
417/// Extracts the package name from a dependency string (removes version constraints)
418fn extract_dep_name(dep: &str) -> String {
419    let parts: Vec<&str> = dep.split(&['>', '<', '='][..]).map(|s| s.trim()).collect();
420
421    truncate_field(parts[0].to_string())
422}
423
424/// Builds a package URL for RPM packages
425fn build_rpm_purl(name: &str, version: Option<&str>) -> Option<String> {
426    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
427
428    if let Some(ver) = version {
429        purl.with_version(ver).ok()?;
430    }
431
432    Some(purl.to_string())
433}
434
// Invokes the crate-level `register_parser!` macro for this parser.
// NOTE(review): the macro is defined elsewhere in the crate; the argument notes
// below are inferred from the values alone - confirm against the macro definition.
crate::register_parser!(
    // Presumably a human-readable description of the parser.
    "RPM specfile",
    // Presumably the path glob(s) this parser applies to.
    &["**/*.spec"],
    // Presumably the package type identifier.
    "rpm",
    // Left empty here; meaning not visible from this file - TODO confirm.
    "",
    // Link to the upstream spec-file format documentation.
    Some("https://rpm-software-management.github.io/rpm/manual/spec.html"),
);