Skip to main content

provenant/parsers/
arch.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use packageurl::PackageUrl;
9use serde_json::Value as JsonValue;
10
11use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
12use crate::parsers::utils::{
13    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
14};
15
16use super::PackageParser;
17use super::metadata::ParserMetadata;
18
/// Package type stamped on every package built in this file and used as the purl type.
const PACKAGE_TYPE: PackageType = PackageType::Alpm;
/// Namespace stamped on every package built in this file and used as the purl namespace.
const PACKAGE_NAMESPACE: &str = "arch";
21
/// Parser for files named `.SRCINFO` or `.AURINFO`.
pub struct ArchSrcinfoParser;
/// Parser for files named `.PKGINFO`.
pub struct ArchPkginfoParser;
24
25impl PackageParser for ArchSrcinfoParser {
26    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
27
28    fn metadata() -> Vec<ParserMetadata> {
29        vec![ParserMetadata {
30            description: "Arch Linux package metadata (.SRCINFO, .AURINFO, .PKGINFO)",
31            file_patterns: &["**/.SRCINFO", "**/.AURINFO", "**/.PKGINFO"],
32            package_type: "alpm",
33            primary_language: "",
34            documentation_url: Some("https://wiki.archlinux.org/title/.SRCINFO"),
35        }]
36    }
37
38    fn is_match(path: &Path) -> bool {
39        path.file_name()
40            .and_then(|name| name.to_str())
41            .is_some_and(|name| matches!(name, ".SRCINFO" | ".AURINFO"))
42    }
43
44    fn extract_packages(path: &Path) -> Vec<PackageData> {
45        let content = match read_file_to_string(path, None) {
46            Ok(content) => content,
47            Err(e) => {
48                warn!("Failed to read Arch source metadata {:?}: {}", path, e);
49                return vec![default_package_data(srcinfo_datasource_id(path))];
50            }
51        };
52
53        parse_srcinfo_like(&content, srcinfo_datasource_id(path))
54    }
55}
56
57impl PackageParser for ArchPkginfoParser {
58    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
59
60    fn is_match(path: &Path) -> bool {
61        path.file_name().and_then(|name| name.to_str()) == Some(".PKGINFO")
62    }
63
64    fn extract_packages(path: &Path) -> Vec<PackageData> {
65        let content = match read_file_to_string(path, None) {
66            Ok(content) => content,
67            Err(e) => {
68                warn!("Failed to read Arch .PKGINFO {:?}: {}", path, e);
69                return vec![default_package_data(DatasourceId::ArchPkginfo)];
70            }
71        };
72
73        vec![parse_pkginfo(&content)]
74    }
75}
76
77fn default_package_data(datasource_id: DatasourceId) -> PackageData {
78    PackageData {
79        package_type: Some(PACKAGE_TYPE),
80        namespace: Some(PACKAGE_NAMESPACE.to_string()),
81        datasource_id: Some(datasource_id),
82        ..Default::default()
83    }
84}
85
86fn srcinfo_datasource_id(path: &Path) -> DatasourceId {
87    match path.file_name().and_then(|name| name.to_str()) {
88        Some(".AURINFO") => DatasourceId::ArchAurinfo,
89        _ => DatasourceId::ArchSrcinfo,
90    }
91}
92
/// Field key mapped to every value recorded for it, in the order the values were pushed.
type MultiMap = HashMap<String, Vec<String>>;
94
95fn parse_key_value_lines(content: &str) -> MultiMap {
96    let mut fields: MultiMap = HashMap::new();
97
98    for line in content.lines().take(MAX_ITERATION_COUNT) {
99        let line = line.trim();
100        if line.is_empty() || line.starts_with('#') {
101            continue;
102        }
103
104        if let Some((key, value)) = line.split_once('=') {
105            let key = key.trim();
106            let value = value.trim();
107            if !key.is_empty() {
108                fields
109                    .entry(key.to_string())
110                    .or_default()
111                    .push(truncate_field(value.to_string()));
112            }
113        }
114    }
115
116    fields
117}
118
119fn parse_srcinfo_like(content: &str, datasource_id: DatasourceId) -> Vec<PackageData> {
120    let mut pkgbase: MultiMap = HashMap::new();
121    let mut packages: Vec<MultiMap> = Vec::new();
122    let mut current_is_pkgbase = true;
123
124    for line in content.lines().take(MAX_ITERATION_COUNT) {
125        let line = line.trim();
126        if line.is_empty() || line.starts_with('#') {
127            continue;
128        }
129
130        let Some((key, value)) = line.split_once('=') else {
131            continue;
132        };
133
134        let key = key.trim();
135        let value = value.trim();
136
137        if key == "pkgbase" {
138            pkgbase
139                .entry(key.to_string())
140                .or_default()
141                .push(truncate_field(value.to_string()));
142            current_is_pkgbase = true;
143            continue;
144        }
145
146        if key == "pkgname" {
147            packages.push(HashMap::from([(
148                key.to_string(),
149                vec![truncate_field(value.to_string())],
150            )]));
151            current_is_pkgbase = false;
152            continue;
153        }
154
155        let target = if current_is_pkgbase {
156            &mut pkgbase
157        } else {
158            packages.last_mut().unwrap_or(&mut pkgbase)
159        };
160
161        target
162            .entry(key.to_string())
163            .or_default()
164            .push(truncate_field(value.to_string()));
165    }
166
167    if packages.is_empty() {
168        packages.push(HashMap::new());
169    }
170
171    let results: Vec<_> = packages
172        .into_iter()
173        .filter_map(|package_section| {
174            let merged = merge_srcinfo_sections(&pkgbase, &package_section);
175            let pkg = build_package_from_arch_metadata(&merged, datasource_id, true);
176            pkg.name.is_some().then_some(pkg)
177        })
178        .collect();
179
180    if results.is_empty() {
181        vec![default_package_data(datasource_id)]
182    } else {
183        results
184    }
185}
186
187fn merge_srcinfo_sections(pkgbase: &MultiMap, package: &MultiMap) -> MultiMap {
188    let mut merged = pkgbase.clone();
189
190    for (key, values) in package {
191        if should_append_srcinfo_values(key) {
192            merged
193                .entry(key.clone())
194                .or_default()
195                .extend(values.clone());
196        } else {
197            merged.insert(key.clone(), values.clone());
198        }
199    }
200
201    if !merged.contains_key("pkgname")
202        && let Some(pkgbase_name) = pkgbase.get("pkgbase").and_then(|vals| vals.first())
203    {
204        merged.insert("pkgname".to_string(), vec![pkgbase_name.clone()]);
205    }
206
207    merged
208}
209
210fn should_append_srcinfo_values(key: &str) -> bool {
211    matches!(
212        key,
213        "arch"
214            | "groups"
215            | "license"
216            | "noextract"
217            | "options"
218            | "backup"
219            | "validpgpkeys"
220            | "source"
221            | "depends"
222            | "makedepends"
223            | "checkdepends"
224            | "optdepends"
225            | "provides"
226            | "conflicts"
227            | "replaces"
228            | "md5sums"
229            | "sha1sums"
230            | "sha224sums"
231            | "sha256sums"
232            | "sha384sums"
233            | "sha512sums"
234            | "b2sums"
235            | "cksums"
236    ) || is_arch_variant_key(key)
237}
238
/// Returns `true` when `arch_variant_base` recognises `key` as an architecture-suffixed
/// key (`<base>_<arch>`).
fn is_arch_variant_key(key: &str) -> bool {
    arch_variant_base(key).is_some()
}
242
/// Returns the base key of an architecture-suffixed key.
///
/// A key qualifies when it has the form `<base>_<arch>`, where `<base>` is one of the
/// keys listed below and `<arch>` is non-empty; for example `depends_x86_64` yields
/// `Some("depends")`. Plain keys, unknown bases and keys ending in `_` yield `None`.
fn arch_variant_base(key: &str) -> Option<&'static str> {
    const ARCH_SPECIFIC_KEYS: [&str; 16] = [
        "source",
        "depends",
        "makedepends",
        "checkdepends",
        "optdepends",
        "provides",
        "conflicts",
        "replaces",
        "md5sums",
        "sha1sums",
        "sha224sums",
        "sha256sums",
        "sha384sums",
        "sha512sums",
        "b2sums",
        "cksums",
    ];

    // None of the base keys contains '_', so the first underscore is the only place the
    // base can end; anything after it is the architecture suffix.
    let (prefix, arch) = key.split_once('_')?;
    if arch.is_empty() {
        return None;
    }

    ARCH_SPECIFIC_KEYS
        .iter()
        .copied()
        .find(|base| *base == prefix)
}
269
270fn parse_pkginfo(content: &str) -> PackageData {
271    let fields = parse_key_value_lines(content);
272    build_package_from_arch_metadata(&fields, DatasourceId::ArchPkginfo, false)
273}
274
275fn build_package_from_arch_metadata(
276    fields: &MultiMap,
277    datasource_id: DatasourceId,
278    is_srcinfo_like: bool,
279) -> PackageData {
280    let name = get_first(fields, "pkgname");
281    let pkgbase = get_first(fields, "pkgbase").or_else(|| name.clone());
282    let version = if is_srcinfo_like {
283        build_srcinfo_version(fields)
284    } else {
285        get_first(fields, "pkgver")
286    };
287    let description = get_first(fields, "pkgdesc");
288    let homepage_url = get_first(fields, "url");
289    let extracted_license_statement = join_values(fields.get("license"));
290    let arch_values = get_all(fields, "arch");
291    let purl_arch = (arch_values.len() == 1).then(|| arch_values[0].as_str());
292
293    let mut package = default_package_data(datasource_id);
294    package.name = name.map(truncate_field);
295    package.version = version.map(truncate_field);
296    package.description = description.map(truncate_field);
297    package.homepage_url = homepage_url.map(truncate_field);
298    package.extracted_license_statement = extracted_license_statement.map(truncate_field);
299    package.primary_language = None;
300    package.purl = package
301        .name
302        .as_deref()
303        .and_then(|name| build_alpm_purl(name, package.version.as_deref(), purl_arch));
304    package.source_packages = pkgbase
305        .and_then(|base| build_alpm_purl(&base, package.version.as_deref(), purl_arch))
306        .into_iter()
307        .collect();
308
309    if !is_srcinfo_like {
310        if let Some(packager) = get_first(fields, "packager") {
311            let (packager_name, packager_email) = split_name_email(&packager);
312            package.parties.push(Party {
313                r#type: Some("person".to_string()),
314                role: Some("packager".to_string()),
315                name: packager_name.map(truncate_field),
316                email: packager_email.map(truncate_field),
317                url: None,
318                organization: None,
319                organization_url: None,
320                timezone: None,
321            });
322        }
323        package.size = get_first(fields, "size").and_then(|size| size.parse::<u64>().ok());
324    }
325
326    package.dependencies = build_dependencies(fields);
327    package.extra_data = build_extra_data(fields, is_srcinfo_like, purl_arch);
328    package
329}
330
331fn build_srcinfo_version(fields: &MultiMap) -> Option<String> {
332    let pkgver = get_first(fields, "pkgver")?;
333    let pkgrel = get_first(fields, "pkgrel");
334    let epoch = get_first(fields, "epoch");
335
336    let mut version = match pkgrel {
337        Some(pkgrel) => format!("{}-{}", pkgver, pkgrel),
338        None => pkgver,
339    };
340
341    if let Some(epoch) = epoch
342        && epoch != "0"
343    {
344        version = format!("{}:{}", epoch, version);
345    }
346
347    Some(version)
348}
349
350fn build_alpm_purl(name: &str, version: Option<&str>, arch: Option<&str>) -> Option<String> {
351    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
352    purl.with_namespace(PACKAGE_NAMESPACE).ok()?;
353
354    if let Some(version) = version {
355        purl.with_version(version).ok()?;
356    }
357
358    if let Some(arch) = arch {
359        purl.add_qualifier("arch", arch).ok()?;
360    }
361
362    Some(purl.to_string())
363}
364
365fn build_dependencies(fields: &MultiMap) -> Vec<Dependency> {
366    let mut dependencies = Vec::new();
367    let mut keys: Vec<_> = fields.keys().cloned().collect();
368    keys.sort();
369
370    for key in keys.iter().take(MAX_ITERATION_COUNT) {
371        let Some((scope, is_runtime, is_optional)) = dependency_semantics(key) else {
372            continue;
373        };
374
375        for value in get_all(fields, key) {
376            if let Some(dep_name) = extract_arch_dependency_name(&value) {
377                dependencies.push(Dependency {
378                    purl: build_alpm_purl(&dep_name, None, None),
379                    extracted_requirement: Some(truncate_field(value.clone())),
380                    scope: Some(scope.to_string()),
381                    is_runtime: Some(is_runtime),
382                    is_optional: Some(is_optional),
383                    is_pinned: Some(false),
384                    is_direct: Some(true),
385                    resolved_package: None,
386                    extra_data: None,
387                });
388            }
389        }
390    }
391
392    dependencies
393}
394
395fn dependency_semantics(key: &str) -> Option<(&str, bool, bool)> {
396    let base = key;
397    let normalized = arch_variant_base(key).unwrap_or(key);
398
399    match normalized {
400        "depends" | "depend" => Some((base, true, false)),
401        "makedepends" | "makedepend" => Some((base, false, false)),
402        "checkdepends" | "checkdepend" => Some((base, false, false)),
403        "optdepends" | "optdepend" => Some((base, true, true)),
404        _ => None,
405    }
406}
407
408fn extract_arch_dependency_name(value: &str) -> Option<String> {
409    let dep = value.split(':').next()?.trim();
410    let end = dep.find(['<', '>', '=']).unwrap_or(dep.len());
411    let name = dep[..end].trim();
412    (!name.is_empty()).then(|| truncate_field(name.to_string()))
413}
414
415fn build_extra_data(
416    fields: &MultiMap,
417    is_srcinfo_like: bool,
418    purl_arch: Option<&str>,
419) -> Option<HashMap<String, JsonValue>> {
420    let consumed: HashSet<&str> = HashSet::from([
421        "pkgbase", "pkgname", "pkgver", "pkgrel", "epoch", "pkgdesc", "url", "license", "packager",
422        "size",
423    ]);
424
425    let mut extra = HashMap::new();
426
427    for (key, values) in fields.iter().take(MAX_ITERATION_COUNT) {
428        if consumed.contains(key.as_str()) {
429            continue;
430        }
431
432        let value = if should_force_array_extra_value(key) {
433            JsonValue::Array(
434                values
435                    .iter()
436                    .cloned()
437                    .map(|v| JsonValue::String(truncate_field(v)))
438                    .collect(),
439            )
440        } else if values.len() == 1 {
441            if key == "builddate" {
442                values[0]
443                    .parse::<u64>()
444                    .map(JsonValue::from)
445                    .unwrap_or_else(|_| JsonValue::String(truncate_field(values[0].clone())))
446            } else {
447                JsonValue::String(truncate_field(values[0].clone()))
448            }
449        } else {
450            JsonValue::Array(
451                values
452                    .iter()
453                    .cloned()
454                    .map(|v| JsonValue::String(truncate_field(v)))
455                    .collect(),
456            )
457        };
458        extra.insert(key.clone(), value);
459    }
460
461    if is_srcinfo_like && !fields.contains_key("pkgbase") && !fields.contains_key("pkgname") {
462        return None;
463    }
464
465    if !is_srcinfo_like
466        && purl_arch.is_some()
467        && !extra.contains_key("arch")
468        && let Some(arch) = purl_arch
469    {
470        extra.insert(
471            "arch".to_string(),
472            JsonValue::String(truncate_field(arch.to_string())),
473        );
474    }
475
476    (!extra.is_empty()).then_some(extra)
477}
478
479fn get_first(fields: &MultiMap, key: &str) -> Option<String> {
480    fields.get(key).and_then(|values| values.first()).cloned()
481}
482
483fn get_all(fields: &MultiMap, key: &str) -> Vec<String> {
484    fields.get(key).cloned().unwrap_or_default()
485}
486
/// Joins the given values with `" AND "`.
///
/// Returns `None` when no list is given or the list is empty.
fn join_values(values: Option<&Vec<String>>) -> Option<String> {
    values
        .filter(|items| !items.is_empty())
        .map(|items| items.join(" AND "))
}
495
496fn should_force_array_extra_value(key: &str) -> bool {
497    matches!(
498        key,
499        "provides"
500            | "conflict"
501            | "conflicts"
502            | "replace"
503            | "replaces"
504            | "source"
505            | "arch"
506            | "license"
507            | "groups"
508            | "options"
509            | "backup"
510            | "validpgpkeys"
511            | "md5sums"
512            | "sha1sums"
513            | "sha224sums"
514            | "sha256sums"
515            | "sha384sums"
516            | "sha512sums"
517            | "b2sums"
518            | "cksums"
519    ) || is_arch_variant_key(key)
520}