Skip to main content

provenant/parsers/
alpine.rs

1//! Parser for Alpine Linux package metadata files.
2//!
3//! Extracts installed package metadata from Alpine Linux package database files
4//! using the APK package manager format.
5//!
6//! # Supported Formats
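//! - `/lib/apk/db/installed` (Installed package database)
//! - `APKBUILD` (package build recipes)
//! - `*.apk` (package archives, read through the embedded `.PKGINFO` file)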
8//!
9//! # Key Features
10//! - Installed package metadata extraction from system database
11//! - Dependency tracking from provides/requires fields
12//! - Author and maintainer information extraction
13//! - License information parsing
14//! - Package URL (purl) generation
15//!
16//! # Implementation Notes
17//! - Uses custom case-sensitive key-value parser (not the generic `rfc822` module)
18//! - Database stored in text format with multi-paragraph records
19//! - Graceful error handling with `warn!()` logs
20
21use std::collections::HashMap;
22use std::path::Path;
23
24use log::warn;
25
26use crate::models::{
27    DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
28};
29use crate::parsers::utils::{read_file_to_string, split_name_email};
30
31use super::PackageParser;
32use super::license_normalization::{
33    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
34    build_declared_license_data_from_pair, combine_normalized_licenses,
35    empty_declared_license_data, normalize_declared_license_key,
36};
37
38const PACKAGE_TYPE: PackageType = PackageType::Alpine;
39
40fn default_package_data(datasource_id: DatasourceId) -> PackageData {
41    PackageData {
42        package_type: Some(PACKAGE_TYPE),
43        datasource_id: Some(datasource_id),
44        ..Default::default()
45    }
46}
47
48/// Parser for Alpine Linux installed package database
49pub struct AlpineInstalledParser;
50
51pub struct AlpineApkbuildParser;
52
53impl PackageParser for AlpineInstalledParser {
54    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
55
56    fn is_match(path: &Path) -> bool {
57        path.to_str()
58            .map(|p| p.contains("/lib/apk/db/") && p.ends_with("installed"))
59            .unwrap_or(false)
60    }
61
62    fn extract_packages(path: &Path) -> Vec<PackageData> {
63        let content = match read_file_to_string(path) {
64            Ok(c) => c,
65            Err(e) => {
66                warn!("Failed to read Alpine installed db {:?}: {}", path, e);
67                return vec![default_package_data(DatasourceId::AlpineInstalledDb)];
68            }
69        };
70
71        parse_alpine_installed_db(&content)
72    }
73}
74
75impl PackageParser for AlpineApkbuildParser {
76    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
77
78    fn is_match(path: &Path) -> bool {
79        path.file_name().and_then(|n| n.to_str()) == Some("APKBUILD")
80    }
81
82    fn extract_packages(path: &Path) -> Vec<PackageData> {
83        let content = match read_file_to_string(path) {
84            Ok(c) => c,
85            Err(e) => {
86                warn!("Failed to read APKBUILD {:?}: {}", path, e);
87                return vec![default_package_data(DatasourceId::AlpineApkbuild)];
88            }
89        };
90
91        vec![parse_apkbuild(&content)]
92    }
93}
94
95fn parse_alpine_installed_db(content: &str) -> Vec<PackageData> {
96    let raw_paragraphs: Vec<&str> = content
97        .split("\n\n")
98        .filter(|p| !p.trim().is_empty())
99        .collect();
100
101    let mut all_packages = Vec::new();
102
103    for raw_text in &raw_paragraphs {
104        let headers = parse_alpine_headers(raw_text);
105        let pkg = parse_alpine_package_paragraph(&headers, raw_text);
106        if pkg.name.is_some() {
107            all_packages.push(pkg);
108        }
109    }
110
111    if all_packages.is_empty() {
112        return vec![default_package_data(DatasourceId::AlpineInstalledDb)];
113    }
114
115    all_packages
116}
117
/// Collects the `key:value` lines of one database paragraph, keeping key case.
///
/// Alpine's installed DB uses single-letter, case-sensitive keys (`T:` is the
/// description while `t:` is the timestamp; `C:` is the checksum while `c:` is
/// the git commit), so keys must not be lowercased as the generic rfc822 parser
/// does.
///
/// Each line is split at its first `:`; key and value are trimmed, and pairs
/// where either side is empty are ignored. Repeated keys accumulate their
/// values in file order.
fn parse_alpine_headers(content: &str) -> HashMap<String, Vec<String>> {
    let fields = content
        .lines()
        .filter_map(|line| line.split_once(':'))
        .map(|(key, value)| (key.trim(), value.trim()))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty());

    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
    for (key, value) in fields {
        headers
            .entry(key.to_owned())
            .or_default()
            .push(value.to_owned());
    }
    headers
}
145
/// Returns the first value recorded for `key`, trimmed, or `None` when the key
/// is absent.
fn get_first(headers: &HashMap<String, Vec<String>>, key: &str) -> Option<String> {
    let first = headers.get(key)?.first()?;
    Some(first.trim().to_owned())
}
152
/// Returns every value recorded for `key` that is not blank, in insertion
/// order. Values are returned as stored (not trimmed); a missing key yields an
/// empty list.
fn get_all(headers: &HashMap<String, Vec<String>>, key: &str) -> Vec<String> {
    match headers.get(key) {
        Some(values) => values
            .iter()
            .filter(|value| !value.trim().is_empty())
            .cloned()
            .collect(),
        None => Vec::new(),
    }
}
162
163fn parse_alpine_package_paragraph(
164    headers: &HashMap<String, Vec<String>>,
165    raw_text: &str,
166) -> PackageData {
167    let name = get_first(headers, "P");
168    let version = get_first(headers, "V");
169    let description = get_first(headers, "T");
170    let homepage_url = get_first(headers, "U");
171    let architecture = get_first(headers, "A");
172
173    let is_virtual = description
174        .as_ref()
175        .is_some_and(|d| d == "virtual meta package");
176
177    let namespace = Some("alpine".to_string());
178    let mut parties = Vec::new();
179
180    if let Some(maintainer) = get_first(headers, "m") {
181        let (name_opt, email_opt) = split_name_email(&maintainer);
182        parties.push(Party {
183            r#type: None,
184            role: Some("maintainer".to_string()),
185            name: name_opt,
186            email: email_opt,
187            url: None,
188            organization: None,
189            organization_url: None,
190            timezone: None,
191        });
192    }
193
194    let extracted_license_statement = get_first(headers, "L");
195    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
196        build_alpine_license_data(extracted_license_statement.as_deref());
197
198    let source_packages = if let Some(origin) = get_first(headers, "o") {
199        vec![format!("pkg:alpine/{}", origin)]
200    } else {
201        Vec::new()
202    };
203    let vcs_url = get_first(headers, "c")
204        .map(|commit| format!("git+https://git.alpinelinux.org/aports/commit/?id={commit}"));
205
206    let mut dependencies = Vec::new();
207    for dep in get_all(headers, "D") {
208        for dep_str in dep.split_whitespace() {
209            if dep_str.starts_with("so:") || dep_str.starts_with("cmd:") {
210                continue;
211            }
212
213            dependencies.push(Dependency {
214                purl: Some(format!("pkg:alpine/{}", dep_str)),
215                extracted_requirement: None,
216                scope: Some("install".to_string()),
217                is_runtime: Some(true),
218                is_optional: Some(false),
219                is_direct: Some(true),
220                resolved_package: None,
221                extra_data: None,
222                is_pinned: Some(false),
223            });
224        }
225    }
226
227    let mut extra_data = HashMap::new();
228
229    if is_virtual {
230        extra_data.insert("is_virtual".to_string(), true.into());
231    }
232
233    if let Some(checksum) = get_first(headers, "C") {
234        extra_data.insert("checksum".to_string(), checksum.into());
235    }
236
237    if let Some(size) = get_first(headers, "S") {
238        extra_data.insert("compressed_size".to_string(), size.into());
239    }
240
241    if let Some(installed_size) = get_first(headers, "I") {
242        extra_data.insert("installed_size".to_string(), installed_size.into());
243    }
244
245    if let Some(timestamp) = get_first(headers, "t") {
246        extra_data.insert("build_timestamp".to_string(), timestamp.into());
247    }
248
249    if let Some(commit) = get_first(headers, "c") {
250        extra_data.insert("git_commit".to_string(), commit.into());
251    }
252
253    let providers = extract_providers(raw_text);
254    if !providers.is_empty() {
255        let provider_list: Vec<serde_json::Value> =
256            providers.into_iter().map(|s| s.into()).collect();
257        extra_data.insert("providers".to_string(), provider_list.into());
258    }
259
260    let file_references = extract_file_references(raw_text);
261
262    PackageData {
263        datasource_id: Some(DatasourceId::AlpineInstalledDb),
264        package_type: Some(PACKAGE_TYPE),
265        namespace: namespace.clone(),
266        name: name.clone(),
267        version: version.clone(),
268        description,
269        homepage_url,
270        vcs_url,
271        parties,
272        declared_license_expression,
273        declared_license_expression_spdx,
274        license_detections,
275        extracted_license_statement,
276        source_packages,
277        dependencies,
278        file_references,
279        purl: name
280            .as_ref()
281            .and_then(|n| build_alpine_purl(n, version.as_deref(), architecture.as_deref())),
282        extra_data: if extra_data.is_empty() {
283            None
284        } else {
285            Some(extra_data)
286        },
287        ..Default::default()
288    }
289}
290
291fn parse_apkbuild(content: &str) -> PackageData {
292    let variables = parse_apkbuild_variables(content);
293
294    let name = variables.get("pkgname").cloned();
295    let version = match (variables.get("pkgver"), variables.get("pkgrel")) {
296        (Some(ver), Some(rel)) => Some(format!("{}-r{}", ver, rel)),
297        (Some(ver), None) => Some(ver.clone()),
298        _ => None,
299    };
300    let description = variables.get("pkgdesc").cloned();
301    let homepage_url = variables.get("url").cloned();
302    let extracted_license_statement = variables.get("license").cloned();
303    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
304        build_alpine_license_data(extracted_license_statement.as_deref());
305
306    let dependencies = parse_apkbuild_dependencies(&variables);
307
308    let mut extra_data = HashMap::new();
309    if let Some(source) = variables.get("source") {
310        let sources_value: Vec<serde_json::Value> = parse_apkbuild_sources(source)
311            .into_iter()
312            .map(|(file_name, url)| serde_json::json!({ "file_name": file_name, "url": url }))
313            .collect();
314        if !sources_value.is_empty() {
315            extra_data.insert(
316                "sources".to_string(),
317                serde_json::Value::Array(sources_value),
318            );
319        }
320    }
321    for (field, checksum_key) in [
322        ("sha512sums", "sha512"),
323        ("sha256sums", "sha256"),
324        ("md5sums", "md5"),
325    ] {
326        if let Some(checksums) = variables.get(field) {
327            let checksum_entries: Vec<serde_json::Value> = parse_apkbuild_checksums(checksums)
328                .into_iter()
329                .map(|(file_name, checksum)| serde_json::json!({ "file_name": file_name, checksum_key: checksum }))
330                .collect();
331            if !checksum_entries.is_empty() {
332                match extra_data.get_mut("checksums") {
333                    Some(serde_json::Value::Array(existing)) => existing.extend(checksum_entries),
334                    _ => {
335                        extra_data.insert(
336                            "checksums".to_string(),
337                            serde_json::Value::Array(checksum_entries),
338                        );
339                    }
340                }
341            }
342        }
343    }
344
345    PackageData {
346        datasource_id: Some(DatasourceId::AlpineApkbuild),
347        package_type: Some(PACKAGE_TYPE),
348        namespace: None,
349        name: name.clone(),
350        version: version.clone(),
351        description,
352        homepage_url,
353        extracted_license_statement,
354        declared_license_expression,
355        declared_license_expression_spdx,
356        license_detections,
357        dependencies,
358        purl: name
359            .as_deref()
360            .and_then(|n| build_alpine_purl(n, version.as_deref(), None)),
361        extra_data: (!extra_data.is_empty()).then_some(extra_data),
362        ..default_package_data(DatasourceId::AlpineApkbuild)
363    }
364}
365
/// Parses the top-level variable assignments of an `APKBUILD` and returns the
/// resolved values of the metadata variables this parser cares about.
///
/// Blank lines, `#` comment lines and everything inside function bodies
/// (entered on a line ending in `(){` or `() {`, tracked by brace depth) are
/// ignored. A quoted value whose closing quote is not on the assignment line
/// is continued over the following lines until one ends with that quote. This
/// includes the common list form where the assignment line holds nothing but
/// the opening quote:
///
/// ```text
/// makedepends="
///     autoconf
///     automake
///     "
/// ```
///
/// Only the keys listed in `RESOLVED_KEYS` are returned; all assignments are
/// available for `$var` substitution.
fn parse_apkbuild_variables(content: &str) -> HashMap<String, String> {
    const RESOLVED_KEYS: [&str; 16] = [
        "pkgname",
        "pkgver",
        "pkgrel",
        "pkgdesc",
        "url",
        "license",
        "source",
        "depends",
        "depends_dev",
        "makedepends",
        "makedepends_build",
        "makedepends_host",
        "checkdepends",
        "sha512sums",
        "sha256sums",
        "md5sums",
    ];

    let mut raw: HashMap<String, String> = HashMap::new();
    let mut lines = content.lines();
    let mut brace_depth = 0usize;

    while let Some(line) = lines.next() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        if trimmed.ends_with("(){") || trimmed.ends_with("() {") {
            brace_depth += 1;
            continue;
        }
        if brace_depth > 0 {
            let opened = trimmed.matches('{').count();
            let closed = trimmed.matches('}').count();
            brace_depth = (brace_depth + opened).saturating_sub(closed);
            continue;
        }
        let Some((name, value)) = trimmed.split_once('=') else {
            continue;
        };

        let mut value = value.trim().to_string();
        if let Some(quote) = unterminated_apkbuild_quote(&value) {
            // Lines are appended verbatim; the surrounding quotes are stripped
            // later, when the value is resolved.
            for continuation in lines.by_ref() {
                value.push('\n');
                value.push_str(continuation);
                if continuation.trim_end().ends_with(quote) {
                    break;
                }
            }
        }
        raw.insert(name.trim().to_string(), value);
    }

    RESOLVED_KEYS
        .iter()
        .filter_map(|key| {
            raw.get(*key)
                .map(|value| (key.to_string(), resolve_apkbuild_value(value, &raw)))
        })
        .collect()
}

/// Returns the opening quote character when `value` starts a quoted string
/// that is not closed on the same line, and `None` otherwise.
///
/// A value that is just the opening quote counts as unterminated. Inside
/// double quotes a backslash-escaped quote does not close the string.
fn unterminated_apkbuild_quote(value: &str) -> Option<char> {
    let quote = value.chars().next().filter(|c| matches!(c, '"' | '\''))?;
    let mut escaped = false;
    for c in value[quote.len_utf8()..].chars() {
        if escaped {
            escaped = false;
        } else if c == '\\' && quote == '"' {
            escaped = true;
        } else if c == quote {
            return None;
        }
    }
    Some(quote)
}

/// Expands variable references in `value` using the raw assignments in
/// `variables`, repeating until nothing changes (at most 8 passes).
///
/// Supported reference forms are `$name`, `${name}`, `${name//./-}`,
/// `${name//./_}` and `${name::8}`. Wrapping quotes around `value` are removed
/// first. Variables are substituted longest-name-first so that the result does
/// not depend on hash-map iteration order and `$_pkg` cannot consume the start
/// of `$_pkgname`.
fn resolve_apkbuild_value(value: &str, variables: &HashMap<String, String>) -> String {
    let mut resolved = strip_wrapping_quotes(value.trim()).to_string();
    if !resolved.contains('$') {
        // Every substitution pattern starts with `$`, so there is nothing to do.
        return resolved;
    }

    let ordered = ordered_apkbuild_variables(variables);
    // Expand each variable's own value once up front; this does not depend on
    // the fix-point pass below.
    let expanded: Vec<(&str, String)> = ordered
        .iter()
        .map(|&(name, raw)| (name, resolve_apkbuild_once(raw, &ordered)))
        .collect();

    for _ in 0..8 {
        let previous = resolved.clone();
        for (name, expansion) in &expanded {
            resolved =
                substitute_apkbuild_variable(resolved, name, strip_wrapping_quotes(expansion));
        }
        if resolved == previous {
            break;
        }
    }
    resolved
}

/// Expands variable references in `value` in a single pass, substituting each
/// variable's raw (unexpanded) value.
fn resolve_apkbuild_value_no_recursion(value: &str, variables: &HashMap<String, String>) -> String {
    resolve_apkbuild_once(value, &ordered_apkbuild_variables(variables))
}

/// Returns `(name, unquoted raw value)` pairs sorted by descending name length,
/// then by name. Entries with an empty name are skipped because their `$`
/// pattern would match every reference.
fn ordered_apkbuild_variables(variables: &HashMap<String, String>) -> Vec<(&str, &str)> {
    let mut ordered: Vec<(&str, &str)> = variables
        .iter()
        .filter(|(name, _)| !name.is_empty())
        .map(|(name, raw)| (name.as_str(), strip_wrapping_quotes(raw.trim())))
        .collect();
    ordered.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));
    ordered
}

/// Strips wrapping quotes from `value` and applies one substitution pass over
/// `ordered`.
fn resolve_apkbuild_once(value: &str, ordered: &[(&str, &str)]) -> String {
    let mut resolved = strip_wrapping_quotes(value.trim()).to_string();
    for &(name, raw) in ordered {
        resolved = substitute_apkbuild_variable(resolved, name, raw);
    }
    resolved
}

/// Replaces every supported reference to variable `name` in `text` by `value`.
fn substitute_apkbuild_variable(text: String, name: &str, value: &str) -> String {
    if !text.contains('$') {
        return text;
    }
    text.replace(&format!("${{{name}//./-}}"), &value.replace('.', "-"))
        .replace(&format!("${{{name}//./_}}"), &value.replace('.', "_"))
        .replace(
            &format!("${{{name}::8}}"),
            &value.chars().take(8).collect::<String>(),
        )
        .replace(&format!("${{{name}}}"), value)
        .replace(&format!("${name}"), value)
}

/// Removes one pair of matching wrapping quotes (`"…"` or `'…'`) from `value`;
/// returns `value` unchanged when it is not wrapped.
fn strip_wrapping_quotes(value: &str) -> &str {
    value
        .strip_prefix('"')
        .and_then(|v| v.strip_suffix('"'))
        .or_else(|| value.strip_prefix('\'').and_then(|v| v.strip_suffix('\'')))
        .unwrap_or(value)
}
481
/// Splits an APKBUILD `source` value into `(file_name, url)` pairs.
///
/// Each whitespace-separated entry is classified as:
/// - `name::url` -> both the file name and the URL,
/// - an entry containing `://` -> URL only,
/// - anything else -> file name only.
fn parse_apkbuild_sources(value: &str) -> Vec<(Option<String>, Option<String>)> {
    value
        .split_whitespace()
        .map(|entry| match entry.split_once("::") {
            Some((file_name, url)) => (Some(file_name.to_owned()), Some(url.to_owned())),
            None if entry.contains("://") => (None, Some(entry.to_owned())),
            None => (Some(entry.to_owned()), None),
        })
        .collect()
}
497
/// Parses an APKBUILD checksum list into `(file_name, checksum)` pairs.
///
/// The value is read as a flat stream of whitespace-separated tokens taken two
/// at a time in `checksum file_name` order; a trailing unpaired token is
/// discarded.
fn parse_apkbuild_checksums(value: &str) -> Vec<(String, String)> {
    let mut tokens = value.split_whitespace();
    let mut pairs = Vec::new();
    while let (Some(checksum), Some(file_name)) = (tokens.next(), tokens.next()) {
        pairs.push((file_name.to_owned(), checksum.to_owned()));
    }
    pairs
}
513
514fn build_alpine_license_data(
515    extracted: Option<&str>,
516) -> (Option<String>, Option<String>, Vec<LicenseDetection>) {
517    let Some(extracted) = extracted.map(str::trim).filter(|s| !s.is_empty()) else {
518        return empty_declared_license_data();
519    };
520
521    if extracted == "custom:multiple" {
522        return build_declared_license_data_from_pair(
523            "unknown-license-reference",
524            "LicenseRef-provenant-unknown-license-reference",
525            DeclaredLicenseMatchMetadata::single_line(extracted),
526        );
527    }
528
529    let normalized_tokens = extracted
530        .split_whitespace()
531        .filter(|part| *part != "AND")
532        .map(normalize_alpine_license_token)
533        .collect::<Option<Vec<_>>>();
534
535    let Some(normalized_tokens) = normalized_tokens else {
536        return empty_declared_license_data();
537    };
538
539    let Some(combined) = combine_normalized_licenses(normalized_tokens, " AND ") else {
540        return empty_declared_license_data();
541    };
542
543    build_declared_license_data(
544        combined,
545        DeclaredLicenseMatchMetadata::single_line(extracted),
546    )
547}
548
549fn normalize_alpine_license_token(token: &str) -> Option<NormalizedDeclaredLicense> {
550    match token {
551        "ICU" => Some(NormalizedDeclaredLicense::new("x11", "ICU")),
552        "Unicode-TOU" => Some(NormalizedDeclaredLicense::new("unicode-tou", "Unicode-TOU")),
553        "Ruby" => Some(NormalizedDeclaredLicense::new("ruby", "Ruby")),
554        "BSD-2-Clause" => Some(NormalizedDeclaredLicense::new(
555            "bsd-simplified",
556            "BSD-2-Clause",
557        )),
558        "BSD-3-Clause" => Some(NormalizedDeclaredLicense::new("bsd-new", "BSD-3-Clause")),
559        other => normalize_declared_license_key(other),
560    }
561}
562
563fn parse_apkbuild_dependencies(variables: &HashMap<String, String>) -> Vec<Dependency> {
564    let mut dependencies = Vec::new();
565
566    for (field, scope, is_runtime, is_optional) in [
567        ("depends", "depends", true, false),
568        ("depends_dev", "depends_dev", false, true),
569        ("makedepends", "makedepends", false, true),
570        ("makedepends_build", "makedepends_build", false, true),
571        ("makedepends_host", "makedepends_host", false, true),
572        ("checkdepends", "checkdepends", false, true),
573    ] {
574        let Some(value) = variables.get(field) else {
575            continue;
576        };
577
578        for dep_str in value.split_whitespace() {
579            let dep_str = dep_str.trim();
580            if dep_str.is_empty() {
581                continue;
582            }
583
584            let dep_name = dep_str
585                .split(['<', '>', '=', '!', '~'])
586                .next()
587                .unwrap_or(dep_str)
588                .trim();
589            if dep_name.is_empty() {
590                continue;
591            }
592
593            dependencies.push(Dependency {
594                purl: build_alpine_purl(dep_name, None, None),
595                extracted_requirement: Some(dep_str.to_string()),
596                scope: Some(scope.to_string()),
597                is_runtime: Some(is_runtime),
598                is_optional: Some(is_optional),
599                is_pinned: Some(dep_str.contains('=')),
600                is_direct: Some(true),
601                resolved_package: None,
602                extra_data: None,
603            });
604        }
605    }
606
607    dependencies
608}
609
610fn extract_file_references(raw_text: &str) -> Vec<FileReference> {
611    let mut file_references = Vec::new();
612    let mut current_dir = String::new();
613    let mut current_file: Option<FileReference> = None;
614
615    for line in raw_text.lines() {
616        if line.is_empty() {
617            continue;
618        }
619
620        if let Some((field_type, value)) = line.split_once(':') {
621            let value = value.trim();
622            match field_type {
623                "F" => {
624                    if let Some(file) = current_file.take() {
625                        file_references.push(file);
626                    }
627                    current_dir = value.to_string();
628                }
629                "R" => {
630                    if let Some(file) = current_file.take() {
631                        file_references.push(file);
632                    }
633
634                    let path = if current_dir.is_empty() {
635                        value.to_string()
636                    } else {
637                        format!("{}/{}", current_dir, value)
638                    };
639
640                    current_file = Some(FileReference {
641                        path,
642                        size: None,
643                        sha1: None,
644                        md5: None,
645                        sha256: None,
646                        sha512: None,
647                        extra_data: None,
648                    });
649                }
650                "Z" => {
651                    if let Some(ref mut file) = current_file
652                        && value.starts_with("Q1")
653                    {
654                        use base64::Engine;
655                        if let Ok(decoded) =
656                            base64::engine::general_purpose::STANDARD.decode(&value[2..])
657                        {
658                            let hex_string = decoded
659                                .iter()
660                                .map(|b| format!("{:02x}", b))
661                                .collect::<String>();
662                            file.sha1 = Some(hex_string);
663                        }
664                    }
665                }
666                "a" => {
667                    if let Some(ref mut file) = current_file {
668                        let mut extra = HashMap::new();
669                        extra.insert(
670                            "attributes".to_string(),
671                            serde_json::Value::String(value.to_string()),
672                        );
673                        file.extra_data = Some(extra);
674                    }
675                }
676                _ => {}
677            }
678        }
679    }
680
681    if let Some(file) = current_file {
682        file_references.push(file);
683    }
684
685    file_references
686}
687
/// Returns every whitespace-separated token from the lines of `raw_text` that
/// start with `p:`, in order of appearance.
fn extract_providers(raw_text: &str) -> Vec<String> {
    raw_text
        .lines()
        .filter_map(|line| line.strip_prefix("p:"))
        .flat_map(str::split_whitespace)
        .map(str::to_owned)
        .collect()
}
703
704fn build_alpine_purl(
705    name: &str,
706    version: Option<&str>,
707    architecture: Option<&str>,
708) -> Option<String> {
709    use packageurl::PackageUrl;
710
711    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
712
713    if let Some(ver) = version {
714        purl.with_version(ver).ok()?;
715    }
716
717    if let Some(arch) = architecture {
718        purl.add_qualifier("arch", arch).ok()?;
719    }
720
721    Some(purl.to_string())
722}
723
724/// Parser for Alpine Linux .apk package archives
725pub struct AlpineApkParser;
726
727impl PackageParser for AlpineApkParser {
728    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
729
730    fn is_match(path: &Path) -> bool {
731        path.extension().and_then(|e| e.to_str()) == Some("apk")
732    }
733
734    fn extract_packages(path: &Path) -> Vec<PackageData> {
735        vec![match extract_apk_archive(path) {
736            Ok(data) => data,
737            Err(e) => {
738                warn!("Failed to extract .apk archive {:?}: {}", path, e);
739                PackageData {
740                    package_type: Some(PACKAGE_TYPE),
741                    datasource_id: Some(DatasourceId::AlpineApkArchive),
742                    ..Default::default()
743                }
744            }
745        }]
746    }
747}
748
749fn extract_apk_archive(path: &Path) -> Result<PackageData, String> {
750    use flate2::read::GzDecoder;
751    use std::io::Read;
752
753    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .apk file: {}", e))?;
754
755    let decoder = GzDecoder::new(file);
756    let mut archive = tar::Archive::new(decoder);
757
758    for entry_result in archive
759        .entries()
760        .map_err(|e| format!("Failed to read tar entries: {}", e))?
761    {
762        let mut entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
763
764        let entry_path = entry
765            .path()
766            .map_err(|e| format!("Failed to get entry path: {}", e))?;
767
768        if entry_path.ends_with(".PKGINFO") {
769            let mut content = String::new();
770            entry
771                .read_to_string(&mut content)
772                .map_err(|e| format!("Failed to read .PKGINFO: {}", e))?;
773
774            return Ok(parse_pkginfo(&content));
775        }
776    }
777
778    Err(".apk archive does not contain .PKGINFO file".to_string())
779}
780
781fn parse_pkginfo(content: &str) -> PackageData {
782    let mut fields: HashMap<&str, Vec<&str>> = HashMap::new();
783
784    for line in content.lines() {
785        let line = line.trim();
786        if line.is_empty() || line.starts_with('#') {
787            continue;
788        }
789
790        if let Some((key, value)) = line.split_once(" = ") {
791            fields.entry(key.trim()).or_default().push(value.trim());
792        }
793    }
794
795    let name = fields
796        .get("pkgname")
797        .and_then(|v| v.first())
798        .map(|s| s.to_string());
799    let pkgver = fields.get("pkgver").and_then(|v| v.first());
800    let version = pkgver.map(|s| s.to_string());
801    let arch = fields
802        .get("arch")
803        .and_then(|v| v.first())
804        .map(|s| s.to_string());
805    let license = fields
806        .get("license")
807        .and_then(|v| v.first())
808        .map(|s| s.to_string());
809    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
810        build_alpine_license_data(license.as_deref());
811    let description = fields
812        .get("pkgdesc")
813        .and_then(|v| v.first())
814        .map(|s| s.to_string());
815    let homepage = fields
816        .get("url")
817        .and_then(|v| v.first())
818        .map(|s| s.to_string());
819    let origin = fields
820        .get("origin")
821        .and_then(|v| v.first())
822        .map(|s| s.to_string());
823    let maintainer_str = fields.get("maintainer").and_then(|v| v.first());
824
825    let mut parties = Vec::new();
826    if let Some(maint) = maintainer_str {
827        let (maint_name, maint_email) = split_name_email(maint);
828        parties.push(Party {
829            r#type: Some("person".to_string()),
830            role: Some("maintainer".to_string()),
831            name: maint_name,
832            email: maint_email,
833            url: None,
834            organization: None,
835            organization_url: None,
836            timezone: None,
837        });
838    }
839
840    let purl = name
841        .as_ref()
842        .and_then(|n| build_alpine_purl(n, version.as_deref(), arch.as_deref()));
843
844    let mut dependencies = Vec::new();
845    if let Some(depends_list) = fields.get("depend") {
846        for dep_str in depends_list {
847            let dep_name = dep_str.split_whitespace().next().unwrap_or(dep_str);
848            dependencies.push(Dependency {
849                purl: Some(format!("pkg:alpine/{}", dep_name)),
850                extracted_requirement: Some(dep_str.to_string()),
851                scope: Some("runtime".to_string()),
852                is_runtime: Some(true),
853                is_optional: Some(false),
854                is_pinned: None,
855                is_direct: Some(true),
856                resolved_package: None,
857                extra_data: None,
858            });
859        }
860    }
861
862    PackageData {
863        datasource_id: Some(DatasourceId::AlpineApkArchive),
864        package_type: Some(PACKAGE_TYPE),
865        namespace: Some("alpine".to_string()),
866        name,
867        version,
868        description,
869        homepage_url: homepage,
870        declared_license_expression,
871        declared_license_expression_spdx,
872        license_detections,
873        extracted_license_statement: license,
874        parties,
875        dependencies,
876        purl,
877        extra_data: origin.map(|o| {
878            let mut map = HashMap::new();
879            map.insert("origin".to_string(), serde_json::Value::String(o));
880            map
881        }),
882        ..Default::default()
883    }
884}
885
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Writes `content` to `lib/apk/db/installed` beneath a fresh temporary directory,
    /// reproducing the path layout of the Alpine installed database.
    ///
    /// Returns the `TempDir` guard together with the path of the written file. Keep the
    /// guard bound for as long as the path is in use.
    fn create_temp_installed_db(content: &str) -> (TempDir, PathBuf) {
        let root = TempDir::new().expect("Failed to create temp dir");
        let db_root = root.path().join("lib/apk/db");
        std::fs::create_dir_all(&db_root).expect("Failed to create db dir");
        let installed = db_root.join("installed");
        let mut handle = std::fs::File::create(&installed).expect("Failed to create file");
        handle
            .write_all(content.as_bytes())
            .expect("Failed to write content");
        (root, installed)
    }

    /// The installed-db parser accepts the database under `/lib` and `/var/lib`, and
    /// rejects a sibling `status` file as well as a bare `installed` file name.
    #[test]
    fn test_alpine_parser_is_match() {
        let accepts = |p: &str| AlpineInstalledParser::is_match(&PathBuf::from(p));

        assert!(accepts("/lib/apk/db/installed"));
        assert!(accepts("/var/lib/apk/db/installed"));
        assert!(!accepts("/lib/apk/db/status"));
        assert!(!accepts("installed"));
    }

    /// A fully populated record yields name, version, description, homepage, license,
    /// one party and a purl carrying the architecture qualifier.
    #[test]
    fn test_parse_alpine_package_basic() {
        let db_text = "C:Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=
P:alpine-baselayout-data
V:3.2.0-r22
A:x86_64
S:11435
I:73728
T:Alpine base dir structure and init scripts
U:https://git.alpinelinux.org/cgit/aports/tree/main/alpine-baselayout
L:GPL-2.0-only
o:alpine-baselayout
m:Natanael Copa <ncopa@alpinelinux.org>
t:1655134784
c:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.name.as_deref(), Some("alpine-baselayout-data"));
        assert_eq!(parsed.version.as_deref(), Some("3.2.0-r22"));
        assert_eq!(parsed.namespace.as_deref(), Some("alpine"));
        assert_eq!(
            parsed.description.as_deref(),
            Some("Alpine base dir structure and init scripts")
        );
        assert_eq!(
            parsed.homepage_url.as_deref(),
            Some("https://git.alpinelinux.org/cgit/aports/tree/main/alpine-baselayout")
        );
        assert_eq!(
            parsed.extracted_license_statement.as_deref(),
            Some("GPL-2.0-only")
        );

        assert_eq!(parsed.parties.len(), 1);
        let party = &parsed.parties[0];
        assert_eq!(party.name.as_deref(), Some("Natanael Copa"));
        assert_eq!(party.email.as_deref(), Some("ncopa@alpinelinux.org"));

        let purl = parsed.purl.as_deref().unwrap();
        assert!(purl.contains("alpine-baselayout-data"));
        assert!(purl.contains("arch=x86_64"));
    }

    /// A `D:` line produces a dependency whose purl names the required package.
    #[test]
    fn test_parse_alpine_with_dependencies() {
        let db_text = "P:musl
V:1.2.3-r0
A:x86_64
D:scanelf so:libc.musl-x86_64.so.1

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.name.as_deref(), Some("musl"));
        assert_eq!(parsed.dependencies.len(), 1);
        let dep_purl = parsed.dependencies[0].purl.as_deref().unwrap();
        assert!(dep_purl.contains("scanelf"));
    }

    /// The purl builder emits the `arch` qualifier only when an architecture is given.
    #[test]
    fn test_build_alpine_purl() {
        let with_arch = build_alpine_purl("busybox", Some("1.31.1-r9"), Some("x86_64"));
        assert_eq!(
            with_arch.as_deref(),
            Some("pkg:alpine/busybox@1.31.1-r9?arch=x86_64")
        );

        let without_arch = build_alpine_purl("package", Some("1.0"), None);
        assert_eq!(without_arch.as_deref(), Some("pkg:alpine/package@1.0"));
    }

    /// Checksum, sizes, build timestamp and commit are surfaced through `extra_data`.
    #[test]
    fn test_parse_alpine_extra_data() {
        let db_text = "P:test-package
V:1.0
C:base64checksum==
S:12345
I:67890
t:1234567890
c:gitcommithash

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert!(parsed.extra_data.is_some());
        let extra = parsed.extra_data.as_ref().unwrap();
        let expected_entries = [
            ("checksum", "base64checksum=="),
            ("compressed_size", "12345"),
            ("installed_size", "67890"),
            ("build_timestamp", "1234567890"),
            ("git_commit", "gitcommithash"),
        ];
        for (key, expected) in expected_entries {
            assert_eq!(extra[key], expected);
        }
    }

    /// Keys differing only in case (`C`/`c`, `T`/`t`) map to distinct fields.
    #[test]
    fn test_parse_alpine_case_sensitive_keys() {
        let db_text = "C:Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=
P:test-pkg
V:1.0
T:A test description
t:1655134784
c:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.description.as_deref(), Some("A test description"));

        let extra = parsed.extra_data.as_ref().unwrap();
        let expected_entries = [
            ("checksum", "Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU="),
            ("build_timestamp", "1655134784"),
            ("git_commit", "cb70ca5c6d6db0399d2dd09189c5d57827bce5cd"),
        ];
        for (key, expected) in expected_entries {
            assert_eq!(extra[key], expected);
        }
    }

    /// Blank-line separated records are returned as separate packages, in file order.
    #[test]
    fn test_parse_alpine_multiple_packages() {
        let db_text = "P:package1
V:1.0
A:x86_64

P:package2
V:2.0
A:aarch64

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_packages(&db_path);

        let expected = [("package1", "1.0"), ("package2", "2.0")];
        assert_eq!(parsed.len(), expected.len());
        for (pkg, (name, version)) in parsed.iter().zip(expected) {
            assert_eq!(pkg.name.as_deref(), Some(name));
            assert_eq!(pkg.version.as_deref(), Some(version));
        }
    }

    /// `F:`/`R:`/`Z:` triples become file references with joined paths and a SHA-1 value.
    #[test]
    fn test_parse_alpine_file_references() {
        let db_text = "P:test-pkg
V:1.0
F:usr/bin
R:test
Z:Q1WTc55xfvPogzA0YUV24D0Ym+MKE=
F:etc
R:config
Z:Q1pcfTfDNEbNKQc2s1tia7da05M8Q=

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        let expected_paths = ["usr/bin/test", "etc/config"];
        assert_eq!(parsed.file_references.len(), expected_paths.len());
        for (file_ref, expected_path) in parsed.file_references.iter().zip(expected_paths) {
            assert_eq!(file_ref.path, expected_path);
            assert!(file_ref.sha1.is_some());
        }
    }

    /// A record with only name and version leaves the optional fields unset.
    #[test]
    fn test_parse_alpine_empty_fields() {
        let db_text = "P:minimal-package
V:1.0

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.name.as_deref(), Some("minimal-package"));
        assert_eq!(parsed.version.as_deref(), Some("1.0"));
        assert!(parsed.description.is_none());
        assert!(parsed.homepage_url.is_none());
        assert!(parsed.dependencies.is_empty());
    }

    /// The `o:` origin is reported as a single source-package purl.
    #[test]
    fn test_parse_alpine_origin_field() {
        let db_text = "P:busybox-ifupdown
V:1.35.0-r13
o:busybox
A:x86_64

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.name.as_deref(), Some("busybox-ifupdown"));
        let sources = &parsed.source_packages;
        assert_eq!(sources.len(), 1);
        assert_eq!(sources[0], "pkg:alpine/busybox");
    }

    /// The `U:` field is exposed as the homepage URL.
    #[test]
    fn test_parse_alpine_url_field() {
        let db_text = "P:openssl
V:1.1.1q-r0
U:https://www.openssl.org
A:x86_64

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(
            parsed.homepage_url.as_deref(),
            Some("https://www.openssl.org")
        );
    }

    /// Repeated `p:` lines are collected, in order, under the `providers` extra-data key.
    #[test]
    fn test_parse_alpine_provider_field() {
        let db_text = "P:some-package
V:1.0
p:cmd:binary=1.0
p:so:libtest.so.1

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        let extra = parsed.extra_data.as_ref().unwrap();
        let providers = extra
            .get("providers")
            .and_then(|value| value.as_array())
            .unwrap();
        let provider_names: Vec<_> = providers.iter().map(|value| value.as_str()).collect();
        assert_eq!(
            provider_names,
            [Some("cmd:binary=1.0"), Some("so:libtest.so.1")]
        );
    }

    /// The archive parser accepts `.apk` file names and rejects the other names checked here.
    #[test]
    fn test_alpine_apk_parser_is_match() {
        let accepts = |p: &str| AlpineApkParser::is_match(&PathBuf::from(p));

        assert!(accepts("package.apk"));
        assert!(accepts("/path/to/app-1.0.apk"));
        assert!(!accepts("package.tar.gz"));
        assert!(!accepts("installed"));
    }

    /// The APKBUILD parser accepts the exact upper-case `APKBUILD` name and rejects
    /// the lower-case and suffixed variants checked here.
    #[test]
    fn test_alpine_apkbuild_parser_is_match() {
        let accepts = |p: &str| AlpineApkbuildParser::is_match(&PathBuf::from(p));

        assert!(accepts("APKBUILD"));
        assert!(accepts("/path/to/APKBUILD"));
        assert!(!accepts("apkbuild"));
        assert!(!accepts("APKBUILD.txt"));
    }

    /// Parses the `icu` APKBUILD fixture and checks metadata, license expression,
    /// dependency scopes and the presence of source/checksum extra data.
    #[test]
    fn test_parse_apkbuild_icu_reference() {
        let fixture =
            PathBuf::from("testdata/alpine-fixtures/apkbuild/alpine14/main/icu/APKBUILD");
        let parsed = AlpineApkbuildParser::extract_first_package(&fixture);

        assert_eq!(parsed.datasource_id, Some(DatasourceId::AlpineApkbuild));
        assert_eq!(parsed.name.as_deref(), Some("icu"));
        assert_eq!(parsed.version.as_deref(), Some("67.1-r2"));
        assert_eq!(
            parsed.description.as_deref(),
            Some("International Components for Unicode library")
        );
        assert_eq!(
            parsed.homepage_url.as_deref(),
            Some("http://site.icu-project.org/")
        );
        assert_eq!(
            parsed.extracted_license_statement.as_deref(),
            Some("MIT ICU Unicode-TOU")
        );
        assert_eq!(
            parsed.declared_license_expression_spdx.as_deref(),
            Some("MIT AND ICU AND Unicode-TOU")
        );

        let deps = &parsed.dependencies;
        assert_eq!(deps.len(), 3);

        let dev_dep = deps
            .iter()
            .find(|dep| dep.scope.as_deref() == Some("depends_dev"))
            .expect("depends_dev dependency missing");
        assert_eq!(dev_dep.purl.as_deref(), Some("pkg:alpine/icu"));
        assert_eq!(dev_dep.is_runtime, Some(false));
        assert_eq!(dev_dep.is_optional, Some(true));

        let check_purls: Vec<_> = deps
            .iter()
            .filter(|dep| dep.scope.as_deref() == Some("checkdepends"))
            .filter_map(|dep| dep.purl.as_deref())
            .collect();
        for wanted in ["pkg:alpine/diffutils", "pkg:alpine/python3"] {
            assert!(check_purls.contains(&wanted));
        }

        let extra = parsed.extra_data.as_ref().unwrap();
        for key in ["sources", "checksums"] {
            assert!(extra.contains_key(key));
        }
    }

    /// A `custom:multiple` license maps to the unknown-license reference while the
    /// detection keeps the raw statement as its matched text.
    #[test]
    fn test_parse_apkbuild_custom_multiple_license_uses_raw_matched_text() {
        let fixture = PathBuf::from(
            "testdata/alpine-fixtures/apkbuild/alpine13/main/linux-firmware/APKBUILD",
        );
        let parsed = AlpineApkbuildParser::extract_first_package(&fixture);

        assert_eq!(parsed.name.as_deref(), Some("linux-firmware"));
        assert_eq!(parsed.version.as_deref(), Some("20201218-r0"));
        assert_eq!(
            parsed.extracted_license_statement.as_deref(),
            Some("custom:multiple")
        );
        assert_eq!(
            parsed.declared_license_expression.as_deref(),
            Some("unknown-license-reference")
        );
        assert_eq!(
            parsed.declared_license_expression_spdx.as_deref(),
            Some("LicenseRef-provenant-unknown-license-reference")
        );

        let first_match = &parsed.license_detections[0].matches[0];
        assert_eq!(first_match.matched_text.as_deref(), Some("custom:multiple"));
    }

    /// A package that lists no files in the full fixture is still returned with a purl.
    #[test]
    fn test_parse_alpine_no_files_package_still_detected() {
        let fixture = PathBuf::from("testdata/alpine-fixtures/full-installed/installed");
        let raw_db = std::fs::read_to_string(&fixture).expect("read installed db fixture");

        let libc_utils = parse_alpine_installed_db(&raw_db)
            .into_iter()
            .find(|pkg| pkg.name.as_deref() == Some("libc-utils"))
            .expect("libc-utils package should exist");

        assert!(libc_utils.file_references.is_empty());
        let purl_names_package = libc_utils
            .purl
            .as_deref()
            .is_some_and(|p| p.contains("libc-utils"));
        assert!(purl_names_package);
    }

    /// The `c:` commit hash is turned into a `git+https` aports commit URL.
    #[test]
    fn test_parse_alpine_commit_generates_https_vcs_url() {
        let db_text =
            "P:test-package\nV:1.0-r0\nA:x86_64\nc:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd\n";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        let expected_url = "git+https://git.alpinelinux.org/aports/commit/?id=cb70ca5c6d6db0399d2dd09189c5d57827bce5cd";
        assert_eq!(parsed.vcs_url.as_deref(), Some(expected_url));
    }

    /// A dot-prefixed meta package is flagged `is_virtual`, keeps its six dependencies,
    /// and treats the empty `U:` and `L:` values as absent.
    #[test]
    fn test_parse_alpine_virtual_package() {
        let db_text = "P:.postgis-rundeps
V:20210104.190748
A:noarch
S:0
I:0
T:virtual meta package
U:
L:
D:json-c geos gdal proj protobuf-c libstdc++

";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.name.as_deref(), Some(".postgis-rundeps"));
        assert_eq!(parsed.version.as_deref(), Some("20210104.190748"));
        assert_eq!(parsed.description.as_deref(), Some("virtual meta package"));

        assert!(parsed.extra_data.is_some());
        let extra = parsed.extra_data.as_ref().unwrap();
        let is_virtual = extra.get("is_virtual").and_then(|value| value.as_bool());
        assert_eq!(is_virtual, Some(true));

        assert_eq!(parsed.dependencies.len(), 6);
        assert!(parsed.homepage_url.is_none());
        assert!(parsed.extracted_license_statement.is_none());
    }

    /// An `L:MIT` entry in the installed db is normalized to `mit` / `MIT` with one detection.
    #[test]
    fn test_installed_db_license_normalization() {
        let db_text = "P:test-package\nV:1.0-r0\nA:x86_64\nL:MIT\n\n";
        let (_guard, db_path) = create_temp_installed_db(db_text);
        let parsed = AlpineInstalledParser::extract_first_package(&db_path);

        assert_eq!(parsed.extracted_license_statement.as_deref(), Some("MIT"));
        assert_eq!(parsed.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            parsed.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(parsed.license_detections.len(), 1);
    }

    /// The `.apk` archive fixture gets the same license normalization as the installed db.
    #[test]
    fn test_apk_archive_license_normalization() {
        let fixture = PathBuf::from("testdata/alpine/apk/basic/test-package-1.0-r0.apk");
        let parsed = AlpineApkParser::extract_first_package(&fixture);

        assert_eq!(parsed.extracted_license_statement.as_deref(), Some("MIT"));
        assert_eq!(parsed.declared_license_expression.as_deref(), Some("mit"));
        assert_eq!(
            parsed.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(parsed.license_detections.len(), 1);
    }
}
1335
// Invokes `register_parser!` for the installed-database and `.apk` archive inputs.
// NOTE(review): the meaning of each positional argument is defined by the macro, which is
// not visible here. They appear to be: description, path glob patterns, package type,
// primary language (empty for this entry) and documentation URL — confirm against the macro.
crate::register_parser!(
    "Alpine Linux package (installed db and .apk archive)",
    &["**/lib/apk/db/installed", "**/*.apk"],
    "alpine",
    "",
    Some("https://wiki.alpinelinux.org/wiki/Apk_spec"),
);
1343
// Invokes `register_parser!` for `APKBUILD` recipe files.
// NOTE(review): the meaning of each positional argument is defined by the macro, which is
// not visible here. They appear to be: description, path glob patterns, package type,
// primary language and documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Alpine Linux APKBUILD recipe",
    &["**/APKBUILD"],
    "alpine",
    "Shell",
    Some("https://wiki.alpinelinux.org/wiki/APKBUILD_Reference"),
);