Skip to main content

provenant/parsers/
alpine.rs

//! Parser for Alpine Linux package metadata files.
//!
//! Extracts package metadata from Alpine Linux package database files,
//! `APKBUILD` files, and `.apk` archives using the APK package manager format.
//!
//! # Supported Formats
//! - `/lib/apk/db/installed` (Installed package database)
//! - `APKBUILD` (package variables such as `pkgname`, `pkgver`, `depends`)
//! - `*.apk` (gzip-compressed package archives containing a `.PKGINFO` file)
//!
//! # Key Features
//! - Installed package metadata extraction from system database
//! - Dependency tracking from provides/requires fields
//! - Author and maintainer information extraction
//! - License information parsing
//! - Package URL (purl) generation
//!
//! # Implementation Notes
//! - Uses custom case-sensitive key-value parser (not the generic `rfc822` module)
//! - Database stored in text format with multi-paragraph records
//! - Graceful error handling with `warn!()` logs
20
21use std::collections::HashMap;
22use std::path::Path;
23
24use crate::parser_warn as warn;
25use crate::utils::magic;
26
27use crate::models::{
28    DatasourceId, Dependency, FileReference, LicenseDetection, PackageData, PackageType, Party,
29};
30use crate::parsers::utils::{read_file_to_string, split_name_email};
31
32use super::PackageParser;
33use super::license_normalization::{
34    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
35    build_declared_license_data_from_pair, combine_normalized_licenses,
36    empty_declared_license_data, normalize_declared_license_key,
37};
38
39const PACKAGE_TYPE: PackageType = PackageType::Alpine;
40
41fn default_package_data(datasource_id: DatasourceId) -> PackageData {
42    PackageData {
43        package_type: Some(PACKAGE_TYPE),
44        datasource_id: Some(datasource_id),
45        ..Default::default()
46    }
47}
48
49/// Parser for Alpine Linux installed package database
50pub struct AlpineInstalledParser;
51
52pub struct AlpineApkbuildParser;
53
54impl PackageParser for AlpineInstalledParser {
55    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
56
57    fn is_match(path: &Path) -> bool {
58        path.to_str()
59            .map(|p| p.contains("/lib/apk/db/") && p.ends_with("installed"))
60            .unwrap_or(false)
61    }
62
63    fn extract_packages(path: &Path) -> Vec<PackageData> {
64        let content = match read_file_to_string(path) {
65            Ok(c) => c,
66            Err(e) => {
67                warn!("Failed to read Alpine installed db {:?}: {}", path, e);
68                return vec![default_package_data(DatasourceId::AlpineInstalledDb)];
69            }
70        };
71
72        parse_alpine_installed_db(&content)
73    }
74}
75
76impl PackageParser for AlpineApkbuildParser {
77    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
78
79    fn is_match(path: &Path) -> bool {
80        path.file_name().and_then(|n| n.to_str()) == Some("APKBUILD")
81    }
82
83    fn extract_packages(path: &Path) -> Vec<PackageData> {
84        let content = match read_file_to_string(path) {
85            Ok(c) => c,
86            Err(e) => {
87                warn!("Failed to read APKBUILD {:?}: {}", path, e);
88                return vec![default_package_data(DatasourceId::AlpineApkbuild)];
89            }
90        };
91
92        vec![parse_apkbuild(&content)]
93    }
94}
95
96fn parse_alpine_installed_db(content: &str) -> Vec<PackageData> {
97    let raw_paragraphs: Vec<&str> = content
98        .split("\n\n")
99        .filter(|p| !p.trim().is_empty())
100        .collect();
101
102    let mut all_packages = Vec::new();
103
104    for raw_text in &raw_paragraphs {
105        let headers = parse_alpine_headers(raw_text);
106        let pkg = parse_alpine_package_paragraph(&headers, raw_text);
107        if pkg.name.is_some() {
108            all_packages.push(pkg);
109        }
110    }
111
112    if all_packages.is_empty() {
113        return vec![default_package_data(DatasourceId::AlpineInstalledDb)];
114    }
115
116    all_packages
117}
118
/// Collects the `key:value` lines of one paragraph, keeping keys case-sensitive.
///
/// The installed DB distinguishes single-letter keys by case (`T:` is the
/// description while `t:` is the timestamp, `C:` the checksum while `c:` the
/// git commit), so keys must not be lowercased the way the generic rfc822
/// parser does.
///
/// Each line is split at its first `:`; key and value are trimmed, and lines
/// with an empty key or empty value are ignored. Repeated keys accumulate their
/// values in order of appearance.
fn parse_alpine_headers(content: &str) -> HashMap<String, Vec<String>> {
    let mut headers: HashMap<String, Vec<String>> = HashMap::new();

    let fields = content
        .lines()
        .filter_map(|line| line.split_once(':'))
        .map(|(key, value)| (key.trim(), value.trim()))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty());

    for (key, value) in fields {
        headers
            .entry(key.to_string())
            .or_default()
            .push(value.to_string());
    }

    headers
}
146
/// Returns the first value recorded for `key`, trimmed, or `None` when the key
/// is absent or has no values.
fn get_first(headers: &HashMap<String, Vec<String>>, key: &str) -> Option<String> {
    let first = headers.get(key)?.first()?;
    Some(first.trim().to_string())
}
153
/// Returns every value recorded for `key`, skipping whitespace-only entries.
///
/// Values are returned as stored (not trimmed); an absent key gives an empty
/// vector.
fn get_all(headers: &HashMap<String, Vec<String>>, key: &str) -> Vec<String> {
    match headers.get(key) {
        Some(values) => values
            .iter()
            .filter(|value| !value.trim().is_empty())
            .cloned()
            .collect(),
        None => Vec::new(),
    }
}
163
164fn parse_alpine_package_paragraph(
165    headers: &HashMap<String, Vec<String>>,
166    raw_text: &str,
167) -> PackageData {
168    let name = get_first(headers, "P");
169    let version = get_first(headers, "V");
170    let description = get_first(headers, "T");
171    let homepage_url = get_first(headers, "U");
172    let architecture = get_first(headers, "A");
173
174    let is_virtual = description
175        .as_ref()
176        .is_some_and(|d| d == "virtual meta package");
177
178    let namespace = Some("alpine".to_string());
179    let mut parties = Vec::new();
180
181    if let Some(maintainer) = get_first(headers, "m") {
182        let (name_opt, email_opt) = split_name_email(&maintainer);
183        parties.push(Party {
184            r#type: None,
185            role: Some("maintainer".to_string()),
186            name: name_opt,
187            email: email_opt,
188            url: None,
189            organization: None,
190            organization_url: None,
191            timezone: None,
192        });
193    }
194
195    let extracted_license_statement = get_first(headers, "L");
196    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
197        build_alpine_license_data(extracted_license_statement.as_deref());
198
199    let source_packages = if let Some(origin) = get_first(headers, "o") {
200        vec![format!("pkg:alpine/{}", origin)]
201    } else {
202        Vec::new()
203    };
204    let vcs_url = get_first(headers, "c")
205        .map(|commit| format!("git+https://git.alpinelinux.org/aports/commit/?id={commit}"));
206
207    let mut dependencies = Vec::new();
208    for dep in get_all(headers, "D") {
209        for dep_str in dep.split_whitespace() {
210            if dep_str.starts_with("so:") || dep_str.starts_with("cmd:") {
211                continue;
212            }
213
214            dependencies.push(Dependency {
215                purl: Some(format!("pkg:alpine/{}", dep_str)),
216                extracted_requirement: None,
217                scope: Some("install".to_string()),
218                is_runtime: Some(true),
219                is_optional: Some(false),
220                is_direct: Some(true),
221                resolved_package: None,
222                extra_data: None,
223                is_pinned: Some(false),
224            });
225        }
226    }
227
228    let mut extra_data = HashMap::new();
229
230    if is_virtual {
231        extra_data.insert("is_virtual".to_string(), true.into());
232    }
233
234    if let Some(checksum) = get_first(headers, "C") {
235        extra_data.insert("checksum".to_string(), checksum.into());
236    }
237
238    if let Some(size) = get_first(headers, "S") {
239        extra_data.insert("compressed_size".to_string(), size.into());
240    }
241
242    if let Some(installed_size) = get_first(headers, "I") {
243        extra_data.insert("installed_size".to_string(), installed_size.into());
244    }
245
246    if let Some(timestamp) = get_first(headers, "t") {
247        extra_data.insert("build_timestamp".to_string(), timestamp.into());
248    }
249
250    if let Some(commit) = get_first(headers, "c") {
251        extra_data.insert("git_commit".to_string(), commit.into());
252    }
253
254    let providers = extract_providers(raw_text);
255    if !providers.is_empty() {
256        let provider_list: Vec<serde_json::Value> =
257            providers.into_iter().map(|s| s.into()).collect();
258        extra_data.insert("providers".to_string(), provider_list.into());
259    }
260
261    let file_references = extract_file_references(raw_text);
262
263    PackageData {
264        datasource_id: Some(DatasourceId::AlpineInstalledDb),
265        package_type: Some(PACKAGE_TYPE),
266        namespace: namespace.clone(),
267        name: name.clone(),
268        version: version.clone(),
269        description,
270        homepage_url,
271        vcs_url,
272        parties,
273        declared_license_expression,
274        declared_license_expression_spdx,
275        license_detections,
276        extracted_license_statement,
277        source_packages,
278        dependencies,
279        file_references,
280        purl: name
281            .as_ref()
282            .and_then(|n| build_alpine_purl(n, version.as_deref(), architecture.as_deref())),
283        extra_data: if extra_data.is_empty() {
284            None
285        } else {
286            Some(extra_data)
287        },
288        ..Default::default()
289    }
290}
291
292fn parse_apkbuild(content: &str) -> PackageData {
293    let variables = parse_apkbuild_variables(content);
294
295    let name = variables.get("pkgname").cloned();
296    let version = match (variables.get("pkgver"), variables.get("pkgrel")) {
297        (Some(ver), Some(rel)) => Some(format!("{}-r{}", ver, rel)),
298        (Some(ver), None) => Some(ver.clone()),
299        _ => None,
300    };
301    let description = variables.get("pkgdesc").cloned();
302    let homepage_url = variables.get("url").cloned();
303    let extracted_license_statement = variables.get("license").cloned();
304    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
305        build_alpine_license_data(extracted_license_statement.as_deref());
306
307    let dependencies = parse_apkbuild_dependencies(&variables);
308
309    let mut extra_data = HashMap::new();
310    if let Some(source) = variables.get("source") {
311        let sources_value: Vec<serde_json::Value> = parse_apkbuild_sources(source)
312            .into_iter()
313            .map(|(file_name, url)| serde_json::json!({ "file_name": file_name, "url": url }))
314            .collect();
315        if !sources_value.is_empty() {
316            extra_data.insert(
317                "sources".to_string(),
318                serde_json::Value::Array(sources_value),
319            );
320        }
321    }
322    for (field, checksum_key) in [
323        ("sha512sums", "sha512"),
324        ("sha256sums", "sha256"),
325        ("md5sums", "md5"),
326    ] {
327        if let Some(checksums) = variables.get(field) {
328            let checksum_entries: Vec<serde_json::Value> = parse_apkbuild_checksums(checksums)
329                .into_iter()
330                .map(|(file_name, checksum)| serde_json::json!({ "file_name": file_name, checksum_key: checksum }))
331                .collect();
332            if !checksum_entries.is_empty() {
333                match extra_data.get_mut("checksums") {
334                    Some(serde_json::Value::Array(existing)) => existing.extend(checksum_entries),
335                    _ => {
336                        extra_data.insert(
337                            "checksums".to_string(),
338                            serde_json::Value::Array(checksum_entries),
339                        );
340                    }
341                }
342            }
343        }
344    }
345
346    PackageData {
347        datasource_id: Some(DatasourceId::AlpineApkbuild),
348        package_type: Some(PACKAGE_TYPE),
349        namespace: None,
350        name: name.clone(),
351        version: version.clone(),
352        description,
353        homepage_url,
354        extracted_license_statement,
355        declared_license_expression,
356        declared_license_expression_spdx,
357        license_detections,
358        dependencies,
359        purl: name
360            .as_deref()
361            .and_then(|n| build_alpine_purl(n, version.as_deref(), None)),
362        extra_data: (!extra_data.is_empty()).then_some(extra_data),
363        ..default_package_data(DatasourceId::AlpineApkbuild)
364    }
365}
366
367fn parse_apkbuild_variables(content: &str) -> HashMap<String, String> {
368    let mut raw = HashMap::new();
369    let mut lines = content.lines().peekable();
370    let mut brace_depth = 0usize;
371
372    while let Some(line) = lines.next() {
373        let trimmed = line.trim();
374        if trimmed.is_empty() || trimmed.starts_with('#') {
375            continue;
376        }
377        if trimmed.ends_with("(){") || trimmed.ends_with("() {") {
378            brace_depth += 1;
379            continue;
380        }
381        if brace_depth > 0 {
382            brace_depth += trimmed.chars().filter(|c| *c == '{').count();
383            brace_depth = brace_depth.saturating_sub(trimmed.chars().filter(|c| *c == '}').count());
384            continue;
385        }
386        let Some((name, value)) = trimmed.split_once('=') else {
387            continue;
388        };
389        let mut value = value.trim().to_string();
390        if value.starts_with('"') && !value.ends_with('"') {
391            while let Some(next) = lines.peek() {
392                value.push('\n');
393                value.push_str(next);
394                let current = lines.next().unwrap();
395                if current.trim_end().ends_with('"') {
396                    break;
397                }
398            }
399        }
400        raw.insert(name.trim().to_string(), value);
401    }
402
403    let mut resolved = HashMap::new();
404    for key in [
405        "pkgname",
406        "pkgver",
407        "pkgrel",
408        "pkgdesc",
409        "url",
410        "license",
411        "source",
412        "depends",
413        "depends_dev",
414        "makedepends",
415        "makedepends_build",
416        "makedepends_host",
417        "checkdepends",
418        "sha512sums",
419        "sha256sums",
420        "md5sums",
421    ] {
422        if let Some(value) = raw.get(key) {
423            resolved.insert(key.to_string(), resolve_apkbuild_value(value, &raw));
424        }
425    }
426    resolved
427}
428
/// Expands variable references in `value` using the raw assignments in
/// `variables`.
///
/// Supported forms are `$name`, `${name}`, `${name//./-}`, `${name//./_}` and
/// `${name::8}`. Wrapping quotes are removed from `value` and from every
/// variable value. Substitution is repeated (at most 8 passes) until the text
/// stops changing, so references introduced by a replacement are expanded too.
///
/// Variables are applied longest name first, so the result is deterministic
/// and `$pkgname` is never clobbered by a shorter variable such as `$pkg`.
/// Bare `$name` matching is still purely textual: a reference to an
/// *undefined* variable that merely starts with a defined name is not
/// protected.
fn resolve_apkbuild_value(value: &str, variables: &HashMap<String, String>) -> String {
    const MAX_PASSES: usize = 8;

    let ordered = ordered_apkbuild_variables(variables);

    // Each variable's replacement text is the same on every pass, so it is
    // computed once up front: the raw value with one round of expansion.
    let replacements: Vec<(&str, String)> = ordered
        .iter()
        .map(|&(name, raw_value)| {
            let expanded = expand_apkbuild_variables_once(raw_value, &ordered);
            (name, strip_wrapping_quotes(&expanded).to_string())
        })
        .collect();

    let mut resolved = strip_wrapping_quotes(value.trim()).to_string();
    for _ in 0..MAX_PASSES {
        let previous = resolved.clone();
        for (name, replacement) in &replacements {
            resolved = substitute_apkbuild_variable(&resolved, name, replacement);
        }
        if resolved == previous {
            break;
        }
    }
    resolved
}

/// Performs a single round of substitution on `value`, replacing references
/// with the variables' raw (unexpanded, unquoted) values.
fn resolve_apkbuild_value_no_recursion(value: &str, variables: &HashMap<String, String>) -> String {
    expand_apkbuild_variables_once(value, &ordered_apkbuild_variables(variables))
}

/// Returns `(name, raw value)` pairs sorted by descending name length, then by
/// name, which gives substitution a stable order independent of hash order.
fn ordered_apkbuild_variables(variables: &HashMap<String, String>) -> Vec<(&str, &str)> {
    let mut ordered: Vec<(&str, &str)> = variables
        .iter()
        .map(|(name, raw_value)| (name.as_str(), raw_value.as_str()))
        .collect();
    ordered.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));
    ordered
}

/// Applies every variable in `ordered` to `value` once, using raw values with
/// their wrapping quotes removed.
fn expand_apkbuild_variables_once(value: &str, ordered: &[(&str, &str)]) -> String {
    let mut resolved = strip_wrapping_quotes(value.trim()).to_string();
    for &(name, raw_value) in ordered {
        let replacement = strip_wrapping_quotes(raw_value.trim());
        resolved = substitute_apkbuild_variable(&resolved, name, replacement);
    }
    resolved
}

/// Replaces every supported reference to `name` in `text` with `value`.
///
/// The braced forms are handled before the bare `$name` form.
fn substitute_apkbuild_variable(text: &str, name: &str, value: &str) -> String {
    // Every pattern starts with `$`; skip the work when none can match.
    if !text.contains('$') {
        return text.to_string();
    }

    text.replace(&format!("${{{name}//./-}}"), &value.replace('.', "-"))
        .replace(&format!("${{{name}//./_}}"), &value.replace('.', "_"))
        .replace(
            &format!("${{{name}::8}}"),
            &value.chars().take(8).collect::<String>(),
        )
        .replace(&format!("${{{name}}}"), value)
        .replace(&format!("${name}"), value)
}

/// Removes one pair of matching wrapping quotes (`"…"` or `'…'`) from `value`.
///
/// The input is returned unchanged when it is not wrapped in a matching pair.
fn strip_wrapping_quotes(value: &str) -> &str {
    value
        .strip_prefix('"')
        .and_then(|inner| inner.strip_suffix('"'))
        .or_else(|| {
            value
                .strip_prefix('\'')
                .and_then(|inner| inner.strip_suffix('\''))
        })
        .unwrap_or(value)
}
482
/// Splits a `source` value into `(file_name, url)` pairs, one per
/// whitespace-separated entry.
///
/// - `name::url` gives both the file name and the URL;
/// - an entry containing `://` is a URL without a file name;
/// - anything else is a file name without a URL.
fn parse_apkbuild_sources(value: &str) -> Vec<(Option<String>, Option<String>)> {
    let mut sources = Vec::new();

    for entry in value.split_whitespace() {
        let parsed = match entry.split_once("::") {
            Some((file_name, url)) => (Some(file_name.to_string()), Some(url.to_string())),
            None if entry.contains("://") => (None, Some(entry.to_string())),
            None => (Some(entry.to_string()), None),
        };
        sources.push(parsed);
    }

    sources
}
498
/// Reads a checksum list as alternating `checksum file_name` tokens and
/// returns `(file_name, checksum)` pairs.
///
/// Tokens are separated by any whitespace, including newlines; a trailing
/// token without a partner is ignored.
fn parse_apkbuild_checksums(value: &str) -> Vec<(String, String)> {
    let mut tokens = value.split_whitespace();
    let mut entries = Vec::new();

    while let (Some(checksum), Some(file_name)) = (tokens.next(), tokens.next()) {
        entries.push((file_name.to_string(), checksum.to_string()));
    }

    entries
}
514
515fn build_alpine_license_data(
516    extracted: Option<&str>,
517) -> (Option<String>, Option<String>, Vec<LicenseDetection>) {
518    let Some(extracted) = extracted.map(str::trim).filter(|s| !s.is_empty()) else {
519        return empty_declared_license_data();
520    };
521
522    if extracted == "custom:multiple" {
523        return build_declared_license_data_from_pair(
524            "unknown-license-reference",
525            "LicenseRef-provenant-unknown-license-reference",
526            DeclaredLicenseMatchMetadata::single_line(extracted),
527        );
528    }
529
530    let normalized_tokens = extracted
531        .split_whitespace()
532        .filter(|part| *part != "AND")
533        .map(normalize_alpine_license_token)
534        .collect::<Option<Vec<_>>>();
535
536    let Some(normalized_tokens) = normalized_tokens else {
537        return empty_declared_license_data();
538    };
539
540    let Some(combined) = combine_normalized_licenses(normalized_tokens, " AND ") else {
541        return empty_declared_license_data();
542    };
543
544    build_declared_license_data(
545        combined,
546        DeclaredLicenseMatchMetadata::single_line(extracted),
547    )
548}
549
550fn normalize_alpine_license_token(token: &str) -> Option<NormalizedDeclaredLicense> {
551    match token {
552        "ICU" => Some(NormalizedDeclaredLicense::new("x11", "ICU")),
553        "Unicode-TOU" => Some(NormalizedDeclaredLicense::new("unicode-tou", "Unicode-TOU")),
554        "Ruby" => Some(NormalizedDeclaredLicense::new("ruby", "Ruby")),
555        "BSD-2-Clause" => Some(NormalizedDeclaredLicense::new(
556            "bsd-simplified",
557            "BSD-2-Clause",
558        )),
559        "BSD-3-Clause" => Some(NormalizedDeclaredLicense::new("bsd-new", "BSD-3-Clause")),
560        other => normalize_declared_license_key(other),
561    }
562}
563
564fn parse_apkbuild_dependencies(variables: &HashMap<String, String>) -> Vec<Dependency> {
565    let mut dependencies = Vec::new();
566
567    for (field, scope, is_runtime, is_optional) in [
568        ("depends", "depends", true, false),
569        ("depends_dev", "depends_dev", false, true),
570        ("makedepends", "makedepends", false, true),
571        ("makedepends_build", "makedepends_build", false, true),
572        ("makedepends_host", "makedepends_host", false, true),
573        ("checkdepends", "checkdepends", false, true),
574    ] {
575        let Some(value) = variables.get(field) else {
576            continue;
577        };
578
579        for dep_str in value.split_whitespace() {
580            let dep_str = dep_str.trim();
581            if dep_str.is_empty() {
582                continue;
583            }
584
585            let dep_name = dep_str
586                .split(['<', '>', '=', '!', '~'])
587                .next()
588                .unwrap_or(dep_str)
589                .trim();
590            if dep_name.is_empty() {
591                continue;
592            }
593
594            dependencies.push(Dependency {
595                purl: build_alpine_purl(dep_name, None, None),
596                extracted_requirement: Some(dep_str.to_string()),
597                scope: Some(scope.to_string()),
598                is_runtime: Some(is_runtime),
599                is_optional: Some(is_optional),
600                is_pinned: Some(dep_str.contains('=')),
601                is_direct: Some(true),
602                resolved_package: None,
603                extra_data: None,
604            });
605        }
606    }
607
608    dependencies
609}
610
611fn extract_file_references(raw_text: &str) -> Vec<FileReference> {
612    let mut file_references = Vec::new();
613    let mut current_dir = String::new();
614    let mut current_file: Option<FileReference> = None;
615
616    for line in raw_text.lines() {
617        if line.is_empty() {
618            continue;
619        }
620
621        if let Some((field_type, value)) = line.split_once(':') {
622            let value = value.trim();
623            match field_type {
624                "F" => {
625                    if let Some(file) = current_file.take() {
626                        file_references.push(file);
627                    }
628                    current_dir = value.to_string();
629                }
630                "R" => {
631                    if let Some(file) = current_file.take() {
632                        file_references.push(file);
633                    }
634
635                    let path = if current_dir.is_empty() {
636                        value.to_string()
637                    } else {
638                        format!("{}/{}", current_dir, value)
639                    };
640
641                    current_file = Some(FileReference {
642                        path,
643                        size: None,
644                        sha1: None,
645                        md5: None,
646                        sha256: None,
647                        sha512: None,
648                        extra_data: None,
649                    });
650                }
651                "Z" => {
652                    if let Some(ref mut file) = current_file
653                        && value.starts_with("Q1")
654                    {
655                        use base64::Engine;
656                        if let Ok(decoded) =
657                            base64::engine::general_purpose::STANDARD.decode(&value[2..])
658                        {
659                            let hex_string = decoded
660                                .iter()
661                                .map(|b| format!("{:02x}", b))
662                                .collect::<String>();
663                            file.sha1 = Some(hex_string);
664                        }
665                    }
666                }
667                "a" => {
668                    if let Some(ref mut file) = current_file {
669                        let mut extra = HashMap::new();
670                        extra.insert(
671                            "attributes".to_string(),
672                            serde_json::Value::String(value.to_string()),
673                        );
674                        file.extra_data = Some(extra);
675                    }
676                }
677                _ => {}
678            }
679        }
680    }
681
682    if let Some(file) = current_file {
683        file_references.push(file);
684    }
685
686    file_references
687}
688
/// Returns every whitespace-separated token found on lines that start with
/// `p:`, in order of appearance.
fn extract_providers(raw_text: &str) -> Vec<String> {
    raw_text
        .lines()
        .filter_map(|line| line.strip_prefix("p:"))
        .flat_map(str::split_whitespace)
        .map(str::to_string)
        .collect()
}
704
705fn build_alpine_purl(
706    name: &str,
707    version: Option<&str>,
708    architecture: Option<&str>,
709) -> Option<String> {
710    use packageurl::PackageUrl;
711
712    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
713
714    if let Some(ver) = version {
715        purl.with_version(ver).ok()?;
716    }
717
718    if let Some(arch) = architecture {
719        purl.add_qualifier("arch", arch).ok()?;
720    }
721
722    Some(purl.to_string())
723}
724
725/// Parser for Alpine Linux .apk package archives
726pub struct AlpineApkParser;
727
728impl PackageParser for AlpineApkParser {
729    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
730
731    fn is_match(path: &Path) -> bool {
732        path.extension().and_then(|e| e.to_str()) == Some("apk")
733            && magic::is_gzip(path)
734            && !magic::is_zip(path)
735    }
736
737    fn extract_packages(path: &Path) -> Vec<PackageData> {
738        vec![match extract_apk_archive(path) {
739            Ok(data) => data,
740            Err(e) => {
741                warn!("Failed to extract .apk archive {:?}: {}", path, e);
742                PackageData {
743                    package_type: Some(PACKAGE_TYPE),
744                    datasource_id: Some(DatasourceId::AlpineApkArchive),
745                    ..Default::default()
746                }
747            }
748        }]
749    }
750}
751
752fn extract_apk_archive(path: &Path) -> Result<PackageData, String> {
753    use flate2::read::GzDecoder;
754    use std::io::Read;
755
756    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .apk file: {}", e))?;
757
758    let decoder = GzDecoder::new(file);
759    let mut archive = tar::Archive::new(decoder);
760
761    for entry_result in archive
762        .entries()
763        .map_err(|e| format!("Failed to read tar entries: {}", e))?
764    {
765        let mut entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
766
767        let entry_path = entry
768            .path()
769            .map_err(|e| format!("Failed to get entry path: {}", e))?;
770
771        if entry_path.ends_with(".PKGINFO") {
772            let mut content = String::new();
773            entry
774                .read_to_string(&mut content)
775                .map_err(|e| format!("Failed to read .PKGINFO: {}", e))?;
776
777            return Ok(parse_pkginfo(&content));
778        }
779    }
780
781    Err(".apk archive does not contain .PKGINFO file".to_string())
782}
783
784fn parse_pkginfo(content: &str) -> PackageData {
785    let mut fields: HashMap<&str, Vec<&str>> = HashMap::new();
786
787    for line in content.lines() {
788        let line = line.trim();
789        if line.is_empty() || line.starts_with('#') {
790            continue;
791        }
792
793        if let Some((key, value)) = line.split_once(" = ") {
794            fields.entry(key.trim()).or_default().push(value.trim());
795        }
796    }
797
798    let name = fields
799        .get("pkgname")
800        .and_then(|v| v.first())
801        .map(|s| s.to_string());
802    let pkgver = fields.get("pkgver").and_then(|v| v.first());
803    let version = pkgver.map(|s| s.to_string());
804    let arch = fields
805        .get("arch")
806        .and_then(|v| v.first())
807        .map(|s| s.to_string());
808    let license = fields
809        .get("license")
810        .and_then(|v| v.first())
811        .map(|s| s.to_string());
812    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
813        build_alpine_license_data(license.as_deref());
814    let description = fields
815        .get("pkgdesc")
816        .and_then(|v| v.first())
817        .map(|s| s.to_string());
818    let homepage = fields
819        .get("url")
820        .and_then(|v| v.first())
821        .map(|s| s.to_string());
822    let origin = fields
823        .get("origin")
824        .and_then(|v| v.first())
825        .map(|s| s.to_string());
826    let maintainer_str = fields.get("maintainer").and_then(|v| v.first());
827
828    let mut parties = Vec::new();
829    if let Some(maint) = maintainer_str {
830        let (maint_name, maint_email) = split_name_email(maint);
831        parties.push(Party {
832            r#type: Some("person".to_string()),
833            role: Some("maintainer".to_string()),
834            name: maint_name,
835            email: maint_email,
836            url: None,
837            organization: None,
838            organization_url: None,
839            timezone: None,
840        });
841    }
842
843    let purl = name
844        .as_ref()
845        .and_then(|n| build_alpine_purl(n, version.as_deref(), arch.as_deref()));
846
847    let mut dependencies = Vec::new();
848    if let Some(depends_list) = fields.get("depend") {
849        for dep_str in depends_list {
850            let dep_name = dep_str.split_whitespace().next().unwrap_or(dep_str);
851            dependencies.push(Dependency {
852                purl: Some(format!("pkg:alpine/{}", dep_name)),
853                extracted_requirement: Some(dep_str.to_string()),
854                scope: Some("runtime".to_string()),
855                is_runtime: Some(true),
856                is_optional: Some(false),
857                is_pinned: None,
858                is_direct: Some(true),
859                resolved_package: None,
860                extra_data: None,
861            });
862        }
863    }
864
865    PackageData {
866        datasource_id: Some(DatasourceId::AlpineApkArchive),
867        package_type: Some(PACKAGE_TYPE),
868        namespace: Some("alpine".to_string()),
869        name,
870        version,
871        description,
872        homepage_url: homepage,
873        declared_license_expression,
874        declared_license_expression_spdx,
875        license_detections,
876        extracted_license_statement: license,
877        parties,
878        dependencies,
879        purl,
880        extra_data: origin.map(|o| {
881            let mut map = HashMap::new();
882            map.insert("origin".to_string(), serde_json::Value::String(o));
883            map
884        }),
885        ..Default::default()
886    }
887}
888
889#[cfg(test)]
890mod tests {
891    use super::*;
892    use std::io::Write;
893    use std::path::PathBuf;
894    use tempfile::TempDir;
895
896    /// Creates a temp file mimicking the Alpine installed db path structure.
897    /// Returns the TempDir (must be kept alive) and path to the file.
898    fn create_temp_installed_db(content: &str) -> (TempDir, PathBuf) {
899        let temp_dir = TempDir::new().expect("Failed to create temp dir");
900        let db_dir = temp_dir.path().join("lib/apk/db");
901        std::fs::create_dir_all(&db_dir).expect("Failed to create db dir");
902        let file_path = db_dir.join("installed");
903        let mut file = std::fs::File::create(&file_path).expect("Failed to create file");
904        file.write_all(content.as_bytes())
905            .expect("Failed to write content");
906        (temp_dir, file_path)
907    }
908
909    #[test]
910    fn test_alpine_parser_is_match() {
911        assert!(AlpineInstalledParser::is_match(&PathBuf::from(
912            "/lib/apk/db/installed"
913        )));
914        assert!(AlpineInstalledParser::is_match(&PathBuf::from(
915            "/var/lib/apk/db/installed"
916        )));
917        assert!(!AlpineInstalledParser::is_match(&PathBuf::from(
918            "/lib/apk/db/status"
919        )));
920        assert!(!AlpineInstalledParser::is_match(&PathBuf::from(
921            "installed"
922        )));
923    }
924
925    #[test]
926    fn test_parse_alpine_package_basic() {
927        let content = "C:Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=
928P:alpine-baselayout-data
929V:3.2.0-r22
930A:x86_64
931S:11435
932I:73728
933T:Alpine base dir structure and init scripts
934U:https://git.alpinelinux.org/cgit/aports/tree/main/alpine-baselayout
935L:GPL-2.0-only
936o:alpine-baselayout
937m:Natanael Copa <ncopa@alpinelinux.org>
938t:1655134784
939c:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd
940
941";
942        let (_dir, path) = create_temp_installed_db(content);
943        let pkg = AlpineInstalledParser::extract_first_package(&path);
944        assert_eq!(pkg.name, Some("alpine-baselayout-data".to_string()));
945        assert_eq!(pkg.version, Some("3.2.0-r22".to_string()));
946        assert_eq!(pkg.namespace, Some("alpine".to_string()));
947        assert_eq!(
948            pkg.description,
949            Some("Alpine base dir structure and init scripts".to_string())
950        );
951        assert_eq!(
952            pkg.homepage_url,
953            Some("https://git.alpinelinux.org/cgit/aports/tree/main/alpine-baselayout".to_string())
954        );
955        assert_eq!(
956            pkg.extracted_license_statement,
957            Some("GPL-2.0-only".to_string())
958        );
959        assert_eq!(pkg.parties.len(), 1);
960        assert_eq!(pkg.parties[0].name, Some("Natanael Copa".to_string()));
961        assert_eq!(
962            pkg.parties[0].email,
963            Some("ncopa@alpinelinux.org".to_string())
964        );
965        assert!(
966            pkg.purl
967                .as_ref()
968                .unwrap()
969                .contains("alpine-baselayout-data")
970        );
971        assert!(pkg.purl.as_ref().unwrap().contains("arch=x86_64"));
972    }
973
974    #[test]
975    fn test_parse_alpine_with_dependencies() {
976        let content = "P:musl
977V:1.2.3-r0
978A:x86_64
979D:scanelf so:libc.musl-x86_64.so.1
980
981";
982        let (_dir, path) = create_temp_installed_db(content);
983        let pkg = AlpineInstalledParser::extract_first_package(&path);
984        assert_eq!(pkg.name, Some("musl".to_string()));
985        assert_eq!(pkg.dependencies.len(), 1);
986        assert!(
987            pkg.dependencies[0]
988                .purl
989                .as_ref()
990                .unwrap()
991                .contains("scanelf")
992        );
993    }
994
995    #[test]
996    fn test_build_alpine_purl() {
997        let purl = build_alpine_purl("busybox", Some("1.31.1-r9"), Some("x86_64"));
998        assert_eq!(
999            purl,
1000            Some("pkg:alpine/busybox@1.31.1-r9?arch=x86_64".to_string())
1001        );
1002
1003        let purl_no_arch = build_alpine_purl("package", Some("1.0"), None);
1004        assert_eq!(purl_no_arch, Some("pkg:alpine/package@1.0".to_string()));
1005    }
1006
1007    #[test]
1008    fn test_parse_alpine_extra_data() {
1009        let content = "P:test-package
1010V:1.0
1011C:base64checksum==
1012S:12345
1013I:67890
1014t:1234567890
1015c:gitcommithash
1016
1017";
1018        let (_dir, path) = create_temp_installed_db(content);
1019        let pkg = AlpineInstalledParser::extract_first_package(&path);
1020        assert!(pkg.extra_data.is_some());
1021        let extra = pkg.extra_data.as_ref().unwrap();
1022        assert_eq!(extra["checksum"], "base64checksum==");
1023        assert_eq!(extra["compressed_size"], "12345");
1024        assert_eq!(extra["installed_size"], "67890");
1025        assert_eq!(extra["build_timestamp"], "1234567890");
1026        assert_eq!(extra["git_commit"], "gitcommithash");
1027    }
1028
1029    #[test]
1030    fn test_parse_alpine_case_sensitive_keys() {
1031        let content = "C:Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=
1032P:test-pkg
1033V:1.0
1034T:A test description
1035t:1655134784
1036c:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd
1037
1038";
1039        let (_dir, path) = create_temp_installed_db(content);
1040        let pkg = AlpineInstalledParser::extract_first_package(&path);
1041        assert_eq!(pkg.description, Some("A test description".to_string()));
1042        let extra = pkg.extra_data.as_ref().unwrap();
1043        assert_eq!(extra["checksum"], "Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=");
1044        assert_eq!(extra["build_timestamp"], "1655134784");
1045        assert_eq!(
1046            extra["git_commit"],
1047            "cb70ca5c6d6db0399d2dd09189c5d57827bce5cd"
1048        );
1049    }
1050
1051    #[test]
1052    fn test_parse_alpine_multiple_packages() {
1053        let content = "P:package1
1054V:1.0
1055A:x86_64
1056
1057P:package2
1058V:2.0
1059A:aarch64
1060
1061";
1062        let (_dir, path) = create_temp_installed_db(content);
1063        let pkgs = AlpineInstalledParser::extract_packages(&path);
1064        assert_eq!(pkgs.len(), 2);
1065        assert_eq!(pkgs[0].name, Some("package1".to_string()));
1066        assert_eq!(pkgs[0].version, Some("1.0".to_string()));
1067        assert_eq!(pkgs[1].name, Some("package2".to_string()));
1068        assert_eq!(pkgs[1].version, Some("2.0".to_string()));
1069    }
1070
1071    #[test]
1072    fn test_parse_alpine_file_references() {
1073        let content = "P:test-pkg
1074V:1.0
1075F:usr/bin
1076R:test
1077Z:Q1WTc55xfvPogzA0YUV24D0Ym+MKE=
1078F:etc
1079R:config
1080Z:Q1pcfTfDNEbNKQc2s1tia7da05M8Q=
1081
1082";
1083        let (_dir, path) = create_temp_installed_db(content);
1084        let pkg = AlpineInstalledParser::extract_first_package(&path);
1085        assert_eq!(pkg.file_references.len(), 2);
1086        assert_eq!(pkg.file_references[0].path, "usr/bin/test");
1087        assert!(pkg.file_references[0].sha1.is_some());
1088        assert_eq!(pkg.file_references[1].path, "etc/config");
1089        assert!(pkg.file_references[1].sha1.is_some());
1090    }
1091
1092    #[test]
1093    fn test_parse_alpine_empty_fields() {
1094        let content = "P:minimal-package
1095V:1.0
1096
1097";
1098        let (_dir, path) = create_temp_installed_db(content);
1099        let pkg = AlpineInstalledParser::extract_first_package(&path);
1100        assert_eq!(pkg.name, Some("minimal-package".to_string()));
1101        assert_eq!(pkg.version, Some("1.0".to_string()));
1102        assert!(pkg.description.is_none());
1103        assert!(pkg.homepage_url.is_none());
1104        assert_eq!(pkg.dependencies.len(), 0);
1105    }
1106
1107    #[test]
1108    fn test_parse_alpine_origin_field() {
1109        let content = "P:busybox-ifupdown
1110V:1.35.0-r13
1111o:busybox
1112A:x86_64
1113
1114";
1115        let (_dir, path) = create_temp_installed_db(content);
1116        let pkg = AlpineInstalledParser::extract_first_package(&path);
1117        assert_eq!(pkg.name, Some("busybox-ifupdown".to_string()));
1118        assert_eq!(pkg.source_packages.len(), 1);
1119        assert_eq!(pkg.source_packages[0], "pkg:alpine/busybox");
1120    }
1121
1122    #[test]
1123    fn test_parse_alpine_url_field() {
1124        let content = "P:openssl
1125V:1.1.1q-r0
1126U:https://www.openssl.org
1127A:x86_64
1128
1129";
1130        let (_dir, path) = create_temp_installed_db(content);
1131        let pkg = AlpineInstalledParser::extract_first_package(&path);
1132        assert_eq!(
1133            pkg.homepage_url,
1134            Some("https://www.openssl.org".to_string())
1135        );
1136    }
1137
1138    #[test]
1139    fn test_parse_alpine_provider_field() {
1140        let content = "P:some-package
1141V:1.0
1142p:cmd:binary=1.0
1143p:so:libtest.so.1
1144
1145";
1146        let (_dir, path) = create_temp_installed_db(content);
1147        let pkg = AlpineInstalledParser::extract_first_package(&path);
1148        assert!(pkg.extra_data.is_some());
1149        let extra = pkg.extra_data.as_ref().unwrap();
1150        let providers = extra.get("providers").and_then(|v| v.as_array());
1151        assert!(providers.is_some());
1152        let provider_array = providers.unwrap();
1153        assert_eq!(provider_array.len(), 2);
1154        assert_eq!(provider_array[0].as_str(), Some("cmd:binary=1.0"));
1155        assert_eq!(provider_array[1].as_str(), Some("so:libtest.so.1"));
1156    }
1157
1158    #[test]
1159    fn test_alpine_apk_parser_is_match() {
1160        let temp_dir = TempDir::new().expect("Failed to create temp dir");
1161        let apk_path = temp_dir.path().join("app-1.0.apk");
1162        let mut file = std::fs::File::create(&apk_path).expect("Failed to create apk fixture");
1163        file.write_all(&[0x1F, 0x8B, 0x08, 0x00])
1164            .expect("Failed to write gzip signature");
1165
1166        assert!(AlpineApkParser::is_match(&apk_path));
1167        assert!(!AlpineApkParser::is_match(&PathBuf::from("package.tar.gz")));
1168        assert!(!AlpineApkParser::is_match(&PathBuf::from("installed")));
1169    }
1170
1171    #[test]
1172    fn test_alpine_apk_parser_rejects_android_apk_fixture() {
1173        let android_apk = PathBuf::from("testdata/misc/test_android.apk");
1174        let alpine_apk = PathBuf::from("testdata/misc/test_alpine.apk");
1175
1176        assert!(!AlpineApkParser::is_match(&android_apk));
1177        assert!(AlpineApkParser::is_match(&alpine_apk));
1178    }
1179
1180    #[test]
1181    fn test_alpine_apkbuild_parser_is_match() {
1182        assert!(AlpineApkbuildParser::is_match(&PathBuf::from("APKBUILD")));
1183        assert!(AlpineApkbuildParser::is_match(&PathBuf::from(
1184            "/path/to/APKBUILD"
1185        )));
1186        assert!(!AlpineApkbuildParser::is_match(&PathBuf::from("apkbuild")));
1187        assert!(!AlpineApkbuildParser::is_match(&PathBuf::from(
1188            "APKBUILD.txt"
1189        )));
1190    }
1191
1192    #[test]
1193    fn test_parse_apkbuild_icu_reference() {
1194        let path = PathBuf::from("testdata/alpine-fixtures/apkbuild/alpine14/main/icu/APKBUILD");
1195        let pkg = AlpineApkbuildParser::extract_first_package(&path);
1196
1197        assert_eq!(pkg.datasource_id, Some(DatasourceId::AlpineApkbuild));
1198        assert_eq!(pkg.name.as_deref(), Some("icu"));
1199        assert_eq!(pkg.version.as_deref(), Some("67.1-r2"));
1200        assert_eq!(
1201            pkg.description.as_deref(),
1202            Some("International Components for Unicode library")
1203        );
1204        assert_eq!(
1205            pkg.homepage_url.as_deref(),
1206            Some("http://site.icu-project.org/")
1207        );
1208        assert_eq!(
1209            pkg.extracted_license_statement.as_deref(),
1210            Some("MIT ICU Unicode-TOU")
1211        );
1212        assert_eq!(
1213            pkg.declared_license_expression_spdx.as_deref(),
1214            Some("MIT AND ICU AND Unicode-TOU")
1215        );
1216        assert_eq!(pkg.dependencies.len(), 3);
1217        let depends_dev = pkg
1218            .dependencies
1219            .iter()
1220            .find(|dep| dep.scope.as_deref() == Some("depends_dev"))
1221            .expect("depends_dev dependency missing");
1222        assert_eq!(depends_dev.purl.as_deref(), Some("pkg:alpine/icu"));
1223        assert_eq!(depends_dev.is_runtime, Some(false));
1224        assert_eq!(depends_dev.is_optional, Some(true));
1225
1226        let check_dep_names: Vec<_> = pkg
1227            .dependencies
1228            .iter()
1229            .filter(|dep| dep.scope.as_deref() == Some("checkdepends"))
1230            .filter_map(|dep| dep.purl.as_deref())
1231            .collect();
1232        assert!(check_dep_names.contains(&"pkg:alpine/diffutils"));
1233        assert!(check_dep_names.contains(&"pkg:alpine/python3"));
1234        let extra = pkg.extra_data.as_ref().unwrap();
1235        assert!(extra.contains_key("sources"));
1236        assert!(extra.contains_key("checksums"));
1237    }
1238
1239    #[test]
1240    fn test_parse_apkbuild_custom_multiple_license_uses_raw_matched_text() {
1241        let path = PathBuf::from(
1242            "testdata/alpine-fixtures/apkbuild/alpine13/main/linux-firmware/APKBUILD",
1243        );
1244        let pkg = AlpineApkbuildParser::extract_first_package(&path);
1245
1246        assert_eq!(pkg.name.as_deref(), Some("linux-firmware"));
1247        assert_eq!(pkg.version.as_deref(), Some("20201218-r0"));
1248        assert_eq!(
1249            pkg.extracted_license_statement.as_deref(),
1250            Some("custom:multiple")
1251        );
1252        assert_eq!(
1253            pkg.declared_license_expression.as_deref(),
1254            Some("unknown-license-reference")
1255        );
1256        assert_eq!(
1257            pkg.declared_license_expression_spdx.as_deref(),
1258            Some("LicenseRef-provenant-unknown-license-reference")
1259        );
1260        let matched = pkg.license_detections[0].matches[0].matched_text.as_deref();
1261        assert_eq!(matched, Some("custom:multiple"));
1262    }
1263
1264    #[test]
1265    fn test_parse_alpine_no_files_package_still_detected() {
1266        let path = PathBuf::from("testdata/alpine-fixtures/full-installed/installed");
1267        let content = std::fs::read_to_string(&path).expect("read installed db fixture");
1268        let packages = parse_alpine_installed_db(&content);
1269        let libc_utils = packages
1270            .into_iter()
1271            .find(|pkg| pkg.name.as_deref() == Some("libc-utils"))
1272            .expect("libc-utils package should exist");
1273
1274        assert_eq!(libc_utils.file_references.len(), 0);
1275        assert!(
1276            libc_utils
1277                .purl
1278                .as_deref()
1279                .is_some_and(|p| p.contains("libc-utils"))
1280        );
1281    }
1282
1283    #[test]
1284    fn test_parse_alpine_commit_generates_https_vcs_url() {
1285        let content =
1286            "P:test-package\nV:1.0-r0\nA:x86_64\nc:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd\n";
1287        let (_dir, path) = create_temp_installed_db(content);
1288        let pkg = AlpineInstalledParser::extract_first_package(&path);
1289
1290        assert_eq!(
1291            pkg.vcs_url.as_deref(),
1292            Some(
1293                "git+https://git.alpinelinux.org/aports/commit/?id=cb70ca5c6d6db0399d2dd09189c5d57827bce5cd"
1294            )
1295        );
1296    }
1297
1298    #[test]
1299    fn test_parse_alpine_virtual_package() {
1300        let content = "P:.postgis-rundeps
1301V:20210104.190748
1302A:noarch
1303S:0
1304I:0
1305T:virtual meta package
1306U:
1307L:
1308D:json-c geos gdal proj protobuf-c libstdc++
1309
1310";
1311        let (_dir, path) = create_temp_installed_db(content);
1312        let pkg = AlpineInstalledParser::extract_first_package(&path);
1313        assert_eq!(pkg.name, Some(".postgis-rundeps".to_string()));
1314        assert_eq!(pkg.version, Some("20210104.190748".to_string()));
1315        assert_eq!(pkg.description, Some("virtual meta package".to_string()));
1316        assert!(pkg.extra_data.is_some());
1317        let extra = pkg.extra_data.as_ref().unwrap();
1318        assert_eq!(
1319            extra.get("is_virtual").and_then(|v| v.as_bool()),
1320            Some(true)
1321        );
1322        assert_eq!(pkg.dependencies.len(), 6);
1323        assert!(pkg.homepage_url.is_none());
1324        assert!(pkg.extracted_license_statement.is_none());
1325    }
1326
1327    #[test]
1328    fn test_installed_db_license_normalization() {
1329        let content = "P:test-package\nV:1.0-r0\nA:x86_64\nL:MIT\n\n";
1330        let (_dir, path) = create_temp_installed_db(content);
1331        let pkg = AlpineInstalledParser::extract_first_package(&path);
1332
1333        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("MIT"));
1334        assert_eq!(pkg.declared_license_expression.as_deref(), Some("mit"));
1335        assert_eq!(pkg.declared_license_expression_spdx.as_deref(), Some("MIT"));
1336        assert_eq!(pkg.license_detections.len(), 1);
1337    }
1338
1339    #[test]
1340    fn test_apk_archive_license_normalization() {
1341        let path = PathBuf::from("testdata/alpine/apk/basic/test-package-1.0-r0.apk");
1342        let pkg = AlpineApkParser::extract_first_package(&path);
1343
1344        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("MIT"));
1345        assert_eq!(pkg.declared_license_expression.as_deref(), Some("mit"));
1346        assert_eq!(pkg.declared_license_expression_spdx.as_deref(), Some("MIT"));
1347        assert_eq!(pkg.license_detections.len(), 1);
1348    }
1349}
1350
// Registers one parser entry described as covering both the installed database and
// `.apk` archives, with one glob pattern for each.
// NOTE(review): the meaning of the positional arguments (description, path patterns,
// package type, primary language — empty here — and documentation URL) is inferred
// from their values; confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "Alpine Linux package (installed db and .apk archive)",
    &["**/lib/apk/db/installed", "**/*.apk"],
    "alpine",
    "",
    Some("https://wiki.alpinelinux.org/wiki/Apk_spec"),
);
1358
// Registers the parser entry for APKBUILD recipes, matched by the `**/APKBUILD` glob.
// NOTE(review): the meaning of the positional arguments (description, path patterns,
// package type, primary language, documentation URL) is inferred from their values;
// confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "Alpine Linux APKBUILD recipe",
    &["**/APKBUILD"],
    "alpine",
    "Shell",
    Some("https://wiki.alpinelinux.org/wiki/APKBUILD_Reference"),
);