Skip to main content

provenant/parsers/
alpine.rs

1//! Parser for Alpine Linux package metadata files.
2//!
3//! Extracts installed package metadata from Alpine Linux package database files
4//! using the APK package manager format.
5//!
6//! # Supported Formats
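//! - `/lib/apk/db/installed` (Installed package database)
//! - `APKBUILD` (package build recipe)
//! - `*.apk` (package archive; metadata is read from the embedded `.PKGINFO`)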
8//!
9//! # Key Features
10//! - Installed package metadata extraction from system database
11//! - Dependency tracking from provides/requires fields
12//! - Author and maintainer information extraction
13//! - License information parsing
14//! - Package URL (purl) generation
15//!
16//! # Implementation Notes
17//! - Uses custom case-sensitive key-value parser (not the generic `rfc822` module)
18//! - Database stored in text format with multi-paragraph records
19//! - Graceful error handling with `warn!()` logs
20
21use std::collections::HashMap;
22use std::path::Path;
23
24use log::warn;
25
26use crate::models::{
27    DatasourceId, Dependency, FileReference, LicenseDetection, Match, PackageData, PackageType,
28    Party,
29};
30use crate::parsers::utils::{read_file_to_string, split_name_email};
31
32use super::PackageParser;
33
34const PACKAGE_TYPE: PackageType = PackageType::Alpine;
35
36fn default_package_data(datasource_id: DatasourceId) -> PackageData {
37    PackageData {
38        package_type: Some(PACKAGE_TYPE),
39        datasource_id: Some(datasource_id),
40        ..Default::default()
41    }
42}
43
44/// Parser for Alpine Linux installed package database
45pub struct AlpineInstalledParser;
46
47pub struct AlpineApkbuildParser;
48
49impl PackageParser for AlpineInstalledParser {
50    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
51
52    fn is_match(path: &Path) -> bool {
53        path.to_str()
54            .map(|p| p.contains("/lib/apk/db/") && p.ends_with("installed"))
55            .unwrap_or(false)
56    }
57
58    fn extract_packages(path: &Path) -> Vec<PackageData> {
59        let content = match read_file_to_string(path) {
60            Ok(c) => c,
61            Err(e) => {
62                warn!("Failed to read Alpine installed db {:?}: {}", path, e);
63                return vec![default_package_data(DatasourceId::AlpineInstalledDb)];
64            }
65        };
66
67        parse_alpine_installed_db(&content)
68    }
69}
70
71impl PackageParser for AlpineApkbuildParser {
72    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
73
74    fn is_match(path: &Path) -> bool {
75        path.file_name().and_then(|n| n.to_str()) == Some("APKBUILD")
76    }
77
78    fn extract_packages(path: &Path) -> Vec<PackageData> {
79        let content = match read_file_to_string(path) {
80            Ok(c) => c,
81            Err(e) => {
82                warn!("Failed to read APKBUILD {:?}: {}", path, e);
83                return vec![default_package_data(DatasourceId::AlpineApkbuild)];
84            }
85        };
86
87        vec![parse_apkbuild(&content)]
88    }
89}
90
91fn parse_alpine_installed_db(content: &str) -> Vec<PackageData> {
92    let raw_paragraphs: Vec<&str> = content
93        .split("\n\n")
94        .filter(|p| !p.trim().is_empty())
95        .collect();
96
97    let mut all_packages = Vec::new();
98
99    for raw_text in &raw_paragraphs {
100        let headers = parse_alpine_headers(raw_text);
101        let pkg = parse_alpine_package_paragraph(&headers, raw_text);
102        if pkg.name.is_some() {
103            all_packages.push(pkg);
104        }
105    }
106
107    if all_packages.is_empty() {
108        return vec![default_package_data(DatasourceId::AlpineInstalledDb)];
109    }
110
111    all_packages
112}
113
/// Collects the `key:value` lines of one database paragraph, keeping key case.
///
/// The installed DB distinguishes keys by case (e.g. `T:` is the description
/// while `t:` is the timestamp), so keys are stored exactly as written after
/// trimming. Repeated keys accumulate their values in file order. Lines with
/// no `:`, an empty key, or an empty value are ignored.
fn parse_alpine_headers(content: &str) -> HashMap<String, Vec<String>> {
    let fields = content
        .lines()
        .filter_map(|line| line.split_once(':'))
        .map(|(key, value)| (key.trim(), value.trim()))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty());

    let mut headers: HashMap<String, Vec<String>> = HashMap::new();
    for (key, value) in fields {
        headers
            .entry(key.to_owned())
            .or_default()
            .push(value.to_owned());
    }
    headers
}
141
/// Returns the first value recorded for `key`, trimmed, or `None` when the
/// key is absent or has no values.
fn get_first(headers: &HashMap<String, Vec<String>>, key: &str) -> Option<String> {
    let first = headers.get(key)?.first()?;
    Some(first.trim().to_owned())
}
148
/// Returns every value recorded for `key` that is not blank, in file order.
///
/// Values are returned as stored (not trimmed); an absent key gives an empty
/// vector.
fn get_all(headers: &HashMap<String, Vec<String>>, key: &str) -> Vec<String> {
    match headers.get(key) {
        Some(values) => values
            .iter()
            .filter(|value| !value.trim().is_empty())
            .cloned()
            .collect(),
        None => Vec::new(),
    }
}
158
159fn parse_alpine_package_paragraph(
160    headers: &HashMap<String, Vec<String>>,
161    raw_text: &str,
162) -> PackageData {
163    let name = get_first(headers, "P");
164    let version = get_first(headers, "V");
165    let description = get_first(headers, "T");
166    let homepage_url = get_first(headers, "U");
167    let architecture = get_first(headers, "A");
168
169    let is_virtual = description
170        .as_ref()
171        .is_some_and(|d| d == "virtual meta package");
172
173    let namespace = Some("alpine".to_string());
174    let mut parties = Vec::new();
175
176    if let Some(maintainer) = get_first(headers, "m") {
177        let (name_opt, email_opt) = split_name_email(&maintainer);
178        parties.push(Party {
179            r#type: None,
180            role: Some("maintainer".to_string()),
181            name: name_opt,
182            email: email_opt,
183            url: None,
184            organization: None,
185            organization_url: None,
186            timezone: None,
187        });
188    }
189
190    let extracted_license_statement = get_first(headers, "L");
191
192    let source_packages = if let Some(origin) = get_first(headers, "o") {
193        vec![format!("pkg:alpine/{}", origin)]
194    } else {
195        Vec::new()
196    };
197    let vcs_url = get_first(headers, "c")
198        .map(|commit| format!("git+https://git.alpinelinux.org/aports/commit/?id={commit}"));
199
200    let mut dependencies = Vec::new();
201    for dep in get_all(headers, "D") {
202        for dep_str in dep.split_whitespace() {
203            if dep_str.starts_with("so:") || dep_str.starts_with("cmd:") {
204                continue;
205            }
206
207            dependencies.push(Dependency {
208                purl: Some(format!("pkg:alpine/{}", dep_str)),
209                extracted_requirement: None,
210                scope: Some("install".to_string()),
211                is_runtime: Some(true),
212                is_optional: Some(false),
213                is_direct: Some(true),
214                resolved_package: None,
215                extra_data: None,
216                is_pinned: Some(false),
217            });
218        }
219    }
220
221    let mut extra_data = HashMap::new();
222
223    if is_virtual {
224        extra_data.insert("is_virtual".to_string(), true.into());
225    }
226
227    if let Some(checksum) = get_first(headers, "C") {
228        extra_data.insert("checksum".to_string(), checksum.into());
229    }
230
231    if let Some(size) = get_first(headers, "S") {
232        extra_data.insert("compressed_size".to_string(), size.into());
233    }
234
235    if let Some(installed_size) = get_first(headers, "I") {
236        extra_data.insert("installed_size".to_string(), installed_size.into());
237    }
238
239    if let Some(timestamp) = get_first(headers, "t") {
240        extra_data.insert("build_timestamp".to_string(), timestamp.into());
241    }
242
243    if let Some(commit) = get_first(headers, "c") {
244        extra_data.insert("git_commit".to_string(), commit.into());
245    }
246
247    let providers = extract_providers(raw_text);
248    if !providers.is_empty() {
249        let provider_list: Vec<serde_json::Value> =
250            providers.into_iter().map(|s| s.into()).collect();
251        extra_data.insert("providers".to_string(), provider_list.into());
252    }
253
254    let file_references = extract_file_references(raw_text);
255
256    PackageData {
257        datasource_id: Some(DatasourceId::AlpineInstalledDb),
258        package_type: Some(PACKAGE_TYPE),
259        namespace: namespace.clone(),
260        name: name.clone(),
261        version: version.clone(),
262        description,
263        homepage_url,
264        vcs_url,
265        parties,
266        extracted_license_statement,
267        source_packages,
268        dependencies,
269        file_references,
270        purl: name
271            .as_ref()
272            .and_then(|n| build_alpine_purl(n, version.as_deref(), architecture.as_deref())),
273        extra_data: if extra_data.is_empty() {
274            None
275        } else {
276            Some(extra_data)
277        },
278        ..Default::default()
279    }
280}
281
282fn parse_apkbuild(content: &str) -> PackageData {
283    let variables = parse_apkbuild_variables(content);
284
285    let name = variables.get("pkgname").cloned();
286    let version = match (variables.get("pkgver"), variables.get("pkgrel")) {
287        (Some(ver), Some(rel)) => Some(format!("{}-r{}", ver, rel)),
288        (Some(ver), None) => Some(ver.clone()),
289        _ => None,
290    };
291    let description = variables.get("pkgdesc").cloned();
292    let homepage_url = variables.get("url").cloned();
293    let extracted_license_statement = variables.get("license").cloned();
294    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
295        build_apkbuild_license_data(extracted_license_statement.as_deref());
296
297    let mut extra_data = HashMap::new();
298    if let Some(source) = variables.get("source") {
299        let sources_value: Vec<serde_json::Value> = parse_apkbuild_sources(source)
300            .into_iter()
301            .map(|(file_name, url)| serde_json::json!({ "file_name": file_name, "url": url }))
302            .collect();
303        if !sources_value.is_empty() {
304            extra_data.insert(
305                "sources".to_string(),
306                serde_json::Value::Array(sources_value),
307            );
308        }
309    }
310    for (field, checksum_key) in [
311        ("sha512sums", "sha512"),
312        ("sha256sums", "sha256"),
313        ("md5sums", "md5"),
314    ] {
315        if let Some(checksums) = variables.get(field) {
316            let checksum_entries: Vec<serde_json::Value> = parse_apkbuild_checksums(checksums)
317                .into_iter()
318                .map(|(file_name, checksum)| serde_json::json!({ "file_name": file_name, checksum_key: checksum }))
319                .collect();
320            if !checksum_entries.is_empty() {
321                match extra_data.get_mut("checksums") {
322                    Some(serde_json::Value::Array(existing)) => existing.extend(checksum_entries),
323                    _ => {
324                        extra_data.insert(
325                            "checksums".to_string(),
326                            serde_json::Value::Array(checksum_entries),
327                        );
328                    }
329                }
330            }
331        }
332    }
333
334    PackageData {
335        datasource_id: Some(DatasourceId::AlpineApkbuild),
336        package_type: Some(PACKAGE_TYPE),
337        namespace: None,
338        name: name.clone(),
339        version: version.clone(),
340        description,
341        homepage_url,
342        extracted_license_statement,
343        declared_license_expression,
344        declared_license_expression_spdx,
345        license_detections,
346        purl: name
347            .as_deref()
348            .and_then(|n| build_alpine_purl(n, version.as_deref(), None)),
349        extra_data: (!extra_data.is_empty()).then_some(extra_data),
350        ..default_package_data(DatasourceId::AlpineApkbuild)
351    }
352}
353
354fn parse_apkbuild_variables(content: &str) -> HashMap<String, String> {
355    let mut raw = HashMap::new();
356    let mut lines = content.lines().peekable();
357    let mut brace_depth = 0usize;
358
359    while let Some(line) = lines.next() {
360        let trimmed = line.trim();
361        if trimmed.is_empty() || trimmed.starts_with('#') {
362            continue;
363        }
364        if trimmed.ends_with("(){") || trimmed.ends_with("() {") {
365            brace_depth += 1;
366            continue;
367        }
368        if brace_depth > 0 {
369            brace_depth += trimmed.chars().filter(|c| *c == '{').count();
370            brace_depth = brace_depth.saturating_sub(trimmed.chars().filter(|c| *c == '}').count());
371            continue;
372        }
373        let Some((name, value)) = trimmed.split_once('=') else {
374            continue;
375        };
376        let mut value = value.trim().to_string();
377        if value.starts_with('"') && !value.ends_with('"') {
378            while let Some(next) = lines.peek() {
379                value.push('\n');
380                value.push_str(next);
381                let current = lines.next().unwrap();
382                if current.trim_end().ends_with('"') {
383                    break;
384                }
385            }
386        }
387        raw.insert(name.trim().to_string(), value);
388    }
389
390    let mut resolved = HashMap::new();
391    for key in [
392        "pkgname",
393        "pkgver",
394        "pkgrel",
395        "pkgdesc",
396        "url",
397        "license",
398        "source",
399        "sha512sums",
400        "sha256sums",
401        "md5sums",
402    ] {
403        if let Some(value) = raw.get(key) {
404            resolved.insert(key.to_string(), resolve_apkbuild_value(value, &raw));
405        }
406    }
407    resolved
408}
409
/// Returns `(name, raw value)` pairs in the order substitutions are applied:
/// longest name first, ties broken alphabetically.
///
/// A bare `$pkg` pattern is a prefix of `$pkgname`, so the longer name has to
/// be replaced first. A fixed order also keeps the result independent of
/// `HashMap` iteration order. Empty names are skipped because their bare
/// pattern would be a lone `$` and match every reference.
fn apkbuild_substitution_order(variables: &HashMap<String, String>) -> Vec<(&str, &str)> {
    let mut ordered: Vec<(&str, &str)> = variables
        .iter()
        .filter(|(name, _)| !name.is_empty())
        .map(|(name, value)| (name.as_str(), value.as_str()))
        .collect();
    ordered.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));
    ordered
}

/// Replaces every supported reference to `name` in `text` with `value`.
///
/// Supported forms: `${name//./-}`, `${name//./_}`, `${name::8}` (first eight
/// characters), `${name}` and bare `$name`.
fn substitute_apkbuild_variable(text: &str, name: &str, value: &str) -> String {
    text.replace(&format!("${{{name}//./-}}"), &value.replace('.', "-"))
        .replace(&format!("${{{name}//./_}}"), &value.replace('.', "_"))
        .replace(
            &format!("${{{name}::8}}"),
            &value.chars().take(8).collect::<String>(),
        )
        .replace(&format!("${{{name}}}"), value)
        .replace(&format!("${name}"), value)
}

/// Performs one substitution pass over `value` using the raw (unexpanded,
/// quote-stripped) values of `ordered`.
fn expand_apkbuild_once(value: &str, ordered: &[(&str, &str)]) -> String {
    let mut resolved = strip_wrapping_quotes(value.trim()).to_string();
    for (name, raw_value) in ordered {
        resolved =
            substitute_apkbuild_variable(&resolved, name, strip_wrapping_quotes(raw_value.trim()));
    }
    resolved
}

/// Expands variable references in `value` using `variables`.
///
/// Each variable's own value is first expanded one level, then the
/// substitutions are applied to `value` repeatedly until nothing changes, for
/// at most eight rounds (which also bounds self-referential definitions).
/// Wrapping quotes are stripped from `value` and from every variable value.
fn resolve_apkbuild_value(value: &str, variables: &HashMap<String, String>) -> String {
    let ordered = apkbuild_substitution_order(variables);

    // The expanded replacement of a variable does not depend on the text being
    // resolved, so it is computed once instead of once per round.
    let substitutions: Vec<(&str, String)> = ordered
        .iter()
        .map(|(name, raw_value)| {
            let expanded = expand_apkbuild_once(raw_value, &ordered);
            (*name, strip_wrapping_quotes(&expanded).to_string())
        })
        .collect();

    let mut resolved = strip_wrapping_quotes(value.trim()).to_string();
    for _ in 0..8 {
        let previous = resolved.clone();
        for (name, replacement) in &substitutions {
            resolved = substitute_apkbuild_variable(&resolved, name, replacement);
        }
        if resolved == previous {
            break;
        }
    }
    resolved
}

/// Expands variable references in `value` with a single pass over the raw
/// values in `variables`; references inside those values are left as-is.
fn resolve_apkbuild_value_no_recursion(value: &str, variables: &HashMap<String, String>) -> String {
    expand_apkbuild_once(value, &apkbuild_substitution_order(variables))
}

/// Removes one pair of matching wrapping quotes (`"…"` first, then `'…'`).
///
/// The input is returned unchanged when it is not wrapped by a matching pair.
fn strip_wrapping_quotes(value: &str) -> &str {
    for quote in ['"', '\''] {
        if let Some(inner) = value
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
        {
            return inner;
        }
    }
    value
}
463
/// Splits an APKBUILD `source` value into `(file name, url)` entries.
///
/// Entries are whitespace separated and take one of three forms:
/// - `name::url` gives both the file name and the URL;
/// - anything containing `://` is a URL with no explicit file name;
/// - anything else is a local file name with no URL.
fn parse_apkbuild_sources(value: &str) -> Vec<(Option<String>, Option<String>)> {
    let mut sources = Vec::new();
    for entry in value.split_whitespace() {
        let parsed = match entry.split_once("::") {
            Some((file_name, url)) => (Some(file_name.to_owned()), Some(url.to_owned())),
            None if entry.contains("://") => (None, Some(entry.to_owned())),
            None => (Some(entry.to_owned()), None),
        };
        sources.push(parsed);
    }
    sources
}
479
/// Parses an APKBUILD checksum value into `(file name, checksum)` pairs.
///
/// The value is read as a whitespace-separated stream of `checksum file`
/// token pairs; a trailing unpaired token is ignored.
fn parse_apkbuild_checksums(value: &str) -> Vec<(String, String)> {
    let tokens: Vec<&str> = value.split_whitespace().collect();
    tokens
        .chunks_exact(2)
        .map(|pair| (pair[1].to_owned(), pair[0].to_owned()))
        .collect()
}
495
496fn build_apkbuild_license_data(
497    extracted: Option<&str>,
498) -> (Option<String>, Option<String>, Vec<LicenseDetection>) {
499    let Some(extracted) = extracted.map(str::trim).filter(|s| !s.is_empty()) else {
500        return (None, None, Vec::new());
501    };
502
503    let (declared, declared_spdx) = if extracted == "custom:multiple" {
504        (
505            Some("unknown-license-reference".to_string()),
506            Some("LicenseRef-provenant-unknown-license-reference".to_string()),
507        )
508    } else {
509        let parts: Vec<&str> = extracted
510            .split_whitespace()
511            .filter(|part| *part != "AND")
512            .collect();
513        let declared_parts: Vec<String> = parts
514            .iter()
515            .map(|part| match *part {
516                "MIT" => "mit".to_string(),
517                "ICU" => "x11".to_string(),
518                "Unicode-TOU" => "unicode-tou".to_string(),
519                "Ruby" => "ruby".to_string(),
520                "BSD-2-Clause" => "bsd-simplified".to_string(),
521                "BSD-3-Clause" => "bsd-new".to_string(),
522                other => other.to_ascii_lowercase(),
523            })
524            .collect();
525        let spdx_parts: Vec<String> = parts.iter().map(|part| part.to_string()).collect();
526        (
527            combine_license_expressions_in_order(declared_parts),
528            combine_license_expressions_in_order(spdx_parts),
529        )
530    };
531
532    let Some(declared_expr) = declared.clone() else {
533        return (None, None, Vec::new());
534    };
535    let Some(declared_spdx_expr) = declared_spdx.clone() else {
536        return (declared, declared_spdx, Vec::new());
537    };
538
539    let detection = LicenseDetection {
540        license_expression: declared_expr.clone(),
541        license_expression_spdx: declared_spdx_expr.clone(),
542        matches: vec![Match {
543            license_expression: declared_expr,
544            license_expression_spdx: declared_spdx_expr,
545            from_file: None,
546            start_line: 1,
547            end_line: 1,
548            matcher: Some("1-spdx-id".to_string()),
549            score: 100.0,
550            matched_length: Some(extracted.split_whitespace().count()),
551            match_coverage: Some(100.0),
552            rule_relevance: Some(100),
553            rule_identifier: None,
554            rule_url: None,
555            matched_text: Some(extracted.to_string()),
556        }],
557        identifier: None,
558    };
559
560    (declared, declared_spdx, vec![detection])
561}
562
/// Joins the non-empty expressions with ` AND `, preserving their order.
///
/// Returns `None` when no non-empty expression is left.
fn combine_license_expressions_in_order(expressions: Vec<String>) -> Option<String> {
    let non_empty: Vec<String> = expressions
        .into_iter()
        .filter(|expression| !expression.is_empty())
        .collect();
    (!non_empty.is_empty()).then(|| non_empty.join(" AND "))
}
571
572fn extract_file_references(raw_text: &str) -> Vec<FileReference> {
573    let mut file_references = Vec::new();
574    let mut current_dir = String::new();
575    let mut current_file: Option<FileReference> = None;
576
577    for line in raw_text.lines() {
578        if line.is_empty() {
579            continue;
580        }
581
582        if let Some((field_type, value)) = line.split_once(':') {
583            let value = value.trim();
584            match field_type {
585                "F" => {
586                    if let Some(file) = current_file.take() {
587                        file_references.push(file);
588                    }
589                    current_dir = value.to_string();
590                }
591                "R" => {
592                    if let Some(file) = current_file.take() {
593                        file_references.push(file);
594                    }
595
596                    let path = if current_dir.is_empty() {
597                        value.to_string()
598                    } else {
599                        format!("{}/{}", current_dir, value)
600                    };
601
602                    current_file = Some(FileReference {
603                        path,
604                        size: None,
605                        sha1: None,
606                        md5: None,
607                        sha256: None,
608                        sha512: None,
609                        extra_data: None,
610                    });
611                }
612                "Z" => {
613                    if let Some(ref mut file) = current_file
614                        && value.starts_with("Q1")
615                    {
616                        use base64::Engine;
617                        if let Ok(decoded) =
618                            base64::engine::general_purpose::STANDARD.decode(&value[2..])
619                        {
620                            let hex_string = decoded
621                                .iter()
622                                .map(|b| format!("{:02x}", b))
623                                .collect::<String>();
624                            file.sha1 = Some(hex_string);
625                        }
626                    }
627                }
628                "a" => {
629                    if let Some(ref mut file) = current_file {
630                        let mut extra = HashMap::new();
631                        extra.insert(
632                            "attributes".to_string(),
633                            serde_json::Value::String(value.to_string()),
634                        );
635                        file.extra_data = Some(extra);
636                    }
637                }
638                _ => {}
639            }
640        }
641    }
642
643    if let Some(file) = current_file {
644        file_references.push(file);
645    }
646
647    file_references
648}
649
/// Collects the whitespace-separated entries of every `p:` line in the
/// paragraph, in file order.
fn extract_providers(raw_text: &str) -> Vec<String> {
    raw_text
        .lines()
        .filter_map(|line| line.strip_prefix("p:"))
        .flat_map(|entries| entries.split_whitespace())
        .map(|entry| entry.to_owned())
        .collect()
}
665
666fn build_alpine_purl(
667    name: &str,
668    version: Option<&str>,
669    architecture: Option<&str>,
670) -> Option<String> {
671    use packageurl::PackageUrl;
672
673    let mut purl = PackageUrl::new(PACKAGE_TYPE.as_str(), name).ok()?;
674
675    if let Some(ver) = version {
676        purl.with_version(ver).ok()?;
677    }
678
679    if let Some(arch) = architecture {
680        purl.add_qualifier("arch", arch).ok()?;
681    }
682
683    Some(purl.to_string())
684}
685
686/// Parser for Alpine Linux .apk package archives
687pub struct AlpineApkParser;
688
689impl PackageParser for AlpineApkParser {
690    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
691
692    fn is_match(path: &Path) -> bool {
693        path.extension().and_then(|e| e.to_str()) == Some("apk")
694    }
695
696    fn extract_packages(path: &Path) -> Vec<PackageData> {
697        vec![match extract_apk_archive(path) {
698            Ok(data) => data,
699            Err(e) => {
700                warn!("Failed to extract .apk archive {:?}: {}", path, e);
701                PackageData {
702                    package_type: Some(PACKAGE_TYPE),
703                    datasource_id: Some(DatasourceId::AlpineApkArchive),
704                    ..Default::default()
705                }
706            }
707        }]
708    }
709}
710
711fn extract_apk_archive(path: &Path) -> Result<PackageData, String> {
712    use flate2::read::GzDecoder;
713    use std::io::Read;
714
715    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .apk file: {}", e))?;
716
717    let decoder = GzDecoder::new(file);
718    let mut archive = tar::Archive::new(decoder);
719
720    for entry_result in archive
721        .entries()
722        .map_err(|e| format!("Failed to read tar entries: {}", e))?
723    {
724        let mut entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
725
726        let entry_path = entry
727            .path()
728            .map_err(|e| format!("Failed to get entry path: {}", e))?;
729
730        if entry_path.ends_with(".PKGINFO") {
731            let mut content = String::new();
732            entry
733                .read_to_string(&mut content)
734                .map_err(|e| format!("Failed to read .PKGINFO: {}", e))?;
735
736            return Ok(parse_pkginfo(&content));
737        }
738    }
739
740    Err(".apk archive does not contain .PKGINFO file".to_string())
741}
742
743fn parse_pkginfo(content: &str) -> PackageData {
744    let mut fields: HashMap<&str, Vec<&str>> = HashMap::new();
745
746    for line in content.lines() {
747        let line = line.trim();
748        if line.is_empty() || line.starts_with('#') {
749            continue;
750        }
751
752        if let Some((key, value)) = line.split_once(" = ") {
753            fields.entry(key.trim()).or_default().push(value.trim());
754        }
755    }
756
757    let name = fields
758        .get("pkgname")
759        .and_then(|v| v.first())
760        .map(|s| s.to_string());
761    let pkgver = fields.get("pkgver").and_then(|v| v.first());
762    let version = pkgver.map(|s| s.to_string());
763    let arch = fields
764        .get("arch")
765        .and_then(|v| v.first())
766        .map(|s| s.to_string());
767    let license = fields
768        .get("license")
769        .and_then(|v| v.first())
770        .map(|s| s.to_string());
771    let description = fields
772        .get("pkgdesc")
773        .and_then(|v| v.first())
774        .map(|s| s.to_string());
775    let homepage = fields
776        .get("url")
777        .and_then(|v| v.first())
778        .map(|s| s.to_string());
779    let origin = fields
780        .get("origin")
781        .and_then(|v| v.first())
782        .map(|s| s.to_string());
783    let maintainer_str = fields.get("maintainer").and_then(|v| v.first());
784
785    let mut parties = Vec::new();
786    if let Some(maint) = maintainer_str {
787        let (maint_name, maint_email) = split_name_email(maint);
788        parties.push(Party {
789            r#type: Some("person".to_string()),
790            role: Some("maintainer".to_string()),
791            name: maint_name,
792            email: maint_email,
793            url: None,
794            organization: None,
795            organization_url: None,
796            timezone: None,
797        });
798    }
799
800    let purl = name
801        .as_ref()
802        .and_then(|n| build_alpine_purl(n, version.as_deref(), arch.as_deref()));
803
804    let mut dependencies = Vec::new();
805    if let Some(depends_list) = fields.get("depend") {
806        for dep_str in depends_list {
807            let dep_name = dep_str.split_whitespace().next().unwrap_or(dep_str);
808            dependencies.push(Dependency {
809                purl: Some(format!("pkg:alpine/{}", dep_name)),
810                extracted_requirement: Some(dep_str.to_string()),
811                scope: Some("runtime".to_string()),
812                is_runtime: Some(true),
813                is_optional: Some(false),
814                is_pinned: None,
815                is_direct: Some(true),
816                resolved_package: None,
817                extra_data: None,
818            });
819        }
820    }
821
822    PackageData {
823        datasource_id: Some(DatasourceId::AlpineApkArchive),
824        package_type: Some(PACKAGE_TYPE),
825        namespace: Some("alpine".to_string()),
826        name,
827        version,
828        description,
829        homepage_url: homepage,
830        extracted_license_statement: license,
831        parties,
832        dependencies,
833        purl,
834        extra_data: origin.map(|o| {
835            let mut map = HashMap::new();
836            map.insert("origin".to_string(), serde_json::Value::String(o));
837            map
838        }),
839        ..Default::default()
840    }
841}
842
843#[cfg(test)]
844mod tests {
845    use super::*;
846    use std::io::Write;
847    use std::path::PathBuf;
848    use tempfile::TempDir;
849
850    /// Creates a temp file mimicking the Alpine installed db path structure.
851    /// Returns the TempDir (must be kept alive) and path to the file.
852    fn create_temp_installed_db(content: &str) -> (TempDir, PathBuf) {
853        let temp_dir = TempDir::new().expect("Failed to create temp dir");
854        let db_dir = temp_dir.path().join("lib/apk/db");
855        std::fs::create_dir_all(&db_dir).expect("Failed to create db dir");
856        let file_path = db_dir.join("installed");
857        let mut file = std::fs::File::create(&file_path).expect("Failed to create file");
858        file.write_all(content.as_bytes())
859            .expect("Failed to write content");
860        (temp_dir, file_path)
861    }
862
863    #[test]
864    fn test_alpine_parser_is_match() {
865        assert!(AlpineInstalledParser::is_match(&PathBuf::from(
866            "/lib/apk/db/installed"
867        )));
868        assert!(AlpineInstalledParser::is_match(&PathBuf::from(
869            "/var/lib/apk/db/installed"
870        )));
871        assert!(!AlpineInstalledParser::is_match(&PathBuf::from(
872            "/lib/apk/db/status"
873        )));
874        assert!(!AlpineInstalledParser::is_match(&PathBuf::from(
875            "installed"
876        )));
877    }
878
879    #[test]
880    fn test_parse_alpine_package_basic() {
881        let content = "C:Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=
882P:alpine-baselayout-data
883V:3.2.0-r22
884A:x86_64
885S:11435
886I:73728
887T:Alpine base dir structure and init scripts
888U:https://git.alpinelinux.org/cgit/aports/tree/main/alpine-baselayout
889L:GPL-2.0-only
890o:alpine-baselayout
891m:Natanael Copa <ncopa@alpinelinux.org>
892t:1655134784
893c:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd
894
895";
896        let (_dir, path) = create_temp_installed_db(content);
897        let pkg = AlpineInstalledParser::extract_first_package(&path);
898        assert_eq!(pkg.name, Some("alpine-baselayout-data".to_string()));
899        assert_eq!(pkg.version, Some("3.2.0-r22".to_string()));
900        assert_eq!(pkg.namespace, Some("alpine".to_string()));
901        assert_eq!(
902            pkg.description,
903            Some("Alpine base dir structure and init scripts".to_string())
904        );
905        assert_eq!(
906            pkg.homepage_url,
907            Some("https://git.alpinelinux.org/cgit/aports/tree/main/alpine-baselayout".to_string())
908        );
909        assert_eq!(
910            pkg.extracted_license_statement,
911            Some("GPL-2.0-only".to_string())
912        );
913        assert_eq!(pkg.parties.len(), 1);
914        assert_eq!(pkg.parties[0].name, Some("Natanael Copa".to_string()));
915        assert_eq!(
916            pkg.parties[0].email,
917            Some("ncopa@alpinelinux.org".to_string())
918        );
919        assert!(
920            pkg.purl
921                .as_ref()
922                .unwrap()
923                .contains("alpine-baselayout-data")
924        );
925        assert!(pkg.purl.as_ref().unwrap().contains("arch=x86_64"));
926    }
927
928    #[test]
929    fn test_parse_alpine_with_dependencies() {
930        let content = "P:musl
931V:1.2.3-r0
932A:x86_64
933D:scanelf so:libc.musl-x86_64.so.1
934
935";
936        let (_dir, path) = create_temp_installed_db(content);
937        let pkg = AlpineInstalledParser::extract_first_package(&path);
938        assert_eq!(pkg.name, Some("musl".to_string()));
939        assert_eq!(pkg.dependencies.len(), 1);
940        assert!(
941            pkg.dependencies[0]
942                .purl
943                .as_ref()
944                .unwrap()
945                .contains("scanelf")
946        );
947    }
948
949    #[test]
950    fn test_build_alpine_purl() {
951        let purl = build_alpine_purl("busybox", Some("1.31.1-r9"), Some("x86_64"));
952        assert_eq!(
953            purl,
954            Some("pkg:alpine/busybox@1.31.1-r9?arch=x86_64".to_string())
955        );
956
957        let purl_no_arch = build_alpine_purl("package", Some("1.0"), None);
958        assert_eq!(purl_no_arch, Some("pkg:alpine/package@1.0".to_string()));
959    }
960
961    #[test]
962    fn test_parse_alpine_extra_data() {
963        let content = "P:test-package
964V:1.0
965C:base64checksum==
966S:12345
967I:67890
968t:1234567890
969c:gitcommithash
970
971";
972        let (_dir, path) = create_temp_installed_db(content);
973        let pkg = AlpineInstalledParser::extract_first_package(&path);
974        assert!(pkg.extra_data.is_some());
975        let extra = pkg.extra_data.as_ref().unwrap();
976        assert_eq!(extra["checksum"], "base64checksum==");
977        assert_eq!(extra["compressed_size"], "12345");
978        assert_eq!(extra["installed_size"], "67890");
979        assert_eq!(extra["build_timestamp"], "1234567890");
980        assert_eq!(extra["git_commit"], "gitcommithash");
981    }
982
983    #[test]
984    fn test_parse_alpine_case_sensitive_keys() {
985        let content = "C:Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=
986P:test-pkg
987V:1.0
988T:A test description
989t:1655134784
990c:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd
991
992";
993        let (_dir, path) = create_temp_installed_db(content);
994        let pkg = AlpineInstalledParser::extract_first_package(&path);
995        assert_eq!(pkg.description, Some("A test description".to_string()));
996        let extra = pkg.extra_data.as_ref().unwrap();
997        assert_eq!(extra["checksum"], "Q1v4QhLje3kWlC8DJj+ZfJTjlJRSU=");
998        assert_eq!(extra["build_timestamp"], "1655134784");
999        assert_eq!(
1000            extra["git_commit"],
1001            "cb70ca5c6d6db0399d2dd09189c5d57827bce5cd"
1002        );
1003    }
1004
1005    #[test]
1006    fn test_parse_alpine_multiple_packages() {
1007        let content = "P:package1
1008V:1.0
1009A:x86_64
1010
1011P:package2
1012V:2.0
1013A:aarch64
1014
1015";
1016        let (_dir, path) = create_temp_installed_db(content);
1017        let pkgs = AlpineInstalledParser::extract_packages(&path);
1018        assert_eq!(pkgs.len(), 2);
1019        assert_eq!(pkgs[0].name, Some("package1".to_string()));
1020        assert_eq!(pkgs[0].version, Some("1.0".to_string()));
1021        assert_eq!(pkgs[1].name, Some("package2".to_string()));
1022        assert_eq!(pkgs[1].version, Some("2.0".to_string()));
1023    }
1024
1025    #[test]
1026    fn test_parse_alpine_file_references() {
1027        let content = "P:test-pkg
1028V:1.0
1029F:usr/bin
1030R:test
1031Z:Q1WTc55xfvPogzA0YUV24D0Ym+MKE=
1032F:etc
1033R:config
1034Z:Q1pcfTfDNEbNKQc2s1tia7da05M8Q=
1035
1036";
1037        let (_dir, path) = create_temp_installed_db(content);
1038        let pkg = AlpineInstalledParser::extract_first_package(&path);
1039        assert_eq!(pkg.file_references.len(), 2);
1040        assert_eq!(pkg.file_references[0].path, "usr/bin/test");
1041        assert!(pkg.file_references[0].sha1.is_some());
1042        assert_eq!(pkg.file_references[1].path, "etc/config");
1043        assert!(pkg.file_references[1].sha1.is_some());
1044    }
1045
1046    #[test]
1047    fn test_parse_alpine_empty_fields() {
1048        let content = "P:minimal-package
1049V:1.0
1050
1051";
1052        let (_dir, path) = create_temp_installed_db(content);
1053        let pkg = AlpineInstalledParser::extract_first_package(&path);
1054        assert_eq!(pkg.name, Some("minimal-package".to_string()));
1055        assert_eq!(pkg.version, Some("1.0".to_string()));
1056        assert!(pkg.description.is_none());
1057        assert!(pkg.homepage_url.is_none());
1058        assert_eq!(pkg.dependencies.len(), 0);
1059    }
1060
1061    #[test]
1062    fn test_parse_alpine_origin_field() {
1063        let content = "P:busybox-ifupdown
1064V:1.35.0-r13
1065o:busybox
1066A:x86_64
1067
1068";
1069        let (_dir, path) = create_temp_installed_db(content);
1070        let pkg = AlpineInstalledParser::extract_first_package(&path);
1071        assert_eq!(pkg.name, Some("busybox-ifupdown".to_string()));
1072        assert_eq!(pkg.source_packages.len(), 1);
1073        assert_eq!(pkg.source_packages[0], "pkg:alpine/busybox");
1074    }
1075
1076    #[test]
1077    fn test_parse_alpine_url_field() {
1078        let content = "P:openssl
1079V:1.1.1q-r0
1080U:https://www.openssl.org
1081A:x86_64
1082
1083";
1084        let (_dir, path) = create_temp_installed_db(content);
1085        let pkg = AlpineInstalledParser::extract_first_package(&path);
1086        assert_eq!(
1087            pkg.homepage_url,
1088            Some("https://www.openssl.org".to_string())
1089        );
1090    }
1091
1092    #[test]
1093    fn test_parse_alpine_provider_field() {
1094        let content = "P:some-package
1095V:1.0
1096p:cmd:binary=1.0
1097p:so:libtest.so.1
1098
1099";
1100        let (_dir, path) = create_temp_installed_db(content);
1101        let pkg = AlpineInstalledParser::extract_first_package(&path);
1102        assert!(pkg.extra_data.is_some());
1103        let extra = pkg.extra_data.as_ref().unwrap();
1104        let providers = extra.get("providers").and_then(|v| v.as_array());
1105        assert!(providers.is_some());
1106        let provider_array = providers.unwrap();
1107        assert_eq!(provider_array.len(), 2);
1108        assert_eq!(provider_array[0].as_str(), Some("cmd:binary=1.0"));
1109        assert_eq!(provider_array[1].as_str(), Some("so:libtest.so.1"));
1110    }
1111
1112    #[test]
1113    fn test_alpine_apk_parser_is_match() {
1114        assert!(AlpineApkParser::is_match(&PathBuf::from("package.apk")));
1115        assert!(AlpineApkParser::is_match(&PathBuf::from(
1116            "/path/to/app-1.0.apk"
1117        )));
1118        assert!(!AlpineApkParser::is_match(&PathBuf::from("package.tar.gz")));
1119        assert!(!AlpineApkParser::is_match(&PathBuf::from("installed")));
1120    }
1121
1122    #[test]
1123    fn test_alpine_apkbuild_parser_is_match() {
1124        assert!(AlpineApkbuildParser::is_match(&PathBuf::from("APKBUILD")));
1125        assert!(AlpineApkbuildParser::is_match(&PathBuf::from(
1126            "/path/to/APKBUILD"
1127        )));
1128        assert!(!AlpineApkbuildParser::is_match(&PathBuf::from("apkbuild")));
1129        assert!(!AlpineApkbuildParser::is_match(&PathBuf::from(
1130            "APKBUILD.txt"
1131        )));
1132    }
1133
1134    #[test]
1135    fn test_parse_apkbuild_icu_reference() {
1136        let path = PathBuf::from(
1137            "reference/scancode-toolkit/tests/packagedcode/data/alpine/apkbuild/alpine14/main/icu/APKBUILD",
1138        );
1139        let pkg = AlpineApkbuildParser::extract_first_package(&path);
1140
1141        assert_eq!(pkg.datasource_id, Some(DatasourceId::AlpineApkbuild));
1142        assert_eq!(pkg.name.as_deref(), Some("icu"));
1143        assert_eq!(pkg.version.as_deref(), Some("67.1-r2"));
1144        assert_eq!(
1145            pkg.description.as_deref(),
1146            Some("International Components for Unicode library")
1147        );
1148        assert_eq!(
1149            pkg.homepage_url.as_deref(),
1150            Some("http://site.icu-project.org/")
1151        );
1152        assert_eq!(
1153            pkg.extracted_license_statement.as_deref(),
1154            Some("MIT ICU Unicode-TOU")
1155        );
1156        assert_eq!(
1157            pkg.declared_license_expression_spdx.as_deref(),
1158            Some("MIT AND ICU AND Unicode-TOU")
1159        );
1160        let extra = pkg.extra_data.as_ref().unwrap();
1161        assert!(extra.contains_key("sources"));
1162        assert!(extra.contains_key("checksums"));
1163    }
1164
1165    #[test]
1166    fn test_parse_apkbuild_custom_multiple_license_uses_raw_matched_text() {
1167        let path = PathBuf::from(
1168            "reference/scancode-toolkit/tests/packagedcode/data/alpine/apkbuild/alpine13/main/linux-firmware/APKBUILD",
1169        );
1170        let pkg = AlpineApkbuildParser::extract_first_package(&path);
1171
1172        assert_eq!(pkg.name.as_deref(), Some("linux-firmware"));
1173        assert_eq!(pkg.version.as_deref(), Some("20201218-r0"));
1174        assert_eq!(
1175            pkg.extracted_license_statement.as_deref(),
1176            Some("custom:multiple")
1177        );
1178        assert_eq!(
1179            pkg.declared_license_expression.as_deref(),
1180            Some("unknown-license-reference")
1181        );
1182        assert_eq!(
1183            pkg.declared_license_expression_spdx.as_deref(),
1184            Some("LicenseRef-provenant-unknown-license-reference")
1185        );
1186        let matched = pkg.license_detections[0].matches[0].matched_text.as_deref();
1187        assert_eq!(matched, Some("custom:multiple"));
1188    }
1189
1190    #[test]
1191    fn test_parse_alpine_no_files_package_still_detected() {
1192        let path = PathBuf::from(
1193            "reference/scancode-toolkit/tests/packagedcode/data/alpine/full-installed/installed",
1194        );
1195        let content = std::fs::read_to_string(&path).expect("read installed db fixture");
1196        let packages = parse_alpine_installed_db(&content);
1197        let libc_utils = packages
1198            .into_iter()
1199            .find(|pkg| pkg.name.as_deref() == Some("libc-utils"))
1200            .expect("libc-utils package should exist");
1201
1202        assert_eq!(libc_utils.file_references.len(), 0);
1203        assert!(
1204            libc_utils
1205                .purl
1206                .as_deref()
1207                .is_some_and(|p| p.contains("libc-utils"))
1208        );
1209    }
1210
1211    #[test]
1212    fn test_parse_alpine_commit_generates_https_vcs_url() {
1213        let content =
1214            "P:test-package\nV:1.0-r0\nA:x86_64\nc:cb70ca5c6d6db0399d2dd09189c5d57827bce5cd\n";
1215        let (_dir, path) = create_temp_installed_db(content);
1216        let pkg = AlpineInstalledParser::extract_first_package(&path);
1217
1218        assert_eq!(
1219            pkg.vcs_url.as_deref(),
1220            Some(
1221                "git+https://git.alpinelinux.org/aports/commit/?id=cb70ca5c6d6db0399d2dd09189c5d57827bce5cd"
1222            )
1223        );
1224    }
1225
1226    #[test]
1227    fn test_parse_alpine_virtual_package() {
1228        let content = "P:.postgis-rundeps
1229V:20210104.190748
1230A:noarch
1231S:0
1232I:0
1233T:virtual meta package
1234U:
1235L:
1236D:json-c geos gdal proj protobuf-c libstdc++
1237
1238";
1239        let (_dir, path) = create_temp_installed_db(content);
1240        let pkg = AlpineInstalledParser::extract_first_package(&path);
1241        assert_eq!(pkg.name, Some(".postgis-rundeps".to_string()));
1242        assert_eq!(pkg.version, Some("20210104.190748".to_string()));
1243        assert_eq!(pkg.description, Some("virtual meta package".to_string()));
1244        assert!(pkg.extra_data.is_some());
1245        let extra = pkg.extra_data.as_ref().unwrap();
1246        assert_eq!(
1247            extra.get("is_virtual").and_then(|v| v.as_bool()),
1248            Some(true)
1249        );
1250        assert_eq!(pkg.dependencies.len(), 6);
1251        assert!(pkg.homepage_url.is_none());
1252        assert!(pkg.extracted_license_statement.is_none());
1253    }
1254}
1255
// Registers descriptive metadata for the Alpine installed-db / `.apk` entry.
// NOTE(review): the `register_parser!` definition is not visible from here; the
// arguments look like (description, path glob patterns, package type, primary
// language — empty for this entry, documentation URL). Confirm against the macro.
crate::register_parser!(
    "Alpine Linux package (installed db and .apk archive)",
    &["**/lib/apk/db/installed", "**/*.apk"],
    "alpine",
    "",
    Some("https://wiki.alpinelinux.org/wiki/Apk_spec"),
);
1263
// Registers descriptive metadata for the APKBUILD recipe entry.
// NOTE(review): the `register_parser!` definition is not visible from here; the
// arguments look like (description, path glob patterns, package type, primary
// language, documentation URL). Confirm against the macro.
crate::register_parser!(
    "Alpine Linux APKBUILD recipe",
    &["**/APKBUILD"],
    "alpine",
    "Shell",
    Some("https://wiki.alpinelinux.org/wiki/APKBUILD_Reference"),
);