Skip to main content

provenant/parsers/
rpm_license_files.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM license files in /usr/share/licenses/ directories.
5//!
6//! Identifies packages from their license files installed in the standard
7//! /usr/share/licenses/ location, primarily used in Mariner distroless containers.
8
9use crate::models::{DatasourceId, PackageType};
10use std::path::Path;
11
12use crate::models::PackageData;
13use crate::parsers::PackageParser;
14use crate::parsers::utils::truncate_field;
15
16const PACKAGE_TYPE: PackageType = PackageType::Rpm;
17
/// Parser for RPM license files in /usr/share/licenses/ directories.
///
/// Identifies packages from their license files installed in the standard
/// /usr/share/licenses/ location, primarily used in Mariner distroless containers.
///
/// # Supported Formats
/// - `/usr/share/licenses/*/COPYING*` - COPYING license files
/// - `/usr/share/licenses/*/LICENSE*` - LICENSE files
///
/// The `usr/share/licenses/` marker may appear anywhere in the path (for example below an
/// extracted container root). The `COPYING` / `LICENSE` file-name prefix check is
/// case-sensitive.
///
/// # Key Features
/// - Extracts package name from directory path
/// - Supports Mariner distroless container convention
/// - Package URL generation with mariner namespace
///
/// # Implementation Notes
/// - Package name is the first path component after `usr/share/licenses/`
/// - For example: `/usr/share/licenses/openssl/LICENSE` → package name is "openssl"
/// - Does NOT perform license detection (that's handled by the license scanner)
/// - Emits exactly one `PackageData` record per parsed file
/// - datasource_id: `DatasourceId::RpmPackageLicenses`
/// - namespace: "mariner"
/// - The package URL is left unset when no package name can be extracted or when
///   `PackageUrl` construction fails
pub struct RpmLicenseFilesParser;
39
40impl PackageParser for RpmLicenseFilesParser {
41    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
42
43    fn is_match(path: &Path) -> bool {
44        let path_str = path.to_string_lossy();
45
46        // Check if path contains usr/share/licenses/
47        if !path_str.contains("usr/share/licenses/") {
48            return false;
49        }
50
51        // Get the filename
52        if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
53            // Match files starting with COPYING or LICENSE (case-sensitive)
54            filename.starts_with("COPYING") || filename.starts_with("LICENSE")
55        } else {
56            false
57        }
58    }
59
60    fn extract_packages(path: &Path) -> Vec<PackageData> {
61        // Extract package name from path
62        let path_str = path.to_string_lossy();
63
64        // Split by usr/share/licenses/ and get the next path component
65        let name = if let Some(after_licenses) = path_str.split("usr/share/licenses/").nth(1) {
66            // Get the first path component after licenses/ (the package name)
67            after_licenses
68                .split('/')
69                .next()
70                .map(|s| truncate_field(s.to_string()))
71        } else {
72            None
73        };
74
75        // Build package data
76        let mut pkg = PackageData {
77            package_type: Some(PACKAGE_TYPE),
78            datasource_id: Some(DatasourceId::RpmPackageLicenses),
79            namespace: Some("mariner".to_string()),
80            name: name.clone(),
81            ..Default::default()
82        };
83
84        // Build PURL if we have a name
85        if let Some(ref package_name) = name {
86            use packageurl::PackageUrl;
87            if let Ok(mut purl) = PackageUrl::new(PACKAGE_TYPE.as_str(), package_name)
88                && purl.with_namespace("mariner").is_ok()
89            {
90                pkg.purl = Some(truncate_field(purl.to_string()));
91            }
92        }
93
94        vec![pkg]
95    }
96}
97
// Registers metadata for `RpmLicenseFilesParser` with the crate's parser registry.
// NOTE(review): the argument roles below are inferred from their values, since
// `register_parser!` is defined elsewhere — confirm against the macro definition:
//   1. human-readable description of the data source,
//   2. path globs describing the files this parser targets,
//   3. package type ("rpm"),
//   4. an empty string whose meaning is not visible from this file,
//   5. optional documentation URL.
crate::register_parser!(
    "RPM mariner distroless package license files",
    &[
        "*usr/share/licenses/*/COPYING*",
        "*usr/share/licenses/*/LICENSE*"
    ],
    "rpm",
    "",
    Some("https://github.com/microsoft/marinara/"),
);