Skip to main content

provenant/parsers/
rpm_license_files.rs

1//! Parser for RPM license files in /usr/share/licenses/ directories.
2//!
3//! Identifies packages from their license files installed in the standard
4//! /usr/share/licenses/ location, primarily used in Mariner distroless containers.
5
6use crate::models::{DatasourceId, PackageType};
7use std::path::Path;
8
9use crate::models::PackageData;
10use crate::parsers::PackageParser;
11
12const PACKAGE_TYPE: PackageType = PackageType::Rpm;
13
/// Detects RPM packages from license files installed under `usr/share/licenses/`.
///
/// Mariner distroless container images ship without an RPM database, so the
/// license files left behind in `usr/share/licenses/<package>/` are used as
/// evidence that `<package>` is installed.
///
/// # Recognized paths
/// - `*usr/share/licenses/*/COPYING*`
/// - `*usr/share/licenses/*/LICENSE*`
///
/// The `usr/share/licenses/` fragment is matched anywhere in the path, so files
/// inside an extracted root filesystem qualify as well. Filename prefixes are
/// compared case-sensitively.
///
/// # Produced package data
/// - `name`: the first path component following `usr/share/licenses/`
///   (e.g. `/usr/share/licenses/openssl/LICENSE` → `"openssl"`)
/// - `namespace`: `"mariner"`
/// - `datasource_id`: `DatasourceId::RpmPackageLicenses`
/// - `purl`: built from the package type, the `mariner` namespace and the name
///
/// # Scope
/// Only the path is inspected; file contents are never read by this parser, so
/// no license detection happens here.
pub struct RpmLicenseFilesParser;
35
36impl PackageParser for RpmLicenseFilesParser {
37    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
38
39    fn is_match(path: &Path) -> bool {
40        let path_str = path.to_string_lossy();
41
42        // Check if path contains usr/share/licenses/
43        if !path_str.contains("usr/share/licenses/") {
44            return false;
45        }
46
47        // Get the filename
48        if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
49            // Match files starting with COPYING or LICENSE (case-sensitive)
50            filename.starts_with("COPYING") || filename.starts_with("LICENSE")
51        } else {
52            false
53        }
54    }
55
56    fn extract_packages(path: &Path) -> Vec<PackageData> {
57        // Extract package name from path
58        let path_str = path.to_string_lossy();
59
60        // Split by usr/share/licenses/ and get the next path component
61        let name = if let Some(after_licenses) = path_str.split("usr/share/licenses/").nth(1) {
62            // Get the first path component after licenses/ (the package name)
63            after_licenses.split('/').next().map(|s| s.to_string())
64        } else {
65            None
66        };
67
68        // Build package data
69        let mut pkg = PackageData {
70            package_type: Some(PACKAGE_TYPE),
71            datasource_id: Some(DatasourceId::RpmPackageLicenses),
72            namespace: Some("mariner".to_string()),
73            name: name.clone(),
74            ..Default::default()
75        };
76
77        // Build PURL if we have a name
78        if let Some(ref package_name) = name {
79            use packageurl::PackageUrl;
80            if let Ok(mut purl) = PackageUrl::new(PACKAGE_TYPE.as_str(), package_name)
81                && purl.with_namespace("mariner").is_ok()
82            {
83                pkg.purl = Some(purl.to_string());
84            }
85        }
86
87        vec![pkg]
88    }
89}
90
91crate::register_parser!(
92    "RPM mariner distroless package license files",
93    &[
94        "*usr/share/licenses/*/COPYING*",
95        "*usr/share/licenses/*/LICENSE*"
96    ],
97    "rpm",
98    "",
99    Some("https://github.com/microsoft/marinara/"),
100);