provenant/parsers/rpm_license_files.rs
//! Parser for RPM license files in /usr/share/licenses/ directories.
//!
//! Identifies packages from their license files installed in the standard
//! /usr/share/licenses/ location, primarily used in Mariner distroless containers.
5
6use crate::models::{DatasourceId, PackageType};
7use std::path::Path;
8
9use crate::models::PackageData;
10use crate::parsers::PackageParser;
11
/// Package type used for the parser's trait constant, for every `PackageData`
/// it builds, and as the type segment of the generated Package URL.
const PACKAGE_TYPE: PackageType = PackageType::Rpm;
13
/// Package parser keyed on license files under `/usr/share/licenses/`.
///
/// A package is recognized purely from where its license file is installed,
/// which is the convention this parser targets for Mariner distroless
/// containers.
///
/// # Recognized paths
/// - `/usr/share/licenses/*/COPYING*`
/// - `/usr/share/licenses/*/LICENSE*`
///
/// # Reported data
/// - Name: the path component that directly follows `licenses/`; for example
///   `/usr/share/licenses/openssl/LICENSE` yields `openssl`.
/// - Namespace: `mariner`, which is also the Package URL namespace.
/// - Data source: `DatasourceId::RpmPackageLicenses`.
///
/// # Out of scope
/// The content of the license file is never read by this parser; license
/// detection is left to the license scanner.
pub struct RpmLicenseFilesParser;
35
36impl PackageParser for RpmLicenseFilesParser {
37 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
38
39 fn is_match(path: &Path) -> bool {
40 let path_str = path.to_string_lossy();
41
42 // Check if path contains usr/share/licenses/
43 if !path_str.contains("usr/share/licenses/") {
44 return false;
45 }
46
47 // Get the filename
48 if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
49 // Match files starting with COPYING or LICENSE (case-sensitive)
50 filename.starts_with("COPYING") || filename.starts_with("LICENSE")
51 } else {
52 false
53 }
54 }
55
56 fn extract_packages(path: &Path) -> Vec<PackageData> {
57 // Extract package name from path
58 let path_str = path.to_string_lossy();
59
60 // Split by usr/share/licenses/ and get the next path component
61 let name = if let Some(after_licenses) = path_str.split("usr/share/licenses/").nth(1) {
62 // Get the first path component after licenses/ (the package name)
63 after_licenses.split('/').next().map(|s| s.to_string())
64 } else {
65 None
66 };
67
68 // Build package data
69 let mut pkg = PackageData {
70 package_type: Some(PACKAGE_TYPE),
71 datasource_id: Some(DatasourceId::RpmPackageLicenses),
72 namespace: Some("mariner".to_string()),
73 name: name.clone(),
74 ..Default::default()
75 };
76
77 // Build PURL if we have a name
78 if let Some(ref package_name) = name {
79 use packageurl::PackageUrl;
80 if let Ok(mut purl) = PackageUrl::new(PACKAGE_TYPE.as_str(), package_name)
81 && purl.with_namespace("mariner").is_ok()
82 {
83 pkg.purl = Some(purl.to_string());
84 }
85 }
86
87 vec![pkg]
88 }
89}
90
// Invokes the crate's `register_parser!` macro for this parser. The first
// argument is a human-readable description; the two glob patterns describe
// COPYING* / LICENSE* files inside a per-package directory under
// `usr/share/licenses/`.
// NOTE(review): the remaining positional arguments look like the package type
// ("rpm"), a primary language that is left empty, and a documentation URL —
// confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "RPM mariner distroless package license files",
    &[
        "*usr/share/licenses/*/COPYING*",
        "*usr/share/licenses/*/LICENSE*"
    ],
    "rpm",
    "",
    Some("https://github.com/microsoft/marinara/"),
);