provenant/parsers/rpm_license_files.rs
1//! Parser for RPM license files in /usr/share/licenses/ directories.
2//!
3//! Identifies packages from their license files installed in the standard
4//! /usr/share/licenses/ location, primarily used in Mariner distroless containers.
5
6use crate::models::{DatasourceId, PackageType};
7use std::path::Path;
8
9use crate::models::PackageData;
10use crate::parsers::PackageParser;
11use crate::parsers::utils::truncate_field;
12
/// Package type assigned to every package this parser emits; its string form
/// is also used as the type component of the generated Package URL.
const PACKAGE_TYPE: PackageType = PackageType::Rpm;
14
/// Parser for RPM license files in `/usr/share/licenses/` directories.
///
/// Identifies a package purely from the location of its installed license
/// file; the file contents are never read. This layout is primarily used in
/// Mariner distroless containers.
///
/// # Supported Formats
/// - `/usr/share/licenses/*/COPYING*` - COPYING license files
/// - `/usr/share/licenses/*/LICENSE*` - LICENSE files
///
/// File names are compared case-sensitively, so `license.txt` is not matched.
///
/// # Key Features
/// - Extracts the package name from the directory path
/// - Supports the Mariner distroless container convention
/// - Generates a Package URL with the `mariner` namespace
///
/// # Implementation Notes
/// - The package name is the path component that follows `licenses/`
/// - For example: `/usr/share/licenses/openssl/LICENSE` → package name is "openssl"
/// - Does NOT perform license detection (that's handled by the license scanner)
/// - Exactly one `PackageData` record is produced per license file
/// - datasource_id: "rpm_package_licenses"
/// - namespace: "mariner"
pub struct RpmLicenseFilesParser;
36
37impl PackageParser for RpmLicenseFilesParser {
38 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
39
40 fn is_match(path: &Path) -> bool {
41 let path_str = path.to_string_lossy();
42
43 // Check if path contains usr/share/licenses/
44 if !path_str.contains("usr/share/licenses/") {
45 return false;
46 }
47
48 // Get the filename
49 if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
50 // Match files starting with COPYING or LICENSE (case-sensitive)
51 filename.starts_with("COPYING") || filename.starts_with("LICENSE")
52 } else {
53 false
54 }
55 }
56
57 fn extract_packages(path: &Path) -> Vec<PackageData> {
58 // Extract package name from path
59 let path_str = path.to_string_lossy();
60
61 // Split by usr/share/licenses/ and get the next path component
62 let name = if let Some(after_licenses) = path_str.split("usr/share/licenses/").nth(1) {
63 // Get the first path component after licenses/ (the package name)
64 after_licenses
65 .split('/')
66 .next()
67 .map(|s| truncate_field(s.to_string()))
68 } else {
69 None
70 };
71
72 // Build package data
73 let mut pkg = PackageData {
74 package_type: Some(PACKAGE_TYPE),
75 datasource_id: Some(DatasourceId::RpmPackageLicenses),
76 namespace: Some("mariner".to_string()),
77 name: name.clone(),
78 ..Default::default()
79 };
80
81 // Build PURL if we have a name
82 if let Some(ref package_name) = name {
83 use packageurl::PackageUrl;
84 if let Ok(mut purl) = PackageUrl::new(PACKAGE_TYPE.as_str(), package_name)
85 && purl.with_namespace("mariner").is_ok()
86 {
87 pkg.purl = Some(truncate_field(purl.to_string()));
88 }
89 }
90
91 vec![pkg]
92 }
93}
94
// Registers this parser's metadata: a human-readable description, the path
// globs it handles, and a reference URL.
// NOTE(review): the meaning of the "rpm" and "" arguments is inferred from
// their position only (presumably package type and primary language) —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "RPM mariner distroless package license files",
    &[
        "*usr/share/licenses/*/COPYING*",
        "*usr/share/licenses/*/LICENSE*"
    ],
    "rpm",
    "",
    Some("https://github.com/microsoft/marinara/"),
);