provenant/parsers/rpm_license_files.rs
1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for RPM license files in /usr/share/licenses/ directories.
5//!
6//! Identifies packages from their license files installed in the standard
7//! /usr/share/licenses/ location, primarily used in Mariner distroless containers.
8
9use crate::models::{DatasourceId, PackageType};
10use std::path::Path;
11
12use crate::models::PackageData;
13use crate::parsers::PackageParser;
14use crate::parsers::utils::truncate_field;
15
16const PACKAGE_TYPE: PackageType = PackageType::Rpm;
17
/// Recognizes RPM packages by the license files they install under
/// `usr/share/licenses/`.
///
/// Mariner distroless images ship no RPM database, so the per-package license
/// directory is the remaining evidence that a package is installed. This
/// parser reports one package per matching license file.
///
/// # Recognized files
/// - `usr/share/licenses/<package>/COPYING*`
/// - `usr/share/licenses/<package>/LICENSE*`
///
/// File-name prefixes are compared case-sensitively.
///
/// # Reported data
/// - `name`: the first path component after `usr/share/licenses/`, e.g.
///   `/usr/share/licenses/openssl/LICENSE` yields `openssl`
/// - `namespace`: always `"mariner"`
/// - `datasource_id`: `DatasourceId::RpmPackageLicenses`
/// - `purl`: a package URL of the RPM type carrying the `mariner` namespace
///
/// # Out of scope
/// The content of the license file is never read here; license detection is
/// the job of the license scanner.
pub struct RpmLicenseFilesParser;
39
40impl PackageParser for RpmLicenseFilesParser {
41 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
42
43 fn is_match(path: &Path) -> bool {
44 let path_str = path.to_string_lossy();
45
46 // Check if path contains usr/share/licenses/
47 if !path_str.contains("usr/share/licenses/") {
48 return false;
49 }
50
51 // Get the filename
52 if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
53 // Match files starting with COPYING or LICENSE (case-sensitive)
54 filename.starts_with("COPYING") || filename.starts_with("LICENSE")
55 } else {
56 false
57 }
58 }
59
60 fn extract_packages(path: &Path) -> Vec<PackageData> {
61 // Extract package name from path
62 let path_str = path.to_string_lossy();
63
64 // Split by usr/share/licenses/ and get the next path component
65 let name = if let Some(after_licenses) = path_str.split("usr/share/licenses/").nth(1) {
66 // Get the first path component after licenses/ (the package name)
67 after_licenses
68 .split('/')
69 .next()
70 .map(|s| truncate_field(s.to_string()))
71 } else {
72 None
73 };
74
75 // Build package data
76 let mut pkg = PackageData {
77 package_type: Some(PACKAGE_TYPE),
78 datasource_id: Some(DatasourceId::RpmPackageLicenses),
79 namespace: Some("mariner".to_string()),
80 name: name.clone(),
81 ..Default::default()
82 };
83
84 // Build PURL if we have a name
85 if let Some(ref package_name) = name {
86 use packageurl::PackageUrl;
87 if let Ok(mut purl) = PackageUrl::new(PACKAGE_TYPE.as_str(), package_name)
88 && purl.with_namespace("mariner").is_ok()
89 {
90 pkg.purl = Some(truncate_field(purl.to_string()));
91 }
92 }
93
94 vec![pkg]
95 }
96
97 fn metadata() -> Vec<super::metadata::ParserMetadata> {
98 vec![super::metadata::ParserMetadata {
99 description: "RPM mariner distroless package license files",
100 file_patterns: &[
101 "*usr/share/licenses/*/COPYING*",
102 "*usr/share/licenses/*/LICENSE*",
103 ],
104 package_type: "rpm",
105 primary_language: "",
106 documentation_url: Some("https://github.com/microsoft/marinara/"),
107 }]
108 }
109}