Skip to main content

provenant/parsers/
misc.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! File type recognizers for various package archives and binary formats.
5//!
6//! This module contains simple file-type recognizers that identify packages by
7//! their file extensions or path patterns. These recognizers do NOT parse file
8//! contents - they only tag files with the appropriate package_type and datasource_id.
9//!
10//! # Implementation Notes
11//!
12//! - All recognizers use the `file_recognizer!` macro to reduce boilerplate
13//! - Recognizers return minimal PackageData with only package_type and datasource_id set
14//! - These correspond to Python's misc.py NonAssemblableDatafileHandler classes
15//! - No actual parsing is performed (Python also has `# TODO: parse me!!!`)
16//! - Some recognizers use magic byte detection for disambiguation (Squashfs, NSIS, InstallShield)
17
18use std::path::Path;
19
20use super::PackageParser;
21use crate::models::{DatasourceId, PackageData, PackageType};
22use crate::utils::magic;
23
/// Generates a unit-struct file recognizer together with its `PackageParser` impl.
///
/// The generated `extract_packages` never looks at the path it is given: it
/// returns a single `PackageData` carrying only the package type and the
/// datasource id, with every other field left at its `Default` value.
///
/// # Arguments
///
/// * `$name` - Identifier of the unit struct to generate
/// * `$pkg_type` - Package type value (e.g., `PackageType::Jar`); also used as the
///   `PACKAGE_TYPE` associated constant, so it must be a constant expression
/// * `$datasource` - Datasource id value (e.g., `DatasourceId::JavaJar`)
/// * `$match_fn` - Callable that takes a `&Path` and returns `bool`; backs `is_match`
macro_rules! file_recognizer {
    ($name:ident, $pkg_type:expr, $datasource:expr, $match_fn:expr) => {
        pub struct $name;

        impl PackageParser for $name {
            const PACKAGE_TYPE: PackageType = $pkg_type;

            fn is_match(path: &Path) -> bool {
                let matcher = $match_fn;
                matcher(path)
            }

            fn extract_packages(_path: &Path) -> Vec<PackageData> {
                // Only the two identifying fields are filled in; no parsing happens here.
                let tagged = PackageData {
                    package_type: Some(Self::PACKAGE_TYPE),
                    datasource_id: Some($datasource),
                    ..Default::default()
                };
                vec![tagged]
            }
        }
    };
}
57
58// Java Archives
59
60file_recognizer!(
61    JavaJarRecognizer,
62    PackageType::Jar,
63    DatasourceId::JavaJar,
64    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("jar")
65);
66
67file_recognizer!(
68    IvyXmlRecognizer,
69    PackageType::Ivy,
70    DatasourceId::AntIvyXml,
71    |path: &Path| path.to_str().is_some_and(|p| p.ends_with("/ivy.xml"))
72);
73
74file_recognizer!(
75    JavaWarRecognizer,
76    PackageType::War,
77    DatasourceId::JavaWarArchive,
78    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("war")
79);
80
81file_recognizer!(
82    JavaWarWebXmlRecognizer,
83    PackageType::War,
84    DatasourceId::JavaWarWebXml,
85    |path: &Path| path
86        .to_str()
87        .is_some_and(|p| p.ends_with("/WEB-INF/web.xml") || p.ends_with("WEB-INF/web.xml"))
88);
89
90file_recognizer!(
91    JavaEarRecognizer,
92    PackageType::Ear,
93    DatasourceId::JavaEarArchive,
94    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("ear")
95);
96
97file_recognizer!(
98    JavaEarAppXmlRecognizer,
99    PackageType::Ear,
100    DatasourceId::JavaEarApplicationXml,
101    |path: &Path| path.to_str().is_some_and(
102        |p| p.ends_with("/META-INF/application.xml") || p.ends_with("META-INF/application.xml")
103    )
104);
105
106// Apache Axis2
107
108file_recognizer!(
109    Axis2ModuleXmlRecognizer,
110    PackageType::Axis2,
111    DatasourceId::Axis2ModuleXml,
112    |path: &Path| {
113        path.to_str().is_some_and(|p| {
114            let lower = p.to_lowercase();
115            lower.ends_with("/meta-inf/module.xml") || lower.ends_with("meta-inf/module.xml")
116        })
117    }
118);
119
120file_recognizer!(
121    Axis2MarRecognizer,
122    PackageType::Axis2,
123    DatasourceId::Axis2Mar,
124    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("mar")
125);
126
127// JBoss
128
129file_recognizer!(
130    JBossSarRecognizer,
131    PackageType::JbossService,
132    DatasourceId::JbossSar,
133    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("sar")
134);
135
136file_recognizer!(
137    JBossServiceXmlRecognizer,
138    PackageType::JbossService,
139    DatasourceId::JbossServiceXml,
140    |path: &Path| {
141        path.to_str().is_some_and(|p| {
142            let lower = p.to_lowercase();
143            lower.ends_with("/meta-inf/jboss-service.xml")
144                || lower.ends_with("meta-inf/jboss-service.xml")
145        })
146    }
147);
148
149// Meteor
150
151file_recognizer!(
152    MeteorPackageRecognizer,
153    PackageType::Meteor,
154    DatasourceId::MeteorPackage,
155    |path: &Path| path.to_str().is_some_and(|p| p.ends_with("/package.js"))
156);
157
158// Mobile Apps
159
160file_recognizer!(
161    AndroidLibraryRecognizer,
162    PackageType::AndroidLib,
163    DatasourceId::AndroidAarLibrary,
164    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("aar")
165);
166
167file_recognizer!(
168    MozillaXpiRecognizer,
169    PackageType::Mozilla,
170    DatasourceId::MozillaXpi,
171    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("xpi")
172);
173
174file_recognizer!(
175    ChromeCrxRecognizer,
176    PackageType::Chrome,
177    DatasourceId::ChromeCrx,
178    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("crx")
179);
180
181file_recognizer!(
182    IosIpaRecognizer,
183    PackageType::Ios,
184    DatasourceId::IosIpa,
185    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("ipa")
186);
187
188// Archives
189
190file_recognizer!(
191    CabArchiveRecognizer,
192    PackageType::Cab,
193    DatasourceId::MicrosoftCabinet,
194    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("cab")
195);
196
197file_recognizer!(
198    SharArchiveRecognizer,
199    PackageType::Shar,
200    DatasourceId::SharShellArchive,
201    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("shar")
202);
203
204// Disk Images
205
206file_recognizer!(
207    AppleDmgRecognizer,
208    PackageType::Dmg,
209    DatasourceId::AppleDmg,
210    |path: &Path| {
211        path.extension()
212            .and_then(|e| e.to_str())
213            .is_some_and(|ext| ext == "dmg" || ext == "sparseimage")
214    }
215);
216
217file_recognizer!(
218    IsoImageRecognizer,
219    PackageType::Iso,
220    DatasourceId::IsoDiskImage,
221    |path: &Path| {
222        path.extension()
223            .and_then(|e| e.to_str())
224            .is_some_and(|ext| ext == "iso" || ext == "udf" || ext == "img")
225    }
226);
227
228// Installers and Binary Formats (require magic byte detection)
229
230file_recognizer!(
231    SquashfsRecognizer,
232    PackageType::Squashfs,
233    DatasourceId::SquashfsDiskImage,
234    |path: &Path| magic::is_squashfs(path)
235);
236
237file_recognizer!(
238    NsisRecognizer,
239    PackageType::Nsis,
240    DatasourceId::NsisInstaller,
241    |path: &Path| {
242        path.extension()
243            .and_then(|e| e.to_str())
244            .is_some_and(|ext| ext == "exe")
245            && magic::is_nsis_installer(path)
246    }
247);
248
249file_recognizer!(
250    InstallShieldRecognizer,
251    PackageType::Installshield,
252    DatasourceId::InstallshieldInstaller,
253    |path: &Path| {
254        path.extension()
255            .and_then(|e| e.to_str())
256            .is_some_and(|ext| ext == "exe")
257            && magic::is_zip(path)
258    }
259);
260
261pub(crate) static RECOGNIZER_METADATA: &[super::metadata::ParserMetadata] = &[
262    super::metadata::ParserMetadata {
263        description: "Misc file type recognizers (JAR, WAR, EAR, AAR, iOS, Chrome, Mozilla, installers, disk images, etc.)",
264        file_patterns: &[
265            "**/*.jar",
266            "**/ivy.xml",
267            "**/*.war",
268            "**/WEB-INF/web.xml",
269            "**/*.ear",
270            "**/META-INF/application.xml",
271            "**/meta-inf/module.xml",
272            "**/*.mar",
273            "**/*.sar",
274            "**/meta-inf/jboss-service.xml",
275            "**/package.js",
276            "**/*.aar",
277            "**/*.xpi",
278            "**/*.crx",
279            "**/*.ipa",
280            "**/*.cab",
281            "**/*.shar",
282            "**/*.dmg",
283            "**/*.sparseimage",
284            "**/*.iso",
285            "**/*.udf",
286            "**/*.img",
287            "**/*.exe",
288        ],
289        package_type: "",
290        primary_language: "",
291        documentation_url: None,
292    },
293];