Skip to main content

provenant/parsers/
misc.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! File type recognizers for various package archives and binary formats.
5//!
//! This module contains simple file-type recognizers that identify packages by
//! their file extensions or path patterns. These recognizers do NOT parse package
//! metadata out of file contents - at most a few of them check magic bytes (see
//! the notes below) - they only tag files with the appropriate package_type and
//! datasource_id.
9//!
10//! # Implementation Notes
11//!
12//! - All recognizers use the `file_recognizer!` macro to reduce boilerplate
13//! - Recognizers return minimal PackageData with only package_type and datasource_id set
14//! - These correspond to Python's misc.py NonAssemblableDatafileHandler classes
15//! - No actual parsing is performed (Python also has `# TODO: parse me!!!`)
16//! - Some recognizers use magic byte detection for disambiguation (Squashfs, NSIS, InstallShield)
17
18use std::path::Path;
19
20use super::PackageParser;
21use crate::models::{DatasourceId, PackageData, PackageType};
22use crate::utils::magic;
23
/// Helper macro to define file-type recognizers with minimal boilerplate.
///
/// Expands to a public unit struct plus a `PackageParser` implementation for it.
/// The generated `extract_packages` ignores its path argument and always returns
/// exactly one `PackageData` in which only `package_type` and `datasource_id`
/// are populated; every other field comes from `Default::default()`.
///
/// # Arguments
///
/// * `$name` - Identifier of the recognizer struct to generate
/// * `$pkg_type` - `PackageType` value (e.g., `PackageType::Jar`); it is also
///   assigned to the `PACKAGE_TYPE` associated constant, so it must be usable in
///   a constant context
/// * `$datasource` - `DatasourceId` value (e.g., `DatasourceId::JavaJar`)
/// * `$match_fn` - Callable that takes a `&Path` and returns `bool`; it is the
///   whole implementation of `is_match`
macro_rules! file_recognizer {
    ($name:ident, $pkg_type:expr, $datasource:expr, $match_fn:expr) => {
        pub struct $name;

        impl PackageParser for $name {
            const PACKAGE_TYPE: PackageType = $pkg_type;

            fn is_match(path: &Path) -> bool {
                ($match_fn)(path)
            }

            // The path is intentionally unused: recognizers only tag the file.
            fn extract_packages(_path: &Path) -> Vec<PackageData> {
                let tagged = PackageData {
                    package_type: Some($pkg_type),
                    datasource_id: Some($datasource),
                    ..Default::default()
                };
                vec![tagged]
            }
        }
    };
}
57
58// Java Archives
59
60file_recognizer!(
61    JavaJarRecognizer,
62    PackageType::Jar,
63    DatasourceId::JavaJar,
64    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("jar")
65);
66
67file_recognizer!(
68    IvyXmlRecognizer,
69    PackageType::Ivy,
70    DatasourceId::AntIvyXml,
71    |path: &Path| path.to_str().is_some_and(|p| p.ends_with("/ivy.xml"))
72);
73
74file_recognizer!(
75    JavaWarRecognizer,
76    PackageType::War,
77    DatasourceId::JavaWarArchive,
78    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("war")
79);
80
81file_recognizer!(
82    JavaWarWebXmlRecognizer,
83    PackageType::War,
84    DatasourceId::JavaWarWebXml,
85    |path: &Path| path
86        .to_str()
87        .is_some_and(|p| p.ends_with("/WEB-INF/web.xml") || p.ends_with("WEB-INF/web.xml"))
88);
89
90file_recognizer!(
91    JavaEarRecognizer,
92    PackageType::Ear,
93    DatasourceId::JavaEarArchive,
94    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("ear")
95);
96
97file_recognizer!(
98    JavaEarAppXmlRecognizer,
99    PackageType::Ear,
100    DatasourceId::JavaEarApplicationXml,
101    |path: &Path| path.to_str().is_some_and(
102        |p| p.ends_with("/META-INF/application.xml") || p.ends_with("META-INF/application.xml")
103    )
104);
105
106// Apache Axis2
107
108file_recognizer!(
109    Axis2ModuleXmlRecognizer,
110    PackageType::Axis2,
111    DatasourceId::Axis2ModuleXml,
112    |path: &Path| {
113        path.to_str().is_some_and(|p| {
114            let lower = p.to_lowercase();
115            lower.ends_with("/meta-inf/module.xml") || lower.ends_with("meta-inf/module.xml")
116        })
117    }
118);
119
120file_recognizer!(
121    Axis2MarRecognizer,
122    PackageType::Axis2,
123    DatasourceId::Axis2Mar,
124    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("mar")
125);
126
127// JBoss
128
129file_recognizer!(
130    JBossSarRecognizer,
131    PackageType::JbossService,
132    DatasourceId::JbossSar,
133    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("sar")
134);
135
136file_recognizer!(
137    JBossServiceXmlRecognizer,
138    PackageType::JbossService,
139    DatasourceId::JbossServiceXml,
140    |path: &Path| {
141        path.to_str().is_some_and(|p| {
142            let lower = p.to_lowercase();
143            lower.ends_with("/meta-inf/jboss-service.xml")
144                || lower.ends_with("meta-inf/jboss-service.xml")
145        })
146    }
147);
148
149// Meteor
150
151file_recognizer!(
152    MeteorPackageRecognizer,
153    PackageType::Meteor,
154    DatasourceId::MeteorPackage,
155    |path: &Path| path.to_str().is_some_and(|p| p.ends_with("/package.js"))
156);
157
// Mobile Apps and Browser Extensions
159
160file_recognizer!(
161    AndroidLibraryRecognizer,
162    PackageType::AndroidLib,
163    DatasourceId::AndroidAarLibrary,
164    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("aar")
165);
166
167file_recognizer!(
168    MozillaXpiRecognizer,
169    PackageType::Mozilla,
170    DatasourceId::MozillaXpi,
171    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("xpi")
172);
173
174file_recognizer!(
175    ChromeCrxRecognizer,
176    PackageType::Chrome,
177    DatasourceId::ChromeCrx,
178    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("crx")
179);
180
181file_recognizer!(
182    IosIpaRecognizer,
183    PackageType::Ios,
184    DatasourceId::IosIpa,
185    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("ipa")
186);
187
188// Archives
189
190file_recognizer!(
191    CabArchiveRecognizer,
192    PackageType::Cab,
193    DatasourceId::MicrosoftCabinet,
194    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("cab")
195);
196
197file_recognizer!(
198    SharArchiveRecognizer,
199    PackageType::Shar,
200    DatasourceId::SharShellArchive,
201    |path: &Path| path.extension().and_then(|e| e.to_str()) == Some("shar")
202);
203
204// Disk Images
205
206file_recognizer!(
207    AppleDmgRecognizer,
208    PackageType::Dmg,
209    DatasourceId::AppleDmg,
210    |path: &Path| {
211        path.extension()
212            .and_then(|e| e.to_str())
213            .is_some_and(|ext| ext == "dmg" || ext == "sparseimage")
214    }
215);
216
217file_recognizer!(
218    IsoImageRecognizer,
219    PackageType::Iso,
220    DatasourceId::IsoDiskImage,
221    |path: &Path| {
222        path.extension()
223            .and_then(|e| e.to_str())
224            .is_some_and(|ext| ext == "iso" || ext == "udf" || ext == "img")
225    }
226);
227
228// Installers and Binary Formats (require magic byte detection)
229
230file_recognizer!(
231    SquashfsRecognizer,
232    PackageType::Squashfs,
233    DatasourceId::SquashfsDiskImage,
234    |path: &Path| magic::is_squashfs(path)
235);
236
237file_recognizer!(
238    NsisRecognizer,
239    PackageType::Nsis,
240    DatasourceId::NsisInstaller,
241    |path: &Path| {
242        path.extension()
243            .and_then(|e| e.to_str())
244            .is_some_and(|ext| ext == "exe")
245            && magic::is_nsis_installer(path)
246    }
247);
248
249file_recognizer!(
250    InstallShieldRecognizer,
251    PackageType::Installshield,
252    DatasourceId::InstallshieldInstaller,
253    |path: &Path| {
254        path.extension()
255            .and_then(|e| e.to_str())
256            .is_some_and(|ext| ext == "exe")
257            && magic::is_zip(path)
258    }
259);
260
// Invokes the crate's `register_parser!` macro with a human-readable
// description followed by the glob patterns that correspond to the recognizers
// defined above.
//
// NOTE(review): the trailing `""`, `""`, `None` arguments are positional; their
// meaning is defined by `register_parser!`, which is not visible in this file -
// confirm against the macro definition.
// NOTE(review): there is no glob for `SquashfsRecognizer`, which matches by
// magic bytes only - confirm that omission is intentional.
crate::register_parser!(
    "Misc file type recognizers (JAR, WAR, EAR, AAR, iOS, Chrome, Mozilla, installers, disk images, etc.)",
    &[
        // Java archives and descriptors
        "**/*.jar",
        "**/ivy.xml",
        "**/*.war",
        "**/WEB-INF/web.xml",
        "**/*.ear",
        "**/META-INF/application.xml",
        // Apache Axis2
        "**/meta-inf/module.xml",
        "**/*.mar",
        // JBoss
        "**/*.sar",
        "**/meta-inf/jboss-service.xml",
        // Meteor
        "**/package.js",
        // Mobile apps and browser extensions
        "**/*.aar",
        "**/*.xpi",
        "**/*.crx",
        "**/*.ipa",
        // Archives
        "**/*.cab",
        "**/*.shar",
        // Disk images
        "**/*.dmg",
        "**/*.sparseimage",
        "**/*.iso",
        "**/*.udf",
        "**/*.img",
        // Installers (NSIS and InstallShield are told apart by magic bytes)
        "**/*.exe",
    ],
    "",
    "",
    None,
);