Skip to main content

provenant/parsers/
mod.rs

1mod about;
2#[cfg(test)]
3mod about_scan_test;
4#[cfg(test)]
5mod about_test;
6mod alpine;
7#[cfg(test)]
8mod alpine_scan_test;
9mod arch;
10#[cfg(test)]
11mod arch_test;
12mod autotools;
13#[cfg(test)]
14mod autotools_test;
15mod bazel;
16#[cfg(test)]
17mod bazel_module_test;
18#[cfg(test)]
19mod bazel_test;
20mod bower;
21#[cfg(test)]
22mod bower_test;
23mod buck;
24#[cfg(test)]
25mod buck_test;
26mod bun_lock;
27#[cfg(test)]
28mod bun_lock_test;
29mod bun_lockb;
30#[cfg(test)]
31mod bun_lockb_test;
32mod cargo;
33mod cargo_lock;
34#[cfg(test)]
35mod cargo_lock_test;
36#[cfg(test)]
37mod cargo_test;
38mod chef;
39#[cfg(test)]
40mod chef_test;
41mod clojure;
42#[cfg(test)]
43mod clojure_test;
44mod composer;
45#[cfg(test)]
46mod composer_test;
47mod conan;
48mod conan_data;
49#[cfg(test)]
50mod conan_data_test;
51#[cfg(test)]
52mod conan_test;
53mod conda;
54mod conda_meta_json;
55#[cfg(test)]
56mod conda_meta_json_test;
57#[cfg(test)]
58mod conda_test;
59mod cpan;
60mod cpan_dist_ini;
61#[cfg(test)]
62mod cpan_dist_ini_test;
63mod cpan_makefile_pl;
64#[cfg(test)]
65mod cpan_makefile_pl_test;
66#[cfg(test)]
67mod cpan_scan_test;
68#[cfg(test)]
69mod cpan_test;
70mod cran;
71#[cfg(test)]
72mod cran_test;
73mod dart;
74#[cfg(test)]
75mod dart_test;
76mod debian;
77#[cfg(test)]
78mod debian_scan_test;
79#[cfg(test)]
80mod debian_test;
81mod deno;
82mod deno_lock;
83#[cfg(test)]
84mod deno_lock_test;
85#[cfg(test)]
86mod deno_test;
87mod docker;
88#[cfg(test)]
89mod docker_scan_test;
90#[cfg(test)]
91mod docker_test;
92mod freebsd;
93#[cfg(test)]
94mod freebsd_test;
95mod gitmodules;
96mod go;
97mod go_mod_graph;
98#[cfg(test)]
99mod go_test;
100#[cfg(test)]
101mod go_work_test;
102mod gradle;
103mod gradle_lock;
104#[cfg(test)]
105mod gradle_lock_test;
106mod gradle_module;
107#[cfg(test)]
108mod gradle_module_scan_test;
109#[cfg(test)]
110mod gradle_module_test;
111mod hackage;
112#[cfg(test)]
113mod hackage_test;
114mod haxe;
115#[cfg(test)]
116mod haxe_test;
117mod helm;
118#[cfg(test)]
119mod helm_test;
120mod hex_lock;
121#[cfg(test)]
122mod hex_lock_test;
123mod license_normalization;
124mod maven;
125#[cfg(test)]
126mod maven_test;
127mod meson;
128#[cfg(test)]
129mod meson_test;
130pub mod metadata;
131mod microsoft_update_manifest;
132#[cfg(test)]
133mod microsoft_update_manifest_test;
134mod misc;
135#[cfg(test)]
136mod misc_test;
137mod nix;
138#[cfg(test)]
139mod nix_test;
140mod npm;
141mod npm_lock;
142#[cfg(test)]
143mod npm_lock_test;
144#[cfg(test)]
145mod npm_test;
146mod npm_workspace;
147#[cfg(test)]
148mod npm_workspace_test;
149mod nuget;
150#[cfg(test)]
151mod nuget_test;
152mod opam;
153mod os_release;
154#[cfg(test)]
155mod os_release_test;
156#[cfg(test)]
157mod osgi_test;
158mod pep508;
159mod pip_inspect_deplock;
160#[cfg(test)]
161mod pip_inspect_deplock_test;
162mod pipfile_lock;
163#[cfg(test)]
164mod pipfile_lock_test;
165mod pixi;
166#[cfg(test)]
167mod pixi_test;
168mod pnpm_lock;
169#[cfg(test)]
170mod pnpm_lock_test;
171mod podfile;
172mod podfile_lock;
173#[cfg(test)]
174mod podfile_lock_test;
175mod podspec;
176mod podspec_json;
177#[cfg(test)]
178mod podspec_json_test;
179mod poetry_lock;
180#[cfg(test)]
181mod poetry_lock_test;
182mod pylock_toml;
183#[cfg(test)]
184mod pylock_toml_test;
185mod python;
186#[cfg(test)]
187mod python_scan_test;
188#[cfg(test)]
189mod python_test;
190mod readme;
191#[cfg(test)]
192mod readme_test;
193mod requirements_txt;
194#[cfg(test)]
195mod requirements_txt_test;
196pub(crate) mod rfc822;
197mod rpm_db;
198#[cfg(test)]
199mod rpm_db_scan_test;
200mod rpm_license_files;
201#[cfg(test)]
202mod rpm_license_files_test;
203mod rpm_mariner_manifest;
204#[cfg(test)]
205mod rpm_mariner_manifest_test;
206mod rpm_parser;
207mod rpm_specfile;
208#[cfg(test)]
209mod rpm_specfile_test;
210mod rpm_yumdb;
211mod ruby;
212#[cfg(test)]
213mod ruby_test;
214mod sbt;
215#[cfg(test)]
216mod sbt_test;
217#[cfg(test)]
218mod scan_pipeline_test_utils;
219mod swift_manifest_json;
220#[cfg(test)]
221mod swift_manifest_json_test;
222mod swift_resolved;
223#[cfg(test)]
224mod swift_resolved_test;
225#[cfg(test)]
226mod swift_scan_test;
227mod swift_show_dependencies;
228#[cfg(test)]
229mod swift_show_dependencies_test;
230pub mod utils;
231mod uv_lock;
232#[cfg(test)]
233mod uv_lock_test;
234mod vcpkg;
235#[cfg(test)]
236mod vcpkg_test;
237mod yarn_lock;
238#[cfg(test)]
239mod yarn_lock_test;
240
241#[cfg(all(test, feature = "golden-tests"))]
242mod golden_test;
243
244use std::path::Path;
245
246use crate::models::{PackageData, PackageType};
247
248/// Package parser trait for extracting metadata from package manifest files.
249///
250/// Each parser implementation handles a specific package manager/ecosystem
251/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
252/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
253///
254/// # Implementation Guide
255///
256/// Implementors must provide:
257/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
258/// - `is_match()`: Returns true if the given file path matches this parser's expected format
259/// - `extract_packages()`: Parses the file and returns all extracted package metadata
260///
261/// # Error Handling
262///
263/// Parsers should handle errors gracefully by returning default/empty `PackageData`
264/// and logging warnings rather than panicking. This allows the scan to continue
265/// processing other files even when individual files fail to parse.
266///
267/// # Example
268///
269/// ```ignore
270/// use provenant::models::{PackageData, PackageType};
271/// use provenant::parsers::PackageParser;
272/// use std::path::Path;
273///
274/// pub struct MyParser;
275///
276/// impl PackageParser for MyParser {
277///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
278///
279///     fn is_match(path: &Path) -> bool {
280///         path.file_name().is_some_and(|name| name == "package.json")
281///     }
282///
283///     fn extract_packages(path: &Path) -> Vec<PackageData> {
284///         // Parse file and return metadata
285///         // On error, log warning and return default
286///         vec![PackageData::default()]
287///     }
288/// }
289/// ```
290pub trait PackageParser {
291    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
292    const PACKAGE_TYPE: PackageType;
293
294    /// Extracts all packages from the given file path.
295    ///
296    /// Returns a vector of `PackageData` structures containing all extracted metadata
297    /// including name, version, dependencies, licenses, etc. Most parsers return a
298    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
299    /// can contain multiple packages in a single file.
300    ///
301    /// On parse errors, returns a vector with a default `PackageData` with minimal or
302    /// no fields populated.
303    fn extract_packages(path: &Path) -> Vec<PackageData>;
304
305    /// Checks if the given file path matches this parser's expected format.
306    ///
307    /// Returns true if the file should be handled by this parser based on filename,
308    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
309    fn is_match(path: &Path) -> bool;
310
311    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
312    /// or a default [`PackageData`] if the file contains no packages.
313    fn extract_first_package(path: &Path) -> PackageData {
314        Self::extract_packages(path)
315            .into_iter()
316            .next()
317            .unwrap_or_default()
318    }
319}
320
321pub use self::about::AboutFileParser;
322pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
323pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
324pub use self::autotools::AutotoolsConfigureParser;
325pub use self::bazel::{BazelBuildParser, BazelModuleParser};
326pub use self::bower::BowerJsonParser;
327pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
328pub use self::bun_lock::BunLockParser;
329pub use self::bun_lockb::BunLockbParser;
330pub use self::cargo::CargoParser;
331#[cfg_attr(not(test), allow(unused_imports))]
332pub use self::cargo_lock::CargoLockParser;
333pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
334pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
335pub use self::composer::{ComposerJsonParser, ComposerLockParser};
336pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
337pub use self::conan_data::ConanDataParser;
338pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
339pub use self::conda_meta_json::CondaMetaJsonParser;
340pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
341pub use self::cpan_dist_ini::CpanDistIniParser;
342pub use self::cpan_makefile_pl::CpanMakefilePlParser;
343pub use self::cran::CranParser;
344pub use self::dart::{PubspecLockParser, PubspecYamlParser};
345pub use self::debian::{
346    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
347    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
348    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
349    DebianMd5sumInPackageParser, DebianOrigTarParser,
350};
351pub use self::deno::DenoParser;
352pub use self::deno_lock::DenoLockParser;
353pub use self::docker::DockerfileParser;
354pub use self::freebsd::FreebsdCompactManifestParser;
355pub use self::gitmodules::GitmodulesParser;
356pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
357pub use self::go_mod_graph::GoModGraphParser;
358pub use self::gradle::GradleParser;
359pub use self::gradle_lock::GradleLockfileParser;
360pub use self::gradle_module::GradleModuleParser;
361pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
362pub use self::haxe::HaxeParser;
363pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
364pub use self::hex_lock::HexLockParser;
365pub use self::maven::MavenParser;
366pub use self::meson::MesonParser;
367pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
368pub use self::misc::{
369    AndroidApkRecognizer, AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer,
370    Axis2ModuleXmlRecognizer, CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer,
371    IosIpaRecognizer, IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer,
372    JBossServiceXmlRecognizer, JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer,
373    JavaWarRecognizer, JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer,
374    NsisRecognizer, SharArchiveRecognizer, SquashfsRecognizer,
375};
376pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
377pub use self::npm::NpmParser;
378pub use self::npm_lock::NpmLockParser;
379pub use self::npm_workspace::NpmWorkspaceParser;
380pub use self::nuget::{
381    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
382    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
383    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
384};
385pub use self::opam::OpamParser;
386pub use self::os_release::OsReleaseParser;
387pub use self::pip_inspect_deplock::PipInspectDeplockParser;
388pub use self::pipfile_lock::PipfileLockParser;
389pub use self::pixi::{PixiLockParser, PixiTomlParser};
390pub use self::pnpm_lock::PnpmLockParser;
391pub use self::podfile::PodfileParser;
392pub use self::podfile_lock::PodfileLockParser;
393pub use self::podspec::PodspecParser;
394pub use self::podspec_json::PodspecJsonParser;
395pub use self::poetry_lock::PoetryLockParser;
396pub use self::pylock_toml::PylockTomlParser;
397pub use self::python::PythonParser;
398pub use self::readme::ReadmeParser;
399pub use self::requirements_txt::RequirementsTxtParser;
400pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser, RpmSqliteDatabaseParser};
401pub use self::rpm_license_files::RpmLicenseFilesParser;
402pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
403pub use self::rpm_parser::RpmParser;
404pub use self::rpm_specfile::RpmSpecfileParser;
405pub use self::rpm_yumdb::RpmYumdbParser;
406pub use self::ruby::{
407    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
408};
409pub use self::sbt::SbtParser;
410pub use self::swift_manifest_json::SwiftManifestJsonParser;
411pub use self::swift_resolved::SwiftPackageResolvedParser;
412pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
413pub use self::uv_lock::UvLockParser;
414pub use self::vcpkg::VcpkgManifestParser;
415pub use self::yarn_lock::YarnLockParser;
416
/// Generates the dispatch functions for every registered parser and recognizer.
///
/// Handlers are consulted in the order they are written, with all `parsers`
/// ahead of all `recognizers`. That ordering matters: recognizers match broadly
/// by file extension (e.g., `.jar`), so checking them first would shadow the
/// more specific parsers.
macro_rules! register_package_handlers {
    (
        parsers: [$($parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        pub fn try_parse_file(path: &Path) -> Option<Vec<PackageData>> {
            // Match guards are evaluated top to bottom, so the first handler whose
            // `is_match` accepts the path is the only one that extracts.
            match () {
                $(
                    () if <$parser>::is_match(path) => {
                        Some(<$parser>::extract_packages(path))
                    }
                )*
                $(
                    () if <$recognizer>::is_match(path) => {
                        Some(<$recognizer>::extract_packages(path))
                    }
                )*
                () => None,
            }
        }

        // Consumed by the parser-golden maintenance tool in `xtask`.
        // The scanner itself dispatches at runtime via `try_parse_file()`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            // Compare against each handler's type name as written in the
            // registration list; the first equal name handles the file.
            $(
                if type_name == stringify!($parser) {
                    return Some(<$parser>::extract_first_package(path));
                }
            )*
            $(
                if type_name == stringify!($recognizer) {
                    return Some(<$recognizer>::extract_first_package(path));
                }
            )*
            None
        }

        // Consumed by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` when verifying parser registration.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            // Parsers first, then recognizers, each in registration order.
            Vec::from([
                $(
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ])
        }
    };
}
471
// Registration list consumed by `register_package_handlers!`, which expands to
// `try_parse_file()`, `parse_by_type_name()` and `list_parser_types()`.
//
// Order is significant: `try_parse_file()` returns the result of the first
// handler whose `is_match()` accepts the path, walking `parsers` from top to
// bottom and only then `recognizers`. Each type name written here is also the
// exact string accepted by `parse_by_type_name()` and returned by
// `list_parser_types()`.
//
// NOTE(review): the entries are only roughly alphabetical (e.g. the NuGet and
// Python-ecosystem handlers are grouped out of order). Whether any of that
// ordering is relied on for precedence is not visible from this file, so
// confirm there are no overlapping matchers before re-sorting.
472register_package_handlers! {
    // Format-specific handlers; every one of these is tried before any recognizer.
473    parsers: [
474        AboutFileParser,
475        AlpineApkParser,
476        AlpineApkbuildParser,
477        AlpineInstalledParser,
478        ArchPkginfoParser,
479        ArchSrcinfoParser,
480        AutotoolsConfigureParser,
481        BazelBuildParser,
482        BazelModuleParser,
483        BowerJsonParser,
484        BunLockParser,
485        BunLockbParser,
486        BuckBuildParser,
487        BuckMetadataBzlParser,
488        CargoLockParser,
489        CargoParser,
490        ChefMetadataJsonParser,
491        ChefMetadataRbParser,
492        ClojureDepsEdnParser,
493        ClojureProjectCljParser,
494        ComposerJsonParser,
495        ComposerLockParser,
496        ConanDataParser,
497        ConanFilePyParser,
498        ConanfileTxtParser,
499        ConanLockParser,
500        CondaEnvironmentYmlParser,
501        CondaMetaJsonParser,
502        CondaMetaYamlParser,
503        CpanDistIniParser,
504        CpanMakefilePlParser,
505        CpanManifestParser,
506        CpanMetaJsonParser,
507        CpanMetaYmlParser,
508        CranParser,
509        DebianControlInExtractedDebParser,
510        DebianControlParser,
511        DebianCopyrightParser,
512        DebianDebianTarParser,
513        DebianDebParser,
514        DebianDistrolessInstalledParser,
515        DebianDscParser,
516        DebianInstalledListParser,
517        DebianInstalledMd5sumsParser,
518        DebianInstalledParser,
519        DebianMd5sumInPackageParser,
520        DebianOrigTarParser,
521        DenoParser,
522        DenoLockParser,
523        DockerfileParser,
524        FreebsdCompactManifestParser,
525        GemArchiveParser,
526        GemfileLockParser,
527        GemfileParser,
528        GemMetadataExtractedParser,
529        GemspecParser,
530        GitmodulesParser,
531        GodepsParser,
532        GoModParser,
533        GoModGraphParser,
534        GoSumParser,
535        GoWorkParser,
536        GradleLockfileParser,
537        GradleParser,
538        GradleModuleParser,
539        HackageCabalParser,
540        HackageCabalProjectParser,
541        HackageStackYamlParser,
542        HelmChartYamlParser,
543        HelmChartLockParser,
544        HaxeParser,
545        HexLockParser,
546        MavenParser,
547        MesonParser,
548        MicrosoftUpdateManifestParser,
549        NixDefaultParser,
550        NixFlakeLockParser,
551        NixFlakeParser,
552        NpmLockParser,
553        NpmParser,
554        NpmWorkspaceParser,
555        DotNetDepsJsonParser,
556        CentralPackageManagementPropsParser,
557        DirectoryBuildPropsParser,
558        NupkgParser,
559        NuspecParser,
560        PackageReferenceProjectParser,
561        OpamParser,
562        OsReleaseParser,
563        PackagesConfigParser,
564        PackagesLockParser,
565        ProjectJsonParser,
566        ProjectLockJsonParser,
567        PipfileLockParser,
568        PipInspectDeplockParser,
569        PixiTomlParser,
570        PixiLockParser,
571        PnpmLockParser,
572        PodfileLockParser,
573        PodfileParser,
574        PodspecJsonParser,
575        PodspecParser,
576        PoetryLockParser,
577        PylockTomlParser,
578        PubspecLockParser,
579        PubspecYamlParser,
580        PythonParser,
581        UvLockParser,
582        VcpkgManifestParser,
583        ReadmeParser,
584        RequirementsTxtParser,
585        RpmBdbDatabaseParser,
586        RpmLicenseFilesParser,
587        RpmMarinerManifestParser,
588        RpmNdbDatabaseParser,
589        RpmParser,
590        RpmSpecfileParser,
591        RpmSqliteDatabaseParser,
592        RpmYumdbParser,
593        SbtParser,
594        SwiftManifestJsonParser,
595        SwiftPackageResolvedParser,
596        SwiftShowDependenciesParser,
597        YarnLockParser,
598    ],
    // Broad matchers (per the macro's docs, recognizers match by file extension,
    // e.g. `.jar`); consulted only after every parser above has declined the path.
599    recognizers: [
600        AndroidApkRecognizer,
601        AndroidLibraryRecognizer,
602        AppleDmgRecognizer,
603        Axis2MarRecognizer,
604        Axis2ModuleXmlRecognizer,
605        CabArchiveRecognizer,
606        ChromeCrxRecognizer,
607        InstallShieldRecognizer,
608        IosIpaRecognizer,
609        IsoImageRecognizer,
610        IvyXmlRecognizer,
611        JavaEarAppXmlRecognizer,
612        JavaEarRecognizer,
613        JavaJarRecognizer,
614        JavaWarRecognizer,
615        JavaWarWebXmlRecognizer,
616        JBossSarRecognizer,
617        JBossServiceXmlRecognizer,
618        MeteorPackageRecognizer,
619        MozillaXpiRecognizer,
620        NsisRecognizer,
621        SharArchiveRecognizer,
622        SquashfsRecognizer,
623    ],
624}