// provenant/parsers/mod.rs

1mod about;
2#[cfg(test)]
3mod about_scan_test;
4#[cfg(test)]
5mod about_test;
6mod alpine;
7#[cfg(test)]
8mod alpine_scan_test;
9mod arch;
10#[cfg(test)]
11mod arch_scan_test;
12#[cfg(test)]
13mod arch_test;
14mod autotools;
15#[cfg(test)]
16mod autotools_test;
17mod bazel;
18#[cfg(test)]
19mod bazel_module_test;
20#[cfg(test)]
21mod bazel_test;
22mod bower;
23#[cfg(test)]
24mod bower_scan_test;
25#[cfg(test)]
26mod bower_test;
27mod buck;
28#[cfg(test)]
29mod buck_test;
30mod bun_lock;
31#[cfg(test)]
32mod bun_lock_test;
33mod bun_lockb;
34#[cfg(test)]
35mod bun_lockb_test;
36mod cargo;
37mod cargo_lock;
38#[cfg(test)]
39mod cargo_lock_test;
40#[cfg(test)]
41mod cargo_scan_test;
42#[cfg(test)]
43mod cargo_test;
44mod chef;
45#[cfg(test)]
46mod chef_scan_test;
47#[cfg(test)]
48mod chef_test;
49mod clojure;
50#[cfg(test)]
51mod clojure_test;
52mod composer;
53#[cfg(test)]
54mod composer_scan_test;
55#[cfg(test)]
56mod composer_test;
57mod conan;
58mod conan_data;
59#[cfg(test)]
60mod conan_data_test;
61#[cfg(test)]
62mod conan_scan_test;
63#[cfg(test)]
64mod conan_test;
65mod conda;
66mod conda_meta_json;
67#[cfg(test)]
68mod conda_meta_json_test;
69#[cfg(test)]
70mod conda_scan_test;
71#[cfg(test)]
72mod conda_test;
73mod cpan;
74mod cpan_dist_ini;
75#[cfg(test)]
76mod cpan_dist_ini_test;
77mod cpan_makefile_pl;
78#[cfg(test)]
79mod cpan_makefile_pl_test;
80#[cfg(test)]
81mod cpan_scan_test;
82#[cfg(test)]
83mod cpan_test;
84mod cran;
85#[cfg(test)]
86mod cran_scan_test;
87#[cfg(test)]
88mod cran_test;
89mod dart;
90#[cfg(test)]
91mod dart_scan_test;
92#[cfg(test)]
93mod dart_test;
94mod debian;
95#[cfg(test)]
96mod debian_scan_test;
97#[cfg(test)]
98mod debian_test;
99mod deno;
100mod deno_lock;
101#[cfg(test)]
102mod deno_lock_test;
103#[cfg(test)]
104mod deno_scan_test;
105#[cfg(test)]
106mod deno_test;
107mod docker;
108#[cfg(test)]
109mod docker_scan_test;
110#[cfg(test)]
111mod docker_test;
112mod freebsd;
113#[cfg(test)]
114mod freebsd_scan_test;
115#[cfg(test)]
116mod freebsd_test;
117mod gitmodules;
118#[cfg(test)]
119mod gitmodules_scan_test;
120mod go;
121mod go_mod_graph;
122#[cfg(test)]
123mod go_scan_test;
124#[cfg(test)]
125mod go_test;
126#[cfg(test)]
127mod go_work_test;
128mod gradle;
129mod gradle_lock;
130#[cfg(test)]
131mod gradle_lock_test;
132mod gradle_module;
133#[cfg(test)]
134mod gradle_module_scan_test;
135#[cfg(test)]
136mod gradle_module_test;
137#[cfg(test)]
138mod gradle_scan_test;
139mod hackage;
140#[cfg(test)]
141mod hackage_scan_test;
142#[cfg(test)]
143mod hackage_test;
144mod haxe;
145#[cfg(test)]
146mod haxe_scan_test;
147#[cfg(test)]
148mod haxe_test;
149mod helm;
150#[cfg(test)]
151mod helm_scan_test;
152#[cfg(test)]
153mod helm_test;
154mod hex_lock;
155#[cfg(test)]
156mod hex_lock_test;
157mod license_normalization;
158mod maven;
159#[cfg(test)]
160mod maven_scan_test;
161#[cfg(test)]
162mod maven_test;
163mod meson;
164#[cfg(test)]
165mod meson_test;
166pub mod metadata;
167mod microsoft_update_manifest;
168#[cfg(test)]
169mod microsoft_update_manifest_test;
170mod misc;
171#[cfg(test)]
172mod misc_test;
173mod nix;
174#[cfg(test)]
175mod nix_scan_test;
176#[cfg(test)]
177mod nix_test;
178mod npm;
179mod npm_lock;
180#[cfg(test)]
181mod npm_lock_test;
182#[cfg(test)]
183mod npm_scan_test;
184#[cfg(test)]
185mod npm_test;
186mod npm_workspace;
187#[cfg(test)]
188mod npm_workspace_test;
189mod nuget;
190#[cfg(test)]
191mod nuget_scan_test;
192#[cfg(test)]
193mod nuget_test;
194mod opam;
195#[cfg(test)]
196mod opam_scan_test;
197mod os_release;
198#[cfg(test)]
199mod os_release_test;
200#[cfg(test)]
201mod osgi_test;
202mod pep508;
203mod pip_inspect_deplock;
204#[cfg(test)]
205mod pip_inspect_deplock_test;
206mod pipfile_lock;
207#[cfg(test)]
208mod pipfile_lock_test;
209mod pixi;
210#[cfg(test)]
211mod pixi_scan_test;
212#[cfg(test)]
213mod pixi_test;
214mod pnpm_lock;
215#[cfg(test)]
216mod pnpm_lock_test;
217mod podfile;
218mod podfile_lock;
219#[cfg(test)]
220mod podfile_lock_test;
221mod podspec;
222mod podspec_json;
223#[cfg(test)]
224mod podspec_json_test;
225mod poetry_lock;
226#[cfg(test)]
227mod poetry_lock_test;
228mod pylock_toml;
229#[cfg(test)]
230mod pylock_toml_test;
231mod python;
232#[cfg(test)]
233mod python_scan_test;
234#[cfg(test)]
235mod python_test;
236mod readme;
237#[cfg(test)]
238mod readme_test;
239mod requirements_txt;
240#[cfg(test)]
241mod requirements_txt_test;
242pub(crate) mod rfc822;
243mod rpm_db;
244#[cfg(test)]
245mod rpm_db_scan_test;
246mod rpm_license_files;
247#[cfg(test)]
248mod rpm_license_files_test;
249mod rpm_mariner_manifest;
250#[cfg(test)]
251mod rpm_mariner_manifest_test;
252mod rpm_parser;
253#[cfg(test)]
254mod rpm_scan_test;
255mod rpm_specfile;
256#[cfg(test)]
257mod rpm_specfile_test;
258mod rpm_yumdb;
259mod ruby;
260#[cfg(test)]
261mod ruby_scan_test;
262#[cfg(test)]
263mod ruby_test;
264mod sbt;
265#[cfg(test)]
266mod sbt_test;
267#[cfg(test)]
268mod scan_pipeline_test_utils;
269mod swift_manifest_json;
270#[cfg(test)]
271mod swift_manifest_json_test;
272mod swift_resolved;
273#[cfg(test)]
274mod swift_resolved_test;
275#[cfg(test)]
276mod swift_scan_test;
277mod swift_show_dependencies;
278#[cfg(test)]
279mod swift_show_dependencies_test;
280pub mod utils;
281mod uv_lock;
282#[cfg(test)]
283mod uv_lock_test;
284mod vcpkg;
285#[cfg(test)]
286mod vcpkg_scan_test;
287#[cfg(test)]
288mod vcpkg_test;
289mod yarn_lock;
290#[cfg(test)]
291mod yarn_lock_test;
292
293#[cfg(all(test, feature = "golden-tests"))]
294mod golden_test;
295
296use std::path::Path;
297
298use crate::models::{PackageData, PackageType};
299
300/// Package parser trait for extracting metadata from package manifest files.
301///
302/// Each parser implementation handles a specific package manager/ecosystem
303/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
304/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
305///
306/// # Implementation Guide
307///
308/// Implementors must provide:
309/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
310/// - `is_match()`: Returns true if the given file path matches this parser's expected format
311/// - `extract_packages()`: Parses the file and returns all extracted package metadata
312///
313/// # Error Handling
314///
315/// Parsers should handle errors gracefully by returning default/empty `PackageData`
316/// and logging warnings rather than panicking. This allows the scan to continue
317/// processing other files even when individual files fail to parse.
318///
319/// # Example
320///
321/// ```ignore
322/// use provenant::models::{PackageData, PackageType};
323/// use provenant::parsers::PackageParser;
324/// use std::path::Path;
325///
326/// pub struct MyParser;
327///
328/// impl PackageParser for MyParser {
329///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
330///
331///     fn is_match(path: &Path) -> bool {
332///         path.file_name().is_some_and(|name| name == "package.json")
333///     }
334///
335///     fn extract_packages(path: &Path) -> Vec<PackageData> {
336///         // Parse file and return metadata
337///         // On error, log warning and return default
338///         vec![PackageData::default()]
339///     }
340/// }
341/// ```
342pub trait PackageParser {
343    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
344    const PACKAGE_TYPE: PackageType;
345
346    /// Extracts all packages from the given file path.
347    ///
348    /// Returns a vector of `PackageData` structures containing all extracted metadata
349    /// including name, version, dependencies, licenses, etc. Most parsers return a
350    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
351    /// can contain multiple packages in a single file.
352    ///
353    /// On parse errors, returns a vector with a default `PackageData` with minimal or
354    /// no fields populated.
355    fn extract_packages(path: &Path) -> Vec<PackageData>;
356
357    /// Checks if the given file path matches this parser's expected format.
358    ///
359    /// Returns true if the file should be handled by this parser based on filename,
360    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
361    fn is_match(path: &Path) -> bool;
362
363    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
364    /// or a default [`PackageData`] if the file contains no packages.
365    fn extract_first_package(path: &Path) -> PackageData {
366        Self::extract_packages(path)
367            .into_iter()
368            .next()
369            .unwrap_or_default()
370    }
371}
372
373pub use self::about::AboutFileParser;
374pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
375pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
376pub use self::autotools::AutotoolsConfigureParser;
377pub use self::bazel::{BazelBuildParser, BazelModuleParser};
378pub use self::bower::BowerJsonParser;
379pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
380pub use self::bun_lock::BunLockParser;
381pub use self::bun_lockb::BunLockbParser;
382pub use self::cargo::CargoParser;
383#[cfg_attr(not(test), allow(unused_imports))]
384pub use self::cargo_lock::CargoLockParser;
385pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
386pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
387pub use self::composer::{ComposerJsonParser, ComposerLockParser};
388pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
389pub use self::conan_data::ConanDataParser;
390pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
391pub use self::conda_meta_json::CondaMetaJsonParser;
392pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
393pub use self::cpan_dist_ini::CpanDistIniParser;
394pub use self::cpan_makefile_pl::CpanMakefilePlParser;
395pub use self::cran::CranParser;
396pub use self::dart::{PubspecLockParser, PubspecYamlParser};
397pub use self::debian::{
398    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
399    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
400    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
401    DebianMd5sumInPackageParser, DebianOrigTarParser,
402};
403pub use self::deno::DenoParser;
404pub use self::deno_lock::DenoLockParser;
405pub use self::docker::DockerfileParser;
406pub use self::freebsd::FreebsdCompactManifestParser;
407pub use self::gitmodules::GitmodulesParser;
408pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
409pub use self::go_mod_graph::GoModGraphParser;
410pub use self::gradle::GradleParser;
411pub use self::gradle_lock::GradleLockfileParser;
412pub use self::gradle_module::GradleModuleParser;
413pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
414pub use self::haxe::HaxeParser;
415pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
416pub use self::hex_lock::HexLockParser;
417pub use self::maven::MavenParser;
418pub use self::meson::MesonParser;
419pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
420pub use self::misc::{
421    AndroidApkRecognizer, AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer,
422    Axis2ModuleXmlRecognizer, CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer,
423    IosIpaRecognizer, IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer,
424    JBossServiceXmlRecognizer, JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer,
425    JavaWarRecognizer, JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer,
426    NsisRecognizer, SharArchiveRecognizer, SquashfsRecognizer,
427};
428pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
429pub use self::npm::NpmParser;
430pub use self::npm_lock::NpmLockParser;
431pub use self::npm_workspace::NpmWorkspaceParser;
432pub use self::nuget::{
433    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
434    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
435    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
436};
437pub use self::opam::OpamParser;
438pub use self::os_release::OsReleaseParser;
439pub use self::pip_inspect_deplock::PipInspectDeplockParser;
440pub use self::pipfile_lock::PipfileLockParser;
441pub use self::pixi::{PixiLockParser, PixiTomlParser};
442pub use self::pnpm_lock::PnpmLockParser;
443pub use self::podfile::PodfileParser;
444pub use self::podfile_lock::PodfileLockParser;
445pub use self::podspec::PodspecParser;
446pub use self::podspec_json::PodspecJsonParser;
447pub use self::poetry_lock::PoetryLockParser;
448pub use self::pylock_toml::PylockTomlParser;
449pub use self::python::PythonParser;
450pub use self::readme::ReadmeParser;
451pub use self::requirements_txt::RequirementsTxtParser;
452pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser, RpmSqliteDatabaseParser};
453pub use self::rpm_license_files::RpmLicenseFilesParser;
454pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
455pub use self::rpm_parser::RpmParser;
456pub use self::rpm_specfile::RpmSpecfileParser;
457pub use self::rpm_yumdb::RpmYumdbParser;
458pub use self::ruby::{
459    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
460};
461pub use self::sbt::SbtParser;
462pub use self::swift_manifest_json::SwiftManifestJsonParser;
463pub use self::swift_resolved::SwiftPackageResolvedParser;
464pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
465pub use self::uv_lock::UvLockParser;
466pub use self::vcpkg::VcpkgManifestParser;
467pub use self::yarn_lock::YarnLockParser;
468
/// Registers all parsers and recognizers, generating dispatch functions.
///
/// Parsers are tried first, then recognizers. This ordering is important because
/// recognizers match broadly by file extension (e.g., `.jar`) and would shadow
/// more specific parsers if checked first.
///
/// Within each list, handlers are tried in the order they are written and the
/// first one whose `is_match()` returns true handles the file, so the order of
/// entries is significant.
///
/// The expansion defines three functions at the invocation site:
/// `try_parse_file`, `parse_by_type_name` and `list_parser_types`. Every listed
/// type must provide `is_match`, `extract_packages` and `extract_first_package`
/// as associated functions taking a `&Path`.
macro_rules! register_package_handlers {
    (
        parsers: [$($parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        /// Routes `path` to the first registered handler whose `is_match()` accepts it.
        ///
        /// Returns `Some` with that handler's extracted packages, or `None` when no
        /// registered parser or recognizer matches the path.
        pub fn try_parse_file(path: &Path) -> Option<Vec<PackageData>> {
            // Specific parsers first, in registration order.
            $(
                if <$parser>::is_match(path) {
                    return Some(<$parser>::extract_packages(path));
                }
            )*
            // Broad recognizers only after every parser has declined.
            $(
                if <$recognizer>::is_match(path) {
                    return Some(<$recognizer>::extract_packages(path));
                }
            )*
            None
        }

        /// Runs the handler whose registered type name equals `type_name` on `path`.
        ///
        /// The name is compared against the handler type exactly as written in the
        /// registration list; `is_match()` is not consulted. Returns `None` for an
        /// unknown name.
        ///
        /// Used by the parser-golden maintenance tool in `xtask`.
        /// Scanner runtime dispatch goes through `try_parse_file()` instead.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    stringify!($parser) => Some(<$parser>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer) => Some(<$recognizer>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        /// Lists the type names of all registered handlers: parsers first, then
        /// recognizers, each in registration order.
        ///
        /// Used by the parser-golden maintenance tool in `xtask` and by
        /// `tests/scanner_integration.rs` to verify parser registration.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ]
        }
    };
}
523
524register_package_handlers! {
525    parsers: [
526        AboutFileParser,
527        AlpineApkParser,
528        AlpineApkbuildParser,
529        AlpineInstalledParser,
530        ArchPkginfoParser,
531        ArchSrcinfoParser,
532        AutotoolsConfigureParser,
533        BazelBuildParser,
534        BazelModuleParser,
535        BowerJsonParser,
536        BunLockParser,
537        BunLockbParser,
538        BuckBuildParser,
539        BuckMetadataBzlParser,
540        CargoLockParser,
541        CargoParser,
542        ChefMetadataJsonParser,
543        ChefMetadataRbParser,
544        ClojureDepsEdnParser,
545        ClojureProjectCljParser,
546        ComposerJsonParser,
547        ComposerLockParser,
548        ConanDataParser,
549        ConanFilePyParser,
550        ConanfileTxtParser,
551        ConanLockParser,
552        CondaEnvironmentYmlParser,
553        CondaMetaJsonParser,
554        CondaMetaYamlParser,
555        CpanDistIniParser,
556        CpanMakefilePlParser,
557        CpanManifestParser,
558        CpanMetaJsonParser,
559        CpanMetaYmlParser,
560        CranParser,
561        DebianControlInExtractedDebParser,
562        DebianControlParser,
563        DebianCopyrightParser,
564        DebianDebianTarParser,
565        DebianDebParser,
566        DebianDistrolessInstalledParser,
567        DebianDscParser,
568        DebianInstalledListParser,
569        DebianInstalledMd5sumsParser,
570        DebianInstalledParser,
571        DebianMd5sumInPackageParser,
572        DebianOrigTarParser,
573        DenoParser,
574        DenoLockParser,
575        DockerfileParser,
576        FreebsdCompactManifestParser,
577        GemArchiveParser,
578        GemfileLockParser,
579        GemfileParser,
580        GemMetadataExtractedParser,
581        GemspecParser,
582        GitmodulesParser,
583        GodepsParser,
584        GoModParser,
585        GoModGraphParser,
586        GoSumParser,
587        GoWorkParser,
588        GradleLockfileParser,
589        GradleParser,
590        GradleModuleParser,
591        HackageCabalParser,
592        HackageCabalProjectParser,
593        HackageStackYamlParser,
594        HelmChartYamlParser,
595        HelmChartLockParser,
596        HaxeParser,
597        HexLockParser,
598        MavenParser,
599        MesonParser,
600        MicrosoftUpdateManifestParser,
601        NixDefaultParser,
602        NixFlakeLockParser,
603        NixFlakeParser,
604        NpmLockParser,
605        NpmParser,
606        NpmWorkspaceParser,
607        DotNetDepsJsonParser,
608        CentralPackageManagementPropsParser,
609        DirectoryBuildPropsParser,
610        NupkgParser,
611        NuspecParser,
612        PackageReferenceProjectParser,
613        OpamParser,
614        OsReleaseParser,
615        PackagesConfigParser,
616        PackagesLockParser,
617        ProjectJsonParser,
618        ProjectLockJsonParser,
619        PipfileLockParser,
620        PipInspectDeplockParser,
621        PixiTomlParser,
622        PixiLockParser,
623        PnpmLockParser,
624        PodfileLockParser,
625        PodfileParser,
626        PodspecJsonParser,
627        PodspecParser,
628        PoetryLockParser,
629        PylockTomlParser,
630        PubspecLockParser,
631        PubspecYamlParser,
632        PythonParser,
633        UvLockParser,
634        VcpkgManifestParser,
635        ReadmeParser,
636        RequirementsTxtParser,
637        RpmBdbDatabaseParser,
638        RpmLicenseFilesParser,
639        RpmMarinerManifestParser,
640        RpmNdbDatabaseParser,
641        RpmParser,
642        RpmSpecfileParser,
643        RpmSqliteDatabaseParser,
644        RpmYumdbParser,
645        SbtParser,
646        SwiftManifestJsonParser,
647        SwiftPackageResolvedParser,
648        SwiftShowDependenciesParser,
649        YarnLockParser,
650    ],
651    recognizers: [
652        AndroidApkRecognizer,
653        AndroidLibraryRecognizer,
654        AppleDmgRecognizer,
655        Axis2MarRecognizer,
656        Axis2ModuleXmlRecognizer,
657        CabArchiveRecognizer,
658        ChromeCrxRecognizer,
659        InstallShieldRecognizer,
660        IosIpaRecognizer,
661        IsoImageRecognizer,
662        IvyXmlRecognizer,
663        JavaEarAppXmlRecognizer,
664        JavaEarRecognizer,
665        JavaJarRecognizer,
666        JavaWarRecognizer,
667        JavaWarWebXmlRecognizer,
668        JBossSarRecognizer,
669        JBossServiceXmlRecognizer,
670        MeteorPackageRecognizer,
671        MozillaXpiRecognizer,
672        NsisRecognizer,
673        SharArchiveRecognizer,
674        SquashfsRecognizer,
675    ],
676}