Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod freebsd;
131#[cfg(test)]
132mod freebsd_scan_test;
133#[cfg(test)]
134mod freebsd_test;
135mod gitmodules;
136#[cfg(test)]
137mod gitmodules_scan_test;
138mod go;
139mod go_mod_graph;
140#[cfg(test)]
141mod go_scan_test;
142#[cfg(test)]
143mod go_test;
144#[cfg(test)]
145mod go_work_test;
146#[cfg(all(test, feature = "golden-tests"))]
147pub(crate) mod golden_test_utils;
148mod gradle;
149mod gradle_lock;
150#[cfg(test)]
151mod gradle_lock_test;
152mod gradle_module;
153#[cfg(test)]
154mod gradle_module_scan_test;
155#[cfg(test)]
156mod gradle_module_test;
157#[cfg(test)]
158mod gradle_scan_test;
159mod hackage;
160#[cfg(test)]
161mod hackage_scan_test;
162#[cfg(test)]
163mod hackage_test;
164mod haxe;
165#[cfg(test)]
166mod haxe_scan_test;
167#[cfg(test)]
168mod haxe_test;
169mod helm;
170#[cfg(test)]
171mod helm_scan_test;
172#[cfg(test)]
173mod helm_test;
174mod hex_lock;
175#[cfg(test)]
176mod hex_lock_test;
177mod julia;
178#[cfg(test)]
179mod julia_test;
180mod license_normalization;
181mod maven;
182#[cfg(test)]
183mod maven_scan_test;
184#[cfg(test)]
185mod maven_test;
186mod meson;
187#[cfg(test)]
188mod meson_scan_test;
189#[cfg(test)]
190mod meson_test;
191pub mod metadata;
192mod microsoft_update_manifest;
193#[cfg(test)]
194mod microsoft_update_manifest_test;
195mod misc;
196#[cfg(test)]
197mod misc_test;
198mod nix;
199#[cfg(test)]
200mod nix_scan_test;
201#[cfg(test)]
202mod nix_test;
203mod npm;
204mod npm_lock;
205#[cfg(test)]
206mod npm_lock_test;
207#[cfg(test)]
208mod npm_scan_test;
209#[cfg(test)]
210mod npm_test;
211mod npm_workspace;
212#[cfg(test)]
213mod npm_workspace_test;
214mod nuget;
215mod opam;
216#[cfg(test)]
217mod opam_scan_test;
218mod os_release;
219#[cfg(test)]
220mod os_release_test;
221#[cfg(test)]
222mod osgi_test;
223mod pep508;
224mod pip_inspect_deplock;
225#[cfg(test)]
226mod pip_inspect_deplock_test;
227mod pipfile_lock;
228#[cfg(test)]
229mod pipfile_lock_test;
230mod pixi;
231#[cfg(test)]
232mod pixi_scan_test;
233#[cfg(test)]
234mod pixi_test;
235mod pnpm_lock;
236#[cfg(test)]
237mod pnpm_lock_test;
238mod podfile;
239mod podfile_lock;
240#[cfg(test)]
241mod podfile_lock_test;
242mod podspec;
243mod podspec_json;
244#[cfg(test)]
245mod podspec_json_test;
246mod poetry_lock;
247#[cfg(test)]
248mod poetry_lock_test;
249mod publiccode;
250#[cfg(test)]
251mod publiccode_test;
252mod pylock_toml;
253#[cfg(test)]
254mod pylock_toml_test;
255mod python;
256mod readme;
257#[cfg(test)]
258mod readme_test;
259mod requirements_txt;
260#[cfg(test)]
261mod requirements_txt_test;
262pub(crate) mod rfc822;
263mod rpm_db;
264mod rpm_db_native;
265#[cfg(test)]
266mod rpm_db_scan_test;
267mod rpm_license_files;
268#[cfg(test)]
269mod rpm_license_files_test;
270mod rpm_mariner_manifest;
271#[cfg(test)]
272mod rpm_mariner_manifest_test;
273mod rpm_parser;
274#[cfg(test)]
275mod rpm_scan_test;
276mod rpm_specfile;
277#[cfg(test)]
278mod rpm_specfile_test;
279mod rpm_yumdb;
280mod ruby;
281#[cfg(test)]
282mod ruby_scan_test;
283#[cfg(test)]
284mod ruby_test;
285mod sbt;
286#[cfg(test)]
287mod sbt_test;
288#[cfg(test)]
289mod scan_test_utils;
290mod swift_manifest_json;
291#[cfg(test)]
292mod swift_manifest_json_test;
293mod swift_resolved;
294#[cfg(test)]
295mod swift_resolved_test;
296#[cfg(test)]
297mod swift_scan_test;
298mod swift_show_dependencies;
299#[cfg(test)]
300mod swift_show_dependencies_test;
301pub mod utils;
302mod uv_lock;
303#[cfg(test)]
304mod uv_lock_test;
305mod vcpkg;
306#[cfg(test)]
307mod vcpkg_scan_test;
308#[cfg(test)]
309mod vcpkg_test;
310pub(crate) mod windows_executable;
311#[cfg(test)]
312mod windows_executable_golden_test;
313mod yarn_lock;
314#[cfg(test)]
315mod yarn_lock_test;
316mod yarn_pnp;
317#[cfg(test)]
318mod yarn_pnp_test;
319
320#[cfg(all(test, feature = "golden-tests"))]
321mod golden_test;
322
323use std::cell::RefCell;
324use std::panic::{AssertUnwindSafe, catch_unwind};
325use std::path::Path;
326use std::sync::Arc;
327
328use crate::license_detection::LicenseDetectionEngine;
329use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
330use crate::parsers::license_normalization::finalize_package_declared_license_references;
331use crate::parsers::utils::MAX_ITERATION_COUNT;
332
// Per-thread bookkeeping for `capture_parser_diagnostics`. Each capture pushes one entry onto
// both stacks before running the handler and pops it again afterwards.
thread_local! {
    /// One frame of collected diagnostics per capture currently running on this thread.
    /// `record_parser_diagnostic` appends to the top frame.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    /// The optional license engine handed to each capture currently running on this thread.
    /// `active_parser_license_engine` reads the top entry.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
}
337
/// Outcome of running one package handler over a single file.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages produced by the handler.
    pub packages: Vec<PackageData>,
    /// Structured diagnostics (severity plus message) gathered for the run.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    /// Plain-text messages. `capture_parser_diagnostics` fills this with the `message` of
    /// every entry in `scan_diagnostics`, in the same order.
    pub scan_errors: Vec<String>,
}
344
/// Renders a panic payload as text.
///
/// Payloads that are a `&str` or a `String` are returned as-is; any other payload type yields
/// the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_owned())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_owned())
}
354
355pub(crate) fn capture_parser_diagnostics<F>(
356    extract: F,
357    handler_name: &str,
358    path: &Path,
359    license_engine: Option<Arc<LicenseDetectionEngine>>,
360) -> ParsePackagesResult
361where
362    F: FnOnce() -> Vec<PackageData>,
363{
364    PARSER_DIAGNOSTIC_STACK.with(|stack| {
365        stack.borrow_mut().push(Vec::new());
366    });
367    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
368        stack.borrow_mut().push(license_engine);
369    });
370
371    let extract_result = catch_unwind(AssertUnwindSafe(|| {
372        extract()
373            .into_iter()
374            .map(|mut package| {
375                finalize_package_declared_license_references(&mut package);
376                package
377            })
378            .take(MAX_ITERATION_COUNT)
379            .collect::<Vec<_>>()
380    }));
381    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
382        stack.borrow_mut().pop();
383    });
384    let mut scan_diagnostics =
385        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
386
387    match extract_result {
388        Ok(packages) => ParsePackagesResult {
389            packages,
390            scan_errors: scan_diagnostics
391                .iter()
392                .map(|diagnostic| diagnostic.message.clone())
393                .collect(),
394            scan_diagnostics,
395        },
396        Err(payload) => {
397            scan_diagnostics.push(ScanDiagnostic::error(format!(
398                "{} panicked while parsing {}: {}",
399                handler_name,
400                path.display(),
401                panic_payload_to_string(payload.as_ref())
402            )));
403            ParsePackagesResult {
404                packages: Vec::new(),
405                scan_errors: scan_diagnostics
406                    .iter()
407                    .map(|diagnostic| diagnostic.message.clone())
408                    .collect(),
409                scan_diagnostics,
410            }
411        }
412    }
413}
414
415pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
416    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
417}
418
419pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
420    PARSER_DIAGNOSTIC_STACK.with(|stack| {
421        let mut stack = stack.borrow_mut();
422        let Some(active) = stack.last_mut() else {
423            return false;
424        };
425        active.push(ScanDiagnostic { severity, message });
426        true
427    })
428}
429
/// Emits a parser warning built from `format!`-style arguments.
///
/// The warning is recorded as a `Warning` diagnostic on the active capture; when no capture
/// is active it is written through `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let text = format!($($arg)*);
        // The recorder takes ownership of its message, so hand it a copy and keep the
        // original for the logging fallback.
        let recorded = $crate::parsers::record_parser_diagnostic(
            text.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        );
        if !recorded {
            log::warn!("{}", text);
        }
    }};
}
442
443/// Package parser trait for extracting metadata from package manifest files.
444///
445/// Each parser implementation handles a specific package manager/ecosystem
446/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
447/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
448///
449/// # Implementation Guide
450///
451/// Implementors must provide:
452/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
453/// - `is_match()`: Returns true if the given file path matches this parser's expected format
454/// - `extract_packages()`: Parses the file and returns all extracted package metadata
455///
456/// # Error Handling
457///
458/// Parsers should handle errors gracefully by returning default/empty `PackageData`
459/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
460/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
461/// CI output and serialized scan results stay aligned.
462/// This allows the scan to continue processing other files even when individual
463/// files fail to parse.
464///
465/// # Example
466///
467/// ```ignore
468/// use provenant::models::{PackageData, PackageType};
469/// use provenant::parsers::PackageParser;
470/// use std::path::Path;
471///
472/// pub struct MyParser;
473///
474/// impl PackageParser for MyParser {
475///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
476///
477///     fn is_match(path: &Path) -> bool {
478///         path.file_name().is_some_and(|name| name == "package.json")
479///     }
480///
481///     fn extract_packages(path: &Path) -> Vec<PackageData> {
482///         vec![PackageData::default()]
483///     }
484/// }
485/// ```
486pub trait PackageParser {
487    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
488    const PACKAGE_TYPE: PackageType;
489
490    /// Extracts all packages from the given file path.
491    ///
492    /// Returns a vector of `PackageData` structures containing all extracted metadata
493    /// including name, version, dependencies, licenses, etc. Most parsers return a
494    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
495    /// can contain multiple packages in a single file.
496    ///
497    /// On parse errors, returns a vector with a default `PackageData` with minimal or
498    /// no fields populated.
499    fn extract_packages(path: &Path) -> Vec<PackageData>;
500
501    /// Checks if the given file path matches this parser's expected format.
502    ///
503    /// Returns true if the file should be handled by this parser based on filename,
504    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
505    fn is_match(path: &Path) -> bool;
506
507    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
508    /// or a default [`PackageData`] if the file contains no packages.
509    fn extract_first_package(path: &Path) -> PackageData {
510        Self::extract_packages(path)
511            .into_iter()
512            .map(|mut package| {
513                finalize_package_declared_license_references(&mut package);
514                package
515            })
516            .next()
517            .unwrap_or_default()
518    }
519}
520
521pub fn try_parse_rpm_archive_with_license_engine(
522    path: &Path,
523    license_engine: Option<Arc<LicenseDetectionEngine>>,
524) -> Option<ParsePackagesResult> {
525    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
526        return None;
527    }
528
529    if <RpmParser as PackageParser>::is_match(path) {
530        return Some(capture_parser_diagnostics(
531            || self::rpm_parser::extract_rpm_packages(path),
532            stringify!(RpmParser),
533            path,
534            license_engine,
535        ));
536    }
537
538    None
539}
540
/// Convenience form of [`try_parse_rpm_archive_with_license_engine`] that runs without a
/// license engine.
pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
    try_parse_rpm_archive_with_license_engine(path, None)
}
544
/// Crate-visible forwarder to the private `rpm_parser` module's check of whether `path`
/// looks like an RPM archive.
pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
    self::rpm_parser::path_looks_like_rpm_archive(path)
}
548
549pub use self::about::AboutFileParser;
550pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
551pub use self::android::{
552    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
553};
554pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
555pub use self::autotools::AutotoolsConfigureParser;
556pub use self::bazel::{BazelBuildParser, BazelModuleParser};
557pub use self::bitbake::BitbakeRecipeParser;
558pub use self::bower::BowerJsonParser;
559pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
560pub use self::bun_lock::BunLockParser;
561pub use self::bun_lockb::BunLockbParser;
562pub use self::cargo::CargoParser;
563#[cfg_attr(not(test), allow(unused_imports))]
564pub use self::cargo_lock::CargoLockParser;
565pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
566pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
567pub use self::citation::CitationCffParser;
568pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
569pub use self::composer::{ComposerJsonParser, ComposerLockParser};
570pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
571pub use self::conan_data::ConanDataParser;
572pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
573pub use self::conda_meta_json::CondaMetaJsonParser;
574pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
575pub use self::cpan_dist_ini::CpanDistIniParser;
576pub use self::cpan_makefile_pl::CpanMakefilePlParser;
577pub use self::cran::CranParser;
578pub use self::dart::{PubspecLockParser, PubspecYamlParser};
579pub use self::debian::{
580    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
581    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
582    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
583    DebianMd5sumInPackageParser, DebianOrigTarParser,
584};
585pub use self::deno::DenoParser;
586pub use self::deno_lock::DenoLockParser;
587pub use self::docker::DockerfileParser;
588pub use self::freebsd::FreebsdCompactManifestParser;
589pub use self::gitmodules::GitmodulesParser;
590pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
591pub use self::go_mod_graph::GoModGraphParser;
592pub use self::gradle::GradleParser;
593pub use self::gradle_lock::GradleLockfileParser;
594pub use self::gradle_module::GradleModuleParser;
595pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
596pub use self::haxe::HaxeParser;
597pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
598pub use self::hex_lock::HexLockParser;
599pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
600pub use self::maven::MavenParser;
601pub use self::meson::MesonParser;
602pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
603pub use self::misc::{
604    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
605    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
606    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
607    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
608    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
609    SharArchiveRecognizer, SquashfsRecognizer,
610};
611pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
612pub use self::npm::NpmParser;
613pub use self::npm_lock::NpmLockParser;
614pub use self::npm_workspace::NpmWorkspaceParser;
615pub use self::nuget::{
616    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
617    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
618    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
619};
620pub use self::opam::OpamParser;
621pub use self::os_release::OsReleaseParser;
622pub use self::pip_inspect_deplock::PipInspectDeplockParser;
623pub use self::pipfile_lock::PipfileLockParser;
624pub use self::pixi::{PixiLockParser, PixiTomlParser};
625pub use self::pnpm_lock::PnpmLockParser;
626pub use self::podfile::PodfileParser;
627pub use self::podfile_lock::PodfileLockParser;
628pub use self::podspec::PodspecParser;
629pub use self::podspec_json::PodspecJsonParser;
630pub use self::poetry_lock::PoetryLockParser;
631pub use self::publiccode::PubliccodeParser;
632pub use self::pylock_toml::PylockTomlParser;
633pub use self::python::PythonParser;
634pub use self::readme::ReadmeParser;
635pub use self::requirements_txt::RequirementsTxtParser;
636#[cfg(feature = "rpm-sqlite")]
637pub use self::rpm_db::RpmSqliteDatabaseParser;
638pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
639pub use self::rpm_license_files::RpmLicenseFilesParser;
640pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
641pub use self::rpm_parser::RpmParser;
642pub use self::rpm_specfile::RpmSpecfileParser;
643pub use self::rpm_yumdb::RpmYumdbParser;
644pub use self::ruby::{
645    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
646};
647pub use self::sbt::SbtParser;
648pub use self::swift_manifest_json::SwiftManifestJsonParser;
649pub use self::swift_resolved::SwiftPackageResolvedParser;
650pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
651pub use self::uv_lock::UvLockParser;
652pub use self::vcpkg::VcpkgManifestParser;
653pub use self::yarn_lock::YarnLockParser;
654pub use self::yarn_pnp::YarnPnpParser;
655
/// Generates the dispatch functions for every registered parser and recognizer.
///
/// The `parsers` list is consulted before the `recognizers` list. That order matters:
/// recognizers match broadly by file extension (e.g., `.jar`), so checking them first would
/// shadow the more specific parsers. Entries in `parsers` may carry attributes (such as
/// `#[cfg(...)]`), which are copied onto every piece of code generated for that entry.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_attr:meta])* $parser_ty:ty),* $(,)?],
        recognizers: [$($recognizer_ty:ty),* $(,)?] $(,)?
    ) => {
        /// Runs the first registered handler whose `is_match` accepts `path`, with
        /// `license_engine` active, and returns what it captured. Returns `None` when no
        /// handler matches.
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            // Specific parsers first, in registration order.
            $(
                $(#[$parser_attr])*
                if <$parser_ty>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser_ty>::extract_packages(path),
                        stringify!($parser_ty),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            // Broad recognizers only once every parser has declined.
            $(
                if <$recognizer_ty>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer_ty>::extract_packages(path),
                        stringify!($recognizer_ty),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        /// Same as `try_parse_file_with_license_engine`, without a license engine.
        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        /// Runs the handler whose type name equals `type_name` on `path` and returns its
        /// first package; `None` when no registered handler has that name.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_attr])*
                    stringify!($parser_ty) => Some(<$parser_ty>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer_ty) => Some(<$recognizer_ty>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        /// Lists the type names of all registered handlers, parsers before recognizers.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_attr])*
                    stringify!($parser_ty),
                )*
                $(
                    stringify!($recognizer_ty),
                )*
            ]
        }
    };
}
730
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };
    use std::path::Path;
    use std::sync::Arc;

    /// Loads the embedded license engine shared by the tests in this module.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"))
    }

    /// The engine passed to a capture is visible inside the handler and gone afterwards.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let license_engine = embedded_engine();

        let handler = || {
            assert!(active_parser_license_engine().is_some());
            vec![PackageData::default()]
        };
        let outcome = capture_parser_diagnostics(
            handler,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(license_engine),
        );

        assert_eq!(outcome.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    /// License-reference finalization still runs while the capture's engine is active.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let license_engine = embedded_engine();
        clear_last_parser_license_engine_ptr();

        let handler = || {
            let license_file_entry = (
                "license_file".to_string(),
                serde_json::Value::String("LICENSE".to_string()),
            );
            vec![PackageData {
                declared_license_expression: Some("mit".to_string()),
                declared_license_expression_spdx: Some("MIT".to_string()),
                extracted_license_statement: Some("MIT".to_string()),
                extra_data: Some(HashMap::from([license_file_entry])),
                ..Default::default()
            }]
        };
        let outcome = capture_parser_diagnostics(
            handler,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&license_engine)),
        );

        assert_eq!(outcome.packages.len(), 1);

        // Finalization must have seen exactly the engine instance given to the capture.
        let expected_engine_ptr = Arc::as_ptr(&license_engine) as usize;
        assert_eq!(last_parser_license_engine_ptr(), Some(expected_engine_ptr));

        let first_match = &outcome.packages[0].license_detections[0].matches[0];
        assert_eq!(
            first_match.referenced_filenames.as_ref(),
            Some(&vec!["LICENSE".to_string()])
        );
        assert!(active_parser_license_engine().is_none());
    }
}
801
// Handler registration. Order is significant: dispatch walks `parsers` top to bottom, then
// `recognizers` top to bottom, and the first handler whose `is_match` accepts a path handles
// the file.
register_package_handlers! {
    parsers: [
        AboutFileParser,
        AndroidAabParser,
        AndroidApkParser,
        AndroidManifestParser,
        AndroidSoongMetadataParser,
        AlpineApkParser,
        AlpineApkbuildParser,
        AlpineInstalledParser,
        ArchPkginfoParser,
        ArchSrcinfoParser,
        AutotoolsConfigureParser,
        BazelBuildParser,
        BazelModuleParser,
        BitbakeRecipeParser,
        BowerJsonParser,
        BunLockParser,
        BunLockbParser,
        BuckBuildParser,
        BuckMetadataBzlParser,
        CargoLockParser,
        CargoParser,
        CarthageCartfileParser,
        CarthageCartfileResolvedParser,
        ChefMetadataJsonParser,
        ChefMetadataRbParser,
        CitationCffParser,
        ClojureDepsEdnParser,
        ClojureProjectCljParser,
        ComposerJsonParser,
        ComposerLockParser,
        ConanDataParser,
        ConanFilePyParser,
        ConanfileTxtParser,
        ConanLockParser,
        CondaEnvironmentYmlParser,
        CondaMetaJsonParser,
        CondaMetaYamlParser,
        CpanDistIniParser,
        CpanMakefilePlParser,
        CpanManifestParser,
        CpanMetaJsonParser,
        CpanMetaYmlParser,
        CranParser,
        DebianControlInExtractedDebParser,
        DebianControlParser,
        DebianCopyrightParser,
        DebianDebianTarParser,
        DebianDebParser,
        DebianDistrolessInstalledParser,
        DebianDscParser,
        DebianInstalledListParser,
        DebianInstalledMd5sumsParser,
        DebianInstalledParser,
        DebianMd5sumInPackageParser,
        DebianOrigTarParser,
        DenoParser,
        DenoLockParser,
        DockerfileParser,
        FreebsdCompactManifestParser,
        GemArchiveParser,
        GemfileLockParser,
        GemfileParser,
        GemMetadataExtractedParser,
        GemspecParser,
        GitmodulesParser,
        GodepsParser,
        GoModParser,
        GoModGraphParser,
        GoSumParser,
        GoWorkParser,
        GradleLockfileParser,
        GradleParser,
        GradleModuleParser,
        HackageCabalParser,
        HackageCabalProjectParser,
        HackageStackYamlParser,
        HelmChartYamlParser,
        HelmChartLockParser,
        HaxeParser,
        HexLockParser,
        JuliaManifestTomlParser,
        JuliaProjectTomlParser,
        MavenParser,
        MesonParser,
        MicrosoftUpdateManifestParser,
        NixDefaultParser,
        NixFlakeLockParser,
        NixFlakeParser,
        NpmLockParser,
        NpmParser,
        NpmWorkspaceParser,
        DotNetDepsJsonParser,
        CentralPackageManagementPropsParser,
        DirectoryBuildPropsParser,
        NupkgParser,
        NuspecParser,
        PackageReferenceProjectParser,
        OpamParser,
        OsReleaseParser,
        PackagesConfigParser,
        PackagesLockParser,
        ProjectJsonParser,
        ProjectLockJsonParser,
        PipfileLockParser,
        PipInspectDeplockParser,
        PixiTomlParser,
        PixiLockParser,
        PnpmLockParser,
        PodfileLockParser,
        PodfileParser,
        PodspecJsonParser,
        PodspecParser,
        PoetryLockParser,
        PubliccodeParser,
        PylockTomlParser,
        PubspecLockParser,
        PubspecYamlParser,
        PythonParser,
        UvLockParser,
        VcpkgManifestParser,
        ReadmeParser,
        RequirementsTxtParser,
        RpmBdbDatabaseParser,
        RpmLicenseFilesParser,
        RpmMarinerManifestParser,
        RpmNdbDatabaseParser,
        RpmParser,
        RpmSpecfileParser,
        // Only registered when the `rpm-sqlite` feature is enabled.
        #[cfg(feature = "rpm-sqlite")]
        RpmSqliteDatabaseParser,
        RpmYumdbParser,
        SbtParser,
        SwiftManifestJsonParser,
        SwiftPackageResolvedParser,
        SwiftShowDependenciesParser,
        YarnLockParser,
        YarnPnpParser,
    ],
    recognizers: [
        AndroidLibraryRecognizer,
        AppleDmgRecognizer,
        Axis2MarRecognizer,
        Axis2ModuleXmlRecognizer,
        CabArchiveRecognizer,
        ChromeCrxRecognizer,
        InstallShieldRecognizer,
        IosIpaRecognizer,
        IsoImageRecognizer,
        IvyXmlRecognizer,
        JavaEarAppXmlRecognizer,
        JavaEarRecognizer,
        JavaJarRecognizer,
        JavaWarRecognizer,
        JavaWarWebXmlRecognizer,
        JBossSarRecognizer,
        JBossServiceXmlRecognizer,
        MeteorPackageRecognizer,
        MozillaXpiRecognizer,
        NsisRecognizer,
        SharArchiveRecognizer,
        SquashfsRecognizer,
    ],
}
967
#[cfg(test)]
mod panic_isolation_tests {
    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Runs a capture whose handler always panics with the message "panic boom".
    fn run_panicking_capture(path: &Path) -> ParsePackagesResult {
        capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            path,
            None,
        )
    }

    /// A panicking handler yields no packages and exactly one error diagnostic that names
    /// the handler, the file, and the panic message.
    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let outcome = run_panicking_capture(Path::new("fixtures/panic-package.json"));

        assert!(outcome.packages.is_empty());
        assert_eq!(outcome.scan_errors.len(), 1);
        assert_eq!(outcome.scan_diagnostics.len(), 1);
        assert_eq!(
            outcome.scan_diagnostics[0].severity,
            DiagnosticSeverity::Error
        );

        let report = &outcome.scan_errors[0];
        for fragment in ["PanicParser", "fixtures/panic-package.json", "panic boom"] {
            assert!(report.contains(fragment));
        }
    }

    /// After a panicking capture, the next capture on the same thread starts from a clean
    /// state and only sees its own diagnostics.
    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        let _ = run_panicking_capture(Path::new("fixtures/panic-package.json"));

        let handler = || {
            crate::parser_warn!("recoverable parser warning");
            vec![PackageData {
                package_type: Some(PackageType::Npm),
                ..Default::default()
            }]
        };
        let outcome = capture_parser_diagnostics(
            handler,
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        assert_eq!(outcome.packages.len(), 1);
        assert_eq!(outcome.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(outcome.scan_diagnostics.len(), 1);
        assert_eq!(
            outcome.scan_diagnostics[0].severity,
            DiagnosticSeverity::Warning
        );
    }
}