Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(all(test, feature = "golden-tests"))]
152pub(crate) mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185mod license_normalization;
186mod maven;
187#[cfg(test)]
188mod maven_scan_test;
189#[cfg(test)]
190mod maven_test;
191mod meson;
192#[cfg(test)]
193mod meson_scan_test;
194#[cfg(test)]
195mod meson_test;
196pub mod metadata;
197mod microsoft_update_manifest;
198#[cfg(test)]
199mod microsoft_update_manifest_test;
200mod misc;
201#[cfg(test)]
202mod misc_test;
203mod nix;
204#[cfg(test)]
205mod nix_scan_test;
206#[cfg(test)]
207mod nix_test;
208mod npm;
209mod npm_lock;
210#[cfg(test)]
211mod npm_lock_test;
212#[cfg(test)]
213mod npm_scan_test;
214#[cfg(test)]
215mod npm_test;
216mod npm_workspace;
217#[cfg(test)]
218mod npm_workspace_test;
219mod nuget;
220mod opam;
221#[cfg(test)]
222mod opam_scan_test;
223mod os_release;
224#[cfg(test)]
225mod os_release_test;
226#[cfg(test)]
227mod osgi_test;
228mod pep508;
229mod pip_inspect_deplock;
230#[cfg(test)]
231mod pip_inspect_deplock_test;
232mod pipfile_lock;
233#[cfg(test)]
234mod pipfile_lock_test;
235mod pixi;
236#[cfg(test)]
237mod pixi_scan_test;
238#[cfg(test)]
239mod pixi_test;
240mod pnpm_lock;
241#[cfg(test)]
242mod pnpm_lock_test;
243mod podfile;
244mod podfile_lock;
245#[cfg(test)]
246mod podfile_lock_test;
247mod podspec;
248mod podspec_json;
249#[cfg(test)]
250mod podspec_json_test;
251mod poetry_lock;
252#[cfg(test)]
253mod poetry_lock_test;
254mod publiccode;
255#[cfg(test)]
256mod publiccode_test;
257mod pylock_toml;
258#[cfg(test)]
259mod pylock_toml_test;
260mod python;
261mod readme;
262#[cfg(test)]
263mod readme_test;
264mod requirements_txt;
265#[cfg(test)]
266mod requirements_txt_test;
267pub(crate) mod rfc822;
268mod rpm_db;
269mod rpm_db_native;
270#[cfg(test)]
271mod rpm_db_scan_test;
272mod rpm_license_files;
273#[cfg(test)]
274mod rpm_license_files_test;
275mod rpm_mariner_manifest;
276#[cfg(test)]
277mod rpm_mariner_manifest_test;
278mod rpm_parser;
279#[cfg(test)]
280mod rpm_scan_test;
281mod rpm_specfile;
282#[cfg(test)]
283mod rpm_specfile_test;
284mod rpm_yumdb;
285mod ruby;
286#[cfg(test)]
287mod ruby_scan_test;
288#[cfg(test)]
289mod ruby_test;
290mod sbt;
291#[cfg(test)]
292mod sbt_test;
293#[cfg(test)]
294mod scan_test_utils;
295mod swift_manifest_json;
296#[cfg(test)]
297mod swift_manifest_json_test;
298mod swift_resolved;
299#[cfg(test)]
300mod swift_resolved_test;
301#[cfg(test)]
302mod swift_scan_test;
303mod swift_show_dependencies;
304#[cfg(test)]
305mod swift_show_dependencies_test;
306pub mod utils;
307mod uv_lock;
308#[cfg(test)]
309mod uv_lock_test;
310mod vcpkg;
311#[cfg(test)]
312mod vcpkg_scan_test;
313#[cfg(test)]
314mod vcpkg_test;
315pub(crate) mod windows_executable;
316#[cfg(test)]
317mod windows_executable_golden_test;
318mod yarn_lock;
319#[cfg(test)]
320mod yarn_lock_test;
321mod yarn_pnp;
322#[cfg(test)]
323mod yarn_pnp_test;
324
325#[cfg(all(test, feature = "golden-tests"))]
326mod golden_test;
327
328use std::cell::RefCell;
329use std::panic::{AssertUnwindSafe, catch_unwind};
330use std::path::Path;
331use std::sync::Arc;
332
333use crate::license_detection::LicenseDetectionEngine;
334use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
335use crate::parsers::license_normalization::finalize_package_declared_license_references;
336use crate::parsers::utils::MAX_ITERATION_COUNT;
337
thread_local! {
    // Per-thread stack of diagnostic buffers. `capture_parser_diagnostics` pushes an empty
    // buffer before invoking a handler and pops it afterwards, while
    // `record_parser_diagnostic` appends to whichever buffer is currently on top, so each
    // capture call collects into its own buffer.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    // Per-thread stack of the (optional) license engine handed to each in-flight
    // `capture_parser_diagnostics` call. The top entry is what
    // `active_parser_license_engine` returns.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
}
342
/// Outcome of running one package handler over one file.
///
/// The field descriptions below reflect how `capture_parser_diagnostics` populates the
/// struct; code that builds a value by hand is free to fill the fields differently.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages returned by the handler. Empty when the handler panicked.
    pub packages: Vec<PackageData>,
    /// Diagnostics recorded while the handler ran, followed by one error entry if the
    /// handler panicked.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    /// The `message` of every entry in `scan_diagnostics`, in the same order.
    pub scan_errors: Vec<String>,
}
349
/// Renders a panic payload as human-readable text.
///
/// Payloads that are a `&str` or a `String` are returned as their text. Any other payload
/// type yields the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_owned())
        // Not a string slice: try an owned `String` next.
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_owned())
}
359
360pub(crate) fn capture_parser_diagnostics<F>(
361    extract: F,
362    handler_name: &str,
363    path: &Path,
364    license_engine: Option<Arc<LicenseDetectionEngine>>,
365) -> ParsePackagesResult
366where
367    F: FnOnce() -> Vec<PackageData>,
368{
369    PARSER_DIAGNOSTIC_STACK.with(|stack| {
370        stack.borrow_mut().push(Vec::new());
371    });
372    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
373        stack.borrow_mut().push(license_engine);
374    });
375
376    let extract_result = catch_unwind(AssertUnwindSafe(|| {
377        extract()
378            .into_iter()
379            .map(|mut package| {
380                finalize_package_declared_license_references(&mut package);
381                package
382            })
383            .take(MAX_ITERATION_COUNT)
384            .collect::<Vec<_>>()
385    }));
386    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
387        stack.borrow_mut().pop();
388    });
389    let mut scan_diagnostics =
390        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
391
392    match extract_result {
393        Ok(packages) => ParsePackagesResult {
394            packages,
395            scan_errors: scan_diagnostics
396                .iter()
397                .map(|diagnostic| diagnostic.message.clone())
398                .collect(),
399            scan_diagnostics,
400        },
401        Err(payload) => {
402            scan_diagnostics.push(ScanDiagnostic::error(format!(
403                "{} panicked while parsing {}: {}",
404                handler_name,
405                path.display(),
406                panic_payload_to_string(payload.as_ref())
407            )));
408            ParsePackagesResult {
409                packages: Vec::new(),
410                scan_errors: scan_diagnostics
411                    .iter()
412                    .map(|diagnostic| diagnostic.message.clone())
413                    .collect(),
414                scan_diagnostics,
415            }
416        }
417    }
418}
419
420pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
421    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
422}
423
424pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
425    PARSER_DIAGNOSTIC_STACK.with(|stack| {
426        let mut stack = stack.borrow_mut();
427        let Some(active) = stack.last_mut() else {
428            return false;
429        };
430        active.push(ScanDiagnostic { severity, message });
431        true
432    })
433}
434
/// Reports a parser warning built from `format!`-style arguments.
///
/// The formatted message is handed to `record_parser_diagnostic` with
/// `DiagnosticSeverity::Warning`. If that call reports that nothing was recorded (it
/// returns `false` when no diagnostic capture is active on the current thread), the
/// message is emitted through `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        let recorded = $crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        );
        if !recorded {
            log::warn!("{message}");
        }
    }};
}
447
448/// Package parser trait for extracting metadata from package manifest files.
449///
450/// Each parser implementation handles a specific package manager/ecosystem
451/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
452/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
453///
454/// # Implementation Guide
455///
456/// Implementors must provide:
457/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
458/// - `is_match()`: Returns true if the given file path matches this parser's expected format
459/// - `extract_packages()`: Parses the file and returns all extracted package metadata
460///
461/// # Error Handling
462///
463/// Parsers should handle errors gracefully by returning default/empty `PackageData`
464/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
465/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
466/// CI output and serialized scan results stay aligned.
467/// This allows the scan to continue processing other files even when individual
468/// files fail to parse.
469///
470/// # Example
471///
472/// ```ignore
473/// use provenant::models::{PackageData, PackageType};
474/// use provenant::parsers::PackageParser;
475/// use std::path::Path;
476///
477/// pub struct MyParser;
478///
479/// impl PackageParser for MyParser {
480///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
481///
482///     fn is_match(path: &Path) -> bool {
483///         path.file_name().is_some_and(|name| name == "package.json")
484///     }
485///
486///     fn extract_packages(path: &Path) -> Vec<PackageData> {
487///         vec![PackageData::default()]
488///     }
489/// }
490/// ```
491pub trait PackageParser {
492    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
493    const PACKAGE_TYPE: PackageType;
494
495    /// Extracts all packages from the given file path.
496    ///
497    /// Returns a vector of `PackageData` structures containing all extracted metadata
498    /// including name, version, dependencies, licenses, etc. Most parsers return a
499    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
500    /// can contain multiple packages in a single file.
501    ///
502    /// On parse errors, returns a vector with a default `PackageData` with minimal or
503    /// no fields populated.
504    fn extract_packages(path: &Path) -> Vec<PackageData>;
505
506    /// Checks if the given file path matches this parser's expected format.
507    ///
508    /// Returns true if the file should be handled by this parser based on filename,
509    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
510    fn is_match(path: &Path) -> bool;
511
512    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
513    /// or a default [`PackageData`] if the file contains no packages.
514    fn extract_first_package(path: &Path) -> PackageData {
515        Self::extract_packages(path)
516            .into_iter()
517            .map(|mut package| {
518                finalize_package_declared_license_references(&mut package);
519                package
520            })
521            .next()
522            .unwrap_or_default()
523    }
524}
525
526pub fn try_parse_rpm_archive_with_license_engine(
527    path: &Path,
528    license_engine: Option<Arc<LicenseDetectionEngine>>,
529) -> Option<ParsePackagesResult> {
530    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
531        return None;
532    }
533
534    if <RpmParser as PackageParser>::is_match(path) {
535        return Some(capture_parser_diagnostics(
536            || self::rpm_parser::extract_rpm_packages(path),
537            stringify!(RpmParser),
538            path,
539            license_engine,
540        ));
541    }
542
543    None
544}
545
/// Convenience wrapper around [`try_parse_rpm_archive_with_license_engine`] that supplies
/// no license engine.
pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
    try_parse_rpm_archive_with_license_engine(path, None)
}
549
/// Crate-visible forwarder to `rpm_parser::path_looks_like_rpm_archive`; returns that
/// function's verdict for `path` unchanged.
pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
    self::rpm_parser::path_looks_like_rpm_archive(path)
}
553
554pub use self::about::AboutFileParser;
555pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
556pub use self::android::{
557    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
558};
559pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
560pub use self::autotools::AutotoolsConfigureParser;
561pub use self::bazel::{BazelBuildParser, BazelModuleParser};
562pub use self::bitbake::BitbakeRecipeParser;
563pub use self::bower::BowerJsonParser;
564pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
565pub use self::bun_lock::BunLockParser;
566pub use self::bun_lockb::BunLockbParser;
567pub use self::cargo::CargoParser;
568#[cfg_attr(not(test), allow(unused_imports))]
569pub use self::cargo_lock::CargoLockParser;
570pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
571pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
572pub use self::citation::CitationCffParser;
573pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
574pub use self::composer::{ComposerJsonParser, ComposerLockParser};
575pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
576pub use self::conan_data::ConanDataParser;
577pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
578pub use self::conda_meta_json::CondaMetaJsonParser;
579pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
580pub use self::cpan_dist_ini::CpanDistIniParser;
581pub use self::cpan_makefile_pl::CpanMakefilePlParser;
582pub use self::cran::CranParser;
583pub use self::dart::{PubspecLockParser, PubspecYamlParser};
584pub use self::debian::{
585    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
586    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
587    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
588    DebianMd5sumInPackageParser, DebianOrigTarParser,
589};
590pub use self::deno::DenoParser;
591pub use self::deno_lock::DenoLockParser;
592pub use self::docker::DockerfileParser;
593pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
594pub use self::freebsd::FreebsdCompactManifestParser;
595pub use self::gitmodules::GitmodulesParser;
596pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
597pub use self::go_mod_graph::GoModGraphParser;
598pub use self::gradle::GradleParser;
599pub use self::gradle_lock::GradleLockfileParser;
600pub use self::gradle_module::GradleModuleParser;
601pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
602pub use self::haxe::HaxeParser;
603pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
604pub use self::hex_lock::HexLockParser;
605pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
606pub use self::maven::MavenParser;
607pub use self::meson::MesonParser;
608pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
609pub use self::misc::{
610    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
611    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
612    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
613    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
614    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
615    SharArchiveRecognizer, SquashfsRecognizer,
616};
617pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
618pub use self::npm::NpmParser;
619pub use self::npm_lock::NpmLockParser;
620pub use self::npm_workspace::NpmWorkspaceParser;
621pub use self::nuget::{
622    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
623    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
624    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
625};
626pub use self::opam::OpamParser;
627pub use self::os_release::OsReleaseParser;
628pub use self::pip_inspect_deplock::PipInspectDeplockParser;
629pub use self::pipfile_lock::PipfileLockParser;
630pub use self::pixi::{PixiLockParser, PixiTomlParser};
631pub use self::pnpm_lock::PnpmLockParser;
632pub use self::podfile::PodfileParser;
633pub use self::podfile_lock::PodfileLockParser;
634pub use self::podspec::PodspecParser;
635pub use self::podspec_json::PodspecJsonParser;
636pub use self::poetry_lock::PoetryLockParser;
637pub use self::publiccode::PubliccodeParser;
638pub use self::pylock_toml::PylockTomlParser;
639pub use self::python::PythonParser;
640pub use self::readme::ReadmeParser;
641pub use self::requirements_txt::RequirementsTxtParser;
642#[cfg(feature = "rpm-sqlite")]
643pub use self::rpm_db::RpmSqliteDatabaseParser;
644pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
645pub use self::rpm_license_files::RpmLicenseFilesParser;
646pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
647pub use self::rpm_parser::RpmParser;
648pub use self::rpm_specfile::RpmSpecfileParser;
649pub use self::rpm_yumdb::RpmYumdbParser;
650pub use self::ruby::{
651    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
652};
653pub use self::sbt::SbtParser;
654pub use self::swift_manifest_json::SwiftManifestJsonParser;
655pub use self::swift_resolved::SwiftPackageResolvedParser;
656pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
657pub use self::uv_lock::UvLockParser;
658pub use self::vcpkg::VcpkgManifestParser;
659pub use self::yarn_lock::YarnLockParser;
660pub use self::yarn_pnp::YarnPnpParser;
661
/// Registers all parsers and recognizers, generating dispatch functions.
///
/// Parsers are tried first, then recognizers. This ordering is important because
/// recognizers match broadly by file extension (e.g., `.jar`) and would shadow
/// more specific parsers if checked first.
///
/// The expansion defines four functions in the invoking module:
/// `try_parse_file_with_license_engine`, `try_parse_file`, `parse_by_type_name`, and
/// `list_parser_types`. Attributes written before a parser entry (such as `#[cfg(...)]`)
/// are re-applied to every piece of generated code that mentions that parser.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_meta:meta])* $parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        // Dispatches `path` to the first registered handler whose `is_match` accepts it,
        // in registration order, and runs it through `capture_parser_diagnostics`.
        // Returns `None` when no handler matches.
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            $(
                $(#[$parser_meta])*
                if <$parser>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser>::extract_packages(path),
                        stringify!($parser),
                        path,
                        // Cloned because this branch is stamped out once per handler; only
                        // the branch that matches actually executes the clone.
                        license_engine.clone(),
                    ));
                }
            )*
            $(
                if <$recognizer>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer>::extract_packages(path),
                        stringify!($recognizer),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        // Same dispatch as above, without a license engine.
        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        // Looks the handler up by its registered type name (the `stringify!` of the type)
        // and calls `extract_first_package` directly, i.e. without `is_match` and without
        // `capture_parser_diagnostics`. Unknown names yield `None`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_meta])*
                    stringify!($parser) => Some(<$parser>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer) => Some(<$recognizer>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        // Lists the registered type names, parsers first and then recognizers, in
        // registration order.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_meta])*
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ]
        }
    };
}
736
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::sync::Arc;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };

    /// Loads the embedded license engine used by the tests in this module.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        let engine = LicenseDetectionEngine::from_embedded().expect("embedded engine should load");
        Arc::new(engine)
    }

    /// The engine must be visible from inside the handler and gone again afterwards.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine = embedded_engine();
        let extract = || {
            assert!(active_parser_license_engine().is_some());
            vec![PackageData::default()]
        };

        let result = capture_parser_diagnostics(
            extract,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(engine),
        );

        assert_eq!(result.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    /// The engine must still be active while returned packages are being finalized.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine = embedded_engine();
        clear_last_parser_license_engine_ptr();

        let extract = || {
            let extra_data = HashMap::from([(
                "license_file".to_string(),
                serde_json::Value::String("LICENSE".to_string()),
            )]);
            vec![PackageData {
                declared_license_expression: Some("mit".to_string()),
                declared_license_expression_spdx: Some("MIT".to_string()),
                extracted_license_statement: Some("MIT".to_string()),
                extra_data: Some(extra_data),
                ..Default::default()
            }]
        };

        let result = capture_parser_diagnostics(
            extract,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&engine)),
        );

        assert_eq!(result.packages.len(), 1);

        let expected_engine_ptr = Arc::as_ptr(&engine) as usize;
        assert_eq!(last_parser_license_engine_ptr(), Some(expected_engine_ptr));

        let referenced_filenames = result.packages[0].license_detections[0].matches[0]
            .referenced_filenames
            .as_ref();
        assert_eq!(referenced_filenames, Some(&vec!["LICENSE".to_string()]));

        assert!(active_parser_license_engine().is_none());
    }
}
807
// Handler registry. The generated `try_parse_file_with_license_engine` tries handlers in
// the order listed here (all parsers, then all recognizers) and returns on the first
// whose `is_match` accepts the path, so reordering entries can change which handler
// processes a file.
register_package_handlers! {
    parsers: [
        AboutFileParser,
        AndroidAabParser,
        AndroidApkParser,
        AndroidManifestParser,
        AndroidSoongMetadataParser,
        AlpineApkParser,
        AlpineApkbuildParser,
        AlpineInstalledParser,
        ArchPkginfoParser,
        ArchSrcinfoParser,
        AutotoolsConfigureParser,
        BazelBuildParser,
        BazelModuleParser,
        BitbakeRecipeParser,
        BowerJsonParser,
        BunLockParser,
        BunLockbParser,
        BuckBuildParser,
        BuckMetadataBzlParser,
        CargoLockParser,
        CargoParser,
        CarthageCartfileParser,
        CarthageCartfileResolvedParser,
        ChefMetadataJsonParser,
        ChefMetadataRbParser,
        CitationCffParser,
        ClojureDepsEdnParser,
        ClojureProjectCljParser,
        ComposerJsonParser,
        ComposerLockParser,
        ConanDataParser,
        ConanFilePyParser,
        ConanfileTxtParser,
        ConanLockParser,
        CondaEnvironmentYmlParser,
        CondaMetaJsonParser,
        CondaMetaYamlParser,
        CpanDistIniParser,
        CpanMakefilePlParser,
        CpanManifestParser,
        CpanMetaJsonParser,
        CpanMetaYmlParser,
        CranParser,
        DebianControlInExtractedDebParser,
        DebianControlParser,
        DebianCopyrightParser,
        DebianDebianTarParser,
        DebianDebParser,
        DebianDistrolessInstalledParser,
        DebianDscParser,
        DebianInstalledListParser,
        DebianInstalledMd5sumsParser,
        DebianInstalledParser,
        DebianMd5sumInPackageParser,
        DebianOrigTarParser,
        DenoParser,
        DenoLockParser,
        DockerfileParser,
        ErlangAppSrcParser,
        RebarConfigParser,
        RebarLockParser,
        FreebsdCompactManifestParser,
        GemArchiveParser,
        GemfileLockParser,
        GemfileParser,
        GemMetadataExtractedParser,
        GemspecParser,
        GitmodulesParser,
        GodepsParser,
        GoModParser,
        GoModGraphParser,
        GoSumParser,
        GoWorkParser,
        GradleLockfileParser,
        GradleParser,
        GradleModuleParser,
        HackageCabalParser,
        HackageCabalProjectParser,
        HackageStackYamlParser,
        HelmChartYamlParser,
        HelmChartLockParser,
        HaxeParser,
        HexLockParser,
        JuliaManifestTomlParser,
        JuliaProjectTomlParser,
        MavenParser,
        MesonParser,
        MicrosoftUpdateManifestParser,
        NixDefaultParser,
        NixFlakeLockParser,
        NixFlakeParser,
        NpmLockParser,
        NpmParser,
        NpmWorkspaceParser,
        DotNetDepsJsonParser,
        CentralPackageManagementPropsParser,
        DirectoryBuildPropsParser,
        NupkgParser,
        NuspecParser,
        PackageReferenceProjectParser,
        OpamParser,
        OsReleaseParser,
        PackagesConfigParser,
        PackagesLockParser,
        ProjectJsonParser,
        ProjectLockJsonParser,
        PipfileLockParser,
        PipInspectDeplockParser,
        PixiTomlParser,
        PixiLockParser,
        PnpmLockParser,
        PodfileLockParser,
        PodfileParser,
        PodspecJsonParser,
        PodspecParser,
        PoetryLockParser,
        PubliccodeParser,
        PylockTomlParser,
        PubspecLockParser,
        PubspecYamlParser,
        PythonParser,
        UvLockParser,
        VcpkgManifestParser,
        ReadmeParser,
        RequirementsTxtParser,
        RpmBdbDatabaseParser,
        RpmLicenseFilesParser,
        RpmMarinerManifestParser,
        RpmNdbDatabaseParser,
        RpmParser,
        RpmSpecfileParser,
        // Registered only when the `rpm-sqlite` feature is enabled; the macro forwards
        // this attribute to each generated use of the parser.
        #[cfg(feature = "rpm-sqlite")]
        RpmSqliteDatabaseParser,
        RpmYumdbParser,
        SbtParser,
        SwiftManifestJsonParser,
        SwiftPackageResolvedParser,
        SwiftShowDependenciesParser,
        YarnLockParser,
        YarnPnpParser,
    ],
    recognizers: [
        AndroidLibraryRecognizer,
        AppleDmgRecognizer,
        Axis2MarRecognizer,
        Axis2ModuleXmlRecognizer,
        CabArchiveRecognizer,
        ChromeCrxRecognizer,
        InstallShieldRecognizer,
        IosIpaRecognizer,
        IsoImageRecognizer,
        IvyXmlRecognizer,
        JavaEarAppXmlRecognizer,
        JavaEarRecognizer,
        JavaJarRecognizer,
        JavaWarRecognizer,
        JavaWarWebXmlRecognizer,
        JBossSarRecognizer,
        JBossServiceXmlRecognizer,
        MeteorPackageRecognizer,
        MozillaXpiRecognizer,
        NsisRecognizer,
        SharArchiveRecognizer,
        SquashfsRecognizer,
    ],
}
976
#[cfg(test)]
mod panic_isolation_tests {
    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Runs a handler that always panics with "panic boom" and returns what was captured.
    fn run_panicking_parser() -> ParsePackagesResult {
        capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        )
    }

    /// A panicking handler yields no packages and exactly one error diagnostic that names
    /// the handler, the file, and the panic message.
    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let result = run_panicking_parser();

        assert!(result.packages.is_empty());
        assert_eq!(result.scan_errors.len(), 1);
        assert_eq!(result.scan_diagnostics.len(), 1);
        assert_eq!(
            result.scan_diagnostics[0].severity,
            DiagnosticSeverity::Error
        );

        let message = &result.scan_errors[0];
        for fragment in ["PanicParser", "fixtures/panic-package.json", "panic boom"] {
            assert!(message.contains(fragment));
        }
    }

    /// After a panic, the next capture on the same thread starts from a clean state.
    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        let _ = run_panicking_parser();

        let extract = || {
            crate::parser_warn!("recoverable parser warning");
            vec![PackageData {
                package_type: Some(PackageType::Npm),
                ..Default::default()
            }]
        };
        let result = capture_parser_diagnostics(
            extract,
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        assert_eq!(result.packages.len(), 1);
        assert_eq!(result.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(result.scan_diagnostics.len(), 1);
        assert_eq!(
            result.scan_diagnostics[0].severity,
            DiagnosticSeverity::Warning
        );
    }
}