Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(feature = "golden-tests")]
152pub mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185pub(crate) mod license_normalization;
186mod maven;
187mod meson;
188#[cfg(test)]
189mod meson_scan_test;
190#[cfg(test)]
191mod meson_test;
192pub mod metadata;
193mod microsoft_update_manifest;
194#[cfg(test)]
195mod microsoft_update_manifest_test;
196mod misc;
197#[cfg(test)]
198mod misc_test;
199mod nix;
200#[cfg(test)]
201mod nix_scan_test;
202#[cfg(test)]
203mod nix_test;
204mod npm;
205mod npm_lock;
206#[cfg(test)]
207mod npm_lock_test;
208#[cfg(test)]
209mod npm_scan_test;
210#[cfg(test)]
211mod npm_test;
212mod npm_workspace;
213#[cfg(test)]
214mod npm_workspace_test;
215mod nuget;
216mod opam;
217#[cfg(test)]
218mod opam_scan_test;
219mod os_release;
220#[cfg(test)]
221mod os_release_test;
222mod pep508;
223mod pip_inspect_deplock;
224#[cfg(test)]
225mod pip_inspect_deplock_test;
226mod pipfile_lock;
227#[cfg(test)]
228mod pipfile_lock_test;
229mod pixi;
230#[cfg(test)]
231mod pixi_scan_test;
232#[cfg(test)]
233mod pixi_test;
234mod pnpm_lock;
235#[cfg(test)]
236mod pnpm_lock_test;
237mod podfile;
238mod podfile_lock;
239#[cfg(test)]
240mod podfile_lock_test;
241mod podspec;
242mod podspec_json;
243#[cfg(test)]
244mod podspec_json_test;
245mod poetry_lock;
246#[cfg(test)]
247mod poetry_lock_test;
248mod publiccode;
249#[cfg(test)]
250mod publiccode_test;
251mod pylock_toml;
252#[cfg(test)]
253mod pylock_toml_test;
254mod python;
255mod readme;
256#[cfg(test)]
257mod readme_test;
258mod requirements_txt;
259#[cfg(test)]
260mod requirements_txt_test;
261pub(crate) mod rfc822;
262mod rpm_db;
263mod rpm_db_native;
264#[cfg(test)]
265mod rpm_db_scan_test;
266mod rpm_license_files;
267#[cfg(test)]
268mod rpm_license_files_test;
269mod rpm_mariner_manifest;
270#[cfg(test)]
271mod rpm_mariner_manifest_test;
272mod rpm_parser;
273#[cfg(test)]
274mod rpm_scan_test;
275mod rpm_specfile;
276#[cfg(test)]
277mod rpm_specfile_test;
278mod rpm_yumdb;
279mod ruby;
280#[cfg(test)]
281mod ruby_scan_test;
282#[cfg(test)]
283mod ruby_test;
284mod sbt;
285#[cfg(test)]
286mod sbt_test;
287#[cfg(test)]
288mod scan_test_utils;
289mod swift_manifest_json;
290#[cfg(test)]
291mod swift_manifest_json_test;
292mod swift_resolved;
293#[cfg(test)]
294mod swift_resolved_test;
295#[cfg(test)]
296mod swift_scan_test;
297mod swift_show_dependencies;
298#[cfg(test)]
299mod swift_show_dependencies_test;
300pub mod utils;
301mod uv_lock;
302#[cfg(test)]
303mod uv_lock_test;
304mod vcpkg;
305#[cfg(test)]
306mod vcpkg_scan_test;
307#[cfg(test)]
308mod vcpkg_test;
309pub(crate) mod windows_executable;
310#[cfg(test)]
311mod windows_executable_golden_test;
312mod yarn_lock;
313#[cfg(test)]
314mod yarn_lock_test;
315mod yarn_pnp;
316#[cfg(test)]
317mod yarn_pnp_test;
318
319use std::cell::RefCell;
320use std::panic::{AssertUnwindSafe, catch_unwind};
321use std::path::Path;
322use std::sync::Arc;
323
324use crate::license_detection::LicenseDetectionEngine;
325use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
326use crate::parsers::license_normalization::finalize_package_declared_license_references;
327use crate::parsers::utils::MAX_ITERATION_COUNT;
328
// Per-thread context stacks consulted while a package handler runs. Each one is
// a stack (not a single slot) so that a nested scope can push its own frame and
// the previous frame becomes visible again once the nested scope pops.
thread_local! {
    // One frame of collected diagnostics per active `capture_parser_diagnostics` call.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    // License engine (possibly `None`) passed to each active `capture_parser_diagnostics` call.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
    // Scan root (possibly `None`) installed by each active `with_parser_scan_root` call.
    static PARSER_SCAN_ROOT_STACK: RefCell<Vec<Option<std::path::PathBuf>>> = const { RefCell::new(Vec::new()) };
}
334
/// Outcome of running one package handler over a single file.
///
/// The field descriptions below reflect how `capture_parser_diagnostics` fills
/// this struct; other constructors may populate it differently.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages returned by the handler; empty when the handler panicked.
    pub packages: Vec<PackageData>,
    /// Diagnostics recorded while the handler ran, followed by a panic report
    /// if the handler panicked.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    /// Message text of every entry in `scan_diagnostics`, in the same order
    /// (warnings included, not only errors).
    pub scan_errors: Vec<String>,
}
341
/// Renders a panic payload as text.
///
/// Payloads that are a `&str` or a `String` are copied out verbatim; any other
/// payload type yields the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    let as_literal = payload.downcast_ref::<&str>();
    let as_owned = payload.downcast_ref::<String>();

    match (as_literal, as_owned) {
        (Some(literal), _) => (*literal).to_owned(),
        (None, Some(owned)) => owned.clone(),
        (None, None) => "unknown panic payload".to_owned(),
    }
}
351
352pub(crate) fn capture_parser_diagnostics<F>(
353    extract: F,
354    handler_name: &str,
355    path: &Path,
356    license_engine: Option<Arc<LicenseDetectionEngine>>,
357) -> ParsePackagesResult
358where
359    F: FnOnce() -> Vec<PackageData>,
360{
361    PARSER_DIAGNOSTIC_STACK.with(|stack| {
362        stack.borrow_mut().push(Vec::new());
363    });
364    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
365        stack.borrow_mut().push(license_engine);
366    });
367
368    let extract_result = catch_unwind(AssertUnwindSafe(|| {
369        extract()
370            .into_iter()
371            .map(|mut package| {
372                finalize_package_declared_license_references(&mut package);
373                package
374            })
375            .take(MAX_ITERATION_COUNT)
376            .collect::<Vec<_>>()
377    }));
378    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
379        stack.borrow_mut().pop();
380    });
381    let mut scan_diagnostics =
382        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
383
384    match extract_result {
385        Ok(packages) => ParsePackagesResult {
386            packages,
387            scan_errors: scan_diagnostics
388                .iter()
389                .map(|diagnostic| diagnostic.message.clone())
390                .collect(),
391            scan_diagnostics,
392        },
393        Err(payload) => {
394            scan_diagnostics.push(ScanDiagnostic::error(format!(
395                "{} panicked while parsing {}: {}",
396                handler_name,
397                path.display(),
398                panic_payload_to_string(payload.as_ref())
399            )));
400            ParsePackagesResult {
401                packages: Vec::new(),
402                scan_errors: scan_diagnostics
403                    .iter()
404                    .map(|diagnostic| diagnostic.message.clone())
405                    .collect(),
406                scan_diagnostics,
407            }
408        }
409    }
410}
411
412pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
413    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
414}
415
416pub(crate) fn active_parser_scan_root() -> Option<std::path::PathBuf> {
417    PARSER_SCAN_ROOT_STACK.with(|stack| stack.borrow().last().cloned().flatten())
418}
419
420pub(crate) fn with_parser_scan_root<T>(scan_root: Option<&Path>, f: impl FnOnce() -> T) -> T {
421    PARSER_SCAN_ROOT_STACK.with(|stack| {
422        stack.borrow_mut().push(scan_root.map(Path::to_path_buf));
423    });
424    let result = f();
425    PARSER_SCAN_ROOT_STACK.with(|stack| {
426        stack.borrow_mut().pop();
427    });
428    result
429}
430
431pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
432    PARSER_DIAGNOSTIC_STACK.with(|stack| {
433        let mut stack = stack.borrow_mut();
434        let Some(active) = stack.last_mut() else {
435            return false;
436        };
437        active.push(ScanDiagnostic { severity, message });
438        true
439    })
440}
441
/// Emits a parser warning built from `format!`-style arguments.
///
/// The message is handed to `record_parser_diagnostic` with `Warning` severity.
/// When that call returns `false` (no diagnostic frame is active on the current
/// thread), the message is sent to `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        // `record_parser_diagnostic` takes the message by value, so it gets a
        // copy and the original stays available for the logging fallback.
        if !$crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        ) {
            log::warn!("{message}");
        }
    }};
}
454
455/// Package parser trait for extracting metadata from package manifest files.
456///
457/// Each parser implementation handles a specific package manager/ecosystem
458/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
459/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
460///
461/// # Implementation Guide
462///
463/// Implementors must provide:
464/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
465/// - `is_match()`: Returns true if the given file path matches this parser's expected format
466/// - `extract_packages()`: Parses the file and returns all extracted package metadata
467///
468/// # Error Handling
469///
470/// Parsers should handle errors gracefully by returning default/empty `PackageData`
471/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
472/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
473/// CI output and serialized scan results stay aligned.
474/// This allows the scan to continue processing other files even when individual
475/// files fail to parse.
476///
477/// # Example
478///
479/// ```no_run
480/// use provenant::models::{PackageData, PackageType};
481/// use provenant::parsers::PackageParser;
482/// use std::path::Path;
483///
484/// pub struct MyParser;
485///
486/// impl PackageParser for MyParser {
487///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
488///
489///     fn is_match(path: &Path) -> bool {
490///         path.file_name().is_some_and(|name| name == "package.json")
491///     }
492///
493///     fn extract_packages(path: &Path) -> Vec<PackageData> {
494///         vec![PackageData::default()]
495///     }
496/// }
497/// ```
498pub trait PackageParser {
499    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
500    const PACKAGE_TYPE: PackageType;
501
502    /// Extracts all packages from the given file path.
503    ///
504    /// Returns a vector of `PackageData` structures containing all extracted metadata
505    /// including name, version, dependencies, licenses, etc. Most parsers return a
506    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
507    /// can contain multiple packages in a single file.
508    ///
509    /// On parse errors, returns a vector with a default `PackageData` with minimal or
510    /// no fields populated.
511    fn extract_packages(path: &Path) -> Vec<PackageData>;
512
513    /// Checks if the given file path matches this parser's expected format.
514    ///
515    /// Returns true if the file should be handled by this parser based on filename,
516    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
517    fn is_match(path: &Path) -> bool;
518
519    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
520    /// or a default [`PackageData`] if the file contains no packages.
521    fn extract_first_package(path: &Path) -> PackageData {
522        Self::extract_packages(path)
523            .into_iter()
524            .map(|mut package| {
525                finalize_package_declared_license_references(&mut package);
526                package
527            })
528            .next()
529            .unwrap_or_default()
530    }
531}
532
533pub fn try_parse_rpm_archive_with_license_engine(
534    path: &Path,
535    license_engine: Option<Arc<LicenseDetectionEngine>>,
536) -> Option<ParsePackagesResult> {
537    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
538        return None;
539    }
540
541    if <RpmParser as PackageParser>::is_match(path) {
542        return Some(capture_parser_diagnostics(
543            || self::rpm_parser::extract_rpm_packages(path),
544            stringify!(RpmParser),
545            path,
546            license_engine,
547        ));
548    }
549
550    None
551}
552
/// Parses `path` as an RPM archive without a license engine.
///
/// Equivalent to calling `try_parse_rpm_archive_with_license_engine` with
/// `None` as the engine.
pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
    try_parse_rpm_archive_with_license_engine(path, None)
}
556
/// Public re-entry point for `compiled_binary::try_parse_compiled_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled; forwards `bytes`
/// unchanged and returns the inner function's result.
#[cfg(feature = "golden-tests")]
pub fn try_parse_compiled_bytes(bytes: &[u8]) -> Option<ParsePackagesResult> {
    self::compiled_binary::try_parse_compiled_bytes(bytes)
}
561
/// Public re-entry point for
/// `windows_executable::try_parse_windows_executable_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled; forwards `path`
/// and `bytes` unchanged and returns the inner function's result.
#[cfg(feature = "golden-tests")]
pub fn try_parse_windows_executable_bytes(
    path: &Path,
    bytes: &[u8],
) -> Option<ParsePackagesResult> {
    self::windows_executable::try_parse_windows_executable_bytes(path, bytes)
}
569
/// Crate-visible forwarder to `rpm_parser::path_looks_like_rpm_archive`, so
/// callers outside this module need not reach into the private `rpm_parser`
/// module.
pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
    self::rpm_parser::path_looks_like_rpm_archive(path)
}
573
574pub use self::about::AboutFileParser;
575pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
576pub use self::android::{
577    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
578};
579#[cfg(feature = "golden-tests")]
580pub use self::android::{
581    ProtoItem, ProtoPrimitive, ProtoRawStringValue, ProtoSourcePosition, ProtoStringValue,
582    ProtoXmlAttribute, ProtoXmlElement, ProtoXmlNamespace, ProtoXmlNode, proto_item,
583    proto_primitive, proto_xml_node,
584};
585pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
586pub use self::autotools::AutotoolsConfigureParser;
587pub use self::bazel::{BazelBuildParser, BazelModuleParser};
588pub use self::bitbake::BitbakeRecipeParser;
589pub use self::bower::BowerJsonParser;
590pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
591pub use self::bun_lock::BunLockParser;
592pub use self::bun_lockb::BunLockbParser;
593pub use self::cargo::CargoParser;
594#[cfg_attr(not(test), allow(unused_imports))]
595pub use self::cargo_lock::CargoLockParser;
596pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
597pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
598pub use self::citation::CitationCffParser;
599pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
600pub use self::composer::{ComposerJsonParser, ComposerLockParser};
601pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
602pub use self::conan_data::ConanDataParser;
603pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
604pub use self::conda_meta_json::CondaMetaJsonParser;
605pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
606pub use self::cpan_dist_ini::CpanDistIniParser;
607pub use self::cpan_makefile_pl::CpanMakefilePlParser;
608pub use self::cran::CranParser;
609pub use self::dart::{PubspecLockParser, PubspecYamlParser};
610pub use self::debian::{
611    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
612    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
613    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
614    DebianMd5sumInPackageParser, DebianOrigTarParser,
615};
616pub use self::deno::DenoParser;
617pub use self::deno_lock::DenoLockParser;
618pub use self::docker::DockerfileParser;
619pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
620pub use self::freebsd::FreebsdCompactManifestParser;
621pub use self::gitmodules::GitmodulesParser;
622pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
623pub use self::go_mod_graph::GoModGraphParser;
624pub use self::gradle::GradleParser;
625pub use self::gradle_lock::GradleLockfileParser;
626pub use self::gradle_module::GradleModuleParser;
627pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
628pub use self::haxe::HaxeParser;
629pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
630pub use self::hex_lock::HexLockParser;
631pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
632pub use self::maven::MavenParser;
633pub use self::meson::MesonParser;
634pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
635pub use self::misc::{
636    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
637    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
638    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
639    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
640    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
641    SharArchiveRecognizer, SquashfsRecognizer,
642};
643pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
644pub use self::npm::NpmParser;
645pub use self::npm_lock::NpmLockParser;
646pub use self::npm_workspace::NpmWorkspaceParser;
647pub use self::nuget::{
648    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
649    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
650    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
651};
652pub use self::opam::OpamParser;
653pub use self::os_release::OsReleaseParser;
654pub use self::pip_inspect_deplock::PipInspectDeplockParser;
655pub use self::pipfile_lock::PipfileLockParser;
656pub use self::pixi::{PixiLockParser, PixiTomlParser};
657pub use self::pnpm_lock::PnpmLockParser;
658pub use self::podfile::PodfileParser;
659pub use self::podfile_lock::PodfileLockParser;
660pub use self::podspec::PodspecParser;
661pub use self::podspec_json::PodspecJsonParser;
662pub use self::poetry_lock::PoetryLockParser;
663pub use self::publiccode::PubliccodeParser;
664pub use self::pylock_toml::PylockTomlParser;
665pub use self::python::PythonParser;
666pub use self::readme::ReadmeParser;
667pub use self::requirements_txt::RequirementsTxtParser;
668#[cfg(feature = "rpm-sqlite")]
669pub use self::rpm_db::RpmSqliteDatabaseParser;
670pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
671pub use self::rpm_license_files::RpmLicenseFilesParser;
672pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
673pub use self::rpm_parser::RpmParser;
674pub use self::rpm_specfile::RpmSpecfileParser;
675pub use self::rpm_yumdb::RpmYumdbParser;
676pub use self::ruby::{
677    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
678};
679pub use self::sbt::SbtParser;
680pub use self::swift_manifest_json::SwiftManifestJsonParser;
681pub use self::swift_resolved::SwiftPackageResolvedParser;
682pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
683pub use self::uv_lock::UvLockParser;
684pub use self::vcpkg::VcpkgManifestParser;
685pub use self::yarn_lock::YarnLockParser;
686pub use self::yarn_pnp::YarnPnpParser;
687
/// Registers all parsers and recognizers, generating dispatch functions.
///
/// Parsers are tried first, then recognizers. This ordering is important because
/// recognizers match broadly by file extension (e.g., `.jar`) and would shadow
/// more specific parsers if checked first.
///
/// Within each list, handlers are checked in the order they are written and the
/// first one whose `is_match` accepts the path wins. Attributes placed before a
/// parser entry (such as `#[cfg(...)]`) are re-applied to every piece of code
/// generated for that entry.
///
/// Generated items: `try_parse_file_with_license_engine`, `try_parse_file`,
/// `parse_by_type_name` and `list_parser_types`.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_meta:meta])* $parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        // Dispatches `path` to the first matching handler and runs it through
        // `capture_parser_diagnostics`; returns `None` when nothing matches.
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            $(
                $(#[$parser_meta])*
                if <$parser>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser>::extract_packages(path),
                        stringify!($parser),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            $(
                if <$recognizer>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer>::extract_packages(path),
                        stringify!($recognizer),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        // Same dispatch as above, with no license engine installed.
        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        // Selects the handler by its type name as written in the registration
        // list and calls `extract_first_package` directly, i.e. without
        // `is_match` and without `capture_parser_diagnostics`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_meta])*
                    stringify!($parser) => Some(<$parser>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer) => Some(<$recognizer>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        // Lists the registered type names, parsers first, in registration order.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_meta])*
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ]
        }
    };
}
762
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::sync::Arc;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };

    /// Loads the embedded license engine used by the tests in this module.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"))
    }

    /// The engine must be visible to the handler while it runs and gone afterwards.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine = embedded_engine();
        let manifest = Path::new("testdata/package.json");

        let handler = || {
            assert!(active_parser_license_engine().is_some());
            vec![PackageData::default()]
        };
        let result = capture_parser_diagnostics(handler, "TestParser", manifest, Some(engine));

        assert_eq!(result.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    /// The engine must still be installed when declared-license references are
    /// finalized after the handler has returned its packages.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine = embedded_engine();
        clear_last_parser_license_engine_ptr();
        let manifest = Path::new("testdata/package.json");

        let handler = || {
            let extra_data = HashMap::from([(
                "license_file".to_string(),
                serde_json::Value::String("LICENSE".to_string()),
            )]);
            vec![PackageData {
                declared_license_expression: Some("mit".to_string()),
                declared_license_expression_spdx: Some("MIT".to_string()),
                extracted_license_statement: Some("MIT".to_string()),
                extra_data: Some(extra_data),
                ..Default::default()
            }]
        };
        let result =
            capture_parser_diagnostics(handler, "TestParser", manifest, Some(Arc::clone(&engine)));

        assert_eq!(result.packages.len(), 1);

        // Finalization must have seen exactly the engine passed in above.
        let expected_engine_ptr = Arc::as_ptr(&engine) as usize;
        assert_eq!(last_parser_license_engine_ptr(), Some(expected_engine_ptr));

        let first_match = &result.packages[0].license_detections[0].matches[0];
        let expected_files = vec!["LICENSE".to_string()];
        assert_eq!(
            first_match.referenced_filenames.as_ref(),
            Some(&expected_files)
        );

        assert!(active_parser_license_engine().is_none());
    }
}
833
// Handler registration table.
//
// Order is significant: the generated dispatch function checks the entries from
// top to bottom (all parsers, then all recognizers) and returns on the first
// handler whose `is_match` accepts the path. Moving an entry can therefore
// change which handler claims a file.
register_package_handlers! {
    parsers: [
        AboutFileParser,
        AndroidAabParser,
        AndroidApkParser,
        AndroidManifestParser,
        AndroidSoongMetadataParser,
        AlpineApkParser,
        AlpineApkbuildParser,
        AlpineInstalledParser,
        ArchPkginfoParser,
        ArchSrcinfoParser,
        AutotoolsConfigureParser,
        BazelBuildParser,
        BazelModuleParser,
        BitbakeRecipeParser,
        BowerJsonParser,
        BunLockParser,
        BunLockbParser,
        BuckBuildParser,
        BuckMetadataBzlParser,
        CargoLockParser,
        CargoParser,
        CarthageCartfileParser,
        CarthageCartfileResolvedParser,
        ChefMetadataJsonParser,
        ChefMetadataRbParser,
        CitationCffParser,
        ClojureDepsEdnParser,
        ClojureProjectCljParser,
        ComposerJsonParser,
        ComposerLockParser,
        ConanDataParser,
        ConanFilePyParser,
        ConanfileTxtParser,
        ConanLockParser,
        CondaEnvironmentYmlParser,
        CondaMetaJsonParser,
        CondaMetaYamlParser,
        CpanDistIniParser,
        CpanMakefilePlParser,
        CpanManifestParser,
        CpanMetaJsonParser,
        CpanMetaYmlParser,
        CranParser,
        DebianControlInExtractedDebParser,
        DebianControlParser,
        DebianCopyrightParser,
        DebianDebianTarParser,
        DebianDebParser,
        DebianDistrolessInstalledParser,
        DebianDscParser,
        DebianInstalledListParser,
        DebianInstalledMd5sumsParser,
        DebianInstalledParser,
        DebianMd5sumInPackageParser,
        DebianOrigTarParser,
        DenoParser,
        DenoLockParser,
        DockerfileParser,
        ErlangAppSrcParser,
        RebarConfigParser,
        RebarLockParser,
        FreebsdCompactManifestParser,
        GemArchiveParser,
        GemfileLockParser,
        GemfileParser,
        GemMetadataExtractedParser,
        GemspecParser,
        GitmodulesParser,
        GodepsParser,
        GoModParser,
        GoModGraphParser,
        GoSumParser,
        GoWorkParser,
        GradleLockfileParser,
        GradleParser,
        GradleModuleParser,
        HackageCabalParser,
        HackageCabalProjectParser,
        HackageStackYamlParser,
        HelmChartYamlParser,
        HelmChartLockParser,
        HaxeParser,
        HexLockParser,
        JuliaManifestTomlParser,
        JuliaProjectTomlParser,
        MavenParser,
        MesonParser,
        MicrosoftUpdateManifestParser,
        NixDefaultParser,
        NixFlakeLockParser,
        NixFlakeParser,
        NpmLockParser,
        NpmParser,
        NpmWorkspaceParser,
        DotNetDepsJsonParser,
        CentralPackageManagementPropsParser,
        DirectoryBuildPropsParser,
        NupkgParser,
        NuspecParser,
        PackageReferenceProjectParser,
        OpamParser,
        OsReleaseParser,
        PackagesConfigParser,
        PackagesLockParser,
        ProjectJsonParser,
        ProjectLockJsonParser,
        PipfileLockParser,
        PipInspectDeplockParser,
        PixiTomlParser,
        PixiLockParser,
        PnpmLockParser,
        PodfileLockParser,
        PodfileParser,
        PodspecJsonParser,
        PodspecParser,
        PoetryLockParser,
        PubliccodeParser,
        PylockTomlParser,
        PubspecLockParser,
        PubspecYamlParser,
        PythonParser,
        UvLockParser,
        VcpkgManifestParser,
        ReadmeParser,
        RequirementsTxtParser,
        RpmBdbDatabaseParser,
        RpmLicenseFilesParser,
        RpmMarinerManifestParser,
        RpmNdbDatabaseParser,
        RpmParser,
        RpmSpecfileParser,
        #[cfg(feature = "rpm-sqlite")]
        RpmSqliteDatabaseParser,
        RpmYumdbParser,
        SbtParser,
        SwiftManifestJsonParser,
        SwiftPackageResolvedParser,
        SwiftShowDependenciesParser,
        YarnLockParser,
        YarnPnpParser,
    ],
    recognizers: [
        AndroidLibraryRecognizer,
        AppleDmgRecognizer,
        Axis2MarRecognizer,
        Axis2ModuleXmlRecognizer,
        CabArchiveRecognizer,
        ChromeCrxRecognizer,
        InstallShieldRecognizer,
        IosIpaRecognizer,
        IsoImageRecognizer,
        IvyXmlRecognizer,
        JavaEarAppXmlRecognizer,
        JavaEarRecognizer,
        JavaJarRecognizer,
        JavaWarRecognizer,
        JavaWarWebXmlRecognizer,
        JBossSarRecognizer,
        JBossServiceXmlRecognizer,
        MeteorPackageRecognizer,
        MozillaXpiRecognizer,
        NsisRecognizer,
        SharArchiveRecognizer,
        SquashfsRecognizer,
    ],
}
1002
#[cfg(test)]
mod panic_isolation_tests {
    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Runs a handler that always panics and returns what the capture produced.
    fn run_panicking_parser() -> ParsePackagesResult {
        capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        )
    }

    /// A panicking handler yields no packages and exactly one error diagnostic
    /// that names the handler, the file and the panic message.
    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let result = run_panicking_parser();

        assert!(result.packages.is_empty());
        assert_eq!(result.scan_errors.len(), 1);
        assert_eq!(result.scan_diagnostics.len(), 1);
        assert_eq!(
            result.scan_diagnostics[0].severity,
            DiagnosticSeverity::Error
        );

        let report = &result.scan_errors[0];
        assert!(report.contains("PanicParser"));
        assert!(report.contains("fixtures/panic-package.json"));
        assert!(report.contains("panic boom"));
    }

    /// After a caught panic, the next capture on the same thread starts from a
    /// clean frame and sees only its own diagnostics.
    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        let _ = run_panicking_parser();

        let recovering_handler = || {
            crate::parser_warn!("recoverable parser warning");
            vec![PackageData {
                package_type: Some(PackageType::Npm),
                ..Default::default()
            }]
        };
        let result = capture_parser_diagnostics(
            recovering_handler,
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        assert_eq!(result.packages.len(), 1);
        assert_eq!(result.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(result.scan_diagnostics.len(), 1);
        assert_eq!(
            result.scan_diagnostics[0].severity,
            DiagnosticSeverity::Warning
        );
    }
}