Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(feature = "golden-tests")]
152pub mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185mod license_normalization;
186mod maven;
187#[cfg(test)]
188mod maven_scan_test;
189#[cfg(test)]
190mod maven_test;
191mod meson;
192#[cfg(test)]
193mod meson_scan_test;
194#[cfg(test)]
195mod meson_test;
196pub mod metadata;
197mod microsoft_update_manifest;
198#[cfg(test)]
199mod microsoft_update_manifest_test;
200mod misc;
201#[cfg(test)]
202mod misc_test;
203mod nix;
204#[cfg(test)]
205mod nix_scan_test;
206#[cfg(test)]
207mod nix_test;
208mod npm;
209mod npm_lock;
210#[cfg(test)]
211mod npm_lock_test;
212#[cfg(test)]
213mod npm_scan_test;
214#[cfg(test)]
215mod npm_test;
216mod npm_workspace;
217#[cfg(test)]
218mod npm_workspace_test;
219mod nuget;
220mod opam;
221#[cfg(test)]
222mod opam_scan_test;
223mod os_release;
224#[cfg(test)]
225mod os_release_test;
226#[cfg(test)]
227mod osgi_test;
228mod pep508;
229mod pip_inspect_deplock;
230#[cfg(test)]
231mod pip_inspect_deplock_test;
232mod pipfile_lock;
233#[cfg(test)]
234mod pipfile_lock_test;
235mod pixi;
236#[cfg(test)]
237mod pixi_scan_test;
238#[cfg(test)]
239mod pixi_test;
240mod pnpm_lock;
241#[cfg(test)]
242mod pnpm_lock_test;
243mod podfile;
244mod podfile_lock;
245#[cfg(test)]
246mod podfile_lock_test;
247mod podspec;
248mod podspec_json;
249#[cfg(test)]
250mod podspec_json_test;
251mod poetry_lock;
252#[cfg(test)]
253mod poetry_lock_test;
254mod publiccode;
255#[cfg(test)]
256mod publiccode_test;
257mod pylock_toml;
258#[cfg(test)]
259mod pylock_toml_test;
260mod python;
261mod readme;
262#[cfg(test)]
263mod readme_test;
264mod requirements_txt;
265#[cfg(test)]
266mod requirements_txt_test;
267pub(crate) mod rfc822;
268mod rpm_db;
269mod rpm_db_native;
270#[cfg(test)]
271mod rpm_db_scan_test;
272mod rpm_license_files;
273#[cfg(test)]
274mod rpm_license_files_test;
275mod rpm_mariner_manifest;
276#[cfg(test)]
277mod rpm_mariner_manifest_test;
278mod rpm_parser;
279#[cfg(test)]
280mod rpm_scan_test;
281mod rpm_specfile;
282#[cfg(test)]
283mod rpm_specfile_test;
284mod rpm_yumdb;
285mod ruby;
286#[cfg(test)]
287mod ruby_scan_test;
288#[cfg(test)]
289mod ruby_test;
290mod sbt;
291#[cfg(test)]
292mod sbt_test;
293#[cfg(test)]
294mod scan_test_utils;
295mod swift_manifest_json;
296#[cfg(test)]
297mod swift_manifest_json_test;
298mod swift_resolved;
299#[cfg(test)]
300mod swift_resolved_test;
301#[cfg(test)]
302mod swift_scan_test;
303mod swift_show_dependencies;
304#[cfg(test)]
305mod swift_show_dependencies_test;
306pub mod utils;
307mod uv_lock;
308#[cfg(test)]
309mod uv_lock_test;
310mod vcpkg;
311#[cfg(test)]
312mod vcpkg_scan_test;
313#[cfg(test)]
314mod vcpkg_test;
315pub(crate) mod windows_executable;
316#[cfg(test)]
317mod windows_executable_golden_test;
318mod yarn_lock;
319#[cfg(test)]
320mod yarn_lock_test;
321mod yarn_pnp;
322#[cfg(test)]
323mod yarn_pnp_test;
324
325use std::cell::RefCell;
326use std::panic::{AssertUnwindSafe, catch_unwind};
327use std::path::Path;
328use std::sync::Arc;
329
330use crate::license_detection::LicenseDetectionEngine;
331use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
332use crate::parsers::license_normalization::finalize_package_declared_license_references;
333use crate::parsers::utils::MAX_ITERATION_COUNT;
334
thread_local! {
    // Per-thread stack of diagnostic buffers. `capture_parser_diagnostics` pushes an
    // empty buffer before running a parser and pops it afterwards;
    // `record_parser_diagnostic` appends to the top-most buffer.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    // Per-thread stack of optional license engines, pushed and popped by
    // `capture_parser_diagnostics` around each parser run;
    // `active_parser_license_engine` reads the top-most entry.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
}
339
/// Outcome of running a single package handler against one file.
///
/// As built by `capture_parser_diagnostics`, `scan_errors` holds the message of
/// every entry in `scan_diagnostics`, in the same order.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages returned by the handler (empty when the handler panicked).
    pub packages: Vec<PackageData>,
    /// Diagnostics recorded while the handler ran, including severity.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    /// Plain-text diagnostic messages.
    pub scan_errors: Vec<String>,
}
346
/// Renders a caught panic payload as text.
///
/// Payloads that are a `&str` or a `String` are returned as-is; any other payload
/// type yields the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_owned())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_owned())
}
356
357pub(crate) fn capture_parser_diagnostics<F>(
358    extract: F,
359    handler_name: &str,
360    path: &Path,
361    license_engine: Option<Arc<LicenseDetectionEngine>>,
362) -> ParsePackagesResult
363where
364    F: FnOnce() -> Vec<PackageData>,
365{
366    PARSER_DIAGNOSTIC_STACK.with(|stack| {
367        stack.borrow_mut().push(Vec::new());
368    });
369    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
370        stack.borrow_mut().push(license_engine);
371    });
372
373    let extract_result = catch_unwind(AssertUnwindSafe(|| {
374        extract()
375            .into_iter()
376            .map(|mut package| {
377                finalize_package_declared_license_references(&mut package);
378                package
379            })
380            .take(MAX_ITERATION_COUNT)
381            .collect::<Vec<_>>()
382    }));
383    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
384        stack.borrow_mut().pop();
385    });
386    let mut scan_diagnostics =
387        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
388
389    match extract_result {
390        Ok(packages) => ParsePackagesResult {
391            packages,
392            scan_errors: scan_diagnostics
393                .iter()
394                .map(|diagnostic| diagnostic.message.clone())
395                .collect(),
396            scan_diagnostics,
397        },
398        Err(payload) => {
399            scan_diagnostics.push(ScanDiagnostic::error(format!(
400                "{} panicked while parsing {}: {}",
401                handler_name,
402                path.display(),
403                panic_payload_to_string(payload.as_ref())
404            )));
405            ParsePackagesResult {
406                packages: Vec::new(),
407                scan_errors: scan_diagnostics
408                    .iter()
409                    .map(|diagnostic| diagnostic.message.clone())
410                    .collect(),
411                scan_diagnostics,
412            }
413        }
414    }
415}
416
417pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
418    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
419}
420
421pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
422    PARSER_DIAGNOSTIC_STACK.with(|stack| {
423        let mut stack = stack.borrow_mut();
424        let Some(active) = stack.last_mut() else {
425            return false;
426        };
427        active.push(ScanDiagnostic { severity, message });
428        true
429    })
430}
431
/// Emits a parser warning.
///
/// The arguments are forwarded to `format!`. The resulting message is recorded
/// with `DiagnosticSeverity::Warning` through `record_parser_diagnostic`; when
/// that returns `false` (no capture frame is active on this thread) the message
/// is sent to `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        // The clone keeps `message` available for the logging fallback below.
        if !$crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        ) {
            log::warn!("{message}");
        }
    }};
}
444
445/// Package parser trait for extracting metadata from package manifest files.
446///
447/// Each parser implementation handles a specific package manager/ecosystem
448/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
449/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
450///
451/// # Implementation Guide
452///
453/// Implementors must provide:
454/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
455/// - `is_match()`: Returns true if the given file path matches this parser's expected format
456/// - `extract_packages()`: Parses the file and returns all extracted package metadata
457///
458/// # Error Handling
459///
460/// Parsers should handle errors gracefully by returning default/empty `PackageData`
461/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
462/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
463/// CI output and serialized scan results stay aligned.
464/// This allows the scan to continue processing other files even when individual
465/// files fail to parse.
466///
467/// # Example
468///
469/// ```ignore
470/// use provenant::models::{PackageData, PackageType};
471/// use provenant::parsers::PackageParser;
472/// use std::path::Path;
473///
474/// pub struct MyParser;
475///
476/// impl PackageParser for MyParser {
477///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
478///
479///     fn is_match(path: &Path) -> bool {
480///         path.file_name().is_some_and(|name| name == "package.json")
481///     }
482///
483///     fn extract_packages(path: &Path) -> Vec<PackageData> {
484///         vec![PackageData::default()]
485///     }
486/// }
487/// ```
488pub trait PackageParser {
489    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
490    const PACKAGE_TYPE: PackageType;
491
492    /// Extracts all packages from the given file path.
493    ///
494    /// Returns a vector of `PackageData` structures containing all extracted metadata
495    /// including name, version, dependencies, licenses, etc. Most parsers return a
496    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
497    /// can contain multiple packages in a single file.
498    ///
499    /// On parse errors, returns a vector with a default `PackageData` with minimal or
500    /// no fields populated.
501    fn extract_packages(path: &Path) -> Vec<PackageData>;
502
503    /// Checks if the given file path matches this parser's expected format.
504    ///
505    /// Returns true if the file should be handled by this parser based on filename,
506    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
507    fn is_match(path: &Path) -> bool;
508
509    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
510    /// or a default [`PackageData`] if the file contains no packages.
511    fn extract_first_package(path: &Path) -> PackageData {
512        Self::extract_packages(path)
513            .into_iter()
514            .map(|mut package| {
515                finalize_package_declared_license_references(&mut package);
516                package
517            })
518            .next()
519            .unwrap_or_default()
520    }
521}
522
523pub fn try_parse_rpm_archive_with_license_engine(
524    path: &Path,
525    license_engine: Option<Arc<LicenseDetectionEngine>>,
526) -> Option<ParsePackagesResult> {
527    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
528        return None;
529    }
530
531    if <RpmParser as PackageParser>::is_match(path) {
532        return Some(capture_parser_diagnostics(
533            || self::rpm_parser::extract_rpm_packages(path),
534            stringify!(RpmParser),
535            path,
536            license_engine,
537        ));
538    }
539
540    None
541}
542
/// Convenience form of `try_parse_rpm_archive_with_license_engine` that passes
/// `None` as the license engine.
pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
    try_parse_rpm_archive_with_license_engine(path, None)
}
546
/// Public forwarder to `compiled_binary::try_parse_compiled_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled.
#[cfg(feature = "golden-tests")]
pub fn try_parse_compiled_bytes(bytes: &[u8]) -> Option<ParsePackagesResult> {
    self::compiled_binary::try_parse_compiled_bytes(bytes)
}
551
/// Public forwarder to `windows_executable::try_parse_windows_executable_bytes`,
/// passing `path` and `bytes` through unchanged.
///
/// Only compiled when the `golden-tests` feature is enabled.
#[cfg(feature = "golden-tests")]
pub fn try_parse_windows_executable_bytes(
    path: &Path,
    bytes: &[u8],
) -> Option<ParsePackagesResult> {
    self::windows_executable::try_parse_windows_executable_bytes(path, bytes)
}
559
/// Crate-visible forwarder to `rpm_parser::path_looks_like_rpm_archive`.
pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
    self::rpm_parser::path_looks_like_rpm_archive(path)
}
563
564pub use self::about::AboutFileParser;
565pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
566pub use self::android::{
567    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
568};
569#[cfg(feature = "golden-tests")]
570pub use self::android::{
571    ProtoItem, ProtoPrimitive, ProtoRawStringValue, ProtoSourcePosition, ProtoStringValue,
572    ProtoXmlAttribute, ProtoXmlElement, ProtoXmlNamespace, ProtoXmlNode, proto_item,
573    proto_primitive, proto_xml_node,
574};
575pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
576pub use self::autotools::AutotoolsConfigureParser;
577pub use self::bazel::{BazelBuildParser, BazelModuleParser};
578pub use self::bitbake::BitbakeRecipeParser;
579pub use self::bower::BowerJsonParser;
580pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
581pub use self::bun_lock::BunLockParser;
582pub use self::bun_lockb::BunLockbParser;
583pub use self::cargo::CargoParser;
584#[cfg_attr(not(test), allow(unused_imports))]
585pub use self::cargo_lock::CargoLockParser;
586pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
587pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
588pub use self::citation::CitationCffParser;
589pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
590pub use self::composer::{ComposerJsonParser, ComposerLockParser};
591pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
592pub use self::conan_data::ConanDataParser;
593pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
594pub use self::conda_meta_json::CondaMetaJsonParser;
595pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
596pub use self::cpan_dist_ini::CpanDistIniParser;
597pub use self::cpan_makefile_pl::CpanMakefilePlParser;
598pub use self::cran::CranParser;
599pub use self::dart::{PubspecLockParser, PubspecYamlParser};
600pub use self::debian::{
601    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
602    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
603    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
604    DebianMd5sumInPackageParser, DebianOrigTarParser,
605};
606pub use self::deno::DenoParser;
607pub use self::deno_lock::DenoLockParser;
608pub use self::docker::DockerfileParser;
609pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
610pub use self::freebsd::FreebsdCompactManifestParser;
611pub use self::gitmodules::GitmodulesParser;
612pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
613pub use self::go_mod_graph::GoModGraphParser;
614pub use self::gradle::GradleParser;
615pub use self::gradle_lock::GradleLockfileParser;
616pub use self::gradle_module::GradleModuleParser;
617pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
618pub use self::haxe::HaxeParser;
619pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
620pub use self::hex_lock::HexLockParser;
621pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
622pub use self::maven::MavenParser;
623pub use self::meson::MesonParser;
624pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
625pub use self::misc::{
626    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
627    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
628    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
629    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
630    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
631    SharArchiveRecognizer, SquashfsRecognizer,
632};
633pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
634pub use self::npm::NpmParser;
635pub use self::npm_lock::NpmLockParser;
636pub use self::npm_workspace::NpmWorkspaceParser;
637pub use self::nuget::{
638    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
639    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
640    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
641};
642pub use self::opam::OpamParser;
643pub use self::os_release::OsReleaseParser;
644pub use self::pip_inspect_deplock::PipInspectDeplockParser;
645pub use self::pipfile_lock::PipfileLockParser;
646pub use self::pixi::{PixiLockParser, PixiTomlParser};
647pub use self::pnpm_lock::PnpmLockParser;
648pub use self::podfile::PodfileParser;
649pub use self::podfile_lock::PodfileLockParser;
650pub use self::podspec::PodspecParser;
651pub use self::podspec_json::PodspecJsonParser;
652pub use self::poetry_lock::PoetryLockParser;
653pub use self::publiccode::PubliccodeParser;
654pub use self::pylock_toml::PylockTomlParser;
655pub use self::python::PythonParser;
656pub use self::readme::ReadmeParser;
657pub use self::requirements_txt::RequirementsTxtParser;
658#[cfg(feature = "rpm-sqlite")]
659pub use self::rpm_db::RpmSqliteDatabaseParser;
660pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
661pub use self::rpm_license_files::RpmLicenseFilesParser;
662pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
663pub use self::rpm_parser::RpmParser;
664pub use self::rpm_specfile::RpmSpecfileParser;
665pub use self::rpm_yumdb::RpmYumdbParser;
666pub use self::ruby::{
667    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
668};
669pub use self::sbt::SbtParser;
670pub use self::swift_manifest_json::SwiftManifestJsonParser;
671pub use self::swift_resolved::SwiftPackageResolvedParser;
672pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
673pub use self::uv_lock::UvLockParser;
674pub use self::vcpkg::VcpkgManifestParser;
675pub use self::yarn_lock::YarnLockParser;
676pub use self::yarn_pnp::YarnPnpParser;
677
/// Generates the dispatch functions for every registered parser and recognizer.
///
/// Parsers are consulted before recognizers. The order matters: recognizers match
/// broadly by file extension (e.g., `.jar`) and would shadow more specific
/// parsers if they were checked first.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_attr:meta])* $parser_ty:ty),* $(,)?],
        recognizers: [$($recognizer_ty:ty),* $(,)?] $(,)?
    ) => {
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            // Parsers, in registration order; the first match handles the file.
            $(
                $(#[$parser_attr])*
                if <$parser_ty>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser_ty>::extract_packages(path),
                        stringify!($parser_ty),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            // Recognizers are only reached when no parser matched.
            $(
                if <$recognizer_ty>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer_ty>::extract_packages(path),
                        stringify!($recognizer_ty),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Consumed by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()` instead.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_attr])*
                    stringify!($parser_ty) => Some(<$parser_ty>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer_ty) => Some(<$recognizer_ty>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Consumed by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_attr])*
                    stringify!($parser_ty),
                )*
                $(
                    stringify!($recognizer_ty),
                )*
            ]
        }
    };
}
752
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::sync::Arc;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };

    /// Loads the embedded license engine used by the tests in this module.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"))
    }

    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine = embedded_engine();

        // The engine must be visible while the parser closure runs...
        let outcome = capture_parser_diagnostics(
            || {
                assert!(active_parser_license_engine().is_some());
                vec![PackageData::default()]
            },
            "TestParser",
            Path::new("testdata/package.json"),
            Some(engine),
        );

        // ...and gone again once the capture frame has been closed.
        assert_eq!(outcome.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine = embedded_engine();
        clear_last_parser_license_engine_ptr();

        let license_file_extra = HashMap::from([(
            "license_file".to_string(),
            serde_json::Value::String("LICENSE".to_string()),
        )]);

        let outcome = capture_parser_diagnostics(
            || {
                vec![PackageData {
                    declared_license_expression: Some("mit".to_string()),
                    declared_license_expression_spdx: Some("MIT".to_string()),
                    extracted_license_statement: Some("MIT".to_string()),
                    extra_data: Some(license_file_extra),
                    ..Default::default()
                }]
            },
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&engine)),
        );

        assert_eq!(outcome.packages.len(), 1);
        // Finalization must have seen the very engine passed to the capture call.
        assert_eq!(
            last_parser_license_engine_ptr(),
            Some(Arc::as_ptr(&engine) as usize)
        );
        let first_match = &outcome.packages[0].license_detections[0].matches[0];
        assert_eq!(
            first_match.referenced_filenames.as_ref(),
            Some(&vec!["LICENSE".to_string()])
        );
        assert!(active_parser_license_engine().is_none());
    }
}
823
// Handler registration. List order is significant: the generated
// `try_parse_file_with_license_engine` tries handlers top to bottom and returns on
// the first `is_match`, with every parser tried before any recognizer.
register_package_handlers! {
    parsers: [
        AboutFileParser,
        AndroidAabParser,
        AndroidApkParser,
        AndroidManifestParser,
        AndroidSoongMetadataParser,
        AlpineApkParser,
        AlpineApkbuildParser,
        AlpineInstalledParser,
        ArchPkginfoParser,
        ArchSrcinfoParser,
        AutotoolsConfigureParser,
        BazelBuildParser,
        BazelModuleParser,
        BitbakeRecipeParser,
        BowerJsonParser,
        BunLockParser,
        BunLockbParser,
        BuckBuildParser,
        BuckMetadataBzlParser,
        CargoLockParser,
        CargoParser,
        CarthageCartfileParser,
        CarthageCartfileResolvedParser,
        ChefMetadataJsonParser,
        ChefMetadataRbParser,
        CitationCffParser,
        ClojureDepsEdnParser,
        ClojureProjectCljParser,
        ComposerJsonParser,
        ComposerLockParser,
        ConanDataParser,
        ConanFilePyParser,
        ConanfileTxtParser,
        ConanLockParser,
        CondaEnvironmentYmlParser,
        CondaMetaJsonParser,
        CondaMetaYamlParser,
        CpanDistIniParser,
        CpanMakefilePlParser,
        CpanManifestParser,
        CpanMetaJsonParser,
        CpanMetaYmlParser,
        CranParser,
        DebianControlInExtractedDebParser,
        DebianControlParser,
        DebianCopyrightParser,
        DebianDebianTarParser,
        DebianDebParser,
        DebianDistrolessInstalledParser,
        DebianDscParser,
        DebianInstalledListParser,
        DebianInstalledMd5sumsParser,
        DebianInstalledParser,
        DebianMd5sumInPackageParser,
        DebianOrigTarParser,
        DenoParser,
        DenoLockParser,
        DockerfileParser,
        ErlangAppSrcParser,
        RebarConfigParser,
        RebarLockParser,
        FreebsdCompactManifestParser,
        GemArchiveParser,
        GemfileLockParser,
        GemfileParser,
        GemMetadataExtractedParser,
        GemspecParser,
        GitmodulesParser,
        GodepsParser,
        GoModParser,
        GoModGraphParser,
        GoSumParser,
        GoWorkParser,
        GradleLockfileParser,
        GradleParser,
        GradleModuleParser,
        HackageCabalParser,
        HackageCabalProjectParser,
        HackageStackYamlParser,
        HelmChartYamlParser,
        HelmChartLockParser,
        HaxeParser,
        HexLockParser,
        JuliaManifestTomlParser,
        JuliaProjectTomlParser,
        MavenParser,
        MesonParser,
        MicrosoftUpdateManifestParser,
        NixDefaultParser,
        NixFlakeLockParser,
        NixFlakeParser,
        NpmLockParser,
        NpmParser,
        NpmWorkspaceParser,
        DotNetDepsJsonParser,
        CentralPackageManagementPropsParser,
        DirectoryBuildPropsParser,
        NupkgParser,
        NuspecParser,
        PackageReferenceProjectParser,
        OpamParser,
        OsReleaseParser,
        PackagesConfigParser,
        PackagesLockParser,
        ProjectJsonParser,
        ProjectLockJsonParser,
        PipfileLockParser,
        PipInspectDeplockParser,
        PixiTomlParser,
        PixiLockParser,
        PnpmLockParser,
        PodfileLockParser,
        PodfileParser,
        PodspecJsonParser,
        PodspecParser,
        PoetryLockParser,
        PubliccodeParser,
        PylockTomlParser,
        PubspecLockParser,
        PubspecYamlParser,
        PythonParser,
        UvLockParser,
        VcpkgManifestParser,
        ReadmeParser,
        RequirementsTxtParser,
        RpmBdbDatabaseParser,
        RpmLicenseFilesParser,
        RpmMarinerManifestParser,
        RpmNdbDatabaseParser,
        RpmParser,
        RpmSpecfileParser,
        // Only registered when the `rpm-sqlite` feature is enabled.
        #[cfg(feature = "rpm-sqlite")]
        RpmSqliteDatabaseParser,
        RpmYumdbParser,
        SbtParser,
        SwiftManifestJsonParser,
        SwiftPackageResolvedParser,
        SwiftShowDependenciesParser,
        YarnLockParser,
        YarnPnpParser,
    ],
    recognizers: [
        AndroidLibraryRecognizer,
        AppleDmgRecognizer,
        Axis2MarRecognizer,
        Axis2ModuleXmlRecognizer,
        CabArchiveRecognizer,
        ChromeCrxRecognizer,
        InstallShieldRecognizer,
        IosIpaRecognizer,
        IsoImageRecognizer,
        IvyXmlRecognizer,
        JavaEarAppXmlRecognizer,
        JavaEarRecognizer,
        JavaJarRecognizer,
        JavaWarRecognizer,
        JavaWarWebXmlRecognizer,
        JBossSarRecognizer,
        JBossServiceXmlRecognizer,
        MeteorPackageRecognizer,
        MozillaXpiRecognizer,
        NsisRecognizer,
        SharArchiveRecognizer,
        SquashfsRecognizer,
    ],
}
992
#[cfg(test)]
mod panic_isolation_tests {
    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Runs a handler named "PanicParser" that always panics with "panic boom".
    fn run_panicking_parser(path: &Path) -> ParsePackagesResult {
        capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            path,
            None,
        )
    }

    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let outcome = run_panicking_parser(Path::new("fixtures/panic-package.json"));

        // The panic yields no packages and exactly one error-level diagnostic.
        assert!(outcome.packages.is_empty());
        assert_eq!(outcome.scan_errors.len(), 1);
        assert_eq!(outcome.scan_diagnostics.len(), 1);
        assert_eq!(
            outcome.scan_diagnostics[0].severity,
            DiagnosticSeverity::Error
        );

        // The message names the handler, the file, and the panic text.
        let message = &outcome.scan_errors[0];
        assert!(message.contains("PanicParser"));
        assert!(message.contains("fixtures/panic-package.json"));
        assert!(message.contains("panic boom"));
    }

    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        let _ = run_panicking_parser(Path::new("fixtures/panic-package.json"));

        // A later capture on the same thread must start from a clean frame.
        let outcome = capture_parser_diagnostics(
            || {
                crate::parser_warn!("recoverable parser warning");
                vec![PackageData {
                    package_type: Some(PackageType::Npm),
                    ..Default::default()
                }]
            },
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        assert_eq!(outcome.packages.len(), 1);
        assert_eq!(outcome.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(outcome.scan_diagnostics.len(), 1);
        assert_eq!(
            outcome.scan_diagnostics[0].severity,
            DiagnosticSeverity::Warning
        );
    }
}
1052}