Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(feature = "golden-tests")]
152pub mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185pub(crate) mod license_normalization;
186mod maven;
187mod meson;
188#[cfg(test)]
189mod meson_scan_test;
190#[cfg(test)]
191mod meson_test;
192pub mod metadata;
193mod microsoft_update_manifest;
194#[cfg(test)]
195mod microsoft_update_manifest_test;
196mod misc;
197#[cfg(test)]
198mod misc_test;
199mod nix;
200#[cfg(test)]
201mod nix_scan_test;
202#[cfg(test)]
203mod nix_test;
204mod npm;
205mod npm_lock;
206#[cfg(test)]
207mod npm_lock_test;
208#[cfg(test)]
209mod npm_scan_test;
210#[cfg(test)]
211mod npm_test;
212mod npm_workspace;
213#[cfg(test)]
214mod npm_workspace_test;
215mod nuget;
216mod opam;
217#[cfg(test)]
218mod opam_scan_test;
219mod os_release;
220#[cfg(test)]
221mod os_release_test;
222mod pep508;
223mod pip_inspect_deplock;
224#[cfg(test)]
225mod pip_inspect_deplock_test;
226mod pipfile_lock;
227#[cfg(test)]
228mod pipfile_lock_test;
229mod pixi;
230#[cfg(test)]
231mod pixi_scan_test;
232#[cfg(test)]
233mod pixi_test;
234mod pnpm_lock;
235#[cfg(test)]
236mod pnpm_lock_test;
237mod podfile;
238mod podfile_lock;
239#[cfg(test)]
240mod podfile_lock_test;
241mod podspec;
242mod podspec_json;
243#[cfg(test)]
244mod podspec_json_test;
245mod poetry_lock;
246#[cfg(test)]
247mod poetry_lock_test;
248mod publiccode;
249#[cfg(test)]
250mod publiccode_test;
251mod pylock_toml;
252#[cfg(test)]
253mod pylock_toml_test;
254mod python;
255mod readme;
256#[cfg(test)]
257mod readme_test;
258mod requirements_txt;
259#[cfg(test)]
260mod requirements_txt_test;
261pub(crate) mod rfc822;
262mod rpm_db;
263mod rpm_db_native;
264#[cfg(test)]
265mod rpm_db_scan_test;
266mod rpm_license_files;
267#[cfg(test)]
268mod rpm_license_files_test;
269mod rpm_mariner_manifest;
270#[cfg(test)]
271mod rpm_mariner_manifest_test;
272mod rpm_parser;
273#[cfg(test)]
274mod rpm_scan_test;
275mod rpm_specfile;
276#[cfg(test)]
277mod rpm_specfile_test;
278mod rpm_yumdb;
279mod ruby;
280#[cfg(test)]
281mod ruby_scan_test;
282#[cfg(test)]
283mod ruby_test;
284mod sbt;
285#[cfg(test)]
286mod sbt_test;
287#[cfg(test)]
288mod scan_test_utils;
289mod swift_manifest_json;
290#[cfg(test)]
291mod swift_manifest_json_test;
292mod swift_resolved;
293#[cfg(test)]
294mod swift_resolved_test;
295#[cfg(test)]
296mod swift_scan_test;
297mod swift_show_dependencies;
298#[cfg(test)]
299mod swift_show_dependencies_test;
300pub mod utils;
301mod uv_lock;
302#[cfg(test)]
303mod uv_lock_test;
304mod vcpkg;
305#[cfg(test)]
306mod vcpkg_scan_test;
307#[cfg(test)]
308mod vcpkg_test;
309pub(crate) mod windows_executable;
310#[cfg(test)]
311mod windows_executable_golden_test;
312mod yarn_lock;
313#[cfg(test)]
314mod yarn_lock_test;
315mod yarn_pnp;
316#[cfg(test)]
317mod yarn_pnp_test;
318
319use std::cell::RefCell;
320use std::panic::{AssertUnwindSafe, catch_unwind};
321use std::path::Path;
322use std::sync::Arc;
323
324use crate::license_detection::LicenseDetectionEngine;
325use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
326use crate::parsers::license_normalization::finalize_package_declared_license_references;
327use crate::parsers::utils::MAX_ITERATION_COUNT;
328
329thread_local! {
330    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
331    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
332}
333
334#[derive(Debug, Default)]
335pub struct ParsePackagesResult {
336    pub packages: Vec<PackageData>,
337    pub scan_diagnostics: Vec<ScanDiagnostic>,
338    pub scan_errors: Vec<String>,
339}
340
/// Renders a panic payload as text.
///
/// Payloads that are a `&str` or a `String` are returned as-is; any other
/// payload type yields the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_owned())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_owned())
}
350
351pub(crate) fn capture_parser_diagnostics<F>(
352    extract: F,
353    handler_name: &str,
354    path: &Path,
355    license_engine: Option<Arc<LicenseDetectionEngine>>,
356) -> ParsePackagesResult
357where
358    F: FnOnce() -> Vec<PackageData>,
359{
360    PARSER_DIAGNOSTIC_STACK.with(|stack| {
361        stack.borrow_mut().push(Vec::new());
362    });
363    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
364        stack.borrow_mut().push(license_engine);
365    });
366
367    let extract_result = catch_unwind(AssertUnwindSafe(|| {
368        extract()
369            .into_iter()
370            .map(|mut package| {
371                finalize_package_declared_license_references(&mut package);
372                package
373            })
374            .take(MAX_ITERATION_COUNT)
375            .collect::<Vec<_>>()
376    }));
377    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
378        stack.borrow_mut().pop();
379    });
380    let mut scan_diagnostics =
381        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
382
383    match extract_result {
384        Ok(packages) => ParsePackagesResult {
385            packages,
386            scan_errors: scan_diagnostics
387                .iter()
388                .map(|diagnostic| diagnostic.message.clone())
389                .collect(),
390            scan_diagnostics,
391        },
392        Err(payload) => {
393            scan_diagnostics.push(ScanDiagnostic::error(format!(
394                "{} panicked while parsing {}: {}",
395                handler_name,
396                path.display(),
397                panic_payload_to_string(payload.as_ref())
398            )));
399            ParsePackagesResult {
400                packages: Vec::new(),
401                scan_errors: scan_diagnostics
402                    .iter()
403                    .map(|diagnostic| diagnostic.message.clone())
404                    .collect(),
405                scan_diagnostics,
406            }
407        }
408    }
409}
410
411pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
412    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
413}
414
415pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
416    PARSER_DIAGNOSTIC_STACK.with(|stack| {
417        let mut stack = stack.borrow_mut();
418        let Some(active) = stack.last_mut() else {
419            return false;
420        };
421        active.push(ScanDiagnostic { severity, message });
422        true
423    })
424}
425
/// Emits a parser warning, using `format!`-style arguments.
///
/// The warning is recorded on the active parser diagnostic frame when there is
/// one; otherwise it is forwarded to `log::warn!`.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        let captured = $crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        );
        if !captured {
            log::warn!("{message}");
        }
    }};
}
438
439/// Package parser trait for extracting metadata from package manifest files.
440///
441/// Each parser implementation handles a specific package manager/ecosystem
442/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
443/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
444///
445/// # Implementation Guide
446///
447/// Implementors must provide:
448/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
449/// - `is_match()`: Returns true if the given file path matches this parser's expected format
450/// - `extract_packages()`: Parses the file and returns all extracted package metadata
451///
452/// # Error Handling
453///
454/// Parsers should handle errors gracefully by returning default/empty `PackageData`
455/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
456/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
457/// CI output and serialized scan results stay aligned.
458/// This allows the scan to continue processing other files even when individual
459/// files fail to parse.
460///
461/// # Example
462///
463/// ```no_run
464/// use provenant::models::{PackageData, PackageType};
465/// use provenant::parsers::PackageParser;
466/// use std::path::Path;
467///
468/// pub struct MyParser;
469///
470/// impl PackageParser for MyParser {
471///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
472///
473///     fn is_match(path: &Path) -> bool {
474///         path.file_name().is_some_and(|name| name == "package.json")
475///     }
476///
477///     fn extract_packages(path: &Path) -> Vec<PackageData> {
478///         vec![PackageData::default()]
479///     }
480/// }
481/// ```
482pub trait PackageParser {
483    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
484    const PACKAGE_TYPE: PackageType;
485
486    /// Extracts all packages from the given file path.
487    ///
488    /// Returns a vector of `PackageData` structures containing all extracted metadata
489    /// including name, version, dependencies, licenses, etc. Most parsers return a
490    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
491    /// can contain multiple packages in a single file.
492    ///
493    /// On parse errors, returns a vector with a default `PackageData` with minimal or
494    /// no fields populated.
495    fn extract_packages(path: &Path) -> Vec<PackageData>;
496
497    /// Checks if the given file path matches this parser's expected format.
498    ///
499    /// Returns true if the file should be handled by this parser based on filename,
500    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
501    fn is_match(path: &Path) -> bool;
502
503    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
504    /// or a default [`PackageData`] if the file contains no packages.
505    fn extract_first_package(path: &Path) -> PackageData {
506        Self::extract_packages(path)
507            .into_iter()
508            .map(|mut package| {
509                finalize_package_declared_license_references(&mut package);
510                package
511            })
512            .next()
513            .unwrap_or_default()
514    }
515}
516
517pub fn try_parse_rpm_archive_with_license_engine(
518    path: &Path,
519    license_engine: Option<Arc<LicenseDetectionEngine>>,
520) -> Option<ParsePackagesResult> {
521    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
522        return None;
523    }
524
525    if <RpmParser as PackageParser>::is_match(path) {
526        return Some(capture_parser_diagnostics(
527            || self::rpm_parser::extract_rpm_packages(path),
528            stringify!(RpmParser),
529            path,
530            license_engine,
531        ));
532    }
533
534    None
535}
536
537pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
538    try_parse_rpm_archive_with_license_engine(path, None)
539}
540
/// Public entry point that forwards `bytes` to the `compiled_binary` module's
/// parser of the same name. Only built with the `golden-tests` feature.
#[cfg(feature = "golden-tests")]
pub fn try_parse_compiled_bytes(bytes: &[u8]) -> Option<ParsePackagesResult> {
    use self::compiled_binary::try_parse_compiled_bytes as parse_compiled_bytes;

    parse_compiled_bytes(bytes)
}
545
/// Public entry point that forwards `path` and `bytes` to the
/// `windows_executable` module's parser of the same name. Only built with the
/// `golden-tests` feature.
#[cfg(feature = "golden-tests")]
pub fn try_parse_windows_executable_bytes(
    path: &Path,
    bytes: &[u8],
) -> Option<ParsePackagesResult> {
    use self::windows_executable::try_parse_windows_executable_bytes as parse_executable_bytes;

    parse_executable_bytes(path, bytes)
}
553
554pub(crate) fn path_looks_like_rpm_archive(path: &Path) -> bool {
555    self::rpm_parser::path_looks_like_rpm_archive(path)
556}
557
558pub use self::about::AboutFileParser;
559pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
560pub use self::android::{
561    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
562};
563#[cfg(feature = "golden-tests")]
564pub use self::android::{
565    ProtoItem, ProtoPrimitive, ProtoRawStringValue, ProtoSourcePosition, ProtoStringValue,
566    ProtoXmlAttribute, ProtoXmlElement, ProtoXmlNamespace, ProtoXmlNode, proto_item,
567    proto_primitive, proto_xml_node,
568};
569pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
570pub use self::autotools::AutotoolsConfigureParser;
571pub use self::bazel::{BazelBuildParser, BazelModuleParser};
572pub use self::bitbake::BitbakeRecipeParser;
573pub use self::bower::BowerJsonParser;
574pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
575pub use self::bun_lock::BunLockParser;
576pub use self::bun_lockb::BunLockbParser;
577pub use self::cargo::CargoParser;
578#[cfg_attr(not(test), allow(unused_imports))]
579pub use self::cargo_lock::CargoLockParser;
580pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
581pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
582pub use self::citation::CitationCffParser;
583pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
584pub use self::composer::{ComposerJsonParser, ComposerLockParser};
585pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
586pub use self::conan_data::ConanDataParser;
587pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
588pub use self::conda_meta_json::CondaMetaJsonParser;
589pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
590pub use self::cpan_dist_ini::CpanDistIniParser;
591pub use self::cpan_makefile_pl::CpanMakefilePlParser;
592pub use self::cran::CranParser;
593pub use self::dart::{PubspecLockParser, PubspecYamlParser};
594pub use self::debian::{
595    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
596    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
597    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
598    DebianMd5sumInPackageParser, DebianOrigTarParser,
599};
600pub use self::deno::DenoParser;
601pub use self::deno_lock::DenoLockParser;
602pub use self::docker::DockerfileParser;
603pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
604pub use self::freebsd::FreebsdCompactManifestParser;
605pub use self::gitmodules::GitmodulesParser;
606pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
607pub use self::go_mod_graph::GoModGraphParser;
608pub use self::gradle::GradleParser;
609pub use self::gradle_lock::GradleLockfileParser;
610pub use self::gradle_module::GradleModuleParser;
611pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
612pub use self::haxe::HaxeParser;
613pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
614pub use self::hex_lock::HexLockParser;
615pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
616pub use self::maven::MavenParser;
617pub use self::meson::MesonParser;
618pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
619pub use self::misc::{
620    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
621    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
622    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
623    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
624    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
625    SharArchiveRecognizer, SquashfsRecognizer,
626};
627pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
628pub use self::npm::NpmParser;
629pub use self::npm_lock::NpmLockParser;
630pub use self::npm_workspace::NpmWorkspaceParser;
631pub use self::nuget::{
632    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
633    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
634    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
635};
636pub use self::opam::OpamParser;
637pub use self::os_release::OsReleaseParser;
638pub use self::pip_inspect_deplock::PipInspectDeplockParser;
639pub use self::pipfile_lock::PipfileLockParser;
640pub use self::pixi::{PixiLockParser, PixiTomlParser};
641pub use self::pnpm_lock::PnpmLockParser;
642pub use self::podfile::PodfileParser;
643pub use self::podfile_lock::PodfileLockParser;
644pub use self::podspec::PodspecParser;
645pub use self::podspec_json::PodspecJsonParser;
646pub use self::poetry_lock::PoetryLockParser;
647pub use self::publiccode::PubliccodeParser;
648pub use self::pylock_toml::PylockTomlParser;
649pub use self::python::PythonParser;
650pub use self::readme::ReadmeParser;
651pub use self::requirements_txt::RequirementsTxtParser;
652#[cfg(feature = "rpm-sqlite")]
653pub use self::rpm_db::RpmSqliteDatabaseParser;
654pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
655pub use self::rpm_license_files::RpmLicenseFilesParser;
656pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
657pub use self::rpm_parser::RpmParser;
658pub use self::rpm_specfile::RpmSpecfileParser;
659pub use self::rpm_yumdb::RpmYumdbParser;
660pub use self::ruby::{
661    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
662};
663pub use self::sbt::SbtParser;
664pub use self::swift_manifest_json::SwiftManifestJsonParser;
665pub use self::swift_resolved::SwiftPackageResolvedParser;
666pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
667pub use self::uv_lock::UvLockParser;
668pub use self::vcpkg::VcpkgManifestParser;
669pub use self::yarn_lock::YarnLockParser;
670pub use self::yarn_pnp::YarnPnpParser;
671
/// Generates the dispatch functions for every registered parser and recognizer.
///
/// Handlers are tried in the order they are listed, parsers before recognizers,
/// and the first one whose `is_match` accepts the path wins. The ordering is
/// important because recognizers match broadly by file extension (e.g., `.jar`)
/// and would shadow more specific parsers if checked first.
///
/// Attributes written before a parser entry (such as `#[cfg(...)]`) are applied
/// to each piece of code generated for that parser.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_attr:meta])* $parser_ty:ty),* $(,)?],
        recognizers: [$($recognizer_ty:ty),* $(,)?] $(,)?
    ) => {
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            // Parsers, in registration order.
            $(
                $(#[$parser_attr])*
                if <$parser_ty>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser_ty>::extract_packages(path),
                        stringify!($parser_ty),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            // Recognizers, only reached when no parser matched.
            $(
                if <$recognizer_ty>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer_ty>::extract_packages(path),
                        stringify!($recognizer_ty),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_attr])*
                    stringify!($parser_ty) => Some(<$parser_ty>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer_ty) => Some(<$recognizer_ty>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_attr])*
                    stringify!($parser_ty),
                )*
                $(
                    stringify!($recognizer_ty),
                )*
            ]
        }
    };
}
746
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::sync::Arc;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };

    /// Loads the embedded license engine shared by the tests below.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        let engine = LicenseDetectionEngine::from_embedded().expect("embedded engine should load");
        Arc::new(engine)
    }

    /// The engine is visible inside the handler and gone once the capture returns.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine = embedded_engine();

        let result = capture_parser_diagnostics(
            || {
                assert!(active_parser_license_engine().is_some());
                vec![PackageData::default()]
            },
            "TestParser",
            Path::new("testdata/package.json"),
            Some(engine),
        );

        assert_eq!(result.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    /// The engine must still be active while packages are finalized after extraction.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine = embedded_engine();
        clear_last_parser_license_engine_ptr();

        let result = capture_parser_diagnostics(
            || {
                let extra_data = HashMap::from([(
                    "license_file".to_string(),
                    serde_json::Value::String("LICENSE".to_string()),
                )]);
                vec![PackageData {
                    declared_license_expression: Some("mit".to_string()),
                    declared_license_expression_spdx: Some("MIT".to_string()),
                    extracted_license_statement: Some("MIT".to_string()),
                    extra_data: Some(extra_data),
                    ..Default::default()
                }]
            },
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&engine)),
        );

        assert_eq!(result.packages.len(), 1);

        let engine_ptr = Arc::as_ptr(&engine) as usize;
        assert_eq!(last_parser_license_engine_ptr(), Some(engine_ptr));

        let referenced_filenames = result.packages[0].license_detections[0].matches[0]
            .referenced_filenames
            .as_ref();
        assert_eq!(referenced_filenames, Some(&vec!["LICENSE".to_string()]));

        assert!(active_parser_license_engine().is_none());
    }
}
817
// Handler registration. List order is dispatch order: the generated
// `try_parse_file_with_license_engine` checks each entry's `is_match` in the
// order written here (all parsers, then all recognizers) and returns on the
// first match, so reordering entries can change which handler claims a file.
// `RpmSqliteDatabaseParser` is only registered when the `rpm-sqlite` feature
// is enabled.
register_package_handlers! {
    parsers: [
        AboutFileParser,
        AndroidAabParser,
        AndroidApkParser,
        AndroidManifestParser,
        AndroidSoongMetadataParser,
        AlpineApkParser,
        AlpineApkbuildParser,
        AlpineInstalledParser,
        ArchPkginfoParser,
        ArchSrcinfoParser,
        AutotoolsConfigureParser,
        BazelBuildParser,
        BazelModuleParser,
        BitbakeRecipeParser,
        BowerJsonParser,
        BunLockParser,
        BunLockbParser,
        BuckBuildParser,
        BuckMetadataBzlParser,
        CargoLockParser,
        CargoParser,
        CarthageCartfileParser,
        CarthageCartfileResolvedParser,
        ChefMetadataJsonParser,
        ChefMetadataRbParser,
        CitationCffParser,
        ClojureDepsEdnParser,
        ClojureProjectCljParser,
        ComposerJsonParser,
        ComposerLockParser,
        ConanDataParser,
        ConanFilePyParser,
        ConanfileTxtParser,
        ConanLockParser,
        CondaEnvironmentYmlParser,
        CondaMetaJsonParser,
        CondaMetaYamlParser,
        CpanDistIniParser,
        CpanMakefilePlParser,
        CpanManifestParser,
        CpanMetaJsonParser,
        CpanMetaYmlParser,
        CranParser,
        DebianControlInExtractedDebParser,
        DebianControlParser,
        DebianCopyrightParser,
        DebianDebianTarParser,
        DebianDebParser,
        DebianDistrolessInstalledParser,
        DebianDscParser,
        DebianInstalledListParser,
        DebianInstalledMd5sumsParser,
        DebianInstalledParser,
        DebianMd5sumInPackageParser,
        DebianOrigTarParser,
        DenoParser,
        DenoLockParser,
        DockerfileParser,
        ErlangAppSrcParser,
        RebarConfigParser,
        RebarLockParser,
        FreebsdCompactManifestParser,
        GemArchiveParser,
        GemfileLockParser,
        GemfileParser,
        GemMetadataExtractedParser,
        GemspecParser,
        GitmodulesParser,
        GodepsParser,
        GoModParser,
        GoModGraphParser,
        GoSumParser,
        GoWorkParser,
        GradleLockfileParser,
        GradleParser,
        GradleModuleParser,
        HackageCabalParser,
        HackageCabalProjectParser,
        HackageStackYamlParser,
        HelmChartYamlParser,
        HelmChartLockParser,
        HaxeParser,
        HexLockParser,
        JuliaManifestTomlParser,
        JuliaProjectTomlParser,
        MavenParser,
        MesonParser,
        MicrosoftUpdateManifestParser,
        NixDefaultParser,
        NixFlakeLockParser,
        NixFlakeParser,
        NpmLockParser,
        NpmParser,
        NpmWorkspaceParser,
        DotNetDepsJsonParser,
        CentralPackageManagementPropsParser,
        DirectoryBuildPropsParser,
        NupkgParser,
        NuspecParser,
        PackageReferenceProjectParser,
        OpamParser,
        OsReleaseParser,
        PackagesConfigParser,
        PackagesLockParser,
        ProjectJsonParser,
        ProjectLockJsonParser,
        PipfileLockParser,
        PipInspectDeplockParser,
        PixiTomlParser,
        PixiLockParser,
        PnpmLockParser,
        PodfileLockParser,
        PodfileParser,
        PodspecJsonParser,
        PodspecParser,
        PoetryLockParser,
        PubliccodeParser,
        PylockTomlParser,
        PubspecLockParser,
        PubspecYamlParser,
        PythonParser,
        UvLockParser,
        VcpkgManifestParser,
        ReadmeParser,
        RequirementsTxtParser,
        RpmBdbDatabaseParser,
        RpmLicenseFilesParser,
        RpmMarinerManifestParser,
        RpmNdbDatabaseParser,
        RpmParser,
        RpmSpecfileParser,
        #[cfg(feature = "rpm-sqlite")]
        RpmSqliteDatabaseParser,
        RpmYumdbParser,
        SbtParser,
        SwiftManifestJsonParser,
        SwiftPackageResolvedParser,
        SwiftShowDependenciesParser,
        YarnLockParser,
        YarnPnpParser,
    ],
    recognizers: [
        AndroidLibraryRecognizer,
        AppleDmgRecognizer,
        Axis2MarRecognizer,
        Axis2ModuleXmlRecognizer,
        CabArchiveRecognizer,
        ChromeCrxRecognizer,
        InstallShieldRecognizer,
        IosIpaRecognizer,
        IsoImageRecognizer,
        IvyXmlRecognizer,
        JavaEarAppXmlRecognizer,
        JavaEarRecognizer,
        JavaJarRecognizer,
        JavaWarRecognizer,
        JavaWarWebXmlRecognizer,
        JBossSarRecognizer,
        JBossServiceXmlRecognizer,
        MeteorPackageRecognizer,
        MozillaXpiRecognizer,
        NsisRecognizer,
        SharArchiveRecognizer,
        SquashfsRecognizer,
    ],
}
986
#[cfg(test)]
mod panic_isolation_tests {
    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Runs a handler that always panics and returns what the capture produced.
    fn run_panicking_parser() -> ParsePackagesResult {
        capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        )
    }

    /// A panic becomes a single error diagnostic that names the handler, the
    /// file and the panic message.
    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let result = run_panicking_parser();

        assert!(result.packages.is_empty());
        assert_eq!(result.scan_errors.len(), 1);
        assert_eq!(result.scan_diagnostics.len(), 1);
        assert_eq!(
            result.scan_diagnostics[0].severity,
            DiagnosticSeverity::Error
        );

        let message = &result.scan_errors[0];
        assert!(message.contains("PanicParser"));
        assert!(message.contains("fixtures/panic-package.json"));
        assert!(message.contains("panic boom"));
    }

    /// After a panicking capture, the next capture on the same thread sees only
    /// its own diagnostics.
    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        let _ = run_panicking_parser();

        let result = capture_parser_diagnostics(
            || {
                crate::parser_warn!("recoverable parser warning");
                vec![PackageData {
                    package_type: Some(PackageType::Npm),
                    ..Default::default()
                }]
            },
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        assert_eq!(result.packages.len(), 1);
        assert_eq!(result.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(result.scan_diagnostics.len(), 1);
        assert_eq!(
            result.scan_diagnostics[0].severity,
            DiagnosticSeverity::Warning
        );
    }
}
1046}