Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(feature = "golden-tests")]
152pub mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185pub(crate) mod license_normalization;
186mod maven;
187mod meson;
188#[cfg(test)]
189mod meson_scan_test;
190#[cfg(test)]
191mod meson_test;
192pub mod metadata;
193mod microsoft_update_manifest;
194#[cfg(test)]
195mod microsoft_update_manifest_test;
196mod misc;
197#[cfg(test)]
198mod misc_test;
199mod nix;
200#[cfg(test)]
201mod nix_scan_test;
202#[cfg(test)]
203mod nix_test;
204mod npm;
205mod npm_lock;
206#[cfg(test)]
207mod npm_lock_test;
208#[cfg(test)]
209mod npm_scan_test;
210#[cfg(test)]
211mod npm_test;
212mod npm_workspace;
213#[cfg(test)]
214mod npm_workspace_test;
215mod nuget;
216mod opam;
217#[cfg(test)]
218mod opam_scan_test;
219mod os_release;
220#[cfg(test)]
221mod os_release_test;
222mod pep508;
223mod pip_inspect_deplock;
224#[cfg(test)]
225mod pip_inspect_deplock_test;
226mod pipfile_lock;
227#[cfg(test)]
228mod pipfile_lock_test;
229mod pixi;
230#[cfg(test)]
231mod pixi_scan_test;
232#[cfg(test)]
233mod pixi_test;
234mod pnpm_lock;
235#[cfg(test)]
236mod pnpm_lock_test;
237mod podfile;
238mod podfile_lock;
239#[cfg(test)]
240mod podfile_lock_test;
241mod podspec;
242mod podspec_json;
243#[cfg(test)]
244mod podspec_json_test;
245mod poetry_lock;
246#[cfg(test)]
247mod poetry_lock_test;
248mod publiccode;
249#[cfg(test)]
250mod publiccode_test;
251mod pylock_toml;
252#[cfg(test)]
253mod pylock_toml_test;
254mod python;
255mod readme;
256#[cfg(test)]
257mod readme_test;
258mod requirements_txt;
259#[cfg(test)]
260mod requirements_txt_test;
261pub(crate) mod rfc822;
262mod rpm_db;
263mod rpm_db_native;
264#[cfg(test)]
265mod rpm_db_scan_test;
266mod rpm_license_files;
267#[cfg(test)]
268mod rpm_license_files_test;
269mod rpm_mariner_manifest;
270#[cfg(test)]
271mod rpm_mariner_manifest_test;
272mod rpm_parser;
273#[cfg(test)]
274mod rpm_scan_test;
275mod rpm_specfile;
276#[cfg(test)]
277mod rpm_specfile_test;
278mod rpm_yumdb;
279mod ruby;
280#[cfg(test)]
281mod ruby_scan_test;
282#[cfg(test)]
283mod ruby_test;
284mod sbt;
285#[cfg(test)]
286mod sbt_test;
287#[cfg(test)]
288mod scan_test_utils;
289mod swift_manifest_json;
290#[cfg(test)]
291mod swift_manifest_json_test;
292mod swift_resolved;
293#[cfg(test)]
294mod swift_resolved_test;
295#[cfg(test)]
296mod swift_scan_test;
297mod swift_show_dependencies;
298#[cfg(test)]
299mod swift_show_dependencies_test;
300pub mod utils;
301mod uv_lock;
302#[cfg(test)]
303mod uv_lock_test;
304mod vcpkg;
305#[cfg(test)]
306mod vcpkg_scan_test;
307#[cfg(test)]
308mod vcpkg_test;
309pub(crate) mod windows_executable;
310#[cfg(test)]
311mod windows_executable_golden_test;
312mod yarn_lock;
313#[cfg(test)]
314mod yarn_lock_test;
315mod yarn_pnp;
316#[cfg(test)]
317mod yarn_pnp_test;
318
319use std::cell::RefCell;
320use std::panic::{AssertUnwindSafe, catch_unwind};
321use std::path::Path;
322use std::sync::Arc;
323
324use crate::license_detection::LicenseDetectionEngine;
325use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
326use crate::parsers::license_normalization::finalize_package_declared_license_references;
327use crate::parsers::utils::MAX_ITERATION_COUNT;
328
// Per-thread context stacks for parser runs. Every entry point in this file that
// pushes a frame onto one of them pops it again, so a nested run sees its own
// frame on top and the outer frame becomes the top again afterwards.
thread_local! {
    // One diagnostics buffer per active `capture_parser_diagnostics` call;
    // `record_parser_diagnostic` appends to the top buffer.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    // License engine (possibly `None`) handed to each active
    // `capture_parser_diagnostics` call; read via `active_parser_license_engine`.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
    // Scan root (possibly `None`) installed by each active `with_parser_scan_root`
    // call; read via `active_parser_scan_root`.
    static PARSER_SCAN_ROOT_STACK: RefCell<Vec<Option<std::path::PathBuf>>> = const { RefCell::new(Vec::new()) };
}
334
/// Outcome of running one package handler over one file.
///
/// Produced by `capture_parser_diagnostics` and returned by the dispatch
/// functions in this module.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages the handler returned; empty when the handler panicked.
    pub packages: Vec<PackageData>,
    /// Diagnostics collected while the handler ran, including an error entry
    /// describing the panic when the handler panicked.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
}
340
/// Renders a caught panic payload as text.
///
/// A payload that is a `&str` or a `String` is returned as-is; any other
/// payload type yields the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_string())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_string())
}
350
351pub(crate) fn capture_parser_diagnostics<F>(
352    extract: F,
353    handler_name: &str,
354    path: &Path,
355    license_engine: Option<Arc<LicenseDetectionEngine>>,
356) -> ParsePackagesResult
357where
358    F: FnOnce() -> Vec<PackageData>,
359{
360    PARSER_DIAGNOSTIC_STACK.with(|stack| {
361        stack.borrow_mut().push(Vec::new());
362    });
363    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
364        stack.borrow_mut().push(license_engine);
365    });
366
367    let extract_result = catch_unwind(AssertUnwindSafe(|| {
368        extract()
369            .into_iter()
370            .map(|mut package| {
371                finalize_package_declared_license_references(&mut package);
372                package
373            })
374            .take(MAX_ITERATION_COUNT)
375            .collect::<Vec<_>>()
376    }));
377    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
378        stack.borrow_mut().pop();
379    });
380    let mut scan_diagnostics =
381        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
382
383    match extract_result {
384        Ok(packages) => ParsePackagesResult {
385            packages,
386            scan_diagnostics,
387        },
388        Err(payload) => {
389            scan_diagnostics.push(ScanDiagnostic::error(format!(
390                "{} panicked while parsing {}: {}",
391                handler_name,
392                path.display(),
393                panic_payload_to_string(payload.as_ref())
394            )));
395            ParsePackagesResult {
396                packages: Vec::new(),
397                scan_diagnostics,
398            }
399        }
400    }
401}
402
403pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
404    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
405}
406
407pub(crate) fn active_parser_scan_root() -> Option<std::path::PathBuf> {
408    PARSER_SCAN_ROOT_STACK.with(|stack| stack.borrow().last().cloned().flatten())
409}
410
411pub(crate) fn with_parser_scan_root<T>(scan_root: Option<&Path>, f: impl FnOnce() -> T) -> T {
412    PARSER_SCAN_ROOT_STACK.with(|stack| {
413        stack.borrow_mut().push(scan_root.map(Path::to_path_buf));
414    });
415    let result = f();
416    PARSER_SCAN_ROOT_STACK.with(|stack| {
417        stack.borrow_mut().pop();
418    });
419    result
420}
421
422pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
423    PARSER_DIAGNOSTIC_STACK.with(|stack| {
424        let mut stack = stack.borrow_mut();
425        let Some(active) = stack.last_mut() else {
426            return false;
427        };
428        active.push(ScanDiagnostic { severity, message });
429        true
430    })
431}
432
/// Reports a parser warning built from `format!`-style arguments.
///
/// The message is first offered to `record_parser_diagnostic` with warning
/// severity. Only when that returns `false` is the message emitted through
/// `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        // The recorder takes ownership, so hand it a copy and keep the original
        // for the logging fallback.
        let recorded = $crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        );
        if !recorded {
            log::warn!("{message}");
        }
    }};
}
445
446/// Package parser trait for extracting metadata from package manifest files.
447///
448/// Each parser implementation handles a specific package manager/ecosystem
449/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
450/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
451///
452/// # Implementation Guide
453///
454/// Implementors must provide:
455/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
456/// - `is_match()`: Returns true if the given file path matches this parser's expected format
457/// - `extract_packages()`: Parses the file and returns all extracted package metadata
458///
459/// # Error Handling
460///
461/// Parsers should handle errors gracefully by returning default/empty `PackageData`
462/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
463/// dispatch captures those warnings and attaches them to `FileInfo.scan_diagnostics` so
464/// CI output and serialized scan results stay aligned.
465/// This allows the scan to continue processing other files even when individual
466/// files fail to parse.
467///
468/// # Example
469///
470/// ```no_run
471/// use provenant::models::{PackageData, PackageType};
472/// use provenant::parsers::PackageParser;
473/// use std::path::Path;
474///
475/// pub struct MyParser;
476///
477/// impl PackageParser for MyParser {
478///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
479///
480///     fn is_match(path: &Path) -> bool {
481///         path.file_name().is_some_and(|name| name == "package.json")
482///     }
483///
484///     fn extract_packages(path: &Path) -> Vec<PackageData> {
485///         vec![PackageData::default()]
486///     }
487/// }
488/// ```
489pub trait PackageParser {
490    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
491    const PACKAGE_TYPE: PackageType;
492
493    /// Extracts all packages from the given file path.
494    ///
495    /// Returns a vector of `PackageData` structures containing all extracted metadata
496    /// including name, version, dependencies, licenses, etc. Most parsers return a
497    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
498    /// can contain multiple packages in a single file.
499    ///
500    /// On parse errors, returns a vector with a default `PackageData` with minimal or
501    /// no fields populated.
502    fn extract_packages(path: &Path) -> Vec<PackageData>;
503
504    /// Checks if the given file path matches this parser's expected format.
505    ///
506    /// Returns true if the file should be handled by this parser based on filename,
507    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
508    fn is_match(path: &Path) -> bool;
509
510    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
511    /// or a default [`PackageData`] if the file contains no packages.
512    fn extract_first_package(path: &Path) -> PackageData {
513        Self::extract_packages(path)
514            .into_iter()
515            .map(|mut package| {
516                finalize_package_declared_license_references(&mut package);
517                package
518            })
519            .next()
520            .unwrap_or_default()
521    }
522
523    /// Returns documentation metadata for the file-format surfaces this parser handles.
524    ///
525    /// Used to auto-generate `docs/SUPPORTED_FORMATS.md`. Parsers that share a
526    /// documentation entry with another parser (e.g., AlpineApkParser and
527    /// AlpineInstalledParser) should return the entry from only one of them
528    /// and use the default empty implementation in the other.
529    fn metadata() -> Vec<metadata::ParserMetadata> {
530        Vec::new()
531    }
532}
533
534pub fn try_parse_rpm_archive_with_license_engine(
535    path: &Path,
536    license_engine: Option<Arc<LicenseDetectionEngine>>,
537) -> Option<ParsePackagesResult> {
538    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
539        return None;
540    }
541
542    if <RpmParser as PackageParser>::is_match(path) {
543        return Some(capture_parser_diagnostics(
544            || self::rpm_parser::extract_rpm_packages(path),
545            stringify!(RpmParser),
546            path,
547            license_engine,
548        ));
549    }
550
551    None
552}
553
554pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
555    try_parse_rpm_archive_with_license_engine(path, None)
556}
557
/// Public entry point that forwards to `compiled_binary::try_parse_compiled_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled.
#[cfg(feature = "golden-tests")]
pub fn try_parse_compiled_bytes(bytes: &[u8]) -> Option<ParsePackagesResult> {
    compiled_binary::try_parse_compiled_bytes(bytes)
}
562
/// Public entry point that forwards to
/// `windows_executable::try_parse_windows_executable_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled.
#[cfg(feature = "golden-tests")]
pub fn try_parse_windows_executable_bytes(
    path: &Path,
    bytes: &[u8],
) -> Option<ParsePackagesResult> {
    windows_executable::try_parse_windows_executable_bytes(path, bytes)
}
570
571pub fn path_looks_like_rpm_archive(path: &Path) -> bool {
572    self::rpm_parser::path_looks_like_rpm_archive(path)
573}
574
575/// Collects all registered parser and detection-surface metadata.
576///
577/// Used by the `generate-supported-formats` xtask to auto-generate
578/// `docs/SUPPORTED_FORMATS.md`.
579pub fn all_metadata() -> Vec<metadata::ParserMetadata> {
580    let mut entries = collect_parser_metadata();
581    entries.extend_from_slice(self::compiled_binary::COMPILED_BINARY_METADATA);
582    entries.extend_from_slice(self::windows_executable::WINDOWS_EXE_METADATA);
583    entries.extend_from_slice(self::misc::RECOGNIZER_METADATA);
584    entries.extend_from_slice(crate::utils::font::FONT_METADATA);
585    entries
586}
587
588pub use self::about::AboutFileParser;
589pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
590pub use self::android::{
591    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
592};
593#[cfg(feature = "golden-tests")]
594pub use self::android::{
595    ProtoItem, ProtoPrimitive, ProtoRawStringValue, ProtoSourcePosition, ProtoStringValue,
596    ProtoXmlAttribute, ProtoXmlElement, ProtoXmlNamespace, ProtoXmlNode, proto_item,
597    proto_primitive, proto_xml_node,
598};
599pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
600pub use self::autotools::AutotoolsConfigureParser;
601pub use self::bazel::{BazelBuildParser, BazelModuleParser};
602pub use self::bitbake::BitbakeRecipeParser;
603pub use self::bower::BowerJsonParser;
604pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
605pub use self::bun_lock::BunLockParser;
606pub use self::bun_lockb::BunLockbParser;
607pub use self::cargo::CargoParser;
608#[cfg_attr(not(test), allow(unused_imports))]
609pub use self::cargo_lock::CargoLockParser;
610pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
611pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
612pub use self::citation::CitationCffParser;
613pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
614pub use self::composer::{ComposerJsonParser, ComposerLockParser};
615pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
616pub use self::conan_data::ConanDataParser;
617pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
618pub use self::conda_meta_json::CondaMetaJsonParser;
619pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
620pub use self::cpan_dist_ini::CpanDistIniParser;
621pub use self::cpan_makefile_pl::CpanMakefilePlParser;
622pub use self::cran::CranParser;
623pub use self::dart::{PubspecLockParser, PubspecYamlParser};
624pub use self::debian::{
625    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
626    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
627    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
628    DebianMd5sumInPackageParser, DebianOrigTarParser,
629};
630pub use self::deno::DenoParser;
631pub use self::deno_lock::DenoLockParser;
632pub use self::docker::DockerfileParser;
633pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
634pub use self::freebsd::FreebsdCompactManifestParser;
635pub use self::gitmodules::GitmodulesParser;
636pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
637pub use self::go_mod_graph::GoModGraphParser;
638pub use self::gradle::GradleParser;
639pub use self::gradle_lock::GradleLockfileParser;
640pub use self::gradle_module::GradleModuleParser;
641pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
642pub use self::haxe::HaxeParser;
643pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
644pub use self::hex_lock::HexLockParser;
645pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
646pub use self::maven::MavenParser;
647pub use self::meson::MesonParser;
648pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
649pub use self::misc::{
650    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
651    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
652    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
653    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
654    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
655    SharArchiveRecognizer, SquashfsRecognizer,
656};
657pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
658pub use self::npm::NpmParser;
659pub use self::npm_lock::NpmLockParser;
660pub use self::npm_workspace::NpmWorkspaceParser;
661pub use self::nuget::{
662    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
663    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
664    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
665};
666pub use self::opam::OpamParser;
667pub use self::os_release::OsReleaseParser;
668pub use self::pip_inspect_deplock::PipInspectDeplockParser;
669pub use self::pipfile_lock::PipfileLockParser;
670pub use self::pixi::{PixiLockParser, PixiTomlParser};
671pub use self::pnpm_lock::PnpmLockParser;
672pub use self::podfile::PodfileParser;
673pub use self::podfile_lock::PodfileLockParser;
674pub use self::podspec::PodspecParser;
675pub use self::podspec_json::PodspecJsonParser;
676pub use self::poetry_lock::PoetryLockParser;
677pub use self::publiccode::PubliccodeParser;
678pub use self::pylock_toml::PylockTomlParser;
679pub use self::python::PythonParser;
680pub use self::readme::ReadmeParser;
681pub use self::requirements_txt::RequirementsTxtParser;
682#[cfg(feature = "rpm-sqlite")]
683pub use self::rpm_db::RpmSqliteDatabaseParser;
684pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
685pub use self::rpm_license_files::RpmLicenseFilesParser;
686pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
687pub use self::rpm_parser::RpmParser;
688pub use self::rpm_specfile::RpmSpecfileParser;
689pub use self::rpm_yumdb::RpmYumdbParser;
690pub use self::ruby::{
691    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
692};
693pub use self::sbt::SbtParser;
694pub use self::swift_manifest_json::SwiftManifestJsonParser;
695pub use self::swift_resolved::SwiftPackageResolvedParser;
696pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
697pub use self::uv_lock::UvLockParser;
698pub use self::vcpkg::VcpkgManifestParser;
699pub use self::yarn_lock::YarnLockParser;
700pub use self::yarn_pnp::YarnPnpParser;
701
/// Registers all parsers and recognizers, generating dispatch functions.
///
/// Parsers are tried first, then recognizers. This ordering is important because
/// recognizers match broadly by file extension (e.g., `.jar`) and would shadow
/// more specific parsers if checked first.
///
/// Within each list, handlers are tried in the order they are written and the
/// first one whose `is_match` returns true handles the file.
///
/// Attributes written before a parser entry (for example `#[cfg(...)]`) are
/// re-applied to every piece of generated code that mentions that parser.
///
/// Generated items: `try_parse_file_with_license_engine`, `try_parse_file`,
/// `parse_by_type_name`, `list_parser_types`, and `collect_parser_metadata`.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_meta:meta])* $parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        // Runs the first matching handler through `capture_parser_diagnostics`
        // with `license_engine` installed; `None` when no handler matches.
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            $(
                $(#[$parser_meta])*
                if <$parser>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser>::extract_packages(path),
                        stringify!($parser),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            $(
                if <$recognizer>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer>::extract_packages(path),
                        stringify!($recognizer),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        // Same dispatch with no license engine installed.
        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        // Looks the handler up by its stringified type name, skips `is_match`,
        // and calls `extract_first_package` directly, i.e. without
        // `capture_parser_diagnostics`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_meta])*
                    stringify!($parser) => Some(<$parser>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer) => Some(<$recognizer>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        // Names are the stringified handler types, parsers first.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_meta])*
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ]
        }

        /// Collects documentation metadata from all registered parsers and recognizers.
        pub fn collect_parser_metadata() -> Vec<metadata::ParserMetadata> {
            let mut entries = Vec::new();
            $(
                $(#[$parser_meta])*
                entries.extend(<$parser>::metadata());
            )*
            $(
                entries.extend(<$recognizer>::metadata());
            )*
            entries
        }
    };
}
789
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::sync::Arc;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };

    /// Builds the embedded license engine used by the tests in this module.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"))
    }

    /// The engine is visible to the extraction closure and gone once the call returns.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine = embedded_engine();

        let extract = || {
            assert!(active_parser_license_engine().is_some());
            vec![PackageData::default()]
        };
        let outcome = capture_parser_diagnostics(
            extract,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(engine),
        );

        assert_eq!(outcome.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    /// Package finalization still runs with the engine installed.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine = embedded_engine();
        clear_last_parser_license_engine_ptr();

        let extract = || {
            let extra_data = HashMap::from([(
                "license_file".to_string(),
                serde_json::Value::String("LICENSE".to_string()),
            )]);
            vec![PackageData {
                declared_license_expression: Some("mit".to_string()),
                declared_license_expression_spdx: Some("MIT".to_string()),
                extracted_license_statement: Some("MIT".to_string()),
                extra_data: Some(extra_data),
                ..Default::default()
            }]
        };
        let outcome = capture_parser_diagnostics(
            extract,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&engine)),
        );

        assert_eq!(outcome.packages.len(), 1);

        // Finalization must have observed exactly the engine passed in above.
        let expected_engine_ptr = Arc::as_ptr(&engine) as usize;
        assert_eq!(last_parser_license_engine_ptr(), Some(expected_engine_ptr));

        let first_match = &outcome.packages[0].license_detections[0].matches[0];
        assert_eq!(
            first_match.referenced_filenames.as_ref(),
            Some(&vec!["LICENSE".to_string()])
        );
        assert!(active_parser_license_engine().is_none());
    }
}
860
// Handler registration. The generated dispatch tries entries top to bottom and
// stops at the first handler whose `is_match` accepts the path, so the position
// of an entry in these lists is significant: do not re-sort them.
register_package_handlers! {
    // Format-specific parsers; all of these are tried before any recognizer.
    parsers: [
        AboutFileParser,
        AndroidAabParser,
        AndroidApkParser,
        AndroidManifestParser,
        AndroidSoongMetadataParser,
        AlpineApkParser,
        AlpineApkbuildParser,
        AlpineInstalledParser,
        ArchPkginfoParser,
        ArchSrcinfoParser,
        AutotoolsConfigureParser,
        BazelBuildParser,
        BazelModuleParser,
        BitbakeRecipeParser,
        BowerJsonParser,
        BunLockParser,
        BunLockbParser,
        BuckBuildParser,
        BuckMetadataBzlParser,
        CargoLockParser,
        CargoParser,
        CarthageCartfileParser,
        CarthageCartfileResolvedParser,
        ChefMetadataJsonParser,
        ChefMetadataRbParser,
        CitationCffParser,
        ClojureDepsEdnParser,
        ClojureProjectCljParser,
        ComposerJsonParser,
        ComposerLockParser,
        ConanDataParser,
        ConanFilePyParser,
        ConanfileTxtParser,
        ConanLockParser,
        CondaEnvironmentYmlParser,
        CondaMetaJsonParser,
        CondaMetaYamlParser,
        CpanDistIniParser,
        CpanMakefilePlParser,
        CpanManifestParser,
        CpanMetaJsonParser,
        CpanMetaYmlParser,
        CranParser,
        DebianControlInExtractedDebParser,
        DebianControlParser,
        DebianCopyrightParser,
        DebianDebianTarParser,
        DebianDebParser,
        DebianDistrolessInstalledParser,
        DebianDscParser,
        DebianInstalledListParser,
        DebianInstalledMd5sumsParser,
        DebianInstalledParser,
        DebianMd5sumInPackageParser,
        DebianOrigTarParser,
        DenoParser,
        DenoLockParser,
        DockerfileParser,
        ErlangAppSrcParser,
        RebarConfigParser,
        RebarLockParser,
        FreebsdCompactManifestParser,
        GemArchiveParser,
        GemfileLockParser,
        GemfileParser,
        GemMetadataExtractedParser,
        GemspecParser,
        GitmodulesParser,
        GodepsParser,
        GoModParser,
        GoModGraphParser,
        GoSumParser,
        GoWorkParser,
        GradleLockfileParser,
        GradleParser,
        GradleModuleParser,
        HackageCabalParser,
        HackageCabalProjectParser,
        HackageStackYamlParser,
        HelmChartYamlParser,
        HelmChartLockParser,
        HaxeParser,
        HexLockParser,
        JuliaManifestTomlParser,
        JuliaProjectTomlParser,
        MavenParser,
        MesonParser,
        MicrosoftUpdateManifestParser,
        NixDefaultParser,
        NixFlakeLockParser,
        NixFlakeParser,
        NpmLockParser,
        NpmParser,
        NpmWorkspaceParser,
        DotNetDepsJsonParser,
        CentralPackageManagementPropsParser,
        DirectoryBuildPropsParser,
        NupkgParser,
        NuspecParser,
        PackageReferenceProjectParser,
        OpamParser,
        OsReleaseParser,
        PackagesConfigParser,
        PackagesLockParser,
        ProjectJsonParser,
        ProjectLockJsonParser,
        PipfileLockParser,
        PipInspectDeplockParser,
        PixiTomlParser,
        PixiLockParser,
        PnpmLockParser,
        PodfileLockParser,
        PodfileParser,
        PodspecJsonParser,
        PodspecParser,
        PoetryLockParser,
        PubliccodeParser,
        PylockTomlParser,
        PubspecLockParser,
        PubspecYamlParser,
        PythonParser,
        UvLockParser,
        VcpkgManifestParser,
        ReadmeParser,
        RequirementsTxtParser,
        RpmBdbDatabaseParser,
        RpmLicenseFilesParser,
        RpmMarinerManifestParser,
        RpmNdbDatabaseParser,
        RpmParser,
        RpmSpecfileParser,
        // Registered only when the `rpm-sqlite` feature is enabled.
        #[cfg(feature = "rpm-sqlite")]
        RpmSqliteDatabaseParser,
        RpmYumdbParser,
        SbtParser,
        SwiftManifestJsonParser,
        SwiftPackageResolvedParser,
        SwiftShowDependenciesParser,
        YarnLockParser,
        YarnPnpParser,
    ],
    // Recognizers; consulted only when no parser above matched.
    recognizers: [
        AndroidLibraryRecognizer,
        AppleDmgRecognizer,
        Axis2MarRecognizer,
        Axis2ModuleXmlRecognizer,
        CabArchiveRecognizer,
        ChromeCrxRecognizer,
        InstallShieldRecognizer,
        IosIpaRecognizer,
        IsoImageRecognizer,
        IvyXmlRecognizer,
        JavaEarAppXmlRecognizer,
        JavaEarRecognizer,
        JavaJarRecognizer,
        JavaWarRecognizer,
        JavaWarWebXmlRecognizer,
        JBossSarRecognizer,
        JBossServiceXmlRecognizer,
        MeteorPackageRecognizer,
        MozillaXpiRecognizer,
        NsisRecognizer,
        SharArchiveRecognizer,
        SquashfsRecognizer,
    ],
}
1029
/// Tests checking that `capture_parser_diagnostics` contains a panicking
/// parser body and reports the failure through `scan_diagnostics`.
#[cfg(test)]
mod panic_isolation_tests {
    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Stand-in parser body that always panics with the payload `panic boom`.
    fn exploding_parser() -> Vec<PackageData> {
        panic!("panic boom")
    }

    /// A panic inside the parser body must yield no packages and exactly one
    /// error diagnostic naming the parser, the scanned path and the panic text.
    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let outcome = capture_parser_diagnostics(
            exploding_parser,
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        );

        assert!(outcome.packages.is_empty());
        assert_eq!(outcome.scan_diagnostics.len(), 1);

        let diagnostic = &outcome.scan_diagnostics[0];
        assert_eq!(diagnostic.severity, DiagnosticSeverity::Error);

        // Parser name, path and panic payload are checked in the same order
        // as before so the first failing expectation is unchanged.
        for fragment in ["PanicParser", "fixtures/panic-package.json", "panic boom"] {
            assert!(diagnostic.message.contains(fragment));
        }
    }

    /// After one call has panicked, a following call must still return its
    /// package and only the warning emitted by that second call.
    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        // First call: panic on purpose and discard whatever is reported.
        drop(capture_parser_diagnostics(
            exploding_parser,
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        ));

        // Second call: a well-behaved parser that emits a single warning.
        let well_behaved_parser = || {
            crate::parser_warn!("recoverable parser warning");
            let package = PackageData {
                package_type: Some(PackageType::Npm),
                ..Default::default()
            };
            vec![package]
        };
        let outcome = capture_parser_diagnostics(
            well_behaved_parser,
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        assert_eq!(outcome.packages.len(), 1);
        assert_eq!(outcome.scan_diagnostics.len(), 1);

        let diagnostic = &outcome.scan_diagnostics[0];
        assert_eq!(diagnostic.message, "recoverable parser warning");
        assert_eq!(diagnostic.severity, DiagnosticSeverity::Warning);
    }
}