Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(feature = "golden-tests")]
152pub mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185pub(crate) mod license_normalization;
186mod maven;
187mod meson;
188#[cfg(test)]
189mod meson_scan_test;
190#[cfg(test)]
191mod meson_test;
192pub mod metadata;
193mod microsoft_update_manifest;
194#[cfg(test)]
195mod microsoft_update_manifest_test;
196mod misc;
197#[cfg(test)]
198mod misc_test;
199mod nix;
200#[cfg(test)]
201mod nix_scan_test;
202#[cfg(test)]
203mod nix_test;
204mod npm;
205mod npm_lock;
206#[cfg(test)]
207mod npm_lock_test;
208#[cfg(test)]
209mod npm_scan_test;
210#[cfg(test)]
211mod npm_test;
212mod npm_workspace;
213#[cfg(test)]
214mod npm_workspace_test;
215mod nuget;
216mod opam;
217#[cfg(test)]
218mod opam_scan_test;
219mod os_release;
220#[cfg(test)]
221mod os_release_test;
222mod pep508;
223mod pip_inspect_deplock;
224#[cfg(test)]
225mod pip_inspect_deplock_test;
226mod pipfile_lock;
227#[cfg(test)]
228mod pipfile_lock_test;
229mod pixi;
230#[cfg(test)]
231mod pixi_scan_test;
232#[cfg(test)]
233mod pixi_test;
234mod pnpm_lock;
235#[cfg(test)]
236mod pnpm_lock_test;
237mod podfile;
238mod podfile_lock;
239#[cfg(test)]
240mod podfile_lock_test;
241mod podspec;
242mod podspec_json;
243#[cfg(test)]
244mod podspec_json_test;
245mod poetry_lock;
246#[cfg(test)]
247mod poetry_lock_test;
248mod publiccode;
249#[cfg(test)]
250mod publiccode_test;
251mod pylock_toml;
252#[cfg(test)]
253mod pylock_toml_test;
254mod python;
255mod readme;
256#[cfg(test)]
257mod readme_test;
258mod requirements_txt;
259#[cfg(test)]
260mod requirements_txt_test;
261pub(crate) mod rfc822;
262mod rpm_db;
263mod rpm_db_native;
264#[cfg(test)]
265mod rpm_db_scan_test;
266mod rpm_license_files;
267#[cfg(test)]
268mod rpm_license_files_test;
269mod rpm_mariner_manifest;
270#[cfg(test)]
271mod rpm_mariner_manifest_test;
272mod rpm_parser;
273#[cfg(test)]
274mod rpm_scan_test;
275mod rpm_specfile;
276#[cfg(test)]
277mod rpm_specfile_test;
278mod rpm_yumdb;
279mod ruby;
280#[cfg(test)]
281mod ruby_scan_test;
282#[cfg(test)]
283mod ruby_test;
284mod sbt;
285#[cfg(test)]
286mod sbt_test;
287#[cfg(test)]
288mod scan_test_utils;
289mod swift_manifest_json;
290#[cfg(test)]
291mod swift_manifest_json_test;
292mod swift_resolved;
293#[cfg(test)]
294mod swift_resolved_test;
295#[cfg(test)]
296mod swift_scan_test;
297mod swift_show_dependencies;
298#[cfg(test)]
299mod swift_show_dependencies_test;
300pub mod utils;
301mod uv_lock;
302#[cfg(test)]
303mod uv_lock_test;
304mod vcpkg;
305#[cfg(test)]
306mod vcpkg_scan_test;
307#[cfg(test)]
308mod vcpkg_test;
309pub(crate) mod windows_executable;
310#[cfg(test)]
311mod windows_executable_golden_test;
312mod yarn_lock;
313#[cfg(test)]
314mod yarn_lock_test;
315mod yarn_pnp;
316#[cfg(test)]
317mod yarn_pnp_test;
318
319use std::cell::RefCell;
320use std::panic::{AssertUnwindSafe, catch_unwind};
321use std::path::Path;
322use std::sync::Arc;
323
324use crate::license_detection::LicenseDetectionEngine;
325use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
326use crate::parsers::license_normalization::finalize_package_declared_license_references;
327use crate::parsers::utils::MAX_ITERATION_COUNT;
328
// Per-thread context stacks used while a parser runs. Each stack holds one
// frame per active scope; the accessor functions in this module only ever look
// at the innermost (last) frame.
thread_local! {
    // One frame of collected diagnostics per in-flight `capture_parser_diagnostics`
    // call; `record_parser_diagnostic` appends to the innermost frame.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    // License engine (or `None`) supplied to each in-flight
    // `capture_parser_diagnostics` call; read by `active_parser_license_engine`.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
    // Scan root (or `None`) installed by each in-flight `with_parser_scan_root`
    // call; read by `active_parser_scan_root`.
    static PARSER_SCAN_ROOT_STACK: RefCell<Vec<Option<std::path::PathBuf>>> = const { RefCell::new(Vec::new()) };
}
334
/// Outcome of running a single package handler against one file.
///
/// The field descriptions below reflect how `capture_parser_diagnostics` in
/// this module fills the struct; other producers may populate it differently.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages returned by the handler; empty when the handler panicked.
    pub packages: Vec<PackageData>,
    /// Diagnostics recorded while the handler ran, plus an error entry if it panicked.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    /// The `message` of every entry in `scan_diagnostics`, in the same order.
    pub scan_errors: Vec<String>,
}
341
/// Renders a caught panic payload as a displayable message.
///
/// A payload holding a `&str` or a `String` is returned as its text; any other
/// payload type yields the fixed placeholder `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_string())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_string())
}
351
352pub(crate) fn capture_parser_diagnostics<F>(
353    extract: F,
354    handler_name: &str,
355    path: &Path,
356    license_engine: Option<Arc<LicenseDetectionEngine>>,
357) -> ParsePackagesResult
358where
359    F: FnOnce() -> Vec<PackageData>,
360{
361    PARSER_DIAGNOSTIC_STACK.with(|stack| {
362        stack.borrow_mut().push(Vec::new());
363    });
364    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
365        stack.borrow_mut().push(license_engine);
366    });
367
368    let extract_result = catch_unwind(AssertUnwindSafe(|| {
369        extract()
370            .into_iter()
371            .map(|mut package| {
372                finalize_package_declared_license_references(&mut package);
373                package
374            })
375            .take(MAX_ITERATION_COUNT)
376            .collect::<Vec<_>>()
377    }));
378    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
379        stack.borrow_mut().pop();
380    });
381    let mut scan_diagnostics =
382        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
383
384    match extract_result {
385        Ok(packages) => ParsePackagesResult {
386            packages,
387            scan_errors: scan_diagnostics
388                .iter()
389                .map(|diagnostic| diagnostic.message.clone())
390                .collect(),
391            scan_diagnostics,
392        },
393        Err(payload) => {
394            scan_diagnostics.push(ScanDiagnostic::error(format!(
395                "{} panicked while parsing {}: {}",
396                handler_name,
397                path.display(),
398                panic_payload_to_string(payload.as_ref())
399            )));
400            ParsePackagesResult {
401                packages: Vec::new(),
402                scan_errors: scan_diagnostics
403                    .iter()
404                    .map(|diagnostic| diagnostic.message.clone())
405                    .collect(),
406                scan_diagnostics,
407            }
408        }
409    }
410}
411
412pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
413    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
414}
415
416pub(crate) fn active_parser_scan_root() -> Option<std::path::PathBuf> {
417    PARSER_SCAN_ROOT_STACK.with(|stack| stack.borrow().last().cloned().flatten())
418}
419
420pub(crate) fn with_parser_scan_root<T>(scan_root: Option<&Path>, f: impl FnOnce() -> T) -> T {
421    PARSER_SCAN_ROOT_STACK.with(|stack| {
422        stack.borrow_mut().push(scan_root.map(Path::to_path_buf));
423    });
424    let result = f();
425    PARSER_SCAN_ROOT_STACK.with(|stack| {
426        stack.borrow_mut().pop();
427    });
428    result
429}
430
431pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
432    PARSER_DIAGNOSTIC_STACK.with(|stack| {
433        let mut stack = stack.borrow_mut();
434        let Some(active) = stack.last_mut() else {
435            return false;
436        };
437        active.push(ScanDiagnostic { severity, message });
438        true
439    })
440}
441
/// Reports a parser warning, formatted like `format!`.
///
/// The message is offered to `record_parser_diagnostic` with `Warning`
/// severity. If that returns `false`, the message is emitted through
/// `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        // The recorder takes ownership, so a copy is handed over and the
        // original stays available for the logging fallback.
        let recorded = $crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        );
        if !recorded {
            log::warn!("{message}");
        }
    }};
}
454
455/// Package parser trait for extracting metadata from package manifest files.
456///
457/// Each parser implementation handles a specific package manager/ecosystem
458/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
459/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
460///
461/// # Implementation Guide
462///
463/// Implementors must provide:
464/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
465/// - `is_match()`: Returns true if the given file path matches this parser's expected format
466/// - `extract_packages()`: Parses the file and returns all extracted package metadata
467///
468/// # Error Handling
469///
470/// Parsers should handle errors gracefully by returning default/empty `PackageData`
471/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
472/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
473/// CI output and serialized scan results stay aligned.
474/// This allows the scan to continue processing other files even when individual
475/// files fail to parse.
476///
477/// # Example
478///
479/// ```no_run
480/// use provenant::models::{PackageData, PackageType};
481/// use provenant::parsers::PackageParser;
482/// use std::path::Path;
483///
484/// pub struct MyParser;
485///
486/// impl PackageParser for MyParser {
487///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
488///
489///     fn is_match(path: &Path) -> bool {
490///         path.file_name().is_some_and(|name| name == "package.json")
491///     }
492///
493///     fn extract_packages(path: &Path) -> Vec<PackageData> {
494///         vec![PackageData::default()]
495///     }
496/// }
497/// ```
498pub trait PackageParser {
499    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
500    const PACKAGE_TYPE: PackageType;
501
502    /// Extracts all packages from the given file path.
503    ///
504    /// Returns a vector of `PackageData` structures containing all extracted metadata
505    /// including name, version, dependencies, licenses, etc. Most parsers return a
506    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
507    /// can contain multiple packages in a single file.
508    ///
509    /// On parse errors, returns a vector with a default `PackageData` with minimal or
510    /// no fields populated.
511    fn extract_packages(path: &Path) -> Vec<PackageData>;
512
513    /// Checks if the given file path matches this parser's expected format.
514    ///
515    /// Returns true if the file should be handled by this parser based on filename,
516    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
517    fn is_match(path: &Path) -> bool;
518
519    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
520    /// or a default [`PackageData`] if the file contains no packages.
521    fn extract_first_package(path: &Path) -> PackageData {
522        Self::extract_packages(path)
523            .into_iter()
524            .map(|mut package| {
525                finalize_package_declared_license_references(&mut package);
526                package
527            })
528            .next()
529            .unwrap_or_default()
530    }
531
532    /// Returns documentation metadata for the file-format surfaces this parser handles.
533    ///
534    /// Used to auto-generate `docs/SUPPORTED_FORMATS.md`. Parsers that share a
535    /// documentation entry with another parser (e.g., AlpineApkParser and
536    /// AlpineInstalledParser) should return the entry from only one of them
537    /// and use the default empty implementation in the other.
538    fn metadata() -> Vec<metadata::ParserMetadata> {
539        Vec::new()
540    }
541}
542
543pub fn try_parse_rpm_archive_with_license_engine(
544    path: &Path,
545    license_engine: Option<Arc<LicenseDetectionEngine>>,
546) -> Option<ParsePackagesResult> {
547    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
548        return None;
549    }
550
551    if <RpmParser as PackageParser>::is_match(path) {
552        return Some(capture_parser_diagnostics(
553            || self::rpm_parser::extract_rpm_packages(path),
554            stringify!(RpmParser),
555            path,
556            license_engine,
557        ));
558    }
559
560    None
561}
562
/// Parses `path` as an RPM archive without a license detection engine.
///
/// Equivalent to calling [`try_parse_rpm_archive_with_license_engine`] with
/// `None` as the engine; returns `None` under the same conditions.
pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
    try_parse_rpm_archive_with_license_engine(path, None)
}
566
/// Public forwarder to `compiled_binary::try_parse_compiled_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled. The return value
/// is whatever the underlying function produces for `bytes`.
#[cfg(feature = "golden-tests")]
pub fn try_parse_compiled_bytes(bytes: &[u8]) -> Option<ParsePackagesResult> {
    self::compiled_binary::try_parse_compiled_bytes(bytes)
}
571
/// Public forwarder to `windows_executable::try_parse_windows_executable_bytes`.
///
/// Only compiled when the `golden-tests` feature is enabled. `path` and `bytes`
/// are passed through unchanged and the underlying result is returned as is.
#[cfg(feature = "golden-tests")]
pub fn try_parse_windows_executable_bytes(
    path: &Path,
    bytes: &[u8],
) -> Option<ParsePackagesResult> {
    self::windows_executable::try_parse_windows_executable_bytes(path, bytes)
}
579
/// Public forwarder to `rpm_parser::path_looks_like_rpm_archive`.
///
/// Returns that function's verdict for `path` unchanged. The same check is the
/// first gate applied by [`try_parse_rpm_archive_with_license_engine`].
pub fn path_looks_like_rpm_archive(path: &Path) -> bool {
    self::rpm_parser::path_looks_like_rpm_archive(path)
}
583
584/// Collects all registered parser and detection-surface metadata.
585///
586/// Used by the `generate-supported-formats` xtask to auto-generate
587/// `docs/SUPPORTED_FORMATS.md`.
588pub fn all_metadata() -> Vec<metadata::ParserMetadata> {
589    let mut entries = collect_parser_metadata();
590    entries.extend_from_slice(self::compiled_binary::COMPILED_BINARY_METADATA);
591    entries.extend_from_slice(self::windows_executable::WINDOWS_EXE_METADATA);
592    entries.extend_from_slice(self::misc::RECOGNIZER_METADATA);
593    entries.extend_from_slice(crate::utils::font::FONT_METADATA);
594    entries
595}
596
597pub use self::about::AboutFileParser;
598pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
599pub use self::android::{
600    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
601};
602#[cfg(feature = "golden-tests")]
603pub use self::android::{
604    ProtoItem, ProtoPrimitive, ProtoRawStringValue, ProtoSourcePosition, ProtoStringValue,
605    ProtoXmlAttribute, ProtoXmlElement, ProtoXmlNamespace, ProtoXmlNode, proto_item,
606    proto_primitive, proto_xml_node,
607};
608pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
609pub use self::autotools::AutotoolsConfigureParser;
610pub use self::bazel::{BazelBuildParser, BazelModuleParser};
611pub use self::bitbake::BitbakeRecipeParser;
612pub use self::bower::BowerJsonParser;
613pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
614pub use self::bun_lock::BunLockParser;
615pub use self::bun_lockb::BunLockbParser;
616pub use self::cargo::CargoParser;
617#[cfg_attr(not(test), allow(unused_imports))]
618pub use self::cargo_lock::CargoLockParser;
619pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
620pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
621pub use self::citation::CitationCffParser;
622pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
623pub use self::composer::{ComposerJsonParser, ComposerLockParser};
624pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
625pub use self::conan_data::ConanDataParser;
626pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
627pub use self::conda_meta_json::CondaMetaJsonParser;
628pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
629pub use self::cpan_dist_ini::CpanDistIniParser;
630pub use self::cpan_makefile_pl::CpanMakefilePlParser;
631pub use self::cran::CranParser;
632pub use self::dart::{PubspecLockParser, PubspecYamlParser};
633pub use self::debian::{
634    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
635    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
636    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
637    DebianMd5sumInPackageParser, DebianOrigTarParser,
638};
639pub use self::deno::DenoParser;
640pub use self::deno_lock::DenoLockParser;
641pub use self::docker::DockerfileParser;
642pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
643pub use self::freebsd::FreebsdCompactManifestParser;
644pub use self::gitmodules::GitmodulesParser;
645pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
646pub use self::go_mod_graph::GoModGraphParser;
647pub use self::gradle::GradleParser;
648pub use self::gradle_lock::GradleLockfileParser;
649pub use self::gradle_module::GradleModuleParser;
650pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
651pub use self::haxe::HaxeParser;
652pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
653pub use self::hex_lock::HexLockParser;
654pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
655pub use self::maven::MavenParser;
656pub use self::meson::MesonParser;
657pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
658pub use self::misc::{
659    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
660    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
661    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
662    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
663    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
664    SharArchiveRecognizer, SquashfsRecognizer,
665};
666pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
667pub use self::npm::NpmParser;
668pub use self::npm_lock::NpmLockParser;
669pub use self::npm_workspace::NpmWorkspaceParser;
670pub use self::nuget::{
671    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
672    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
673    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
674};
675pub use self::opam::OpamParser;
676pub use self::os_release::OsReleaseParser;
677pub use self::pip_inspect_deplock::PipInspectDeplockParser;
678pub use self::pipfile_lock::PipfileLockParser;
679pub use self::pixi::{PixiLockParser, PixiTomlParser};
680pub use self::pnpm_lock::PnpmLockParser;
681pub use self::podfile::PodfileParser;
682pub use self::podfile_lock::PodfileLockParser;
683pub use self::podspec::PodspecParser;
684pub use self::podspec_json::PodspecJsonParser;
685pub use self::poetry_lock::PoetryLockParser;
686pub use self::publiccode::PubliccodeParser;
687pub use self::pylock_toml::PylockTomlParser;
688pub use self::python::PythonParser;
689pub use self::readme::ReadmeParser;
690pub use self::requirements_txt::RequirementsTxtParser;
691#[cfg(feature = "rpm-sqlite")]
692pub use self::rpm_db::RpmSqliteDatabaseParser;
693pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
694pub use self::rpm_license_files::RpmLicenseFilesParser;
695pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
696pub use self::rpm_parser::RpmParser;
697pub use self::rpm_specfile::RpmSpecfileParser;
698pub use self::rpm_yumdb::RpmYumdbParser;
699pub use self::ruby::{
700    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
701};
702pub use self::sbt::SbtParser;
703pub use self::swift_manifest_json::SwiftManifestJsonParser;
704pub use self::swift_resolved::SwiftPackageResolvedParser;
705pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
706pub use self::uv_lock::UvLockParser;
707pub use self::vcpkg::VcpkgManifestParser;
708pub use self::yarn_lock::YarnLockParser;
709pub use self::yarn_pnp::YarnPnpParser;
710
/// Registers all parsers and recognizers, generating dispatch functions.
///
/// Parsers are tried first, then recognizers. This ordering is important because
/// recognizers match broadly by file extension (e.g., `.jar`) and would shadow
/// more specific parsers if checked first.
///
/// Input is two comma-separated type lists. Each entry in `parsers` may be
/// preceded by attributes (for example a `#[cfg(...)]`), which are re-applied to
/// every statement, match arm, and list element generated for that parser.
/// Entries in `recognizers` take no attributes.
///
/// Generated items: `try_parse_file_with_license_engine`, `try_parse_file`,
/// `parse_by_type_name`, `list_parser_types`, and `collect_parser_metadata`.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_meta:meta])* $parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        // Dispatches `path` to the first handler, in registration order, whose
        // `is_match` accepts it, and runs that handler's `extract_packages`
        // under `capture_parser_diagnostics`. Returns `None` when no handler
        // matches.
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            // Parsers first; each branch returns as soon as one matches.
            $(
                $(#[$parser_meta])*
                if <$parser>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser>::extract_packages(path),
                        stringify!($parser),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            // Recognizers are only consulted when no parser matched.
            $(
                if <$recognizer>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer>::extract_packages(path),
                        stringify!($recognizer),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        // Same dispatch as `try_parse_file_with_license_engine`, with no
        // license engine.
        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        //
        // Selects the handler whose stringified type name equals `type_name`
        // and returns its `extract_first_package(path)`; `is_match` is not
        // consulted. Unknown names yield `None`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_meta])*
                    stringify!($parser) => Some(<$parser>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer) => Some(<$recognizer>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        //
        // Returns the stringified type names of all registered handlers:
        // parsers first, then recognizers, each in registration order.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_meta])*
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ]
        }

        /// Collects documentation metadata from all registered parsers and recognizers.
        pub fn collect_parser_metadata() -> Vec<metadata::ParserMetadata> {
            let mut entries = Vec::new();
            // Each handler contributes whatever its `metadata()` returns,
            // parsers before recognizers.
            $(
                $(#[$parser_meta])*
                entries.extend(<$parser>::metadata());
            )*
            $(
                entries.extend(<$recognizer>::metadata());
            )*
            entries
        }
    };
}
798
// Unit tests for the license-engine context managed by
// `capture_parser_diagnostics`.
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };
    use std::path::Path;
    use std::sync::Arc;

    // The engine passed to `capture_parser_diagnostics` must be visible through
    // `active_parser_license_engine()` while the extract closure runs, and must
    // no longer be visible once the call has returned.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine =
            Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"));

        let result = capture_parser_diagnostics(
            || {
                assert!(active_parser_license_engine().is_some());
                vec![PackageData::default()]
            },
            "TestParser",
            Path::new("testdata/package.json"),
            Some(engine),
        );

        assert_eq!(result.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    // The engine must still be active during the finalization step that runs
    // after the extract closure returns. This is checked via the pointer that
    // `last_parser_license_engine_ptr()` reports and via the referenced
    // filename on the finalized package's first license match.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine =
            Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"));
        // Reset the recorded pointer so the assertion below reflects this run only.
        clear_last_parser_license_engine_ptr();

        let result = capture_parser_diagnostics(
            || {
                vec![PackageData {
                    declared_license_expression: Some("mit".to_string()),
                    declared_license_expression_spdx: Some("MIT".to_string()),
                    extracted_license_statement: Some("MIT".to_string()),
                    extra_data: Some(HashMap::from([(
                        "license_file".to_string(),
                        serde_json::Value::String("LICENSE".to_string()),
                    )])),
                    ..Default::default()
                }]
            },
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&engine)),
        );

        assert_eq!(result.packages.len(), 1);
        // The recorded pointer must be that of the engine supplied above.
        assert_eq!(
            last_parser_license_engine_ptr(),
            Some(Arc::as_ptr(&engine) as usize)
        );
        assert_eq!(
            result.packages[0].license_detections[0].matches[0]
                .referenced_filenames
                .as_ref(),
            Some(&vec!["LICENSE".to_string()])
        );
        assert!(active_parser_license_engine().is_none());
    }
}
869
870register_package_handlers! {
871    parsers: [
872        AboutFileParser,
873        AndroidAabParser,
874        AndroidApkParser,
875        AndroidManifestParser,
876        AndroidSoongMetadataParser,
877        AlpineApkParser,
878        AlpineApkbuildParser,
879        AlpineInstalledParser,
880        ArchPkginfoParser,
881        ArchSrcinfoParser,
882        AutotoolsConfigureParser,
883        BazelBuildParser,
884        BazelModuleParser,
885        BitbakeRecipeParser,
886        BowerJsonParser,
887        BunLockParser,
888        BunLockbParser,
889        BuckBuildParser,
890        BuckMetadataBzlParser,
891        CargoLockParser,
892        CargoParser,
893        CarthageCartfileParser,
894        CarthageCartfileResolvedParser,
895        ChefMetadataJsonParser,
896        ChefMetadataRbParser,
897        CitationCffParser,
898        ClojureDepsEdnParser,
899        ClojureProjectCljParser,
900        ComposerJsonParser,
901        ComposerLockParser,
902        ConanDataParser,
903        ConanFilePyParser,
904        ConanfileTxtParser,
905        ConanLockParser,
906        CondaEnvironmentYmlParser,
907        CondaMetaJsonParser,
908        CondaMetaYamlParser,
909        CpanDistIniParser,
910        CpanMakefilePlParser,
911        CpanManifestParser,
912        CpanMetaJsonParser,
913        CpanMetaYmlParser,
914        CranParser,
915        DebianControlInExtractedDebParser,
916        DebianControlParser,
917        DebianCopyrightParser,
918        DebianDebianTarParser,
919        DebianDebParser,
920        DebianDistrolessInstalledParser,
921        DebianDscParser,
922        DebianInstalledListParser,
923        DebianInstalledMd5sumsParser,
924        DebianInstalledParser,
925        DebianMd5sumInPackageParser,
926        DebianOrigTarParser,
927        DenoParser,
928        DenoLockParser,
929        DockerfileParser,
930        ErlangAppSrcParser,
931        RebarConfigParser,
932        RebarLockParser,
933        FreebsdCompactManifestParser,
934        GemArchiveParser,
935        GemfileLockParser,
936        GemfileParser,
937        GemMetadataExtractedParser,
938        GemspecParser,
939        GitmodulesParser,
940        GodepsParser,
941        GoModParser,
942        GoModGraphParser,
943        GoSumParser,
944        GoWorkParser,
945        GradleLockfileParser,
946        GradleParser,
947        GradleModuleParser,
948        HackageCabalParser,
949        HackageCabalProjectParser,
950        HackageStackYamlParser,
951        HelmChartYamlParser,
952        HelmChartLockParser,
953        HaxeParser,
954        HexLockParser,
955        JuliaManifestTomlParser,
956        JuliaProjectTomlParser,
957        MavenParser,
958        MesonParser,
959        MicrosoftUpdateManifestParser,
960        NixDefaultParser,
961        NixFlakeLockParser,
962        NixFlakeParser,
963        NpmLockParser,
964        NpmParser,
965        NpmWorkspaceParser,
966        DotNetDepsJsonParser,
967        CentralPackageManagementPropsParser,
968        DirectoryBuildPropsParser,
969        NupkgParser,
970        NuspecParser,
971        PackageReferenceProjectParser,
972        OpamParser,
973        OsReleaseParser,
974        PackagesConfigParser,
975        PackagesLockParser,
976        ProjectJsonParser,
977        ProjectLockJsonParser,
978        PipfileLockParser,
979        PipInspectDeplockParser,
980        PixiTomlParser,
981        PixiLockParser,
982        PnpmLockParser,
983        PodfileLockParser,
984        PodfileParser,
985        PodspecJsonParser,
986        PodspecParser,
987        PoetryLockParser,
988        PubliccodeParser,
989        PylockTomlParser,
990        PubspecLockParser,
991        PubspecYamlParser,
992        PythonParser,
993        UvLockParser,
994        VcpkgManifestParser,
995        ReadmeParser,
996        RequirementsTxtParser,
997        RpmBdbDatabaseParser,
998        RpmLicenseFilesParser,
999        RpmMarinerManifestParser,
1000        RpmNdbDatabaseParser,
1001        RpmParser,
1002        RpmSpecfileParser,
1003        #[cfg(feature = "rpm-sqlite")]
1004        RpmSqliteDatabaseParser,
1005        RpmYumdbParser,
1006        SbtParser,
1007        SwiftManifestJsonParser,
1008        SwiftPackageResolvedParser,
1009        SwiftShowDependenciesParser,
1010        YarnLockParser,
1011        YarnPnpParser,
1012    ],
1013    recognizers: [
1014        AndroidLibraryRecognizer,
1015        AppleDmgRecognizer,
1016        Axis2MarRecognizer,
1017        Axis2ModuleXmlRecognizer,
1018        CabArchiveRecognizer,
1019        ChromeCrxRecognizer,
1020        InstallShieldRecognizer,
1021        IosIpaRecognizer,
1022        IsoImageRecognizer,
1023        IvyXmlRecognizer,
1024        JavaEarAppXmlRecognizer,
1025        JavaEarRecognizer,
1026        JavaJarRecognizer,
1027        JavaWarRecognizer,
1028        JavaWarWebXmlRecognizer,
1029        JBossSarRecognizer,
1030        JBossServiceXmlRecognizer,
1031        MeteorPackageRecognizer,
1032        MozillaXpiRecognizer,
1033        NsisRecognizer,
1034        SharArchiveRecognizer,
1035        SquashfsRecognizer,
1036    ],
1037}
1038
#[cfg(test)]
mod panic_isolation_tests {
    //! Checks that `capture_parser_diagnostics` reports a panicking parser
    //! closure as a scan error and that a later call on the same thread
    //! still collects packages and warnings normally.

    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Path passed alongside the parser closure that panics.
    const PANIC_FIXTURE: &str = "fixtures/panic-package.json";

    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let outcome = capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            Path::new(PANIC_FIXTURE),
            None,
        );

        // The panic yields no packages, only a single error-level report.
        assert!(outcome.packages.is_empty());
        assert_eq!(outcome.scan_errors.len(), 1);
        assert_eq!(outcome.scan_diagnostics.len(), 1);

        let diagnostic = &outcome.scan_diagnostics[0];
        assert_eq!(diagnostic.severity, DiagnosticSeverity::Error);

        // The error text has to name the parser, the file, and the panic payload.
        let message = &outcome.scan_errors[0];
        for fragment in ["PanicParser", PANIC_FIXTURE, "panic boom"] {
            assert!(
                message.contains(fragment),
                "scan error {message:?} does not mention {fragment:?}"
            );
        }
    }

    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        // First call: the parser panics and its result is thrown away.
        let _ = capture_parser_diagnostics(
            || -> Vec<PackageData> { panic!("panic boom") },
            "PanicParser",
            Path::new(PANIC_FIXTURE),
            None,
        );

        // Second call: a well-behaved parser that emits one warning and one package.
        let recovered = capture_parser_diagnostics(
            || {
                crate::parser_warn!("recoverable parser warning");
                let npm_package = PackageData {
                    package_type: Some(PackageType::Npm),
                    ..Default::default()
                };
                vec![npm_package]
            },
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        // Only the warning from the second call may be present in its result.
        assert_eq!(recovered.packages.len(), 1);
        assert_eq!(recovered.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(recovered.scan_diagnostics.len(), 1);

        let diagnostic = &recovered.scan_diagnostics[0];
        assert_eq!(diagnostic.severity, DiagnosticSeverity::Warning);
    }
}