Skip to main content

provenant/parsers/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod about;
5#[cfg(test)]
6mod about_scan_test;
7#[cfg(test)]
8mod about_test;
9mod alpine;
10#[cfg(test)]
11mod alpine_scan_test;
12mod android;
13#[cfg(test)]
14mod android_test;
15mod arch;
16#[cfg(test)]
17mod arch_scan_test;
18#[cfg(test)]
19mod arch_test;
20mod autotools;
21#[cfg(test)]
22mod autotools_test;
23mod bazel;
24#[cfg(test)]
25mod bazel_module_test;
26#[cfg(test)]
27mod bazel_test;
28mod bitbake;
29#[cfg(test)]
30mod bitbake_scan_test;
31#[cfg(test)]
32mod bitbake_test;
33mod bower;
34#[cfg(test)]
35mod bower_scan_test;
36#[cfg(test)]
37mod bower_test;
38mod buck;
39#[cfg(test)]
40mod buck_test;
41mod bun_lock;
42#[cfg(test)]
43mod bun_lock_test;
44mod bun_lockb;
45#[cfg(test)]
46mod bun_lockb_test;
47mod cargo;
48mod cargo_lock;
49#[cfg(test)]
50mod cargo_lock_test;
51#[cfg(test)]
52mod cargo_scan_test;
53#[cfg(test)]
54mod cargo_test;
55mod carthage;
56#[cfg(test)]
57mod carthage_scan_test;
58#[cfg(test)]
59mod carthage_test;
60mod chef;
61#[cfg(test)]
62mod chef_scan_test;
63#[cfg(test)]
64mod chef_test;
65mod citation;
66#[cfg(test)]
67mod citation_test;
68mod clojure;
69#[cfg(test)]
70mod clojure_test;
71#[cfg(test)]
72mod cocoapods_scan_test;
73pub(crate) mod compiled_binary;
74mod composer;
75#[cfg(test)]
76mod composer_scan_test;
77#[cfg(test)]
78mod composer_test;
79mod conan;
80mod conan_data;
81#[cfg(test)]
82mod conan_data_test;
83#[cfg(test)]
84mod conan_scan_test;
85#[cfg(test)]
86mod conan_test;
87mod conda;
88mod conda_meta_json;
89#[cfg(test)]
90mod conda_meta_json_test;
91#[cfg(test)]
92mod conda_scan_test;
93#[cfg(test)]
94mod conda_test;
95mod cpan;
96mod cpan_dist_ini;
97#[cfg(test)]
98mod cpan_dist_ini_test;
99mod cpan_makefile_pl;
100#[cfg(test)]
101mod cpan_makefile_pl_test;
102#[cfg(test)]
103mod cpan_scan_test;
104#[cfg(test)]
105mod cpan_test;
106mod cran;
107#[cfg(test)]
108mod cran_scan_test;
109#[cfg(test)]
110mod cran_test;
111mod dart;
112#[cfg(test)]
113mod dart_scan_test;
114#[cfg(test)]
115mod dart_test;
116mod debian;
117mod deno;
118mod deno_lock;
119#[cfg(test)]
120mod deno_lock_test;
121#[cfg(test)]
122mod deno_scan_test;
123#[cfg(test)]
124mod deno_test;
125mod docker;
126#[cfg(test)]
127mod docker_scan_test;
128#[cfg(test)]
129mod docker_test;
130mod erlang_otp;
131#[cfg(test)]
132mod erlang_otp_scan_test;
133#[cfg(test)]
134mod erlang_otp_test;
135mod freebsd;
136#[cfg(test)]
137mod freebsd_scan_test;
138#[cfg(test)]
139mod freebsd_test;
140mod gitmodules;
141#[cfg(test)]
142mod gitmodules_scan_test;
143mod go;
144mod go_mod_graph;
145#[cfg(test)]
146mod go_scan_test;
147#[cfg(test)]
148mod go_test;
149#[cfg(test)]
150mod go_work_test;
151#[cfg(feature = "golden-tests")]
152pub mod golden_test_utils;
153mod gradle;
154mod gradle_lock;
155#[cfg(test)]
156mod gradle_lock_test;
157mod gradle_module;
158#[cfg(test)]
159mod gradle_module_scan_test;
160#[cfg(test)]
161mod gradle_module_test;
162#[cfg(test)]
163mod gradle_scan_test;
164mod hackage;
165#[cfg(test)]
166mod hackage_scan_test;
167#[cfg(test)]
168mod hackage_test;
169mod haxe;
170#[cfg(test)]
171mod haxe_scan_test;
172#[cfg(test)]
173mod haxe_test;
174mod helm;
175#[cfg(test)]
176mod helm_scan_test;
177#[cfg(test)]
178mod helm_test;
179mod hex_lock;
180#[cfg(test)]
181mod hex_lock_test;
182mod julia;
183#[cfg(test)]
184mod julia_test;
185pub(crate) mod license_normalization;
186mod maven;
187mod meson;
188#[cfg(test)]
189mod meson_scan_test;
190#[cfg(test)]
191mod meson_test;
192pub mod metadata;
193mod microsoft_update_manifest;
194#[cfg(test)]
195mod microsoft_update_manifest_test;
196mod misc;
197#[cfg(test)]
198mod misc_test;
199mod nix;
200#[cfg(test)]
201mod nix_scan_test;
202#[cfg(test)]
203mod nix_test;
204mod npm;
205mod npm_lock;
206#[cfg(test)]
207mod npm_lock_test;
208#[cfg(test)]
209mod npm_scan_test;
210#[cfg(test)]
211mod npm_test;
212mod npm_workspace;
213#[cfg(test)]
214mod npm_workspace_test;
215mod nuget;
216mod opam;
217#[cfg(test)]
218mod opam_scan_test;
219mod os_release;
220#[cfg(test)]
221mod os_release_test;
222mod pep508;
223mod pip_inspect_deplock;
224#[cfg(test)]
225mod pip_inspect_deplock_test;
226mod pipfile_lock;
227#[cfg(test)]
228mod pipfile_lock_test;
229mod pixi;
230#[cfg(test)]
231mod pixi_scan_test;
232#[cfg(test)]
233mod pixi_test;
234mod pnpm_lock;
235#[cfg(test)]
236mod pnpm_lock_test;
237mod podfile;
238mod podfile_lock;
239#[cfg(test)]
240mod podfile_lock_test;
241mod podspec;
242mod podspec_json;
243#[cfg(test)]
244mod podspec_json_test;
245mod poetry_lock;
246#[cfg(test)]
247mod poetry_lock_test;
248mod publiccode;
249#[cfg(test)]
250mod publiccode_test;
251mod pylock_toml;
252#[cfg(test)]
253mod pylock_toml_test;
254mod python;
255mod readme;
256#[cfg(test)]
257mod readme_test;
258mod requirements_txt;
259#[cfg(test)]
260mod requirements_txt_test;
261pub(crate) mod rfc822;
262mod rpm_db;
263mod rpm_db_native;
264#[cfg(test)]
265mod rpm_db_scan_test;
266mod rpm_license_files;
267#[cfg(test)]
268mod rpm_license_files_test;
269mod rpm_mariner_manifest;
270#[cfg(test)]
271mod rpm_mariner_manifest_test;
272mod rpm_parser;
273#[cfg(test)]
274mod rpm_scan_test;
275mod rpm_specfile;
276#[cfg(test)]
277mod rpm_specfile_test;
278mod rpm_yumdb;
279mod ruby;
280#[cfg(test)]
281mod ruby_scan_test;
282#[cfg(test)]
283mod ruby_test;
284mod sbt;
285#[cfg(test)]
286mod sbt_test;
287#[cfg(test)]
288mod scan_test_utils;
289mod swift_manifest_json;
290#[cfg(test)]
291mod swift_manifest_json_test;
292mod swift_resolved;
293#[cfg(test)]
294mod swift_resolved_test;
295#[cfg(test)]
296mod swift_scan_test;
297mod swift_show_dependencies;
298#[cfg(test)]
299mod swift_show_dependencies_test;
300pub mod utils;
301mod uv_lock;
302#[cfg(test)]
303mod uv_lock_test;
304mod vcpkg;
305#[cfg(test)]
306mod vcpkg_scan_test;
307#[cfg(test)]
308mod vcpkg_test;
309pub(crate) mod windows_executable;
310#[cfg(test)]
311mod windows_executable_golden_test;
312mod yarn_lock;
313#[cfg(test)]
314mod yarn_lock_test;
315mod yarn_pnp;
316#[cfg(test)]
317mod yarn_pnp_test;
318
319use std::cell::RefCell;
320use std::panic::{AssertUnwindSafe, catch_unwind};
321use std::path::Path;
322use std::sync::Arc;
323
324use crate::license_detection::LicenseDetectionEngine;
325use crate::models::{DiagnosticSeverity, PackageData, PackageType, ScanDiagnostic};
326use crate::parsers::license_normalization::finalize_package_declared_license_references;
327use crate::parsers::utils::MAX_ITERATION_COUNT;
328
thread_local! {
    // Per-thread stack of diagnostic buffers. `capture_parser_diagnostics` pushes an
    // empty buffer before running a handler and pops it afterwards;
    // `record_parser_diagnostic` appends to the top-most buffer.
    static PARSER_DIAGNOSTIC_STACK: RefCell<Vec<Vec<ScanDiagnostic>>> = const { RefCell::new(Vec::new()) };
    // Per-thread stack of optional license engines, pushed and popped by
    // `capture_parser_diagnostics`; the top entry is what
    // `active_parser_license_engine` returns.
    static PARSER_LICENSE_ENGINE_STACK: RefCell<Vec<Option<Arc<LicenseDetectionEngine>>>> = const { RefCell::new(Vec::new()) };
    // Per-thread stack of optional scan roots, pushed and popped by
    // `with_parser_scan_root`; the top entry is what `active_parser_scan_root` returns.
    static PARSER_SCAN_ROOT_STACK: RefCell<Vec<Option<std::path::PathBuf>>> = const { RefCell::new(Vec::new()) };
}
334
/// Outcome of running one package handler over a single input.
///
/// The field descriptions below reflect how `capture_parser_diagnostics` fills the
/// struct; other constructors may populate it differently.
#[derive(Debug, Default)]
pub struct ParsePackagesResult {
    /// Packages returned by the handler; empty when the handler panicked.
    pub packages: Vec<PackageData>,
    /// Diagnostics recorded while the handler ran, plus one error entry when it panicked.
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    /// The `message` of every entry in `scan_diagnostics`, in the same order.
    pub scan_errors: Vec<String>,
}
341
/// Renders a caught panic payload as text.
///
/// Payloads that are a `&str` or a `String` are returned as-is; any other payload
/// type yields the fixed text `"unknown panic payload"`.
fn panic_payload_to_string(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_string())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_string())
}
351
352pub(crate) fn capture_parser_diagnostics<F>(
353    extract: F,
354    handler_name: &str,
355    path: &Path,
356    license_engine: Option<Arc<LicenseDetectionEngine>>,
357) -> ParsePackagesResult
358where
359    F: FnOnce() -> Vec<PackageData>,
360{
361    PARSER_DIAGNOSTIC_STACK.with(|stack| {
362        stack.borrow_mut().push(Vec::new());
363    });
364    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
365        stack.borrow_mut().push(license_engine);
366    });
367
368    let extract_result = catch_unwind(AssertUnwindSafe(|| {
369        extract()
370            .into_iter()
371            .map(|mut package| {
372                finalize_package_declared_license_references(&mut package);
373                package
374            })
375            .take(MAX_ITERATION_COUNT)
376            .collect::<Vec<_>>()
377    }));
378    PARSER_LICENSE_ENGINE_STACK.with(|stack| {
379        stack.borrow_mut().pop();
380    });
381    let mut scan_diagnostics =
382        PARSER_DIAGNOSTIC_STACK.with(|stack| stack.borrow_mut().pop().unwrap_or_default());
383
384    match extract_result {
385        Ok(packages) => ParsePackagesResult {
386            packages,
387            scan_errors: scan_diagnostics
388                .iter()
389                .map(|diagnostic| diagnostic.message.clone())
390                .collect(),
391            scan_diagnostics,
392        },
393        Err(payload) => {
394            scan_diagnostics.push(ScanDiagnostic::error(format!(
395                "{} panicked while parsing {}: {}",
396                handler_name,
397                path.display(),
398                panic_payload_to_string(payload.as_ref())
399            )));
400            ParsePackagesResult {
401                packages: Vec::new(),
402                scan_errors: scan_diagnostics
403                    .iter()
404                    .map(|diagnostic| diagnostic.message.clone())
405                    .collect(),
406                scan_diagnostics,
407            }
408        }
409    }
410}
411
412pub(crate) fn active_parser_license_engine() -> Option<Arc<LicenseDetectionEngine>> {
413    PARSER_LICENSE_ENGINE_STACK.with(|stack| stack.borrow().last().cloned().flatten())
414}
415
416pub(crate) fn active_parser_scan_root() -> Option<std::path::PathBuf> {
417    PARSER_SCAN_ROOT_STACK.with(|stack| stack.borrow().last().cloned().flatten())
418}
419
420pub(crate) fn with_parser_scan_root<T>(scan_root: Option<&Path>, f: impl FnOnce() -> T) -> T {
421    PARSER_SCAN_ROOT_STACK.with(|stack| {
422        stack.borrow_mut().push(scan_root.map(Path::to_path_buf));
423    });
424    let result = f();
425    PARSER_SCAN_ROOT_STACK.with(|stack| {
426        stack.borrow_mut().pop();
427    });
428    result
429}
430
431pub(crate) fn record_parser_diagnostic(message: String, severity: DiagnosticSeverity) -> bool {
432    PARSER_DIAGNOSTIC_STACK.with(|stack| {
433        let mut stack = stack.borrow_mut();
434        let Some(active) = stack.last_mut() else {
435            return false;
436        };
437        active.push(ScanDiagnostic {
438            severity,
439            message,
440            is_timeout: false,
441        });
442        true
443    })
444}
445
/// Emits a parser warning built from `format!`-style arguments.
///
/// The warning is first offered to `record_parser_diagnostic` with
/// `DiagnosticSeverity::Warning`; when that reports no active capture, the message
/// is sent to `log::warn!` instead.
#[macro_export]
macro_rules! parser_warn {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        // The clone keeps `message` available for the logging fallback below.
        let recorded = $crate::parsers::record_parser_diagnostic(
            message.clone(),
            $crate::models::DiagnosticSeverity::Warning,
        );
        if !recorded {
            log::warn!("{message}");
        }
    }};
}
458
459/// Package parser trait for extracting metadata from package manifest files.
460///
461/// Each parser implementation handles a specific package manager/ecosystem
462/// (npm, Maven, Python, Cargo, etc.) and extracts standardized metadata into
463/// `PackageData` structures compatible with ScanCode Toolkit JSON output format.
464///
465/// # Implementation Guide
466///
467/// Implementors must provide:
468/// - `PACKAGE_TYPE`: Package URL (purl) type identifier (e.g., "npm", "pypi", "maven")
469/// - `is_match()`: Returns true if the given file path matches this parser's expected format
470/// - `extract_packages()`: Parses the file and returns all extracted package metadata
471///
472/// # Error Handling
473///
474/// Parsers should handle errors gracefully by returning default/empty `PackageData`
475/// and logging warnings with [`crate::parser_warn!`] rather than panicking. Scanner
476/// dispatch captures those warnings and attaches them to `FileInfo.scan_errors` so
477/// CI output and serialized scan results stay aligned.
478/// This allows the scan to continue processing other files even when individual
479/// files fail to parse.
480///
481/// # Example
482///
483/// ```no_run
484/// use provenant::models::{PackageData, PackageType};
485/// use provenant::parsers::PackageParser;
486/// use std::path::Path;
487///
488/// pub struct MyParser;
489///
490/// impl PackageParser for MyParser {
491///     const PACKAGE_TYPE: PackageType = PackageType::Npm;
492///
493///     fn is_match(path: &Path) -> bool {
494///         path.file_name().is_some_and(|name| name == "package.json")
495///     }
496///
497///     fn extract_packages(path: &Path) -> Vec<PackageData> {
498///         vec![PackageData::default()]
499///     }
500/// }
501/// ```
502pub trait PackageParser {
503    /// Package URL type identifier for this parser (e.g., PackageType::Npm, PackageType::Pypi).
504    const PACKAGE_TYPE: PackageType;
505
506    /// Extracts all packages from the given file path.
507    ///
508    /// Returns a vector of `PackageData` structures containing all extracted metadata
509    /// including name, version, dependencies, licenses, etc. Most parsers return a
510    /// single-element vector, but some (e.g., Bazel BUILD, Buck BUCK, Debian control)
511    /// can contain multiple packages in a single file.
512    ///
513    /// On parse errors, returns a vector with a default `PackageData` with minimal or
514    /// no fields populated.
515    fn extract_packages(path: &Path) -> Vec<PackageData>;
516
517    /// Checks if the given file path matches this parser's expected format.
518    ///
519    /// Returns true if the file should be handled by this parser based on filename,
520    /// extension, or path patterns. Used by the scanner to route files to appropriate parsers.
521    fn is_match(path: &Path) -> bool;
522
523    /// Returns the first package from [`extract_packages()`](Self::extract_packages),
524    /// or a default [`PackageData`] if the file contains no packages.
525    fn extract_first_package(path: &Path) -> PackageData {
526        Self::extract_packages(path)
527            .into_iter()
528            .map(|mut package| {
529                finalize_package_declared_license_references(&mut package);
530                package
531            })
532            .next()
533            .unwrap_or_default()
534    }
535
536    /// Returns documentation metadata for the file-format surfaces this parser handles.
537    ///
538    /// Used to auto-generate `docs/SUPPORTED_FORMATS.md`. Parsers that share a
539    /// documentation entry with another parser (e.g., AlpineApkParser and
540    /// AlpineInstalledParser) should return the entry from only one of them
541    /// and use the default empty implementation in the other.
542    fn metadata() -> Vec<metadata::ParserMetadata> {
543        Vec::new()
544    }
545}
546
547pub fn try_parse_rpm_archive_with_license_engine(
548    path: &Path,
549    license_engine: Option<Arc<LicenseDetectionEngine>>,
550) -> Option<ParsePackagesResult> {
551    if !self::rpm_parser::path_looks_like_rpm_archive(path) {
552        return None;
553    }
554
555    if <RpmParser as PackageParser>::is_match(path) {
556        return Some(capture_parser_diagnostics(
557            || self::rpm_parser::extract_rpm_packages(path),
558            stringify!(RpmParser),
559            path,
560            license_engine,
561        ));
562    }
563
564    None
565}
566
/// Convenience wrapper around [`try_parse_rpm_archive_with_license_engine`] that
/// passes no license engine.
pub fn try_parse_rpm_archive(path: &Path) -> Option<ParsePackagesResult> {
    try_parse_rpm_archive_with_license_engine(path, None)
}
570
/// Public entry point that forwards `bytes` to
/// `compiled_binary::try_parse_compiled_bytes` and returns its result unchanged.
///
/// Only compiled when the `golden-tests` feature is enabled.
#[cfg(feature = "golden-tests")]
pub fn try_parse_compiled_bytes(bytes: &[u8]) -> Option<ParsePackagesResult> {
    self::compiled_binary::try_parse_compiled_bytes(bytes)
}
575
/// Public entry point that forwards `path` and `bytes` to
/// `windows_executable::try_parse_windows_executable_bytes` and returns its result
/// unchanged.
///
/// Only compiled when the `golden-tests` feature is enabled.
#[cfg(feature = "golden-tests")]
pub fn try_parse_windows_executable_bytes(
    path: &Path,
    bytes: &[u8],
) -> Option<ParsePackagesResult> {
    self::windows_executable::try_parse_windows_executable_bytes(path, bytes)
}
583
/// Public re-exposure of `rpm_parser::path_looks_like_rpm_archive`; returns that
/// function's answer for `path` unchanged.
pub fn path_looks_like_rpm_archive(path: &Path) -> bool {
    self::rpm_parser::path_looks_like_rpm_archive(path)
}
587
588/// Collects all registered parser and detection-surface metadata.
589///
590/// Used by the `generate-supported-formats` xtask to auto-generate
591/// `docs/SUPPORTED_FORMATS.md`.
592pub fn all_metadata() -> Vec<metadata::ParserMetadata> {
593    let mut entries = collect_parser_metadata();
594    entries.extend_from_slice(self::compiled_binary::COMPILED_BINARY_METADATA);
595    entries.extend_from_slice(self::windows_executable::WINDOWS_EXE_METADATA);
596    entries.extend_from_slice(self::misc::RECOGNIZER_METADATA);
597    entries.extend_from_slice(crate::utils::font::FONT_METADATA);
598    entries
599}
600
601pub use self::about::AboutFileParser;
602pub use self::alpine::{AlpineApkParser, AlpineApkbuildParser, AlpineInstalledParser};
603pub use self::android::{
604    AndroidAabParser, AndroidApkParser, AndroidManifestParser, AndroidSoongMetadataParser,
605};
606#[cfg(feature = "golden-tests")]
607pub use self::android::{
608    ProtoItem, ProtoPrimitive, ProtoRawStringValue, ProtoSourcePosition, ProtoStringValue,
609    ProtoXmlAttribute, ProtoXmlElement, ProtoXmlNamespace, ProtoXmlNode, proto_item,
610    proto_primitive, proto_xml_node,
611};
612pub use self::arch::{ArchPkginfoParser, ArchSrcinfoParser};
613pub use self::autotools::AutotoolsConfigureParser;
614pub use self::bazel::{BazelBuildParser, BazelModuleParser};
615pub use self::bitbake::BitbakeRecipeParser;
616pub use self::bower::BowerJsonParser;
617pub use self::buck::{BuckBuildParser, BuckMetadataBzlParser};
618pub use self::bun_lock::BunLockParser;
619pub use self::bun_lockb::BunLockbParser;
620pub use self::cargo::CargoParser;
621#[cfg_attr(not(test), allow(unused_imports))]
622pub use self::cargo_lock::CargoLockParser;
623pub use self::carthage::{CarthageCartfileParser, CarthageCartfileResolvedParser};
624pub use self::chef::{ChefMetadataJsonParser, ChefMetadataRbParser};
625pub use self::citation::CitationCffParser;
626pub use self::clojure::{ClojureDepsEdnParser, ClojureProjectCljParser};
627pub use self::composer::{ComposerJsonParser, ComposerLockParser};
628pub use self::conan::{ConanFilePyParser, ConanLockParser, ConanfileTxtParser};
629pub use self::conan_data::ConanDataParser;
630pub use self::conda::{CondaEnvironmentYmlParser, CondaMetaYamlParser};
631pub use self::conda_meta_json::CondaMetaJsonParser;
632pub use self::cpan::{CpanManifestParser, CpanMetaJsonParser, CpanMetaYmlParser};
633pub use self::cpan_dist_ini::CpanDistIniParser;
634pub use self::cpan_makefile_pl::CpanMakefilePlParser;
635pub use self::cran::CranParser;
636pub use self::dart::{PubspecLockParser, PubspecYamlParser};
637pub use self::debian::{
638    DebianControlInExtractedDebParser, DebianControlParser, DebianCopyrightParser, DebianDebParser,
639    DebianDebianTarParser, DebianDistrolessInstalledParser, DebianDscParser,
640    DebianInstalledListParser, DebianInstalledMd5sumsParser, DebianInstalledParser,
641    DebianMd5sumInPackageParser, DebianOrigTarParser,
642};
643pub use self::deno::DenoParser;
644pub use self::deno_lock::DenoLockParser;
645pub use self::docker::DockerfileParser;
646pub use self::erlang_otp::{ErlangAppSrcParser, RebarConfigParser, RebarLockParser};
647pub use self::freebsd::FreebsdCompactManifestParser;
648pub use self::gitmodules::GitmodulesParser;
649pub use self::go::{GoModParser, GoSumParser, GoWorkParser, GodepsParser};
650pub use self::go_mod_graph::GoModGraphParser;
651pub use self::gradle::GradleParser;
652pub use self::gradle_lock::GradleLockfileParser;
653pub use self::gradle_module::GradleModuleParser;
654pub use self::hackage::{HackageCabalParser, HackageCabalProjectParser, HackageStackYamlParser};
655pub use self::haxe::HaxeParser;
656pub use self::helm::{HelmChartLockParser, HelmChartYamlParser};
657pub use self::hex_lock::HexLockParser;
658pub use self::julia::{JuliaManifestTomlParser, JuliaProjectTomlParser};
659pub use self::maven::MavenParser;
660pub use self::meson::MesonParser;
661pub use self::microsoft_update_manifest::MicrosoftUpdateManifestParser;
662pub use self::misc::{
663    AndroidLibraryRecognizer, AppleDmgRecognizer, Axis2MarRecognizer, Axis2ModuleXmlRecognizer,
664    CabArchiveRecognizer, ChromeCrxRecognizer, InstallShieldRecognizer, IosIpaRecognizer,
665    IsoImageRecognizer, IvyXmlRecognizer, JBossSarRecognizer, JBossServiceXmlRecognizer,
666    JavaEarAppXmlRecognizer, JavaEarRecognizer, JavaJarRecognizer, JavaWarRecognizer,
667    JavaWarWebXmlRecognizer, MeteorPackageRecognizer, MozillaXpiRecognizer, NsisRecognizer,
668    SharArchiveRecognizer, SquashfsRecognizer,
669};
670pub use self::nix::{NixDefaultParser, NixFlakeLockParser, NixFlakeParser};
671pub use self::npm::NpmParser;
672pub use self::npm_lock::NpmLockParser;
673pub use self::npm_workspace::NpmWorkspaceParser;
674pub use self::nuget::{
675    CentralPackageManagementPropsParser, DirectoryBuildPropsParser, DotNetDepsJsonParser,
676    NupkgParser, NuspecParser, PackageReferenceProjectParser, PackagesConfigParser,
677    PackagesLockParser, ProjectJsonParser, ProjectLockJsonParser,
678};
679pub use self::opam::OpamParser;
680pub use self::os_release::OsReleaseParser;
681pub use self::pip_inspect_deplock::PipInspectDeplockParser;
682pub use self::pipfile_lock::PipfileLockParser;
683pub use self::pixi::{PixiLockParser, PixiTomlParser};
684pub use self::pnpm_lock::PnpmLockParser;
685pub use self::podfile::PodfileParser;
686pub use self::podfile_lock::PodfileLockParser;
687pub use self::podspec::PodspecParser;
688pub use self::podspec_json::PodspecJsonParser;
689pub use self::poetry_lock::PoetryLockParser;
690pub use self::publiccode::PubliccodeParser;
691pub use self::pylock_toml::PylockTomlParser;
692pub use self::python::PythonParser;
693pub use self::readme::ReadmeParser;
694pub use self::requirements_txt::RequirementsTxtParser;
695#[cfg(feature = "rpm-sqlite")]
696pub use self::rpm_db::RpmSqliteDatabaseParser;
697pub use self::rpm_db::{RpmBdbDatabaseParser, RpmNdbDatabaseParser};
698pub use self::rpm_license_files::RpmLicenseFilesParser;
699pub use self::rpm_mariner_manifest::RpmMarinerManifestParser;
700pub use self::rpm_parser::RpmParser;
701pub use self::rpm_specfile::RpmSpecfileParser;
702pub use self::rpm_yumdb::RpmYumdbParser;
703pub use self::ruby::{
704    GemArchiveParser, GemMetadataExtractedParser, GemfileLockParser, GemfileParser, GemspecParser,
705};
706pub use self::sbt::SbtParser;
707pub use self::swift_manifest_json::SwiftManifestJsonParser;
708pub use self::swift_resolved::SwiftPackageResolvedParser;
709pub use self::swift_show_dependencies::SwiftShowDependenciesParser;
710pub use self::uv_lock::UvLockParser;
711pub use self::vcpkg::VcpkgManifestParser;
712pub use self::yarn_lock::YarnLockParser;
713pub use self::yarn_pnp::YarnPnpParser;
714
/// Registers all parsers and recognizers, generating dispatch functions.
///
/// Parsers are tried first, then recognizers. This ordering is important because
/// recognizers match broadly by file extension (e.g., `.jar`) and would shadow
/// more specific parsers if checked first.
///
/// The expansion defines `try_parse_file_with_license_engine`, `try_parse_file`,
/// `parse_by_type_name`, `list_parser_types`, and `collect_parser_metadata`.
/// Attributes written before a parser entry (such as `#[cfg(...)]`) are copied onto
/// every generated statement, match arm, and list element for that parser.
macro_rules! register_package_handlers {
    (
        parsers: [$($(#[$parser_meta:meta])* $parser:ty),* $(,)?],
        recognizers: [$($recognizer:ty),* $(,)?] $(,)?
    ) => {
        // Dispatches to the first handler whose `is_match` accepts `path`, in
        // registration order; returns `None` when no handler matches.
        pub fn try_parse_file_with_license_engine(
            path: &Path,
            license_engine: Option<Arc<LicenseDetectionEngine>>,
        ) -> Option<ParsePackagesResult> {
            // Parsers are checked before any recognizer.
            $(
                $(#[$parser_meta])*
                if <$parser>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$parser>::extract_packages(path),
                        stringify!($parser),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            // Recognizers act as the fallback once no parser matched.
            $(
                if <$recognizer>::is_match(path) {
                    return Some(capture_parser_diagnostics(
                        || <$recognizer>::extract_packages(path),
                        stringify!($recognizer),
                        path,
                        license_engine.clone(),
                    ));
                }
            )*
            None
        }

        // Same dispatch as above, without a license engine.
        pub fn try_parse_file(path: &Path) -> Option<ParsePackagesResult> {
            try_parse_file_with_license_engine(path, None)
        }

        // Used by the parser-golden maintenance tool in `xtask`.
        // Scanner runtime dispatch goes through `try_parse_file()`.
        // Looks the handler up by its stringified type name and skips `is_match`;
        // unknown names yield `None`.
        #[allow(dead_code)]
        pub fn parse_by_type_name(type_name: &str, path: &Path) -> Option<PackageData> {
            match type_name {
                $(
                    $(#[$parser_meta])*
                    stringify!($parser) => Some(<$parser>::extract_first_package(path)),
                )*
                $(
                    stringify!($recognizer) => Some(<$recognizer>::extract_first_package(path)),
                )*
                _ => None
            }
        }

        // Used by the parser-golden maintenance tool in `xtask` and by
        // `tests/scanner_integration.rs` to verify parser registration.
        // Names are listed in registration order: parsers first, then recognizers.
        #[allow(dead_code)]
        pub fn list_parser_types() -> Vec<&'static str> {
            vec![
                $(
                    $(#[$parser_meta])*
                    stringify!($parser),
                )*
                $(
                    stringify!($recognizer),
                )*
            ]
        }

        /// Collects documentation metadata from all registered parsers and recognizers.
        pub fn collect_parser_metadata() -> Vec<metadata::ParserMetadata> {
            let mut entries = Vec::new();
            $(
                $(#[$parser_meta])*
                entries.extend(<$parser>::metadata());
            )*
            $(
                entries.extend(<$recognizer>::metadata());
            )*
            entries
        }
    };
}
802
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::sync::Arc;

    use super::{active_parser_license_engine, capture_parser_diagnostics};
    use crate::license_detection::LicenseDetectionEngine;
    use crate::models::PackageData;
    use crate::parsers::license_normalization::{
        clear_last_parser_license_engine_ptr, last_parser_license_engine_ptr,
    };

    /// Builds a shared handle to the embedded license engine used by these tests.
    fn embedded_engine() -> Arc<LicenseDetectionEngine> {
        Arc::new(LicenseDetectionEngine::from_embedded().expect("embedded engine should load"))
    }

    /// The engine is visible inside the extract closure and gone once capture returns.
    #[test]
    fn test_capture_parser_diagnostics_exposes_active_license_engine() {
        let engine = embedded_engine();

        let extract = || {
            assert!(active_parser_license_engine().is_some());
            vec![PackageData::default()]
        };
        let result = capture_parser_diagnostics(
            extract,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(engine),
        );

        assert_eq!(result.packages.len(), 1);
        assert!(active_parser_license_engine().is_none());
    }

    /// The engine must still be active while packages are finalized after extraction.
    #[test]
    fn test_capture_parser_diagnostics_keeps_active_license_engine_for_finalization() {
        let engine = embedded_engine();
        clear_last_parser_license_engine_ptr();

        let extract = || {
            let mut extra_data = HashMap::new();
            extra_data.insert(
                "license_file".to_string(),
                serde_json::Value::String("LICENSE".to_string()),
            );

            vec![PackageData {
                declared_license_expression: Some("mit".to_string()),
                declared_license_expression_spdx: Some("MIT".to_string()),
                extracted_license_statement: Some("MIT".to_string()),
                extra_data: Some(extra_data),
                ..Default::default()
            }]
        };
        let result = capture_parser_diagnostics(
            extract,
            "TestParser",
            Path::new("testdata/package.json"),
            Some(Arc::clone(&engine)),
        );

        assert_eq!(result.packages.len(), 1);

        let expected_engine_ptr = Some(Arc::as_ptr(&engine) as usize);
        assert_eq!(last_parser_license_engine_ptr(), expected_engine_ptr);

        let first_match = &result.packages[0].license_detections[0].matches[0];
        assert_eq!(
            first_match.referenced_filenames.as_ref(),
            Some(&vec!["LICENSE".to_string()])
        );
        assert!(active_parser_license_engine().is_none());
    }
}
873
874register_package_handlers! {
875    parsers: [
876        AboutFileParser,
877        AndroidAabParser,
878        AndroidApkParser,
879        AndroidManifestParser,
880        AndroidSoongMetadataParser,
881        AlpineApkParser,
882        AlpineApkbuildParser,
883        AlpineInstalledParser,
884        ArchPkginfoParser,
885        ArchSrcinfoParser,
886        AutotoolsConfigureParser,
887        BazelBuildParser,
888        BazelModuleParser,
889        BitbakeRecipeParser,
890        BowerJsonParser,
891        BunLockParser,
892        BunLockbParser,
893        BuckBuildParser,
894        BuckMetadataBzlParser,
895        CargoLockParser,
896        CargoParser,
897        CarthageCartfileParser,
898        CarthageCartfileResolvedParser,
899        ChefMetadataJsonParser,
900        ChefMetadataRbParser,
901        CitationCffParser,
902        ClojureDepsEdnParser,
903        ClojureProjectCljParser,
904        ComposerJsonParser,
905        ComposerLockParser,
906        ConanDataParser,
907        ConanFilePyParser,
908        ConanfileTxtParser,
909        ConanLockParser,
910        CondaEnvironmentYmlParser,
911        CondaMetaJsonParser,
912        CondaMetaYamlParser,
913        CpanDistIniParser,
914        CpanMakefilePlParser,
915        CpanManifestParser,
916        CpanMetaJsonParser,
917        CpanMetaYmlParser,
918        CranParser,
919        DebianControlInExtractedDebParser,
920        DebianControlParser,
921        DebianCopyrightParser,
922        DebianDebianTarParser,
923        DebianDebParser,
924        DebianDistrolessInstalledParser,
925        DebianDscParser,
926        DebianInstalledListParser,
927        DebianInstalledMd5sumsParser,
928        DebianInstalledParser,
929        DebianMd5sumInPackageParser,
930        DebianOrigTarParser,
931        DenoParser,
932        DenoLockParser,
933        DockerfileParser,
934        ErlangAppSrcParser,
935        RebarConfigParser,
936        RebarLockParser,
937        FreebsdCompactManifestParser,
938        GemArchiveParser,
939        GemfileLockParser,
940        GemfileParser,
941        GemMetadataExtractedParser,
942        GemspecParser,
943        GitmodulesParser,
944        GodepsParser,
945        GoModParser,
946        GoModGraphParser,
947        GoSumParser,
948        GoWorkParser,
949        GradleLockfileParser,
950        GradleParser,
951        GradleModuleParser,
952        HackageCabalParser,
953        HackageCabalProjectParser,
954        HackageStackYamlParser,
955        HelmChartYamlParser,
956        HelmChartLockParser,
957        HaxeParser,
958        HexLockParser,
959        JuliaManifestTomlParser,
960        JuliaProjectTomlParser,
961        MavenParser,
962        MesonParser,
963        MicrosoftUpdateManifestParser,
964        NixDefaultParser,
965        NixFlakeLockParser,
966        NixFlakeParser,
967        NpmLockParser,
968        NpmParser,
969        NpmWorkspaceParser,
970        DotNetDepsJsonParser,
971        CentralPackageManagementPropsParser,
972        DirectoryBuildPropsParser,
973        NupkgParser,
974        NuspecParser,
975        PackageReferenceProjectParser,
976        OpamParser,
977        OsReleaseParser,
978        PackagesConfigParser,
979        PackagesLockParser,
980        ProjectJsonParser,
981        ProjectLockJsonParser,
982        PipfileLockParser,
983        PipInspectDeplockParser,
984        PixiTomlParser,
985        PixiLockParser,
986        PnpmLockParser,
987        PodfileLockParser,
988        PodfileParser,
989        PodspecJsonParser,
990        PodspecParser,
991        PoetryLockParser,
992        PubliccodeParser,
993        PylockTomlParser,
994        PubspecLockParser,
995        PubspecYamlParser,
996        PythonParser,
997        UvLockParser,
998        VcpkgManifestParser,
999        ReadmeParser,
1000        RequirementsTxtParser,
1001        RpmBdbDatabaseParser,
1002        RpmLicenseFilesParser,
1003        RpmMarinerManifestParser,
1004        RpmNdbDatabaseParser,
1005        RpmParser,
1006        RpmSpecfileParser,
1007        #[cfg(feature = "rpm-sqlite")]
1008        RpmSqliteDatabaseParser,
1009        RpmYumdbParser,
1010        SbtParser,
1011        SwiftManifestJsonParser,
1012        SwiftPackageResolvedParser,
1013        SwiftShowDependenciesParser,
1014        YarnLockParser,
1015        YarnPnpParser,
1016    ],
1017    recognizers: [
1018        AndroidLibraryRecognizer,
1019        AppleDmgRecognizer,
1020        Axis2MarRecognizer,
1021        Axis2ModuleXmlRecognizer,
1022        CabArchiveRecognizer,
1023        ChromeCrxRecognizer,
1024        InstallShieldRecognizer,
1025        IosIpaRecognizer,
1026        IsoImageRecognizer,
1027        IvyXmlRecognizer,
1028        JavaEarAppXmlRecognizer,
1029        JavaEarRecognizer,
1030        JavaJarRecognizer,
1031        JavaWarRecognizer,
1032        JavaWarWebXmlRecognizer,
1033        JBossSarRecognizer,
1034        JBossServiceXmlRecognizer,
1035        MeteorPackageRecognizer,
1036        MozillaXpiRecognizer,
1037        NsisRecognizer,
1038        SharArchiveRecognizer,
1039        SquashfsRecognizer,
1040    ],
1041}
1042
#[cfg(test)]
mod panic_isolation_tests {
    //! Checks that `capture_parser_diagnostics` reports a panicking parser as a
    //! scan error and that a later, well-behaved parser run is unaffected.

    use super::*;
    use crate::models::DiagnosticSeverity;

    /// Stand-in parser body that always panics with the message the tests look for.
    fn exploding_parser() -> Vec<PackageData> {
        panic!("panic boom")
    }

    #[test]
    fn capture_parser_diagnostics_turns_panics_into_scan_errors() {
        let outcome = capture_parser_diagnostics(
            exploding_parser,
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        );

        // A panicking parser yields no packages, only a single error-level report.
        assert!(outcome.packages.is_empty());
        assert_eq!(outcome.scan_errors.len(), 1);
        assert_eq!(outcome.scan_diagnostics.len(), 1);
        assert_eq!(
            outcome.scan_diagnostics[0].severity,
            DiagnosticSeverity::Error
        );

        // The error text must name the parser, the file, and the panic payload.
        let message = &outcome.scan_errors[0];
        let expected_fragments = ["PanicParser", "fixtures/panic-package.json", "panic boom"];
        for fragment in expected_fragments.iter() {
            assert!(message.contains(*fragment));
        }
    }

    #[test]
    fn capture_parser_diagnostics_recovers_after_panic() {
        // First run: trigger a panic and discard whatever is reported for it.
        let _ = capture_parser_diagnostics(
            exploding_parser,
            "PanicParser",
            Path::new("fixtures/panic-package.json"),
            None,
        );

        // Second run: a parser that emits one warning and returns one package.
        let recovering_parser = || {
            crate::parser_warn!("recoverable parser warning");
            vec![PackageData {
                package_type: Some(PackageType::Npm),
                ..Default::default()
            }]
        };
        let outcome = capture_parser_diagnostics(
            recovering_parser,
            "RecoveringParser",
            Path::new("fixtures/recovered-package.json"),
            None,
        );

        // Only the second run's warning may show up; nothing from the panic leaks in.
        assert_eq!(outcome.packages.len(), 1);
        assert_eq!(outcome.scan_errors, vec!["recoverable parser warning"]);
        assert_eq!(outcome.scan_diagnostics.len(), 1);
        assert_eq!(
            outcome.scan_diagnostics[0].severity,
            DiagnosticSeverity::Warning
        );
    }
}