Skip to main content

provenant/assembly/
assemblers.rs

1use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
2use strum::EnumIter;
3
4use super::{
5    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, cargo_resource_assign,
6    cargo_workspace_merge, composer_resource_assign, conda_rootfs_merge, file_ref_resolve,
7    hackage_merge, npm_resource_assign, npm_workspace_merge, nuget_cpm_resolve,
8    python_requirements_assign, ruby_resource_assign, swift_merge,
9};
10
11#[derive(Clone, Copy)]
12pub(super) enum SpecialDirectoryMergerKind {
13    Skip,
14    Hackage,
15}
16
17#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
18pub(super) enum PostAssemblyPassKind {
19    SwiftMerge,
20    CondaRootfsMerge,
21    NpmResourceAssign,
22    PythonRequirementsAssign,
23    FileReferenceResolve,
24    RpmYumdbMerge,
25    NpmWorkspaceMerge,
26    CargoWorkspaceMerge,
27    NugetCpmResolve,
28    CargoResourceAssign,
29    ComposerResourceAssign,
30    RubyResourceAssign,
31}
32
33pub(super) fn special_directory_merger_for(
34    config_key: DatasourceId,
35) -> Option<SpecialDirectoryMergerKind> {
36    match config_key {
37        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
38        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
39        _ => None,
40    }
41}
42
43pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
44    PostAssemblyPassKind::SwiftMerge,
45    PostAssemblyPassKind::CondaRootfsMerge,
46    PostAssemblyPassKind::NpmResourceAssign,
47    PostAssemblyPassKind::PythonRequirementsAssign,
48    PostAssemblyPassKind::FileReferenceResolve,
49    PostAssemblyPassKind::RpmYumdbMerge,
50    PostAssemblyPassKind::NpmWorkspaceMerge,
51    PostAssemblyPassKind::CargoWorkspaceMerge,
52    PostAssemblyPassKind::NugetCpmResolve,
53    PostAssemblyPassKind::CargoResourceAssign,
54    PostAssemblyPassKind::ComposerResourceAssign,
55    PostAssemblyPassKind::RubyResourceAssign,
56];
57
58pub(super) fn run_post_assembly_passes(
59    files: &mut [FileInfo],
60    packages: &mut Vec<Package>,
61    dependencies: &mut Vec<TopLevelDependency>,
62) {
63    for pass in POST_ASSEMBLY_PASSES {
64        pass.run(files, packages, dependencies);
65    }
66}
67
68impl SpecialDirectoryMergerKind {
69    pub(super) fn run(
70        self,
71        files: &[FileInfo],
72        file_indices: &[usize],
73    ) -> Vec<DirectoryMergeOutput> {
74        match self {
75            Self::Skip => Vec::new(),
76            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
77        }
78    }
79}
80
81impl PostAssemblyPassKind {
82    fn run(
83        self,
84        files: &mut [FileInfo],
85        packages: &mut Vec<Package>,
86        dependencies: &mut Vec<TopLevelDependency>,
87    ) {
88        match self {
89            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
90            Self::CondaRootfsMerge => {
91                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
92            }
93            Self::NpmResourceAssign => {
94                npm_resource_assign::assign_npm_package_resources(files, packages)
95            }
96            Self::PythonRequirementsAssign => {
97                python_requirements_assign::assign_python_requirements_to_projects(
98                    files,
99                    packages,
100                    dependencies,
101                )
102            }
103            Self::FileReferenceResolve => {
104                file_ref_resolve::resolve_file_references(files, packages, dependencies)
105            }
106            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
107            Self::NpmWorkspaceMerge => {
108                npm_workspace_merge::assemble_npm_workspaces(files, packages, dependencies)
109            }
110            Self::CargoWorkspaceMerge => {
111                cargo_workspace_merge::assemble_cargo_workspaces(files, packages, dependencies)
112            }
113            Self::NugetCpmResolve => {
114                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
115            }
116            Self::CargoResourceAssign => {
117                cargo_resource_assign::assign_cargo_package_resources(files, packages)
118            }
119            Self::ComposerResourceAssign => {
120                composer_resource_assign::assign_composer_package_resources(files, packages)
121            }
122            Self::RubyResourceAssign => {
123                ruby_resource_assign::assign_ruby_package_resources(files, packages)
124            }
125        }
126    }
127}
128
129pub static ASSEMBLERS: &[AssemblerConfig] = &[
130    // ── Sibling-merge assemblers ──
131    //
132    // npm ecosystem: package.json + lockfiles in same directory.
133    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
134    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
135    AssemblerConfig {
136        datasource_ids: &[
137            DatasourceId::BunLock,
138            DatasourceId::BunLockb,
139            DatasourceId::NpmPackageJson,
140            DatasourceId::NpmPackageLockJson,
141            DatasourceId::YarnLock,
142            DatasourceId::YarnLockV1,
143            DatasourceId::YarnLockV2,
144            DatasourceId::PnpmLockYaml,
145            DatasourceId::PnpmWorkspaceYaml,
146        ],
147        sibling_file_patterns: &[
148            "package.json",
149            "bun.lock",
150            "bun.lockb",
151            ".package-lock.json",
152            "package-lock.json",
153            ".npm-shrinkwrap.json",
154            "npm-shrinkwrap.json",
155            "yarn.lock",
156            "pnpm-lock.yaml",
157            "shrinkwrap.yaml",
158            "pnpm-workspace.yaml",
159        ],
160        mode: AssemblyMode::SiblingMerge,
161    },
162    // Rust/Cargo ecosystem
163    AssemblerConfig {
164        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
165        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
166        mode: AssemblyMode::SiblingMerge,
167    },
168    // CocoaPods ecosystem
169    AssemblerConfig {
170        datasource_ids: &[
171            DatasourceId::CocoapodsPodspec,
172            DatasourceId::CocoapodsPodspecJson,
173            DatasourceId::CocoapodsPodfile,
174            DatasourceId::CocoapodsPodfileLock,
175        ],
176        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
177        mode: AssemblyMode::SiblingMerge,
178    },
179    // PHP Composer ecosystem
180    AssemblerConfig {
181        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
182        sibling_file_patterns: &[
183            "*composer.json",
184            "composer.*.json",
185            "*composer.lock",
186            "composer.*.lock",
187        ],
188        mode: AssemblyMode::SiblingMerge,
189    },
190    // Go ecosystem (includes legacy Godeps)
191    AssemblerConfig {
192        datasource_ids: &[
193            DatasourceId::GoMod,
194            DatasourceId::GoModGraph,
195            DatasourceId::GoSum,
196            DatasourceId::GoWork,
197            DatasourceId::Godeps,
198        ],
199        sibling_file_patterns: &[
200            "go.mod",
201            "go.work",
202            "go.mod.graph",
203            "go.modgraph",
204            "go.sum",
205            "Godeps.json",
206        ],
207        mode: AssemblyMode::SiblingMerge,
208    },
209    // Dart/Flutter ecosystem
210    AssemblerConfig {
211        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
212        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
213        mode: AssemblyMode::SiblingMerge,
214    },
215    // Pixi ecosystem
216    AssemblerConfig {
217        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
218        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
219        mode: AssemblyMode::SiblingMerge,
220    },
221    AssemblerConfig {
222        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
223        sibling_file_patterns: &["flake.nix", "flake.lock"],
224        mode: AssemblyMode::SiblingMerge,
225    },
226    AssemblerConfig {
227        datasource_ids: &[DatasourceId::NixDefaultNix],
228        sibling_file_patterns: &["default.nix"],
229        mode: AssemblyMode::OnePerPackageData,
230    },
231    // Helm chart ecosystem
232    AssemblerConfig {
233        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
234        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
235        mode: AssemblyMode::SiblingMerge,
236    },
237    AssemblerConfig {
238        datasource_ids: &[
239            DatasourceId::HackageCabal,
240            DatasourceId::HackageCabalProject,
241            DatasourceId::HackageStackYaml,
242        ],
243        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
244        mode: AssemblyMode::SiblingMerge,
245    },
246    // Chef ecosystem
247    AssemblerConfig {
248        datasource_ids: &[
249            DatasourceId::ChefCookbookMetadataJson,
250            DatasourceId::ChefCookbookMetadataRb,
251        ],
252        sibling_file_patterns: &["metadata.json", "metadata.rb"],
253        mode: AssemblyMode::SiblingMerge,
254    },
255    // Conan (C/C++) ecosystem
256    AssemblerConfig {
257        datasource_ids: &[
258            DatasourceId::ConanConanFilePy,
259            DatasourceId::ConanConanFileTxt,
260            DatasourceId::ConanLock,
261            DatasourceId::ConanConanDataYml,
262        ],
263        sibling_file_patterns: &[
264            "conanfile.py",
265            "conanfile.txt",
266            "conan.lock",
267            "conandata.yml",
268        ],
269        mode: AssemblyMode::SiblingMerge,
270    },
271    // Maven/Java ecosystem (nested merge via META-INF)
272    AssemblerConfig {
273        datasource_ids: &[
274            DatasourceId::MavenPom,
275            DatasourceId::MavenPomProperties,
276            DatasourceId::JavaJarManifest,
277            DatasourceId::JavaOsgiManifest,
278        ],
279        sibling_file_patterns: &[
280            "pom.xml",
281            "*.pom",
282            "pom.properties",
283            "**/META-INF/MANIFEST.MF",
284        ],
285        mode: AssemblyMode::SiblingMerge,
286    },
287    AssemblerConfig {
288        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
289        sibling_file_patterns: &["*.whl", "origin.json"],
290        mode: AssemblyMode::SiblingMerge,
291    },
292    // Python/PyPI ecosystem
293    AssemblerConfig {
294        datasource_ids: &[
295            DatasourceId::PypiPyprojectToml,
296            DatasourceId::PypiPoetryPyprojectToml,
297            DatasourceId::PypiSetupPy,
298            DatasourceId::PypiSetupCfg,
299            DatasourceId::PypiWheel,
300            DatasourceId::PypiWheelMetadata,
301            DatasourceId::PypiEgg,
302            DatasourceId::PypiEggPkginfo,
303            DatasourceId::PypiEditableEggPkginfo,
304            DatasourceId::PypiJson,
305            DatasourceId::PypiSdist,
306            DatasourceId::PypiSdistPkginfo,
307            DatasourceId::PypiInspectDeplock,
308            DatasourceId::PipRequirements,
309            DatasourceId::PypiPoetryLock,
310            DatasourceId::PypiPylockToml,
311            DatasourceId::PypiUvLock,
312            DatasourceId::Pipfile,
313            DatasourceId::PipfileLock,
314        ],
315        sibling_file_patterns: &[
316            "pyproject.toml",
317            "setup.py",
318            "setup.cfg",
319            "PKG-INFO",
320            "METADATA",
321            "pypi.json",
322            "pip-inspect.deplock",
323            "*.tar.gz",
324            "*.tgz",
325            "*.tar.bz2",
326            "*.tar.xz",
327            "*.zip",
328            "requirements*.txt",
329            "Pipfile",
330            "Pipfile.lock",
331            "poetry.lock",
332            "pylock.toml",
333            "pylock.*.toml",
334            "uv.lock",
335        ],
336        mode: AssemblyMode::SiblingMerge,
337    },
338    AssemblerConfig {
339        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
340        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
341        mode: AssemblyMode::SiblingMerge,
342    },
343    // Ruby/RubyGems ecosystem
344    AssemblerConfig {
345        datasource_ids: &[
346            DatasourceId::GemArchiveExtracted,
347            DatasourceId::Gemspec,
348            DatasourceId::GemspecExtracted,
349            DatasourceId::Gemfile,
350            DatasourceId::GemfileExtracted,
351            DatasourceId::GemfileLock,
352            DatasourceId::GemfileLockExtracted,
353            DatasourceId::GemArchive,
354        ],
355        sibling_file_patterns: &[
356            "metadata.gz-extract",
357            "**/data.gz-extract/*.gemspec",
358            "**/data.gz-extract/Gemfile",
359            "**/data.gz-extract/Gemfile.lock",
360            "*.gemspec",
361            "Gemfile",
362            "Gemfile.lock",
363        ],
364        mode: AssemblyMode::SiblingMerge,
365    },
366    // Conda ecosystem
367    AssemblerConfig {
368        datasource_ids: &[
369            DatasourceId::CondaMetaYaml,
370            DatasourceId::CondaYaml,
371            DatasourceId::CondaMetaJson,
372        ],
373        sibling_file_patterns: &[
374            "meta.yaml",
375            "meta.yml",
376            "environment.yml",
377            "environment.yaml",
378            "conda.yaml",
379            "conda.yml",
380            "*conda*.yaml",
381            "*conda*.yml",
382            "env.yaml",
383            "env.yml",
384            "*env*.yaml",
385            "*env*.yml",
386            "*environment*.yaml",
387            "*environment*.yml",
388            "*.json",
389        ],
390        mode: AssemblyMode::SiblingMerge,
391    },
392    // RPM specfile (source packages)
393    AssemblerConfig {
394        datasource_ids: &[DatasourceId::RpmSpecfile],
395        sibling_file_patterns: &["*.spec"],
396        mode: AssemblyMode::SiblingMerge,
397    },
398    // Debian source packages (nested merge via debian/ directory)
399    AssemblerConfig {
400        datasource_ids: &[
401            DatasourceId::DebianControlInSource,
402            DatasourceId::DebianCopyrightInSource,
403        ],
404        sibling_file_patterns: &["**/debian/control", "**/debian/copyright"],
405        mode: AssemblyMode::SiblingMerge,
406    },
407    // Gradle/Android ecosystem
408    AssemblerConfig {
409        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
410        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
411        mode: AssemblyMode::SiblingMerge,
412    },
413    AssemblerConfig {
414        datasource_ids: &[DatasourceId::GradleModule],
415        sibling_file_patterns: &["*.module"],
416        mode: AssemblyMode::OnePerPackageData,
417    },
418    // CPAN/Perl ecosystem
419    AssemblerConfig {
420        datasource_ids: &[
421            DatasourceId::CpanMetaJson,
422            DatasourceId::CpanMetaYml,
423            DatasourceId::CpanManifest,
424            DatasourceId::CpanDistIni,
425            DatasourceId::CpanMakefile,
426        ],
427        sibling_file_patterns: &[
428            "META.json",
429            "META.yml",
430            "MANIFEST",
431            "dist.ini",
432            "Makefile.PL",
433        ],
434        mode: AssemblyMode::SiblingMerge,
435    },
436    // NuGet/.NET ecosystem
437    AssemblerConfig {
438        datasource_ids: &[
439            DatasourceId::NugetCsproj,
440            DatasourceId::NugetFsproj,
441            DatasourceId::NugetNuspec,
442            DatasourceId::NugetNupkg,
443            DatasourceId::NugetProjectJson,
444            DatasourceId::NugetProjectLockJson,
445            DatasourceId::NugetPackagesConfig,
446            DatasourceId::NugetPackagesLock,
447            DatasourceId::NugetVbproj,
448        ],
449        sibling_file_patterns: &[
450            "*.csproj",
451            "*.fsproj",
452            "*.nuspec",
453            "*.nupkg",
454            "project.json",
455            "project.lock.json",
456            "packages.config",
457            "packages.lock.json",
458            "*.packages.lock.json",
459            "*.vbproj",
460        ],
461        mode: AssemblyMode::SiblingMerge,
462    },
463    AssemblerConfig {
464        datasource_ids: &[DatasourceId::NugetDepsJson],
465        sibling_file_patterns: &["*.deps.json"],
466        mode: AssemblyMode::OnePerPackageData,
467    },
468    // Swift/SPM ecosystem
469    AssemblerConfig {
470        datasource_ids: &[
471            DatasourceId::SwiftPackageManifestJson,
472            DatasourceId::SwiftPackageResolved,
473            DatasourceId::SwiftPackageShowDependencies,
474        ],
475        sibling_file_patterns: &[
476            "Package.swift.json",
477            "Package.swift.deplock",
478            "Package.resolved",
479            ".package.resolved",
480            "swift-show-dependencies.deplock",
481        ],
482        mode: AssemblyMode::SiblingMerge,
483    },
484    // ── Standalone assemblers (single file → single package) ──
485    //
486    // These ecosystems have only one manifest file type with no sibling merging.
487    // They still need configs so their datasource_ids are recognized by the assembler.
488    //
489    // Bower (JavaScript)
490    AssemblerConfig {
491        datasource_ids: &[DatasourceId::BowerJson],
492        sibling_file_patterns: &["bower.json"],
493        mode: AssemblyMode::SiblingMerge,
494    },
495    // CRAN (R language)
496    AssemblerConfig {
497        datasource_ids: &[DatasourceId::CranDescription],
498        sibling_file_patterns: &["DESCRIPTION"],
499        mode: AssemblyMode::SiblingMerge,
500    },
501    // FreeBSD packages
502    AssemblerConfig {
503        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
504        sibling_file_patterns: &["+COMPACT_MANIFEST"],
505        mode: AssemblyMode::SiblingMerge,
506    },
507    // Haxe ecosystem
508    AssemblerConfig {
509        datasource_ids: &[DatasourceId::HaxelibJson],
510        sibling_file_patterns: &["haxelib.json"],
511        mode: AssemblyMode::SiblingMerge,
512    },
513    AssemblerConfig {
514        datasource_ids: &[DatasourceId::Gitmodules],
515        sibling_file_patterns: &[".gitmodules"],
516        mode: AssemblyMode::SiblingMerge,
517    },
518    // OCaml/opam ecosystem
519    AssemblerConfig {
520        datasource_ids: &[DatasourceId::OpamFile],
521        sibling_file_patterns: &["opam", "*.opam"],
522        mode: AssemblyMode::SiblingMerge,
523    },
524    // RPM Mariner manifest
525    AssemblerConfig {
526        datasource_ids: &[DatasourceId::RpmMarinerManifest],
527        sibling_file_patterns: &["*.rpm.manifest"],
528        mode: AssemblyMode::SiblingMerge,
529    },
530    AssemblerConfig {
531        datasource_ids: &[DatasourceId::RpmYumdb],
532        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
533        mode: AssemblyMode::OnePerPackageData,
534    },
535    // Microsoft Update Manifest
536    AssemblerConfig {
537        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
538        sibling_file_patterns: &["*.mum"],
539        mode: AssemblyMode::SiblingMerge,
540    },
541    // Autotools (C/C++ build system)
542    AssemblerConfig {
543        datasource_ids: &[DatasourceId::AutotoolsConfigure],
544        sibling_file_patterns: &["configure", "configure.ac"],
545        mode: AssemblyMode::SiblingMerge,
546    },
547    // Bazel (build system)
548    AssemblerConfig {
549        datasource_ids: &[DatasourceId::BazelBuild],
550        sibling_file_patterns: &["BUILD"],
551        mode: AssemblyMode::SiblingMerge,
552    },
553    AssemblerConfig {
554        datasource_ids: &[DatasourceId::BazelModule],
555        sibling_file_patterns: &["MODULE.bazel"],
556        mode: AssemblyMode::OnePerPackageData,
557    },
558    // Buck (build system)
559    AssemblerConfig {
560        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
561        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
562        mode: AssemblyMode::SiblingMerge,
563    },
564    // Ant/Ivy (Java dependency management)
565    AssemblerConfig {
566        datasource_ids: &[DatasourceId::AntIvyXml],
567        sibling_file_patterns: &["ivy.xml"],
568        mode: AssemblyMode::SiblingMerge,
569    },
570    // Meteor (JavaScript platform)
571    AssemblerConfig {
572        datasource_ids: &[DatasourceId::MeteorPackage],
573        sibling_file_patterns: &["package.js"],
574        mode: AssemblyMode::SiblingMerge,
575    },
576    // ── One-per-PackageData assemblers (database files with many packages) ──
577    //
578    // Alpine installed package database
579    AssemblerConfig {
580        datasource_ids: &[DatasourceId::AlpineInstalledDb],
581        sibling_file_patterns: &["installed"],
582        mode: AssemblyMode::OnePerPackageData,
583    },
584    AssemblerConfig {
585        datasource_ids: &[DatasourceId::AlpineApkbuild],
586        sibling_file_patterns: &["APKBUILD"],
587        mode: AssemblyMode::SiblingMerge,
588    },
589    // RPM installed package databases (BDB, NDB, SQLite)
590    AssemblerConfig {
591        datasource_ids: &[
592            DatasourceId::RpmInstalledDatabaseBdb,
593            DatasourceId::RpmInstalledDatabaseNdb,
594            DatasourceId::RpmInstalledDatabaseSqlite,
595        ],
596        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
597        mode: AssemblyMode::OnePerPackageData,
598    },
599    // Debian installed package databases
600    AssemblerConfig {
601        datasource_ids: &[
602            DatasourceId::DebianInstalledStatusDb,
603            DatasourceId::DebianDistrolessInstalledDb,
604        ],
605        sibling_file_patterns: &["status"],
606        mode: AssemblyMode::OnePerPackageData,
607    },
608    AssemblerConfig {
609        datasource_ids: &[
610            DatasourceId::DebianControlExtractedDeb,
611            DatasourceId::DebianMd5SumsInExtractedDeb,
612        ],
613        sibling_file_patterns: &["control", "md5sums"],
614        mode: AssemblyMode::SiblingMerge,
615    },
616    AssemblerConfig {
617        datasource_ids: &[DatasourceId::AboutFile],
618        sibling_file_patterns: &["*.ABOUT"],
619        mode: AssemblyMode::OnePerPackageData,
620    },
621];
622
623// Datasource IDs intentionally excluded from package assembly.
624//
625// This list is runtime-significant: files with these datasource IDs may remain
626// unowned by any Package, while their dependencies are still eligible for
627// top-level hoisting. Tests also use it to enforce explicit assembly accounting.
628pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
629    // Non-package metadata
630    DatasourceId::Readme,
631    DatasourceId::EtcOsRelease,
632    // Binary archives (require external extraction via ExtractCode before scanning)
633    DatasourceId::AlpineApkArchive,
634    DatasourceId::AndroidAarLibrary,
635    DatasourceId::AndroidApk,
636    DatasourceId::AppleDmg,
637    DatasourceId::Axis2Mar,
638    DatasourceId::ChromeCrx,
639    DatasourceId::DebianDeb,
640    DatasourceId::DebianOriginalSourceTarball,
641    DatasourceId::DebianSourceMetadataTarball,
642    DatasourceId::InstallshieldInstaller,
643    DatasourceId::IosIpa,
644    DatasourceId::IsoDiskImage,
645    DatasourceId::JavaEarArchive,
646    DatasourceId::JavaJar,
647    DatasourceId::JavaWarArchive,
648    DatasourceId::JbossSar,
649    DatasourceId::MicrosoftCabinet,
650    DatasourceId::MozillaXpi,
651    DatasourceId::NsisInstaller,
652    DatasourceId::RpmArchive,
653    DatasourceId::SharShellArchive,
654    DatasourceId::SquashfsDiskImage,
655    // Supplementary metadata (not primary package definitions)
656    DatasourceId::ArchAurinfo,
657    DatasourceId::ArchPkginfo,
658    DatasourceId::ArchSrcinfo,
659    DatasourceId::Axis2ModuleXml,
660    DatasourceId::ClojureDepsEdn,
661    DatasourceId::ClojureProjectClj,
662    DatasourceId::DebianInstalledFilesList,
663    DatasourceId::DebianInstalledMd5Sums,
664    DatasourceId::DebianCopyright,
665    DatasourceId::DebianCopyrightInPackage,
666    DatasourceId::DebianCopyrightStandalone,
667    DatasourceId::GoBinary,
668    DatasourceId::DebianSourceControlDsc,
669    DatasourceId::Dockerfile,
670    DatasourceId::HexMixLock,
671    DatasourceId::JavaEarApplicationXml,
672    DatasourceId::JavaWarWebXml,
673    DatasourceId::JbossServiceXml,
674    DatasourceId::MesonBuild,
675    DatasourceId::GemGemspecInstalledSpecifications,
676    DatasourceId::NugetDirectoryBuildProps,
677    DatasourceId::NugetDirectoryPackagesProps,
678    DatasourceId::RpmPackageLicenses,
679    DatasourceId::RustBinary,
680    DatasourceId::SbtBuildSbt,
681    DatasourceId::VcpkgJson,
682];
683
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Every `DatasourceId` variant must be listed in exactly one of
    /// `ASSEMBLERS` or `UNASSEMBLED_DATASOURCE_IDS`.
    ///
    /// IDs are collected into sets, so an ID repeated across several
    /// `ASSEMBLERS` entries is not detected here.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        let mut assembled: HashSet<DatasourceId> = HashSet::new();
        for config in ASSEMBLERS {
            for &dsid in config.datasource_ids {
                assembled.insert(dsid);
            }
        }

        let unassembled: HashSet<DatasourceId> =
            UNASSEMBLED_DATASOURCE_IDS.iter().copied().collect();

        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !assembled.contains(dsid) && !unassembled.contains(dsid))
            .collect();

        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// `POST_ASSEMBLY_PASSES` must not list the same pass twice.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let unique: HashSet<PostAssemblyPassKind> = POST_ASSEMBLY_PASSES.iter().copied().collect();

        assert_eq!(
            unique.len(),
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Every `PostAssemblyPassKind` variant must appear in
    /// `POST_ASSEMBLY_PASSES` exactly once.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let registered: HashSet<PostAssemblyPassKind> =
            POST_ASSEMBLY_PASSES.iter().copied().collect();

        let missing: Vec<_> = PostAssemblyPassKind::iter()
            .filter(|pass| !registered.contains(pass))
            .collect();

        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        for pass in PostAssemblyPassKind::iter() {
            // `entry` rather than `registered`, so the closure parameter does
            // not shadow the set built above.
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|entry| **entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }
}