Skip to main content

provenant/assembly/
assemblers.rs

1use std::collections::HashSet;
2
3use crate::models::PackageType;
4use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
5use strum::EnumIter;
6
7use super::{
8    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, cargo_resource_assign,
9    composer_resource_assign, conda_rootfs_merge, file_ref_resolve, hackage_merge,
10    npm_resource_assign, nuget_cpm_resolve, python_requirements_assign, ruby_resource_assign,
11    swift_merge, topology,
12};
13
14#[derive(Clone, Copy)]
15pub(super) enum SpecialDirectoryMergerKind {
16    Skip,
17    Hackage,
18}
19
/// Identifies one post-assembly pass.
///
/// Each variant is gated by `PostAssemblyPassKind::should_run` and executed by
/// `PostAssemblyPassKind::run`. `EnumIter` is derived so the tests in this
/// module can iterate over every variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
pub(super) enum PostAssemblyPassKind {
    /// Runs `swift_merge::assemble_swift_packages`.
    SwiftMerge,
    /// Runs `conda_rootfs_merge::merge_conda_rootfs_metadata`.
    CondaRootfsMerge,
    /// Runs `npm_resource_assign::assign_npm_package_resources`.
    NpmResourceAssign,
    /// Runs `python_requirements_assign::assign_python_requirements_to_projects`.
    PythonRequirementsAssign,
    /// Runs `file_ref_resolve::resolve_file_references`.
    FileReferenceResolve,
    /// Runs `file_ref_resolve::merge_rpm_yumdb_metadata`.
    RpmYumdbMerge,
    /// Runs `TopologyPlan::apply_npm_workspace_domains`.
    NpmWorkspaceMerge,
    /// Runs `TopologyPlan::apply_cargo_workspace_domains`.
    CargoWorkspaceMerge,
    /// Runs `nuget_cpm_resolve::resolve_nuget_cpm_versions`.
    NugetCpmResolve,
    /// Runs `cargo_resource_assign::assign_cargo_package_resources`.
    CargoResourceAssign,
    /// Runs `composer_resource_assign::assign_composer_package_resources`.
    ComposerResourceAssign,
    /// Runs `ruby_resource_assign::assign_ruby_package_resources`.
    RubyResourceAssign,
}
35
36pub(super) fn special_directory_merger_for(
37    config_key: DatasourceId,
38) -> Option<SpecialDirectoryMergerKind> {
39    match config_key {
40        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
41        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
42        _ => None,
43    }
44}
45
/// Ordered registry of post-assembly passes.
///
/// `run_post_assembly_passes` walks this slice front to back, so the order of
/// the entries is the order in which eligible passes execute. The tests in
/// this module assert that every `PostAssemblyPassKind` variant is listed
/// exactly once.
pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
    PostAssemblyPassKind::SwiftMerge,
    PostAssemblyPassKind::CondaRootfsMerge,
    PostAssemblyPassKind::NpmResourceAssign,
    PostAssemblyPassKind::PythonRequirementsAssign,
    PostAssemblyPassKind::FileReferenceResolve,
    PostAssemblyPassKind::RpmYumdbMerge,
    PostAssemblyPassKind::NpmWorkspaceMerge,
    PostAssemblyPassKind::CargoWorkspaceMerge,
    PostAssemblyPassKind::NugetCpmResolve,
    PostAssemblyPassKind::CargoResourceAssign,
    PostAssemblyPassKind::ComposerResourceAssign,
    PostAssemblyPassKind::RubyResourceAssign,
];
60
// Gate for `PostAssemblyPassKind::SwiftMerge`: the pass runs when at least one
// of these datasource IDs was seen on a scanned file.
const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::SwiftPackageManifestJson,
    DatasourceId::SwiftPackageResolved,
    DatasourceId::SwiftPackageShowDependencies,
];

// Gate for `PostAssemblyPassKind::CondaRootfsMerge`: the pass runs only when
// ALL of these datasource IDs were seen.
const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];

// Part of the gate for `PostAssemblyPassKind::RpmYumdbMerge`: the pass needs
// `DatasourceId::RpmYumdb` plus at least one of these installed-database IDs.
const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];

// Part of the gate for `PostAssemblyPassKind::NugetCpmResolve`: at least one of
// these configuration datasource IDs must be present...
const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::NugetDirectoryBuildProps,
    DatasourceId::NugetDirectoryPackagesProps,
];

// ...together with at least one of these project-file datasource IDs.
const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::NugetCsproj,
    DatasourceId::NugetFsproj,
    DatasourceId::NugetVbproj,
];
86
/// Facts gathered from the scan that decide which post-assembly passes run.
///
/// Built once by `PostAssemblyInputs::collect` and consulted by
/// `PostAssemblyPassKind::should_run`. The `Default` value (empty sets, both
/// flags `false`) represents a scan with no relevant inputs.
#[derive(Default)]
struct PostAssemblyInputs {
    /// Package types of all packages that have a `package_type` set.
    package_types: HashSet<PackageType>,
    /// Every datasource ID found on any file's package data.
    file_datasource_ids: HashSet<DatasourceId>,
    /// `true` when an `NpmPackageJson` or `PnpmWorkspaceYaml` package data entry
    /// carries a `"workspaces"` key in its extra data.
    has_npm_workspace_markers: bool,
    /// `true` when a `CargoToml` package data entry carries a non-empty
    /// `workspace.members` array in its extra data.
    has_cargo_workspace_markers: bool,
}
94
95pub(super) fn run_post_assembly_passes(
96    files: &mut [FileInfo],
97    packages: &mut Vec<Package>,
98    dependencies: &mut Vec<TopLevelDependency>,
99    topology_plan: &topology::TopologyPlan,
100) {
101    let inputs = PostAssemblyInputs::collect(files, packages);
102
103    for pass in POST_ASSEMBLY_PASSES {
104        if !pass.should_run(&inputs) {
105            continue;
106        }
107
108        pass.run(files, packages, dependencies, topology_plan);
109    }
110}
111
112impl PostAssemblyInputs {
113    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
114        let mut inputs = Self {
115            package_types: packages
116                .iter()
117                .filter_map(|package| package.package_type)
118                .collect(),
119            ..Self::default()
120        };
121
122        for file in files {
123            for package_data in &file.package_data {
124                let Some(datasource_id) = package_data.datasource_id else {
125                    continue;
126                };
127
128                inputs.file_datasource_ids.insert(datasource_id);
129
130                if matches!(
131                    datasource_id,
132                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
133                ) && package_data
134                    .extra_data
135                    .as_ref()
136                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
137                {
138                    inputs.has_npm_workspace_markers = true;
139                }
140
141                if datasource_id == DatasourceId::CargoToml
142                    && package_data
143                        .extra_data
144                        .as_ref()
145                        .and_then(|extra_data| extra_data.get("workspace"))
146                        .and_then(|workspace| workspace.get("members"))
147                        .and_then(|members| members.as_array())
148                        .is_some_and(|members| !members.is_empty())
149                {
150                    inputs.has_cargo_workspace_markers = true;
151                }
152            }
153        }
154
155        inputs
156    }
157
158    fn has_package_type(&self, package_type: PackageType) -> bool {
159        self.package_types.contains(&package_type)
160    }
161
162    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
163        datasource_ids
164            .iter()
165            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
166    }
167
168    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
169        datasource_ids
170            .iter()
171            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
172    }
173}
174
175impl SpecialDirectoryMergerKind {
176    pub(super) fn run(
177        self,
178        files: &[FileInfo],
179        file_indices: &[usize],
180    ) -> Vec<DirectoryMergeOutput> {
181        match self {
182            Self::Skip => Vec::new(),
183            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
184        }
185    }
186}
187
188impl PostAssemblyPassKind {
189    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
190        match self {
191            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
192            Self::CondaRootfsMerge => {
193                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
194            }
195            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
196            Self::PythonRequirementsAssign => {
197                inputs.has_package_type(PackageType::Pypi)
198                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
199            }
200            Self::FileReferenceResolve => {
201                file_ref_resolve::has_relevant_file_reference_datasource_ids(
202                    &inputs.file_datasource_ids,
203                )
204            }
205            Self::RpmYumdbMerge => {
206                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
207                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
208            }
209            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
210            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
211            Self::NugetCpmResolve => {
212                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
213                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
214            }
215            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
216            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
217            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
218        }
219    }
220
221    fn run(
222        self,
223        files: &mut [FileInfo],
224        packages: &mut Vec<Package>,
225        dependencies: &mut Vec<TopLevelDependency>,
226        topology_plan: &topology::TopologyPlan,
227    ) {
228        match self {
229            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
230            Self::CondaRootfsMerge => {
231                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
232            }
233            Self::NpmResourceAssign => {
234                npm_resource_assign::assign_npm_package_resources(files, packages)
235            }
236            Self::PythonRequirementsAssign => {
237                python_requirements_assign::assign_python_requirements_to_projects(
238                    files,
239                    packages,
240                    dependencies,
241                )
242            }
243            Self::FileReferenceResolve => {
244                file_ref_resolve::resolve_file_references(files, packages, dependencies)
245            }
246            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
247            Self::NpmWorkspaceMerge => {
248                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
249            }
250            Self::CargoWorkspaceMerge => {
251                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
252            }
253            Self::NugetCpmResolve => {
254                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
255            }
256            Self::CargoResourceAssign => {
257                cargo_resource_assign::assign_cargo_package_resources(files, packages)
258            }
259            Self::ComposerResourceAssign => {
260                composer_resource_assign::assign_composer_package_resources(files, packages)
261            }
262            Self::RubyResourceAssign => {
263                ruby_resource_assign::assign_ruby_package_resources(files, packages)
264            }
265        }
266    }
267}
268
/// Table of assembler configurations.
///
/// Each entry names the datasource IDs it covers, the file-name patterns of the
/// files that belong together, and the `AssemblyMode` used to assemble them.
/// The test `test_every_datasource_id_is_accounted_for` requires every
/// `DatasourceId` to appear either here or in `UNASSEMBLED_DATASOURCE_IDS`,
/// and never in both.
pub static ASSEMBLERS: &[AssemblerConfig] = &[
    // ── Sibling-merge assemblers ──
    //
    // npm ecosystem: package.json + lockfiles in same directory.
    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::BunLock,
            DatasourceId::BunLockb,
            DatasourceId::NpmPackageJson,
            DatasourceId::NpmPackageLockJson,
            DatasourceId::YarnLock,
            DatasourceId::YarnLockV1,
            DatasourceId::YarnLockV2,
            DatasourceId::YarnPnpCjs,
            DatasourceId::PnpmLockYaml,
            DatasourceId::PnpmWorkspaceYaml,
        ],
        sibling_file_patterns: &[
            "package.json",
            "bun.lock",
            "bun.lockb",
            ".package-lock.json",
            "package-lock.json",
            ".npm-shrinkwrap.json",
            "npm-shrinkwrap.json",
            "yarn.lock",
            ".pnp.cjs",
            "pnpm-lock.yaml",
            "shrinkwrap.yaml",
            "pnpm-workspace.yaml",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Rust/Cargo ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // CocoaPods ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CocoapodsPodspec,
            DatasourceId::CocoapodsPodspecJson,
            DatasourceId::CocoapodsPodfile,
            DatasourceId::CocoapodsPodfileLock,
        ],
        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // PHP Composer ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
        sibling_file_patterns: &[
            "*composer.json",
            "composer.*.json",
            "*composer.lock",
            "composer.*.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Go ecosystem (includes legacy Godeps)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::GoMod,
            DatasourceId::GoModGraph,
            DatasourceId::GoSum,
            DatasourceId::GoWork,
            DatasourceId::Godeps,
        ],
        sibling_file_patterns: &[
            "go.mod",
            "go.work",
            "go.mod.graph",
            "go.modgraph",
            "go.sum",
            "Godeps.json",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Dart/Flutter ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Pixi ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Nix flakes: flake.nix + flake.lock
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
        sibling_file_patterns: &["flake.nix", "flake.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Nix default.nix (one package per PackageData entry)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NixDefaultNix],
        sibling_file_patterns: &["default.nix"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Helm chart ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Haskell/Hackage ecosystem.
    // `special_directory_merger_for` maps HackageCabal to the Hackage directory merger.
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::HackageCabal,
            DatasourceId::HackageCabalProject,
            DatasourceId::HackageStackYaml,
        ],
        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Chef ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::ChefCookbookMetadataJson,
            DatasourceId::ChefCookbookMetadataRb,
        ],
        sibling_file_patterns: &["metadata.json", "metadata.rb"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Conan (C/C++) ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::ConanConanFilePy,
            DatasourceId::ConanConanFileTxt,
            DatasourceId::ConanLock,
            DatasourceId::ConanConanDataYml,
        ],
        sibling_file_patterns: &[
            "conanfile.py",
            "conanfile.txt",
            "conan.lock",
            "conandata.yml",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Maven/Java ecosystem (nested merge via META-INF)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::MavenPom,
            DatasourceId::MavenPomProperties,
            DatasourceId::JavaJarManifest,
            DatasourceId::JavaOsgiManifest,
        ],
        sibling_file_patterns: &[
            "pom.xml",
            "*.pom",
            "pom.properties",
            "**/META-INF/MANIFEST.MF",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Python wheel archive together with pip's origin.json.
    // NOTE(review): PypiWheel is also listed in the Python/PyPI config below —
    // confirm which config the datasource lookup is expected to resolve to.
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
        sibling_file_patterns: &["*.whl", "origin.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Python/PyPI ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::PypiPyprojectToml,
            DatasourceId::PypiPoetryPyprojectToml,
            DatasourceId::PypiSetupPy,
            DatasourceId::PypiSetupCfg,
            DatasourceId::PypiWheel,
            DatasourceId::PypiWheelMetadata,
            DatasourceId::PypiEgg,
            DatasourceId::PypiEggPkginfo,
            DatasourceId::PypiEditableEggPkginfo,
            DatasourceId::PypiJson,
            DatasourceId::PypiSdist,
            DatasourceId::PypiSdistPkginfo,
            DatasourceId::PypiInspectDeplock,
            DatasourceId::PipRequirements,
            DatasourceId::PypiPoetryLock,
            DatasourceId::PypiPylockToml,
            DatasourceId::PypiUvLock,
            DatasourceId::Pipfile,
            DatasourceId::PipfileLock,
        ],
        sibling_file_patterns: &[
            "pyproject.toml",
            "setup.py",
            "setup.cfg",
            "PKG-INFO",
            "METADATA",
            "pypi.json",
            "pip-inspect.deplock",
            "*.tar.gz",
            "*.tgz",
            "*.tar.bz2",
            "*.tar.xz",
            "*.zip",
            "requirements*.txt",
            "Pipfile",
            "Pipfile.lock",
            "poetry.lock",
            "pylock.toml",
            "pylock.*.toml",
            "uv.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Deno ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ruby/RubyGems ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::GemArchiveExtracted,
            DatasourceId::Gemspec,
            DatasourceId::GemspecExtracted,
            DatasourceId::Gemfile,
            DatasourceId::GemfileExtracted,
            DatasourceId::GemfileLock,
            DatasourceId::GemfileLockExtracted,
            DatasourceId::GemArchive,
        ],
        sibling_file_patterns: &[
            "metadata.gz-extract",
            "**/data.gz-extract/*.gemspec",
            "**/data.gz-extract/Gemfile",
            "**/data.gz-extract/Gemfile.lock",
            "*.gemspec",
            "Gemfile",
            "Gemfile.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Conda ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CondaMetaYaml,
            DatasourceId::CondaYaml,
            DatasourceId::CondaMetaJson,
        ],
        sibling_file_patterns: &[
            "meta.yaml",
            "meta.yml",
            "environment.yml",
            "environment.yaml",
            "conda.yaml",
            "conda.yml",
            "*conda*.yaml",
            "*conda*.yml",
            "env.yaml",
            "env.yml",
            "*env*.yaml",
            "*env*.yml",
            "*environment*.yaml",
            "*environment*.yml",
            "*.json",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM specfile (source packages)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmSpecfile],
        sibling_file_patterns: &["*.spec"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Debian source packages (nested merge via debian/ directory)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianControlInSource,
            DatasourceId::DebianCopyrightInSource,
        ],
        sibling_file_patterns: &["**/debian/control", "**/debian/copyright"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Gradle/Android ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Gradle module metadata (*.module), one package per PackageData entry
    AssemblerConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        sibling_file_patterns: &["*.module"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // CPAN/Perl ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CpanMetaJson,
            DatasourceId::CpanMetaYml,
            DatasourceId::CpanManifest,
            DatasourceId::CpanDistIni,
            DatasourceId::CpanMakefile,
        ],
        sibling_file_patterns: &[
            "META.json",
            "META.yml",
            "MANIFEST",
            "dist.ini",
            "Makefile.PL",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // NuGet/.NET ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::NugetCsproj,
            DatasourceId::NugetFsproj,
            DatasourceId::NugetNuspec,
            DatasourceId::NugetNupkg,
            DatasourceId::NugetProjectJson,
            DatasourceId::NugetProjectLockJson,
            DatasourceId::NugetPackagesConfig,
            DatasourceId::NugetPackagesLock,
            DatasourceId::NugetVbproj,
        ],
        sibling_file_patterns: &[
            "*.csproj",
            "*.fsproj",
            "*.nuspec",
            "*.nupkg",
            "project.json",
            "project.lock.json",
            "packages.config",
            "packages.lock.json",
            "*.packages.lock.json",
            "*.vbproj",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // NuGet *.deps.json files, one package per PackageData entry
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NugetDepsJson],
        sibling_file_patterns: &["*.deps.json"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Swift/SPM ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::SwiftPackageManifestJson,
            DatasourceId::SwiftPackageResolved,
            DatasourceId::SwiftPackageShowDependencies,
        ],
        sibling_file_patterns: &[
            "Package.swift.json",
            "Package.swift.deplock",
            "Package.resolved",
            ".package.resolved",
            "swift-show-dependencies.deplock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // ── Standalone assemblers (single file → single package) ──
    //
    // These ecosystems have only one manifest file type with no sibling merging.
    // They still need configs so their datasource_ids are recognized by the assembler.
    //
    // Bower (JavaScript)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BowerJson],
        sibling_file_patterns: &["bower.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // CRAN (R language)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::CranDescription],
        sibling_file_patterns: &["DESCRIPTION"],
        mode: AssemblyMode::SiblingMerge,
    },
    // FreeBSD packages
    AssemblerConfig {
        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
        sibling_file_patterns: &["+COMPACT_MANIFEST"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Haxe ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::HaxelibJson],
        sibling_file_patterns: &["haxelib.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Git submodules (.gitmodules)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::Gitmodules],
        sibling_file_patterns: &[".gitmodules"],
        mode: AssemblyMode::SiblingMerge,
    },
    // OCaml/opam ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::OpamFile],
        sibling_file_patterns: &["opam", "*.opam"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM Mariner manifest
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmMarinerManifest],
        sibling_file_patterns: &["*.rpm.manifest"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM yumdb entries, one package per PackageData entry
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmYumdb],
        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Microsoft Update Manifest
    AssemblerConfig {
        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
        sibling_file_patterns: &["*.mum"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Autotools (C/C++ build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AutotoolsConfigure],
        sibling_file_patterns: &["configure", "configure.ac"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Bazel (build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BazelBuild],
        sibling_file_patterns: &["BUILD"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Bazel MODULE.bazel, one package per PackageData entry
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BazelModule],
        sibling_file_patterns: &["MODULE.bazel"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Buck (build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ant/Ivy (Java dependency management)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AntIvyXml],
        sibling_file_patterns: &["ivy.xml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Meteor (JavaScript platform)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::MeteorPackage],
        sibling_file_patterns: &["package.js"],
        mode: AssemblyMode::SiblingMerge,
    },
    // ── One-per-PackageData assemblers (database files with many packages) ──
    //
    // Alpine installed package database
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        sibling_file_patterns: &["installed"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Alpine APKBUILD recipe (uses SiblingMerge, unlike the installed database above)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AlpineApkbuild],
        sibling_file_patterns: &["APKBUILD"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM installed package databases (BDB, NDB, SQLite)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::RpmInstalledDatabaseBdb,
            DatasourceId::RpmInstalledDatabaseNdb,
            DatasourceId::RpmInstalledDatabaseSqlite,
        ],
        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian installed package databases
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianInstalledStatusDb,
            DatasourceId::DebianDistrolessInstalledDb,
        ],
        sibling_file_patterns: &["status"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Extracted .deb contents: control + md5sums (uses SiblingMerge)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianControlExtractedDeb,
            DatasourceId::DebianMd5SumsInExtractedDeb,
        ],
        sibling_file_patterns: &["control", "md5sums"],
        mode: AssemblyMode::SiblingMerge,
    },
    // *.ABOUT files, one package per PackageData entry
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        sibling_file_patterns: &["*.ABOUT"],
        mode: AssemblyMode::OnePerPackageData,
    },
];
764
/// Datasource IDs intentionally excluded from package assembly.
///
/// This list is runtime-significant: files with these datasource IDs may remain
/// unowned by any Package, while their dependencies are still eligible for
/// top-level hoisting. Tests also use it to enforce explicit assembly accounting:
/// an ID listed here must not also appear in `ASSEMBLERS`.
pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
    // Non-package metadata
    DatasourceId::Readme,
    DatasourceId::EtcOsRelease,
    // Binary archives (require external extraction via ExtractCode before scanning)
    DatasourceId::AlpineApkArchive,
    DatasourceId::AndroidAarLibrary,
    DatasourceId::AndroidApk,
    DatasourceId::AppleDmg,
    DatasourceId::Axis2Mar,
    DatasourceId::ChromeCrx,
    DatasourceId::DebianDeb,
    DatasourceId::DebianOriginalSourceTarball,
    DatasourceId::DebianSourceMetadataTarball,
    DatasourceId::InstallshieldInstaller,
    DatasourceId::IosIpa,
    DatasourceId::IsoDiskImage,
    DatasourceId::JavaEarArchive,
    DatasourceId::JavaJar,
    DatasourceId::JavaWarArchive,
    DatasourceId::JbossSar,
    DatasourceId::MicrosoftCabinet,
    DatasourceId::MozillaXpi,
    DatasourceId::NsisInstaller,
    DatasourceId::RpmArchive,
    DatasourceId::SharShellArchive,
    DatasourceId::SquashfsDiskImage,
    // Supplementary metadata (not primary package definitions)
    DatasourceId::ArchAurinfo,
    DatasourceId::ArchPkginfo,
    DatasourceId::ArchSrcinfo,
    DatasourceId::Axis2ModuleXml,
    DatasourceId::ClojureDepsEdn,
    DatasourceId::ClojureProjectClj,
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
    DatasourceId::DebianCopyright,
    DatasourceId::DebianCopyrightInPackage,
    DatasourceId::DebianCopyrightStandalone,
    DatasourceId::GoBinary,
    DatasourceId::DebianSourceControlDsc,
    DatasourceId::Dockerfile,
    DatasourceId::HexMixLock,
    DatasourceId::JavaEarApplicationXml,
    DatasourceId::JavaWarWebXml,
    DatasourceId::JbossServiceXml,
    DatasourceId::MesonBuild,
    DatasourceId::GemGemspecInstalledSpecifications,
    DatasourceId::NugetDirectoryBuildProps,
    DatasourceId::NugetDirectoryPackagesProps,
    DatasourceId::CitationCff,
    DatasourceId::PubliccodeYaml,
    DatasourceId::RpmPackageLicenses,
    DatasourceId::RustBinary,
    DatasourceId::SbtBuildSbt,
    DatasourceId::VcpkgJson,
];
827
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Every `DatasourceId` must be listed in `ASSEMBLERS` or in
    /// `UNASSEMBLED_DATASOURCE_IDS`, and never in both.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        let assembled: HashSet<DatasourceId> = ASSEMBLERS
            .iter()
            .flat_map(|config| config.datasource_ids.iter().copied())
            .collect();
        let unassembled: HashSet<DatasourceId> =
            UNASSEMBLED_DATASOURCE_IDS.iter().copied().collect();

        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        let is_accounted_for =
            |dsid: &DatasourceId| assembled.contains(dsid) || unassembled.contains(dsid);
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !is_accounted_for(dsid))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// The pass registry must not list any pass twice.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let mut distinct: HashSet<PostAssemblyPassKind> = HashSet::new();
        distinct.extend(POST_ASSEMBLY_PASSES.iter().copied());

        assert_eq!(
            distinct.len(),
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Every pass variant must be present in the registry, exactly once.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let registered: HashSet<PostAssemblyPassKind> =
            POST_ASSEMBLY_PASSES.iter().copied().collect();

        let mut missing = Vec::new();
        for pass in PostAssemblyPassKind::iter() {
            if !registered.contains(&pass) {
                missing.push(pass);
            }
        }
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|entry| **entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    /// With empty inputs, no pass may consider itself relevant.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let empty_inputs = PostAssemblyInputs::default();

        PostAssemblyPassKind::iter().for_each(|pass| {
            assert!(
                !pass.should_run(&empty_inputs),
                "{pass:?} should skip when no relevant inputs are present"
            );
        });
    }

    /// An npm workspace scan enables exactly the two npm passes.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);

        let inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };

        let mut runnable = HashSet::new();
        for pass in PostAssemblyPassKind::iter() {
            if pass.should_run(&inputs) {
                runnable.insert(pass);
            }
        }

        assert_eq!(runnable, expected);
    }

    /// The Cargo workspace merge is gated purely on the workspace marker flag.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        let merge_pass = PostAssemblyPassKind::CargoWorkspaceMerge;

        let without_markers = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers: false,
        };
        assert!(!merge_pass.should_run(&without_markers));

        let with_markers = PostAssemblyInputs {
            has_cargo_workspace_markers: true,
            ..without_markers
        };
        assert!(merge_pass.should_run(&with_markers));
    }
}