Skip to main content

provenant/assembly/
assemblers.rs

1use std::collections::HashSet;
2
3use crate::models::PackageType;
4use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
5use strum::EnumIter;
6
7use super::{
8    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
9    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, file_ref_resolve,
10    hackage_merge, nix_flake_compat_merge, npm_resource_assign, nuget_cpm_resolve,
11    python_requirements_assign, ruby_resource_assign, swift_merge, topology,
12};
13
14#[derive(Clone, Copy)]
15pub(super) enum SpecialDirectoryMergerKind {
16    Skip,
17    Bazel,
18    Hackage,
19}
20
21#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
22pub(super) enum PostAssemblyPassKind {
23    SwiftMerge,
24    CondaRootfsMerge,
25    NpmResourceAssign,
26    PythonRequirementsAssign,
27    FileReferenceResolve,
28    RpmYumdbMerge,
29    NpmWorkspaceMerge,
30    CargoWorkspaceMerge,
31    NugetCpmResolve,
32    CargoResourceAssign,
33    ComposerResourceAssign,
34    RubyResourceAssign,
35    NixFlakeCompatMerge,
36    BazelPrune,
37}
38
39pub(super) fn special_directory_merger_for(
40    config_key: DatasourceId,
41) -> Option<SpecialDirectoryMergerKind> {
42    match config_key {
43        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
44        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
45        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
46        _ => None,
47    }
48}
49
50pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
51    PostAssemblyPassKind::SwiftMerge,
52    PostAssemblyPassKind::CondaRootfsMerge,
53    PostAssemblyPassKind::NpmResourceAssign,
54    PostAssemblyPassKind::PythonRequirementsAssign,
55    PostAssemblyPassKind::FileReferenceResolve,
56    PostAssemblyPassKind::RpmYumdbMerge,
57    PostAssemblyPassKind::NpmWorkspaceMerge,
58    PostAssemblyPassKind::CargoWorkspaceMerge,
59    PostAssemblyPassKind::NugetCpmResolve,
60    PostAssemblyPassKind::CargoResourceAssign,
61    PostAssemblyPassKind::ComposerResourceAssign,
62    PostAssemblyPassKind::RubyResourceAssign,
63    PostAssemblyPassKind::NixFlakeCompatMerge,
64    PostAssemblyPassKind::BazelPrune,
65];
66
67const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
68    DatasourceId::SwiftPackageManifestJson,
69    DatasourceId::SwiftPackageResolved,
70    DatasourceId::SwiftPackageShowDependencies,
71];
72
73const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
74    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];
75
76const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
77    DatasourceId::RpmInstalledDatabaseBdb,
78    DatasourceId::RpmInstalledDatabaseNdb,
79    DatasourceId::RpmInstalledDatabaseSqlite,
80];
81
82const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
83    DatasourceId::NugetDirectoryBuildProps,
84    DatasourceId::NugetDirectoryPackagesProps,
85];
86
87const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
88    DatasourceId::NugetCsproj,
89    DatasourceId::NugetFsproj,
90    DatasourceId::NugetVbproj,
91];
92
93#[derive(Default)]
94struct PostAssemblyInputs {
95    package_types: HashSet<PackageType>,
96    file_datasource_ids: HashSet<DatasourceId>,
97    has_npm_workspace_markers: bool,
98    has_cargo_workspace_markers: bool,
99}
100
101pub(super) fn run_post_assembly_passes(
102    files: &mut [FileInfo],
103    packages: &mut Vec<Package>,
104    dependencies: &mut Vec<TopLevelDependency>,
105    topology_plan: &topology::TopologyPlan,
106) {
107    let inputs = PostAssemblyInputs::collect(files, packages);
108
109    for pass in POST_ASSEMBLY_PASSES {
110        if !pass.should_run(&inputs) {
111            continue;
112        }
113
114        pass.run(files, packages, dependencies, topology_plan);
115    }
116}
117
118impl PostAssemblyInputs {
119    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
120        let mut inputs = Self {
121            package_types: packages
122                .iter()
123                .filter_map(|package| package.package_type)
124                .collect(),
125            ..Self::default()
126        };
127
128        for file in files {
129            for package_data in &file.package_data {
130                let Some(datasource_id) = package_data.datasource_id else {
131                    continue;
132                };
133
134                inputs.file_datasource_ids.insert(datasource_id);
135
136                if matches!(
137                    datasource_id,
138                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
139                ) && package_data
140                    .extra_data
141                    .as_ref()
142                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
143                {
144                    inputs.has_npm_workspace_markers = true;
145                }
146
147                if datasource_id == DatasourceId::CargoToml
148                    && package_data
149                        .extra_data
150                        .as_ref()
151                        .and_then(|extra_data| extra_data.get("workspace"))
152                        .and_then(|workspace| workspace.get("members"))
153                        .and_then(|members| members.as_array())
154                        .is_some_and(|members| !members.is_empty())
155                {
156                    inputs.has_cargo_workspace_markers = true;
157                }
158            }
159        }
160
161        inputs
162    }
163
164    fn has_package_type(&self, package_type: PackageType) -> bool {
165        self.package_types.contains(&package_type)
166    }
167
168    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
169        datasource_ids
170            .iter()
171            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
172    }
173
174    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
175        datasource_ids
176            .iter()
177            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
178    }
179}
180
181impl SpecialDirectoryMergerKind {
182    pub(super) fn run(
183        self,
184        config: &AssemblerConfig,
185        files: &[FileInfo],
186        file_indices: &[usize],
187    ) -> Vec<DirectoryMergeOutput> {
188        match self {
189            Self::Skip => Vec::new(),
190            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
191            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
192        }
193    }
194}
195
196impl PostAssemblyPassKind {
197    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
198        match self {
199            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
200            Self::CondaRootfsMerge => {
201                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
202            }
203            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
204            Self::PythonRequirementsAssign => {
205                inputs.has_package_type(PackageType::Pypi)
206                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
207            }
208            Self::FileReferenceResolve => {
209                file_ref_resolve::has_relevant_file_reference_datasource_ids(
210                    &inputs.file_datasource_ids,
211                )
212            }
213            Self::RpmYumdbMerge => {
214                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
215                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
216            }
217            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
218            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
219            Self::NugetCpmResolve => {
220                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
221                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
222            }
223            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
224            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
225            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
226            Self::NixFlakeCompatMerge => {
227                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
228                    && inputs.has_any_file_datasource(&[
229                        DatasourceId::NixFlakeNix,
230                        DatasourceId::NixFlakeLock,
231                    ])
232            }
233            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
234        }
235    }
236
237    fn run(
238        self,
239        files: &mut [FileInfo],
240        packages: &mut Vec<Package>,
241        dependencies: &mut Vec<TopLevelDependency>,
242        topology_plan: &topology::TopologyPlan,
243    ) {
244        match self {
245            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
246            Self::CondaRootfsMerge => {
247                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
248            }
249            Self::NpmResourceAssign => {
250                npm_resource_assign::assign_npm_package_resources(files, packages)
251            }
252            Self::PythonRequirementsAssign => {
253                python_requirements_assign::assign_python_requirements_to_projects(
254                    files,
255                    packages,
256                    dependencies,
257                )
258            }
259            Self::FileReferenceResolve => {
260                file_ref_resolve::resolve_file_references(files, packages, dependencies)
261            }
262            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
263            Self::NpmWorkspaceMerge => {
264                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
265            }
266            Self::CargoWorkspaceMerge => {
267                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
268            }
269            Self::NugetCpmResolve => {
270                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
271            }
272            Self::CargoResourceAssign => {
273                cargo_resource_assign::assign_cargo_package_resources(files, packages)
274            }
275            Self::ComposerResourceAssign => {
276                composer_resource_assign::assign_composer_package_resources(files, packages)
277            }
278            Self::RubyResourceAssign => {
279                ruby_resource_assign::assign_ruby_package_resources(files, packages)
280            }
281            Self::NixFlakeCompatMerge => {
282                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
283            }
284            Self::BazelPrune => {
285                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
286            }
287        }
288    }
289}
290
291pub static ASSEMBLERS: &[AssemblerConfig] = &[
292    // ── Sibling-merge assemblers ──
293    //
294    // npm ecosystem: package.json + lockfiles in same directory.
295    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
296    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
297    AssemblerConfig {
298        datasource_ids: &[
299            DatasourceId::BunLock,
300            DatasourceId::BunLockb,
301            DatasourceId::NpmPackageJson,
302            DatasourceId::NpmPackageLockJson,
303            DatasourceId::YarnLock,
304            DatasourceId::YarnLockV1,
305            DatasourceId::YarnLockV2,
306            DatasourceId::YarnPnpCjs,
307            DatasourceId::PnpmLockYaml,
308            DatasourceId::PnpmWorkspaceYaml,
309        ],
310        sibling_file_patterns: &[
311            "package.json",
312            "bun.lock",
313            "bun.lockb",
314            ".package-lock.json",
315            "package-lock.json",
316            ".npm-shrinkwrap.json",
317            "npm-shrinkwrap.json",
318            "yarn.lock",
319            ".pnp.cjs",
320            "pnpm-lock.yaml",
321            "shrinkwrap.yaml",
322            "pnpm-workspace.yaml",
323        ],
324        mode: AssemblyMode::SiblingMerge,
325    },
326    // Rust/Cargo ecosystem
327    AssemblerConfig {
328        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
329        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
330        mode: AssemblyMode::SiblingMerge,
331    },
332    // Julia ecosystem
333    AssemblerConfig {
334        datasource_ids: &[
335            DatasourceId::JuliaProjectToml,
336            DatasourceId::JuliaManifestToml,
337        ],
338        sibling_file_patterns: &["Project.toml", "Manifest.toml"],
339        mode: AssemblyMode::SiblingMerge,
340    },
341    // CocoaPods ecosystem
342    AssemblerConfig {
343        datasource_ids: &[
344            DatasourceId::CocoapodsPodspec,
345            DatasourceId::CocoapodsPodspecJson,
346            DatasourceId::CocoapodsPodfile,
347            DatasourceId::CocoapodsPodfileLock,
348        ],
349        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
350        mode: AssemblyMode::SiblingMerge,
351    },
352    // PHP Composer ecosystem
353    AssemblerConfig {
354        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
355        sibling_file_patterns: &[
356            "*composer.json",
357            "composer.*.json",
358            "*composer.lock",
359            "composer.*.lock",
360        ],
361        mode: AssemblyMode::SiblingMerge,
362    },
363    // Go ecosystem (includes legacy Godeps)
364    AssemblerConfig {
365        datasource_ids: &[
366            DatasourceId::GoMod,
367            DatasourceId::GoModGraph,
368            DatasourceId::GoSum,
369            DatasourceId::GoWork,
370            DatasourceId::Godeps,
371        ],
372        sibling_file_patterns: &[
373            "go.mod",
374            "go.work",
375            "go.mod.graph",
376            "go.modgraph",
377            "go.sum",
378            "Godeps.json",
379        ],
380        mode: AssemblyMode::SiblingMerge,
381    },
382    // Dart/Flutter ecosystem
383    AssemblerConfig {
384        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
385        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
386        mode: AssemblyMode::SiblingMerge,
387    },
388    // Pixi ecosystem
389    AssemblerConfig {
390        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
391        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
392        mode: AssemblyMode::SiblingMerge,
393    },
394    AssemblerConfig {
395        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
396        sibling_file_patterns: &["flake.nix", "flake.lock"],
397        mode: AssemblyMode::SiblingMerge,
398    },
399    AssemblerConfig {
400        datasource_ids: &[DatasourceId::NixDefaultNix],
401        sibling_file_patterns: &["default.nix"],
402        mode: AssemblyMode::OnePerPackageData,
403    },
404    // Helm chart ecosystem
405    AssemblerConfig {
406        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
407        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
408        mode: AssemblyMode::SiblingMerge,
409    },
410    AssemblerConfig {
411        datasource_ids: &[
412            DatasourceId::HackageCabal,
413            DatasourceId::HackageCabalProject,
414            DatasourceId::HackageStackYaml,
415        ],
416        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
417        mode: AssemblyMode::SiblingMerge,
418    },
419    // Chef ecosystem
420    AssemblerConfig {
421        datasource_ids: &[
422            DatasourceId::ChefCookbookMetadataJson,
423            DatasourceId::ChefCookbookMetadataRb,
424        ],
425        sibling_file_patterns: &["metadata.json", "metadata.rb"],
426        mode: AssemblyMode::SiblingMerge,
427    },
428    // Conan (C/C++) ecosystem
429    AssemblerConfig {
430        datasource_ids: &[
431            DatasourceId::ConanConanFilePy,
432            DatasourceId::ConanConanFileTxt,
433            DatasourceId::ConanLock,
434            DatasourceId::ConanConanDataYml,
435        ],
436        sibling_file_patterns: &[
437            "conanfile.py",
438            "conanfile.txt",
439            "conan.lock",
440            "conandata.yml",
441        ],
442        mode: AssemblyMode::SiblingMerge,
443    },
444    // Maven/Java ecosystem (nested merge via META-INF)
445    AssemblerConfig {
446        datasource_ids: &[
447            DatasourceId::MavenPom,
448            DatasourceId::MavenPomProperties,
449            DatasourceId::JavaJarManifest,
450            DatasourceId::JavaOsgiManifest,
451        ],
452        sibling_file_patterns: &[
453            "pom.xml",
454            "*.pom",
455            "pom.properties",
456            "**/META-INF/MANIFEST.MF",
457        ],
458        mode: AssemblyMode::SiblingMerge,
459    },
460    AssemblerConfig {
461        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
462        sibling_file_patterns: &["*.whl", "origin.json"],
463        mode: AssemblyMode::SiblingMerge,
464    },
465    // Python/PyPI ecosystem
466    AssemblerConfig {
467        datasource_ids: &[
468            DatasourceId::PypiPyprojectToml,
469            DatasourceId::PypiPoetryPyprojectToml,
470            DatasourceId::PypiSetupPy,
471            DatasourceId::PypiSetupCfg,
472            DatasourceId::PypiWheel,
473            DatasourceId::PypiWheelMetadata,
474            DatasourceId::PypiEgg,
475            DatasourceId::PypiEggPkginfo,
476            DatasourceId::PypiEditableEggPkginfo,
477            DatasourceId::PypiJson,
478            DatasourceId::PypiSdist,
479            DatasourceId::PypiSdistPkginfo,
480            DatasourceId::PypiInspectDeplock,
481            DatasourceId::PipRequirements,
482            DatasourceId::PypiPoetryLock,
483            DatasourceId::PypiPylockToml,
484            DatasourceId::PypiUvLock,
485            DatasourceId::Pipfile,
486            DatasourceId::PipfileLock,
487        ],
488        sibling_file_patterns: &[
489            "pyproject.toml",
490            "setup.py",
491            "setup.cfg",
492            "PKG-INFO",
493            "METADATA",
494            "pypi.json",
495            "pip-inspect.deplock",
496            "*.tar.gz",
497            "*.tgz",
498            "*.tar.bz2",
499            "*.tar.xz",
500            "*.zip",
501            "requirements*.txt",
502            "Pipfile",
503            "Pipfile.lock",
504            "poetry.lock",
505            "pylock.toml",
506            "pylock.*.toml",
507            "uv.lock",
508        ],
509        mode: AssemblyMode::SiblingMerge,
510    },
511    AssemblerConfig {
512        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
513        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
514        mode: AssemblyMode::SiblingMerge,
515    },
516    // Ruby/RubyGems ecosystem
517    AssemblerConfig {
518        datasource_ids: &[
519            DatasourceId::GemArchiveExtracted,
520            DatasourceId::Gemspec,
521            DatasourceId::GemspecExtracted,
522            DatasourceId::Gemfile,
523            DatasourceId::GemfileExtracted,
524            DatasourceId::GemfileLock,
525            DatasourceId::GemfileLockExtracted,
526            DatasourceId::GemArchive,
527        ],
528        sibling_file_patterns: &[
529            "metadata.gz-extract",
530            "**/data.gz-extract/*.gemspec",
531            "**/data.gz-extract/Gemfile",
532            "**/data.gz-extract/Gemfile.lock",
533            "*.gemspec",
534            "Gemfile",
535            "Gemfile.lock",
536        ],
537        mode: AssemblyMode::SiblingMerge,
538    },
539    // Conda ecosystem
540    AssemblerConfig {
541        datasource_ids: &[
542            DatasourceId::CondaMetaYaml,
543            DatasourceId::CondaYaml,
544            DatasourceId::CondaMetaJson,
545        ],
546        sibling_file_patterns: &[
547            "meta.yaml",
548            "meta.yml",
549            "environment.yml",
550            "environment.yaml",
551            "conda.yaml",
552            "conda.yml",
553            "*conda*.yaml",
554            "*conda*.yml",
555            "env.yaml",
556            "env.yml",
557            "*env*.yaml",
558            "*env*.yml",
559            "*environment*.yaml",
560            "*environment*.yml",
561            "*.json",
562        ],
563        mode: AssemblyMode::SiblingMerge,
564    },
565    // RPM specfile (source packages)
566    AssemblerConfig {
567        datasource_ids: &[DatasourceId::RpmSpecfile],
568        sibling_file_patterns: &["*.spec"],
569        mode: AssemblyMode::SiblingMerge,
570    },
571    // Debian source packages (nested merge via debian/ directory)
572    AssemblerConfig {
573        datasource_ids: &[
574            DatasourceId::DebianControlInSource,
575            DatasourceId::DebianCopyrightInSource,
576        ],
577        sibling_file_patterns: &["**/debian/control", "**/debian/copyright"],
578        mode: AssemblyMode::SiblingMerge,
579    },
580    // Gradle/Android ecosystem
581    AssemblerConfig {
582        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
583        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
584        mode: AssemblyMode::SiblingMerge,
585    },
586    AssemblerConfig {
587        datasource_ids: &[DatasourceId::GradleModule],
588        sibling_file_patterns: &["*.module"],
589        mode: AssemblyMode::OnePerPackageData,
590    },
591    // CPAN/Perl ecosystem
592    AssemblerConfig {
593        datasource_ids: &[
594            DatasourceId::CpanMetaJson,
595            DatasourceId::CpanMetaYml,
596            DatasourceId::CpanManifest,
597            DatasourceId::CpanDistIni,
598            DatasourceId::CpanMakefile,
599        ],
600        sibling_file_patterns: &[
601            "META.json",
602            "META.yml",
603            "MANIFEST",
604            "dist.ini",
605            "Makefile.PL",
606        ],
607        mode: AssemblyMode::SiblingMerge,
608    },
609    // NuGet/.NET ecosystem
610    AssemblerConfig {
611        datasource_ids: &[
612            DatasourceId::NugetCsproj,
613            DatasourceId::NugetFsproj,
614            DatasourceId::NugetNuspec,
615            DatasourceId::NugetNupkg,
616            DatasourceId::NugetProjectJson,
617            DatasourceId::NugetProjectLockJson,
618            DatasourceId::NugetPackagesConfig,
619            DatasourceId::NugetPackagesLock,
620            DatasourceId::NugetVbproj,
621        ],
622        sibling_file_patterns: &[
623            "*.csproj",
624            "*.fsproj",
625            "*.nuspec",
626            "*.nupkg",
627            "project.json",
628            "project.lock.json",
629            "packages.config",
630            "packages.lock.json",
631            "*.packages.lock.json",
632            "*.vbproj",
633        ],
634        mode: AssemblyMode::SiblingMerge,
635    },
636    AssemblerConfig {
637        datasource_ids: &[DatasourceId::NugetDepsJson],
638        sibling_file_patterns: &["*.deps.json"],
639        mode: AssemblyMode::OnePerPackageData,
640    },
641    // Swift/SPM ecosystem
642    AssemblerConfig {
643        datasource_ids: &[
644            DatasourceId::SwiftPackageManifestJson,
645            DatasourceId::SwiftPackageResolved,
646            DatasourceId::SwiftPackageShowDependencies,
647        ],
648        sibling_file_patterns: &[
649            "Package.swift.json",
650            "Package.swift.deplock",
651            "Package.resolved",
652            ".package.resolved",
653            "swift-show-dependencies.deplock",
654        ],
655        mode: AssemblyMode::SiblingMerge,
656    },
657    // ── Standalone assemblers (single file → single package) ──
658    //
659    // These ecosystems have only one manifest file type with no sibling merging.
660    // They still need configs so their datasource_ids are recognized by the assembler.
661    //
662    // Bower (JavaScript)
663    AssemblerConfig {
664        datasource_ids: &[DatasourceId::BowerJson],
665        sibling_file_patterns: &["bower.json"],
666        mode: AssemblyMode::SiblingMerge,
667    },
668    // CRAN (R language)
669    AssemblerConfig {
670        datasource_ids: &[DatasourceId::CranDescription],
671        sibling_file_patterns: &["DESCRIPTION"],
672        mode: AssemblyMode::SiblingMerge,
673    },
674    // FreeBSD packages
675    AssemblerConfig {
676        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
677        sibling_file_patterns: &["+COMPACT_MANIFEST"],
678        mode: AssemblyMode::SiblingMerge,
679    },
680    // Haxe ecosystem
681    AssemblerConfig {
682        datasource_ids: &[DatasourceId::HaxelibJson],
683        sibling_file_patterns: &["haxelib.json"],
684        mode: AssemblyMode::SiblingMerge,
685    },
686    AssemblerConfig {
687        datasource_ids: &[DatasourceId::Gitmodules],
688        sibling_file_patterns: &[".gitmodules"],
689        mode: AssemblyMode::SiblingMerge,
690    },
691    // OCaml/opam ecosystem
692    AssemblerConfig {
693        datasource_ids: &[DatasourceId::OpamFile],
694        sibling_file_patterns: &["opam", "*.opam"],
695        mode: AssemblyMode::SiblingMerge,
696    },
697    // RPM Mariner manifest
698    AssemblerConfig {
699        datasource_ids: &[DatasourceId::RpmMarinerManifest],
700        sibling_file_patterns: &["*.rpm.manifest"],
701        mode: AssemblyMode::SiblingMerge,
702    },
703    AssemblerConfig {
704        datasource_ids: &[DatasourceId::RpmYumdb],
705        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
706        mode: AssemblyMode::OnePerPackageData,
707    },
708    // Microsoft Update Manifest
709    AssemblerConfig {
710        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
711        sibling_file_patterns: &["*.mum"],
712        mode: AssemblyMode::SiblingMerge,
713    },
714    // Autotools (C/C++ build system)
715    AssemblerConfig {
716        datasource_ids: &[DatasourceId::AutotoolsConfigure],
717        sibling_file_patterns: &["configure", "configure.ac"],
718        mode: AssemblyMode::SiblingMerge,
719    },
720    // Bazel (build system)
721    AssemblerConfig {
722        datasource_ids: &[DatasourceId::BazelBuild],
723        sibling_file_patterns: &["BUILD"],
724        mode: AssemblyMode::SiblingMerge,
725    },
726    AssemblerConfig {
727        datasource_ids: &[DatasourceId::BazelModule],
728        sibling_file_patterns: &["MODULE.bazel"],
729        mode: AssemblyMode::OnePerPackageData,
730    },
731    // Buck (build system)
732    AssemblerConfig {
733        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
734        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
735        mode: AssemblyMode::SiblingMerge,
736    },
737    // Ant/Ivy (Java dependency management)
738    AssemblerConfig {
739        datasource_ids: &[DatasourceId::AntIvyXml],
740        sibling_file_patterns: &["ivy.xml"],
741        mode: AssemblyMode::SiblingMerge,
742    },
743    // Meteor (JavaScript platform)
744    AssemblerConfig {
745        datasource_ids: &[DatasourceId::MeteorPackage],
746        sibling_file_patterns: &["package.js"],
747        mode: AssemblyMode::SiblingMerge,
748    },
749    // ── One-per-PackageData assemblers (database files with many packages) ──
750    //
751    // Alpine installed package database
752    AssemblerConfig {
753        datasource_ids: &[DatasourceId::AlpineInstalledDb],
754        sibling_file_patterns: &["installed"],
755        mode: AssemblyMode::OnePerPackageData,
756    },
757    AssemblerConfig {
758        datasource_ids: &[DatasourceId::AlpineApkbuild],
759        sibling_file_patterns: &["APKBUILD"],
760        mode: AssemblyMode::SiblingMerge,
761    },
762    // RPM installed package databases (BDB, NDB, SQLite)
763    AssemblerConfig {
764        datasource_ids: &[
765            DatasourceId::RpmInstalledDatabaseBdb,
766            DatasourceId::RpmInstalledDatabaseNdb,
767            DatasourceId::RpmInstalledDatabaseSqlite,
768        ],
769        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
770        mode: AssemblyMode::OnePerPackageData,
771    },
772    // Debian installed package databases
773    AssemblerConfig {
774        datasource_ids: &[
775            DatasourceId::DebianInstalledStatusDb,
776            DatasourceId::DebianDistrolessInstalledDb,
777        ],
778        sibling_file_patterns: &["status"],
779        mode: AssemblyMode::OnePerPackageData,
780    },
781    AssemblerConfig {
782        datasource_ids: &[
783            DatasourceId::DebianControlExtractedDeb,
784            DatasourceId::DebianMd5SumsInExtractedDeb,
785        ],
786        sibling_file_patterns: &["control", "md5sums"],
787        mode: AssemblyMode::SiblingMerge,
788    },
789    AssemblerConfig {
790        datasource_ids: &[DatasourceId::AboutFile],
791        sibling_file_patterns: &["*.ABOUT"],
792        mode: AssemblyMode::OnePerPackageData,
793    },
794];
795
/// Datasource IDs intentionally excluded from package assembly.
///
/// This list is runtime-significant: files with these datasource IDs may remain
/// unowned by any Package, while their dependencies are still eligible for
/// top-level hoisting.
///
/// It also serves as an explicit accounting ledger: the unit tests in this file
/// require every `DatasourceId` variant to appear either in an `AssemblerConfig`
/// inside `ASSEMBLERS` or in this list, and never in both. When a new
/// datasource ID is introduced, add it to exactly one of the two.
///
/// Entries are grouped by the reason they are left unassembled; the group
/// headings below are informational only.
pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
    // Non-package metadata
    DatasourceId::Readme,
    DatasourceId::EtcOsRelease,
    // Binary archives (require external extraction via ExtractCode before scanning)
    DatasourceId::AlpineApkArchive,
    DatasourceId::AndroidAarLibrary,
    DatasourceId::AndroidApk,
    DatasourceId::AppleDmg,
    DatasourceId::Axis2Mar,
    DatasourceId::ChromeCrx,
    DatasourceId::DebianDeb,
    DatasourceId::DebianOriginalSourceTarball,
    DatasourceId::DebianSourceMetadataTarball,
    DatasourceId::InstallshieldInstaller,
    DatasourceId::IosIpa,
    DatasourceId::IsoDiskImage,
    DatasourceId::JavaEarArchive,
    DatasourceId::JavaJar,
    DatasourceId::JavaWarArchive,
    DatasourceId::JbossSar,
    DatasourceId::MicrosoftCabinet,
    DatasourceId::MozillaXpi,
    DatasourceId::NsisInstaller,
    DatasourceId::RpmArchive,
    DatasourceId::SharShellArchive,
    DatasourceId::SquashfsDiskImage,
    // Supplementary metadata (not primary package definitions)
    DatasourceId::ArchAurinfo,
    DatasourceId::ArchPkginfo,
    DatasourceId::ArchSrcinfo,
    DatasourceId::Axis2ModuleXml,
    DatasourceId::ClojureDepsEdn,
    DatasourceId::ClojureProjectClj,
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
    DatasourceId::DebianCopyright,
    DatasourceId::DebianCopyrightInPackage,
    DatasourceId::DebianCopyrightStandalone,
    DatasourceId::GoBinary,
    DatasourceId::WindowsExecutable,
    DatasourceId::DebianSourceControlDsc,
    DatasourceId::Dockerfile,
    DatasourceId::HexMixLock,
    DatasourceId::JavaEarApplicationXml,
    DatasourceId::JavaWarWebXml,
    DatasourceId::JbossServiceXml,
    DatasourceId::MesonBuild,
    DatasourceId::GemGemspecInstalledSpecifications,
    DatasourceId::NugetDirectoryBuildProps,
    DatasourceId::NugetDirectoryPackagesProps,
    DatasourceId::CitationCff,
    DatasourceId::PubliccodeYaml,
    DatasourceId::RpmPackageLicenses,
    DatasourceId::RustBinary,
    DatasourceId::SbtBuildSbt,
    DatasourceId::VcpkgJson,
];
859
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    // Accounting check: each `DatasourceId` variant must be listed in an
    // assembler config or in `UNASSEMBLED_DATASOURCE_IDS`, and never in both.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        // Flatten every assembler's datasource IDs into a single set.
        let assembled: HashSet<DatasourceId> = ASSEMBLERS
            .iter()
            .flat_map(|config| config.datasource_ids.iter().copied())
            .collect();

        let mut unassembled: HashSet<DatasourceId> = HashSet::new();
        unassembled.extend(UNASSEMBLED_DATASOURCE_IDS.iter().copied());

        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // A variant is missing when neither set claims it.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    // The pass list must not repeat an entry: deduplicating it through a set
    // has to leave the element count unchanged.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let mut distinct: HashSet<PostAssemblyPassKind> = HashSet::new();
        for kind in POST_ASSEMBLY_PASSES.iter() {
            distinct.insert(*kind);
        }

        assert_eq!(
            distinct.len(),
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    // Every enum variant has to show up in `POST_ASSEMBLY_PASSES`, and exactly
    // one time.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let known: HashSet<PostAssemblyPassKind> = POST_ASSEMBLY_PASSES.iter().copied().collect();

        // First report any variants that are absent altogether.
        let missing: Vec<PostAssemblyPassKind> = PostAssemblyPassKind::iter()
            .filter(|kind| !known.contains(kind))
            .collect();
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Then verify the per-variant occurrence count.
        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|&&entry| entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    // With default (empty) inputs, no pass should elect to run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let empty_inputs = PostAssemblyInputs::default();

        for pass in PostAssemblyPassKind::iter() {
            let would_run = pass.should_run(&empty_inputs);
            assert!(
                !would_run,
                "{pass:?} should skip when no relevant inputs are present"
            );
        }
    }

    // npm package data plus npm workspace markers should enable the two npm
    // passes and nothing else.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let inputs = PostAssemblyInputs {
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
        };

        let expected: HashSet<PostAssemblyPassKind> = [
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]
        .into_iter()
        .collect();

        let runnable: HashSet<_> = PostAssemblyPassKind::iter()
            .filter(|pass| pass.should_run(&inputs))
            .collect();

        assert_eq!(runnable, expected);
    }

    // Cargo package data alone is not enough for the workspace merge; the
    // workspace-marker flag has to be set as well.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        // Builds cargo-only inputs that differ solely in the cargo marker flag.
        fn cargo_inputs(has_cargo_workspace_markers: bool) -> PostAssemblyInputs {
            PostAssemblyInputs {
                package_types: HashSet::from([PackageType::Cargo]),
                file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
                has_npm_workspace_markers: false,
                has_cargo_workspace_markers,
            }
        }

        let without_markers = cargo_inputs(false);
        assert!(!PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&without_markers));

        let with_markers = cargo_inputs(true);
        assert!(PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&with_markers));
    }
}