Skip to main content

provenant/assembly/
assemblers.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashSet;
5
6use crate::models::PackageType;
7use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
8use strum::EnumIter;
9
10use super::{
11    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
12    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, debian_source_merge,
13    file_ref_resolve, hackage_merge, nix_flake_compat_merge, npm_resource_assign,
14    nuget_cpm_resolve, python_requirements_assign, ruby_resource_assign, swift_merge, topology,
15};
16
17#[derive(Clone, Copy)]
18pub(super) enum SpecialDirectoryMergerKind {
19    Skip,
20    Bazel,
21    DebianSource,
22    Hackage,
23    WindowsUpdate,
24}
25
26#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
27pub(super) enum PostAssemblyPassKind {
28    SwiftMerge,
29    CondaRootfsMerge,
30    NpmResourceAssign,
31    PythonRequirementsAssign,
32    FileReferenceResolve,
33    RpmYumdbMerge,
34    NpmWorkspaceMerge,
35    CargoWorkspaceMerge,
36    NugetCpmResolve,
37    CargoResourceAssign,
38    ComposerResourceAssign,
39    RubyResourceAssign,
40    NixFlakeCompatMerge,
41    BazelPrune,
42}
43
44pub(super) fn special_directory_merger_for(
45    config_key: DatasourceId,
46) -> Option<SpecialDirectoryMergerKind> {
47    match config_key {
48        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
49        DatasourceId::DebianControlInSource => Some(SpecialDirectoryMergerKind::DebianSource),
50        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
51        DatasourceId::MicrosoftUpdateManifestMum => Some(SpecialDirectoryMergerKind::WindowsUpdate),
52        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
53        _ => None,
54    }
55}
56
57pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
58    PostAssemblyPassKind::SwiftMerge,
59    PostAssemblyPassKind::CondaRootfsMerge,
60    PostAssemblyPassKind::NpmResourceAssign,
61    PostAssemblyPassKind::PythonRequirementsAssign,
62    PostAssemblyPassKind::FileReferenceResolve,
63    PostAssemblyPassKind::RpmYumdbMerge,
64    PostAssemblyPassKind::NpmWorkspaceMerge,
65    PostAssemblyPassKind::CargoWorkspaceMerge,
66    PostAssemblyPassKind::NugetCpmResolve,
67    PostAssemblyPassKind::CargoResourceAssign,
68    PostAssemblyPassKind::ComposerResourceAssign,
69    PostAssemblyPassKind::RubyResourceAssign,
70    PostAssemblyPassKind::NixFlakeCompatMerge,
71    PostAssemblyPassKind::BazelPrune,
72];
73
74const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
75    DatasourceId::SwiftPackageManifestJson,
76    DatasourceId::SwiftPackageResolved,
77    DatasourceId::SwiftPackageShowDependencies,
78];
79
80const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
81    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];
82
83const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
84    DatasourceId::RpmInstalledDatabaseBdb,
85    DatasourceId::RpmInstalledDatabaseNdb,
86    DatasourceId::RpmInstalledDatabaseSqlite,
87];
88
89const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
90    DatasourceId::NugetDirectoryBuildProps,
91    DatasourceId::NugetDirectoryPackagesProps,
92];
93
94const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
95    DatasourceId::NugetCsproj,
96    DatasourceId::NugetFsproj,
97    DatasourceId::NugetVbproj,
98];
99
100#[derive(Default)]
101struct PostAssemblyInputs {
102    package_types: HashSet<PackageType>,
103    file_datasource_ids: HashSet<DatasourceId>,
104    has_npm_workspace_markers: bool,
105    has_cargo_workspace_markers: bool,
106}
107
108pub(super) fn run_post_assembly_passes(
109    files: &mut [FileInfo],
110    packages: &mut Vec<Package>,
111    dependencies: &mut Vec<TopLevelDependency>,
112    topology_plan: &topology::TopologyPlan,
113) {
114    let inputs = PostAssemblyInputs::collect(files, packages);
115
116    for pass in POST_ASSEMBLY_PASSES {
117        if !pass.should_run(&inputs) {
118            continue;
119        }
120
121        pass.run(files, packages, dependencies, topology_plan);
122    }
123}
124
125impl PostAssemblyInputs {
126    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
127        let mut inputs = Self {
128            package_types: packages
129                .iter()
130                .filter_map(|package| package.package_type)
131                .collect(),
132            ..Self::default()
133        };
134
135        for file in files {
136            for package_data in &file.package_data {
137                let Some(datasource_id) = package_data.datasource_id else {
138                    continue;
139                };
140
141                inputs.file_datasource_ids.insert(datasource_id);
142
143                if matches!(
144                    datasource_id,
145                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
146                ) && package_data
147                    .extra_data
148                    .as_ref()
149                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
150                {
151                    inputs.has_npm_workspace_markers = true;
152                }
153
154                if datasource_id == DatasourceId::CargoToml
155                    && package_data
156                        .extra_data
157                        .as_ref()
158                        .and_then(|extra_data| extra_data.get("workspace"))
159                        .and_then(|workspace| workspace.get("members"))
160                        .and_then(|members| members.as_array())
161                        .is_some_and(|members| !members.is_empty())
162                {
163                    inputs.has_cargo_workspace_markers = true;
164                }
165            }
166        }
167
168        inputs
169    }
170
171    fn has_package_type(&self, package_type: PackageType) -> bool {
172        self.package_types.contains(&package_type)
173    }
174
175    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
176        datasource_ids
177            .iter()
178            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
179    }
180
181    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
182        datasource_ids
183            .iter()
184            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
185    }
186}
187
188impl SpecialDirectoryMergerKind {
189    pub(super) fn run(
190        self,
191        config: &AssemblerConfig,
192        files: &[FileInfo],
193        file_indices: &[usize],
194    ) -> Vec<DirectoryMergeOutput> {
195        match self {
196            Self::Skip => Vec::new(),
197            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
198            Self::DebianSource => {
199                debian_source_merge::assemble_debian_source_packages(config, files, file_indices)
200            }
201            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
202            Self::WindowsUpdate => super::windows_update_merge::assemble_windows_update_packages(
203                config,
204                files,
205                file_indices,
206            ),
207        }
208    }
209}
210
211impl PostAssemblyPassKind {
212    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
213        match self {
214            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
215            Self::CondaRootfsMerge => {
216                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
217            }
218            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
219            Self::PythonRequirementsAssign => {
220                inputs.has_package_type(PackageType::Pypi)
221                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
222            }
223            Self::FileReferenceResolve => {
224                file_ref_resolve::has_relevant_file_reference_datasource_ids(
225                    &inputs.file_datasource_ids,
226                )
227            }
228            Self::RpmYumdbMerge => {
229                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
230                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
231            }
232            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
233            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
234            Self::NugetCpmResolve => {
235                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
236                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
237            }
238            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
239            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
240            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
241            Self::NixFlakeCompatMerge => {
242                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
243                    && inputs.has_any_file_datasource(&[
244                        DatasourceId::NixFlakeNix,
245                        DatasourceId::NixFlakeLock,
246                    ])
247            }
248            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
249        }
250    }
251
252    fn run(
253        self,
254        files: &mut [FileInfo],
255        packages: &mut Vec<Package>,
256        dependencies: &mut Vec<TopLevelDependency>,
257        topology_plan: &topology::TopologyPlan,
258    ) {
259        match self {
260            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
261            Self::CondaRootfsMerge => {
262                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
263            }
264            Self::NpmResourceAssign => {
265                npm_resource_assign::assign_npm_package_resources(files, packages)
266            }
267            Self::PythonRequirementsAssign => {
268                python_requirements_assign::assign_python_requirements_to_projects(
269                    files,
270                    packages,
271                    dependencies,
272                )
273            }
274            Self::FileReferenceResolve => {
275                file_ref_resolve::resolve_file_references(files, packages, dependencies)
276            }
277            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
278            Self::NpmWorkspaceMerge => {
279                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
280            }
281            Self::CargoWorkspaceMerge => {
282                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
283            }
284            Self::NugetCpmResolve => {
285                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
286            }
287            Self::CargoResourceAssign => {
288                cargo_resource_assign::assign_cargo_package_resources(files, packages)
289            }
290            Self::ComposerResourceAssign => {
291                composer_resource_assign::assign_composer_package_resources(files, packages)
292            }
293            Self::RubyResourceAssign => {
294                ruby_resource_assign::assign_ruby_package_resources(files, packages)
295            }
296            Self::NixFlakeCompatMerge => {
297                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
298            }
299            Self::BazelPrune => {
300                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
301            }
302        }
303    }
304}
305
306pub static ASSEMBLERS: &[AssemblerConfig] = &[
307    // ── Sibling-merge assemblers ──
308    //
309    // npm ecosystem: package.json + lockfiles in same directory.
310    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
311    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
312    AssemblerConfig {
313        datasource_ids: &[
314            DatasourceId::BunLock,
315            DatasourceId::BunLockb,
316            DatasourceId::NpmPackageJson,
317            DatasourceId::NpmPackageLockJson,
318            DatasourceId::YarnLock,
319            DatasourceId::YarnLockV1,
320            DatasourceId::YarnLockV2,
321            DatasourceId::YarnPnpCjs,
322            DatasourceId::PnpmLockYaml,
323            DatasourceId::PnpmWorkspaceYaml,
324        ],
325        sibling_file_patterns: &[
326            "package.json",
327            "bun.lock",
328            "bun.lockb",
329            ".package-lock.json",
330            "package-lock.json",
331            ".npm-shrinkwrap.json",
332            "npm-shrinkwrap.json",
333            "yarn.lock",
334            ".pnp.cjs",
335            "pnpm-lock.yaml",
336            "shrinkwrap.yaml",
337            "pnpm-workspace.yaml",
338        ],
339        mode: AssemblyMode::SiblingMerge,
340    },
341    // Rust/Cargo ecosystem
342    AssemblerConfig {
343        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
344        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
345        mode: AssemblyMode::SiblingMerge,
346    },
347    // Julia ecosystem
348    AssemblerConfig {
349        datasource_ids: &[
350            DatasourceId::JuliaProjectToml,
351            DatasourceId::JuliaManifestToml,
352        ],
353        sibling_file_patterns: &["Project.toml", "Manifest.toml"],
354        mode: AssemblyMode::SiblingMerge,
355    },
356    // Erlang/OTP Rebar ecosystem
357    AssemblerConfig {
358        datasource_ids: &[DatasourceId::RebarConfig, DatasourceId::RebarLock],
359        sibling_file_patterns: &["rebar.config", "rebar.lock"],
360        mode: AssemblyMode::SiblingMerge,
361    },
362    // Carthage ecosystem
363    AssemblerConfig {
364        datasource_ids: &[
365            DatasourceId::CarthageCartfile,
366            DatasourceId::CarthageCartfileResolved,
367        ],
368        sibling_file_patterns: &["Cartfile", "Cartfile.private", "Cartfile.resolved"],
369        mode: AssemblyMode::SiblingMerge,
370    },
371    // CocoaPods ecosystem
372    AssemblerConfig {
373        datasource_ids: &[
374            DatasourceId::CocoapodsPodspec,
375            DatasourceId::CocoapodsPodspecJson,
376            DatasourceId::CocoapodsPodfile,
377            DatasourceId::CocoapodsPodfileLock,
378        ],
379        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
380        mode: AssemblyMode::SiblingMerge,
381    },
382    // PHP Composer ecosystem
383    AssemblerConfig {
384        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
385        sibling_file_patterns: &[
386            "*composer.json",
387            "composer.*.json",
388            "*composer.lock",
389            "composer.*.lock",
390        ],
391        mode: AssemblyMode::SiblingMerge,
392    },
393    // Go ecosystem (includes legacy Godeps)
394    AssemblerConfig {
395        datasource_ids: &[
396            DatasourceId::GoMod,
397            DatasourceId::GoModGraph,
398            DatasourceId::GoSum,
399            DatasourceId::GoWork,
400            DatasourceId::Godeps,
401        ],
402        sibling_file_patterns: &[
403            "go.mod",
404            "go.work",
405            "go.mod.graph",
406            "go.modgraph",
407            "go.sum",
408            "Godeps.json",
409        ],
410        mode: AssemblyMode::SiblingMerge,
411    },
412    // Dart/Flutter ecosystem
413    AssemblerConfig {
414        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
415        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
416        mode: AssemblyMode::SiblingMerge,
417    },
418    // Pixi ecosystem
419    AssemblerConfig {
420        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
421        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
422        mode: AssemblyMode::SiblingMerge,
423    },
424    AssemblerConfig {
425        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
426        sibling_file_patterns: &["flake.nix", "flake.lock"],
427        mode: AssemblyMode::SiblingMerge,
428    },
429    AssemblerConfig {
430        datasource_ids: &[DatasourceId::NixDefaultNix],
431        sibling_file_patterns: &["default.nix"],
432        mode: AssemblyMode::OnePerPackageData,
433    },
434    // Helm chart ecosystem
435    AssemblerConfig {
436        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
437        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
438        mode: AssemblyMode::SiblingMerge,
439    },
440    AssemblerConfig {
441        datasource_ids: &[
442            DatasourceId::HackageCabal,
443            DatasourceId::HackageCabalProject,
444            DatasourceId::HackageStackYaml,
445        ],
446        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
447        mode: AssemblyMode::SiblingMerge,
448    },
449    // Chef ecosystem
450    AssemblerConfig {
451        datasource_ids: &[
452            DatasourceId::ChefCookbookMetadataJson,
453            DatasourceId::ChefCookbookMetadataRb,
454        ],
455        sibling_file_patterns: &["metadata.json", "metadata.rb"],
456        mode: AssemblyMode::SiblingMerge,
457    },
458    // Conan (C/C++) ecosystem
459    AssemblerConfig {
460        datasource_ids: &[
461            DatasourceId::ConanConanFilePy,
462            DatasourceId::ConanConanFileTxt,
463            DatasourceId::ConanLock,
464            DatasourceId::ConanConanDataYml,
465        ],
466        sibling_file_patterns: &[
467            "conanfile.py",
468            "conanfile.txt",
469            "conan.lock",
470            "conandata.yml",
471        ],
472        mode: AssemblyMode::SiblingMerge,
473    },
474    // Maven/Java ecosystem (nested merge via META-INF)
475    AssemblerConfig {
476        datasource_ids: &[
477            DatasourceId::MavenPom,
478            DatasourceId::MavenPomProperties,
479            DatasourceId::JavaJarManifest,
480            DatasourceId::JavaOsgiManifest,
481        ],
482        sibling_file_patterns: &[
483            "pom.xml",
484            "*.pom",
485            "pom.properties",
486            "**/META-INF/MANIFEST.MF",
487        ],
488        mode: AssemblyMode::SiblingMerge,
489    },
490    AssemblerConfig {
491        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
492        sibling_file_patterns: &["*.whl", "origin.json"],
493        mode: AssemblyMode::SiblingMerge,
494    },
495    // Python/PyPI ecosystem
496    AssemblerConfig {
497        datasource_ids: &[
498            DatasourceId::PypiPyprojectToml,
499            DatasourceId::PypiPoetryPyprojectToml,
500            DatasourceId::PypiSetupPy,
501            DatasourceId::PypiSetupCfg,
502            DatasourceId::PypiWheelMetadata,
503            DatasourceId::PypiEgg,
504            DatasourceId::PypiEggPkginfo,
505            DatasourceId::PypiEditableEggPkginfo,
506            DatasourceId::PypiJson,
507            DatasourceId::PypiSdist,
508            DatasourceId::PypiSdistPkginfo,
509            DatasourceId::PypiInspectDeplock,
510            DatasourceId::PipRequirements,
511            DatasourceId::PypiPoetryLock,
512            DatasourceId::PypiPylockToml,
513            DatasourceId::PypiUvLock,
514            DatasourceId::Pipfile,
515            DatasourceId::PipfileLock,
516        ],
517        sibling_file_patterns: &[
518            "pyproject.toml",
519            "setup.py",
520            "setup.cfg",
521            "PKG-INFO",
522            "METADATA",
523            "pypi.json",
524            "pip-inspect.deplock",
525            "*.tar.gz",
526            "*.tgz",
527            "*.tar.bz2",
528            "*.tar.xz",
529            "*.zip",
530            "requirements*.txt",
531            "Pipfile",
532            "Pipfile.lock",
533            "poetry.lock",
534            "pylock.toml",
535            "pylock.*.toml",
536            "uv.lock",
537        ],
538        mode: AssemblyMode::SiblingMerge,
539    },
540    AssemblerConfig {
541        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
542        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
543        mode: AssemblyMode::SiblingMerge,
544    },
545    // Ruby/RubyGems ecosystem
546    AssemblerConfig {
547        datasource_ids: &[
548            DatasourceId::GemArchiveExtracted,
549            DatasourceId::Gemspec,
550            DatasourceId::GemspecExtracted,
551            DatasourceId::Gemfile,
552            DatasourceId::GemfileExtracted,
553            DatasourceId::GemfileLock,
554            DatasourceId::GemfileLockExtracted,
555        ],
556        sibling_file_patterns: &[
557            "metadata.gz-extract",
558            "**/data.gz-extract/*.gemspec",
559            "**/data.gz-extract/Gemfile",
560            "**/data.gz-extract/Gemfile.lock",
561            "*.gemspec",
562            "Gemfile",
563            "Gemfile.lock",
564        ],
565        mode: AssemblyMode::SiblingMerge,
566    },
567    AssemblerConfig {
568        datasource_ids: &[DatasourceId::GemArchive],
569        sibling_file_patterns: &["*.gem"],
570        mode: AssemblyMode::OnePerPackageData,
571    },
572    // Conda ecosystem
573    AssemblerConfig {
574        datasource_ids: &[
575            DatasourceId::CondaMetaYaml,
576            DatasourceId::CondaYaml,
577            DatasourceId::CondaMetaJson,
578        ],
579        sibling_file_patterns: &[
580            "meta.yaml",
581            "meta.yml",
582            "recipe.yaml",
583            "recipe.yml",
584            "environment.yml",
585            "environment.yaml",
586            "conda.yaml",
587            "conda.yml",
588            "*conda*.yaml",
589            "*conda*.yml",
590            "env.yaml",
591            "env.yml",
592            "*env*.yaml",
593            "*env*.yml",
594            "*environment*.yaml",
595            "*environment*.yml",
596            "*.json",
597        ],
598        mode: AssemblyMode::SiblingMerge,
599    },
600    // RPM specfile (source packages)
601    AssemblerConfig {
602        datasource_ids: &[DatasourceId::RpmSpecfile],
603        sibling_file_patterns: &["*.spec"],
604        mode: AssemblyMode::OnePerPackageData,
605    },
606    // Debian source packages (nested merge via debian/ directory)
607    AssemblerConfig {
608        datasource_ids: &[
609            DatasourceId::DebianControlInSource,
610            DatasourceId::DebianCopyrightInSource,
611        ],
612        sibling_file_patterns: &["control", "copyright"],
613        mode: AssemblyMode::SiblingMerge,
614    },
615    // Gradle/Android ecosystem
616    AssemblerConfig {
617        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
618        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
619        mode: AssemblyMode::SiblingMerge,
620    },
621    AssemblerConfig {
622        datasource_ids: &[DatasourceId::GradleModule],
623        sibling_file_patterns: &["*.module"],
624        mode: AssemblyMode::OnePerPackageData,
625    },
626    // CPAN/Perl ecosystem
627    AssemblerConfig {
628        datasource_ids: &[
629            DatasourceId::CpanMetaJson,
630            DatasourceId::CpanMetaYml,
631            DatasourceId::CpanManifest,
632            DatasourceId::CpanDistIni,
633            DatasourceId::CpanMakefile,
634        ],
635        sibling_file_patterns: &[
636            "META.json",
637            "META.yml",
638            "MANIFEST",
639            "dist.ini",
640            "Makefile.PL",
641        ],
642        mode: AssemblyMode::SiblingMerge,
643    },
644    // NuGet/.NET ecosystem
645    AssemblerConfig {
646        datasource_ids: &[
647            DatasourceId::NugetCsproj,
648            DatasourceId::NugetFsproj,
649            DatasourceId::NugetNuspec,
650            DatasourceId::NugetNupkg,
651            DatasourceId::NugetProjectJson,
652            DatasourceId::NugetProjectLockJson,
653            DatasourceId::NugetPackagesConfig,
654            DatasourceId::NugetPackagesLock,
655            DatasourceId::NugetVbproj,
656        ],
657        sibling_file_patterns: &[
658            "*.csproj",
659            "*.fsproj",
660            "*.nuspec",
661            "*.nupkg",
662            "project.json",
663            "project.lock.json",
664            "packages.config",
665            "packages.lock.json",
666            "*.packages.lock.json",
667            "*.vbproj",
668        ],
669        mode: AssemblyMode::SiblingMerge,
670    },
671    AssemblerConfig {
672        datasource_ids: &[DatasourceId::NugetDepsJson],
673        sibling_file_patterns: &["*.deps.json"],
674        mode: AssemblyMode::OnePerPackageData,
675    },
676    // Swift/SPM ecosystem
677    AssemblerConfig {
678        datasource_ids: &[
679            DatasourceId::SwiftPackageManifestJson,
680            DatasourceId::SwiftPackageResolved,
681            DatasourceId::SwiftPackageShowDependencies,
682        ],
683        sibling_file_patterns: &[
684            "Package.swift.json",
685            "Package.swift.deplock",
686            "Package.resolved",
687            ".package.resolved",
688            "swift-show-dependencies.deplock",
689        ],
690        mode: AssemblyMode::SiblingMerge,
691    },
692    // ── Standalone assemblers (single file → single package) ──
693    //
694    // These ecosystems have only one manifest file type with no sibling merging.
695    // They still need configs so their datasource_ids are recognized by the assembler.
696    //
697    // Bower (JavaScript)
698    AssemblerConfig {
699        datasource_ids: &[DatasourceId::BowerJson],
700        sibling_file_patterns: &["bower.json"],
701        mode: AssemblyMode::SiblingMerge,
702    },
703    // CRAN (R language)
704    AssemblerConfig {
705        datasource_ids: &[DatasourceId::CranDescription],
706        sibling_file_patterns: &["DESCRIPTION"],
707        mode: AssemblyMode::SiblingMerge,
708    },
709    // FreeBSD packages
710    AssemblerConfig {
711        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
712        sibling_file_patterns: &["+COMPACT_MANIFEST"],
713        mode: AssemblyMode::SiblingMerge,
714    },
715    // Haxe ecosystem
716    AssemblerConfig {
717        datasource_ids: &[DatasourceId::HaxelibJson],
718        sibling_file_patterns: &["haxelib.json"],
719        mode: AssemblyMode::SiblingMerge,
720    },
721    AssemblerConfig {
722        datasource_ids: &[DatasourceId::Gitmodules],
723        sibling_file_patterns: &[".gitmodules"],
724        mode: AssemblyMode::SiblingMerge,
725    },
726    // OCaml/opam ecosystem
727    AssemblerConfig {
728        datasource_ids: &[DatasourceId::OpamFile],
729        sibling_file_patterns: &["opam", "*.opam"],
730        mode: AssemblyMode::SiblingMerge,
731    },
732    // RPM Mariner manifest
733    AssemblerConfig {
734        datasource_ids: &[DatasourceId::RpmMarinerManifest],
735        sibling_file_patterns: &["*.rpm.manifest"],
736        mode: AssemblyMode::SiblingMerge,
737    },
738    AssemblerConfig {
739        datasource_ids: &[DatasourceId::RpmYumdb],
740        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
741        mode: AssemblyMode::OnePerPackageData,
742    },
743    // Microsoft Update Manifest
744    AssemblerConfig {
745        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
746        sibling_file_patterns: &["*.mum"],
747        mode: AssemblyMode::SiblingMerge,
748    },
749    // Autotools (C/C++ build system)
750    AssemblerConfig {
751        datasource_ids: &[DatasourceId::AutotoolsConfigure],
752        sibling_file_patterns: &["configure", "configure.ac"],
753        mode: AssemblyMode::SiblingMerge,
754    },
755    // Bazel (build system)
756    AssemblerConfig {
757        datasource_ids: &[DatasourceId::BazelBuild],
758        sibling_file_patterns: &["BUILD"],
759        mode: AssemblyMode::SiblingMerge,
760    },
761    AssemblerConfig {
762        datasource_ids: &[DatasourceId::BazelModule],
763        sibling_file_patterns: &["MODULE.bazel"],
764        mode: AssemblyMode::OnePerPackageData,
765    },
766    // Buck (build system)
767    AssemblerConfig {
768        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
769        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
770        mode: AssemblyMode::SiblingMerge,
771    },
772    // Ant/Ivy (Java dependency management)
773    AssemblerConfig {
774        datasource_ids: &[DatasourceId::AntIvyXml],
775        sibling_file_patterns: &["ivy.xml"],
776        mode: AssemblyMode::SiblingMerge,
777    },
778    // Meteor (JavaScript platform)
779    AssemblerConfig {
780        datasource_ids: &[DatasourceId::MeteorPackage],
781        sibling_file_patterns: &["package.js"],
782        mode: AssemblyMode::SiblingMerge,
783    },
784    // ── One-per-PackageData assemblers (database files with many packages) ──
785    //
786    // Alpine installed package database
787    AssemblerConfig {
788        datasource_ids: &[DatasourceId::AlpineInstalledDb],
789        sibling_file_patterns: &["installed"],
790        mode: AssemblyMode::OnePerPackageData,
791    },
792    AssemblerConfig {
793        datasource_ids: &[DatasourceId::AlpineApkbuild],
794        sibling_file_patterns: &["APKBUILD"],
795        mode: AssemblyMode::SiblingMerge,
796    },
797    // RPM installed package databases (BDB, NDB, SQLite)
798    AssemblerConfig {
799        datasource_ids: &[
800            DatasourceId::RpmInstalledDatabaseBdb,
801            DatasourceId::RpmInstalledDatabaseNdb,
802            DatasourceId::RpmInstalledDatabaseSqlite,
803        ],
804        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
805        mode: AssemblyMode::OnePerPackageData,
806    },
807    AssemblerConfig {
808        datasource_ids: &[DatasourceId::RpmArchive],
809        sibling_file_patterns: &["*.rpm", "*.srpm"],
810        mode: AssemblyMode::OnePerPackageData,
811    },
812    // Debian installed package databases
813    AssemblerConfig {
814        datasource_ids: &[DatasourceId::DebianDeb],
815        sibling_file_patterns: &["*.deb"],
816        mode: AssemblyMode::OnePerPackageData,
817    },
818    AssemblerConfig {
819        datasource_ids: &[
820            DatasourceId::DebianInstalledStatusDb,
821            DatasourceId::DebianDistrolessInstalledDb,
822        ],
823        sibling_file_patterns: &["status"],
824        mode: AssemblyMode::OnePerPackageData,
825    },
826    AssemblerConfig {
827        datasource_ids: &[
828            DatasourceId::DebianControlExtractedDeb,
829            DatasourceId::DebianMd5SumsInExtractedDeb,
830        ],
831        sibling_file_patterns: &["control", "md5sums"],
832        mode: AssemblyMode::SiblingMerge,
833    },
834    AssemblerConfig {
835        datasource_ids: &[DatasourceId::DebianSourceControlDsc],
836        sibling_file_patterns: &["*.dsc"],
837        mode: AssemblyMode::OnePerPackageData,
838    },
839    AssemblerConfig {
840        datasource_ids: &[DatasourceId::AboutFile],
841        sibling_file_patterns: &["*.ABOUT"],
842        mode: AssemblyMode::OnePerPackageData,
843    },
844    AssemblerConfig {
845        datasource_ids: &[
846            DatasourceId::BitbakeRecipe,
847            DatasourceId::BitbakeRecipeAppend,
848        ],
849        sibling_file_patterns: &["*.bb", "*.bbappend"],
850        mode: AssemblyMode::SiblingMerge,
851    },
852];
853
854// Datasource IDs intentionally excluded from package assembly.
855//
856// This list is runtime-significant: files with these datasource IDs may remain
857// unowned by any Package, while their dependencies are still eligible for
858// top-level hoisting. Tests also use it to enforce explicit assembly accounting.
859pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
860    // Non-package metadata
861    DatasourceId::Readme,
862    DatasourceId::EtcOsRelease,
863    // Binary archives (require external extraction via ExtractCode before scanning)
864    DatasourceId::AlpineApkArchive,
865    DatasourceId::AndroidAab,
866    DatasourceId::AndroidAarLibrary,
867    DatasourceId::AndroidApk,
868    DatasourceId::AndroidManifestXml,
869    DatasourceId::AndroidSoongMetadata,
870    DatasourceId::AppleDmg,
871    DatasourceId::Axis2Mar,
872    DatasourceId::ChromeCrx,
873    DatasourceId::DebianOriginalSourceTarball,
874    DatasourceId::DebianSourceMetadataTarball,
875    DatasourceId::InstallshieldInstaller,
876    DatasourceId::IosIpa,
877    DatasourceId::IsoDiskImage,
878    DatasourceId::JavaEarArchive,
879    DatasourceId::JavaJar,
880    DatasourceId::JavaWarArchive,
881    DatasourceId::JbossSar,
882    DatasourceId::MicrosoftCabinet,
883    DatasourceId::MozillaXpi,
884    DatasourceId::NsisInstaller,
885    DatasourceId::SharShellArchive,
886    DatasourceId::SquashfsDiskImage,
887    // Supplementary metadata (not primary package definitions)
888    DatasourceId::ArchAurinfo,
889    DatasourceId::ArchPkginfo,
890    DatasourceId::ArchSrcinfo,
891    DatasourceId::Axis2ModuleXml,
892    DatasourceId::ClojureDepsEdn,
893    DatasourceId::ClojureProjectClj,
894    DatasourceId::DebianInstalledFilesList,
895    DatasourceId::DebianInstalledMd5Sums,
896    DatasourceId::DebianCopyright,
897    DatasourceId::DebianCopyrightInPackage,
898    DatasourceId::DebianCopyrightStandalone,
899    DatasourceId::GoBinary,
900    DatasourceId::WindowsExecutable,
901    DatasourceId::Dockerfile,
902    DatasourceId::ErlangOtpAppSrc,
903    DatasourceId::HexMixLock,
904    DatasourceId::JavaEarApplicationXml,
905    DatasourceId::JavaWarWebXml,
906    DatasourceId::JbossServiceXml,
907    DatasourceId::MesonBuild,
908    DatasourceId::GemGemspecInstalledSpecifications,
909    DatasourceId::NugetDirectoryBuildProps,
910    DatasourceId::NugetDirectoryPackagesProps,
911    DatasourceId::CitationCff,
912    DatasourceId::PubliccodeYaml,
913    DatasourceId::RpmPackageLicenses,
914    DatasourceId::RustBinary,
915    DatasourceId::SbtBuildSbt,
916    DatasourceId::VcpkgJson,
917];
918
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Gathers every pass kind whose `should_run` returns true for `inputs`.
    fn runnable_passes(inputs: &PostAssemblyInputs) -> HashSet<PostAssemblyPassKind> {
        PostAssemblyPassKind::iter()
            .filter(|pass| pass.should_run(inputs))
            .collect()
    }

    /// Each `DatasourceId` variant must be listed in exactly one of
    /// `ASSEMBLERS` or `UNASSEMBLED_DATASOURCE_IDS`.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        // Flatten the datasource IDs claimed by all assembler configs.
        let assembled: HashSet<DatasourceId> = ASSEMBLERS
            .iter()
            .flat_map(|config| config.datasource_ids.iter().copied())
            .collect();
        let unassembled: HashSet<DatasourceId> =
            UNASSEMBLED_DATASOURCE_IDS.iter().copied().collect();

        // An ID may not be claimed by both tables.
        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // And every variant has to show up in at least one of them.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// `POST_ASSEMBLY_PASSES` must not list the same pass twice.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        // `insert` reports false for a repeated value, so only first
        // occurrences are counted.
        let mut seen: HashSet<PostAssemblyPassKind> = HashSet::new();
        let distinct = POST_ASSEMBLY_PASSES
            .iter()
            .filter(|&&entry| seen.insert(entry))
            .count();

        assert_eq!(
            distinct,
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Every `PostAssemblyPassKind` variant appears in `POST_ASSEMBLY_PASSES`
    /// exactly one time.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let registered: HashSet<PostAssemblyPassKind> =
            POST_ASSEMBLY_PASSES.iter().copied().collect();

        // First report all unregistered variants together in one message.
        let missing: Vec<_> = PostAssemblyPassKind::iter()
            .filter(|kind| !registered.contains(kind))
            .collect();
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Then check the occurrence count of each variant individually.
        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|&&entry| entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    /// With default (empty) inputs, no pass should report that it needs to run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let inputs = PostAssemblyInputs::default();

        PostAssemblyPassKind::iter().for_each(|pass| {
            assert!(
                !pass.should_run(&inputs),
                "{pass:?} should skip when no relevant inputs are present"
            );
        });
    }

    /// Inputs describing an npm workspace enable the two npm passes and
    /// nothing else.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };
        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);

        let runnable = runnable_passes(&inputs);

        assert_eq!(runnable, expected);
    }

    /// The Cargo workspace merge pass is gated on the workspace-marker flag,
    /// not merely on Cargo packages being present.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        let merge_pass = PostAssemblyPassKind::CargoWorkspaceMerge;

        let without_markers = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers: false,
        };
        assert!(!merge_pass.should_run(&without_markers));

        // Same inputs, with only the Cargo workspace flag flipped on.
        let with_markers = PostAssemblyInputs {
            has_cargo_workspace_markers: true,
            ..without_markers
        };
        assert!(merge_pass.should_run(&with_markers));
    }
}