Skip to main content

provenant/assembly/
assemblers.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashSet;
5
6use crate::models::PackageType;
7use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
8use strum::EnumIter;
9
10use super::{
11    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
12    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, debian_source_merge,
13    file_ref_resolve, hackage_merge, nix_flake_compat_merge, npm_resource_assign,
14    nuget_cpm_resolve, python_requirements_assign, ruby_resource_assign, swift_merge, topology,
15};
16
/// Identifies a special-case directory merger.
///
/// `special_directory_merger_for` maps a datasource ID to one of these kinds, and
/// `SpecialDirectoryMergerKind::run` dispatches each kind to its merge routine.
#[derive(Clone, Copy)]
pub(super) enum SpecialDirectoryMergerKind {
    /// Produces no directory merge output: `run` returns an empty vector.
    Skip,
    /// Dispatches to `bazel_merge::assemble_bazel_packages`.
    Bazel,
    /// Dispatches to `debian_source_merge::assemble_debian_source_packages`.
    DebianSource,
    /// Dispatches to `hackage_merge::assemble_hackage_packages`.
    Hackage,
    /// Dispatches to `windows_update_merge::assemble_windows_update_packages`.
    WindowsUpdate,
}
25
/// Identifies one post-assembly pass.
///
/// `should_run` decides from the collected scan inputs whether a pass applies, and `run`
/// dispatches it to its implementation. Execution order is taken from
/// `POST_ASSEMBLY_PASSES`, which `run_post_assembly_passes` iterates.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
pub(super) enum PostAssemblyPassKind {
    /// Dispatches to `swift_merge::assemble_swift_packages`.
    SwiftMerge,
    /// Dispatches to `conda_rootfs_merge::merge_conda_rootfs_metadata`.
    CondaRootfsMerge,
    /// Dispatches to `npm_resource_assign::assign_npm_package_resources`.
    NpmResourceAssign,
    /// Dispatches to `python_requirements_assign::assign_python_requirements_to_projects`.
    PythonRequirementsAssign,
    /// Dispatches to `file_ref_resolve::resolve_file_references`.
    FileReferenceResolve,
    /// Dispatches to `file_ref_resolve::merge_rpm_yumdb_metadata`.
    RpmYumdbMerge,
    /// Dispatches to `TopologyPlan::apply_npm_workspace_domains`.
    NpmWorkspaceMerge,
    /// Dispatches to `TopologyPlan::apply_cargo_workspace_domains`.
    CargoWorkspaceMerge,
    /// Dispatches to `nuget_cpm_resolve::resolve_nuget_cpm_versions`.
    NugetCpmResolve,
    /// Dispatches to `cargo_resource_assign::assign_cargo_package_resources`.
    CargoResourceAssign,
    /// Dispatches to `composer_resource_assign::assign_composer_package_resources`.
    ComposerResourceAssign,
    /// Dispatches to `ruby_resource_assign::assign_ruby_package_resources`.
    RubyResourceAssign,
    /// Dispatches to `nix_flake_compat_merge::attach_flake_compat_default_files`.
    NixFlakeCompatMerge,
    /// Dispatches to `bazel_prune::prune_unused_bazel_packages`.
    BazelPrune,
}
43
44pub(super) fn special_directory_merger_for(
45    config_key: DatasourceId,
46) -> Option<SpecialDirectoryMergerKind> {
47    match config_key {
48        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
49        DatasourceId::DebianControlInSource => Some(SpecialDirectoryMergerKind::DebianSource),
50        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
51        DatasourceId::MicrosoftUpdateManifestMum => Some(SpecialDirectoryMergerKind::WindowsUpdate),
52        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
53        _ => None,
54    }
55}
56
/// Post-assembly passes in execution order.
///
/// `run_post_assembly_passes` walks this slice front to back, so the position of an
/// entry determines when the pass runs relative to the others.
pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
    PostAssemblyPassKind::SwiftMerge,
    PostAssemblyPassKind::CondaRootfsMerge,
    PostAssemblyPassKind::NpmResourceAssign,
    PostAssemblyPassKind::PythonRequirementsAssign,
    PostAssemblyPassKind::FileReferenceResolve,
    PostAssemblyPassKind::RpmYumdbMerge,
    PostAssemblyPassKind::NpmWorkspaceMerge,
    PostAssemblyPassKind::CargoWorkspaceMerge,
    PostAssemblyPassKind::NugetCpmResolve,
    PostAssemblyPassKind::CargoResourceAssign,
    PostAssemblyPassKind::ComposerResourceAssign,
    PostAssemblyPassKind::RubyResourceAssign,
    PostAssemblyPassKind::NixFlakeCompatMerge,
    PostAssemblyPassKind::BazelPrune,
];
73
/// Swift datasource IDs; `PostAssemblyPassKind::SwiftMerge` runs when any of them was
/// seen in the scanned files.
const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::SwiftPackageManifestJson,
    DatasourceId::SwiftPackageResolved,
    DatasourceId::SwiftPackageShowDependencies,
];

/// Conda datasource IDs; `PostAssemblyPassKind::CondaRootfsMerge` runs only when all of
/// them were seen in the scanned files.
const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];

/// RPM installed-database datasource IDs; `PostAssemblyPassKind::RpmYumdbMerge` requires
/// at least one of them in addition to `DatasourceId::RpmYumdb`.
const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];

/// NuGet configuration-side datasource IDs; `PostAssemblyPassKind::NugetCpmResolve`
/// requires at least one of them.
const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::NugetDirectoryBuildProps,
    DatasourceId::NugetDirectoryPackagesProps,
];

/// NuGet project-file datasource IDs; `PostAssemblyPassKind::NugetCpmResolve` requires
/// at least one of them alongside a configuration-side ID.
const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::NugetCsproj,
    DatasourceId::NugetFsproj,
    DatasourceId::NugetVbproj,
];
99
/// Facts about the scanned files and assembled packages, gathered once by
/// `PostAssemblyInputs::collect` and consulted by `PostAssemblyPassKind::should_run`.
#[derive(Default)]
struct PostAssemblyInputs {
    /// Package types of all packages that carry one.
    package_types: HashSet<PackageType>,
    /// Every datasource ID found in the package data of the scanned files.
    file_datasource_ids: HashSet<DatasourceId>,
    /// Set when npm `package.json` or pnpm workspace package data has a `"workspaces"`
    /// key in its extra data.
    has_npm_workspace_markers: bool,
    /// Set when `Cargo.toml` package data has a non-empty `workspace.members` array in
    /// its extra data.
    has_cargo_workspace_markers: bool,
}
107
108pub(super) fn run_post_assembly_passes(
109    files: &mut [FileInfo],
110    packages: &mut Vec<Package>,
111    dependencies: &mut Vec<TopLevelDependency>,
112    topology_plan: &topology::TopologyPlan,
113) {
114    let inputs = PostAssemblyInputs::collect(files, packages);
115
116    for pass in POST_ASSEMBLY_PASSES {
117        if !pass.should_run(&inputs) {
118            continue;
119        }
120
121        pass.run(files, packages, dependencies, topology_plan);
122    }
123}
124
125impl PostAssemblyInputs {
126    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
127        let mut inputs = Self {
128            package_types: packages
129                .iter()
130                .filter_map(|package| package.package_type)
131                .collect(),
132            ..Self::default()
133        };
134
135        for file in files {
136            for package_data in &file.package_data {
137                let Some(datasource_id) = package_data.datasource_id else {
138                    continue;
139                };
140
141                inputs.file_datasource_ids.insert(datasource_id);
142
143                if matches!(
144                    datasource_id,
145                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
146                ) && package_data
147                    .extra_data
148                    .as_ref()
149                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
150                {
151                    inputs.has_npm_workspace_markers = true;
152                }
153
154                if datasource_id == DatasourceId::CargoToml
155                    && package_data
156                        .extra_data
157                        .as_ref()
158                        .and_then(|extra_data| extra_data.get("workspace"))
159                        .and_then(|workspace| workspace.get("members"))
160                        .and_then(|members| members.as_array())
161                        .is_some_and(|members| !members.is_empty())
162                {
163                    inputs.has_cargo_workspace_markers = true;
164                }
165            }
166        }
167
168        inputs
169    }
170
171    fn has_package_type(&self, package_type: PackageType) -> bool {
172        self.package_types.contains(&package_type)
173    }
174
175    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
176        datasource_ids
177            .iter()
178            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
179    }
180
181    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
182        datasource_ids
183            .iter()
184            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
185    }
186}
187
188impl SpecialDirectoryMergerKind {
189    pub(super) fn run(
190        self,
191        config: &AssemblerConfig,
192        files: &[FileInfo],
193        file_indices: &[usize],
194    ) -> Vec<DirectoryMergeOutput> {
195        match self {
196            Self::Skip => Vec::new(),
197            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
198            Self::DebianSource => {
199                debian_source_merge::assemble_debian_source_packages(config, files, file_indices)
200            }
201            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
202            Self::WindowsUpdate => super::windows_update_merge::assemble_windows_update_packages(
203                config,
204                files,
205                file_indices,
206            ),
207        }
208    }
209}
210
211impl PostAssemblyPassKind {
212    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
213        match self {
214            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
215            Self::CondaRootfsMerge => {
216                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
217            }
218            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
219            Self::PythonRequirementsAssign => {
220                inputs.has_package_type(PackageType::Pypi)
221                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
222            }
223            Self::FileReferenceResolve => {
224                file_ref_resolve::has_relevant_file_reference_datasource_ids(
225                    &inputs.file_datasource_ids,
226                )
227            }
228            Self::RpmYumdbMerge => {
229                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
230                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
231            }
232            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
233            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
234            Self::NugetCpmResolve => {
235                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
236                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
237            }
238            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
239            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
240            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
241            Self::NixFlakeCompatMerge => {
242                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
243                    && inputs.has_any_file_datasource(&[
244                        DatasourceId::NixFlakeNix,
245                        DatasourceId::NixFlakeLock,
246                    ])
247            }
248            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
249        }
250    }
251
252    fn run(
253        self,
254        files: &mut [FileInfo],
255        packages: &mut Vec<Package>,
256        dependencies: &mut Vec<TopLevelDependency>,
257        topology_plan: &topology::TopologyPlan,
258    ) {
259        match self {
260            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
261            Self::CondaRootfsMerge => {
262                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
263            }
264            Self::NpmResourceAssign => {
265                npm_resource_assign::assign_npm_package_resources(files, packages)
266            }
267            Self::PythonRequirementsAssign => {
268                python_requirements_assign::assign_python_requirements_to_projects(
269                    files,
270                    packages,
271                    dependencies,
272                )
273            }
274            Self::FileReferenceResolve => {
275                file_ref_resolve::resolve_file_references(files, packages, dependencies)
276            }
277            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
278            Self::NpmWorkspaceMerge => {
279                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
280            }
281            Self::CargoWorkspaceMerge => {
282                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
283            }
284            Self::NugetCpmResolve => {
285                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
286            }
287            Self::CargoResourceAssign => {
288                cargo_resource_assign::assign_cargo_package_resources(files, packages)
289            }
290            Self::ComposerResourceAssign => {
291                composer_resource_assign::assign_composer_package_resources(files, packages)
292            }
293            Self::RubyResourceAssign => {
294                ruby_resource_assign::assign_ruby_package_resources(files, packages)
295            }
296            Self::NixFlakeCompatMerge => {
297                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
298            }
299            Self::BazelPrune => {
300                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
301            }
302        }
303    }
304}
305
/// Table of assembler configurations.
///
/// Each entry names the datasource IDs it covers, the file-name patterns of the files
/// that belong together, and the assembly mode (`SiblingMerge` or `OnePerPackageData`).
pub static ASSEMBLERS: &[AssemblerConfig] = &[
    // ── Sibling-merge assemblers ──
    //
    // npm ecosystem: package.json + lockfiles in same directory.
    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::BunLock,
            DatasourceId::BunLockb,
            DatasourceId::NpmPackageJson,
            DatasourceId::NpmPackageLockJson,
            DatasourceId::YarnLock,
            DatasourceId::YarnLockV1,
            DatasourceId::YarnLockV2,
            DatasourceId::YarnPnpCjs,
            DatasourceId::PnpmLockYaml,
            DatasourceId::PnpmWorkspaceYaml,
        ],
        sibling_file_patterns: &[
            "package.json",
            "bun.lock",
            "bun.lockb",
            ".package-lock.json",
            "package-lock.json",
            ".npm-shrinkwrap.json",
            "npm-shrinkwrap.json",
            "yarn.lock",
            ".pnp.cjs",
            "pnpm-lock.yaml",
            "shrinkwrap.yaml",
            "pnpm-workspace.yaml",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Rust/Cargo ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Julia ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::JuliaProjectToml,
            DatasourceId::JuliaManifestToml,
        ],
        sibling_file_patterns: &["Project.toml", "Manifest.toml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Erlang/OTP Rebar ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RebarConfig, DatasourceId::RebarLock],
        sibling_file_patterns: &["rebar.config", "rebar.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Carthage ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CarthageCartfile,
            DatasourceId::CarthageCartfileResolved,
        ],
        sibling_file_patterns: &["Cartfile", "Cartfile.private", "Cartfile.resolved"],
        mode: AssemblyMode::SiblingMerge,
    },
    // CocoaPods ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CocoapodsPodspec,
            DatasourceId::CocoapodsPodspecJson,
            DatasourceId::CocoapodsPodfile,
            DatasourceId::CocoapodsPodfileLock,
        ],
        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // PHP Composer ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
        sibling_file_patterns: &[
            "*composer.json",
            "composer.*.json",
            "*composer.lock",
            "composer.*.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Go ecosystem (includes legacy Godeps)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::GoMod,
            DatasourceId::GoModGraph,
            DatasourceId::GoSum,
            DatasourceId::GoWork,
            DatasourceId::Godeps,
        ],
        sibling_file_patterns: &[
            "go.mod",
            "go.work",
            "go.mod.graph",
            "go.modgraph",
            "go.sum",
            "Godeps.json",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Dart/Flutter ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Pixi ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Nix flakes: flake.nix + flake.lock
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
        sibling_file_patterns: &["flake.nix", "flake.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Nix default.nix (its datasource ID also gates the NixFlakeCompatMerge pass)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NixDefaultNix],
        sibling_file_patterns: &["default.nix"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Helm chart ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Haskell/Hackage ecosystem (HackageCabal also maps to a special directory merger)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::HackageCabal,
            DatasourceId::HackageCabalProject,
            DatasourceId::HackageStackYaml,
        ],
        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Chef ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::ChefCookbookMetadataJson,
            DatasourceId::ChefCookbookMetadataRb,
        ],
        sibling_file_patterns: &["metadata.json", "metadata.rb"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Conan (C/C++) ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::ConanConanFilePy,
            DatasourceId::ConanConanFileTxt,
            DatasourceId::ConanLock,
            DatasourceId::ConanConanDataYml,
        ],
        sibling_file_patterns: &[
            "conanfile.py",
            "conanfile.txt",
            "conan.lock",
            "conandata.yml",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Maven/Java ecosystem (nested merge via META-INF)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::MavenPom,
            DatasourceId::MavenPomProperties,
            DatasourceId::JavaJarManifest,
            DatasourceId::JavaOsgiManifest,
        ],
        sibling_file_patterns: &[
            "pom.xml",
            "*.pom",
            "pom.properties",
            "**/META-INF/MANIFEST.MF",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Python wheels paired with pip's origin.json
    // NOTE(review): PypiWheel is also listed in the Python/PyPI config below — confirm
    // which config is expected to take precedence for that datasource ID.
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
        sibling_file_patterns: &["*.whl", "origin.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Python/PyPI ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::PypiPyprojectToml,
            DatasourceId::PypiPoetryPyprojectToml,
            DatasourceId::PypiSetupPy,
            DatasourceId::PypiSetupCfg,
            DatasourceId::PypiWheel,
            DatasourceId::PypiWheelMetadata,
            DatasourceId::PypiEgg,
            DatasourceId::PypiEggPkginfo,
            DatasourceId::PypiEditableEggPkginfo,
            DatasourceId::PypiJson,
            DatasourceId::PypiSdist,
            DatasourceId::PypiSdistPkginfo,
            DatasourceId::PypiInspectDeplock,
            DatasourceId::PipRequirements,
            DatasourceId::PypiPoetryLock,
            DatasourceId::PypiPylockToml,
            DatasourceId::PypiUvLock,
            DatasourceId::Pipfile,
            DatasourceId::PipfileLock,
        ],
        sibling_file_patterns: &[
            "pyproject.toml",
            "setup.py",
            "setup.cfg",
            "PKG-INFO",
            "METADATA",
            "pypi.json",
            "pip-inspect.deplock",
            "*.tar.gz",
            "*.tgz",
            "*.tar.bz2",
            "*.tar.xz",
            "*.zip",
            "requirements*.txt",
            "Pipfile",
            "Pipfile.lock",
            "poetry.lock",
            "pylock.toml",
            "pylock.*.toml",
            "uv.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Deno ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ruby/RubyGems ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::GemArchiveExtracted,
            DatasourceId::Gemspec,
            DatasourceId::GemspecExtracted,
            DatasourceId::Gemfile,
            DatasourceId::GemfileExtracted,
            DatasourceId::GemfileLock,
            DatasourceId::GemfileLockExtracted,
        ],
        sibling_file_patterns: &[
            "metadata.gz-extract",
            "**/data.gz-extract/*.gemspec",
            "**/data.gz-extract/Gemfile",
            "**/data.gz-extract/Gemfile.lock",
            "*.gemspec",
            "Gemfile",
            "Gemfile.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ruby gem archives (*.gem)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::GemArchive],
        sibling_file_patterns: &["*.gem"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Conda ecosystem
    // NOTE(review): the trailing "*.json" pattern is very broad — presumably intended
    // for the CondaMetaJson records; confirm it cannot capture unrelated JSON files.
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CondaMetaYaml,
            DatasourceId::CondaYaml,
            DatasourceId::CondaMetaJson,
        ],
        sibling_file_patterns: &[
            "meta.yaml",
            "meta.yml",
            "recipe.yaml",
            "recipe.yml",
            "environment.yml",
            "environment.yaml",
            "conda.yaml",
            "conda.yml",
            "*conda*.yaml",
            "*conda*.yml",
            "env.yaml",
            "env.yml",
            "*env*.yaml",
            "*env*.yml",
            "*environment*.yaml",
            "*environment*.yml",
            "*.json",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM specfile (source packages)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmSpecfile],
        sibling_file_patterns: &["*.spec"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian source packages (nested merge via debian/ directory)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianControlInSource,
            DatasourceId::DebianCopyrightInSource,
        ],
        sibling_file_patterns: &["control", "copyright"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Gradle/Android ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Gradle module metadata (*.module)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        sibling_file_patterns: &["*.module"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // CPAN/Perl ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CpanMetaJson,
            DatasourceId::CpanMetaYml,
            DatasourceId::CpanManifest,
            DatasourceId::CpanDistIni,
            DatasourceId::CpanMakefile,
        ],
        sibling_file_patterns: &[
            "META.json",
            "META.yml",
            "MANIFEST",
            "dist.ini",
            "Makefile.PL",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // NuGet/.NET ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::NugetCsproj,
            DatasourceId::NugetFsproj,
            DatasourceId::NugetNuspec,
            DatasourceId::NugetNupkg,
            DatasourceId::NugetProjectJson,
            DatasourceId::NugetProjectLockJson,
            DatasourceId::NugetPackagesConfig,
            DatasourceId::NugetPackagesLock,
            DatasourceId::NugetVbproj,
        ],
        sibling_file_patterns: &[
            "*.csproj",
            "*.fsproj",
            "*.nuspec",
            "*.nupkg",
            "project.json",
            "project.lock.json",
            "packages.config",
            "packages.lock.json",
            "*.packages.lock.json",
            "*.vbproj",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // NuGet *.deps.json files
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NugetDepsJson],
        sibling_file_patterns: &["*.deps.json"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Swift/SPM ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::SwiftPackageManifestJson,
            DatasourceId::SwiftPackageResolved,
            DatasourceId::SwiftPackageShowDependencies,
        ],
        sibling_file_patterns: &[
            "Package.swift.json",
            "Package.swift.deplock",
            "Package.resolved",
            ".package.resolved",
            "swift-show-dependencies.deplock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // ── Standalone assemblers (single file → single package) ──
    //
    // These ecosystems have only one manifest file type with no sibling merging.
    // They still need configs so their datasource_ids are recognized by the assembler.
    //
    // Bower (JavaScript)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BowerJson],
        sibling_file_patterns: &["bower.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // CRAN (R language)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::CranDescription],
        sibling_file_patterns: &["DESCRIPTION"],
        mode: AssemblyMode::SiblingMerge,
    },
    // FreeBSD packages
    AssemblerConfig {
        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
        sibling_file_patterns: &["+COMPACT_MANIFEST"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Haxe ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::HaxelibJson],
        sibling_file_patterns: &["haxelib.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Git submodules (.gitmodules)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::Gitmodules],
        sibling_file_patterns: &[".gitmodules"],
        mode: AssemblyMode::SiblingMerge,
    },
    // OCaml/opam ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::OpamFile],
        sibling_file_patterns: &["opam", "*.opam"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM Mariner manifest
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmMarinerManifest],
        sibling_file_patterns: &["*.rpm.manifest"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Yum database entries (RpmYumdb also gates the RpmYumdbMerge pass)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmYumdb],
        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Microsoft Update Manifest
    AssemblerConfig {
        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
        sibling_file_patterns: &["*.mum"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Autotools (C/C++ build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AutotoolsConfigure],
        sibling_file_patterns: &["configure", "configure.ac"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Bazel (build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BazelBuild],
        sibling_file_patterns: &["BUILD"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Bazel module file (MODULE.bazel)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BazelModule],
        sibling_file_patterns: &["MODULE.bazel"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Buck (build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ant/Ivy (Java dependency management)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AntIvyXml],
        sibling_file_patterns: &["ivy.xml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Meteor (JavaScript platform)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::MeteorPackage],
        sibling_file_patterns: &["package.js"],
        mode: AssemblyMode::SiblingMerge,
    },
    // ── One-per-PackageData assemblers (database files with many packages) ──
    //
    // Alpine installed package database
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        sibling_file_patterns: &["installed"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Alpine APKBUILD (sibling-merge mode, unlike the database entries around it)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AlpineApkbuild],
        sibling_file_patterns: &["APKBUILD"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM installed package databases (BDB, NDB, SQLite)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::RpmInstalledDatabaseBdb,
            DatasourceId::RpmInstalledDatabaseNdb,
            DatasourceId::RpmInstalledDatabaseSqlite,
        ],
        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // RPM archives (*.rpm, *.srpm)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmArchive],
        sibling_file_patterns: &["*.rpm", "*.srpm"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian binary archives (*.deb)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DebianDeb],
        sibling_file_patterns: &["*.deb"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian installed package databases
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianInstalledStatusDb,
            DatasourceId::DebianDistrolessInstalledDb,
        ],
        sibling_file_patterns: &["status"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Control data from extracted .deb archives (sibling-merge mode)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianControlExtractedDeb,
            DatasourceId::DebianMd5SumsInExtractedDeb,
        ],
        sibling_file_patterns: &["control", "md5sums"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Debian source control files (*.dsc)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DebianSourceControlDsc],
        sibling_file_patterns: &["*.dsc"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // ABOUT files (*.ABOUT)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        sibling_file_patterns: &["*.ABOUT"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // BitBake recipes and recipe appends (sibling-merge mode)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::BitbakeRecipe,
            DatasourceId::BitbakeRecipeAppend,
        ],
        sibling_file_patterns: &["*.bb", "*.bbappend"],
        mode: AssemblyMode::SiblingMerge,
    },
];
854
855// Datasource IDs intentionally excluded from package assembly.
856//
857// This list is runtime-significant: files with these datasource IDs may remain
858// unowned by any Package, while their dependencies are still eligible for
859// top-level hoisting. Tests also use it to enforce explicit assembly accounting.
860pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
861    // Non-package metadata
862    DatasourceId::Readme,
863    DatasourceId::EtcOsRelease,
864    // Binary archives (require external extraction via ExtractCode before scanning)
865    DatasourceId::AlpineApkArchive,
866    DatasourceId::AndroidAab,
867    DatasourceId::AndroidAarLibrary,
868    DatasourceId::AndroidApk,
869    DatasourceId::AndroidManifestXml,
870    DatasourceId::AndroidSoongMetadata,
871    DatasourceId::AppleDmg,
872    DatasourceId::Axis2Mar,
873    DatasourceId::ChromeCrx,
874    DatasourceId::DebianOriginalSourceTarball,
875    DatasourceId::DebianSourceMetadataTarball,
876    DatasourceId::InstallshieldInstaller,
877    DatasourceId::IosIpa,
878    DatasourceId::IsoDiskImage,
879    DatasourceId::JavaEarArchive,
880    DatasourceId::JavaJar,
881    DatasourceId::JavaWarArchive,
882    DatasourceId::JbossSar,
883    DatasourceId::MicrosoftCabinet,
884    DatasourceId::MozillaXpi,
885    DatasourceId::NsisInstaller,
886    DatasourceId::SharShellArchive,
887    DatasourceId::SquashfsDiskImage,
888    // Supplementary metadata (not primary package definitions)
889    DatasourceId::ArchAurinfo,
890    DatasourceId::ArchPkginfo,
891    DatasourceId::ArchSrcinfo,
892    DatasourceId::Axis2ModuleXml,
893    DatasourceId::ClojureDepsEdn,
894    DatasourceId::ClojureProjectClj,
895    DatasourceId::DebianInstalledFilesList,
896    DatasourceId::DebianInstalledMd5Sums,
897    DatasourceId::DebianCopyright,
898    DatasourceId::DebianCopyrightInPackage,
899    DatasourceId::DebianCopyrightStandalone,
900    DatasourceId::GoBinary,
901    DatasourceId::WindowsExecutable,
902    DatasourceId::Dockerfile,
903    DatasourceId::ErlangOtpAppSrc,
904    DatasourceId::HexMixLock,
905    DatasourceId::JavaEarApplicationXml,
906    DatasourceId::JavaWarWebXml,
907    DatasourceId::JbossServiceXml,
908    DatasourceId::MesonBuild,
909    DatasourceId::GemGemspecInstalledSpecifications,
910    DatasourceId::NugetDirectoryBuildProps,
911    DatasourceId::NugetDirectoryPackagesProps,
912    DatasourceId::CitationCff,
913    DatasourceId::PubliccodeYaml,
914    DatasourceId::RpmPackageLicenses,
915    DatasourceId::RustBinary,
916    DatasourceId::SbtBuildSbt,
917    DatasourceId::VcpkgJson,
918];
919
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Each `DatasourceId` variant has to be listed in `ASSEMBLERS` or in
    /// `UNASSEMBLED_DATASOURCE_IDS`, and never in both.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        // Flatten the datasource IDs claimed by every assembler config.
        let assembled: HashSet<DatasourceId> = ASSEMBLERS
            .iter()
            .flat_map(|config| config.datasource_ids.iter().copied())
            .collect();

        let mut unassembled: HashSet<DatasourceId> = HashSet::new();
        unassembled.extend(UNASSEMBLED_DATASOURCE_IDS.iter().copied());

        // No ID may be claimed by both lists.
        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // Every variant must be claimed by at least one list.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// `POST_ASSEMBLY_PASSES` must not list the same pass twice.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let mut distinct: HashSet<PostAssemblyPassKind> = HashSet::new();
        distinct.extend(POST_ASSEMBLY_PASSES.iter().copied());

        assert_eq!(
            distinct.len(),
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Every `PostAssemblyPassKind` variant appears in `POST_ASSEMBLY_PASSES`
    /// exactly one time.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let mut registered: HashSet<PostAssemblyPassKind> = HashSet::new();
        registered.extend(POST_ASSEMBLY_PASSES.iter().copied());

        // First report all unregistered variants together for a readable failure.
        let missing: Vec<_> = PostAssemblyPassKind::iter()
            .filter(|kind| !registered.contains(kind))
            .collect();
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Then verify the exact multiplicity of each variant.
        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .copied()
                .filter(|entry| *entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    /// With default (empty) inputs, no pass should report that it needs to run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let empty_inputs = PostAssemblyInputs::default();

        for pass in PostAssemblyPassKind::iter() {
            let runs = pass.should_run(&empty_inputs);
            assert!(
                !runs,
                "{pass:?} should skip when no relevant inputs are present"
            );
        }
    }

    /// npm-only inputs with workspace markers select exactly the npm passes.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let npm_inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };

        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);

        let runnable: HashSet<_> = PostAssemblyPassKind::iter()
            .filter(|pass| pass.should_run(&npm_inputs))
            .collect();

        assert_eq!(runnable, expected);
    }

    /// The Cargo workspace merge runs only once workspace markers are present.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        let merge_pass = PostAssemblyPassKind::CargoWorkspaceMerge;

        let without_markers = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers: false,
        };
        assert!(!merge_pass.should_run(&without_markers));

        // Same inputs, with only the Cargo workspace marker flipped on.
        let with_markers = PostAssemblyInputs {
            has_cargo_workspace_markers: true,
            ..without_markers
        };
        assert!(merge_pass.should_run(&with_markers));
    }
}