Skip to main content

provenant/assembly/
assemblers.rs

1use std::collections::HashSet;
2
3use crate::models::PackageType;
4use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
5use strum::EnumIter;
6
7use super::{
8    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
9    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, file_ref_resolve,
10    hackage_merge, nix_flake_compat_merge, npm_resource_assign, nuget_cpm_resolve,
11    python_requirements_assign, ruby_resource_assign, swift_merge, topology,
12};
13
14#[derive(Clone, Copy)]
15pub(super) enum SpecialDirectoryMergerKind {
16    Skip,
17    Bazel,
18    Hackage,
19}
20
21#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
22pub(super) enum PostAssemblyPassKind {
23    SwiftMerge,
24    CondaRootfsMerge,
25    NpmResourceAssign,
26    PythonRequirementsAssign,
27    FileReferenceResolve,
28    RpmYumdbMerge,
29    NpmWorkspaceMerge,
30    CargoWorkspaceMerge,
31    NugetCpmResolve,
32    CargoResourceAssign,
33    ComposerResourceAssign,
34    RubyResourceAssign,
35    NixFlakeCompatMerge,
36    BazelPrune,
37}
38
39pub(super) fn special_directory_merger_for(
40    config_key: DatasourceId,
41) -> Option<SpecialDirectoryMergerKind> {
42    match config_key {
43        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
44        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
45        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
46        _ => None,
47    }
48}
49
50pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
51    PostAssemblyPassKind::SwiftMerge,
52    PostAssemblyPassKind::CondaRootfsMerge,
53    PostAssemblyPassKind::NpmResourceAssign,
54    PostAssemblyPassKind::PythonRequirementsAssign,
55    PostAssemblyPassKind::FileReferenceResolve,
56    PostAssemblyPassKind::RpmYumdbMerge,
57    PostAssemblyPassKind::NpmWorkspaceMerge,
58    PostAssemblyPassKind::CargoWorkspaceMerge,
59    PostAssemblyPassKind::NugetCpmResolve,
60    PostAssemblyPassKind::CargoResourceAssign,
61    PostAssemblyPassKind::ComposerResourceAssign,
62    PostAssemblyPassKind::RubyResourceAssign,
63    PostAssemblyPassKind::NixFlakeCompatMerge,
64    PostAssemblyPassKind::BazelPrune,
65];
66
67const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
68    DatasourceId::SwiftPackageManifestJson,
69    DatasourceId::SwiftPackageResolved,
70    DatasourceId::SwiftPackageShowDependencies,
71];
72
73const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
74    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];
75
76const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
77    DatasourceId::RpmInstalledDatabaseBdb,
78    DatasourceId::RpmInstalledDatabaseNdb,
79    DatasourceId::RpmInstalledDatabaseSqlite,
80];
81
82const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
83    DatasourceId::NugetDirectoryBuildProps,
84    DatasourceId::NugetDirectoryPackagesProps,
85];
86
87const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
88    DatasourceId::NugetCsproj,
89    DatasourceId::NugetFsproj,
90    DatasourceId::NugetVbproj,
91];
92
93#[derive(Default)]
94struct PostAssemblyInputs {
95    package_types: HashSet<PackageType>,
96    file_datasource_ids: HashSet<DatasourceId>,
97    has_npm_workspace_markers: bool,
98    has_cargo_workspace_markers: bool,
99}
100
101pub(super) fn run_post_assembly_passes(
102    files: &mut [FileInfo],
103    packages: &mut Vec<Package>,
104    dependencies: &mut Vec<TopLevelDependency>,
105    topology_plan: &topology::TopologyPlan,
106) {
107    let inputs = PostAssemblyInputs::collect(files, packages);
108
109    for pass in POST_ASSEMBLY_PASSES {
110        if !pass.should_run(&inputs) {
111            continue;
112        }
113
114        pass.run(files, packages, dependencies, topology_plan);
115    }
116}
117
118impl PostAssemblyInputs {
119    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
120        let mut inputs = Self {
121            package_types: packages
122                .iter()
123                .filter_map(|package| package.package_type)
124                .collect(),
125            ..Self::default()
126        };
127
128        for file in files {
129            for package_data in &file.package_data {
130                let Some(datasource_id) = package_data.datasource_id else {
131                    continue;
132                };
133
134                inputs.file_datasource_ids.insert(datasource_id);
135
136                if matches!(
137                    datasource_id,
138                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
139                ) && package_data
140                    .extra_data
141                    .as_ref()
142                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
143                {
144                    inputs.has_npm_workspace_markers = true;
145                }
146
147                if datasource_id == DatasourceId::CargoToml
148                    && package_data
149                        .extra_data
150                        .as_ref()
151                        .and_then(|extra_data| extra_data.get("workspace"))
152                        .and_then(|workspace| workspace.get("members"))
153                        .and_then(|members| members.as_array())
154                        .is_some_and(|members| !members.is_empty())
155                {
156                    inputs.has_cargo_workspace_markers = true;
157                }
158            }
159        }
160
161        inputs
162    }
163
164    fn has_package_type(&self, package_type: PackageType) -> bool {
165        self.package_types.contains(&package_type)
166    }
167
168    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
169        datasource_ids
170            .iter()
171            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
172    }
173
174    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
175        datasource_ids
176            .iter()
177            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
178    }
179}
180
181impl SpecialDirectoryMergerKind {
182    pub(super) fn run(
183        self,
184        config: &AssemblerConfig,
185        files: &[FileInfo],
186        file_indices: &[usize],
187    ) -> Vec<DirectoryMergeOutput> {
188        match self {
189            Self::Skip => Vec::new(),
190            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
191            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
192        }
193    }
194}
195
196impl PostAssemblyPassKind {
197    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
198        match self {
199            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
200            Self::CondaRootfsMerge => {
201                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
202            }
203            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
204            Self::PythonRequirementsAssign => {
205                inputs.has_package_type(PackageType::Pypi)
206                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
207            }
208            Self::FileReferenceResolve => {
209                file_ref_resolve::has_relevant_file_reference_datasource_ids(
210                    &inputs.file_datasource_ids,
211                )
212            }
213            Self::RpmYumdbMerge => {
214                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
215                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
216            }
217            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
218            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
219            Self::NugetCpmResolve => {
220                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
221                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
222            }
223            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
224            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
225            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
226            Self::NixFlakeCompatMerge => {
227                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
228                    && inputs.has_any_file_datasource(&[
229                        DatasourceId::NixFlakeNix,
230                        DatasourceId::NixFlakeLock,
231                    ])
232            }
233            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
234        }
235    }
236
237    fn run(
238        self,
239        files: &mut [FileInfo],
240        packages: &mut Vec<Package>,
241        dependencies: &mut Vec<TopLevelDependency>,
242        topology_plan: &topology::TopologyPlan,
243    ) {
244        match self {
245            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
246            Self::CondaRootfsMerge => {
247                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
248            }
249            Self::NpmResourceAssign => {
250                npm_resource_assign::assign_npm_package_resources(files, packages)
251            }
252            Self::PythonRequirementsAssign => {
253                python_requirements_assign::assign_python_requirements_to_projects(
254                    files,
255                    packages,
256                    dependencies,
257                )
258            }
259            Self::FileReferenceResolve => {
260                file_ref_resolve::resolve_file_references(files, packages, dependencies)
261            }
262            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
263            Self::NpmWorkspaceMerge => {
264                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
265            }
266            Self::CargoWorkspaceMerge => {
267                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
268            }
269            Self::NugetCpmResolve => {
270                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
271            }
272            Self::CargoResourceAssign => {
273                cargo_resource_assign::assign_cargo_package_resources(files, packages)
274            }
275            Self::ComposerResourceAssign => {
276                composer_resource_assign::assign_composer_package_resources(files, packages)
277            }
278            Self::RubyResourceAssign => {
279                ruby_resource_assign::assign_ruby_package_resources(files, packages)
280            }
281            Self::NixFlakeCompatMerge => {
282                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
283            }
284            Self::BazelPrune => {
285                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
286            }
287        }
288    }
289}
290
291pub static ASSEMBLERS: &[AssemblerConfig] = &[
292    // ── Sibling-merge assemblers ──
293    //
294    // npm ecosystem: package.json + lockfiles in same directory.
295    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
296    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
297    AssemblerConfig {
298        datasource_ids: &[
299            DatasourceId::BunLock,
300            DatasourceId::BunLockb,
301            DatasourceId::NpmPackageJson,
302            DatasourceId::NpmPackageLockJson,
303            DatasourceId::YarnLock,
304            DatasourceId::YarnLockV1,
305            DatasourceId::YarnLockV2,
306            DatasourceId::YarnPnpCjs,
307            DatasourceId::PnpmLockYaml,
308            DatasourceId::PnpmWorkspaceYaml,
309        ],
310        sibling_file_patterns: &[
311            "package.json",
312            "bun.lock",
313            "bun.lockb",
314            ".package-lock.json",
315            "package-lock.json",
316            ".npm-shrinkwrap.json",
317            "npm-shrinkwrap.json",
318            "yarn.lock",
319            ".pnp.cjs",
320            "pnpm-lock.yaml",
321            "shrinkwrap.yaml",
322            "pnpm-workspace.yaml",
323        ],
324        mode: AssemblyMode::SiblingMerge,
325    },
326    // Rust/Cargo ecosystem
327    AssemblerConfig {
328        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
329        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
330        mode: AssemblyMode::SiblingMerge,
331    },
332    // CocoaPods ecosystem
333    AssemblerConfig {
334        datasource_ids: &[
335            DatasourceId::CocoapodsPodspec,
336            DatasourceId::CocoapodsPodspecJson,
337            DatasourceId::CocoapodsPodfile,
338            DatasourceId::CocoapodsPodfileLock,
339        ],
340        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
341        mode: AssemblyMode::SiblingMerge,
342    },
343    // PHP Composer ecosystem
344    AssemblerConfig {
345        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
346        sibling_file_patterns: &[
347            "*composer.json",
348            "composer.*.json",
349            "*composer.lock",
350            "composer.*.lock",
351        ],
352        mode: AssemblyMode::SiblingMerge,
353    },
354    // Go ecosystem (includes legacy Godeps)
355    AssemblerConfig {
356        datasource_ids: &[
357            DatasourceId::GoMod,
358            DatasourceId::GoModGraph,
359            DatasourceId::GoSum,
360            DatasourceId::GoWork,
361            DatasourceId::Godeps,
362        ],
363        sibling_file_patterns: &[
364            "go.mod",
365            "go.work",
366            "go.mod.graph",
367            "go.modgraph",
368            "go.sum",
369            "Godeps.json",
370        ],
371        mode: AssemblyMode::SiblingMerge,
372    },
373    // Dart/Flutter ecosystem
374    AssemblerConfig {
375        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
376        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
377        mode: AssemblyMode::SiblingMerge,
378    },
379    // Pixi ecosystem
380    AssemblerConfig {
381        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
382        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
383        mode: AssemblyMode::SiblingMerge,
384    },
385    AssemblerConfig {
386        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
387        sibling_file_patterns: &["flake.nix", "flake.lock"],
388        mode: AssemblyMode::SiblingMerge,
389    },
390    AssemblerConfig {
391        datasource_ids: &[DatasourceId::NixDefaultNix],
392        sibling_file_patterns: &["default.nix"],
393        mode: AssemblyMode::OnePerPackageData,
394    },
395    // Helm chart ecosystem
396    AssemblerConfig {
397        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
398        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
399        mode: AssemblyMode::SiblingMerge,
400    },
401    AssemblerConfig {
402        datasource_ids: &[
403            DatasourceId::HackageCabal,
404            DatasourceId::HackageCabalProject,
405            DatasourceId::HackageStackYaml,
406        ],
407        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
408        mode: AssemblyMode::SiblingMerge,
409    },
410    // Chef ecosystem
411    AssemblerConfig {
412        datasource_ids: &[
413            DatasourceId::ChefCookbookMetadataJson,
414            DatasourceId::ChefCookbookMetadataRb,
415        ],
416        sibling_file_patterns: &["metadata.json", "metadata.rb"],
417        mode: AssemblyMode::SiblingMerge,
418    },
419    // Conan (C/C++) ecosystem
420    AssemblerConfig {
421        datasource_ids: &[
422            DatasourceId::ConanConanFilePy,
423            DatasourceId::ConanConanFileTxt,
424            DatasourceId::ConanLock,
425            DatasourceId::ConanConanDataYml,
426        ],
427        sibling_file_patterns: &[
428            "conanfile.py",
429            "conanfile.txt",
430            "conan.lock",
431            "conandata.yml",
432        ],
433        mode: AssemblyMode::SiblingMerge,
434    },
435    // Maven/Java ecosystem (nested merge via META-INF)
436    AssemblerConfig {
437        datasource_ids: &[
438            DatasourceId::MavenPom,
439            DatasourceId::MavenPomProperties,
440            DatasourceId::JavaJarManifest,
441            DatasourceId::JavaOsgiManifest,
442        ],
443        sibling_file_patterns: &[
444            "pom.xml",
445            "*.pom",
446            "pom.properties",
447            "**/META-INF/MANIFEST.MF",
448        ],
449        mode: AssemblyMode::SiblingMerge,
450    },
451    AssemblerConfig {
452        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
453        sibling_file_patterns: &["*.whl", "origin.json"],
454        mode: AssemblyMode::SiblingMerge,
455    },
456    // Python/PyPI ecosystem
457    AssemblerConfig {
458        datasource_ids: &[
459            DatasourceId::PypiPyprojectToml,
460            DatasourceId::PypiPoetryPyprojectToml,
461            DatasourceId::PypiSetupPy,
462            DatasourceId::PypiSetupCfg,
463            DatasourceId::PypiWheel,
464            DatasourceId::PypiWheelMetadata,
465            DatasourceId::PypiEgg,
466            DatasourceId::PypiEggPkginfo,
467            DatasourceId::PypiEditableEggPkginfo,
468            DatasourceId::PypiJson,
469            DatasourceId::PypiSdist,
470            DatasourceId::PypiSdistPkginfo,
471            DatasourceId::PypiInspectDeplock,
472            DatasourceId::PipRequirements,
473            DatasourceId::PypiPoetryLock,
474            DatasourceId::PypiPylockToml,
475            DatasourceId::PypiUvLock,
476            DatasourceId::Pipfile,
477            DatasourceId::PipfileLock,
478        ],
479        sibling_file_patterns: &[
480            "pyproject.toml",
481            "setup.py",
482            "setup.cfg",
483            "PKG-INFO",
484            "METADATA",
485            "pypi.json",
486            "pip-inspect.deplock",
487            "*.tar.gz",
488            "*.tgz",
489            "*.tar.bz2",
490            "*.tar.xz",
491            "*.zip",
492            "requirements*.txt",
493            "Pipfile",
494            "Pipfile.lock",
495            "poetry.lock",
496            "pylock.toml",
497            "pylock.*.toml",
498            "uv.lock",
499        ],
500        mode: AssemblyMode::SiblingMerge,
501    },
502    AssemblerConfig {
503        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
504        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
505        mode: AssemblyMode::SiblingMerge,
506    },
507    // Ruby/RubyGems ecosystem
508    AssemblerConfig {
509        datasource_ids: &[
510            DatasourceId::GemArchiveExtracted,
511            DatasourceId::Gemspec,
512            DatasourceId::GemspecExtracted,
513            DatasourceId::Gemfile,
514            DatasourceId::GemfileExtracted,
515            DatasourceId::GemfileLock,
516            DatasourceId::GemfileLockExtracted,
517            DatasourceId::GemArchive,
518        ],
519        sibling_file_patterns: &[
520            "metadata.gz-extract",
521            "**/data.gz-extract/*.gemspec",
522            "**/data.gz-extract/Gemfile",
523            "**/data.gz-extract/Gemfile.lock",
524            "*.gemspec",
525            "Gemfile",
526            "Gemfile.lock",
527        ],
528        mode: AssemblyMode::SiblingMerge,
529    },
530    // Conda ecosystem
531    AssemblerConfig {
532        datasource_ids: &[
533            DatasourceId::CondaMetaYaml,
534            DatasourceId::CondaYaml,
535            DatasourceId::CondaMetaJson,
536        ],
537        sibling_file_patterns: &[
538            "meta.yaml",
539            "meta.yml",
540            "environment.yml",
541            "environment.yaml",
542            "conda.yaml",
543            "conda.yml",
544            "*conda*.yaml",
545            "*conda*.yml",
546            "env.yaml",
547            "env.yml",
548            "*env*.yaml",
549            "*env*.yml",
550            "*environment*.yaml",
551            "*environment*.yml",
552            "*.json",
553        ],
554        mode: AssemblyMode::SiblingMerge,
555    },
556    // RPM specfile (source packages)
557    AssemblerConfig {
558        datasource_ids: &[DatasourceId::RpmSpecfile],
559        sibling_file_patterns: &["*.spec"],
560        mode: AssemblyMode::SiblingMerge,
561    },
562    // Debian source packages (nested merge via debian/ directory)
563    AssemblerConfig {
564        datasource_ids: &[
565            DatasourceId::DebianControlInSource,
566            DatasourceId::DebianCopyrightInSource,
567        ],
568        sibling_file_patterns: &["**/debian/control", "**/debian/copyright"],
569        mode: AssemblyMode::SiblingMerge,
570    },
571    // Gradle/Android ecosystem
572    AssemblerConfig {
573        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
574        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
575        mode: AssemblyMode::SiblingMerge,
576    },
577    AssemblerConfig {
578        datasource_ids: &[DatasourceId::GradleModule],
579        sibling_file_patterns: &["*.module"],
580        mode: AssemblyMode::OnePerPackageData,
581    },
582    // CPAN/Perl ecosystem
583    AssemblerConfig {
584        datasource_ids: &[
585            DatasourceId::CpanMetaJson,
586            DatasourceId::CpanMetaYml,
587            DatasourceId::CpanManifest,
588            DatasourceId::CpanDistIni,
589            DatasourceId::CpanMakefile,
590        ],
591        sibling_file_patterns: &[
592            "META.json",
593            "META.yml",
594            "MANIFEST",
595            "dist.ini",
596            "Makefile.PL",
597        ],
598        mode: AssemblyMode::SiblingMerge,
599    },
600    // NuGet/.NET ecosystem
601    AssemblerConfig {
602        datasource_ids: &[
603            DatasourceId::NugetCsproj,
604            DatasourceId::NugetFsproj,
605            DatasourceId::NugetNuspec,
606            DatasourceId::NugetNupkg,
607            DatasourceId::NugetProjectJson,
608            DatasourceId::NugetProjectLockJson,
609            DatasourceId::NugetPackagesConfig,
610            DatasourceId::NugetPackagesLock,
611            DatasourceId::NugetVbproj,
612        ],
613        sibling_file_patterns: &[
614            "*.csproj",
615            "*.fsproj",
616            "*.nuspec",
617            "*.nupkg",
618            "project.json",
619            "project.lock.json",
620            "packages.config",
621            "packages.lock.json",
622            "*.packages.lock.json",
623            "*.vbproj",
624        ],
625        mode: AssemblyMode::SiblingMerge,
626    },
627    AssemblerConfig {
628        datasource_ids: &[DatasourceId::NugetDepsJson],
629        sibling_file_patterns: &["*.deps.json"],
630        mode: AssemblyMode::OnePerPackageData,
631    },
632    // Swift/SPM ecosystem
633    AssemblerConfig {
634        datasource_ids: &[
635            DatasourceId::SwiftPackageManifestJson,
636            DatasourceId::SwiftPackageResolved,
637            DatasourceId::SwiftPackageShowDependencies,
638        ],
639        sibling_file_patterns: &[
640            "Package.swift.json",
641            "Package.swift.deplock",
642            "Package.resolved",
643            ".package.resolved",
644            "swift-show-dependencies.deplock",
645        ],
646        mode: AssemblyMode::SiblingMerge,
647    },
648    // ── Standalone assemblers (single file → single package) ──
649    //
650    // These ecosystems have only one manifest file type with no sibling merging.
651    // They still need configs so their datasource_ids are recognized by the assembler.
652    //
653    // Bower (JavaScript)
654    AssemblerConfig {
655        datasource_ids: &[DatasourceId::BowerJson],
656        sibling_file_patterns: &["bower.json"],
657        mode: AssemblyMode::SiblingMerge,
658    },
659    // CRAN (R language)
660    AssemblerConfig {
661        datasource_ids: &[DatasourceId::CranDescription],
662        sibling_file_patterns: &["DESCRIPTION"],
663        mode: AssemblyMode::SiblingMerge,
664    },
665    // FreeBSD packages
666    AssemblerConfig {
667        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
668        sibling_file_patterns: &["+COMPACT_MANIFEST"],
669        mode: AssemblyMode::SiblingMerge,
670    },
671    // Haxe ecosystem
672    AssemblerConfig {
673        datasource_ids: &[DatasourceId::HaxelibJson],
674        sibling_file_patterns: &["haxelib.json"],
675        mode: AssemblyMode::SiblingMerge,
676    },
677    AssemblerConfig {
678        datasource_ids: &[DatasourceId::Gitmodules],
679        sibling_file_patterns: &[".gitmodules"],
680        mode: AssemblyMode::SiblingMerge,
681    },
682    // OCaml/opam ecosystem
683    AssemblerConfig {
684        datasource_ids: &[DatasourceId::OpamFile],
685        sibling_file_patterns: &["opam", "*.opam"],
686        mode: AssemblyMode::SiblingMerge,
687    },
688    // RPM Mariner manifest
689    AssemblerConfig {
690        datasource_ids: &[DatasourceId::RpmMarinerManifest],
691        sibling_file_patterns: &["*.rpm.manifest"],
692        mode: AssemblyMode::SiblingMerge,
693    },
694    AssemblerConfig {
695        datasource_ids: &[DatasourceId::RpmYumdb],
696        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
697        mode: AssemblyMode::OnePerPackageData,
698    },
699    // Microsoft Update Manifest
700    AssemblerConfig {
701        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
702        sibling_file_patterns: &["*.mum"],
703        mode: AssemblyMode::SiblingMerge,
704    },
705    // Autotools (C/C++ build system)
706    AssemblerConfig {
707        datasource_ids: &[DatasourceId::AutotoolsConfigure],
708        sibling_file_patterns: &["configure", "configure.ac"],
709        mode: AssemblyMode::SiblingMerge,
710    },
711    // Bazel (build system)
712    AssemblerConfig {
713        datasource_ids: &[DatasourceId::BazelBuild],
714        sibling_file_patterns: &["BUILD"],
715        mode: AssemblyMode::SiblingMerge,
716    },
717    AssemblerConfig {
718        datasource_ids: &[DatasourceId::BazelModule],
719        sibling_file_patterns: &["MODULE.bazel"],
720        mode: AssemblyMode::OnePerPackageData,
721    },
722    // Buck (build system)
723    AssemblerConfig {
724        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
725        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
726        mode: AssemblyMode::SiblingMerge,
727    },
728    // Ant/Ivy (Java dependency management)
729    AssemblerConfig {
730        datasource_ids: &[DatasourceId::AntIvyXml],
731        sibling_file_patterns: &["ivy.xml"],
732        mode: AssemblyMode::SiblingMerge,
733    },
734    // Meteor (JavaScript platform)
735    AssemblerConfig {
736        datasource_ids: &[DatasourceId::MeteorPackage],
737        sibling_file_patterns: &["package.js"],
738        mode: AssemblyMode::SiblingMerge,
739    },
740    // ── One-per-PackageData assemblers (database files with many packages) ──
741    //
742    // Alpine installed package database
743    AssemblerConfig {
744        datasource_ids: &[DatasourceId::AlpineInstalledDb],
745        sibling_file_patterns: &["installed"],
746        mode: AssemblyMode::OnePerPackageData,
747    },
748    AssemblerConfig {
749        datasource_ids: &[DatasourceId::AlpineApkbuild],
750        sibling_file_patterns: &["APKBUILD"],
751        mode: AssemblyMode::SiblingMerge,
752    },
753    // RPM installed package databases (BDB, NDB, SQLite)
754    AssemblerConfig {
755        datasource_ids: &[
756            DatasourceId::RpmInstalledDatabaseBdb,
757            DatasourceId::RpmInstalledDatabaseNdb,
758            DatasourceId::RpmInstalledDatabaseSqlite,
759        ],
760        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
761        mode: AssemblyMode::OnePerPackageData,
762    },
763    // Debian installed package databases
764    AssemblerConfig {
765        datasource_ids: &[
766            DatasourceId::DebianInstalledStatusDb,
767            DatasourceId::DebianDistrolessInstalledDb,
768        ],
769        sibling_file_patterns: &["status"],
770        mode: AssemblyMode::OnePerPackageData,
771    },
772    AssemblerConfig {
773        datasource_ids: &[
774            DatasourceId::DebianControlExtractedDeb,
775            DatasourceId::DebianMd5SumsInExtractedDeb,
776        ],
777        sibling_file_patterns: &["control", "md5sums"],
778        mode: AssemblyMode::SiblingMerge,
779    },
780    AssemblerConfig {
781        datasource_ids: &[DatasourceId::AboutFile],
782        sibling_file_patterns: &["*.ABOUT"],
783        mode: AssemblyMode::OnePerPackageData,
784    },
785];
786
787// Datasource IDs intentionally excluded from package assembly.
788//
789// This list is runtime-significant: files with these datasource IDs may remain
790// unowned by any Package, while their dependencies are still eligible for
791// top-level hoisting. Tests also use it to enforce explicit assembly accounting.
792pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
793    // Non-package metadata
794    DatasourceId::Readme,
795    DatasourceId::EtcOsRelease,
796    // Binary archives (require external extraction via ExtractCode before scanning)
797    DatasourceId::AlpineApkArchive,
798    DatasourceId::AndroidAarLibrary,
799    DatasourceId::AndroidApk,
800    DatasourceId::AppleDmg,
801    DatasourceId::Axis2Mar,
802    DatasourceId::ChromeCrx,
803    DatasourceId::DebianDeb,
804    DatasourceId::DebianOriginalSourceTarball,
805    DatasourceId::DebianSourceMetadataTarball,
806    DatasourceId::InstallshieldInstaller,
807    DatasourceId::IosIpa,
808    DatasourceId::IsoDiskImage,
809    DatasourceId::JavaEarArchive,
810    DatasourceId::JavaJar,
811    DatasourceId::JavaWarArchive,
812    DatasourceId::JbossSar,
813    DatasourceId::MicrosoftCabinet,
814    DatasourceId::MozillaXpi,
815    DatasourceId::NsisInstaller,
816    DatasourceId::RpmArchive,
817    DatasourceId::SharShellArchive,
818    DatasourceId::SquashfsDiskImage,
819    // Supplementary metadata (not primary package definitions)
820    DatasourceId::ArchAurinfo,
821    DatasourceId::ArchPkginfo,
822    DatasourceId::ArchSrcinfo,
823    DatasourceId::Axis2ModuleXml,
824    DatasourceId::ClojureDepsEdn,
825    DatasourceId::ClojureProjectClj,
826    DatasourceId::DebianInstalledFilesList,
827    DatasourceId::DebianInstalledMd5Sums,
828    DatasourceId::DebianCopyright,
829    DatasourceId::DebianCopyrightInPackage,
830    DatasourceId::DebianCopyrightStandalone,
831    DatasourceId::GoBinary,
832    DatasourceId::WindowsExecutable,
833    DatasourceId::DebianSourceControlDsc,
834    DatasourceId::Dockerfile,
835    DatasourceId::HexMixLock,
836    DatasourceId::JavaEarApplicationXml,
837    DatasourceId::JavaWarWebXml,
838    DatasourceId::JbossServiceXml,
839    DatasourceId::MesonBuild,
840    DatasourceId::GemGemspecInstalledSpecifications,
841    DatasourceId::NugetDirectoryBuildProps,
842    DatasourceId::NugetDirectoryPackagesProps,
843    DatasourceId::CitationCff,
844    DatasourceId::PubliccodeYaml,
845    DatasourceId::RpmPackageLicenses,
846    DatasourceId::RustBinary,
847    DatasourceId::SbtBuildSbt,
848    DatasourceId::VcpkgJson,
849];
850
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Each `DatasourceId` variant must appear either in some `ASSEMBLERS`
    /// entry or in `UNASSEMBLED_DATASOURCE_IDS`, and never in both.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        let unassembled: HashSet<DatasourceId> =
            UNASSEMBLED_DATASOURCE_IDS.iter().copied().collect();

        // Union of the datasource IDs claimed by every assembler config.
        let mut assembled: HashSet<DatasourceId> = HashSet::new();
        for config in ASSEMBLERS {
            assembled.extend(config.datasource_ids.iter().copied());
        }

        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // An ID is missing when neither side lists it.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// `POST_ASSEMBLY_PASSES` must not list the same pass twice.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let distinct_count = POST_ASSEMBLY_PASSES
            .iter()
            .copied()
            .collect::<HashSet<PostAssemblyPassKind>>()
            .len();

        assert_eq!(
            distinct_count,
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Every `PostAssemblyPassKind` variant must occur exactly once in
    /// `POST_ASSEMBLY_PASSES`.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        // First report all unregistered variants at once for a readable failure.
        let missing: Vec<_> = PostAssemblyPassKind::iter()
            .filter(|pass| !POST_ASSEMBLY_PASSES.contains(pass))
            .collect();
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Then verify the exact multiplicity of each variant.
        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|&&entry| entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    /// With default (empty) inputs, no pass should report that it needs to run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let inputs = PostAssemblyInputs::default();

        for pass in PostAssemblyPassKind::iter() {
            let wants_to_run = pass.should_run(&inputs);
            assert!(
                !wants_to_run,
                "{pass:?} should skip when no relevant inputs are present"
            );
        }
    }

    /// Inputs describing an npm workspace should enable the npm passes and
    /// nothing else.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };

        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);

        let runnable: HashSet<_> = PostAssemblyPassKind::iter()
            .filter(|pass| pass.should_run(&inputs))
            .collect();

        assert_eq!(runnable, expected);
    }

    /// `CargoWorkspaceMerge` should run only when Cargo workspace markers are
    /// present; Cargo package types and manifests alone are not enough.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        // Builds Cargo-only inputs that differ solely in the workspace-marker flag.
        let cargo_inputs = |has_cargo_workspace_markers: bool| PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers,
        };

        let without_markers = cargo_inputs(false);
        assert!(!PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&without_markers));

        let with_markers = cargo_inputs(true);
        assert!(PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&with_markers));
    }
}