Skip to main content

provenant/assembly/
assemblers.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashSet;
5
6use crate::models::PackageType;
7use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
8use strum::EnumIter;
9
10use super::{
11    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
12    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, debian_source_merge,
13    file_ref_resolve, hackage_merge, nix_flake_compat_merge, npm_resource_assign,
14    nuget_cpm_resolve, python_requirements_assign, ruby_resource_assign, swift_merge, topology,
15};
16
17#[derive(Clone, Copy)]
18pub(super) enum SpecialDirectoryMergerKind {
19    Skip,
20    Bazel,
21    DebianSource,
22    Hackage,
23}
24
/// One post-assembly pass.
///
/// Each variant has a gate (`should_run`) and an action (`run`); both are
/// defined on the `impl` block for this enum. The per-variant notes below name
/// the function that `run` delegates to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
pub(super) enum PostAssemblyPassKind {
    /// `swift_merge::assemble_swift_packages`
    SwiftMerge,
    /// `conda_rootfs_merge::merge_conda_rootfs_metadata`
    CondaRootfsMerge,
    /// `npm_resource_assign::assign_npm_package_resources`
    NpmResourceAssign,
    /// `python_requirements_assign::assign_python_requirements_to_projects`
    PythonRequirementsAssign,
    /// `file_ref_resolve::resolve_file_references`
    FileReferenceResolve,
    /// `file_ref_resolve::merge_rpm_yumdb_metadata`
    RpmYumdbMerge,
    /// `TopologyPlan::apply_npm_workspace_domains`
    NpmWorkspaceMerge,
    /// `TopologyPlan::apply_cargo_workspace_domains`
    CargoWorkspaceMerge,
    /// `nuget_cpm_resolve::resolve_nuget_cpm_versions`
    NugetCpmResolve,
    /// `cargo_resource_assign::assign_cargo_package_resources`
    CargoResourceAssign,
    /// `composer_resource_assign::assign_composer_package_resources`
    ComposerResourceAssign,
    /// `ruby_resource_assign::assign_ruby_package_resources`
    RubyResourceAssign,
    /// `nix_flake_compat_merge::attach_flake_compat_default_files`
    NixFlakeCompatMerge,
    /// `bazel_prune::prune_unused_bazel_packages`
    BazelPrune,
}
42
43pub(super) fn special_directory_merger_for(
44    config_key: DatasourceId,
45) -> Option<SpecialDirectoryMergerKind> {
46    match config_key {
47        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
48        DatasourceId::DebianControlInSource => Some(SpecialDirectoryMergerKind::DebianSource),
49        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
50        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
51        _ => None,
52    }
53}
54
/// Post-assembly passes in execution order.
///
/// `run_post_assembly_passes` walks this slice front to back, so the position
/// of an entry determines when its pass runs relative to the others.
pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
    PostAssemblyPassKind::SwiftMerge,
    PostAssemblyPassKind::CondaRootfsMerge,
    PostAssemblyPassKind::NpmResourceAssign,
    PostAssemblyPassKind::PythonRequirementsAssign,
    PostAssemblyPassKind::FileReferenceResolve,
    PostAssemblyPassKind::RpmYumdbMerge,
    PostAssemblyPassKind::NpmWorkspaceMerge,
    PostAssemblyPassKind::CargoWorkspaceMerge,
    PostAssemblyPassKind::NugetCpmResolve,
    PostAssemblyPassKind::CargoResourceAssign,
    PostAssemblyPassKind::ComposerResourceAssign,
    PostAssemblyPassKind::RubyResourceAssign,
    PostAssemblyPassKind::NixFlakeCompatMerge,
    PostAssemblyPassKind::BazelPrune,
];
71
/// Datasource IDs that gate `PostAssemblyPassKind::SwiftMerge`: the pass runs
/// when at least one of them was seen on a scanned file.
const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::SwiftPackageManifestJson,
    DatasourceId::SwiftPackageResolved,
    DatasourceId::SwiftPackageShowDependencies,
];

/// Datasource IDs that gate `PostAssemblyPassKind::CondaRootfsMerge`: the pass
/// runs only when every one of them was seen.
const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];

/// RPM installed-database datasource IDs. `PostAssemblyPassKind::RpmYumdbMerge`
/// requires at least one of them in addition to `DatasourceId::RpmYumdb`.
const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];

/// NuGet configuration-side datasource IDs; one half of the gate for
/// `PostAssemblyPassKind::NugetCpmResolve`.
const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::NugetDirectoryBuildProps,
    DatasourceId::NugetDirectoryPackagesProps,
];

/// NuGet project-file datasource IDs; the other half of the gate for
/// `PostAssemblyPassKind::NugetCpmResolve`.
const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::NugetCsproj,
    DatasourceId::NugetFsproj,
    DatasourceId::NugetVbproj,
];
97
/// Summary of the scan used to decide which post-assembly passes should run.
///
/// Built by `PostAssemblyInputs::collect` and read by
/// `PostAssemblyPassKind::should_run`.
#[derive(Default)]
struct PostAssemblyInputs {
    // Package types present on the assembled packages (packages without a type are ignored).
    package_types: HashSet<PackageType>,
    // Datasource IDs found on any file's package data.
    file_datasource_ids: HashSet<DatasourceId>,
    // True when npm package.json / pnpm workspace data carries a "workspaces" key in its extra data.
    has_npm_workspace_markers: bool,
    // True when Cargo.toml extra data has a non-empty `workspace.members` array.
    has_cargo_workspace_markers: bool,
}
105
106pub(super) fn run_post_assembly_passes(
107    files: &mut [FileInfo],
108    packages: &mut Vec<Package>,
109    dependencies: &mut Vec<TopLevelDependency>,
110    topology_plan: &topology::TopologyPlan,
111) {
112    let inputs = PostAssemblyInputs::collect(files, packages);
113
114    for pass in POST_ASSEMBLY_PASSES {
115        if !pass.should_run(&inputs) {
116            continue;
117        }
118
119        pass.run(files, packages, dependencies, topology_plan);
120    }
121}
122
123impl PostAssemblyInputs {
124    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
125        let mut inputs = Self {
126            package_types: packages
127                .iter()
128                .filter_map(|package| package.package_type)
129                .collect(),
130            ..Self::default()
131        };
132
133        for file in files {
134            for package_data in &file.package_data {
135                let Some(datasource_id) = package_data.datasource_id else {
136                    continue;
137                };
138
139                inputs.file_datasource_ids.insert(datasource_id);
140
141                if matches!(
142                    datasource_id,
143                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
144                ) && package_data
145                    .extra_data
146                    .as_ref()
147                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
148                {
149                    inputs.has_npm_workspace_markers = true;
150                }
151
152                if datasource_id == DatasourceId::CargoToml
153                    && package_data
154                        .extra_data
155                        .as_ref()
156                        .and_then(|extra_data| extra_data.get("workspace"))
157                        .and_then(|workspace| workspace.get("members"))
158                        .and_then(|members| members.as_array())
159                        .is_some_and(|members| !members.is_empty())
160                {
161                    inputs.has_cargo_workspace_markers = true;
162                }
163            }
164        }
165
166        inputs
167    }
168
169    fn has_package_type(&self, package_type: PackageType) -> bool {
170        self.package_types.contains(&package_type)
171    }
172
173    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
174        datasource_ids
175            .iter()
176            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
177    }
178
179    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
180        datasource_ids
181            .iter()
182            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
183    }
184}
185
186impl SpecialDirectoryMergerKind {
187    pub(super) fn run(
188        self,
189        config: &AssemblerConfig,
190        files: &[FileInfo],
191        file_indices: &[usize],
192    ) -> Vec<DirectoryMergeOutput> {
193        match self {
194            Self::Skip => Vec::new(),
195            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
196            Self::DebianSource => {
197                debian_source_merge::assemble_debian_source_packages(config, files, file_indices)
198            }
199            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
200        }
201    }
202}
203
204impl PostAssemblyPassKind {
205    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
206        match self {
207            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
208            Self::CondaRootfsMerge => {
209                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
210            }
211            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
212            Self::PythonRequirementsAssign => {
213                inputs.has_package_type(PackageType::Pypi)
214                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
215            }
216            Self::FileReferenceResolve => {
217                file_ref_resolve::has_relevant_file_reference_datasource_ids(
218                    &inputs.file_datasource_ids,
219                )
220            }
221            Self::RpmYumdbMerge => {
222                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
223                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
224            }
225            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
226            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
227            Self::NugetCpmResolve => {
228                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
229                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
230            }
231            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
232            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
233            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
234            Self::NixFlakeCompatMerge => {
235                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
236                    && inputs.has_any_file_datasource(&[
237                        DatasourceId::NixFlakeNix,
238                        DatasourceId::NixFlakeLock,
239                    ])
240            }
241            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
242        }
243    }
244
245    fn run(
246        self,
247        files: &mut [FileInfo],
248        packages: &mut Vec<Package>,
249        dependencies: &mut Vec<TopLevelDependency>,
250        topology_plan: &topology::TopologyPlan,
251    ) {
252        match self {
253            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
254            Self::CondaRootfsMerge => {
255                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
256            }
257            Self::NpmResourceAssign => {
258                npm_resource_assign::assign_npm_package_resources(files, packages)
259            }
260            Self::PythonRequirementsAssign => {
261                python_requirements_assign::assign_python_requirements_to_projects(
262                    files,
263                    packages,
264                    dependencies,
265                )
266            }
267            Self::FileReferenceResolve => {
268                file_ref_resolve::resolve_file_references(files, packages, dependencies)
269            }
270            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
271            Self::NpmWorkspaceMerge => {
272                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
273            }
274            Self::CargoWorkspaceMerge => {
275                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
276            }
277            Self::NugetCpmResolve => {
278                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
279            }
280            Self::CargoResourceAssign => {
281                cargo_resource_assign::assign_cargo_package_resources(files, packages)
282            }
283            Self::ComposerResourceAssign => {
284                composer_resource_assign::assign_composer_package_resources(files, packages)
285            }
286            Self::RubyResourceAssign => {
287                ruby_resource_assign::assign_ruby_package_resources(files, packages)
288            }
289            Self::NixFlakeCompatMerge => {
290                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
291            }
292            Self::BazelPrune => {
293                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
294            }
295        }
296    }
297}
298
/// Assembler configuration table.
///
/// Each entry lists the datasource IDs it covers, the file-name patterns of
/// the files that belong together, and the `AssemblyMode` used to combine
/// them. NOTE(review): the exact semantics of each `AssemblyMode` and of the
/// glob-style patterns are defined outside this table — confirm there before
/// relying on the short labels below.
pub static ASSEMBLERS: &[AssemblerConfig] = &[
    // ── Sibling-merge assemblers ──
    //
    // npm ecosystem: package.json + lockfiles in same directory.
    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::BunLock,
            DatasourceId::BunLockb,
            DatasourceId::NpmPackageJson,
            DatasourceId::NpmPackageLockJson,
            DatasourceId::YarnLock,
            DatasourceId::YarnLockV1,
            DatasourceId::YarnLockV2,
            DatasourceId::YarnPnpCjs,
            DatasourceId::PnpmLockYaml,
            DatasourceId::PnpmWorkspaceYaml,
        ],
        sibling_file_patterns: &[
            "package.json",
            "bun.lock",
            "bun.lockb",
            ".package-lock.json",
            "package-lock.json",
            ".npm-shrinkwrap.json",
            "npm-shrinkwrap.json",
            "yarn.lock",
            ".pnp.cjs",
            "pnpm-lock.yaml",
            "shrinkwrap.yaml",
            "pnpm-workspace.yaml",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Rust/Cargo ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Julia ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::JuliaProjectToml,
            DatasourceId::JuliaManifestToml,
        ],
        sibling_file_patterns: &["Project.toml", "Manifest.toml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Carthage ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CarthageCartfile,
            DatasourceId::CarthageCartfileResolved,
        ],
        sibling_file_patterns: &["Cartfile", "Cartfile.private", "Cartfile.resolved"],
        mode: AssemblyMode::SiblingMerge,
    },
    // CocoaPods ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CocoapodsPodspec,
            DatasourceId::CocoapodsPodspecJson,
            DatasourceId::CocoapodsPodfile,
            DatasourceId::CocoapodsPodfileLock,
        ],
        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // PHP Composer ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
        sibling_file_patterns: &[
            "*composer.json",
            "composer.*.json",
            "*composer.lock",
            "composer.*.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Go ecosystem (includes legacy Godeps)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::GoMod,
            DatasourceId::GoModGraph,
            DatasourceId::GoSum,
            DatasourceId::GoWork,
            DatasourceId::Godeps,
        ],
        sibling_file_patterns: &[
            "go.mod",
            "go.work",
            "go.mod.graph",
            "go.modgraph",
            "go.sum",
            "Godeps.json",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Dart/Flutter ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Pixi ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Nix flakes (flake.nix + flake.lock)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
        sibling_file_patterns: &["flake.nix", "flake.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Nix default.nix (handled one-per-PackageData, separately from flakes)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NixDefaultNix],
        sibling_file_patterns: &["default.nix"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Helm chart ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Hackage ecosystem (*.cabal, cabal.project, stack.yaml)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::HackageCabal,
            DatasourceId::HackageCabalProject,
            DatasourceId::HackageStackYaml,
        ],
        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Chef ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::ChefCookbookMetadataJson,
            DatasourceId::ChefCookbookMetadataRb,
        ],
        sibling_file_patterns: &["metadata.json", "metadata.rb"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Conan (C/C++) ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::ConanConanFilePy,
            DatasourceId::ConanConanFileTxt,
            DatasourceId::ConanLock,
            DatasourceId::ConanConanDataYml,
        ],
        sibling_file_patterns: &[
            "conanfile.py",
            "conanfile.txt",
            "conan.lock",
            "conandata.yml",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Maven/Java ecosystem (nested merge via META-INF)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::MavenPom,
            DatasourceId::MavenPomProperties,
            DatasourceId::JavaJarManifest,
            DatasourceId::JavaOsgiManifest,
        ],
        sibling_file_patterns: &[
            "pom.xml",
            "*.pom",
            "pom.properties",
            "**/META-INF/MANIFEST.MF",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Python wheel archives alongside pip origin.json.
    // NOTE(review): PypiWheel is also listed in the Python/PyPI entry below — confirm
    // which entry is expected to win when both could apply.
    AssemblerConfig {
        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
        sibling_file_patterns: &["*.whl", "origin.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Python/PyPI ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::PypiPyprojectToml,
            DatasourceId::PypiPoetryPyprojectToml,
            DatasourceId::PypiSetupPy,
            DatasourceId::PypiSetupCfg,
            DatasourceId::PypiWheel,
            DatasourceId::PypiWheelMetadata,
            DatasourceId::PypiEgg,
            DatasourceId::PypiEggPkginfo,
            DatasourceId::PypiEditableEggPkginfo,
            DatasourceId::PypiJson,
            DatasourceId::PypiSdist,
            DatasourceId::PypiSdistPkginfo,
            DatasourceId::PypiInspectDeplock,
            DatasourceId::PipRequirements,
            DatasourceId::PypiPoetryLock,
            DatasourceId::PypiPylockToml,
            DatasourceId::PypiUvLock,
            DatasourceId::Pipfile,
            DatasourceId::PipfileLock,
        ],
        sibling_file_patterns: &[
            "pyproject.toml",
            "setup.py",
            "setup.cfg",
            "PKG-INFO",
            "METADATA",
            "pypi.json",
            "pip-inspect.deplock",
            "*.tar.gz",
            "*.tgz",
            "*.tar.bz2",
            "*.tar.xz",
            "*.zip",
            "requirements*.txt",
            "Pipfile",
            "Pipfile.lock",
            "poetry.lock",
            "pylock.toml",
            "pylock.*.toml",
            "uv.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Deno ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ruby/RubyGems ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::GemArchiveExtracted,
            DatasourceId::Gemspec,
            DatasourceId::GemspecExtracted,
            DatasourceId::Gemfile,
            DatasourceId::GemfileExtracted,
            DatasourceId::GemfileLock,
            DatasourceId::GemfileLockExtracted,
        ],
        sibling_file_patterns: &[
            "metadata.gz-extract",
            "**/data.gz-extract/*.gemspec",
            "**/data.gz-extract/Gemfile",
            "**/data.gz-extract/Gemfile.lock",
            "*.gemspec",
            "Gemfile",
            "Gemfile.lock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ruby gem archives (*.gem)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::GemArchive],
        sibling_file_patterns: &["*.gem"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Conda ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CondaMetaYaml,
            DatasourceId::CondaYaml,
            DatasourceId::CondaMetaJson,
        ],
        sibling_file_patterns: &[
            "meta.yaml",
            "meta.yml",
            "recipe.yaml",
            "recipe.yml",
            "environment.yml",
            "environment.yaml",
            "conda.yaml",
            "conda.yml",
            "*conda*.yaml",
            "*conda*.yml",
            "env.yaml",
            "env.yml",
            "*env*.yaml",
            "*env*.yml",
            "*environment*.yaml",
            "*environment*.yml",
            "*.json",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM specfile (source packages)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmSpecfile],
        sibling_file_patterns: &["*.spec"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian source packages (nested merge via debian/ directory)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianControlInSource,
            DatasourceId::DebianCopyrightInSource,
        ],
        sibling_file_patterns: &["control", "copyright"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Gradle/Android ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Gradle module metadata (*.module)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        sibling_file_patterns: &["*.module"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // CPAN/Perl ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::CpanMetaJson,
            DatasourceId::CpanMetaYml,
            DatasourceId::CpanManifest,
            DatasourceId::CpanDistIni,
            DatasourceId::CpanMakefile,
        ],
        sibling_file_patterns: &[
            "META.json",
            "META.yml",
            "MANIFEST",
            "dist.ini",
            "Makefile.PL",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // NuGet/.NET ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::NugetCsproj,
            DatasourceId::NugetFsproj,
            DatasourceId::NugetNuspec,
            DatasourceId::NugetNupkg,
            DatasourceId::NugetProjectJson,
            DatasourceId::NugetProjectLockJson,
            DatasourceId::NugetPackagesConfig,
            DatasourceId::NugetPackagesLock,
            DatasourceId::NugetVbproj,
        ],
        sibling_file_patterns: &[
            "*.csproj",
            "*.fsproj",
            "*.nuspec",
            "*.nupkg",
            "project.json",
            "project.lock.json",
            "packages.config",
            "packages.lock.json",
            "*.packages.lock.json",
            "*.vbproj",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // NuGet *.deps.json files
    AssemblerConfig {
        datasource_ids: &[DatasourceId::NugetDepsJson],
        sibling_file_patterns: &["*.deps.json"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Swift/SPM ecosystem
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::SwiftPackageManifestJson,
            DatasourceId::SwiftPackageResolved,
            DatasourceId::SwiftPackageShowDependencies,
        ],
        sibling_file_patterns: &[
            "Package.swift.json",
            "Package.swift.deplock",
            "Package.resolved",
            ".package.resolved",
            "swift-show-dependencies.deplock",
        ],
        mode: AssemblyMode::SiblingMerge,
    },
    // ── Standalone assemblers (single file → single package) ──
    //
    // These ecosystems have only one manifest file type with no sibling merging.
    // They still need configs so their datasource_ids are recognized by the assembler.
    //
    // Bower (JavaScript)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BowerJson],
        sibling_file_patterns: &["bower.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // CRAN (R language)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::CranDescription],
        sibling_file_patterns: &["DESCRIPTION"],
        mode: AssemblyMode::SiblingMerge,
    },
    // FreeBSD packages
    AssemblerConfig {
        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
        sibling_file_patterns: &["+COMPACT_MANIFEST"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Haxe ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::HaxelibJson],
        sibling_file_patterns: &["haxelib.json"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Git submodules (.gitmodules)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::Gitmodules],
        sibling_file_patterns: &[".gitmodules"],
        mode: AssemblyMode::SiblingMerge,
    },
    // OCaml/opam ecosystem
    AssemblerConfig {
        datasource_ids: &[DatasourceId::OpamFile],
        sibling_file_patterns: &["opam", "*.opam"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM Mariner manifest
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmMarinerManifest],
        sibling_file_patterns: &["*.rpm.manifest"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM yumdb entries (from_repo files under var/lib/yum/yumdb)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmYumdb],
        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Microsoft Update Manifest
    AssemblerConfig {
        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
        sibling_file_patterns: &["*.mum"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Autotools (C/C++ build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AutotoolsConfigure],
        sibling_file_patterns: &["configure", "configure.ac"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Bazel (build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BazelBuild],
        sibling_file_patterns: &["BUILD"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Bazel MODULE.bazel
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BazelModule],
        sibling_file_patterns: &["MODULE.bazel"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Buck (build system)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Ant/Ivy (Java dependency management)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AntIvyXml],
        sibling_file_patterns: &["ivy.xml"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Meteor (JavaScript platform)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::MeteorPackage],
        sibling_file_patterns: &["package.js"],
        mode: AssemblyMode::SiblingMerge,
    },
    // ── One-per-PackageData assemblers (database files with many packages) ──
    //
    // NOTE(review): a few entries in this section use SiblingMerge rather than
    // OnePerPackageData (APKBUILD, extracted .deb control files, BitBake recipes).
    //
    // Alpine installed package database
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        sibling_file_patterns: &["installed"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Alpine APKBUILD
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AlpineApkbuild],
        sibling_file_patterns: &["APKBUILD"],
        mode: AssemblyMode::SiblingMerge,
    },
    // RPM installed package databases (BDB, NDB, SQLite)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::RpmInstalledDatabaseBdb,
            DatasourceId::RpmInstalledDatabaseNdb,
            DatasourceId::RpmInstalledDatabaseSqlite,
        ],
        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // RPM archives (*.rpm, *.srpm)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::RpmArchive],
        sibling_file_patterns: &["*.rpm", "*.srpm"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian package archives (*.deb)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DebianDeb],
        sibling_file_patterns: &["*.deb"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Debian installed package databases (status files)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianInstalledStatusDb,
            DatasourceId::DebianDistrolessInstalledDb,
        ],
        sibling_file_patterns: &["status"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // Extracted .deb metadata (control + md5sums)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::DebianControlExtractedDeb,
            DatasourceId::DebianMd5SumsInExtractedDeb,
        ],
        sibling_file_patterns: &["control", "md5sums"],
        mode: AssemblyMode::SiblingMerge,
    },
    // Debian source control files (*.dsc)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::DebianSourceControlDsc],
        sibling_file_patterns: &["*.dsc"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // ABOUT files (*.ABOUT)
    AssemblerConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        sibling_file_patterns: &["*.ABOUT"],
        mode: AssemblyMode::OnePerPackageData,
    },
    // BitBake recipes (*.bb, *.bbappend)
    AssemblerConfig {
        datasource_ids: &[
            DatasourceId::BitbakeRecipe,
            DatasourceId::BitbakeRecipeAppend,
        ],
        sibling_file_patterns: &["*.bb", "*.bbappend"],
        mode: AssemblyMode::SiblingMerge,
    },
];
841
842// Datasource IDs intentionally excluded from package assembly.
843//
844// This list is runtime-significant: files with these datasource IDs may remain
845// unowned by any Package, while their dependencies are still eligible for
846// top-level hoisting. Tests also use it to enforce explicit assembly accounting.
847pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
848    // Non-package metadata
849    DatasourceId::Readme,
850    DatasourceId::EtcOsRelease,
851    // Binary archives (require external extraction via ExtractCode before scanning)
852    DatasourceId::AlpineApkArchive,
853    DatasourceId::AndroidAab,
854    DatasourceId::AndroidAarLibrary,
855    DatasourceId::AndroidApk,
856    DatasourceId::AndroidManifestXml,
857    DatasourceId::AndroidSoongMetadata,
858    DatasourceId::AppleDmg,
859    DatasourceId::Axis2Mar,
860    DatasourceId::ChromeCrx,
861    DatasourceId::DebianOriginalSourceTarball,
862    DatasourceId::DebianSourceMetadataTarball,
863    DatasourceId::InstallshieldInstaller,
864    DatasourceId::IosIpa,
865    DatasourceId::IsoDiskImage,
866    DatasourceId::JavaEarArchive,
867    DatasourceId::JavaJar,
868    DatasourceId::JavaWarArchive,
869    DatasourceId::JbossSar,
870    DatasourceId::MicrosoftCabinet,
871    DatasourceId::MozillaXpi,
872    DatasourceId::NsisInstaller,
873    DatasourceId::SharShellArchive,
874    DatasourceId::SquashfsDiskImage,
875    // Supplementary metadata (not primary package definitions)
876    DatasourceId::ArchAurinfo,
877    DatasourceId::ArchPkginfo,
878    DatasourceId::ArchSrcinfo,
879    DatasourceId::Axis2ModuleXml,
880    DatasourceId::ClojureDepsEdn,
881    DatasourceId::ClojureProjectClj,
882    DatasourceId::DebianInstalledFilesList,
883    DatasourceId::DebianInstalledMd5Sums,
884    DatasourceId::DebianCopyright,
885    DatasourceId::DebianCopyrightInPackage,
886    DatasourceId::DebianCopyrightStandalone,
887    DatasourceId::GoBinary,
888    DatasourceId::WindowsExecutable,
889    DatasourceId::Dockerfile,
890    DatasourceId::HexMixLock,
891    DatasourceId::JavaEarApplicationXml,
892    DatasourceId::JavaWarWebXml,
893    DatasourceId::JbossServiceXml,
894    DatasourceId::MesonBuild,
895    DatasourceId::GemGemspecInstalledSpecifications,
896    DatasourceId::NugetDirectoryBuildProps,
897    DatasourceId::NugetDirectoryPackagesProps,
898    DatasourceId::CitationCff,
899    DatasourceId::PubliccodeYaml,
900    DatasourceId::RpmPackageLicenses,
901    DatasourceId::RustBinary,
902    DatasourceId::SbtBuildSbt,
903    DatasourceId::VcpkgJson,
904];
905
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Each `DatasourceId` variant must appear in `ASSEMBLERS` or in
    /// `UNASSEMBLED_DATASOURCE_IDS`, and never in both.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        // All IDs claimed by any assembler configuration.
        let assembled: HashSet<DatasourceId> = ASSEMBLERS
            .iter()
            .flat_map(|config| config.datasource_ids.iter().copied())
            .collect();

        // All IDs explicitly opted out of assembly.
        let unassembled: HashSet<DatasourceId> =
            UNASSEMBLED_DATASOURCE_IDS.iter().copied().collect();

        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // Anything found in neither set has been forgotten.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// Deduplicating `POST_ASSEMBLY_PASSES` must not shrink it.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let distinct_count = POST_ASSEMBLY_PASSES
            .iter()
            .copied()
            .collect::<HashSet<PostAssemblyPassKind>>()
            .len();

        assert_eq!(
            distinct_count,
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Every `PostAssemblyPassKind` variant is listed in `POST_ASSEMBLY_PASSES`
    /// exactly one time.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let registered: HashSet<PostAssemblyPassKind> =
            POST_ASSEMBLY_PASSES.iter().copied().collect();

        // First report all unregistered variants together.
        let missing: Vec<_> = PostAssemblyPassKind::iter()
            .filter(|pass| !registered.contains(pass))
            .collect();
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Then check the occurrence count of each variant individually.
        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|&&candidate| candidate == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    /// With default (empty) inputs, no pass reports that it should run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let inputs = PostAssemblyInputs::default();

        PostAssemblyPassKind::iter().for_each(|pass| {
            assert!(
                !pass.should_run(&inputs),
                "{pass:?} should skip when no relevant inputs are present"
            );
        });
    }

    /// npm-only inputs with workspace markers enable only the two npm passes.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };

        let runnable: HashSet<_> = PostAssemblyPassKind::iter()
            .filter(|pass| pass.should_run(&inputs))
            .collect();

        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);

        assert_eq!(runnable, expected);
    }

    /// `CargoWorkspaceMerge` runs only when the cargo workspace marker flag is set.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        // Cargo-only inputs that differ solely in the cargo workspace marker flag.
        let cargo_inputs = |has_cargo_workspace_markers: bool| PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers,
        };

        let without_markers = cargo_inputs(false);
        assert!(!PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&without_markers));

        let with_markers = cargo_inputs(true);
        assert!(PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&with_markers));
    }
}