Skip to main content

provenant/assembly/
assemblers.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashSet;
5
6use crate::models::PackageType;
7use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
8use strum::EnumIter;
9
10use super::{
11    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
12    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, debian_source_merge,
13    file_ref_resolve, hackage_merge, nix_flake_compat_merge, npm_resource_assign,
14    nuget_cpm_resolve, python_requirements_assign, ruby_resource_assign, swift_merge, topology,
15};
16
17#[derive(Clone, Copy)]
18pub(super) enum SpecialDirectoryMergerKind {
19    Skip,
20    Bazel,
21    DebianSource,
22    Hackage,
23}
24
25#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
26pub(super) enum PostAssemblyPassKind {
27    SwiftMerge,
28    CondaRootfsMerge,
29    NpmResourceAssign,
30    PythonRequirementsAssign,
31    FileReferenceResolve,
32    RpmYumdbMerge,
33    NpmWorkspaceMerge,
34    CargoWorkspaceMerge,
35    NugetCpmResolve,
36    CargoResourceAssign,
37    ComposerResourceAssign,
38    RubyResourceAssign,
39    NixFlakeCompatMerge,
40    BazelPrune,
41}
42
43pub(super) fn special_directory_merger_for(
44    config_key: DatasourceId,
45) -> Option<SpecialDirectoryMergerKind> {
46    match config_key {
47        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
48        DatasourceId::DebianControlInSource => Some(SpecialDirectoryMergerKind::DebianSource),
49        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
50        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
51        _ => None,
52    }
53}
54
55pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
56    PostAssemblyPassKind::SwiftMerge,
57    PostAssemblyPassKind::CondaRootfsMerge,
58    PostAssemblyPassKind::NpmResourceAssign,
59    PostAssemblyPassKind::PythonRequirementsAssign,
60    PostAssemblyPassKind::FileReferenceResolve,
61    PostAssemblyPassKind::RpmYumdbMerge,
62    PostAssemblyPassKind::NpmWorkspaceMerge,
63    PostAssemblyPassKind::CargoWorkspaceMerge,
64    PostAssemblyPassKind::NugetCpmResolve,
65    PostAssemblyPassKind::CargoResourceAssign,
66    PostAssemblyPassKind::ComposerResourceAssign,
67    PostAssemblyPassKind::RubyResourceAssign,
68    PostAssemblyPassKind::NixFlakeCompatMerge,
69    PostAssemblyPassKind::BazelPrune,
70];
71
72const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
73    DatasourceId::SwiftPackageManifestJson,
74    DatasourceId::SwiftPackageResolved,
75    DatasourceId::SwiftPackageShowDependencies,
76];
77
78const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
79    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];
80
81const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
82    DatasourceId::RpmInstalledDatabaseBdb,
83    DatasourceId::RpmInstalledDatabaseNdb,
84    DatasourceId::RpmInstalledDatabaseSqlite,
85];
86
87const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
88    DatasourceId::NugetDirectoryBuildProps,
89    DatasourceId::NugetDirectoryPackagesProps,
90];
91
92const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
93    DatasourceId::NugetCsproj,
94    DatasourceId::NugetFsproj,
95    DatasourceId::NugetVbproj,
96];
97
98#[derive(Default)]
99struct PostAssemblyInputs {
100    package_types: HashSet<PackageType>,
101    file_datasource_ids: HashSet<DatasourceId>,
102    has_npm_workspace_markers: bool,
103    has_cargo_workspace_markers: bool,
104}
105
106pub(super) fn run_post_assembly_passes(
107    files: &mut [FileInfo],
108    packages: &mut Vec<Package>,
109    dependencies: &mut Vec<TopLevelDependency>,
110    topology_plan: &topology::TopologyPlan,
111) {
112    let inputs = PostAssemblyInputs::collect(files, packages);
113
114    for pass in POST_ASSEMBLY_PASSES {
115        if !pass.should_run(&inputs) {
116            continue;
117        }
118
119        pass.run(files, packages, dependencies, topology_plan);
120    }
121}
122
123impl PostAssemblyInputs {
124    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
125        let mut inputs = Self {
126            package_types: packages
127                .iter()
128                .filter_map(|package| package.package_type)
129                .collect(),
130            ..Self::default()
131        };
132
133        for file in files {
134            for package_data in &file.package_data {
135                let Some(datasource_id) = package_data.datasource_id else {
136                    continue;
137                };
138
139                inputs.file_datasource_ids.insert(datasource_id);
140
141                if matches!(
142                    datasource_id,
143                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
144                ) && package_data
145                    .extra_data
146                    .as_ref()
147                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
148                {
149                    inputs.has_npm_workspace_markers = true;
150                }
151
152                if datasource_id == DatasourceId::CargoToml
153                    && package_data
154                        .extra_data
155                        .as_ref()
156                        .and_then(|extra_data| extra_data.get("workspace"))
157                        .and_then(|workspace| workspace.get("members"))
158                        .and_then(|members| members.as_array())
159                        .is_some_and(|members| !members.is_empty())
160                {
161                    inputs.has_cargo_workspace_markers = true;
162                }
163            }
164        }
165
166        inputs
167    }
168
169    fn has_package_type(&self, package_type: PackageType) -> bool {
170        self.package_types.contains(&package_type)
171    }
172
173    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
174        datasource_ids
175            .iter()
176            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
177    }
178
179    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
180        datasource_ids
181            .iter()
182            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
183    }
184}
185
186impl SpecialDirectoryMergerKind {
187    pub(super) fn run(
188        self,
189        config: &AssemblerConfig,
190        files: &[FileInfo],
191        file_indices: &[usize],
192    ) -> Vec<DirectoryMergeOutput> {
193        match self {
194            Self::Skip => Vec::new(),
195            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
196            Self::DebianSource => {
197                debian_source_merge::assemble_debian_source_packages(config, files, file_indices)
198            }
199            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
200        }
201    }
202}
203
204impl PostAssemblyPassKind {
205    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
206        match self {
207            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
208            Self::CondaRootfsMerge => {
209                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
210            }
211            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
212            Self::PythonRequirementsAssign => {
213                inputs.has_package_type(PackageType::Pypi)
214                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
215            }
216            Self::FileReferenceResolve => {
217                file_ref_resolve::has_relevant_file_reference_datasource_ids(
218                    &inputs.file_datasource_ids,
219                )
220            }
221            Self::RpmYumdbMerge => {
222                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
223                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
224            }
225            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
226            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
227            Self::NugetCpmResolve => {
228                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
229                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
230            }
231            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
232            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
233            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
234            Self::NixFlakeCompatMerge => {
235                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
236                    && inputs.has_any_file_datasource(&[
237                        DatasourceId::NixFlakeNix,
238                        DatasourceId::NixFlakeLock,
239                    ])
240            }
241            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
242        }
243    }
244
245    fn run(
246        self,
247        files: &mut [FileInfo],
248        packages: &mut Vec<Package>,
249        dependencies: &mut Vec<TopLevelDependency>,
250        topology_plan: &topology::TopologyPlan,
251    ) {
252        match self {
253            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
254            Self::CondaRootfsMerge => {
255                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
256            }
257            Self::NpmResourceAssign => {
258                npm_resource_assign::assign_npm_package_resources(files, packages)
259            }
260            Self::PythonRequirementsAssign => {
261                python_requirements_assign::assign_python_requirements_to_projects(
262                    files,
263                    packages,
264                    dependencies,
265                )
266            }
267            Self::FileReferenceResolve => {
268                file_ref_resolve::resolve_file_references(files, packages, dependencies)
269            }
270            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
271            Self::NpmWorkspaceMerge => {
272                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
273            }
274            Self::CargoWorkspaceMerge => {
275                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
276            }
277            Self::NugetCpmResolve => {
278                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
279            }
280            Self::CargoResourceAssign => {
281                cargo_resource_assign::assign_cargo_package_resources(files, packages)
282            }
283            Self::ComposerResourceAssign => {
284                composer_resource_assign::assign_composer_package_resources(files, packages)
285            }
286            Self::RubyResourceAssign => {
287                ruby_resource_assign::assign_ruby_package_resources(files, packages)
288            }
289            Self::NixFlakeCompatMerge => {
290                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
291            }
292            Self::BazelPrune => {
293                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
294            }
295        }
296    }
297}
298
299pub static ASSEMBLERS: &[AssemblerConfig] = &[
300    // ── Sibling-merge assemblers ──
301    //
302    // npm ecosystem: package.json + lockfiles in same directory.
303    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
304    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
305    AssemblerConfig {
306        datasource_ids: &[
307            DatasourceId::BunLock,
308            DatasourceId::BunLockb,
309            DatasourceId::NpmPackageJson,
310            DatasourceId::NpmPackageLockJson,
311            DatasourceId::YarnLock,
312            DatasourceId::YarnLockV1,
313            DatasourceId::YarnLockV2,
314            DatasourceId::YarnPnpCjs,
315            DatasourceId::PnpmLockYaml,
316            DatasourceId::PnpmWorkspaceYaml,
317        ],
318        sibling_file_patterns: &[
319            "package.json",
320            "bun.lock",
321            "bun.lockb",
322            ".package-lock.json",
323            "package-lock.json",
324            ".npm-shrinkwrap.json",
325            "npm-shrinkwrap.json",
326            "yarn.lock",
327            ".pnp.cjs",
328            "pnpm-lock.yaml",
329            "shrinkwrap.yaml",
330            "pnpm-workspace.yaml",
331        ],
332        mode: AssemblyMode::SiblingMerge,
333    },
334    // Rust/Cargo ecosystem
335    AssemblerConfig {
336        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
337        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
338        mode: AssemblyMode::SiblingMerge,
339    },
340    // Julia ecosystem
341    AssemblerConfig {
342        datasource_ids: &[
343            DatasourceId::JuliaProjectToml,
344            DatasourceId::JuliaManifestToml,
345        ],
346        sibling_file_patterns: &["Project.toml", "Manifest.toml"],
347        mode: AssemblyMode::SiblingMerge,
348    },
349    // Erlang/OTP Rebar ecosystem
350    AssemblerConfig {
351        datasource_ids: &[DatasourceId::RebarConfig, DatasourceId::RebarLock],
352        sibling_file_patterns: &["rebar.config", "rebar.lock"],
353        mode: AssemblyMode::SiblingMerge,
354    },
355    // Carthage ecosystem
356    AssemblerConfig {
357        datasource_ids: &[
358            DatasourceId::CarthageCartfile,
359            DatasourceId::CarthageCartfileResolved,
360        ],
361        sibling_file_patterns: &["Cartfile", "Cartfile.private", "Cartfile.resolved"],
362        mode: AssemblyMode::SiblingMerge,
363    },
364    // CocoaPods ecosystem
365    AssemblerConfig {
366        datasource_ids: &[
367            DatasourceId::CocoapodsPodspec,
368            DatasourceId::CocoapodsPodspecJson,
369            DatasourceId::CocoapodsPodfile,
370            DatasourceId::CocoapodsPodfileLock,
371        ],
372        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
373        mode: AssemblyMode::SiblingMerge,
374    },
375    // PHP Composer ecosystem
376    AssemblerConfig {
377        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
378        sibling_file_patterns: &[
379            "*composer.json",
380            "composer.*.json",
381            "*composer.lock",
382            "composer.*.lock",
383        ],
384        mode: AssemblyMode::SiblingMerge,
385    },
386    // Go ecosystem (includes legacy Godeps)
387    AssemblerConfig {
388        datasource_ids: &[
389            DatasourceId::GoMod,
390            DatasourceId::GoModGraph,
391            DatasourceId::GoSum,
392            DatasourceId::GoWork,
393            DatasourceId::Godeps,
394        ],
395        sibling_file_patterns: &[
396            "go.mod",
397            "go.work",
398            "go.mod.graph",
399            "go.modgraph",
400            "go.sum",
401            "Godeps.json",
402        ],
403        mode: AssemblyMode::SiblingMerge,
404    },
405    // Dart/Flutter ecosystem
406    AssemblerConfig {
407        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
408        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
409        mode: AssemblyMode::SiblingMerge,
410    },
411    // Pixi ecosystem
412    AssemblerConfig {
413        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
414        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
415        mode: AssemblyMode::SiblingMerge,
416    },
417    AssemblerConfig {
418        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
419        sibling_file_patterns: &["flake.nix", "flake.lock"],
420        mode: AssemblyMode::SiblingMerge,
421    },
422    AssemblerConfig {
423        datasource_ids: &[DatasourceId::NixDefaultNix],
424        sibling_file_patterns: &["default.nix"],
425        mode: AssemblyMode::OnePerPackageData,
426    },
427    // Helm chart ecosystem
428    AssemblerConfig {
429        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
430        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
431        mode: AssemblyMode::SiblingMerge,
432    },
433    AssemblerConfig {
434        datasource_ids: &[
435            DatasourceId::HackageCabal,
436            DatasourceId::HackageCabalProject,
437            DatasourceId::HackageStackYaml,
438        ],
439        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
440        mode: AssemblyMode::SiblingMerge,
441    },
442    // Chef ecosystem
443    AssemblerConfig {
444        datasource_ids: &[
445            DatasourceId::ChefCookbookMetadataJson,
446            DatasourceId::ChefCookbookMetadataRb,
447        ],
448        sibling_file_patterns: &["metadata.json", "metadata.rb"],
449        mode: AssemblyMode::SiblingMerge,
450    },
451    // Conan (C/C++) ecosystem
452    AssemblerConfig {
453        datasource_ids: &[
454            DatasourceId::ConanConanFilePy,
455            DatasourceId::ConanConanFileTxt,
456            DatasourceId::ConanLock,
457            DatasourceId::ConanConanDataYml,
458        ],
459        sibling_file_patterns: &[
460            "conanfile.py",
461            "conanfile.txt",
462            "conan.lock",
463            "conandata.yml",
464        ],
465        mode: AssemblyMode::SiblingMerge,
466    },
467    // Maven/Java ecosystem (nested merge via META-INF)
468    AssemblerConfig {
469        datasource_ids: &[
470            DatasourceId::MavenPom,
471            DatasourceId::MavenPomProperties,
472            DatasourceId::JavaJarManifest,
473            DatasourceId::JavaOsgiManifest,
474        ],
475        sibling_file_patterns: &[
476            "pom.xml",
477            "*.pom",
478            "pom.properties",
479            "**/META-INF/MANIFEST.MF",
480        ],
481        mode: AssemblyMode::SiblingMerge,
482    },
483    AssemblerConfig {
484        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
485        sibling_file_patterns: &["*.whl", "origin.json"],
486        mode: AssemblyMode::SiblingMerge,
487    },
488    // Python/PyPI ecosystem
489    AssemblerConfig {
490        datasource_ids: &[
491            DatasourceId::PypiPyprojectToml,
492            DatasourceId::PypiPoetryPyprojectToml,
493            DatasourceId::PypiSetupPy,
494            DatasourceId::PypiSetupCfg,
495            DatasourceId::PypiWheel,
496            DatasourceId::PypiWheelMetadata,
497            DatasourceId::PypiEgg,
498            DatasourceId::PypiEggPkginfo,
499            DatasourceId::PypiEditableEggPkginfo,
500            DatasourceId::PypiJson,
501            DatasourceId::PypiSdist,
502            DatasourceId::PypiSdistPkginfo,
503            DatasourceId::PypiInspectDeplock,
504            DatasourceId::PipRequirements,
505            DatasourceId::PypiPoetryLock,
506            DatasourceId::PypiPylockToml,
507            DatasourceId::PypiUvLock,
508            DatasourceId::Pipfile,
509            DatasourceId::PipfileLock,
510        ],
511        sibling_file_patterns: &[
512            "pyproject.toml",
513            "setup.py",
514            "setup.cfg",
515            "PKG-INFO",
516            "METADATA",
517            "pypi.json",
518            "pip-inspect.deplock",
519            "*.tar.gz",
520            "*.tgz",
521            "*.tar.bz2",
522            "*.tar.xz",
523            "*.zip",
524            "requirements*.txt",
525            "Pipfile",
526            "Pipfile.lock",
527            "poetry.lock",
528            "pylock.toml",
529            "pylock.*.toml",
530            "uv.lock",
531        ],
532        mode: AssemblyMode::SiblingMerge,
533    },
534    AssemblerConfig {
535        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
536        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
537        mode: AssemblyMode::SiblingMerge,
538    },
539    // Ruby/RubyGems ecosystem
540    AssemblerConfig {
541        datasource_ids: &[
542            DatasourceId::GemArchiveExtracted,
543            DatasourceId::Gemspec,
544            DatasourceId::GemspecExtracted,
545            DatasourceId::Gemfile,
546            DatasourceId::GemfileExtracted,
547            DatasourceId::GemfileLock,
548            DatasourceId::GemfileLockExtracted,
549        ],
550        sibling_file_patterns: &[
551            "metadata.gz-extract",
552            "**/data.gz-extract/*.gemspec",
553            "**/data.gz-extract/Gemfile",
554            "**/data.gz-extract/Gemfile.lock",
555            "*.gemspec",
556            "Gemfile",
557            "Gemfile.lock",
558        ],
559        mode: AssemblyMode::SiblingMerge,
560    },
561    AssemblerConfig {
562        datasource_ids: &[DatasourceId::GemArchive],
563        sibling_file_patterns: &["*.gem"],
564        mode: AssemblyMode::OnePerPackageData,
565    },
566    // Conda ecosystem
567    AssemblerConfig {
568        datasource_ids: &[
569            DatasourceId::CondaMetaYaml,
570            DatasourceId::CondaYaml,
571            DatasourceId::CondaMetaJson,
572        ],
573        sibling_file_patterns: &[
574            "meta.yaml",
575            "meta.yml",
576            "recipe.yaml",
577            "recipe.yml",
578            "environment.yml",
579            "environment.yaml",
580            "conda.yaml",
581            "conda.yml",
582            "*conda*.yaml",
583            "*conda*.yml",
584            "env.yaml",
585            "env.yml",
586            "*env*.yaml",
587            "*env*.yml",
588            "*environment*.yaml",
589            "*environment*.yml",
590            "*.json",
591        ],
592        mode: AssemblyMode::SiblingMerge,
593    },
594    // RPM specfile (source packages)
595    AssemblerConfig {
596        datasource_ids: &[DatasourceId::RpmSpecfile],
597        sibling_file_patterns: &["*.spec"],
598        mode: AssemblyMode::OnePerPackageData,
599    },
600    // Debian source packages (nested merge via debian/ directory)
601    AssemblerConfig {
602        datasource_ids: &[
603            DatasourceId::DebianControlInSource,
604            DatasourceId::DebianCopyrightInSource,
605        ],
606        sibling_file_patterns: &["control", "copyright"],
607        mode: AssemblyMode::SiblingMerge,
608    },
609    // Gradle/Android ecosystem
610    AssemblerConfig {
611        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
612        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
613        mode: AssemblyMode::SiblingMerge,
614    },
615    AssemblerConfig {
616        datasource_ids: &[DatasourceId::GradleModule],
617        sibling_file_patterns: &["*.module"],
618        mode: AssemblyMode::OnePerPackageData,
619    },
620    // CPAN/Perl ecosystem
621    AssemblerConfig {
622        datasource_ids: &[
623            DatasourceId::CpanMetaJson,
624            DatasourceId::CpanMetaYml,
625            DatasourceId::CpanManifest,
626            DatasourceId::CpanDistIni,
627            DatasourceId::CpanMakefile,
628        ],
629        sibling_file_patterns: &[
630            "META.json",
631            "META.yml",
632            "MANIFEST",
633            "dist.ini",
634            "Makefile.PL",
635        ],
636        mode: AssemblyMode::SiblingMerge,
637    },
638    // NuGet/.NET ecosystem
639    AssemblerConfig {
640        datasource_ids: &[
641            DatasourceId::NugetCsproj,
642            DatasourceId::NugetFsproj,
643            DatasourceId::NugetNuspec,
644            DatasourceId::NugetNupkg,
645            DatasourceId::NugetProjectJson,
646            DatasourceId::NugetProjectLockJson,
647            DatasourceId::NugetPackagesConfig,
648            DatasourceId::NugetPackagesLock,
649            DatasourceId::NugetVbproj,
650        ],
651        sibling_file_patterns: &[
652            "*.csproj",
653            "*.fsproj",
654            "*.nuspec",
655            "*.nupkg",
656            "project.json",
657            "project.lock.json",
658            "packages.config",
659            "packages.lock.json",
660            "*.packages.lock.json",
661            "*.vbproj",
662        ],
663        mode: AssemblyMode::SiblingMerge,
664    },
665    AssemblerConfig {
666        datasource_ids: &[DatasourceId::NugetDepsJson],
667        sibling_file_patterns: &["*.deps.json"],
668        mode: AssemblyMode::OnePerPackageData,
669    },
670    // Swift/SPM ecosystem
671    AssemblerConfig {
672        datasource_ids: &[
673            DatasourceId::SwiftPackageManifestJson,
674            DatasourceId::SwiftPackageResolved,
675            DatasourceId::SwiftPackageShowDependencies,
676        ],
677        sibling_file_patterns: &[
678            "Package.swift.json",
679            "Package.swift.deplock",
680            "Package.resolved",
681            ".package.resolved",
682            "swift-show-dependencies.deplock",
683        ],
684        mode: AssemblyMode::SiblingMerge,
685    },
686    // ── Standalone assemblers (single file → single package) ──
687    //
688    // These ecosystems have only one manifest file type with no sibling merging.
689    // They still need configs so their datasource_ids are recognized by the assembler.
690    //
691    // Bower (JavaScript)
692    AssemblerConfig {
693        datasource_ids: &[DatasourceId::BowerJson],
694        sibling_file_patterns: &["bower.json"],
695        mode: AssemblyMode::SiblingMerge,
696    },
697    // CRAN (R language)
698    AssemblerConfig {
699        datasource_ids: &[DatasourceId::CranDescription],
700        sibling_file_patterns: &["DESCRIPTION"],
701        mode: AssemblyMode::SiblingMerge,
702    },
703    // FreeBSD packages
704    AssemblerConfig {
705        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
706        sibling_file_patterns: &["+COMPACT_MANIFEST"],
707        mode: AssemblyMode::SiblingMerge,
708    },
709    // Haxe ecosystem
710    AssemblerConfig {
711        datasource_ids: &[DatasourceId::HaxelibJson],
712        sibling_file_patterns: &["haxelib.json"],
713        mode: AssemblyMode::SiblingMerge,
714    },
715    AssemblerConfig {
716        datasource_ids: &[DatasourceId::Gitmodules],
717        sibling_file_patterns: &[".gitmodules"],
718        mode: AssemblyMode::SiblingMerge,
719    },
720    // OCaml/opam ecosystem
721    AssemblerConfig {
722        datasource_ids: &[DatasourceId::OpamFile],
723        sibling_file_patterns: &["opam", "*.opam"],
724        mode: AssemblyMode::SiblingMerge,
725    },
726    // RPM Mariner manifest
727    AssemblerConfig {
728        datasource_ids: &[DatasourceId::RpmMarinerManifest],
729        sibling_file_patterns: &["*.rpm.manifest"],
730        mode: AssemblyMode::SiblingMerge,
731    },
732    AssemblerConfig {
733        datasource_ids: &[DatasourceId::RpmYumdb],
734        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
735        mode: AssemblyMode::OnePerPackageData,
736    },
737    // Microsoft Update Manifest
738    AssemblerConfig {
739        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
740        sibling_file_patterns: &["*.mum"],
741        mode: AssemblyMode::SiblingMerge,
742    },
743    // Autotools (C/C++ build system)
744    AssemblerConfig {
745        datasource_ids: &[DatasourceId::AutotoolsConfigure],
746        sibling_file_patterns: &["configure", "configure.ac"],
747        mode: AssemblyMode::SiblingMerge,
748    },
749    // Bazel (build system)
750    AssemblerConfig {
751        datasource_ids: &[DatasourceId::BazelBuild],
752        sibling_file_patterns: &["BUILD"],
753        mode: AssemblyMode::SiblingMerge,
754    },
755    AssemblerConfig {
756        datasource_ids: &[DatasourceId::BazelModule],
757        sibling_file_patterns: &["MODULE.bazel"],
758        mode: AssemblyMode::OnePerPackageData,
759    },
760    // Buck (build system)
761    AssemblerConfig {
762        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
763        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
764        mode: AssemblyMode::SiblingMerge,
765    },
766    // Ant/Ivy (Java dependency management)
767    AssemblerConfig {
768        datasource_ids: &[DatasourceId::AntIvyXml],
769        sibling_file_patterns: &["ivy.xml"],
770        mode: AssemblyMode::SiblingMerge,
771    },
772    // Meteor (JavaScript platform)
773    AssemblerConfig {
774        datasource_ids: &[DatasourceId::MeteorPackage],
775        sibling_file_patterns: &["package.js"],
776        mode: AssemblyMode::SiblingMerge,
777    },
778    // ── One-per-PackageData assemblers (database files with many packages) ──
779    //
780    // Alpine installed package database
781    AssemblerConfig {
782        datasource_ids: &[DatasourceId::AlpineInstalledDb],
783        sibling_file_patterns: &["installed"],
784        mode: AssemblyMode::OnePerPackageData,
785    },
786    AssemblerConfig {
787        datasource_ids: &[DatasourceId::AlpineApkbuild],
788        sibling_file_patterns: &["APKBUILD"],
789        mode: AssemblyMode::SiblingMerge,
790    },
791    // RPM installed package databases (BDB, NDB, SQLite)
792    AssemblerConfig {
793        datasource_ids: &[
794            DatasourceId::RpmInstalledDatabaseBdb,
795            DatasourceId::RpmInstalledDatabaseNdb,
796            DatasourceId::RpmInstalledDatabaseSqlite,
797        ],
798        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
799        mode: AssemblyMode::OnePerPackageData,
800    },
801    AssemblerConfig {
802        datasource_ids: &[DatasourceId::RpmArchive],
803        sibling_file_patterns: &["*.rpm", "*.srpm"],
804        mode: AssemblyMode::OnePerPackageData,
805    },
806    // Debian installed package databases
807    AssemblerConfig {
808        datasource_ids: &[DatasourceId::DebianDeb],
809        sibling_file_patterns: &["*.deb"],
810        mode: AssemblyMode::OnePerPackageData,
811    },
812    AssemblerConfig {
813        datasource_ids: &[
814            DatasourceId::DebianInstalledStatusDb,
815            DatasourceId::DebianDistrolessInstalledDb,
816        ],
817        sibling_file_patterns: &["status"],
818        mode: AssemblyMode::OnePerPackageData,
819    },
820    AssemblerConfig {
821        datasource_ids: &[
822            DatasourceId::DebianControlExtractedDeb,
823            DatasourceId::DebianMd5SumsInExtractedDeb,
824        ],
825        sibling_file_patterns: &["control", "md5sums"],
826        mode: AssemblyMode::SiblingMerge,
827    },
828    AssemblerConfig {
829        datasource_ids: &[DatasourceId::DebianSourceControlDsc],
830        sibling_file_patterns: &["*.dsc"],
831        mode: AssemblyMode::OnePerPackageData,
832    },
833    AssemblerConfig {
834        datasource_ids: &[DatasourceId::AboutFile],
835        sibling_file_patterns: &["*.ABOUT"],
836        mode: AssemblyMode::OnePerPackageData,
837    },
838    AssemblerConfig {
839        datasource_ids: &[
840            DatasourceId::BitbakeRecipe,
841            DatasourceId::BitbakeRecipeAppend,
842        ],
843        sibling_file_patterns: &["*.bb", "*.bbappend"],
844        mode: AssemblyMode::SiblingMerge,
845    },
846];
847
/// Datasource IDs intentionally excluded from package assembly.
///
/// This list is runtime-significant: files with these datasource IDs may remain
/// unowned by any `Package`, while their dependencies are still eligible for
/// top-level hoisting.
///
/// The unit tests in this file also use it to enforce explicit assembly
/// accounting: every `DatasourceId` variant must appear either in an
/// `AssemblerConfig` inside `ASSEMBLERS` or in this list, and never in both.
/// When a new `DatasourceId` is introduced it therefore has to be added to
/// one of the two places.
///
/// The group headings below are organisational only; the slice itself is flat.
pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
    // Non-package metadata
    DatasourceId::Readme,
    DatasourceId::EtcOsRelease,
    // Binary archives (require external extraction via ExtractCode before scanning)
    // NOTE(review): judging by name alone, `AndroidManifestXml` and
    // `AndroidSoongMetadata` look like metadata files rather than archives —
    // confirm they belong under this heading.
    DatasourceId::AlpineApkArchive,
    DatasourceId::AndroidAab,
    DatasourceId::AndroidAarLibrary,
    DatasourceId::AndroidApk,
    DatasourceId::AndroidManifestXml,
    DatasourceId::AndroidSoongMetadata,
    DatasourceId::AppleDmg,
    DatasourceId::Axis2Mar,
    DatasourceId::ChromeCrx,
    DatasourceId::DebianOriginalSourceTarball,
    DatasourceId::DebianSourceMetadataTarball,
    DatasourceId::InstallshieldInstaller,
    DatasourceId::IosIpa,
    DatasourceId::IsoDiskImage,
    DatasourceId::JavaEarArchive,
    DatasourceId::JavaJar,
    DatasourceId::JavaWarArchive,
    DatasourceId::JbossSar,
    DatasourceId::MicrosoftCabinet,
    DatasourceId::MozillaXpi,
    DatasourceId::NsisInstaller,
    DatasourceId::SharShellArchive,
    DatasourceId::SquashfsDiskImage,
    // Supplementary metadata (not primary package definitions)
    // NOTE(review): `GoBinary`, `WindowsExecutable` and `RustBinary` are
    // presumably compiled artifacts rather than metadata files — confirm the
    // heading still describes them, or give them their own group.
    DatasourceId::ArchAurinfo,
    DatasourceId::ArchPkginfo,
    DatasourceId::ArchSrcinfo,
    DatasourceId::Axis2ModuleXml,
    DatasourceId::ClojureDepsEdn,
    DatasourceId::ClojureProjectClj,
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
    DatasourceId::DebianCopyright,
    DatasourceId::DebianCopyrightInPackage,
    DatasourceId::DebianCopyrightStandalone,
    DatasourceId::GoBinary,
    DatasourceId::WindowsExecutable,
    DatasourceId::Dockerfile,
    DatasourceId::ErlangOtpAppSrc,
    DatasourceId::HexMixLock,
    DatasourceId::JavaEarApplicationXml,
    DatasourceId::JavaWarWebXml,
    DatasourceId::JbossServiceXml,
    DatasourceId::MesonBuild,
    DatasourceId::GemGemspecInstalledSpecifications,
    DatasourceId::NugetDirectoryBuildProps,
    DatasourceId::NugetDirectoryPackagesProps,
    DatasourceId::CitationCff,
    DatasourceId::PubliccodeYaml,
    DatasourceId::RpmPackageLicenses,
    DatasourceId::RustBinary,
    DatasourceId::SbtBuildSbt,
    DatasourceId::VcpkgJson,
];
912
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Every `DatasourceId` variant must be listed in `ASSEMBLERS` or in
    /// `UNASSEMBLED_DATASOURCE_IDS`, and no variant may be listed in both.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        // Gather every datasource ID claimed by some assembler configuration.
        let mut assembled: HashSet<DatasourceId> = HashSet::new();
        for config in ASSEMBLERS {
            assembled.extend(config.datasource_ids.iter().copied());
        }

        let mut unassembled: HashSet<DatasourceId> = HashSet::new();
        unassembled.extend(UNASSEMBLED_DATASOURCE_IDS.iter().copied());

        // The two sets must be disjoint.
        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // Together, the two sets must cover the whole enum.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// `POST_ASSEMBLY_PASSES` must not list the same pass twice.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let distinct_passes = POST_ASSEMBLY_PASSES
            .iter()
            .copied()
            .collect::<HashSet<PostAssemblyPassKind>>();

        assert_eq!(
            distinct_passes.len(),
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Each `PostAssemblyPassKind` variant must occur in
    /// `POST_ASSEMBLY_PASSES` exactly one time.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let mut registered: HashSet<PostAssemblyPassKind> = HashSet::new();
        registered.extend(POST_ASSEMBLY_PASSES.iter().copied());

        // First report any variant that is absent altogether.
        let mut missing = Vec::new();
        for pass in PostAssemblyPassKind::iter() {
            if !registered.contains(&pass) {
                missing.push(pass);
            }
        }
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Then check the exact multiplicity of every variant.
        for pass in PostAssemblyPassKind::iter() {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|&&entry| entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        }
    }

    /// With default (empty) inputs, no pass should report that it needs to run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let empty_inputs = PostAssemblyInputs::default();

        PostAssemblyPassKind::iter().for_each(|pass| {
            assert!(
                !pass.should_run(&empty_inputs),
                "{pass:?} should skip when no relevant inputs are present"
            );
        });
    }

    /// Inputs describing an npm workspace should enable the npm passes and
    /// nothing else.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let npm_inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };

        let mut runnable: HashSet<_> = HashSet::new();
        for pass in PostAssemblyPassKind::iter() {
            if pass.should_run(&npm_inputs) {
                runnable.insert(pass);
            }
        }

        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);
        assert_eq!(runnable, expected);
    }

    /// The Cargo workspace merge pass is gated on the workspace-marker flag,
    /// not merely on the presence of Cargo packages and manifests.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        let merge_pass = PostAssemblyPassKind::CargoWorkspaceMerge;

        let without_markers = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers: false,
        };
        assert!(!merge_pass.should_run(&without_markers));

        // Same inputs, but with the Cargo workspace marker switched on.
        let with_markers = PostAssemblyInputs {
            has_cargo_workspace_markers: true,
            ..without_markers
        };
        assert!(merge_pass.should_run(&with_markers));
    }
}