Skip to main content

provenant/assembly/
assemblers.rs

1use std::collections::HashSet;
2
3use crate::models::PackageType;
4use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
5use strum::EnumIter;
6
7use super::{
8    AssemblerConfig, AssemblyMode, DirectoryMergeOutput, bazel_merge, bazel_prune,
9    cargo_resource_assign, composer_resource_assign, conda_rootfs_merge, debian_source_merge,
10    file_ref_resolve, hackage_merge, nix_flake_compat_merge, npm_resource_assign,
11    nuget_cpm_resolve, python_requirements_assign, ruby_resource_assign, swift_merge, topology,
12};
13
14#[derive(Clone, Copy)]
15pub(super) enum SpecialDirectoryMergerKind {
16    Skip,
17    Bazel,
18    DebianSource,
19    Hackage,
20}
21
22#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
23pub(super) enum PostAssemblyPassKind {
24    SwiftMerge,
25    CondaRootfsMerge,
26    NpmResourceAssign,
27    PythonRequirementsAssign,
28    FileReferenceResolve,
29    RpmYumdbMerge,
30    NpmWorkspaceMerge,
31    CargoWorkspaceMerge,
32    NugetCpmResolve,
33    CargoResourceAssign,
34    ComposerResourceAssign,
35    RubyResourceAssign,
36    NixFlakeCompatMerge,
37    BazelPrune,
38}
39
40pub(super) fn special_directory_merger_for(
41    config_key: DatasourceId,
42) -> Option<SpecialDirectoryMergerKind> {
43    match config_key {
44        DatasourceId::BazelBuild => Some(SpecialDirectoryMergerKind::Bazel),
45        DatasourceId::DebianControlInSource => Some(SpecialDirectoryMergerKind::DebianSource),
46        DatasourceId::HackageCabal => Some(SpecialDirectoryMergerKind::Hackage),
47        DatasourceId::SwiftPackageManifestJson => Some(SpecialDirectoryMergerKind::Skip),
48        _ => None,
49    }
50}
51
52pub(super) static POST_ASSEMBLY_PASSES: &[PostAssemblyPassKind] = &[
53    PostAssemblyPassKind::SwiftMerge,
54    PostAssemblyPassKind::CondaRootfsMerge,
55    PostAssemblyPassKind::NpmResourceAssign,
56    PostAssemblyPassKind::PythonRequirementsAssign,
57    PostAssemblyPassKind::FileReferenceResolve,
58    PostAssemblyPassKind::RpmYumdbMerge,
59    PostAssemblyPassKind::NpmWorkspaceMerge,
60    PostAssemblyPassKind::CargoWorkspaceMerge,
61    PostAssemblyPassKind::NugetCpmResolve,
62    PostAssemblyPassKind::CargoResourceAssign,
63    PostAssemblyPassKind::ComposerResourceAssign,
64    PostAssemblyPassKind::RubyResourceAssign,
65    PostAssemblyPassKind::NixFlakeCompatMerge,
66    PostAssemblyPassKind::BazelPrune,
67];
68
69const SWIFT_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] = &[
70    DatasourceId::SwiftPackageManifestJson,
71    DatasourceId::SwiftPackageResolved,
72    DatasourceId::SwiftPackageShowDependencies,
73];
74
75const CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS: &[DatasourceId] =
76    &[DatasourceId::CondaMetaJson, DatasourceId::CondaMetaYaml];
77
78const RPM_INSTALLED_DATABASE_DATASOURCE_IDS: &[DatasourceId] = &[
79    DatasourceId::RpmInstalledDatabaseBdb,
80    DatasourceId::RpmInstalledDatabaseNdb,
81    DatasourceId::RpmInstalledDatabaseSqlite,
82];
83
84const NUGET_CPM_CONFIG_DATASOURCE_IDS: &[DatasourceId] = &[
85    DatasourceId::NugetDirectoryBuildProps,
86    DatasourceId::NugetDirectoryPackagesProps,
87];
88
89const NUGET_CPM_PROJECT_DATASOURCE_IDS: &[DatasourceId] = &[
90    DatasourceId::NugetCsproj,
91    DatasourceId::NugetFsproj,
92    DatasourceId::NugetVbproj,
93];
94
95#[derive(Default)]
96struct PostAssemblyInputs {
97    package_types: HashSet<PackageType>,
98    file_datasource_ids: HashSet<DatasourceId>,
99    has_npm_workspace_markers: bool,
100    has_cargo_workspace_markers: bool,
101}
102
103pub(super) fn run_post_assembly_passes(
104    files: &mut [FileInfo],
105    packages: &mut Vec<Package>,
106    dependencies: &mut Vec<TopLevelDependency>,
107    topology_plan: &topology::TopologyPlan,
108) {
109    let inputs = PostAssemblyInputs::collect(files, packages);
110
111    for pass in POST_ASSEMBLY_PASSES {
112        if !pass.should_run(&inputs) {
113            continue;
114        }
115
116        pass.run(files, packages, dependencies, topology_plan);
117    }
118}
119
120impl PostAssemblyInputs {
121    fn collect(files: &[FileInfo], packages: &[Package]) -> Self {
122        let mut inputs = Self {
123            package_types: packages
124                .iter()
125                .filter_map(|package| package.package_type)
126                .collect(),
127            ..Self::default()
128        };
129
130        for file in files {
131            for package_data in &file.package_data {
132                let Some(datasource_id) = package_data.datasource_id else {
133                    continue;
134                };
135
136                inputs.file_datasource_ids.insert(datasource_id);
137
138                if matches!(
139                    datasource_id,
140                    DatasourceId::NpmPackageJson | DatasourceId::PnpmWorkspaceYaml
141                ) && package_data
142                    .extra_data
143                    .as_ref()
144                    .is_some_and(|extra_data| extra_data.contains_key("workspaces"))
145                {
146                    inputs.has_npm_workspace_markers = true;
147                }
148
149                if datasource_id == DatasourceId::CargoToml
150                    && package_data
151                        .extra_data
152                        .as_ref()
153                        .and_then(|extra_data| extra_data.get("workspace"))
154                        .and_then(|workspace| workspace.get("members"))
155                        .and_then(|members| members.as_array())
156                        .is_some_and(|members| !members.is_empty())
157                {
158                    inputs.has_cargo_workspace_markers = true;
159                }
160            }
161        }
162
163        inputs
164    }
165
166    fn has_package_type(&self, package_type: PackageType) -> bool {
167        self.package_types.contains(&package_type)
168    }
169
170    fn has_any_file_datasource(&self, datasource_ids: &[DatasourceId]) -> bool {
171        datasource_ids
172            .iter()
173            .any(|datasource_id| self.file_datasource_ids.contains(datasource_id))
174    }
175
176    fn has_all_file_datasources(&self, datasource_ids: &[DatasourceId]) -> bool {
177        datasource_ids
178            .iter()
179            .all(|datasource_id| self.file_datasource_ids.contains(datasource_id))
180    }
181}
182
183impl SpecialDirectoryMergerKind {
184    pub(super) fn run(
185        self,
186        config: &AssemblerConfig,
187        files: &[FileInfo],
188        file_indices: &[usize],
189    ) -> Vec<DirectoryMergeOutput> {
190        match self {
191            Self::Skip => Vec::new(),
192            Self::Bazel => bazel_merge::assemble_bazel_packages(config, files, file_indices),
193            Self::DebianSource => {
194                debian_source_merge::assemble_debian_source_packages(config, files, file_indices)
195            }
196            Self::Hackage => hackage_merge::assemble_hackage_packages(files, file_indices),
197        }
198    }
199}
200
201impl PostAssemblyPassKind {
202    fn should_run(self, inputs: &PostAssemblyInputs) -> bool {
203        match self {
204            Self::SwiftMerge => inputs.has_any_file_datasource(SWIFT_POST_ASSEMBLY_DATASOURCE_IDS),
205            Self::CondaRootfsMerge => {
206                inputs.has_all_file_datasources(CONDA_ROOTFS_POST_ASSEMBLY_DATASOURCE_IDS)
207            }
208            Self::NpmResourceAssign => inputs.has_package_type(PackageType::Npm),
209            Self::PythonRequirementsAssign => {
210                inputs.has_package_type(PackageType::Pypi)
211                    && inputs.has_any_file_datasource(&[DatasourceId::PipRequirements])
212            }
213            Self::FileReferenceResolve => {
214                file_ref_resolve::has_relevant_file_reference_datasource_ids(
215                    &inputs.file_datasource_ids,
216                )
217            }
218            Self::RpmYumdbMerge => {
219                inputs.has_any_file_datasource(&[DatasourceId::RpmYumdb])
220                    && inputs.has_any_file_datasource(RPM_INSTALLED_DATABASE_DATASOURCE_IDS)
221            }
222            Self::NpmWorkspaceMerge => inputs.has_npm_workspace_markers,
223            Self::CargoWorkspaceMerge => inputs.has_cargo_workspace_markers,
224            Self::NugetCpmResolve => {
225                inputs.has_any_file_datasource(NUGET_CPM_CONFIG_DATASOURCE_IDS)
226                    && inputs.has_any_file_datasource(NUGET_CPM_PROJECT_DATASOURCE_IDS)
227            }
228            Self::CargoResourceAssign => inputs.has_package_type(PackageType::Cargo),
229            Self::ComposerResourceAssign => inputs.has_package_type(PackageType::Composer),
230            Self::RubyResourceAssign => inputs.has_package_type(PackageType::Gem),
231            Self::NixFlakeCompatMerge => {
232                inputs.has_any_file_datasource(&[DatasourceId::NixDefaultNix])
233                    && inputs.has_any_file_datasource(&[
234                        DatasourceId::NixFlakeNix,
235                        DatasourceId::NixFlakeLock,
236                    ])
237            }
238            Self::BazelPrune => inputs.has_package_type(PackageType::Bazel),
239        }
240    }
241
242    fn run(
243        self,
244        files: &mut [FileInfo],
245        packages: &mut Vec<Package>,
246        dependencies: &mut Vec<TopLevelDependency>,
247        topology_plan: &topology::TopologyPlan,
248    ) {
249        match self {
250            Self::SwiftMerge => swift_merge::assemble_swift_packages(files, packages, dependencies),
251            Self::CondaRootfsMerge => {
252                conda_rootfs_merge::merge_conda_rootfs_metadata(files, packages, dependencies)
253            }
254            Self::NpmResourceAssign => {
255                npm_resource_assign::assign_npm_package_resources(files, packages)
256            }
257            Self::PythonRequirementsAssign => {
258                python_requirements_assign::assign_python_requirements_to_projects(
259                    files,
260                    packages,
261                    dependencies,
262                )
263            }
264            Self::FileReferenceResolve => {
265                file_ref_resolve::resolve_file_references(files, packages, dependencies)
266            }
267            Self::RpmYumdbMerge => file_ref_resolve::merge_rpm_yumdb_metadata(files, packages),
268            Self::NpmWorkspaceMerge => {
269                topology_plan.apply_npm_workspace_domains(files, packages, dependencies)
270            }
271            Self::CargoWorkspaceMerge => {
272                topology_plan.apply_cargo_workspace_domains(files, packages, dependencies)
273            }
274            Self::NugetCpmResolve => {
275                nuget_cpm_resolve::resolve_nuget_cpm_versions(files, dependencies)
276            }
277            Self::CargoResourceAssign => {
278                cargo_resource_assign::assign_cargo_package_resources(files, packages)
279            }
280            Self::ComposerResourceAssign => {
281                composer_resource_assign::assign_composer_package_resources(files, packages)
282            }
283            Self::RubyResourceAssign => {
284                ruby_resource_assign::assign_ruby_package_resources(files, packages)
285            }
286            Self::NixFlakeCompatMerge => {
287                nix_flake_compat_merge::attach_flake_compat_default_files(files, packages)
288            }
289            Self::BazelPrune => {
290                bazel_prune::prune_unused_bazel_packages(files, packages, dependencies)
291            }
292        }
293    }
294}
295
296pub static ASSEMBLERS: &[AssemblerConfig] = &[
297    // ── Sibling-merge assemblers ──
298    //
299    // npm ecosystem: package.json + lockfiles in same directory.
300    // NOTE: npm-shrinkwrap.json emits "npm_package_lock_json" as its datasource_id,
301    // so "npm_shrinkwrap_json" is NOT a real datasource_id.
302    AssemblerConfig {
303        datasource_ids: &[
304            DatasourceId::BunLock,
305            DatasourceId::BunLockb,
306            DatasourceId::NpmPackageJson,
307            DatasourceId::NpmPackageLockJson,
308            DatasourceId::YarnLock,
309            DatasourceId::YarnLockV1,
310            DatasourceId::YarnLockV2,
311            DatasourceId::YarnPnpCjs,
312            DatasourceId::PnpmLockYaml,
313            DatasourceId::PnpmWorkspaceYaml,
314        ],
315        sibling_file_patterns: &[
316            "package.json",
317            "bun.lock",
318            "bun.lockb",
319            ".package-lock.json",
320            "package-lock.json",
321            ".npm-shrinkwrap.json",
322            "npm-shrinkwrap.json",
323            "yarn.lock",
324            ".pnp.cjs",
325            "pnpm-lock.yaml",
326            "shrinkwrap.yaml",
327            "pnpm-workspace.yaml",
328        ],
329        mode: AssemblyMode::SiblingMerge,
330    },
331    // Rust/Cargo ecosystem
332    AssemblerConfig {
333        datasource_ids: &[DatasourceId::CargoToml, DatasourceId::CargoLock],
334        sibling_file_patterns: &["Cargo.toml", "Cargo.lock"],
335        mode: AssemblyMode::SiblingMerge,
336    },
337    // Julia ecosystem
338    AssemblerConfig {
339        datasource_ids: &[
340            DatasourceId::JuliaProjectToml,
341            DatasourceId::JuliaManifestToml,
342        ],
343        sibling_file_patterns: &["Project.toml", "Manifest.toml"],
344        mode: AssemblyMode::SiblingMerge,
345    },
346    // CocoaPods ecosystem
347    AssemblerConfig {
348        datasource_ids: &[
349            DatasourceId::CocoapodsPodspec,
350            DatasourceId::CocoapodsPodspecJson,
351            DatasourceId::CocoapodsPodfile,
352            DatasourceId::CocoapodsPodfileLock,
353        ],
354        sibling_file_patterns: &["*.podspec", "*.podspec.json", "Podfile", "Podfile.lock"],
355        mode: AssemblyMode::SiblingMerge,
356    },
357    // PHP Composer ecosystem
358    AssemblerConfig {
359        datasource_ids: &[DatasourceId::PhpComposerJson, DatasourceId::PhpComposerLock],
360        sibling_file_patterns: &[
361            "*composer.json",
362            "composer.*.json",
363            "*composer.lock",
364            "composer.*.lock",
365        ],
366        mode: AssemblyMode::SiblingMerge,
367    },
368    // Go ecosystem (includes legacy Godeps)
369    AssemblerConfig {
370        datasource_ids: &[
371            DatasourceId::GoMod,
372            DatasourceId::GoModGraph,
373            DatasourceId::GoSum,
374            DatasourceId::GoWork,
375            DatasourceId::Godeps,
376        ],
377        sibling_file_patterns: &[
378            "go.mod",
379            "go.work",
380            "go.mod.graph",
381            "go.modgraph",
382            "go.sum",
383            "Godeps.json",
384        ],
385        mode: AssemblyMode::SiblingMerge,
386    },
387    // Dart/Flutter ecosystem
388    AssemblerConfig {
389        datasource_ids: &[DatasourceId::PubspecYaml, DatasourceId::PubspecLock],
390        sibling_file_patterns: &["pubspec.yaml", "pubspec.lock"],
391        mode: AssemblyMode::SiblingMerge,
392    },
393    // Pixi ecosystem
394    AssemblerConfig {
395        datasource_ids: &[DatasourceId::PixiToml, DatasourceId::PixiLock],
396        sibling_file_patterns: &["pixi.toml", "pixi.lock"],
397        mode: AssemblyMode::SiblingMerge,
398    },
399    AssemblerConfig {
400        datasource_ids: &[DatasourceId::NixFlakeNix, DatasourceId::NixFlakeLock],
401        sibling_file_patterns: &["flake.nix", "flake.lock"],
402        mode: AssemblyMode::SiblingMerge,
403    },
404    AssemblerConfig {
405        datasource_ids: &[DatasourceId::NixDefaultNix],
406        sibling_file_patterns: &["default.nix"],
407        mode: AssemblyMode::OnePerPackageData,
408    },
409    // Helm chart ecosystem
410    AssemblerConfig {
411        datasource_ids: &[DatasourceId::HelmChartYaml, DatasourceId::HelmChartLock],
412        sibling_file_patterns: &["Chart.yaml", "Chart.lock"],
413        mode: AssemblyMode::SiblingMerge,
414    },
415    AssemblerConfig {
416        datasource_ids: &[
417            DatasourceId::HackageCabal,
418            DatasourceId::HackageCabalProject,
419            DatasourceId::HackageStackYaml,
420        ],
421        sibling_file_patterns: &["*.cabal", "cabal.project", "stack.yaml"],
422        mode: AssemblyMode::SiblingMerge,
423    },
424    // Chef ecosystem
425    AssemblerConfig {
426        datasource_ids: &[
427            DatasourceId::ChefCookbookMetadataJson,
428            DatasourceId::ChefCookbookMetadataRb,
429        ],
430        sibling_file_patterns: &["metadata.json", "metadata.rb"],
431        mode: AssemblyMode::SiblingMerge,
432    },
433    // Conan (C/C++) ecosystem
434    AssemblerConfig {
435        datasource_ids: &[
436            DatasourceId::ConanConanFilePy,
437            DatasourceId::ConanConanFileTxt,
438            DatasourceId::ConanLock,
439            DatasourceId::ConanConanDataYml,
440        ],
441        sibling_file_patterns: &[
442            "conanfile.py",
443            "conanfile.txt",
444            "conan.lock",
445            "conandata.yml",
446        ],
447        mode: AssemblyMode::SiblingMerge,
448    },
449    // Maven/Java ecosystem (nested merge via META-INF)
450    AssemblerConfig {
451        datasource_ids: &[
452            DatasourceId::MavenPom,
453            DatasourceId::MavenPomProperties,
454            DatasourceId::JavaJarManifest,
455            DatasourceId::JavaOsgiManifest,
456        ],
457        sibling_file_patterns: &[
458            "pom.xml",
459            "*.pom",
460            "pom.properties",
461            "**/META-INF/MANIFEST.MF",
462        ],
463        mode: AssemblyMode::SiblingMerge,
464    },
465    AssemblerConfig {
466        datasource_ids: &[DatasourceId::PypiWheel, DatasourceId::PypiPipOriginJson],
467        sibling_file_patterns: &["*.whl", "origin.json"],
468        mode: AssemblyMode::SiblingMerge,
469    },
470    // Python/PyPI ecosystem
471    AssemblerConfig {
472        datasource_ids: &[
473            DatasourceId::PypiPyprojectToml,
474            DatasourceId::PypiPoetryPyprojectToml,
475            DatasourceId::PypiSetupPy,
476            DatasourceId::PypiSetupCfg,
477            DatasourceId::PypiWheel,
478            DatasourceId::PypiWheelMetadata,
479            DatasourceId::PypiEgg,
480            DatasourceId::PypiEggPkginfo,
481            DatasourceId::PypiEditableEggPkginfo,
482            DatasourceId::PypiJson,
483            DatasourceId::PypiSdist,
484            DatasourceId::PypiSdistPkginfo,
485            DatasourceId::PypiInspectDeplock,
486            DatasourceId::PipRequirements,
487            DatasourceId::PypiPoetryLock,
488            DatasourceId::PypiPylockToml,
489            DatasourceId::PypiUvLock,
490            DatasourceId::Pipfile,
491            DatasourceId::PipfileLock,
492        ],
493        sibling_file_patterns: &[
494            "pyproject.toml",
495            "setup.py",
496            "setup.cfg",
497            "PKG-INFO",
498            "METADATA",
499            "pypi.json",
500            "pip-inspect.deplock",
501            "*.tar.gz",
502            "*.tgz",
503            "*.tar.bz2",
504            "*.tar.xz",
505            "*.zip",
506            "requirements*.txt",
507            "Pipfile",
508            "Pipfile.lock",
509            "poetry.lock",
510            "pylock.toml",
511            "pylock.*.toml",
512            "uv.lock",
513        ],
514        mode: AssemblyMode::SiblingMerge,
515    },
516    AssemblerConfig {
517        datasource_ids: &[DatasourceId::DenoJson, DatasourceId::DenoLock],
518        sibling_file_patterns: &["deno.json", "deno.jsonc", "deno.lock"],
519        mode: AssemblyMode::SiblingMerge,
520    },
521    // Ruby/RubyGems ecosystem
522    AssemblerConfig {
523        datasource_ids: &[
524            DatasourceId::GemArchiveExtracted,
525            DatasourceId::Gemspec,
526            DatasourceId::GemspecExtracted,
527            DatasourceId::Gemfile,
528            DatasourceId::GemfileExtracted,
529            DatasourceId::GemfileLock,
530            DatasourceId::GemfileLockExtracted,
531        ],
532        sibling_file_patterns: &[
533            "metadata.gz-extract",
534            "**/data.gz-extract/*.gemspec",
535            "**/data.gz-extract/Gemfile",
536            "**/data.gz-extract/Gemfile.lock",
537            "*.gemspec",
538            "Gemfile",
539            "Gemfile.lock",
540        ],
541        mode: AssemblyMode::SiblingMerge,
542    },
543    AssemblerConfig {
544        datasource_ids: &[DatasourceId::GemArchive],
545        sibling_file_patterns: &["*.gem"],
546        mode: AssemblyMode::OnePerPackageData,
547    },
548    // Conda ecosystem
549    AssemblerConfig {
550        datasource_ids: &[
551            DatasourceId::CondaMetaYaml,
552            DatasourceId::CondaYaml,
553            DatasourceId::CondaMetaJson,
554        ],
555        sibling_file_patterns: &[
556            "meta.yaml",
557            "meta.yml",
558            "recipe.yaml",
559            "recipe.yml",
560            "environment.yml",
561            "environment.yaml",
562            "conda.yaml",
563            "conda.yml",
564            "*conda*.yaml",
565            "*conda*.yml",
566            "env.yaml",
567            "env.yml",
568            "*env*.yaml",
569            "*env*.yml",
570            "*environment*.yaml",
571            "*environment*.yml",
572            "*.json",
573        ],
574        mode: AssemblyMode::SiblingMerge,
575    },
576    // RPM specfile (source packages)
577    AssemblerConfig {
578        datasource_ids: &[DatasourceId::RpmSpecfile],
579        sibling_file_patterns: &["*.spec"],
580        mode: AssemblyMode::SiblingMerge,
581    },
582    // Debian source packages (nested merge via debian/ directory)
583    AssemblerConfig {
584        datasource_ids: &[
585            DatasourceId::DebianControlInSource,
586            DatasourceId::DebianCopyrightInSource,
587        ],
588        sibling_file_patterns: &["control", "copyright"],
589        mode: AssemblyMode::SiblingMerge,
590    },
591    // Gradle/Android ecosystem
592    AssemblerConfig {
593        datasource_ids: &[DatasourceId::BuildGradle, DatasourceId::GradleLockfile],
594        sibling_file_patterns: &["build.gradle", "build.gradle.kts", "gradle.lockfile"],
595        mode: AssemblyMode::SiblingMerge,
596    },
597    AssemblerConfig {
598        datasource_ids: &[DatasourceId::GradleModule],
599        sibling_file_patterns: &["*.module"],
600        mode: AssemblyMode::OnePerPackageData,
601    },
602    // CPAN/Perl ecosystem
603    AssemblerConfig {
604        datasource_ids: &[
605            DatasourceId::CpanMetaJson,
606            DatasourceId::CpanMetaYml,
607            DatasourceId::CpanManifest,
608            DatasourceId::CpanDistIni,
609            DatasourceId::CpanMakefile,
610        ],
611        sibling_file_patterns: &[
612            "META.json",
613            "META.yml",
614            "MANIFEST",
615            "dist.ini",
616            "Makefile.PL",
617        ],
618        mode: AssemblyMode::SiblingMerge,
619    },
620    // NuGet/.NET ecosystem
621    AssemblerConfig {
622        datasource_ids: &[
623            DatasourceId::NugetCsproj,
624            DatasourceId::NugetFsproj,
625            DatasourceId::NugetNuspec,
626            DatasourceId::NugetNupkg,
627            DatasourceId::NugetProjectJson,
628            DatasourceId::NugetProjectLockJson,
629            DatasourceId::NugetPackagesConfig,
630            DatasourceId::NugetPackagesLock,
631            DatasourceId::NugetVbproj,
632        ],
633        sibling_file_patterns: &[
634            "*.csproj",
635            "*.fsproj",
636            "*.nuspec",
637            "*.nupkg",
638            "project.json",
639            "project.lock.json",
640            "packages.config",
641            "packages.lock.json",
642            "*.packages.lock.json",
643            "*.vbproj",
644        ],
645        mode: AssemblyMode::SiblingMerge,
646    },
647    AssemblerConfig {
648        datasource_ids: &[DatasourceId::NugetDepsJson],
649        sibling_file_patterns: &["*.deps.json"],
650        mode: AssemblyMode::OnePerPackageData,
651    },
652    // Swift/SPM ecosystem
653    AssemblerConfig {
654        datasource_ids: &[
655            DatasourceId::SwiftPackageManifestJson,
656            DatasourceId::SwiftPackageResolved,
657            DatasourceId::SwiftPackageShowDependencies,
658        ],
659        sibling_file_patterns: &[
660            "Package.swift.json",
661            "Package.swift.deplock",
662            "Package.resolved",
663            ".package.resolved",
664            "swift-show-dependencies.deplock",
665        ],
666        mode: AssemblyMode::SiblingMerge,
667    },
668    // ── Standalone assemblers (single file → single package) ──
669    //
670    // These ecosystems have only one manifest file type with no sibling merging.
671    // They still need configs so their datasource_ids are recognized by the assembler.
672    //
673    // Bower (JavaScript)
674    AssemblerConfig {
675        datasource_ids: &[DatasourceId::BowerJson],
676        sibling_file_patterns: &["bower.json"],
677        mode: AssemblyMode::SiblingMerge,
678    },
679    // CRAN (R language)
680    AssemblerConfig {
681        datasource_ids: &[DatasourceId::CranDescription],
682        sibling_file_patterns: &["DESCRIPTION"],
683        mode: AssemblyMode::SiblingMerge,
684    },
685    // FreeBSD packages
686    AssemblerConfig {
687        datasource_ids: &[DatasourceId::FreebsdCompactManifest],
688        sibling_file_patterns: &["+COMPACT_MANIFEST"],
689        mode: AssemblyMode::SiblingMerge,
690    },
691    // Haxe ecosystem
692    AssemblerConfig {
693        datasource_ids: &[DatasourceId::HaxelibJson],
694        sibling_file_patterns: &["haxelib.json"],
695        mode: AssemblyMode::SiblingMerge,
696    },
697    AssemblerConfig {
698        datasource_ids: &[DatasourceId::Gitmodules],
699        sibling_file_patterns: &[".gitmodules"],
700        mode: AssemblyMode::SiblingMerge,
701    },
702    // OCaml/opam ecosystem
703    AssemblerConfig {
704        datasource_ids: &[DatasourceId::OpamFile],
705        sibling_file_patterns: &["opam", "*.opam"],
706        mode: AssemblyMode::SiblingMerge,
707    },
708    // RPM Mariner manifest
709    AssemblerConfig {
710        datasource_ids: &[DatasourceId::RpmMarinerManifest],
711        sibling_file_patterns: &["*.rpm.manifest"],
712        mode: AssemblyMode::SiblingMerge,
713    },
714    AssemblerConfig {
715        datasource_ids: &[DatasourceId::RpmYumdb],
716        sibling_file_patterns: &["**/var/lib/yum/yumdb/*/*/from_repo"],
717        mode: AssemblyMode::OnePerPackageData,
718    },
719    // Microsoft Update Manifest
720    AssemblerConfig {
721        datasource_ids: &[DatasourceId::MicrosoftUpdateManifestMum],
722        sibling_file_patterns: &["*.mum"],
723        mode: AssemblyMode::SiblingMerge,
724    },
725    // Autotools (C/C++ build system)
726    AssemblerConfig {
727        datasource_ids: &[DatasourceId::AutotoolsConfigure],
728        sibling_file_patterns: &["configure", "configure.ac"],
729        mode: AssemblyMode::SiblingMerge,
730    },
731    // Bazel (build system)
732    AssemblerConfig {
733        datasource_ids: &[DatasourceId::BazelBuild],
734        sibling_file_patterns: &["BUILD"],
735        mode: AssemblyMode::SiblingMerge,
736    },
737    AssemblerConfig {
738        datasource_ids: &[DatasourceId::BazelModule],
739        sibling_file_patterns: &["MODULE.bazel"],
740        mode: AssemblyMode::OnePerPackageData,
741    },
742    // Buck (build system)
743    AssemblerConfig {
744        datasource_ids: &[DatasourceId::BuckFile, DatasourceId::BuckMetadata],
745        sibling_file_patterns: &["BUCK", "METADATA.bzl", ".buckconfig"],
746        mode: AssemblyMode::SiblingMerge,
747    },
748    // Ant/Ivy (Java dependency management)
749    AssemblerConfig {
750        datasource_ids: &[DatasourceId::AntIvyXml],
751        sibling_file_patterns: &["ivy.xml"],
752        mode: AssemblyMode::SiblingMerge,
753    },
754    // Meteor (JavaScript platform)
755    AssemblerConfig {
756        datasource_ids: &[DatasourceId::MeteorPackage],
757        sibling_file_patterns: &["package.js"],
758        mode: AssemblyMode::SiblingMerge,
759    },
760    // ── One-per-PackageData assemblers (database files with many packages) ──
761    //
762    // Alpine installed package database
763    AssemblerConfig {
764        datasource_ids: &[DatasourceId::AlpineInstalledDb],
765        sibling_file_patterns: &["installed"],
766        mode: AssemblyMode::OnePerPackageData,
767    },
768    AssemblerConfig {
769        datasource_ids: &[DatasourceId::AlpineApkbuild],
770        sibling_file_patterns: &["APKBUILD"],
771        mode: AssemblyMode::SiblingMerge,
772    },
773    // RPM installed package databases (BDB, NDB, SQLite)
774    AssemblerConfig {
775        datasource_ids: &[
776            DatasourceId::RpmInstalledDatabaseBdb,
777            DatasourceId::RpmInstalledDatabaseNdb,
778            DatasourceId::RpmInstalledDatabaseSqlite,
779        ],
780        sibling_file_patterns: &["Packages", "Packages.db", "rpmdb.sqlite"],
781        mode: AssemblyMode::OnePerPackageData,
782    },
783    // Debian installed package databases
784    AssemblerConfig {
785        datasource_ids: &[DatasourceId::DebianDeb],
786        sibling_file_patterns: &["*.deb"],
787        mode: AssemblyMode::OnePerPackageData,
788    },
789    AssemblerConfig {
790        datasource_ids: &[
791            DatasourceId::DebianInstalledStatusDb,
792            DatasourceId::DebianDistrolessInstalledDb,
793        ],
794        sibling_file_patterns: &["status"],
795        mode: AssemblyMode::OnePerPackageData,
796    },
797    AssemblerConfig {
798        datasource_ids: &[
799            DatasourceId::DebianControlExtractedDeb,
800            DatasourceId::DebianMd5SumsInExtractedDeb,
801        ],
802        sibling_file_patterns: &["control", "md5sums"],
803        mode: AssemblyMode::SiblingMerge,
804    },
805    AssemblerConfig {
806        datasource_ids: &[DatasourceId::DebianSourceControlDsc],
807        sibling_file_patterns: &["*.dsc"],
808        mode: AssemblyMode::OnePerPackageData,
809    },
810    AssemblerConfig {
811        datasource_ids: &[DatasourceId::AboutFile],
812        sibling_file_patterns: &["*.ABOUT"],
813        mode: AssemblyMode::OnePerPackageData,
814    },
815];
816
817// Datasource IDs intentionally excluded from package assembly.
818//
819// This list is runtime-significant: files with these datasource IDs may remain
820// unowned by any Package, while their dependencies are still eligible for
821// top-level hoisting. Tests also use it to enforce explicit assembly accounting.
822pub static UNASSEMBLED_DATASOURCE_IDS: &[DatasourceId] = &[
823    // Non-package metadata
824    DatasourceId::Readme,
825    DatasourceId::EtcOsRelease,
826    // Binary archives (require external extraction via ExtractCode before scanning)
827    DatasourceId::AlpineApkArchive,
828    DatasourceId::AndroidAarLibrary,
829    DatasourceId::AndroidApk,
830    DatasourceId::AppleDmg,
831    DatasourceId::Axis2Mar,
832    DatasourceId::ChromeCrx,
833    DatasourceId::DebianOriginalSourceTarball,
834    DatasourceId::DebianSourceMetadataTarball,
835    DatasourceId::InstallshieldInstaller,
836    DatasourceId::IosIpa,
837    DatasourceId::IsoDiskImage,
838    DatasourceId::JavaEarArchive,
839    DatasourceId::JavaJar,
840    DatasourceId::JavaWarArchive,
841    DatasourceId::JbossSar,
842    DatasourceId::MicrosoftCabinet,
843    DatasourceId::MozillaXpi,
844    DatasourceId::NsisInstaller,
845    DatasourceId::RpmArchive,
846    DatasourceId::SharShellArchive,
847    DatasourceId::SquashfsDiskImage,
848    // Supplementary metadata (not primary package definitions)
849    DatasourceId::ArchAurinfo,
850    DatasourceId::ArchPkginfo,
851    DatasourceId::ArchSrcinfo,
852    DatasourceId::Axis2ModuleXml,
853    DatasourceId::ClojureDepsEdn,
854    DatasourceId::ClojureProjectClj,
855    DatasourceId::DebianInstalledFilesList,
856    DatasourceId::DebianInstalledMd5Sums,
857    DatasourceId::DebianCopyright,
858    DatasourceId::DebianCopyrightInPackage,
859    DatasourceId::DebianCopyrightStandalone,
860    DatasourceId::GoBinary,
861    DatasourceId::WindowsExecutable,
862    DatasourceId::Dockerfile,
863    DatasourceId::HexMixLock,
864    DatasourceId::JavaEarApplicationXml,
865    DatasourceId::JavaWarWebXml,
866    DatasourceId::JbossServiceXml,
867    DatasourceId::MesonBuild,
868    DatasourceId::GemGemspecInstalledSpecifications,
869    DatasourceId::NugetDirectoryBuildProps,
870    DatasourceId::NugetDirectoryPackagesProps,
871    DatasourceId::CitationCff,
872    DatasourceId::PubliccodeYaml,
873    DatasourceId::RpmPackageLicenses,
874    DatasourceId::RustBinary,
875    DatasourceId::SbtBuildSbt,
876    DatasourceId::VcpkgJson,
877];
878
#[cfg(test)]
mod tests {
    //! Consistency checks for the assembler registry and the post-assembly
    //! pass table defined in this file.

    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Every `DatasourceId` variant must appear in exactly one of `ASSEMBLERS`
    /// or `UNASSEMBLED_DATASOURCE_IDS`: never both, never neither.
    #[test]
    fn test_every_datasource_id_is_accounted_for() {
        let assembled: HashSet<DatasourceId> = ASSEMBLERS
            .iter()
            .flat_map(|config| config.datasource_ids.iter().copied())
            .collect();
        let unassembled: HashSet<DatasourceId> =
            UNASSEMBLED_DATASOURCE_IDS.iter().copied().collect();

        // An ID listed on both sides has contradictory assembly status.
        let overlap: Vec<_> = assembled.intersection(&unassembled).collect();
        assert!(
            overlap.is_empty(),
            "Datasource IDs in BOTH ASSEMBLERS and UNASSEMBLED: {overlap:?}"
        );

        // An ID listed on neither side was never given an explicit decision.
        let missing: Vec<_> = DatasourceId::iter()
            .filter(|dsid| !(assembled.contains(dsid) || unassembled.contains(dsid)))
            .collect();
        assert!(
            missing.is_empty(),
            "Datasource IDs in NEITHER ASSEMBLERS nor UNASSEMBLED: {missing:?}\n\
             Add each to an AssemblerConfig in ASSEMBLERS, or to UNASSEMBLED_DATASOURCE_IDS."
        );
    }

    /// Collapsing `POST_ASSEMBLY_PASSES` into a set must not shrink it.
    #[test]
    fn test_post_assembly_passes_are_unique() {
        let distinct_count = POST_ASSEMBLY_PASSES
            .iter()
            .copied()
            .collect::<HashSet<PostAssemblyPassKind>>()
            .len();

        assert_eq!(
            distinct_count,
            POST_ASSEMBLY_PASSES.len(),
            "POST_ASSEMBLY_PASSES contains duplicate entries"
        );
    }

    /// Each `PostAssemblyPassKind` variant must be present in
    /// `POST_ASSEMBLY_PASSES`, and present exactly one time.
    #[test]
    fn test_every_post_assembly_pass_kind_is_registered_once() {
        let registered: HashSet<PostAssemblyPassKind> =
            POST_ASSEMBLY_PASSES.iter().copied().collect();

        let missing: Vec<_> = PostAssemblyPassKind::iter()
            .filter(|pass| !registered.contains(pass))
            .collect();
        assert!(
            missing.is_empty(),
            "Post-assembly pass variants not registered in POST_ASSEMBLY_PASSES: {missing:?}"
        );

        // Presence is established above; now verify the multiplicity per variant.
        PostAssemblyPassKind::iter().for_each(|pass| {
            let count = POST_ASSEMBLY_PASSES
                .iter()
                .filter(|&&entry| entry == pass)
                .count();
            assert_eq!(
                count, 1,
                "Post-assembly pass {pass:?} should be registered exactly once"
            );
        });
    }

    /// With default (empty) inputs, no pass should report that it needs to run.
    #[test]
    fn test_post_assembly_passes_skip_irrelevant_inputs() {
        let inputs = PostAssemblyInputs::default();

        PostAssemblyPassKind::iter().for_each(|pass| {
            assert!(
                !pass.should_run(&inputs),
                "{pass:?} should skip when no relevant inputs are present"
            );
        });
    }

    /// An npm-only scan with workspace markers should enable exactly the two
    /// npm passes and nothing else.
    #[test]
    fn test_npm_workspace_inputs_only_run_npm_passes() {
        let inputs = PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Npm]),
            file_datasource_ids: HashSet::from([DatasourceId::NpmPackageJson]),
            has_npm_workspace_markers: true,
            has_cargo_workspace_markers: false,
        };
        let expected = HashSet::from([
            PostAssemblyPassKind::NpmResourceAssign,
            PostAssemblyPassKind::NpmWorkspaceMerge,
        ]);

        let runnable: HashSet<_> = PostAssemblyPassKind::iter()
            .filter(|pass| pass.should_run(&inputs))
            .collect();

        assert_eq!(runnable, expected);
    }

    /// Cargo packages alone are not enough for the workspace merge; the
    /// workspace-marker flag has to be set as well.
    #[test]
    fn test_cargo_workspace_merge_requires_workspace_markers() {
        // Identical Cargo inputs, differing only in the workspace-marker flag.
        let cargo_inputs = |has_cargo_workspace_markers| PostAssemblyInputs {
            package_types: HashSet::from([PackageType::Cargo]),
            file_datasource_ids: HashSet::from([DatasourceId::CargoToml]),
            has_npm_workspace_markers: false,
            has_cargo_workspace_markers,
        };

        let without_markers = cargo_inputs(false);
        assert!(!PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&without_markers));

        let with_markers = cargo_inputs(true);
        assert!(PostAssemblyPassKind::CargoWorkspaceMerge.should_run(&with_markers));
    }
}