1mod assemblers;
5#[cfg(all(test, feature = "golden-tests"))]
6mod assembly_golden_test;
7#[cfg(test)]
8mod assembly_test;
9mod bazel_merge;
10mod bazel_prune;
11mod cargo_resource_assign;
12mod cargo_workspace_merge;
13mod composer_resource_assign;
14mod conda_rootfs_merge;
15mod debian_source_merge;
16pub mod file_ref_resolve;
17mod hackage_merge;
18mod nested_merge;
19mod nix_flake_compat_merge;
20mod npm_resource_assign;
21mod npm_workspace_merge;
22mod nuget_cpm_resolve;
23mod python_requirements_assign;
24mod ruby_resource_assign;
25mod sibling_merge;
26mod swift_merge;
27mod topology;
28
29use std::collections::{HashMap, HashSet};
30use std::path::PathBuf;
31use std::sync::LazyLock;
32
33use crate::models::{DatasourceId, FileInfo, Package, PackageUid, TopLevelDependency};
34
35pub use assemblers::ASSEMBLERS;
36
/// One result produced by a directory-level merger: the assembled package (if
/// any), the dependencies to add to the top-level list, and the indices into
/// the `files` slice that should be linked to the package.
type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
38
39static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
42 let mut lookup = HashMap::new();
43 for config in ASSEMBLERS {
44 let key = *config
45 .datasource_ids
46 .first()
47 .expect("assembler must have at least one datasource_id");
48 for &dsid in config.datasource_ids {
49 lookup.insert(dsid, key);
50 }
51 }
52 lookup
53});
54
55static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
56 LazyLock::new(|| {
57 let mut lookup = HashMap::new();
58 for config in ASSEMBLERS {
59 let key = *config
60 .datasource_ids
61 .first()
62 .expect("assembler must have at least one datasource_id");
63 lookup.insert(key, config);
64 }
65 lookup
66 });
67
/// Output of [`assemble`]: the assembled packages and the top-level
/// dependencies, both sorted before being returned.
pub struct AssemblyResult {
    /// Assembled packages, ordered by `stable_package_sort_key`.
    pub packages: Vec<Package>,
    /// Top-level dependencies collected during assembly.
    pub dependencies: Vec<TopLevelDependency>,
}
74
/// Strategy [`assemble`] uses for an assembler within a single directory when
/// no special directory merger is registered for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// Handled by `sibling_merge::assemble_siblings`. Assemblers in this mode
    /// are also the only ones passed to `nested_merge::assemble_nested_patterns`.
    SiblingMerge,
    /// Handled by `assemble_one_per_package_data`: each matching package-data
    /// entry that carries a purl becomes its own package.
    OnePerPackageData,
}
84
/// Static description of one assembler.
pub struct AssemblerConfig {
    /// Datasource IDs this assembler handles. The first entry is used as the
    /// assembler's canonical key in the lookup tables, so it must not be empty.
    pub datasource_ids: &'static [DatasourceId],
    /// Not read in this file; presumably file-name patterns consumed by the
    /// sibling/nested mergers (confirm against those modules).
    pub sibling_file_patterns: &'static [&'static str],
    /// Assembly strategy applied when no special directory merger exists.
    pub mode: AssemblyMode,
}
90
91pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
97 let assembler_lookup = &*ASSEMBLER_LOOKUP;
98 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
99 let mut packages = Vec::new();
100 let mut dependencies = Vec::new();
101
102 let dir_files = group_files_by_directory(files);
103 let topology_plan = topology::TopologyPlan::build(files, &dir_files);
104
105 for file_indices in dir_files.values() {
106 let mut groups: HashSet<DatasourceId> = HashSet::new();
107
108 for &idx in file_indices {
109 for pkg_data in &files[idx].package_data {
110 if let Some(dsid) = pkg_data.datasource_id
111 && let Some(&config_key) = assembler_lookup.get(&dsid)
112 {
113 groups.insert(config_key);
114 }
115 }
116 }
117
118 for &config_key in &groups {
119 let config = assembler_config_lookup
120 .get(&config_key)
121 .copied()
122 .expect("assembler config must exist");
123
124 if topology_plan.claims_directory_assembly(config, file_indices, files) {
125 continue;
126 }
127
128 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
129 let results = special_merger.run(config, files, file_indices);
130 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
131 continue;
132 }
133
134 match config.mode {
135 AssemblyMode::SiblingMerge => {
136 let results = sibling_merge::assemble_siblings(config, files, file_indices);
137 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
138 }
139 AssemblyMode::OnePerPackageData => {
140 let results = assemble_one_per_package_data(config, files, file_indices)
141 .into_iter()
142 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
143 .collect();
144 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
145 }
146 }
147 }
148 }
149
150 topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);
151
152 for config in ASSEMBLERS {
153 if config.mode != AssemblyMode::SiblingMerge {
154 continue;
155 }
156 if let Some((pkg, deps, affected_indices)) =
157 nested_merge::assemble_nested_patterns(files, config)
158 {
159 let package_uid = pkg.package_uid.clone();
160 let purl = pkg.purl.clone();
161 let removed_package_uids: Vec<PackageUid> = packages
162 .iter()
163 .filter(|p| p.purl == purl)
164 .map(|p| p.package_uid.clone())
165 .collect();
166
167 packages.retain(|p| p.purl != purl);
168 dependencies.retain(|d| {
169 d.for_package_uid.as_ref() != Some(&package_uid)
170 && !removed_package_uids
171 .iter()
172 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
173 });
174
175 for idx in &affected_indices {
176 files[*idx].for_packages.clear();
177 files[*idx].for_packages.push(package_uid.clone());
178 }
179
180 packages.push(pkg);
181 dependencies.extend(deps);
182 }
183 }
184
185 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
186 hoist_unassembled_file_dependencies(files, &mut dependencies);
187
188 for package in &mut packages {
189 package.datafile_paths.sort();
190 package.datafile_paths.dedup();
191 package.datasource_ids.sort_by_key(|left| left.to_string());
192 package.datasource_ids.dedup();
193 }
194
195 for file in files.iter_mut() {
196 file.for_packages
197 .sort_by(|left, right| left.stable_key().cmp(right.stable_key()));
198 file.for_packages.dedup();
199 }
200
201 packages
202 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
203 dependencies.sort_by(|left, right| {
204 left.purl
205 .as_deref()
206 .cmp(&right.purl.as_deref())
207 .then_with(|| {
208 left.extracted_requirement
209 .as_deref()
210 .cmp(&right.extracted_requirement.as_deref())
211 })
212 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
213 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
214 .then_with(|| {
215 left.datasource_id
216 .to_string()
217 .cmp(&right.datasource_id.to_string())
218 })
219 .then_with(|| {
220 left.for_package_uid
221 .as_ref()
222 .map(|uid| uid.stable_key())
223 .cmp(&right.for_package_uid.as_ref().map(|uid| uid.stable_key()))
224 })
225 });
226
227 AssemblyResult {
228 packages,
229 dependencies,
230 }
231}
232
233fn apply_directory_merge_results(
234 files: &mut [FileInfo],
235 packages: &mut Vec<Package>,
236 dependencies: &mut Vec<TopLevelDependency>,
237 results: Vec<DirectoryMergeOutput>,
238) {
239 for (package, deps, affected_indices) in results {
240 if let Some(package) = package {
241 let package_uid = package.package_uid.clone();
242 for idx in &affected_indices {
243 if !files[*idx].for_packages.contains(&package_uid) {
244 files[*idx].for_packages.push(package_uid.clone());
245 }
246 }
247 packages.push(package);
248 }
249 dependencies.extend(deps);
250 }
251}
252
253fn hoist_unassembled_file_dependencies(
254 files: &[FileInfo],
255 dependencies: &mut Vec<TopLevelDependency>,
256) {
257 for file in files {
258 if !file.for_packages.is_empty() {
259 continue;
260 }
261
262 for pkg_data in &file.package_data {
263 let Some(datasource_id) = pkg_data.datasource_id else {
264 continue;
265 };
266
267 if !should_hoist_unassembled_dependencies(datasource_id) {
268 continue;
269 }
270
271 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
272 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
273 }));
274 }
275 }
276}
277
/// Datasource IDs whose dependencies are always hoisted to the top level when
/// the containing file is not assigned to any package.
const HOIST_IF_UNOWNED_DATASOURCE_IDS: &[DatasourceId] = &[DatasourceId::PipRequirements];
279
280fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
281 if HOIST_IF_UNOWNED_DATASOURCE_IDS.contains(&datasource_id) {
282 return true;
283 }
284
285 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
286 return false;
287 }
288
289 !matches!(
290 datasource_id,
291 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
292 )
293}
294
295fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
296 (
297 package.purl.as_deref(),
298 package.name.as_deref(),
299 package.version.as_deref(),
300 package
301 .datafile_paths
302 .first()
303 .map(String::as_str)
304 .unwrap_or(""),
305 )
306}
307
308fn assemble_one_per_package_data(
309 config: &AssemblerConfig,
310 files: &[FileInfo],
311 file_indices: &[usize],
312) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
313 let mut results = Vec::new();
314
315 for &idx in file_indices {
316 let file = &files[idx];
317 for pkg_data in &file.package_data {
318 let dsid_matches = pkg_data
319 .datasource_id
320 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
321
322 if !dsid_matches || pkg_data.purl.is_none() {
323 continue;
324 }
325
326 let datafile_path = file.path.clone();
327 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
328 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
329 let for_package_uid = Some(pkg.package_uid.clone());
330
331 let deps: Vec<TopLevelDependency> = pkg_data
332 .dependencies
333 .iter()
334 .filter(|dep| dep.purl.is_some())
335 .map(|dep| {
336 TopLevelDependency::from_dependency(
337 dep,
338 datafile_path.clone(),
339 datasource_id,
340 for_package_uid.clone(),
341 )
342 })
343 .collect();
344
345 results.push((pkg, deps, idx));
346 }
347 }
348
349 results
350}
351
352fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
354 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
355 for (idx, file) in files.iter().enumerate() {
356 if let Some(parent) = std::path::Path::new(&file.path).parent() {
357 groups.entry(parent.to_path_buf()).or_default().push(idx);
358 }
359 }
360 groups
361}