1mod assemblers;
2#[cfg(all(test, feature = "golden-tests"))]
3mod assembly_golden_test;
4#[cfg(test)]
5mod assembly_test;
6mod bazel_merge;
7mod bazel_prune;
8mod cargo_resource_assign;
9mod cargo_workspace_merge;
10mod composer_resource_assign;
11mod conda_rootfs_merge;
12pub mod file_ref_resolve;
13mod hackage_merge;
14mod nested_merge;
15mod nix_flake_compat_merge;
16mod npm_resource_assign;
17mod npm_workspace_merge;
18mod nuget_cpm_resolve;
19mod python_requirements_assign;
20mod ruby_resource_assign;
21mod sibling_merge;
22mod swift_merge;
23mod topology;
24
25use std::collections::{HashMap, HashSet};
26use std::path::PathBuf;
27use std::sync::LazyLock;
28
29use crate::models::{DatasourceId, FileInfo, Package, PackageUid, TopLevelDependency};
30
31pub use assemblers::ASSEMBLERS;
32
33type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
34
35static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
38 let mut lookup = HashMap::new();
39 for config in ASSEMBLERS {
40 let key = *config
41 .datasource_ids
42 .first()
43 .expect("assembler must have at least one datasource_id");
44 for &dsid in config.datasource_ids {
45 lookup.insert(dsid, key);
46 }
47 }
48 lookup
49});
50
51static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
52 LazyLock::new(|| {
53 let mut lookup = HashMap::new();
54 for config in ASSEMBLERS {
55 let key = *config
56 .datasource_ids
57 .first()
58 .expect("assembler must have at least one datasource_id");
59 lookup.insert(key, config);
60 }
61 lookup
62 });
63
64pub struct AssemblyResult {
67 pub packages: Vec<Package>,
68 pub dependencies: Vec<TopLevelDependency>,
69}
70
/// Strategy an assembler uses to turn the package data found in a directory into
/// packages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// Package data from the files of a directory is combined through the
    /// sibling-merge pass; assemblers in this mode also take part in the later
    /// nested-pattern pass.
    SiblingMerge,
    /// Every matching package-data entry that carries a purl becomes its own
    /// package.
    OnePerPackageData,
}
80
81pub struct AssemblerConfig {
82 pub datasource_ids: &'static [DatasourceId],
83 pub sibling_file_patterns: &'static [&'static str],
84 pub mode: AssemblyMode,
85}
86
87pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
93 let assembler_lookup = &*ASSEMBLER_LOOKUP;
94 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
95 let mut packages = Vec::new();
96 let mut dependencies = Vec::new();
97
98 let dir_files = group_files_by_directory(files);
99 let topology_plan = topology::TopologyPlan::build(files, &dir_files);
100
101 for file_indices in dir_files.values() {
102 let mut groups: HashSet<DatasourceId> = HashSet::new();
103
104 for &idx in file_indices {
105 for pkg_data in &files[idx].package_data {
106 if let Some(dsid) = pkg_data.datasource_id
107 && let Some(&config_key) = assembler_lookup.get(&dsid)
108 {
109 groups.insert(config_key);
110 }
111 }
112 }
113
114 for &config_key in &groups {
115 let config = assembler_config_lookup
116 .get(&config_key)
117 .copied()
118 .expect("assembler config must exist");
119
120 if topology_plan.claims_directory_assembly(config, file_indices, files) {
121 continue;
122 }
123
124 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
125 let results = special_merger.run(config, files, file_indices);
126 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
127 continue;
128 }
129
130 match config.mode {
131 AssemblyMode::SiblingMerge => {
132 let results = sibling_merge::assemble_siblings(config, files, file_indices)
133 .into_iter()
134 .collect();
135 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
136 }
137 AssemblyMode::OnePerPackageData => {
138 let results = assemble_one_per_package_data(config, files, file_indices)
139 .into_iter()
140 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
141 .collect();
142 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
143 }
144 }
145 }
146 }
147
148 topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);
149
150 for config in ASSEMBLERS {
151 if config.mode != AssemblyMode::SiblingMerge {
152 continue;
153 }
154 if let Some((pkg, deps, affected_indices)) =
155 nested_merge::assemble_nested_patterns(files, config)
156 {
157 let package_uid = pkg.package_uid.clone();
158 let purl = pkg.purl.clone();
159 let removed_package_uids: Vec<PackageUid> = packages
160 .iter()
161 .filter(|p| p.purl == purl)
162 .map(|p| p.package_uid.clone())
163 .collect();
164
165 packages.retain(|p| p.purl != purl);
166 dependencies.retain(|d| {
167 d.for_package_uid.as_ref() != Some(&package_uid)
168 && !removed_package_uids
169 .iter()
170 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
171 });
172
173 for idx in &affected_indices {
174 files[*idx].for_packages.clear();
175 files[*idx].for_packages.push(package_uid.clone());
176 }
177
178 packages.push(pkg);
179 dependencies.extend(deps);
180 }
181 }
182
183 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
184 hoist_unassembled_file_dependencies(files, &mut dependencies);
185
186 for package in &mut packages {
187 package.datafile_paths.sort();
188 package.datafile_paths.dedup();
189 package.datasource_ids.sort_by_key(|left| left.to_string());
190 package.datasource_ids.dedup();
191 }
192
193 for file in files.iter_mut() {
194 file.for_packages
195 .sort_by(|left, right| left.stable_key().cmp(right.stable_key()));
196 file.for_packages.dedup();
197 }
198
199 packages
200 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
201 dependencies.sort_by(|left, right| {
202 left.purl
203 .as_deref()
204 .cmp(&right.purl.as_deref())
205 .then_with(|| {
206 left.extracted_requirement
207 .as_deref()
208 .cmp(&right.extracted_requirement.as_deref())
209 })
210 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
211 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
212 .then_with(|| {
213 left.datasource_id
214 .to_string()
215 .cmp(&right.datasource_id.to_string())
216 })
217 .then_with(|| {
218 left.for_package_uid
219 .as_ref()
220 .map(|uid| uid.stable_key())
221 .cmp(&right.for_package_uid.as_ref().map(|uid| uid.stable_key()))
222 })
223 });
224
225 AssemblyResult {
226 packages,
227 dependencies,
228 }
229}
230
231fn apply_directory_merge_results(
232 files: &mut [FileInfo],
233 packages: &mut Vec<Package>,
234 dependencies: &mut Vec<TopLevelDependency>,
235 results: Vec<DirectoryMergeOutput>,
236) {
237 for (package, deps, affected_indices) in results {
238 if let Some(package) = package {
239 let package_uid = package.package_uid.clone();
240 for idx in &affected_indices {
241 if !files[*idx].for_packages.contains(&package_uid) {
242 files[*idx].for_packages.push(package_uid.clone());
243 }
244 }
245 packages.push(package);
246 }
247 dependencies.extend(deps);
248 }
249}
250
251fn hoist_unassembled_file_dependencies(
252 files: &[FileInfo],
253 dependencies: &mut Vec<TopLevelDependency>,
254) {
255 for file in files {
256 if !file.for_packages.is_empty() {
257 continue;
258 }
259
260 for pkg_data in &file.package_data {
261 let Some(datasource_id) = pkg_data.datasource_id else {
262 continue;
263 };
264
265 if !should_hoist_unassembled_dependencies(datasource_id) {
266 continue;
267 }
268
269 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
270 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
271 }));
272 }
273 }
274}
275
276const HOIST_IF_UNOWNED_DATASOURCE_IDS: &[DatasourceId] = &[DatasourceId::PipRequirements];
277
278fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
279 if HOIST_IF_UNOWNED_DATASOURCE_IDS.contains(&datasource_id) {
280 return true;
281 }
282
283 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
284 return false;
285 }
286
287 !matches!(
288 datasource_id,
289 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
290 )
291}
292
293fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
294 (
295 package.purl.as_deref(),
296 package.name.as_deref(),
297 package.version.as_deref(),
298 package
299 .datafile_paths
300 .first()
301 .map(String::as_str)
302 .unwrap_or(""),
303 )
304}
305
306fn assemble_one_per_package_data(
307 config: &AssemblerConfig,
308 files: &[FileInfo],
309 file_indices: &[usize],
310) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
311 let mut results = Vec::new();
312
313 for &idx in file_indices {
314 let file = &files[idx];
315 for pkg_data in &file.package_data {
316 let dsid_matches = pkg_data
317 .datasource_id
318 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
319
320 if !dsid_matches || pkg_data.purl.is_none() {
321 continue;
322 }
323
324 let datafile_path = file.path.clone();
325 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
326 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
327 let for_package_uid = Some(pkg.package_uid.clone());
328
329 let deps: Vec<TopLevelDependency> = pkg_data
330 .dependencies
331 .iter()
332 .filter(|dep| dep.purl.is_some())
333 .map(|dep| {
334 TopLevelDependency::from_dependency(
335 dep,
336 datafile_path.clone(),
337 datasource_id,
338 for_package_uid.clone(),
339 )
340 })
341 .collect();
342
343 results.push((pkg, deps, idx));
344 }
345 }
346
347 results
348}
349
350fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
352 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
353 for (idx, file) in files.iter().enumerate() {
354 if let Some(parent) = std::path::Path::new(&file.path).parent() {
355 groups.entry(parent.to_path_buf()).or_default().push(idx);
356 }
357 }
358 groups
359}