1mod assemblers;
5#[cfg(all(test, feature = "golden-tests"))]
6mod assembly_golden_test;
7#[cfg(test)]
8mod assembly_test;
9mod bazel_merge;
10mod bazel_prune;
11mod cargo_resource_assign;
12mod cargo_workspace_merge;
13mod composer_resource_assign;
14mod conda_rootfs_merge;
15mod debian_source_merge;
16pub mod file_ref_resolve;
17mod hackage_merge;
18mod nested_merge;
19mod nix_flake_compat_merge;
20mod npm_resource_assign;
21mod npm_workspace_merge;
22mod nuget_cpm_resolve;
23mod python_requirements_assign;
24mod ruby_resource_assign;
25mod sibling_merge;
26mod swift_merge;
27mod topology;
28mod windows_update_merge;
29
30use std::collections::{HashMap, HashSet};
31use std::path::PathBuf;
32use std::sync::LazyLock;
33
34use crate::models::{DatasourceId, FileInfo, Package, PackageUid, TopLevelDependency};
35
36pub use assemblers::ASSEMBLERS;
37
/// One result produced by a directory-level merge step.
///
/// The tuple holds, in order:
/// 1. the assembled package, or `None` when the step produced only dependencies;
/// 2. the top-level dependencies produced alongside it;
/// 3. indices into the scanned `files` slice of the files that should be linked to
///    the package (ignored when no package was produced).
type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
39
40static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
43 let mut lookup = HashMap::new();
44 for config in ASSEMBLERS {
45 let key = *config
46 .datasource_ids
47 .first()
48 .expect("assembler must have at least one datasource_id");
49 for &dsid in config.datasource_ids {
50 lookup.insert(dsid, key);
51 }
52 }
53 lookup
54});
55
56static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
57 LazyLock::new(|| {
58 let mut lookup = HashMap::new();
59 for config in ASSEMBLERS {
60 let key = *config
61 .datasource_ids
62 .first()
63 .expect("assembler must have at least one datasource_id");
64 lookup.insert(key, config);
65 }
66 lookup
67 });
68
/// Output of [`assemble`].
pub struct AssemblyResult {
    /// Packages assembled from the scanned files, sorted by [`assemble`] before
    /// being returned.
    pub packages: Vec<Package>,
    /// Top-level dependencies collected during assembly, sorted by [`assemble`]
    /// before being returned.
    pub dependencies: Vec<TopLevelDependency>,
}
75
/// Strategy [`assemble`] uses for an assembler that has no special directory merger.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// The directory's files are handed to `sibling_merge::assemble_siblings`.
    /// Assemblers in this mode are also the only ones considered by the
    /// nested-pattern merge pass.
    SiblingMerge,
    /// Every matching `package_data` entry that carries a purl becomes its own
    /// package.
    OnePerPackageData,
}
85
/// Static description of one assembler.
pub struct AssemblerConfig {
    /// Datasource ids handled by this assembler. Must be non-empty: the first entry
    /// is used as the assembler's canonical lookup key.
    pub datasource_ids: &'static [DatasourceId],
    /// Patterns describing the files this assembler works on. Not read in this
    /// module; presumably interpreted by the merge submodules (TODO confirm).
    pub sibling_file_patterns: &'static [&'static str],
    /// How packages are built when no special directory merger applies.
    pub mode: AssemblyMode,
}
91
92pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
98 let assembler_lookup = &*ASSEMBLER_LOOKUP;
99 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
100 let mut packages = Vec::new();
101 let mut dependencies = Vec::new();
102
103 let dir_files = group_files_by_directory(files);
104 let topology_plan = topology::TopologyPlan::build(files, &dir_files);
105
106 for file_indices in dir_files.values() {
107 let mut groups: HashSet<DatasourceId> = HashSet::new();
108
109 for &idx in file_indices {
110 for pkg_data in &files[idx].package_data {
111 if let Some(dsid) = pkg_data.datasource_id
112 && let Some(&config_key) = assembler_lookup.get(&dsid)
113 {
114 groups.insert(config_key);
115 }
116 }
117 }
118
119 for &config_key in &groups {
120 let config = assembler_config_lookup
121 .get(&config_key)
122 .copied()
123 .expect("assembler config must exist");
124
125 if topology_plan.claims_directory_assembly(config, file_indices, files) {
126 continue;
127 }
128
129 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
130 let results = special_merger.run(config, files, file_indices);
131 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
132 continue;
133 }
134
135 match config.mode {
136 AssemblyMode::SiblingMerge => {
137 let results = sibling_merge::assemble_siblings(config, files, file_indices);
138 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
139 }
140 AssemblyMode::OnePerPackageData => {
141 let results = assemble_one_per_package_data(config, files, file_indices)
142 .into_iter()
143 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
144 .collect();
145 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
146 }
147 }
148 }
149 }
150
151 topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);
152
153 for config in ASSEMBLERS {
154 if config.mode != AssemblyMode::SiblingMerge {
155 continue;
156 }
157 if let Some((pkg, deps, affected_indices)) =
158 nested_merge::assemble_nested_patterns(files, config)
159 {
160 let package_uid = pkg.package_uid.clone();
161 let purl = pkg.purl.clone();
162 let removed_package_uids: Vec<PackageUid> = packages
163 .iter()
164 .filter(|p| p.purl == purl)
165 .map(|p| p.package_uid.clone())
166 .collect();
167
168 packages.retain(|p| p.purl != purl);
169 dependencies.retain(|d| {
170 d.for_package_uid.as_ref() != Some(&package_uid)
171 && !removed_package_uids
172 .iter()
173 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
174 });
175
176 for idx in &affected_indices {
177 files[*idx].for_packages.clear();
178 files[*idx].for_packages.push(package_uid.clone());
179 }
180
181 packages.push(pkg);
182 dependencies.extend(deps);
183 }
184 }
185
186 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
187 hoist_unassembled_file_dependencies(files, &mut dependencies);
188
189 for package in &mut packages {
190 package.datafile_paths.sort();
191 package.datafile_paths.dedup();
192 package.datasource_ids.sort_by_key(|left| left.to_string());
193 package.datasource_ids.dedup();
194 }
195
196 for file in files.iter_mut() {
197 file.for_packages
198 .sort_by(|left, right| left.stable_key().cmp(right.stable_key()));
199 file.for_packages.dedup();
200 }
201
202 packages
203 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
204 dependencies.sort_by(|left, right| {
205 left.purl
206 .as_deref()
207 .cmp(&right.purl.as_deref())
208 .then_with(|| {
209 left.extracted_requirement
210 .as_deref()
211 .cmp(&right.extracted_requirement.as_deref())
212 })
213 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
214 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
215 .then_with(|| {
216 left.datasource_id
217 .to_string()
218 .cmp(&right.datasource_id.to_string())
219 })
220 .then_with(|| {
221 left.for_package_uid
222 .as_ref()
223 .map(|uid| uid.stable_key())
224 .cmp(&right.for_package_uid.as_ref().map(|uid| uid.stable_key()))
225 })
226 });
227
228 AssemblyResult {
229 packages,
230 dependencies,
231 }
232}
233
234fn apply_directory_merge_results(
235 files: &mut [FileInfo],
236 packages: &mut Vec<Package>,
237 dependencies: &mut Vec<TopLevelDependency>,
238 results: Vec<DirectoryMergeOutput>,
239) {
240 for (package, deps, affected_indices) in results {
241 if let Some(package) = package {
242 let package_uid = package.package_uid.clone();
243 for idx in &affected_indices {
244 if !files[*idx].for_packages.contains(&package_uid) {
245 files[*idx].for_packages.push(package_uid.clone());
246 }
247 }
248 packages.push(package);
249 }
250 dependencies.extend(deps);
251 }
252}
253
254fn hoist_unassembled_file_dependencies(
255 files: &[FileInfo],
256 dependencies: &mut Vec<TopLevelDependency>,
257) {
258 for file in files {
259 if !file.for_packages.is_empty() {
260 continue;
261 }
262
263 for pkg_data in &file.package_data {
264 let Some(datasource_id) = pkg_data.datasource_id else {
265 continue;
266 };
267
268 if !should_hoist_unassembled_dependencies(datasource_id) {
269 continue;
270 }
271
272 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
273 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
274 }));
275 }
276 }
277}
278
/// Datasource ids whose dependencies are always hoisted from files that belong to
/// no package, regardless of whether the id is listed in
/// `assemblers::UNASSEMBLED_DATASOURCE_IDS`.
const HOIST_IF_UNOWNED_DATASOURCE_IDS: &[DatasourceId] = &[DatasourceId::PipRequirements];
280
281fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
282 if HOIST_IF_UNOWNED_DATASOURCE_IDS.contains(&datasource_id) {
283 return true;
284 }
285
286 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
287 return false;
288 }
289
290 !matches!(
291 datasource_id,
292 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
293 )
294}
295
296fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
297 (
298 package.purl.as_deref(),
299 package.name.as_deref(),
300 package.version.as_deref(),
301 package
302 .datafile_paths
303 .first()
304 .map(String::as_str)
305 .unwrap_or(""),
306 )
307}
308
309fn assemble_one_per_package_data(
310 config: &AssemblerConfig,
311 files: &[FileInfo],
312 file_indices: &[usize],
313) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
314 let mut results = Vec::new();
315
316 for &idx in file_indices {
317 let file = &files[idx];
318 for pkg_data in &file.package_data {
319 let dsid_matches = pkg_data
320 .datasource_id
321 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
322
323 if !dsid_matches || pkg_data.purl.is_none() {
324 continue;
325 }
326
327 let datafile_path = file.path.clone();
328 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
329 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
330 let for_package_uid = Some(pkg.package_uid.clone());
331
332 let deps: Vec<TopLevelDependency> = pkg_data
333 .dependencies
334 .iter()
335 .filter(|dep| dep.purl.is_some())
336 .map(|dep| {
337 TopLevelDependency::from_dependency(
338 dep,
339 datafile_path.clone(),
340 datasource_id,
341 for_package_uid.clone(),
342 )
343 })
344 .collect();
345
346 results.push((pkg, deps, idx));
347 }
348 }
349
350 results
351}
352
353fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
355 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
356 for (idx, file) in files.iter().enumerate() {
357 if let Some(parent) = std::path::Path::new(&file.path).parent() {
358 groups.entry(parent.to_path_buf()).or_default().push(idx);
359 }
360 }
361 groups
362}