1mod assemblers;
5#[cfg(test)]
6mod assembly_test;
7mod bazel_merge;
8mod bazel_prune;
9mod cargo_resource_assign;
10mod cargo_workspace_merge;
11mod composer_resource_assign;
12mod conda_rootfs_merge;
13mod debian_source_merge;
14pub mod file_ref_resolve;
15mod hackage_merge;
16mod nested_merge;
17mod nix_flake_compat_merge;
18mod npm_resource_assign;
19mod npm_workspace_merge;
20mod nuget_cpm_resolve;
21mod python_requirements_assign;
22mod ruby_resource_assign;
23mod sibling_merge;
24mod swift_merge;
25mod topology;
26mod windows_update_merge;
27
28use std::collections::{HashMap, HashSet};
29use std::path::PathBuf;
30use std::sync::LazyLock;
31
32use crate::models::{DatasourceId, FileInfo, Package, PackageUid, TopLevelDependency};
33
34pub use assemblers::ASSEMBLERS;
35
/// Outcome of one directory-level merge, as consumed by
/// `apply_directory_merge_results`:
///
/// 1. the assembled package, if the merge produced one;
/// 2. the top-level dependencies to append to the global dependency list;
/// 3. the indices (into the scanned `files` slice) of the files that should
///    reference the assembled package through `for_packages`.
type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
37
38static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
41 let mut lookup = HashMap::new();
42 for config in ASSEMBLERS {
43 let key = *config
44 .datasource_ids
45 .first()
46 .expect("assembler must have at least one datasource_id");
47 for &dsid in config.datasource_ids {
48 lookup.insert(dsid, key);
49 }
50 }
51 lookup
52});
53
54static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
55 LazyLock::new(|| {
56 let mut lookup = HashMap::new();
57 for config in ASSEMBLERS {
58 let key = *config
59 .datasource_ids
60 .first()
61 .expect("assembler must have at least one datasource_id");
62 lookup.insert(key, config);
63 }
64 lookup
65 });
66
/// Output of [`assemble`]: the top-level packages and dependencies produced
/// from the scanned files.
pub struct AssemblyResult {
    /// Assembled packages, ordered by purl, name, version and first datafile
    /// path (see `stable_package_sort_key`).
    pub packages: Vec<Package>,
    /// Top-level dependencies, ordered by purl, extracted requirement, scope,
    /// datafile path, datasource ID and owning package UID.
    pub dependencies: Vec<TopLevelDependency>,
}
73
/// Strategy [`assemble`] uses for an assembler's files within one directory
/// when no special directory merger is registered for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// Delegate the directory to `sibling_merge::assemble_siblings`.
    /// Assemblers in this mode also take part in the later nested-pattern
    /// merge pass (`nested_merge::assemble_nested_patterns`).
    SiblingMerge,
    /// Turn every matching package-data entry that carries a purl into its
    /// own package (see `assemble_one_per_package_data`).
    OnePerPackageData,
}
83
/// Static description of one assembler.
pub struct AssemblerConfig {
    /// Datasource IDs this assembler handles. The first entry is used as the
    /// assembler's key in the lookup tables, so the slice must not be empty
    /// (building the lookup tables panics otherwise).
    pub datasource_ids: &'static [DatasourceId],
    /// File patterns associated with this assembler.
    /// NOTE(review): not read anywhere in this module's visible code —
    /// presumably consumed by the sibling/nested merge submodules; confirm.
    pub sibling_file_patterns: &'static [&'static str],
    /// How this assembler's files are turned into packages.
    pub mode: AssemblyMode,
}
89
90pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
96 let assembler_lookup = &*ASSEMBLER_LOOKUP;
97 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
98 let mut packages = Vec::new();
99 let mut dependencies = Vec::new();
100
101 let dir_files = group_files_by_directory(files);
102 let topology_plan = topology::TopologyPlan::build(files, &dir_files);
103
104 for file_indices in dir_files.values() {
105 let mut groups: HashSet<DatasourceId> = HashSet::new();
106
107 for &idx in file_indices {
108 for pkg_data in &files[idx].package_data {
109 if let Some(dsid) = pkg_data.datasource_id
110 && let Some(&config_key) = assembler_lookup.get(&dsid)
111 {
112 groups.insert(config_key);
113 }
114 }
115 }
116
117 for &config_key in &groups {
118 let config = assembler_config_lookup
119 .get(&config_key)
120 .copied()
121 .expect("assembler config must exist");
122
123 if topology_plan.claims_directory_assembly(config, file_indices, files) {
124 continue;
125 }
126
127 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
128 let results = special_merger.run(config, files, file_indices);
129 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
130 continue;
131 }
132
133 match config.mode {
134 AssemblyMode::SiblingMerge => {
135 let results = sibling_merge::assemble_siblings(config, files, file_indices);
136 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
137 }
138 AssemblyMode::OnePerPackageData => {
139 let results = assemble_one_per_package_data(config, files, file_indices)
140 .into_iter()
141 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
142 .collect();
143 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
144 }
145 }
146 }
147 }
148
149 topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);
150
151 for config in ASSEMBLERS {
152 if config.mode != AssemblyMode::SiblingMerge {
153 continue;
154 }
155 if let Some((pkg, deps, affected_indices)) =
156 nested_merge::assemble_nested_patterns(files, config)
157 {
158 let package_uid = pkg.package_uid.clone();
159 let purl = pkg.purl.clone();
160 let removed_package_uids: Vec<PackageUid> = packages
161 .iter()
162 .filter(|p| p.purl == purl)
163 .map(|p| p.package_uid.clone())
164 .collect();
165
166 packages.retain(|p| p.purl != purl);
167 dependencies.retain(|d| {
168 d.for_package_uid.as_ref() != Some(&package_uid)
169 && !removed_package_uids
170 .iter()
171 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
172 });
173
174 for idx in &affected_indices {
175 files[*idx].for_packages.clear();
176 files[*idx].for_packages.push(package_uid.clone());
177 }
178
179 packages.push(pkg);
180 dependencies.extend(deps);
181 }
182 }
183
184 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
185 hoist_unassembled_file_dependencies(files, &mut dependencies);
186
187 for package in &mut packages {
188 package.datafile_paths.sort();
189 package.datafile_paths.dedup();
190 package.datasource_ids.sort_by_key(|left| left.to_string());
191 package.datasource_ids.dedup();
192 }
193
194 for file in files.iter_mut() {
195 file.for_packages
196 .sort_by(|left, right| left.stable_key().cmp(right.stable_key()));
197 file.for_packages.dedup();
198 }
199
200 packages
201 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
202 dependencies.sort_by(|left, right| {
203 left.purl
204 .as_deref()
205 .cmp(&right.purl.as_deref())
206 .then_with(|| {
207 left.extracted_requirement
208 .as_deref()
209 .cmp(&right.extracted_requirement.as_deref())
210 })
211 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
212 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
213 .then_with(|| {
214 left.datasource_id
215 .to_string()
216 .cmp(&right.datasource_id.to_string())
217 })
218 .then_with(|| {
219 left.for_package_uid
220 .as_ref()
221 .map(|uid| uid.stable_key())
222 .cmp(&right.for_package_uid.as_ref().map(|uid| uid.stable_key()))
223 })
224 });
225
226 AssemblyResult {
227 packages,
228 dependencies,
229 }
230}
231
232fn apply_directory_merge_results(
233 files: &mut [FileInfo],
234 packages: &mut Vec<Package>,
235 dependencies: &mut Vec<TopLevelDependency>,
236 results: Vec<DirectoryMergeOutput>,
237) {
238 for (package, deps, affected_indices) in results {
239 if let Some(package) = package {
240 let package_uid = package.package_uid.clone();
241 for idx in &affected_indices {
242 if !files[*idx].for_packages.contains(&package_uid) {
243 files[*idx].for_packages.push(package_uid.clone());
244 }
245 }
246 packages.push(package);
247 }
248 dependencies.extend(deps);
249 }
250}
251
252fn hoist_unassembled_file_dependencies(
253 files: &[FileInfo],
254 dependencies: &mut Vec<TopLevelDependency>,
255) {
256 for file in files {
257 if !file.for_packages.is_empty() {
258 continue;
259 }
260
261 for pkg_data in &file.package_data {
262 let Some(datasource_id) = pkg_data.datasource_id else {
263 continue;
264 };
265
266 if !should_hoist_unassembled_dependencies(datasource_id) {
267 continue;
268 }
269
270 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
271 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
272 }));
273 }
274 }
275}
276
/// Datasource IDs whose dependencies are always hoisted to top level when the
/// file carrying them has no owning package, regardless of whether the ID is
/// listed in `assemblers::UNASSEMBLED_DATASOURCE_IDS`.
const HOIST_IF_UNOWNED_DATASOURCE_IDS: &[DatasourceId] = &[DatasourceId::PipRequirements];
278
279fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
280 if HOIST_IF_UNOWNED_DATASOURCE_IDS.contains(&datasource_id) {
281 return true;
282 }
283
284 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
285 return false;
286 }
287
288 !matches!(
289 datasource_id,
290 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
291 )
292}
293
294fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
295 (
296 package.purl.as_deref(),
297 package.name.as_deref(),
298 package.version.as_deref(),
299 package
300 .datafile_paths
301 .first()
302 .map(String::as_str)
303 .unwrap_or(""),
304 )
305}
306
307fn assemble_one_per_package_data(
308 config: &AssemblerConfig,
309 files: &[FileInfo],
310 file_indices: &[usize],
311) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
312 let mut results = Vec::new();
313
314 for &idx in file_indices {
315 let file = &files[idx];
316 for pkg_data in &file.package_data {
317 let dsid_matches = pkg_data
318 .datasource_id
319 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
320
321 if !dsid_matches
322 || pkg_data.purl.is_none()
323 || should_skip_placeholder_only_cocoapods_podspec(pkg_data)
324 {
325 continue;
326 }
327
328 let datafile_path = file.path.clone();
329 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
330 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
331 let for_package_uid = Some(pkg.package_uid.clone());
332
333 let deps: Vec<TopLevelDependency> = pkg_data
334 .dependencies
335 .iter()
336 .filter(|dep| dep.purl.is_some() || dep.extracted_requirement.is_some())
337 .map(|dep| {
338 TopLevelDependency::from_dependency(
339 dep,
340 datafile_path.clone(),
341 datasource_id,
342 for_package_uid.clone(),
343 )
344 })
345 .collect();
346
347 results.push((pkg, deps, idx));
348 }
349 }
350
351 results
352}
353
354pub(super) fn should_skip_placeholder_only_cocoapods_podspec(
355 pkg_data: &crate::models::PackageData,
356) -> bool {
357 pkg_data.datasource_id == Some(DatasourceId::CocoapodsPodspec)
358 && pkg_data
359 .extra_data
360 .as_ref()
361 .and_then(|data| data.get("dynamic_identity_placeholders"))
362 .and_then(|value| value.as_bool())
363 == Some(true)
364}
365
366fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
368 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
369 for (idx, file) in files.iter().enumerate() {
370 if let Some(parent) = std::path::Path::new(&file.path).parent() {
371 groups.entry(parent.to_path_buf()).or_default().push(idx);
372 }
373 }
374 groups
375}