1mod assemblers;
2#[cfg(test)]
3mod assembly_golden_test;
4#[cfg(test)]
5mod assembly_test;
6mod bazel_merge;
7mod bazel_prune;
8mod cargo_resource_assign;
9mod cargo_workspace_merge;
10mod composer_resource_assign;
11mod conda_rootfs_merge;
12pub mod file_ref_resolve;
13mod hackage_merge;
14mod nested_merge;
15mod npm_resource_assign;
16mod npm_workspace_merge;
17mod nuget_cpm_resolve;
18mod python_requirements_assign;
19mod ruby_resource_assign;
20mod sibling_merge;
21mod swift_merge;
22mod topology;
23
24use std::collections::{HashMap, HashSet};
25use std::path::PathBuf;
26use std::sync::LazyLock;
27
28use crate::models::{DatasourceId, FileInfo, Package, PackageUid, TopLevelDependency};
29
30pub use assemblers::ASSEMBLERS;
31
/// One result produced by a directory-level merge step, as consumed by
/// `apply_directory_merge_results`:
///
/// * the assembled package, if the step produced one,
/// * the top-level dependencies produced by the step,
/// * the indices (into the scanned `files` slice) of the files that the
///   package, when present, gets attached to.
type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
33
34static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
37 let mut lookup = HashMap::new();
38 for config in ASSEMBLERS {
39 let key = *config
40 .datasource_ids
41 .first()
42 .expect("assembler must have at least one datasource_id");
43 for &dsid in config.datasource_ids {
44 lookup.insert(dsid, key);
45 }
46 }
47 lookup
48});
49
50static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
51 LazyLock::new(|| {
52 let mut lookup = HashMap::new();
53 for config in ASSEMBLERS {
54 let key = *config
55 .datasource_ids
56 .first()
57 .expect("assembler must have at least one datasource_id");
58 lookup.insert(key, config);
59 }
60 lookup
61 });
62
/// Output of [`assemble`].
pub struct AssemblyResult {
    /// Assembled packages, sorted by `assemble` before being returned.
    pub packages: Vec<Package>,
    /// Top-level dependencies, sorted by `assemble` before being returned.
    pub dependencies: Vec<TopLevelDependency>,
}
69
/// Strategy [`assemble`] uses for an assembler that has no special directory
/// merger and whose directory was not claimed by the topology plan.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// Handled by `sibling_merge::assemble_siblings` per directory; assemblers
    /// in this mode also take part in the nested-pattern merge pass.
    SiblingMerge,
    /// Each matching package-data entry that has a purl becomes its own
    /// package (see `assemble_one_per_package_data`).
    OnePerPackageData,
}
79
/// Static description of one assembler.
pub struct AssemblerConfig {
    /// Datasource ids this assembler handles. Must be non-empty: the first
    /// entry is used as the assembler's key in the lookup tables.
    pub datasource_ids: &'static [DatasourceId],
    /// NOTE(review): not read in this file; presumably file-name patterns
    /// consumed by the sibling / nested merge modules — confirm there.
    pub sibling_file_patterns: &'static [&'static str],
    /// Strategy used when no special directory merger applies.
    pub mode: AssemblyMode,
}
85
/// Assembles packages and top-level dependencies from the package data
/// attached to `files`, and records on each file which packages it belongs to
/// (`FileInfo::for_packages`).
///
/// The passes run in this order:
///
/// 1. Files are grouped by parent directory and a topology plan is built.
/// 2. For every directory, each assembler whose datasource ids occur in that
///    directory is run, unless the topology plan claims it. A special
///    directory merger is preferred when one is registered; otherwise the
///    assembler's [`AssemblyMode`] decides.
/// 3. The topology plan applies its directory-scoped domains.
/// 4. Every `SiblingMerge` assembler gets a nested-pattern merge; a package
///    produced here replaces earlier packages with the same purl.
/// 5. Post-assembly passes run and dependencies of files that still belong to
///    no package are hoisted.
/// 6. Packages, dependencies and per-file package lists are sorted and
///    de-duplicated.
///
/// # Panics
///
/// Panics if an assembler key found in the datasource lookup has no entry in
/// the config lookup, or if a merge step reports a file index outside `files`.
pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
    let assembler_lookup = &*ASSEMBLER_LOOKUP;
    let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
    let mut packages = Vec::new();
    let mut dependencies = Vec::new();

    let dir_files = group_files_by_directory(files);
    let topology_plan = topology::TopologyPlan::build(files, &dir_files);

    for file_indices in dir_files.values() {
        // Canonical assembler keys that have at least one package-data entry
        // in this directory.
        let mut groups: HashSet<DatasourceId> = HashSet::new();

        for &idx in file_indices {
            for pkg_data in &files[idx].package_data {
                if let Some(dsid) = pkg_data.datasource_id
                    && let Some(&config_key) = assembler_lookup.get(&dsid)
                {
                    groups.insert(config_key);
                }
            }
        }

        for &config_key in &groups {
            let config = assembler_config_lookup
                .get(&config_key)
                .copied()
                .expect("assembler config must exist");

            // The topology plan has claimed this assembler for this directory,
            // so the generic per-directory handling is skipped here.
            if topology_plan.claims_directory_assembly(config, file_indices, files) {
                continue;
            }

            // A registered special merger takes precedence over `config.mode`.
            if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
                let results = special_merger.run(config, files, file_indices);
                apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
                continue;
            }

            match config.mode {
                AssemblyMode::SiblingMerge => {
                    let results = sibling_merge::assemble_siblings(config, files, file_indices)
                        .into_iter()
                        .collect();
                    apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
                }
                AssemblyMode::OnePerPackageData => {
                    // Adapt `(Package, deps, idx)` to the shared
                    // `DirectoryMergeOutput` shape.
                    let results = assemble_one_per_package_data(config, files, file_indices)
                        .into_iter()
                        .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
                        .collect();
                    apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
                }
            }
        }
    }

    topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);

    for config in ASSEMBLERS {
        if config.mode != AssemblyMode::SiblingMerge {
            continue;
        }
        if let Some((pkg, deps, affected_indices)) =
            nested_merge::assemble_nested_patterns(files, config)
        {
            let package_uid = pkg.package_uid.clone();
            let purl = pkg.purl.clone();
            // Packages assembled earlier with the same purl are replaced by
            // the nested-merge result; remember their uids so their
            // dependencies can be dropped as well.
            // NOTE(review): the comparison is on the whole `purl` value; if
            // the nested package has no purl this looks like it would also
            // match every earlier package without one — confirm intended.
            let removed_package_uids: Vec<PackageUid> = packages
                .iter()
                .filter(|p| p.purl == purl)
                .map(|p| p.package_uid.clone())
                .collect();

            packages.retain(|p| p.purl != purl);
            // Drop dependencies owned by the new package uid or by any of the
            // replaced packages; `deps` re-adds the nested-merge ones below.
            dependencies.retain(|d| {
                d.for_package_uid.as_ref() != Some(&package_uid)
                    && !removed_package_uids
                        .iter()
                        .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
            });

            // Affected files end up belonging to the nested-merge package only.
            for idx in &affected_indices {
                files[*idx].for_packages.clear();
                files[*idx].for_packages.push(package_uid.clone());
            }

            packages.push(pkg);
            dependencies.extend(deps);
        }
    }

    assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
    hoist_unassembled_file_dependencies(files, &mut dependencies);

    // Sort and de-duplicate so the output does not depend on the
    // HashMap/HashSet iteration order used above.
    for package in &mut packages {
        package.datafile_paths.sort();
        package.datafile_paths.dedup();
        package.datasource_ids.sort_by_key(|left| left.to_string());
        package.datasource_ids.dedup();
    }

    for file in files.iter_mut() {
        file.for_packages
            .sort_by(|left, right| left.stable_key().cmp(right.stable_key()));
        file.for_packages.dedup();
    }

    packages
        .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
    // Dependencies are ordered by purl, then extracted requirement, scope,
    // datafile path, datasource id (by its string form) and owning package.
    dependencies.sort_by(|left, right| {
        left.purl
            .as_deref()
            .cmp(&right.purl.as_deref())
            .then_with(|| {
                left.extracted_requirement
                    .as_deref()
                    .cmp(&right.extracted_requirement.as_deref())
            })
            .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
            .then_with(|| left.datafile_path.cmp(&right.datafile_path))
            .then_with(|| {
                left.datasource_id
                    .to_string()
                    .cmp(&right.datasource_id.to_string())
            })
            .then_with(|| {
                left.for_package_uid
                    .as_ref()
                    .map(|uid| uid.stable_key())
                    .cmp(&right.for_package_uid.as_ref().map(|uid| uid.stable_key()))
            })
    });

    AssemblyResult {
        packages,
        dependencies,
    }
}
229
230fn apply_directory_merge_results(
231 files: &mut [FileInfo],
232 packages: &mut Vec<Package>,
233 dependencies: &mut Vec<TopLevelDependency>,
234 results: Vec<DirectoryMergeOutput>,
235) {
236 for (package, deps, affected_indices) in results {
237 if let Some(package) = package {
238 let package_uid = package.package_uid.clone();
239 for idx in &affected_indices {
240 if !files[*idx].for_packages.contains(&package_uid) {
241 files[*idx].for_packages.push(package_uid.clone());
242 }
243 }
244 packages.push(package);
245 }
246 dependencies.extend(deps);
247 }
248}
249
250fn hoist_unassembled_file_dependencies(
251 files: &[FileInfo],
252 dependencies: &mut Vec<TopLevelDependency>,
253) {
254 for file in files {
255 if !file.for_packages.is_empty() {
256 continue;
257 }
258
259 for pkg_data in &file.package_data {
260 let Some(datasource_id) = pkg_data.datasource_id else {
261 continue;
262 };
263
264 if !should_hoist_unassembled_dependencies(datasource_id) {
265 continue;
266 }
267
268 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
269 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
270 }));
271 }
272 }
273}
274
/// Datasource ids that `should_hoist_unassembled_dependencies` accepts
/// unconditionally, without consulting `assemblers::UNASSEMBLED_DATASOURCE_IDS`.
const HOIST_IF_UNOWNED_DATASOURCE_IDS: &[DatasourceId] = &[DatasourceId::PipRequirements];
276
277fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
278 if HOIST_IF_UNOWNED_DATASOURCE_IDS.contains(&datasource_id) {
279 return true;
280 }
281
282 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
283 return false;
284 }
285
286 !matches!(
287 datasource_id,
288 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
289 )
290}
291
292fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
293 (
294 package.purl.as_deref(),
295 package.name.as_deref(),
296 package.version.as_deref(),
297 package
298 .datafile_paths
299 .first()
300 .map(String::as_str)
301 .unwrap_or(""),
302 )
303}
304
305fn assemble_one_per_package_data(
306 config: &AssemblerConfig,
307 files: &[FileInfo],
308 file_indices: &[usize],
309) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
310 let mut results = Vec::new();
311
312 for &idx in file_indices {
313 let file = &files[idx];
314 for pkg_data in &file.package_data {
315 let dsid_matches = pkg_data
316 .datasource_id
317 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
318
319 if !dsid_matches || pkg_data.purl.is_none() {
320 continue;
321 }
322
323 let datafile_path = file.path.clone();
324 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
325 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
326 let for_package_uid = Some(pkg.package_uid.clone());
327
328 let deps: Vec<TopLevelDependency> = pkg_data
329 .dependencies
330 .iter()
331 .filter(|dep| dep.purl.is_some())
332 .map(|dep| {
333 TopLevelDependency::from_dependency(
334 dep,
335 datafile_path.clone(),
336 datasource_id,
337 for_package_uid.clone(),
338 )
339 })
340 .collect();
341
342 results.push((pkg, deps, idx));
343 }
344 }
345
346 results
347}
348
349fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
351 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
352 for (idx, file) in files.iter().enumerate() {
353 if let Some(parent) = std::path::Path::new(&file.path).parent() {
354 groups.entry(parent.to_path_buf()).or_default().push(idx);
355 }
356 }
357 groups
358}