1mod assemblers;
2#[cfg(test)]
3mod assembly_golden_test;
4#[cfg(test)]
5mod assembly_test;
6mod bazel_merge;
7mod bazel_prune;
8mod cargo_resource_assign;
9mod cargo_workspace_merge;
10mod composer_resource_assign;
11mod conda_rootfs_merge;
12pub mod file_ref_resolve;
13mod hackage_merge;
14mod nested_merge;
15mod npm_resource_assign;
16mod npm_workspace_merge;
17mod nuget_cpm_resolve;
18mod python_requirements_assign;
19mod ruby_resource_assign;
20mod sibling_merge;
21mod swift_merge;
22mod topology;
23
24use std::collections::{HashMap, HashSet};
25use std::path::PathBuf;
26use std::sync::LazyLock;
27
28use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
29
30pub use assemblers::ASSEMBLERS;
31
32type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
33
34static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
37 let mut lookup = HashMap::new();
38 for config in ASSEMBLERS {
39 let key = *config
40 .datasource_ids
41 .first()
42 .expect("assembler must have at least one datasource_id");
43 for &dsid in config.datasource_ids {
44 lookup.insert(dsid, key);
45 }
46 }
47 lookup
48});
49
50static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
51 LazyLock::new(|| {
52 let mut lookup = HashMap::new();
53 for config in ASSEMBLERS {
54 let key = *config
55 .datasource_ids
56 .first()
57 .expect("assembler must have at least one datasource_id");
58 lookup.insert(key, config);
59 }
60 lookup
61 });
62
63#[derive(serde::Serialize)]
66pub struct AssemblyResult {
67 pub packages: Vec<Package>,
68 pub dependencies: Vec<TopLevelDependency>,
69}
70
/// Strategy an assembler uses to turn parsed package data into packages.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AssemblyMode {
    /// Package data from files in the same directory is merged by the sibling
    /// merge step; assemblers in this mode also take part in the nested
    /// pattern pass.
    SiblingMerge,
    /// Every matching package-data entry that has a purl becomes its own
    /// package.
    OnePerPackageData,
}
80
81pub struct AssemblerConfig {
82 pub datasource_ids: &'static [DatasourceId],
83 pub sibling_file_patterns: &'static [&'static str],
84 pub mode: AssemblyMode,
85}
86
87pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
93 let assembler_lookup = &*ASSEMBLER_LOOKUP;
94 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
95 let mut packages = Vec::new();
96 let mut dependencies = Vec::new();
97
98 let dir_files = group_files_by_directory(files);
99 let topology_plan = topology::TopologyPlan::build(files, &dir_files);
100
101 for file_indices in dir_files.values() {
102 let mut groups: HashSet<DatasourceId> = HashSet::new();
103
104 for &idx in file_indices {
105 for pkg_data in &files[idx].package_data {
106 if let Some(dsid) = pkg_data.datasource_id
107 && let Some(&config_key) = assembler_lookup.get(&dsid)
108 {
109 groups.insert(config_key);
110 }
111 }
112 }
113
114 for &config_key in &groups {
115 let config = assembler_config_lookup
116 .get(&config_key)
117 .copied()
118 .expect("assembler config must exist");
119
120 if topology_plan.claims_directory_assembly(config, file_indices, files) {
121 continue;
122 }
123
124 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
125 let results = special_merger.run(config, files, file_indices);
126 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
127 continue;
128 }
129
130 match config.mode {
131 AssemblyMode::SiblingMerge => {
132 let results = sibling_merge::assemble_siblings(config, files, file_indices)
133 .into_iter()
134 .collect();
135 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
136 }
137 AssemblyMode::OnePerPackageData => {
138 let results = assemble_one_per_package_data(config, files, file_indices)
139 .into_iter()
140 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
141 .collect();
142 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
143 }
144 }
145 }
146 }
147
148 topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);
149
150 for config in ASSEMBLERS {
151 if config.mode != AssemblyMode::SiblingMerge {
152 continue;
153 }
154 if let Some((pkg, deps, affected_indices)) =
155 nested_merge::assemble_nested_patterns(files, config)
156 {
157 let package_uid = pkg.package_uid.clone();
158 let purl = pkg.purl.clone();
159 let removed_package_uids: Vec<String> = packages
160 .iter()
161 .filter(|p| p.purl == purl)
162 .map(|p| p.package_uid.clone())
163 .collect();
164
165 packages.retain(|p| p.purl != purl);
166 dependencies.retain(|d| {
167 d.for_package_uid.as_ref() != Some(&package_uid)
168 && !removed_package_uids
169 .iter()
170 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
171 });
172
173 for idx in &affected_indices {
174 files[*idx].for_packages.clear();
175 files[*idx].for_packages.push(package_uid.clone());
176 }
177
178 packages.push(pkg);
179 dependencies.extend(deps);
180 }
181 }
182
183 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
184 hoist_unassembled_file_dependencies(files, &mut dependencies);
185
186 for package in &mut packages {
187 package.datafile_paths.sort();
188 package.datafile_paths.dedup();
189 package.datasource_ids.sort_by_key(|left| left.to_string());
190 package.datasource_ids.dedup();
191 }
192
193 for file in files.iter_mut() {
194 file.for_packages
195 .sort_by(|left, right| stable_uid_key(left).cmp(stable_uid_key(right)));
196 file.for_packages.dedup();
197 }
198
199 packages
200 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
201 dependencies.sort_by(|left, right| {
202 left.purl
203 .as_deref()
204 .cmp(&right.purl.as_deref())
205 .then_with(|| {
206 left.extracted_requirement
207 .as_deref()
208 .cmp(&right.extracted_requirement.as_deref())
209 })
210 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
211 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
212 .then_with(|| {
213 left.datasource_id
214 .to_string()
215 .cmp(&right.datasource_id.to_string())
216 })
217 .then_with(|| {
218 left.for_package_uid
219 .as_deref()
220 .map(stable_uid_key)
221 .cmp(&right.for_package_uid.as_deref().map(stable_uid_key))
222 })
223 });
224
225 AssemblyResult {
226 packages,
227 dependencies,
228 }
229}
230
231fn apply_directory_merge_results(
232 files: &mut [FileInfo],
233 packages: &mut Vec<Package>,
234 dependencies: &mut Vec<TopLevelDependency>,
235 results: Vec<DirectoryMergeOutput>,
236) {
237 for (package, deps, affected_indices) in results {
238 if let Some(package) = package {
239 let package_uid = package.package_uid.clone();
240 for idx in &affected_indices {
241 if !files[*idx].for_packages.contains(&package_uid) {
242 files[*idx].for_packages.push(package_uid.clone());
243 }
244 }
245 packages.push(package);
246 }
247 dependencies.extend(deps);
248 }
249}
250
251fn hoist_unassembled_file_dependencies(
252 files: &[FileInfo],
253 dependencies: &mut Vec<TopLevelDependency>,
254) {
255 for file in files {
256 if !file.for_packages.is_empty() {
257 continue;
258 }
259
260 for pkg_data in &file.package_data {
261 let Some(datasource_id) = pkg_data.datasource_id else {
262 continue;
263 };
264
265 if !should_hoist_unassembled_dependencies(datasource_id) {
266 continue;
267 }
268
269 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
270 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
271 }));
272 }
273 }
274}
275
276fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
277 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
278 return false;
279 }
280
281 !matches!(
282 datasource_id,
283 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
284 )
285}
286
287fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
288 (
289 package.purl.as_deref(),
290 package.name.as_deref(),
291 package.version.as_deref(),
292 package
293 .datafile_paths
294 .first()
295 .map(String::as_str)
296 .unwrap_or(""),
297 )
298}
299
/// Returns `uid` truncated just before its `uuid` qualifier.
///
/// The first `?uuid=` marker is preferred; if there is none, the first
/// `&uuid=` marker is used. Without either marker the whole `uid` is returned.
fn stable_uid_key(uid: &str) -> &str {
    for marker in ["?uuid=", "&uuid="] {
        if let Some(cut) = uid.find(marker) {
            return &uid[..cut];
        }
    }
    uid
}
306
307fn assemble_one_per_package_data(
308 config: &AssemblerConfig,
309 files: &[FileInfo],
310 file_indices: &[usize],
311) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
312 let mut results = Vec::new();
313
314 for &idx in file_indices {
315 let file = &files[idx];
316 for pkg_data in &file.package_data {
317 let dsid_matches = pkg_data
318 .datasource_id
319 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
320
321 if !dsid_matches || pkg_data.purl.is_none() {
322 continue;
323 }
324
325 let datafile_path = file.path.clone();
326 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
327 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
328 let for_package_uid = Some(pkg.package_uid.clone());
329
330 let deps: Vec<TopLevelDependency> = pkg_data
331 .dependencies
332 .iter()
333 .filter(|dep| dep.purl.is_some())
334 .map(|dep| {
335 TopLevelDependency::from_dependency(
336 dep,
337 datafile_path.clone(),
338 datasource_id,
339 for_package_uid.clone(),
340 )
341 })
342 .collect();
343
344 results.push((pkg, deps, idx));
345 }
346 }
347
348 results
349}
350
351fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
353 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
354 for (idx, file) in files.iter().enumerate() {
355 if let Some(parent) = std::path::Path::new(&file.path).parent() {
356 groups.entry(parent.to_path_buf()).or_default().push(idx);
357 }
358 }
359 groups
360}