1mod assemblers;
2#[cfg(test)]
3mod assembly_golden_test;
4#[cfg(test)]
5mod assembly_test;
6mod cargo_resource_assign;
7mod cargo_workspace_merge;
8mod composer_resource_assign;
9mod conda_rootfs_merge;
10pub mod file_ref_resolve;
11mod hackage_merge;
12mod nested_merge;
13mod npm_resource_assign;
14mod npm_workspace_merge;
15mod nuget_cpm_resolve;
16mod python_requirements_assign;
17mod ruby_resource_assign;
18mod sibling_merge;
19mod swift_merge;
20mod topology;
21
22use std::collections::{HashMap, HashSet};
23use std::path::PathBuf;
24use std::sync::LazyLock;
25
26use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
27
28pub use assemblers::ASSEMBLERS;
29
30type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
31
32static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
35 let mut lookup = HashMap::new();
36 for config in ASSEMBLERS {
37 let key = *config
38 .datasource_ids
39 .first()
40 .expect("assembler must have at least one datasource_id");
41 for &dsid in config.datasource_ids {
42 lookup.insert(dsid, key);
43 }
44 }
45 lookup
46});
47
48static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
49 LazyLock::new(|| {
50 let mut lookup = HashMap::new();
51 for config in ASSEMBLERS {
52 let key = *config
53 .datasource_ids
54 .first()
55 .expect("assembler must have at least one datasource_id");
56 lookup.insert(key, config);
57 }
58 lookup
59 });
60
61#[derive(serde::Serialize)]
64pub struct AssemblyResult {
65 pub packages: Vec<Package>,
66 pub dependencies: Vec<TopLevelDependency>,
67}
68
/// Strategy [`assemble`] uses for an assembler's files within one directory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// The directory's files are handed to `sibling_merge::assemble_siblings`.
    /// Only assemblers in this mode also get the nested-pattern pass.
    SiblingMerge,
    /// Every matching package-data entry that has a purl becomes a package of
    /// its own.
    OnePerPackageData,
}
78
79pub struct AssemblerConfig {
80 pub datasource_ids: &'static [DatasourceId],
81 pub sibling_file_patterns: &'static [&'static str],
82 pub mode: AssemblyMode,
83}
84
85pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
91 let assembler_lookup = &*ASSEMBLER_LOOKUP;
92 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
93 let mut packages = Vec::new();
94 let mut dependencies = Vec::new();
95
96 let dir_files = group_files_by_directory(files);
97 let topology_plan = topology::TopologyPlan::build(files, &dir_files);
98
99 for file_indices in dir_files.values() {
100 let mut groups: HashSet<DatasourceId> = HashSet::new();
101
102 for &idx in file_indices {
103 for pkg_data in &files[idx].package_data {
104 if let Some(dsid) = pkg_data.datasource_id
105 && let Some(&config_key) = assembler_lookup.get(&dsid)
106 {
107 groups.insert(config_key);
108 }
109 }
110 }
111
112 for &config_key in &groups {
113 let config = assembler_config_lookup
114 .get(&config_key)
115 .copied()
116 .expect("assembler config must exist");
117
118 if topology_plan.claims_directory_assembly(config, file_indices, files) {
119 continue;
120 }
121
122 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
123 let results = special_merger.run(files, file_indices);
124 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
125 continue;
126 }
127
128 match config.mode {
129 AssemblyMode::SiblingMerge => {
130 let results = sibling_merge::assemble_siblings(config, files, file_indices)
131 .into_iter()
132 .collect();
133 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
134 }
135 AssemblyMode::OnePerPackageData => {
136 let results = assemble_one_per_package_data(config, files, file_indices)
137 .into_iter()
138 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
139 .collect();
140 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
141 }
142 }
143 }
144 }
145
146 topology_plan.apply_directory_scoped_domains(files, &mut packages, &mut dependencies);
147
148 for config in ASSEMBLERS {
149 if config.mode != AssemblyMode::SiblingMerge {
150 continue;
151 }
152 if let Some((pkg, deps, affected_indices)) =
153 nested_merge::assemble_nested_patterns(files, config)
154 {
155 let package_uid = pkg.package_uid.clone();
156 let purl = pkg.purl.clone();
157 let removed_package_uids: Vec<String> = packages
158 .iter()
159 .filter(|p| p.purl == purl)
160 .map(|p| p.package_uid.clone())
161 .collect();
162
163 packages.retain(|p| p.purl != purl);
164 dependencies.retain(|d| {
165 d.for_package_uid.as_ref() != Some(&package_uid)
166 && !removed_package_uids
167 .iter()
168 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
169 });
170
171 for idx in &affected_indices {
172 files[*idx].for_packages.clear();
173 files[*idx].for_packages.push(package_uid.clone());
174 }
175
176 packages.push(pkg);
177 dependencies.extend(deps);
178 }
179 }
180
181 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies, &topology_plan);
182 hoist_unassembled_file_dependencies(files, &mut dependencies);
183
184 for package in &mut packages {
185 package.datafile_paths.sort();
186 package.datafile_paths.dedup();
187 package.datasource_ids.sort_by_key(|left| left.to_string());
188 package.datasource_ids.dedup();
189 }
190
191 for file in files.iter_mut() {
192 file.for_packages
193 .sort_by(|left, right| stable_uid_key(left).cmp(stable_uid_key(right)));
194 file.for_packages.dedup();
195 }
196
197 packages
198 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
199 dependencies.sort_by(|left, right| {
200 left.purl
201 .as_deref()
202 .cmp(&right.purl.as_deref())
203 .then_with(|| {
204 left.extracted_requirement
205 .as_deref()
206 .cmp(&right.extracted_requirement.as_deref())
207 })
208 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
209 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
210 .then_with(|| {
211 left.datasource_id
212 .to_string()
213 .cmp(&right.datasource_id.to_string())
214 })
215 .then_with(|| {
216 left.for_package_uid
217 .as_deref()
218 .map(stable_uid_key)
219 .cmp(&right.for_package_uid.as_deref().map(stable_uid_key))
220 })
221 });
222
223 AssemblyResult {
224 packages,
225 dependencies,
226 }
227}
228
229fn apply_directory_merge_results(
230 files: &mut [FileInfo],
231 packages: &mut Vec<Package>,
232 dependencies: &mut Vec<TopLevelDependency>,
233 results: Vec<DirectoryMergeOutput>,
234) {
235 for (package, deps, affected_indices) in results {
236 if let Some(package) = package {
237 let package_uid = package.package_uid.clone();
238 for idx in &affected_indices {
239 if !files[*idx].for_packages.contains(&package_uid) {
240 files[*idx].for_packages.push(package_uid.clone());
241 }
242 }
243 packages.push(package);
244 }
245 dependencies.extend(deps);
246 }
247}
248
249fn hoist_unassembled_file_dependencies(
250 files: &[FileInfo],
251 dependencies: &mut Vec<TopLevelDependency>,
252) {
253 for file in files {
254 if !file.for_packages.is_empty() {
255 continue;
256 }
257
258 for pkg_data in &file.package_data {
259 let Some(datasource_id) = pkg_data.datasource_id else {
260 continue;
261 };
262
263 if !should_hoist_unassembled_dependencies(datasource_id) {
264 continue;
265 }
266
267 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
268 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
269 }));
270 }
271 }
272}
273
274fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
275 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
276 return false;
277 }
278
279 !matches!(
280 datasource_id,
281 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
282 )
283}
284
285fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
286 (
287 package.purl.as_deref(),
288 package.name.as_deref(),
289 package.version.as_deref(),
290 package
291 .datafile_paths
292 .first()
293 .map(String::as_str)
294 .unwrap_or(""),
295 )
296}
297
/// Returns `uid` cut off just before its uuid qualifier.
///
/// The first `?uuid=` is looked for first; only if there is none is the first
/// `&uuid=` used. A uid containing neither marker is returned unchanged.
fn stable_uid_key(uid: &str) -> &str {
    const UUID_MARKERS: [&str; 2] = ["?uuid=", "&uuid="];

    UUID_MARKERS
        .iter()
        .find_map(|marker| uid.find(*marker))
        .map_or(uid, |cut| &uid[..cut])
}
304
305fn assemble_one_per_package_data(
306 config: &AssemblerConfig,
307 files: &[FileInfo],
308 file_indices: &[usize],
309) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
310 let mut results = Vec::new();
311
312 for &idx in file_indices {
313 let file = &files[idx];
314 for pkg_data in &file.package_data {
315 let dsid_matches = pkg_data
316 .datasource_id
317 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
318
319 if !dsid_matches || pkg_data.purl.is_none() {
320 continue;
321 }
322
323 let datafile_path = file.path.clone();
324 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
325 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
326 let for_package_uid = Some(pkg.package_uid.clone());
327
328 let deps: Vec<TopLevelDependency> = pkg_data
329 .dependencies
330 .iter()
331 .filter(|dep| dep.purl.is_some())
332 .map(|dep| {
333 TopLevelDependency::from_dependency(
334 dep,
335 datafile_path.clone(),
336 datasource_id,
337 for_package_uid.clone(),
338 )
339 })
340 .collect();
341
342 results.push((pkg, deps, idx));
343 }
344 }
345
346 results
347}
348
349fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
351 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
352 for (idx, file) in files.iter().enumerate() {
353 if let Some(parent) = std::path::Path::new(&file.path).parent() {
354 groups.entry(parent.to_path_buf()).or_default().push(idx);
355 }
356 }
357 groups
358}