1mod assemblers;
2#[cfg(test)]
3mod assembly_golden_test;
4#[cfg(test)]
5mod assembly_test;
6mod cargo_resource_assign;
7mod cargo_workspace_merge;
8mod composer_resource_assign;
9mod conda_rootfs_merge;
10pub mod file_ref_resolve;
11mod hackage_merge;
12mod nested_merge;
13mod npm_resource_assign;
14mod npm_workspace_merge;
15mod nuget_cpm_resolve;
16mod ruby_resource_assign;
17mod sibling_merge;
18mod swift_merge;
19
20use std::collections::{HashMap, HashSet};
21use std::path::PathBuf;
22use std::sync::LazyLock;
23
24use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
25
26pub use assemblers::ASSEMBLERS;
27
/// Output of one directory-level merge: the assembled package (if any), the
/// top-level dependencies it contributes, and the indices into the `files`
/// slice of the datafiles the merge consumed.
type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
29
30static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
33 let mut lookup = HashMap::new();
34 for config in ASSEMBLERS {
35 let key = *config
36 .datasource_ids
37 .first()
38 .expect("assembler must have at least one datasource_id");
39 for &dsid in config.datasource_ids {
40 lookup.insert(dsid, key);
41 }
42 }
43 lookup
44});
45
46static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
47 LazyLock::new(|| {
48 let mut lookup = HashMap::new();
49 for config in ASSEMBLERS {
50 let key = *config
51 .datasource_ids
52 .first()
53 .expect("assembler must have at least one datasource_id");
54 lookup.insert(key, config);
55 }
56 lookup
57 });
58
/// Final output of [`assemble`]: all assembled packages plus the top-level
/// dependencies collected alongside them.
#[derive(serde::Serialize)]
pub struct AssemblyResult {
    pub packages: Vec<Package>,
    pub dependencies: Vec<TopLevelDependency>,
}
66
/// Strategy an assembler uses to combine the package data found in one
/// directory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// Merge sibling datafiles in the directory into a single package
    /// (handled by `sibling_merge::assemble_siblings`).
    SiblingMerge,
    /// Emit one package per package-data entry that has a purl
    /// (handled by `assemble_one_per_package_data`).
    OnePerPackageData,
}
76
/// Static configuration for one assembler.
pub struct AssemblerConfig {
    /// Datasources handled by this assembler. The first entry is the
    /// canonical key used by the assembler lookup tables.
    pub datasource_ids: &'static [DatasourceId],
    /// Filename patterns identifying sibling datafiles; presumably consumed
    /// by the sibling/nested merge passes — see `sibling_merge` and
    /// `nested_merge`.
    pub sibling_file_patterns: &'static [&'static str],
    /// How package data within a directory is combined.
    pub mode: AssemblyMode,
}
82
/// Runs the full assembly pipeline over `files`, producing top-level
/// packages and dependencies.
///
/// Pipeline stages, in order:
/// 1. Group files by parent directory; for each directory, run the matching
///    assembler (special directory merger, sibling merge, or
///    one-per-package-data) for every datasource group present.
/// 2. For sibling-merge assemblers, run nested-pattern merges; a nested
///    merge supersedes any previously assembled packages with the same purl.
/// 3. Run post-assembly passes and hoist dependencies from files no
///    assembler claimed.
/// 4. Dedup and sort packages, dependencies, and per-file package lists so
///    the output is deterministic.
pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
    // `&*` forces the LazyLock initializers to run once and borrows the maps.
    let assembler_lookup = &*ASSEMBLER_LOOKUP;
    let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
    let mut packages = Vec::new();
    let mut dependencies = Vec::new();

    let dir_files = group_files_by_directory(files);

    for file_indices in dir_files.values() {
        // Canonical assembler keys for all datasources seen in this directory.
        let mut groups: HashSet<DatasourceId> = HashSet::new();

        for &idx in file_indices {
            for pkg_data in &files[idx].package_data {
                if let Some(dsid) = pkg_data.datasource_id
                    && let Some(&config_key) = assembler_lookup.get(&dsid)
                {
                    groups.insert(config_key);
                }
            }
        }

        // NOTE(review): HashSet iteration order is nondeterministic, so
        // assemblers within a directory run in arbitrary order; the final
        // sorts below are what make the output deterministic. Confirm no
        // assembler pair depends on running before the other.
        for &config_key in &groups {
            let config = assembler_config_lookup
                .get(&config_key)
                .copied()
                .expect("assembler config must exist");

            // A special directory merger fully replaces the generic mode
            // handling for its datasource group.
            if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
                let results = special_merger.run(files, file_indices);
                apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
                continue;
            }

            match config.mode {
                AssemblyMode::SiblingMerge => {
                    let results = sibling_merge::assemble_siblings(config, files, file_indices)
                        .into_iter()
                        .collect();
                    apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
                }
                AssemblyMode::OnePerPackageData => {
                    // Adapt (pkg, deps, idx) triples to the common
                    // DirectoryMergeOutput shape.
                    let results = assemble_one_per_package_data(config, files, file_indices)
                        .into_iter()
                        .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
                        .collect();
                    apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
                }
            }
        }
    }

    // Second pass: nested-pattern merges, only for sibling-merge assemblers.
    for config in ASSEMBLERS {
        if config.mode != AssemblyMode::SiblingMerge {
            continue;
        }
        if let Some((pkg, deps, affected_indices)) =
            nested_merge::assemble_nested_patterns(files, config)
        {
            let package_uid = pkg.package_uid.clone();
            let purl = pkg.purl.clone();
            // The merged package supersedes every earlier package with the
            // same purl: remember their uids so their dependencies can be
            // dropped too.
            let removed_package_uids: Vec<String> = packages
                .iter()
                .filter(|p| p.purl == purl)
                .map(|p| p.package_uid.clone())
                .collect();

            packages.retain(|p| p.purl != purl);
            // Drop dependencies owned by either the new uid (re-added via
            // `deps` below) or any superseded package.
            dependencies.retain(|d| {
                d.for_package_uid.as_ref() != Some(&package_uid)
                    && !removed_package_uids
                        .iter()
                        .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
            });

            // Re-point affected files exclusively at the merged package.
            for idx in &affected_indices {
                files[*idx].for_packages.clear();
                files[*idx].for_packages.push(package_uid.clone());
            }

            packages.push(pkg);
            dependencies.extend(deps);
        }
    }

    assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies);
    hoist_unassembled_file_dependencies(files, &mut dependencies);

    // Canonicalize per-package metadata (dedup requires sorted input).
    for package in &mut packages {
        package.datafile_paths.sort();
        package.datafile_paths.dedup();
        package.datasource_ids.sort_by_key(|left| left.to_string());
        package.datasource_ids.dedup();
    }

    // Sort per-file package uids by their uuid-stripped prefix so the order
    // is stable across runs.
    for file in files.iter_mut() {
        file.for_packages
            .sort_by(|left, right| stable_uid_key(left).cmp(stable_uid_key(right)));
        file.for_packages.dedup();
    }

    packages
        .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
    // Deterministic dependency order: purl, requirement, scope, datafile,
    // datasource, then uuid-stripped owner uid.
    dependencies.sort_by(|left, right| {
        left.purl
            .as_deref()
            .cmp(&right.purl.as_deref())
            .then_with(|| {
                left.extracted_requirement
                    .as_deref()
                    .cmp(&right.extracted_requirement.as_deref())
            })
            .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
            .then_with(|| left.datafile_path.cmp(&right.datafile_path))
            .then_with(|| {
                left.datasource_id
                    .to_string()
                    .cmp(&right.datasource_id.to_string())
            })
            .then_with(|| {
                left.for_package_uid
                    .as_deref()
                    .map(stable_uid_key)
                    .cmp(&right.for_package_uid.as_deref().map(stable_uid_key))
            })
    });

    AssemblyResult {
        packages,
        dependencies,
    }
}
219
220fn apply_directory_merge_results(
221 files: &mut [FileInfo],
222 packages: &mut Vec<Package>,
223 dependencies: &mut Vec<TopLevelDependency>,
224 results: Vec<DirectoryMergeOutput>,
225) {
226 for (package, deps, affected_indices) in results {
227 if let Some(package) = package {
228 let package_uid = package.package_uid.clone();
229 for idx in &affected_indices {
230 if !files[*idx].for_packages.contains(&package_uid) {
231 files[*idx].for_packages.push(package_uid.clone());
232 }
233 }
234 packages.push(package);
235 }
236 dependencies.extend(deps);
237 }
238}
239
240fn hoist_unassembled_file_dependencies(
241 files: &[FileInfo],
242 dependencies: &mut Vec<TopLevelDependency>,
243) {
244 for file in files {
245 if !file.for_packages.is_empty() {
246 continue;
247 }
248
249 for pkg_data in &file.package_data {
250 let Some(datasource_id) = pkg_data.datasource_id else {
251 continue;
252 };
253
254 if !should_hoist_unassembled_dependencies(datasource_id) {
255 continue;
256 }
257
258 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
259 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
260 }));
261 }
262 }
263}
264
265fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
266 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
267 return false;
268 }
269
270 !matches!(
271 datasource_id,
272 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
273 )
274}
275
276fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
277 (
278 package.purl.as_deref(),
279 package.name.as_deref(),
280 package.version.as_deref(),
281 package
282 .datafile_paths
283 .first()
284 .map(String::as_str)
285 .unwrap_or(""),
286 )
287}
288
/// Strips the per-run uuid suffix from a package uid, returning the stable
/// prefix. Tries a `?uuid=` separator first, then `&uuid=`; a uid with
/// neither is returned unchanged.
fn stable_uid_key(uid: &str) -> &str {
    for marker in ["?uuid=", "&uuid="] {
        if let Some(pos) = uid.find(marker) {
            return &uid[..pos];
        }
    }
    uid
}
295
296fn assemble_one_per_package_data(
297 config: &AssemblerConfig,
298 files: &[FileInfo],
299 file_indices: &[usize],
300) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
301 let mut results = Vec::new();
302
303 for &idx in file_indices {
304 let file = &files[idx];
305 for pkg_data in &file.package_data {
306 let dsid_matches = pkg_data
307 .datasource_id
308 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
309
310 if !dsid_matches || pkg_data.purl.is_none() {
311 continue;
312 }
313
314 let datafile_path = file.path.clone();
315 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
316 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
317 let for_package_uid = Some(pkg.package_uid.clone());
318
319 let deps: Vec<TopLevelDependency> = pkg_data
320 .dependencies
321 .iter()
322 .filter(|dep| dep.purl.is_some())
323 .map(|dep| {
324 TopLevelDependency::from_dependency(
325 dep,
326 datafile_path.clone(),
327 datasource_id,
328 for_package_uid.clone(),
329 )
330 })
331 .collect();
332
333 results.push((pkg, deps, idx));
334 }
335 }
336
337 results
338}
339
340fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
342 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
343 for (idx, file) in files.iter().enumerate() {
344 if let Some(parent) = std::path::Path::new(&file.path).parent() {
345 groups.entry(parent.to_path_buf()).or_default().push(idx);
346 }
347 }
348 groups
349}