1mod assemblers;
2#[cfg(test)]
3mod assembly_golden_test;
4#[cfg(test)]
5mod assembly_test;
6mod cargo_resource_assign;
7mod cargo_workspace_merge;
8mod composer_resource_assign;
9mod conda_rootfs_merge;
10pub mod file_ref_resolve;
11mod hackage_merge;
12mod nested_merge;
13mod npm_resource_assign;
14mod npm_workspace_merge;
15mod nuget_cpm_resolve;
16mod python_requirements_assign;
17mod ruby_resource_assign;
18mod sibling_merge;
19mod swift_merge;
20
21use std::collections::{HashMap, HashSet};
22use std::path::PathBuf;
23use std::sync::LazyLock;
24
25use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
26
27pub use assemblers::ASSEMBLERS;
28
/// Outcome of a single directory-level merge, as consumed by
/// `apply_directory_merge_results`: the assembled package (if one was produced), the
/// top-level dependencies to record, and the indices into the scanned-file slice whose
/// `for_packages` should reference that package.
type DirectoryMergeOutput = (Option<Package>, Vec<TopLevelDependency>, Vec<usize>);
30
31static ASSEMBLER_LOOKUP: LazyLock<HashMap<DatasourceId, DatasourceId>> = LazyLock::new(|| {
34 let mut lookup = HashMap::new();
35 for config in ASSEMBLERS {
36 let key = *config
37 .datasource_ids
38 .first()
39 .expect("assembler must have at least one datasource_id");
40 for &dsid in config.datasource_ids {
41 lookup.insert(dsid, key);
42 }
43 }
44 lookup
45});
46
47static ASSEMBLER_CONFIG_LOOKUP: LazyLock<HashMap<DatasourceId, &'static AssemblerConfig>> =
48 LazyLock::new(|| {
49 let mut lookup = HashMap::new();
50 for config in ASSEMBLERS {
51 let key = *config
52 .datasource_ids
53 .first()
54 .expect("assembler must have at least one datasource_id");
55 lookup.insert(key, config);
56 }
57 lookup
58 });
59
/// Value returned by `assemble`: the assembled packages together with the top-level
/// dependencies collected along the way.
#[derive(serde::Serialize)]
pub struct AssemblyResult {
    /// Assembled packages. `assemble` returns them sorted by purl, name, version and first
    /// datafile path.
    pub packages: Vec<Package>,
    /// Top-level dependencies, both those attached to a package and those hoisted from
    /// files that belong to no package.
    pub dependencies: Vec<TopLevelDependency>,
}
67
/// Strategy `assemble` applies to an assembler's datasources within one directory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AssemblyMode {
    /// Dispatched to `sibling_merge::assemble_siblings`. This is also the only mode for
    /// which `assemble` attempts `nested_merge::assemble_nested_patterns`.
    SiblingMerge,
    /// Every matching `package_data` entry that carries a purl becomes its own package
    /// (see `assemble_one_per_package_data`).
    OnePerPackageData,
}
77
/// Static description of one assembler.
pub struct AssemblerConfig {
    /// Datasource IDs handled by this assembler. The first entry doubles as the assembler's
    /// key in the lookup tables, so the slice must not be empty (building the tables panics
    /// otherwise).
    pub datasource_ids: &'static [DatasourceId],
    /// Patterns for related sibling files. Not read in this file; presumably consumed by
    /// the sibling/nested merge modules (TODO confirm).
    pub sibling_file_patterns: &'static [&'static str],
    /// How packages are assembled for these datasources.
    pub mode: AssemblyMode,
}
83
84pub fn assemble(files: &mut [FileInfo]) -> AssemblyResult {
90 let assembler_lookup = &*ASSEMBLER_LOOKUP;
91 let assembler_config_lookup = &*ASSEMBLER_CONFIG_LOOKUP;
92 let mut packages = Vec::new();
93 let mut dependencies = Vec::new();
94
95 let dir_files = group_files_by_directory(files);
96
97 for file_indices in dir_files.values() {
98 let mut groups: HashSet<DatasourceId> = HashSet::new();
99
100 for &idx in file_indices {
101 for pkg_data in &files[idx].package_data {
102 if let Some(dsid) = pkg_data.datasource_id
103 && let Some(&config_key) = assembler_lookup.get(&dsid)
104 {
105 groups.insert(config_key);
106 }
107 }
108 }
109
110 for &config_key in &groups {
111 let config = assembler_config_lookup
112 .get(&config_key)
113 .copied()
114 .expect("assembler config must exist");
115
116 if let Some(special_merger) = assemblers::special_directory_merger_for(config_key) {
117 let results = special_merger.run(files, file_indices);
118 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
119 continue;
120 }
121
122 match config.mode {
123 AssemblyMode::SiblingMerge => {
124 let results = sibling_merge::assemble_siblings(config, files, file_indices)
125 .into_iter()
126 .collect();
127 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
128 }
129 AssemblyMode::OnePerPackageData => {
130 let results = assemble_one_per_package_data(config, files, file_indices)
131 .into_iter()
132 .map(|(pkg, deps, affected_idx)| (Some(pkg), deps, vec![affected_idx]))
133 .collect();
134 apply_directory_merge_results(files, &mut packages, &mut dependencies, results);
135 }
136 }
137 }
138 }
139
140 for config in ASSEMBLERS {
141 if config.mode != AssemblyMode::SiblingMerge {
142 continue;
143 }
144 if let Some((pkg, deps, affected_indices)) =
145 nested_merge::assemble_nested_patterns(files, config)
146 {
147 let package_uid = pkg.package_uid.clone();
148 let purl = pkg.purl.clone();
149 let removed_package_uids: Vec<String> = packages
150 .iter()
151 .filter(|p| p.purl == purl)
152 .map(|p| p.package_uid.clone())
153 .collect();
154
155 packages.retain(|p| p.purl != purl);
156 dependencies.retain(|d| {
157 d.for_package_uid.as_ref() != Some(&package_uid)
158 && !removed_package_uids
159 .iter()
160 .any(|old_uid| d.for_package_uid.as_ref() == Some(old_uid))
161 });
162
163 for idx in &affected_indices {
164 files[*idx].for_packages.clear();
165 files[*idx].for_packages.push(package_uid.clone());
166 }
167
168 packages.push(pkg);
169 dependencies.extend(deps);
170 }
171 }
172
173 assemblers::run_post_assembly_passes(files, &mut packages, &mut dependencies);
174 hoist_unassembled_file_dependencies(files, &mut dependencies);
175
176 for package in &mut packages {
177 package.datafile_paths.sort();
178 package.datafile_paths.dedup();
179 package.datasource_ids.sort_by_key(|left| left.to_string());
180 package.datasource_ids.dedup();
181 }
182
183 for file in files.iter_mut() {
184 file.for_packages
185 .sort_by(|left, right| stable_uid_key(left).cmp(stable_uid_key(right)));
186 file.for_packages.dedup();
187 }
188
189 packages
190 .sort_by(|left, right| stable_package_sort_key(left).cmp(&stable_package_sort_key(right)));
191 dependencies.sort_by(|left, right| {
192 left.purl
193 .as_deref()
194 .cmp(&right.purl.as_deref())
195 .then_with(|| {
196 left.extracted_requirement
197 .as_deref()
198 .cmp(&right.extracted_requirement.as_deref())
199 })
200 .then_with(|| left.scope.as_deref().cmp(&right.scope.as_deref()))
201 .then_with(|| left.datafile_path.cmp(&right.datafile_path))
202 .then_with(|| {
203 left.datasource_id
204 .to_string()
205 .cmp(&right.datasource_id.to_string())
206 })
207 .then_with(|| {
208 left.for_package_uid
209 .as_deref()
210 .map(stable_uid_key)
211 .cmp(&right.for_package_uid.as_deref().map(stable_uid_key))
212 })
213 });
214
215 AssemblyResult {
216 packages,
217 dependencies,
218 }
219}
220
221fn apply_directory_merge_results(
222 files: &mut [FileInfo],
223 packages: &mut Vec<Package>,
224 dependencies: &mut Vec<TopLevelDependency>,
225 results: Vec<DirectoryMergeOutput>,
226) {
227 for (package, deps, affected_indices) in results {
228 if let Some(package) = package {
229 let package_uid = package.package_uid.clone();
230 for idx in &affected_indices {
231 if !files[*idx].for_packages.contains(&package_uid) {
232 files[*idx].for_packages.push(package_uid.clone());
233 }
234 }
235 packages.push(package);
236 }
237 dependencies.extend(deps);
238 }
239}
240
241fn hoist_unassembled_file_dependencies(
242 files: &[FileInfo],
243 dependencies: &mut Vec<TopLevelDependency>,
244) {
245 for file in files {
246 if !file.for_packages.is_empty() {
247 continue;
248 }
249
250 for pkg_data in &file.package_data {
251 let Some(datasource_id) = pkg_data.datasource_id else {
252 continue;
253 };
254
255 if !should_hoist_unassembled_dependencies(datasource_id) {
256 continue;
257 }
258
259 dependencies.extend(pkg_data.dependencies.iter().map(|dep| {
260 TopLevelDependency::from_dependency(dep, file.path.clone(), datasource_id, None)
261 }));
262 }
263 }
264}
265
266fn should_hoist_unassembled_dependencies(datasource_id: DatasourceId) -> bool {
267 if !assemblers::UNASSEMBLED_DATASOURCE_IDS.contains(&datasource_id) {
268 return false;
269 }
270
271 !matches!(
272 datasource_id,
273 DatasourceId::NugetDirectoryBuildProps | DatasourceId::NugetDirectoryPackagesProps
274 )
275}
276
277fn stable_package_sort_key(package: &Package) -> (Option<&str>, Option<&str>, Option<&str>, &str) {
278 (
279 package.purl.as_deref(),
280 package.name.as_deref(),
281 package.version.as_deref(),
282 package
283 .datafile_paths
284 .first()
285 .map(String::as_str)
286 .unwrap_or(""),
287 )
288}
289
/// Returns the part of a package UID that precedes its `uuid` qualifier.
///
/// The UID is cut at the first `?uuid=`; if that marker is absent, at the first `&uuid=`.
/// A UID containing neither marker is returned unchanged.
fn stable_uid_key(uid: &str) -> &str {
    for marker in ["?uuid=", "&uuid="] {
        if let Some(cut) = uid.find(marker) {
            return &uid[..cut];
        }
    }
    uid
}
296
297fn assemble_one_per_package_data(
298 config: &AssemblerConfig,
299 files: &[FileInfo],
300 file_indices: &[usize],
301) -> Vec<(Package, Vec<TopLevelDependency>, usize)> {
302 let mut results = Vec::new();
303
304 for &idx in file_indices {
305 let file = &files[idx];
306 for pkg_data in &file.package_data {
307 let dsid_matches = pkg_data
308 .datasource_id
309 .is_some_and(|dsid| config.datasource_ids.contains(&dsid));
310
311 if !dsid_matches || pkg_data.purl.is_none() {
312 continue;
313 }
314
315 let datafile_path = file.path.clone();
316 let datasource_id = pkg_data.datasource_id.expect("datasource_id must be Some");
317 let pkg = Package::from_package_data(pkg_data, datafile_path.clone());
318 let for_package_uid = Some(pkg.package_uid.clone());
319
320 let deps: Vec<TopLevelDependency> = pkg_data
321 .dependencies
322 .iter()
323 .filter(|dep| dep.purl.is_some())
324 .map(|dep| {
325 TopLevelDependency::from_dependency(
326 dep,
327 datafile_path.clone(),
328 datasource_id,
329 for_package_uid.clone(),
330 )
331 })
332 .collect();
333
334 results.push((pkg, deps, idx));
335 }
336 }
337
338 results
339}
340
341fn group_files_by_directory(files: &[FileInfo]) -> HashMap<PathBuf, Vec<usize>> {
343 let mut groups: HashMap<PathBuf, Vec<usize>> = HashMap::new();
344 for (idx, file) in files.iter().enumerate() {
345 if let Some(parent) = std::path::Path::new(&file.path).parent() {
346 groups.entry(parent.to_path_buf()).or_default().push(idx);
347 }
348 }
349 groups
350}