1use std::collections::{HashMap, HashSet};
5use std::path::Path;
6use std::str::FromStr;
7
8use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
9use packageurl::PackageUrl;
10use strum::EnumIter;
11
/// Associates installed-package database datasources with the path suffix at which
/// that database file is expected inside a scanned tree.
struct DbPathConfig {
    /// Datasource ids this database location applies to.
    datasource_ids: &'static [DatasourceId],
    /// Suffix looked for in a package's datafile path; `compute_root` treats the text
    /// before it as the root that file references are resolved against.
    path_suffix: &'static str,
}
16
/// Strategy used to turn a package's file references into paths of scanned files.
///
/// `resolve_file_references` dispatches on this value to pick the resolver function.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// Handled by the same resolver as `RelativeToDatafileParent`.
    About,
    /// References come from a manifest file that is already attached to the package
    /// through the file's `for_packages` list.
    AttachedManifest,
    /// References are resolved against the directory containing `conda-meta/`.
    CondaMeta,
    /// References are resolved against the grandparent directory of an `md5sums` file.
    DebianExtractedDeb,
    /// References are resolved against the root implied by an installed database path.
    InstalledDb,
    /// References are resolved using Python `METADATA` / `PKG-INFO` layout rules.
    PythonMetadata,
    /// References are resolved against the parent directory of each datafile.
    RelativeToDatafileParent,
}
27
/// Maps a set of datasource ids to the resolver strategy that handles them.
struct FileReferenceResolverConfig {
    /// Datasource ids that select this resolver.
    datasource_ids: &'static [DatasourceId],
    /// Strategy applied to packages matching `datasource_ids`.
    kind: FileReferenceResolverKind,
}
32
/// Known installed-package database locations, in lookup order.
///
/// `find_db_config` prefers the first entry whose `path_suffix` a package datafile path
/// ends with (and whose datasource id the package carries), and otherwise falls back to
/// the first entry that merely shares a datasource id with the package.
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "var/lib/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "var/lib/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    // NOTE(review): this suffix ends with '/', so a file path cannot `ends_with` it;
    // such packages appear to be matched only through the datasource-id fallback in
    // `find_db_config`, while `compute_root` still locates the suffix via `rfind`.
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
71
/// Datasource ids that mark a package as coming from an RPM installed database
/// (checked by `is_rpm_package`).
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Path fragment used to derive the root of a yumdb datafile path in
/// `merge_rpm_yumdb_metadata`.
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path fragment identifying a conda metadata datafile; the text before its last
/// occurrence is used as the conda root.
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasource ids handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
    DatasourceId::PypiEggPkginfo,
    DatasourceId::PypiEditableEggPkginfo,
];
/// Directory segments searched for (last occurrence) in a Python metadata path to find
/// the root that resolved references must stay under.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasource ids whose file references are merged into a Debian installed package's
/// own references by `collect_debian_installed_file_references`.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];
90
/// Datasource ids routed to the `InstalledDb` resolver by
/// `FILE_REFERENCE_RESOLVER_CONFIGS`.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
99
/// Registry of file-reference resolvers.
///
/// `find_file_reference_resolver` returns the first entry that matches a package, so
/// the order of entries decides which resolver wins when several could apply.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    // The attached-manifest resolver receives only the first id of its list
    // (`config.datasource_ids[0]` in `resolve_file_references`).
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
        kind: FileReferenceResolverKind::DebianExtractedDeb,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[
            DatasourceId::BitbakeRecipe,
            DatasourceId::BitbakeRecipeAppend,
        ],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
137
/// Paths needed to resolve file references listed by a Python metadata file.
struct PythonMetadataResolution {
    /// Directory that relative file references are joined onto.
    base_path: String,
    /// Directory a normalized reference must stay under to be accepted.
    allowed_root: String,
}
142
143pub fn resolve_file_references(
144 files: &mut [FileInfo],
145 packages: &mut [Package],
146 dependencies: &mut [TopLevelDependency],
147) {
148 if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
149 return;
150 }
151
152 let path_index = build_path_index(&*files);
153
154 for package in packages.iter_mut() {
155 let Some(config) = find_file_reference_resolver(files, package) else {
156 continue;
157 };
158
159 match config.kind {
160 FileReferenceResolverKind::About
161 | FileReferenceResolverKind::RelativeToDatafileParent => {
162 resolve_relative_to_datafile_parent(
163 files,
164 &path_index,
165 package,
166 config.datasource_ids,
167 );
168 }
169 FileReferenceResolverKind::AttachedManifest => {
170 resolve_attached_manifest_file_references(
171 files,
172 &path_index,
173 package,
174 config.datasource_ids[0],
175 );
176 }
177 FileReferenceResolverKind::CondaMeta => {
178 resolve_conda_file_references(files, &path_index, package);
179 }
180 FileReferenceResolverKind::DebianExtractedDeb => {
181 resolve_debian_extracted_deb_file_references(files, &path_index, package)
182 }
183 FileReferenceResolverKind::InstalledDb => {
184 resolve_installed_db_file_references(files, &path_index, package, dependencies);
185 }
186 FileReferenceResolverKind::PythonMetadata => {
187 resolve_python_metadata_file_references(files, &path_index, package);
188 }
189 }
190 }
191}
192
193pub(super) fn has_relevant_file_reference_datasource_ids(
194 file_datasource_ids: &HashSet<DatasourceId>,
195) -> bool {
196 FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
197 config
198 .datasource_ids
199 .iter()
200 .any(|datasource_id| file_datasource_ids.contains(datasource_id))
201 })
202}
203
204fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
205 let file_datasource_ids: HashSet<DatasourceId> = files
206 .iter()
207 .flat_map(|file| {
208 file.package_data
209 .iter()
210 .filter_map(|package_data| package_data.datasource_id)
211 })
212 .collect();
213
214 has_relevant_file_reference_datasource_ids(&file_datasource_ids)
215}
216
217fn resolve_relative_to_datafile_parent(
218 files: &mut [FileInfo],
219 path_index: &HashMap<String, usize>,
220 package: &mut Package,
221 datasource_ids: &[DatasourceId],
222) {
223 let mut missing_refs = Vec::new();
224 for datafile_path in &package.datafile_paths {
225 let root = Path::new(datafile_path)
226 .parent()
227 .map(|p| p.to_string_lossy().to_string())
228 .unwrap_or_default();
229
230 let file_references = collect_file_references(
231 files,
232 path_index,
233 datafile_path,
234 &package.datasource_ids,
235 datasource_ids,
236 package.purl.as_deref(),
237 );
238
239 for file_ref in &file_references {
240 let resolved_path = if root.is_empty() {
241 file_ref.path.clone()
242 } else {
243 format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
244 };
245 if let Some(&file_idx) = path_index.get(&resolved_path) {
246 let package_uid = package.package_uid.clone();
247 if !files[file_idx].for_packages.contains(&package_uid) {
248 files[file_idx].for_packages.push(package_uid);
249 }
250 } else {
251 missing_refs.push(file_ref.path.clone());
252 }
253 }
254 }
255
256 record_missing_file_references(package, missing_refs);
257}
258
259fn resolve_attached_manifest_file_references(
260 files: &mut [FileInfo],
261 path_index: &HashMap<String, usize>,
262 package: &mut Package,
263 datasource_id: DatasourceId,
264) {
265 let Some((datafile_path, file_references)) =
266 find_attached_manifest_file_references(files, package, datasource_id)
267 else {
268 return;
269 };
270
271 let root = Path::new(datafile_path)
272 .parent()
273 .map(|p| p.to_string_lossy().to_string())
274 .unwrap_or_default();
275
276 let mut missing_refs = Vec::new();
277 for file_ref in &file_references {
278 let resolved_path = if root.is_empty() {
279 file_ref.path.clone()
280 } else {
281 format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
282 };
283
284 if let Some(&file_idx) = path_index.get(&resolved_path) {
285 let package_uid = package.package_uid.clone();
286 if !files[file_idx].for_packages.contains(&package_uid) {
287 files[file_idx].for_packages.push(package_uid);
288 }
289 } else {
290 missing_refs.push(file_ref.path.clone());
291 }
292 }
293
294 record_missing_file_references(package, missing_refs);
295}
296
297fn resolve_conda_file_references(
298 files: &mut [FileInfo],
299 path_index: &HashMap<String, usize>,
300 package: &mut Package,
301) {
302 let Some(conda_meta_path) = package
303 .datafile_paths
304 .iter()
305 .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
306 else {
307 return;
308 };
309 let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
310 return;
311 };
312
313 let file_references = collect_file_references(
314 files,
315 path_index,
316 conda_meta_path,
317 &package.datasource_ids,
318 &[DatasourceId::CondaMetaJson],
319 package.purl.as_deref(),
320 );
321
322 let mut missing_refs = Vec::new();
323 for file_ref in &file_references {
324 let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
325 if let Some(&file_idx) = path_index.get(&resolved_path) {
326 let package_uid = package.package_uid.clone();
327 if !files[file_idx].for_packages.contains(&package_uid) {
328 files[file_idx].for_packages.push(package_uid);
329 }
330 } else {
331 missing_refs.push(file_ref.path.clone());
332 }
333 }
334
335 record_missing_file_references(package, missing_refs);
336}
337
/// Resolves file references of a package that came from an installed-package database.
///
/// The root is whatever precedes the database's path suffix in the package's first
/// datafile path. References found on that datafile are, for Debian installed
/// packages, merged with references from the supplemental Debian datasources. Each
/// reference is looked up as `root + path` (leading `/` trimmed); hits get the package
/// uid added to the file's `for_packages`, misses are recorded on the package.
///
/// For RPM packages, a namespace found in an os-release file under the same root is
/// then applied to the package, the files referring to it, and `dependencies`.
fn resolve_installed_db_file_references(
    files: &mut [FileInfo],
    path_index: &HashMap<String, usize>,
    package: &mut Package,
    dependencies: &mut [TopLevelDependency],
) {
    let Some(config) = find_db_config(package) else {
        return;
    };
    let Some(datafile_path) = package.datafile_paths.first() else {
        return;
    };

    let root = compute_root(datafile_path, config.path_suffix);

    let mut file_references = collect_file_references(
        files,
        path_index,
        datafile_path,
        &package.datasource_ids,
        config.datasource_ids,
        package.purl.as_deref(),
    );

    if is_debian_installed_package(package) {
        merge_file_references(
            &mut file_references,
            collect_debian_installed_file_references(files, package),
        );
    }

    let mut missing_refs = Vec::new();
    for file_ref in &file_references {
        let ref_path = file_ref.path.trim_start_matches('/');
        // `root` is the text before the suffix, so it is concatenated without a separator.
        let resolved_path = if root.is_empty() {
            ref_path.to_string()
        } else {
            format!("{}{}", root, ref_path)
        };

        if let Some(&file_idx) = path_index.get(&resolved_path) {
            let package_uid = package.package_uid.clone();
            if !files[file_idx].for_packages.contains(&package_uid) {
                files[file_idx].for_packages.push(package_uid);
            }
        } else {
            missing_refs.push(file_ref.path.clone());
        }
    }

    record_missing_file_references(package, missing_refs);

    // Runs last: applying the namespace may change the package uid, and
    // `apply_rpm_namespace` rewrites the uids that were just attached to files above.
    if is_rpm_package(package)
        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
    {
        apply_rpm_namespace(files, package, dependencies, &namespace);
    }
}
396
397fn resolve_debian_extracted_deb_file_references(
398 files: &mut [FileInfo],
399 path_index: &HashMap<String, usize>,
400 package: &mut Package,
401) {
402 let Some(datafile_path) = package
403 .datafile_paths
404 .iter()
405 .find(|path| path.ends_with("/md5sums"))
406 else {
407 return;
408 };
409
410 let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
411 return;
412 };
413 let Some(extracted_root) = md5sums_parent.parent() else {
414 return;
415 };
416 let root = extracted_root.to_string_lossy().to_string();
417
418 let Some(&file_idx) = path_index.get(datafile_path) else {
419 return;
420 };
421 let file_references: Vec<_> = files[file_idx]
422 .package_data
423 .iter()
424 .filter(|pkg_data| {
425 pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
426 })
427 .flat_map(|pkg_data| pkg_data.file_references.clone())
428 .collect();
429
430 let mut missing_refs = Vec::new();
431 for file_ref in &file_references {
432 let resolved_path = if root.is_empty() {
433 file_ref.path.trim_start_matches('/').to_string()
434 } else {
435 format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
436 };
437
438 if let Some(&file_idx) = path_index.get(&resolved_path) {
439 let package_uid = package.package_uid.clone();
440 if !files[file_idx].for_packages.contains(&package_uid) {
441 files[file_idx].for_packages.push(package_uid);
442 }
443 } else {
444 missing_refs.push(file_ref.path.clone());
445 }
446 }
447
448 record_missing_file_references(package, missing_refs);
449}
450
451fn resolve_python_metadata_file_references(
452 files: &mut [FileInfo],
453 path_index: &HashMap<String, usize>,
454 package: &mut Package,
455) {
456 let Some(python_resolution) = find_python_metadata_root(package) else {
457 return;
458 };
459 let Some(datafile_path) = package
460 .datafile_paths
461 .iter()
462 .find(|path| is_python_metadata_layout(path))
463 else {
464 return;
465 };
466
467 let file_references = collect_file_references(
468 files,
469 path_index,
470 datafile_path,
471 &package.datasource_ids,
472 PYTHON_METADATA_DATASOURCE_IDS,
473 package.purl.as_deref(),
474 );
475
476 let mut missing_refs = Vec::new();
477 for file_ref in &file_references {
478 let Some(resolved_path) = normalize_relative_path(
479 &python_resolution.base_path,
480 &python_resolution.allowed_root,
481 &file_ref.path,
482 ) else {
483 missing_refs.push(file_ref.path.clone());
484 continue;
485 };
486
487 if let Some(&file_idx) = path_index.get(&resolved_path) {
488 let package_uid = package.package_uid.clone();
489 if !files[file_idx].for_packages.contains(&package_uid) {
490 files[file_idx].for_packages.push(package_uid);
491 }
492 } else {
493 missing_refs.push(file_ref.path.clone());
494 }
495 }
496
497 record_missing_file_references(package, missing_refs);
498}
499
500fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
501 if missing_refs.is_empty() {
502 return;
503 }
504
505 missing_refs.sort();
506 let missing_refs_json: Vec<serde_json::Value> = missing_refs
507 .into_iter()
508 .map(|path| serde_json::json!({"path": path}))
509 .collect();
510
511 let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
512 extra_data.insert(
513 "missing_file_references".to_string(),
514 serde_json::Value::Array(missing_refs_json),
515 );
516}
517
518fn find_file_reference_resolver(
519 files: &[FileInfo],
520 package: &Package,
521) -> Option<&'static FileReferenceResolverConfig> {
522 FILE_REFERENCE_RESOLVER_CONFIGS
523 .iter()
524 .find(|config| match config.kind {
525 FileReferenceResolverKind::AttachedManifest => {
526 config.datasource_ids.iter().any(|datasource_id| {
527 files.iter().any(|file| {
528 file.for_packages.contains(&package.package_uid)
529 && file
530 .package_data
531 .iter()
532 .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
533 })
534 })
535 }
536 _ => config
537 .datasource_ids
538 .iter()
539 .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
540 })
541}
542
/// Returns `true` when `path` names a `METADATA` or `PKG-INFO` file inside a directory.
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
546
547fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
548 let datafile_path = package
549 .datafile_paths
550 .iter()
551 .find(|path| is_python_metadata_layout(path))?;
552
553 if !package
554 .datasource_ids
555 .iter()
556 .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
557 {
558 return None;
559 }
560
561 for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
562 if let Some(idx) = datafile_path.rfind(segment) {
563 if datafile_path.ends_with("/METADATA") {
564 let root_end = idx + segment.len();
565 let root = datafile_path[..root_end].to_string();
566 return Some(PythonMetadataResolution {
567 base_path: root.clone(),
568 allowed_root: root,
569 });
570 }
571
572 if datafile_path.ends_with("/PKG-INFO") {
573 let parent = Path::new(datafile_path).parent()?;
574 let allowed_root = datafile_path[..idx + segment.len()].to_string();
575 return Some(PythonMetadataResolution {
576 base_path: parent.to_string_lossy().to_string(),
577 allowed_root,
578 });
579 }
580 }
581 }
582
583 if datafile_path.ends_with(".egg-info/PKG-INFO") {
584 let metadata_parent = Path::new(datafile_path).parent()?;
585 let project_root = metadata_parent.parent()?;
586 let project_root = project_root.to_string_lossy().to_string();
587 return Some(PythonMetadataResolution {
588 base_path: project_root.clone(),
589 allowed_root: project_root,
590 });
591 }
592
593 None
594}
595
/// Joins `relative` onto `base`, lexically normalizes the result, and returns it only
/// when it stays under `allowed_root`.
///
/// Leading `/` characters of `relative` are dropped so it is always treated as relative
/// to `base`. `.` components are skipped and `..` components remove the previous
/// component. No filesystem access is performed.
///
/// Returns `None` when:
/// * a `..` would climb above the top of the joined path (the reference escapes the
///   scanned tree, so it must not be matched against in-tree paths even if it later
///   re-enters a directory with the same name), or
/// * the normalized path does not start with `allowed_root`.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` returns false when nothing is left to remove; ignoring that
                // would silently clamp the escape and let the rest of the path look
                // as if it were inside the tree.
                if !normalized.pop() {
                    return None;
                }
            }
            _ => normalized.push(component.as_os_str()),
        }
    }

    // Component-wise prefix check, so `a/bc` is not considered to be under `a/b`.
    if normalized.starts_with(Path::new(allowed_root)) {
        Some(normalized.to_string_lossy().to_string())
    } else {
        None
    }
}
617
618fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
619 let path = datafile_path?;
620 let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
621 Some(path[..idx].to_string())
622}
623
/// Folds packages detected from yumdb entries into the matching RPM database package.
///
/// For every package carrying the `RpmYumdb` datasource id, the first RPM installed
/// database package with the same root, name, version and `arch` qualifier is chosen as
/// the target. The target receives the yumdb package's datafile paths and datasource
/// ids, its `extra_data` entries are merged into the target's `extra_data["yumdb"]`
/// object, files that pointed at the yumdb package are re-pointed to the target, and
/// the yumdb package is removed from `packages`. Yumdb packages with no datafile path
/// or no matching target are left in place.
pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
    let yumdb_indices: Vec<usize> = packages
        .iter()
        .enumerate()
        .filter_map(|(idx, package)| {
            package
                .datasource_ids
                .contains(&DatasourceId::RpmYumdb)
                .then_some(idx)
        })
        .collect();
    // Removals are deferred so the indices collected above stay valid during the loop.
    let mut removal_indices = Vec::new();

    for yumdb_idx in yumdb_indices {
        // Cloned so `packages` can be searched and mutated while the yumdb data is read.
        let yumdb_package = packages[yumdb_idx].clone();
        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
            continue;
        };
        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
        let yumdb_arch = yumdb_package
            .qualifiers
            .as_ref()
            .and_then(|qualifiers| qualifiers.get("arch"));

        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
            if idx == yumdb_idx || !is_rpm_package(package) {
                return None;
            }

            let config = find_db_config(package)?;
            let datafile_path = package.datafile_paths.first()?;
            let target_root = compute_root(datafile_path, config.path_suffix);
            let target_arch = package
                .qualifiers
                .as_ref()
                .and_then(|qualifiers| qualifiers.get("arch"));

            (target_root == yumdb_root
                && package.name == yumdb_package.name
                && package.version == yumdb_package.version
                && target_arch == yumdb_arch)
                .then_some(idx)
        }) else {
            continue;
        };

        let target_package_uid = packages[target_idx].package_uid.clone();
        {
            let target = &mut packages[target_idx];
            target
                .datafile_paths
                .extend(yumdb_package.datafile_paths.clone());
            target
                .datasource_ids
                .extend(yumdb_package.datasource_ids.clone());

            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
                && !yumdb_extra.is_empty()
            {
                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
                // Start from an existing "yumdb" object, if any; incoming keys overwrite.
                let mut merged_yumdb = extra_data
                    .get("yumdb")
                    .and_then(|value| value.as_object().cloned())
                    .unwrap_or_default();
                for (key, value) in yumdb_extra {
                    merged_yumdb.insert(key, value);
                }
                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
            }
        }

        // Re-point files that referenced the yumdb package to the merged target.
        for file in files.iter_mut() {
            for package_uid in &mut file.for_packages {
                if *package_uid == yumdb_package.package_uid {
                    *package_uid = target_package_uid.clone();
                }
            }
        }

        removal_indices.push(yumdb_idx);
    }

    // Remove from the highest index down so earlier removals do not shift later ones.
    removal_indices.sort_unstable();
    removal_indices.dedup();
    for idx in removal_indices.into_iter().rev() {
        packages.remove(idx);
    }
}
712
713fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
714 files
715 .iter()
716 .enumerate()
717 .map(|(idx, file)| (file.path.clone(), idx))
718 .collect()
719}
720
721fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
722 let datafile_paths = &package.datafile_paths;
723
724 for config in DB_PATH_CONFIGS {
725 if !datafile_paths.is_empty()
726 && !datafile_paths
727 .iter()
728 .any(|path| path.ends_with(config.path_suffix))
729 {
730 continue;
731 }
732
733 for &config_dsid in config.datasource_ids {
734 for &pkg_dsid in &package.datasource_ids {
735 if config_dsid == pkg_dsid {
736 return Some(config);
737 }
738 }
739 }
740 }
741
742 for config in DB_PATH_CONFIGS {
743 for &config_dsid in config.datasource_ids {
744 for &pkg_dsid in &package.datasource_ids {
745 if config_dsid == pkg_dsid {
746 return Some(config);
747 }
748 }
749 }
750 }
751
752 None
753}
754
/// Returns everything in `datafile_path` before the last occurrence of `suffix`.
///
/// The result is empty when `suffix` does not occur or occurs at the very start.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}
767
768fn collect_file_references(
769 files: &[FileInfo],
770 path_index: &HashMap<String, usize>,
771 datafile_path: &str,
772 package_datasource_ids: &[DatasourceId],
773 config_datasource_ids: &[DatasourceId],
774 package_purl: Option<&str>,
775) -> Vec<crate::models::FileReference> {
776 let file_idx = match path_index.get(datafile_path) {
777 Some(&idx) => idx,
778 None => return Vec::new(),
779 };
780
781 let file = &files[file_idx];
782 let mut refs = Vec::new();
783
784 for pkg_data in &file.package_data {
785 let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
786 package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
787 });
788
789 if !dsid_matches {
790 continue;
791 }
792
793 let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
794 (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
795 _ => true,
796 };
797
798 if purl_matches {
799 refs.extend(pkg_data.file_references.clone());
800 }
801 }
802
803 refs
804}
805
806fn is_rpm_package(package: &Package) -> bool {
807 for &dsid in &package.datasource_ids {
808 for &rpm_dsid in RPM_DATASOURCE_IDS {
809 if rpm_dsid == dsid {
810 return true;
811 }
812 }
813 }
814 false
815}
816
817fn is_debian_installed_package(package: &Package) -> bool {
818 package
819 .datasource_ids
820 .contains(&DatasourceId::DebianInstalledStatusDb)
821 || package
822 .datasource_ids
823 .contains(&DatasourceId::DebianDistrolessInstalledDb)
824}
825
826fn collect_debian_installed_file_references(
827 files: &[FileInfo],
828 package: &Package,
829) -> Vec<crate::models::FileReference> {
830 let mut refs = Vec::new();
831
832 for file in files {
833 for pkg_data in &file.package_data {
834 let Some(dsid) = pkg_data.datasource_id else {
835 continue;
836 };
837 if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
838 continue;
839 }
840
841 if pkg_data.name != package.name {
842 continue;
843 }
844 if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
845 continue;
846 }
847 if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
848 continue;
849 }
850
851 merge_file_references(&mut refs, pkg_data.file_references.clone());
852 }
853 }
854
855 refs
856}
857
858fn find_attached_manifest_file_references<'a>(
859 files: &'a [FileInfo],
860 package: &Package,
861 datasource_id: DatasourceId,
862) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
863 for file in files {
864 if !file.for_packages.contains(&package.package_uid) {
865 continue;
866 }
867
868 for pkg_data in &file.package_data {
869 if pkg_data.datasource_id == Some(datasource_id) {
870 return Some((&file.path, pkg_data.file_references.clone()));
871 }
872 }
873 }
874
875 None
876}
877
/// Decides whether supplemental Debian data may be attributed to a package, judging by
/// namespace.
///
/// A missing namespace on either side matches anything. Otherwise the namespaces must
/// be equal, except that supplemental `debian` data is accepted for an `ubuntu` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(package)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    supplemental == package || (supplemental == "debian" && package == "ubuntu")
}
892
/// Decides whether supplemental Debian data may be attributed to a package, judging by
/// the `arch` qualifier.
///
/// Supplemental data without an `arch` matches any package. Supplemental data with an
/// `arch` matches only a package that has the same `arch`.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref()?.get("arch")
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(required_arch) => arch_of(package_qualifiers) == Some(required_arch),
    }
}
910
911fn merge_file_references(
912 target: &mut Vec<crate::models::FileReference>,
913 incoming: Vec<crate::models::FileReference>,
914) {
915 for file_ref in incoming {
916 if let Some(existing) = target
917 .iter_mut()
918 .find(|existing| existing.path == file_ref.path)
919 {
920 if existing.size.is_none() {
921 existing.size = file_ref.size;
922 }
923 if existing.sha1.is_none() {
924 existing.sha1 = file_ref.sha1;
925 }
926 if existing.md5.is_none() {
927 existing.md5 = file_ref.md5;
928 }
929 if existing.sha256.is_none() {
930 existing.sha256 = file_ref.sha256;
931 }
932 if existing.sha512.is_none() {
933 existing.sha512 = file_ref.sha512;
934 }
935 if existing.extra_data.is_none() {
936 existing.extra_data = file_ref.extra_data.clone();
937 }
938 } else {
939 target.push(file_ref);
940 }
941 }
942}
943
944fn resolve_rpm_namespace(
945 files: &[FileInfo],
946 path_index: &HashMap<String, usize>,
947 root: &str,
948) -> Option<String> {
949 let os_release_paths = [
950 format!("{}etc/os-release", root),
951 format!("{}usr/lib/os-release", root),
952 ];
953
954 for os_release_path in &os_release_paths {
955 if let Some(&file_idx) = path_index.get(os_release_path) {
956 let file = &files[file_idx];
957 for pkg_data in &file.package_data {
958 if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
959 && let Some(namespace) = &pkg_data.namespace
960 {
961 return Some(namespace.clone());
962 }
963 }
964 }
965 }
966
967 None
968}
969
/// Rebuilds `existing_purl` with its namespace replaced by `namespace`.
///
/// The type, name, version, subpath and qualifiers of the parsed purl are carried over.
/// Returns `None` when `existing_purl` cannot be parsed or when any step of building
/// the new purl is rejected.
fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
    let parsed = PackageUrl::from_str(existing_purl).ok()?;
    // Start from a fresh purl with only type and name, then copy the other parts over.
    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;

    updated.with_namespace(namespace).ok()?;

    if let Some(version) = parsed.version() {
        updated.with_version(version).ok()?;
    }

    if let Some(subpath) = parsed.subpath() {
        updated.with_subpath(subpath).ok()?;
    }

    for (key, value) in parsed.qualifiers() {
        updated
            .add_qualifier(key.to_string(), value.to_string())
            .ok()?;
    }

    Some(updated.to_string())
}
992
/// Applies `namespace` to an RPM package and to everything that refers to it.
///
/// The package's namespace is always set. When the package has a purl that can be
/// rewritten, its purl and package uid are updated too. Files and dependencies that
/// referenced the old uid are re-pointed to the new one, and each dependency of the
/// package gets the namespace and, when rewritable, an updated purl and dependency uid.
fn apply_rpm_namespace(
    files: &mut [FileInfo],
    package: &mut Package,
    dependencies: &mut [TopLevelDependency],
    namespace: &str,
) {
    // Captured before any change so references to the old uid can still be found.
    let old_package_uid = package.package_uid.clone();

    package.namespace = Some(namespace.to_string());

    if let Some(current_purl) = package.purl.as_deref()
        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
    {
        package.purl = Some(updated_purl.clone());
        package.package_uid = old_package_uid.replace_base(&updated_purl);
    }

    for file in files.iter_mut() {
        for package_uid in &mut file.for_packages {
            if *package_uid == old_package_uid {
                *package_uid = package.package_uid.clone();
            }
        }
    }

    for dep in dependencies.iter_mut() {
        if dep.for_package_uid.as_ref() == Some(&old_package_uid) {
            dep.for_package_uid = Some(package.package_uid.clone());
        }

        // Checked after the re-pointing above, so this also covers dependencies that
        // were just moved from the old uid to the new one.
        if dep.for_package_uid.as_ref() == Some(&package.package_uid) {
            dep.namespace = Some(namespace.to_string());

            if let Some(current_purl) = dep.purl.as_deref()
                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
            {
                dep.purl = Some(updated_purl.clone());
                dep.dependency_uid = dep.dependency_uid.replace_base(&updated_purl);
            }
        }
    }
}
1035
1036#[cfg(test)]
1037#[path = "file_ref_resolve_test.rs"]
1038mod tests;