1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
9struct DbPathConfig {
10 datasource_ids: &'static [DatasourceId],
11 path_suffix: &'static str,
12}
13
14#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
15enum FileReferenceResolverKind {
16 About,
17 AttachedManifest,
18 CondaMeta,
19 DebianExtractedDeb,
20 InstalledDb,
21 PythonMetadata,
22 RelativeToDatafileParent,
23}
24
25struct FileReferenceResolverConfig {
26 datasource_ids: &'static [DatasourceId],
27 kind: FileReferenceResolverKind,
28}
29
30const DB_PATH_CONFIGS: &[DbPathConfig] = &[
31 DbPathConfig {
32 datasource_ids: &[DatasourceId::AlpineInstalledDb],
33 path_suffix: "lib/apk/db/installed",
34 },
35 DbPathConfig {
36 datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
37 path_suffix: "var/lib/rpm/Packages",
38 },
39 DbPathConfig {
40 datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
41 path_suffix: "usr/lib/sysimage/rpm/Packages.db",
42 },
43 DbPathConfig {
44 datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
45 path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
46 },
47 DbPathConfig {
48 datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
49 path_suffix: "var/lib/rpm/rpmdb.sqlite",
50 },
51 DbPathConfig {
52 datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
53 path_suffix: "var/lib/dpkg/status",
54 },
55 DbPathConfig {
56 datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
57 path_suffix: "var/lib/dpkg/status.d/",
58 },
59];
60
61const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
62 DatasourceId::RpmInstalledDatabaseBdb,
63 DatasourceId::RpmInstalledDatabaseNdb,
64 DatasourceId::RpmInstalledDatabaseSqlite,
65];
66const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
67const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
68const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
69 DatasourceId::PypiWheelMetadata,
70 DatasourceId::PypiSdistPkginfo,
71 DatasourceId::PypiEggPkginfo,
72 DatasourceId::PypiEditableEggPkginfo,
73];
74const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
75const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
76 DatasourceId::DebianInstalledFilesList,
77 DatasourceId::DebianInstalledMd5Sums,
78];
79
80const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
81 DatasourceId::AlpineInstalledDb,
82 DatasourceId::RpmInstalledDatabaseBdb,
83 DatasourceId::RpmInstalledDatabaseNdb,
84 DatasourceId::RpmInstalledDatabaseSqlite,
85 DatasourceId::DebianInstalledStatusDb,
86 DatasourceId::DebianDistrolessInstalledDb,
87];
88
89const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
90 FileReferenceResolverConfig {
91 datasource_ids: &[DatasourceId::AboutFile],
92 kind: FileReferenceResolverKind::About,
93 },
94 FileReferenceResolverConfig {
95 datasource_ids: &[DatasourceId::CpanManifest],
96 kind: FileReferenceResolverKind::AttachedManifest,
97 },
98 FileReferenceResolverConfig {
99 datasource_ids: &[DatasourceId::CondaMetaJson],
100 kind: FileReferenceResolverKind::CondaMeta,
101 },
102 FileReferenceResolverConfig {
103 datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
104 kind: FileReferenceResolverKind::DebianExtractedDeb,
105 },
106 FileReferenceResolverConfig {
107 datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
108 kind: FileReferenceResolverKind::InstalledDb,
109 },
110 FileReferenceResolverConfig {
111 datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
112 kind: FileReferenceResolverKind::PythonMetadata,
113 },
114 FileReferenceResolverConfig {
115 datasource_ids: &[DatasourceId::GradleModule],
116 kind: FileReferenceResolverKind::RelativeToDatafileParent,
117 },
118];
119
/// Result of locating the directories relevant to a Python metadata datafile.
struct PythonMetadataResolution {
    /// Directory that relative file references are joined onto.
    base_path: String,
    /// Directory a resolved reference must stay inside to be accepted.
    allowed_root: String,
}
124
125pub fn resolve_file_references(
126 files: &mut [FileInfo],
127 packages: &mut [Package],
128 dependencies: &mut [TopLevelDependency],
129) {
130 if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
131 return;
132 }
133
134 let path_index = build_path_index(&*files);
135
136 for package in packages.iter_mut() {
137 let Some(config) = find_file_reference_resolver(files, package) else {
138 continue;
139 };
140
141 match config.kind {
142 FileReferenceResolverKind::About
143 | FileReferenceResolverKind::RelativeToDatafileParent => {
144 resolve_relative_to_datafile_parent(
145 files,
146 &path_index,
147 package,
148 config.datasource_ids,
149 );
150 }
151 FileReferenceResolverKind::AttachedManifest => {
152 resolve_attached_manifest_file_references(
153 files,
154 &path_index,
155 package,
156 config.datasource_ids[0],
157 );
158 }
159 FileReferenceResolverKind::CondaMeta => {
160 resolve_conda_file_references(files, &path_index, package);
161 }
162 FileReferenceResolverKind::DebianExtractedDeb => {
163 resolve_debian_extracted_deb_file_references(files, &path_index, package)
164 }
165 FileReferenceResolverKind::InstalledDb => {
166 resolve_installed_db_file_references(files, &path_index, package, dependencies);
167 }
168 FileReferenceResolverKind::PythonMetadata => {
169 resolve_python_metadata_file_references(files, &path_index, package);
170 }
171 }
172 }
173}
174
175pub(super) fn has_relevant_file_reference_datasource_ids(
176 file_datasource_ids: &HashSet<DatasourceId>,
177) -> bool {
178 FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
179 config
180 .datasource_ids
181 .iter()
182 .any(|datasource_id| file_datasource_ids.contains(datasource_id))
183 })
184}
185
186fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
187 let file_datasource_ids: HashSet<DatasourceId> = files
188 .iter()
189 .flat_map(|file| {
190 file.package_data
191 .iter()
192 .filter_map(|package_data| package_data.datasource_id)
193 })
194 .collect();
195
196 has_relevant_file_reference_datasource_ids(&file_datasource_ids)
197}
198
199fn resolve_relative_to_datafile_parent(
200 files: &mut [FileInfo],
201 path_index: &HashMap<String, usize>,
202 package: &mut Package,
203 datasource_ids: &[DatasourceId],
204) {
205 let Some(datafile_path) = package.datafile_paths.first() else {
206 return;
207 };
208 let root = Path::new(datafile_path)
209 .parent()
210 .map(|p| p.to_string_lossy().to_string())
211 .unwrap_or_default();
212
213 let file_references = collect_file_references(
214 files,
215 path_index,
216 datafile_path,
217 &package.datasource_ids,
218 datasource_ids,
219 package.purl.as_deref(),
220 );
221
222 let mut missing_refs = Vec::new();
223 for file_ref in &file_references {
224 let resolved_path = if root.is_empty() {
225 file_ref.path.clone()
226 } else {
227 format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
228 };
229 if let Some(&file_idx) = path_index.get(&resolved_path) {
230 let package_uid = package.package_uid.clone();
231 if !files[file_idx].for_packages.contains(&package_uid) {
232 files[file_idx].for_packages.push(package_uid);
233 }
234 } else {
235 missing_refs.push(file_ref.path.clone());
236 }
237 }
238
239 record_missing_file_references(package, missing_refs);
240}
241
242fn resolve_attached_manifest_file_references(
243 files: &mut [FileInfo],
244 path_index: &HashMap<String, usize>,
245 package: &mut Package,
246 datasource_id: DatasourceId,
247) {
248 let Some((datafile_path, file_references)) =
249 find_attached_manifest_file_references(files, package, datasource_id)
250 else {
251 return;
252 };
253
254 let root = Path::new(datafile_path)
255 .parent()
256 .map(|p| p.to_string_lossy().to_string())
257 .unwrap_or_default();
258
259 let mut missing_refs = Vec::new();
260 for file_ref in &file_references {
261 let resolved_path = if root.is_empty() {
262 file_ref.path.clone()
263 } else {
264 format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
265 };
266
267 if let Some(&file_idx) = path_index.get(&resolved_path) {
268 let package_uid = package.package_uid.clone();
269 if !files[file_idx].for_packages.contains(&package_uid) {
270 files[file_idx].for_packages.push(package_uid);
271 }
272 } else {
273 missing_refs.push(file_ref.path.clone());
274 }
275 }
276
277 record_missing_file_references(package, missing_refs);
278}
279
280fn resolve_conda_file_references(
281 files: &mut [FileInfo],
282 path_index: &HashMap<String, usize>,
283 package: &mut Package,
284) {
285 let Some(conda_meta_path) = package
286 .datafile_paths
287 .iter()
288 .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
289 else {
290 return;
291 };
292 let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
293 return;
294 };
295
296 let file_references = collect_file_references(
297 files,
298 path_index,
299 conda_meta_path,
300 &package.datasource_ids,
301 &[DatasourceId::CondaMetaJson],
302 package.purl.as_deref(),
303 );
304
305 let mut missing_refs = Vec::new();
306 for file_ref in &file_references {
307 let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
308 if let Some(&file_idx) = path_index.get(&resolved_path) {
309 let package_uid = package.package_uid.clone();
310 if !files[file_idx].for_packages.contains(&package_uid) {
311 files[file_idx].for_packages.push(package_uid);
312 }
313 } else {
314 missing_refs.push(file_ref.path.clone());
315 }
316 }
317
318 record_missing_file_references(package, missing_refs);
319}
320
321fn resolve_installed_db_file_references(
322 files: &mut [FileInfo],
323 path_index: &HashMap<String, usize>,
324 package: &mut Package,
325 dependencies: &mut [TopLevelDependency],
326) {
327 let Some(config) = find_db_config(package) else {
328 return;
329 };
330 let Some(datafile_path) = package.datafile_paths.first() else {
331 return;
332 };
333
334 let root = compute_root(datafile_path, config.path_suffix);
335
336 let mut file_references = collect_file_references(
337 files,
338 path_index,
339 datafile_path,
340 &package.datasource_ids,
341 config.datasource_ids,
342 package.purl.as_deref(),
343 );
344
345 if is_debian_installed_package(package) {
346 merge_file_references(
347 &mut file_references,
348 collect_debian_installed_file_references(files, package),
349 );
350 }
351
352 let mut missing_refs = Vec::new();
353 for file_ref in &file_references {
354 let ref_path = file_ref.path.trim_start_matches('/');
355 let resolved_path = if root.is_empty() {
356 ref_path.to_string()
357 } else {
358 format!("{}{}", root, ref_path)
359 };
360
361 if let Some(&file_idx) = path_index.get(&resolved_path) {
362 let package_uid = package.package_uid.clone();
363 if !files[file_idx].for_packages.contains(&package_uid) {
364 files[file_idx].for_packages.push(package_uid);
365 }
366 } else {
367 missing_refs.push(file_ref.path.clone());
368 }
369 }
370
371 record_missing_file_references(package, missing_refs);
372
373 if is_rpm_package(package)
374 && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
375 {
376 apply_rpm_namespace(files, package, dependencies, &namespace);
377 }
378}
379
380fn resolve_debian_extracted_deb_file_references(
381 files: &mut [FileInfo],
382 path_index: &HashMap<String, usize>,
383 package: &mut Package,
384) {
385 let Some(datafile_path) = package
386 .datafile_paths
387 .iter()
388 .find(|path| path.ends_with("/md5sums"))
389 else {
390 return;
391 };
392
393 let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
394 return;
395 };
396 let Some(extracted_root) = md5sums_parent.parent() else {
397 return;
398 };
399 let root = extracted_root.to_string_lossy().to_string();
400
401 let Some(&file_idx) = path_index.get(datafile_path) else {
402 return;
403 };
404 let file_references: Vec<_> = files[file_idx]
405 .package_data
406 .iter()
407 .filter(|pkg_data| {
408 pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
409 })
410 .flat_map(|pkg_data| pkg_data.file_references.clone())
411 .collect();
412
413 let mut missing_refs = Vec::new();
414 for file_ref in &file_references {
415 let resolved_path = if root.is_empty() {
416 file_ref.path.trim_start_matches('/').to_string()
417 } else {
418 format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
419 };
420
421 if let Some(&file_idx) = path_index.get(&resolved_path) {
422 let package_uid = package.package_uid.clone();
423 if !files[file_idx].for_packages.contains(&package_uid) {
424 files[file_idx].for_packages.push(package_uid);
425 }
426 } else {
427 missing_refs.push(file_ref.path.clone());
428 }
429 }
430
431 record_missing_file_references(package, missing_refs);
432}
433
434fn resolve_python_metadata_file_references(
435 files: &mut [FileInfo],
436 path_index: &HashMap<String, usize>,
437 package: &mut Package,
438) {
439 let Some(python_resolution) = find_python_metadata_root(package) else {
440 return;
441 };
442 let Some(datafile_path) = package
443 .datafile_paths
444 .iter()
445 .find(|path| is_python_metadata_layout(path))
446 else {
447 return;
448 };
449
450 let file_references = collect_file_references(
451 files,
452 path_index,
453 datafile_path,
454 &package.datasource_ids,
455 PYTHON_METADATA_DATASOURCE_IDS,
456 package.purl.as_deref(),
457 );
458
459 let mut missing_refs = Vec::new();
460 for file_ref in &file_references {
461 let Some(resolved_path) = normalize_relative_path(
462 &python_resolution.base_path,
463 &python_resolution.allowed_root,
464 &file_ref.path,
465 ) else {
466 missing_refs.push(file_ref.path.clone());
467 continue;
468 };
469
470 if let Some(&file_idx) = path_index.get(&resolved_path) {
471 let package_uid = package.package_uid.clone();
472 if !files[file_idx].for_packages.contains(&package_uid) {
473 files[file_idx].for_packages.push(package_uid);
474 }
475 } else {
476 missing_refs.push(file_ref.path.clone());
477 }
478 }
479
480 record_missing_file_references(package, missing_refs);
481}
482
483fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
484 if missing_refs.is_empty() {
485 return;
486 }
487
488 missing_refs.sort();
489 let missing_refs_json: Vec<serde_json::Value> = missing_refs
490 .into_iter()
491 .map(|path| serde_json::json!({"path": path}))
492 .collect();
493
494 let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
495 extra_data.insert(
496 "missing_file_references".to_string(),
497 serde_json::Value::Array(missing_refs_json),
498 );
499}
500
501fn find_file_reference_resolver(
502 files: &[FileInfo],
503 package: &Package,
504) -> Option<&'static FileReferenceResolverConfig> {
505 FILE_REFERENCE_RESOLVER_CONFIGS
506 .iter()
507 .find(|config| match config.kind {
508 FileReferenceResolverKind::AttachedManifest => {
509 config.datasource_ids.iter().any(|datasource_id| {
510 files.iter().any(|file| {
511 file.for_packages.contains(&package.package_uid)
512 && file
513 .package_data
514 .iter()
515 .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
516 })
517 })
518 }
519 _ => config
520 .datasource_ids
521 .iter()
522 .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
523 })
524}
525
/// Returns `true` when `path` ends with a `/METADATA` or `/PKG-INFO` component.
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
529
530fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
531 let datafile_path = package
532 .datafile_paths
533 .iter()
534 .find(|path| is_python_metadata_layout(path))?;
535
536 if !package
537 .datasource_ids
538 .iter()
539 .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
540 {
541 return None;
542 }
543
544 for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
545 if let Some(idx) = datafile_path.rfind(segment) {
546 if datafile_path.ends_with("/METADATA") {
547 let root_end = idx + segment.len();
548 let root = datafile_path[..root_end].to_string();
549 return Some(PythonMetadataResolution {
550 base_path: root.clone(),
551 allowed_root: root,
552 });
553 }
554
555 if datafile_path.ends_with("/PKG-INFO") {
556 let parent = Path::new(datafile_path).parent()?;
557 let allowed_root = datafile_path[..idx + segment.len()].to_string();
558 return Some(PythonMetadataResolution {
559 base_path: parent.to_string_lossy().to_string(),
560 allowed_root,
561 });
562 }
563 }
564 }
565
566 if datafile_path.ends_with(".egg-info/PKG-INFO") {
567 let metadata_parent = Path::new(datafile_path).parent()?;
568 let project_root = metadata_parent.parent()?;
569 let project_root = project_root.to_string_lossy().to_string();
570 return Some(PythonMetadataResolution {
571 base_path: project_root.clone(),
572 allowed_root: project_root,
573 });
574 }
575
576 None
577}
578
/// Joins `relative` onto `base`, collapses `.` and `..` components lexically,
/// and returns the result only if it lies inside `allowed_root`.
///
/// Leading `/` characters of `relative` are stripped before joining, so the
/// reference is always treated as relative to `base`.
///
/// Returns `None` when:
/// * a `..` component would climb above the start of a non-rooted path — the
///   real location is then outside what the path can express, and silently
///   dropping the `..` could let the remainder re-enter `allowed_root` and
///   match an unrelated file; or
/// * the normalized path does not start with `allowed_root` (compared
///   component-wise).
///
/// For rooted paths a `..` at the root is ignored, i.e. the path stays at the
/// root.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` fails when there is nothing left to remove. That is
                // harmless at a filesystem root, but for a relative path it
                // means the reference escapes above the known tree.
                if !normalized.pop() && !normalized.has_root() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    if normalized.starts_with(Path::new(allowed_root)) {
        Some(normalized.to_string_lossy().to_string())
    } else {
        None
    }
}
600
601fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
602 let path = datafile_path?;
603 let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
604 Some(path[..idx].to_string())
605}
606
607pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
608 let yumdb_indices: Vec<usize> = packages
609 .iter()
610 .enumerate()
611 .filter_map(|(idx, package)| {
612 package
613 .datasource_ids
614 .contains(&DatasourceId::RpmYumdb)
615 .then_some(idx)
616 })
617 .collect();
618 let mut removal_indices = Vec::new();
619
620 for yumdb_idx in yumdb_indices {
621 let yumdb_package = packages[yumdb_idx].clone();
622 let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
623 continue;
624 };
625 let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
626 let yumdb_arch = yumdb_package
627 .qualifiers
628 .as_ref()
629 .and_then(|qualifiers| qualifiers.get("arch"));
630
631 let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
632 if idx == yumdb_idx || !is_rpm_package(package) {
633 return None;
634 }
635
636 let config = find_db_config(package)?;
637 let datafile_path = package.datafile_paths.first()?;
638 let target_root = compute_root(datafile_path, config.path_suffix);
639 let target_arch = package
640 .qualifiers
641 .as_ref()
642 .and_then(|qualifiers| qualifiers.get("arch"));
643
644 (target_root == yumdb_root
645 && package.name == yumdb_package.name
646 && package.version == yumdb_package.version
647 && target_arch == yumdb_arch)
648 .then_some(idx)
649 }) else {
650 continue;
651 };
652
653 let target_package_uid = packages[target_idx].package_uid.clone();
654 {
655 let target = &mut packages[target_idx];
656 target
657 .datafile_paths
658 .extend(yumdb_package.datafile_paths.clone());
659 target
660 .datasource_ids
661 .extend(yumdb_package.datasource_ids.clone());
662
663 if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
664 && !yumdb_extra.is_empty()
665 {
666 let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
667 let mut merged_yumdb = extra_data
668 .get("yumdb")
669 .and_then(|value| value.as_object().cloned())
670 .unwrap_or_default();
671 for (key, value) in yumdb_extra {
672 merged_yumdb.insert(key, value);
673 }
674 extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
675 }
676 }
677
678 for file in files.iter_mut() {
679 for package_uid in &mut file.for_packages {
680 if *package_uid == yumdb_package.package_uid {
681 *package_uid = target_package_uid.clone();
682 }
683 }
684 }
685
686 removal_indices.push(yumdb_idx);
687 }
688
689 removal_indices.sort_unstable();
690 removal_indices.dedup();
691 for idx in removal_indices.into_iter().rev() {
692 packages.remove(idx);
693 }
694}
695
696fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
697 files
698 .iter()
699 .enumerate()
700 .map(|(idx, file)| (file.path.clone(), idx))
701 .collect()
702}
703
704fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
705 let datafile_paths = &package.datafile_paths;
706
707 for config in DB_PATH_CONFIGS {
708 if !datafile_paths.is_empty()
709 && !datafile_paths
710 .iter()
711 .any(|path| path.ends_with(config.path_suffix))
712 {
713 continue;
714 }
715
716 for &config_dsid in config.datasource_ids {
717 for &pkg_dsid in &package.datasource_ids {
718 if config_dsid == pkg_dsid {
719 return Some(config);
720 }
721 }
722 }
723 }
724
725 for config in DB_PATH_CONFIGS {
726 for &config_dsid in config.datasource_ids {
727 for &pkg_dsid in &package.datasource_ids {
728 if config_dsid == pkg_dsid {
729 return Some(config);
730 }
731 }
732 }
733 }
734
735 None
736}
737
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`, or an empty string when `suffix` does not occur in the path.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    match datafile_path.rfind(suffix) {
        Some(suffix_start) => datafile_path[..suffix_start].to_owned(),
        None => String::new(),
    }
}
750
751fn collect_file_references(
752 files: &[FileInfo],
753 path_index: &HashMap<String, usize>,
754 datafile_path: &str,
755 package_datasource_ids: &[DatasourceId],
756 config_datasource_ids: &[DatasourceId],
757 package_purl: Option<&str>,
758) -> Vec<crate::models::FileReference> {
759 let file_idx = match path_index.get(datafile_path) {
760 Some(&idx) => idx,
761 None => return Vec::new(),
762 };
763
764 let file = &files[file_idx];
765 let mut refs = Vec::new();
766
767 for pkg_data in &file.package_data {
768 let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
769 package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
770 });
771
772 if !dsid_matches {
773 continue;
774 }
775
776 let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
777 (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
778 _ => true,
779 };
780
781 if purl_matches {
782 refs.extend(pkg_data.file_references.clone());
783 }
784 }
785
786 refs
787}
788
789fn is_rpm_package(package: &Package) -> bool {
790 for &dsid in &package.datasource_ids {
791 for &rpm_dsid in RPM_DATASOURCE_IDS {
792 if rpm_dsid == dsid {
793 return true;
794 }
795 }
796 }
797 false
798}
799
800fn is_debian_installed_package(package: &Package) -> bool {
801 package
802 .datasource_ids
803 .contains(&DatasourceId::DebianInstalledStatusDb)
804 || package
805 .datasource_ids
806 .contains(&DatasourceId::DebianDistrolessInstalledDb)
807}
808
809fn collect_debian_installed_file_references(
810 files: &[FileInfo],
811 package: &Package,
812) -> Vec<crate::models::FileReference> {
813 let mut refs = Vec::new();
814
815 for file in files {
816 for pkg_data in &file.package_data {
817 let Some(dsid) = pkg_data.datasource_id else {
818 continue;
819 };
820 if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
821 continue;
822 }
823
824 if pkg_data.name != package.name {
825 continue;
826 }
827 if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
828 continue;
829 }
830 if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
831 continue;
832 }
833
834 merge_file_references(&mut refs, pkg_data.file_references.clone());
835 }
836 }
837
838 refs
839}
840
841fn find_attached_manifest_file_references<'a>(
842 files: &'a [FileInfo],
843 package: &Package,
844 datasource_id: DatasourceId,
845) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
846 for file in files {
847 if !file.for_packages.contains(&package.package_uid) {
848 continue;
849 }
850
851 for pkg_data in &file.package_data {
852 if pkg_data.datasource_id == Some(datasource_id) {
853 return Some((&file.path, pkg_data.file_references.clone()));
854 }
855 }
856 }
857
858 None
859}
860
/// Decides whether a supplemental datafile's namespace is compatible with the
/// package's namespace.
///
/// A mismatch is only possible when both namespaces are present: they must
/// then be equal, except that a supplemental `"debian"` is accepted for an
/// `"ubuntu"` package. If either namespace is absent the result is `true`.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    match (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) {
        (Some(supplemental), Some(package)) => {
            supplemental == package || (supplemental == "debian" && package == "ubuntu")
        }
        _ => true,
    }
}
875
/// Decides whether a supplemental datafile's `arch` qualifier is compatible
/// with the package's.
///
/// A supplemental entry without an `arch` qualifier matches any package. One
/// with an `arch` matches only a package that has the same `arch` value.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref().and_then(|map| map.get("arch"))
    }

    match (arch_of(supplemental_qualifiers), arch_of(package_qualifiers)) {
        (None, _) => true,
        (Some(supplemental_arch), package_arch) => package_arch == Some(supplemental_arch),
    }
}
893
894fn merge_file_references(
895 target: &mut Vec<crate::models::FileReference>,
896 incoming: Vec<crate::models::FileReference>,
897) {
898 for file_ref in incoming {
899 if let Some(existing) = target
900 .iter_mut()
901 .find(|existing| existing.path == file_ref.path)
902 {
903 if existing.size.is_none() {
904 existing.size = file_ref.size;
905 }
906 if existing.sha1.is_none() {
907 existing.sha1 = file_ref.sha1.clone();
908 }
909 if existing.md5.is_none() {
910 existing.md5 = file_ref.md5.clone();
911 }
912 if existing.sha256.is_none() {
913 existing.sha256 = file_ref.sha256.clone();
914 }
915 if existing.sha512.is_none() {
916 existing.sha512 = file_ref.sha512.clone();
917 }
918 if existing.extra_data.is_none() {
919 existing.extra_data = file_ref.extra_data.clone();
920 }
921 } else {
922 target.push(file_ref);
923 }
924 }
925}
926
927fn resolve_rpm_namespace(
928 files: &[FileInfo],
929 path_index: &HashMap<String, usize>,
930 root: &str,
931) -> Option<String> {
932 let os_release_paths = [
933 format!("{}etc/os-release", root),
934 format!("{}usr/lib/os-release", root),
935 ];
936
937 for os_release_path in &os_release_paths {
938 if let Some(&file_idx) = path_index.get(os_release_path) {
939 let file = &files[file_idx];
940 for pkg_data in &file.package_data {
941 if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
942 && let Some(namespace) = &pkg_data.namespace
943 {
944 return Some(namespace.clone());
945 }
946 }
947 }
948 }
949
950 None
951}
952
/// Rebuilds a uid by keeping its `uuid` value and swapping the part before it
/// for `new_purl`.
///
/// * `...?uuid=<id>` becomes `<new_purl>?uuid=<id>`.
/// * `...&uuid=<id>` becomes `<new_purl>&uuid=<id>` when `new_purl` already
///   contains a `?`, otherwise `<new_purl>?uuid=<id>`.
/// * A uid without either marker is returned unchanged.
fn replace_uid_base(old_uid: &str, new_purl: &str) -> String {
    match (old_uid.split_once("?uuid="), old_uid.split_once("&uuid=")) {
        // The `?uuid=` form takes precedence when both markers are present.
        (Some((_, suffix)), _) => format!("{}?uuid={}", new_purl, suffix),
        (None, Some((_, suffix))) => {
            let separator = if new_purl.contains('?') { '&' } else { '?' };
            format!("{}{separator}uuid={suffix}", new_purl)
        }
        (None, None) => old_uid.to_owned(),
    }
}
965
966fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
967 let parsed = PackageUrl::from_str(existing_purl).ok()?;
968 let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;
969
970 updated.with_namespace(namespace).ok()?;
971
972 if let Some(version) = parsed.version() {
973 updated.with_version(version).ok()?;
974 }
975
976 if let Some(subpath) = parsed.subpath() {
977 updated.with_subpath(subpath).ok()?;
978 }
979
980 for (key, value) in parsed.qualifiers() {
981 updated
982 .add_qualifier(key.to_string(), value.to_string())
983 .ok()?;
984 }
985
986 Some(updated.to_string())
987}
988
989fn apply_rpm_namespace(
990 files: &mut [FileInfo],
991 package: &mut Package,
992 dependencies: &mut [TopLevelDependency],
993 namespace: &str,
994) {
995 let old_package_uid = package.package_uid.clone();
996
997 package.namespace = Some(namespace.to_string());
998
999 if let Some(current_purl) = package.purl.as_deref()
1000 && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1001 {
1002 package.purl = Some(updated_purl.clone());
1003 package.package_uid = replace_uid_base(&old_package_uid, &updated_purl);
1004 }
1005
1006 for file in files.iter_mut() {
1007 for package_uid in &mut file.for_packages {
1008 if *package_uid == old_package_uid {
1009 *package_uid = package.package_uid.clone();
1010 }
1011 }
1012 }
1013
1014 for dep in dependencies.iter_mut() {
1015 if dep.for_package_uid.as_deref() == Some(old_package_uid.as_str()) {
1016 dep.for_package_uid = Some(package.package_uid.clone());
1017 }
1018
1019 if dep.for_package_uid.as_deref() == Some(package.package_uid.as_str()) {
1020 dep.namespace = Some(namespace.to_string());
1021
1022 if let Some(current_purl) = dep.purl.as_deref()
1023 && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1024 {
1025 dep.purl = Some(updated_purl.clone());
1026 dep.dependency_uid = replace_uid_base(&dep.dependency_uid, &updated_purl);
1027 }
1028 }
1029 }
1030}
1031
1032#[cfg(test)]
1033#[path = "file_ref_resolve_test.rs"]
1034mod tests;