Skip to main content

provenant/assembly/
file_ref_resolve.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
/// Pairs installed-database datasource ids with the path suffix used to
/// recognise their datafile.
///
/// `find_db_config` selects an entry through these fields, and `compute_root`
/// strips `path_suffix` from a datafile path to obtain the root prefix.
struct DbPathConfig {
    /// Datasource ids this entry applies to.
    datasource_ids: &'static [DatasourceId],
    /// Suffix looked for in the package's datafile path.
    path_suffix: &'static str,
}
13
/// Strategy `resolve_file_references` dispatches on to map a package's file
/// references onto scanned files.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// Dispatched to `resolve_relative_to_datafile_parent`.
    About,
    /// Dispatched to `resolve_attached_manifest_file_references`.
    AttachedManifest,
    /// Dispatched to `resolve_conda_file_references`.
    CondaMeta,
    /// Dispatched to `resolve_debian_extracted_deb_file_references`.
    DebianExtractedDeb,
    /// Dispatched to `resolve_installed_db_file_references`.
    InstalledDb,
    /// Dispatched to `resolve_python_metadata_file_references`.
    PythonMetadata,
    /// Dispatched to `resolve_relative_to_datafile_parent`.
    RelativeToDatafileParent,
}
24
/// Binds a set of datasource ids to the resolver strategy that handles them.
struct FileReferenceResolverConfig {
    /// Datasource ids that select this resolver.
    datasource_ids: &'static [DatasourceId],
    /// Strategy applied when one of `datasource_ids` matches.
    kind: FileReferenceResolverKind,
}
29
/// Known installed-package database locations, scanned in order by
/// `find_db_config`.
///
/// `RpmInstalledDatabaseSqlite` appears twice because two different path
/// suffixes are listed for it.
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "var/lib/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    // This suffix is a directory prefix (trailing slash), so a datafile below it
    // never `ends_with` it; `find_db_config` reaches this entry via its
    // datasource-id-only fallback pass.
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
60
/// Datasource ids that make `is_rpm_package` return `true`.
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Path suffix passed to `compute_root` for yumdb datafiles.
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path segment used to find the conda metadata datafile and to derive its root.
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasource ids handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
    DatasourceId::PypiEggPkginfo,
    DatasourceId::PypiEditableEggPkginfo,
];
/// Directory segments searched for (right-most match) to locate the Python
/// install root in a metadata path.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasource ids whose file references are merged into Debian installed
/// packages by `collect_debian_installed_file_references`.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];

/// Datasource ids routed to the `InstalledDb` resolver.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
88
/// Resolver table consulted by `find_file_reference_resolver`; the first
/// matching entry wins, so order is significant.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    // `resolve_file_references` reads `datasource_ids[0]` for this kind, so the
    // slice must stay non-empty.
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
        kind: FileReferenceResolverKind::DebianExtractedDeb,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
119
/// Directories used to resolve references listed in a Python metadata file.
struct PythonMetadataResolution {
    /// Directory the reference paths are joined onto.
    base_path: String,
    /// Directory a normalized reference must stay under to be accepted.
    allowed_root: String,
}
124
125pub fn resolve_file_references(
126    files: &mut [FileInfo],
127    packages: &mut [Package],
128    dependencies: &mut [TopLevelDependency],
129) {
130    if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
131        return;
132    }
133
134    let path_index = build_path_index(&*files);
135
136    for package in packages.iter_mut() {
137        let Some(config) = find_file_reference_resolver(files, package) else {
138            continue;
139        };
140
141        match config.kind {
142            FileReferenceResolverKind::About
143            | FileReferenceResolverKind::RelativeToDatafileParent => {
144                resolve_relative_to_datafile_parent(
145                    files,
146                    &path_index,
147                    package,
148                    config.datasource_ids,
149                );
150            }
151            FileReferenceResolverKind::AttachedManifest => {
152                resolve_attached_manifest_file_references(
153                    files,
154                    &path_index,
155                    package,
156                    config.datasource_ids[0],
157                );
158            }
159            FileReferenceResolverKind::CondaMeta => {
160                resolve_conda_file_references(files, &path_index, package);
161            }
162            FileReferenceResolverKind::DebianExtractedDeb => {
163                resolve_debian_extracted_deb_file_references(files, &path_index, package)
164            }
165            FileReferenceResolverKind::InstalledDb => {
166                resolve_installed_db_file_references(files, &path_index, package, dependencies);
167            }
168            FileReferenceResolverKind::PythonMetadata => {
169                resolve_python_metadata_file_references(files, &path_index, package);
170            }
171        }
172    }
173}
174
175pub(super) fn has_relevant_file_reference_datasource_ids(
176    file_datasource_ids: &HashSet<DatasourceId>,
177) -> bool {
178    FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
179        config
180            .datasource_ids
181            .iter()
182            .any(|datasource_id| file_datasource_ids.contains(datasource_id))
183    })
184}
185
186fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
187    let file_datasource_ids: HashSet<DatasourceId> = files
188        .iter()
189        .flat_map(|file| {
190            file.package_data
191                .iter()
192                .filter_map(|package_data| package_data.datasource_id)
193        })
194        .collect();
195
196    has_relevant_file_reference_datasource_ids(&file_datasource_ids)
197}
198
199fn resolve_relative_to_datafile_parent(
200    files: &mut [FileInfo],
201    path_index: &HashMap<String, usize>,
202    package: &mut Package,
203    datasource_ids: &[DatasourceId],
204) {
205    let Some(datafile_path) = package.datafile_paths.first() else {
206        return;
207    };
208    let root = Path::new(datafile_path)
209        .parent()
210        .map(|p| p.to_string_lossy().to_string())
211        .unwrap_or_default();
212
213    let file_references = collect_file_references(
214        files,
215        path_index,
216        datafile_path,
217        &package.datasource_ids,
218        datasource_ids,
219        package.purl.as_deref(),
220    );
221
222    let mut missing_refs = Vec::new();
223    for file_ref in &file_references {
224        let resolved_path = if root.is_empty() {
225            file_ref.path.clone()
226        } else {
227            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
228        };
229        if let Some(&file_idx) = path_index.get(&resolved_path) {
230            let package_uid = package.package_uid.clone();
231            if !files[file_idx].for_packages.contains(&package_uid) {
232                files[file_idx].for_packages.push(package_uid);
233            }
234        } else {
235            missing_refs.push(file_ref.path.clone());
236        }
237    }
238
239    record_missing_file_references(package, missing_refs);
240}
241
242fn resolve_attached_manifest_file_references(
243    files: &mut [FileInfo],
244    path_index: &HashMap<String, usize>,
245    package: &mut Package,
246    datasource_id: DatasourceId,
247) {
248    let Some((datafile_path, file_references)) =
249        find_attached_manifest_file_references(files, package, datasource_id)
250    else {
251        return;
252    };
253
254    let root = Path::new(datafile_path)
255        .parent()
256        .map(|p| p.to_string_lossy().to_string())
257        .unwrap_or_default();
258
259    let mut missing_refs = Vec::new();
260    for file_ref in &file_references {
261        let resolved_path = if root.is_empty() {
262            file_ref.path.clone()
263        } else {
264            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
265        };
266
267        if let Some(&file_idx) = path_index.get(&resolved_path) {
268            let package_uid = package.package_uid.clone();
269            if !files[file_idx].for_packages.contains(&package_uid) {
270                files[file_idx].for_packages.push(package_uid);
271            }
272        } else {
273            missing_refs.push(file_ref.path.clone());
274        }
275    }
276
277    record_missing_file_references(package, missing_refs);
278}
279
280fn resolve_conda_file_references(
281    files: &mut [FileInfo],
282    path_index: &HashMap<String, usize>,
283    package: &mut Package,
284) {
285    let Some(conda_meta_path) = package
286        .datafile_paths
287        .iter()
288        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
289    else {
290        return;
291    };
292    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
293        return;
294    };
295
296    let file_references = collect_file_references(
297        files,
298        path_index,
299        conda_meta_path,
300        &package.datasource_ids,
301        &[DatasourceId::CondaMetaJson],
302        package.purl.as_deref(),
303    );
304
305    let mut missing_refs = Vec::new();
306    for file_ref in &file_references {
307        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
308        if let Some(&file_idx) = path_index.get(&resolved_path) {
309            let package_uid = package.package_uid.clone();
310            if !files[file_idx].for_packages.contains(&package_uid) {
311                files[file_idx].for_packages.push(package_uid);
312            }
313        } else {
314            missing_refs.push(file_ref.path.clone());
315        }
316    }
317
318    record_missing_file_references(package, missing_refs);
319}
320
321fn resolve_installed_db_file_references(
322    files: &mut [FileInfo],
323    path_index: &HashMap<String, usize>,
324    package: &mut Package,
325    dependencies: &mut [TopLevelDependency],
326) {
327    let Some(config) = find_db_config(package) else {
328        return;
329    };
330    let Some(datafile_path) = package.datafile_paths.first() else {
331        return;
332    };
333
334    let root = compute_root(datafile_path, config.path_suffix);
335
336    let mut file_references = collect_file_references(
337        files,
338        path_index,
339        datafile_path,
340        &package.datasource_ids,
341        config.datasource_ids,
342        package.purl.as_deref(),
343    );
344
345    if is_debian_installed_package(package) {
346        merge_file_references(
347            &mut file_references,
348            collect_debian_installed_file_references(files, package),
349        );
350    }
351
352    let mut missing_refs = Vec::new();
353    for file_ref in &file_references {
354        let ref_path = file_ref.path.trim_start_matches('/');
355        let resolved_path = if root.is_empty() {
356            ref_path.to_string()
357        } else {
358            format!("{}{}", root, ref_path)
359        };
360
361        if let Some(&file_idx) = path_index.get(&resolved_path) {
362            let package_uid = package.package_uid.clone();
363            if !files[file_idx].for_packages.contains(&package_uid) {
364                files[file_idx].for_packages.push(package_uid);
365            }
366        } else {
367            missing_refs.push(file_ref.path.clone());
368        }
369    }
370
371    record_missing_file_references(package, missing_refs);
372
373    if is_rpm_package(package)
374        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
375    {
376        apply_rpm_namespace(files, package, dependencies, &namespace);
377    }
378}
379
380fn resolve_debian_extracted_deb_file_references(
381    files: &mut [FileInfo],
382    path_index: &HashMap<String, usize>,
383    package: &mut Package,
384) {
385    let Some(datafile_path) = package
386        .datafile_paths
387        .iter()
388        .find(|path| path.ends_with("/md5sums"))
389    else {
390        return;
391    };
392
393    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
394        return;
395    };
396    let Some(extracted_root) = md5sums_parent.parent() else {
397        return;
398    };
399    let root = extracted_root.to_string_lossy().to_string();
400
401    let Some(&file_idx) = path_index.get(datafile_path) else {
402        return;
403    };
404    let file_references: Vec<_> = files[file_idx]
405        .package_data
406        .iter()
407        .filter(|pkg_data| {
408            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
409        })
410        .flat_map(|pkg_data| pkg_data.file_references.clone())
411        .collect();
412
413    let mut missing_refs = Vec::new();
414    for file_ref in &file_references {
415        let resolved_path = if root.is_empty() {
416            file_ref.path.trim_start_matches('/').to_string()
417        } else {
418            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
419        };
420
421        if let Some(&file_idx) = path_index.get(&resolved_path) {
422            let package_uid = package.package_uid.clone();
423            if !files[file_idx].for_packages.contains(&package_uid) {
424                files[file_idx].for_packages.push(package_uid);
425            }
426        } else {
427            missing_refs.push(file_ref.path.clone());
428        }
429    }
430
431    record_missing_file_references(package, missing_refs);
432}
433
434fn resolve_python_metadata_file_references(
435    files: &mut [FileInfo],
436    path_index: &HashMap<String, usize>,
437    package: &mut Package,
438) {
439    let Some(python_resolution) = find_python_metadata_root(package) else {
440        return;
441    };
442    let Some(datafile_path) = package
443        .datafile_paths
444        .iter()
445        .find(|path| is_python_metadata_layout(path))
446    else {
447        return;
448    };
449
450    let file_references = collect_file_references(
451        files,
452        path_index,
453        datafile_path,
454        &package.datasource_ids,
455        PYTHON_METADATA_DATASOURCE_IDS,
456        package.purl.as_deref(),
457    );
458
459    let mut missing_refs = Vec::new();
460    for file_ref in &file_references {
461        let Some(resolved_path) = normalize_relative_path(
462            &python_resolution.base_path,
463            &python_resolution.allowed_root,
464            &file_ref.path,
465        ) else {
466            missing_refs.push(file_ref.path.clone());
467            continue;
468        };
469
470        if let Some(&file_idx) = path_index.get(&resolved_path) {
471            let package_uid = package.package_uid.clone();
472            if !files[file_idx].for_packages.contains(&package_uid) {
473                files[file_idx].for_packages.push(package_uid);
474            }
475        } else {
476            missing_refs.push(file_ref.path.clone());
477        }
478    }
479
480    record_missing_file_references(package, missing_refs);
481}
482
483fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
484    if missing_refs.is_empty() {
485        return;
486    }
487
488    missing_refs.sort();
489    let missing_refs_json: Vec<serde_json::Value> = missing_refs
490        .into_iter()
491        .map(|path| serde_json::json!({"path": path}))
492        .collect();
493
494    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
495    extra_data.insert(
496        "missing_file_references".to_string(),
497        serde_json::Value::Array(missing_refs_json),
498    );
499}
500
501fn find_file_reference_resolver(
502    files: &[FileInfo],
503    package: &Package,
504) -> Option<&'static FileReferenceResolverConfig> {
505    FILE_REFERENCE_RESOLVER_CONFIGS
506        .iter()
507        .find(|config| match config.kind {
508            FileReferenceResolverKind::AttachedManifest => {
509                config.datasource_ids.iter().any(|datasource_id| {
510                    files.iter().any(|file| {
511                        file.for_packages.contains(&package.package_uid)
512                            && file
513                                .package_data
514                                .iter()
515                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
516                    })
517                })
518            }
519            _ => config
520                .datasource_ids
521                .iter()
522                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
523        })
524}
525
/// Returns `true` when `path` ends with a `/METADATA` or `/PKG-INFO` file name.
///
/// The leading slash is part of the match, so a bare `METADATA` without a
/// parent directory is rejected.
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
529
530fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
531    let datafile_path = package
532        .datafile_paths
533        .iter()
534        .find(|path| is_python_metadata_layout(path))?;
535
536    if !package
537        .datasource_ids
538        .iter()
539        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
540    {
541        return None;
542    }
543
544    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
545        if let Some(idx) = datafile_path.rfind(segment) {
546            if datafile_path.ends_with("/METADATA") {
547                let root_end = idx + segment.len();
548                let root = datafile_path[..root_end].to_string();
549                return Some(PythonMetadataResolution {
550                    base_path: root.clone(),
551                    allowed_root: root,
552                });
553            }
554
555            if datafile_path.ends_with("/PKG-INFO") {
556                let parent = Path::new(datafile_path).parent()?;
557                let allowed_root = datafile_path[..idx + segment.len()].to_string();
558                return Some(PythonMetadataResolution {
559                    base_path: parent.to_string_lossy().to_string(),
560                    allowed_root,
561                });
562            }
563        }
564    }
565
566    if datafile_path.ends_with(".egg-info/PKG-INFO") {
567        let metadata_parent = Path::new(datafile_path).parent()?;
568        let project_root = metadata_parent.parent()?;
569        let project_root = project_root.to_string_lossy().to_string();
570        return Some(PythonMetadataResolution {
571            base_path: project_root.clone(),
572            allowed_root: project_root,
573        });
574    }
575
576    None
577}
578
/// Joins `relative` onto `base`, collapses `.` and `..` lexically, and returns
/// the result only if it stays under `allowed_root`.
///
/// Leading slashes on `relative` are stripped first, so it is always treated as
/// relative to `base`.
///
/// Returns `None` when:
/// * a `..` component would climb above the start of the joined path (there is
///   nothing left to pop), or
/// * the normalized path is not located under `allowed_root` (component-wise
///   comparison, so a trailing slash on `allowed_root` is irrelevant).
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` returns false once the path is empty (or a bare root).
                // Ignoring that would clamp the traversal and let a reference
                // such as `../../site-packages/x` re-enter the allowed root by
                // name even though it points outside the scanned tree.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    if normalized.starts_with(Path::new(allowed_root)) {
        Some(normalized.to_string_lossy().to_string())
    } else {
        None
    }
}
600
601fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
602    let path = datafile_path?;
603    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
604    Some(path[..idx].to_string())
605}
606
607pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
608    let yumdb_indices: Vec<usize> = packages
609        .iter()
610        .enumerate()
611        .filter_map(|(idx, package)| {
612            package
613                .datasource_ids
614                .contains(&DatasourceId::RpmYumdb)
615                .then_some(idx)
616        })
617        .collect();
618    let mut removal_indices = Vec::new();
619
620    for yumdb_idx in yumdb_indices {
621        let yumdb_package = packages[yumdb_idx].clone();
622        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
623            continue;
624        };
625        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
626        let yumdb_arch = yumdb_package
627            .qualifiers
628            .as_ref()
629            .and_then(|qualifiers| qualifiers.get("arch"));
630
631        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
632            if idx == yumdb_idx || !is_rpm_package(package) {
633                return None;
634            }
635
636            let config = find_db_config(package)?;
637            let datafile_path = package.datafile_paths.first()?;
638            let target_root = compute_root(datafile_path, config.path_suffix);
639            let target_arch = package
640                .qualifiers
641                .as_ref()
642                .and_then(|qualifiers| qualifiers.get("arch"));
643
644            (target_root == yumdb_root
645                && package.name == yumdb_package.name
646                && package.version == yumdb_package.version
647                && target_arch == yumdb_arch)
648                .then_some(idx)
649        }) else {
650            continue;
651        };
652
653        let target_package_uid = packages[target_idx].package_uid.clone();
654        {
655            let target = &mut packages[target_idx];
656            target
657                .datafile_paths
658                .extend(yumdb_package.datafile_paths.clone());
659            target
660                .datasource_ids
661                .extend(yumdb_package.datasource_ids.clone());
662
663            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
664                && !yumdb_extra.is_empty()
665            {
666                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
667                let mut merged_yumdb = extra_data
668                    .get("yumdb")
669                    .and_then(|value| value.as_object().cloned())
670                    .unwrap_or_default();
671                for (key, value) in yumdb_extra {
672                    merged_yumdb.insert(key, value);
673                }
674                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
675            }
676        }
677
678        for file in files.iter_mut() {
679            for package_uid in &mut file.for_packages {
680                if *package_uid == yumdb_package.package_uid {
681                    *package_uid = target_package_uid.clone();
682                }
683            }
684        }
685
686        removal_indices.push(yumdb_idx);
687    }
688
689    removal_indices.sort_unstable();
690    removal_indices.dedup();
691    for idx in removal_indices.into_iter().rev() {
692        packages.remove(idx);
693    }
694}
695
696fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
697    files
698        .iter()
699        .enumerate()
700        .map(|(idx, file)| (file.path.clone(), idx))
701        .collect()
702}
703
704fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
705    let datafile_paths = &package.datafile_paths;
706
707    for config in DB_PATH_CONFIGS {
708        if !datafile_paths.is_empty()
709            && !datafile_paths
710                .iter()
711                .any(|path| path.ends_with(config.path_suffix))
712        {
713            continue;
714        }
715
716        for &config_dsid in config.datasource_ids {
717            for &pkg_dsid in &package.datasource_ids {
718                if config_dsid == pkg_dsid {
719                    return Some(config);
720                }
721            }
722        }
723    }
724
725    for config in DB_PATH_CONFIGS {
726        for &config_dsid in config.datasource_ids {
727            for &pkg_dsid in &package.datasource_ids {
728                if config_dsid == pkg_dsid {
729                    return Some(config);
730                }
731            }
732        }
733    }
734
735    None
736}
737
/// Returns everything in `datafile_path` that precedes the last occurrence of
/// `suffix`, or an empty string when `suffix` does not occur.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}
750
751fn collect_file_references(
752    files: &[FileInfo],
753    path_index: &HashMap<String, usize>,
754    datafile_path: &str,
755    package_datasource_ids: &[DatasourceId],
756    config_datasource_ids: &[DatasourceId],
757    package_purl: Option<&str>,
758) -> Vec<crate::models::FileReference> {
759    let file_idx = match path_index.get(datafile_path) {
760        Some(&idx) => idx,
761        None => return Vec::new(),
762    };
763
764    let file = &files[file_idx];
765    let mut refs = Vec::new();
766
767    for pkg_data in &file.package_data {
768        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
769            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
770        });
771
772        if !dsid_matches {
773            continue;
774        }
775
776        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
777            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
778            _ => true,
779        };
780
781        if purl_matches {
782            refs.extend(pkg_data.file_references.clone());
783        }
784    }
785
786    refs
787}
788
789fn is_rpm_package(package: &Package) -> bool {
790    for &dsid in &package.datasource_ids {
791        for &rpm_dsid in RPM_DATASOURCE_IDS {
792            if rpm_dsid == dsid {
793                return true;
794            }
795        }
796    }
797    false
798}
799
800fn is_debian_installed_package(package: &Package) -> bool {
801    package
802        .datasource_ids
803        .contains(&DatasourceId::DebianInstalledStatusDb)
804        || package
805            .datasource_ids
806            .contains(&DatasourceId::DebianDistrolessInstalledDb)
807}
808
809fn collect_debian_installed_file_references(
810    files: &[FileInfo],
811    package: &Package,
812) -> Vec<crate::models::FileReference> {
813    let mut refs = Vec::new();
814
815    for file in files {
816        for pkg_data in &file.package_data {
817            let Some(dsid) = pkg_data.datasource_id else {
818                continue;
819            };
820            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
821                continue;
822            }
823
824            if pkg_data.name != package.name {
825                continue;
826            }
827            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
828                continue;
829            }
830            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
831                continue;
832            }
833
834            merge_file_references(&mut refs, pkg_data.file_references.clone());
835        }
836    }
837
838    refs
839}
840
841fn find_attached_manifest_file_references<'a>(
842    files: &'a [FileInfo],
843    package: &Package,
844    datasource_id: DatasourceId,
845) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
846    for file in files {
847        if !file.for_packages.contains(&package.package_uid) {
848            continue;
849        }
850
851        for pkg_data in &file.package_data {
852            if pkg_data.datasource_id == Some(datasource_id) {
853                return Some((&file.path, pkg_data.file_references.clone()));
854            }
855        }
856    }
857
858    None
859}
860
/// Decides whether supplemental Debian data may be attributed to a package,
/// judging by namespace.
///
/// A missing namespace on either side always matches. Otherwise the namespaces
/// must be equal, with one one-directional exception: supplemental `debian`
/// data is accepted for an `ubuntu` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(package)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    supplemental == package || (supplemental == "debian" && package == "ubuntu")
}
875
/// Decides whether supplemental Debian data may be attributed to a package,
/// judging by the `arch` qualifier.
///
/// Supplemental data without an `arch` always matches. Supplemental data with
/// an `arch` matches only a package that declares the same `arch`.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref()?.get("arch")
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(required_arch) => arch_of(package_qualifiers) == Some(required_arch),
    }
}
893
894fn merge_file_references(
895    target: &mut Vec<crate::models::FileReference>,
896    incoming: Vec<crate::models::FileReference>,
897) {
898    for file_ref in incoming {
899        if let Some(existing) = target
900            .iter_mut()
901            .find(|existing| existing.path == file_ref.path)
902        {
903            if existing.size.is_none() {
904                existing.size = file_ref.size;
905            }
906            if existing.sha1.is_none() {
907                existing.sha1 = file_ref.sha1;
908            }
909            if existing.md5.is_none() {
910                existing.md5 = file_ref.md5;
911            }
912            if existing.sha256.is_none() {
913                existing.sha256 = file_ref.sha256;
914            }
915            if existing.sha512.is_none() {
916                existing.sha512 = file_ref.sha512;
917            }
918            if existing.extra_data.is_none() {
919                existing.extra_data = file_ref.extra_data.clone();
920            }
921        } else {
922            target.push(file_ref);
923        }
924    }
925}
926
927fn resolve_rpm_namespace(
928    files: &[FileInfo],
929    path_index: &HashMap<String, usize>,
930    root: &str,
931) -> Option<String> {
932    let os_release_paths = [
933        format!("{}etc/os-release", root),
934        format!("{}usr/lib/os-release", root),
935    ];
936
937    for os_release_path in &os_release_paths {
938        if let Some(&file_idx) = path_index.get(os_release_path) {
939            let file = &files[file_idx];
940            for pkg_data in &file.package_data {
941                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
942                    && let Some(namespace) = &pkg_data.namespace
943                {
944                    return Some(namespace.clone());
945                }
946            }
947        }
948    }
949
950    None
951}
952
/// Rebuilds `existing_purl` with `namespace` as its namespace.
///
/// The type, name, version, subpath and qualifiers of the parsed purl are
/// copied onto a fresh `PackageUrl`. Returns `None` if `existing_purl` cannot
/// be parsed or any builder step reports an error.
fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
    let parsed = PackageUrl::from_str(existing_purl).ok()?;
    // Start from a fresh purl so the namespace of `parsed` is never carried over.
    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;

    updated.with_namespace(namespace).ok()?;

    if let Some(version) = parsed.version() {
        updated.with_version(version).ok()?;
    }

    if let Some(subpath) = parsed.subpath() {
        updated.with_subpath(subpath).ok()?;
    }

    for (key, value) in parsed.qualifiers() {
        updated
            .add_qualifier(key.to_string(), value.to_string())
            .ok()?;
    }

    Some(updated.to_string())
}
975
976fn apply_rpm_namespace(
977    files: &mut [FileInfo],
978    package: &mut Package,
979    dependencies: &mut [TopLevelDependency],
980    namespace: &str,
981) {
982    let old_package_uid = package.package_uid.clone();
983
984    package.namespace = Some(namespace.to_string());
985
986    if let Some(current_purl) = package.purl.as_deref()
987        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
988    {
989        package.purl = Some(updated_purl.clone());
990        package.package_uid = old_package_uid.replace_base(&updated_purl);
991    }
992
993    for file in files.iter_mut() {
994        for package_uid in &mut file.for_packages {
995            if *package_uid == old_package_uid {
996                *package_uid = package.package_uid.clone();
997            }
998        }
999    }
1000
1001    for dep in dependencies.iter_mut() {
1002        if dep.for_package_uid.as_ref() == Some(&old_package_uid) {
1003            dep.for_package_uid = Some(package.package_uid.clone());
1004        }
1005
1006        if dep.for_package_uid.as_ref() == Some(&package.package_uid) {
1007            dep.namespace = Some(namespace.to_string());
1008
1009            if let Some(current_purl) = dep.purl.as_deref()
1010                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1011            {
1012                dep.purl = Some(updated_purl.clone());
1013                dep.dependency_uid = dep.dependency_uid.replace_base(&updated_purl);
1014            }
1015        }
1016    }
1017}
1018
// Test-only module; its source is the sibling file named by `#[path]`.
#[cfg(test)]
#[path = "file_ref_resolve_test.rs"]
mod tests;