Skip to main content

provenant/assembly/
file_ref_resolve.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
/// Associates installed-package database datasources with the path suffix
/// under which their database is found.
struct DbPathConfig {
    /// Datasource ids this entry applies to.
    datasource_ids: &'static [DatasourceId],
    /// Path suffix of the database; `compute_root` cuts the datafile path at
    /// the last occurrence of this text to obtain the root prefix.
    path_suffix: &'static str,
}
13
/// Strategy used to map a package's file references onto scanned files.
///
/// `resolve_file_references` dispatches on this value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// Handled by the same resolver as `RelativeToDatafileParent`.
    About,
    /// References come from a manifest file already attached to the package.
    AttachedManifest,
    /// References are resolved against the prefix preceding `conda-meta/`.
    CondaMeta,
    /// References are resolved against the grandparent directory of an
    /// `md5sums` datafile.
    DebianExtractedDeb,
    /// References are resolved against the root implied by an installed
    /// package database path (see `DB_PATH_CONFIGS`).
    InstalledDb,
    /// References are resolved using the layout found by
    /// `find_python_metadata_root`.
    PythonMetadata,
    /// References are resolved against the datafile's parent directory.
    RelativeToDatafileParent,
}
24
/// Binds a set of datasource ids to the resolver strategy that handles them.
struct FileReferenceResolverConfig {
    /// Datasource ids that select this resolver.
    datasource_ids: &'static [DatasourceId],
    /// Strategy applied when one of `datasource_ids` matches.
    kind: FileReferenceResolverKind,
}
29
/// Known installed-package database locations, searched in order by
/// `find_db_config`.
///
/// Several datasources appear more than once because the same database format
/// is listed under more than one path (`var/lib/rpm` and
/// `usr/lib/sysimage/rpm`).
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "var/lib/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "var/lib/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    // NOTE(review): this suffix ends with `/`, so datafile paths presumably
    // continue past it; `find_db_config`'s `ends_with` test would then only
    // match this entry through its datasource-id fallback — confirm.
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
68
/// Datasources that make `is_rpm_package` treat a package as an installed RPM.
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Path suffix used to compute the root prefix of a yumdb datafile path.
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path segment identifying a conda metadata datafile; the text before its
/// last occurrence is used as the conda root.
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasources handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
    DatasourceId::PypiEggPkginfo,
    DatasourceId::PypiEditableEggPkginfo,
];
/// Path segments searched for (in this order) to locate a Python install root.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasources whose file references are merged into an installed Debian
/// package's own references.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];
87
/// Datasources routed to the `InstalledDb` resolver; the same ids that appear
/// in `DB_PATH_CONFIGS`.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
96
/// Resolver table consulted by `find_file_reference_resolver`.
///
/// Entries are tried in order and the first match wins, so the order matters
/// for a package that carries datasource ids from more than one entry.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    // Matched through files already attached to the package rather than
    // through the package's own datasource ids.
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
        kind: FileReferenceResolverKind::DebianExtractedDeb,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
127
/// Directories used to resolve references listed by a Python metadata file.
struct PythonMetadataResolution {
    /// Directory that relative reference paths are joined onto.
    base_path: String,
    /// Directory a normalized reference must stay inside to be accepted.
    allowed_root: String,
}
132
133pub fn resolve_file_references(
134    files: &mut [FileInfo],
135    packages: &mut [Package],
136    dependencies: &mut [TopLevelDependency],
137) {
138    if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
139        return;
140    }
141
142    let path_index = build_path_index(&*files);
143
144    for package in packages.iter_mut() {
145        let Some(config) = find_file_reference_resolver(files, package) else {
146            continue;
147        };
148
149        match config.kind {
150            FileReferenceResolverKind::About
151            | FileReferenceResolverKind::RelativeToDatafileParent => {
152                resolve_relative_to_datafile_parent(
153                    files,
154                    &path_index,
155                    package,
156                    config.datasource_ids,
157                );
158            }
159            FileReferenceResolverKind::AttachedManifest => {
160                resolve_attached_manifest_file_references(
161                    files,
162                    &path_index,
163                    package,
164                    config.datasource_ids[0],
165                );
166            }
167            FileReferenceResolverKind::CondaMeta => {
168                resolve_conda_file_references(files, &path_index, package);
169            }
170            FileReferenceResolverKind::DebianExtractedDeb => {
171                resolve_debian_extracted_deb_file_references(files, &path_index, package)
172            }
173            FileReferenceResolverKind::InstalledDb => {
174                resolve_installed_db_file_references(files, &path_index, package, dependencies);
175            }
176            FileReferenceResolverKind::PythonMetadata => {
177                resolve_python_metadata_file_references(files, &path_index, package);
178            }
179        }
180    }
181}
182
183pub(super) fn has_relevant_file_reference_datasource_ids(
184    file_datasource_ids: &HashSet<DatasourceId>,
185) -> bool {
186    FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
187        config
188            .datasource_ids
189            .iter()
190            .any(|datasource_id| file_datasource_ids.contains(datasource_id))
191    })
192}
193
194fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
195    let file_datasource_ids: HashSet<DatasourceId> = files
196        .iter()
197        .flat_map(|file| {
198            file.package_data
199                .iter()
200                .filter_map(|package_data| package_data.datasource_id)
201        })
202        .collect();
203
204    has_relevant_file_reference_datasource_ids(&file_datasource_ids)
205}
206
207fn resolve_relative_to_datafile_parent(
208    files: &mut [FileInfo],
209    path_index: &HashMap<String, usize>,
210    package: &mut Package,
211    datasource_ids: &[DatasourceId],
212) {
213    let Some(datafile_path) = package.datafile_paths.first() else {
214        return;
215    };
216    let root = Path::new(datafile_path)
217        .parent()
218        .map(|p| p.to_string_lossy().to_string())
219        .unwrap_or_default();
220
221    let file_references = collect_file_references(
222        files,
223        path_index,
224        datafile_path,
225        &package.datasource_ids,
226        datasource_ids,
227        package.purl.as_deref(),
228    );
229
230    let mut missing_refs = Vec::new();
231    for file_ref in &file_references {
232        let resolved_path = if root.is_empty() {
233            file_ref.path.clone()
234        } else {
235            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
236        };
237        if let Some(&file_idx) = path_index.get(&resolved_path) {
238            let package_uid = package.package_uid.clone();
239            if !files[file_idx].for_packages.contains(&package_uid) {
240                files[file_idx].for_packages.push(package_uid);
241            }
242        } else {
243            missing_refs.push(file_ref.path.clone());
244        }
245    }
246
247    record_missing_file_references(package, missing_refs);
248}
249
250fn resolve_attached_manifest_file_references(
251    files: &mut [FileInfo],
252    path_index: &HashMap<String, usize>,
253    package: &mut Package,
254    datasource_id: DatasourceId,
255) {
256    let Some((datafile_path, file_references)) =
257        find_attached_manifest_file_references(files, package, datasource_id)
258    else {
259        return;
260    };
261
262    let root = Path::new(datafile_path)
263        .parent()
264        .map(|p| p.to_string_lossy().to_string())
265        .unwrap_or_default();
266
267    let mut missing_refs = Vec::new();
268    for file_ref in &file_references {
269        let resolved_path = if root.is_empty() {
270            file_ref.path.clone()
271        } else {
272            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
273        };
274
275        if let Some(&file_idx) = path_index.get(&resolved_path) {
276            let package_uid = package.package_uid.clone();
277            if !files[file_idx].for_packages.contains(&package_uid) {
278                files[file_idx].for_packages.push(package_uid);
279            }
280        } else {
281            missing_refs.push(file_ref.path.clone());
282        }
283    }
284
285    record_missing_file_references(package, missing_refs);
286}
287
288fn resolve_conda_file_references(
289    files: &mut [FileInfo],
290    path_index: &HashMap<String, usize>,
291    package: &mut Package,
292) {
293    let Some(conda_meta_path) = package
294        .datafile_paths
295        .iter()
296        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
297    else {
298        return;
299    };
300    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
301        return;
302    };
303
304    let file_references = collect_file_references(
305        files,
306        path_index,
307        conda_meta_path,
308        &package.datasource_ids,
309        &[DatasourceId::CondaMetaJson],
310        package.purl.as_deref(),
311    );
312
313    let mut missing_refs = Vec::new();
314    for file_ref in &file_references {
315        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
316        if let Some(&file_idx) = path_index.get(&resolved_path) {
317            let package_uid = package.package_uid.clone();
318            if !files[file_idx].for_packages.contains(&package_uid) {
319                files[file_idx].for_packages.push(package_uid);
320            }
321        } else {
322            missing_refs.push(file_ref.path.clone());
323        }
324    }
325
326    record_missing_file_references(package, missing_refs);
327}
328
329fn resolve_installed_db_file_references(
330    files: &mut [FileInfo],
331    path_index: &HashMap<String, usize>,
332    package: &mut Package,
333    dependencies: &mut [TopLevelDependency],
334) {
335    let Some(config) = find_db_config(package) else {
336        return;
337    };
338    let Some(datafile_path) = package.datafile_paths.first() else {
339        return;
340    };
341
342    let root = compute_root(datafile_path, config.path_suffix);
343
344    let mut file_references = collect_file_references(
345        files,
346        path_index,
347        datafile_path,
348        &package.datasource_ids,
349        config.datasource_ids,
350        package.purl.as_deref(),
351    );
352
353    if is_debian_installed_package(package) {
354        merge_file_references(
355            &mut file_references,
356            collect_debian_installed_file_references(files, package),
357        );
358    }
359
360    let mut missing_refs = Vec::new();
361    for file_ref in &file_references {
362        let ref_path = file_ref.path.trim_start_matches('/');
363        let resolved_path = if root.is_empty() {
364            ref_path.to_string()
365        } else {
366            format!("{}{}", root, ref_path)
367        };
368
369        if let Some(&file_idx) = path_index.get(&resolved_path) {
370            let package_uid = package.package_uid.clone();
371            if !files[file_idx].for_packages.contains(&package_uid) {
372                files[file_idx].for_packages.push(package_uid);
373            }
374        } else {
375            missing_refs.push(file_ref.path.clone());
376        }
377    }
378
379    record_missing_file_references(package, missing_refs);
380
381    if is_rpm_package(package)
382        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
383    {
384        apply_rpm_namespace(files, package, dependencies, &namespace);
385    }
386}
387
388fn resolve_debian_extracted_deb_file_references(
389    files: &mut [FileInfo],
390    path_index: &HashMap<String, usize>,
391    package: &mut Package,
392) {
393    let Some(datafile_path) = package
394        .datafile_paths
395        .iter()
396        .find(|path| path.ends_with("/md5sums"))
397    else {
398        return;
399    };
400
401    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
402        return;
403    };
404    let Some(extracted_root) = md5sums_parent.parent() else {
405        return;
406    };
407    let root = extracted_root.to_string_lossy().to_string();
408
409    let Some(&file_idx) = path_index.get(datafile_path) else {
410        return;
411    };
412    let file_references: Vec<_> = files[file_idx]
413        .package_data
414        .iter()
415        .filter(|pkg_data| {
416            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
417        })
418        .flat_map(|pkg_data| pkg_data.file_references.clone())
419        .collect();
420
421    let mut missing_refs = Vec::new();
422    for file_ref in &file_references {
423        let resolved_path = if root.is_empty() {
424            file_ref.path.trim_start_matches('/').to_string()
425        } else {
426            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
427        };
428
429        if let Some(&file_idx) = path_index.get(&resolved_path) {
430            let package_uid = package.package_uid.clone();
431            if !files[file_idx].for_packages.contains(&package_uid) {
432                files[file_idx].for_packages.push(package_uid);
433            }
434        } else {
435            missing_refs.push(file_ref.path.clone());
436        }
437    }
438
439    record_missing_file_references(package, missing_refs);
440}
441
442fn resolve_python_metadata_file_references(
443    files: &mut [FileInfo],
444    path_index: &HashMap<String, usize>,
445    package: &mut Package,
446) {
447    let Some(python_resolution) = find_python_metadata_root(package) else {
448        return;
449    };
450    let Some(datafile_path) = package
451        .datafile_paths
452        .iter()
453        .find(|path| is_python_metadata_layout(path))
454    else {
455        return;
456    };
457
458    let file_references = collect_file_references(
459        files,
460        path_index,
461        datafile_path,
462        &package.datasource_ids,
463        PYTHON_METADATA_DATASOURCE_IDS,
464        package.purl.as_deref(),
465    );
466
467    let mut missing_refs = Vec::new();
468    for file_ref in &file_references {
469        let Some(resolved_path) = normalize_relative_path(
470            &python_resolution.base_path,
471            &python_resolution.allowed_root,
472            &file_ref.path,
473        ) else {
474            missing_refs.push(file_ref.path.clone());
475            continue;
476        };
477
478        if let Some(&file_idx) = path_index.get(&resolved_path) {
479            let package_uid = package.package_uid.clone();
480            if !files[file_idx].for_packages.contains(&package_uid) {
481                files[file_idx].for_packages.push(package_uid);
482            }
483        } else {
484            missing_refs.push(file_ref.path.clone());
485        }
486    }
487
488    record_missing_file_references(package, missing_refs);
489}
490
491fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
492    if missing_refs.is_empty() {
493        return;
494    }
495
496    missing_refs.sort();
497    let missing_refs_json: Vec<serde_json::Value> = missing_refs
498        .into_iter()
499        .map(|path| serde_json::json!({"path": path}))
500        .collect();
501
502    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
503    extra_data.insert(
504        "missing_file_references".to_string(),
505        serde_json::Value::Array(missing_refs_json),
506    );
507}
508
509fn find_file_reference_resolver(
510    files: &[FileInfo],
511    package: &Package,
512) -> Option<&'static FileReferenceResolverConfig> {
513    FILE_REFERENCE_RESOLVER_CONFIGS
514        .iter()
515        .find(|config| match config.kind {
516            FileReferenceResolverKind::AttachedManifest => {
517                config.datasource_ids.iter().any(|datasource_id| {
518                    files.iter().any(|file| {
519                        file.for_packages.contains(&package.package_uid)
520                            && file
521                                .package_data
522                                .iter()
523                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
524                    })
525                })
526            }
527            _ => config
528                .datasource_ids
529                .iter()
530                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
531        })
532}
533
/// Returns `true` when `path` ends with a `/METADATA` or `/PKG-INFO` file
/// name component.
fn is_python_metadata_layout(path: &str) -> bool {
    const METADATA_FILE_SUFFIXES: [&str; 2] = ["/METADATA", "/PKG-INFO"];

    METADATA_FILE_SUFFIXES
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
537
538fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
539    let datafile_path = package
540        .datafile_paths
541        .iter()
542        .find(|path| is_python_metadata_layout(path))?;
543
544    if !package
545        .datasource_ids
546        .iter()
547        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
548    {
549        return None;
550    }
551
552    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
553        if let Some(idx) = datafile_path.rfind(segment) {
554            if datafile_path.ends_with("/METADATA") {
555                let root_end = idx + segment.len();
556                let root = datafile_path[..root_end].to_string();
557                return Some(PythonMetadataResolution {
558                    base_path: root.clone(),
559                    allowed_root: root,
560                });
561            }
562
563            if datafile_path.ends_with("/PKG-INFO") {
564                let parent = Path::new(datafile_path).parent()?;
565                let allowed_root = datafile_path[..idx + segment.len()].to_string();
566                return Some(PythonMetadataResolution {
567                    base_path: parent.to_string_lossy().to_string(),
568                    allowed_root,
569                });
570            }
571        }
572    }
573
574    if datafile_path.ends_with(".egg-info/PKG-INFO") {
575        let metadata_parent = Path::new(datafile_path).parent()?;
576        let project_root = metadata_parent.parent()?;
577        let project_root = project_root.to_string_lossy().to_string();
578        return Some(PythonMetadataResolution {
579            base_path: project_root.clone(),
580            allowed_root: project_root,
581        });
582    }
583
584    None
585}
586
/// Joins `relative` onto `base`, collapses `.` and `..` components lexically,
/// and returns the result only when it stays inside `allowed_root`.
///
/// Leading `/` characters on `relative` are ignored, so it is always treated
/// as relative to `base`. The file system is never consulted.
///
/// Returns `None` when:
/// * a `..` component would climb above the start of the path (previously this
///   was silently clamped, which let a reference that leaves the scan root
///   resolve to an unrelated path inside it), or
/// * the normalized path does not start with `allowed_root`, compared
///   component by component.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` reports `false` when there is nothing left to remove,
                // i.e. the reference escapes everything we know about.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    if normalized.starts_with(Path::new(allowed_root)) {
        Some(normalized.to_string_lossy().into_owned())
    } else {
        None
    }
}
608
609fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
610    let path = datafile_path?;
611    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
612    Some(path[..idx].to_string())
613}
614
615pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
616    let yumdb_indices: Vec<usize> = packages
617        .iter()
618        .enumerate()
619        .filter_map(|(idx, package)| {
620            package
621                .datasource_ids
622                .contains(&DatasourceId::RpmYumdb)
623                .then_some(idx)
624        })
625        .collect();
626    let mut removal_indices = Vec::new();
627
628    for yumdb_idx in yumdb_indices {
629        let yumdb_package = packages[yumdb_idx].clone();
630        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
631            continue;
632        };
633        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
634        let yumdb_arch = yumdb_package
635            .qualifiers
636            .as_ref()
637            .and_then(|qualifiers| qualifiers.get("arch"));
638
639        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
640            if idx == yumdb_idx || !is_rpm_package(package) {
641                return None;
642            }
643
644            let config = find_db_config(package)?;
645            let datafile_path = package.datafile_paths.first()?;
646            let target_root = compute_root(datafile_path, config.path_suffix);
647            let target_arch = package
648                .qualifiers
649                .as_ref()
650                .and_then(|qualifiers| qualifiers.get("arch"));
651
652            (target_root == yumdb_root
653                && package.name == yumdb_package.name
654                && package.version == yumdb_package.version
655                && target_arch == yumdb_arch)
656                .then_some(idx)
657        }) else {
658            continue;
659        };
660
661        let target_package_uid = packages[target_idx].package_uid.clone();
662        {
663            let target = &mut packages[target_idx];
664            target
665                .datafile_paths
666                .extend(yumdb_package.datafile_paths.clone());
667            target
668                .datasource_ids
669                .extend(yumdb_package.datasource_ids.clone());
670
671            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
672                && !yumdb_extra.is_empty()
673            {
674                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
675                let mut merged_yumdb = extra_data
676                    .get("yumdb")
677                    .and_then(|value| value.as_object().cloned())
678                    .unwrap_or_default();
679                for (key, value) in yumdb_extra {
680                    merged_yumdb.insert(key, value);
681                }
682                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
683            }
684        }
685
686        for file in files.iter_mut() {
687            for package_uid in &mut file.for_packages {
688                if *package_uid == yumdb_package.package_uid {
689                    *package_uid = target_package_uid.clone();
690                }
691            }
692        }
693
694        removal_indices.push(yumdb_idx);
695    }
696
697    removal_indices.sort_unstable();
698    removal_indices.dedup();
699    for idx in removal_indices.into_iter().rev() {
700        packages.remove(idx);
701    }
702}
703
704fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
705    files
706        .iter()
707        .enumerate()
708        .map(|(idx, file)| (file.path.clone(), idx))
709        .collect()
710}
711
712fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
713    let datafile_paths = &package.datafile_paths;
714
715    for config in DB_PATH_CONFIGS {
716        if !datafile_paths.is_empty()
717            && !datafile_paths
718                .iter()
719                .any(|path| path.ends_with(config.path_suffix))
720        {
721            continue;
722        }
723
724        for &config_dsid in config.datasource_ids {
725            for &pkg_dsid in &package.datasource_ids {
726                if config_dsid == pkg_dsid {
727                    return Some(config);
728                }
729            }
730        }
731    }
732
733    for config in DB_PATH_CONFIGS {
734        for &config_dsid in config.datasource_ids {
735            for &pkg_dsid in &package.datasource_ids {
736                if config_dsid == pkg_dsid {
737                    return Some(config);
738                }
739            }
740        }
741    }
742
743    None
744}
745
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`, or an empty string when `suffix` does not occur in the path.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}
758
759fn collect_file_references(
760    files: &[FileInfo],
761    path_index: &HashMap<String, usize>,
762    datafile_path: &str,
763    package_datasource_ids: &[DatasourceId],
764    config_datasource_ids: &[DatasourceId],
765    package_purl: Option<&str>,
766) -> Vec<crate::models::FileReference> {
767    let file_idx = match path_index.get(datafile_path) {
768        Some(&idx) => idx,
769        None => return Vec::new(),
770    };
771
772    let file = &files[file_idx];
773    let mut refs = Vec::new();
774
775    for pkg_data in &file.package_data {
776        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
777            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
778        });
779
780        if !dsid_matches {
781            continue;
782        }
783
784        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
785            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
786            _ => true,
787        };
788
789        if purl_matches {
790            refs.extend(pkg_data.file_references.clone());
791        }
792    }
793
794    refs
795}
796
797fn is_rpm_package(package: &Package) -> bool {
798    for &dsid in &package.datasource_ids {
799        for &rpm_dsid in RPM_DATASOURCE_IDS {
800            if rpm_dsid == dsid {
801                return true;
802            }
803        }
804    }
805    false
806}
807
808fn is_debian_installed_package(package: &Package) -> bool {
809    package
810        .datasource_ids
811        .contains(&DatasourceId::DebianInstalledStatusDb)
812        || package
813            .datasource_ids
814            .contains(&DatasourceId::DebianDistrolessInstalledDb)
815}
816
817fn collect_debian_installed_file_references(
818    files: &[FileInfo],
819    package: &Package,
820) -> Vec<crate::models::FileReference> {
821    let mut refs = Vec::new();
822
823    for file in files {
824        for pkg_data in &file.package_data {
825            let Some(dsid) = pkg_data.datasource_id else {
826                continue;
827            };
828            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
829                continue;
830            }
831
832            if pkg_data.name != package.name {
833                continue;
834            }
835            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
836                continue;
837            }
838            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
839                continue;
840            }
841
842            merge_file_references(&mut refs, pkg_data.file_references.clone());
843        }
844    }
845
846    refs
847}
848
849fn find_attached_manifest_file_references<'a>(
850    files: &'a [FileInfo],
851    package: &Package,
852    datasource_id: DatasourceId,
853) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
854    for file in files {
855        if !file.for_packages.contains(&package.package_uid) {
856            continue;
857        }
858
859        for pkg_data in &file.package_data {
860            if pkg_data.datasource_id == Some(datasource_id) {
861                return Some((&file.path, pkg_data.file_references.clone()));
862            }
863        }
864    }
865
866    None
867}
868
/// Decides whether supplemental package data may belong to a package, judging
/// by namespace.
///
/// A namespace missing on either side never blocks the match. When both are
/// present they must be equal, except that a supplemental `debian` namespace
/// is also accepted for an `ubuntu` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(package)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    supplemental == package || (supplemental == "debian" && package == "ubuntu")
}
883
/// Decides whether supplemental package data may belong to a package, judging
/// by the `arch` qualifier.
///
/// Supplemental data without an `arch` always matches. Otherwise the package
/// must carry the same `arch` value; a package without one does not match.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref().and_then(|entries| entries.get("arch"))
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(required_arch) => arch_of(package_qualifiers) == Some(required_arch),
    }
}
901
902fn merge_file_references(
903    target: &mut Vec<crate::models::FileReference>,
904    incoming: Vec<crate::models::FileReference>,
905) {
906    for file_ref in incoming {
907        if let Some(existing) = target
908            .iter_mut()
909            .find(|existing| existing.path == file_ref.path)
910        {
911            if existing.size.is_none() {
912                existing.size = file_ref.size;
913            }
914            if existing.sha1.is_none() {
915                existing.sha1 = file_ref.sha1;
916            }
917            if existing.md5.is_none() {
918                existing.md5 = file_ref.md5;
919            }
920            if existing.sha256.is_none() {
921                existing.sha256 = file_ref.sha256;
922            }
923            if existing.sha512.is_none() {
924                existing.sha512 = file_ref.sha512;
925            }
926            if existing.extra_data.is_none() {
927                existing.extra_data = file_ref.extra_data.clone();
928            }
929        } else {
930            target.push(file_ref);
931        }
932    }
933}
934
935fn resolve_rpm_namespace(
936    files: &[FileInfo],
937    path_index: &HashMap<String, usize>,
938    root: &str,
939) -> Option<String> {
940    let os_release_paths = [
941        format!("{}etc/os-release", root),
942        format!("{}usr/lib/os-release", root),
943    ];
944
945    for os_release_path in &os_release_paths {
946        if let Some(&file_idx) = path_index.get(os_release_path) {
947            let file = &files[file_idx];
948            for pkg_data in &file.package_data {
949                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
950                    && let Some(namespace) = &pkg_data.namespace
951                {
952                    return Some(namespace.clone());
953                }
954            }
955        }
956    }
957
958    None
959}
960
961fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
962    let parsed = PackageUrl::from_str(existing_purl).ok()?;
963    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;
964
965    updated.with_namespace(namespace).ok()?;
966
967    if let Some(version) = parsed.version() {
968        updated.with_version(version).ok()?;
969    }
970
971    if let Some(subpath) = parsed.subpath() {
972        updated.with_subpath(subpath).ok()?;
973    }
974
975    for (key, value) in parsed.qualifiers() {
976        updated
977            .add_qualifier(key.to_string(), value.to_string())
978            .ok()?;
979    }
980
981    Some(updated.to_string())
982}
983
984fn apply_rpm_namespace(
985    files: &mut [FileInfo],
986    package: &mut Package,
987    dependencies: &mut [TopLevelDependency],
988    namespace: &str,
989) {
990    let old_package_uid = package.package_uid.clone();
991
992    package.namespace = Some(namespace.to_string());
993
994    if let Some(current_purl) = package.purl.as_deref()
995        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
996    {
997        package.purl = Some(updated_purl.clone());
998        package.package_uid = old_package_uid.replace_base(&updated_purl);
999    }
1000
1001    for file in files.iter_mut() {
1002        for package_uid in &mut file.for_packages {
1003            if *package_uid == old_package_uid {
1004                *package_uid = package.package_uid.clone();
1005            }
1006        }
1007    }
1008
1009    for dep in dependencies.iter_mut() {
1010        if dep.for_package_uid.as_ref() == Some(&old_package_uid) {
1011            dep.for_package_uid = Some(package.package_uid.clone());
1012        }
1013
1014        if dep.for_package_uid.as_ref() == Some(&package.package_uid) {
1015            dep.namespace = Some(namespace.to_string());
1016
1017            if let Some(current_purl) = dep.purl.as_deref()
1018                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1019            {
1020                dep.purl = Some(updated_purl.clone());
1021                dep.dependency_uid = dep.dependency_uid.replace_base(&updated_purl);
1022            }
1023        }
1024    }
1025}
1026
1027#[cfg(test)]
1028#[path = "file_ref_resolve_test.rs"]
1029mod tests;