Skip to main content

provenant/assembly/
file_ref_resolve.rs

1use std::collections::HashMap;
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
/// Associates installed-package database datasources with the path suffix
/// at which their database file is found.
struct DbPathConfig {
    /// Datasource ids that select this configuration (see `find_db_config`).
    datasource_ids: &'static [DatasourceId],
    /// Trailing part of the database file path; `compute_root` strips it from
    /// the datafile path to obtain the root prefix used for resolution.
    path_suffix: &'static str,
}
13
/// Strategy used by `resolve_file_references` to turn a package's file
/// references into scanned file paths.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// Handled like `RelativeToDatafileParent` (configured for `AboutFile`).
    About,
    /// References come from a manifest file already assigned to the package
    /// (configured for `CpanManifest`).
    AttachedManifest,
    /// References are resolved against the prefix preceding `conda-meta/`.
    CondaMeta,
    /// References are resolved against the grandparent directory of an
    /// `md5sums` datafile.
    DebianExtractedDeb,
    /// References are resolved against the root derived from an installed
    /// package database path (see `DB_PATH_CONFIGS`).
    InstalledDb,
    /// References are resolved via `find_python_metadata_root` and
    /// `normalize_relative_path`.
    PythonMetadata,
    /// References are resolved against the datafile's parent directory
    /// (configured for `GradleModule`).
    RelativeToDatafileParent,
}
24
/// Maps a set of datasource ids to the resolver strategy that handles them.
struct FileReferenceResolverConfig {
    /// Datasource ids for which `kind` applies.
    datasource_ids: &'static [DatasourceId],
    /// Resolution strategy dispatched on in `resolve_file_references`.
    kind: FileReferenceResolverKind,
}
29
/// Installed-package database datasources paired with the path suffix of
/// their database file. `find_db_config` returns the first entry whose
/// datasource ids intersect a package's datasource ids.
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
56
/// Datasource ids that make `is_rpm_package` treat a package as RPM.
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Path suffix stripped from a yumdb datafile path to obtain its root
/// (see `merge_rpm_yumdb_metadata`).
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path segment whose last occurrence marks the end of the conda root prefix.
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasource ids handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
    DatasourceId::PypiEggPkginfo,
    DatasourceId::PypiEditableEggPkginfo,
];
/// Path segments searched for (in this order) by `find_python_metadata_root`.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasource ids whose file references are merged into Debian installed
/// packages by `collect_debian_installed_file_references`.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];
75
/// Datasource ids routed to the `InstalledDb` resolver; these are the same
/// ids that appear in `DB_PATH_CONFIGS`.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
84
/// Resolver dispatch table. `find_file_reference_resolver` returns the first
/// entry that matches a package, so the order of entries is significant.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
        kind: FileReferenceResolverKind::DebianExtractedDeb,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
115
/// Paths produced by `find_python_metadata_root` and consumed by
/// `normalize_relative_path` when resolving Python file references.
struct PythonMetadataResolution {
    /// Directory that each referenced path is joined onto.
    base_path: String,
    /// Prefix a normalized path must start with to be accepted.
    allowed_root: String,
}
120
121pub fn resolve_file_references(
122    files: &mut [FileInfo],
123    packages: &mut [Package],
124    dependencies: &mut [TopLevelDependency],
125) {
126    let path_index = build_path_index(&*files);
127
128    for package in packages.iter_mut() {
129        let Some(config) = find_file_reference_resolver(files, package) else {
130            continue;
131        };
132
133        match config.kind {
134            FileReferenceResolverKind::About
135            | FileReferenceResolverKind::RelativeToDatafileParent => {
136                resolve_relative_to_datafile_parent(
137                    files,
138                    &path_index,
139                    package,
140                    config.datasource_ids,
141                );
142            }
143            FileReferenceResolverKind::AttachedManifest => {
144                resolve_attached_manifest_file_references(
145                    files,
146                    &path_index,
147                    package,
148                    config.datasource_ids[0],
149                );
150            }
151            FileReferenceResolverKind::CondaMeta => {
152                resolve_conda_file_references(files, &path_index, package);
153            }
154            FileReferenceResolverKind::DebianExtractedDeb => {
155                resolve_debian_extracted_deb_file_references(files, &path_index, package)
156            }
157            FileReferenceResolverKind::InstalledDb => {
158                resolve_installed_db_file_references(files, &path_index, package, dependencies);
159            }
160            FileReferenceResolverKind::PythonMetadata => {
161                resolve_python_metadata_file_references(files, &path_index, package);
162            }
163        }
164    }
165}
166
167fn resolve_relative_to_datafile_parent(
168    files: &mut [FileInfo],
169    path_index: &HashMap<String, usize>,
170    package: &mut Package,
171    datasource_ids: &[DatasourceId],
172) {
173    let Some(datafile_path) = package.datafile_paths.first() else {
174        return;
175    };
176    let root = Path::new(datafile_path)
177        .parent()
178        .map(|p| p.to_string_lossy().to_string())
179        .unwrap_or_default();
180
181    let file_references = collect_file_references(
182        files,
183        path_index,
184        datafile_path,
185        &package.datasource_ids,
186        datasource_ids,
187        package.purl.as_deref(),
188    );
189
190    let mut missing_refs = Vec::new();
191    for file_ref in &file_references {
192        let resolved_path = if root.is_empty() {
193            file_ref.path.clone()
194        } else {
195            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
196        };
197        if let Some(&file_idx) = path_index.get(&resolved_path) {
198            let package_uid = package.package_uid.clone();
199            if !files[file_idx].for_packages.contains(&package_uid) {
200                files[file_idx].for_packages.push(package_uid);
201            }
202        } else {
203            missing_refs.push(file_ref.path.clone());
204        }
205    }
206
207    record_missing_file_references(package, missing_refs);
208}
209
210fn resolve_attached_manifest_file_references(
211    files: &mut [FileInfo],
212    path_index: &HashMap<String, usize>,
213    package: &mut Package,
214    datasource_id: DatasourceId,
215) {
216    let Some((datafile_path, file_references)) =
217        find_attached_manifest_file_references(files, package, datasource_id)
218    else {
219        return;
220    };
221
222    let root = Path::new(datafile_path)
223        .parent()
224        .map(|p| p.to_string_lossy().to_string())
225        .unwrap_or_default();
226
227    let mut missing_refs = Vec::new();
228    for file_ref in &file_references {
229        let resolved_path = if root.is_empty() {
230            file_ref.path.clone()
231        } else {
232            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
233        };
234
235        if let Some(&file_idx) = path_index.get(&resolved_path) {
236            let package_uid = package.package_uid.clone();
237            if !files[file_idx].for_packages.contains(&package_uid) {
238                files[file_idx].for_packages.push(package_uid);
239            }
240        } else {
241            missing_refs.push(file_ref.path.clone());
242        }
243    }
244
245    record_missing_file_references(package, missing_refs);
246}
247
248fn resolve_conda_file_references(
249    files: &mut [FileInfo],
250    path_index: &HashMap<String, usize>,
251    package: &mut Package,
252) {
253    let Some(conda_meta_path) = package
254        .datafile_paths
255        .iter()
256        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
257    else {
258        return;
259    };
260    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
261        return;
262    };
263
264    let file_references = collect_file_references(
265        files,
266        path_index,
267        conda_meta_path,
268        &package.datasource_ids,
269        &[DatasourceId::CondaMetaJson],
270        package.purl.as_deref(),
271    );
272
273    let mut missing_refs = Vec::new();
274    for file_ref in &file_references {
275        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
276        if let Some(&file_idx) = path_index.get(&resolved_path) {
277            let package_uid = package.package_uid.clone();
278            if !files[file_idx].for_packages.contains(&package_uid) {
279                files[file_idx].for_packages.push(package_uid);
280            }
281        } else {
282            missing_refs.push(file_ref.path.clone());
283        }
284    }
285
286    record_missing_file_references(package, missing_refs);
287}
288
289fn resolve_installed_db_file_references(
290    files: &mut [FileInfo],
291    path_index: &HashMap<String, usize>,
292    package: &mut Package,
293    dependencies: &mut [TopLevelDependency],
294) {
295    let Some(config) = find_db_config(package) else {
296        return;
297    };
298    let Some(datafile_path) = package.datafile_paths.first() else {
299        return;
300    };
301
302    let root = compute_root(datafile_path, config.path_suffix);
303
304    let mut file_references = collect_file_references(
305        files,
306        path_index,
307        datafile_path,
308        &package.datasource_ids,
309        config.datasource_ids,
310        package.purl.as_deref(),
311    );
312
313    if is_debian_installed_package(package) {
314        merge_file_references(
315            &mut file_references,
316            collect_debian_installed_file_references(files, package),
317        );
318    }
319
320    let mut missing_refs = Vec::new();
321    for file_ref in &file_references {
322        let ref_path = file_ref.path.trim_start_matches('/');
323        let resolved_path = if root.is_empty() {
324            ref_path.to_string()
325        } else {
326            format!("{}{}", root, ref_path)
327        };
328
329        if let Some(&file_idx) = path_index.get(&resolved_path) {
330            let package_uid = package.package_uid.clone();
331            if !files[file_idx].for_packages.contains(&package_uid) {
332                files[file_idx].for_packages.push(package_uid);
333            }
334        } else {
335            missing_refs.push(file_ref.path.clone());
336        }
337    }
338
339    record_missing_file_references(package, missing_refs);
340
341    if is_rpm_package(package)
342        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
343    {
344        apply_rpm_namespace(files, package, dependencies, &namespace);
345    }
346}
347
348fn resolve_debian_extracted_deb_file_references(
349    files: &mut [FileInfo],
350    path_index: &HashMap<String, usize>,
351    package: &mut Package,
352) {
353    let Some(datafile_path) = package
354        .datafile_paths
355        .iter()
356        .find(|path| path.ends_with("/md5sums"))
357    else {
358        return;
359    };
360
361    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
362        return;
363    };
364    let Some(extracted_root) = md5sums_parent.parent() else {
365        return;
366    };
367    let root = extracted_root.to_string_lossy().to_string();
368
369    let Some(&file_idx) = path_index.get(datafile_path) else {
370        return;
371    };
372    let file_references: Vec<_> = files[file_idx]
373        .package_data
374        .iter()
375        .filter(|pkg_data| {
376            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
377        })
378        .flat_map(|pkg_data| pkg_data.file_references.clone())
379        .collect();
380
381    let mut missing_refs = Vec::new();
382    for file_ref in &file_references {
383        let resolved_path = if root.is_empty() {
384            file_ref.path.trim_start_matches('/').to_string()
385        } else {
386            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
387        };
388
389        if let Some(&file_idx) = path_index.get(&resolved_path) {
390            let package_uid = package.package_uid.clone();
391            if !files[file_idx].for_packages.contains(&package_uid) {
392                files[file_idx].for_packages.push(package_uid);
393            }
394        } else {
395            missing_refs.push(file_ref.path.clone());
396        }
397    }
398
399    record_missing_file_references(package, missing_refs);
400}
401
402fn resolve_python_metadata_file_references(
403    files: &mut [FileInfo],
404    path_index: &HashMap<String, usize>,
405    package: &mut Package,
406) {
407    let Some(python_resolution) = find_python_metadata_root(package) else {
408        return;
409    };
410    let Some(datafile_path) = package
411        .datafile_paths
412        .iter()
413        .find(|path| is_python_metadata_layout(path))
414    else {
415        return;
416    };
417
418    let file_references = collect_file_references(
419        files,
420        path_index,
421        datafile_path,
422        &package.datasource_ids,
423        PYTHON_METADATA_DATASOURCE_IDS,
424        package.purl.as_deref(),
425    );
426
427    let mut missing_refs = Vec::new();
428    for file_ref in &file_references {
429        let Some(resolved_path) = normalize_relative_path(
430            &python_resolution.base_path,
431            &python_resolution.allowed_root,
432            &file_ref.path,
433        ) else {
434            missing_refs.push(file_ref.path.clone());
435            continue;
436        };
437
438        if let Some(&file_idx) = path_index.get(&resolved_path) {
439            let package_uid = package.package_uid.clone();
440            if !files[file_idx].for_packages.contains(&package_uid) {
441                files[file_idx].for_packages.push(package_uid);
442            }
443        } else {
444            missing_refs.push(file_ref.path.clone());
445        }
446    }
447
448    record_missing_file_references(package, missing_refs);
449}
450
451fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
452    if missing_refs.is_empty() {
453        return;
454    }
455
456    missing_refs.sort();
457    let missing_refs_json: Vec<serde_json::Value> = missing_refs
458        .into_iter()
459        .map(|path| serde_json::json!({"path": path}))
460        .collect();
461
462    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
463    extra_data.insert(
464        "missing_file_references".to_string(),
465        serde_json::Value::Array(missing_refs_json),
466    );
467}
468
469fn find_file_reference_resolver(
470    files: &[FileInfo],
471    package: &Package,
472) -> Option<&'static FileReferenceResolverConfig> {
473    FILE_REFERENCE_RESOLVER_CONFIGS
474        .iter()
475        .find(|config| match config.kind {
476            FileReferenceResolverKind::AttachedManifest => {
477                config.datasource_ids.iter().any(|datasource_id| {
478                    files.iter().any(|file| {
479                        file.for_packages.contains(&package.package_uid)
480                            && file
481                                .package_data
482                                .iter()
483                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
484                    })
485                })
486            }
487            _ => config
488                .datasource_ids
489                .iter()
490                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
491        })
492}
493
/// Returns `true` when `path` ends with `/METADATA` or `/PKG-INFO`.
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|&suffix| path.ends_with(suffix))
}
497
498fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
499    let datafile_path = package
500        .datafile_paths
501        .iter()
502        .find(|path| is_python_metadata_layout(path))?;
503
504    if !package
505        .datasource_ids
506        .iter()
507        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
508    {
509        return None;
510    }
511
512    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
513        if let Some(idx) = datafile_path.rfind(segment) {
514            if datafile_path.ends_with("/METADATA") {
515                let root_end = idx + segment.len();
516                let root = datafile_path[..root_end].to_string();
517                return Some(PythonMetadataResolution {
518                    base_path: root.clone(),
519                    allowed_root: root,
520                });
521            }
522
523            if datafile_path.ends_with("/PKG-INFO") {
524                let parent = Path::new(datafile_path).parent()?;
525                let allowed_root = datafile_path[..idx + segment.len()].to_string();
526                return Some(PythonMetadataResolution {
527                    base_path: parent.to_string_lossy().to_string(),
528                    allowed_root,
529                });
530            }
531        }
532    }
533
534    if datafile_path.ends_with(".egg-info/PKG-INFO") {
535        let metadata_parent = Path::new(datafile_path).parent()?;
536        let project_root = metadata_parent.parent()?;
537        let project_root = project_root.to_string_lossy().to_string();
538        return Some(PythonMetadataResolution {
539            base_path: project_root.clone(),
540            allowed_root: project_root,
541        });
542    }
543
544    None
545}
546
/// Joins `relative` onto `base`, collapses `.` and `..` components lexically,
/// and returns the result only if it stays under `allowed_root`.
///
/// Leading `/` characters on `relative` are ignored so that it is always
/// treated as relative to `base`.
///
/// Returns `None` when:
/// - a `..` component would climb above the start of the joined path (there
///   is nothing left to pop), or
/// - the normalized path does not start with `allowed_root` (compared
///   component-wise, so a trailing `/` on `allowed_root` is irrelevant).
///
/// Rejecting over-long `..` chains matters when `allowed_root` is empty: an
/// empty root is a prefix of every path, so clamping `../x` to `x` would let
/// a reference that points outside the tree match an unrelated file inside it.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    use std::path::Component;

    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // `pop` returns false when there is no parent left to remove,
                // i.e. the reference escapes the start of the path.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    normalized
        .starts_with(allowed_root)
        .then(|| normalized.to_string_lossy().into_owned())
}
568
569fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
570    let path = datafile_path?;
571    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
572    Some(path[..idx].to_string())
573}
574
575pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
576    let yumdb_indices: Vec<usize> = packages
577        .iter()
578        .enumerate()
579        .filter_map(|(idx, package)| {
580            package
581                .datasource_ids
582                .contains(&DatasourceId::RpmYumdb)
583                .then_some(idx)
584        })
585        .collect();
586    let mut removal_indices = Vec::new();
587
588    for yumdb_idx in yumdb_indices {
589        let yumdb_package = packages[yumdb_idx].clone();
590        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
591            continue;
592        };
593        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
594        let yumdb_arch = yumdb_package
595            .qualifiers
596            .as_ref()
597            .and_then(|qualifiers| qualifiers.get("arch"));
598
599        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
600            if idx == yumdb_idx || !is_rpm_package(package) {
601                return None;
602            }
603
604            let config = find_db_config(package)?;
605            let datafile_path = package.datafile_paths.first()?;
606            let target_root = compute_root(datafile_path, config.path_suffix);
607            let target_arch = package
608                .qualifiers
609                .as_ref()
610                .and_then(|qualifiers| qualifiers.get("arch"));
611
612            (target_root == yumdb_root
613                && package.name == yumdb_package.name
614                && package.version == yumdb_package.version
615                && target_arch == yumdb_arch)
616                .then_some(idx)
617        }) else {
618            continue;
619        };
620
621        let target_package_uid = packages[target_idx].package_uid.clone();
622        {
623            let target = &mut packages[target_idx];
624            target
625                .datafile_paths
626                .extend(yumdb_package.datafile_paths.clone());
627            target
628                .datasource_ids
629                .extend(yumdb_package.datasource_ids.clone());
630
631            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
632                && !yumdb_extra.is_empty()
633            {
634                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
635                let mut merged_yumdb = extra_data
636                    .get("yumdb")
637                    .and_then(|value| value.as_object().cloned())
638                    .unwrap_or_default();
639                for (key, value) in yumdb_extra {
640                    merged_yumdb.insert(key, value);
641                }
642                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
643            }
644        }
645
646        for file in files.iter_mut() {
647            for package_uid in &mut file.for_packages {
648                if *package_uid == yumdb_package.package_uid {
649                    *package_uid = target_package_uid.clone();
650                }
651            }
652        }
653
654        removal_indices.push(yumdb_idx);
655    }
656
657    removal_indices.sort_unstable();
658    removal_indices.dedup();
659    for idx in removal_indices.into_iter().rev() {
660        packages.remove(idx);
661    }
662}
663
664fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
665    files
666        .iter()
667        .enumerate()
668        .map(|(idx, file)| (file.path.clone(), idx))
669        .collect()
670}
671
672fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
673    for config in DB_PATH_CONFIGS {
674        for &config_dsid in config.datasource_ids {
675            for &pkg_dsid in &package.datasource_ids {
676                if config_dsid == pkg_dsid {
677                    return Some(config);
678                }
679            }
680        }
681    }
682    None
683}
684
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`.
///
/// The result is empty when `suffix` does not occur in the path or when the
/// path begins with it.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}
697
698fn collect_file_references(
699    files: &[FileInfo],
700    path_index: &HashMap<String, usize>,
701    datafile_path: &str,
702    package_datasource_ids: &[DatasourceId],
703    config_datasource_ids: &[DatasourceId],
704    package_purl: Option<&str>,
705) -> Vec<crate::models::FileReference> {
706    let file_idx = match path_index.get(datafile_path) {
707        Some(&idx) => idx,
708        None => return Vec::new(),
709    };
710
711    let file = &files[file_idx];
712    let mut refs = Vec::new();
713
714    for pkg_data in &file.package_data {
715        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
716            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
717        });
718
719        if !dsid_matches {
720            continue;
721        }
722
723        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
724            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
725            _ => true,
726        };
727
728        if purl_matches {
729            refs.extend(pkg_data.file_references.clone());
730        }
731    }
732
733    refs
734}
735
736fn is_rpm_package(package: &Package) -> bool {
737    for &dsid in &package.datasource_ids {
738        for &rpm_dsid in RPM_DATASOURCE_IDS {
739            if rpm_dsid == dsid {
740                return true;
741            }
742        }
743    }
744    false
745}
746
747fn is_debian_installed_package(package: &Package) -> bool {
748    package
749        .datasource_ids
750        .contains(&DatasourceId::DebianInstalledStatusDb)
751        || package
752            .datasource_ids
753            .contains(&DatasourceId::DebianDistrolessInstalledDb)
754}
755
756fn collect_debian_installed_file_references(
757    files: &[FileInfo],
758    package: &Package,
759) -> Vec<crate::models::FileReference> {
760    let mut refs = Vec::new();
761
762    for file in files {
763        for pkg_data in &file.package_data {
764            let Some(dsid) = pkg_data.datasource_id else {
765                continue;
766            };
767            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
768                continue;
769            }
770
771            if pkg_data.name != package.name {
772                continue;
773            }
774            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
775                continue;
776            }
777            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
778                continue;
779            }
780
781            merge_file_references(&mut refs, pkg_data.file_references.clone());
782        }
783    }
784
785    refs
786}
787
788fn find_attached_manifest_file_references<'a>(
789    files: &'a [FileInfo],
790    package: &Package,
791    datasource_id: DatasourceId,
792) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
793    for file in files {
794        if !file.for_packages.contains(&package.package_uid) {
795            continue;
796        }
797
798        for pkg_data in &file.package_data {
799            if pkg_data.datasource_id == Some(datasource_id) {
800                return Some((&file.path, pkg_data.file_references.clone()));
801            }
802        }
803    }
804
805    None
806}
807
/// Decides whether supplemental package data may be attributed to a package
/// based on their namespaces.
///
/// A missing namespace on either side always matches. When both are present
/// they must be equal, except that a supplemental `debian` namespace also
/// matches a package namespace of `ubuntu` (not the other way round).
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(package)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    (supplemental == "debian" && package == "ubuntu") || supplemental == package
}
822
/// Decides whether supplemental package data may be attributed to a package
/// based on their `arch` qualifiers.
///
/// Supplemental data without an `arch` qualifier always matches. Otherwise
/// the package must carry an equal `arch` qualifier.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    /// Looks up the `arch` qualifier, if any.
    fn arch(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref().and_then(|entries| entries.get("arch"))
    }

    match arch(supplemental_qualifiers) {
        None => true,
        Some(supplemental_arch) => arch(package_qualifiers) == Some(supplemental_arch),
    }
}
840
841fn merge_file_references(
842    target: &mut Vec<crate::models::FileReference>,
843    incoming: Vec<crate::models::FileReference>,
844) {
845    for file_ref in incoming {
846        if let Some(existing) = target
847            .iter_mut()
848            .find(|existing| existing.path == file_ref.path)
849        {
850            if existing.size.is_none() {
851                existing.size = file_ref.size;
852            }
853            if existing.sha1.is_none() {
854                existing.sha1 = file_ref.sha1.clone();
855            }
856            if existing.md5.is_none() {
857                existing.md5 = file_ref.md5.clone();
858            }
859            if existing.sha256.is_none() {
860                existing.sha256 = file_ref.sha256.clone();
861            }
862            if existing.sha512.is_none() {
863                existing.sha512 = file_ref.sha512.clone();
864            }
865            if existing.extra_data.is_none() {
866                existing.extra_data = file_ref.extra_data.clone();
867            }
868        } else {
869            target.push(file_ref);
870        }
871    }
872}
873
874fn resolve_rpm_namespace(
875    files: &[FileInfo],
876    path_index: &HashMap<String, usize>,
877    root: &str,
878) -> Option<String> {
879    let os_release_paths = [
880        format!("{}etc/os-release", root),
881        format!("{}usr/lib/os-release", root),
882    ];
883
884    for os_release_path in &os_release_paths {
885        if let Some(&file_idx) = path_index.get(os_release_path) {
886            let file = &files[file_idx];
887            for pkg_data in &file.package_data {
888                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
889                    && let Some(namespace) = &pkg_data.namespace
890                {
891                    return Some(namespace.clone());
892                }
893            }
894        }
895    }
896
897    None
898}
899
/// Rebuilds a UID of the form `<purl>?uuid=<id>` or `<purl>...&uuid=<id>` on
/// top of `new_purl`, keeping everything that follows the `uuid=` marker.
///
/// The `uuid` qualifier is attached with `&` when `new_purl` already contains
/// a `?` and with `?` otherwise, regardless of which separator the old UID
/// used; this avoids producing a UID with two `?` characters. A `?uuid=`
/// marker takes precedence over `&uuid=` when both occur.
///
/// When `old_uid` contains neither marker it is returned unchanged.
fn replace_uid_base(old_uid: &str, new_purl: &str) -> String {
    let uuid_suffix = old_uid
        .split_once("?uuid=")
        .or_else(|| old_uid.split_once("&uuid="))
        .map(|(_, suffix)| suffix);

    match uuid_suffix {
        Some(suffix) => {
            let separator = if new_purl.contains('?') { '&' } else { '?' };
            format!("{new_purl}{separator}uuid={suffix}")
        }
        None => old_uid.to_string(),
    }
}
912
/// Returns `existing_purl` re-serialized with its namespace set to
/// `namespace`.
///
/// The purl is parsed and a new one is built from the parsed type and name;
/// the version, subpath and qualifiers are copied over when present.
/// Returns `None` if parsing fails or any builder step reports an error.
fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
    let parsed = PackageUrl::from_str(existing_purl).ok()?;
    // Build a fresh purl rather than modifying the parsed one in place.
    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;

    updated.with_namespace(namespace).ok()?;

    if let Some(version) = parsed.version() {
        updated.with_version(version).ok()?;
    }

    if let Some(subpath) = parsed.subpath() {
        updated.with_subpath(subpath).ok()?;
    }

    for (key, value) in parsed.qualifiers() {
        updated
            .add_qualifier(key.to_string(), value.to_string())
            .ok()?;
    }

    Some(updated.to_string())
}
935
936fn apply_rpm_namespace(
937    files: &mut [FileInfo],
938    package: &mut Package,
939    dependencies: &mut [TopLevelDependency],
940    namespace: &str,
941) {
942    let old_package_uid = package.package_uid.clone();
943
944    package.namespace = Some(namespace.to_string());
945
946    if let Some(current_purl) = package.purl.as_deref()
947        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
948    {
949        package.purl = Some(updated_purl.clone());
950        package.package_uid = replace_uid_base(&old_package_uid, &updated_purl);
951    }
952
953    for file in files.iter_mut() {
954        for package_uid in &mut file.for_packages {
955            if *package_uid == old_package_uid {
956                *package_uid = package.package_uid.clone();
957            }
958        }
959    }
960
961    for dep in dependencies.iter_mut() {
962        if dep.for_package_uid.as_deref() == Some(old_package_uid.as_str()) {
963            dep.for_package_uid = Some(package.package_uid.clone());
964        }
965
966        if dep.for_package_uid.as_deref() == Some(package.package_uid.as_str()) {
967            dep.namespace = Some(namespace.to_string());
968
969            if let Some(current_purl) = dep.purl.as_deref()
970                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
971            {
972                dep.purl = Some(updated_purl.clone());
973                dep.dependency_uid = replace_uid_base(&dep.dependency_uid, &updated_purl);
974            }
975        }
976    }
977}
978
979#[cfg(test)]
980#[path = "file_ref_resolve_test.rs"]
981mod tests;