Skip to main content

provenant/assembly/
file_ref_resolve.rs

1use std::collections::HashMap;
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
/// Pairs a set of datasource ids with the path suffix at which the matching
/// database file is expected.
///
/// `find_db_config` picks an entry by datasource id; `compute_root` then cuts
/// `path_suffix` off a datafile path to obtain the prefix that file references
/// are resolved against.
struct DbPathConfig {
    /// Datasource ids this entry applies to.
    datasource_ids: &'static [DatasourceId],
    /// Suffix searched for in the datafile path (the entries in
    /// `DB_PATH_CONFIGS` are written without a leading slash).
    path_suffix: &'static str,
}
13
/// Strategy used to map a package's file references onto scanned file paths.
///
/// `resolve_file_references` dispatches on this value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// Dispatched to `resolve_relative_to_datafile_parent`.
    About,
    /// Dispatched to `resolve_attached_manifest_file_references`.
    AttachedManifest,
    /// Dispatched to `resolve_conda_file_references`.
    CondaMeta,
    /// Dispatched to `resolve_debian_extracted_deb_file_references`.
    DebianExtractedDeb,
    /// Dispatched to `resolve_installed_db_file_references`.
    InstalledDb,
    /// Dispatched to `resolve_python_metadata_file_references`.
    PythonMetadata,
    /// Dispatched to `resolve_relative_to_datafile_parent`.
    RelativeToDatafileParent,
}
24
/// Binds a set of datasource ids to the resolver strategy that handles them.
struct FileReferenceResolverConfig {
    /// Datasource ids that select this resolver.
    datasource_ids: &'static [DatasourceId],
    /// Strategy that `resolve_file_references` runs for a matching package.
    kind: FileReferenceResolverKind,
}
29
/// Path suffixes of the installed-package databases, keyed by datasource id.
///
/// `find_db_config` returns the first entry sharing a datasource id with a
/// package. The suffix is later removed from the datafile path by
/// `compute_root`; everything before it becomes the resolution prefix.
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        // NOTE(review): the trailing slash suggests a directory holding
        // per-package files rather than a single database file — confirm.
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
56
/// Datasource ids that make `is_rpm_package` treat a package as an RPM package.
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Path suffix cut from a yumdb datafile path (via `compute_root`) in
/// `merge_rpm_yumdb_metadata`.
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path segment identifying a conda metadata datafile; the conda root is the
/// part of the path before its last occurrence (see `compute_conda_root`).
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasource ids handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
];
/// Directory segments that `find_python_metadata_root` searches for, in this
/// order, to locate the root of an installed Python tree.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasource ids whose file references are merged into an installed Debian
/// package by `collect_debian_installed_file_references`.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];
73
/// Datasource ids routed to the `InstalledDb` resolver by
/// `FILE_REFERENCE_RESOLVER_CONFIGS`. The list contains the same ids that
/// appear in `DB_PATH_CONFIGS`.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
82
/// Table mapping datasource ids to resolver strategies.
///
/// `find_file_reference_resolver` returns the first matching entry, so the
/// order of this table decides which resolver wins when several could apply.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    FileReferenceResolverConfig {
        // The AttachedManifest resolver passes only the first id of this list
        // on (see `resolve_file_references`).
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
        kind: FileReferenceResolverKind::DebianExtractedDeb,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
113
/// Result of locating the directories used to resolve Python file references.
///
/// Produced by `find_python_metadata_root` and consumed by
/// `normalize_relative_path`.
struct PythonMetadataResolution {
    /// Directory that each reference path is joined onto.
    base_path: String,
    /// Prefix the normalized path must start with to be accepted.
    allowed_root: String,
}
118
119pub fn resolve_file_references(
120    files: &mut [FileInfo],
121    packages: &mut [Package],
122    dependencies: &mut [TopLevelDependency],
123) {
124    let path_index = build_path_index(&*files);
125
126    for package in packages.iter_mut() {
127        let Some(config) = find_file_reference_resolver(files, package) else {
128            continue;
129        };
130
131        match config.kind {
132            FileReferenceResolverKind::About
133            | FileReferenceResolverKind::RelativeToDatafileParent => {
134                resolve_relative_to_datafile_parent(
135                    files,
136                    &path_index,
137                    package,
138                    config.datasource_ids,
139                );
140            }
141            FileReferenceResolverKind::AttachedManifest => {
142                resolve_attached_manifest_file_references(
143                    files,
144                    &path_index,
145                    package,
146                    config.datasource_ids[0],
147                );
148            }
149            FileReferenceResolverKind::CondaMeta => {
150                resolve_conda_file_references(files, &path_index, package);
151            }
152            FileReferenceResolverKind::DebianExtractedDeb => {
153                resolve_debian_extracted_deb_file_references(files, &path_index, package)
154            }
155            FileReferenceResolverKind::InstalledDb => {
156                resolve_installed_db_file_references(files, &path_index, package, dependencies);
157            }
158            FileReferenceResolverKind::PythonMetadata => {
159                resolve_python_metadata_file_references(files, &path_index, package);
160            }
161        }
162    }
163}
164
165fn resolve_relative_to_datafile_parent(
166    files: &mut [FileInfo],
167    path_index: &HashMap<String, usize>,
168    package: &mut Package,
169    datasource_ids: &[DatasourceId],
170) {
171    let Some(datafile_path) = package.datafile_paths.first() else {
172        return;
173    };
174    let root = Path::new(datafile_path)
175        .parent()
176        .map(|p| p.to_string_lossy().to_string())
177        .unwrap_or_default();
178
179    let file_references = collect_file_references(
180        files,
181        path_index,
182        datafile_path,
183        &package.datasource_ids,
184        datasource_ids,
185        package.purl.as_deref(),
186    );
187
188    let mut missing_refs = Vec::new();
189    for file_ref in &file_references {
190        let resolved_path = if root.is_empty() {
191            file_ref.path.clone()
192        } else {
193            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
194        };
195        if let Some(&file_idx) = path_index.get(&resolved_path) {
196            let package_uid = package.package_uid.clone();
197            if !files[file_idx].for_packages.contains(&package_uid) {
198                files[file_idx].for_packages.push(package_uid);
199            }
200        } else {
201            missing_refs.push(file_ref.path.clone());
202        }
203    }
204
205    record_missing_file_references(package, missing_refs);
206}
207
208fn resolve_attached_manifest_file_references(
209    files: &mut [FileInfo],
210    path_index: &HashMap<String, usize>,
211    package: &mut Package,
212    datasource_id: DatasourceId,
213) {
214    let Some((datafile_path, file_references)) =
215        find_attached_manifest_file_references(files, package, datasource_id)
216    else {
217        return;
218    };
219
220    let root = Path::new(datafile_path)
221        .parent()
222        .map(|p| p.to_string_lossy().to_string())
223        .unwrap_or_default();
224
225    let mut missing_refs = Vec::new();
226    for file_ref in &file_references {
227        let resolved_path = if root.is_empty() {
228            file_ref.path.clone()
229        } else {
230            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
231        };
232
233        if let Some(&file_idx) = path_index.get(&resolved_path) {
234            let package_uid = package.package_uid.clone();
235            if !files[file_idx].for_packages.contains(&package_uid) {
236                files[file_idx].for_packages.push(package_uid);
237            }
238        } else {
239            missing_refs.push(file_ref.path.clone());
240        }
241    }
242
243    record_missing_file_references(package, missing_refs);
244}
245
246fn resolve_conda_file_references(
247    files: &mut [FileInfo],
248    path_index: &HashMap<String, usize>,
249    package: &mut Package,
250) {
251    let Some(conda_meta_path) = package
252        .datafile_paths
253        .iter()
254        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
255    else {
256        return;
257    };
258    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
259        return;
260    };
261
262    let file_references = collect_file_references(
263        files,
264        path_index,
265        conda_meta_path,
266        &package.datasource_ids,
267        &[DatasourceId::CondaMetaJson],
268        package.purl.as_deref(),
269    );
270
271    let mut missing_refs = Vec::new();
272    for file_ref in &file_references {
273        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
274        if let Some(&file_idx) = path_index.get(&resolved_path) {
275            let package_uid = package.package_uid.clone();
276            if !files[file_idx].for_packages.contains(&package_uid) {
277                files[file_idx].for_packages.push(package_uid);
278            }
279        } else {
280            missing_refs.push(file_ref.path.clone());
281        }
282    }
283
284    record_missing_file_references(package, missing_refs);
285}
286
287fn resolve_installed_db_file_references(
288    files: &mut [FileInfo],
289    path_index: &HashMap<String, usize>,
290    package: &mut Package,
291    dependencies: &mut [TopLevelDependency],
292) {
293    let Some(config) = find_db_config(package) else {
294        return;
295    };
296    let Some(datafile_path) = package.datafile_paths.first() else {
297        return;
298    };
299
300    let root = compute_root(datafile_path, config.path_suffix);
301
302    let mut file_references = collect_file_references(
303        files,
304        path_index,
305        datafile_path,
306        &package.datasource_ids,
307        config.datasource_ids,
308        package.purl.as_deref(),
309    );
310
311    if is_debian_installed_package(package) {
312        merge_file_references(
313            &mut file_references,
314            collect_debian_installed_file_references(files, package),
315        );
316    }
317
318    let mut missing_refs = Vec::new();
319    for file_ref in &file_references {
320        let ref_path = file_ref.path.trim_start_matches('/');
321        let resolved_path = if root.is_empty() {
322            ref_path.to_string()
323        } else {
324            format!("{}{}", root, ref_path)
325        };
326
327        if let Some(&file_idx) = path_index.get(&resolved_path) {
328            let package_uid = package.package_uid.clone();
329            if !files[file_idx].for_packages.contains(&package_uid) {
330                files[file_idx].for_packages.push(package_uid);
331            }
332        } else {
333            missing_refs.push(file_ref.path.clone());
334        }
335    }
336
337    record_missing_file_references(package, missing_refs);
338
339    if is_rpm_package(package)
340        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
341    {
342        apply_rpm_namespace(files, package, dependencies, &namespace);
343    }
344}
345
346fn resolve_debian_extracted_deb_file_references(
347    files: &mut [FileInfo],
348    path_index: &HashMap<String, usize>,
349    package: &mut Package,
350) {
351    let Some(datafile_path) = package
352        .datafile_paths
353        .iter()
354        .find(|path| path.ends_with("/md5sums"))
355    else {
356        return;
357    };
358
359    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
360        return;
361    };
362    let Some(extracted_root) = md5sums_parent.parent() else {
363        return;
364    };
365    let root = extracted_root.to_string_lossy().to_string();
366
367    let Some(&file_idx) = path_index.get(datafile_path) else {
368        return;
369    };
370    let file_references: Vec<_> = files[file_idx]
371        .package_data
372        .iter()
373        .filter(|pkg_data| {
374            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
375        })
376        .flat_map(|pkg_data| pkg_data.file_references.clone())
377        .collect();
378
379    let mut missing_refs = Vec::new();
380    for file_ref in &file_references {
381        let resolved_path = if root.is_empty() {
382            file_ref.path.trim_start_matches('/').to_string()
383        } else {
384            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
385        };
386
387        if let Some(&file_idx) = path_index.get(&resolved_path) {
388            let package_uid = package.package_uid.clone();
389            if !files[file_idx].for_packages.contains(&package_uid) {
390                files[file_idx].for_packages.push(package_uid);
391            }
392        } else {
393            missing_refs.push(file_ref.path.clone());
394        }
395    }
396
397    record_missing_file_references(package, missing_refs);
398}
399
400fn resolve_python_metadata_file_references(
401    files: &mut [FileInfo],
402    path_index: &HashMap<String, usize>,
403    package: &mut Package,
404) {
405    let Some(python_resolution) = find_python_metadata_root(package) else {
406        return;
407    };
408    let Some(datafile_path) = package
409        .datafile_paths
410        .iter()
411        .find(|path| is_python_metadata_layout(path))
412    else {
413        return;
414    };
415
416    let file_references = collect_file_references(
417        files,
418        path_index,
419        datafile_path,
420        &package.datasource_ids,
421        PYTHON_METADATA_DATASOURCE_IDS,
422        package.purl.as_deref(),
423    );
424
425    let mut missing_refs = Vec::new();
426    for file_ref in &file_references {
427        let Some(resolved_path) = normalize_relative_path(
428            &python_resolution.base_path,
429            &python_resolution.allowed_root,
430            &file_ref.path,
431        ) else {
432            missing_refs.push(file_ref.path.clone());
433            continue;
434        };
435
436        if let Some(&file_idx) = path_index.get(&resolved_path) {
437            let package_uid = package.package_uid.clone();
438            if !files[file_idx].for_packages.contains(&package_uid) {
439                files[file_idx].for_packages.push(package_uid);
440            }
441        } else {
442            missing_refs.push(file_ref.path.clone());
443        }
444    }
445
446    record_missing_file_references(package, missing_refs);
447}
448
449fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
450    if missing_refs.is_empty() {
451        return;
452    }
453
454    missing_refs.sort();
455    let missing_refs_json: Vec<serde_json::Value> = missing_refs
456        .into_iter()
457        .map(|path| serde_json::json!({"path": path}))
458        .collect();
459
460    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
461    extra_data.insert(
462        "missing_file_references".to_string(),
463        serde_json::Value::Array(missing_refs_json),
464    );
465}
466
467fn find_file_reference_resolver(
468    files: &[FileInfo],
469    package: &Package,
470) -> Option<&'static FileReferenceResolverConfig> {
471    FILE_REFERENCE_RESOLVER_CONFIGS
472        .iter()
473        .find(|config| match config.kind {
474            FileReferenceResolverKind::AttachedManifest => {
475                config.datasource_ids.iter().any(|datasource_id| {
476                    files.iter().any(|file| {
477                        file.for_packages.contains(&package.package_uid)
478                            && file
479                                .package_data
480                                .iter()
481                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
482                    })
483                })
484            }
485            _ => config
486                .datasource_ids
487                .iter()
488                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
489        })
490}
491
/// Reports whether `path` ends with a file named `METADATA` or `PKG-INFO`
/// that sits inside some directory (the leading `/` is part of the match).
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
495
496fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
497    let datafile_path = package
498        .datafile_paths
499        .iter()
500        .find(|path| is_python_metadata_layout(path))?;
501
502    if !package
503        .datasource_ids
504        .iter()
505        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
506    {
507        return None;
508    }
509
510    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
511        if let Some(idx) = datafile_path.rfind(segment) {
512            if datafile_path.ends_with("/METADATA") {
513                let root_end = idx + segment.len();
514                let root = datafile_path[..root_end].to_string();
515                return Some(PythonMetadataResolution {
516                    base_path: root.clone(),
517                    allowed_root: root,
518                });
519            }
520
521            if datafile_path.ends_with("/PKG-INFO") {
522                let parent = Path::new(datafile_path).parent()?;
523                let allowed_root = datafile_path[..idx + segment.len()].to_string();
524                return Some(PythonMetadataResolution {
525                    base_path: parent.to_string_lossy().to_string(),
526                    allowed_root,
527                });
528            }
529        }
530    }
531
532    if datafile_path.ends_with(".egg-info/PKG-INFO") {
533        let metadata_parent = Path::new(datafile_path).parent()?;
534        let project_root = metadata_parent.parent()?;
535        let project_root = project_root.to_string_lossy().to_string();
536        return Some(PythonMetadataResolution {
537            base_path: project_root.clone(),
538            allowed_root: project_root,
539        });
540    }
541
542    None
543}
544
/// Joins `relative` onto `base`, collapses `.` and `..` components lexically,
/// and returns the result only if it stays under `allowed_root`.
///
/// Leading slashes of `relative` are removed first, so it is always treated
/// as relative to `base`. No filesystem access takes place.
///
/// Returns `None` when
/// * a `..` component would climb above the top of the joined path (nothing
///   is left to pop), or
/// * the normalized path does not start with `allowed_root`.
///
/// The first case used to be ignored silently: the excess `..` was dropped,
/// so a reference such as `../x` with an empty base and root resolved to the
/// unrelated in-tree path `x`.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    use std::path::{Component, PathBuf};

    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = PathBuf::new();

    for component in joined.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // `pop` returns false when there is no parent left, meaning
                // the reference escapes above the top of the tree.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    normalized
        .starts_with(allowed_root)
        .then(|| normalized.to_string_lossy().into_owned())
}
566
567fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
568    let path = datafile_path?;
569    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
570    Some(path[..idx].to_string())
571}
572
/// Folds packages detected from yumdb entries into the matching RPM
/// installed-database package.
///
/// A yumdb package (datasource id `RpmYumdb`) matches an RPM package when both
/// resolve to the same root prefix and have equal name, version and `arch`
/// qualifier. On a match the target receives the yumdb package's datafile
/// paths and datasource ids, the yumdb `extra_data` is merged under the
/// target's `"yumdb"` key, files pointing at the yumdb package are re-pointed
/// to the target, and the yumdb package is removed from `packages`.
/// Yumdb packages without a match are left in place.
pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
    // Indices are collected up front; removals are deferred to the end so
    // these indices stay valid during the loop.
    let yumdb_indices: Vec<usize> = packages
        .iter()
        .enumerate()
        .filter_map(|(idx, package)| {
            package
                .datasource_ids
                .contains(&DatasourceId::RpmYumdb)
                .then_some(idx)
        })
        .collect();
    let mut removal_indices = Vec::new();

    for yumdb_idx in yumdb_indices {
        // Work on a clone so `packages` can be borrowed mutably below.
        let yumdb_package = packages[yumdb_idx].clone();
        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
            continue;
        };
        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
        let yumdb_arch = yumdb_package
            .qualifiers
            .as_ref()
            .and_then(|qualifiers| qualifiers.get("arch"));

        // First RPM package (other than the yumdb one) with the same root,
        // name, version and arch.
        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
            if idx == yumdb_idx || !is_rpm_package(package) {
                return None;
            }

            let config = find_db_config(package)?;
            let datafile_path = package.datafile_paths.first()?;
            let target_root = compute_root(datafile_path, config.path_suffix);
            let target_arch = package
                .qualifiers
                .as_ref()
                .and_then(|qualifiers| qualifiers.get("arch"));

            (target_root == yumdb_root
                && package.name == yumdb_package.name
                && package.version == yumdb_package.version
                && target_arch == yumdb_arch)
                .then_some(idx)
        }) else {
            continue;
        };

        let target_package_uid = packages[target_idx].package_uid.clone();
        {
            let target = &mut packages[target_idx];
            target
                .datafile_paths
                .extend(yumdb_package.datafile_paths.clone());
            target
                .datasource_ids
                .extend(yumdb_package.datasource_ids.clone());

            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
                && !yumdb_extra.is_empty()
            {
                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
                // Start from an existing "yumdb" object, if any; incoming keys
                // overwrite existing ones.
                let mut merged_yumdb = extra_data
                    .get("yumdb")
                    .and_then(|value| value.as_object().cloned())
                    .unwrap_or_default();
                for (key, value) in yumdb_extra {
                    merged_yumdb.insert(key, value);
                }
                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
            }
        }

        // Re-point files that referenced the yumdb package to the target.
        for file in files.iter_mut() {
            for package_uid in &mut file.for_packages {
                if *package_uid == yumdb_package.package_uid {
                    *package_uid = target_package_uid.clone();
                }
            }
        }

        removal_indices.push(yumdb_idx);
    }

    // Remove from the back so earlier indices are not shifted.
    removal_indices.sort_unstable();
    removal_indices.dedup();
    for idx in removal_indices.into_iter().rev() {
        packages.remove(idx);
    }
}
661
662fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
663    files
664        .iter()
665        .enumerate()
666        .map(|(idx, file)| (file.path.clone(), idx))
667        .collect()
668}
669
670fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
671    for config in DB_PATH_CONFIGS {
672        for &config_dsid in config.datasource_ids {
673            for &pkg_dsid in &package.datasource_ids {
674                if config_dsid == pkg_dsid {
675                    return Some(config);
676                }
677            }
678        }
679    }
680    None
681}
682
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`.
///
/// The result is empty when `suffix` does not occur in the path or occurs at
/// its very beginning.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    match datafile_path.rfind(suffix) {
        Some(suffix_start) => datafile_path[..suffix_start].to_owned(),
        None => String::new(),
    }
}
695
696fn collect_file_references(
697    files: &[FileInfo],
698    path_index: &HashMap<String, usize>,
699    datafile_path: &str,
700    package_datasource_ids: &[DatasourceId],
701    config_datasource_ids: &[DatasourceId],
702    package_purl: Option<&str>,
703) -> Vec<crate::models::FileReference> {
704    let file_idx = match path_index.get(datafile_path) {
705        Some(&idx) => idx,
706        None => return Vec::new(),
707    };
708
709    let file = &files[file_idx];
710    let mut refs = Vec::new();
711
712    for pkg_data in &file.package_data {
713        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
714            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
715        });
716
717        if !dsid_matches {
718            continue;
719        }
720
721        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
722            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
723            _ => true,
724        };
725
726        if purl_matches {
727            refs.extend(pkg_data.file_references.clone());
728        }
729    }
730
731    refs
732}
733
734fn is_rpm_package(package: &Package) -> bool {
735    for &dsid in &package.datasource_ids {
736        for &rpm_dsid in RPM_DATASOURCE_IDS {
737            if rpm_dsid == dsid {
738                return true;
739            }
740        }
741    }
742    false
743}
744
745fn is_debian_installed_package(package: &Package) -> bool {
746    package
747        .datasource_ids
748        .contains(&DatasourceId::DebianInstalledStatusDb)
749        || package
750            .datasource_ids
751            .contains(&DatasourceId::DebianDistrolessInstalledDb)
752}
753
754fn collect_debian_installed_file_references(
755    files: &[FileInfo],
756    package: &Package,
757) -> Vec<crate::models::FileReference> {
758    let mut refs = Vec::new();
759
760    for file in files {
761        for pkg_data in &file.package_data {
762            let Some(dsid) = pkg_data.datasource_id else {
763                continue;
764            };
765            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
766                continue;
767            }
768
769            if pkg_data.name != package.name {
770                continue;
771            }
772            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
773                continue;
774            }
775            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
776                continue;
777            }
778
779            merge_file_references(&mut refs, pkg_data.file_references.clone());
780        }
781    }
782
783    refs
784}
785
786fn find_attached_manifest_file_references<'a>(
787    files: &'a [FileInfo],
788    package: &Package,
789    datasource_id: DatasourceId,
790) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
791    for file in files {
792        if !file.for_packages.contains(&package.package_uid) {
793            continue;
794        }
795
796        for pkg_data in &file.package_data {
797            if pkg_data.datasource_id == Some(datasource_id) {
798                return Some((&file.path, pkg_data.file_references.clone()));
799            }
800        }
801    }
802
803    None
804}
805
/// Decides whether supplemental package data may be attributed to a package
/// based on their namespaces.
///
/// A missing namespace on either side matches anything. When both are
/// present they must be equal, except that a supplemental `"debian"`
/// namespace is also accepted for an `"ubuntu"` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(installed)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    supplemental == installed || (supplemental == "debian" && installed == "ubuntu")
}
820
/// Decides whether supplemental package data may be attributed to a package
/// based on the `arch` qualifier.
///
/// Supplemental data without an `arch` qualifier matches any package. When it
/// has one, the package must carry the same `arch` value.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    /// Looks up the `arch` qualifier, if any.
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref().and_then(|map| map.get("arch"))
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(required_arch) => arch_of(package_qualifiers) == Some(required_arch),
    }
}
838
839fn merge_file_references(
840    target: &mut Vec<crate::models::FileReference>,
841    incoming: Vec<crate::models::FileReference>,
842) {
843    for file_ref in incoming {
844        if let Some(existing) = target
845            .iter_mut()
846            .find(|existing| existing.path == file_ref.path)
847        {
848            if existing.size.is_none() {
849                existing.size = file_ref.size;
850            }
851            if existing.sha1.is_none() {
852                existing.sha1 = file_ref.sha1.clone();
853            }
854            if existing.md5.is_none() {
855                existing.md5 = file_ref.md5.clone();
856            }
857            if existing.sha256.is_none() {
858                existing.sha256 = file_ref.sha256.clone();
859            }
860            if existing.sha512.is_none() {
861                existing.sha512 = file_ref.sha512.clone();
862            }
863            if existing.extra_data.is_none() {
864                existing.extra_data = file_ref.extra_data.clone();
865            }
866        } else {
867            target.push(file_ref);
868        }
869    }
870}
871
872fn resolve_rpm_namespace(
873    files: &[FileInfo],
874    path_index: &HashMap<String, usize>,
875    root: &str,
876) -> Option<String> {
877    let os_release_paths = [
878        format!("{}etc/os-release", root),
879        format!("{}usr/lib/os-release", root),
880    ];
881
882    for os_release_path in &os_release_paths {
883        if let Some(&file_idx) = path_index.get(os_release_path) {
884            let file = &files[file_idx];
885            for pkg_data in &file.package_data {
886                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
887                    && let Some(namespace) = &pkg_data.namespace
888                {
889                    return Some(namespace.clone());
890                }
891            }
892        }
893    }
894
895    None
896}
897
/// Rebuilds a uid by swapping its purl part for `new_purl` while keeping the
/// `uuid` suffix.
///
/// * `...?uuid=<rest>` becomes `<new_purl>?uuid=<rest>`.
/// * `...&uuid=<rest>` becomes `<new_purl>` followed by `&` or `?` (depending
///   on whether `new_purl` already contains a `?`) and `uuid=<rest>`.
/// * A uid without either marker is returned unchanged.
fn replace_uid_base(old_uid: &str, new_purl: &str) -> String {
    match old_uid.split_once("?uuid=") {
        Some((_, uuid)) => format!("{new_purl}?uuid={uuid}"),
        None => match old_uid.split_once("&uuid=") {
            Some((_, uuid)) => {
                let joiner = if new_purl.contains('?') { "&" } else { "?" };
                format!("{new_purl}{joiner}uuid={uuid}")
            }
            None => old_uid.to_owned(),
        },
    }
}
910
911fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
912    let parsed = PackageUrl::from_str(existing_purl).ok()?;
913    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;
914
915    updated.with_namespace(namespace).ok()?;
916
917    if let Some(version) = parsed.version() {
918        updated.with_version(version).ok()?;
919    }
920
921    if let Some(subpath) = parsed.subpath() {
922        updated.with_subpath(subpath).ok()?;
923    }
924
925    for (key, value) in parsed.qualifiers() {
926        updated
927            .add_qualifier(key.to_string(), value.to_string())
928            .ok()?;
929    }
930
931    Some(updated.to_string())
932}
933
934fn apply_rpm_namespace(
935    files: &mut [FileInfo],
936    package: &mut Package,
937    dependencies: &mut [TopLevelDependency],
938    namespace: &str,
939) {
940    let old_package_uid = package.package_uid.clone();
941
942    package.namespace = Some(namespace.to_string());
943
944    if let Some(current_purl) = package.purl.as_deref()
945        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
946    {
947        package.purl = Some(updated_purl.clone());
948        package.package_uid = replace_uid_base(&old_package_uid, &updated_purl);
949    }
950
951    for file in files.iter_mut() {
952        for package_uid in &mut file.for_packages {
953            if *package_uid == old_package_uid {
954                *package_uid = package.package_uid.clone();
955            }
956        }
957    }
958
959    for dep in dependencies.iter_mut() {
960        if dep.for_package_uid.as_deref() == Some(old_package_uid.as_str()) {
961            dep.for_package_uid = Some(package.package_uid.clone());
962        }
963
964        if dep.for_package_uid.as_deref() == Some(package.package_uid.as_str()) {
965            dep.namespace = Some(namespace.to_string());
966
967            if let Some(current_purl) = dep.purl.as_deref()
968                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
969            {
970                dep.purl = Some(updated_purl.clone());
971                dep.dependency_uid = replace_uid_base(&dep.dependency_uid, &updated_purl);
972            }
973        }
974    }
975}
976
// Unit tests live in the file named by the `#[path]` attribute and are
// compiled only for test builds.
#[cfg(test)]
#[path = "file_ref_resolve_test.rs"]
mod tests;