Skip to main content

provenant/assembly/
file_ref_resolve.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
9struct DbPathConfig {
10    datasource_ids: &'static [DatasourceId],
11    path_suffix: &'static str,
12}
13
14#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
15enum FileReferenceResolverKind {
16    About,
17    AttachedManifest,
18    CondaMeta,
19    DebianExtractedDeb,
20    InstalledDb,
21    PythonMetadata,
22    RelativeToDatafileParent,
23}
24
25struct FileReferenceResolverConfig {
26    datasource_ids: &'static [DatasourceId],
27    kind: FileReferenceResolverKind,
28}
29
30const DB_PATH_CONFIGS: &[DbPathConfig] = &[
31    DbPathConfig {
32        datasource_ids: &[DatasourceId::AlpineInstalledDb],
33        path_suffix: "lib/apk/db/installed",
34    },
35    DbPathConfig {
36        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
37        path_suffix: "var/lib/rpm/Packages",
38    },
39    DbPathConfig {
40        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
41        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
42    },
43    DbPathConfig {
44        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
45        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
46    },
47    DbPathConfig {
48        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
49        path_suffix: "var/lib/dpkg/status",
50    },
51    DbPathConfig {
52        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
53        path_suffix: "var/lib/dpkg/status.d/",
54    },
55];
56
57const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
58    DatasourceId::RpmInstalledDatabaseBdb,
59    DatasourceId::RpmInstalledDatabaseNdb,
60    DatasourceId::RpmInstalledDatabaseSqlite,
61];
62const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
63const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
64const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
65    DatasourceId::PypiWheelMetadata,
66    DatasourceId::PypiSdistPkginfo,
67    DatasourceId::PypiEggPkginfo,
68    DatasourceId::PypiEditableEggPkginfo,
69];
70const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
71const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
72    DatasourceId::DebianInstalledFilesList,
73    DatasourceId::DebianInstalledMd5Sums,
74];
75
76const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
77    DatasourceId::AlpineInstalledDb,
78    DatasourceId::RpmInstalledDatabaseBdb,
79    DatasourceId::RpmInstalledDatabaseNdb,
80    DatasourceId::RpmInstalledDatabaseSqlite,
81    DatasourceId::DebianInstalledStatusDb,
82    DatasourceId::DebianDistrolessInstalledDb,
83];
84
85const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
86    FileReferenceResolverConfig {
87        datasource_ids: &[DatasourceId::AboutFile],
88        kind: FileReferenceResolverKind::About,
89    },
90    FileReferenceResolverConfig {
91        datasource_ids: &[DatasourceId::CpanManifest],
92        kind: FileReferenceResolverKind::AttachedManifest,
93    },
94    FileReferenceResolverConfig {
95        datasource_ids: &[DatasourceId::CondaMetaJson],
96        kind: FileReferenceResolverKind::CondaMeta,
97    },
98    FileReferenceResolverConfig {
99        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
100        kind: FileReferenceResolverKind::DebianExtractedDeb,
101    },
102    FileReferenceResolverConfig {
103        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
104        kind: FileReferenceResolverKind::InstalledDb,
105    },
106    FileReferenceResolverConfig {
107        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
108        kind: FileReferenceResolverKind::PythonMetadata,
109    },
110    FileReferenceResolverConfig {
111        datasource_ids: &[DatasourceId::GradleModule],
112        kind: FileReferenceResolverKind::RelativeToDatafileParent,
113    },
114];
115
/// Paths needed to resolve the file references of a Python metadata file.
struct PythonMetadataResolution {
    /// Directory that relative file references are joined onto.
    base_path: String,
    /// Directory a normalized reference must stay within to be accepted.
    allowed_root: String,
}
120
121pub fn resolve_file_references(
122    files: &mut [FileInfo],
123    packages: &mut [Package],
124    dependencies: &mut [TopLevelDependency],
125) {
126    if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
127        return;
128    }
129
130    let path_index = build_path_index(&*files);
131
132    for package in packages.iter_mut() {
133        let Some(config) = find_file_reference_resolver(files, package) else {
134            continue;
135        };
136
137        match config.kind {
138            FileReferenceResolverKind::About
139            | FileReferenceResolverKind::RelativeToDatafileParent => {
140                resolve_relative_to_datafile_parent(
141                    files,
142                    &path_index,
143                    package,
144                    config.datasource_ids,
145                );
146            }
147            FileReferenceResolverKind::AttachedManifest => {
148                resolve_attached_manifest_file_references(
149                    files,
150                    &path_index,
151                    package,
152                    config.datasource_ids[0],
153                );
154            }
155            FileReferenceResolverKind::CondaMeta => {
156                resolve_conda_file_references(files, &path_index, package);
157            }
158            FileReferenceResolverKind::DebianExtractedDeb => {
159                resolve_debian_extracted_deb_file_references(files, &path_index, package)
160            }
161            FileReferenceResolverKind::InstalledDb => {
162                resolve_installed_db_file_references(files, &path_index, package, dependencies);
163            }
164            FileReferenceResolverKind::PythonMetadata => {
165                resolve_python_metadata_file_references(files, &path_index, package);
166            }
167        }
168    }
169}
170
171pub(super) fn has_relevant_file_reference_datasource_ids(
172    file_datasource_ids: &HashSet<DatasourceId>,
173) -> bool {
174    FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
175        config
176            .datasource_ids
177            .iter()
178            .any(|datasource_id| file_datasource_ids.contains(datasource_id))
179    })
180}
181
182fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
183    let file_datasource_ids: HashSet<DatasourceId> = files
184        .iter()
185        .flat_map(|file| {
186            file.package_data
187                .iter()
188                .filter_map(|package_data| package_data.datasource_id)
189        })
190        .collect();
191
192    has_relevant_file_reference_datasource_ids(&file_datasource_ids)
193}
194
195fn resolve_relative_to_datafile_parent(
196    files: &mut [FileInfo],
197    path_index: &HashMap<String, usize>,
198    package: &mut Package,
199    datasource_ids: &[DatasourceId],
200) {
201    let Some(datafile_path) = package.datafile_paths.first() else {
202        return;
203    };
204    let root = Path::new(datafile_path)
205        .parent()
206        .map(|p| p.to_string_lossy().to_string())
207        .unwrap_or_default();
208
209    let file_references = collect_file_references(
210        files,
211        path_index,
212        datafile_path,
213        &package.datasource_ids,
214        datasource_ids,
215        package.purl.as_deref(),
216    );
217
218    let mut missing_refs = Vec::new();
219    for file_ref in &file_references {
220        let resolved_path = if root.is_empty() {
221            file_ref.path.clone()
222        } else {
223            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
224        };
225        if let Some(&file_idx) = path_index.get(&resolved_path) {
226            let package_uid = package.package_uid.clone();
227            if !files[file_idx].for_packages.contains(&package_uid) {
228                files[file_idx].for_packages.push(package_uid);
229            }
230        } else {
231            missing_refs.push(file_ref.path.clone());
232        }
233    }
234
235    record_missing_file_references(package, missing_refs);
236}
237
238fn resolve_attached_manifest_file_references(
239    files: &mut [FileInfo],
240    path_index: &HashMap<String, usize>,
241    package: &mut Package,
242    datasource_id: DatasourceId,
243) {
244    let Some((datafile_path, file_references)) =
245        find_attached_manifest_file_references(files, package, datasource_id)
246    else {
247        return;
248    };
249
250    let root = Path::new(datafile_path)
251        .parent()
252        .map(|p| p.to_string_lossy().to_string())
253        .unwrap_or_default();
254
255    let mut missing_refs = Vec::new();
256    for file_ref in &file_references {
257        let resolved_path = if root.is_empty() {
258            file_ref.path.clone()
259        } else {
260            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
261        };
262
263        if let Some(&file_idx) = path_index.get(&resolved_path) {
264            let package_uid = package.package_uid.clone();
265            if !files[file_idx].for_packages.contains(&package_uid) {
266                files[file_idx].for_packages.push(package_uid);
267            }
268        } else {
269            missing_refs.push(file_ref.path.clone());
270        }
271    }
272
273    record_missing_file_references(package, missing_refs);
274}
275
276fn resolve_conda_file_references(
277    files: &mut [FileInfo],
278    path_index: &HashMap<String, usize>,
279    package: &mut Package,
280) {
281    let Some(conda_meta_path) = package
282        .datafile_paths
283        .iter()
284        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
285    else {
286        return;
287    };
288    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
289        return;
290    };
291
292    let file_references = collect_file_references(
293        files,
294        path_index,
295        conda_meta_path,
296        &package.datasource_ids,
297        &[DatasourceId::CondaMetaJson],
298        package.purl.as_deref(),
299    );
300
301    let mut missing_refs = Vec::new();
302    for file_ref in &file_references {
303        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
304        if let Some(&file_idx) = path_index.get(&resolved_path) {
305            let package_uid = package.package_uid.clone();
306            if !files[file_idx].for_packages.contains(&package_uid) {
307                files[file_idx].for_packages.push(package_uid);
308            }
309        } else {
310            missing_refs.push(file_ref.path.clone());
311        }
312    }
313
314    record_missing_file_references(package, missing_refs);
315}
316
317fn resolve_installed_db_file_references(
318    files: &mut [FileInfo],
319    path_index: &HashMap<String, usize>,
320    package: &mut Package,
321    dependencies: &mut [TopLevelDependency],
322) {
323    let Some(config) = find_db_config(package) else {
324        return;
325    };
326    let Some(datafile_path) = package.datafile_paths.first() else {
327        return;
328    };
329
330    let root = compute_root(datafile_path, config.path_suffix);
331
332    let mut file_references = collect_file_references(
333        files,
334        path_index,
335        datafile_path,
336        &package.datasource_ids,
337        config.datasource_ids,
338        package.purl.as_deref(),
339    );
340
341    if is_debian_installed_package(package) {
342        merge_file_references(
343            &mut file_references,
344            collect_debian_installed_file_references(files, package),
345        );
346    }
347
348    let mut missing_refs = Vec::new();
349    for file_ref in &file_references {
350        let ref_path = file_ref.path.trim_start_matches('/');
351        let resolved_path = if root.is_empty() {
352            ref_path.to_string()
353        } else {
354            format!("{}{}", root, ref_path)
355        };
356
357        if let Some(&file_idx) = path_index.get(&resolved_path) {
358            let package_uid = package.package_uid.clone();
359            if !files[file_idx].for_packages.contains(&package_uid) {
360                files[file_idx].for_packages.push(package_uid);
361            }
362        } else {
363            missing_refs.push(file_ref.path.clone());
364        }
365    }
366
367    record_missing_file_references(package, missing_refs);
368
369    if is_rpm_package(package)
370        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
371    {
372        apply_rpm_namespace(files, package, dependencies, &namespace);
373    }
374}
375
376fn resolve_debian_extracted_deb_file_references(
377    files: &mut [FileInfo],
378    path_index: &HashMap<String, usize>,
379    package: &mut Package,
380) {
381    let Some(datafile_path) = package
382        .datafile_paths
383        .iter()
384        .find(|path| path.ends_with("/md5sums"))
385    else {
386        return;
387    };
388
389    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
390        return;
391    };
392    let Some(extracted_root) = md5sums_parent.parent() else {
393        return;
394    };
395    let root = extracted_root.to_string_lossy().to_string();
396
397    let Some(&file_idx) = path_index.get(datafile_path) else {
398        return;
399    };
400    let file_references: Vec<_> = files[file_idx]
401        .package_data
402        .iter()
403        .filter(|pkg_data| {
404            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
405        })
406        .flat_map(|pkg_data| pkg_data.file_references.clone())
407        .collect();
408
409    let mut missing_refs = Vec::new();
410    for file_ref in &file_references {
411        let resolved_path = if root.is_empty() {
412            file_ref.path.trim_start_matches('/').to_string()
413        } else {
414            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
415        };
416
417        if let Some(&file_idx) = path_index.get(&resolved_path) {
418            let package_uid = package.package_uid.clone();
419            if !files[file_idx].for_packages.contains(&package_uid) {
420                files[file_idx].for_packages.push(package_uid);
421            }
422        } else {
423            missing_refs.push(file_ref.path.clone());
424        }
425    }
426
427    record_missing_file_references(package, missing_refs);
428}
429
430fn resolve_python_metadata_file_references(
431    files: &mut [FileInfo],
432    path_index: &HashMap<String, usize>,
433    package: &mut Package,
434) {
435    let Some(python_resolution) = find_python_metadata_root(package) else {
436        return;
437    };
438    let Some(datafile_path) = package
439        .datafile_paths
440        .iter()
441        .find(|path| is_python_metadata_layout(path))
442    else {
443        return;
444    };
445
446    let file_references = collect_file_references(
447        files,
448        path_index,
449        datafile_path,
450        &package.datasource_ids,
451        PYTHON_METADATA_DATASOURCE_IDS,
452        package.purl.as_deref(),
453    );
454
455    let mut missing_refs = Vec::new();
456    for file_ref in &file_references {
457        let Some(resolved_path) = normalize_relative_path(
458            &python_resolution.base_path,
459            &python_resolution.allowed_root,
460            &file_ref.path,
461        ) else {
462            missing_refs.push(file_ref.path.clone());
463            continue;
464        };
465
466        if let Some(&file_idx) = path_index.get(&resolved_path) {
467            let package_uid = package.package_uid.clone();
468            if !files[file_idx].for_packages.contains(&package_uid) {
469                files[file_idx].for_packages.push(package_uid);
470            }
471        } else {
472            missing_refs.push(file_ref.path.clone());
473        }
474    }
475
476    record_missing_file_references(package, missing_refs);
477}
478
479fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
480    if missing_refs.is_empty() {
481        return;
482    }
483
484    missing_refs.sort();
485    let missing_refs_json: Vec<serde_json::Value> = missing_refs
486        .into_iter()
487        .map(|path| serde_json::json!({"path": path}))
488        .collect();
489
490    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
491    extra_data.insert(
492        "missing_file_references".to_string(),
493        serde_json::Value::Array(missing_refs_json),
494    );
495}
496
497fn find_file_reference_resolver(
498    files: &[FileInfo],
499    package: &Package,
500) -> Option<&'static FileReferenceResolverConfig> {
501    FILE_REFERENCE_RESOLVER_CONFIGS
502        .iter()
503        .find(|config| match config.kind {
504            FileReferenceResolverKind::AttachedManifest => {
505                config.datasource_ids.iter().any(|datasource_id| {
506                    files.iter().any(|file| {
507                        file.for_packages.contains(&package.package_uid)
508                            && file
509                                .package_data
510                                .iter()
511                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
512                    })
513                })
514            }
515            _ => config
516                .datasource_ids
517                .iter()
518                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
519        })
520}
521
/// Returns `true` when `path` ends in a `/METADATA` or `/PKG-INFO` file name.
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|&suffix| path.ends_with(suffix))
}
525
526fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
527    let datafile_path = package
528        .datafile_paths
529        .iter()
530        .find(|path| is_python_metadata_layout(path))?;
531
532    if !package
533        .datasource_ids
534        .iter()
535        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
536    {
537        return None;
538    }
539
540    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
541        if let Some(idx) = datafile_path.rfind(segment) {
542            if datafile_path.ends_with("/METADATA") {
543                let root_end = idx + segment.len();
544                let root = datafile_path[..root_end].to_string();
545                return Some(PythonMetadataResolution {
546                    base_path: root.clone(),
547                    allowed_root: root,
548                });
549            }
550
551            if datafile_path.ends_with("/PKG-INFO") {
552                let parent = Path::new(datafile_path).parent()?;
553                let allowed_root = datafile_path[..idx + segment.len()].to_string();
554                return Some(PythonMetadataResolution {
555                    base_path: parent.to_string_lossy().to_string(),
556                    allowed_root,
557                });
558            }
559        }
560    }
561
562    if datafile_path.ends_with(".egg-info/PKG-INFO") {
563        let metadata_parent = Path::new(datafile_path).parent()?;
564        let project_root = metadata_parent.parent()?;
565        let project_root = project_root.to_string_lossy().to_string();
566        return Some(PythonMetadataResolution {
567            base_path: project_root.clone(),
568            allowed_root: project_root,
569        });
570    }
571
572    None
573}
574
/// Joins `relative` onto `base`, lexically collapses `.` and `..`
/// components, and returns the result only if it stays inside `allowed_root`.
///
/// Leading slashes on `relative` are stripped so it is always treated as
/// relative to `base`. The filesystem is never consulted.
///
/// Returns `None` when:
/// * a `..` component would climb above the top of the joined path, or
/// * the normalized path is not located under `allowed_root`.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` fails when nothing is left to remove. Ignoring that
                // would silently re-anchor an escaping reference at the top
                // of the tree (e.g. "../x" resolving to "x"), so reject it.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    // Component-wise prefix check, so "a/bc" is not considered inside "a/b".
    normalized
        .starts_with(Path::new(allowed_root))
        .then(|| normalized.to_string_lossy().into_owned())
}
596
597fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
598    let path = datafile_path?;
599    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
600    Some(path[..idx].to_string())
601}
602
603pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
604    let yumdb_indices: Vec<usize> = packages
605        .iter()
606        .enumerate()
607        .filter_map(|(idx, package)| {
608            package
609                .datasource_ids
610                .contains(&DatasourceId::RpmYumdb)
611                .then_some(idx)
612        })
613        .collect();
614    let mut removal_indices = Vec::new();
615
616    for yumdb_idx in yumdb_indices {
617        let yumdb_package = packages[yumdb_idx].clone();
618        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
619            continue;
620        };
621        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
622        let yumdb_arch = yumdb_package
623            .qualifiers
624            .as_ref()
625            .and_then(|qualifiers| qualifiers.get("arch"));
626
627        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
628            if idx == yumdb_idx || !is_rpm_package(package) {
629                return None;
630            }
631
632            let config = find_db_config(package)?;
633            let datafile_path = package.datafile_paths.first()?;
634            let target_root = compute_root(datafile_path, config.path_suffix);
635            let target_arch = package
636                .qualifiers
637                .as_ref()
638                .and_then(|qualifiers| qualifiers.get("arch"));
639
640            (target_root == yumdb_root
641                && package.name == yumdb_package.name
642                && package.version == yumdb_package.version
643                && target_arch == yumdb_arch)
644                .then_some(idx)
645        }) else {
646            continue;
647        };
648
649        let target_package_uid = packages[target_idx].package_uid.clone();
650        {
651            let target = &mut packages[target_idx];
652            target
653                .datafile_paths
654                .extend(yumdb_package.datafile_paths.clone());
655            target
656                .datasource_ids
657                .extend(yumdb_package.datasource_ids.clone());
658
659            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
660                && !yumdb_extra.is_empty()
661            {
662                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
663                let mut merged_yumdb = extra_data
664                    .get("yumdb")
665                    .and_then(|value| value.as_object().cloned())
666                    .unwrap_or_default();
667                for (key, value) in yumdb_extra {
668                    merged_yumdb.insert(key, value);
669                }
670                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
671            }
672        }
673
674        for file in files.iter_mut() {
675            for package_uid in &mut file.for_packages {
676                if *package_uid == yumdb_package.package_uid {
677                    *package_uid = target_package_uid.clone();
678                }
679            }
680        }
681
682        removal_indices.push(yumdb_idx);
683    }
684
685    removal_indices.sort_unstable();
686    removal_indices.dedup();
687    for idx in removal_indices.into_iter().rev() {
688        packages.remove(idx);
689    }
690}
691
692fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
693    files
694        .iter()
695        .enumerate()
696        .map(|(idx, file)| (file.path.clone(), idx))
697        .collect()
698}
699
700fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
701    for config in DB_PATH_CONFIGS {
702        for &config_dsid in config.datasource_ids {
703            for &pkg_dsid in &package.datasource_ids {
704                if config_dsid == pkg_dsid {
705                    return Some(config);
706                }
707            }
708        }
709    }
710    None
711}
712
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`, or an empty string when `suffix` does not occur in the path.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}
725
726fn collect_file_references(
727    files: &[FileInfo],
728    path_index: &HashMap<String, usize>,
729    datafile_path: &str,
730    package_datasource_ids: &[DatasourceId],
731    config_datasource_ids: &[DatasourceId],
732    package_purl: Option<&str>,
733) -> Vec<crate::models::FileReference> {
734    let file_idx = match path_index.get(datafile_path) {
735        Some(&idx) => idx,
736        None => return Vec::new(),
737    };
738
739    let file = &files[file_idx];
740    let mut refs = Vec::new();
741
742    for pkg_data in &file.package_data {
743        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
744            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
745        });
746
747        if !dsid_matches {
748            continue;
749        }
750
751        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
752            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
753            _ => true,
754        };
755
756        if purl_matches {
757            refs.extend(pkg_data.file_references.clone());
758        }
759    }
760
761    refs
762}
763
764fn is_rpm_package(package: &Package) -> bool {
765    for &dsid in &package.datasource_ids {
766        for &rpm_dsid in RPM_DATASOURCE_IDS {
767            if rpm_dsid == dsid {
768                return true;
769            }
770        }
771    }
772    false
773}
774
775fn is_debian_installed_package(package: &Package) -> bool {
776    package
777        .datasource_ids
778        .contains(&DatasourceId::DebianInstalledStatusDb)
779        || package
780            .datasource_ids
781            .contains(&DatasourceId::DebianDistrolessInstalledDb)
782}
783
784fn collect_debian_installed_file_references(
785    files: &[FileInfo],
786    package: &Package,
787) -> Vec<crate::models::FileReference> {
788    let mut refs = Vec::new();
789
790    for file in files {
791        for pkg_data in &file.package_data {
792            let Some(dsid) = pkg_data.datasource_id else {
793                continue;
794            };
795            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
796                continue;
797            }
798
799            if pkg_data.name != package.name {
800                continue;
801            }
802            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
803                continue;
804            }
805            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
806                continue;
807            }
808
809            merge_file_references(&mut refs, pkg_data.file_references.clone());
810        }
811    }
812
813    refs
814}
815
816fn find_attached_manifest_file_references<'a>(
817    files: &'a [FileInfo],
818    package: &Package,
819    datasource_id: DatasourceId,
820) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
821    for file in files {
822        if !file.for_packages.contains(&package.package_uid) {
823            continue;
824        }
825
826        for pkg_data in &file.package_data {
827            if pkg_data.datasource_id == Some(datasource_id) {
828                return Some((&file.path, pkg_data.file_references.clone()));
829            }
830        }
831    }
832
833    None
834}
835
/// Decides whether a supplemental entry's namespace is compatible with the
/// package's namespace.
///
/// A missing namespace on either side is compatible with anything. When both
/// are present they must be equal, except that a supplemental `debian`
/// namespace is also accepted for an `ubuntu` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(package)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    supplemental == package || (supplemental == "debian" && package == "ubuntu")
}
850
/// Decides whether a supplemental entry's `arch` qualifier is compatible with
/// the package's.
///
/// An entry without an `arch` matches any package. An entry with an `arch`
/// matches only a package that declares the same `arch`.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref()?.get("arch")
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(supplemental_arch) => arch_of(package_qualifiers) == Some(supplemental_arch),
    }
}
868
869fn merge_file_references(
870    target: &mut Vec<crate::models::FileReference>,
871    incoming: Vec<crate::models::FileReference>,
872) {
873    for file_ref in incoming {
874        if let Some(existing) = target
875            .iter_mut()
876            .find(|existing| existing.path == file_ref.path)
877        {
878            if existing.size.is_none() {
879                existing.size = file_ref.size;
880            }
881            if existing.sha1.is_none() {
882                existing.sha1 = file_ref.sha1.clone();
883            }
884            if existing.md5.is_none() {
885                existing.md5 = file_ref.md5.clone();
886            }
887            if existing.sha256.is_none() {
888                existing.sha256 = file_ref.sha256.clone();
889            }
890            if existing.sha512.is_none() {
891                existing.sha512 = file_ref.sha512.clone();
892            }
893            if existing.extra_data.is_none() {
894                existing.extra_data = file_ref.extra_data.clone();
895            }
896        } else {
897            target.push(file_ref);
898        }
899    }
900}
901
902fn resolve_rpm_namespace(
903    files: &[FileInfo],
904    path_index: &HashMap<String, usize>,
905    root: &str,
906) -> Option<String> {
907    let os_release_paths = [
908        format!("{}etc/os-release", root),
909        format!("{}usr/lib/os-release", root),
910    ];
911
912    for os_release_path in &os_release_paths {
913        if let Some(&file_idx) = path_index.get(os_release_path) {
914            let file = &files[file_idx];
915            for pkg_data in &file.package_data {
916                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
917                    && let Some(namespace) = &pkg_data.namespace
918                {
919                    return Some(namespace.clone());
920                }
921            }
922        }
923    }
924
925    None
926}
927
/// Rebuilds a uid of the form `<purl>?uuid=<id>` or `<purl>&uuid=<id>` around
/// `new_purl`, keeping everything that follows the `uuid=` marker.
///
/// The separator placed before `uuid=` depends on `new_purl`: `&` when it
/// already contains a query (`?`), `?` otherwise. Returns `old_uid` unchanged
/// when it has no `uuid` marker.
fn replace_uid_base(old_uid: &str, new_purl: &str) -> String {
    let uuid_suffix = old_uid
        .split_once("?uuid=")
        .or_else(|| old_uid.split_once("&uuid="))
        .map(|(_, suffix)| suffix);

    match uuid_suffix {
        Some(suffix) => {
            // Choose the separator from the NEW purl in both cases; otherwise
            // a purl that already has qualifiers would get a second `?`.
            let separator = if new_purl.contains('?') { '&' } else { '?' };
            format!("{new_purl}{separator}uuid={suffix}")
        }
        None => old_uid.to_string(),
    }
}
940
941fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
942    let parsed = PackageUrl::from_str(existing_purl).ok()?;
943    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;
944
945    updated.with_namespace(namespace).ok()?;
946
947    if let Some(version) = parsed.version() {
948        updated.with_version(version).ok()?;
949    }
950
951    if let Some(subpath) = parsed.subpath() {
952        updated.with_subpath(subpath).ok()?;
953    }
954
955    for (key, value) in parsed.qualifiers() {
956        updated
957            .add_qualifier(key.to_string(), value.to_string())
958            .ok()?;
959    }
960
961    Some(updated.to_string())
962}
963
964fn apply_rpm_namespace(
965    files: &mut [FileInfo],
966    package: &mut Package,
967    dependencies: &mut [TopLevelDependency],
968    namespace: &str,
969) {
970    let old_package_uid = package.package_uid.clone();
971
972    package.namespace = Some(namespace.to_string());
973
974    if let Some(current_purl) = package.purl.as_deref()
975        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
976    {
977        package.purl = Some(updated_purl.clone());
978        package.package_uid = replace_uid_base(&old_package_uid, &updated_purl);
979    }
980
981    for file in files.iter_mut() {
982        for package_uid in &mut file.for_packages {
983            if *package_uid == old_package_uid {
984                *package_uid = package.package_uid.clone();
985            }
986        }
987    }
988
989    for dep in dependencies.iter_mut() {
990        if dep.for_package_uid.as_deref() == Some(old_package_uid.as_str()) {
991            dep.for_package_uid = Some(package.package_uid.clone());
992        }
993
994        if dep.for_package_uid.as_deref() == Some(package.package_uid.as_str()) {
995            dep.namespace = Some(namespace.to_string());
996
997            if let Some(current_purl) = dep.purl.as_deref()
998                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
999            {
1000                dep.purl = Some(updated_purl.clone());
1001                dep.dependency_uid = replace_uid_base(&dep.dependency_uid, &updated_purl);
1002            }
1003        }
1004    }
1005}
1006
1007#[cfg(test)]
1008#[path = "file_ref_resolve_test.rs"]
1009mod tests;