// provenant/assembly/file_ref_resolve.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0

use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::str::FromStr;

use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
use packageurl::PackageUrl;
use strum::EnumIter;

12struct DbPathConfig {
13    datasource_ids: &'static [DatasourceId],
14    path_suffix: &'static str,
15}
16
17#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
18enum FileReferenceResolverKind {
19    About,
20    AttachedManifest,
21    CondaMeta,
22    DebianExtractedDeb,
23    InstalledDb,
24    PythonMetadata,
25    RelativeToDatafileParent,
26}
27
28struct FileReferenceResolverConfig {
29    datasource_ids: &'static [DatasourceId],
30    kind: FileReferenceResolverKind,
31}
32
33const DB_PATH_CONFIGS: &[DbPathConfig] = &[
34    DbPathConfig {
35        datasource_ids: &[DatasourceId::AlpineInstalledDb],
36        path_suffix: "lib/apk/db/installed",
37    },
38    DbPathConfig {
39        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
40        path_suffix: "var/lib/rpm/Packages",
41    },
42    DbPathConfig {
43        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
44        path_suffix: "usr/lib/sysimage/rpm/Packages",
45    },
46    DbPathConfig {
47        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
48        path_suffix: "var/lib/rpm/Packages.db",
49    },
50    DbPathConfig {
51        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
52        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
53    },
54    DbPathConfig {
55        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
56        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
57    },
58    DbPathConfig {
59        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
60        path_suffix: "var/lib/rpm/rpmdb.sqlite",
61    },
62    DbPathConfig {
63        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
64        path_suffix: "var/lib/dpkg/status",
65    },
66    DbPathConfig {
67        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
68        path_suffix: "var/lib/dpkg/status.d/",
69    },
70];
71
72const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
73    DatasourceId::RpmInstalledDatabaseBdb,
74    DatasourceId::RpmInstalledDatabaseNdb,
75    DatasourceId::RpmInstalledDatabaseSqlite,
76];
77const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
78const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
79const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
80    DatasourceId::PypiWheelMetadata,
81    DatasourceId::PypiSdistPkginfo,
82    DatasourceId::PypiEggPkginfo,
83    DatasourceId::PypiEditableEggPkginfo,
84];
85const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
86const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
87    DatasourceId::DebianInstalledFilesList,
88    DatasourceId::DebianInstalledMd5Sums,
89];
90
91const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
92    DatasourceId::AlpineInstalledDb,
93    DatasourceId::RpmInstalledDatabaseBdb,
94    DatasourceId::RpmInstalledDatabaseNdb,
95    DatasourceId::RpmInstalledDatabaseSqlite,
96    DatasourceId::DebianInstalledStatusDb,
97    DatasourceId::DebianDistrolessInstalledDb,
98];
99
100const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
101    FileReferenceResolverConfig {
102        datasource_ids: &[DatasourceId::AboutFile],
103        kind: FileReferenceResolverKind::About,
104    },
105    FileReferenceResolverConfig {
106        datasource_ids: &[DatasourceId::CpanManifest],
107        kind: FileReferenceResolverKind::AttachedManifest,
108    },
109    FileReferenceResolverConfig {
110        datasource_ids: &[DatasourceId::CondaMetaJson],
111        kind: FileReferenceResolverKind::CondaMeta,
112    },
113    FileReferenceResolverConfig {
114        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
115        kind: FileReferenceResolverKind::DebianExtractedDeb,
116    },
117    FileReferenceResolverConfig {
118        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
119        kind: FileReferenceResolverKind::InstalledDb,
120    },
121    FileReferenceResolverConfig {
122        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
123        kind: FileReferenceResolverKind::PythonMetadata,
124    },
125    FileReferenceResolverConfig {
126        datasource_ids: &[DatasourceId::GradleModule],
127        kind: FileReferenceResolverKind::RelativeToDatafileParent,
128    },
129    FileReferenceResolverConfig {
130        datasource_ids: &[
131            DatasourceId::BitbakeRecipe,
132            DatasourceId::BitbakeRecipeAppend,
133        ],
134        kind: FileReferenceResolverKind::RelativeToDatafileParent,
135    },
136];
137
/// Directories used to resolve the file references of a Python package.
struct PythonMetadataResolution {
    /// Directory that relative references are joined onto.
    base_path: String,
    /// Directory a normalized reference must stay inside to be accepted.
    allowed_root: String,
}

143pub fn resolve_file_references(
144    files: &mut [FileInfo],
145    packages: &mut [Package],
146    dependencies: &mut [TopLevelDependency],
147) {
148    if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
149        return;
150    }
151
152    let path_index = build_path_index(&*files);
153
154    for package in packages.iter_mut() {
155        let Some(config) = find_file_reference_resolver(files, package) else {
156            continue;
157        };
158
159        match config.kind {
160            FileReferenceResolverKind::About
161            | FileReferenceResolverKind::RelativeToDatafileParent => {
162                resolve_relative_to_datafile_parent(
163                    files,
164                    &path_index,
165                    package,
166                    config.datasource_ids,
167                );
168            }
169            FileReferenceResolverKind::AttachedManifest => {
170                resolve_attached_manifest_file_references(
171                    files,
172                    &path_index,
173                    package,
174                    config.datasource_ids[0],
175                );
176            }
177            FileReferenceResolverKind::CondaMeta => {
178                resolve_conda_file_references(files, &path_index, package);
179            }
180            FileReferenceResolverKind::DebianExtractedDeb => {
181                resolve_debian_extracted_deb_file_references(files, &path_index, package)
182            }
183            FileReferenceResolverKind::InstalledDb => {
184                resolve_installed_db_file_references(files, &path_index, package, dependencies);
185            }
186            FileReferenceResolverKind::PythonMetadata => {
187                resolve_python_metadata_file_references(files, &path_index, package);
188            }
189        }
190    }
191}
192
193pub(super) fn has_relevant_file_reference_datasource_ids(
194    file_datasource_ids: &HashSet<DatasourceId>,
195) -> bool {
196    FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
197        config
198            .datasource_ids
199            .iter()
200            .any(|datasource_id| file_datasource_ids.contains(datasource_id))
201    })
202}
203
204fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
205    let file_datasource_ids: HashSet<DatasourceId> = files
206        .iter()
207        .flat_map(|file| {
208            file.package_data
209                .iter()
210                .filter_map(|package_data| package_data.datasource_id)
211        })
212        .collect();
213
214    has_relevant_file_reference_datasource_ids(&file_datasource_ids)
215}
216
217fn resolve_relative_to_datafile_parent(
218    files: &mut [FileInfo],
219    path_index: &HashMap<String, usize>,
220    package: &mut Package,
221    datasource_ids: &[DatasourceId],
222) {
223    let mut missing_refs = Vec::new();
224    for datafile_path in &package.datafile_paths {
225        let root = Path::new(datafile_path)
226            .parent()
227            .map(|p| p.to_string_lossy().to_string())
228            .unwrap_or_default();
229
230        let file_references = collect_file_references(
231            files,
232            path_index,
233            datafile_path,
234            &package.datasource_ids,
235            datasource_ids,
236            package.purl.as_deref(),
237        );
238
239        for file_ref in &file_references {
240            let resolved_path = if root.is_empty() {
241                file_ref.path.clone()
242            } else {
243                format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
244            };
245            if let Some(&file_idx) = path_index.get(&resolved_path) {
246                let package_uid = package.package_uid.clone();
247                if !files[file_idx].for_packages.contains(&package_uid) {
248                    files[file_idx].for_packages.push(package_uid);
249                }
250            } else {
251                missing_refs.push(file_ref.path.clone());
252            }
253        }
254    }
255
256    record_missing_file_references(package, missing_refs);
257}
258
259fn resolve_attached_manifest_file_references(
260    files: &mut [FileInfo],
261    path_index: &HashMap<String, usize>,
262    package: &mut Package,
263    datasource_id: DatasourceId,
264) {
265    let Some((datafile_path, file_references)) =
266        find_attached_manifest_file_references(files, package, datasource_id)
267    else {
268        return;
269    };
270
271    let root = Path::new(datafile_path)
272        .parent()
273        .map(|p| p.to_string_lossy().to_string())
274        .unwrap_or_default();
275
276    let mut missing_refs = Vec::new();
277    for file_ref in &file_references {
278        let resolved_path = if root.is_empty() {
279            file_ref.path.clone()
280        } else {
281            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
282        };
283
284        if let Some(&file_idx) = path_index.get(&resolved_path) {
285            let package_uid = package.package_uid.clone();
286            if !files[file_idx].for_packages.contains(&package_uid) {
287                files[file_idx].for_packages.push(package_uid);
288            }
289        } else {
290            missing_refs.push(file_ref.path.clone());
291        }
292    }
293
294    record_missing_file_references(package, missing_refs);
295}
296
297fn resolve_conda_file_references(
298    files: &mut [FileInfo],
299    path_index: &HashMap<String, usize>,
300    package: &mut Package,
301) {
302    let Some(conda_meta_path) = package
303        .datafile_paths
304        .iter()
305        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
306    else {
307        return;
308    };
309    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
310        return;
311    };
312
313    let file_references = collect_file_references(
314        files,
315        path_index,
316        conda_meta_path,
317        &package.datasource_ids,
318        &[DatasourceId::CondaMetaJson],
319        package.purl.as_deref(),
320    );
321
322    let mut missing_refs = Vec::new();
323    for file_ref in &file_references {
324        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
325        if let Some(&file_idx) = path_index.get(&resolved_path) {
326            let package_uid = package.package_uid.clone();
327            if !files[file_idx].for_packages.contains(&package_uid) {
328                files[file_idx].for_packages.push(package_uid);
329            }
330        } else {
331            missing_refs.push(file_ref.path.clone());
332        }
333    }
334
335    record_missing_file_references(package, missing_refs);
336}
337
338fn resolve_installed_db_file_references(
339    files: &mut [FileInfo],
340    path_index: &HashMap<String, usize>,
341    package: &mut Package,
342    dependencies: &mut [TopLevelDependency],
343) {
344    let Some(config) = find_db_config(package) else {
345        return;
346    };
347    let Some(datafile_path) = package.datafile_paths.first() else {
348        return;
349    };
350
351    let root = compute_root(datafile_path, config.path_suffix);
352
353    let mut file_references = collect_file_references(
354        files,
355        path_index,
356        datafile_path,
357        &package.datasource_ids,
358        config.datasource_ids,
359        package.purl.as_deref(),
360    );
361
362    if is_debian_installed_package(package) {
363        merge_file_references(
364            &mut file_references,
365            collect_debian_installed_file_references(files, package),
366        );
367    }
368
369    let mut missing_refs = Vec::new();
370    for file_ref in &file_references {
371        let ref_path = file_ref.path.trim_start_matches('/');
372        let resolved_path = if root.is_empty() {
373            ref_path.to_string()
374        } else {
375            format!("{}{}", root, ref_path)
376        };
377
378        if let Some(&file_idx) = path_index.get(&resolved_path) {
379            let package_uid = package.package_uid.clone();
380            if !files[file_idx].for_packages.contains(&package_uid) {
381                files[file_idx].for_packages.push(package_uid);
382            }
383        } else {
384            missing_refs.push(file_ref.path.clone());
385        }
386    }
387
388    record_missing_file_references(package, missing_refs);
389
390    if is_rpm_package(package)
391        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
392    {
393        apply_rpm_namespace(files, package, dependencies, &namespace);
394    }
395}
396
397fn resolve_debian_extracted_deb_file_references(
398    files: &mut [FileInfo],
399    path_index: &HashMap<String, usize>,
400    package: &mut Package,
401) {
402    let Some(datafile_path) = package
403        .datafile_paths
404        .iter()
405        .find(|path| path.ends_with("/md5sums"))
406    else {
407        return;
408    };
409
410    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
411        return;
412    };
413    let Some(extracted_root) = md5sums_parent.parent() else {
414        return;
415    };
416    let root = extracted_root.to_string_lossy().to_string();
417
418    let Some(&file_idx) = path_index.get(datafile_path) else {
419        return;
420    };
421    let file_references: Vec<_> = files[file_idx]
422        .package_data
423        .iter()
424        .filter(|pkg_data| {
425            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
426        })
427        .flat_map(|pkg_data| pkg_data.file_references.clone())
428        .collect();
429
430    let mut missing_refs = Vec::new();
431    for file_ref in &file_references {
432        let resolved_path = if root.is_empty() {
433            file_ref.path.trim_start_matches('/').to_string()
434        } else {
435            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
436        };
437
438        if let Some(&file_idx) = path_index.get(&resolved_path) {
439            let package_uid = package.package_uid.clone();
440            if !files[file_idx].for_packages.contains(&package_uid) {
441                files[file_idx].for_packages.push(package_uid);
442            }
443        } else {
444            missing_refs.push(file_ref.path.clone());
445        }
446    }
447
448    record_missing_file_references(package, missing_refs);
449}
450
451fn resolve_python_metadata_file_references(
452    files: &mut [FileInfo],
453    path_index: &HashMap<String, usize>,
454    package: &mut Package,
455) {
456    let Some(python_resolution) = find_python_metadata_root(package) else {
457        return;
458    };
459    let Some(datafile_path) = package
460        .datafile_paths
461        .iter()
462        .find(|path| is_python_metadata_layout(path))
463    else {
464        return;
465    };
466
467    let file_references = collect_file_references(
468        files,
469        path_index,
470        datafile_path,
471        &package.datasource_ids,
472        PYTHON_METADATA_DATASOURCE_IDS,
473        package.purl.as_deref(),
474    );
475
476    let mut missing_refs = Vec::new();
477    for file_ref in &file_references {
478        let Some(resolved_path) = normalize_relative_path(
479            &python_resolution.base_path,
480            &python_resolution.allowed_root,
481            &file_ref.path,
482        ) else {
483            missing_refs.push(file_ref.path.clone());
484            continue;
485        };
486
487        if let Some(&file_idx) = path_index.get(&resolved_path) {
488            let package_uid = package.package_uid.clone();
489            if !files[file_idx].for_packages.contains(&package_uid) {
490                files[file_idx].for_packages.push(package_uid);
491            }
492        } else {
493            missing_refs.push(file_ref.path.clone());
494        }
495    }
496
497    record_missing_file_references(package, missing_refs);
498}
499
500fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
501    if missing_refs.is_empty() {
502        return;
503    }
504
505    missing_refs.sort();
506    let missing_refs_json: Vec<serde_json::Value> = missing_refs
507        .into_iter()
508        .map(|path| serde_json::json!({"path": path}))
509        .collect();
510
511    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
512    extra_data.insert(
513        "missing_file_references".to_string(),
514        serde_json::Value::Array(missing_refs_json),
515    );
516}
517
518fn find_file_reference_resolver(
519    files: &[FileInfo],
520    package: &Package,
521) -> Option<&'static FileReferenceResolverConfig> {
522    FILE_REFERENCE_RESOLVER_CONFIGS
523        .iter()
524        .find(|config| match config.kind {
525            FileReferenceResolverKind::AttachedManifest => {
526                config.datasource_ids.iter().any(|datasource_id| {
527                    files.iter().any(|file| {
528                        file.for_packages.contains(&package.package_uid)
529                            && file
530                                .package_data
531                                .iter()
532                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
533                    })
534                })
535            }
536            _ => config
537                .datasource_ids
538                .iter()
539                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
540        })
541}
542
/// Returns `true` when `path` names a `METADATA` or `PKG-INFO` file located
/// inside some directory (the leading `/` is part of the match).
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|suffix| path.ends_with(suffix))
}

547fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
548    let datafile_path = package
549        .datafile_paths
550        .iter()
551        .find(|path| is_python_metadata_layout(path))?;
552
553    if !package
554        .datasource_ids
555        .iter()
556        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
557    {
558        return None;
559    }
560
561    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
562        if let Some(idx) = datafile_path.rfind(segment) {
563            if datafile_path.ends_with("/METADATA") {
564                let root_end = idx + segment.len();
565                let root = datafile_path[..root_end].to_string();
566                return Some(PythonMetadataResolution {
567                    base_path: root.clone(),
568                    allowed_root: root,
569                });
570            }
571
572            if datafile_path.ends_with("/PKG-INFO") {
573                let parent = Path::new(datafile_path).parent()?;
574                let allowed_root = datafile_path[..idx + segment.len()].to_string();
575                return Some(PythonMetadataResolution {
576                    base_path: parent.to_string_lossy().to_string(),
577                    allowed_root,
578                });
579            }
580        }
581    }
582
583    if datafile_path.ends_with(".egg-info/PKG-INFO") {
584        let metadata_parent = Path::new(datafile_path).parent()?;
585        let project_root = metadata_parent.parent()?;
586        let project_root = project_root.to_string_lossy().to_string();
587        return Some(PythonMetadataResolution {
588            base_path: project_root.clone(),
589            allowed_root: project_root,
590        });
591    }
592
593    None
594}
595
/// Joins `relative` onto `base`, collapses `.` and `..` lexically, and
/// returns the result only if it stays inside `allowed_root`.
///
/// Leading `/` characters of `relative` are ignored, so it is always treated
/// as relative to `base`. Returns `None` when the normalized path does not
/// start with `allowed_root`, or when a `..` would climb above the top of
/// the joined path. The latter was previously clamped silently, which let an
/// escaping reference such as `../../proj/x` resolve to a path inside the
/// root.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` reports `false` when there is nothing left to remove,
                // i.e. the reference points above the top of the tree.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    // Component-wise prefix test, so a trailing `/` on `allowed_root` is
    // irrelevant and `site-packages-old` does not match `site-packages`.
    normalized
        .starts_with(allowed_root)
        .then(|| normalized.to_string_lossy().into_owned())
}

618fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
619    let path = datafile_path?;
620    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
621    Some(path[..idx].to_string())
622}
623
624pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
625    let yumdb_indices: Vec<usize> = packages
626        .iter()
627        .enumerate()
628        .filter_map(|(idx, package)| {
629            package
630                .datasource_ids
631                .contains(&DatasourceId::RpmYumdb)
632                .then_some(idx)
633        })
634        .collect();
635    let mut removal_indices = Vec::new();
636
637    for yumdb_idx in yumdb_indices {
638        let yumdb_package = packages[yumdb_idx].clone();
639        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
640            continue;
641        };
642        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
643        let yumdb_arch = yumdb_package
644            .qualifiers
645            .as_ref()
646            .and_then(|qualifiers| qualifiers.get("arch"));
647
648        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
649            if idx == yumdb_idx || !is_rpm_package(package) {
650                return None;
651            }
652
653            let config = find_db_config(package)?;
654            let datafile_path = package.datafile_paths.first()?;
655            let target_root = compute_root(datafile_path, config.path_suffix);
656            let target_arch = package
657                .qualifiers
658                .as_ref()
659                .and_then(|qualifiers| qualifiers.get("arch"));
660
661            (target_root == yumdb_root
662                && package.name == yumdb_package.name
663                && package.version == yumdb_package.version
664                && target_arch == yumdb_arch)
665                .then_some(idx)
666        }) else {
667            continue;
668        };
669
670        let target_package_uid = packages[target_idx].package_uid.clone();
671        {
672            let target = &mut packages[target_idx];
673            target
674                .datafile_paths
675                .extend(yumdb_package.datafile_paths.clone());
676            target
677                .datasource_ids
678                .extend(yumdb_package.datasource_ids.clone());
679
680            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
681                && !yumdb_extra.is_empty()
682            {
683                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
684                let mut merged_yumdb = extra_data
685                    .get("yumdb")
686                    .and_then(|value| value.as_object().cloned())
687                    .unwrap_or_default();
688                for (key, value) in yumdb_extra {
689                    merged_yumdb.insert(key, value);
690                }
691                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
692            }
693        }
694
695        for file in files.iter_mut() {
696            for package_uid in &mut file.for_packages {
697                if *package_uid == yumdb_package.package_uid {
698                    *package_uid = target_package_uid.clone();
699                }
700            }
701        }
702
703        removal_indices.push(yumdb_idx);
704    }
705
706    removal_indices.sort_unstable();
707    removal_indices.dedup();
708    for idx in removal_indices.into_iter().rev() {
709        packages.remove(idx);
710    }
711}
712
713fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
714    files
715        .iter()
716        .enumerate()
717        .map(|(idx, file)| (file.path.clone(), idx))
718        .collect()
719}
720
721fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
722    let datafile_paths = &package.datafile_paths;
723
724    for config in DB_PATH_CONFIGS {
725        if !datafile_paths.is_empty()
726            && !datafile_paths
727                .iter()
728                .any(|path| path.ends_with(config.path_suffix))
729        {
730            continue;
731        }
732
733        for &config_dsid in config.datasource_ids {
734            for &pkg_dsid in &package.datasource_ids {
735                if config_dsid == pkg_dsid {
736                    return Some(config);
737                }
738            }
739        }
740    }
741
742    for config in DB_PATH_CONFIGS {
743        for &config_dsid in config.datasource_ids {
744            for &pkg_dsid in &package.datasource_ids {
745                if config_dsid == pkg_dsid {
746                    return Some(config);
747                }
748            }
749        }
750    }
751
752    None
753}
754
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`, or an empty string when `suffix` does not occur.
///
/// A non-empty result keeps its trailing separator (for example `rootfs/`),
/// so callers concatenate it with a relative path directly.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}

768fn collect_file_references(
769    files: &[FileInfo],
770    path_index: &HashMap<String, usize>,
771    datafile_path: &str,
772    package_datasource_ids: &[DatasourceId],
773    config_datasource_ids: &[DatasourceId],
774    package_purl: Option<&str>,
775) -> Vec<crate::models::FileReference> {
776    let file_idx = match path_index.get(datafile_path) {
777        Some(&idx) => idx,
778        None => return Vec::new(),
779    };
780
781    let file = &files[file_idx];
782    let mut refs = Vec::new();
783
784    for pkg_data in &file.package_data {
785        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
786            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
787        });
788
789        if !dsid_matches {
790            continue;
791        }
792
793        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
794            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
795            _ => true,
796        };
797
798        if purl_matches {
799            refs.extend(pkg_data.file_references.clone());
800        }
801    }
802
803    refs
804}
805
806fn is_rpm_package(package: &Package) -> bool {
807    for &dsid in &package.datasource_ids {
808        for &rpm_dsid in RPM_DATASOURCE_IDS {
809            if rpm_dsid == dsid {
810                return true;
811            }
812        }
813    }
814    false
815}
816
817fn is_debian_installed_package(package: &Package) -> bool {
818    package
819        .datasource_ids
820        .contains(&DatasourceId::DebianInstalledStatusDb)
821        || package
822            .datasource_ids
823            .contains(&DatasourceId::DebianDistrolessInstalledDb)
824}
825
826fn collect_debian_installed_file_references(
827    files: &[FileInfo],
828    package: &Package,
829) -> Vec<crate::models::FileReference> {
830    let mut refs = Vec::new();
831
832    for file in files {
833        for pkg_data in &file.package_data {
834            let Some(dsid) = pkg_data.datasource_id else {
835                continue;
836            };
837            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
838                continue;
839            }
840
841            if pkg_data.name != package.name {
842                continue;
843            }
844            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
845                continue;
846            }
847            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
848                continue;
849            }
850
851            merge_file_references(&mut refs, pkg_data.file_references.clone());
852        }
853    }
854
855    refs
856}
857
858fn find_attached_manifest_file_references<'a>(
859    files: &'a [FileInfo],
860    package: &Package,
861    datasource_id: DatasourceId,
862) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
863    for file in files {
864        if !file.for_packages.contains(&package.package_uid) {
865            continue;
866        }
867
868        for pkg_data in &file.package_data {
869            if pkg_data.datasource_id == Some(datasource_id) {
870                return Some((&file.path, pkg_data.file_references.clone()));
871            }
872        }
873    }
874
875    None
876}
877
/// Decides whether a supplemental entry's namespace is compatible with the
/// package's namespace.
///
/// A missing namespace on either side is compatible with anything. A
/// supplemental `debian` entry is also accepted for an `ubuntu` package (but
/// not the other way round). Otherwise the namespaces must be equal.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    match (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) {
        (None, _) | (Some(_), None) => true,
        (Some("debian"), Some("ubuntu")) => true,
        (Some(supplemental), Some(package)) => supplemental == package,
    }
}

/// Decides whether a supplemental entry's `arch` qualifier is compatible
/// with the package's.
///
/// An entry without an `arch` qualifier matches any package. An entry with
/// one matches only a package whose `arch` qualifier is equal, so a package
/// without `arch` is rejected.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    fn arch(qualifiers: &Option<HashMap<String, String>>) -> Option<&str> {
        qualifiers.as_ref()?.get("arch").map(String::as_str)
    }

    match (arch(supplemental_qualifiers), arch(package_qualifiers)) {
        (None, _) => true,
        (Some(supplemental_arch), package_arch) => package_arch == Some(supplemental_arch),
    }
}

911fn merge_file_references(
912    target: &mut Vec<crate::models::FileReference>,
913    incoming: Vec<crate::models::FileReference>,
914) {
915    for file_ref in incoming {
916        if let Some(existing) = target
917            .iter_mut()
918            .find(|existing| existing.path == file_ref.path)
919        {
920            if existing.size.is_none() {
921                existing.size = file_ref.size;
922            }
923            if existing.sha1.is_none() {
924                existing.sha1 = file_ref.sha1;
925            }
926            if existing.md5.is_none() {
927                existing.md5 = file_ref.md5;
928            }
929            if existing.sha256.is_none() {
930                existing.sha256 = file_ref.sha256;
931            }
932            if existing.sha512.is_none() {
933                existing.sha512 = file_ref.sha512;
934            }
935            if existing.extra_data.is_none() {
936                existing.extra_data = file_ref.extra_data.clone();
937            }
938        } else {
939            target.push(file_ref);
940        }
941    }
942}
943
944fn resolve_rpm_namespace(
945    files: &[FileInfo],
946    path_index: &HashMap<String, usize>,
947    root: &str,
948) -> Option<String> {
949    let os_release_paths = [
950        format!("{}etc/os-release", root),
951        format!("{}usr/lib/os-release", root),
952    ];
953
954    for os_release_path in &os_release_paths {
955        if let Some(&file_idx) = path_index.get(os_release_path) {
956            let file = &files[file_idx];
957            for pkg_data in &file.package_data {
958                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
959                    && let Some(namespace) = &pkg_data.namespace
960                {
961                    return Some(namespace.clone());
962                }
963            }
964        }
965    }
966
967    None
968}
969
970fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
971    let parsed = PackageUrl::from_str(existing_purl).ok()?;
972    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;
973
974    updated.with_namespace(namespace).ok()?;
975
976    if let Some(version) = parsed.version() {
977        updated.with_version(version).ok()?;
978    }
979
980    if let Some(subpath) = parsed.subpath() {
981        updated.with_subpath(subpath).ok()?;
982    }
983
984    for (key, value) in parsed.qualifiers() {
985        updated
986            .add_qualifier(key.to_string(), value.to_string())
987            .ok()?;
988    }
989
990    Some(updated.to_string())
991}
992
993fn apply_rpm_namespace(
994    files: &mut [FileInfo],
995    package: &mut Package,
996    dependencies: &mut [TopLevelDependency],
997    namespace: &str,
998) {
999    let old_package_uid = package.package_uid.clone();
1000
1001    package.namespace = Some(namespace.to_string());
1002
1003    if let Some(current_purl) = package.purl.as_deref()
1004        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1005    {
1006        package.purl = Some(updated_purl.clone());
1007        package.package_uid = old_package_uid.replace_base(&updated_purl);
1008    }
1009
1010    for file in files.iter_mut() {
1011        for package_uid in &mut file.for_packages {
1012            if *package_uid == old_package_uid {
1013                *package_uid = package.package_uid.clone();
1014            }
1015        }
1016    }
1017
1018    for dep in dependencies.iter_mut() {
1019        if dep.for_package_uid.as_ref() == Some(&old_package_uid) {
1020            dep.for_package_uid = Some(package.package_uid.clone());
1021        }
1022
1023        if dep.for_package_uid.as_ref() == Some(&package.package_uid) {
1024            dep.namespace = Some(namespace.to_string());
1025
1026            if let Some(current_purl) = dep.purl.as_deref()
1027                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1028            {
1029                dep.purl = Some(updated_purl.clone());
1030                dep.dependency_uid = dep.dependency_uid.replace_base(&updated_purl);
1031            }
1032        }
1033    }
1034}
1035
// Unit tests are kept in a sibling file rather than an inline module.
#[cfg(test)]
#[path = "file_ref_resolve_test.rs"]
mod tests;