Skip to main content

provenant/assembly/
file_ref_resolve.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6use std::str::FromStr;
7
8use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
9use packageurl::PackageUrl;
10use strum::EnumIter;
11
/// Pairs a set of installed-package database datasource IDs with the path
/// suffix used to recognise that database's datafile.
struct DbPathConfig {
    /// Datasource IDs this entry applies to; `find_db_config` matches them
    /// against a package's `datasource_ids`.
    datasource_ids: &'static [DatasourceId],
    /// Suffix looked for in a datafile path. `compute_root` keeps everything
    /// before the last occurrence of this suffix as the root prefix.
    path_suffix: &'static str,
}
16
/// Strategy used by `resolve_file_references` to turn a package's file
/// references into links on scanned files.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// Handled by `resolve_relative_to_datafile_parent` (same code path as
    /// `RelativeToDatafileParent`).
    About,
    /// References come from a manifest file already assigned to the package;
    /// handled by `resolve_attached_manifest_file_references`.
    AttachedManifest,
    /// Handled by `resolve_conda_file_references`.
    CondaMeta,
    /// Handled by `resolve_debian_extracted_deb_file_references`.
    DebianExtractedDeb,
    /// Handled by `resolve_installed_db_file_references`.
    InstalledDb,
    /// Handled by `resolve_python_metadata_file_references`.
    PythonMetadata,
    /// References are resolved against the directory containing the datafile;
    /// handled by `resolve_relative_to_datafile_parent`.
    RelativeToDatafileParent,
}
27
/// Maps a group of datasource IDs to the resolver strategy that handles them.
struct FileReferenceResolverConfig {
    /// Datasource IDs that select this resolver.
    datasource_ids: &'static [DatasourceId],
    /// Strategy dispatched on by `resolve_file_references`.
    kind: FileReferenceResolverKind,
}
32
/// Known installed-package database locations, keyed by datasource ID.
///
/// `find_db_config` scans this table in order and returns the first entry that
/// matches, so several entries may share a datasource ID with different
/// suffixes (the RPM databases each list two locations).
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "var/lib/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "var/lib/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    // Directory-style suffix (trailing slash): a datafile path naming a file
    // inside this directory does not *end* with it, so `find_db_config` would
    // reach this entry through its datasource-only fallback pass, while
    // `compute_root` (which uses `rfind`) still locates the suffix.
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
71
/// Datasource IDs that make `is_rpm_package` treat a package as coming from an
/// installed RPM database.
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Suffix stripped (via `compute_root`) from a yumdb datafile path to obtain
/// its root prefix in `merge_rpm_yumdb_metadata`.
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path segment used to pick the conda metadata datafile and to derive the
/// root prefix in `compute_conda_root`.
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasource IDs handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
    DatasourceId::PypiEggPkginfo,
    DatasourceId::PypiEditableEggPkginfo,
];
/// Directory segments searched for (with `rfind`) by
/// `find_python_metadata_root` to locate the install root.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasource IDs whose file references are merged into a Debian installed
/// package by `collect_debian_installed_file_references`.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];
90
/// Datasource IDs routed to `FileReferenceResolverKind::InstalledDb` by
/// `FILE_REFERENCE_RESOLVER_CONFIGS`.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
99
/// Dispatch table from datasource IDs to resolver strategies.
///
/// Order is significant: `find_file_reference_resolver` returns the first
/// entry that matches a package, so a package is handled by at most one
/// resolver.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    // `resolve_file_references` passes only `datasource_ids[0]` to the
    // attached-manifest resolver, so this entry carries a single ID.
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::DebianMd5SumsInExtractedDeb],
        kind: FileReferenceResolverKind::DebianExtractedDeb,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[
            DatasourceId::GradleModule,
            DatasourceId::BitbakeRecipe,
            DatasourceId::BitbakeRecipeAppend,
        ],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
134
/// Result of `find_python_metadata_root`: where Python file references are
/// resolved from and which directory they must stay inside.
struct PythonMetadataResolution {
    /// Directory that relative reference paths are joined onto.
    base_path: String,
    /// Directory a normalized reference must start with to be accepted by
    /// `normalize_relative_path`.
    allowed_root: String,
}
139
140pub fn resolve_file_references(
141    files: &mut [FileInfo],
142    packages: &mut [Package],
143    dependencies: &mut [TopLevelDependency],
144) {
145    if packages.is_empty() || !has_relevant_file_reference_inputs(files) {
146        return;
147    }
148
149    let path_index = build_path_index(&*files);
150
151    for package in packages.iter_mut() {
152        let Some(config) = find_file_reference_resolver(files, package) else {
153            continue;
154        };
155
156        match config.kind {
157            FileReferenceResolverKind::About
158            | FileReferenceResolverKind::RelativeToDatafileParent => {
159                resolve_relative_to_datafile_parent(
160                    files,
161                    &path_index,
162                    package,
163                    config.datasource_ids,
164                );
165            }
166            FileReferenceResolverKind::AttachedManifest => {
167                resolve_attached_manifest_file_references(
168                    files,
169                    &path_index,
170                    package,
171                    config.datasource_ids[0],
172                );
173            }
174            FileReferenceResolverKind::CondaMeta => {
175                resolve_conda_file_references(files, &path_index, package);
176            }
177            FileReferenceResolverKind::DebianExtractedDeb => {
178                resolve_debian_extracted_deb_file_references(files, &path_index, package)
179            }
180            FileReferenceResolverKind::InstalledDb => {
181                resolve_installed_db_file_references(files, &path_index, package, dependencies);
182            }
183            FileReferenceResolverKind::PythonMetadata => {
184                resolve_python_metadata_file_references(files, &path_index, package);
185            }
186        }
187    }
188}
189
190pub(super) fn has_relevant_file_reference_datasource_ids(
191    file_datasource_ids: &HashSet<DatasourceId>,
192) -> bool {
193    FILE_REFERENCE_RESOLVER_CONFIGS.iter().any(|config| {
194        config
195            .datasource_ids
196            .iter()
197            .any(|datasource_id| file_datasource_ids.contains(datasource_id))
198    })
199}
200
201fn has_relevant_file_reference_inputs(files: &[FileInfo]) -> bool {
202    let file_datasource_ids: HashSet<DatasourceId> = files
203        .iter()
204        .flat_map(|file| {
205            file.package_data
206                .iter()
207                .filter_map(|package_data| package_data.datasource_id)
208        })
209        .collect();
210
211    has_relevant_file_reference_datasource_ids(&file_datasource_ids)
212}
213
214fn resolve_relative_to_datafile_parent(
215    files: &mut [FileInfo],
216    path_index: &HashMap<String, usize>,
217    package: &mut Package,
218    datasource_ids: &[DatasourceId],
219) {
220    let mut missing_refs = Vec::new();
221    for datafile_path in &package.datafile_paths {
222        let root = Path::new(datafile_path)
223            .parent()
224            .map(|p| p.to_string_lossy().to_string())
225            .unwrap_or_default();
226
227        let file_references = collect_file_references(
228            files,
229            path_index,
230            datafile_path,
231            &package.datasource_ids,
232            datasource_ids,
233            package.purl.as_deref(),
234        );
235
236        for file_ref in &file_references {
237            let resolved_path = if root.is_empty() {
238                file_ref.path.clone()
239            } else {
240                format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
241            };
242            if let Some(&file_idx) = path_index.get(&resolved_path) {
243                let package_uid = package.package_uid.clone();
244                if !files[file_idx].for_packages.contains(&package_uid) {
245                    files[file_idx].for_packages.push(package_uid);
246                }
247            } else {
248                missing_refs.push(file_ref.path.clone());
249            }
250        }
251    }
252
253    record_missing_file_references(package, missing_refs);
254}
255
256fn resolve_attached_manifest_file_references(
257    files: &mut [FileInfo],
258    path_index: &HashMap<String, usize>,
259    package: &mut Package,
260    datasource_id: DatasourceId,
261) {
262    let Some((datafile_path, file_references)) =
263        find_attached_manifest_file_references(files, package, datasource_id)
264    else {
265        return;
266    };
267
268    let root = Path::new(datafile_path)
269        .parent()
270        .map(|p| p.to_string_lossy().to_string())
271        .unwrap_or_default();
272
273    let mut missing_refs = Vec::new();
274    for file_ref in &file_references {
275        let resolved_path = if root.is_empty() {
276            file_ref.path.clone()
277        } else {
278            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
279        };
280
281        if let Some(&file_idx) = path_index.get(&resolved_path) {
282            let package_uid = package.package_uid.clone();
283            if !files[file_idx].for_packages.contains(&package_uid) {
284                files[file_idx].for_packages.push(package_uid);
285            }
286        } else {
287            missing_refs.push(file_ref.path.clone());
288        }
289    }
290
291    record_missing_file_references(package, missing_refs);
292}
293
294fn resolve_conda_file_references(
295    files: &mut [FileInfo],
296    path_index: &HashMap<String, usize>,
297    package: &mut Package,
298) {
299    let Some(conda_meta_path) = package
300        .datafile_paths
301        .iter()
302        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
303    else {
304        return;
305    };
306    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
307        return;
308    };
309
310    let file_references = collect_file_references(
311        files,
312        path_index,
313        conda_meta_path,
314        &package.datasource_ids,
315        &[DatasourceId::CondaMetaJson],
316        package.purl.as_deref(),
317    );
318
319    let mut missing_refs = Vec::new();
320    for file_ref in &file_references {
321        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
322        if let Some(&file_idx) = path_index.get(&resolved_path) {
323            let package_uid = package.package_uid.clone();
324            if !files[file_idx].for_packages.contains(&package_uid) {
325                files[file_idx].for_packages.push(package_uid);
326            }
327        } else {
328            missing_refs.push(file_ref.path.clone());
329        }
330    }
331
332    record_missing_file_references(package, missing_refs);
333}
334
335fn resolve_installed_db_file_references(
336    files: &mut [FileInfo],
337    path_index: &HashMap<String, usize>,
338    package: &mut Package,
339    dependencies: &mut [TopLevelDependency],
340) {
341    let Some(config) = find_db_config(package) else {
342        return;
343    };
344    let Some(datafile_path) = package.datafile_paths.first() else {
345        return;
346    };
347
348    let root = compute_root(datafile_path, config.path_suffix);
349
350    let mut file_references = collect_file_references(
351        files,
352        path_index,
353        datafile_path,
354        &package.datasource_ids,
355        config.datasource_ids,
356        package.purl.as_deref(),
357    );
358
359    if is_debian_installed_package(package) {
360        merge_file_references(
361            &mut file_references,
362            collect_debian_installed_file_references(files, package),
363        );
364    }
365
366    let mut missing_refs = Vec::new();
367    for file_ref in &file_references {
368        let ref_path = file_ref.path.trim_start_matches('/');
369        let resolved_path = if root.is_empty() {
370            ref_path.to_string()
371        } else {
372            format!("{}{}", root, ref_path)
373        };
374
375        if let Some(&file_idx) = path_index.get(&resolved_path) {
376            let package_uid = package.package_uid.clone();
377            if !files[file_idx].for_packages.contains(&package_uid) {
378                files[file_idx].for_packages.push(package_uid);
379            }
380        } else {
381            missing_refs.push(file_ref.path.clone());
382        }
383    }
384
385    record_missing_file_references(package, missing_refs);
386
387    if is_rpm_package(package)
388        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
389    {
390        apply_rpm_namespace(files, package, dependencies, &namespace);
391    }
392}
393
394fn resolve_debian_extracted_deb_file_references(
395    files: &mut [FileInfo],
396    path_index: &HashMap<String, usize>,
397    package: &mut Package,
398) {
399    let Some(datafile_path) = package
400        .datafile_paths
401        .iter()
402        .find(|path| path.ends_with("/md5sums"))
403    else {
404        return;
405    };
406
407    let Some(md5sums_parent) = Path::new(datafile_path).parent() else {
408        return;
409    };
410    let Some(extracted_root) = md5sums_parent.parent() else {
411        return;
412    };
413    let root = extracted_root.to_string_lossy().to_string();
414
415    let Some(&file_idx) = path_index.get(datafile_path) else {
416        return;
417    };
418    let file_references: Vec<_> = files[file_idx]
419        .package_data
420        .iter()
421        .filter(|pkg_data| {
422            pkg_data.datasource_id == Some(DatasourceId::DebianMd5SumsInExtractedDeb)
423        })
424        .flat_map(|pkg_data| pkg_data.file_references.clone())
425        .collect();
426
427    let mut missing_refs = Vec::new();
428    for file_ref in &file_references {
429        let resolved_path = if root.is_empty() {
430            file_ref.path.trim_start_matches('/').to_string()
431        } else {
432            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
433        };
434
435        if let Some(&file_idx) = path_index.get(&resolved_path) {
436            let package_uid = package.package_uid.clone();
437            if !files[file_idx].for_packages.contains(&package_uid) {
438                files[file_idx].for_packages.push(package_uid);
439            }
440        } else {
441            missing_refs.push(file_ref.path.clone());
442        }
443    }
444
445    record_missing_file_references(package, missing_refs);
446}
447
448fn resolve_python_metadata_file_references(
449    files: &mut [FileInfo],
450    path_index: &HashMap<String, usize>,
451    package: &mut Package,
452) {
453    let Some(python_resolution) = find_python_metadata_root(package) else {
454        return;
455    };
456    let Some(datafile_path) = package
457        .datafile_paths
458        .iter()
459        .find(|path| is_python_metadata_layout(path))
460    else {
461        return;
462    };
463
464    let file_references = collect_file_references(
465        files,
466        path_index,
467        datafile_path,
468        &package.datasource_ids,
469        PYTHON_METADATA_DATASOURCE_IDS,
470        package.purl.as_deref(),
471    );
472
473    let mut missing_refs = Vec::new();
474    for file_ref in &file_references {
475        let Some(resolved_path) = normalize_relative_path(
476            &python_resolution.base_path,
477            &python_resolution.allowed_root,
478            &file_ref.path,
479        ) else {
480            missing_refs.push(file_ref.path.clone());
481            continue;
482        };
483
484        if let Some(&file_idx) = path_index.get(&resolved_path) {
485            let package_uid = package.package_uid.clone();
486            if !files[file_idx].for_packages.contains(&package_uid) {
487                files[file_idx].for_packages.push(package_uid);
488            }
489        } else {
490            missing_refs.push(file_ref.path.clone());
491        }
492    }
493
494    record_missing_file_references(package, missing_refs);
495}
496
497fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
498    if missing_refs.is_empty() {
499        return;
500    }
501
502    missing_refs.sort();
503    let missing_refs_json: Vec<serde_json::Value> = missing_refs
504        .into_iter()
505        .map(|path| serde_json::json!({"path": path}))
506        .collect();
507
508    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
509    extra_data.insert(
510        "missing_file_references".to_string(),
511        serde_json::Value::Array(missing_refs_json),
512    );
513}
514
515fn find_file_reference_resolver(
516    files: &[FileInfo],
517    package: &Package,
518) -> Option<&'static FileReferenceResolverConfig> {
519    FILE_REFERENCE_RESOLVER_CONFIGS
520        .iter()
521        .find(|config| match config.kind {
522            FileReferenceResolverKind::AttachedManifest => {
523                config.datasource_ids.iter().any(|datasource_id| {
524                    files.iter().any(|file| {
525                        file.for_packages.contains(&package.package_uid)
526                            && file
527                                .package_data
528                                .iter()
529                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
530                    })
531                })
532            }
533            _ => config
534                .datasource_ids
535                .iter()
536                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
537        })
538}
539
/// Returns `true` when `path` names a `METADATA` or `PKG-INFO` file that sits
/// inside some directory (the leading `/` is part of the match).
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
543
544fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
545    let datafile_path = package
546        .datafile_paths
547        .iter()
548        .find(|path| is_python_metadata_layout(path))?;
549
550    if !package
551        .datasource_ids
552        .iter()
553        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
554    {
555        return None;
556    }
557
558    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
559        if let Some(idx) = datafile_path.rfind(segment) {
560            if datafile_path.ends_with("/METADATA") {
561                let root_end = idx + segment.len();
562                let root = datafile_path[..root_end].to_string();
563                return Some(PythonMetadataResolution {
564                    base_path: root.clone(),
565                    allowed_root: root,
566                });
567            }
568
569            if datafile_path.ends_with("/PKG-INFO") {
570                let parent = Path::new(datafile_path).parent()?;
571                let allowed_root = datafile_path[..idx + segment.len()].to_string();
572                return Some(PythonMetadataResolution {
573                    base_path: parent.to_string_lossy().to_string(),
574                    allowed_root,
575                });
576            }
577        }
578    }
579
580    if datafile_path.ends_with(".egg-info/PKG-INFO") {
581        let metadata_parent = Path::new(datafile_path).parent()?;
582        let project_root = metadata_parent.parent()?;
583        let project_root = project_root.to_string_lossy().to_string();
584        return Some(PythonMetadataResolution {
585            base_path: project_root.clone(),
586            allowed_root: project_root,
587        });
588    }
589
590    None
591}
592
/// Joins `relative` onto `base`, collapses `.` and `..` components lexically,
/// and returns the result only if it stays inside `allowed_root`.
///
/// * `base` - directory the reference is relative to.
/// * `allowed_root` - directory the normalized path must start with
///   (compared component-wise, so a trailing `/` is irrelevant).
/// * `relative` - reference path; leading `/` characters are ignored.
///
/// Returns `None` when the normalized path is outside `allowed_root`, or when
/// a `..` component would climb above the top of the joined path. The latter
/// case used to be silently clamped, which let a reference such as
/// `../../../a/b/x` (from base `a/b`) re-enter the allowed root and match an
/// unrelated file.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` returns false when there is nothing left to remove,
                // i.e. the reference escapes above the known path.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    if normalized.starts_with(Path::new(allowed_root)) {
        Some(normalized.to_string_lossy().into_owned())
    } else {
        None
    }
}
614
615fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
616    let path = datafile_path?;
617    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
618    Some(path[..idx].to_string())
619}
620
/// Folds yumdb packages into the installed RPM package they describe.
///
/// For every package whose `datasource_ids` contains `DatasourceId::RpmYumdb`,
/// looks for the first other RPM package with the same root prefix, name,
/// version and `arch` qualifier. When one is found, the yumdb package's
/// datafile paths and datasource IDs are appended to it, its `extra_data` is
/// merged under the target's `extra_data["yumdb"]` object, every file that
/// pointed at the yumdb package is re-pointed at the target, and the yumdb
/// package is removed from `packages`. Yumdb packages with no match are kept.
pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
    // Indices are captured up front; nothing is removed until the end, so
    // they stay valid throughout the loop.
    let yumdb_indices: Vec<usize> = packages
        .iter()
        .enumerate()
        .filter_map(|(idx, package)| {
            package
                .datasource_ids
                .contains(&DatasourceId::RpmYumdb)
                .then_some(idx)
        })
        .collect();
    let mut removal_indices = Vec::new();

    for yumdb_idx in yumdb_indices {
        // Cloned so the target package can be mutated while reading from it.
        let yumdb_package = packages[yumdb_idx].clone();
        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
            continue;
        };
        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
        let yumdb_arch = yumdb_package
            .qualifiers
            .as_ref()
            .and_then(|qualifiers| qualifiers.get("arch"));

        // First RPM package (other than the yumdb entry itself) under the
        // same root with identical name, version and arch.
        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
            if idx == yumdb_idx || !is_rpm_package(package) {
                return None;
            }

            let config = find_db_config(package)?;
            let datafile_path = package.datafile_paths.first()?;
            let target_root = compute_root(datafile_path, config.path_suffix);
            let target_arch = package
                .qualifiers
                .as_ref()
                .and_then(|qualifiers| qualifiers.get("arch"));

            (target_root == yumdb_root
                && package.name == yumdb_package.name
                && package.version == yumdb_package.version
                && target_arch == yumdb_arch)
                .then_some(idx)
        }) else {
            continue;
        };

        let target_package_uid = packages[target_idx].package_uid.clone();
        {
            let target = &mut packages[target_idx];
            target
                .datafile_paths
                .extend(yumdb_package.datafile_paths.clone());
            target
                .datasource_ids
                .extend(yumdb_package.datasource_ids.clone());

            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
                && !yumdb_extra.is_empty()
            {
                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
                // Start from an existing "yumdb" object if there is one;
                // incoming keys overwrite same-named existing keys.
                let mut merged_yumdb = extra_data
                    .get("yumdb")
                    .and_then(|value| value.as_object().cloned())
                    .unwrap_or_default();
                for (key, value) in yumdb_extra {
                    merged_yumdb.insert(key, value);
                }
                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
            }
        }

        // Re-point files from the yumdb package to the merged target.
        for file in files.iter_mut() {
            for package_uid in &mut file.for_packages {
                if *package_uid == yumdb_package.package_uid {
                    *package_uid = target_package_uid.clone();
                }
            }
        }

        removal_indices.push(yumdb_idx);
    }

    // Remove from the highest index down so earlier removals do not shift
    // the positions still to be removed.
    removal_indices.sort_unstable();
    removal_indices.dedup();
    for idx in removal_indices.into_iter().rev() {
        packages.remove(idx);
    }
}
709
710fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
711    files
712        .iter()
713        .enumerate()
714        .map(|(idx, file)| (file.path.clone(), idx))
715        .collect()
716}
717
718fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
719    let datafile_paths = &package.datafile_paths;
720
721    for config in DB_PATH_CONFIGS {
722        if !datafile_paths.is_empty()
723            && !datafile_paths
724                .iter()
725                .any(|path| path.ends_with(config.path_suffix))
726        {
727            continue;
728        }
729
730        for &config_dsid in config.datasource_ids {
731            for &pkg_dsid in &package.datasource_ids {
732                if config_dsid == pkg_dsid {
733                    return Some(config);
734                }
735            }
736        }
737    }
738
739    for config in DB_PATH_CONFIGS {
740        for &config_dsid in config.datasource_ids {
741            for &pkg_dsid in &package.datasource_ids {
742                if config_dsid == pkg_dsid {
743                    return Some(config);
744                }
745            }
746        }
747    }
748
749    None
750}
751
/// Returns everything in `datafile_path` that precedes the last occurrence of
/// `suffix`, or an empty string when `suffix` does not occur.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    match datafile_path.rfind(suffix) {
        Some(suffix_start) => datafile_path[..suffix_start].to_string(),
        None => String::new(),
    }
}
764
765fn collect_file_references(
766    files: &[FileInfo],
767    path_index: &HashMap<String, usize>,
768    datafile_path: &str,
769    package_datasource_ids: &[DatasourceId],
770    config_datasource_ids: &[DatasourceId],
771    package_purl: Option<&str>,
772) -> Vec<crate::models::FileReference> {
773    let file_idx = match path_index.get(datafile_path) {
774        Some(&idx) => idx,
775        None => return Vec::new(),
776    };
777
778    let file = &files[file_idx];
779    let mut refs = Vec::new();
780
781    for pkg_data in &file.package_data {
782        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
783            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
784        });
785
786        if !dsid_matches {
787            continue;
788        }
789
790        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
791            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
792            _ => true,
793        };
794
795        if purl_matches {
796            refs.extend(pkg_data.file_references.clone());
797        }
798    }
799
800    refs
801}
802
803fn is_rpm_package(package: &Package) -> bool {
804    for &dsid in &package.datasource_ids {
805        for &rpm_dsid in RPM_DATASOURCE_IDS {
806            if rpm_dsid == dsid {
807                return true;
808            }
809        }
810    }
811    false
812}
813
814fn is_debian_installed_package(package: &Package) -> bool {
815    package
816        .datasource_ids
817        .contains(&DatasourceId::DebianInstalledStatusDb)
818        || package
819            .datasource_ids
820            .contains(&DatasourceId::DebianDistrolessInstalledDb)
821}
822
823fn collect_debian_installed_file_references(
824    files: &[FileInfo],
825    package: &Package,
826) -> Vec<crate::models::FileReference> {
827    let mut refs = Vec::new();
828
829    for file in files {
830        for pkg_data in &file.package_data {
831            let Some(dsid) = pkg_data.datasource_id else {
832                continue;
833            };
834            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
835                continue;
836            }
837
838            if pkg_data.name != package.name {
839                continue;
840            }
841            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
842                continue;
843            }
844            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
845                continue;
846            }
847
848            merge_file_references(&mut refs, pkg_data.file_references.clone());
849        }
850    }
851
852    refs
853}
854
855fn find_attached_manifest_file_references<'a>(
856    files: &'a [FileInfo],
857    package: &Package,
858    datasource_id: DatasourceId,
859) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
860    for file in files {
861        if !file.for_packages.contains(&package.package_uid) {
862            continue;
863        }
864
865        for pkg_data in &file.package_data {
866            if pkg_data.datasource_id == Some(datasource_id) {
867                return Some((&file.path, pkg_data.file_references.clone()));
868            }
869        }
870    }
871
872    None
873}
874
/// Decides whether a supplemental datafile's namespace is compatible with a
/// package's namespace.
///
/// A missing namespace on either side is compatible with anything. When both
/// are present they must be equal, except that a supplemental `debian`
/// namespace is also accepted for an `ubuntu` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    let (Some(supplemental), Some(package)) = (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) else {
        return true;
    };

    (supplemental == "debian" && package == "ubuntu") || supplemental == package
}
889
/// Decides whether a supplemental datafile's `arch` qualifier is compatible
/// with a package's.
///
/// A supplemental entry without an `arch` matches any package. One with an
/// `arch` matches only a package carrying the same `arch` value.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    /// Reads the `arch` qualifier, if any.
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        match qualifiers {
            Some(map) => map.get("arch"),
            None => None,
        }
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(supplemental_arch) => arch_of(package_qualifiers) == Some(supplemental_arch),
    }
}
907
908fn merge_file_references(
909    target: &mut Vec<crate::models::FileReference>,
910    incoming: Vec<crate::models::FileReference>,
911) {
912    for file_ref in incoming {
913        if let Some(existing) = target
914            .iter_mut()
915            .find(|existing| existing.path == file_ref.path)
916        {
917            if existing.size.is_none() {
918                existing.size = file_ref.size;
919            }
920            if existing.sha1.is_none() {
921                existing.sha1 = file_ref.sha1;
922            }
923            if existing.md5.is_none() {
924                existing.md5 = file_ref.md5;
925            }
926            if existing.sha256.is_none() {
927                existing.sha256 = file_ref.sha256;
928            }
929            if existing.sha512.is_none() {
930                existing.sha512 = file_ref.sha512;
931            }
932            if existing.extra_data.is_none() {
933                existing.extra_data = file_ref.extra_data.clone();
934            }
935        } else {
936            target.push(file_ref);
937        }
938    }
939}
940
941fn resolve_rpm_namespace(
942    files: &[FileInfo],
943    path_index: &HashMap<String, usize>,
944    root: &str,
945) -> Option<String> {
946    let os_release_paths = [
947        format!("{}etc/os-release", root),
948        format!("{}usr/lib/os-release", root),
949    ];
950
951    for os_release_path in &os_release_paths {
952        if let Some(&file_idx) = path_index.get(os_release_path) {
953            let file = &files[file_idx];
954            for pkg_data in &file.package_data {
955                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
956                    && let Some(namespace) = &pkg_data.namespace
957                {
958                    return Some(namespace.clone());
959                }
960            }
961        }
962    }
963
964    None
965}
966
967fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
968    let parsed = PackageUrl::from_str(existing_purl).ok()?;
969    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;
970
971    updated.with_namespace(namespace).ok()?;
972
973    if let Some(version) = parsed.version() {
974        updated.with_version(version).ok()?;
975    }
976
977    if let Some(subpath) = parsed.subpath() {
978        updated.with_subpath(subpath).ok()?;
979    }
980
981    for (key, value) in parsed.qualifiers() {
982        updated
983            .add_qualifier(key.to_string(), value.to_string())
984            .ok()?;
985    }
986
987    Some(updated.to_string())
988}
989
990fn apply_rpm_namespace(
991    files: &mut [FileInfo],
992    package: &mut Package,
993    dependencies: &mut [TopLevelDependency],
994    namespace: &str,
995) {
996    let old_package_uid = package.package_uid.clone();
997
998    package.namespace = Some(namespace.to_string());
999
1000    if let Some(current_purl) = package.purl.as_deref()
1001        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1002    {
1003        package.purl = Some(updated_purl.clone());
1004        package.package_uid = old_package_uid.replace_base(&updated_purl);
1005    }
1006
1007    for file in files.iter_mut() {
1008        for package_uid in &mut file.for_packages {
1009            if *package_uid == old_package_uid {
1010                *package_uid = package.package_uid.clone();
1011            }
1012        }
1013    }
1014
1015    for dep in dependencies.iter_mut() {
1016        if dep.for_package_uid.as_ref() == Some(&old_package_uid) {
1017            dep.for_package_uid = Some(package.package_uid.clone());
1018        }
1019
1020        if dep.for_package_uid.as_ref() == Some(&package.package_uid) {
1021            dep.namespace = Some(namespace.to_string());
1022
1023            if let Some(current_purl) = dep.purl.as_deref()
1024                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
1025            {
1026                dep.purl = Some(updated_purl.clone());
1027                dep.dependency_uid = dep.dependency_uid.replace_base(&updated_purl);
1028            }
1029        }
1030    }
1031}
1032
1033#[cfg(test)]
1034#[path = "file_ref_resolve_test.rs"]
1035mod tests;