Skip to main content

provenant/assembly/
file_ref_resolve.rs

1use std::collections::HashMap;
2use std::path::Path;
3use std::str::FromStr;
4
5use crate::models::{DatasourceId, FileInfo, Package, TopLevelDependency};
6use packageurl::PackageUrl;
7use strum::EnumIter;
8
/// Pairs installed-package database datasources with the path suffix at which
/// their database file is expected.
struct DbPathConfig {
    /// Datasource IDs this entry applies to.
    datasource_ids: &'static [DatasourceId],
    /// Path fragment searched for in the datafile path; everything before its
    /// last occurrence is treated as the root prefix (see `compute_root`).
    path_suffix: &'static str,
}
13
/// Strategy used to turn a package's file references into scanned file paths.
///
/// `resolve_file_references` dispatches on this value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumIter)]
enum FileReferenceResolverKind {
    /// References are joined onto the parent directory of the package datafile
    /// (handled by the same routine as `RelativeToDatafileParent`).
    About,
    /// References come from a manifest file that is already attached to the
    /// package via `for_packages`, and are joined onto that manifest's parent
    /// directory.
    AttachedManifest,
    /// References are joined onto the path prefix that precedes `conda-meta/`.
    CondaMeta,
    /// References are joined onto the root derived from the installed-package
    /// database path.
    InstalledDb,
    /// References are normalized against the Python metadata base path and must
    /// stay inside its allowed root.
    PythonMetadata,
    /// References are joined onto the parent directory of the package datafile.
    RelativeToDatafileParent,
}
23
/// Registers which datasources are handled by which resolver strategy.
struct FileReferenceResolverConfig {
    /// Datasource IDs that select this resolver.
    datasource_ids: &'static [DatasourceId],
    /// Strategy applied when one of `datasource_ids` matches.
    kind: FileReferenceResolverKind,
}
28
/// Known installed-package databases and the path suffix that locates each one.
///
/// `find_db_config` returns the first entry sharing a datasource ID with the
/// package, and `compute_root` strips the suffix to obtain the root prefix.
const DB_PATH_CONFIGS: &[DbPathConfig] = &[
    DbPathConfig {
        datasource_ids: &[DatasourceId::AlpineInstalledDb],
        path_suffix: "lib/apk/db/installed",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseBdb],
        path_suffix: "var/lib/rpm/Packages",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseNdb],
        path_suffix: "usr/lib/sysimage/rpm/Packages.db",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::RpmInstalledDatabaseSqlite],
        path_suffix: "usr/lib/sysimage/rpm/rpmdb.sqlite",
    },
    DbPathConfig {
        datasource_ids: &[DatasourceId::DebianInstalledStatusDb],
        path_suffix: "var/lib/dpkg/status",
    },
    DbPathConfig {
        // Trailing slash: the suffix names a directory, so the datafile path
        // continues past it.
        datasource_ids: &[DatasourceId::DebianDistrolessInstalledDb],
        path_suffix: "var/lib/dpkg/status.d/",
    },
];
55
/// Datasources that mark a package as coming from an RPM installed database
/// (checked by `is_rpm_package`).
const RPM_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
];
/// Path fragment used to derive the root prefix of a yumdb datafile.
const RPM_YUMDB_PATH_SUFFIX: &str = "var/lib/yum/yumdb/";
/// Path fragment identifying a conda metadata datafile; the text before its
/// last occurrence is used as the conda root.
const CONDA_META_PATH_SEGMENT: &str = "conda-meta/";
/// Datasources handled by the Python metadata resolver.
const PYTHON_METADATA_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::PypiWheelMetadata,
    DatasourceId::PypiSdistPkginfo,
];
/// Directory segments searched for (in this order) to locate the Python
/// installation root inside a metadata path.
const PYTHON_SITE_PACKAGES_SEGMENTS: &[&str] = &["site-packages/", "dist-packages/"];
/// Datasources whose file references are merged into a Debian installed
/// package in addition to those of its own datafile.
const DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::DebianInstalledFilesList,
    DatasourceId::DebianInstalledMd5Sums,
];
72
/// Datasources routed to the installed-database resolver.
///
/// Lists the same IDs that appear in `DB_PATH_CONFIGS`.
const INSTALLED_DB_DATASOURCE_IDS: &[DatasourceId] = &[
    DatasourceId::AlpineInstalledDb,
    DatasourceId::RpmInstalledDatabaseBdb,
    DatasourceId::RpmInstalledDatabaseNdb,
    DatasourceId::RpmInstalledDatabaseSqlite,
    DatasourceId::DebianInstalledStatusDb,
    DatasourceId::DebianDistrolessInstalledDb,
];
81
/// Registry mapping datasources to resolver strategies.
///
/// `find_file_reference_resolver` returns the first matching entry, so the
/// order here decides which resolver wins if a package matches several.
const FILE_REFERENCE_RESOLVER_CONFIGS: &[FileReferenceResolverConfig] = &[
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::AboutFile],
        kind: FileReferenceResolverKind::About,
    },
    FileReferenceResolverConfig {
        // The attached-manifest resolver reads only the first ID of its list.
        datasource_ids: &[DatasourceId::CpanManifest],
        kind: FileReferenceResolverKind::AttachedManifest,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::CondaMetaJson],
        kind: FileReferenceResolverKind::CondaMeta,
    },
    FileReferenceResolverConfig {
        datasource_ids: INSTALLED_DB_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::InstalledDb,
    },
    FileReferenceResolverConfig {
        datasource_ids: PYTHON_METADATA_DATASOURCE_IDS,
        kind: FileReferenceResolverKind::PythonMetadata,
    },
    FileReferenceResolverConfig {
        datasource_ids: &[DatasourceId::GradleModule],
        kind: FileReferenceResolverKind::RelativeToDatafileParent,
    },
];
108
/// Paths needed to resolve the file references of a Python metadata file.
struct PythonMetadataResolution {
    /// Directory that relative references are joined onto.
    base_path: String,
    /// Prefix a normalized reference must start with to be accepted.
    allowed_root: String,
}
113
114pub fn resolve_file_references(
115    files: &mut [FileInfo],
116    packages: &mut [Package],
117    dependencies: &mut [TopLevelDependency],
118) {
119    let path_index = build_path_index(&*files);
120
121    for package in packages.iter_mut() {
122        let Some(config) = find_file_reference_resolver(files, package) else {
123            continue;
124        };
125
126        match config.kind {
127            FileReferenceResolverKind::About
128            | FileReferenceResolverKind::RelativeToDatafileParent => {
129                resolve_relative_to_datafile_parent(
130                    files,
131                    &path_index,
132                    package,
133                    config.datasource_ids,
134                );
135            }
136            FileReferenceResolverKind::AttachedManifest => {
137                resolve_attached_manifest_file_references(
138                    files,
139                    &path_index,
140                    package,
141                    config.datasource_ids[0],
142                );
143            }
144            FileReferenceResolverKind::CondaMeta => {
145                resolve_conda_file_references(files, &path_index, package);
146            }
147            FileReferenceResolverKind::InstalledDb => {
148                resolve_installed_db_file_references(files, &path_index, package, dependencies);
149            }
150            FileReferenceResolverKind::PythonMetadata => {
151                resolve_python_metadata_file_references(files, &path_index, package);
152            }
153        }
154    }
155}
156
157fn resolve_relative_to_datafile_parent(
158    files: &mut [FileInfo],
159    path_index: &HashMap<String, usize>,
160    package: &mut Package,
161    datasource_ids: &[DatasourceId],
162) {
163    let Some(datafile_path) = package.datafile_paths.first() else {
164        return;
165    };
166    let root = Path::new(datafile_path)
167        .parent()
168        .map(|p| p.to_string_lossy().to_string())
169        .unwrap_or_default();
170
171    let file_references = collect_file_references(
172        files,
173        path_index,
174        datafile_path,
175        &package.datasource_ids,
176        datasource_ids,
177        package.purl.as_deref(),
178    );
179
180    let mut missing_refs = Vec::new();
181    for file_ref in &file_references {
182        let resolved_path = if root.is_empty() {
183            file_ref.path.clone()
184        } else {
185            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
186        };
187        if let Some(&file_idx) = path_index.get(&resolved_path) {
188            let package_uid = package.package_uid.clone();
189            if !files[file_idx].for_packages.contains(&package_uid) {
190                files[file_idx].for_packages.push(package_uid);
191            }
192        } else {
193            missing_refs.push(file_ref.path.clone());
194        }
195    }
196
197    record_missing_file_references(package, missing_refs);
198}
199
200fn resolve_attached_manifest_file_references(
201    files: &mut [FileInfo],
202    path_index: &HashMap<String, usize>,
203    package: &mut Package,
204    datasource_id: DatasourceId,
205) {
206    let Some((datafile_path, file_references)) =
207        find_attached_manifest_file_references(files, package, datasource_id)
208    else {
209        return;
210    };
211
212    let root = Path::new(datafile_path)
213        .parent()
214        .map(|p| p.to_string_lossy().to_string())
215        .unwrap_or_default();
216
217    let mut missing_refs = Vec::new();
218    for file_ref in &file_references {
219        let resolved_path = if root.is_empty() {
220            file_ref.path.clone()
221        } else {
222            format!("{}/{}", root, file_ref.path.trim_start_matches('/'))
223        };
224
225        if let Some(&file_idx) = path_index.get(&resolved_path) {
226            let package_uid = package.package_uid.clone();
227            if !files[file_idx].for_packages.contains(&package_uid) {
228                files[file_idx].for_packages.push(package_uid);
229            }
230        } else {
231            missing_refs.push(file_ref.path.clone());
232        }
233    }
234
235    record_missing_file_references(package, missing_refs);
236}
237
238fn resolve_conda_file_references(
239    files: &mut [FileInfo],
240    path_index: &HashMap<String, usize>,
241    package: &mut Package,
242) {
243    let Some(conda_meta_path) = package
244        .datafile_paths
245        .iter()
246        .find(|path| path.contains(CONDA_META_PATH_SEGMENT))
247    else {
248        return;
249    };
250    let Some(root) = compute_conda_root(Some(conda_meta_path.as_str())) else {
251        return;
252    };
253
254    let file_references = collect_file_references(
255        files,
256        path_index,
257        conda_meta_path,
258        &package.datasource_ids,
259        &[DatasourceId::CondaMetaJson],
260        package.purl.as_deref(),
261    );
262
263    let mut missing_refs = Vec::new();
264    for file_ref in &file_references {
265        let resolved_path = format!("{}{}", root, file_ref.path.trim_start_matches('/'));
266        if let Some(&file_idx) = path_index.get(&resolved_path) {
267            let package_uid = package.package_uid.clone();
268            if !files[file_idx].for_packages.contains(&package_uid) {
269                files[file_idx].for_packages.push(package_uid);
270            }
271        } else {
272            missing_refs.push(file_ref.path.clone());
273        }
274    }
275
276    record_missing_file_references(package, missing_refs);
277}
278
279fn resolve_installed_db_file_references(
280    files: &mut [FileInfo],
281    path_index: &HashMap<String, usize>,
282    package: &mut Package,
283    dependencies: &mut [TopLevelDependency],
284) {
285    let Some(config) = find_db_config(package) else {
286        return;
287    };
288    let Some(datafile_path) = package.datafile_paths.first() else {
289        return;
290    };
291
292    let root = compute_root(datafile_path, config.path_suffix);
293
294    let mut file_references = collect_file_references(
295        files,
296        path_index,
297        datafile_path,
298        &package.datasource_ids,
299        config.datasource_ids,
300        package.purl.as_deref(),
301    );
302
303    if is_debian_installed_package(package) {
304        merge_file_references(
305            &mut file_references,
306            collect_debian_installed_file_references(files, package),
307        );
308    }
309
310    let mut missing_refs = Vec::new();
311    for file_ref in &file_references {
312        let ref_path = file_ref.path.trim_start_matches('/');
313        let resolved_path = if root.is_empty() {
314            ref_path.to_string()
315        } else {
316            format!("{}{}", root, ref_path)
317        };
318
319        if let Some(&file_idx) = path_index.get(&resolved_path) {
320            let package_uid = package.package_uid.clone();
321            if !files[file_idx].for_packages.contains(&package_uid) {
322                files[file_idx].for_packages.push(package_uid);
323            }
324        } else {
325            missing_refs.push(file_ref.path.clone());
326        }
327    }
328
329    record_missing_file_references(package, missing_refs);
330
331    if is_rpm_package(package)
332        && let Some(namespace) = resolve_rpm_namespace(files, path_index, &root)
333    {
334        apply_rpm_namespace(files, package, dependencies, &namespace);
335    }
336}
337
338fn resolve_python_metadata_file_references(
339    files: &mut [FileInfo],
340    path_index: &HashMap<String, usize>,
341    package: &mut Package,
342) {
343    let Some(python_resolution) = find_python_metadata_root(package) else {
344        return;
345    };
346    let Some(datafile_path) = package
347        .datafile_paths
348        .iter()
349        .find(|path| is_python_metadata_layout(path))
350    else {
351        return;
352    };
353
354    let file_references = collect_file_references(
355        files,
356        path_index,
357        datafile_path,
358        &package.datasource_ids,
359        PYTHON_METADATA_DATASOURCE_IDS,
360        package.purl.as_deref(),
361    );
362
363    let mut missing_refs = Vec::new();
364    for file_ref in &file_references {
365        let Some(resolved_path) = normalize_relative_path(
366            &python_resolution.base_path,
367            &python_resolution.allowed_root,
368            &file_ref.path,
369        ) else {
370            missing_refs.push(file_ref.path.clone());
371            continue;
372        };
373
374        if let Some(&file_idx) = path_index.get(&resolved_path) {
375            let package_uid = package.package_uid.clone();
376            if !files[file_idx].for_packages.contains(&package_uid) {
377                files[file_idx].for_packages.push(package_uid);
378            }
379        } else {
380            missing_refs.push(file_ref.path.clone());
381        }
382    }
383
384    record_missing_file_references(package, missing_refs);
385}
386
387fn record_missing_file_references(package: &mut Package, mut missing_refs: Vec<String>) {
388    if missing_refs.is_empty() {
389        return;
390    }
391
392    missing_refs.sort();
393    let missing_refs_json: Vec<serde_json::Value> = missing_refs
394        .into_iter()
395        .map(|path| serde_json::json!({"path": path}))
396        .collect();
397
398    let extra_data = package.extra_data.get_or_insert_with(HashMap::new);
399    extra_data.insert(
400        "missing_file_references".to_string(),
401        serde_json::Value::Array(missing_refs_json),
402    );
403}
404
405fn find_file_reference_resolver(
406    files: &[FileInfo],
407    package: &Package,
408) -> Option<&'static FileReferenceResolverConfig> {
409    FILE_REFERENCE_RESOLVER_CONFIGS
410        .iter()
411        .find(|config| match config.kind {
412            FileReferenceResolverKind::AttachedManifest => {
413                config.datasource_ids.iter().any(|datasource_id| {
414                    files.iter().any(|file| {
415                        file.for_packages.contains(&package.package_uid)
416                            && file
417                                .package_data
418                                .iter()
419                                .any(|pkg_data| pkg_data.datasource_id == Some(*datasource_id))
420                    })
421                })
422            }
423            _ => config
424                .datasource_ids
425                .iter()
426                .any(|datasource_id| package.datasource_ids.contains(datasource_id)),
427        })
428}
429
/// Reports whether `path` names a `METADATA` or `PKG-INFO` file inside some
/// directory (a leading `/` before the file name is required).
fn is_python_metadata_layout(path: &str) -> bool {
    ["/METADATA", "/PKG-INFO"]
        .iter()
        .any(|file_name| path.ends_with(file_name))
}
433
434fn find_python_metadata_root(package: &Package) -> Option<PythonMetadataResolution> {
435    let datafile_path = package
436        .datafile_paths
437        .iter()
438        .find(|path| is_python_metadata_layout(path))?;
439
440    if !package
441        .datasource_ids
442        .iter()
443        .any(|datasource_id| PYTHON_METADATA_DATASOURCE_IDS.contains(datasource_id))
444    {
445        return None;
446    }
447
448    for segment in PYTHON_SITE_PACKAGES_SEGMENTS {
449        if let Some(idx) = datafile_path.rfind(segment) {
450            if datafile_path.ends_with("/METADATA") {
451                let root_end = idx + segment.len();
452                let root = datafile_path[..root_end].to_string();
453                return Some(PythonMetadataResolution {
454                    base_path: root.clone(),
455                    allowed_root: root,
456                });
457            }
458
459            if datafile_path.ends_with("/PKG-INFO") {
460                let parent = Path::new(datafile_path).parent()?;
461                let allowed_root = datafile_path[..idx + segment.len()].to_string();
462                return Some(PythonMetadataResolution {
463                    base_path: parent.to_string_lossy().to_string(),
464                    allowed_root,
465                });
466            }
467        }
468    }
469
470    if datafile_path.ends_with(".egg-info/PKG-INFO") {
471        let metadata_parent = Path::new(datafile_path).parent()?;
472        let project_root = metadata_parent.parent()?;
473        let project_root = project_root.to_string_lossy().to_string();
474        return Some(PythonMetadataResolution {
475            base_path: project_root.clone(),
476            allowed_root: project_root,
477        });
478    }
479
480    None
481}
482
/// Joins `relative` onto `base`, collapses `.` and `..` lexically, and returns
/// the result only if it stays inside `allowed_root`.
///
/// Leading `/` characters on `relative` are stripped, so it is always treated
/// as relative to `base`. No filesystem access is performed.
///
/// Returns `None` when:
/// * a `..` would climb above the top of the joined path (such a reference
///   points outside the scanned tree and must not be clamped back into it), or
/// * the normalized path does not start with `allowed_root`.
fn normalize_relative_path(base: &str, allowed_root: &str, relative: &str) -> Option<String> {
    let joined = Path::new(base).join(relative.trim_start_matches('/'));
    let mut normalized = std::path::PathBuf::new();

    for component in joined.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` returns false when there is nothing left to remove;
                // ignoring that would let `../../<root>/x` re-enter the root.
                if !normalized.pop() {
                    return None;
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }

    // Component-wise comparison: a trailing slash on `allowed_root` is ignored
    // and `site-packages-evil` does not match `site-packages`.
    if normalized.starts_with(Path::new(allowed_root)) {
        Some(normalized.to_string_lossy().into_owned())
    } else {
        None
    }
}
504
505fn compute_conda_root(datafile_path: Option<&str>) -> Option<String> {
506    let path = datafile_path?;
507    let idx = path.rfind(CONDA_META_PATH_SEGMENT)?;
508    Some(path[..idx].to_string())
509}
510
/// Folds yumdb-only packages into the matching RPM installed-database package.
///
/// A yumdb package is merged into the first other RPM package that shares its
/// root prefix, name, version and `arch` qualifier. The target receives the
/// yumdb datafile paths and datasource IDs, the yumdb `extra_data` entries are
/// merged under `extra_data["yumdb"]`, file ownership is re-pointed to the
/// target, and the yumdb package is removed from `packages`. Yumdb packages
/// without a match are left in place.
pub fn merge_rpm_yumdb_metadata(files: &mut [FileInfo], packages: &mut Vec<Package>) {
    // Indices are collected up front; nothing is removed until the very end, so
    // they stay valid throughout the loop.
    let yumdb_indices: Vec<usize> = packages
        .iter()
        .enumerate()
        .filter_map(|(idx, package)| {
            package
                .datasource_ids
                .contains(&DatasourceId::RpmYumdb)
                .then_some(idx)
        })
        .collect();
    let mut removal_indices = Vec::new();

    for yumdb_idx in yumdb_indices {
        // Cloned so the target package can be mutably borrowed below.
        let yumdb_package = packages[yumdb_idx].clone();
        let Some(yumdb_path) = yumdb_package.datafile_paths.first() else {
            continue;
        };
        let yumdb_root = compute_root(yumdb_path, RPM_YUMDB_PATH_SUFFIX);
        let yumdb_arch = yumdb_package
            .qualifiers
            .as_ref()
            .and_then(|qualifiers| qualifiers.get("arch"));

        // First RPM database package with the same root, name, version and arch.
        let Some(target_idx) = packages.iter().enumerate().find_map(|(idx, package)| {
            if idx == yumdb_idx || !is_rpm_package(package) {
                return None;
            }

            let config = find_db_config(package)?;
            let datafile_path = package.datafile_paths.first()?;
            let target_root = compute_root(datafile_path, config.path_suffix);
            let target_arch = package
                .qualifiers
                .as_ref()
                .and_then(|qualifiers| qualifiers.get("arch"));

            (target_root == yumdb_root
                && package.name == yumdb_package.name
                && package.version == yumdb_package.version
                && target_arch == yumdb_arch)
                .then_some(idx)
        }) else {
            continue;
        };

        let target_package_uid = packages[target_idx].package_uid.clone();
        {
            let target = &mut packages[target_idx];
            target
                .datafile_paths
                .extend(yumdb_package.datafile_paths.clone());
            target
                .datasource_ids
                .extend(yumdb_package.datasource_ids.clone());

            if let Some(yumdb_extra) = yumdb_package.extra_data.clone()
                && !yumdb_extra.is_empty()
            {
                let extra_data = target.extra_data.get_or_insert_with(HashMap::new);
                // Start from an existing "yumdb" object if there is one; incoming
                // keys overwrite existing ones.
                let mut merged_yumdb = extra_data
                    .get("yumdb")
                    .and_then(|value| value.as_object().cloned())
                    .unwrap_or_default();
                for (key, value) in yumdb_extra {
                    merged_yumdb.insert(key, value);
                }
                extra_data.insert("yumdb".to_string(), serde_json::Value::Object(merged_yumdb));
            }
        }

        // Files owned by the yumdb package now belong to the merge target.
        for file in files.iter_mut() {
            for package_uid in &mut file.for_packages {
                if *package_uid == yumdb_package.package_uid {
                    *package_uid = target_package_uid.clone();
                }
            }
        }

        removal_indices.push(yumdb_idx);
    }

    // Remove from the back so earlier indices are not shifted.
    removal_indices.sort_unstable();
    removal_indices.dedup();
    for idx in removal_indices.into_iter().rev() {
        packages.remove(idx);
    }
}
599
600fn build_path_index(files: &[FileInfo]) -> HashMap<String, usize> {
601    files
602        .iter()
603        .enumerate()
604        .map(|(idx, file)| (file.path.clone(), idx))
605        .collect()
606}
607
608fn find_db_config(package: &Package) -> Option<&'static DbPathConfig> {
609    for config in DB_PATH_CONFIGS {
610        for &config_dsid in config.datasource_ids {
611            for &pkg_dsid in &package.datasource_ids {
612                if config_dsid == pkg_dsid {
613                    return Some(config);
614                }
615            }
616        }
617    }
618    None
619}
620
/// Returns the part of `datafile_path` that precedes the last occurrence of
/// `suffix`.
///
/// The result is empty when the suffix starts the path or does not occur at
/// all; otherwise it keeps whatever separator preceded the suffix.
fn compute_root(datafile_path: &str, suffix: &str) -> String {
    datafile_path
        .rfind(suffix)
        .map(|suffix_start| datafile_path[..suffix_start].to_string())
        .unwrap_or_default()
}
633
634fn collect_file_references(
635    files: &[FileInfo],
636    path_index: &HashMap<String, usize>,
637    datafile_path: &str,
638    package_datasource_ids: &[DatasourceId],
639    config_datasource_ids: &[DatasourceId],
640    package_purl: Option<&str>,
641) -> Vec<crate::models::FileReference> {
642    let file_idx = match path_index.get(datafile_path) {
643        Some(&idx) => idx,
644        None => return Vec::new(),
645    };
646
647    let file = &files[file_idx];
648    let mut refs = Vec::new();
649
650    for pkg_data in &file.package_data {
651        let dsid_matches = pkg_data.datasource_id.is_some_and(|dsid| {
652            package_datasource_ids.contains(&dsid) || config_datasource_ids.contains(&dsid)
653        });
654
655        if !dsid_matches {
656            continue;
657        }
658
659        let purl_matches = match (package_purl, pkg_data.purl.as_deref()) {
660            (Some(pkg_purl), Some(data_purl)) => pkg_purl == data_purl,
661            _ => true,
662        };
663
664        if purl_matches {
665            refs.extend(pkg_data.file_references.clone());
666        }
667    }
668
669    refs
670}
671
672fn is_rpm_package(package: &Package) -> bool {
673    for &dsid in &package.datasource_ids {
674        for &rpm_dsid in RPM_DATASOURCE_IDS {
675            if rpm_dsid == dsid {
676                return true;
677            }
678        }
679    }
680    false
681}
682
683fn is_debian_installed_package(package: &Package) -> bool {
684    package
685        .datasource_ids
686        .contains(&DatasourceId::DebianInstalledStatusDb)
687        || package
688            .datasource_ids
689            .contains(&DatasourceId::DebianDistrolessInstalledDb)
690}
691
692fn collect_debian_installed_file_references(
693    files: &[FileInfo],
694    package: &Package,
695) -> Vec<crate::models::FileReference> {
696    let mut refs = Vec::new();
697
698    for file in files {
699        for pkg_data in &file.package_data {
700            let Some(dsid) = pkg_data.datasource_id else {
701                continue;
702            };
703            if !DEBIAN_INSTALLED_SUPPLEMENTAL_DATASOURCE_IDS.contains(&dsid) {
704                continue;
705            }
706
707            if pkg_data.name != package.name {
708                continue;
709            }
710            if !debian_installed_namespace_matches(&pkg_data.namespace, &package.namespace) {
711                continue;
712            }
713            if !debian_installed_arch_matches(&pkg_data.qualifiers, &package.qualifiers) {
714                continue;
715            }
716
717            merge_file_references(&mut refs, pkg_data.file_references.clone());
718        }
719    }
720
721    refs
722}
723
724fn find_attached_manifest_file_references<'a>(
725    files: &'a [FileInfo],
726    package: &Package,
727    datasource_id: DatasourceId,
728) -> Option<(&'a str, Vec<crate::models::FileReference>)> {
729    for file in files {
730        if !file.for_packages.contains(&package.package_uid) {
731            continue;
732        }
733
734        for pkg_data in &file.package_data {
735            if pkg_data.datasource_id == Some(datasource_id) {
736                return Some((&file.path, pkg_data.file_references.clone()));
737            }
738        }
739    }
740
741    None
742}
743
/// Decides whether a supplemental entry's namespace is compatible with the
/// package's namespace.
///
/// A missing namespace on either side is compatible with anything. When both
/// are present they must be equal, except that a `debian` supplemental entry
/// is also accepted for an `ubuntu` package.
fn debian_installed_namespace_matches(
    supplemental_namespace: &Option<String>,
    package_namespace: &Option<String>,
) -> bool {
    match (
        supplemental_namespace.as_deref(),
        package_namespace.as_deref(),
    ) {
        (Some(supplemental), Some(package)) => {
            supplemental == package || (supplemental == "debian" && package == "ubuntu")
        }
        _ => true,
    }
}
758
/// Decides whether a supplemental entry's `arch` qualifier is compatible with
/// the package's.
///
/// An entry without an `arch` qualifier matches any package. An entry with one
/// matches only a package that has the same `arch` value.
fn debian_installed_arch_matches(
    supplemental_qualifiers: &Option<HashMap<String, String>>,
    package_qualifiers: &Option<HashMap<String, String>>,
) -> bool {
    /// Looks up the `arch` qualifier, if qualifiers are present at all.
    fn arch_of(qualifiers: &Option<HashMap<String, String>>) -> Option<&String> {
        qualifiers.as_ref()?.get("arch")
    }

    match arch_of(supplemental_qualifiers) {
        None => true,
        Some(supplemental_arch) => arch_of(package_qualifiers) == Some(supplemental_arch),
    }
}
776
777fn merge_file_references(
778    target: &mut Vec<crate::models::FileReference>,
779    incoming: Vec<crate::models::FileReference>,
780) {
781    for file_ref in incoming {
782        if let Some(existing) = target
783            .iter_mut()
784            .find(|existing| existing.path == file_ref.path)
785        {
786            if existing.size.is_none() {
787                existing.size = file_ref.size;
788            }
789            if existing.sha1.is_none() {
790                existing.sha1 = file_ref.sha1.clone();
791            }
792            if existing.md5.is_none() {
793                existing.md5 = file_ref.md5.clone();
794            }
795            if existing.sha256.is_none() {
796                existing.sha256 = file_ref.sha256.clone();
797            }
798            if existing.sha512.is_none() {
799                existing.sha512 = file_ref.sha512.clone();
800            }
801            if existing.extra_data.is_none() {
802                existing.extra_data = file_ref.extra_data.clone();
803            }
804        } else {
805            target.push(file_ref);
806        }
807    }
808}
809
810fn resolve_rpm_namespace(
811    files: &[FileInfo],
812    path_index: &HashMap<String, usize>,
813    root: &str,
814) -> Option<String> {
815    let os_release_paths = [
816        format!("{}etc/os-release", root),
817        format!("{}usr/lib/os-release", root),
818    ];
819
820    for os_release_path in &os_release_paths {
821        if let Some(&file_idx) = path_index.get(os_release_path) {
822            let file = &files[file_idx];
823            for pkg_data in &file.package_data {
824                if pkg_data.datasource_id == Some(DatasourceId::EtcOsRelease)
825                    && let Some(namespace) = &pkg_data.namespace
826                {
827                    return Some(namespace.clone());
828                }
829            }
830        }
831    }
832
833    None
834}
835
/// Rebuilds a UID of the form `<purl>?uuid=<id>` / `<purl>?...&uuid=<id>` on
/// top of `new_purl`, keeping everything after the `uuid=` marker.
///
/// The `uuid` parameter is appended with `&` when `new_purl` already has a
/// query part and with `?` otherwise, so the result never contains two `?`
/// separators regardless of which form the old UID used. If `old_uid` has no
/// `uuid` marker it is returned unchanged.
fn replace_uid_base(old_uid: &str, new_purl: &str) -> String {
    // `?uuid=` is tried first, matching UIDs whose purl has no qualifiers.
    let uuid_suffix = old_uid
        .split_once("?uuid=")
        .or_else(|| old_uid.split_once("&uuid="))
        .map(|(_, suffix)| suffix);

    match uuid_suffix {
        Some(suffix) => {
            let separator = if new_purl.contains('?') { '&' } else { '?' };
            format!("{new_purl}{separator}uuid={suffix}")
        }
        None => old_uid.to_string(),
    }
}
848
/// Returns `existing_purl` with its namespace replaced by `namespace`.
///
/// The purl is parsed and rebuilt from its type and name, then the new
/// namespace and the original version, subpath and qualifiers are applied.
/// Returns `None` if parsing fails or any of those steps reports an error.
fn rewrite_purl_namespace(existing_purl: &str, namespace: &str) -> Option<String> {
    let parsed = PackageUrl::from_str(existing_purl).ok()?;
    // Start from a fresh purl so the old namespace is not carried over.
    let mut updated = PackageUrl::new(parsed.ty(), parsed.name()).ok()?;

    updated.with_namespace(namespace).ok()?;

    if let Some(version) = parsed.version() {
        updated.with_version(version).ok()?;
    }

    if let Some(subpath) = parsed.subpath() {
        updated.with_subpath(subpath).ok()?;
    }

    for (key, value) in parsed.qualifiers() {
        updated
            .add_qualifier(key.to_string(), value.to_string())
            .ok()?;
    }

    Some(updated.to_string())
}
871
872fn apply_rpm_namespace(
873    files: &mut [FileInfo],
874    package: &mut Package,
875    dependencies: &mut [TopLevelDependency],
876    namespace: &str,
877) {
878    let old_package_uid = package.package_uid.clone();
879
880    package.namespace = Some(namespace.to_string());
881
882    if let Some(current_purl) = package.purl.as_deref()
883        && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
884    {
885        package.purl = Some(updated_purl.clone());
886        package.package_uid = replace_uid_base(&old_package_uid, &updated_purl);
887    }
888
889    for file in files.iter_mut() {
890        for package_uid in &mut file.for_packages {
891            if *package_uid == old_package_uid {
892                *package_uid = package.package_uid.clone();
893            }
894        }
895    }
896
897    for dep in dependencies.iter_mut() {
898        if dep.for_package_uid.as_deref() == Some(old_package_uid.as_str()) {
899            dep.for_package_uid = Some(package.package_uid.clone());
900        }
901
902        if dep.for_package_uid.as_deref() == Some(package.package_uid.as_str()) {
903            dep.namespace = Some(namespace.to_string());
904
905            if let Some(current_purl) = dep.purl.as_deref()
906                && let Some(updated_purl) = rewrite_purl_namespace(current_purl, namespace)
907            {
908                dep.purl = Some(updated_purl.clone());
909                dep.dependency_uid = replace_uid_base(&dep.dependency_uid, &updated_purl);
910            }
911        }
912    }
913}
914
915#[cfg(test)]
916mod tests {
917    use super::*;
918    use crate::models::{FileReference, FileType, PackageData, PackageType};
919    use strum::IntoEnumIterator;
920
921    #[test]
922    fn test_every_file_reference_resolver_kind_is_registered_once() {
923        let registered: std::collections::HashSet<FileReferenceResolverKind> =
924            FILE_REFERENCE_RESOLVER_CONFIGS
925                .iter()
926                .map(|config| config.kind)
927                .collect();
928
929        let missing: Vec<_> = FileReferenceResolverKind::iter()
930            .filter(|kind| !registered.contains(kind))
931            .collect();
932
933        assert!(
934            missing.is_empty(),
935            "File-reference resolver kinds not registered: {missing:?}"
936        );
937
938        for kind in FileReferenceResolverKind::iter() {
939            let count = FILE_REFERENCE_RESOLVER_CONFIGS
940                .iter()
941                .filter(|config| config.kind == kind)
942                .count();
943            assert_eq!(
944                count, 1,
945                "File-reference resolver kind {kind:?} should be registered exactly once"
946            );
947        }
948    }
949
950    #[test]
951    fn test_file_reference_resolver_datasource_ids_are_unique() {
952        let mut seen = std::collections::HashSet::new();
953        let mut duplicates = Vec::new();
954
955        for config in FILE_REFERENCE_RESOLVER_CONFIGS {
956            for datasource_id in config.datasource_ids {
957                if !seen.insert(*datasource_id) {
958                    duplicates.push(*datasource_id);
959                }
960            }
961        }
962
963        assert!(
964            duplicates.is_empty(),
965            "Datasource IDs registered in multiple file-reference resolvers: {duplicates:?}"
966        );
967    }
968
969    #[test]
970    fn test_find_root_from_path() {
971        assert_eq!(
972            compute_root("rootfs/lib/apk/db/installed", "lib/apk/db/installed"),
973            "rootfs/"
974        );
975        assert_eq!(
976            compute_root("lib/apk/db/installed", "lib/apk/db/installed"),
977            ""
978        );
979        assert_eq!(
980            compute_root("container/var/lib/rpm/Packages", "var/lib/rpm/Packages"),
981            "container/"
982        );
983        assert_eq!(
984            compute_root("var/lib/rpm/Packages", "var/lib/rpm/Packages"),
985            ""
986        );
987    }
988
989    #[test]
990    fn test_resolve_basic_alpine() {
991        let mut files = vec![
992            FileInfo {
993                name: "installed".to_string(),
994                base_name: "installed".to_string(),
995                extension: String::new(),
996                path: "lib/apk/db/installed".to_string(),
997                file_type: FileType::File,
998                mime_type: None,
999                size: 100,
1000                date: None,
1001                sha1: None,
1002                md5: None,
1003                sha256: None,
1004                programming_language: None,
1005                package_data: vec![PackageData {
1006                    datasource_id: Some(DatasourceId::AlpineInstalledDb),
1007                    purl: Some("pkg:alpine/musl@1.2.3".to_string()),
1008                    name: Some("musl".to_string()),
1009                    file_references: vec![
1010                        FileReference {
1011                            path: "lib/libc.so".to_string(),
1012                            size: None,
1013                            sha1: None,
1014                            md5: None,
1015                            sha256: None,
1016                            sha512: None,
1017                            extra_data: None,
1018                        },
1019                        FileReference {
1020                            path: "usr/bin/ldconfig".to_string(),
1021                            size: None,
1022                            sha1: None,
1023                            md5: None,
1024                            sha256: None,
1025                            sha512: None,
1026                            extra_data: None,
1027                        },
1028                    ],
1029                    ..Default::default()
1030                }],
1031                license_expression: None,
1032                license_detections: vec![],
1033                copyrights: vec![],
1034                holders: vec![],
1035                authors: vec![],
1036                emails: vec![],
1037                urls: vec![],
1038                for_packages: vec![],
1039                scan_errors: vec![],
1040                is_source: None,
1041                source_count: None,
1042                is_legal: false,
1043                is_manifest: false,
1044                is_readme: false,
1045                is_top_level: false,
1046                is_key_file: false,
1047                is_community: false,
1048                is_generated: None,
1049                facets: vec![],
1050                tallies: None,
1051            },
1052            FileInfo {
1053                name: "libc.so".to_string(),
1054                base_name: "libc".to_string(),
1055                extension: "so".to_string(),
1056                path: "lib/libc.so".to_string(),
1057                file_type: FileType::File,
1058                mime_type: None,
1059                size: 200,
1060                date: None,
1061                sha1: None,
1062                md5: None,
1063                sha256: None,
1064                programming_language: None,
1065                package_data: vec![],
1066                license_expression: None,
1067                license_detections: vec![],
1068                copyrights: vec![],
1069                holders: vec![],
1070                authors: vec![],
1071                emails: vec![],
1072                urls: vec![],
1073                for_packages: vec![],
1074                scan_errors: vec![],
1075                is_source: None,
1076                source_count: None,
1077                is_legal: false,
1078                is_manifest: false,
1079                is_readme: false,
1080                is_top_level: false,
1081                is_key_file: false,
1082                is_community: false,
1083                is_generated: None,
1084                facets: vec![],
1085                tallies: None,
1086            },
1087            FileInfo {
1088                name: "ldconfig".to_string(),
1089                base_name: "ldconfig".to_string(),
1090                extension: String::new(),
1091                path: "usr/bin/ldconfig".to_string(),
1092                file_type: FileType::File,
1093                mime_type: None,
1094                size: 300,
1095                date: None,
1096                sha1: None,
1097                md5: None,
1098                sha256: None,
1099                programming_language: None,
1100                package_data: vec![],
1101                license_expression: None,
1102                license_detections: vec![],
1103                copyrights: vec![],
1104                holders: vec![],
1105                authors: vec![],
1106                emails: vec![],
1107                urls: vec![],
1108                for_packages: vec![],
1109                scan_errors: vec![],
1110                is_source: None,
1111                source_count: None,
1112                is_legal: false,
1113                is_manifest: false,
1114                is_readme: false,
1115                is_top_level: false,
1116                is_key_file: false,
1117                is_community: false,
1118                is_generated: None,
1119                facets: vec![],
1120                tallies: None,
1121            },
1122        ];
1123
1124        let mut packages = vec![Package {
1125            package_type: Some(PackageType::Alpine),
1126            namespace: None,
1127            name: Some("musl".to_string()),
1128            version: Some("1.2.3".to_string()),
1129            qualifiers: None,
1130            subpath: None,
1131            primary_language: None,
1132            description: None,
1133            release_date: None,
1134            parties: vec![],
1135            keywords: vec![],
1136            homepage_url: None,
1137            download_url: None,
1138            size: None,
1139            sha1: None,
1140            md5: None,
1141            sha256: None,
1142            sha512: None,
1143            bug_tracking_url: None,
1144            code_view_url: None,
1145            vcs_url: None,
1146            copyright: None,
1147            holder: None,
1148            declared_license_expression: None,
1149            declared_license_expression_spdx: None,
1150            license_detections: vec![],
1151            other_license_expression: None,
1152            other_license_expression_spdx: None,
1153            other_license_detections: vec![],
1154            extracted_license_statement: None,
1155            notice_text: None,
1156            source_packages: vec![],
1157            is_private: false,
1158            is_virtual: false,
1159            extra_data: None,
1160            repository_homepage_url: None,
1161            repository_download_url: None,
1162            api_data_url: None,
1163            purl: Some("pkg:alpine/musl@1.2.3".to_string()),
1164            package_uid: "pkg:alpine/musl@1.2.3?uuid=test-uuid".to_string(),
1165            datafile_paths: vec!["lib/apk/db/installed".to_string()],
1166            datasource_ids: vec![DatasourceId::AlpineInstalledDb],
1167        }];
1168
1169        let mut dependencies = vec![];
1170
1171        resolve_file_references(&mut files, &mut packages, &mut dependencies);
1172
1173        assert_eq!(files[1].for_packages.len(), 1);
1174        assert_eq!(
1175            files[1].for_packages[0],
1176            "pkg:alpine/musl@1.2.3?uuid=test-uuid"
1177        );
1178        assert_eq!(files[2].for_packages.len(), 1);
1179        assert_eq!(
1180            files[2].for_packages[0],
1181            "pkg:alpine/musl@1.2.3?uuid=test-uuid"
1182        );
1183    }
1184
1185    #[test]
1186    fn test_resolve_missing_refs() {
1187        let mut files = vec![FileInfo {
1188            name: "installed".to_string(),
1189            base_name: "installed".to_string(),
1190            extension: String::new(),
1191            path: "lib/apk/db/installed".to_string(),
1192            file_type: FileType::File,
1193            mime_type: None,
1194            size: 100,
1195            date: None,
1196            sha1: None,
1197            md5: None,
1198            sha256: None,
1199            programming_language: None,
1200            package_data: vec![PackageData {
1201                datasource_id: Some(DatasourceId::AlpineInstalledDb),
1202                purl: Some("pkg:alpine/test@1.0".to_string()),
1203                name: Some("test".to_string()),
1204                file_references: vec![
1205                    FileReference {
1206                        path: "missing/file1.txt".to_string(),
1207                        size: None,
1208                        sha1: None,
1209                        md5: None,
1210                        sha256: None,
1211                        sha512: None,
1212                        extra_data: None,
1213                    },
1214                    FileReference {
1215                        path: "another/missing.so".to_string(),
1216                        size: None,
1217                        sha1: None,
1218                        md5: None,
1219                        sha256: None,
1220                        sha512: None,
1221                        extra_data: None,
1222                    },
1223                ],
1224                ..Default::default()
1225            }],
1226            license_expression: None,
1227            license_detections: vec![],
1228            copyrights: vec![],
1229            holders: vec![],
1230            authors: vec![],
1231            emails: vec![],
1232            urls: vec![],
1233            for_packages: vec![],
1234            scan_errors: vec![],
1235            is_source: None,
1236            source_count: None,
1237            is_legal: false,
1238            is_manifest: false,
1239            is_readme: false,
1240            is_top_level: false,
1241            is_key_file: false,
1242            is_community: false,
1243            is_generated: None,
1244            facets: vec![],
1245            tallies: None,
1246        }];
1247
1248        let mut packages = vec![Package {
1249            package_type: Some(PackageType::Alpine),
1250            namespace: None,
1251            name: Some("test".to_string()),
1252            version: Some("1.0".to_string()),
1253            qualifiers: None,
1254            subpath: None,
1255            primary_language: None,
1256            description: None,
1257            release_date: None,
1258            parties: vec![],
1259            keywords: vec![],
1260            homepage_url: None,
1261            download_url: None,
1262            size: None,
1263            sha1: None,
1264            md5: None,
1265            sha256: None,
1266            sha512: None,
1267            bug_tracking_url: None,
1268            code_view_url: None,
1269            vcs_url: None,
1270            copyright: None,
1271            holder: None,
1272            declared_license_expression: None,
1273            declared_license_expression_spdx: None,
1274            license_detections: vec![],
1275            other_license_expression: None,
1276            other_license_expression_spdx: None,
1277            other_license_detections: vec![],
1278            extracted_license_statement: None,
1279            notice_text: None,
1280            source_packages: vec![],
1281            is_private: false,
1282            is_virtual: false,
1283            extra_data: None,
1284            repository_homepage_url: None,
1285            repository_download_url: None,
1286            api_data_url: None,
1287            purl: Some("pkg:alpine/test@1.0".to_string()),
1288            package_uid: "pkg:alpine/test@1.0?uuid=test-uuid".to_string(),
1289            datafile_paths: vec!["lib/apk/db/installed".to_string()],
1290            datasource_ids: vec![DatasourceId::AlpineInstalledDb],
1291        }];
1292
1293        let mut dependencies = vec![];
1294
1295        resolve_file_references(&mut files, &mut packages, &mut dependencies);
1296
1297        assert!(packages[0].extra_data.is_some());
1298        let extra_data = packages[0].extra_data.as_ref().unwrap();
1299        assert!(extra_data.contains_key("missing_file_references"));
1300
1301        let missing = extra_data.get("missing_file_references").unwrap();
1302        assert!(missing.is_array());
1303        let missing_array = missing.as_array().unwrap();
1304        assert_eq!(missing_array.len(), 2);
1305        assert_eq!(missing_array[0]["path"], "another/missing.so");
1306        assert_eq!(missing_array[1]["path"], "missing/file1.txt");
1307    }
1308
1309    #[test]
1310    fn test_resolve_rpm_namespace() {
1311        let mut files = vec![
1312            FileInfo {
1313                name: "Packages".to_string(),
1314                base_name: "Packages".to_string(),
1315                extension: String::new(),
1316                path: "rootfs/var/lib/rpm/Packages".to_string(),
1317                file_type: FileType::File,
1318                mime_type: None,
1319                size: 100,
1320                date: None,
1321                sha1: None,
1322                md5: None,
1323                sha256: None,
1324                programming_language: None,
1325                package_data: vec![PackageData {
1326                    datasource_id: Some(DatasourceId::RpmInstalledDatabaseBdb),
1327                    purl: Some("pkg:rpm/bash@5.0".to_string()),
1328                    name: Some("bash".to_string()),
1329                    file_references: vec![],
1330                    ..Default::default()
1331                }],
1332                license_expression: None,
1333                license_detections: vec![],
1334                copyrights: vec![],
1335                holders: vec![],
1336                authors: vec![],
1337                emails: vec![],
1338                urls: vec![],
1339                for_packages: vec![],
1340                scan_errors: vec![],
1341                is_source: None,
1342                source_count: None,
1343                is_legal: false,
1344                is_manifest: false,
1345                is_readme: false,
1346                is_top_level: false,
1347                is_key_file: false,
1348                is_community: false,
1349                is_generated: None,
1350                facets: vec![],
1351                tallies: None,
1352            },
1353            FileInfo {
1354                name: "os-release".to_string(),
1355                base_name: "os-release".to_string(),
1356                extension: String::new(),
1357                path: "rootfs/etc/os-release".to_string(),
1358                file_type: FileType::File,
1359                mime_type: None,
1360                size: 50,
1361                date: None,
1362                sha1: None,
1363                md5: None,
1364                sha256: None,
1365                programming_language: None,
1366                package_data: vec![PackageData {
1367                    datasource_id: Some(DatasourceId::EtcOsRelease),
1368                    namespace: Some("fedora".to_string()),
1369                    name: Some("fedora".to_string()),
1370                    ..Default::default()
1371                }],
1372                license_expression: None,
1373                license_detections: vec![],
1374                copyrights: vec![],
1375                holders: vec![],
1376                authors: vec![],
1377                emails: vec![],
1378                urls: vec![],
1379                for_packages: vec![],
1380                scan_errors: vec![],
1381                is_source: None,
1382                source_count: None,
1383                is_legal: false,
1384                is_manifest: false,
1385                is_readme: false,
1386                is_top_level: false,
1387                is_key_file: false,
1388                is_community: false,
1389                is_generated: None,
1390                facets: vec![],
1391                tallies: None,
1392            },
1393        ];
1394
1395        let mut packages = vec![Package {
1396            package_type: Some(PackageType::Rpm),
1397            namespace: None,
1398            name: Some("bash".to_string()),
1399            version: Some("5.0".to_string()),
1400            qualifiers: None,
1401            subpath: None,
1402            primary_language: None,
1403            description: None,
1404            release_date: None,
1405            parties: vec![],
1406            keywords: vec![],
1407            homepage_url: None,
1408            download_url: None,
1409            size: None,
1410            sha1: None,
1411            md5: None,
1412            sha256: None,
1413            sha512: None,
1414            bug_tracking_url: None,
1415            code_view_url: None,
1416            vcs_url: None,
1417            copyright: None,
1418            holder: None,
1419            declared_license_expression: None,
1420            declared_license_expression_spdx: None,
1421            license_detections: vec![],
1422            other_license_expression: None,
1423            other_license_expression_spdx: None,
1424            other_license_detections: vec![],
1425            extracted_license_statement: None,
1426            notice_text: None,
1427            source_packages: vec![],
1428            is_private: false,
1429            is_virtual: false,
1430            extra_data: None,
1431            repository_homepage_url: None,
1432            repository_download_url: None,
1433            api_data_url: None,
1434            purl: Some("pkg:rpm/bash@5.0".to_string()),
1435            package_uid: "pkg:rpm/bash@5.0?uuid=test-uuid".to_string(),
1436            datafile_paths: vec!["rootfs/var/lib/rpm/Packages".to_string()],
1437            datasource_ids: vec![DatasourceId::RpmInstalledDatabaseBdb],
1438        }];
1439
1440        let mut dependencies = vec![TopLevelDependency {
1441            purl: Some("pkg:rpm/readline@8.0".to_string()),
1442            extracted_requirement: None,
1443            scope: None,
1444            is_runtime: Some(true),
1445            is_optional: None,
1446            is_pinned: None,
1447            is_direct: None,
1448            resolved_package: None,
1449            extra_data: None,
1450            dependency_uid: "pkg:rpm/readline@8.0?uuid=dep-uuid".to_string(),
1451            for_package_uid: Some("pkg:rpm/bash@5.0?uuid=test-uuid".to_string()),
1452            datafile_path: "rootfs/var/lib/rpm/Packages".to_string(),
1453            datasource_id: DatasourceId::RpmInstalledDatabaseBdb,
1454            namespace: None,
1455        }];
1456
1457        resolve_file_references(&mut files, &mut packages, &mut dependencies);
1458
1459        assert_eq!(packages[0].namespace, Some("fedora".to_string()));
1460        assert_eq!(packages[0].purl.as_deref(), Some("pkg:rpm/fedora/bash@5.0"));
1461        assert!(
1462            packages[0]
1463                .package_uid
1464                .starts_with("pkg:rpm/fedora/bash@5.0?uuid=")
1465        );
1466        assert_eq!(dependencies[0].namespace, Some("fedora".to_string()));
1467        assert_eq!(
1468            dependencies[0].purl.as_deref(),
1469            Some("pkg:rpm/fedora/readline@8.0")
1470        );
1471        assert_eq!(
1472            dependencies[0].for_package_uid.as_deref(),
1473            Some(packages[0].package_uid.as_str())
1474        );
1475    }
1476
1477    #[test]
1478    fn test_merge_rpm_yumdb_metadata() {
1479        let mut files = vec![
1480            FileInfo {
1481                name: "Packages".to_string(),
1482                base_name: "Packages".to_string(),
1483                extension: String::new(),
1484                path: "rootfs/var/lib/rpm/Packages".to_string(),
1485                file_type: FileType::File,
1486                mime_type: None,
1487                size: 1,
1488                date: None,
1489                sha1: None,
1490                md5: None,
1491                sha256: None,
1492                programming_language: None,
1493                package_data: vec![],
1494                license_expression: None,
1495                license_detections: vec![],
1496                copyrights: vec![],
1497                holders: vec![],
1498                authors: vec![],
1499                emails: vec![],
1500                urls: vec![],
1501                for_packages: vec!["pkg:rpm/bash@5.0-1.el8?uuid=rpm-uuid".to_string()],
1502                scan_errors: vec![],
1503                is_source: None,
1504                source_count: None,
1505                is_legal: false,
1506                is_manifest: false,
1507                is_readme: false,
1508                is_top_level: false,
1509                is_key_file: false,
1510                is_community: false,
1511                is_generated: None,
1512                facets: vec![],
1513                tallies: None,
1514            },
1515            FileInfo {
1516                name: "from_repo".to_string(),
1517                base_name: "from_repo".to_string(),
1518                extension: String::new(),
1519                path: "rootfs/var/lib/yum/yumdb/p/abc123-bash-5.0-1.el8.x86_64/from_repo"
1520                    .to_string(),
1521                file_type: FileType::File,
1522                mime_type: None,
1523                size: 1,
1524                date: None,
1525                sha1: None,
1526                md5: None,
1527                sha256: None,
1528                programming_language: None,
1529                package_data: vec![],
1530                license_expression: None,
1531                license_detections: vec![],
1532                copyrights: vec![],
1533                holders: vec![],
1534                authors: vec![],
1535                emails: vec![],
1536                urls: vec![],
1537                for_packages: vec!["pkg:rpm/bash@5.0-1.el8?uuid=yumdb-uuid".to_string()],
1538                scan_errors: vec![],
1539                is_source: None,
1540                source_count: None,
1541                is_legal: false,
1542                is_manifest: false,
1543                is_readme: false,
1544                is_top_level: false,
1545                is_key_file: false,
1546                is_community: false,
1547                is_generated: None,
1548                facets: vec![],
1549                tallies: None,
1550            },
1551        ];
1552
1553        let mut packages = vec![
1554            Package {
1555                package_type: Some(PackageType::Rpm),
1556                namespace: None,
1557                name: Some("bash".to_string()),
1558                version: Some("5.0-1.el8".to_string()),
1559                qualifiers: Some(
1560                    std::iter::once(("arch".to_string(), "x86_64".to_string())).collect(),
1561                ),
1562                subpath: None,
1563                primary_language: None,
1564                description: None,
1565                release_date: None,
1566                parties: vec![],
1567                keywords: vec![],
1568                homepage_url: None,
1569                download_url: None,
1570                size: None,
1571                sha1: None,
1572                md5: None,
1573                sha256: None,
1574                sha512: None,
1575                bug_tracking_url: None,
1576                code_view_url: None,
1577                vcs_url: None,
1578                copyright: None,
1579                holder: None,
1580                declared_license_expression: None,
1581                declared_license_expression_spdx: None,
1582                license_detections: vec![],
1583                other_license_expression: None,
1584                other_license_expression_spdx: None,
1585                other_license_detections: vec![],
1586                extracted_license_statement: None,
1587                notice_text: None,
1588                source_packages: vec![],
1589                is_private: false,
1590                is_virtual: false,
1591                extra_data: None,
1592                repository_homepage_url: None,
1593                repository_download_url: None,
1594                api_data_url: None,
1595                purl: Some("pkg:rpm/bash@5.0-1.el8?arch=x86_64".to_string()),
1596                package_uid: "pkg:rpm/bash@5.0-1.el8?uuid=rpm-uuid".to_string(),
1597                datafile_paths: vec!["rootfs/var/lib/rpm/Packages".to_string()],
1598                datasource_ids: vec![DatasourceId::RpmInstalledDatabaseBdb],
1599            },
1600            Package {
1601                package_type: Some(PackageType::Rpm),
1602                namespace: None,
1603                name: Some("bash".to_string()),
1604                version: Some("5.0-1.el8".to_string()),
1605                qualifiers: Some(
1606                    std::iter::once(("arch".to_string(), "x86_64".to_string())).collect(),
1607                ),
1608                subpath: None,
1609                primary_language: None,
1610                description: None,
1611                release_date: None,
1612                parties: vec![],
1613                keywords: vec![],
1614                homepage_url: None,
1615                download_url: None,
1616                size: None,
1617                sha1: None,
1618                md5: None,
1619                sha256: None,
1620                sha512: None,
1621                bug_tracking_url: None,
1622                code_view_url: None,
1623                vcs_url: None,
1624                copyright: None,
1625                holder: None,
1626                declared_license_expression: None,
1627                declared_license_expression_spdx: None,
1628                license_detections: vec![],
1629                other_license_expression: None,
1630                other_license_expression_spdx: None,
1631                other_license_detections: vec![],
1632                extracted_license_statement: None,
1633                notice_text: None,
1634                source_packages: vec![],
1635                is_private: false,
1636                is_virtual: true,
1637                extra_data: Some(
1638                    [
1639                        (
1640                            "from_repo".to_string(),
1641                            serde_json::Value::String("baseos".to_string()),
1642                        ),
1643                        (
1644                            "releasever".to_string(),
1645                            serde_json::Value::String("8".to_string()),
1646                        ),
1647                    ]
1648                    .into_iter()
1649                    .collect(),
1650                ),
1651                repository_homepage_url: None,
1652                repository_download_url: None,
1653                api_data_url: None,
1654                purl: Some("pkg:rpm/bash@5.0-1.el8?arch=x86_64".to_string()),
1655                package_uid: "pkg:rpm/bash@5.0-1.el8?uuid=yumdb-uuid".to_string(),
1656                datafile_paths: vec![
1657                    "rootfs/var/lib/yum/yumdb/p/abc123-bash-5.0-1.el8.x86_64/from_repo".to_string(),
1658                ],
1659                datasource_ids: vec![DatasourceId::RpmYumdb],
1660            },
1661        ];
1662
1663        merge_rpm_yumdb_metadata(&mut files, &mut packages);
1664
1665        assert_eq!(packages.len(), 1);
1666        assert!(packages[0].datasource_ids.contains(&DatasourceId::RpmYumdb));
1667        assert!(
1668            packages[0]
1669                .datafile_paths
1670                .iter()
1671                .any(|path| path.contains("var/lib/yum/yumdb"))
1672        );
1673        let yumdb = packages[0]
1674            .extra_data
1675            .as_ref()
1676            .and_then(|extra| extra.get("yumdb"))
1677            .and_then(|value| value.as_object())
1678            .unwrap();
1679        assert_eq!(yumdb["from_repo"], "baseos");
1680        assert_eq!(yumdb["releasever"], "8");
1681        assert_eq!(
1682            files[1].for_packages,
1683            vec!["pkg:rpm/bash@5.0-1.el8?uuid=rpm-uuid".to_string()]
1684        );
1685    }
1686
1687    #[test]
1688    fn test_strip_leading_slash() {
1689        let mut files = vec![
1690            FileInfo {
1691                name: "installed".to_string(),
1692                base_name: "installed".to_string(),
1693                extension: String::new(),
1694                path: "lib/apk/db/installed".to_string(),
1695                file_type: FileType::File,
1696                mime_type: None,
1697                size: 100,
1698                date: None,
1699                sha1: None,
1700                md5: None,
1701                sha256: None,
1702                programming_language: None,
1703                package_data: vec![PackageData {
1704                    datasource_id: Some(DatasourceId::AlpineInstalledDb),
1705                    purl: Some("pkg:alpine/test@1.0".to_string()),
1706                    name: Some("test".to_string()),
1707                    file_references: vec![FileReference {
1708                        path: "/lib/test.so".to_string(),
1709                        size: None,
1710                        sha1: None,
1711                        md5: None,
1712                        sha256: None,
1713                        sha512: None,
1714                        extra_data: None,
1715                    }],
1716                    ..Default::default()
1717                }],
1718                license_expression: None,
1719                license_detections: vec![],
1720                copyrights: vec![],
1721                holders: vec![],
1722                authors: vec![],
1723                emails: vec![],
1724                urls: vec![],
1725                for_packages: vec![],
1726                scan_errors: vec![],
1727                is_source: None,
1728                source_count: None,
1729                is_legal: false,
1730                is_manifest: false,
1731                is_readme: false,
1732                is_top_level: false,
1733                is_key_file: false,
1734                is_community: false,
1735                is_generated: None,
1736                facets: vec![],
1737                tallies: None,
1738            },
1739            FileInfo {
1740                name: "test.so".to_string(),
1741                base_name: "test".to_string(),
1742                extension: "so".to_string(),
1743                path: "lib/test.so".to_string(),
1744                file_type: FileType::File,
1745                mime_type: None,
1746                size: 200,
1747                date: None,
1748                sha1: None,
1749                md5: None,
1750                sha256: None,
1751                programming_language: None,
1752                package_data: vec![],
1753                license_expression: None,
1754                license_detections: vec![],
1755                copyrights: vec![],
1756                holders: vec![],
1757                authors: vec![],
1758                emails: vec![],
1759                urls: vec![],
1760                for_packages: vec![],
1761                scan_errors: vec![],
1762                is_source: None,
1763                source_count: None,
1764                is_legal: false,
1765                is_manifest: false,
1766                is_readme: false,
1767                is_top_level: false,
1768                is_key_file: false,
1769                is_community: false,
1770                is_generated: None,
1771                facets: vec![],
1772                tallies: None,
1773            },
1774        ];
1775
1776        let mut packages = vec![Package {
1777            package_type: Some(PackageType::Alpine),
1778            namespace: None,
1779            name: Some("test".to_string()),
1780            version: Some("1.0".to_string()),
1781            qualifiers: None,
1782            subpath: None,
1783            primary_language: None,
1784            description: None,
1785            release_date: None,
1786            parties: vec![],
1787            keywords: vec![],
1788            homepage_url: None,
1789            download_url: None,
1790            size: None,
1791            sha1: None,
1792            md5: None,
1793            sha256: None,
1794            sha512: None,
1795            bug_tracking_url: None,
1796            code_view_url: None,
1797            vcs_url: None,
1798            copyright: None,
1799            holder: None,
1800            declared_license_expression: None,
1801            declared_license_expression_spdx: None,
1802            license_detections: vec![],
1803            other_license_expression: None,
1804            other_license_expression_spdx: None,
1805            other_license_detections: vec![],
1806            extracted_license_statement: None,
1807            notice_text: None,
1808            source_packages: vec![],
1809            is_private: false,
1810            is_virtual: false,
1811            extra_data: None,
1812            repository_homepage_url: None,
1813            repository_download_url: None,
1814            api_data_url: None,
1815            purl: Some("pkg:alpine/test@1.0".to_string()),
1816            package_uid: "pkg:alpine/test@1.0?uuid=test-uuid".to_string(),
1817            datafile_paths: vec!["lib/apk/db/installed".to_string()],
1818            datasource_ids: vec![DatasourceId::AlpineInstalledDb],
1819        }];
1820
1821        let mut dependencies = vec![];
1822
1823        resolve_file_references(&mut files, &mut packages, &mut dependencies);
1824
1825        assert_eq!(files[1].for_packages.len(), 1);
1826        assert_eq!(
1827            files[1].for_packages[0],
1828            "pkg:alpine/test@1.0?uuid=test-uuid"
1829        );
1830    }
1831
1832    #[test]
1833    fn test_resolve_python_metadata_file_references() {
1834        let mut files = vec![
1835            FileInfo {
1836                name: "METADATA".to_string(),
1837                base_name: "METADATA".to_string(),
1838                extension: String::new(),
1839                path: "venv/lib/python3.11/site-packages/click-8.0.4.dist-info/METADATA"
1840                    .to_string(),
1841                file_type: FileType::File,
1842                mime_type: None,
1843                size: 100,
1844                date: None,
1845                sha1: None,
1846                md5: None,
1847                sha256: None,
1848                programming_language: None,
1849                package_data: vec![PackageData {
1850                    datasource_id: Some(DatasourceId::PypiWheelMetadata),
1851                    purl: Some("pkg:pypi/click@8.0.4".to_string()),
1852                    name: Some("click".to_string()),
1853                    version: Some("8.0.4".to_string()),
1854                    file_references: vec![
1855                        FileReference {
1856                            path: "click/__init__.py".to_string(),
1857                            size: None,
1858                            sha1: None,
1859                            md5: None,
1860                            sha256: None,
1861                            sha512: None,
1862                            extra_data: None,
1863                        },
1864                        FileReference {
1865                            path: "click/core.py".to_string(),
1866                            size: None,
1867                            sha1: None,
1868                            md5: None,
1869                            sha256: None,
1870                            sha512: None,
1871                            extra_data: None,
1872                        },
1873                        FileReference {
1874                            path: "click-8.0.4.dist-info/LICENSE.rst".to_string(),
1875                            size: None,
1876                            sha1: None,
1877                            md5: None,
1878                            sha256: None,
1879                            sha512: None,
1880                            extra_data: None,
1881                        },
1882                    ],
1883                    ..Default::default()
1884                }],
1885                license_expression: None,
1886                license_detections: vec![],
1887                copyrights: vec![],
1888                holders: vec![],
1889                authors: vec![],
1890                emails: vec![],
1891                urls: vec![],
1892                for_packages: vec![],
1893                scan_errors: vec![],
1894                is_source: None,
1895                source_count: None,
1896                is_legal: false,
1897                is_manifest: false,
1898                is_readme: false,
1899                is_top_level: false,
1900                is_key_file: false,
1901                is_community: false,
1902                is_generated: None,
1903                facets: vec![],
1904                tallies: None,
1905            },
1906            FileInfo {
1907                name: "__init__.py".to_string(),
1908                base_name: "__init__".to_string(),
1909                extension: "py".to_string(),
1910                path: "venv/lib/python3.11/site-packages/click/__init__.py".to_string(),
1911                file_type: FileType::File,
1912                mime_type: None,
1913                size: 5,
1914                date: None,
1915                sha1: None,
1916                md5: None,
1917                sha256: None,
1918                programming_language: None,
1919                package_data: vec![],
1920                license_expression: None,
1921                license_detections: vec![],
1922                copyrights: vec![],
1923                holders: vec![],
1924                authors: vec![],
1925                emails: vec![],
1926                urls: vec![],
1927                for_packages: vec![],
1928                scan_errors: vec![],
1929                is_source: None,
1930                source_count: None,
1931                is_legal: false,
1932                is_manifest: false,
1933                is_readme: false,
1934                is_top_level: false,
1935                is_key_file: false,
1936                is_community: false,
1937                is_generated: None,
1938                facets: vec![],
1939                tallies: None,
1940            },
1941            FileInfo {
1942                name: "core.py".to_string(),
1943                base_name: "core".to_string(),
1944                extension: "py".to_string(),
1945                path: "venv/lib/python3.11/site-packages/click/core.py".to_string(),
1946                file_type: FileType::File,
1947                mime_type: None,
1948                size: 10,
1949                date: None,
1950                sha1: None,
1951                md5: None,
1952                sha256: None,
1953                programming_language: None,
1954                package_data: vec![],
1955                license_expression: None,
1956                license_detections: vec![],
1957                copyrights: vec![],
1958                holders: vec![],
1959                authors: vec![],
1960                emails: vec![],
1961                urls: vec![],
1962                for_packages: vec![],
1963                scan_errors: vec![],
1964                is_source: None,
1965                source_count: None,
1966                is_legal: false,
1967                is_manifest: false,
1968                is_readme: false,
1969                is_top_level: false,
1970                is_key_file: false,
1971                is_community: false,
1972                is_generated: None,
1973                facets: vec![],
1974                tallies: None,
1975            },
1976            FileInfo {
1977                name: "LICENSE.rst".to_string(),
1978                base_name: "LICENSE".to_string(),
1979                extension: "rst".to_string(),
1980                path: "venv/lib/python3.11/site-packages/click-8.0.4.dist-info/LICENSE.rst"
1981                    .to_string(),
1982                file_type: FileType::File,
1983                mime_type: None,
1984                size: 20,
1985                date: None,
1986                sha1: None,
1987                md5: None,
1988                sha256: None,
1989                programming_language: None,
1990                package_data: vec![],
1991                license_expression: None,
1992                license_detections: vec![],
1993                copyrights: vec![],
1994                holders: vec![],
1995                authors: vec![],
1996                emails: vec![],
1997                urls: vec![],
1998                for_packages: vec![],
1999                scan_errors: vec![],
2000                is_source: None,
2001                source_count: None,
2002                is_legal: false,
2003                is_manifest: false,
2004                is_readme: false,
2005                is_top_level: false,
2006                is_key_file: false,
2007                is_community: false,
2008                is_generated: None,
2009                facets: vec![],
2010                tallies: None,
2011            },
2012        ];
2013
2014        let mut packages = vec![Package {
2015            package_type: Some(PackageType::Pypi),
2016            namespace: None,
2017            name: Some("click".to_string()),
2018            version: Some("8.0.4".to_string()),
2019            qualifiers: None,
2020            subpath: None,
2021            primary_language: None,
2022            description: None,
2023            release_date: None,
2024            parties: vec![],
2025            keywords: vec![],
2026            homepage_url: None,
2027            download_url: None,
2028            size: None,
2029            sha1: None,
2030            md5: None,
2031            sha256: None,
2032            sha512: None,
2033            bug_tracking_url: None,
2034            code_view_url: None,
2035            vcs_url: None,
2036            copyright: None,
2037            holder: None,
2038            declared_license_expression: None,
2039            declared_license_expression_spdx: None,
2040            license_detections: vec![],
2041            other_license_expression: None,
2042            other_license_expression_spdx: None,
2043            other_license_detections: vec![],
2044            extracted_license_statement: None,
2045            notice_text: None,
2046            source_packages: vec![],
2047            is_private: false,
2048            is_virtual: false,
2049            extra_data: None,
2050            repository_homepage_url: None,
2051            repository_download_url: None,
2052            api_data_url: None,
2053            purl: Some("pkg:pypi/click@8.0.4".to_string()),
2054            package_uid: "pkg:pypi/click@8.0.4?uuid=test-uuid".to_string(),
2055            datafile_paths: vec![
2056                "venv/lib/python3.11/site-packages/click-8.0.4.dist-info/METADATA".to_string(),
2057            ],
2058            datasource_ids: vec![DatasourceId::PypiWheelMetadata],
2059        }];
2060
2061        let mut dependencies = vec![];
2062
2063        resolve_file_references(&mut files, &mut packages, &mut dependencies);
2064
2065        assert_eq!(files[1].for_packages.len(), 1);
2066        assert_eq!(files[2].for_packages.len(), 1);
2067        assert_eq!(files[3].for_packages.len(), 1);
2068        assert_eq!(
2069            files[2].for_packages[0],
2070            "pkg:pypi/click@8.0.4?uuid=test-uuid"
2071        );
2072    }
2073
2074    #[test]
2075    fn test_resolve_python_pkg_info_installed_files_references() {
2076        let mut files = vec![
2077            FileInfo {
2078                name: "PKG-INFO".to_string(),
2079                base_name: "PKG-INFO".to_string(),
2080                extension: String::new(),
2081                path: "venv/lib/python3.11/site-packages/examplepkg.egg-info/PKG-INFO".to_string(),
2082                file_type: FileType::File,
2083                mime_type: None,
2084                size: 100,
2085                date: None,
2086                sha1: None,
2087                md5: None,
2088                sha256: None,
2089                programming_language: None,
2090                package_data: vec![PackageData {
2091                    datasource_id: Some(DatasourceId::PypiSdistPkginfo),
2092                    purl: Some("pkg:pypi/examplepkg@1.0.0".to_string()),
2093                    name: Some("examplepkg".to_string()),
2094                    version: Some("1.0.0".to_string()),
2095                    file_references: vec![FileReference {
2096                        path: "../examplepkg/core.py".to_string(),
2097                        size: None,
2098                        sha1: None,
2099                        md5: None,
2100                        sha256: None,
2101                        sha512: None,
2102                        extra_data: None,
2103                    }],
2104                    ..Default::default()
2105                }],
2106                license_expression: None,
2107                license_detections: vec![],
2108                copyrights: vec![],
2109                holders: vec![],
2110                authors: vec![],
2111                emails: vec![],
2112                urls: vec![],
2113                for_packages: vec![],
2114                scan_errors: vec![],
2115                is_source: None,
2116                source_count: None,
2117                is_legal: false,
2118                is_manifest: false,
2119                is_readme: false,
2120                is_top_level: false,
2121                is_key_file: false,
2122                is_community: false,
2123                is_generated: None,
2124                facets: vec![],
2125                tallies: None,
2126            },
2127            FileInfo {
2128                name: "core.py".to_string(),
2129                base_name: "core".to_string(),
2130                extension: "py".to_string(),
2131                path: "venv/lib/python3.11/site-packages/examplepkg/core.py".to_string(),
2132                file_type: FileType::File,
2133                mime_type: None,
2134                size: 10,
2135                date: None,
2136                sha1: None,
2137                md5: None,
2138                sha256: None,
2139                programming_language: None,
2140                package_data: vec![],
2141                license_expression: None,
2142                license_detections: vec![],
2143                copyrights: vec![],
2144                holders: vec![],
2145                authors: vec![],
2146                emails: vec![],
2147                urls: vec![],
2148                for_packages: vec![],
2149                scan_errors: vec![],
2150                is_source: None,
2151                source_count: None,
2152                is_legal: false,
2153                is_manifest: false,
2154                is_readme: false,
2155                is_top_level: false,
2156                is_key_file: false,
2157                is_community: false,
2158                is_generated: None,
2159                facets: vec![],
2160                tallies: None,
2161            },
2162        ];
2163
2164        let mut packages = vec![Package {
2165            package_type: Some(PackageType::Pypi),
2166            namespace: None,
2167            name: Some("examplepkg".to_string()),
2168            version: Some("1.0.0".to_string()),
2169            qualifiers: None,
2170            subpath: None,
2171            primary_language: None,
2172            description: None,
2173            release_date: None,
2174            parties: vec![],
2175            keywords: vec![],
2176            homepage_url: None,
2177            download_url: None,
2178            size: None,
2179            sha1: None,
2180            md5: None,
2181            sha256: None,
2182            sha512: None,
2183            bug_tracking_url: None,
2184            code_view_url: None,
2185            vcs_url: None,
2186            copyright: None,
2187            holder: None,
2188            declared_license_expression: None,
2189            declared_license_expression_spdx: None,
2190            license_detections: vec![],
2191            other_license_expression: None,
2192            other_license_expression_spdx: None,
2193            other_license_detections: vec![],
2194            extracted_license_statement: None,
2195            notice_text: None,
2196            source_packages: vec![],
2197            is_private: false,
2198            is_virtual: false,
2199            extra_data: None,
2200            repository_homepage_url: None,
2201            repository_download_url: None,
2202            api_data_url: None,
2203            purl: Some("pkg:pypi/examplepkg@1.0.0".to_string()),
2204            package_uid: "pkg:pypi/examplepkg@1.0.0?uuid=test-uuid".to_string(),
2205            datafile_paths: vec![
2206                "venv/lib/python3.11/site-packages/examplepkg.egg-info/PKG-INFO".to_string(),
2207            ],
2208            datasource_ids: vec![DatasourceId::PypiSdistPkginfo],
2209        }];
2210
2211        let mut dependencies = vec![];
2212
2213        resolve_file_references(&mut files, &mut packages, &mut dependencies);
2214
2215        assert_eq!(
2216            files[1].for_packages,
2217            vec!["pkg:pypi/examplepkg@1.0.0?uuid=test-uuid".to_string()]
2218        );
2219    }
2220
2221    #[test]
2222    fn test_resolve_python_metadata_file_references_in_dist_packages() {
2223        let mut files = vec![
2224            FileInfo {
2225                name: "METADATA".to_string(),
2226                base_name: "METADATA".to_string(),
2227                extension: String::new(),
2228                path: "usr/lib/python3/dist-packages/click-8.0.4.dist-info/METADATA".to_string(),
2229                file_type: FileType::File,
2230                mime_type: None,
2231                size: 100,
2232                date: None,
2233                sha1: None,
2234                md5: None,
2235                sha256: None,
2236                programming_language: None,
2237                package_data: vec![PackageData {
2238                    datasource_id: Some(DatasourceId::PypiWheelMetadata),
2239                    purl: Some("pkg:pypi/click@8.0.4".to_string()),
2240                    name: Some("click".to_string()),
2241                    version: Some("8.0.4".to_string()),
2242                    file_references: vec![FileReference {
2243                        path: "click/core.py".to_string(),
2244                        size: None,
2245                        sha1: None,
2246                        md5: None,
2247                        sha256: None,
2248                        sha512: None,
2249                        extra_data: None,
2250                    }],
2251                    ..Default::default()
2252                }],
2253                license_expression: None,
2254                license_detections: vec![],
2255                copyrights: vec![],
2256                holders: vec![],
2257                authors: vec![],
2258                emails: vec![],
2259                urls: vec![],
2260                for_packages: vec![],
2261                scan_errors: vec![],
2262                is_source: None,
2263                source_count: None,
2264                is_legal: false,
2265                is_manifest: false,
2266                is_readme: false,
2267                is_top_level: false,
2268                is_key_file: false,
2269                is_community: false,
2270                is_generated: None,
2271                facets: vec![],
2272                tallies: None,
2273            },
2274            FileInfo {
2275                name: "core.py".to_string(),
2276                base_name: "core".to_string(),
2277                extension: "py".to_string(),
2278                path: "usr/lib/python3/dist-packages/click/core.py".to_string(),
2279                file_type: FileType::File,
2280                mime_type: None,
2281                size: 10,
2282                date: None,
2283                sha1: None,
2284                md5: None,
2285                sha256: None,
2286                programming_language: None,
2287                package_data: vec![],
2288                license_expression: None,
2289                license_detections: vec![],
2290                copyrights: vec![],
2291                holders: vec![],
2292                authors: vec![],
2293                emails: vec![],
2294                urls: vec![],
2295                for_packages: vec![],
2296                scan_errors: vec![],
2297                is_source: None,
2298                source_count: None,
2299                is_legal: false,
2300                is_manifest: false,
2301                is_readme: false,
2302                is_top_level: false,
2303                is_key_file: false,
2304                is_community: false,
2305                is_generated: None,
2306                facets: vec![],
2307                tallies: None,
2308            },
2309        ];
2310
2311        let mut packages = vec![Package {
2312            package_type: Some(PackageType::Pypi),
2313            namespace: None,
2314            name: Some("click".to_string()),
2315            version: Some("8.0.4".to_string()),
2316            qualifiers: None,
2317            subpath: None,
2318            primary_language: None,
2319            description: None,
2320            release_date: None,
2321            parties: vec![],
2322            keywords: vec![],
2323            homepage_url: None,
2324            download_url: None,
2325            size: None,
2326            sha1: None,
2327            md5: None,
2328            sha256: None,
2329            sha512: None,
2330            bug_tracking_url: None,
2331            code_view_url: None,
2332            vcs_url: None,
2333            copyright: None,
2334            holder: None,
2335            declared_license_expression: None,
2336            declared_license_expression_spdx: None,
2337            license_detections: vec![],
2338            other_license_expression: None,
2339            other_license_expression_spdx: None,
2340            other_license_detections: vec![],
2341            extracted_license_statement: None,
2342            notice_text: None,
2343            source_packages: vec![],
2344            is_private: false,
2345            is_virtual: false,
2346            extra_data: None,
2347            repository_homepage_url: None,
2348            repository_download_url: None,
2349            api_data_url: None,
2350            purl: Some("pkg:pypi/click@8.0.4".to_string()),
2351            package_uid: "pkg:pypi/click@8.0.4?uuid=test-uuid".to_string(),
2352            datafile_paths: vec![
2353                "usr/lib/python3/dist-packages/click-8.0.4.dist-info/METADATA".to_string(),
2354            ],
2355            datasource_ids: vec![DatasourceId::PypiWheelMetadata],
2356        }];
2357
2358        let mut dependencies = vec![];
2359
2360        resolve_file_references(&mut files, &mut packages, &mut dependencies);
2361
2362        assert_eq!(
2363            files[1].for_packages,
2364            vec!["pkg:pypi/click@8.0.4?uuid=test-uuid".to_string()]
2365        );
2366    }
2367
2368    #[test]
2369    fn test_python_metadata_file_references_do_not_assign_outside_packages_dirs() {
2370        let mut files = vec![
2371            FileInfo {
2372                name: "METADATA".to_string(),
2373                base_name: "METADATA".to_string(),
2374                extension: String::new(),
2375                path: "project/metadata/METADATA".to_string(),
2376                file_type: FileType::File,
2377                mime_type: None,
2378                size: 100,
2379                date: None,
2380                sha1: None,
2381                md5: None,
2382                sha256: None,
2383                programming_language: None,
2384                package_data: vec![PackageData {
2385                    datasource_id: Some(DatasourceId::PypiWheelMetadata),
2386                    purl: Some("pkg:pypi/examplepkg@1.0.0".to_string()),
2387                    name: Some("examplepkg".to_string()),
2388                    version: Some("1.0.0".to_string()),
2389                    file_references: vec![FileReference {
2390                        path: "examplepkg/core.py".to_string(),
2391                        size: None,
2392                        sha1: None,
2393                        md5: None,
2394                        sha256: None,
2395                        sha512: None,
2396                        extra_data: None,
2397                    }],
2398                    ..Default::default()
2399                }],
2400                license_expression: None,
2401                license_detections: vec![],
2402                copyrights: vec![],
2403                holders: vec![],
2404                authors: vec![],
2405                emails: vec![],
2406                urls: vec![],
2407                for_packages: vec![],
2408                scan_errors: vec![],
2409                is_source: None,
2410                source_count: None,
2411                is_legal: false,
2412                is_manifest: false,
2413                is_readme: false,
2414                is_top_level: false,
2415                is_key_file: false,
2416                is_community: false,
2417                is_generated: None,
2418                facets: vec![],
2419                tallies: None,
2420            },
2421            FileInfo {
2422                name: "core.py".to_string(),
2423                base_name: "core".to_string(),
2424                extension: "py".to_string(),
2425                path: "project/examplepkg/core.py".to_string(),
2426                file_type: FileType::File,
2427                mime_type: None,
2428                size: 10,
2429                date: None,
2430                sha1: None,
2431                md5: None,
2432                sha256: None,
2433                programming_language: None,
2434                package_data: vec![],
2435                license_expression: None,
2436                license_detections: vec![],
2437                copyrights: vec![],
2438                holders: vec![],
2439                authors: vec![],
2440                emails: vec![],
2441                urls: vec![],
2442                for_packages: vec![],
2443                scan_errors: vec![],
2444                is_source: None,
2445                source_count: None,
2446                is_legal: false,
2447                is_manifest: false,
2448                is_readme: false,
2449                is_top_level: false,
2450                is_key_file: false,
2451                is_community: false,
2452                is_generated: None,
2453                facets: vec![],
2454                tallies: None,
2455            },
2456        ];
2457
2458        let mut packages = vec![Package {
2459            package_type: Some(PackageType::Pypi),
2460            namespace: None,
2461            name: Some("examplepkg".to_string()),
2462            version: Some("1.0.0".to_string()),
2463            qualifiers: None,
2464            subpath: None,
2465            primary_language: None,
2466            description: None,
2467            release_date: None,
2468            parties: vec![],
2469            keywords: vec![],
2470            homepage_url: None,
2471            download_url: None,
2472            size: None,
2473            sha1: None,
2474            md5: None,
2475            sha256: None,
2476            sha512: None,
2477            bug_tracking_url: None,
2478            code_view_url: None,
2479            vcs_url: None,
2480            copyright: None,
2481            holder: None,
2482            declared_license_expression: None,
2483            declared_license_expression_spdx: None,
2484            license_detections: vec![],
2485            other_license_expression: None,
2486            other_license_expression_spdx: None,
2487            other_license_detections: vec![],
2488            extracted_license_statement: None,
2489            notice_text: None,
2490            source_packages: vec![],
2491            is_private: false,
2492            is_virtual: false,
2493            extra_data: None,
2494            repository_homepage_url: None,
2495            repository_download_url: None,
2496            api_data_url: None,
2497            purl: Some("pkg:pypi/examplepkg@1.0.0".to_string()),
2498            package_uid: "pkg:pypi/examplepkg@1.0.0?uuid=test-uuid".to_string(),
2499            datafile_paths: vec!["project/metadata/METADATA".to_string()],
2500            datasource_ids: vec![DatasourceId::PypiWheelMetadata],
2501        }];
2502
2503        let mut dependencies = vec![];
2504
2505        resolve_file_references(&mut files, &mut packages, &mut dependencies);
2506
2507        assert!(files[1].for_packages.is_empty());
2508    }
2509
2510    #[test]
2511    fn test_python_sources_file_references_do_not_escape_project_root() {
2512        let mut files = vec![
2513            FileInfo {
2514                name: "PKG-INFO".to_string(),
2515                base_name: "PKG-INFO".to_string(),
2516                extension: String::new(),
2517                path: "project/PyJPString.egg-info/PKG-INFO".to_string(),
2518                file_type: FileType::File,
2519                mime_type: None,
2520                size: 100,
2521                date: None,
2522                sha1: None,
2523                md5: None,
2524                sha256: None,
2525                programming_language: None,
2526                package_data: vec![PackageData {
2527                    datasource_id: Some(DatasourceId::PypiSdistPkginfo),
2528                    purl: Some("pkg:pypi/PyJPString@0.0.3".to_string()),
2529                    name: Some("PyJPString".to_string()),
2530                    version: Some("0.0.3".to_string()),
2531                    file_references: vec![FileReference {
2532                        path: "../../outside.py".to_string(),
2533                        size: None,
2534                        sha1: None,
2535                        md5: None,
2536                        sha256: None,
2537                        sha512: None,
2538                        extra_data: None,
2539                    }],
2540                    ..Default::default()
2541                }],
2542                license_expression: None,
2543                license_detections: vec![],
2544                copyrights: vec![],
2545                holders: vec![],
2546                authors: vec![],
2547                emails: vec![],
2548                urls: vec![],
2549                for_packages: vec![],
2550                scan_errors: vec![],
2551                is_source: None,
2552                source_count: None,
2553                is_legal: false,
2554                is_manifest: false,
2555                is_readme: false,
2556                is_top_level: false,
2557                is_key_file: false,
2558                is_community: false,
2559                is_generated: None,
2560                facets: vec![],
2561                tallies: None,
2562            },
2563            FileInfo {
2564                name: "outside.py".to_string(),
2565                base_name: "outside".to_string(),
2566                extension: "py".to_string(),
2567                path: "outside.py".to_string(),
2568                file_type: FileType::File,
2569                mime_type: None,
2570                size: 10,
2571                date: None,
2572                sha1: None,
2573                md5: None,
2574                sha256: None,
2575                programming_language: None,
2576                package_data: vec![],
2577                license_expression: None,
2578                license_detections: vec![],
2579                copyrights: vec![],
2580                holders: vec![],
2581                authors: vec![],
2582                emails: vec![],
2583                urls: vec![],
2584                for_packages: vec![],
2585                scan_errors: vec![],
2586                is_source: None,
2587                source_count: None,
2588                is_legal: false,
2589                is_manifest: false,
2590                is_readme: false,
2591                is_top_level: false,
2592                is_key_file: false,
2593                is_community: false,
2594                is_generated: None,
2595                facets: vec![],
2596                tallies: None,
2597            },
2598        ];
2599
2600        let mut packages = vec![Package {
2601            package_type: Some(PackageType::Pypi),
2602            namespace: None,
2603            name: Some("PyJPString".to_string()),
2604            version: Some("0.0.3".to_string()),
2605            qualifiers: None,
2606            subpath: None,
2607            primary_language: None,
2608            description: None,
2609            release_date: None,
2610            parties: vec![],
2611            keywords: vec![],
2612            homepage_url: None,
2613            download_url: None,
2614            size: None,
2615            sha1: None,
2616            md5: None,
2617            sha256: None,
2618            sha512: None,
2619            bug_tracking_url: None,
2620            code_view_url: None,
2621            vcs_url: None,
2622            copyright: None,
2623            holder: None,
2624            declared_license_expression: None,
2625            declared_license_expression_spdx: None,
2626            license_detections: vec![],
2627            other_license_expression: None,
2628            other_license_expression_spdx: None,
2629            other_license_detections: vec![],
2630            extracted_license_statement: None,
2631            notice_text: None,
2632            source_packages: vec![],
2633            is_private: false,
2634            is_virtual: false,
2635            extra_data: None,
2636            repository_homepage_url: None,
2637            repository_download_url: None,
2638            api_data_url: None,
2639            purl: Some("pkg:pypi/PyJPString@0.0.3".to_string()),
2640            package_uid: "pkg:pypi/PyJPString@0.0.3?uuid=test-uuid".to_string(),
2641            datafile_paths: vec!["project/PyJPString.egg-info/PKG-INFO".to_string()],
2642            datasource_ids: vec![DatasourceId::PypiSdistPkginfo],
2643        }];
2644
2645        let mut dependencies = vec![];
2646
2647        resolve_file_references(&mut files, &mut packages, &mut dependencies);
2648
2649        assert!(files[1].for_packages.is_empty());
2650        let missing = packages[0]
2651            .extra_data
2652            .as_ref()
2653            .and_then(|extra| extra.get("missing_file_references"))
2654            .and_then(|value| value.as_array())
2655            .expect("missing_file_references should be recorded");
2656        assert_eq!(missing.len(), 1);
2657    }
2658
2659    #[test]
2660    fn test_resolve_debian_installed_file_references_from_status_db() {
2661        let mut files = vec![
2662            FileInfo {
2663                name: "status".to_string(),
2664                base_name: "status".to_string(),
2665                extension: String::new(),
2666                path: "rootfs/var/lib/dpkg/status".to_string(),
2667                file_type: FileType::File,
2668                mime_type: None,
2669                size: 100,
2670                date: None,
2671                sha1: None,
2672                md5: None,
2673                sha256: None,
2674                programming_language: None,
2675                package_data: vec![PackageData {
2676                    datasource_id: Some(DatasourceId::DebianInstalledStatusDb),
2677                    package_type: Some(PackageType::Deb),
2678                    namespace: Some("debian".to_string()),
2679                    name: Some("bash".to_string()),
2680                    version: Some("5.2-1".to_string()),
2681                    purl: Some("pkg:deb/debian/bash@5.2-1?arch=amd64".to_string()),
2682                    ..Default::default()
2683                }],
2684                license_expression: None,
2685                license_detections: vec![],
2686                copyrights: vec![],
2687                holders: vec![],
2688                authors: vec![],
2689                emails: vec![],
2690                urls: vec![],
2691                for_packages: vec![],
2692                scan_errors: vec![],
2693                is_source: None,
2694                source_count: None,
2695                is_legal: false,
2696                is_manifest: false,
2697                is_readme: false,
2698                is_top_level: false,
2699                is_key_file: false,
2700                is_community: false,
2701                is_generated: None,
2702                facets: vec![],
2703                tallies: None,
2704            },
2705            FileInfo {
2706                name: "bash.list".to_string(),
2707                base_name: "bash".to_string(),
2708                extension: "list".to_string(),
2709                path: "rootfs/var/lib/dpkg/info/bash.list".to_string(),
2710                file_type: FileType::File,
2711                mime_type: None,
2712                size: 40,
2713                date: None,
2714                sha1: None,
2715                md5: None,
2716                sha256: None,
2717                programming_language: None,
2718                package_data: vec![PackageData {
2719                    datasource_id: Some(DatasourceId::DebianInstalledFilesList),
2720                    package_type: Some(PackageType::Deb),
2721                    namespace: Some("debian".to_string()),
2722                    name: Some("bash".to_string()),
2723                    purl: Some("pkg:deb/debian/bash".to_string()),
2724                    file_references: vec![
2725                        FileReference {
2726                            path: "/bin/bash".to_string(),
2727                            size: None,
2728                            sha1: None,
2729                            md5: None,
2730                            sha256: None,
2731                            sha512: None,
2732                            extra_data: None,
2733                        },
2734                        FileReference {
2735                            path: "/usr/share/doc/bash/copyright".to_string(),
2736                            size: None,
2737                            sha1: None,
2738                            md5: None,
2739                            sha256: None,
2740                            sha512: None,
2741                            extra_data: None,
2742                        },
2743                    ],
2744                    ..Default::default()
2745                }],
2746                license_expression: None,
2747                license_detections: vec![],
2748                copyrights: vec![],
2749                holders: vec![],
2750                authors: vec![],
2751                emails: vec![],
2752                urls: vec![],
2753                for_packages: vec![],
2754                scan_errors: vec![],
2755                is_source: None,
2756                source_count: None,
2757                is_legal: false,
2758                is_manifest: false,
2759                is_readme: false,
2760                is_top_level: false,
2761                is_key_file: false,
2762                is_community: false,
2763                is_generated: None,
2764                facets: vec![],
2765                tallies: None,
2766            },
2767            FileInfo {
2768                name: "bash.md5sums".to_string(),
2769                base_name: "bash".to_string(),
2770                extension: "md5sums".to_string(),
2771                path: "rootfs/var/lib/dpkg/info/bash.md5sums".to_string(),
2772                file_type: FileType::File,
2773                mime_type: None,
2774                size: 40,
2775                date: None,
2776                sha1: None,
2777                md5: None,
2778                sha256: None,
2779                programming_language: None,
2780                package_data: vec![PackageData {
2781                    datasource_id: Some(DatasourceId::DebianInstalledMd5Sums),
2782                    package_type: Some(PackageType::Deb),
2783                    namespace: Some("debian".to_string()),
2784                    name: Some("bash".to_string()),
2785                    purl: Some("pkg:deb/debian/bash".to_string()),
2786                    file_references: vec![FileReference {
2787                        path: "bin/bash".to_string(),
2788                        size: None,
2789                        sha1: None,
2790                        md5: Some("77506afebd3b7e19e937a678a185b62e".to_string()),
2791                        sha256: None,
2792                        sha512: None,
2793                        extra_data: None,
2794                    }],
2795                    ..Default::default()
2796                }],
2797                license_expression: None,
2798                license_detections: vec![],
2799                copyrights: vec![],
2800                holders: vec![],
2801                authors: vec![],
2802                emails: vec![],
2803                urls: vec![],
2804                for_packages: vec![],
2805                scan_errors: vec![],
2806                is_source: None,
2807                source_count: None,
2808                is_legal: false,
2809                is_manifest: false,
2810                is_readme: false,
2811                is_top_level: false,
2812                is_key_file: false,
2813                is_community: false,
2814                is_generated: None,
2815                facets: vec![],
2816                tallies: None,
2817            },
2818            FileInfo {
2819                name: "bash".to_string(),
2820                base_name: "bash".to_string(),
2821                extension: String::new(),
2822                path: "rootfs/bin/bash".to_string(),
2823                file_type: FileType::File,
2824                mime_type: None,
2825                size: 20,
2826                date: None,
2827                sha1: None,
2828                md5: None,
2829                sha256: None,
2830                programming_language: None,
2831                package_data: vec![],
2832                license_expression: None,
2833                license_detections: vec![],
2834                copyrights: vec![],
2835                holders: vec![],
2836                authors: vec![],
2837                emails: vec![],
2838                urls: vec![],
2839                for_packages: vec![],
2840                scan_errors: vec![],
2841                is_source: None,
2842                source_count: None,
2843                is_legal: false,
2844                is_manifest: false,
2845                is_readme: false,
2846                is_top_level: false,
2847                is_key_file: false,
2848                is_community: false,
2849                is_generated: None,
2850                facets: vec![],
2851                tallies: None,
2852            },
2853            FileInfo {
2854                name: "copyright".to_string(),
2855                base_name: "copyright".to_string(),
2856                extension: String::new(),
2857                path: "rootfs/usr/share/doc/bash/copyright".to_string(),
2858                file_type: FileType::File,
2859                mime_type: None,
2860                size: 20,
2861                date: None,
2862                sha1: None,
2863                md5: None,
2864                sha256: None,
2865                programming_language: None,
2866                package_data: vec![],
2867                license_expression: None,
2868                license_detections: vec![],
2869                copyrights: vec![],
2870                holders: vec![],
2871                authors: vec![],
2872                emails: vec![],
2873                urls: vec![],
2874                for_packages: vec![],
2875                scan_errors: vec![],
2876                is_source: None,
2877                source_count: None,
2878                is_legal: false,
2879                is_manifest: false,
2880                is_readme: false,
2881                is_top_level: false,
2882                is_key_file: false,
2883                is_community: false,
2884                is_generated: None,
2885                facets: vec![],
2886                tallies: None,
2887            },
2888        ];
2889
2890        let mut packages = vec![Package {
2891            package_type: Some(PackageType::Deb),
2892            namespace: Some("debian".to_string()),
2893            name: Some("bash".to_string()),
2894            version: Some("5.2-1".to_string()),
2895            qualifiers: Some(HashMap::from([("arch".to_string(), "amd64".to_string())])),
2896            subpath: None,
2897            primary_language: None,
2898            description: None,
2899            release_date: None,
2900            parties: vec![],
2901            keywords: vec![],
2902            homepage_url: None,
2903            download_url: None,
2904            size: None,
2905            sha1: None,
2906            md5: None,
2907            sha256: None,
2908            sha512: None,
2909            bug_tracking_url: None,
2910            code_view_url: None,
2911            vcs_url: None,
2912            copyright: None,
2913            holder: None,
2914            declared_license_expression: None,
2915            declared_license_expression_spdx: None,
2916            license_detections: vec![],
2917            other_license_expression: None,
2918            other_license_expression_spdx: None,
2919            other_license_detections: vec![],
2920            extracted_license_statement: None,
2921            notice_text: None,
2922            source_packages: vec![],
2923            is_private: false,
2924            is_virtual: false,
2925            extra_data: None,
2926            repository_homepage_url: None,
2927            repository_download_url: None,
2928            api_data_url: None,
2929            purl: Some("pkg:deb/debian/bash@5.2-1?arch=amd64".to_string()),
2930            package_uid: "pkg:deb/debian/bash@5.2-1?arch=amd64&uuid=test-uuid".to_string(),
2931            datafile_paths: vec!["rootfs/var/lib/dpkg/status".to_string()],
2932            datasource_ids: vec![DatasourceId::DebianInstalledStatusDb],
2933        }];
2934
2935        let mut dependencies = vec![];
2936        resolve_file_references(&mut files, &mut packages, &mut dependencies);
2937
2938        assert_eq!(
2939            files[3].for_packages,
2940            vec!["pkg:deb/debian/bash@5.2-1?arch=amd64&uuid=test-uuid".to_string()]
2941        );
2942        assert_eq!(
2943            files[4].for_packages,
2944            vec!["pkg:deb/debian/bash@5.2-1?arch=amd64&uuid=test-uuid".to_string()]
2945        );
2946    }
2947
2948    #[test]
2949    fn test_resolve_debian_installed_file_references_matches_ubuntu_package_namespace() {
2950        let mut files = vec![
2951            FileInfo {
2952                name: "status".to_string(),
2953                base_name: "status".to_string(),
2954                extension: String::new(),
2955                path: "rootfs/var/lib/dpkg/status".to_string(),
2956                file_type: FileType::File,
2957                mime_type: None,
2958                size: 100,
2959                date: None,
2960                sha1: None,
2961                md5: None,
2962                sha256: None,
2963                programming_language: None,
2964                package_data: vec![PackageData {
2965                    datasource_id: Some(DatasourceId::DebianInstalledStatusDb),
2966                    package_type: Some(PackageType::Deb),
2967                    namespace: Some("ubuntu".to_string()),
2968                    name: Some("bash".to_string()),
2969                    version: Some("5.2-1ubuntu1".to_string()),
2970                    purl: Some("pkg:deb/ubuntu/bash@5.2-1ubuntu1?arch=amd64".to_string()),
2971                    ..Default::default()
2972                }],
2973                license_expression: None,
2974                license_detections: vec![],
2975                copyrights: vec![],
2976                holders: vec![],
2977                authors: vec![],
2978                emails: vec![],
2979                urls: vec![],
2980                for_packages: vec![],
2981                scan_errors: vec![],
2982                is_source: None,
2983                source_count: None,
2984                is_legal: false,
2985                is_manifest: false,
2986                is_readme: false,
2987                is_top_level: false,
2988                is_key_file: false,
2989                is_community: false,
2990                is_generated: None,
2991                facets: vec![],
2992                tallies: None,
2993            },
2994            FileInfo {
2995                name: "bash.list".to_string(),
2996                base_name: "bash".to_string(),
2997                extension: "list".to_string(),
2998                path: "rootfs/var/lib/dpkg/info/bash.list".to_string(),
2999                file_type: FileType::File,
3000                mime_type: None,
3001                size: 40,
3002                date: None,
3003                sha1: None,
3004                md5: None,
3005                sha256: None,
3006                programming_language: None,
3007                package_data: vec![PackageData {
3008                    datasource_id: Some(DatasourceId::DebianInstalledFilesList),
3009                    package_type: Some(PackageType::Deb),
3010                    namespace: Some("debian".to_string()),
3011                    name: Some("bash".to_string()),
3012                    purl: Some("pkg:deb/debian/bash".to_string()),
3013                    file_references: vec![FileReference {
3014                        path: "/bin/bash".to_string(),
3015                        size: None,
3016                        sha1: None,
3017                        md5: None,
3018                        sha256: None,
3019                        sha512: None,
3020                        extra_data: None,
3021                    }],
3022                    ..Default::default()
3023                }],
3024                license_expression: None,
3025                license_detections: vec![],
3026                copyrights: vec![],
3027                holders: vec![],
3028                authors: vec![],
3029                emails: vec![],
3030                urls: vec![],
3031                for_packages: vec![],
3032                scan_errors: vec![],
3033                is_source: None,
3034                source_count: None,
3035                is_legal: false,
3036                is_manifest: false,
3037                is_readme: false,
3038                is_top_level: false,
3039                is_key_file: false,
3040                is_community: false,
3041                is_generated: None,
3042                facets: vec![],
3043                tallies: None,
3044            },
3045            FileInfo {
3046                name: "bash".to_string(),
3047                base_name: "bash".to_string(),
3048                extension: String::new(),
3049                path: "rootfs/bin/bash".to_string(),
3050                file_type: FileType::File,
3051                mime_type: None,
3052                size: 20,
3053                date: None,
3054                sha1: None,
3055                md5: None,
3056                sha256: None,
3057                programming_language: None,
3058                package_data: vec![],
3059                license_expression: None,
3060                license_detections: vec![],
3061                copyrights: vec![],
3062                holders: vec![],
3063                authors: vec![],
3064                emails: vec![],
3065                urls: vec![],
3066                for_packages: vec![],
3067                scan_errors: vec![],
3068                is_source: None,
3069                source_count: None,
3070                is_legal: false,
3071                is_manifest: false,
3072                is_readme: false,
3073                is_top_level: false,
3074                is_key_file: false,
3075                is_community: false,
3076                is_generated: None,
3077                facets: vec![],
3078                tallies: None,
3079            },
3080        ];
3081
3082        let mut packages = vec![Package {
3083            package_type: Some(PackageType::Deb),
3084            namespace: Some("ubuntu".to_string()),
3085            name: Some("bash".to_string()),
3086            version: Some("5.2-1ubuntu1".to_string()),
3087            qualifiers: Some(HashMap::from([("arch".to_string(), "amd64".to_string())])),
3088            subpath: None,
3089            primary_language: None,
3090            description: None,
3091            release_date: None,
3092            parties: vec![],
3093            keywords: vec![],
3094            homepage_url: None,
3095            download_url: None,
3096            size: None,
3097            sha1: None,
3098            md5: None,
3099            sha256: None,
3100            sha512: None,
3101            bug_tracking_url: None,
3102            code_view_url: None,
3103            vcs_url: None,
3104            copyright: None,
3105            holder: None,
3106            declared_license_expression: None,
3107            declared_license_expression_spdx: None,
3108            license_detections: vec![],
3109            other_license_expression: None,
3110            other_license_expression_spdx: None,
3111            other_license_detections: vec![],
3112            extracted_license_statement: None,
3113            notice_text: None,
3114            source_packages: vec![],
3115            is_private: false,
3116            is_virtual: false,
3117            extra_data: None,
3118            repository_homepage_url: None,
3119            repository_download_url: None,
3120            api_data_url: None,
3121            purl: Some("pkg:deb/ubuntu/bash@5.2-1ubuntu1?arch=amd64".to_string()),
3122            package_uid: "pkg:deb/ubuntu/bash@5.2-1ubuntu1?arch=amd64&uuid=test-uuid".to_string(),
3123            datafile_paths: vec!["rootfs/var/lib/dpkg/status".to_string()],
3124            datasource_ids: vec![DatasourceId::DebianInstalledStatusDb],
3125        }];
3126
3127        let mut dependencies = vec![];
3128        resolve_file_references(&mut files, &mut packages, &mut dependencies);
3129
3130        assert_eq!(
3131            files[2].for_packages,
3132            vec!["pkg:deb/ubuntu/bash@5.2-1ubuntu1?arch=amd64&uuid=test-uuid".to_string()]
3133        );
3134    }
3135
3136    #[test]
3137    fn test_resolve_debian_installed_file_references_respects_arch_qualifier() {
3138        let mut files = vec![
3139            FileInfo {
3140                name: "status".to_string(),
3141                base_name: "status".to_string(),
3142                extension: String::new(),
3143                path: "rootfs/var/lib/dpkg/status".to_string(),
3144                file_type: FileType::File,
3145                mime_type: None,
3146                size: 100,
3147                date: None,
3148                sha1: None,
3149                md5: None,
3150                sha256: None,
3151                programming_language: None,
3152                package_data: vec![PackageData {
3153                    datasource_id: Some(DatasourceId::DebianInstalledStatusDb),
3154                    package_type: Some(PackageType::Deb),
3155                    namespace: Some("debian".to_string()),
3156                    name: Some("libc6".to_string()),
3157                    version: Some("2.36-1".to_string()),
3158                    purl: Some("pkg:deb/debian/libc6@2.36-1?arch=amd64".to_string()),
3159                    qualifiers: Some(HashMap::from([("arch".to_string(), "amd64".to_string())])),
3160                    ..Default::default()
3161                }],
3162                license_expression: None,
3163                license_detections: vec![],
3164                copyrights: vec![],
3165                holders: vec![],
3166                authors: vec![],
3167                emails: vec![],
3168                urls: vec![],
3169                for_packages: vec![],
3170                scan_errors: vec![],
3171                is_source: None,
3172                source_count: None,
3173                is_legal: false,
3174                is_manifest: false,
3175                is_readme: false,
3176                is_top_level: false,
3177                is_key_file: false,
3178                is_community: false,
3179                is_generated: None,
3180                facets: vec![],
3181                tallies: None,
3182            },
3183            FileInfo {
3184                name: "libc6:amd64.list".to_string(),
3185                base_name: "libc6:amd64".to_string(),
3186                extension: "list".to_string(),
3187                path: "rootfs/var/lib/dpkg/info/libc6:amd64.list".to_string(),
3188                file_type: FileType::File,
3189                mime_type: None,
3190                size: 20,
3191                date: None,
3192                sha1: None,
3193                md5: None,
3194                sha256: None,
3195                programming_language: None,
3196                package_data: vec![PackageData {
3197                    datasource_id: Some(DatasourceId::DebianInstalledFilesList),
3198                    package_type: Some(PackageType::Deb),
3199                    namespace: Some("debian".to_string()),
3200                    name: Some("libc6".to_string()),
3201                    qualifiers: Some(HashMap::from([("arch".to_string(), "amd64".to_string())])),
3202                    purl: Some("pkg:deb/debian/libc6?arch=amd64".to_string()),
3203                    file_references: vec![FileReference {
3204                        path: "/lib/x86_64-linux-gnu/libc.so.6".to_string(),
3205                        size: None,
3206                        sha1: None,
3207                        md5: None,
3208                        sha256: None,
3209                        sha512: None,
3210                        extra_data: None,
3211                    }],
3212                    ..Default::default()
3213                }],
3214                license_expression: None,
3215                license_detections: vec![],
3216                copyrights: vec![],
3217                holders: vec![],
3218                authors: vec![],
3219                emails: vec![],
3220                urls: vec![],
3221                for_packages: vec![],
3222                scan_errors: vec![],
3223                is_source: None,
3224                source_count: None,
3225                is_legal: false,
3226                is_manifest: false,
3227                is_readme: false,
3228                is_top_level: false,
3229                is_key_file: false,
3230                is_community: false,
3231                is_generated: None,
3232                facets: vec![],
3233                tallies: None,
3234            },
3235            FileInfo {
3236                name: "libc6:i386.list".to_string(),
3237                base_name: "libc6:i386".to_string(),
3238                extension: "list".to_string(),
3239                path: "rootfs/var/lib/dpkg/info/libc6:i386.list".to_string(),
3240                file_type: FileType::File,
3241                mime_type: None,
3242                size: 20,
3243                date: None,
3244                sha1: None,
3245                md5: None,
3246                sha256: None,
3247                programming_language: None,
3248                package_data: vec![PackageData {
3249                    datasource_id: Some(DatasourceId::DebianInstalledFilesList),
3250                    package_type: Some(PackageType::Deb),
3251                    namespace: Some("debian".to_string()),
3252                    name: Some("libc6".to_string()),
3253                    qualifiers: Some(HashMap::from([("arch".to_string(), "i386".to_string())])),
3254                    purl: Some("pkg:deb/debian/libc6?arch=i386".to_string()),
3255                    file_references: vec![FileReference {
3256                        path: "/lib/i386-linux-gnu/libc.so.6".to_string(),
3257                        size: None,
3258                        sha1: None,
3259                        md5: None,
3260                        sha256: None,
3261                        sha512: None,
3262                        extra_data: None,
3263                    }],
3264                    ..Default::default()
3265                }],
3266                license_expression: None,
3267                license_detections: vec![],
3268                copyrights: vec![],
3269                holders: vec![],
3270                authors: vec![],
3271                emails: vec![],
3272                urls: vec![],
3273                for_packages: vec![],
3274                scan_errors: vec![],
3275                is_source: None,
3276                source_count: None,
3277                is_legal: false,
3278                is_manifest: false,
3279                is_readme: false,
3280                is_top_level: false,
3281                is_key_file: false,
3282                is_community: false,
3283                is_generated: None,
3284                facets: vec![],
3285                tallies: None,
3286            },
3287            FileInfo {
3288                name: "libc.so.6".to_string(),
3289                base_name: "libc.so".to_string(),
3290                extension: "6".to_string(),
3291                path: "rootfs/lib/x86_64-linux-gnu/libc.so.6".to_string(),
3292                file_type: FileType::File,
3293                mime_type: None,
3294                size: 10,
3295                date: None,
3296                sha1: None,
3297                md5: None,
3298                sha256: None,
3299                programming_language: None,
3300                package_data: vec![],
3301                license_expression: None,
3302                license_detections: vec![],
3303                copyrights: vec![],
3304                holders: vec![],
3305                authors: vec![],
3306                emails: vec![],
3307                urls: vec![],
3308                for_packages: vec![],
3309                scan_errors: vec![],
3310                is_source: None,
3311                source_count: None,
3312                is_legal: false,
3313                is_manifest: false,
3314                is_readme: false,
3315                is_top_level: false,
3316                is_key_file: false,
3317                is_community: false,
3318                is_generated: None,
3319                facets: vec![],
3320                tallies: None,
3321            },
3322            FileInfo {
3323                name: "libc.so.6".to_string(),
3324                base_name: "libc.so".to_string(),
3325                extension: "6".to_string(),
3326                path: "rootfs/lib/i386-linux-gnu/libc.so.6".to_string(),
3327                file_type: FileType::File,
3328                mime_type: None,
3329                size: 10,
3330                date: None,
3331                sha1: None,
3332                md5: None,
3333                sha256: None,
3334                programming_language: None,
3335                package_data: vec![],
3336                license_expression: None,
3337                license_detections: vec![],
3338                copyrights: vec![],
3339                holders: vec![],
3340                authors: vec![],
3341                emails: vec![],
3342                urls: vec![],
3343                for_packages: vec![],
3344                scan_errors: vec![],
3345                is_source: None,
3346                source_count: None,
3347                is_legal: false,
3348                is_manifest: false,
3349                is_readme: false,
3350                is_top_level: false,
3351                is_key_file: false,
3352                is_community: false,
3353                is_generated: None,
3354                facets: vec![],
3355                tallies: None,
3356            },
3357        ];
3358
3359        let mut packages = vec![Package {
3360            package_type: Some(PackageType::Deb),
3361            namespace: Some("debian".to_string()),
3362            name: Some("libc6".to_string()),
3363            version: Some("2.36-1".to_string()),
3364            qualifiers: Some(HashMap::from([("arch".to_string(), "amd64".to_string())])),
3365            subpath: None,
3366            primary_language: None,
3367            description: None,
3368            release_date: None,
3369            parties: vec![],
3370            keywords: vec![],
3371            homepage_url: None,
3372            download_url: None,
3373            size: None,
3374            sha1: None,
3375            md5: None,
3376            sha256: None,
3377            sha512: None,
3378            bug_tracking_url: None,
3379            code_view_url: None,
3380            vcs_url: None,
3381            copyright: None,
3382            holder: None,
3383            declared_license_expression: None,
3384            declared_license_expression_spdx: None,
3385            license_detections: vec![],
3386            other_license_expression: None,
3387            other_license_expression_spdx: None,
3388            other_license_detections: vec![],
3389            extracted_license_statement: None,
3390            notice_text: None,
3391            source_packages: vec![],
3392            is_private: false,
3393            is_virtual: false,
3394            extra_data: None,
3395            repository_homepage_url: None,
3396            repository_download_url: None,
3397            api_data_url: None,
3398            purl: Some("pkg:deb/debian/libc6@2.36-1?arch=amd64".to_string()),
3399            package_uid: "pkg:deb/debian/libc6@2.36-1?arch=amd64&uuid=test-uuid".to_string(),
3400            datafile_paths: vec!["rootfs/var/lib/dpkg/status".to_string()],
3401            datasource_ids: vec![DatasourceId::DebianInstalledStatusDb],
3402        }];
3403
3404        let mut dependencies = vec![];
3405        resolve_file_references(&mut files, &mut packages, &mut dependencies);
3406
3407        assert_eq!(
3408            files[3].for_packages,
3409            vec!["pkg:deb/debian/libc6@2.36-1?arch=amd64&uuid=test-uuid".to_string()]
3410        );
3411        assert!(files[4].for_packages.is_empty());
3412    }
3413}