scancode_rust/models/
file_info.rs

1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5use crate::utils::spdx::combine_license_expressions;
6
/// Scan record for a single file-system entry, serialized with serde.
///
/// `derive_builder` generates `FileInfoBuilder` for this struct. The generated
/// `build` method is suppressed (`build_fn(skip)`), so the builder's `build`
/// is the hand-written one in `impl FileInfoBuilder`.
///
/// Fields are serialized in declaration order; two of them are renamed to the
/// key names used by ScanCode.
#[derive(Debug, Builder, Serialize)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    /// Name of the entry. Required by the builder.
    pub name: String,
    /// Base name of the entry (presumably the name without its extension —
    /// confirm against the code that fills it). Required by the builder.
    pub base_name: String,
    /// Extension of the entry. Required by the builder.
    pub extension: String,
    /// Path of the entry. Required by the builder.
    pub path: String,
    /// Whether the entry is a file or a directory; serialized under the key `type`.
    #[serde(rename = "type")] // name used by ScanCode
    pub file_type: FileType,
    /// MIME type, if known; `None` when not set on the builder.
    #[builder(default)]
    pub mime_type: Option<String>,
    /// Size of the entry (unit is not stated in this file; presumably bytes).
    /// Required by the builder.
    pub size: u64,
    /// Date string, if available (format is not specified in this file).
    #[builder(default)]
    pub date: Option<String>,
    /// SHA-1 value as a string, if available.
    #[builder(default)]
    pub sha1: Option<String>,
    /// MD5 value as a string, if available.
    #[builder(default)]
    pub md5: Option<String>,
    /// SHA-256 value as a string, if available.
    #[builder(default)]
    pub sha256: Option<String>,
    /// Programming language name, if one was determined.
    #[builder(default)]
    pub programming_language: Option<String>,
    /// Package records attached to this entry; empty when not set on the builder.
    #[builder(default)]
    pub package_data: Vec<PackageData>,
    /// License expression for the entry, serialized under the key
    /// `detected_license_expression_spdx`. When `None` is passed to
    /// `FileInfo::new`, it is derived from `package_data` / `license_detections`.
    #[serde(rename = "detected_license_expression_spdx")] // name used by ScanCode
    #[builder(default)]
    pub license_expression: Option<String>,
    /// License detections for the entry. When empty on construction,
    /// `FileInfo::new` copies in the detections of every package.
    #[builder(default)]
    pub license_detections: Vec<LicenseDetection>,
    /// Copyright statements found in the entry.
    #[builder(default)]
    pub copyrights: Vec<Copyright>,
    /// URLs recorded for the entry.
    #[builder(default)]
    pub urls: Vec<OutputURL>,
    /// Error messages recorded while scanning the entry.
    #[builder(default)]
    pub scan_errors: Vec<String>,
}
43
44impl FileInfoBuilder {
45    pub fn build(&self) -> Result<FileInfo, String> {
46        Ok(FileInfo::new(
47            self.name.clone().ok_or("Missing field: name")?,
48            self.base_name.clone().ok_or("Missing field: base_name")?,
49            self.extension.clone().ok_or("Missing field: extension")?,
50            self.path.clone().ok_or("Missing field: path")?,
51            self.file_type.clone().ok_or("Missing field: file_type")?,
52            self.mime_type.clone().flatten(),
53            self.size.clone().ok_or("Missing field: size")?,
54            self.date.clone().flatten(),
55            self.sha1.clone().flatten(),
56            self.md5.clone().flatten(),
57            self.sha256.clone().flatten(),
58            self.programming_language.clone().flatten(),
59            self.package_data.clone().unwrap_or_default(),
60            self.license_expression.clone().flatten(),
61            self.license_detections.clone().unwrap_or_default(),
62            self.copyrights.clone().unwrap_or_default(),
63            self.urls.clone().unwrap_or_default(),
64            self.scan_errors.clone().unwrap_or_default(),
65        ))
66    }
67}
68
69impl FileInfo {
70    pub fn new(
71        name: String,
72        base_name: String,
73        extension: String,
74        path: String,
75        file_type: FileType,
76        mime_type: Option<String>,
77        size: u64,
78        date: Option<String>,
79        sha1: Option<String>,
80        md5: Option<String>,
81        sha256: Option<String>,
82        programming_language: Option<String>,
83        package_data: Vec<PackageData>,
84        mut license_expression: Option<String>,
85        mut license_detections: Vec<LicenseDetection>,
86        copyrights: Vec<Copyright>,
87        urls: Vec<OutputURL>,
88        scan_errors: Vec<String>,
89    ) -> Self {
90        // Combine license expressions from package data if license_expression is None
91        license_expression = license_expression.or_else(|| {
92            let expressions = package_data
93                .iter()
94                .filter_map(|pkg| pkg.get_license_expression());
95            combine_license_expressions(expressions)
96        });
97
98        // Combine license detections from package data if none are provided
99        if license_detections.is_empty() {
100            for pkg in &package_data {
101                license_detections.extend(pkg.license_detections.clone());
102            }
103        }
104
105        // Combine license expressions from license detections if license_expression is still None
106        if license_expression.is_none() && !license_detections.is_empty() {
107            let expressions = license_detections
108                .iter()
109                .map(|detection| detection.license_expression.clone());
110            license_expression = combine_license_expressions(expressions);
111        }
112
113        FileInfo {
114            name,
115            base_name,
116            extension,
117            path,
118            file_type,
119            mime_type,
120            size,
121            date,
122            sha1,
123            md5,
124            sha256,
125            programming_language,
126            package_data,
127            license_expression,
128            license_detections,
129            copyrights,
130            urls,
131            scan_errors,
132        }
133    }
134}
135
/// Package metadata record attached to a `FileInfo` through its
/// `package_data` list.
///
/// All scalar fields are optional strings; the list fields may be empty.
#[derive(Serialize, Debug, Clone)]
pub struct PackageData {
    /// Package type, serialized under the key `type`.
    #[serde(rename = "type")] // name used by ScanCode
    pub package_type: Option<String>,
    /// Package namespace, if any.
    pub namespace: Option<String>,
    /// Package name, if known.
    pub name: Option<String>,
    /// Package version, if known.
    pub version: Option<String>,
    /// Homepage URL, if known.
    pub homepage_url: Option<String>,
    /// Download URL, if known.
    pub download_url: Option<String>,
    /// Copyright text, if known.
    pub copyright: Option<String>,
    /// License detections for this package; `FileInfo::new` reads these for
    /// its license fallbacks.
    pub license_detections: Vec<LicenseDetection>,
    /// Dependencies recorded for this package.
    pub dependencies: Vec<Dependency>,
    /// Parties recorded for this package.
    pub parties: Vec<Party>,
    /// Identifier string for the package (presumably a Package URL — confirm
    /// against the code that fills it).
    pub purl: Option<String>,
}
151
152impl PackageData {
153    /// Extracts a single license expression from all license detections in this package.
154    /// Returns None if there are no license detections.
155    pub fn get_license_expression(&self) -> Option<String> {
156        if self.license_detections.is_empty() {
157            return None;
158        }
159
160        let expressions = self
161            .license_detections
162            .iter()
163            .map(|detection| detection.license_expression.clone());
164        combine_license_expressions(expressions)
165    }
166}
167
/// One license detection: an expression plus the matches that belong to it.
#[derive(Serialize, Debug, Clone)]
pub struct LicenseDetection {
    /// License expression of the detection, serialized under the key
    /// `license_expression_spdx`.
    #[serde(rename = "license_expression_spdx")] // name used by ScanCode
    pub license_expression: String,
    /// Individual matches recorded for this detection.
    pub matches: Vec<Match>,
}
174
/// A single license match inside a `LicenseDetection`.
#[derive(Serialize, Debug, Clone)]
pub struct Match {
    /// Score of the match (scale is not defined in this file).
    pub score: f64,
    /// Line on which the match starts.
    pub start_line: usize,
    /// Line on which the match ends.
    pub end_line: usize,
    /// License expression of the match, serialized under the key
    /// `license_expression_spdx`.
    #[serde(rename = "license_expression_spdx")] // name used by ScanCode
    pub license_expression: String,
    /// Identifier of the rule behind the match, if any.
    pub rule_identifier: Option<String>,
    /// Text that was matched, if recorded.
    pub matched_text: Option<String>,
}
185
/// A copyright statement together with the line range it was recorded for.
#[derive(Serialize, Debug, Clone)]
pub struct Copyright {
    /// The copyright text.
    pub copyright: String,
    /// Line on which the statement starts.
    pub start_line: usize,
    /// Line on which the statement ends.
    pub end_line: usize,
}
192
/// A dependency entry listed in `PackageData::dependencies`.
#[derive(Serialize, Debug, Clone)]
pub struct Dependency {
    /// Identifier string of the dependency (presumably a Package URL —
    /// confirm against the code that fills it).
    pub purl: Option<String>,
    /// Scope of the dependency, if any (allowed values are not defined in this file).
    pub scope: Option<String>,
    /// Whether the dependency is optional.
    pub is_optional: bool,
}
199
/// A party entry listed in `PackageData::parties`.
#[derive(Serialize, Debug, Clone)]
pub struct Party {
    /// E-mail address recorded for the party.
    pub email: String,
}
204
/// A URL entry listed in `FileInfo::urls`.
#[derive(Serialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL as a string.
    pub url: String,
}
209
/// Kind of file-system entry a `FileInfo` describes.
///
/// `Serialize` is implemented by hand in this file and writes the variant as
/// the string `"file"` or `"directory"`.
#[derive(Debug, Clone)]
pub enum FileType {
    /// A regular file; serialized as `"file"`.
    File,
    /// A directory; serialized as `"directory"`.
    Directory,
}
215
216impl Serialize for FileType {
217    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
218    where
219        S: serde::Serializer,
220    {
221        let value = match self {
222            FileType::File => "file",
223            FileType::Directory => "directory",
224        };
225        serializer.serialize_str(value)
226    }
227}