Skip to main content

scancode_rust/models/
file_info.rs

1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5use crate::utils::spdx::combine_license_expressions;
6
7#[derive(Debug, Builder, Serialize)]
8#[builder(build_fn(skip))]
9pub struct FileInfo {
10    pub name: String,
11    pub base_name: String,
12    pub extension: String,
13    pub path: String,
14    #[serde(rename = "type")] // name used by ScanCode
15    pub file_type: FileType,
16    #[builder(default)]
17    pub mime_type: Option<String>,
18    pub size: u64,
19    #[builder(default)]
20    pub date: Option<String>,
21    #[builder(default)]
22    pub sha1: Option<String>,
23    #[builder(default)]
24    pub md5: Option<String>,
25    #[builder(default)]
26    pub sha256: Option<String>,
27    #[builder(default)]
28    pub programming_language: Option<String>,
29    #[builder(default)]
30    pub package_data: Vec<PackageData>,
31    #[serde(rename = "detected_license_expression_spdx")] // name used by ScanCode
32    #[builder(default)]
33    pub license_expression: Option<String>,
34    #[builder(default)]
35    pub license_detections: Vec<LicenseDetection>,
36    #[builder(default)]
37    pub copyrights: Vec<Copyright>,
38    #[builder(default)]
39    pub urls: Vec<OutputURL>,
40    #[builder(default)]
41    pub scan_errors: Vec<String>,
42}
43
44impl FileInfoBuilder {
45    pub fn build(&self) -> Result<FileInfo, String> {
46        Ok(FileInfo::new(
47            self.name.clone().ok_or("Missing field: name")?,
48            self.base_name.clone().ok_or("Missing field: base_name")?,
49            self.extension.clone().ok_or("Missing field: extension")?,
50            self.path.clone().ok_or("Missing field: path")?,
51            self.file_type.clone().ok_or("Missing field: file_type")?,
52            self.mime_type.clone().flatten(),
53            self.size.ok_or("Missing field: size")?,
54            self.date.clone().flatten(),
55            self.sha1.clone().flatten(),
56            self.md5.clone().flatten(),
57            self.sha256.clone().flatten(),
58            self.programming_language.clone().flatten(),
59            self.package_data.clone().unwrap_or_default(),
60            self.license_expression.clone().flatten(),
61            self.license_detections.clone().unwrap_or_default(),
62            self.copyrights.clone().unwrap_or_default(),
63            self.urls.clone().unwrap_or_default(),
64            self.scan_errors.clone().unwrap_or_default(),
65        ))
66    }
67}
68
69impl FileInfo {
70    #[allow(clippy::too_many_arguments)]
71    pub fn new(
72        name: String,
73        base_name: String,
74        extension: String,
75        path: String,
76        file_type: FileType,
77        mime_type: Option<String>,
78        size: u64,
79        date: Option<String>,
80        sha1: Option<String>,
81        md5: Option<String>,
82        sha256: Option<String>,
83        programming_language: Option<String>,
84        package_data: Vec<PackageData>,
85        mut license_expression: Option<String>,
86        mut license_detections: Vec<LicenseDetection>,
87        copyrights: Vec<Copyright>,
88        urls: Vec<OutputURL>,
89        scan_errors: Vec<String>,
90    ) -> Self {
91        // Combine license expressions from package data if license_expression is None
92        license_expression = license_expression.or_else(|| {
93            let expressions = package_data
94                .iter()
95                .filter_map(|pkg| pkg.get_license_expression());
96            combine_license_expressions(expressions)
97        });
98
99        // Combine license detections from package data if none are provided
100        if license_detections.is_empty() {
101            for pkg in &package_data {
102                license_detections.extend(pkg.license_detections.clone());
103            }
104        }
105
106        // Combine license expressions from license detections if license_expression is still None
107        if license_expression.is_none() && !license_detections.is_empty() {
108            let expressions = license_detections
109                .iter()
110                .map(|detection| detection.license_expression.clone());
111            license_expression = combine_license_expressions(expressions);
112        }
113
114        FileInfo {
115            name,
116            base_name,
117            extension,
118            path,
119            file_type,
120            mime_type,
121            size,
122            date,
123            sha1,
124            md5,
125            sha256,
126            programming_language,
127            package_data,
128            license_expression,
129            license_detections,
130            copyrights,
131            urls,
132            scan_errors,
133        }
134    }
135}
136
137#[derive(Serialize, Debug, Clone)]
138pub struct PackageData {
139    #[serde(rename = "type")] // name used by ScanCode
140    pub package_type: Option<String>,
141    pub namespace: Option<String>,
142    pub name: Option<String>,
143    pub version: Option<String>,
144    pub homepage_url: Option<String>,
145    pub download_url: Option<String>,
146    pub copyright: Option<String>,
147    pub license_detections: Vec<LicenseDetection>,
148    pub dependencies: Vec<Dependency>,
149    pub parties: Vec<Party>,
150    pub purl: Option<String>,
151}
152
153impl PackageData {
154    /// Extracts a single license expression from all license detections in this package.
155    /// Returns None if there are no license detections.
156    pub fn get_license_expression(&self) -> Option<String> {
157        if self.license_detections.is_empty() {
158            return None;
159        }
160
161        let expressions = self
162            .license_detections
163            .iter()
164            .map(|detection| detection.license_expression.clone());
165        combine_license_expressions(expressions)
166    }
167}
168
169#[derive(Serialize, Debug, Clone)]
170pub struct LicenseDetection {
171    #[serde(rename = "license_expression_spdx")] // name used by ScanCode
172    pub license_expression: String,
173    pub matches: Vec<Match>,
174}
175
176#[derive(Serialize, Debug, Clone)]
177pub struct Match {
178    pub score: f64,
179    pub start_line: usize,
180    pub end_line: usize,
181    #[serde(rename = "license_expression_spdx")] // name used by ScanCode
182    pub license_expression: String,
183    pub rule_identifier: Option<String>,
184    pub matched_text: Option<String>,
185}
186
187#[derive(Serialize, Debug, Clone)]
188pub struct Copyright {
189    pub copyright: String,
190    pub start_line: usize,
191    pub end_line: usize,
192}
193
194#[derive(Serialize, Debug, Clone)]
195pub struct Dependency {
196    pub purl: Option<String>,
197    pub scope: Option<String>,
198    pub is_optional: bool,
199}
200
201#[derive(Serialize, Debug, Clone)]
202pub struct Party {
203    pub email: String,
204}
205
206#[derive(Serialize, Debug, Clone)]
207pub struct OutputURL {
208    pub url: String,
209}
210
/// Kind of filesystem entry described by a `FileInfo`.
///
/// `PartialEq`/`Eq` are derived so values can be compared directly
/// (`file_type == FileType::File`) and used with `assert_eq!`, instead of requiring a
/// `match`/`matches!` at every call site.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// The entry is a file.
    File,
    /// The entry is a directory.
    Directory,
}
216
217impl Serialize for FileType {
218    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
219    where
220        S: serde::Serializer,
221    {
222        let value = match self {
223            FileType::File => "file",
224            FileType::Directory => "directory",
225        };
226        serializer.serialize_str(value)
227    }
228}