1pub mod config;
2pub mod fetch;
3pub mod resolution;
4mod scan;
5mod workarounds;
6
7use crate::{Krate, Krates};
8use anyhow::Context as _;
9use krates::Utf8PathBuf as PathBuf;
10use rayon::prelude::*;
11pub use resolution::Resolved;
12use spdx::detection as sd;
13use std::{cmp, fmt, sync::Arc};
14
/// Alias for the SPDX license detection store (`spdx::detection::Store`) that
/// this module loads in [`store_from_cache`] and scans with in [`Gatherer`].
pub type LicenseStore = sd::Store;
16
17#[inline]
18pub fn store_from_cache() -> anyhow::Result<LicenseStore> {
19 sd::Store::load_inline().context("failed to load license store")
20}
21
/// The overall license information recorded for a crate in [`KrateLicense`].
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum LicenseInfo {
    /// An SPDX license expression, e.g. the `license` of a clarification that
    /// was applied to the crate.
    Expr(spdx::Expression),
    /// NOTE(review): presumably "no usable license expression was found";
    /// this variant is not constructed in the code visible here — confirm.
    Unknown,
    /// The crate is ignored; `Gatherer::gather` assigns this to private crates
    /// when `cfg.private.ignore` is set.
    Ignore,
}
29
30impl fmt::Display for LicenseInfo {
31 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32 match self {
33 LicenseInfo::Expr(expr) => write!(f, "{expr}"),
34 LicenseInfo::Unknown => write!(f, "Unknown"),
35 LicenseInfo::Ignore => write!(f, "Ignore"),
36 }
37 }
38}
39
/// The kind of content a [`LicenseFile`] carries.
pub enum LicenseFileKind {
    /// License text. `apply_clarification` stores the checksummed (sub)section
    /// of the clarified file here.
    Text(String),
    /// NOTE(review): presumably addendum text together with a related path;
    /// not constructed in the code visible here — confirm against the scanner.
    AddendumText(String, PathBuf),
    /// NOTE(review): presumably a license detected from a source header with
    /// no stored text; not constructed in the code visible here — confirm.
    Header,
}
51
/// A single license file (or file section) attributed to a crate.
///
/// Ordering and equality are defined by `license_expr` and `confidence` only;
/// `path` and `kind` do not participate.
pub struct LicenseFile {
    /// The SPDX expression this file was identified as.
    pub license_expr: spdx::Expression,
    /// Path of the file. For clarified files this is the path exactly as
    /// written in the clarification.
    pub path: PathBuf,
    /// Confidence of the identification; clarified files always use `1.0`.
    pub confidence: f32,
    /// What kind of content was recorded for this file.
    pub kind: LicenseFileKind,
}
63
64impl Ord for LicenseFile {
65 #[inline]
66 fn cmp(&self, o: &Self) -> cmp::Ordering {
67 match self.license_expr.as_ref().cmp(o.license_expr.as_ref()) {
68 cmp::Ordering::Equal => o
69 .confidence
70 .partial_cmp(&self.confidence)
71 .expect("NaN encountered comparing license confidences"),
72 ord => ord,
73 }
74 }
75}
76
77impl PartialOrd for LicenseFile {
78 #[inline]
79 fn partial_cmp(&self, o: &Self) -> Option<cmp::Ordering> {
80 Some(self.cmp(o))
81 }
82}
83
84impl PartialEq for LicenseFile {
85 #[inline]
86 fn eq(&self, o: &Self) -> bool {
87 self.cmp(o) == cmp::Ordering::Equal
88 }
89}
90
91impl Eq for LicenseFile {}
92
/// The license information gathered for a single crate.
///
/// Ordering and equality consider only `krate`, which lets a sorted list of
/// these be searched by crate (see [`binary_search`]).
pub struct KrateLicense<'krate> {
    /// The crate this information belongs to.
    pub krate: &'krate Krate,
    /// The crate's overall license information.
    pub lic_info: LicenseInfo,
    /// License files attributed to the crate, from a clarification or from
    /// scanning the crate's directory.
    pub license_files: Vec<LicenseFile>,
}
98
99impl Ord for KrateLicense<'_> {
100 #[inline]
101 fn cmp(&self, o: &Self) -> cmp::Ordering {
102 self.krate.cmp(o.krate)
103 }
104}
105
106impl PartialOrd for KrateLicense<'_> {
107 #[inline]
108 fn partial_cmp(&self, o: &Self) -> Option<cmp::Ordering> {
109 Some(self.cmp(o))
110 }
111}
112
113impl PartialEq for KrateLicense<'_> {
114 #[inline]
115 fn eq(&self, o: &Self) -> bool {
116 self.cmp(o) == cmp::Ordering::Equal
117 }
118}
119
120impl Eq for KrateLicense<'_> {}
121
/// Builder-style collector that produces a [`KrateLicense`] for each crate in
/// a crate graph.
pub struct Gatherer {
    /// License store the scanner is constructed from.
    store: Arc<LicenseStore>,
    /// Confidence threshold; defaults to `0.8` and is clamped to `0.0..=1.0`
    /// by `with_confidence_threshold`.
    threshold: f32,
    /// Maximum depth forwarded to the license file scan; `None` by default.
    max_depth: Option<usize>,
}
127
128impl Gatherer {
129 pub fn with_store(store: Arc<LicenseStore>) -> Self {
130 Self {
131 store,
132 threshold: 0.8,
133 max_depth: None,
134 }
135 }
136
137 pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
138 self.threshold = threshold.clamp(0.0, 1.0);
139 self
140 }
141
142 pub fn with_max_depth(mut self, max_depth: Option<usize>) -> Self {
143 self.max_depth = max_depth;
144 self
145 }
146
147 pub fn gather<'krate>(
148 self,
149 krates: &'krate Krates,
150 cfg: &config::Config,
151 client: Option<reqwest::blocking::Client>,
152 ) -> Vec<KrateLicense<'krate>> {
153 let mut licensed_krates = Vec::with_capacity(krates.len());
154
155 let threshold = self.threshold;
156 let min_threshold = threshold - 0.5;
157
158 let strategy = spdx::detection::scan::Scanner::new(&self.store)
159 .confidence_threshold(if min_threshold < 0.1 {
160 0.1
161 } else {
162 min_threshold
163 })
164 .optimize(false)
165 .max_passes(1);
166
167 let git_cache = fetch::GitCache::maybe_offline(client);
168
169 if cfg.private.ignore {
172 for krate in krates.krates() {
173 if let Some(publish) = &krate.publish {
174 if publish.is_empty()
175 || publish
176 .iter()
177 .all(|reg| cfg.private.registries.contains(reg))
178 {
179 log::debug!("ignoring private crate '{krate}'");
180 licensed_krates.push(KrateLicense {
181 krate,
182 lic_info: LicenseInfo::Ignore,
183 license_files: Vec::new(),
184 });
185 }
186 }
187 }
188
189 licensed_krates.sort();
190 }
191
192 workarounds::apply_workarounds(krates, cfg, &git_cache, &mut licensed_krates);
195
196 self.gather_clarified(krates, cfg, &git_cache, &mut licensed_krates);
199
200 self.gather_file_system(krates, &strategy, &mut licensed_krates);
202
203 licensed_krates.sort();
204 licensed_krates
205 }
206
207 #[allow(clippy::unused_self)]
208 fn gather_clarified<'k>(
209 &self,
210 krates: &'k Krates,
211 cfg: &config::Config,
212 gc: &fetch::GitCache,
213 licensed_krates: &mut Vec<KrateLicense<'k>>,
214 ) {
215 for (krate, clarification) in krates.krates().filter_map(|krate| {
216 cfg.crates
217 .get(&krate.name)
218 .and_then(|kc| kc.clarify.as_ref())
219 .map(|cl| (krate, cl))
220 }) {
221 if let Err(i) = binary_search(licensed_krates, krate) {
222 match apply_clarification(gc, krate, clarification) {
223 Ok(lic_files) => {
224 log::debug!(
225 "applying clarification expression '{}' to crate {krate}",
226 clarification.license,
227 );
228 licensed_krates.insert(
229 i,
230 KrateLicense {
231 krate,
232 lic_info: LicenseInfo::Expr(clarification.license.clone()),
233 license_files: lic_files,
234 },
235 );
236 }
237 Err(e) => {
238 log::warn!(
239 "failed to validate all files specified in clarification for crate {krate}: {e:#}"
240 );
241 }
242 }
243 }
244 }
245 }
246
247 fn gather_file_system<'k>(
248 &self,
249 krates: &'k Krates,
250 scanner: &sd::scan::Scanner<'_>,
251 licensed_krates: &mut Vec<KrateLicense<'k>>,
252 ) {
253 let threshold = self.threshold;
254 let max_depth = self.max_depth;
255
256 let mut gathered: Vec<_> = krates
257 .krates()
258 .par_bridge()
259 .filter_map(|krate| {
260 if binary_search(licensed_krates, krate).is_ok() {
262 return None;
263 }
264
265 let info = krate.get_license_expression();
266
267 let root_path = krate.manifest_path.parent().unwrap();
268
269 let mut license_files =
270 match scan::scan_files(root_path, scanner, threshold, max_depth) {
271 Ok(files) => files,
272 Err(err) => {
273 log::error!(
274 "unable to scan for license files for crate '{} - {}': {err}",
275 krate.name,
276 krate.version,
277 );
278
279 Vec::new()
280 }
281 };
282
283 license_files.sort();
286
287 let mut expr = None;
288 license_files.retain(|lf| {
289 if let Some(cur) = &expr {
290 if *cur != lf.license_expr {
291 expr = Some(lf.license_expr.clone());
292 true
293 } else {
294 false
295 }
296 } else {
297 expr = Some(lf.license_expr.clone());
298 true
299 }
300 });
301
302 Some(KrateLicense {
303 krate,
304 lic_info: info,
305 license_files,
306 })
307 })
308 .collect();
309
310 licensed_krates.append(&mut gathered);
311 }
312}
313
314pub(crate) fn apply_clarification(
315 git_cache: &fetch::GitCache,
316 krate: &crate::Krate,
317 clarification: &config::Clarification,
318) -> anyhow::Result<Vec<LicenseFile>> {
319 anyhow::ensure!(
320 !clarification.files.is_empty() || !clarification.git.is_empty(),
321 "clarification for crate '{}' does not specify any valid LICENSE files to checksum",
322 krate.id
323 );
324
325 let root = krate.manifest_path.parent().unwrap();
326
327 let mut lic_files = Vec::with_capacity(clarification.files.len() + clarification.git.len());
328
329 let mut push = |contents: &str, cf: &config::ClarificationFile, license_path| {
330 anyhow::ensure!(
331 !contents.is_empty(),
332 "clarification file '{license_path}' is empty"
333 );
334
335 let start = match &cf.start {
336 Some(starts) => contents.find(starts).with_context(|| {
337 format!("failed to find subsection starting with '{starts}' in {license_path}")
338 })?,
339 None => 0,
340 };
341
342 let end = match &cf.end {
343 Some(ends) => {
344 contents[start..].find(ends).with_context(|| {
345 format!("failed to find subsection ending with '{ends}' in {license_path}")
346 })? + start
347 + ends.len()
348 }
349 None => contents.len(),
350 };
351
352 let text = &contents[start..end];
353
354 crate::validate_sha256(text, &cf.checksum)?;
355
356 let text = text.to_owned();
357
358 lic_files.push(LicenseFile {
359 path: cf.path.clone(),
360 confidence: 1.0,
361 license_expr: cf
362 .license
363 .as_ref()
364 .unwrap_or(&clarification.license)
365 .clone(),
366 kind: LicenseFileKind::Text(text),
367 });
368
369 Ok(())
370 };
371
372 for file in &clarification.files {
373 let license_path = root.join(&file.path);
374 let file_contents = std::fs::read_to_string(&license_path)
375 .with_context(|| format!("unable to read path '{license_path}'"))?;
376
377 push(&file_contents, file, license_path)?;
378 }
379
380 for file in &clarification.git {
381 let license_path = &file.path;
382
383 let contents = git_cache
384 .retrieve(krate, file, &clarification.override_git_commit)
385 .with_context(|| {
386 format!(
387 "unable to retrieve '{license_path}' for crate '{krate}' from remote git host"
388 )
389 })?;
390
391 push(&contents, file, license_path.clone())?;
392 }
393
394 Ok(lic_files)
395}
396
397#[inline]
398pub fn binary_search<'krate>(
399 kl: &'krate [KrateLicense<'krate>],
400 krate: &Krate,
401) -> Result<(usize, &'krate KrateLicense<'krate>), usize> {
402 kl.binary_search_by(|k| k.krate.cmp(krate))
403 .map(|i| (i, &kl[i]))
404}