cargo_about/
licenses.rs

1pub mod config;
2pub mod fetch;
3pub mod resolution;
4mod scan;
5mod workarounds;
6
7use crate::{Krate, Krates};
8use anyhow::Context as _;
9use krates::Utf8PathBuf as PathBuf;
10use rayon::prelude::*;
11pub use resolution::Resolved;
12use spdx::detection as sd;
13use std::{cmp, fmt, sync::Arc};
14
15pub type LicenseStore = sd::Store;
16
17#[inline]
18pub fn store_from_cache() -> anyhow::Result<LicenseStore> {
19    sd::Store::load_inline().context("failed to load license store")
20}
21
/// The overall license information attached to a crate.
///
/// Formats via `Display` as the expression itself for `Expr`, or as the
/// literal variant name for `Unknown` and `Ignore`.
22#[derive(Debug)]
23#[allow(clippy::large_enum_variant)]
24pub enum LicenseInfo {
    /// An SPDX license expression for the crate
25    Expr(spdx::Expression),
    /// No license expression is available for the crate
    // NOTE(review): presumably produced by `Krate::get_license_expression`
    // when the manifest has no usable expression; confirm against that method
26    Unknown,
    /// The crate is deliberately skipped; `Gatherer::gather` assigns this to
    /// private crates when `cfg.private.ignore` is set
27    Ignore,
28}
29
30impl fmt::Display for LicenseInfo {
31    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32        match self {
33            LicenseInfo::Expr(expr) => write!(f, "{expr}"),
34            LicenseInfo::Unknown => write!(f, "Unknown"),
35            LicenseInfo::Ignore => write!(f, "Ignore"),
36        }
37    }
38}
39
40/// Describes how a file with license info in it carries that info
///
/// Files produced by `apply_clarification` are always `Text`, holding the
/// (sub)section of the file that was checksummed.
41pub enum LicenseFileKind {
42    /// The license file is the canonical text of the license; the payload
    /// is that text
43    Text(String),
44    /// The license file is the canonical text (first field), and applies
45    /// only to a path root (second field)
46    AddendumText(String, PathBuf),
47    /// The file just has a license header, and presumably
48    /// also contains other text in it (like, you know, code)
49    Header,
50}
51
/// A single file in which license information was found.
///
/// Comparison (`Ord`/`PartialOrd`/`PartialEq`) only looks at `license_expr`
/// and `confidence`: files sort by expression first, then by *descending*
/// confidence, so the best candidate for an expression comes first.
52pub struct LicenseFile {
53    /// The SPDX requirement expression detected for the file
54    pub license_expr: spdx::Expression,
55    /// Full path of the file which had license data in it. For files created
    /// by `apply_clarification` this is the path exactly as it appears in
    /// the clarification
56    pub path: PathBuf,
57    /// The confidence score for the license, the closer to the canonical
58    /// license text it is, the closer it approaches 1.0. Clarified files are
    /// always given 1.0. Must not be NaN, comparing two files panics if it is
59    pub confidence: f32,
60    /// The kind of license content the file holds
61    pub kind: LicenseFileKind,
62}
63
64impl Ord for LicenseFile {
65    #[inline]
66    fn cmp(&self, o: &Self) -> cmp::Ordering {
67        match self.license_expr.as_ref().cmp(o.license_expr.as_ref()) {
68            cmp::Ordering::Equal => o
69                .confidence
70                .partial_cmp(&self.confidence)
71                .expect("NaN encountered comparing license confidences"),
72            ord => ord,
73        }
74    }
75}
76
77impl PartialOrd for LicenseFile {
78    #[inline]
79    fn partial_cmp(&self, o: &Self) -> Option<cmp::Ordering> {
80        Some(self.cmp(o))
81    }
82}
83
84impl PartialEq for LicenseFile {
85    #[inline]
86    fn eq(&self, o: &Self) -> bool {
87        self.cmp(o) == cmp::Ordering::Equal
88    }
89}
90
91impl Eq for LicenseFile {}
92
/// The license information gathered for a single crate.
///
/// Comparison (`Ord`/`PartialOrd`/`PartialEq`) only looks at `krate`, which
/// is what allows a sorted list of these to be searched with `binary_search`.
93pub struct KrateLicense<'krate> {
    /// The crate this information belongs to
94    pub krate: &'krate Krate,
    /// The overall license information for the crate
95    pub lic_info: LicenseInfo,
    /// The license files associated with the crate, may be empty
96    pub license_files: Vec<LicenseFile>,
97}
98
99impl Ord for KrateLicense<'_> {
100    #[inline]
101    fn cmp(&self, o: &Self) -> cmp::Ordering {
102        self.krate.cmp(o.krate)
103    }
104}
105
106impl PartialOrd for KrateLicense<'_> {
107    #[inline]
108    fn partial_cmp(&self, o: &Self) -> Option<cmp::Ordering> {
109        Some(self.cmp(o))
110    }
111}
112
113impl PartialEq for KrateLicense<'_> {
114    #[inline]
115    fn eq(&self, o: &Self) -> bool {
116        self.cmp(o) == cmp::Ordering::Equal
117    }
118}
119
120impl Eq for KrateLicense<'_> {}
121
/// Gathers license information for a set of crates.
///
/// Created via `with_store`, optionally tuned with the other `with_*`
/// builder methods, and consumed by `gather`.
122pub struct Gatherer {
    /// The license store the file scanner is created from
123    store: Arc<LicenseStore>,
    /// The confidence threshold handed to the file scan, 0.8 unless
    /// overridden, and always clamped to `0.0..=1.0` by the builder
124    threshold: f32,
    /// Forwarded as-is to `scan::scan_files`, `None` by default
    // NOTE(review): presumably the maximum directory depth to descend below
    // the crate root; confirm against `scan::scan_files`
125    max_depth: Option<usize>,
126}
127
128impl Gatherer {
129    pub fn with_store(store: Arc<LicenseStore>) -> Self {
130        Self {
131            store,
132            threshold: 0.8,
133            max_depth: None,
134        }
135    }
136
137    pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
138        self.threshold = threshold.clamp(0.0, 1.0);
139        self
140    }
141
142    pub fn with_max_depth(mut self, max_depth: Option<usize>) -> Self {
143        self.max_depth = max_depth;
144        self
145    }
146
147    pub fn gather<'krate>(
148        self,
149        krates: &'krate Krates,
150        cfg: &config::Config,
151        client: Option<reqwest::blocking::Client>,
152    ) -> Vec<KrateLicense<'krate>> {
153        let mut licensed_krates = Vec::with_capacity(krates.len());
154
155        let threshold = self.threshold;
156        let min_threshold = threshold - 0.5;
157
158        let strategy = spdx::detection::scan::Scanner::new(&self.store)
159            .confidence_threshold(if min_threshold < 0.1 {
160                0.1
161            } else {
162                min_threshold
163            })
164            .optimize(false)
165            .max_passes(1);
166
167        let git_cache = fetch::GitCache::maybe_offline(client);
168
169        // If we're ignoring crates that are private, just add them
170        // to the list so all of the following gathers ignore them
171        if cfg.private.ignore {
172            for krate in krates.krates() {
173                if let Some(publish) = &krate.publish {
174                    if publish.is_empty()
175                        || publish
176                            .iter()
177                            .all(|reg| cfg.private.registries.contains(reg))
178                    {
179                        log::debug!("ignoring private crate '{krate}'");
180                        licensed_krates.push(KrateLicense {
181                            krate,
182                            lic_info: LicenseInfo::Ignore,
183                            license_files: Vec::new(),
184                        });
185                    }
186                }
187            }
188
189            licensed_krates.sort();
190        }
191
192        // Workarounds are built-in to cargo-about to deal with issues that certain
193        // common crates have
194        workarounds::apply_workarounds(krates, cfg, &git_cache, &mut licensed_krates);
195
196        // Clarifications are user supplied and thus take precedence over any
197        // machine gathered data
198        self.gather_clarified(krates, cfg, &git_cache, &mut licensed_krates);
199
200        // Finally, crawl the crate sources on disk to try and determine licenses
201        self.gather_file_system(krates, &strategy, &mut licensed_krates);
202
203        licensed_krates.sort();
204        licensed_krates
205    }
206
207    #[allow(clippy::unused_self)]
208    fn gather_clarified<'k>(
209        &self,
210        krates: &'k Krates,
211        cfg: &config::Config,
212        gc: &fetch::GitCache,
213        licensed_krates: &mut Vec<KrateLicense<'k>>,
214    ) {
215        for (krate, clarification) in krates.krates().filter_map(|krate| {
216            cfg.crates
217                .get(&krate.name)
218                .and_then(|kc| kc.clarify.as_ref())
219                .map(|cl| (krate, cl))
220        }) {
221            if let Err(i) = binary_search(licensed_krates, krate) {
222                match apply_clarification(gc, krate, clarification) {
223                    Ok(lic_files) => {
224                        log::debug!(
225                            "applying clarification expression '{}' to crate {krate}",
226                            clarification.license,
227                        );
228                        licensed_krates.insert(
229                            i,
230                            KrateLicense {
231                                krate,
232                                lic_info: LicenseInfo::Expr(clarification.license.clone()),
233                                license_files: lic_files,
234                            },
235                        );
236                    }
237                    Err(e) => {
238                        log::warn!(
239                            "failed to validate all files specified in clarification for crate {krate}: {e:#}"
240                        );
241                    }
242                }
243            }
244        }
245    }
246
247    fn gather_file_system<'k>(
248        &self,
249        krates: &'k Krates,
250        scanner: &sd::scan::Scanner<'_>,
251        licensed_krates: &mut Vec<KrateLicense<'k>>,
252    ) {
253        let threshold = self.threshold;
254        let max_depth = self.max_depth;
255
256        let mut gathered: Vec<_> = krates
257            .krates()
258            .par_bridge()
259            .filter_map(|krate| {
260                // Ignore crates that we've already gathered
261                if binary_search(licensed_krates, krate).is_ok() {
262                    return None;
263                }
264
265                let info = krate.get_license_expression();
266
267                let root_path = krate.manifest_path.parent().unwrap();
268
269                let mut license_files =
270                    match scan::scan_files(root_path, scanner, threshold, max_depth) {
271                        Ok(files) => files,
272                        Err(err) => {
273                            log::error!(
274                                "unable to scan for license files for crate '{} - {}': {err}",
275                                krate.name,
276                                krate.version,
277                            );
278
279                            Vec::new()
280                        }
281                    };
282
283                // Condense each license down to the best candidate if
284                // multiple are found
285                license_files.sort();
286
287                let mut expr = None;
288                license_files.retain(|lf| {
289                    if let Some(cur) = &expr {
290                        if *cur != lf.license_expr {
291                            expr = Some(lf.license_expr.clone());
292                            true
293                        } else {
294                            false
295                        }
296                    } else {
297                        expr = Some(lf.license_expr.clone());
298                        true
299                    }
300                });
301
302                Some(KrateLicense {
303                    krate,
304                    lic_info: info,
305                    license_files,
306                })
307            })
308            .collect();
309
310        licensed_krates.append(&mut gathered);
311    }
312}
313
314pub(crate) fn apply_clarification(
315    git_cache: &fetch::GitCache,
316    krate: &crate::Krate,
317    clarification: &config::Clarification,
318) -> anyhow::Result<Vec<LicenseFile>> {
319    anyhow::ensure!(
320        !clarification.files.is_empty() || !clarification.git.is_empty(),
321        "clarification for crate '{}' does not specify any valid LICENSE files to checksum",
322        krate.id
323    );
324
325    let root = krate.manifest_path.parent().unwrap();
326
327    let mut lic_files = Vec::with_capacity(clarification.files.len() + clarification.git.len());
328
329    let mut push = |contents: &str, cf: &config::ClarificationFile, license_path| {
330        anyhow::ensure!(
331            !contents.is_empty(),
332            "clarification file '{license_path}' is empty"
333        );
334
335        let start = match &cf.start {
336            Some(starts) => contents.find(starts).with_context(|| {
337                format!("failed to find subsection starting with '{starts}' in {license_path}")
338            })?,
339            None => 0,
340        };
341
342        let end = match &cf.end {
343            Some(ends) => {
344                contents[start..].find(ends).with_context(|| {
345                    format!("failed to find subsection ending with '{ends}' in {license_path}")
346                })? + start
347                    + ends.len()
348            }
349            None => contents.len(),
350        };
351
352        let text = &contents[start..end];
353
354        crate::validate_sha256(text, &cf.checksum)?;
355
356        let text = text.to_owned();
357
358        lic_files.push(LicenseFile {
359            path: cf.path.clone(),
360            confidence: 1.0,
361            license_expr: cf
362                .license
363                .as_ref()
364                .unwrap_or(&clarification.license)
365                .clone(),
366            kind: LicenseFileKind::Text(text),
367        });
368
369        Ok(())
370    };
371
372    for file in &clarification.files {
373        let license_path = root.join(&file.path);
374        let file_contents = std::fs::read_to_string(&license_path)
375            .with_context(|| format!("unable to read path '{license_path}'"))?;
376
377        push(&file_contents, file, license_path)?;
378    }
379
380    for file in &clarification.git {
381        let license_path = &file.path;
382
383        let contents = git_cache
384            .retrieve(krate, file, &clarification.override_git_commit)
385            .with_context(|| {
386                format!(
387                    "unable to retrieve '{license_path}' for crate '{krate}' from remote git host"
388                )
389            })?;
390
391        push(&contents, file, license_path.clone())?;
392    }
393
394    Ok(lic_files)
395}
396
397#[inline]
398pub fn binary_search<'krate>(
399    kl: &'krate [KrateLicense<'krate>],
400    krate: &Krate,
401) -> Result<(usize, &'krate KrateLicense<'krate>), usize> {
402    kl.binary_search_by(|k| k.krate.cmp(krate))
403        .map(|i| (i, &kl[i]))
404}