license_retriever/
lib.rs

1use std::{
2    collections::HashSet,
3    fmt,
4    path::{Path, PathBuf},
5};
6
7use cargo_metadata::{Metadata, MetadataCommand, Package};
8use git2::{build::RepoBuilder, FetchOptions};
9use itertools::Itertools;
10use log::{debug, info, warn};
11use rayon::prelude::*;
12use serde::{Deserialize, Serialize};
13
14pub use crate::{
15    config::Config,
16    error::{Error, Result},
17};
18
19mod config;
20mod error;
21
22fn get_metadata(manifest_path: Option<impl Into<PathBuf>>) -> Result<Metadata> {
23    info!("Retrieving metadata");
24    let mut cmd = MetadataCommand::new();
25    if let Some(manifest_path) = manifest_path {
26        cmd.manifest_path(manifest_path);
27    }
28    let metadata = cmd.exec()?;
29    Ok(metadata)
30}
31
32fn get_packages(metadata: &Metadata) -> Vec<&Package> {
33    let Some(resolve) = &metadata.resolve else {
34        info!("No resolve, getting all packages");
35        return metadata.packages.iter().collect();
36    };
37    let Some(root) = &resolve.root else {
38        info!("No resolve root, getting all packages");
39        return metadata.packages.iter().collect();
40    };
41    let mut packages = Vec::new();
42    let mut to_eval = HashSet::from([root]);
43    while let Some(id) = to_eval.iter().next().copied() {
44        debug!("Evaluating {id}");
45        to_eval.remove(id);
46        let Some(package) = metadata.packages.iter().find(|a| a.id == *id) else {
47            continue;
48        };
49        packages.push(package);
50        let Some(node) = resolve.nodes.iter().find(|a| a.id == *id) else {
51            continue;
52        };
53        for dep in &node.deps {
54            if !packages.iter().any(|a| a.id == dep.pkg) {
55                to_eval.insert(&dep.pkg);
56            }
57        }
58    }
59    packages
60}
61
62fn extract_licenses_from_repo_folder(path: &Path) -> Result<Vec<String>> {
63    let mut licenses = vec![];
64    for entry in path.read_dir()? {
65        let entry = entry?;
66        let name = entry.file_name().to_string_lossy().to_ascii_lowercase();
67        if !name.contains("license")
68            && !name.contains("licence")
69            && !name.contains("copyright")
70            && !name.contains("copying")
71        {
72            continue;
73        }
74        info!("Found {}", entry.path().display());
75        if entry.file_type()?.is_dir() {
76            for entry2 in entry.path().read_dir()? {
77                let entry2 = entry2?;
78                if !entry2.file_type()?.is_dir() {
79                    licenses.push(std::fs::read_to_string(entry2.path())?);
80                }
81            }
82        } else {
83            licenses.push(std::fs::read_to_string(entry.path())?);
84        }
85    }
86    Ok(licenses)
87}
88
89fn clone_repo(id: &str, repository: &str) -> Result<bool> {
90    let repository = repository
91        .strip_suffix('/')
92        .unwrap_or(repository)
93        .split("/tree/")
94        .next()
95        .unwrap();
96    let path = PathBuf::from(format!("{}/repo/{id}", std::env::var("OUT_DIR")?,));
97
98    if path.exists() {
99        return Ok(true);
100    }
101
102    info!("Cloning {repository} to {:?}", path.display());
103    if let Err(e) = RepoBuilder::new()
104        .fetch_options({
105            let mut fo = FetchOptions::new();
106            fo.depth(1);
107            fo
108        })
109        .clone(repository, &path)
110    {
111        if e.message() == "unexpected http status code: 404" {
112            warn!("Repo {repository} not found");
113            Ok(false)
114        } else {
115            Err(e.into())
116        }
117    } else {
118        Ok(true)
119    }
120}
121
122fn get_licenses(package: &Package) -> Result<Vec<String>> {
123    if let Some(license_file) = package.license_file() {
124        info!(
125            "Retrieving license file at {license_file:?} for {}",
126            package.name
127        );
128        return Ok(vec![std::fs::read_to_string(&license_file)?]);
129    }
130
131    let path = package
132        .manifest_path
133        .parent()
134        .unwrap_or(&package.manifest_path);
135    if path.exists() {
136        let licenses = extract_licenses_from_repo_folder(path.as_std_path())?;
137        if !licenses.is_empty() {
138            return Ok(licenses);
139        }
140    }
141
142    if let Some(repository) = &package.repository {
143        let folder = format!("{}-{}", package.name, package.version);
144        let can_eval = clone_repo(&folder, repository)?;
145        if can_eval {
146            let path = PathBuf::from(format!("{}/repo/{}", std::env::var("OUT_DIR")?, folder));
147            let paths = [
148                path.clone(),
149                path.join(&*package.name),
150                path.join("crates").join(&*package.name),
151            ];
152            for path in paths {
153                if path.exists() {
154                    let licenses = extract_licenses_from_repo_folder(&path)?;
155                    if !licenses.is_empty() {
156                        return Ok(licenses);
157                    }
158                }
159            }
160        }
161    }
162
163    if let Some(license) = &package.license {
164        let path = PathBuf::from(format!("{}/repo/@spdx", std::env::var("OUT_DIR")?));
165        let mut licenses = vec![];
166        for license in license
167            .replace(" AND ", " ")
168            .replace(" OR ", " ")
169            .replace(" WITH ", " ")
170            .replace(['(', ')'], "")
171            .replace('/', " ")
172            .split(' ')
173        {
174            let path2 = path.join("text").join(format!("{license}.txt"));
175            if path2.exists() {
176                info!("Found {}", path2.display());
177                licenses.push(std::fs::read_to_string(path2)?);
178            }
179        }
180        if !licenses.is_empty() {
181            return Ok(licenses);
182        }
183    }
184
185    Ok(vec![])
186}
187
188#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
189pub struct LicenseRetriever(Vec<(Package, Vec<String>)>);
190impl LicenseRetriever {
191    pub fn from_config(config: &Config) -> Result<Self> {
192        let metadata = get_metadata(config.manifest_path.as_ref())?;
193        let packages = get_packages(&metadata);
194
195        info!("Cloning spdx license repo");
196        clone_repo("@spdx", "https://github.com/spdx/license-list-data")?;
197
198        let licenses = packages
199            .into_par_iter()
200            .map(|a| {
201                if let Some(licenses) = config.overrides.get(&*a.name) {
202                    return Ok((a.to_owned(), licenses.to_owned()));
203                }
204                Ok((a.to_owned(), get_licenses(a)?))
205            })
206            .collect::<Result<Vec<_>>>()?;
207
208        let no_license = licenses
209            .iter()
210            .filter(|(a, b)| b.is_empty() && !config.ignored_crates.contains(&*a.name))
211            .map(|(a, _)| &a.name)
212            .join(", ");
213        if !no_license.is_empty() {
214            if config.error_for_no_license {
215                return Err(Error::NoLicensesFound(no_license));
216            }
217            warn!("No licenses found for: {no_license}");
218        }
219
220        Ok(Self(licenses))
221    }
222
223    pub fn to_bytes(&self) -> Result<Vec<u8>> {
224        Ok(rmp_serde::to_vec_named(&self.0)?)
225    }
226    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
227        Ok(Self(rmp_serde::from_slice(bytes)?))
228    }
229
230    pub fn save_in_out_dir(&self, file_name: &str) -> Result<()> {
231        std::fs::write(
232            PathBuf::from(std::env::var("OUT_DIR")?).join(file_name),
233            self.to_bytes()?,
234        )?;
235        Ok(())
236    }
237
238    pub fn iter(&self) -> impl Iterator<Item = &<Self as IntoIterator>::Item> {
239        self.0.iter()
240    }
241}
242
243impl IntoIterator for LicenseRetriever {
244    type Item = (Package, Vec<String>);
245    type IntoIter = std::vec::IntoIter<Self::Item>;
246
247    fn into_iter(self) -> Self::IntoIter {
248        self.0.into_iter()
249    }
250}
251
252impl fmt::Display for LicenseRetriever {
253    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
254        const SEPARATOR_WIDTH: usize = 80;
255        let separator = "=".repeat(SEPARATOR_WIDTH);
256
257        writeln!(f, "{separator}\n")?;
258
259        for (package, license) in self.iter() {
260            writeln!(f, "Package: {}", package.name)?;
261            writeln!(f, "Authors:")?;
262            for author in &package.authors {
263                writeln!(f, " - {author}")?;
264            }
265            writeln!(f, "\n{separator}\n")?;
266
267            for line in license {
268                writeln!(f, "{line}")?;
269            }
270
271            writeln!(f, "{separator}\n")?;
272        }
273
274        Ok(())
275    }
276}
277
/// Loads license data that was written into `OUT_DIR` at build time.
///
/// Expands to a `LicenseRetriever::from_bytes` call on the contents of
/// `$OUT_DIR/<file_name>`, embedded at compile time with `include_bytes!`.
/// The expression therefore evaluates to a `Result` holding the decoded
/// `LicenseRetriever`. `file_name` must be a literal.
///
/// The type is named through `$crate`, so the expansion resolves regardless of
/// the name under which this crate is imported by the caller (for example when
/// the dependency is renamed in `Cargo.toml`).
#[macro_export]
macro_rules! license_retriever_data {
    ($file_name:literal) => {
        $crate::LicenseRetriever::from_bytes(include_bytes!(concat!(
            env!("OUT_DIR"),
            "/",
            $file_name
        )))
    };
}