1use std::{
2 collections::HashSet,
3 fmt,
4 path::{Path, PathBuf},
5};
6
7use cargo_metadata::{Metadata, MetadataCommand, Package};
8use git2::{build::RepoBuilder, FetchOptions};
9use itertools::Itertools;
10use log::{debug, info, warn};
11use rayon::prelude::*;
12use serde::{Deserialize, Serialize};
13
14pub use crate::{
15 config::Config,
16 error::{Error, Result},
17};
18
19mod config;
20mod error;
21
22fn get_metadata(manifest_path: Option<impl Into<PathBuf>>) -> Result<Metadata> {
23 info!("Retrieving metadata");
24 let mut cmd = MetadataCommand::new();
25 if let Some(manifest_path) = manifest_path {
26 cmd.manifest_path(manifest_path);
27 }
28 let metadata = cmd.exec()?;
29 Ok(metadata)
30}
31
32fn get_packages(metadata: &Metadata) -> Vec<&Package> {
33 let Some(resolve) = &metadata.resolve else {
34 info!("No resolve, getting all packages");
35 return metadata.packages.iter().collect();
36 };
37 let Some(root) = &resolve.root else {
38 info!("No resolve root, getting all packages");
39 return metadata.packages.iter().collect();
40 };
41 let mut packages = Vec::new();
42 let mut to_eval = HashSet::from([root]);
43 while let Some(id) = to_eval.iter().next().copied() {
44 debug!("Evaluating {id}");
45 to_eval.remove(id);
46 let Some(package) = metadata.packages.iter().find(|a| a.id == *id) else {
47 continue;
48 };
49 packages.push(package);
50 let Some(node) = resolve.nodes.iter().find(|a| a.id == *id) else {
51 continue;
52 };
53 for dep in &node.deps {
54 if !packages.iter().any(|a| a.id == dep.pkg) {
55 to_eval.insert(&dep.pkg);
56 }
57 }
58 }
59 packages
60}
61
62fn extract_licenses_from_repo_folder(path: &Path) -> Result<Vec<String>> {
63 let mut licenses = vec![];
64 for entry in path.read_dir()? {
65 let entry = entry?;
66 let name = entry.file_name().to_string_lossy().to_ascii_lowercase();
67 if !name.contains("license")
68 && !name.contains("licence")
69 && !name.contains("copyright")
70 && !name.contains("copying")
71 {
72 continue;
73 }
74 info!("Found {}", entry.path().display());
75 if entry.file_type()?.is_dir() {
76 for entry2 in entry.path().read_dir()? {
77 let entry2 = entry2?;
78 if !entry2.file_type()?.is_dir() {
79 licenses.push(std::fs::read_to_string(entry2.path())?);
80 }
81 }
82 } else {
83 licenses.push(std::fs::read_to_string(entry.path())?);
84 }
85 }
86 Ok(licenses)
87}
88
89fn clone_repo(id: &str, repository: &str) -> Result<bool> {
90 let repository = repository
91 .strip_suffix('/')
92 .unwrap_or(repository)
93 .split("/tree/")
94 .next()
95 .unwrap();
96 let path = PathBuf::from(format!("{}/repo/{id}", std::env::var("OUT_DIR")?,));
97
98 if path.exists() {
99 return Ok(true);
100 }
101
102 info!("Cloning {repository} to {:?}", path.display());
103 if let Err(e) = RepoBuilder::new()
104 .fetch_options({
105 let mut fo = FetchOptions::new();
106 fo.depth(1);
107 fo
108 })
109 .clone(repository, &path)
110 {
111 if e.message() == "unexpected http status code: 404" {
112 warn!("Repo {repository} not found");
113 Ok(false)
114 } else {
115 Err(e.into())
116 }
117 } else {
118 Ok(true)
119 }
120}
121
122fn get_licenses(package: &Package) -> Result<Vec<String>> {
123 if let Some(license_file) = package.license_file() {
124 info!(
125 "Retrieving license file at {license_file:?} for {}",
126 package.name
127 );
128 return Ok(vec![std::fs::read_to_string(&license_file)?]);
129 }
130
131 let path = package
132 .manifest_path
133 .parent()
134 .unwrap_or(&package.manifest_path);
135 if path.exists() {
136 let licenses = extract_licenses_from_repo_folder(path.as_std_path())?;
137 if !licenses.is_empty() {
138 return Ok(licenses);
139 }
140 }
141
142 if let Some(repository) = &package.repository {
143 let folder = format!("{}-{}", package.name, package.version);
144 let can_eval = clone_repo(&folder, repository)?;
145 if can_eval {
146 let path = PathBuf::from(format!("{}/repo/{}", std::env::var("OUT_DIR")?, folder));
147 let paths = [
148 path.clone(),
149 path.join(&*package.name),
150 path.join("crates").join(&*package.name),
151 ];
152 for path in paths {
153 if path.exists() {
154 let licenses = extract_licenses_from_repo_folder(&path)?;
155 if !licenses.is_empty() {
156 return Ok(licenses);
157 }
158 }
159 }
160 }
161 }
162
163 if let Some(license) = &package.license {
164 let path = PathBuf::from(format!("{}/repo/@spdx", std::env::var("OUT_DIR")?));
165 let mut licenses = vec![];
166 for license in license
167 .replace(" AND ", " ")
168 .replace(" OR ", " ")
169 .replace(" WITH ", " ")
170 .replace(['(', ')'], "")
171 .replace('/', " ")
172 .split(' ')
173 {
174 let path2 = path.join("text").join(format!("{license}.txt"));
175 if path2.exists() {
176 info!("Found {}", path2.display());
177 licenses.push(std::fs::read_to_string(path2)?);
178 }
179 }
180 if !licenses.is_empty() {
181 return Ok(licenses);
182 }
183 }
184
185 Ok(vec![])
186}
187
188#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
189pub struct LicenseRetriever(Vec<(Package, Vec<String>)>);
190impl LicenseRetriever {
191 pub fn from_config(config: &Config) -> Result<Self> {
192 let metadata = get_metadata(config.manifest_path.as_ref())?;
193 let packages = get_packages(&metadata);
194
195 info!("Cloning spdx license repo");
196 clone_repo("@spdx", "https://github.com/spdx/license-list-data")?;
197
198 let licenses = packages
199 .into_par_iter()
200 .map(|a| {
201 if let Some(licenses) = config.overrides.get(&*a.name) {
202 return Ok((a.to_owned(), licenses.to_owned()));
203 }
204 Ok((a.to_owned(), get_licenses(a)?))
205 })
206 .collect::<Result<Vec<_>>>()?;
207
208 let no_license = licenses
209 .iter()
210 .filter(|(a, b)| b.is_empty() && !config.ignored_crates.contains(&*a.name))
211 .map(|(a, _)| &a.name)
212 .join(", ");
213 if !no_license.is_empty() {
214 if config.error_for_no_license {
215 return Err(Error::NoLicensesFound(no_license));
216 }
217 warn!("No licenses found for: {no_license}");
218 }
219
220 Ok(Self(licenses))
221 }
222
223 pub fn to_bytes(&self) -> Result<Vec<u8>> {
224 Ok(rmp_serde::to_vec_named(&self.0)?)
225 }
226 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
227 Ok(Self(rmp_serde::from_slice(bytes)?))
228 }
229
230 pub fn save_in_out_dir(&self, file_name: &str) -> Result<()> {
231 std::fs::write(
232 PathBuf::from(std::env::var("OUT_DIR")?).join(file_name),
233 self.to_bytes()?,
234 )?;
235 Ok(())
236 }
237
238 pub fn iter(&self) -> impl Iterator<Item = &<Self as IntoIterator>::Item> {
239 self.0.iter()
240 }
241}
242
243impl IntoIterator for LicenseRetriever {
244 type Item = (Package, Vec<String>);
245 type IntoIter = std::vec::IntoIter<Self::Item>;
246
247 fn into_iter(self) -> Self::IntoIter {
248 self.0.into_iter()
249 }
250}
251
252impl fmt::Display for LicenseRetriever {
253 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
254 const SEPARATOR_WIDTH: usize = 80;
255 let separator = "=".repeat(SEPARATOR_WIDTH);
256
257 writeln!(f, "{separator}\n")?;
258
259 for (package, license) in self.iter() {
260 writeln!(f, "Package: {}", package.name)?;
261 writeln!(f, "Authors:")?;
262 for author in &package.authors {
263 writeln!(f, " - {author}")?;
264 }
265 writeln!(f, "\n{separator}\n")?;
266
267 for line in license {
268 writeln!(f, "{line}")?;
269 }
270
271 writeln!(f, "{separator}\n")?;
272 }
273
274 Ok(())
275 }
276}
277
/// Loads license data that was written into `OUT_DIR` under `$file_name`.
///
/// Expands to a `LicenseRetriever::from_bytes` call over
/// `include_bytes!(concat!(env!("OUT_DIR"), "/", $file_name))`, so the
/// expression evaluates to whatever `from_bytes` returns.
///
/// The type is referenced through `$crate` so the expansion keeps resolving
/// when the invoking crate depends on this crate under a different name, or
/// when the macro is used from inside this crate.
#[macro_export]
macro_rules! license_retriever_data {
    ($file_name:literal) => {
        $crate::LicenseRetriever::from_bytes(include_bytes!(concat!(
            env!("OUT_DIR"),
            "/",
            $file_name
        )))
    };
}