cargo_license/
lib.rs

1use anyhow::Result;
2use cargo_metadata::{
3    CrateType, DepKindInfo, DependencyKind, Metadata, MetadataCommand, Node, NodeDep, Package,
4    PackageId,
5};
6use itertools::Itertools;
7use semver::Version;
8use serde_derive::Serialize;
9use spdx::expression::ExprNode;
10use spdx::LicenseReq;
11use std::cmp::Ordering;
12use std::collections::{HashMap, HashSet};
13use std::mem::swap;
14use std::{io, iter};
15
16#[derive(PartialEq, Eq, Debug)]
17enum LicenseTree<'a> {
18    License(&'a LicenseReq),
19    Or(Vec<LicenseTree<'a>>),
20    And(Vec<LicenseTree<'a>>),
21}
22
23impl PartialOrd for LicenseTree<'_> {
24    // Compare based on the string representation, therefore the name of the first license
25    //
26    // No specific preference for AND/OR priority
27    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
28        Some(self.cmp(other))
29    }
30}
31
32impl LicenseTree<'_> {
33    // Compare based on the string representation, therefore the name of the first license
34    //
35    // No specific preference for AND/OR priority
36    fn cmp(&self, other: &Self) -> Ordering {
37        // let mut ordering = Ordering::Equal;
38        let mut self_iter = self.license_iter();
39        let mut other_iter = other.license_iter();
40
41        loop {
42            let left = self_iter.next();
43            let right = other_iter.next();
44
45            match (left, right) {
46                (None, None) => return Ordering::Equal,
47                (Some(_), None) => return Ordering::Less,
48                (None, Some(_)) => return Ordering::Greater,
49                (Some(l), Some(r)) => match l.cmp(r) {
50                    Ordering::Equal => {}
51                    Ordering::Less => return Ordering::Less,
52                    Ordering::Greater => return Ordering::Greater,
53                },
54            }
55        }
56    }
57
58    pub fn normalize(&mut self) {
59        match self {
60            Self::License(_) => {}
61            Self::Or(nodes) => {
62                let mut acc = Vec::new();
63                for v in nodes.iter_mut() {
64                    v.normalize();
65                }
66                nodes.retain_mut(|v| {
67                    if let Self::Or(ref mut v) = v {
68                        acc.append(v);
69                        false
70                    } else {
71                        true
72                    }
73                });
74                nodes.append(&mut acc);
75                nodes.sort_by(Self::cmp);
76            }
77            Self::And(nodes) => {
78                let mut acc = Vec::new();
79                for v in nodes.iter_mut() {
80                    v.normalize();
81                }
82                nodes.retain_mut(|v| {
83                    if let Self::And(ref mut v) = v {
84                        acc.append(v);
85                        false
86                    } else {
87                        true
88                    }
89                });
90                nodes.append(&mut acc);
91                nodes.sort_by(Self::cmp);
92            }
93        }
94    }
95
96    fn license_iter(&self) -> Box<dyn Iterator<Item = &LicenseReq> + '_> {
97        match self {
98            Self::License(l) => Box::new(iter::once(*l)),
99            Self::Or(nodes) | Self::And(nodes) => {
100                Box::new(nodes.iter().flat_map(LicenseTree::license_iter))
101            }
102        }
103    }
104
105    fn serialize(&self) -> String {
106        let mut acc = String::new();
107        self.serialize_inner(&mut acc, false);
108        acc
109    }
110
111    fn serialize_inner(&self, acc: &mut String, is_first: bool) {
112        match self {
113            Self::License(l) => acc.push_str(&l.to_string()),
114            Self::Or(v) => {
115                let need_parentheses = v.len() != 1 && is_first;
116                if need_parentheses {
117                    acc.push('(');
118                }
119
120                let mut v_iter = v.iter().peekable();
121                while let Some(node) = v_iter.next() {
122                    node.serialize_inner(acc, true);
123                    if v_iter.peek().is_some() {
124                        acc.push_str(" OR ");
125                    }
126                }
127                if need_parentheses {
128                    acc.push(')');
129                }
130            }
131            Self::And(v) => {
132                let need_parentheses = v.len() != 1 && is_first;
133                if need_parentheses {
134                    acc.push('(');
135                }
136
137                let mut v_iter = v.iter().peekable();
138                while let Some(node) = v_iter.next() {
139                    node.serialize_inner(acc, true);
140                    if v_iter.peek().is_some() {
141                        acc.push_str(" AND ");
142                    }
143                }
144                if need_parentheses {
145                    acc.push(')');
146                }
147            }
148        }
149    }
150}
151
152#[must_use]
153pub fn normalize(license_string: &str) -> String {
154    let canon = spdx::Expression::canonicalize(license_string).unwrap_or_default();
155
156    let Ok(license) = spdx::Expression::parse_mode(
157        canon.as_deref().unwrap_or(license_string),
158        spdx::ParseMode::LAX,
159    ) else {
160        return license_string.into();
161    };
162
163    let mut req_stack = Vec::new();
164    let _: Vec<_> = license.iter().collect();
165    for op in license.iter() {
166        match op {
167            ExprNode::Req(r) => req_stack.push(LicenseTree::License(&r.req)),
168            ExprNode::Op(spdx::expression::Operator::Or) => {
169                let Some(mut left) = req_stack.pop() else {
170                    return license_string.into();
171                };
172                let Some(mut right) = req_stack.pop() else {
173                    return license_string.into();
174                };
175
176                // Order elements here based on the name of the first license that appears
177                if left > right {
178                    swap(&mut left, &mut right);
179                }
180
181                req_stack.push(LicenseTree::Or(vec![left, right]));
182            }
183            ExprNode::Op(spdx::expression::Operator::And) => {
184                let Some(mut left) = req_stack.pop() else {
185                    return license_string.into();
186                };
187                let Some(mut right) = req_stack.pop() else {
188                    return license_string.into();
189                };
190
191                // Order elements here based on the name of the first license that appears
192                if left > right {
193                    swap(&mut left, &mut right);
194                }
195
196                req_stack.push(LicenseTree::And(vec![left, right]));
197            }
198        }
199    }
200
201    let [ref mut tree] = &mut *req_stack else {
202        return license_string.into();
203    };
204
205    tree.normalize();
206    tree.serialize()
207}
208
209fn get_proc_macro_node_names(metadata: &Metadata, opt: &GetDependenciesOpt) -> HashSet<String> {
210    let mut proc_macros = HashSet::new();
211    if opt.avoid_proc_macros {
212        for packages in &metadata.packages {
213            for target in &packages.targets {
214                if target.crate_types.contains(&CrateType::ProcMacro) {
215                    proc_macros.insert(target.name.clone());
216                    for package in &packages.dependencies {
217                        proc_macros.insert(package.name.clone());
218                    }
219                }
220            }
221        }
222    }
223    proc_macros
224}
225
226fn get_node_name_filter(metadata: &Metadata, opt: &GetDependenciesOpt) -> HashSet<String> {
227    let mut filter = HashSet::new();
228
229    let roots = if let Some(root) = metadata.root_package() {
230        vec![root]
231    } else {
232        metadata.workspace_packages()
233    };
234
235    if opt.root_only {
236        for root in roots {
237            filter.insert(root.name.as_ref().into());
238        }
239        return filter;
240    }
241
242    if opt.direct_deps_only {
243        for root in roots {
244            filter.insert(root.name.as_ref().into());
245            for package in &root.dependencies {
246                filter.insert(package.name.clone());
247            }
248        }
249    }
250    filter
251}
252
253#[derive(Debug, Serialize, Clone, Hash, Ord, PartialOrd, Eq, PartialEq)]
254pub struct DependencyDetails {
255    pub name: String,
256    pub version: semver::Version,
257    pub authors: Option<String>,
258    pub repository: Option<String>,
259    pub license: Option<String>,
260    pub license_file: Option<String>,
261    pub description: Option<String>,
262}
263
264impl DependencyDetails {
265    #[must_use]
266    pub fn new(package: &Package) -> Self {
267        let authors = if package.authors.is_empty() {
268            None
269        } else {
270            Some(package.authors.clone().join("|"))
271        };
272        Self {
273            name: package.name.as_str().into(),
274            version: package.version.clone(),
275            authors,
276            repository: package.repository.clone(),
277            license: package.license.as_ref().map(|s| normalize(s)),
278            license_file: package
279                .license_file
280                .clone()
281                .map(cargo_metadata::camino::Utf8PathBuf::into_string),
282            description: package
283                .description
284                .clone()
285                .map(|s| s.trim().replace('\n', " ")),
286        }
287    }
288}
289
290#[derive(Debug, Serialize, Clone, Hash, Ord, PartialOrd, Eq, PartialEq)]
291struct GitlabDependency {
292    name: String,
293    version: Version,
294    package_manager: &'static str,
295    path: String,
296    licenses: Vec<&'static str>,
297}
298
299#[derive(Debug, Serialize, Clone, Hash, Ord, PartialOrd, Eq, PartialEq)]
300struct GitlabLicense {
301    id: &'static str,
302    name: &'static str,
303    url: String,
304}
305
306impl GitlabLicense {
307    fn parse_licenses(dependency: &DependencyDetails) -> Result<HashSet<Self>> {
308        let Some(license) = &dependency.license else {
309            return Ok(HashSet::new());
310        };
311        let expression = spdx::Expression::parse_mode(license, spdx::ParseMode::LAX)?;
312        Ok(expression
313            .requirements()
314            .filter_map(|req| {
315                req.req.license.id().map(|license| Self {
316                    id: license.name,
317                    name: license.full_name,
318                    url: String::default(),
319                })
320            })
321            .collect())
322    }
323}
324
325#[derive(Debug, Serialize, Clone)]
326struct GitlabLicenseScanningReport {
327    version: &'static str,
328    licenses: HashSet<GitlabLicense>,
329    dependencies: Vec<GitlabDependency>,
330}
331
332impl TryFrom<&[DependencyDetails]> for GitlabLicenseScanningReport {
333    type Error = anyhow::Error;
334    fn try_from(dependencies: &[DependencyDetails]) -> Result<Self> {
335        let mut licenses = HashSet::new();
336        let dependencies = dependencies
337            .iter()
338            .cloned()
339            .map(|dependency| {
340                let dep_licenses = GitlabLicense::parse_licenses(&dependency)?;
341                let license_ids = dep_licenses.iter().map(|license| license.id).collect();
342                licenses.extend(dep_licenses);
343                Ok::<_, Self::Error>(GitlabDependency {
344                    name: dependency.name,
345                    version: dependency.version,
346                    package_manager: "cargo",
347                    path: String::default(),
348                    licenses: license_ids,
349                })
350            })
351            .try_collect()?;
352
353        Ok(GitlabLicenseScanningReport {
354            version: "2.1",
355            dependencies,
356            licenses,
357        })
358    }
359}
360
/// Options controlling which packages are reported.
///
/// Every flag defaults to `false`.
// This is using bools as flags and all combinations are fine
// It is not a state machine
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Default)]
pub struct GetDependenciesOpt {
    /// Do not follow development dependencies.
    pub avoid_dev_deps: bool,
    /// Do not follow build dependencies.
    pub avoid_build_deps: bool,
    /// Leave out proc-macro crates and the dependencies their packages declare.
    pub avoid_proc_macros: bool,
    /// Report only the root package(s) and their direct dependencies.
    pub direct_deps_only: bool,
    /// Report only the root package(s).
    pub root_only: bool,
}
372
373/// Get the list of dependencies from the Cargo.lock
374///
375/// # Errors
376///
377/// Will error if running the metadata command fails
378// Can't panic in normal operation
379#[allow(clippy::missing_panics_doc)]
380pub fn get_dependencies_from_cargo_lock(
381    metadata_command: &MetadataCommand,
382    opt: &GetDependenciesOpt,
383) -> Result<Vec<DependencyDetails>> {
384    let metadata = metadata_command.exec()?;
385
386    let node_name_filter = get_node_name_filter(&metadata, opt);
387    let proc_macro_exclusions = get_proc_macro_node_names(&metadata, opt);
388
389    let connected = {
390        let resolve = metadata.resolve.as_ref().expect("missing `resolve`");
391
392        let deps = resolve
393            .nodes
394            .iter()
395            .map(|Node { id, deps, .. }| (id, deps))
396            .collect::<HashMap<_, _>>();
397
398        let missing_dep_kinds = deps
399            .values()
400            .flat_map(|d| d.iter())
401            .any(|NodeDep { dep_kinds, .. }| dep_kinds.is_empty());
402
403        if missing_dep_kinds && opt.avoid_dev_deps {
404            eprintln!("warning: Cargo 1.41+ is required for `--avoid-dev-deps`");
405        }
406        if missing_dep_kinds && opt.avoid_build_deps {
407            eprintln!("warning: Cargo 1.41+ is required for `--avoid-build-deps`");
408        }
409
410        let neighbors = |package_id: &PackageId| {
411            deps[package_id]
412                .iter()
413                .filter(|NodeDep { dep_kinds, .. }| {
414                    missing_dep_kinds
415                        || dep_kinds.iter().any(|DepKindInfo { kind, .. }| {
416                            *kind == DependencyKind::Normal
417                                || !opt.avoid_dev_deps && *kind == DependencyKind::Development
418                                || !opt.avoid_build_deps && *kind == DependencyKind::Build
419                        })
420                })
421                .map(|NodeDep { pkg, .. }| pkg)
422        };
423
424        let mut connected = HashSet::new();
425        let stack = &mut if let Some(root) = &resolve.root {
426            vec![root]
427        } else {
428            metadata.workspace_members.iter().collect()
429        };
430        while let Some(package_id) = stack.pop() {
431            if connected.insert(package_id) {
432                stack.extend(neighbors(package_id));
433            }
434        }
435        connected
436    };
437
438    let mut detailed_dependencies = metadata
439        .packages
440        .iter()
441        .filter(|p| connected.contains(&p.id))
442        .filter(|p| node_name_filter.is_empty() || node_name_filter.contains(p.name.as_ref()))
443        .filter(|p| !proc_macro_exclusions.contains(p.name.as_ref()))
444        .map(DependencyDetails::new)
445        .collect::<Vec<_>>();
446    detailed_dependencies.sort_unstable();
447    Ok(detailed_dependencies)
448}
449
450/// Write the dependency information in a tab-separated format to the output writer.
451///
452/// # Errors
453///
454/// Will error if output writer is closed
455pub fn write_tsv(
456    dependencies: &[DependencyDetails],
457    output_writer: Box<dyn io::Write>,
458) -> Result<()> {
459    let mut wtr = csv::WriterBuilder::new()
460        .delimiter(b'\t')
461        .quote_style(csv::QuoteStyle::Necessary)
462        .from_writer(output_writer);
463    for dependency in dependencies {
464        wtr.serialize(dependency)?;
465    }
466    wtr.flush()?;
467    Ok(())
468}
469
470/// Write the dependency information in JSON format to output writer
471///
472/// # Errors
473///
474/// Will error if output writer is closed
475pub fn write_json(
476    dependencies: &[DependencyDetails],
477    output_writer: &mut Box<dyn io::Write>,
478) -> Result<()> {
479    writeln!(
480        output_writer,
481        "{}",
482        serde_json::to_string_pretty(&dependencies)?
483    )?;
484    Ok(())
485}
486
487/// Write the dependency information in the Gitlab license scanning format to output writer
488///
489/// # Errors
490///
491/// Will error if output writer is closed
492pub fn write_gitlab(
493    dependencies: &[DependencyDetails],
494    output_writer: &mut Box<dyn io::Write>,
495) -> Result<()> {
496    let dependencies = GitlabLicenseScanningReport::try_from(dependencies)?;
497    writeln!(
498        output_writer,
499        "{}",
500        serde_json::to_string_pretty(&dependencies)?
501    )?;
502
503    Ok(())
504}
505
#[cfg(test)]
mod test {
    use super::*;

    /// Runs against the metadata of the current crate: the dependency list
    /// must be non-empty and every entry needs a license or a license file.
    #[test]
    fn test_detailed() {
        let command = MetadataCommand::new();
        let options = GetDependenciesOpt::default();
        let dependencies = get_dependencies_from_cargo_lock(&command, &options).unwrap();

        assert!(!dependencies.is_empty());
        for dependency in &dependencies {
            assert!(dependency.license.is_some() || dependency.license_file.is_some());
        }
    }

    /// Each case is `(input expression, expected normalized expression)`.
    #[test]
    fn test_normalize() {
        let cases = [
            ("MIT", "MIT"),
            ("MIT OR Apache-2.0", "Apache-2.0 OR MIT"),
            ("Apache-2.0 OR MIT", "Apache-2.0 OR MIT"),
            (
                "(Apache-2.0 OR MIT) AND Apache-2.0",
                "(Apache-2.0 OR MIT) AND Apache-2.0",
            ),
            ("(Apache-2.0 OR MIT) AND MIT", "(Apache-2.0 OR MIT) AND MIT"),
            (
                "Apache-2.0 AND (Apache-2.0 OR MIT)",
                "(Apache-2.0 OR MIT) AND Apache-2.0",
            ),
            ("MIT AND (Apache-2.0 OR MIT)", "(Apache-2.0 OR MIT) AND MIT"),
            (
                "(Apache-2.0 AND Unicode-3.0) OR (Apache-2.0 AND MIT)",
                "(Apache-2.0 AND MIT) OR (Apache-2.0 AND Unicode-3.0)",
            ),
            (
                "Unicode-3.0 AND (MIT OR Apache-2.0)",
                "(Apache-2.0 OR MIT) AND Unicode-3.0",
            ),
            (
                "(MIT OR Apache-2.0) AND Unicode-3.0",
                "(Apache-2.0 OR MIT) AND Unicode-3.0",
            ),
            ("MIT AND (MIT OR Apache-2.0)", "(Apache-2.0 OR MIT) AND MIT"),
            (
                "((((Apache-2.0 WITH LLVM-exception) OR (Apache-2.0)) AND (OpenSSL)) OR (MIT))",
                "((Apache-2.0 OR Apache-2.0 WITH LLVM-exception) AND OpenSSL) OR MIT",
            ),
            (
                "Borceux OR MIT AND BitTorrent-1.1",
                "(BitTorrent-1.1 AND MIT) OR Borceux",
            ),
            ("Zlib OR Apache-2.0 OR MIT", "Apache-2.0 OR MIT OR Zlib"),
            ("MIT OR Zlib OR Apache-2.0", "Apache-2.0 OR MIT OR Zlib"),
            (
                "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT",
                "Apache-2.0 OR Apache-2.0 WITH LLVM-exception OR MIT",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(normalize(input), expected, "Input {input}");
        }
    }
}