python_packaging/
licensing.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use {
10    crate::{package_metadata::PythonPackageMetadata, resource::PythonResource},
11    anyhow::{anyhow, Context, Result},
12    spdx::{ExceptionId, Expression, LicenseId},
13    std::{
14        cmp::Ordering,
15        collections::{BTreeMap, BTreeSet},
16        fmt::{Display, Formatter},
17    },
18};
19
/// Names of system libraries considered safe to link against, ignoring
/// copyleft license implications.
pub const SAFE_SYSTEM_LIBRARIES: &[&str] = &[
    "cabinet",
    "iphlpapi",
    "msi",
    "rpcrt4",
    "rt",
    "winmm",
    "ws2_32",
];
24
25fn format_spdx(id: LicenseId, exception: Option<ExceptionId>, full: bool) -> String {
26    let name = if full { id.full_name } else { id.name };
27
28    if let Some(exception) = exception {
29        format!("{} WITH {}", name, exception.name)
30    } else {
31        name.to_string()
32    }
33}
34
35/// The type of a license.
36#[derive(Clone, Debug, PartialEq)]
37pub enum LicenseFlavor {
38    /// No explicit licensing defined.
39    None,
40
41    /// An SPDX license expression.
42    Spdx(Expression),
43
44    /// An SPDX expression that contain unknown license identifiers.
45    OtherExpression(Expression),
46
47    /// License is in the public domain.
48    PublicDomain,
49
50    /// Unknown licensing type with available string identifiers.
51    Unknown(Vec<String>),
52}
53
/// The kind of software component being described.
///
/// Every variant carries the name of the component.
#[derive(Clone, Debug)]
pub enum ComponentFlavor {
    /// A Python distribution.
    PythonDistribution(String),

    /// A Python module that is part of the standard library.
    PythonStandardLibraryModule(String),

    /// A compiled Python extension module that is part of the standard library.
    PythonStandardLibraryExtensionModule(String),

    /// A compiled Python extension module.
    PythonExtensionModule(String),

    /// A Python module.
    PythonModule(String),

    /// A generic software library.
    Library(String),

    /// A Rust crate.
    RustCrate(String),
}
72
73impl Display for ComponentFlavor {
74    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75        match self {
76            Self::PythonDistribution(name) => f.write_str(name),
77            Self::PythonStandardLibraryModule(name) => {
78                f.write_fmt(format_args!("Python stdlib module {}", name))
79            }
80            Self::PythonStandardLibraryExtensionModule(name) => {
81                f.write_fmt(format_args!("Python stdlib extension {}", name))
82            }
83            Self::PythonExtensionModule(name) => {
84                f.write_fmt(format_args!("Python extension module {}", name))
85            }
86            Self::PythonModule(name) => f.write_fmt(format_args!("Python module {}", name)),
87            Self::Library(name) => f.write_fmt(format_args!("library {}", name)),
88            Self::RustCrate(name) => f.write_fmt(format_args!("Rust crate {}", name)),
89        }
90    }
91}
92
93impl PartialEq for ComponentFlavor {
94    fn eq(&self, other: &Self) -> bool {
95        // If both entities have a Python module name, equivalence is whether
96        // the module names agree, as there can only be a single entity for a given
97        // module name.
98        match (self.python_module_name(), other.python_module_name()) {
99            (Some(a), Some(b)) => a.eq(b),
100            // Comparing a module with a non-module is always not equivalent.
101            (Some(_), None) => false,
102            (None, Some(_)) => false,
103            (None, None) => match (self, other) {
104                (Self::PythonDistribution(a), Self::PythonDistribution(b)) => a.eq(b),
105                (Self::Library(a), Self::Library(b)) => a.eq(b),
106                (Self::RustCrate(a), Self::RustCrate(b)) => a.eq(b),
107                _ => false,
108            },
109        }
110    }
111}
112
113impl Eq for ComponentFlavor {}
114
115impl PartialOrd for ComponentFlavor {
116    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
117        match (self.python_module_name(), other.python_module_name()) {
118            (Some(a), Some(b)) => a.partial_cmp(b),
119            _ => {
120                let a = (self.ordinal_value(), self.to_string());
121                let b = (other.ordinal_value(), other.to_string());
122
123                a.partial_cmp(&b)
124            }
125        }
126    }
127}
128
129impl Ord for ComponentFlavor {
130    fn cmp(&self, other: &Self) -> Ordering {
131        self.partial_cmp(other).unwrap()
132    }
133}
134
135impl ComponentFlavor {
136    fn ordinal_value(&self) -> u8 {
137        match self {
138            Self::PythonDistribution(_) => 0,
139            ComponentFlavor::PythonStandardLibraryModule(_) => 1,
140            ComponentFlavor::PythonStandardLibraryExtensionModule(_) => 2,
141            ComponentFlavor::PythonExtensionModule(_) => 3,
142            ComponentFlavor::PythonModule(_) => 4,
143            ComponentFlavor::Library(_) => 5,
144            ComponentFlavor::RustCrate(_) => 6,
145        }
146    }
147
148    /// Whether this component is part of the Python standard library.
149    pub fn is_python_standard_library(&self) -> bool {
150        match self {
151            Self::PythonDistribution(_) => false,
152            Self::PythonStandardLibraryModule(_) => true,
153            Self::PythonStandardLibraryExtensionModule(_) => true,
154            Self::PythonExtensionModule(_) => true,
155            Self::PythonModule(_) => false,
156            Self::Library(_) => false,
157            Self::RustCrate(_) => false,
158        }
159    }
160
161    pub fn python_module_name(&self) -> Option<&str> {
162        match self {
163            Self::PythonDistribution(_) => None,
164            Self::PythonStandardLibraryModule(name) => Some(name.as_str()),
165            Self::PythonStandardLibraryExtensionModule(name) => Some(name.as_str()),
166            Self::PythonExtensionModule(name) => Some(name.as_str()),
167            Self::PythonModule(name) => Some(name.as_str()),
168            Self::Library(_) => None,
169            Self::RustCrate(_) => None,
170        }
171    }
172
173    /// Whether the component is part of a Python distribution.
174    pub fn is_python_distribution_component(&self) -> bool {
175        matches!(
176            self,
177            Self::PythonDistribution(_)
178                | Self::PythonStandardLibraryModule(_)
179                | Self::PythonStandardLibraryExtensionModule(_)
180        )
181    }
182}
183
/// Describes where the source code for a component can be obtained.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SourceLocation {
    /// No source location has been recorded.
    NotSet,

    /// The source code can be obtained from the given URL.
    Url(String),
}
192
193/// Represents a software component with licensing information.
194#[derive(Clone, Debug)]
195pub struct LicensedComponent {
196    /// Type of component.
197    flavor: ComponentFlavor,
198
199    /// The type of license.
200    license: LicenseFlavor,
201
202    /// Location where source code for this component can be obtained.
203    source_location: SourceLocation,
204
205    /// Homepage for project.
206    homepage: Option<String>,
207
208    /// List of authors.
209    authors: Vec<String>,
210
211    /// Specified license text for this component.
212    ///
213    /// If empty, license texts will be derived from SPDX identifiers, if available.
214    license_texts: Vec<String>,
215}
216
217impl PartialEq for LicensedComponent {
218    fn eq(&self, other: &Self) -> bool {
219        self.flavor.eq(&other.flavor)
220    }
221}
222
223impl Eq for LicensedComponent {}
224
225impl PartialOrd for LicensedComponent {
226    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
227        self.flavor.partial_cmp(&other.flavor)
228    }
229}
230
231impl Ord for LicensedComponent {
232    fn cmp(&self, other: &Self) -> Ordering {
233        self.flavor.cmp(&other.flavor)
234    }
235}
236
237impl LicensedComponent {
238    /// Construct a new instance from parameters.
239    pub fn new(flavor: ComponentFlavor, license: LicenseFlavor) -> Self {
240        Self {
241            flavor,
242            license,
243            source_location: SourceLocation::NotSet,
244            homepage: None,
245            authors: vec![],
246            license_texts: vec![],
247        }
248    }
249
250    /// Construct a new instance from an SPDX expression.
251    pub fn new_spdx(flavor: ComponentFlavor, spdx_expression: &str) -> Result<Self> {
252        let spdx_expression = Expression::parse(spdx_expression).map_err(|e| anyhow!("{}", e))?;
253
254        let license = if spdx_expression.evaluate(|req| req.license.id().is_some()) {
255            LicenseFlavor::Spdx(spdx_expression)
256        } else {
257            LicenseFlavor::OtherExpression(spdx_expression)
258        };
259
260        Ok(Self::new(flavor, license))
261    }
262
263    /// The type of this component.
264    pub fn flavor(&self) -> &ComponentFlavor {
265        &self.flavor
266    }
267
268    /// Obtain the flavor of license for this component.
269    pub fn license(&self) -> &LicenseFlavor {
270        &self.license
271    }
272
273    /// Obtain the SPDX expression for this component's license.
274    pub fn spdx_expression(&self) -> Option<&Expression> {
275        match &self.license {
276            LicenseFlavor::Spdx(expression) => Some(expression),
277            LicenseFlavor::OtherExpression(expression) => Some(expression),
278            LicenseFlavor::None | LicenseFlavor::PublicDomain | LicenseFlavor::Unknown(_) => None,
279        }
280    }
281
282    /// Whether the SPDX expression is simple.
283    ///
284    /// Simple is defined as having at most a single license.
285    pub fn is_simple_spdx_expression(&self) -> bool {
286        if let LicenseFlavor::Spdx(expression) = &self.license {
287            expression.iter().count() < 2
288        } else {
289            false
290        }
291    }
292
293    /// Obtain the location where the source of this component can be obtained.
294    pub fn source_location(&self) -> &SourceLocation {
295        &self.source_location
296    }
297
298    /// Define where source code for this component can be obtained from.
299    pub fn set_source_location(&mut self, location: SourceLocation) {
300        self.source_location = location;
301    }
302
303    /// Obtain the homepage / URL of this component.
304    pub fn homepage(&self) -> Option<&str> {
305        self.homepage.as_deref()
306    }
307
308    /// Set the homepage of this component.
309    pub fn set_homepage(&mut self, value: impl ToString) {
310        self.homepage = Some(value.to_string());
311    }
312
313    /// Obtain the annotated authors of this component.
314    pub fn authors(&self) -> &[String] {
315        &self.authors
316    }
317
318    /// Define an author of this component.
319    pub fn add_author(&mut self, value: impl ToString) {
320        self.authors.push(value.to_string());
321    }
322
323    /// Obtain the explicitly set license texts for this component.
324    pub fn license_texts(&self) -> &Vec<String> {
325        &self.license_texts
326    }
327
328    /// Define the license text for this component.
329    pub fn add_license_text(&mut self, text: impl ToString) {
330        self.license_texts.push(text.to_string());
331    }
332
333    /// Returns whether all license identifiers are SPDX.
334    pub fn is_spdx(&self) -> bool {
335        matches!(self.license, LicenseFlavor::Spdx(_))
336    }
337
338    /// Obtain all SPDX licenses referenced by this component.
339    ///
340    /// The first element of the returned tuple is the license identifier. The 2nd
341    /// is an optional exclusion identifier.
342    pub fn all_spdx_licenses(&self) -> BTreeSet<(LicenseId, Option<ExceptionId>)> {
343        match &self.license {
344            LicenseFlavor::Spdx(expression) => expression
345                .requirements()
346                .map(|req| (req.req.license.id().unwrap(), req.req.exception))
347                .collect::<BTreeSet<_>>(),
348            LicenseFlavor::OtherExpression(expression) => expression
349                .requirements()
350                .filter_map(|req| req.req.license.id().map(|id| (id, req.req.exception)))
351                .collect::<BTreeSet<_>>(),
352            LicenseFlavor::None | LicenseFlavor::PublicDomain | LicenseFlavor::Unknown(_) => {
353                BTreeSet::new()
354            }
355        }
356    }
357
358    /// Obtain all SPDX license names.
359    pub fn all_spdx_license_names(&self, full: bool) -> Vec<String> {
360        self.all_spdx_licenses()
361            .into_iter()
362            .map(|(id, exception)| format_spdx(id, exception, full))
363            .collect::<Vec<_>>()
364    }
365
366    /// Obtain all the distinct [LicenseId] in this component.
367    ///
368    /// Unlike [Self::all_spdx_licenses()], this returns just the license IDs without exceptions.
369    pub fn all_spdx_license_ids(&self) -> BTreeSet<LicenseId> {
370        self.all_spdx_licenses()
371            .into_iter()
372            .map(|(lid, _)| lid)
373            .collect::<BTreeSet<_>>()
374    }
375
376    /// Obtain all the [ExceptionId] present in this component.
377    pub fn all_spdx_exception_ids(&self) -> BTreeSet<ExceptionId> {
378        self.all_spdx_licenses()
379            .into_iter()
380            .filter_map(|(_, id)| id)
381            .collect::<BTreeSet<_>>()
382    }
383
384    /// Whether the component has any copyleft licenses.
385    pub fn has_copyleft(&self) -> bool {
386        self.all_spdx_licenses()
387            .into_iter()
388            .any(|(id, _)| id.is_copyleft())
389    }
390
391    /// Whether all licenses are copyleft.
392    pub fn is_always_copyleft(&self) -> bool {
393        let licenses = self.all_spdx_licenses();
394
395        if licenses.is_empty() {
396            false
397        } else {
398            licenses.into_iter().all(|(id, _)| id.is_copyleft())
399        }
400    }
401
402    /// Obtain a textual licensing summary of this component.
403    pub fn licensing_summary(&self) -> String {
404        let mut lines = vec![];
405
406        if !self.authors().is_empty() {
407            lines.push(format!("Authors: {}", self.authors().join(", ")));
408        }
409        if let Some(value) = self.homepage() {
410            lines.push(format!("Homepage: {}", value));
411        }
412        match self.source_location() {
413            SourceLocation::NotSet => {}
414            SourceLocation::Url(value) => {
415                lines.push(format!("Source location: {}", value));
416            }
417        }
418
419        match self.license() {
420            LicenseFlavor::None => {
421                lines.push("No licensing information available.".into());
422            }
423            LicenseFlavor::Spdx(expression) | LicenseFlavor::OtherExpression(expression) => {
424                lines.push(format!(
425                    "Licensed according to SPDX expression: {}",
426                    expression
427                ));
428            }
429            LicenseFlavor::PublicDomain => {
430                lines.push("Licensed to the public domain.".into());
431            }
432            LicenseFlavor::Unknown(terms) => {
433                lines.push(format!("Licensed according to {}", terms.join(", ")));
434            }
435        }
436
437        lines.join("\n")
438    }
439}
440
441/// A collection of licensed components.
442#[derive(Clone, Debug, Default, PartialEq, Eq)]
443pub struct LicensedComponents {
444    /// The collection of components, indexed by its flavor.
445    components: BTreeMap<ComponentFlavor, LicensedComponent>,
446}
447
448impl LicensedComponents {
449    /// Obtain an iterator over the components, consuming self.
450    pub fn into_components(self) -> impl Iterator<Item = LicensedComponent> {
451        self.components.into_values()
452    }
453
454    /// Iterate over components in this collection.
455    pub fn iter_components(&self) -> impl Iterator<Item = &LicensedComponent> {
456        self.components.values()
457    }
458
459    /// Add a component to this collection.
460    pub fn add_component(&mut self, component: LicensedComponent) {
461        self.components.insert(component.flavor.clone(), component);
462    }
463
464    /// Add a component to this collection, but only if it only contains SPDX license identifiers.
465    pub fn add_spdx_only_component(&mut self, component: LicensedComponent) -> Result<()> {
466        if component.is_spdx() {
467            self.add_component(component);
468            Ok(())
469        } else {
470            Err(anyhow!("component has non-SPDX license identifiers"))
471        }
472    }
473
474    /// Whether a Python module exists in the collection.
475    pub fn has_python_module(&self, name: &str) -> bool {
476        // ComponentFlavor are equivalent if the Python module name is the same,
477        // even if the enum variant is different.
478        self.components
479            .contains_key(&ComponentFlavor::PythonModule(name.into()))
480    }
481
482    /// Adjusts Python modules in the components set.
483    ///
484    /// Standard library modules that have identical licensing to the Python
485    /// distribution are removed.
486    ///
487    /// Missing top-level packages are added with an unknown license annotation.
488    ///
489    /// Modules that aren't top-level modules are removed.
490    pub fn normalize_python_modules(&self) -> Self {
491        let distribution = self
492            .components
493            .values()
494            .find(|c| matches!(c.flavor(), ComponentFlavor::PythonDistribution(_)));
495
496        let mut top_level_names = BTreeSet::new();
497        let mut components = Self::default();
498
499        let filtered = self.components.iter().filter(|(k, v)| {
500            // Remove standard library modules with licensing identical to the distribution.
501            if k.is_python_standard_library() {
502                if let Some(distribution) = distribution {
503                    if v.license() == distribution.license() {
504                        return false;
505                    }
506                }
507            }
508
509            if let Some(name) = k.python_module_name() {
510                let top_level_name = if let Some((name, _)) = name.split_once('.') {
511                    name
512                } else {
513                    name
514                };
515
516                top_level_names.insert(top_level_name.to_string());
517            }
518
519            true
520        });
521
522        for (_, component) in filtered {
523            components.add_component(component.clone());
524        }
525
526        // Ensure top-level modules are present.
527        for name in top_level_names {
528            if !components.has_python_module(&name) {
529                components.add_component(LicensedComponent::new(
530                    ComponentFlavor::PythonModule(name.to_string()),
531                    LicenseFlavor::None,
532                ));
533            }
534        }
535
536        // Filter non top-levels from the list.
537        components.components =
538            BTreeMap::from_iter(components.components.into_iter().filter(|(k, _)| {
539                if let Some(name) = k.python_module_name() {
540                    if name.contains('.') {
541                        return false;
542                    }
543                }
544
545                true
546            }));
547
548        components
549    }
550
551    /// Obtain all SPDX license identifiers referenced by registered components.
552    pub fn all_spdx_licenses(&self) -> BTreeSet<(LicenseId, Option<ExceptionId>)> {
553        self.components
554            .values()
555            .flat_map(|component| component.all_spdx_licenses())
556            .collect::<BTreeSet<_>>()
557    }
558
559    /// Obtain all SPDX license IDs referenced by all components.
560    ///
561    /// Unlike [Self::all_spdx_licenses()], this returns just the [LicenseId], without exceptions.
562    pub fn all_spdx_license_ids(&self) -> BTreeSet<LicenseId> {
563        self.components
564            .values()
565            .flat_map(|component| component.all_spdx_license_ids())
566            .collect::<BTreeSet<_>>()
567    }
568
569    /// Obtain all SPDX license names referenced by registered components.
570    pub fn all_spdx_license_names(&self, full: bool) -> Vec<String> {
571        self.iter_components()
572            .flat_map(|c| c.all_spdx_license_names(full))
573            .collect::<BTreeSet<_>>()
574            .into_iter()
575            .collect::<Vec<_>>()
576    }
577
578    /// Obtain a mapping of all SPDX licenses to components having them.
579    pub fn components_by_spdx_license(
580        &self,
581    ) -> BTreeMap<(LicenseId, Option<ExceptionId>), BTreeSet<&LicensedComponent>> {
582        let mut res = BTreeMap::new();
583
584        for component in self.iter_components() {
585            for key in component.all_spdx_licenses() {
586                res.entry(key)
587                    .or_insert_with(BTreeSet::new)
588                    .insert(component);
589            }
590        }
591
592        res
593    }
594
595    /// Obtain all components with valid SPDX license expressions.
596    pub fn license_spdx_components(&self) -> impl Iterator<Item = &LicensedComponent> {
597        self.components
598            .values()
599            .filter(|c| matches!(c.license(), &LicenseFlavor::Spdx(_)))
600    }
601
602    /// Obtain components that are missing license annotations.
603    pub fn license_missing_components(&self) -> impl Iterator<Item = &LicensedComponent> {
604        self.components
605            .values()
606            .filter(|c| c.license() == &LicenseFlavor::None)
607    }
608
609    /// Obtain components that are licensed to the public domain.
610    pub fn license_public_domain_components(&self) -> impl Iterator<Item = &LicensedComponent> {
611        self.components
612            .values()
613            .filter(|c| c.license() == &LicenseFlavor::PublicDomain)
614    }
615
616    /// Obtain components that have unknown licensing.
617    ///
618    /// There is a value for the license but that license is not recognized by us.
619    pub fn license_unknown_components(&self) -> impl Iterator<Item = &LicensedComponent> {
620        self.components.values().filter(|c| {
621            matches!(
622                c.license(),
623                &LicenseFlavor::Unknown(_) | &LicenseFlavor::OtherExpression(_)
624            )
625        })
626    }
627
628    /// Components that have copyleft licenses.
629    ///
630    /// There may be false negatives if the component doesn't have fully SPDX parsed
631    /// licenses.
632    pub fn license_copyleft_components(&self) -> impl Iterator<Item = &LicensedComponent> {
633        self.components.values().filter(|c| c.has_copyleft())
634    }
635
636    /// Generate a text summary of licesning info.
637    pub fn license_summary(&self) -> String {
638        let mut lines = vec![
639            "Software Licensing Summary".to_string(),
640            "==========================".to_string(),
641            "".to_string(),
642        ];
643
644        lines.push(format!(
645            "{} distinct software components",
646            self.components.len()
647        ));
648        lines.push(format!(
649            "{} lack a known software license",
650            self.license_missing_components().count()
651        ));
652        lines.push(format!(
653            "{} have unknown license expressions",
654            self.license_unknown_components().count()
655        ));
656        lines.push(format!(
657            "{} distinct SPDX licenses",
658            self.all_spdx_licenses().len()
659        ));
660        lines.push(format!(
661            "{} components in the public domain",
662            self.license_public_domain_components().count()
663        ));
664        lines.push(format!(
665            "{} have copyleft licenses",
666            self.license_copyleft_components().count()
667        ));
668        let spdx_components = self.components_by_spdx_license();
669        if !spdx_components.is_empty() {
670            lines.push("".to_string());
671
672            lines.push("Count   OSI   FSF free   Copyleft   SPDX License".to_string());
673
674            for ((lid, exception), components) in spdx_components {
675                lines.push(format!(
676                    "{:>5}   [{}]     [{}]        [{}]      {}",
677                    components.len(),
678                    if lid.is_osi_approved() { "x" } else { " " },
679                    if lid.is_fsf_free_libre() { "x" } else { " " },
680                    if lid.is_copyleft() { "x" } else { " " },
681                    format_spdx(lid, exception, true)
682                ));
683            }
684        }
685
686        lines.join("\n")
687    }
688
689    /// Generate a text report of noteworthy licensing info.
690    ///
691    /// This essentially emits license quirks that may warrant user attention.
692    pub fn interesting_report(&self) -> Option<String> {
693        let mut lines = vec![
694            "Noteworthy Licensing Info".to_string(),
695            "=========================".to_string(),
696            "".to_string(),
697        ];
698
699        let mut have_interesting = false;
700
701        for component in self.iter_components() {
702            match component.license() {
703                LicenseFlavor::None => {
704                    lines.push(format!("* {} lacks a known license", component.flavor()));
705                    have_interesting = true;
706                }
707                LicenseFlavor::Spdx(_) => {
708                    let copyleft_names = component
709                        .all_spdx_licenses()
710                        .into_iter()
711                        .filter(|(id, _)| id.is_copyleft())
712                        .map(|(id, exception)| format_spdx(id, exception, true))
713                        .collect::<Vec<_>>();
714
715                    if component.is_always_copyleft() {
716                        lines.push(format!(
717                            "* {} has copyleft licenses exclusively ({})",
718                            component.flavor(),
719                            copyleft_names.join(", ")
720                        ));
721                        have_interesting = true;
722                    } else if component.has_copyleft() {
723                        lines.push(format!(
724                            "* {} has a copyleft license ({})",
725                            component.flavor(),
726                            copyleft_names.join(", ")
727                        ));
728                        have_interesting = true;
729                    }
730                }
731                LicenseFlavor::OtherExpression(expr) => {
732                    lines.push(format!(
733                        "* {} has an unknown SPDX license expression: {}",
734                        component.flavor(),
735                        expr
736                    ));
737                    have_interesting = true;
738                }
739                LicenseFlavor::PublicDomain => {}
740                LicenseFlavor::Unknown(terms) => {
741                    lines.push(format!(
742                        "* {} has unknown license expression: {}",
743                        component.flavor(),
744                        terms.join(", ")
745                    ));
746                    have_interesting = true;
747                }
748            }
749        }
750
751        if have_interesting {
752            Some(lines.join("\n"))
753        } else {
754            None
755        }
756    }
757
758    /// Generate a summary of SPDX licenses in all components.
759    pub fn spdx_license_breakdown(&self) -> String {
760        let mut lines = vec![
761            "SPDX License Breakdown".to_string(),
762            "======================".to_string(),
763            "".to_string(),
764        ];
765
766        for (license, exception) in self.all_spdx_licenses() {
767            lines.push(format_spdx(license, exception, true));
768            lines.push("-".repeat(format_spdx(license, exception, true).len()));
769            lines.push("".to_string());
770
771            lines.push(format!(
772                "[{}] OSI approved; [{}] FSF free libre; [{}] copyleft",
773                if license.is_osi_approved() { "*" } else { " " },
774                if license.is_fsf_free_libre() {
775                    "*"
776                } else {
777                    " "
778                },
779                if license.is_copyleft() { "*" } else { " " }
780            ));
781            lines.push("".to_string());
782
783            for component in self.iter_components() {
784                if component
785                    .all_spdx_licenses()
786                    .contains(&(license, exception))
787                {
788                    lines.push(format!("* {}", component.flavor()));
789                }
790            }
791
792            lines.push("".to_string());
793        }
794
795        lines.join("\n")
796    }
797
798    /// Generate a unified text document describing licensing info for the components within.
799    #[cfg(feature = "spdx-text")]
800    pub fn aggregate_license_document(&self, emit_interesting: bool) -> Result<String> {
801        let mut lines = vec![self.license_summary()];
802        lines.push("".into());
803
804        if emit_interesting {
805            if let Some(value) = self.interesting_report() {
806                lines.push(value);
807                lines.push("".into());
808            }
809        }
810
811        lines.push("Software Components".to_string());
812        lines.push("===================".to_string());
813        lines.push("".into());
814
815        for component in self.iter_components() {
816            lines.push(component.flavor().to_string());
817            lines.push("-".repeat(component.flavor().to_string().len()));
818            lines.push("".into());
819
820            lines.push(component.licensing_summary());
821            lines.push("".into());
822
823            if component.spdx_expression().is_some() && component.license_texts.is_empty() {
824                lines.push("The license texts for this component are reproduced elsewhere in this document.".into());
825            }
826
827            for exception in component.all_spdx_exception_ids() {
828                lines.push("".into());
829                lines.push(format!(
830                        "In addition to the standard SPDX license, this component has the license exception: {}",
831                        exception.name
832                    ));
833                lines.push("The text of that exception follows.".into());
834                lines.push("".into());
835                lines.push(exception.text().to_string());
836                lines.push(format!("(end of exception text for {})", exception.name));
837            }
838
839            if !component.license_texts().is_empty() {
840                lines.push("".into());
841                lines.push("The license text for this component is as follows.".into());
842                lines.push("".into());
843                lines.push("-".repeat(80).to_string());
844
845                for text in component.license_texts() {
846                    lines.push(text.to_string());
847                }
848                lines.push("".into());
849                lines.push("-".repeat(80).to_string());
850                lines.push(format!("(end of license text for {})", component.flavor()));
851            }
852
853            lines.push("".into());
854        }
855
856        lines.push("SPDX License Texts".into());
857        lines.push("==================".into());
858        lines.push("".into());
859        lines.push("The following sections contain license texts for all SPDX licenses".into());
860        lines.push("referenced by software components listed above.".into());
861        lines.push("".into());
862
863        for license in self.all_spdx_license_ids() {
864            let header = format!("{} / {}", license.name, license.full_name);
865
866            lines.push(header.clone());
867            lines.push("-".repeat(header.len()));
868
869            lines.push("".into());
870
871            lines.push(license.text().to_string());
872
873            lines.push("".into());
874        }
875
876        let text = lines.join("\n");
877
878        Ok(text)
879    }
880}
881
/// Defines license information for a Python package.
///
/// Values are raw strings and texts as collected from a package; no
/// normalization or SPDX interpretation is implied by this type itself.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct PackageLicenseInfo {
    /// The Python package whose license info is being annotated.
    pub package: String,

    /// Version string of the Python package being annotated.
    pub version: String,

    /// Values of `License` entries in the package metadata.
    pub metadata_licenses: Vec<String>,

    /// Licenses present in `Classifier: License` entries in the package metadata.
    pub classifier_licenses: Vec<String>,

    /// Texts of license files present in the package.
    pub license_texts: Vec<String>,

    /// Texts of NOTICE files present in the package.
    pub notice_texts: Vec<String>,

    /// Special annotation indicating if the license is in the public domain.
    pub is_public_domain: bool,

    /// URL of the project home page, if known.
    pub homepage: Option<String>,

    /// List of author strings.
    pub authors: Vec<String>,
}
912
913impl TryInto<LicensedComponent> for PackageLicenseInfo {
914    type Error = anyhow::Error;
915
916    fn try_into(self) -> Result<LicensedComponent, Self::Error> {
917        let component_flavor = ComponentFlavor::PythonModule(self.package.clone());
918
919        let mut component = if self.is_public_domain {
920            LicensedComponent::new(component_flavor, LicenseFlavor::PublicDomain)
921        } else if !self.metadata_licenses.is_empty() || !self.classifier_licenses.is_empty() {
922            let mut spdx_license_ids = BTreeSet::new();
923            let mut non_spdx_licenses = BTreeSet::new();
924
925            for s in self
926                .metadata_licenses
927                .into_iter()
928                .chain(self.classifier_licenses.into_iter())
929            {
930                if let Some(lid) = spdx::license_id(&s) {
931                    spdx_license_ids.insert(format!("({})", lid.name));
932                } else if spdx::Expression::parse(&s).is_ok() {
933                    spdx_license_ids.insert(format!("({})", s));
934                } else if let Some(name) = spdx::identifiers::LICENSES
935                    .iter()
936                    .find_map(|(name, full, _)| if &s == full { Some(name) } else { None })
937                {
938                    spdx_license_ids.insert(name.to_string());
939                } else {
940                    non_spdx_licenses.insert(s);
941                }
942            }
943
944            if non_spdx_licenses.is_empty() {
945                let expression = spdx_license_ids
946                    .into_iter()
947                    .collect::<Vec<_>>()
948                    .join(" OR ");
949                LicensedComponent::new_spdx(component_flavor, &expression)?
950            } else {
951                LicensedComponent::new(
952                    component_flavor,
953                    LicenseFlavor::Unknown(non_spdx_licenses.into_iter().collect::<Vec<_>>()),
954                )
955            }
956        } else {
957            LicensedComponent::new(component_flavor, LicenseFlavor::None)
958        };
959
960        for text in self
961            .license_texts
962            .into_iter()
963            .chain(self.notice_texts.into_iter())
964        {
965            component.add_license_text(text);
966        }
967
968        if let Some(value) = self.homepage {
969            component.set_homepage(value);
970        }
971        for value in self.authors {
972            component.add_author(value);
973        }
974
975        Ok(component)
976    }
977}
978
979impl PartialOrd for PackageLicenseInfo {
980    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
981        if self.package == other.package {
982            self.version.partial_cmp(&other.version)
983        } else {
984            self.package.partial_cmp(&other.package)
985        }
986    }
987}
988
989impl Ord for PackageLicenseInfo {
990    fn cmp(&self, other: &Self) -> Ordering {
991        if self.package == other.package {
992            self.version.cmp(&other.version)
993        } else {
994            self.package.cmp(&other.package)
995        }
996    }
997}
998
999/// Obtain Python package license information from an iterable of Python resources.
1000///
1001/// This will look at `PythonPackageDistributionResource` entries and attempt
1002/// to find license information within. It looks for license info in `METADATA`
1003/// and `PKG-INFO` files (both the `License` key and the trove classifiers) as
1004/// well as well-named files.
1005pub fn derive_package_license_infos<'a>(
1006    resources: impl Iterator<Item = &'a PythonResource<'a>>,
1007) -> Result<Vec<PackageLicenseInfo>> {
1008    let mut packages = BTreeMap::new();
1009
1010    let resources = resources.filter_map(|resource| {
1011        if let PythonResource::PackageDistributionResource(resource) = resource {
1012            Some(resource)
1013        } else {
1014            None
1015        }
1016    });
1017
1018    for resource in resources {
1019        let key = (resource.package.clone(), resource.version.clone());
1020
1021        let entry = packages.entry(key).or_insert(PackageLicenseInfo {
1022            package: resource.package.clone(),
1023            version: resource.version.clone(),
1024            ..Default::default()
1025        });
1026
1027        // This is a special metadata file. Parse it and attempt to extract license info.
1028        if resource.name == "METADATA" || resource.name == "PKG-INFO" {
1029            let metadata = PythonPackageMetadata::from_metadata(&resource.data.resolve_content()?)
1030                .context("parsing package metadata")?;
1031
1032            if let Some(value) = metadata.find_first_header("Home-page") {
1033                entry.homepage = Some(value.to_string());
1034            }
1035            for value in metadata.find_all_headers("Author") {
1036                entry.authors.push(value.to_string());
1037            }
1038            for value in metadata.find_all_headers("Maintainer") {
1039                entry.authors.push(value.to_string());
1040            }
1041
1042            for value in metadata.find_all_headers("License") {
1043                entry.metadata_licenses.push(value.to_string());
1044            }
1045
1046            for value in metadata.find_all_headers("Classifier") {
1047                if value.starts_with("License ") {
1048                    if let Some(license) = value.split(" :: ").last() {
1049                        // In case they forget the part after this.
1050                        if license != "OSI Approved" {
1051                            entry.classifier_licenses.push(license.to_string());
1052                        }
1053                    }
1054                }
1055            }
1056        }
1057        // This looks like a license file.
1058        else if resource.name.starts_with("LICENSE")
1059            || resource.name.starts_with("LICENSE")
1060            || resource.name.starts_with("COPYING")
1061        {
1062            let data = resource.data.resolve_content()?;
1063            let license_text = String::from_utf8_lossy(&data);
1064
1065            entry.license_texts.push(license_text.to_string());
1066        }
1067        // This looks like a NOTICE file.
1068        else if resource.name.starts_with("NOTICE") {
1069            let data = resource.data.resolve_content()?;
1070            let notice_text = String::from_utf8_lossy(&data);
1071
1072            entry.notice_texts.push(notice_text.to_string());
1073        }
1074        // Else we don't know what to do with this file. Just ignore it.
1075    }
1076
1077    Ok(packages.into_values().collect::<Vec<_>>())
1078}
1079
#[cfg(test)]
mod tests {
    use {
        super::*,
        crate::resource::{
            PythonPackageDistributionResource, PythonPackageDistributionResourceFlavor,
        },
        simple_file_manifest::FileData,
        std::borrow::Cow,
    };

    /// Runs `derive_package_license_infos` over a single `.dist-info` resource
    /// belonging to package `foo` version `1.0`.
    fn derive_for_foo_resource(name: &str, data: &[u8]) -> Result<Vec<PackageLicenseInfo>> {
        let resources = vec![PythonResource::PackageDistributionResource(Cow::Owned(
            PythonPackageDistributionResource {
                location: PythonPackageDistributionResourceFlavor::DistInfo,
                package: "foo".to_string(),
                version: "1.0".to_string(),
                name: name.to_string(),
                data: FileData::Memory(data.to_vec()),
            },
        ))];

        derive_package_license_infos(resources.iter())
    }

    /// An otherwise-empty license record for package `foo` at `version`.
    fn foo_info(version: &str) -> PackageLicenseInfo {
        PackageLicenseInfo {
            package: "foo".to_string(),
            version: version.to_string(),
            ..Default::default()
        }
    }

    /// The component flavor every converted `foo` record is expected to carry.
    fn foo_module() -> ComponentFlavor {
        ComponentFlavor::PythonModule("foo".to_string())
    }

    #[test]
    fn component_flavor_equivalence() {
        let distribution_foo = ComponentFlavor::PythonDistribution("foo".to_string());
        let stdlib_foo = ComponentFlavor::PythonStandardLibraryModule("foo".into());

        assert_eq!(
            distribution_foo,
            ComponentFlavor::PythonDistribution("foo".to_string())
        );
        assert_ne!(distribution_foo, stdlib_foo.clone());

        // Module-like flavors compare equal when their names match.
        for same_name in [
            ComponentFlavor::PythonStandardLibraryModule("foo".into()),
            ComponentFlavor::PythonStandardLibraryExtensionModule("foo".into()),
            ComponentFlavor::PythonExtensionModule("foo".into()),
            ComponentFlavor::PythonModule("foo".into()),
        ] {
            assert_eq!(stdlib_foo, same_name);
        }

        // And differ when the names differ.
        for other_name in [
            ComponentFlavor::PythonStandardLibraryModule("bar".into()),
            ComponentFlavor::PythonStandardLibraryExtensionModule("bar".into()),
            ComponentFlavor::PythonExtensionModule("bar".into()),
            ComponentFlavor::PythonModule("bar".into()),
        ] {
            assert_ne!(stdlib_foo, other_name);
        }
    }

    #[test]
    fn parse_advanced() -> Result<()> {
        for expression in [
            "Apache-2.0 OR MPL-2.0 OR 0BSD",
            "Apache-2.0 AND MPL-2.0 AND 0BSD",
            "Apache-2.0 AND MPL-2.0 OR 0BSD",
            "MIT AND (LGPL-2.1-or-later OR BSD-3-Clause)",
        ] {
            LicensedComponent::new_spdx(
                ComponentFlavor::PythonDistribution("foo".into()),
                expression,
            )?;
        }

        Ok(())
    }

    #[test]
    fn test_derive_package_license_infos_empty() -> Result<()> {
        let resources: Vec<PythonResource> = Vec::new();

        let infos = derive_package_license_infos(resources.iter())?;
        assert!(infos.is_empty());

        Ok(())
    }

    #[test]
    fn test_derive_package_license_infos_license_file() -> Result<()> {
        // Byte 42 is ASCII `*`.
        let infos = derive_for_foo_resource("LICENSE", &[42])?;
        assert_eq!(infos.len(), 1);

        let wanted = PackageLicenseInfo {
            license_texts: vec!["*".to_string()],
            ..foo_info("1.0")
        };
        assert_eq!(infos[0], wanted);

        Ok(())
    }

    #[test]
    fn test_derive_package_license_infos_metadata_licenses() -> Result<()> {
        let infos = derive_for_foo_resource(
            "METADATA",
            "Name: foo\nLicense: BSD-1-Clause\nLicense: BSD-2-Clause\n".as_bytes(),
        )?;
        assert_eq!(infos.len(), 1);

        let wanted = PackageLicenseInfo {
            metadata_licenses: vec!["BSD-1-Clause".to_string(), "BSD-2-Clause".to_string()],
            ..foo_info("1.0")
        };
        assert_eq!(infos[0], wanted);

        Ok(())
    }

    #[test]
    fn test_derive_package_license_infos_metadata_classifiers() -> Result<()> {
        let infos = derive_for_foo_resource(
            "METADATA",
            "Name: foo\nClassifier: License :: OSI Approved\nClassifier: License :: OSI Approved :: BSD-1-Clause\n"
                .as_bytes(),
        )?;
        assert_eq!(infos.len(), 1);

        let wanted = PackageLicenseInfo {
            classifier_licenses: vec!["BSD-1-Clause".to_string()],
            ..foo_info("1.0")
        };
        assert_eq!(infos[0], wanted);

        Ok(())
    }

    #[test]
    fn license_info_to_component_empty() -> Result<()> {
        let component: LicensedComponent = foo_info("0.1").try_into()?;

        assert_eq!(
            component,
            LicensedComponent::new(foo_module(), LicenseFlavor::None)
        );

        Ok(())
    }

    #[test]
    fn license_info_to_component_single_metadata_spdx() -> Result<()> {
        let info = PackageLicenseInfo {
            metadata_licenses: vec!["MIT".to_string()],
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(component, LicensedComponent::new_spdx(foo_module(), "MIT")?);

        Ok(())
    }

    #[test]
    fn license_info_to_component_single_classifier_spdx() -> Result<()> {
        let info = PackageLicenseInfo {
            classifier_licenses: vec!["Apache-2.0".to_string()],
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(
            component,
            LicensedComponent::new_spdx(foo_module(), "Apache-2.0")?
        );

        Ok(())
    }

    #[test]
    fn license_info_to_component_multiple_metadata_spdx() -> Result<()> {
        let info = PackageLicenseInfo {
            metadata_licenses: vec!["MIT".to_string(), "Apache-2.0".to_string()],
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(
            component,
            LicensedComponent::new_spdx(foo_module(), "Apache-2.0 OR MIT")?
        );

        Ok(())
    }

    #[test]
    fn license_info_to_component_multiple_classifier_spdx() -> Result<()> {
        let info = PackageLicenseInfo {
            classifier_licenses: vec!["Apache-2.0".to_string(), "MIT".to_string()],
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(
            component,
            LicensedComponent::new_spdx(foo_module(), "Apache-2.0 OR MIT")?
        );

        Ok(())
    }

    #[test]
    fn license_info_to_component_spdx_expression() -> Result<()> {
        let info = PackageLicenseInfo {
            metadata_licenses: vec!["MIT OR Apache-2.0".to_string()],
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(
            component,
            LicensedComponent::new_spdx(foo_module(), "MIT OR Apache-2.0")?
        );

        Ok(())
    }

    #[test]
    fn license_info_to_component_spdx_fullname() -> Result<()> {
        let info = PackageLicenseInfo {
            metadata_licenses: vec!["MIT License".to_string()],
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(component, LicensedComponent::new_spdx(foo_module(), "MIT")?);

        Ok(())
    }

    #[test]
    fn license_info_to_component_unknown() -> Result<()> {
        let terms = vec!["Unknown".to_string(), "Unknown 2".to_string()];

        let info = PackageLicenseInfo {
            metadata_licenses: terms.clone(),
            ..foo_info("0.1")
        };

        let component: LicensedComponent = info.try_into()?;
        assert_eq!(
            component,
            LicensedComponent::new(foo_module(), LicenseFlavor::Unknown(terms))
        );

        Ok(())
    }
}