scancode_rust/askalono/store/
spdx.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::{
5    ffi::OsStr,
6    fs::{read_dir, File},
7    io::prelude::*,
8    path::Path,
9};
10
11use anyhow::{format_err, Error};
12use log::{debug, info};
13
14use crate::askalono::{
15    license::TextData,
16    store::base::{LicenseEntry, Store},
17};
18
19impl Store {
20    /// Fill the store with SPDX JSON data.
21    ///
22    /// This function is very specific to the format of SPDX's
23    /// `license-list-data` repository. It reads all JSON files in the
24    /// `json/details` directory and creates entries inside the store for
25    /// matching.
26    ///
27    /// This is intended to be used during build of askalono, so it's not
28    /// available unless the `spdx` feature is enabled.
29    ///
30    /// `include_texts`, if true, will keep normalized license text data inside
31    /// the store. This yields a larger store when serialized, but has the
32    /// benefit of allowing you to diff your result against what askalono has
33    /// stored.
34    pub fn load_spdx(&mut self, dir: &Path, include_texts: bool) -> Result<(), Error> {
35        use serde_json::{from_str, Value};
36
37        // locate all json files in the directory
38        let mut paths: Vec<_> = read_dir(dir)?
39            .filter_map(|e| e.ok())
40            .map(|e| e.path())
41            .filter(|p| p.is_file() && p.extension().unwrap_or_else(|| OsStr::new("")) == "json")
42            .collect();
43
44        // sort without extensions; otherwise dashes and dots muck it up
45        paths.sort_by(|a, b| a.file_stem().unwrap().cmp(b.file_stem().unwrap()));
46
47        for path in paths {
48            let mut f = File::open(path)?;
49            let mut data = String::new();
50            f.read_to_string(&mut data)?;
51            let val: Value = from_str(&data)?;
52
53            let name = val["licenseId"]
54                .as_str()
55                .ok_or_else(|| format_err!("missing licenseId"))?;
56
57            let deprecated = val["isDeprecatedLicenseId"]
58                .as_bool()
59                .ok_or_else(|| format_err!("missing isDeprecatedLicenseId"))?;
60            if deprecated {
61                debug!("Skipping {} (deprecated)", name);
62                continue;
63            }
64
65            let text = val["licenseText"]
66                .as_str()
67                .ok_or_else(|| format_err!("missing licenseText"))?;
68            let header = val["standardLicenseHeader"].as_str();
69
70            info!("Processing {}", name);
71
72            let content = match include_texts {
73                true => TextData::new(text),
74                false => TextData::new(text).without_text(),
75            };
76
77            // check if an identical license is already present
78            let mut already_existed = false;
79            self.licenses.iter_mut().for_each(|(key, ref mut value)| {
80                if value.original.eq_data(&content) {
81                    value.aliases.push(name.to_string());
82                    info!("{} already stored; added as an alias for {}", name, key);
83                    already_existed = true;
84                }
85            });
86
87            if already_existed {
88                continue;
89            }
90
91            let license = self
92                .licenses
93                .entry(name.to_owned())
94                .or_insert_with(|| LicenseEntry::new(content));
95
96            if let Some(header_text) = header {
97                let header_data = match include_texts {
98                    false => TextData::new(header_text),
99                    true => TextData::new(header_text).without_text(),
100                };
101                license.headers = vec![header_data];
102            }
103        }
104
105        Ok(())
106    }
107}