scancode_rust/askalono/store/
analyze.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::{cmp::Ordering, fmt};
5
6use crate::askalono::{
7    license::LicenseType,
8    license::TextData,
9    store::base::{LicenseEntry, Store},
10};
11
12/// Information about text that was compared against licenses in the store.
13///
14/// This only contains information about the overall match; to uncover more
15/// data you can run methods like `optimize_bounds` on `TextData`.
16///
17/// Its lifetime is tied to the lifetime of the `Store` it was generated from.
18#[derive(Clone)]
19pub struct Match<'a> {
20    /// Confidence score of the match, ranging from 0 to 1.
21    pub score: f32,
22    /// The name of the closest matching license in the `Store`. This will
23    /// always be something that exists in the store, regardless of the score.
24    pub name: &'a str,
25    /// The type of the license that matched. Useful to know if the match was
26    /// the complete text, a header, or something else.
27    pub license_type: LicenseType,
28    /// A reference to the license data that matched inside the `Store`. May be
29    /// useful for diagnostic purposes or to further optimize the result.
30    pub data: &'a TextData,
31}
32
33/// A lighter version of Match to be used during analysis.
34/// Reduces the need for cloning a bunch of fields.
35struct PartialMatch<'a> {
36    pub name: &'a str,
37    pub score: f32,
38    pub license_type: LicenseType,
39    pub data: &'a TextData,
40}
41
42impl<'a> PartialOrd for PartialMatch<'a> {
43    fn partial_cmp(&self, other: &PartialMatch<'_>) -> Option<Ordering> {
44        self.score.partial_cmp(&other.score)
45    }
46}
47
48impl<'a> PartialEq for PartialMatch<'a> {
49    fn eq(&self, other: &PartialMatch<'_>) -> bool {
50        self.score.eq(&other.score)
51            && self.name == other.name
52            && self.license_type == other.license_type
53    }
54}
55
56impl<'a> fmt::Debug for Match<'a> {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        write!(
59            f,
60            "Match {{ score: {}, name: {}, license_type: {:?} }}",
61            self.score, self.name, self.license_type
62        )
63    }
64}
65
66impl Store {
67    /// Compare the given `TextData` against all licenses in the `Store`.
68    ///
69    /// This parallelizes the search as much as it can to find the best match.
70    /// Once a match is obtained, it can be optimized further; see methods on
71    /// `TextData` for more information.
72    pub fn analyze<'a>(&'a self, text: &TextData) -> Match<'a> {
73        let mut res: Vec<PartialMatch<'a>>;
74
75        let analyze_fold =
76            |mut acc: Vec<PartialMatch<'a>>, (name, data): (&'a String, &'a LicenseEntry)| {
77                acc.push(PartialMatch {
78                    score: data.original.match_score(text),
79                    name,
80                    license_type: LicenseType::Original,
81                    data: &data.original,
82                });
83                data.alternates.iter().for_each(|alt| {
84                    acc.push(PartialMatch {
85                        score: alt.match_score(text),
86                        name,
87                        license_type: LicenseType::Alternate,
88                        data: alt,
89                    })
90                });
91                data.headers.iter().for_each(|head| {
92                    acc.push(PartialMatch {
93                        score: head.match_score(text),
94                        name,
95                        license_type: LicenseType::Header,
96                        data: head,
97                    })
98                });
99                acc
100            };
101
102        // parallel analysis
103        #[cfg(not(target_arch = "wasm32"))]
104        {
105            use rayon::prelude::*;
106            res = self
107                .licenses
108                .par_iter()
109                .fold(Vec::new, analyze_fold)
110                .reduce(
111                    Vec::new,
112                    |mut a: Vec<PartialMatch<'a>>, b: Vec<PartialMatch<'a>>| {
113                        a.extend(b);
114                        a
115                    },
116                );
117            res.par_sort_unstable_by(|a, b| b.partial_cmp(a).unwrap());
118        }
119
120        // single-threaded analysis
121        #[cfg(target_arch = "wasm32")]
122        {
123            res = self
124                .licenses
125                .iter()
126                // len of licenses isn't strictly correct, but it'll do
127                .fold(Vec::with_capacity(self.licenses.len()), analyze_fold);
128            res.sort_unstable_by(|a, b| b.partial_cmp(a).unwrap());
129        }
130
131        let m = &res[0];
132
133        Match {
134            score: m.score,
135            name: m.name,
136            license_type: m.license_type,
137            data: m.data,
138        }
139    }
140}