// whichlicense_detection/detecting/fuzzy_implementation.rs

/*
*   Copyright (c) 2023 Duart Snel
*   All rights reserved.

*   Licensed under the Apache License, Version 2.0 (the "License");
*   you may not use this file except in compliance with the License.
*   You may obtain a copy of the License at

*   http://www.apache.org/licenses/LICENSE-2.0

*   Unless required by applicable law or agreed to in writing, software
*   distributed under the License is distributed on an "AS IS" BASIS,
*   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*   See the License for the specific language governing permissions and
*   limitations under the License.
*/

18pub mod fuzzy_implementation {
19    use std::{fs::File, io::Read};
20
21    use fuzzyhash::FuzzyHash;
22
23    use crate::{
24        detecting::detecting::{DiskData, LicenseEntry},
25        LicenseListActions, LicenseMatch,
26    };
27
28    pub struct FuzzyDetection {
29        pub licenses: Vec<LicenseEntry<String>>,
30        pub min_confidence: u8,
31        pub exit_on_exact_match: bool,
32
33        pub normalization_fn: fn(&str) -> String,
34    }
35    impl LicenseListActions<String> for FuzzyDetection {
36        fn match_by_plain_text(&self, plain_text: &str) -> Vec<LicenseMatch> {
37            self.match_by_hash(FuzzyHash::new((self.normalization_fn)(plain_text)).to_string())
38        }
39
40        fn match_by_hash(&self, hash: String) -> Vec<LicenseMatch> {
41            let mut matches: Vec<LicenseMatch> = Vec::new();
42            for license in self.licenses.iter() {
43                let res = FuzzyHash::compare(&hash, license.hash.as_str());
44                let res = match res {
45                    Ok(r) => r as u8,
46                    Err(_e) => 0,
47                };
48                if res >= self.min_confidence {
49                    matches.push(LicenseMatch {
50                        name: license.name.to_string(),
51                        confidence: res as f32,
52                    });
53                    if self.exit_on_exact_match && res == 100 {
54                        break;
55                    }
56                }
57            }
58            matches.sort_unstable_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
59            matches
60        }
61
62        fn get_license_list(&self) -> Vec<(String, String)> {
63            self.licenses
64                .iter()
65                .map(|l| (l.name.to_string(), l.hash.to_string()))
66                .collect()
67        }
68
69        fn load_from_memory(&mut self, raw: &Vec<u8>) {
70            let loaded: DiskData<String> = bincode::deserialize(&raw).unwrap_or(DiskData {
71                licenses: Vec::new(),
72            });
73            self.licenses.extend(loaded.licenses);
74        }
75
76        fn load_from_file(&mut self, file_path: &str) {
77            let mut file = File::open(file_path).unwrap();
78            let mut contents = Vec::new();
79            file.read_to_end(&mut contents).unwrap();
80
81            self.load_from_memory(&contents);
82        }
83
84        fn add_plain(&mut self, license_name: &str, license_text: &str) {
85            let stripped = (self.normalization_fn)(license_text);
86            let fuzzy = FuzzyHash::new(stripped);
87            self.licenses.push(LicenseEntry {
88                name: license_name.to_string(),
89                hash: fuzzy.to_string(),
90            });
91        }
92
93        fn hash_from_inline_string(&self, license_text: &str) -> String {
94            FuzzyHash::new((self.normalization_fn)(license_text)).to_string()
95        }
96
97        fn remove(&mut self, license_name: &str) {
98            self.licenses.retain(|l| l.name != license_name.to_string());
99        }
100
101        fn set_normalization_fn(&mut self, func: fn(&str) -> String) {
102            self.normalization_fn = func;
103        }
104    }
105}