// whichlicense_detection/detecting/fuzzy_implementation.rs
pub mod fuzzy_implementation {
19 use std::{fs::File, io::Read};
20
21 use fuzzyhash::FuzzyHash;
22
23 use crate::{
24 detecting::detecting::{DiskData, LicenseEntry},
25 LicenseListActions, LicenseMatch,
26 };
27
28 pub struct FuzzyDetection {
29 pub licenses: Vec<LicenseEntry<String>>,
30 pub min_confidence: u8,
31 pub exit_on_exact_match: bool,
32
33 pub normalization_fn: fn(&str) -> String,
34 }
35 impl LicenseListActions<String> for FuzzyDetection {
36 fn match_by_plain_text(&self, plain_text: &str) -> Vec<LicenseMatch> {
37 self.match_by_hash(FuzzyHash::new((self.normalization_fn)(plain_text)).to_string())
38 }
39
40 fn match_by_hash(&self, hash: String) -> Vec<LicenseMatch> {
41 let mut matches: Vec<LicenseMatch> = Vec::new();
42 for license in self.licenses.iter() {
43 let res = FuzzyHash::compare(&hash, license.hash.as_str());
44 let res = match res {
45 Ok(r) => r as u8,
46 Err(_e) => 0,
47 };
48 if res >= self.min_confidence {
49 matches.push(LicenseMatch {
50 name: license.name.to_string(),
51 confidence: res as f32,
52 });
53 if self.exit_on_exact_match && res == 100 {
54 break;
55 }
56 }
57 }
58 matches.sort_unstable_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
59 matches
60 }
61
62 fn get_license_list(&self) -> Vec<(String, String)> {
63 self.licenses
64 .iter()
65 .map(|l| (l.name.to_string(), l.hash.to_string()))
66 .collect()
67 }
68
69 fn load_from_memory(&mut self, raw: &Vec<u8>) {
70 let loaded: DiskData<String> = bincode::deserialize(&raw).unwrap_or(DiskData {
71 licenses: Vec::new(),
72 });
73 self.licenses.extend(loaded.licenses);
74 }
75
76 fn load_from_file(&mut self, file_path: &str) {
77 let mut file = File::open(file_path).unwrap();
78 let mut contents = Vec::new();
79 file.read_to_end(&mut contents).unwrap();
80
81 self.load_from_memory(&contents);
82 }
83
84 fn add_plain(&mut self, license_name: &str, license_text: &str) {
85 let stripped = (self.normalization_fn)(license_text);
86 let fuzzy = FuzzyHash::new(stripped);
87 self.licenses.push(LicenseEntry {
88 name: license_name.to_string(),
89 hash: fuzzy.to_string(),
90 });
91 }
92
93 fn hash_from_inline_string(&self, license_text: &str) -> String {
94 FuzzyHash::new((self.normalization_fn)(license_text)).to_string()
95 }
96
97 fn remove(&mut self, license_name: &str) {
98 self.licenses.retain(|l| l.name != license_name.to_string());
99 }
100
101 fn set_normalization_fn(&mut self, func: fn(&str) -> String) {
102 self.normalization_fn = func;
103 }
104 }
105}