provenant/license_detection/models/
rule.rs1use std::collections::HashMap;
4use std::ops::Range;
5
6use serde::{Deserialize, Serialize};
7
8use crate::license_detection::index::dictionary::TokenId;
9
/// Base URL of the license data directory in the ScanCode Toolkit GitHub
/// repository (`develop` branch).
///
/// `Rule::rule_url` appends `/<license_expression>.LICENSE` to this base for
/// rules whose `is_from_license` flag is set.
const SCANCODE_LICENSE_URL_BASE: &str =
    "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses";
/// Base URL of the rule data directory in the ScanCode Toolkit GitHub
/// repository (`develop` branch).
///
/// `Rule::rule_url` appends `/<identifier>` to this base for rules whose
/// `is_from_license` flag is not set.
const SCANCODE_RULE_URL_BASE: &str =
    "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules";
14
/// Mutually exclusive classification of a license rule.
///
/// Every variant other than `None` corresponds to exactly one `is_license_*`
/// boolean flag; `RuleKind::from_rule_flags` performs that mapping and rejects
/// inputs with more than one flag set.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so reordering the
/// variants changes how kinds compare.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize, Deserialize,
)]
pub enum RuleKind {
    /// No kind flag is set. This is the `Default` value.
    #[default]
    None,
    /// Corresponds to the `is_license_text` flag.
    Text,
    /// Corresponds to the `is_license_notice` flag.
    Notice,
    /// Corresponds to the `is_license_reference` flag.
    Reference,
    /// Corresponds to the `is_license_tag` flag.
    Tag,
    /// Corresponds to the `is_license_intro` flag.
    Intro,
    /// Corresponds to the `is_license_clue` flag.
    Clue,
}
28
29impl RuleKind {
30 pub fn from_rule_flags(
31 is_license_text: bool,
32 is_license_notice: bool,
33 is_license_reference: bool,
34 is_license_tag: bool,
35 is_license_intro: bool,
36 is_license_clue: bool,
37 ) -> Result<Self, &'static str> {
38 let mut active = None;
39
40 for (enabled, kind) in [
41 (is_license_text, Self::Text),
42 (is_license_notice, Self::Notice),
43 (is_license_reference, Self::Reference),
44 (is_license_tag, Self::Tag),
45 (is_license_intro, Self::Intro),
46 (is_license_clue, Self::Clue),
47 ] {
48 if !enabled {
49 continue;
50 }
51
52 if active.replace(kind).is_some() {
53 return Err("rule has multiple rule kinds set");
54 }
55 }
56
57 Ok(active.unwrap_or(Self::None))
58 }
59
60 pub fn from_match_flags(
61 is_license_text: bool,
62 is_license_reference: bool,
63 is_license_tag: bool,
64 is_license_intro: bool,
65 is_license_clue: bool,
66 ) -> Result<Self, &'static str> {
67 Self::from_rule_flags(
68 is_license_text,
69 false,
70 is_license_reference,
71 is_license_tag,
72 is_license_intro,
73 is_license_clue,
74 )
75 .map_err(|_| "license match has multiple rule kinds set")
76 }
77
78 pub const fn is_license_text(self) -> bool {
79 matches!(self, Self::Text)
80 }
81
82 pub const fn is_license_notice(self) -> bool {
83 matches!(self, Self::Notice)
84 }
85
86 pub const fn is_license_reference(self) -> bool {
87 matches!(self, Self::Reference)
88 }
89
90 pub const fn is_license_tag(self) -> bool {
91 matches!(self, Self::Tag)
92 }
93
94 pub const fn is_license_intro(self) -> bool {
95 matches!(self, Self::Intro)
96 }
97
98 pub const fn is_license_clue(self) -> bool {
99 matches!(self, Self::Clue)
100 }
101}
102
/// A license detection rule together with the data stored alongside it.
///
/// Field notes marked "presumably" are inferred from the field name and type
/// only; this file does not show how those fields are populated or consumed,
/// so confirm them against the loader/indexer before relying on them.
///
/// NOTE(review): `PartialEq`/`Eq` are derived and therefore compare every
/// field, whereas the manual `Ord`/`PartialOrd` impls compare `identifier`
/// only. Two rules can compare as `Ordering::Equal` and still be unequal
/// under `==`; confirm this is intended before using `Rule` as a key in an
/// ordered collection.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rule {
    /// Identifier of the rule. It is the only key used by the `Ord` impl and
    /// the last path segment of `rule_url()` for rules not taken from a license.
    pub identifier: String,

    /// License expression of the rule. For rules with `is_from_license` set,
    /// `rule_url()` uses it as the stem of the `.LICENSE` file name.
    pub license_expression: String,

    /// Text of the rule (presumably the pattern matched against scanned
    /// content — confirm).
    pub text: String,

    /// Token ids of the rule (presumably the tokenized form of `text` —
    /// confirm against the indexer).
    pub tokens: Vec<TokenId>,

    /// Kind of the rule; exposed through `kind()` and the `is_license_*`
    /// accessor methods.
    pub rule_kind: RuleKind,

    /// Presumably marks a rule whose matches are known false positives — confirm.
    pub is_false_positive: bool,

    /// Presumably marks a rule that itself is a required phrase — confirm.
    pub is_required_phrase: bool,

    /// When `true`, `rule_url()` builds a license URL from
    /// `license_expression`; otherwise it builds a rule URL from `identifier`.
    pub is_from_license: bool,

    /// Relevance of the rule (presumably a 0–100 percentage — confirm).
    pub relevance: u8,

    /// Minimum coverage for the rule, if any (presumably a 0–100 percentage —
    /// confirm).
    pub minimum_coverage: Option<u8>,

    /// Presumably records whether `minimum_coverage` was stored with the rule
    /// data rather than derived — confirm.
    pub has_stored_minimum_coverage: bool,

    /// Presumably requires matches to this rule to be contiguous — confirm.
    pub is_continuous: bool,

    /// Ranges of required phrases (presumably token positions within
    /// `tokens` — confirm).
    pub required_phrase_spans: Vec<Range<usize>>,

    /// Stopword information keyed by position (presumably token position ->
    /// number of stopwords at that position — confirm).
    pub stopwords_by_pos: HashMap<usize, usize>,

    /// Optional list of file names referenced by the rule.
    pub referenced_filenames: Option<Vec<String>>,

    /// Optional list of URLs (presumably ones to ignore when they appear in
    /// matched text — confirm).
    pub ignorable_urls: Option<Vec<String>>,

    /// Optional list of emails (presumably ignorable in matched text — confirm).
    pub ignorable_emails: Option<Vec<String>>,

    /// Optional list of copyright statements (presumably ignorable in matched
    /// text — confirm).
    pub ignorable_copyrights: Option<Vec<String>>,

    /// Optional list of copyright holders (presumably ignorable in matched
    /// text — confirm).
    pub ignorable_holders: Option<Vec<String>>,

    /// Optional list of authors (presumably ignorable in matched text — confirm).
    pub ignorable_authors: Option<Vec<String>>,

    /// Optional language of the rule (presumably a language code for `text` —
    /// confirm).
    pub language: Option<String>,

    /// Optional free-form notes attached to the rule.
    pub notes: Option<String>,

    /// Presumably the number of unique tokens in the rule — confirm.
    pub length_unique: usize,

    /// Presumably the number of unique "high" tokens in the rule — confirm.
    pub high_length_unique: usize,

    /// Presumably the number of "high" tokens in the rule — confirm.
    pub high_length: usize,

    /// Presumably the minimum number of matched tokens required — confirm.
    pub min_matched_length: usize,

    /// Presumably the minimum number of matched "high" tokens required — confirm.
    pub min_high_matched_length: usize,

    /// Presumably the minimum number of unique matched tokens required — confirm.
    pub min_matched_length_unique: usize,

    /// Presumably the minimum number of unique matched "high" tokens required
    /// — confirm.
    pub min_high_matched_length_unique: usize,

    /// Presumably set for rules below a "small" length threshold — confirm.
    pub is_small: bool,

    /// Presumably set for rules below a "tiny" length threshold — confirm.
    pub is_tiny: bool,

    /// Presumably set when the rule text starts with a license word — confirm.
    pub starts_with_license: bool,

    /// Presumably set when the rule text ends with a license word — confirm.
    pub ends_with_license: bool,

    /// Presumably marks the rule as deprecated — confirm.
    pub is_deprecated: bool,

    /// Optional SPDX license key associated with the rule.
    pub spdx_license_key: Option<String>,

    /// Additional SPDX license keys associated with the rule.
    pub other_spdx_license_keys: Vec<String>,
}
220
221impl PartialOrd for Rule {
222 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
223 Some(self.cmp(other))
224 }
225}
226
227impl Ord for Rule {
228 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
229 self.identifier.cmp(&other.identifier)
230 }
231}
232
233impl Rule {
234 pub fn rule_url(&self) -> Option<String> {
235 if self.is_from_license {
236 return (!self.license_expression.is_empty()).then(|| {
237 format!(
238 "{SCANCODE_LICENSE_URL_BASE}/{}.LICENSE",
239 self.license_expression
240 )
241 });
242 }
243
244 (!self.identifier.is_empty())
245 .then(|| format!("{SCANCODE_RULE_URL_BASE}/{}", self.identifier))
246 }
247
248 pub const fn kind(&self) -> RuleKind {
249 self.rule_kind
250 }
251
252 pub const fn is_license_text(&self) -> bool {
253 self.rule_kind.is_license_text()
254 }
255
256 #[allow(dead_code)]
261 pub const fn is_license_notice(&self) -> bool {
262 self.rule_kind.is_license_notice()
263 }
264
265 pub const fn is_license_reference(&self) -> bool {
266 self.rule_kind.is_license_reference()
267 }
268
269 pub const fn is_license_tag(&self) -> bool {
270 self.rule_kind.is_license_tag()
271 }
272
273 #[allow(dead_code)]
277 pub const fn is_license_intro(&self) -> bool {
278 self.rule_kind.is_license_intro()
279 }
280
281 pub const fn is_license_clue(&self) -> bool {
282 self.rule_kind.is_license_clue()
283 }
284}