1use crate::buffer_proxy_iterator::BufferProxyIterator;
2use crate::trie::Node;
3use crate::Type;
4use std::hash::{Hash, Hasher};
5
/// A candidate match of a trie [`Node`] against a span of the input, plus the
/// counters that `confidence` uses to score it.
///
/// Equality and hashing only consider the identity of `node` and the value of
/// `begin_separate`; all other fields are ignored for those purposes.
#[derive(Clone)]
pub(crate) struct Match {
    /// Trie node this match refers to. Compared and hashed by address.
    pub node: &'static Node,
    /// Index at which the match begins. `commit` censors from here (or from
    /// one past here) up to and including `end`.
    pub start: usize,
    /// Index at which the match ends; inclusive upper bound of the censored range.
    pub end: usize,
    /// NOTE(review): presumably the most recently matched character. This file
    /// only touches it in `combine` (which keeps the smaller one) -- confirm
    /// against the code that constructs matches.
    pub last: char,
    /// When `false`, `confidence` is reduced (further so if the node contains
    /// a space). Presumably means "the match started at a word boundary" --
    /// TODO confirm with the caller.
    pub begin_separate: bool,
    /// When `false`, `confidence` is reduced. Presumably means "the match
    /// ended at a word boundary" -- TODO confirm with the caller.
    pub end_separate: bool,
    /// Counter that lowers `confidence`; presumably the number of extra spaces
    /// inside the matched span -- TODO confirm.
    pub spaces: u8,
    /// Counter that lowers `confidence`; presumably the number of characters
    /// skipped while matching -- TODO confirm.
    pub skipped: u8,
    /// Counter that lowers `confidence`. When it reaches 2, `commit` also
    /// flags the result as `Type::EVASIVE & Type::MILD`.
    pub replacements: u8,
    /// Not read by the scoring in this file; presumably the number of repeated
    /// characters collapsed while matching -- TODO confirm.
    pub repetitions: u8,
    /// Counter that lowers `confidence`, with an extra penalty for nodes of
    /// depth 2.
    pub low_confidence_replacements: u8,
}
31
32impl Match {
33 pub(crate) fn combine(&self, other: &Self) -> Self {
35 Self {
36 start: self.start.min(other.start),
37 spaces: self.spaces.min(other.spaces),
38 skipped: self.skipped.min(other.skipped),
39 replacements: self.replacements.min(other.replacements),
40 low_confidence_replacements: self
41 .low_confidence_replacements
42 .min(other.low_confidence_replacements),
43 repetitions: self.repetitions.min(other.repetitions),
44 last: self.last.min(other.last),
45 ..*self
46 }
47 }
48
49 fn confidence(&self) -> i64 {
50 let mut confidence: i64 = 0;
51 confidence += self.node.depth.max(1).ilog2() as i64;
52 confidence += (self.end - self.start).max(1).ilog2() as i64;
53 if self.node.depth == 1 {
54 confidence += 1;
55 } else {
56 if !self.begin_separate {
57 confidence -= 2;
58 if self.node.contains_space {
59 confidence -= 3;
60 }
61 }
62 if !self.end_separate {
63 confidence -= 1;
64 }
65 if !self.begin_separate && !self.end_separate {
66 confidence -= 1;
67 }
68 }
69 if self.node.typ.is(Type::SEVERE) {
70 confidence += 3;
71 } else if self.node.typ.is(Type::MODERATE_OR_HIGHER)
72 && (self.node.depth == 1 || self.node.typ.isnt(Type::EVASIVE & Type::SEVERE))
73 {
74 confidence += 2
75 } else if self.node.typ.is(Type::MILD_OR_HIGHER)
76 && (self.node.depth == 1
77 || self.node.typ.isnt(Type::EVASIVE & Type::MODERATE_OR_HIGHER))
78 {
79 confidence += 1;
80 };
81 confidence -= (self.skipped as u16 + self.spaces as u16 + self.replacements as u16 + 1)
82 .ilog2() as i64;
83 confidence -= (self.low_confidence_replacements + 1).ilog2() as i64;
84 if self.node.depth == 2 && self.low_confidence_replacements > 0 {
85 confidence -= 2;
87 }
88 if self.node.typ.is(Type::EVASIVE & Type::SEVERE) {
89 confidence -= 3;
90 } else if self.node.typ.is(Type::EVASIVE & Type::MODERATE_OR_HIGHER) {
91 confidence -= 2;
92 } else if self.node.typ.is(Type::EVASIVE & Type::MILD) {
93 confidence -= 1;
94 }
95 confidence
96 }
97
98 pub(crate) fn commit<I: Iterator<Item = char>>(
100 &self,
101 typ: &mut Type,
102 spy: &mut BufferProxyIterator<I>,
103 censor_threshold: Type,
104 censor_first_character_threshold: Type,
105 censor_replacement: char,
106 ) -> bool {
107 #[cfg(feature = "trace")]
108 print!(
109 "Committing {} with begin_separate={}, spaces={}, skipped={}, end_separate={}, depth={}, replacements={}, lcr={}, contains_space={}: ",
110 self.node.trace,
111 self.begin_separate,
112 self.spaces,
113 self.skipped,
114 self.end_separate,
115 self.node.depth,
116 self.replacements,
117 self.low_confidence_replacements,
118 self.node.contains_space
119 );
120
121 let confidence = self.confidence();
122
123 if confidence <= 0 {
124 #[cfg(feature = "trace")]
125 println!("rejected with confidence {confidence}");
126 return false;
127 }
128 #[cfg(feature = "trace")]
129 println!("accepted with confidence {confidence}");
130
131 *typ |= self.node.typ
174 | if self.replacements >= 2 {
175 Type::EVASIVE & Type::MILD
176 } else {
177 Type::NONE
178 };
179
180 if self.node.typ.is(censor_threshold) {
182 let offset =
184 if self.node.typ.is(censor_first_character_threshold) || self.node.depth == 1 {
185 0
186 } else {
187 1
188 };
189 spy.censor(self.start + offset..=self.end, censor_replacement);
190 }
191
192 true
193 }
194}
195
196impl PartialEq for Match {
197 fn eq(&self, other: &Self) -> bool {
198 std::ptr::eq(self.node, other.node) && self.begin_separate == other.begin_separate
199 }
200}
201
// Marker impl: the `PartialEq` comparison (node address plus `begin_separate`)
// is reflexive, symmetric and transitive, so `Match` qualifies as `Eq`.
impl Eq for Match {}
203
204impl Hash for Match {
205 fn hash<H: Hasher>(&self, state: &mut H) {
206 state.write_usize(self.node as *const _ as usize);
207 state.write_u8(self.begin_separate as u8);
208 }
209}