1use core::ops::RangeInclusive;
8
9use crate::buckets::constrained::{FuzzyHashBucketMapper, FuzzyHashBucketsInfo};
10use crate::buckets::{NUM_BUCKETS_LONG, NUM_BUCKETS_NORMAL, NUM_BUCKETS_SHORT};
11use crate::compare::dist_length::{distance, MAX_DISTANCE};
12use crate::errors::ParseError;
13#[allow(unused_imports)]
14use crate::macros::{invariant, optionally_unsafe};
15use crate::parse::hex_str::decode_rev_1;
16
17mod private {
19 pub trait Sealed {}
21}
22
23pub const ENCODED_VALUE_SIZE: usize = 170;
28static_assertions::const_assert!(ENCODED_VALUE_SIZE <= 256);
29
30#[rustfmt::skip]
35const TOP_VALUE_BY_ENCODING: [u32; ENCODED_VALUE_SIZE] = [
36 1,
38 2,
39 3,
40 5,
41 7,
42 11,
43 17,
44 25,
45 38,
46 57,
47 86,
48 129,
49 194,
50 291,
51 437,
52 656,
53 854,
55 1110,
56 1443,
57 1876,
58 2439,
59 3171,
60 3475,
63 3823,
64 4205,
65 4626,
66 5088,
67 5597,
68 6157,
69 6772,
70 7450,
71 8195,
72 9014,
73 9916,
74 10907,
75 11998,
76 13198,
77 14518,
78 15970,
79 17567,
80 19323,
81 21256,
82 23382,
83 25720,
84 28292,
85 31121,
86 34233,
87 37656,
88 41422,
89 45564,
90 50121,
91 55133,
92 60646,
93 66711,
94 73382,
95 80721,
96 88793,
97 97672,
98 107439,
99 118183,
100 130002,
101 143002,
102 157302,
103 173032,
104 190335,
105 209369,
106 230306,
107 253337,
108 278670,
109 306538,
110 337191,
111 370911,
112 408002,
113 448802,
114 493682,
115 543050,
116 597356,
117 657091,
118 722800,
119 795081,
120 874589,
121 962048,
122 1058252,
123 1164078,
124 1280486,
125 1408534,
126 1549388,
127 1704327,
128 1874759,
129 2062236,
130 2268459,
131 2495305,
132 2744836,
133 3019320,
134 3321252,
135 3653374,
136 4018711,
137 4420582,
138 4862641,
139 5348905,
140 5883796,
141 6472176,
142 7119394,
143 7831333,
144 8614467,
145 9475909,
146 10423501,
147 11465851,
148 12612437,
149 13873681,
150 15261050,
151 16787154,
152 18465870,
153 20312458,
154 22343706,
155 24578077,
156 27035886,
157 29739474,
158 32713425,
159 35984770,
160 39583245,
161 43541573,
162 47895730,
163 52685306,
164 57953837,
165 63749221,
166 70124148,
167 77136564,
168 84850228,
169 93335252,
170 102668779,
171 112935659,
172 124229227,
173 136652151,
174 150317384,
175 165349128,
176 181884040,
177 200072456,
178 220079703,
179 242087671,
180 266296456,
181 292926096,
182 322218735,
183 354440623,
184 389884688,
185 428873168,
186 471760495,
187 518936559,
188 570830240,
189 627913311,
190 690704607,
191 759775136,
192 835752671,
193 919327967,
194 1011260767,
195 1112386880,
196 1223623232,
197 1345985727,
198 1480584256,
199 1628642751,
200 1791507135,
201 1970657856,
202 2167723648,
203 2384496256,
204 2622945920,
205 2885240448,
206 3173764736,
207 3491141248,
208 3840255616,
209 4224281216,
210];
211
212pub(crate) const MAX: u32 = TOP_VALUE_BY_ENCODING[TOP_VALUE_BY_ENCODING.len() - 1];
214
215pub trait ConstrainedLengthProcessingInfo: private::Sealed {
217 const MIN: u32;
219 const MIN_CONSERVATIVE: u32;
221 const MAX: u32 = self::MAX;
226}
227
228pub struct LengthProcessingInfo<const SIZE_BUCKETS: usize>
233where
234 FuzzyHashBucketsInfo<SIZE_BUCKETS>: FuzzyHashBucketMapper;
235impl private::Sealed for LengthProcessingInfo<NUM_BUCKETS_SHORT> where
237 FuzzyHashBucketsInfo<NUM_BUCKETS_SHORT>: FuzzyHashBucketMapper
238{
239}
240impl ConstrainedLengthProcessingInfo for LengthProcessingInfo<NUM_BUCKETS_SHORT>
241where
242 FuzzyHashBucketsInfo<NUM_BUCKETS_SHORT>: FuzzyHashBucketMapper,
243{
244 const MIN: u32 = 10;
245 const MIN_CONSERVATIVE: u32 = 10;
246}
247impl private::Sealed for LengthProcessingInfo<NUM_BUCKETS_NORMAL> where
249 FuzzyHashBucketsInfo<NUM_BUCKETS_NORMAL>: FuzzyHashBucketMapper
250{
251}
252impl ConstrainedLengthProcessingInfo for LengthProcessingInfo<NUM_BUCKETS_NORMAL>
253where
254 FuzzyHashBucketsInfo<NUM_BUCKETS_NORMAL>: FuzzyHashBucketMapper,
255{
256 const MIN: u32 = 50;
257 const MIN_CONSERVATIVE: u32 = 128;
258}
259impl private::Sealed for LengthProcessingInfo<NUM_BUCKETS_LONG> where
261 FuzzyHashBucketsInfo<NUM_BUCKETS_LONG>: FuzzyHashBucketMapper
262{
263}
264impl ConstrainedLengthProcessingInfo for LengthProcessingInfo<NUM_BUCKETS_LONG>
265where
266 FuzzyHashBucketsInfo<NUM_BUCKETS_LONG>: FuzzyHashBucketMapper,
267{
268 const MIN: u32 = 50;
269 const MIN_CONSERVATIVE: u32 = 128;
270}
271
272#[cfg(any(
279 test,
280 doc,
281 any(
282 target_arch = "x86",
283 target_arch = "x86_64",
284 target_arch = "arm",
285 target_arch = "aarch64",
286 all(
287 any(target_arch = "riscv32", target_arch = "riscv64"),
288 target_feature = "zbb"
289 ),
290 target_arch = "wasm32",
291 target_arch = "wasm64"
292 )
293))]
294const ENCODED_INDICES_BY_LEADING_ZEROS: [usize; 33] = {
295 let mut array = [0; 33];
296 let mut i = 0;
297 while i < TOP_VALUE_BY_ENCODING.len() {
298 array[TOP_VALUE_BY_ENCODING[i].leading_zeros() as usize] = i + 1;
299 i += 1;
300 }
301 array
302};
303
304#[derive(Debug, Clone, Copy, PartialEq, Eq)]
306pub enum DataLengthValidity {
307 TooSmall,
309 ValidWhenOptimistic,
312 Valid,
314 TooLarge,
316}
317
318#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
325pub enum DataLengthProcessingMode {
326 #[default]
331 Optimistic,
332 Conservative,
338}
339
340impl DataLengthValidity {
341 pub fn new<const SIZE_BUCKETS: usize>(len: u32) -> DataLengthValidity
347 where
348 FuzzyHashBucketsInfo<SIZE_BUCKETS>: FuzzyHashBucketMapper,
349 LengthProcessingInfo<SIZE_BUCKETS>: ConstrainedLengthProcessingInfo,
350 {
351 if len < LengthProcessingInfo::<SIZE_BUCKETS>::MIN {
352 DataLengthValidity::TooSmall
353 } else if len < LengthProcessingInfo::<SIZE_BUCKETS>::MIN_CONSERVATIVE {
354 DataLengthValidity::ValidWhenOptimistic
355 } else if len <= LengthProcessingInfo::<SIZE_BUCKETS>::MAX {
356 DataLengthValidity::Valid
357 } else {
358 DataLengthValidity::TooLarge
359 }
360 }
361
362 pub fn is_err(&self) -> bool {
365 matches!(
366 *self,
367 DataLengthValidity::TooSmall | DataLengthValidity::TooLarge
368 )
369 }
370
371 pub fn is_err_on(&self, mode: DataLengthProcessingMode) -> bool {
374 match *self {
375 DataLengthValidity::TooLarge | DataLengthValidity::TooSmall => true,
376 DataLengthValidity::Valid => false,
377 DataLengthValidity::ValidWhenOptimistic => {
378 matches!(mode, DataLengthProcessingMode::Conservative)
379 }
380 }
381 }
382}
383
384#[derive(Debug, Clone, Copy, PartialEq, Eq)]
400#[repr(transparent)]
401pub struct FuzzyHashLengthEncoding {
402 lvalue: u8,
404}
405impl FuzzyHashLengthEncoding {
406 pub const MAX_DISTANCE: u32 = MAX_DISTANCE;
408
409 #[inline(always)]
411 pub(crate) fn from_raw(lvalue: u8) -> Self {
412 Self { lvalue }
413 }
414
415 pub(crate) fn from_str_bytes(bytes: &[u8]) -> Result<Self, ParseError> {
418 if bytes.len() != 2 {
419 return Err(ParseError::InvalidStringLength);
420 }
421 decode_rev_1(bytes)
422 .ok_or(ParseError::InvalidCharacter)
423 .map(Self::from_raw)
424 }
425
426 pub fn new(len: u32) -> Option<Self> {
428 if len == 0 {
429 return Some(Self { lvalue: 0 }); }
431 if len > MAX {
432 return None;
433 }
434 cfg_if::cfg_if! {
435 if #[cfg(any(
440 target_arch = "x86",
441 target_arch = "x86_64",
442 target_arch = "arm",
443 target_arch = "aarch64",
444 all(
445 any(target_arch = "riscv32", target_arch = "riscv64"),
446 target_feature = "zbb"
447 ),
448 target_arch = "wasm32",
449 target_arch = "wasm64"
450 ))] {
451 let clz = len.leading_zeros() as usize;
452 let bottom = ENCODED_INDICES_BY_LEADING_ZEROS[clz + 1];
453 let top = ENCODED_INDICES_BY_LEADING_ZEROS[clz];
454 optionally_unsafe! {
455 invariant!(bottom <= TOP_VALUE_BY_ENCODING.len());
456 invariant!(top <= TOP_VALUE_BY_ENCODING.len());
457 invariant!(bottom <= top);
458 }
459 Some(Self {
460 lvalue: match TOP_VALUE_BY_ENCODING[bottom..top].binary_search(&len) {
461 Ok(i) => bottom + i,
462 Err(i) => bottom + i,
463 } as u8,
464 })
465 }
466 else {
467 Some(Self {
468 lvalue: match TOP_VALUE_BY_ENCODING.as_slice().binary_search(&len) {
469 Ok(i) => i,
470 Err(i) => i,
471 } as u8,
472 })
473 }
474 }
475 }
476
477 #[inline(always)]
479 pub fn value(&self) -> u8 {
480 self.lvalue
481 }
482
483 #[inline(always)]
491 pub fn is_valid(&self) -> bool {
492 (self.lvalue as usize) < ENCODED_VALUE_SIZE
493 }
494
495 #[inline(always)]
498 pub fn compare(&self, other: &FuzzyHashLengthEncoding) -> u32 {
499 distance(self.lvalue, other.lvalue)
500 }
501
502 pub fn range(&self) -> Option<RangeInclusive<u32>> {
508 if self.lvalue == 0 {
509 return Some(0..=TOP_VALUE_BY_ENCODING[0]);
510 }
511 if self.lvalue as usize >= ENCODED_VALUE_SIZE {
512 return None;
513 }
514 let bottom = TOP_VALUE_BY_ENCODING[self.lvalue as usize - 1] + 1;
515 let top = TOP_VALUE_BY_ENCODING[self.lvalue as usize];
516 Some(bottom..=top)
517 }
518}
519impl TryFrom<u32> for FuzzyHashLengthEncoding {
520 type Error = ParseError;
521
522 fn try_from(len: u32) -> Result<Self, Self::Error> {
523 Self::new(len).ok_or(ParseError::LengthIsTooLarge)
524 }
525}
526
527#[cfg(any(doc, test))]
529#[cfg_attr(feature = "unstable", doc(cfg(all())))]
530fn encode(len: u32) -> Option<u8> {
531 FuzzyHashLengthEncoding::new(len).map(|x| x.lvalue)
532}
533
534#[cfg(any(doc, test))]
536#[cfg_attr(feature = "unstable", doc(cfg(all())))]
537pub(crate) mod naive {
538 use super::TOP_VALUE_BY_ENCODING;
539
540 pub fn encode(len: u32) -> Option<u8> {
542 if len == 0 {
543 return Some(0); }
545 for (i, &top) in TOP_VALUE_BY_ENCODING.iter().enumerate() {
546 if len <= top {
547 return Some(i as u8);
548 }
549 }
550 None
551 }
552}
553
554mod tests;