use core::ops::RangeInclusive;
use crate::buckets::constrained::{FuzzyHashBucketMapper, FuzzyHashBucketsInfo};
use crate::buckets::{NUM_BUCKETS_LONG, NUM_BUCKETS_NORMAL, NUM_BUCKETS_SHORT};
use crate::compare::dist_length::{distance, MAX_DISTANCE};
use crate::errors::ParseError;
#[allow(unused_imports)]
use crate::macros::{invariant, optionally_unsafe};
use crate::parse::hex_str::decode_rev_1;
// Private module backing the sealed-trait pattern: `Sealed` is `pub`, but the
// module itself is not, so code outside this crate cannot name the trait and
// therefore cannot implement any trait that lists it as a supertrait.
mod private {
/// Marker supertrait used to seal `ConstrainedLengthProcessingInfo`.
pub trait Sealed {}
}
/// Number of distinct valid encoded length values.
///
/// This is the length of the internal `TOP_VALUE_BY_ENCODING` table; an
/// encoded value is valid exactly when it is smaller than this constant
/// (see [`FuzzyHashLengthEncoding::is_valid`]).
pub const ENCODED_VALUE_SIZE: usize = 170;
// Compile-time guard on the table size — presumably so that every table index
// fits in the `u8` stored by `FuzzyHashLengthEncoding`.
static_assertions::const_assert!(ENCODED_VALUE_SIZE <= 256);
// Inclusive upper bound of the data length covered by each encoded value.
//
// Entry `i` is the largest data length that encodes to `i`; encoded value `i`
// (for `i > 0`) therefore covers `TOP_VALUE_BY_ENCODING[i - 1] + 1` up to and
// including `TOP_VALUE_BY_ENCODING[i]`, and encoded value 0 covers `0..=1`.
//
// The entries are strictly increasing, which the binary searches in
// `FuzzyHashLengthEncoding::new` rely on.
#[rustfmt::skip]
const TOP_VALUE_BY_ENCODING: [u32; ENCODED_VALUE_SIZE] = [
1,
2,
3,
5,
7,
11,
17,
25,
38,
57,
86,
129,
194,
291,
437,
656,
854,
1110,
1443,
1876,
2439,
3171,
3475,
3823,
4205,
4626,
5088,
5597,
6157,
6772,
7450,
8195,
9014,
9916,
10907,
11998,
13198,
14518,
15970,
17567,
19323,
21256,
23382,
25720,
28292,
31121,
34233,
37656,
41422,
45564,
50121,
55133,
60646,
66711,
73382,
80721,
88793,
97672,
107439,
118183,
130002,
143002,
157302,
173032,
190335,
209369,
230306,
253337,
278670,
306538,
337191,
370911,
408002,
448802,
493682,
543050,
597356,
657091,
722800,
795081,
874589,
962048,
1058252,
1164078,
1280486,
1408534,
1549388,
1704327,
1874759,
2062236,
2268459,
2495305,
2744836,
3019320,
3321252,
3653374,
4018711,
4420582,
4862641,
5348905,
5883796,
6472176,
7119394,
7831333,
8614467,
9475909,
10423501,
11465851,
12612437,
13873681,
15261050,
16787154,
18465870,
20312458,
22343706,
24578077,
27035886,
29739474,
32713425,
35984770,
39583245,
43541573,
47895730,
52685306,
57953837,
63749221,
70124148,
77136564,
84850228,
93335252,
102668779,
112935659,
124229227,
136652151,
150317384,
165349128,
181884040,
200072456,
220079703,
242087671,
266296456,
292926096,
322218735,
354440623,
389884688,
428873168,
471760495,
518936559,
570830240,
627913311,
690704607,
759775136,
835752671,
919327967,
1011260767,
1112386880,
1223623232,
1345985727,
1480584256,
1628642751,
1791507135,
1970657856,
2167723648,
2384496256,
2622945920,
2885240448,
3173764736,
3491141248,
3840255616,
4224281216,
];
// Largest data length that has an encoding: the last (and largest) entry of
// `TOP_VALUE_BY_ENCODING`.
const MAX: u32 = TOP_VALUE_BY_ENCODING[TOP_VALUE_BY_ENCODING.len() - 1];
/// Data length limits that depend on the number of buckets.
///
/// The trait is sealed (it requires the crate-private `Sealed` supertrait),
/// so it can only be implemented inside this crate.
pub trait ConstrainedLengthProcessingInfo: private::Sealed {
/// Minimum accepted data length; anything shorter is classified as
/// [`DataLengthValidity::TooSmall`].
const MIN: u32;
/// Minimum data length that is valid without qualification; lengths in
/// `MIN..MIN_CONSERVATIVE` are classified as
/// [`DataLengthValidity::ValidWhenOptimistic`].
const MIN_CONSERVATIVE: u32;
/// Maximum accepted data length (inclusive); defaults to the largest
/// length that has an encoding.
const MAX: u32 = self::MAX;
}
/// Type-level carrier of the data length limits for a given bucket count.
///
/// This is a unit struct with no data; the limits are exposed through its
/// [`ConstrainedLengthProcessingInfo`] implementations. The type is only
/// well-formed for bucket counts for which `FuzzyHashBucketsInfo` implements
/// `FuzzyHashBucketMapper`.
pub struct LengthProcessingInfo<const SIZE_BUCKETS: usize>
where
FuzzyHashBucketsInfo<SIZE_BUCKETS>: FuzzyHashBucketMapper;
// Opt the short bucket count into the sealed trait.
impl private::Sealed for LengthProcessingInfo<NUM_BUCKETS_SHORT> where
FuzzyHashBucketsInfo<NUM_BUCKETS_SHORT>: FuzzyHashBucketMapper
{
}
// Length limits for the short bucket count.
//
// `MIN` and `MIN_CONSERVATIVE` are equal here, so no length is ever classified
// as `DataLengthValidity::ValidWhenOptimistic` for this bucket count.
// `MAX` keeps the trait's default.
impl ConstrainedLengthProcessingInfo for LengthProcessingInfo<NUM_BUCKETS_SHORT>
where
FuzzyHashBucketsInfo<NUM_BUCKETS_SHORT>: FuzzyHashBucketMapper,
{
const MIN: u32 = 10;
const MIN_CONSERVATIVE: u32 = 10;
}
// Opt the normal bucket count into the sealed trait.
impl private::Sealed for LengthProcessingInfo<NUM_BUCKETS_NORMAL> where
FuzzyHashBucketsInfo<NUM_BUCKETS_NORMAL>: FuzzyHashBucketMapper
{
}
// Length limits for the normal bucket count.
//
// Lengths in `50..128` are only valid in the optimistic mode; `MAX` keeps the
// trait's default.
//
// NOTE(review): the reference TLSH implementation is believed to use 256 as
// its conservative minimum for this configuration — confirm that 128 is
// intentional.
impl ConstrainedLengthProcessingInfo for LengthProcessingInfo<NUM_BUCKETS_NORMAL>
where
FuzzyHashBucketsInfo<NUM_BUCKETS_NORMAL>: FuzzyHashBucketMapper,
{
const MIN: u32 = 50;
const MIN_CONSERVATIVE: u32 = 128;
}
// Opt the long bucket count into the sealed trait.
impl private::Sealed for LengthProcessingInfo<NUM_BUCKETS_LONG> where
FuzzyHashBucketsInfo<NUM_BUCKETS_LONG>: FuzzyHashBucketMapper
{
}
// Length limits for the long bucket count (same values as the normal one).
//
// Lengths in `50..128` are only valid in the optimistic mode; `MAX` keeps the
// trait's default.
//
// NOTE(review): the reference TLSH implementation is believed to use 256 as
// its conservative minimum for this configuration — confirm that 128 is
// intentional.
impl ConstrainedLengthProcessingInfo for LengthProcessingInfo<NUM_BUCKETS_LONG>
where
FuzzyHashBucketsInfo<NUM_BUCKETS_LONG>: FuzzyHashBucketMapper,
{
const MIN: u32 = 50;
const MIN_CONSERVATIVE: u32 = 128;
}
// Lookup table indexed by the number of leading zero bits of a `u32`.
//
// Slot `z` holds one past the index of the last `TOP_VALUE_BY_ENCODING` entry
// whose value has exactly `z` leading zeros, or 0 when no entry has that many
// (slot 32, since the table contains no zero value).
//
// Because the table is ascending, the entries with exactly `z` leading zeros
// occupy the index range `[slot z + 1 .. slot z]`, which lets
// `FuzzyHashLengthEncoding::new` binary-search a short sub-slice only.
// Only compiled for tests, docs and the targets listed below.
#[cfg(any(
    test,
    doc,
    any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64",
        all(
            any(target_arch = "riscv32", target_arch = "riscv64"),
            target_feature = "zbb"
        ),
        target_arch = "wasm32",
        target_arch = "wasm64"
    )
))]
const ENCODED_INDICES_BY_LEADING_ZEROS: [usize; 33] = {
    let mut table = [0usize; 33];
    let mut encoded = 0;
    // `for` loops are not available in const evaluation, hence `while`.
    while encoded < TOP_VALUE_BY_ENCODING.len() {
        let zeros = TOP_VALUE_BY_ENCODING[encoded].leading_zeros() as usize;
        encoded += 1;
        // Later (larger) entries overwrite earlier ones with the same count,
        // so each slot ends up as "index of the last such entry, plus one".
        table[zeros] = encoded;
    }
    table
};
/// Classification of a data length against the limits of a bucket count
/// (see [`ConstrainedLengthProcessingInfo`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataLengthValidity {
/// The length is below `MIN`; an error in every processing mode.
TooSmall,
/// The length is at least `MIN` but below `MIN_CONSERVATIVE`; accepted in
/// [`DataLengthProcessingMode::Optimistic`] only.
ValidWhenOptimistic,
/// The length is within `MIN_CONSERVATIVE..=MAX`; accepted in every
/// processing mode.
Valid,
/// The length is above `MAX`; an error in every processing mode.
TooLarge,
}
/// Selects how strictly short data lengths are treated
/// (see [`DataLengthValidity::is_err_on`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum DataLengthProcessingMode {
/// Accepts [`DataLengthValidity::ValidWhenOptimistic`] lengths.
/// This is the default mode.
#[default]
Optimistic,
/// Rejects [`DataLengthValidity::ValidWhenOptimistic`] lengths.
Conservative,
}
impl DataLengthValidity {
    /// Classifies `len` against the limits of the bucket count `SIZE_BUCKETS`.
    ///
    /// The checks run from the smallest limit upwards:
    ///
    /// * `len < MIN` gives [`TooSmall`](Self::TooSmall),
    /// * `len < MIN_CONSERVATIVE` gives
    ///   [`ValidWhenOptimistic`](Self::ValidWhenOptimistic),
    /// * `len <= MAX` gives [`Valid`](Self::Valid),
    /// * anything else gives [`TooLarge`](Self::TooLarge).
    pub fn new<const SIZE_BUCKETS: usize>(len: u32) -> DataLengthValidity
    where
        FuzzyHashBucketsInfo<SIZE_BUCKETS>: FuzzyHashBucketMapper,
        LengthProcessingInfo<SIZE_BUCKETS>: ConstrainedLengthProcessingInfo,
    {
        let min = LengthProcessingInfo::<SIZE_BUCKETS>::MIN;
        let min_conservative = LengthProcessingInfo::<SIZE_BUCKETS>::MIN_CONSERVATIVE;
        let max = LengthProcessingInfo::<SIZE_BUCKETS>::MAX;
        if len < min {
            return Self::TooSmall;
        }
        if len < min_conservative {
            return Self::ValidWhenOptimistic;
        }
        if len > max {
            return Self::TooLarge;
        }
        Self::Valid
    }

    /// Returns `true` if the length is rejected regardless of the processing
    /// mode, i.e. it is either too small or too large.
    pub fn is_err(&self) -> bool {
        match self {
            Self::TooSmall | Self::TooLarge => true,
            Self::ValidWhenOptimistic | Self::Valid => false,
        }
    }

    /// Returns `true` if the length is rejected under the given `mode`.
    ///
    /// This differs from [`is_err()`](Self::is_err) only for
    /// [`ValidWhenOptimistic`](Self::ValidWhenOptimistic), which is rejected
    /// in the conservative mode and accepted in the optimistic one.
    pub fn is_err_on(&self, mode: DataLengthProcessingMode) -> bool {
        match (*self, mode) {
            (Self::Valid, _)
            | (Self::ValidWhenOptimistic, DataLengthProcessingMode::Optimistic) => false,
            (Self::ValidWhenOptimistic, DataLengthProcessingMode::Conservative)
            | (Self::TooSmall, _)
            | (Self::TooLarge, _) => true,
        }
    }
}
/// Encoded representation of a data length.
///
/// A transparent wrapper around a single `u8`. Each valid value (one below
/// [`ENCODED_VALUE_SIZE`]) stands for a contiguous range of data lengths;
/// see [`FuzzyHashLengthEncoding::range`]. The wrapper itself does not
/// guarantee validity — check with [`FuzzyHashLengthEncoding::is_valid`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct FuzzyHashLengthEncoding {
// Raw encoded value; may be out of range when built from raw or parsed input.
lvalue: u8,
}
impl FuzzyHashLengthEncoding {
    /// Re-export of `MAX_DISTANCE` from the length distance module
    /// (`crate::compare::dist_length`), which also backs
    /// [`compare()`](Self::compare).
    pub const MAX_DISTANCE: u32 = MAX_DISTANCE;

    /// Wraps a raw encoded value without validating it.
    ///
    /// Use [`is_valid()`](Self::is_valid) to check the result.
    #[inline(always)]
    pub(crate) fn from_raw(lvalue: u8) -> Self {
        Self { lvalue }
    }

    /// Parses an encoded length from its two-byte textual form.
    ///
    /// The bytes are decoded by `decode_rev_1` from `crate::parse::hex_str`
    /// (presumably two hexadecimal digits — see that helper for the exact
    /// format). The decoded value is not range-checked.
    ///
    /// # Errors
    ///
    /// * [`ParseError::InvalidStringLength`] if `bytes` is not exactly two
    ///   bytes long.
    /// * [`ParseError::InvalidCharacter`] if the helper fails to decode.
    pub(crate) fn from_str_bytes(bytes: &[u8]) -> Result<Self, ParseError> {
        match bytes.len() {
            2 => decode_rev_1(bytes)
                .map(Self::from_raw)
                .ok_or(ParseError::InvalidCharacter),
            _ => Err(ParseError::InvalidStringLength),
        }
    }

    /// Encodes a data length.
    ///
    /// The result is the smallest encoded value whose upper bound in
    /// `TOP_VALUE_BY_ENCODING` is at least `len`; a length of zero encodes
    /// to 0. Returns [`None`] if `len` exceeds the largest table entry.
    pub fn new(len: u32) -> Option<Self> {
        if len == 0 {
            return Some(Self { lvalue: 0 });
        }
        if len > MAX {
            return None;
        }
        cfg_if::cfg_if! {
            if #[cfg(any(
                target_arch = "x86",
                target_arch = "x86_64",
                target_arch = "arm",
                target_arch = "aarch64",
                all(
                    any(target_arch = "riscv32", target_arch = "riscv64"),
                    target_feature = "zbb"
                ),
                target_arch = "wasm32",
                target_arch = "wasm64"
            ))] {
                // On the targets selected above, narrow the search using the
                // leading-zero count: only the table entries sharing `len`'s
                // count can be the answer (or the entry right after them).
                // `len` is non-zero here, so `zeros + 1` is at most 32.
                let zeros = len.leading_zeros() as usize;
                let lo = ENCODED_INDICES_BY_LEADING_ZEROS[zeros + 1];
                let hi = ENCODED_INDICES_BY_LEADING_ZEROS[zeros];
                optionally_unsafe! {
                    invariant!(lo <= TOP_VALUE_BY_ENCODING.len());
                    invariant!(hi <= TOP_VALUE_BY_ENCODING.len());
                    invariant!(lo <= hi);
                }
                // An exact hit and an insertion point both denote the first
                // entry that is not smaller than `len`.
                let offset = match TOP_VALUE_BY_ENCODING[lo..hi].binary_search(&len) {
                    Ok(pos) | Err(pos) => pos,
                };
                Some(Self {
                    lvalue: (lo + offset) as u8,
                })
            }
            else {
                // Generic fallback: binary search over the whole table.
                let index = match TOP_VALUE_BY_ENCODING.as_slice().binary_search(&len) {
                    Ok(pos) | Err(pos) => pos,
                };
                Some(Self {
                    lvalue: index as u8,
                })
            }
        }
    }

    /// Returns the raw encoded value.
    #[inline(always)]
    pub fn value(&self) -> u8 {
        self.lvalue
    }

    /// Returns whether the raw value is below [`ENCODED_VALUE_SIZE`], i.e.
    /// whether it corresponds to a table entry.
    #[inline(always)]
    pub fn is_valid(&self) -> bool {
        usize::from(self.lvalue) < ENCODED_VALUE_SIZE
    }

    /// Returns the distance between two encoded lengths, as computed by
    /// `distance` from `crate::compare::dist_length` on the raw values.
    #[inline(always)]
    pub fn compare(&self, other: &FuzzyHashLengthEncoding) -> u32 {
        distance(self.lvalue, other.lvalue)
    }

    /// Returns the inclusive range of data lengths represented by this
    /// encoded value, or [`None`] if the value is not valid.
    ///
    /// Encoded value 0 additionally covers the length 0.
    pub fn range(&self) -> Option<RangeInclusive<u32>> {
        match usize::from(self.lvalue) {
            0 => Some(0..=TOP_VALUE_BY_ENCODING[0]),
            index if index < ENCODED_VALUE_SIZE => {
                // The range starts right after the previous entry's top value.
                let bottom = TOP_VALUE_BY_ENCODING[index - 1] + 1;
                let top = TOP_VALUE_BY_ENCODING[index];
                Some(bottom..=top)
            }
            _ => None,
        }
    }
}
/// Fallible conversion from a data length, built on
/// [`FuzzyHashLengthEncoding::new`].
impl TryFrom<u32> for FuzzyHashLengthEncoding {
    type Error = ParseError;

    /// Encodes `len`, reporting [`ParseError::LengthIsTooLarge`] when
    /// [`FuzzyHashLengthEncoding::new`] returns [`None`].
    fn try_from(len: u32) -> Result<Self, Self::Error> {
        match Self::new(len) {
            Some(encoding) => Ok(encoding),
            None => Err(ParseError::LengthIsTooLarge),
        }
    }
}
/// Encodes a data length with [`FuzzyHashLengthEncoding::new`] and returns
/// only the raw encoded value ([`None`] if there is no encoding).
///
/// Only compiled for tests and documentation builds.
#[cfg(any(doc, test))]
#[cfg_attr(feature = "unstable", doc(cfg(all())))]
fn encode(len: u32) -> Option<u8> {
    let encoding = FuzzyHashLengthEncoding::new(len)?;
    Some(encoding.lvalue)
}
/// Straightforward reference implementation of the length encoder.
///
/// Only compiled for tests and documentation builds.
#[cfg(any(doc, test))]
#[cfg_attr(feature = "unstable", doc(cfg(all())))]
pub(crate) mod naive {
    use super::TOP_VALUE_BY_ENCODING;

    /// Encodes a data length by scanning the table front to back.
    ///
    /// Returns the index of the first table entry that is not smaller than
    /// `len` (0 for a zero length), or [`None`] if `len` exceeds every entry.
    pub fn encode(len: u32) -> Option<u8> {
        if len == 0 {
            return Some(0);
        }
        TOP_VALUE_BY_ENCODING
            .iter()
            .position(|&top| len <= top)
            .map(|index| index as u8)
    }
}
mod tests;