use core::str::FromStr;
use crate::pearson::{b_mapping, fast_b_mapping};
use crate::quartile::get_quartiles;
use crate::util::{l_capturing, swap_byte};
use crate::BUCKETS;
const SLIDING_WND_SIZE: usize = 5;
const RNG_SIZE: usize = SLIDING_WND_SIZE - 1;
#[derive(Clone)]
pub struct TlshBuilder<
const EFF_BUCKETS: usize,
const TLSH_CHECKSUM_LEN: usize,
const CODE_SIZE: usize,
const TLSH_STRING_LEN_REQ: usize,
const MIN_DATA_LENGTH: usize,
> {
a_bucket: [u32; BUCKETS],
slide_window: [u8; RNG_SIZE],
checksum: [u8; TLSH_CHECKSUM_LEN],
data_len: usize,
}
impl<
const EFF_BUCKETS: usize,
const TLSH_CHECKSUM_LEN: usize,
const CODE_SIZE: usize,
const TLSH_STRING_LEN_REQ: usize,
const MIN_DATA_LENGTH: usize,
> Default
for TlshBuilder<EFF_BUCKETS, TLSH_CHECKSUM_LEN, CODE_SIZE, TLSH_STRING_LEN_REQ, MIN_DATA_LENGTH>
{
fn default() -> Self {
Self::new()
}
}
impl<
const EFF_BUCKETS: usize,
const TLSH_CHECKSUM_LEN: usize,
const CODE_SIZE: usize,
const TLSH_STRING_LEN_REQ: usize,
const MIN_DATA_LENGTH: usize,
> TlshBuilder<EFF_BUCKETS, TLSH_CHECKSUM_LEN, CODE_SIZE, TLSH_STRING_LEN_REQ, MIN_DATA_LENGTH>
{
pub fn new() -> Self {
Self {
a_bucket: [0; BUCKETS],
slide_window: [0; RNG_SIZE],
checksum: [0; TLSH_CHECKSUM_LEN],
data_len: 0,
}
}
pub fn build_from(
data: &[u8],
) -> Option<Tlsh<TLSH_CHECKSUM_LEN, TLSH_STRING_LEN_REQ, CODE_SIZE>> {
let mut builder = Self::new();
builder.update(data);
builder.build()
}
pub fn update(&mut self, data: &[u8]) {
let mut fed_len = self.data_len;
for b in data {
let b_0 = *b;
let [b_4, b_3, b_2, b_1] = self.slide_window;
if fed_len >= 4 {
for k in 0..TLSH_CHECKSUM_LEN {
if k == 0 {
self.checksum[k] =
fast_b_mapping::<EFF_BUCKETS>(1, b_0, b_1, self.checksum[k]);
} else {
self.checksum[k] =
b_mapping(self.checksum[k - 1], b_0, b_1, self.checksum[k]);
}
}
let r = fast_b_mapping::<EFF_BUCKETS>(49, b_0, b_1, b_2);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(12, b_0, b_1, b_3);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(84, b_0, b_1, b_4);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(178, b_0, b_2, b_3);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(166, b_0, b_2, b_4);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(230, b_0, b_3, b_4);
self.a_bucket[usize::from(r)] += 1;
}
fed_len += 1;
self.slide_window = [b_3, b_2, b_1, b_0];
}
self.data_len += data.len();
}
pub fn build(&self) -> Option<Tlsh<TLSH_CHECKSUM_LEN, TLSH_STRING_LEN_REQ, CODE_SIZE>> {
if self.data_len < MIN_DATA_LENGTH {
return None;
}
let bucket: &[u32; EFF_BUCKETS] = (&self.a_bucket[..EFF_BUCKETS])
.try_into()
.expect("EFF_BUCKETS is bigger than BUCKETS");
let (q1, q2, q3) = get_quartiles::<EFF_BUCKETS>(bucket);
if q3 == 0 {
return None;
}
let nonzero = self
.a_bucket
.iter()
.take(CODE_SIZE * 4)
.filter(|v| **v > 0)
.count();
if EFF_BUCKETS == 48 {
if nonzero < 18 {
return None;
}
} else if nonzero <= 2 * CODE_SIZE {
return None;
}
let mut code: [u8; CODE_SIZE] = [0; CODE_SIZE];
for (c, slice) in code.iter_mut().zip(bucket.chunks(4)) {
*c = slice.iter().rev().fold(0u8, |h, &k| {
(if q3 < k {
3
} else if q2 < k {
2
} else if q1 < k {
1
} else {
0
}) | h << 2
});
}
let lvalue = l_capturing(self.data_len as u32);
let q1_ratio = (((((q1 * 100) as f32) / (q3 as f32)) as u32) % 16) as u8;
let q2_ratio = (((((q2 * 100) as f32) / (q3 as f32)) as u32) % 16) as u8;
Some(Tlsh {
lvalue,
q1_ratio,
q2_ratio,
checksum: self.checksum,
code,
})
}
}
#[derive(Clone)]
pub struct Tlsh<
const TLSH_CHECKSUM_LEN: usize,
const TLSH_STRING_LEN_REQ: usize,
const CODE_SIZE: usize,
> {
lvalue: u8,
q1_ratio: u8,
q2_ratio: u8,
checksum: [u8; TLSH_CHECKSUM_LEN],
code: [u8; CODE_SIZE],
}
impl<const TLSH_CHECKSUM_LEN: usize, const TLSH_STRING_LEN_REQ: usize, const CODE_SIZE: usize>
Tlsh<TLSH_CHECKSUM_LEN, TLSH_STRING_LEN_REQ, CODE_SIZE>
{
pub fn hash(&self) -> [u8; TLSH_STRING_LEN_REQ] {
let mut hash = [0; TLSH_STRING_LEN_REQ];
hash[0] = b'T';
hash[1] = b'1';
let mut i = 2;
for k in &self.checksum {
to_hex(&mut hash, &mut i, swap_byte(*k));
}
to_hex(&mut hash, &mut i, swap_byte(self.lvalue));
let qb = (self.q1_ratio << 4) | self.q2_ratio;
to_hex(&mut hash, &mut i, qb);
for c in self.code.iter().rev() {
to_hex(&mut hash, &mut i, *c);
}
hash
}
#[cfg(feature = "diff")]
pub fn diff(&self, other: &Self, len_diff: bool) -> i32 {
use crate::util::{h_distance, mod_diff};
const LENGTH_MULT: i32 = 12;
const QRATIO_MULT: i32 = 12;
const RANGE_LVALUE: u32 = 256;
const RANGE_QRATIO: u32 = 16;
let mut diff;
if len_diff {
let ldiff = mod_diff(self.lvalue, other.lvalue, RANGE_LVALUE);
if ldiff == 0 {
diff = 0;
} else if ldiff == 1 {
diff = 1;
} else {
diff = ldiff * LENGTH_MULT;
}
} else {
diff = 0;
}
let q1diff = mod_diff(self.q1_ratio, other.q1_ratio, RANGE_QRATIO);
if q1diff <= 1 {
diff += q1diff;
} else {
diff += (q1diff - 1) * QRATIO_MULT;
}
let q2diff = mod_diff(self.q2_ratio, other.q2_ratio, RANGE_QRATIO);
if q2diff <= 1 {
diff += q2diff;
} else {
diff += (q2diff - 1) * QRATIO_MULT;
}
for (a, b) in self.checksum.iter().zip(other.checksum.iter()) {
if a != b {
diff += 1;
break;
}
}
diff += h_distance(&self.code, &other.code);
diff
}
fn from_hash(s: &[u8]) -> Option<Self> {
if s.len() != TLSH_STRING_LEN_REQ || s[0] != b'T' || s[1] != b'1' {
return None;
}
let mut i = 2;
let mut checksum = [0; TLSH_CHECKSUM_LEN];
for k in &mut checksum {
*k = swap_byte(from_hex(s, &mut i)?);
}
let lvalue = swap_byte(from_hex(s, &mut i)?);
let qb = from_hex(s, &mut i)?;
let q1_ratio = qb >> 4;
let q2_ratio = qb & 0x0F;
let mut code = [0; CODE_SIZE];
for c in code.iter_mut().rev() {
*c = from_hex(s, &mut i)?;
}
Some(Self {
lvalue,
q1_ratio,
q2_ratio,
checksum,
code,
})
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct ParseError;
impl<const TLSH_CHECKSUM_LEN: usize, const TLSH_STRING_LEN_REQ: usize, const CODE_SIZE: usize>
FromStr for Tlsh<TLSH_CHECKSUM_LEN, TLSH_STRING_LEN_REQ, CODE_SIZE>
{
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::from_hash(s.as_bytes()).ok_or(ParseError)
}
}
fn from_hex(s: &[u8], i: &mut usize) -> Option<u8> {
let a = char::from(s[*i]).to_digit(16)?;
*i += 1;
let b = char::from(s[*i]).to_digit(16)?;
*i += 1;
Some(((a as u8) << 4) | (b as u8))
}
fn to_hex(s: &mut [u8], s_idx: &mut usize, b: u8) {
const HEX_LOOKUP: &[u8] = b"000102030405060708090A0B0C0D0E0F\
101112131415161718191A1B1C1D1E1F\
202122232425262728292A2B2C2D2E2F\
303132333435363738393A3B3C3D3E3F\
404142434445464748494A4B4C4D4E4F\
505152535455565758595A5B5C5D5E5F\
606162636465666768696A6B6C6D6E6F\
707172737475767778797A7B7C7D7E7F\
808182838485868788898A8B8C8D8E8F\
909192939495969798999A9B9C9D9E9F\
A0A1A2A3A4A5A6A7A8A9AAABACADAEAF\
B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF\
C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF\
D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF\
E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF\
F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
let i = usize::from(b) * 2;
s[*s_idx] = HEX_LOOKUP[i];
s[*s_idx + 1] = HEX_LOOKUP[i + 1];
*s_idx += 2;
}