#![allow(dead_code)]
const RADIX_NULL_LINK: u32 = 0x03FF_FFFF;
const RADIX_LINK_MASK: u32 = 0x03FF_FFFF;
const RADIX_LENGTH_SHIFT: u32 = 26;
const RADIX_MAX_LENGTH: u32 = 63;
const MIN_MATCH_LEN: usize = 2;
const MAX_MATCH_LEN: usize = 273;
const HASH_SIZE: usize = 65536;
const HASH_MASK: u32 = (HASH_SIZE as u32) - 1;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Match {
pub offset: u32,
pub length: u32,
}
impl Match {
#[inline]
pub fn new(offset: u32, length: u32) -> Self {
Self { offset, length }
}
}
pub struct RadixMatchFinder {
table: Vec<u32>,
hash_table: Vec<u32>,
dict_size: usize,
max_depth: u32,
}
impl std::fmt::Debug for RadixMatchFinder {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RadixMatchFinder")
.field("dict_size", &self.dict_size)
.field("max_depth", &self.max_depth)
.field("table_len", &self.table.len())
.finish()
}
}
impl RadixMatchFinder {
pub fn new(dict_size: usize, max_depth: u32) -> Self {
let dict_size = dict_size.min(64 * 1024 * 1024);
Self {
table: Vec::new(),
hash_table: vec![RADIX_NULL_LINK; HASH_SIZE],
dict_size,
max_depth: max_depth.max(4),
}
}
#[inline]
fn hash2(data: &[u8], pos: usize) -> u32 {
if pos + 2 > data.len() {
return HASH_SIZE as u32; }
let h = ((data[pos] as u32) << 8) | (data[pos + 1] as u32);
h & HASH_MASK
}
pub fn build(&mut self, data: &[u8]) {
let len = data.len();
if len < MIN_MATCH_LEN {
self.table.clear();
return;
}
self.table.clear();
self.table.resize(len, RADIX_NULL_LINK);
self.hash_table.fill(RADIX_NULL_LINK);
let end = len.saturating_sub(MIN_MATCH_LEN - 1);
for pos in 0..end {
let hash = Self::hash2(data, pos);
if hash >= HASH_SIZE as u32 {
continue; }
let prev_pos = self.hash_table[hash as usize];
if prev_pos != RADIX_NULL_LINK {
let prev = prev_pos as usize;
if prev < pos && Self::prefix_matches(data, prev, pos) {
let match_len = Self::match_length(data, prev, pos);
let stored_len = (match_len as u32).min(RADIX_MAX_LENGTH);
self.table[pos] = prev_pos | (stored_len << RADIX_LENGTH_SHIFT);
}
}
self.hash_table[hash as usize] = pos as u32;
}
}
#[inline]
fn prefix_matches(data: &[u8], pos1: usize, pos2: usize) -> bool {
if pos1 + 2 > data.len() || pos2 + 2 > data.len() {
return false;
}
data[pos1] == data[pos2] && data[pos1 + 1] == data[pos2 + 1]
}
#[inline]
fn match_length(data: &[u8], pos1: usize, pos2: usize) -> usize {
let max_len = (data.len() - pos2).min(MAX_MATCH_LEN);
let mut len = 0;
while len < max_len && data[pos1 + len] == data[pos2 + len] {
len += 1;
}
len
}
pub fn get_match(&self, data: &[u8], pos: usize) -> Option<Match> {
if pos >= self.table.len() || pos + MIN_MATCH_LEN > data.len() {
return None;
}
let entry = self.table[pos];
let link = entry & RADIX_LINK_MASK;
if link == RADIX_NULL_LINK {
return None;
}
let match_pos = link as usize;
if match_pos >= pos {
return None;
}
let distance = pos - match_pos;
if distance > self.dict_size {
return None;
}
let actual_len = Self::match_length(data, match_pos, pos);
if actual_len >= MIN_MATCH_LEN {
Some(Match::new(distance as u32, actual_len as u32))
} else {
None
}
}
pub fn find_matches(&self, data: &[u8], pos: usize, max_matches: usize) -> Vec<Match> {
let mut matches = Vec::with_capacity(max_matches.min(16));
if pos >= self.table.len() || pos + MIN_MATCH_LEN > data.len() {
return matches;
}
let mut current_pos = pos;
let mut iterations = 0;
let max_iterations = self.max_depth as usize;
loop {
if current_pos >= self.table.len() {
break;
}
let entry = self.table[current_pos];
let link = entry & RADIX_LINK_MASK;
if link == RADIX_NULL_LINK {
break;
}
let match_pos = link as usize;
if match_pos >= current_pos {
break;
}
let distance = pos - match_pos;
if distance > self.dict_size {
break;
}
let actual_len = Self::match_length(data, match_pos, pos);
if actual_len >= MIN_MATCH_LEN {
matches.push(Match::new(distance as u32, actual_len as u32));
if matches.len() >= max_matches {
break;
}
}
current_pos = match_pos;
iterations += 1;
if iterations >= max_iterations {
break;
}
}
matches.sort_by(|a, b| b.length.cmp(&a.length));
matches
}
pub fn dict_size(&self) -> usize {
self.dict_size
}
pub fn reset(&mut self) {
self.table.clear();
self.hash_table.fill(RADIX_NULL_LINK);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_match_struct() {
let m = Match::new(100, 5);
assert_eq!(m.offset, 100);
assert_eq!(m.length, 5);
}
#[test]
fn test_radix_mf_new() {
let mf = RadixMatchFinder::new(1024 * 1024, 64);
assert_eq!(mf.dict_size(), 1024 * 1024);
}
#[test]
fn test_build_empty() {
let mut mf = RadixMatchFinder::new(1024, 32);
mf.build(&[]);
assert!(mf.table.is_empty());
}
#[test]
fn test_build_short() {
let mut mf = RadixMatchFinder::new(1024, 32);
mf.build(&[0]);
assert!(mf.table.is_empty());
}
#[test]
fn test_build_simple() {
let mut mf = RadixMatchFinder::new(1024, 32);
let data = b"abcabcabc";
mf.build(data);
let m = mf.get_match(data, 3);
assert!(m.is_some(), "Expected match at position 3");
let m = m.unwrap();
assert_eq!(m.offset, 3); assert!(m.length >= 3, "Expected at least 3 bytes match"); }
#[test]
fn test_build_repeated() {
let mut mf = RadixMatchFinder::new(1024, 32);
let data = b"aaaaaaaa";
mf.build(data);
let m = mf.get_match(data, 2);
assert!(m.is_some(), "Expected match at position 2");
let m = m.unwrap();
assert!(m.length >= 2);
}
#[test]
fn test_no_match() {
let mut mf = RadixMatchFinder::new(1024, 32);
let data = b"abcdefgh";
mf.build(data);
let m = mf.get_match(data, 0);
assert!(m.is_none());
}
#[test]
fn test_find_matches() {
let mut mf = RadixMatchFinder::new(1024, 32);
let data = b"abcabcabcabc";
mf.build(data);
let matches = mf.find_matches(data, 9, 10);
assert!(!matches.is_empty(), "Expected at least one match");
for i in 1..matches.len() {
assert!(matches[i - 1].length >= matches[i].length);
}
}
#[test]
fn test_extend_match() {
let mut mf = RadixMatchFinder::new(1024, 4); let data = b"abcdefabcdefgh"; mf.build(data);
let m = mf.get_match(data, 6);
assert!(m.is_some(), "Expected match at position 6");
let m = m.unwrap();
assert_eq!(m.length, 6);
}
#[test]
fn test_reset() {
let mut mf = RadixMatchFinder::new(1024, 32);
let data = b"abcabc";
mf.build(data);
assert!(!mf.table.is_empty());
mf.reset();
assert!(mf.table.is_empty());
}
#[test]
fn test_dict_size_limit() {
let mf = RadixMatchFinder::new(100 * 1024 * 1024, 32);
assert_eq!(mf.dict_size(), 64 * 1024 * 1024);
}
#[test]
fn test_longer_pattern() {
let mut mf = RadixMatchFinder::new(1024, 32);
let data = b"hello world hello world hello";
mf.build(data);
let m = mf.get_match(data, 12);
assert!(m.is_some(), "Expected match at position 12");
let m = m.unwrap();
assert_eq!(m.offset, 12);
assert!(m.length >= 11, "Expected at least 'hello world' match");
}
}