use crate::algorithms::suffix_array::SuffixArray;
use crate::error::{Result, ZiporaError};
use crate::fsa::{ZiporaTrie, ZiporaTrieConfig};
use crate::fsa::traits::{FiniteStateAutomaton, Trie};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
/// Configuration for a DFA-based prefix cache built from a suffix array.
///
/// Most tuning knobs here are hints for the cache and its backing
/// [`ZiporaTrie`]; see `DfaCache::build_from_suffix_array` for usage.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DfaCacheConfig {
// Initial capacity hint for cache state storage.
pub initial_capacity: usize,
// Allocate through a memory pool (presumably the crate's pool — not consulted in this module; TODO confirm).
pub use_memory_pool: bool,
// Request cache-line-aligned layouts where supported.
pub cache_aligned: bool,
// Enable SIMD acceleration; defaults to the `simd` cargo feature.
pub enable_simd: bool,
// Minimum frequency a node must reach to be kept in the cache.
pub min_node_frequency: u32,
// Upper bound on cache memory, in bytes.
pub max_memory_usage: usize,
// Growth factor applied when the cache needs to expand.
pub growth_factor: f64,
// Configuration forwarded to the underlying `ZiporaTrie`.
pub trie_config: ZiporaTrieConfig,
}
impl Default for DfaCacheConfig {
    /// Default tuning: 8192-entry initial capacity, pooled and
    /// cache-aligned allocation, SIMD when the `simd` feature is compiled
    /// in, minimum node frequency of 2, and a 16 MiB memory ceiling.
    fn default() -> Self {
        // Resolved at compile time from the cargo feature set.
        let enable_simd = cfg!(feature = "simd");
        Self {
            initial_capacity: 8192,
            use_memory_pool: true,
            cache_aligned: true,
            enable_simd,
            min_node_frequency: 2,
            max_memory_usage: 16 * 1024 * 1024,
            growth_factor: 1.5,
            trie_config: ZiporaTrieConfig::default(),
        }
    }
}
impl DfaCacheConfig {
/// Builds a lightweight configuration tuned for a small dictionary of
/// roughly `dict_size` entries: capacities are capped at 256, pooling,
/// SIMD, concurrency, and cache optimization are all disabled, and the
/// memory ceiling is `dict_size * 2` (presumably bytes — TODO confirm unit).
pub fn small_dictionary(dict_size: usize) -> Self {
Self {
// Capacity is capped so tiny dictionaries do not over-allocate.
initial_capacity: dict_size.min(256),
use_memory_pool: false, cache_aligned: false, enable_simd: false, min_node_frequency: 2,
max_memory_usage: dict_size * 2, growth_factor: 1.2, trie_config: ZiporaTrieConfig {
// Double-array trie with conservative growth for small inputs.
trie_strategy: crate::fsa::TrieStrategy::DoubleArray {
initial_capacity: dict_size.min(256),
growth_factor: 1.2,
free_list_management: false,
auto_shrink: true,
},
storage_strategy: crate::fsa::StorageStrategy::Standard {
initial_capacity: dict_size.min(256),
growth_factor: 1.2,
},
compression_strategy: crate::fsa::CompressionStrategy::None,
rank_select_type: crate::fsa::RankSelectType::Simple,
enable_simd: false,
enable_concurrency: false,
cache_optimization: false,
},
}
}
}
/// Result of a successful longest-prefix lookup against the cache.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheMatch {
// Number of input bytes matched.
pub length: usize,
// Starting position of the matched pattern in the original text.
pub dict_position: usize,
// How often the matched pattern occurred when the cache was built.
pub frequency: u32,
// Trie state ID where the match terminated.
pub state_id: u32,
}
/// Running statistics for cache lookups and memory footprint.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
// Total number of `find_longest_prefix` calls (including empty input).
pub total_lookups: u64,
// Lookups that returned a match.
pub successful_matches: u64,
// Lookups that returned no match (includes empty input).
pub cache_misses: u64,
// Running mean of matched prefix lengths, over successful matches only.
pub avg_prefix_length: f64,
// Cumulative lookup wall time, in microseconds.
pub total_lookup_time_us: u64,
// Snapshot of the trie's state capacity at build/optimize time.
pub state_count: usize,
// Estimated memory usage in bytes (trie arrays + pattern map).
pub memory_usage: usize,
}
impl CacheStats {
    /// Fraction of lookups that produced a match, in `[0.0, 1.0]`.
    /// Returns 0.0 before any lookup has been recorded.
    pub fn hit_ratio(&self) -> f64 {
        match self.total_lookups {
            0 => 0.0,
            lookups => self.successful_matches as f64 / lookups as f64,
        }
    }

    /// Mean lookup latency in microseconds.
    /// Returns 0.0 before any lookup has been recorded.
    pub fn avg_lookup_time_us(&self) -> f64 {
        match self.total_lookups {
            0 => 0.0,
            lookups => self.total_lookup_time_us as f64 / lookups as f64,
        }
    }
}
/// One DFA state covering a half-open interval of the suffix array.
///
/// The interval `[suffix_low, suffix_hig)` is the set of suffixes that
/// share this state's path prefix; its width is the pattern frequency.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DfaState {
// First-child link; only ever set to 1 for the synthetic root in
// `get_state` within this module — presumably a double-array-style
// child pointer, TODO confirm.
pub child0: u32,
// Parent state ID (0 for the root).
pub parent: u32,
// Always 0 in this module; presumably the low byte of a compressed
// (zipped) path length — TODO confirm.
pub zlen_lo: u8,
// Inclusive lower bound of the suffix-array interval.
pub suffix_low: u32,
// Exclusive upper bound of the suffix-array interval.
pub suffix_hig: u32,
}
impl DfaState {
fn new(parent: u32, suffix_low: u32, suffix_hig: u32) -> Self {
Self {
child0: 0,
parent,
zlen_lo: 0,
suffix_low,
suffix_hig,
}
}
fn frequency(&self) -> u32 {
self.suffix_hig.saturating_sub(self.suffix_low)
}
}
/// Work item for the breadth-first cache construction queue.
#[derive(Debug, Clone)]
struct BfsQueueElem {
// Depth (pattern length so far) of `state` from the root.
depth: u32,
// Index of the state in the temporary trie's state vector.
state: u32,
}
/// A frequent pattern extracted from the text, keyed later by trie state.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
struct PatternInfo {
// The pattern bytes themselves.
pattern: Vec<u8>,
// One occurrence position of the pattern in the source text.
position: usize,
// Occurrence count at extraction time.
frequency: u32,
// Pattern length in bytes (equals `pattern.len()`).
length: usize,
}
/// Intermediate trie produced by the BFS pass, before conversion into the
/// production `ZiporaTrie`.
#[derive(Debug)]
struct TemporaryTrie {
// Flat state storage; index = state ID.
states: Vec<DfaState>,
// Transition table keyed by (source state, input byte).
transitions: HashMap<(u32, u8), u32>,
// States that terminate a recorded pattern.
terminals: HashMap<u32, PatternInfo>,
}
/// Node of the in-memory pattern trie used by `build_trie_bfs`.
#[derive(Debug)]
struct TrieNode {
// Child nodes keyed by next byte.
children: HashMap<u8, Box<TrieNode>>,
// Present when a complete pattern ends at this node.
pattern_info: Option<PatternInfo>,
// Accumulated frequency of all patterns passing through this node.
frequency: u32,
// Distance from the root (root = 0).
depth: usize,
}
impl TrieNode {
fn new(depth: usize) -> Self {
Self {
children: HashMap::new(),
pattern_info: None,
frequency: 0,
depth,
}
}
fn is_terminal(&self) -> bool {
self.pattern_info.is_some()
}
}
/// DFA prefix cache: a `ZiporaTrie` of frequent patterns mined from a
/// suffix array, plus per-pattern metadata and lookup statistics.
#[derive(Debug, Clone)]
pub struct DfaCache {
// Production trie used for byte-by-byte prefix walking.
trie: ZiporaTrie,
// Metadata for terminal trie states (keyed by trie state ID).
pattern_map: HashMap<u32, PatternInfo>,
// Configuration the cache was built with.
config: DfaCacheConfig,
// Lookup/memory statistics, updated on each lookup.
stats: CacheStats,
// Copy of the source text, used by the sa_* binary searches.
text: Vec<u8>,
// Copy of the suffix array over `text` (i32 for compactness).
suffix_array: Vec<i32>,
}
impl DfaCache {
/// Builds a cache by BFS-mining frequent patterns from `suffix_array`
/// over `text`, keeping only patterns seen at least `min_frequency`
/// times and no longer than `max_depth` bytes.
///
/// # Errors
/// Propagates any error from the BFS construction.
pub fn build_from_suffix_array(
suffix_array: &SuffixArray,
text: &[u8],
config: &DfaCacheConfig,
min_frequency: u32,
max_depth: u32,
) -> Result<Self> {
// Keep a compact i32 copy of the suffix array for the sa_* searches.
let sa_i32: Vec<i32> = suffix_array.as_slice().iter()
.map(|&x| x as i32)
.collect();
let mut cache = Self::new_empty(config.clone(), text.to_vec(), sa_i32);
cache.bfs_build_cache(suffix_array.as_slice(), text, min_frequency as usize, max_depth as usize)?;
// NOTE(review): state_count records the trie's *capacity*, which may
// exceed the number of live states — confirm against ZiporaTrie's API.
let (base_memory, check_memory, _) = cache.trie.memory_stats();
cache.stats.state_count = cache.trie.capacity();
cache.stats.memory_usage = base_memory + check_memory;
Ok(cache)
}
fn new_empty(config: DfaCacheConfig, text: Vec<u8>, suffix_array: Vec<i32>) -> Self {
let trie = ZiporaTrie::new();
let stats = CacheStats::default();
Self {
trie,
pattern_map: HashMap::new(),
config,
stats,
text,
suffix_array,
}
}
/// Walks the trie byte-by-byte over `input` (at most `max_length` bytes)
/// and returns the longest prefix that ends at a cached pattern, or
/// `None` if no cached pattern matches.
///
/// Also updates lookup statistics: total/success/miss counters, the
/// running mean of matched prefix lengths, and cumulative lookup time.
pub fn find_longest_prefix(&mut self, input: &[u8], max_length: usize) -> Result<Option<CacheMatch>> {
let start_time = std::time::Instant::now();
self.stats.total_lookups += 1;
// Empty input counts as a lookup and a miss (time is still recorded).
if input.is_empty() {
self.stats.cache_misses += 1;
self.stats.total_lookup_time_us += start_time.elapsed().as_micros() as u64;
return Ok(None);
}
let mut state = self.trie.root();
let mut longest_match: Option<CacheMatch> = None;
let search_len = max_length.min(input.len());
for i in 0..search_len {
let byte = input[i];
if let Some(next_state) = self.trie.transition(state, byte) {
state = next_state;
// Remember the deepest final state seen so far; later final
// states overwrite earlier ones, yielding the longest match.
if self.trie.is_final(state) {
if let Some(pattern_info) = self.pattern_map.get(&state) {
longest_match = Some(CacheMatch {
length: i + 1,
dict_position: pattern_info.position,
frequency: pattern_info.frequency,
state_id: state,
});
}
}
} else {
// No outgoing transition: the walk cannot be extended.
break;
}
}
let elapsed = start_time.elapsed().as_micros() as u64;
self.stats.total_lookup_time_us += elapsed;
if let Some(ref match_result) = longest_match {
self.stats.successful_matches += 1;
// Incremental update of the running mean of matched lengths:
// new_mean = (old_mean * (n-1) + new_length) / n.
let total_len = self.stats.avg_prefix_length * (self.stats.successful_matches - 1) as f64
+ match_result.length as f64;
self.stats.avg_prefix_length = total_len / self.stats.successful_matches as f64;
} else {
self.stats.cache_misses += 1;
}
Ok(longest_match)
}
/// Read-only access to the lookup statistics.
pub fn stats(&self) -> &CacheStats {
&self.stats
}
/// Number of trie states. NOTE(review): this reports the trie's
/// *capacity*, which may exceed the live state count — confirm intent.
pub fn state_count(&self) -> usize {
self.trie.capacity()
}
/// Estimated memory footprint in bytes: trie base/check arrays plus an
/// approximation of the pattern map (key + value sizes per entry;
/// heap-owned pattern bytes are not counted).
#[inline]
pub fn memory_usage(&self) -> usize {
let (base_memory, check_memory, _) = self.trie.memory_stats();
base_memory + check_memory +
self.pattern_map.len() * (std::mem::size_of::<u32>() + std::mem::size_of::<PatternInfo>())
}
/// Drops pattern metadata whose frequency is below `min_frequency` and
/// refreshes the cached statistics.
///
/// NOTE(review): only the pattern map is pruned — the corresponding trie
/// states remain allocated, so `state_count` does not shrink here.
pub fn optimize(&mut self, min_frequency: u32) -> Result<()> {
self.pattern_map.retain(|_, pattern_info| {
pattern_info.frequency >= min_frequency
});
self.stats.state_count = self.trie.capacity();
self.stats.memory_usage = self.memory_usage();
Ok(())
}
/// Checks internal consistency: every pattern-map state ID must lie
/// within the trie's capacity, and a non-empty pattern map requires a
/// non-empty trie.
///
/// # Errors
/// Returns `ZiporaError::invalid_data` describing the first violation.
pub fn validate(&self) -> Result<()> {
    let capacity = self.trie.capacity();
    let out_of_range = self
        .pattern_map
        .keys()
        .find(|&&state_id| state_id as usize >= capacity);
    if let Some(&state_id) = out_of_range {
        return Err(ZiporaError::invalid_data(
            &format!("Invalid state ID {} in pattern map", state_id),
        ));
    }
    if capacity == 0 && !self.pattern_map.is_empty() {
        return Err(ZiporaError::invalid_data("Empty trie in cache but non-empty pattern map"));
    }
    Ok(())
}
/// Serializes the cache with bincode. Only the pattern map and the
/// configuration are persisted — the trie, text, and suffix array are
/// NOT serialized and must be rebuilt (see `deserialize`).
///
/// # Errors
/// Returns `invalid_data` if bincode encoding fails.
#[cfg(feature = "serde")]
pub fn serialize(&self) -> Result<Vec<u8>> {
use bincode;
let serializable = SerializableCache {
pattern_map: self.pattern_map.clone(),
config: self.config.clone(),
};
bincode::serialize(&serializable)
.map_err(|e| ZiporaError::invalid_data(&format!("Cache serialization failed: {}", e)))
}
/// Returns a synthetic view of a DFA state.
///
/// Placeholder implementation: only the root (`state_id == 0`) is
/// materialized — it covers the full suffix-array interval — and every
/// other ID yields `None`.
pub fn get_state(&self, state_id: u32) -> Option<DfaState> {
if state_id == 0 {
Some(DfaState {
child0: 1, parent: 0, zlen_lo: 0, suffix_low: 0,
suffix_hig: self.suffix_array.len() as u32,
})
} else {
None
}
}
/// True when `state_id` has an outgoing transition on `byte` that leads
/// to a non-null (non-zero) state.
pub fn has_transition(&self, state_id: u32, byte: u8) -> bool {
    matches!(self.trie.transition(state_id, byte), Some(next) if next != 0)
}
/// Follows the transition from `state_id` on `byte`; thin delegation to
/// the underlying trie.
pub fn transition_state(&self, state_id: u32, byte: u8) -> Option<u32> {
self.trie.transition(state_id, byte)
}
/// Zipped-string (path-compression) length for a state.
/// Placeholder: always `None` — path compression is not tracked here.
pub fn get_zstr_length(&self, state_id: u32) -> Option<usize> {
let _ = state_id;
None
}
/// Reconstructs a cache from bytes produced by `serialize`.
///
/// NOTE(review): the trie is recreated EMPTY (only the pattern map and
/// config round-trip), so the pattern map's state IDs do not resolve in
/// the new trie and `validate()` will reject a non-empty deserialized
/// cache. Lookups on the result will always miss. Text and suffix array
/// are likewise empty. Confirm whether callers are expected to rebuild.
///
/// # Errors
/// Returns `invalid_data` if bincode decoding fails.
#[cfg(feature = "serde")]
pub fn deserialize(data: &[u8]) -> Result<Self> {
use bincode;
let serializable: SerializableCache = bincode::deserialize(data)
.map_err(|e| ZiporaError::invalid_data(&format!("Cache deserialization failed: {}", e)))?;
let trie = ZiporaTrie::new();
let pattern_map = serializable.pattern_map;
let stats = CacheStats {
state_count: trie.capacity(),
memory_usage: {
let (base_memory, check_memory, _) = trie.memory_stats();
base_memory + check_memory +
pattern_map.len() * (std::mem::size_of::<u32>() + std::mem::size_of::<PatternInfo>())
},
..Default::default()
};
Ok(Self {
trie,
pattern_map,
config: serializable.config,
stats,
text: Vec::new(), suffix_array: Vec::new(), })
}
/// Populates the cache from `suffix_array`/`text`: mines a temporary
/// trie breadth-first, then converts it into the production trie.
/// Empty input is a no-op.
///
/// # Errors
/// Propagates errors from either construction phase.
pub fn bfs_build_cache(
    &mut self,
    suffix_array: &[usize],
    text: &[u8],
    min_freq: usize,
    max_depth: usize,
) -> Result<()> {
    if !suffix_array.is_empty() && !text.is_empty() {
        let temp_trie = self.build_bfs_trie(suffix_array, text, min_freq, max_depth)?;
        self.build_double_array_trie(temp_trie)?;
    }
    Ok(())
}
/// Breadth-first construction of the temporary trie.
///
/// Starting from a root covering the full suffix array, each level
/// partitions a state's suffix interval by the character at the current
/// depth; partitions with at least `min_freq` suffixes become child
/// states. States at depth >= 3 additionally record a terminal pattern.
///
/// # Errors
/// Propagates errors from partitioning or pattern extraction.
fn build_bfs_trie(
    &self,
    suffix_array: &[usize],
    text: &[u8],
    min_freq: usize,
    max_depth: usize,
) -> Result<TemporaryTrie> {
    let mut trie = TemporaryTrie {
        states: Vec::new(),
        transitions: HashMap::new(),
        terminals: HashMap::new(),
    };
    // Root state covers the entire suffix array.
    trie.states.push(DfaState::new(0, 0, suffix_array.len() as u32));
    let mut queue = VecDeque::new();
    queue.push_back(BfsQueueElem { depth: 0, state: 0 });
    while let Some(elem) = queue.pop_front() {
        if elem.depth >= max_depth as u32 {
            continue;
        }
        // Copy just the interval bounds. The original code did
        // `let state = &trie.states[i].clone();` — a reference to a
        // temporary clone of the whole state, which the borrow below
        // makes unnecessary.
        let (lo, hi) = {
            let state = &trie.states[elem.state as usize];
            (state.suffix_low as usize, state.suffix_hig as usize)
        };
        if hi <= lo {
            continue;
        }
        let partitions =
            self.partition_by_character(suffix_array, text, lo, hi, elem.depth as usize)?;
        for (byte, (start, end)) in partitions {
            let frequency = end - start;
            if frequency < min_freq {
                continue;
            }
            let child_state_id = trie.states.len() as u32;
            trie.states
                .push(DfaState::new(elem.state, start as u32, end as u32));
            trie.transitions.insert((elem.state, byte), child_state_id);
            queue.push_back(BfsQueueElem {
                depth: elem.depth + 1,
                state: child_state_id,
            });
            // Patterns shorter than 3 bytes are not worth caching as
            // terminals (minimum useful match length).
            if elem.depth + 1 >= 3 {
                let pattern = self.extract_pattern_from_state(
                    suffix_array,
                    text,
                    child_state_id,
                    &trie,
                    elem.depth + 1,
                )?;
                if let Some(pattern_info) = pattern {
                    trie.terminals.insert(child_state_id, pattern_info);
                }
            }
        }
    }
    Ok(trie)
}
/// Splits the suffix-array interval `[lo, hi)` into sub-intervals by the
/// character each suffix has at offset `depth`. Suffixes too short to
/// have a character at `depth` are skipped (in a sorted suffix array
/// they order before their extensions).
///
/// Returns a map from character to the half-open `[start, end)` range of
/// suffix-array positions whose suffixes carry that character at `depth`.
///
/// # Errors
/// Infallible in practice; `Result` kept for interface consistency.
fn partition_by_character(
    &self,
    suffix_array: &[usize],
    text: &[u8],
    lo: usize,
    hi: usize,
    depth: usize,
) -> Result<HashMap<u8, (usize, usize)>> {
    let mut partitions = HashMap::new();
    if lo >= hi || lo >= suffix_array.len() {
        return Ok(partitions);
    }
    // (character at `depth`, suffix-array index) for every suffix in
    // range that still has a character at that offset.
    let mut range_chars: Vec<(u8, usize)> = Vec::new();
    for i in lo..hi.min(suffix_array.len()) {
        let suffix_pos = suffix_array[i];
        if suffix_pos + depth < text.len() {
            range_chars.push((text[suffix_pos + depth], i));
        }
    }
    if range_chars.is_empty() {
        return Ok(partitions);
    }
    // Stable sort: entries with equal characters keep ascending SA order.
    range_chars.sort_by_key(|&(ch, _)| ch);
    // Build ranges from the *actual* suffix-array indices. The previous
    // version counted compacted entries starting at `lo`, so whenever a
    // too-short suffix was skipped above, every emitted range drifted
    // off its true suffix-array position.
    let mut current_char = range_chars[0].0;
    let mut run_start = range_chars[0].1;
    let mut run_end = run_start;
    for &(ch, idx) in &range_chars {
        if ch != current_char {
            partitions.insert(current_char, (run_start, run_end + 1));
            current_char = ch;
            run_start = idx;
        }
        run_end = idx;
    }
    partitions.insert(current_char, (run_start, run_end + 1));
    Ok(partitions)
}
/// Materializes the pattern represented by `state_id`: the first `depth`
/// bytes of the first suffix in the state's interval. Returns `None` for
/// invalid states, empty intervals, or suffixes shorter than `depth`.
///
/// # Errors
/// Infallible in practice; `Result` kept for interface consistency.
fn extract_pattern_from_state(
&self,
suffix_array: &[usize],
text: &[u8],
state_id: u32,
trie: &TemporaryTrie,
depth: u32,
) -> Result<Option<PatternInfo>> {
if state_id as usize >= trie.states.len() {
return Ok(None);
}
let state = &trie.states[state_id as usize];
let lo = state.suffix_low as usize;
let hi = state.suffix_hig as usize;
if lo >= hi || lo >= suffix_array.len() {
return Ok(None);
}
// Any suffix in the interval shares the first `depth` bytes; use the first.
let suffix_pos = suffix_array[lo];
// `>` (not `>=`) is correct: a suffix of exactly `depth` bytes still
// yields a full pattern slice below.
if suffix_pos + depth as usize > text.len() {
return Ok(None);
}
let pattern = text[suffix_pos..suffix_pos + depth as usize].to_vec();
// Interval width = number of occurrences of this pattern.
let frequency = (hi - lo) as u32;
Ok(Some(PatternInfo {
pattern,
position: suffix_pos,
frequency,
length: depth as usize,
}))
}
/// Binary search for the first index in `[lo, hi)` whose suffix has a
/// character at `depth` strictly greater than `ch` (upper bound).
///
/// NOTE(review): suffixes too short to have a character at `depth` are
/// treated as greater than `ch` (search moves left). In a sorted suffix
/// array shorter suffixes order *before* their extensions, so this
/// branch direction looks inverted — confirm intended semantics. The
/// current callers (tests) only use `depth == 0`, where the branch
/// never fires.
fn sa_upper_bound(&self, lo: usize, hi: usize, depth: usize, ch: u8) -> usize {
let mut left = lo;
let mut right = hi;
while left < right {
let mid = left + (right - left) / 2;
// Defensive: callers may pass hi beyond the stored suffix array.
if mid >= self.suffix_array.len() {
break;
}
let suffix_pos = self.suffix_array[mid] as usize;
if suffix_pos + depth >= self.text.len() {
right = mid;
continue;
}
let suffix_ch = self.text[suffix_pos + depth];
if suffix_ch <= ch {
left = mid + 1;
} else {
right = mid;
}
}
left
}
/// Half-open range `[lower, upper)` of suffix-array indices in `[lo, hi)`
/// whose suffix carries `ch` at offset `depth`.
fn sa_equal_range(&self, lo: usize, hi: usize, depth: usize, ch: u8) -> (usize, usize) {
    (
        self.sa_lower_bound(lo, hi, depth, ch),
        self.sa_upper_bound(lo, hi, depth, ch),
    )
}
/// Binary search for the first index in `[lo, hi)` whose suffix has a
/// character at `depth` not less than `ch` (lower bound).
///
/// NOTE(review): as in `sa_upper_bound`, suffixes with no character at
/// `depth` are treated as >= `ch` (search moves left), though in a
/// sorted suffix array they order first — confirm intended semantics.
fn sa_lower_bound(&self, lo: usize, hi: usize, depth: usize, ch: u8) -> usize {
let mut left = lo;
let mut right = hi;
while left < right {
let mid = left + (right - left) / 2;
// Defensive: callers may pass hi beyond the stored suffix array.
if mid >= self.suffix_array.len() {
break;
}
let suffix_pos = self.suffix_array[mid] as usize;
if suffix_pos + depth >= self.text.len() {
right = mid;
continue;
}
let suffix_ch = self.text[suffix_pos + depth];
if suffix_ch < ch {
left = mid + 1;
} else {
right = mid;
}
}
left
}
/// Converts the temporary BFS trie into the production `ZiporaTrie`:
/// the trie is rebuilt from scratch, each distinct terminal pattern is
/// inserted, and the pattern map is re-keyed by the new trie state IDs.
/// Duplicate patterns from different temporary states keep whichever
/// `PatternInfo` is seen last (map iteration order).
///
/// # Errors
/// Infallible in practice; individual insert failures are skipped.
fn build_double_array_trie(&mut self, temp_trie: TemporaryTrie) -> Result<()> {
    self.trie = ZiporaTrie::with_config(self.config.trie_config.clone());
    // Deduplicate terminals by pattern bytes. Consume the temporary trie
    // with `into_values` instead of cloning every PatternInfo (and its
    // pattern Vec) as the original did.
    let mut pattern_to_info: HashMap<Vec<u8>, PatternInfo> = HashMap::new();
    for pattern_info in temp_trie.terminals.into_values() {
        pattern_to_info.insert(pattern_info.pattern.clone(), pattern_info);
    }
    self.pattern_map.clear();
    for (pattern, pattern_info) in pattern_to_info {
        if let Ok(state_id) = self.trie.insert_and_get_node_id(&pattern) {
            self.pattern_map.insert(state_id, pattern_info);
        }
    }
    Ok(())
}
/// Enumerates every substring of length 1..=`max_depth` occurring at
/// least `min_frequency` times in `text`, counting occurrences via the
/// suffix array. The recorded `position` is the first occurrence seen in
/// suffix-array order (not necessarily the leftmost in the text).
/// Results are sorted most-frequent first.
///
/// # Errors
/// Infallible in practice; `Result` kept for interface consistency.
fn extract_frequent_patterns(
    suffix_array: &SuffixArray,
    text: &[u8],
    min_frequency: u32,
    max_depth: usize,
) -> Result<Vec<PatternInfo>> {
    let sa = suffix_array.as_slice();
    let mut patterns = Vec::new();
    if sa.is_empty() {
        return Ok(patterns);
    }
    for pattern_len in 1..=max_depth {
        // Per pattern: (first position seen, occurrence count).
        let mut pattern_counts: HashMap<Vec<u8>, (usize, u32)> = HashMap::new();
        for &start_pos in sa {
            if start_pos + pattern_len <= text.len() {
                pattern_counts
                    .entry(text[start_pos..start_pos + pattern_len].to_vec())
                    .or_insert((start_pos, 0))
                    .1 += 1;
            }
        }
        patterns.extend(
            pattern_counts
                .into_iter()
                .filter(|(_, (_, frequency))| *frequency >= min_frequency)
                .map(|(pattern, (position, frequency))| PatternInfo {
                    pattern,
                    position,
                    frequency,
                    length: pattern_len,
                }),
        );
    }
    // Most frequent patterns first.
    patterns.sort_by(|a, b| b.frequency.cmp(&a.frequency));
    Ok(patterns)
}
/// Builds an in-memory `TrieNode` trie from the given patterns, skipping
/// any pattern longer than `max_depth`. Each traversed node accumulates
/// the pattern's frequency; the node where a pattern ends stores its
/// `PatternInfo`.
///
/// # Errors
/// Infallible in practice; `Result` kept for interface consistency.
fn build_trie_bfs(patterns: &[PatternInfo], max_depth: usize) -> Result<Box<TrieNode>> {
    let mut root = Box::new(TrieNode::new(0));
    for info in patterns.iter().filter(|p| p.length <= max_depth) {
        let mut node = &mut root;
        let mut depth = 0usize;
        for &byte in info.pattern.iter() {
            depth += 1;
            node = node
                .children
                .entry(byte)
                .or_insert_with(|| Box::new(TrieNode::new(depth)));
            node.frequency += info.frequency;
        }
        node.pattern_info = Some(info.clone());
    }
    Ok(root)
}
/// Flattens an in-memory `TrieNode` trie into a fresh `ZiporaTrie` plus
/// a pattern map keyed by the new trie's state IDs. Insert failures are
/// silently skipped (best-effort conversion).
///
/// # Errors
/// Infallible in practice; `Result` kept for interface consistency.
fn convert_to_zipora_trie(
root: Box<TrieNode>,
config: &ZiporaTrieConfig,
) -> Result<(ZiporaTrie, HashMap<u32, PatternInfo>)> {
let mut trie = ZiporaTrie::with_config(config.clone());
let mut pattern_map = HashMap::new();
// Gather (pattern bytes, info) pairs by walking the node trie.
let mut patterns = Vec::new();
Self::collect_patterns_from_trie(&root, Vec::new(), &mut patterns);
for (pattern, pattern_info) in patterns {
if let Ok(state_id) = trie.insert_and_get_node_id(&pattern) {
pattern_map.insert(state_id, pattern_info);
}
}
Ok((trie, pattern_map))
}
fn collect_patterns_from_trie(
node: &TrieNode,
current_pattern: Vec<u8>,
patterns: &mut Vec<(Vec<u8>, PatternInfo)>,
) {
if let Some(ref pattern_info) = node.pattern_info {
patterns.push((current_pattern.clone(), pattern_info.clone()));
}
for (&byte, child) in &node.children {
let mut child_pattern = current_pattern.clone();
child_pattern.push(byte);
Self::collect_patterns_from_trie(child, child_pattern, patterns);
}
}
}
/// Persistable subset of `DfaCache`: only the pattern map and the
/// configuration survive serialization (trie, text, and suffix array
/// are rebuilt or left empty on load — see `DfaCache::deserialize`).
#[cfg(feature = "serde")]
#[derive(Serialize, Deserialize)]
struct SerializableCache {
pattern_map: HashMap<u32, PatternInfo>,
config: DfaCacheConfig,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::algorithms::suffix_array::SuffixArrayConfig;

    /// Builds a suffix array over `text` with default configuration.
    fn create_test_suffix_array(text: &[u8]) -> SuffixArray {
        SuffixArray::with_config(text, &SuffixArrayConfig::default()).unwrap()
    }

    #[test]
    fn test_cache_creation() {
        let text = b"abcdefghijklmnopqrstuvwxyz";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 5).unwrap();
        assert!(cache.state_count() > 0);
        assert!(cache.memory_usage() > 0);
    }

    #[test]
    fn test_prefix_matching() {
        let text = b"the quick brown fox jumps over the lazy dog";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let mut cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 4).unwrap();
        let input = b"the";
        let result = cache.find_longest_prefix(input, 10).unwrap();
        // A match is not guaranteed (depends on frequency pruning), but
        // when present it must be a non-empty prefix of the input.
        if let Some(cache_match) = result {
            assert!(cache_match.length > 0);
            assert!(cache_match.length <= input.len());
        }
        assert_eq!(cache.stats().total_lookups, 1);
    }

    #[test]
    fn test_cache_statistics() {
        let text = b"aaabbbcccaaabbbccc";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let mut cache = DfaCache::build_from_suffix_array(&sa, text, &config, 2, 3).unwrap();
        cache.find_longest_prefix(b"aaa", 5).unwrap();
        cache.find_longest_prefix(b"bbb", 5).unwrap();
        cache.find_longest_prefix(b"xyz", 5).unwrap();
        let stats = cache.stats();
        assert_eq!(stats.total_lookups, 3);
        assert!(stats.hit_ratio() >= 0.0 && stats.hit_ratio() <= 1.0);
    }

    #[test]
    fn test_cache_optimization() {
        let text = b"optimization test with repeated patterns";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let mut cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 4).unwrap();
        let initial_states = cache.state_count();
        cache.optimize(3).unwrap();
        // optimize() prunes the pattern map only; the trie never grows here.
        assert!(cache.state_count() <= initial_states);
    }

    #[test]
    fn test_cache_validation() {
        let text = b"validation test data";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 3).unwrap();
        assert!(cache.validate().is_ok());
    }

    #[test]
    fn test_empty_input() {
        let text = b"test";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let mut cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 2).unwrap();
        let result = cache.find_longest_prefix(b"", 10).unwrap();
        assert!(result.is_none());
        let stats = cache.stats();
        assert_eq!(stats.cache_misses, 1);
    }

    #[test]
    #[cfg(feature = "serde")]
    fn test_serialization() {
        let text = b"serialization test";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 3).unwrap();
        let serialized = cache.serialize().unwrap();
        assert!(!serialized.is_empty());
        let deserialized = DfaCache::deserialize(&serialized).unwrap();
        // The previous assertion (`state_count() >= 0`) was a tautology on
        // usize; check that the pattern map round-trips instead.
        assert_eq!(deserialized.pattern_map.len(), cache.pattern_map.len());
    }

    #[test]
    fn test_frequent_pattern_extraction() {
        let text = b"aaabbbaaaccc";
        let sa = create_test_suffix_array(text);
        let patterns = DfaCache::extract_frequent_patterns(&sa, text, 2, 3).unwrap();
        assert!(!patterns.is_empty());
        for pattern in &patterns {
            assert!(pattern.frequency >= 2);
            assert!(pattern.length <= 3);
        }
    }

    #[test]
    fn test_bfs_cache_construction() {
        let text = b"abcabcdefabcdef";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let mut cache = DfaCache::build_from_suffix_array(&sa, text, &config, 2, 4).unwrap();
        assert!(cache.state_count() > 0);
        let result = cache.find_longest_prefix(b"abc", 10).unwrap();
        if let Some(cache_match) = result {
            assert!(cache_match.length > 0);
            assert!(cache_match.frequency >= 2);
        }
    }

    #[test]
    fn test_suffix_array_partitioning() {
        let text = b"abacaba";
        let sa = create_test_suffix_array(text);
        let sa_slice: Vec<i32> = sa.as_slice().iter().map(|&x| x as i32).collect();
        let cache = DfaCache::new_empty(DfaCacheConfig::default(), text.to_vec(), sa_slice);
        let partitions = cache
            .partition_by_character(sa.as_slice(), text, 0, sa.as_slice().len(), 0)
            .unwrap();
        assert!(!partitions.is_empty());
        for (ch, (start, end)) in partitions {
            assert!(start <= end);
            assert!(end <= sa.as_slice().len());
            assert!(ch <= b'z');
        }
    }

    #[test]
    fn test_sa_bounds_search() {
        let text = b"aaaaabbbbbccccc";
        let sa = create_test_suffix_array(text);
        let sa_slice: Vec<i32> = sa.as_slice().iter().map(|&x| x as i32).collect();
        let cache = DfaCache::new_empty(DfaCacheConfig::default(), text.to_vec(), sa_slice);
        let upper_bound = cache.sa_upper_bound(0, sa.as_slice().len(), 0, b'a');
        assert!(upper_bound <= sa.as_slice().len());
        let (lower, upper) = cache.sa_equal_range(0, sa.as_slice().len(), 0, b'b');
        assert!(lower <= upper);
        assert!(upper <= sa.as_slice().len());
    }

    #[test]
    fn test_dfa_state_creation() {
        let state = DfaState::new(5, 10, 20);
        assert_eq!(state.parent, 5);
        assert_eq!(state.suffix_low, 10);
        assert_eq!(state.suffix_hig, 20);
        // frequency = suffix interval width.
        assert_eq!(state.frequency(), 10);
        assert_eq!(state.child0, 0);
        assert_eq!(state.zlen_lo, 0);
    }

    #[test]
    fn test_bfs_queue_elem() {
        let elem = BfsQueueElem { depth: 3, state: 42 };
        assert_eq!(elem.depth, 3);
        assert_eq!(elem.state, 42);
    }

    #[test]
    fn test_max_length_limiting() {
        let text = b"long pattern for testing maximum length limits";
        let sa = create_test_suffix_array(text);
        let config = DfaCacheConfig::default();
        let mut cache = DfaCache::build_from_suffix_array(&sa, text, &config, 1, 5).unwrap();
        let input = b"long pattern";
        let result = cache.find_longest_prefix(input, 4).unwrap();
        if let Some(cache_match) = result {
            assert!(cache_match.length <= 4);
        }
    }
}