use zipora::fsa::*;
use zipora::error::Result;
#[cfg(test)]
mod fsa_cache_tests {
use super::*;
#[test]
fn test_fsa_cache_basic_operations() {
let mut cache = FsaCache::new().unwrap();
let state_id1 = cache.cache_state(0, 100, true).unwrap();
let state_id2 = cache.cache_state(state_id1, 200, false).unwrap();
assert_ne!(state_id1, state_id2);
let state1 = cache.get_state(state_id1).unwrap();
assert_eq!(state1.child_base, 100);
assert!(state1.is_terminal());
let state2 = cache.get_state(state_id2).unwrap();
assert_eq!(state2.child_base, 200);
assert!(!state2.is_terminal());
let stats = cache.stats();
assert_eq!(stats.cached_states, 2);
assert!(stats.memory_usage > 0);
}
#[test]
fn test_fsa_cache_eviction() {
let config = FsaCacheConfig {
max_states: 3,
strategy: CacheStrategy::BreadthFirst,
..Default::default()
};
let mut cache = FsaCache::with_config(config).unwrap();
let _id1 = cache.cache_state(0, 100, false).unwrap();
let _id2 = cache.cache_state(0, 200, false).unwrap();
let _id3 = cache.cache_state(0, 300, false).unwrap();
assert_eq!(cache.stats().cached_states, 3);
let _id4 = cache.cache_state(0, 400, false).unwrap();
let stats = cache.stats();
assert!(stats.cached_states <= 3);
assert!(stats.evictions > 0);
}
#[test]
fn test_fsa_cache_zero_paths() {
let mut cache = FsaCache::new().unwrap();
let state_id = cache.cache_state(0, 100, false).unwrap();
let mut zp_data = ZeroPathData::new();
zp_data.add_segment(b"hello").unwrap();
zp_data.add_segment(b"world").unwrap();
cache.add_zero_path(state_id, zp_data).unwrap();
let retrieved = cache.get_zero_path(state_id).unwrap();
assert_eq!(retrieved.get_full_path(), b"helloworld");
assert_eq!(retrieved.total_length, 10);
assert!(retrieved.compression_ratio() > 0.0);
}
#[test]
fn test_fsa_cache_configurations() {
let small_cache = FsaCache::with_config(FsaCacheConfig::small()).unwrap();
assert_eq!(small_cache.config().max_states, 10_000);
let large_cache = FsaCache::with_config(FsaCacheConfig::large()).unwrap();
assert_eq!(large_cache.config().max_states, 10_000_000);
let efficient_cache = FsaCache::with_config(FsaCacheConfig::memory_efficient()).unwrap();
assert_eq!(efficient_cache.config().strategy, CacheStrategy::DepthFirst);
}
#[test]
fn test_cached_state_operations() {
let mut state = CachedState::new(100, 50, true, false);
assert_eq!(state.child_base, 100);
assert_eq!(state.parent(), 50);
assert!(state.is_terminal());
assert!(!state.is_free());
state.mark_free();
assert!(state.is_free());
state.mark_used();
assert!(!state.is_free());
}
}
#[cfg(test)]
mod dawg_tests {
use super::*;
#[test]
fn test_dawg_state_creation() {
let state = DawgState::new(100, 50, true, false);
assert_eq!(state.child_base, 100);
assert_eq!(state.parent(), 50);
assert!(state.is_terminal());
assert!(!state.is_final());
}
#[test]
fn test_dawg_state_flags() {
let mut state = DawgState::new(0, 0, false, false);
assert!(!state.is_terminal());
assert!(!state.is_final());
state.set_terminal(true);
assert!(state.is_terminal());
state.set_final(true);
assert!(state.is_final());
}
#[test]
fn test_transition_table_dense() {
let mut table = TransitionTable::new(5, true);
table.add_transition(0, b'a', 1).unwrap();
table.add_transition(0, b'b', 2).unwrap();
table.add_transition(1, b'c', 3).unwrap();
assert_eq!(table.get_transition(0, b'a'), Some(1));
assert_eq!(table.get_transition(0, b'b'), Some(2));
assert_eq!(table.get_transition(1, b'c'), Some(3));
assert_eq!(table.get_transition(0, b'z'), None);
let transitions = table.get_outgoing_transitions(0);
assert_eq!(transitions.len(), 2);
assert!(transitions.contains(&(b'a', 1)));
assert!(transitions.contains(&(b'b', 2)));
}
#[test]
fn test_transition_table_sparse() {
let mut table = TransitionTable::new(5, false);
table.add_transition(0, b'a', 1).unwrap();
table.add_transition(0, b'b', 2).unwrap();
assert_eq!(table.get_transition(0, b'a'), Some(1));
assert_eq!(table.get_transition(0, b'b'), Some(2));
assert_eq!(table.get_transition(0, b'c'), None);
let transitions = table.get_outgoing_transitions(0);
assert_eq!(transitions.len(), 2);
}
#[test]
fn test_nested_trie_dawg_basic() {
let mut dawg = NestedTrieDawg::new().unwrap();
let keys = vec![b"cat".as_slice(), b"car".as_slice(), b"card".as_slice(), b"care".as_slice()];
dawg.build_from_keys(keys).unwrap();
assert!(dawg.contains(b"cat"));
assert!(dawg.contains(b"car"));
assert!(dawg.contains(b"card"));
assert!(dawg.contains(b"care"));
assert!(!dawg.contains(b"dog"));
assert!(!dawg.contains(b"ca"));
let stats = dawg.statistics();
assert_eq!(stats.num_keys, 4);
assert!(stats.num_states > 0);
assert!(stats.memory_usage > 0);
}
#[test]
fn test_nested_trie_dawg_prefix_operations() {
let mut dawg = NestedTrieDawg::new().unwrap();
let keys = vec![b"app".as_slice(), b"apple".as_slice(), b"application".as_slice()];
dawg.build_from_keys(keys).unwrap();
assert!(dawg.contains(b"app"));
assert!(dawg.contains(b"apple"));
assert!(dawg.contains(b"application"));
assert_eq!(dawg.longest_prefix(b"app"), Some(3));
assert_eq!(dawg.longest_prefix(b"apple"), Some(5));
assert_eq!(dawg.longest_prefix(b"applications"), Some(11));
}
#[test]
fn test_dawg_compression() {
let mut dawg = NestedTrieDawg::new().unwrap();
let keys = vec![
b"reading", b"heading", b"leading", b"sending"
];
dawg.build_from_keys(keys).unwrap();
let stats = dawg.statistics();
assert_eq!(stats.num_keys, 4);
assert!(stats.compression_ratio > 0.0);
assert!(stats.num_states >= stats.num_keys); }
#[test]
fn test_dawg_configurations() {
let memory_config = DawgConfig::memory_efficient();
let performance_config = DawgConfig::performance_optimized();
assert!(memory_config.max_states < performance_config.max_states);
assert!(memory_config.compressed_storage);
assert!(performance_config.use_rank_select);
let dawg1 = NestedTrieDawg::with_config(memory_config).unwrap();
let dawg2 = NestedTrieDawg::with_config(performance_config).unwrap();
assert!(dawg1.is_empty());
assert!(dawg2.is_empty());
}
}
#[cfg(test)]
mod graph_walker_tests {
use super::*;
fn create_test_graph() -> std::collections::HashMap<u32, SimpleVertex> {
let mut graph = std::collections::HashMap::new();
let mut v0 = SimpleVertex::with_edges(0, vec![1, 3]);
let mut v1 = SimpleVertex::with_edges(1, vec![2]);
let mut v2 = SimpleVertex::with_terminal(2, true);
let mut v3 = SimpleVertex::with_terminal(3, true);
graph.insert(0, v0);
graph.insert(1, v1);
graph.insert(2, v2);
graph.insert(3, v3);
graph
}
struct TestVisitor {
visited_vertices: Vec<u32>,
visited_edges: Vec<(u32, u32)>,
}
impl TestVisitor {
fn new() -> Self {
Self {
visited_vertices: Vec::new(),
visited_edges: Vec::new(),
}
}
}
impl GraphVisitor<SimpleVertex> for TestVisitor {
fn visit_vertex(&mut self, vertex: &SimpleVertex, _depth: usize) -> Result<bool> {
self.visited_vertices.push(vertex.id);
Ok(true)
}
fn visit_edge(&mut self, from: &SimpleVertex, to: &SimpleVertex) -> Result<bool> {
self.visited_edges.push((from.id, to.id));
Ok(true)
}
}
#[test]
fn test_bfs_graph_walker() {
let graph = create_test_graph();
let mut walker = BfsGraphWalker::new(WalkerConfig::default());
let mut visitor = TestVisitor::new();
walker.walk(graph[&0].clone(), &mut visitor).unwrap();
assert!(visitor.visited_vertices.len() >= 1);
assert!(visitor.visited_vertices.contains(&0));
let stats = walker.stats();
assert!(stats.vertices_visited >= 1);
}
#[test]
fn test_dfs_graph_walker() {
let graph = create_test_graph();
let mut walker = DfsGraphWalker::new(WalkerConfig::default());
let mut visitor = TestVisitor::new();
walker.walk(graph[&0].clone(), &mut visitor).unwrap();
assert!(visitor.visited_vertices.len() >= 1);
assert!(visitor.visited_vertices.contains(&0));
let stats = walker.stats();
assert!(stats.vertices_visited >= 1);
}
#[test]
fn test_cfs_graph_walker() {
let graph = create_test_graph();
let mut walker = CfsGraphWalker::new(WalkerConfig::default());
let mut visitor = TestVisitor::new();
walker.walk(graph[&0].clone(), &mut visitor).unwrap();
let stats = walker.stats();
assert!(stats.vertices_visited > 0);
assert!(stats.edges_traversed > 0);
}
#[test]
fn test_walker_limits() {
let config = WalkerConfig {
max_depth: Some(1),
max_vertices: Some(2),
..Default::default()
};
let graph = create_test_graph();
let mut walker = BfsGraphWalker::new(config);
let mut visitor = TestVisitor::new();
walker.walk(graph[&0].clone(), &mut visitor).unwrap();
let stats = walker.stats();
assert!(stats.vertices_visited <= 2);
assert!(stats.max_depth_reached <= 1);
}
#[test]
fn test_walker_factory() {
let config = WalkerConfig::default();
let mut bfs_walker = GraphWalkerFactory::create_walker::<SimpleVertex>(
WalkMethod::BreadthFirst, config.clone()
);
let mut dfs_walker = GraphWalkerFactory::create_walker::<SimpleVertex>(
WalkMethod::DepthFirst, config.clone()
);
let mut cfs_walker = GraphWalkerFactory::create_walker::<SimpleVertex>(
WalkMethod::CacheFriendly, config
);
let graph = create_test_graph();
let mut visitor = TestVisitor::new();
bfs_walker.walk_dyn(graph[&0].clone(), &mut visitor).unwrap();
assert!(bfs_walker.stats().vertices_visited > 0);
visitor = TestVisitor::new();
dfs_walker.walk_dyn(graph[&0].clone(), &mut visitor).unwrap();
assert!(dfs_walker.stats().vertices_visited > 0);
visitor = TestVisitor::new();
cfs_walker.walk_dyn(graph[&0].clone(), &mut visitor).unwrap();
assert!(cfs_walker.stats().vertices_visited > 0);
}
#[test]
fn test_multi_pass_walker() {
let graph = create_test_graph();
let mut walker = MultiPassWalker::new(WalkerConfig::for_multi_pass());
let mut visitor = TestVisitor::new();
walker.walk_pass(graph[&0].clone(), WalkMethod::BreadthFirst, &mut visitor).unwrap();
walker.walk_pass(graph[&0].clone(), WalkMethod::DepthFirst, &mut visitor).unwrap();
let stats = walker.stats();
assert!(stats.vertices_visited > 0);
assert!(stats.edges_traversed > 0);
}
#[test]
fn test_vertex_color() {
assert_eq!(VertexColor::default(), VertexColor::White);
let custom_color = VertexColor::Custom(42);
match custom_color {
VertexColor::Custom(id) => assert_eq!(id, 42),
_ => panic!("Expected custom color"),
}
}
#[test]
fn test_simple_vertex() {
let vertex = SimpleVertex::with_edges(1, vec![2, 3]);
assert_eq!(vertex.id(), 1);
let edges = vertex.outgoing_edges();
assert_eq!(edges.len(), 2);
let terminal_vertex = SimpleVertex::with_terminal(5, true);
assert!(terminal_vertex.is_terminal());
}
#[test]
fn test_walker_configurations() {
let tree_config = WalkerConfig::for_tree();
assert!(!tree_config.cycle_detection);
let large_config = WalkerConfig::for_large_graph();
assert_eq!(large_config.max_vertices, Some(1_000_000));
let multipass_config = WalkerConfig::for_multi_pass();
assert!(multipass_config.incremental_colors);
}
}
#[cfg(test)]
mod fast_search_tests {
use super::*;
#[test]
fn test_hardware_capabilities() {
let caps = HardwareCapabilities::detect();
let _ = caps.has_sse42;
let _ = caps.has_avx2;
let _ = caps.best_strategy(100, 36);
}
#[test]
fn test_fast_search_strategies() {
let configs = [
FastSearchConfig {
strategy: SearchStrategy::Linear,
..Default::default()
},
FastSearchConfig {
strategy: SearchStrategy::Simd,
..Default::default()
},
FastSearchConfig {
strategy: SearchStrategy::Adaptive,
..Default::default()
},
];
let data = b"hello world hello";
for config in configs {
let mut engine = FastSearchEngine::with_config(config);
let positions = engine.search_byte(data, b'l').unwrap();
assert_eq!(positions, vec![2, 3, 9, 14, 15]);
}
}
#[test]
fn test_fast_search_basic_operations() {
let mut engine = FastSearchEngine::new();
let data = b"hello world hello";
let positions = engine.search_byte(data, b'l').unwrap();
assert_eq!(positions, vec![2, 3, 9, 14, 15]);
assert_eq!(engine.find_first(data, b'l'), Some(2));
assert_eq!(engine.find_last(data, b'l'), Some(15));
assert_eq!(engine.find_first(data, b'z'), None);
assert_eq!(engine.count_byte(data, b'l').unwrap(), 5);
assert_eq!(engine.count_byte(data, b'o').unwrap(), 3);
assert_eq!(engine.count_byte(data, b'z').unwrap(), 0);
}
#[test]
fn test_search_multiple() {
let mut engine = FastSearchEngine::new();
let data = b"hello world";
let targets = [b'l', b'o'];
let results = engine.search_multiple(data, &targets).unwrap();
assert_eq!(results.len(), 2);
assert_eq!(results[0], vec![2, 3, 9]); assert_eq!(results[1], vec![4, 7]); }
#[test]
fn test_adaptive_strategy() {
let mut engine = FastSearchEngine::with_config(FastSearchConfig {
strategy: SearchStrategy::Adaptive,
rank_select_threshold: 10,
..Default::default()
});
let small_data = b"hello";
let positions = engine.search_byte(small_data, b'l').unwrap();
assert_eq!(positions, vec![2, 3]);
let large_data = vec![b'a'; 100];
let positions = engine.search_byte(&large_data, b'a').unwrap();
assert_eq!(positions.len(), 100);
}
#[test]
fn test_rank_select_cache() {
let mut engine = FastSearchEngine::with_config(FastSearchConfig {
strategy: SearchStrategy::RankSelect,
..Default::default()
});
let data = b"hello world hello";
let positions1 = engine.search_byte(data, b'l').unwrap();
let positions2 = engine.search_byte(data, b'l').unwrap();
assert_eq!(positions1, positions2);
assert_eq!(positions1, vec![2, 3, 9, 14, 15]);
engine.clear_cache();
let positions3 = engine.search_byte(data, b'l').unwrap();
assert_eq!(positions1, positions3);
}
#[test]
fn test_fast_search_configurations() {
let small_config = FastSearchConfig::for_small_arrays();
assert_eq!(small_config.strategy, SearchStrategy::Simd);
let large_config = FastSearchConfig::for_large_arrays();
assert_eq!(large_config.strategy, SearchStrategy::RankSelect);
let perf_config = FastSearchConfig::performance_optimized();
assert_eq!(perf_config.strategy, SearchStrategy::Adaptive);
}
#[test]
fn test_search_utils() {
let data = b"hello world";
let targets = [b'l', b'w'];
assert_eq!(fast_search::utils::search_any_of(data, &targets), Some(2));
let no_targets = [b'x', b'z'];
assert_eq!(fast_search::utils::search_any_of(data, &no_targets), None);
let data = b"hello world hello";
let positions = fast_search::utils::search_pattern(data, b"hello");
assert_eq!(positions, vec![0, 12]);
let data = [0xFF, 0x00, 0x0F, 0xF0];
let count = fast_search::utils::popcount(&data);
assert_eq!(count, 16); }
#[test]
fn test_empty_data() {
let mut engine = FastSearchEngine::new();
let empty_data = b"";
assert_eq!(engine.search_byte(empty_data, b'a').unwrap(), Vec::<usize>::new());
assert_eq!(engine.find_first(empty_data, b'a'), None);
assert_eq!(engine.find_last(empty_data, b'a'), None);
assert_eq!(engine.count_byte(empty_data, b'a').unwrap(), 0);
}
#[test]
fn test_large_data_performance() {
let mut engine = FastSearchEngine::new();
let large_data = vec![b'a'; 10000];
let start = std::time::Instant::now();
let count = engine.count_byte(&large_data, b'a').unwrap();
let duration = start.elapsed();
assert_eq!(count, 10000);
assert!(duration.as_millis() < 100);
}
}
#[cfg(test)]
mod integration_tests {
use super::*;
#[test]
fn test_fsa_cache_with_dawg() {
let mut dawg = NestedTrieDawg::with_config(DawgConfig {
enable_cache: true,
cache_config: FsaCacheConfig::small(),
..Default::default()
}).unwrap();
let keys = vec![b"test".as_slice(), b"testing".as_slice(), b"tester".as_slice()];
dawg.build_from_keys(keys).unwrap();
assert!(dawg.contains(b"test"));
assert!(dawg.contains(b"testing"));
assert!(dawg.contains(b"tester"));
assert!(!dawg.contains(b"nonexistent"));
let stats = dawg.statistics();
assert_eq!(stats.num_keys, 3);
assert!(stats.cache_hit_ratio >= 0.0); }
#[test]
fn test_graph_walker_with_dawg() {
let mut dawg = NestedTrieDawg::new().unwrap();
let keys = vec![b"a".as_slice(), b"ab".as_slice(), b"abc".as_slice()];
dawg.build_from_keys(keys).unwrap();
assert!(dawg.contains(b"a"));
assert!(dawg.contains(b"ab"));
assert!(dawg.contains(b"abc"));
assert_eq!(dawg.longest_prefix(b"abc"), Some(3));
}
#[test]
fn test_fast_search_with_trie_keys() {
let mut engine = FastSearchEngine::new();
let trie_data = b"abcdefghijklmnopqrstuvwxyz";
assert_eq!(engine.find_first(trie_data, b'a'), Some(0));
assert_eq!(engine.find_first(trie_data, b'z'), Some(25));
let vowel_positions = engine.search_multiple(trie_data, &[b'a', b'e', b'i', b'o', b'u']).unwrap();
assert_eq!(vowel_positions[0], vec![0]); assert_eq!(vowel_positions[1], vec![4]); assert_eq!(vowel_positions[2], vec![8]); assert_eq!(vowel_positions[3], vec![14]); assert_eq!(vowel_positions[4], vec![20]); }
#[test]
fn test_combined_fsa_infrastructure() {
let mut cache = FsaCache::new().unwrap();
let mut search_engine = FastSearchEngine::new();
let state_id = cache.cache_state(0, 100, true).unwrap();
let cached_state = cache.get_state(state_id).unwrap();
let test_data = b"finite state automata";
let positions = search_engine.search_byte(test_data, b'a').unwrap();
assert!(cached_state.is_terminal());
assert!(!positions.is_empty());
assert!(positions.contains(&20)); }
#[test]
fn test_performance_comparison() {
let test_data = vec![b'x'; 1000];
let mut test_data_with_targets = test_data.clone();
test_data_with_targets[99] = b'a';
test_data_with_targets[499] = b'a';
test_data_with_targets[899] = b'a';
let strategies = [
SearchStrategy::Linear,
SearchStrategy::Simd,
SearchStrategy::Adaptive,
];
for strategy in strategies {
let mut engine = FastSearchEngine::with_config(FastSearchConfig {
strategy,
..Default::default()
});
let start = std::time::Instant::now();
let positions = engine.search_byte(&test_data_with_targets, b'a').unwrap();
let duration = start.elapsed();
assert!(positions.len() >= 2); assert!(positions.len() <= 3); assert!(duration.as_millis() < 50);
}
}
}