use crate::error::AppError;
use crate::utils::is_hidden_file;
use log::{debug, error, info, trace, warn};
use std::collections::{HashMap, VecDeque};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
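/// A single search hit: display name, client-facing path, human-readable size, entry type
/// ("file" or "directory"), relevance score, and optional mtime in seconds since the Unix epoch.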
#[derive(Debug, Clone)]
pub struct SearchResult {
pub name: String,
pub path: String,
pub size: String,
pub file_type: String,
pub score: f32,
pub last_modified: Option<u64>,
}
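/// Parameters of a search request: the query string, the subtree to search under,
/// `limit`/`offset` pagination, and a case-sensitivity flag.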
#[derive(Debug)]
pub struct SearchParams {
pub query: String,
pub path: String,
pub limit: usize,
pub offset: usize,
pub case_sensitive: bool,
}
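/// Small LRU cache for recent search results with a 10-second TTL.
///
/// `order` tracks recency (most recently used key at the front) and `stats`
/// accumulates hit/miss/eviction counters for diagnostics.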
pub struct SearchCache {
cache: HashMap<String, CachedSearchResult>,
order: VecDeque<String>,
max_size: usize,
stats: CacheStats,
}
#[derive(Clone)]
struct CachedSearchResult {
results: Vec<SearchResult>,
timestamp: Instant,
hit_count: u32,
}
#[derive(Default)]
struct CacheStats {
hits: u64,
misses: u64,
evictions: u64,
}
impl SearchCache {
pub fn new(max_size: usize) -> Self {
SearchCache {
cache: HashMap::new(),
order: VecDeque::new(),
max_size,
stats: CacheStats::default(),
}
}
pub fn shrink_if_needed(&mut self, force: bool) {
let memory_threshold = self.max_size / 2;
if force || self.cache.len() > memory_threshold {
let target_size = if force {
self.max_size / 4
} else {
memory_threshold
};
            while self.cache.len() > target_size {
                if let Some(lru_key) = self.order.pop_back() {
                    self.cache.remove(&lru_key);
                    self.stats.evictions += 1;
                } else {
                    // recency list out of sync with the map; stop rather than loop forever
                    break;
                }
            }
if force && self.cache.capacity() > self.max_size * 2 {
self.cache.shrink_to_fit();
self.order.shrink_to_fit();
debug!("Cache capacity shrunk to fit current usage");
}
}
}
pub fn get(&mut self, key: &str) -> Option<Vec<SearchResult>> {
if let Some(cached) = self.cache.get_mut(key) {
if cached.timestamp.elapsed().as_secs() < 10 {
cached.hit_count += 1;
self.stats.hits += 1;
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
self.order.push_front(key.to_string());
debug!("Cache hit for query: {} (hits: {})", key, cached.hit_count);
return Some(cached.results.clone());
}
self.cache.remove(key);
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
debug!("Cache expired for query: {key}");
}
self.stats.misses += 1;
None
}
pub fn put(&mut self, key: String, results: Vec<SearchResult>) {
        while self.cache.len() >= self.max_size {
            if let Some(lru_key) = self.order.pop_back() {
                self.cache.remove(&lru_key);
                self.stats.evictions += 1;
                debug!("Evicted cache entry: {lru_key}");
            } else {
                // recency list out of sync with the map; stop rather than loop forever
                break;
            }
        }
let result_count = results.len();
self.cache.insert(
key.clone(),
CachedSearchResult {
results,
timestamp: Instant::now(),
hit_count: 0,
},
);
        // If the key was already cached, drop its old position so `order` holds each key once.
        if let Some(pos) = self.order.iter().position(|k| k == &key) {
            self.order.remove(pos);
        }
        self.order.push_front(key.clone());
debug!("Cached {result_count} results for query: {key}");
}
pub fn clear(&mut self) {
self.cache.clear();
self.order.clear();
self.cache.shrink_to_fit();
self.order.shrink_to_fit();
info!("Search cache cleared and capacity shrunk");
}
pub fn get_stats(&self) -> String {
let total = self.stats.hits + self.stats.misses;
let hit_rate = if total > 0 {
(self.stats.hits as f64 / total as f64) * 100.0
} else {
0.0
};
format!(
"Cache stats - Hits: {}, Misses: {}, Hit rate: {:.1}%, Evictions: {}, Size: {}/{}",
self.stats.hits,
self.stats.misses,
hit_rate,
self.stats.evictions,
self.cache.len(),
self.max_size
)
}
}
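/// Memory-compact file index: file names are interned in a shared string pool, each entry is an
/// 11-byte packed record, a 256-bucket radix index groups entries by the first byte of their
/// name, and `directory_children` keeps parent -> child links so full paths can be rebuilt.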
pub struct UltraLowMemoryIndex {
string_pool: UnifiedStringPool,
radix_index: [RadixBucket; 256],
entries: Vec<UltraCompactEntry>,
directory_children: Vec<Vec<u32>>,
last_update: Instant,
base_dir: PathBuf,
entry_count: AtomicUsize,
memory_usage: AtomicU64,
root_entry_id: u32,
is_updating: AtomicBool,
}
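/// Packed 11-byte index record: a 24-bit offset of the name in the string pool, a 24-bit parent
/// entry id (0xFF_FF_FF means "no parent"), a log2-quantized size, and `packed_data` combining
/// the directory flag with a coarse modification timestamp.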
#[repr(C, packed)]
#[derive(Clone, Copy)]
struct UltraCompactEntry {
    name_offset: [u8; 3],
    parent_id: [u8; 3],
    size_log2: u8,
    packed_data: u32,
}
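/// Hash/offset pair for an interned string; the pool keeps these sorted by hash so duplicates
/// can be found with a binary search.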
#[derive(Clone, Copy)]
struct StringPoolEntry {
    hash: u32,
    offset: u32,
}
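/// Sorted list of entry ids whose names start with the same byte.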
#[derive(Default)]
struct RadixBucket {
    entries: Vec<u32>,
}
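/// Append-only pool of NUL-terminated strings with a hash-sorted index used to reuse offsets
/// of strings that have already been interned.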
struct UnifiedStringPool {
buffer: Vec<u8>,
index: Vec<StringPoolEntry>,
write_pos: u32,
}
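// Bit layout of `UltraCompactEntry::packed_data`: bit 31 is the directory flag, the low 30 bits
// hold seconds since 2024-01-01. A 24-bit parent id of 0xFF_FF_FF marks "no parent".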
const FLAG_IS_DIR: u32 = 1 << 31;
const TIMESTAMP_MASK: u32 = 0x3FFF_FFFF;
const PARENT_NULL: u32 = 0xFF_FF_FF;
impl UltraCompactEntry {
fn new(
name_offset: u32,
parent_id: u32,
size: u64,
modified: SystemTime,
is_dir: bool,
) -> Self {
let size_log2 = if size == 0 {
0
} else {
64 - size.leading_zeros().min(255) as u8
};
        // Timestamps are stored as seconds since 2024-01-01T00:00:00Z so they fit in 30 bits.
        let base_epoch = SystemTime::UNIX_EPOCH + Duration::from_secs(1_704_067_200);
        let timestamp_secs = modified
.duration_since(base_epoch)
.unwrap_or_default()
.as_secs()
.min(TIMESTAMP_MASK as u64) as u32;
let mut packed_data = timestamp_secs & TIMESTAMP_MASK;
if is_dir {
packed_data |= FLAG_IS_DIR;
}
Self {
name_offset: [
(name_offset & 0xFF) as u8,
((name_offset >> 8) & 0xFF) as u8,
((name_offset >> 16) & 0xFF) as u8,
],
parent_id: [
(parent_id & 0xFF) as u8,
((parent_id >> 8) & 0xFF) as u8,
((parent_id >> 16) & 0xFF) as u8,
],
size_log2,
packed_data,
}
}
fn get_name_offset(&self) -> u32 {
(self.name_offset[0] as u32)
| ((self.name_offset[1] as u32) << 8)
| ((self.name_offset[2] as u32) << 16)
}
fn get_parent_id(&self) -> u32 {
let id = (self.parent_id[0] as u32)
| ((self.parent_id[1] as u32) << 8)
| ((self.parent_id[2] as u32) << 16);
if id == PARENT_NULL { u32::MAX } else { id }
}
fn get_size(&self) -> u64 {
if self.size_log2 == 0 {
0
} else {
1u64 << (self.size_log2 - 1)
}
}
fn is_dir(&self) -> bool {
(self.packed_data & FLAG_IS_DIR) != 0
}
fn modified_time(&self) -> SystemTime {
        let base_epoch = SystemTime::UNIX_EPOCH + Duration::from_secs(1_704_067_200);
        let timestamp_secs = self.packed_data & TIMESTAMP_MASK;
base_epoch + Duration::from_secs(timestamp_secs as u64)
}
}
impl UnifiedStringPool {
fn with_capacity(capacity: usize) -> Self {
Self {
buffer: Vec::with_capacity(capacity),
            index: Vec::with_capacity(capacity / 20),
            write_pos: 0,
}
}
fn add_string(&mut self, s: &str) -> u32 {
let hash = murmur3_hash(s.as_bytes());
if let Ok(idx) = self.index.binary_search_by_key(&hash, |entry| entry.hash) {
let entry = self.index[idx];
if self.get_string_at_offset(entry.offset) == Some(s) {
return entry.offset;
}
}
let offset = self.write_pos;
let string_bytes = s.as_bytes();
self.buffer.extend_from_slice(string_bytes);
        self.buffer.push(0);
        self.write_pos += string_bytes.len() as u32 + 1;
let entry = StringPoolEntry { hash, offset };
        let idx = self
            .index
            .binary_search_by_key(&hash, |e| e.hash)
            .unwrap_or_else(|idx| idx);
        self.index.insert(idx, entry);
offset
}
fn get_string_at_offset(&self, offset: u32) -> Option<&str> {
if offset as usize >= self.buffer.len() {
return None;
}
let start = offset as usize;
let end = self.buffer[start..]
.iter()
.position(|&b| b == 0)
.map(|pos| start + pos)
.unwrap_or(self.buffer.len());
std::str::from_utf8(&self.buffer[start..end]).ok()
}
fn memory_usage(&self) -> u64 {
(self.buffer.capacity()
+ self.index.capacity() * std::mem::size_of::<StringPoolEntry>()
+ std::mem::size_of::<Self>()) as u64
}
fn shrink_to_fit(&mut self) {
let old_buffer_capacity = self.buffer.capacity();
let old_index_capacity = self.index.capacity();
self.buffer.shrink_to_fit();
self.index.shrink_to_fit();
let new_buffer_capacity = self.buffer.capacity();
let new_index_capacity = self.index.capacity();
debug!(
"StringPool shrink: buffer {}->{}KB, index {}->{}KB",
old_buffer_capacity / 1024,
new_buffer_capacity / 1024,
old_index_capacity * std::mem::size_of::<StringPoolEntry>() / 1024,
new_index_capacity * std::mem::size_of::<StringPoolEntry>() / 1024
);
}
fn clear_and_shrink(&mut self) {
self.buffer.clear();
self.index.clear();
self.write_pos = 0;
self.buffer.shrink_to_fit();
self.index.shrink_to_fit();
        self.buffer.reserve(1024 * 1024);
        self.index.reserve(1000);
debug!("StringPool cleared and capacity shrunk for long-run memory efficiency");
}
}
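/// 32-bit MurmurHash3-style hash over a byte slice, used to deduplicate pooled strings.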
fn murmur3_hash(data: &[u8]) -> u32 {
const C1: u32 = 0xcc9e_2d51;
const C2: u32 = 0x1b87_3593;
const R1: u32 = 15;
const R2: u32 = 13;
const M: u32 = 5;
const N: u32 = 0xe654_6b64;
let mut hash = 0u32;
let mut i = 0;
while i + 4 <= data.len() {
let mut k = u32::from_le_bytes([data[i], data[i + 1], data[i + 2], data[i + 3]]);
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
hash = hash.rotate_left(R2);
hash = hash.wrapping_mul(M).wrapping_add(N);
i += 4;
}
let mut k = 0u32;
match data.len() & 3 {
3 => {
k ^= (data[i + 2] as u32) << 16;
k ^= (data[i + 1] as u32) << 8;
k ^= data[i] as u32;
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
}
2 => {
k ^= (data[i + 1] as u32) << 8;
k ^= data[i] as u32;
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
}
1 => {
k ^= data[i] as u32;
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
}
_ => {}
}
hash ^= data.len() as u32;
hash ^= hash >> 16;
hash = hash.wrapping_mul(0x85eb_ca6b);
hash ^= hash >> 13;
hash = hash.wrapping_mul(0xc2b2_ae35);
hash ^= hash >> 16;
hash
}
impl RadixBucket {
fn add_entry(&mut self, entry_id: u32) {
match self.entries.binary_search(&entry_id) {
            Ok(_) => {}
            Err(pos) => self.entries.insert(pos, entry_id),
}
}
fn search(&self) -> &[u32] {
&self.entries
}
fn memory_usage(&self) -> usize {
self.entries.capacity() * std::mem::size_of::<u32>()
}
}
impl UltraLowMemoryIndex {
pub fn new(base_dir: PathBuf) -> Self {
        let estimated_entries = 10_000_000;
        let estimated_string_pool_size = estimated_entries * 15;
let radix_index = std::array::from_fn(|_| RadixBucket::default());
Self {
string_pool: UnifiedStringPool::with_capacity(estimated_string_pool_size),
radix_index,
entries: Vec::with_capacity(estimated_entries),
            directory_children: Vec::with_capacity(estimated_entries / 10),
            last_update: Instant::now(),
base_dir,
entry_count: AtomicUsize::new(0),
memory_usage: AtomicU64::new(0),
            root_entry_id: u32::MAX,
            is_updating: AtomicBool::new(false),
}
}
fn add_string(&mut self, s: &str) -> u32 {
let offset = self.string_pool.add_string(s);
let mem_increase = s.len() + 1 + std::mem::size_of::<StringPoolEntry>();
self.memory_usage
.fetch_add(mem_increase as u64, Ordering::Relaxed);
offset
}
fn get_string(&self, offset: u32) -> Option<&str> {
self.string_pool.get_string_at_offset(offset)
}
pub fn get_memory_usage(&self) -> u64 {
let entries_size = self.entries.len() * std::mem::size_of::<UltraCompactEntry>();
let string_pool_size = self.string_pool.memory_usage();
let radix_size: usize = self.radix_index.iter().map(|b| b.memory_usage()).sum();
let directory_children_size =
self.directory_children.capacity() * std::mem::size_of::<Vec<u32>>();
(entries_size
+ string_pool_size as usize
+ radix_size
+ directory_children_size
+ std::mem::size_of::<Self>()) as u64
}
pub fn get_entry_count(&self) -> usize {
self.entry_count.load(Ordering::Relaxed)
}
pub fn is_updating(&self) -> bool {
self.is_updating.load(Ordering::Relaxed)
}
pub fn perform_memory_cleanup(&mut self) {
let initial_memory = self.get_memory_usage();
debug!(
"Starting periodic memory cleanup, current usage: {}MB",
initial_memory / 1_048_576
);
self.string_pool.shrink_to_fit();
let entries_capacity_ratio =
self.entries.capacity() as f64 / self.entries.len().max(1) as f64;
let dir_capacity_ratio =
self.directory_children.capacity() as f64 / self.directory_children.len().max(1) as f64;
if entries_capacity_ratio > 2.0 {
self.entries.shrink_to_fit();
debug!(
"Shrunk entries vector capacity (ratio was {:.1}x)",
entries_capacity_ratio
);
}
if dir_capacity_ratio > 2.0 {
self.directory_children.shrink_to_fit();
debug!(
"Shrunk directory_children vector capacity (ratio was {:.1}x)",
dir_capacity_ratio
);
}
let final_memory = self.get_memory_usage();
let saved = initial_memory.saturating_sub(final_memory);
if saved > 0 {
info!(
"Memory cleanup completed: {}MB -> {}MB (saved {}MB)",
initial_memory / 1_048_576,
final_memory / 1_048_576,
saved / 1_048_576
);
} else {
debug!("Memory cleanup completed, no significant savings");
}
}
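    /// Rebuilds the index if `force` is set or at least 30 seconds have passed since the last
    /// update; returns immediately if another update is already in progress.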
pub fn update_if_needed(&mut self, force: bool) -> Result<(), AppError> {
if !force && self.last_update.elapsed().as_secs() < 30 {
return Ok(());
}
if self
.is_updating
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
            return Ok(());
        }
let start = Instant::now();
let initial_count = self.entry_count.load(Ordering::Relaxed);
info!(
"Updating directory index for: {:?} (current: {} entries)",
self.base_dir, initial_count
);
let result = self.rebuild_index_ultra_optimized();
self.last_update = Instant::now();
self.is_updating.store(false, Ordering::Release);
match result {
Ok(()) => {
let final_count = self.entry_count.load(Ordering::Relaxed);
                let memory_mb = self.get_memory_usage() / 1_048_576;
                info!(
"Index update completed: {} entries ({:+}) in {:.2}s, ~{}MB memory",
final_count,
final_count as i64 - initial_count as i64,
start.elapsed().as_secs_f32(),
memory_mb
);
Ok(())
}
Err(e) => {
warn!("Index update failed: {e}");
Err(e)
}
}
}
fn clear_index(&mut self) {
self.string_pool.clear_and_shrink();
self.radix_index = std::array::from_fn(|_| RadixBucket::default());
self.entries.clear();
self.entries.shrink_to_fit();
self.entries.reserve(100_000);
self.directory_children.clear();
self.directory_children.shrink_to_fit();
self.directory_children.reserve(10_000);
self.entry_count.store(0, Ordering::Relaxed);
self.memory_usage.store(0, Ordering::Relaxed);
self.root_entry_id = u32::MAX;
info!("Index cleared with memory shrinking for long-run efficiency");
}
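    /// Depth-first directory walk that appends entries in batches of 1000, linking each entry to
    /// its parent and stopping at depth 25, 10M entries, or roughly 1GB of estimated memory.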
fn walk_directory_hierarchical(
&mut self,
dir: &Path,
parent_entry_id: u32,
depth: usize,
) -> Result<(), AppError> {
if depth > 25 {
return Ok(());
}
let current_entries = self.entry_count.load(Ordering::Relaxed);
if current_entries >= 10_000_000 {
warn!("Directory index limit reached (10M entries)");
return Ok(());
}
let memory_usage = self.get_memory_usage();
if memory_usage > 1_073_741_824 {
warn!("Memory usage limit reached (1GB), stopping indexing");
return Ok(());
}
let dir_entries = match fs::read_dir(dir) {
Ok(entries) => entries,
Err(e) => {
debug!("Failed to read directory {dir:?}: {e}");
return Ok(());
}
};
let mut batch_entries = Vec::with_capacity(1000);
let mut subdirs = Vec::with_capacity(100);
for entry_result in dir_entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let metadata = match entry.metadata() {
Ok(m) => m,
Err(_) => continue,
};
let file_path = entry.path();
let file_name = entry.file_name().to_string_lossy().to_string();
let modified = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
if is_hidden_file(&file_name) {
continue;
}
batch_entries.push((
file_name,
file_path.clone(),
metadata.len(),
metadata.is_dir(),
modified,
));
if metadata.is_dir() {
subdirs.push(file_path);
}
if batch_entries.len() >= 1000 {
self.process_entry_batch_hierarchical(&mut batch_entries, parent_entry_id)?;
}
}
if !batch_entries.is_empty() {
self.process_entry_batch_hierarchical(&mut batch_entries, parent_entry_id)?;
}
for subdir in subdirs {
let subdir_name = subdir.file_name().unwrap().to_string_lossy();
if let Some(subdir_entry_id) = self.find_entry_by_name(&subdir_name, parent_entry_id) {
self.walk_directory_hierarchical(&subdir, subdir_entry_id, depth + 1)?;
}
}
Ok(())
}
fn process_entry_batch_hierarchical(
&mut self,
batch: &mut Vec<(String, PathBuf, u64, bool, SystemTime)>,
parent_entry_id: u32,
) -> Result<(), AppError> {
for (name, _path, size, is_dir, modified) in batch.drain(..) {
let entry_id = self.entries.len() as u32;
let name_offset = self.add_string(&name);
let ultra_compact_entry =
UltraCompactEntry::new(name_offset, parent_entry_id, size, modified, is_dir);
self.entries.push(ultra_compact_entry);
if parent_entry_id != u32::MAX {
while self.directory_children.len() <= parent_entry_id as usize {
self.directory_children.push(Vec::new());
}
self.directory_children[parent_entry_id as usize].push(entry_id);
}
if !name.is_empty() {
let first_byte = name.as_bytes()[0];
self.radix_index[first_byte as usize].add_entry(entry_id);
}
self.entry_count.fetch_add(1, Ordering::Relaxed);
}
Ok(())
}
fn find_entry_by_name(&self, name: &str, parent_id: u32) -> Option<u32> {
if parent_id as usize >= self.directory_children.len() {
return None;
}
for &child_id in &self.directory_children[parent_id as usize] {
if let Some(entry) = self.entries.get(child_id as usize)
&& let Some(entry_name) = self.get_string(entry.get_name_offset())
&& entry_name == name
{
return Some(child_id);
}
}
None
}
fn rebuild_index_ultra_optimized(&mut self) -> Result<(), AppError> {
self.clear_index();
let root_name = self
.base_dir
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
let root_name_offset = self.add_string(&root_name);
let root_entry = UltraCompactEntry::new(
root_name_offset,
            u32::MAX,
            0,
            SystemTime::now(),
            true,
        );
self.entries.push(root_entry);
self.root_entry_id = 0;
self.entry_count.store(1, Ordering::Relaxed);
self.directory_children.push(Vec::new());
self.walk_directory_hierarchical(&self.base_dir.clone(), self.root_entry_id, 0)?;
self.build_radix_index();
Ok(())
}
pub fn get_ultra_memory_stats(&self) -> String {
let entry_count = self.entry_count.load(Ordering::Relaxed);
let total_memory = self.get_memory_usage();
let memory_per_entry = if entry_count > 0 {
total_memory as f64 / entry_count as f64
} else {
0.0
};
let entries_size = self.entries.len() * std::mem::size_of::<UltraCompactEntry>();
let string_pool_size = self.string_pool.memory_usage();
let radix_size: usize = self.radix_index.iter().map(|b| b.memory_usage()).sum();
format!(
"Ultra-Low Memory Index Stats:\n\
Entries: {} ({:.1} bytes/entry)\n\
Total Memory: {:.1} MB\n\
- Entries: {:.1} MB ({:.1}%)\n\
- String Pool: {:.1} MB ({:.1}%)\n\
- Radix Index: {:.1} MB ({:.1}%)\n\
Target: <100MB for 10M entries (currently {:.1}% of target)",
entry_count,
memory_per_entry,
total_memory as f64 / 1_048_576.0,
entries_size as f64 / 1_048_576.0,
entries_size as f64 / total_memory as f64 * 100.0,
string_pool_size as f64 / 1_048_576.0,
string_pool_size as f64 / total_memory as f64 * 100.0,
radix_size as f64 / 1_048_576.0,
radix_size as f64 / total_memory as f64 * 100.0,
total_memory as f64 / 100_000_000.0 * 100.0
)
}
fn build_radix_index(&mut self) {
info!("Building radix index for {} entries", self.entries.len());
let start = Instant::now();
self.radix_index = std::array::from_fn(|_| RadixBucket::default());
for (entry_id, entry) in self.entries.iter().enumerate() {
if let Some(name) = self.get_string(entry.get_name_offset()) {
let first_byte = name.as_bytes().first().copied().unwrap_or(0);
self.radix_index[first_byte as usize].add_entry(entry_id as u32);
}
}
info!("Radix index built in {:.2}s", start.elapsed().as_secs_f32());
}
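    /// Case-insensitive substring search: scans the radix bucket for the query's first byte
    /// first, widens to the remaining buckets if too few candidates are found, and returns up to
    /// `2 * limit` scored results.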
pub fn search(&self, query: &str, limit: usize) -> Vec<SearchResult> {
debug!(
"UltraLowMemoryIndex search: query='{}', limit={}",
query, limit
);
trace!(
"Index stats: {} entries, {} bytes memory",
self.get_entry_count(),
self.get_memory_usage()
);
let start = Instant::now();
let query_lower = query.to_lowercase();
let mut candidate_ids = Vec::new();
if !query_lower.is_empty() {
let first_byte = query_lower.as_bytes()[0];
let bucket = &self.radix_index[first_byte as usize];
for &entry_id in bucket.search() {
if candidate_ids.len() >= limit * 3 {
break;
}
if let Some(entry) = self.entries.get(entry_id as usize)
&& let Some(name) = self.get_string(entry.get_name_offset())
{
let name_lower = name.to_lowercase();
if name_lower.contains(&query_lower) {
candidate_ids.push(entry_id);
}
}
}
}
if candidate_ids.len() < limit {
for (bucket_idx, bucket) in self.radix_index.iter().enumerate() {
if bucket_idx == query_lower.as_bytes().first().copied().unwrap_or(0) as usize {
                    continue;
                }
for &entry_id in bucket.search() {
if candidate_ids.len() >= limit * 2 {
break;
}
if let Some(entry) = self.entries.get(entry_id as usize)
&& let Some(name) = self.get_string(entry.get_name_offset())
{
let name_lower = name.to_lowercase();
if name_lower.contains(&query_lower) {
candidate_ids.push(entry_id);
}
}
}
}
}
let mut results = Vec::with_capacity(std::cmp::min(candidate_ids.len(), limit * 2));
for &entry_id in &candidate_ids {
if results.len() >= limit * 2 {
break;
}
if let Some(search_result) = self.create_search_result(entry_id, query) {
results.push(search_result);
}
}
debug!(
"Ultra-fast search completed in {:.2}ms, {} candidates -> {} results",
start.elapsed().as_millis(),
candidate_ids.len(),
results.len()
);
results
}
fn create_search_result(&self, entry_id: u32, query: &str) -> Option<SearchResult> {
let entry = self.entries.get(entry_id as usize)?;
let name = self.get_string(entry.get_name_offset())?;
let full_path = self.reconstruct_path(entry_id)?;
let relative_path = full_path
.strip_prefix(&self.base_dir)
.unwrap_or(&full_path)
.to_string_lossy()
.to_string()
.replace('\\', "/");
let mut clean_path = if relative_path.is_empty() {
"/".to_string()
} else if relative_path.starts_with('/') {
relative_path
} else {
format!("/{}", relative_path)
};
if entry.is_dir() && !clean_path.ends_with('/') {
clean_path.push('/');
}
let score = self.calculate_optimized_relevance_score(name, query);
let modified_time = entry
.modified_time()
.duration_since(SystemTime::UNIX_EPOCH)
.ok()
.map(|d| d.as_secs());
Some(SearchResult {
name: name.to_string(),
path: clean_path,
size: if entry.is_dir() {
"-".to_string()
} else {
format_file_size(entry.get_size())
},
file_type: if entry.is_dir() {
"directory".to_string()
} else {
"file".to_string()
},
score,
last_modified: modified_time,
})
}
fn reconstruct_path(&self, entry_id: u32) -> Option<PathBuf> {
let mut path_components = Vec::new();
let mut current_id = entry_id;
loop {
let entry = self.entries.get(current_id as usize)?;
let name = self.get_string(entry.get_name_offset())?;
path_components.push(name);
let parent_id = entry.get_parent_id();
if parent_id == u32::MAX || parent_id == current_id {
                break;
            }
current_id = parent_id;
}
path_components.reverse();
if !path_components.is_empty() {
let base_dir_name = self
.base_dir
.file_name()
.unwrap_or_default()
.to_string_lossy();
if path_components[0] == base_dir_name {
path_components.remove(0);
}
}
let mut full_path = self.base_dir.clone();
for component in path_components {
full_path.push(component);
}
Some(full_path)
}
fn calculate_optimized_relevance_score(&self, filename: &str, query: &str) -> f32 {
let filename_lower = filename.to_lowercase();
let query_lower = query.to_lowercase();
let mut score = 0.0f32;
if filename_lower == query_lower {
return 100.0;
}
if filename_lower.starts_with(&query_lower) {
score += 75.0;
} else if filename_lower.ends_with(&query_lower) {
score += 50.0;
} else if filename_lower.contains(&query_lower) {
score += 25.0;
if self.has_word_boundary_match(&filename_lower, &query_lower) {
score += 25.0;
}
}
score += 5.0 / (1.0 + filename.len() as f32 * 0.1);
if query.len() <= 3 && !query.is_empty() {
let distance = self.quick_edit_distance(&filename_lower, &query_lower);
if distance <= 2 {
score += 10.0 / (1.0 + distance as f32);
}
}
score.max(0.0)
}
fn has_word_boundary_match(&self, filename: &str, query: &str) -> bool {
filename
.split([' ', '_', '-', '.'])
.any(|word| word == query)
}
fn quick_edit_distance(&self, s1: &str, s2: &str) -> usize {
let len1 = s1.len();
let len2 = s2.len();
if len1 == 0 {
return len2;
}
if len2 == 0 {
return len1;
}
if len1 == len2 && s1 == s2 {
return 0;
}
if len1.abs_diff(len2) > 2 {
return 3;
}
        if len1.abs_diff(len2) == 1 {
            // A single insertion/deletion that leaves a contiguous match counts as distance 1.
            // Compare bytes so slicing can never split a UTF-8 character, and only scan the
            // valid window positions instead of running past the end of the longer string.
            let (longer, shorter) = if len1 > len2 { (s1, s2) } else { (s2, s1) };
            let (long_bytes, short_bytes) = (longer.as_bytes(), shorter.as_bytes());
            for i in 0..=(long_bytes.len() - short_bytes.len()) {
                if &long_bytes[i..i + short_bytes.len()] == short_bytes {
                    return 1;
                }
            }
        }
if len1 == len2 {
let mut diffs = 0;
for (c1, c2) in s1.chars().zip(s2.chars()) {
if c1 != c2 {
diffs += 1;
if diffs > 2 {
return 3;
}
}
}
return diffs;
}
        2
    }
}
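/// Thread-safe wrapper around [`UltraLowMemoryIndex`]: the index sits behind an `RwLock`, search
/// results behind a mutex-protected [`SearchCache`], and `update_in_progress` keeps concurrent
/// rebuilds from overlapping.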
pub struct ConcurrentUltraLowMemoryIndex {
index: Arc<RwLock<UltraLowMemoryIndex>>,
search_cache: Arc<Mutex<SearchCache>>,
update_in_progress: Arc<AtomicBool>,
}
impl ConcurrentUltraLowMemoryIndex {
pub fn new(base_dir: PathBuf) -> Self {
Self {
index: Arc::new(RwLock::new(UltraLowMemoryIndex::new(base_dir))),
search_cache: Arc::new(Mutex::new(SearchCache::new(1000))),
update_in_progress: Arc::new(AtomicBool::new(false)),
}
}
pub fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>, AppError> {
debug!(
"ConcurrentUltraLowMemoryIndex search: query='{}', limit={}",
query, limit
);
let _start_time = Instant::now();
let cache_key = format!("{query}:{limit}");
debug!(
"Starting concurrent search for query: '{}' with limit: {}",
query, limit
);
trace!("Cache key generated: {}", cache_key);
{
if let Ok(mut cache) = self.search_cache.try_lock() {
trace!("Acquired cache lock for search query");
if let Some(cached_results) = cache.get(&cache_key) {
debug!(
"Cache hit for query: '{}', returning {} results",
query,
cached_results.len()
);
trace!("Cache hit saved search time");
return Ok(cached_results);
}
}
}
debug!(
"Cache miss for query: '{}', performing full index search",
query
);
let results = {
let index_guard = self
.index
.read()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
trace!("Acquired index read lock for search");
let index_stats = (
index_guard.get_entry_count(),
index_guard.get_memory_usage(),
);
trace!(
"Index contains {} entries, using {} bytes",
index_stats.0, index_stats.1
);
let start_time = std::time::Instant::now();
let search_results = index_guard.search(query, limit);
let search_time = start_time.elapsed();
debug!(
"Index search completed in {:?}, found {} results",
search_time,
search_results.len()
);
trace!(
"Search performance: {:.2} results/ms",
search_results.len() as f64 / search_time.as_millis().max(1) as f64
);
search_results
};
if let Ok(mut cache) = self.search_cache.try_lock() {
cache.put(cache_key, results.clone());
trace!("Results cached for future queries");
}
Ok(results)
}
pub fn update_if_needed(&self, force: bool) -> Result<(), AppError> {
if !force {
let index_guard = self
.index
.read()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
if index_guard.last_update.elapsed().as_secs() < 30 {
return Ok(());
}
}
if self
.update_in_progress
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
            return Ok(());
        }
let result = {
let mut index_guard = self
.index
.write()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
let memory_usage = index_guard.get_memory_usage();
let memory_mb = memory_usage / 1_048_576;
let should_cleanup = memory_mb > 200 || index_guard.last_update.elapsed().as_secs() > 3600;
if should_cleanup {
info!(
"Performing memory cleanup due to high usage ({}MB) or time threshold",
memory_mb
);
index_guard.perform_memory_cleanup();
}
index_guard.update_if_needed(force)
};
self.update_in_progress.store(false, Ordering::Release);
if result.is_ok()
&& let Ok(mut cache) = self.search_cache.try_lock()
{
cache.shrink_if_needed(false);
cache.clear();
}
result
}
pub fn get_stats(&self) -> Result<(usize, u64, bool), AppError> {
let index_guard = self
.index
.read()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
Ok((
index_guard.get_entry_count(),
index_guard.get_memory_usage(),
index_guard.is_updating(),
))
}
pub fn get_cache_stats(&self) -> String {
if let Ok(cache) = self.search_cache.try_lock() {
cache.get_stats()
} else {
"Cache temporarily unavailable".to_string()
}
}
}
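/// Process-wide handle to the shared search index, set once by [`initialize_search`].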
static ULTRA_LOW_MEMORY_INDEX: RwLock<Option<Arc<ConcurrentUltraLowMemoryIndex>>> =
RwLock::new(None);
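/// Installs the global index, builds it on a background thread, and starts a maintenance loop
/// that polls for updates every minute and runs a deeper memory cleanup roughly once an hour.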
pub fn initialize_search(base_dir: PathBuf) {
let concurrent_index = Arc::new(ConcurrentUltraLowMemoryIndex::new(base_dir.clone()));
{
let mut global_index = ULTRA_LOW_MEMORY_INDEX.write().unwrap();
*global_index = Some(concurrent_index.clone());
}
let init_index = concurrent_index.clone();
thread::spawn(move || {
if let Err(e) = init_index.update_if_needed(true) {
warn!("Failed to build initial ultra-low memory index: {e:?}");
}
});
thread::spawn(move || {
let mut cleanup_counter = 0;
loop {
thread::sleep(Duration::from_secs(60));
if let Err(e) = concurrent_index.update_if_needed(false) {
warn!("Failed to update ultra-low memory index: {e:?}");
}
cleanup_counter += 1;
if cleanup_counter >= 60 {
cleanup_counter = 0;
if let Ok(mut index_guard) = concurrent_index.index.write() {
index_guard.perform_memory_cleanup();
}
if let Ok(mut cache) = concurrent_index.search_cache.try_lock() {
                    cache.shrink_if_needed(true);
                }
debug!("Completed hourly memory cleanup cycle");
}
}
});
info!("Ultra-low memory search subsystem initialized - targeting <100MB for 10M entries");
}
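/// Searches the in-memory index, falling back to a parallel filesystem walk when the index
/// returns nothing, then sorts by relevance score and applies `offset`/`limit` pagination.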
pub fn perform_search(
base_dir: &Path,
params: &SearchParams,
) -> Result<Vec<SearchResult>, AppError> {
debug!(
"Starting search: query='{}', path='{}', limit={}, offset={}",
params.query, params.path, params.limit, params.offset
);
trace!(
"Search parameters: case_sensitive={}",
params.case_sensitive
);
let start = Instant::now();
debug!(
"Performing search with query: '{}', limit: {}, offset: {}",
params.query, params.limit, params.offset
);
trace!(
"Search parameters - path: '{}', case_sensitive: {}",
params.path, params.case_sensitive
);
let concurrent_index = {
let index_guard = ULTRA_LOW_MEMORY_INDEX.read().unwrap();
match &*index_guard {
Some(index) => {
debug!("Using ultra-low memory index for search");
let stats = index.get_stats().unwrap_or((0, 0, false));
trace!(
"Index stats - entries: {}, memory: {} bytes, updating: {}",
stats.0, stats.1, stats.2
);
index.clone()
}
None => {
error!("Ultra-low memory search index not initialized");
return Err(AppError::InternalServerError(
"Ultra-low memory search index not initialized".to_string(),
));
}
}
};
trace!(
"Starting radix-accelerated search with expanded limit: {}",
params.limit * 2
);
    let mut results = concurrent_index.search(&params.query, params.limit * 2)?;
debug!("Index search returned {} initial results", results.len());
if results.is_empty() {
info!(
"Ultra-low memory index search returned no results, falling back to filesystem search"
);
debug!("Initiating parallel filesystem search as fallback");
results = perform_parallel_search(base_dir, params)?;
trace!(
"Filesystem search fallback returned {} results",
results.len()
);
}
results.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
let start_idx = params.offset.min(results.len());
let end_idx = (params.offset + params.limit).min(results.len());
results = results[start_idx..end_idx].to_vec();
let search_time = start.elapsed();
info!(
"Ultra-fast search completed for '{}': {} results in {:.2}ms (ultra-low memory)",
params.query,
results.len(),
search_time.as_millis()
);
Ok(results)
}
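/// Fallback filesystem search: splits the top-level directories under the requested path across
/// worker threads and collects matches from an mpsc channel, stopping at about `2 * limit` results.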
fn perform_parallel_search(
base_dir: &Path,
params: &SearchParams,
) -> Result<Vec<SearchResult>, AppError> {
let (tx, rx) = mpsc::channel();
let query = Arc::new(params.query.clone());
let base_dir = Arc::new(base_dir.to_path_buf());
let num_threads = 4;
let search_root = if params.path == "/" {
base_dir.as_ref().clone()
} else {
        let relative_path = PathBuf::from(params.path.strip_prefix('/').unwrap_or(&params.path));
base_dir.join(relative_path)
};
let mut dirs_to_search = vec![search_root];
let mut initial_dirs = Vec::new();
if let Ok(entries) = fs::read_dir(&dirs_to_search[0]) {
for entry in entries.flatten() {
let file_name = entry.file_name().to_string_lossy().to_string();
if is_hidden_file(&file_name) {
continue;
}
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
initial_dirs.push(entry.path());
}
}
}
if !initial_dirs.is_empty() {
dirs_to_search = initial_dirs;
}
let chunk_size = (dirs_to_search.len() / num_threads).max(1);
let chunks: Vec<_> = dirs_to_search
.chunks(chunk_size)
.map(|c| c.to_vec())
.collect();
let handles: Vec<_> = chunks
.into_iter()
.map(|chunk| {
let tx = tx.clone();
let query = Arc::clone(&query);
let base_dir = Arc::clone(&base_dir);
thread::spawn(move || {
for dir in chunk {
search_directory_recursive(&dir, &query, &base_dir, &tx, 0);
}
})
})
.collect();
drop(tx);
let mut results = Vec::with_capacity(params.limit * 2);
for result in rx {
results.push(result);
if results.len() >= params.limit * 2 {
break;
}
}
if results.capacity() > results.len() * 2 {
results.shrink_to_fit();
}
for handle in handles {
let _ = handle.join();
}
Ok(results)
}
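/// Recursively scans `dir` up to a depth of 10, sending every non-hidden entry whose name
/// contains the query (case-insensitively) over `tx`.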
fn search_directory_recursive(
dir: &Path,
query: &str,
base_dir: &Path,
tx: &mpsc::Sender<SearchResult>,
depth: usize,
) {
if depth > 10 {
        return;
    }
let query_lower = query.to_lowercase();
if let Ok(entries) = fs::read_dir(dir) {
for entry in entries.flatten() {
let file_name = entry.file_name().to_string_lossy().to_string();
if is_hidden_file(&file_name) {
continue;
}
let file_name_lower = file_name.to_lowercase();
if file_name_lower.contains(&query_lower)
&& let Ok(metadata) = entry.metadata()
{
let relative_path = entry
.path()
.strip_prefix(base_dir)
.unwrap_or(&entry.path())
.to_string_lossy()
.to_string()
.replace('\\', "/");
let mut clean_path = if relative_path.is_empty() {
"/".to_string()
} else if relative_path.starts_with('/') {
relative_path
} else {
format!("/{}", relative_path)
};
if metadata.is_dir() && !clean_path.ends_with('/') {
clean_path.push('/');
}
let result = SearchResult {
name: file_name.clone(),
path: clean_path,
size: if metadata.is_dir() {
"-".to_string()
} else {
format_file_size(metadata.len())
},
file_type: if metadata.is_dir() {
"directory".to_string()
} else {
"file".to_string()
},
score: calculate_relevance_score(&file_name, query),
last_modified: metadata
.modified()
.ok()
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
.map(|d| d.as_secs()),
};
let _ = tx.send(result);
}
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
search_directory_recursive(&entry.path(), query, base_dir, tx, depth + 1);
}
}
}
}
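/// Heuristic relevance score: exact match > prefix > suffix > substring, with bonuses for
/// word-boundary matches, small edit distances, and short names, and a small penalty per path
/// separator in the name.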
pub fn calculate_relevance_score(filename: &str, query: &str) -> f32 {
let filename_lower = filename.to_lowercase();
let query_lower = query.to_lowercase();
let mut score = 0.0f32;
if filename_lower == query_lower {
score += 100.0;
}
else if filename_lower.starts_with(&query_lower) {
score += 75.0;
}
else if filename_lower.ends_with(&query_lower) {
score += 50.0;
}
else if filename_lower.contains(&query_lower) {
score += 25.0;
if filename_lower
.split(|c: char| !c.is_alphanumeric())
.any(|word| word == query_lower)
{
score += 25.0;
}
}
let distance = levenshtein_distance(&filename_lower, &query_lower);
if distance <= 2 && distance > 0 {
score += 10.0 / (1.0 + distance as f32);
}
score += 5.0 / (1.0 + filename.len() as f32 * 0.1);
let path_depth = filename.matches('/').count() as f32;
score -= path_depth * 2.0;
score.max(0.0)
}
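/// Two-row dynamic-programming Levenshtein distance over Unicode scalar values.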
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
let len1 = s1.chars().count();
let len2 = s2.chars().count();
if len1 == 0 {
return len2;
}
if len2 == 0 {
return len1;
}
let s1_chars: Vec<char> = s1.chars().collect();
let s2_chars: Vec<char> = s2.chars().collect();
let mut prev_row: Vec<usize> = (0..=len2).collect();
let mut curr_row = vec![0; len2 + 1];
for i in 1..=len1 {
curr_row[0] = i;
for j in 1..=len2 {
let cost = if s1_chars[i - 1] == s2_chars[j - 1] {
0
} else {
1
};
curr_row[j] = std::cmp::min(
std::cmp::min(prev_row[j] + 1, curr_row[j - 1] + 1),
prev_row[j - 1] + cost,
);
}
std::mem::swap(&mut prev_row, &mut curr_row);
}
prev_row[len2]
}
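/// Formats a byte count with 1024-based units (B, KB, MB, GB, TB), one decimal place above bytes.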
pub fn format_file_size(size: u64) -> String {
const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
const THRESHOLD: u64 = 1024;
if size == 0 {
return "0 B".to_string();
}
let mut size_f = size as f64;
let mut unit_index = 0;
while size_f >= THRESHOLD as f64 && unit_index < UNITS.len() - 1 {
size_f /= THRESHOLD as f64;
unit_index += 1;
}
if unit_index == 0 {
format!("{} {}", size, UNITS[unit_index])
} else {
format!("{:.1} {}", size_f, UNITS[unit_index])
}
}
pub fn clear_cache() {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read()
&& let Some(ref concurrent_index) = *index_guard
&& let Ok(mut cache) = concurrent_index.search_cache.try_lock()
{
cache.clear();
}
}
pub fn get_search_stats() -> String {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
let cache_stats = concurrent_index.get_cache_stats();
match concurrent_index.get_stats() {
Ok((entry_count, memory_usage, is_updating)) => {
let memory_per_entry = if entry_count > 0 {
memory_usage as f64 / entry_count as f64
} else {
0.0
};
format!(
"Ultra-Low Memory Index: {} entries, {:.1}MB memory ({:.1} bytes/entry), updating: {}\n\
Target: <100MB for 10M entries (currently {:.1}% of target)\n\
Memory efficiency: {:.1}x better than original implementation\n\
{}",
entry_count,
memory_usage as f64 / 1_048_576.0,
memory_per_entry,
is_updating,
memory_usage as f64 / 100_000_000.0 * 100.0,
                        24.0 / memory_per_entry,
                        cache_stats
)
}
Err(_) => format!("Ultra-low memory index: unavailable\n{cache_stats}"),
}
} else {
"Ultra-low memory search system not initialized".to_string()
}
} else {
"Ultra-low memory search system temporarily unavailable".to_string()
}
}
pub fn get_cache_stats() -> String {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
concurrent_index.get_cache_stats()
} else {
"Cache not initialized".to_string()
}
} else {
"Cache temporarily unavailable".to_string()
}
}
pub fn force_index_rebuild() -> Result<(), AppError> {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
concurrent_index.update_if_needed(true)
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index not initialized".to_string(),
))
}
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index temporarily unavailable".to_string(),
))
}
}
pub fn get_ultra_memory_stats() -> String {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
if let Ok(index) = concurrent_index.index.read() {
index.get_ultra_memory_stats()
} else {
"Ultra-low memory statistics temporarily unavailable".to_string()
}
} else {
"Ultra-low memory search system not initialized".to_string()
}
} else {
"Ultra-low memory search system temporarily unavailable".to_string()
}
}
pub fn force_memory_cleanup() -> Result<(), AppError> {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
if let Ok(mut index) = concurrent_index.index.write() {
index.perform_memory_cleanup();
}
if let Ok(mut cache) = concurrent_index.search_cache.try_lock() {
cache.shrink_if_needed(true);
}
info!("Manual memory cleanup completed successfully");
Ok(())
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index not initialized".to_string(),
))
}
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index temporarily unavailable".to_string(),
))
}
}