use crate::error::AppError;
use crate::utils::is_hidden_file;
use log::{debug, error, info, trace, warn};
use std::collections::{HashMap, VecDeque};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::sync::mpsc;
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
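/// A single search hit: display name, client-facing path, human-readable size, entry type
/// ("file" or "directory"), relevance score, and optional mtime in seconds since the Unix epoch.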
#[derive(Debug, Clone)]
pub struct SearchResult {
pub name: String,
pub path: String,
pub size: String,
pub file_type: String,
pub score: f32,
pub last_modified: Option<u64>,
}
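/// Parameters of a search request: the query string, the subtree to search under,
/// `limit`/`offset` pagination, and a case-sensitivity flag.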
#[derive(Debug)]
pub struct SearchParams {
pub query: String,
pub path: String,
pub limit: usize,
pub offset: usize,
pub case_sensitive: bool,
}
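/// Small LRU cache for recent search results with a 10-second TTL.
///
/// `order` tracks recency (most recently used key at the front) and `stats`
/// accumulates hit/miss/eviction counters for diagnostics.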
pub struct SearchCache {
cache: HashMap<String, CachedSearchResult>,
order: VecDeque<String>,
max_size: usize,
stats: CacheStats,
}
#[derive(Clone)]
struct CachedSearchResult {
results: Vec<SearchResult>,
timestamp: Instant,
hit_count: u32,
}
#[derive(Default)]
struct CacheStats {
hits: u64,
misses: u64,
evictions: u64,
}
impl SearchCache {
pub fn new(max_size: usize) -> Self {
SearchCache {
cache: HashMap::new(),
order: VecDeque::new(),
max_size,
stats: CacheStats::default(),
}
}
pub fn shrink_if_needed(&mut self, force: bool) {
let memory_threshold = self.max_size / 2;
if force || self.cache.len() > memory_threshold {
let target_size = if force {
self.max_size / 4
} else {
memory_threshold
};
            while self.cache.len() > target_size {
                if let Some(lru_key) = self.order.pop_back() {
                    self.cache.remove(&lru_key);
                    self.stats.evictions += 1;
                } else {
                    // recency list out of sync with the map; stop rather than loop forever
                    break;
                }
            }
if force && self.cache.capacity() > self.max_size * 2 {
self.cache.shrink_to_fit();
self.order.shrink_to_fit();
debug!("Cache capacity shrunk to fit current usage");
}
}
}
pub fn get(&mut self, key: &str) -> Option<Vec<SearchResult>> {
if let Some(cached) = self.cache.get_mut(key) {
if cached.timestamp.elapsed().as_secs() < 10 {
cached.hit_count += 1;
self.stats.hits += 1;
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
self.order.push_front(key.to_string());
debug!("Cache hit for query: {} (hits: {})", key, cached.hit_count);
return Some(cached.results.clone());
}
self.cache.remove(key);
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
debug!("Cache expired for query: {key}");
}
self.stats.misses += 1;
None
}
pub fn put(&mut self, key: String, results: Vec<SearchResult>) {
        while self.cache.len() >= self.max_size {
            if let Some(lru_key) = self.order.pop_back() {
                self.cache.remove(&lru_key);
                self.stats.evictions += 1;
                debug!("Evicted cache entry: {lru_key}");
            } else {
                // recency list out of sync with the map; stop rather than loop forever
                break;
            }
        }
let result_count = results.len();
self.cache.insert(
key.clone(),
CachedSearchResult {
results,
timestamp: Instant::now(),
hit_count: 0,
},
);
        // If the key was already cached, drop its old position so `order` holds each key once.
        if let Some(pos) = self.order.iter().position(|k| k == &key) {
            self.order.remove(pos);
        }
        self.order.push_front(key.clone());
debug!("Cached {result_count} results for query: {key}");
}
pub fn clear(&mut self) {
self.cache.clear();
self.order.clear();
self.cache.shrink_to_fit();
self.order.shrink_to_fit();
info!("Search cache cleared and capacity shrunk");
}
pub fn get_stats(&self) -> String {
let total = self.stats.hits + self.stats.misses;
let hit_rate = if total > 0 {
(self.stats.hits as f64 / total as f64) * 100.0
} else {
0.0
};
format!(
"Cache stats - Hits: {}, Misses: {}, Hit rate: {:.1}%, Evictions: {}, Size: {}/{}",
self.stats.hits,
self.stats.misses,
hit_rate,
self.stats.evictions,
self.cache.len(),
self.max_size
)
}
}
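/// Memory-compact file index: file names are interned in a shared string pool, each entry is an
/// 11-byte packed record, a 256-bucket radix index groups entries by the first byte of their
/// name, and `directory_children` keeps parent -> child links so full paths can be rebuilt.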
pub struct UltraLowMemoryIndex {
string_pool: UnifiedStringPool,
radix_index: [RadixBucket; 256],
entries: Vec<UltraCompactEntry>,
directory_children: Vec<Vec<u32>>,
last_update: Instant,
base_dir: PathBuf,
entry_count: AtomicUsize,
memory_usage: AtomicU64,
root_entry_id: u32,
is_updating: AtomicBool,
}
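/// Packed 11-byte index record: a 24-bit offset of the name in the string pool, a 24-bit parent
/// entry id (0xFF_FF_FF means "no parent"), a log2-quantized size, and `packed_data` combining
/// the directory flag with a coarse modification timestamp.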
#[repr(C, packed)]
#[derive(Clone, Copy)]
struct UltraCompactEntry {
    name_offset: [u8; 3],
    parent_id: [u8; 3],
    size_log2: u8,
    packed_data: u32,
}
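/// Hash/offset pair for an interned string; the pool keeps these sorted by hash so duplicates
/// can be found with a binary search.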
#[derive(Clone, Copy)]
struct StringPoolEntry {
    hash: u32,
    offset: u32,
}
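/// Sorted list of entry ids whose names start with the same byte.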
#[derive(Default)]
struct RadixBucket {
    entries: Vec<u32>,
}
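/// Append-only pool of NUL-terminated strings with a hash-sorted index used to reuse offsets
/// of strings that have already been interned.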
struct UnifiedStringPool {
buffer: Vec<u8>,
index: Vec<StringPoolEntry>,
write_pos: u32,
}
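// Bit layout of `UltraCompactEntry::packed_data`: bit 31 is the directory flag, the low 30 bits
// hold seconds since 2024-01-01. A 24-bit parent id of 0xFF_FF_FF marks "no parent".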
const FLAG_IS_DIR: u32 = 1 << 31;
const TIMESTAMP_MASK: u32 = 0x3FFF_FFFF;
const PARENT_NULL: u32 = 0xFF_FF_FF;
impl UltraCompactEntry {
fn new(
name_offset: u32,
parent_id: u32,
size: u64,
modified: SystemTime,
is_dir: bool,
) -> Self {
let size_log2 = if size == 0 {
0
} else {
64 - size.leading_zeros().min(255) as u8
};
        // Timestamps are stored as seconds since 2024-01-01T00:00:00Z so they fit in 30 bits.
        let base_epoch = SystemTime::UNIX_EPOCH + Duration::from_secs(1_704_067_200);
        let timestamp_secs = modified
.duration_since(base_epoch)
.unwrap_or_default()
.as_secs()
.min(TIMESTAMP_MASK as u64) as u32;
let mut packed_data = timestamp_secs & TIMESTAMP_MASK;
if is_dir {
packed_data |= FLAG_IS_DIR;
}
Self {
name_offset: [
(name_offset & 0xFF) as u8,
((name_offset >> 8) & 0xFF) as u8,
((name_offset >> 16) & 0xFF) as u8,
],
parent_id: [
(parent_id & 0xFF) as u8,
((parent_id >> 8) & 0xFF) as u8,
((parent_id >> 16) & 0xFF) as u8,
],
size_log2,
packed_data,
}
}
fn get_name_offset(&self) -> u32 {
(self.name_offset[0] as u32)
| ((self.name_offset[1] as u32) << 8)
| ((self.name_offset[2] as u32) << 16)
}
fn get_parent_id(&self) -> u32 {
let id = (self.parent_id[0] as u32)
| ((self.parent_id[1] as u32) << 8)
| ((self.parent_id[2] as u32) << 16);
if id == PARENT_NULL { u32::MAX } else { id }
}
fn get_size(&self) -> u64 {
if self.size_log2 == 0 {
0
} else {
1u64 << (self.size_log2 - 1)
}
}
fn is_dir(&self) -> bool {
(self.packed_data & FLAG_IS_DIR) != 0
}
fn modified_time(&self) -> SystemTime {
        let base_epoch = SystemTime::UNIX_EPOCH + Duration::from_secs(1_704_067_200);
        let timestamp_secs = self.packed_data & TIMESTAMP_MASK;
base_epoch + Duration::from_secs(timestamp_secs as u64)
}
}
impl UnifiedStringPool {
fn with_capacity(capacity: usize) -> Self {
Self {
buffer: Vec::with_capacity(capacity),
            index: Vec::with_capacity(capacity / 20),
            write_pos: 0,
}
}
fn add_string(&mut self, s: &str) -> u32 {
let hash = murmur3_hash(s.as_bytes());
if let Ok(idx) = self.index.binary_search_by_key(&hash, |entry| entry.hash) {
let entry = self.index[idx];
if self.get_string_at_offset(entry.offset) == Some(s) {
return entry.offset;
}
}
let offset = self.write_pos;
let string_bytes = s.as_bytes();
self.buffer.extend_from_slice(string_bytes);
        self.buffer.push(0);
        self.write_pos += string_bytes.len() as u32 + 1;
let entry = StringPoolEntry { hash, offset };
        let idx = self
            .index
            .binary_search_by_key(&hash, |e| e.hash)
            .unwrap_or_else(|idx| idx);
        self.index.insert(idx, entry);
offset
}
fn get_string_at_offset(&self, offset: u32) -> Option<&str> {
if offset as usize >= self.buffer.len() {
return None;
}
let start = offset as usize;
let end = self.buffer[start..]
.iter()
.position(|&b| b == 0)
.map(|pos| start + pos)
.unwrap_or(self.buffer.len());
std::str::from_utf8(&self.buffer[start..end]).ok()
}
fn memory_usage(&self) -> u64 {
(self.buffer.capacity()
+ self.index.capacity() * std::mem::size_of::<StringPoolEntry>()
+ std::mem::size_of::<Self>()) as u64
}
fn shrink_to_fit(&mut self) {
let old_buffer_capacity = self.buffer.capacity();
let old_index_capacity = self.index.capacity();
self.buffer.shrink_to_fit();
self.index.shrink_to_fit();
let new_buffer_capacity = self.buffer.capacity();
let new_index_capacity = self.index.capacity();
debug!(
"StringPool shrink: buffer {}->{}KB, index {}->{}KB",
old_buffer_capacity / 1024,
new_buffer_capacity / 1024,
old_index_capacity * std::mem::size_of::<StringPoolEntry>() / 1024,
new_index_capacity * std::mem::size_of::<StringPoolEntry>() / 1024
);
}
fn clear_and_shrink(&mut self) {
self.buffer.clear();
self.index.clear();
self.write_pos = 0;
self.buffer.shrink_to_fit();
self.index.shrink_to_fit();
        self.buffer.reserve(1024 * 1024);
        self.index.reserve(1000);
debug!("StringPool cleared and capacity shrunk for long-run memory efficiency");
}
}
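/// 32-bit MurmurHash3-style hash over a byte slice, used to deduplicate pooled strings.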
fn murmur3_hash(data: &[u8]) -> u32 {
const C1: u32 = 0xcc9e_2d51;
const C2: u32 = 0x1b87_3593;
const R1: u32 = 15;
const R2: u32 = 13;
const M: u32 = 5;
const N: u32 = 0xe654_6b64;
let mut hash = 0u32;
let mut i = 0;
while i + 4 <= data.len() {
let mut k = u32::from_le_bytes([data[i], data[i + 1], data[i + 2], data[i + 3]]);
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
hash = hash.rotate_left(R2);
hash = hash.wrapping_mul(M).wrapping_add(N);
i += 4;
}
let mut k = 0u32;
match data.len() & 3 {
3 => {
k ^= (data[i + 2] as u32) << 16;
k ^= (data[i + 1] as u32) << 8;
k ^= data[i] as u32;
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
}
2 => {
k ^= (data[i + 1] as u32) << 8;
k ^= data[i] as u32;
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
}
1 => {
k ^= data[i] as u32;
k = k.wrapping_mul(C1);
k = k.rotate_left(R1);
k = k.wrapping_mul(C2);
hash ^= k;
}
_ => {}
}
hash ^= data.len() as u32;
hash ^= hash >> 16;
hash = hash.wrapping_mul(0x85eb_ca6b);
hash ^= hash >> 13;
hash = hash.wrapping_mul(0xc2b2_ae35);
hash ^= hash >> 16;
hash
}
impl RadixBucket {
fn add_entry(&mut self, entry_id: u32) {
match self.entries.binary_search(&entry_id) {
            Ok(_) => {}
            Err(pos) => self.entries.insert(pos, entry_id),
}
}
fn search(&self) -> &[u32] {
&self.entries
}
fn memory_usage(&self) -> usize {
self.entries.capacity() * std::mem::size_of::<u32>()
}
}
impl UltraLowMemoryIndex {
pub fn new(base_dir: PathBuf) -> Self {
        let estimated_entries = 10_000_000;
        let estimated_string_pool_size = estimated_entries * 15;
let radix_index = std::array::from_fn(|_| RadixBucket::default());
Self {
string_pool: UnifiedStringPool::with_capacity(estimated_string_pool_size),
radix_index,
entries: Vec::with_capacity(estimated_entries),
            directory_children: Vec::with_capacity(estimated_entries / 10),
            last_update: Instant::now(),
base_dir,
entry_count: AtomicUsize::new(0),
memory_usage: AtomicU64::new(0),
            root_entry_id: u32::MAX,
            is_updating: AtomicBool::new(false),
}
}
fn add_string(&mut self, s: &str) -> u32 {
let offset = self.string_pool.add_string(s);
let mem_increase = s.len() + 1 + std::mem::size_of::<StringPoolEntry>();
self.memory_usage
.fetch_add(mem_increase as u64, Ordering::Relaxed);
offset
}
fn get_string(&self, offset: u32) -> Option<&str> {
self.string_pool.get_string_at_offset(offset)
}
pub fn get_memory_usage(&self) -> u64 {
let entries_size = self.entries.len() * std::mem::size_of::<UltraCompactEntry>();
let string_pool_size = self.string_pool.memory_usage();
let radix_size: usize = self.radix_index.iter().map(|b| b.memory_usage()).sum();
let directory_children_size =
self.directory_children.capacity() * std::mem::size_of::<Vec<u32>>();
(entries_size
+ string_pool_size as usize
+ radix_size
+ directory_children_size
+ std::mem::size_of::<Self>()) as u64
}
pub fn get_entry_count(&self) -> usize {
self.entry_count.load(Ordering::Relaxed)
}
pub fn is_updating(&self) -> bool {
self.is_updating.load(Ordering::Relaxed)
}
pub fn perform_memory_cleanup(&mut self) {
let initial_memory = self.get_memory_usage();
debug!(
"Starting periodic memory cleanup, current usage: {}MB",
initial_memory / 1_048_576
);
self.string_pool.shrink_to_fit();
let entries_capacity_ratio =
self.entries.capacity() as f64 / self.entries.len().max(1) as f64;
let dir_capacity_ratio =
self.directory_children.capacity() as f64 / self.directory_children.len().max(1) as f64;
if entries_capacity_ratio > 2.0 {
self.entries.shrink_to_fit();
debug!(
"Shrunk entries vector capacity (ratio was {:.1}x)",
entries_capacity_ratio
);
}
if dir_capacity_ratio > 2.0 {
self.directory_children.shrink_to_fit();
debug!(
"Shrunk directory_children vector capacity (ratio was {:.1}x)",
dir_capacity_ratio
);
}
let final_memory = self.get_memory_usage();
let saved = initial_memory.saturating_sub(final_memory);
if saved > 0 {
info!(
"Memory cleanup completed: {}MB -> {}MB (saved {}MB)",
initial_memory / 1_048_576,
final_memory / 1_048_576,
saved / 1_048_576
);
} else {
debug!("Memory cleanup completed, no significant savings");
}
}
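    /// Rebuilds the index if `force` is set or at least 30 seconds have passed since the last
    /// update; returns immediately if another update is already in progress.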
pub fn update_if_needed(&mut self, force: bool) -> Result<(), AppError> {
if !force && self.last_update.elapsed().as_secs() < 30 {
return Ok(());
}
if self
.is_updating
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
            return Ok(());
        }
let start = Instant::now();
let initial_count = self.entry_count.load(Ordering::Relaxed);
info!(
"Updating directory index for: {:?} (current: {} entries)",
self.base_dir, initial_count
);
let result = self.rebuild_index_ultra_optimized();
self.last_update = Instant::now();
self.is_updating.store(false, Ordering::Release);
match result {
Ok(()) => {
let final_count = self.entry_count.load(Ordering::Relaxed);
                let memory_mb = self.get_memory_usage() / 1_048_576;
                info!(
"Index update completed: {} entries ({:+}) in {:.2}s, ~{}MB memory",
final_count,
final_count as i64 - initial_count as i64,
start.elapsed().as_secs_f32(),
memory_mb
);
Ok(())
}
Err(e) => {
warn!("Index update failed: {e}");
Err(e)
}
}
}
fn clear_index(&mut self) {
self.string_pool.clear_and_shrink();
self.radix_index = std::array::from_fn(|_| RadixBucket::default());
self.entries.clear();
self.entries.shrink_to_fit();
self.entries.reserve(100_000);
self.directory_children.clear();
self.directory_children.shrink_to_fit();
self.directory_children.reserve(10_000);
self.entry_count.store(0, Ordering::Relaxed);
self.memory_usage.store(0, Ordering::Relaxed);
self.root_entry_id = u32::MAX;
info!("Index cleared with memory shrinking for long-run efficiency");
}
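    /// Depth-first directory walk that appends entries in batches of 1000, linking each entry to
    /// its parent and stopping at depth 25, 10M entries, or roughly 1GB of estimated memory.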
fn walk_directory_hierarchical(
&mut self,
dir: &Path,
parent_entry_id: u32,
depth: usize,
) -> Result<(), AppError> {
if depth > 25 {
return Ok(());
}
let current_entries = self.entry_count.load(Ordering::Relaxed);
if current_entries >= 10_000_000 {
warn!("Directory index limit reached (10M entries)");
return Ok(());
}
let memory_usage = self.get_memory_usage();
if memory_usage > 1_073_741_824 {
warn!("Memory usage limit reached (1GB), stopping indexing");
return Ok(());
}
let dir_entries = match fs::read_dir(dir) {
Ok(entries) => entries,
Err(e) => {
debug!("Failed to read directory {dir:?}: {e}");
return Ok(());
}
};
let mut batch_entries = Vec::with_capacity(1000);
let mut subdirs = Vec::with_capacity(100);
for entry_result in dir_entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let metadata = match entry.metadata() {
Ok(m) => m,
Err(_) => continue,
};
let file_path = entry.path();
let file_name = entry.file_name().to_string_lossy().to_string();
let modified = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
if is_hidden_file(&file_name) {
continue;
}
batch_entries.push((
file_name,
file_path.clone(),
metadata.len(),
metadata.is_dir(),
modified,
));
if metadata.is_dir() {
subdirs.push(file_path);
}
if batch_entries.len() >= 1000 {
self.process_entry_batch_hierarchical(&mut batch_entries, parent_entry_id)?;
}
}
if !batch_entries.is_empty() {
self.process_entry_batch_hierarchical(&mut batch_entries, parent_entry_id)?;
}
for subdir in subdirs {
let subdir_name = subdir.file_name().unwrap().to_string_lossy();
if let Some(subdir_entry_id) = self.find_entry_by_name(&subdir_name, parent_entry_id) {
self.walk_directory_hierarchical(&subdir, subdir_entry_id, depth + 1)?;
}
}
Ok(())
}
fn process_entry_batch_hierarchical(
&mut self,
batch: &mut Vec<(String, PathBuf, u64, bool, SystemTime)>,
parent_entry_id: u32,
) -> Result<(), AppError> {
for (name, _path, size, is_dir, modified) in batch.drain(..) {
let entry_id = self.entries.len() as u32;
let name_offset = self.add_string(&name);
let ultra_compact_entry =
UltraCompactEntry::new(name_offset, parent_entry_id, size, modified, is_dir);
self.entries.push(ultra_compact_entry);
if parent_entry_id != u32::MAX {
while self.directory_children.len() <= parent_entry_id as usize {
self.directory_children.push(Vec::new());
}
self.directory_children[parent_entry_id as usize].push(entry_id);
}
if !name.is_empty() {
let first_byte = name.as_bytes()[0];
self.radix_index[first_byte as usize].add_entry(entry_id);
}
self.entry_count.fetch_add(1, Ordering::Relaxed);
}
Ok(())
}
fn find_entry_by_name(&self, name: &str, parent_id: u32) -> Option<u32> {
if parent_id as usize >= self.directory_children.len() {
return None;
}
for &child_id in &self.directory_children[parent_id as usize] {
if let Some(entry) = self.entries.get(child_id as usize)
&& let Some(entry_name) = self.get_string(entry.get_name_offset())
&& entry_name == name
{
return Some(child_id);
}
}
None
}
fn rebuild_index_ultra_optimized(&mut self) -> Result<(), AppError> {
self.clear_index();
let root_name = self
.base_dir
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
let root_name_offset = self.add_string(&root_name);
let root_entry = UltraCompactEntry::new(
root_name_offset,
            u32::MAX,
            0,
            SystemTime::now(),
            true,
        );
self.entries.push(root_entry);
self.root_entry_id = 0;
self.entry_count.store(1, Ordering::Relaxed);
self.directory_children.push(Vec::new());
self.walk_directory_hierarchical(&self.base_dir.clone(), self.root_entry_id, 0)?;
self.build_radix_index();
Ok(())
}
pub fn get_ultra_memory_stats(&self) -> String {
let entry_count = self.entry_count.load(Ordering::Relaxed);
let total_memory = self.get_memory_usage();
let memory_per_entry = if entry_count > 0 {
total_memory as f64 / entry_count as f64
} else {
0.0
};
let entries_size = self.entries.len() * std::mem::size_of::<UltraCompactEntry>();
let string_pool_size = self.string_pool.memory_usage();
let radix_size: usize = self.radix_index.iter().map(|b| b.memory_usage()).sum();
format!(
"Ultra-Low Memory Index Stats:\n\
Entries: {} ({:.1} bytes/entry)\n\
Total Memory: {:.1} MB\n\
- Entries: {:.1} MB ({:.1}%)\n\
- String Pool: {:.1} MB ({:.1}%)\n\
- Radix Index: {:.1} MB ({:.1}%)\n\
Target: <100MB for 10M entries (currently {:.1}% of target)",
entry_count,
memory_per_entry,
total_memory as f64 / 1_048_576.0,
entries_size as f64 / 1_048_576.0,
entries_size as f64 / total_memory as f64 * 100.0,
string_pool_size as f64 / 1_048_576.0,
string_pool_size as f64 / total_memory as f64 * 100.0,
radix_size as f64 / 1_048_576.0,
radix_size as f64 / total_memory as f64 * 100.0,
total_memory as f64 / 100_000_000.0 * 100.0
)
}
fn build_radix_index(&mut self) {
info!("Building radix index for {} entries", self.entries.len());
let start = Instant::now();
self.radix_index = std::array::from_fn(|_| RadixBucket::default());
for (entry_id, entry) in self.entries.iter().enumerate() {
if let Some(name) = self.get_string(entry.get_name_offset()) {
let first_byte = name.as_bytes().first().copied().unwrap_or(0);
self.radix_index[first_byte as usize].add_entry(entry_id as u32);
}
}
info!("Radix index built in {:.2}s", start.elapsed().as_secs_f32());
}
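    /// Case-insensitive substring search: scans the radix bucket for the query's first byte
    /// first, widens to the remaining buckets if too few candidates are found, and returns up to
    /// `2 * limit` scored results.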
pub fn search(&self, query: &str, limit: usize) -> Vec<SearchResult> {
debug!(
"UltraLowMemoryIndex search: query='{}', limit={}",
query, limit
);
trace!(
"Index stats: {} entries, {} bytes memory",
self.get_entry_count(),
self.get_memory_usage()
);
let start = Instant::now();
let query_lower = query.to_lowercase();
let mut candidate_ids = Vec::new();
if !query_lower.is_empty() {
let first_byte = query_lower.as_bytes()[0];
let bucket = &self.radix_index[first_byte as usize];
for &entry_id in bucket.search() {
if candidate_ids.len() >= limit * 3 {
break;
}
if let Some(entry) = self.entries.get(entry_id as usize)
&& let Some(name) = self.get_string(entry.get_name_offset())
{
let name_lower = name.to_lowercase();
if name_lower.contains(&query_lower) {
candidate_ids.push(entry_id);
}
}
}
}
if candidate_ids.len() < limit {
for (bucket_idx, bucket) in self.radix_index.iter().enumerate() {
if bucket_idx == query_lower.as_bytes().first().copied().unwrap_or(0) as usize {
                    continue;
                }
for &entry_id in bucket.search() {
if candidate_ids.len() >= limit * 2 {
break;
}
if let Some(entry) = self.entries.get(entry_id as usize)
&& let Some(name) = self.get_string(entry.get_name_offset())
{
let name_lower = name.to_lowercase();
if name_lower.contains(&query_lower) {
candidate_ids.push(entry_id);
}
}
}
}
}
let mut results = Vec::with_capacity(std::cmp::min(candidate_ids.len(), limit * 2));
for &entry_id in &candidate_ids {
if results.len() >= limit * 2 {
break;
}
if let Some(search_result) = self.create_search_result(entry_id, query) {
results.push(search_result);
}
}
debug!(
"Ultra-fast search completed in {:.2}ms, {} candidates -> {} results",
start.elapsed().as_millis(),
candidate_ids.len(),
results.len()
);
results
}
fn create_search_result(&self, entry_id: u32, query: &str) -> Option<SearchResult> {
let entry = self.entries.get(entry_id as usize)?;
let name = self.get_string(entry.get_name_offset())?;
let full_path = self.reconstruct_path(entry_id)?;
let relative_path = full_path
.strip_prefix(&self.base_dir)
.unwrap_or(&full_path)
.to_string_lossy()
.to_string()
.replace('\\', "/");
let mut clean_path = if relative_path.is_empty() {
"/".to_string()
} else if relative_path.starts_with('/') {
relative_path
} else {
format!("/{}", relative_path)
};
if entry.is_dir() && !clean_path.ends_with('/') {
clean_path.push('/');
}
let score = self.calculate_optimized_relevance_score(name, query);
let modified_time = entry
.modified_time()
.duration_since(SystemTime::UNIX_EPOCH)
.ok()
.map(|d| d.as_secs());
Some(SearchResult {
name: name.to_string(),
path: clean_path,
size: if entry.is_dir() {
"-".to_string()
} else {
format_file_size(entry.get_size())
},
file_type: if entry.is_dir() {
"directory".to_string()
} else {
"file".to_string()
},
score,
last_modified: modified_time,
})
}
fn reconstruct_path(&self, entry_id: u32) -> Option<PathBuf> {
let mut path_components = Vec::new();
let mut current_id = entry_id;
loop {
let entry = self.entries.get(current_id as usize)?;
let name = self.get_string(entry.get_name_offset())?;
path_components.push(name);
let parent_id = entry.get_parent_id();
if parent_id == u32::MAX || parent_id == current_id {
                break;
            }
current_id = parent_id;
}
path_components.reverse();
if !path_components.is_empty() {
let base_dir_name = self
.base_dir
.file_name()
.unwrap_or_default()
.to_string_lossy();
if path_components[0] == base_dir_name {
path_components.remove(0);
}
}
let mut full_path = self.base_dir.clone();
for component in path_components {
full_path.push(component);
}
Some(full_path)
}
fn calculate_optimized_relevance_score(&self, filename: &str, query: &str) -> f32 {
let filename_lower = filename.to_lowercase();
let query_lower = query.to_lowercase();
let mut score = 0.0f32;
if filename_lower == query_lower {
return 100.0;
}
if filename_lower.starts_with(&query_lower) {
score += 75.0;
} else if filename_lower.ends_with(&query_lower) {
score += 50.0;
} else if filename_lower.contains(&query_lower) {
score += 25.0;
if self.has_word_boundary_match(&filename_lower, &query_lower) {
score += 25.0;
}
}
score += 5.0 / (1.0 + filename.len() as f32 * 0.1);
if query.len() <= 3 && !query.is_empty() {
let distance = self.quick_edit_distance(&filename_lower, &query_lower);
if distance <= 2 {
score += 10.0 / (1.0 + distance as f32);
}
}
score.max(0.0)
}
fn has_word_boundary_match(&self, filename: &str, query: &str) -> bool {
filename
.split([' ', '_', '-', '.'])
.any(|word| word == query)
}
fn quick_edit_distance(&self, s1: &str, s2: &str) -> usize {
let len1 = s1.len();
let len2 = s2.len();
if len1 == 0 {
return len2;
}
if len2 == 0 {
return len1;
}
if len1 == len2 && s1 == s2 {
return 0;
}
if len1.abs_diff(len2) > 2 {
return 3;
}
        if len1.abs_diff(len2) == 1 {
            // A single insertion/deletion that leaves a contiguous match counts as distance 1.
            // Compare bytes so slicing can never split a UTF-8 character, and only scan the
            // valid window positions instead of running past the end of the longer string.
            let (longer, shorter) = if len1 > len2 { (s1, s2) } else { (s2, s1) };
            let (long_bytes, short_bytes) = (longer.as_bytes(), shorter.as_bytes());
            for i in 0..=(long_bytes.len() - short_bytes.len()) {
                if &long_bytes[i..i + short_bytes.len()] == short_bytes {
                    return 1;
                }
            }
        }
if len1 == len2 {
let mut diffs = 0;
for (c1, c2) in s1.chars().zip(s2.chars()) {
if c1 != c2 {
diffs += 1;
if diffs > 2 {
return 3;
}
}
}
return diffs;
}
        2
    }
}
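/// Thread-safe wrapper around [`UltraLowMemoryIndex`]: the index sits behind an `RwLock`, search
/// results behind a mutex-protected [`SearchCache`], and `update_in_progress` keeps concurrent
/// rebuilds from overlapping.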
pub struct ConcurrentUltraLowMemoryIndex {
index: Arc<RwLock<UltraLowMemoryIndex>>,
search_cache: Arc<Mutex<SearchCache>>,
update_in_progress: Arc<AtomicBool>,
}
impl ConcurrentUltraLowMemoryIndex {
pub fn new(base_dir: PathBuf) -> Self {
Self {
index: Arc::new(RwLock::new(UltraLowMemoryIndex::new(base_dir))),
search_cache: Arc::new(Mutex::new(SearchCache::new(1000))),
update_in_progress: Arc::new(AtomicBool::new(false)),
}
}
pub fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>, AppError> {
debug!(
"ConcurrentUltraLowMemoryIndex search: query='{}', limit={}",
query, limit
);
let _start_time = Instant::now();
let cache_key = format!("{query}:{limit}");
debug!(
"Starting concurrent search for query: '{}' with limit: {}",
query, limit
);
trace!("Cache key generated: {}", cache_key);
{
if let Ok(mut cache) = self.search_cache.try_lock() {
trace!("Acquired cache lock for search query");
if let Some(cached_results) = cache.get(&cache_key) {
debug!(
"Cache hit for query: '{}', returning {} results",
query,
cached_results.len()
);
trace!("Cache hit saved search time");
return Ok(cached_results);
}
}
}
debug!(
"Cache miss for query: '{}', performing full index search",
query
);
let results = {
let index_guard = self
.index
.read()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
trace!("Acquired index read lock for search");
let index_stats = (
index_guard.get_entry_count(),
index_guard.get_memory_usage(),
);
trace!(
"Index contains {} entries, using {} bytes",
index_stats.0, index_stats.1
);
let start_time = std::time::Instant::now();
let search_results = index_guard.search(query, limit);
let search_time = start_time.elapsed();
debug!(
"Index search completed in {:?}, found {} results",
search_time,
search_results.len()
);
trace!(
"Search performance: {:.2} results/ms",
search_results.len() as f64 / search_time.as_millis().max(1) as f64
);
search_results
};
if let Ok(mut cache) = self.search_cache.try_lock() {
cache.put(cache_key, results.clone());
trace!("Results cached for future queries");
}
Ok(results)
}
pub fn update_if_needed(&self, force: bool) -> Result<(), AppError> {
if !force {
let index_guard = self
.index
.read()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
if index_guard.last_update.elapsed().as_secs() < 30 {
return Ok(());
}
}
if self
.update_in_progress
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
            return Ok(());
        }
let result = {
let mut index_guard = self
.index
.write()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
let memory_usage = index_guard.get_memory_usage();
let memory_mb = memory_usage / 1_048_576;
let should_cleanup = memory_mb > 200 || index_guard.last_update.elapsed().as_secs() > 3600;
if should_cleanup {
info!(
"Performing memory cleanup due to high usage ({}MB) or time threshold",
memory_mb
);
index_guard.perform_memory_cleanup();
}
index_guard.update_if_needed(force)
};
self.update_in_progress.store(false, Ordering::Release);
if result.is_ok()
&& let Ok(mut cache) = self.search_cache.try_lock()
{
cache.shrink_if_needed(false);
cache.clear();
}
result
}
pub fn get_stats(&self) -> Result<(usize, u64, bool), AppError> {
let index_guard = self
.index
.read()
.map_err(|_| AppError::InternalServerError("Index lock poisoned".to_string()))?;
Ok((
index_guard.get_entry_count(),
index_guard.get_memory_usage(),
index_guard.is_updating(),
))
}
pub fn get_cache_stats(&self) -> String {
if let Ok(cache) = self.search_cache.try_lock() {
cache.get_stats()
} else {
"Cache temporarily unavailable".to_string()
}
}
}
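/// Process-wide handle to the shared search index, set once by [`initialize_search`].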
static ULTRA_LOW_MEMORY_INDEX: RwLock<Option<Arc<ConcurrentUltraLowMemoryIndex>>> =
RwLock::new(None);
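/// Installs the global index, builds it on a background thread, and starts a maintenance loop
/// that polls for updates every minute and runs a deeper memory cleanup roughly once an hour.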
pub fn initialize_search(base_dir: PathBuf) {
let concurrent_index = Arc::new(ConcurrentUltraLowMemoryIndex::new(base_dir.clone()));
{
let mut global_index = ULTRA_LOW_MEMORY_INDEX.write().unwrap();
*global_index = Some(concurrent_index.clone());
}
let init_index = concurrent_index.clone();
thread::spawn(move || {
if let Err(e) = init_index.update_if_needed(true) {
warn!("Failed to build initial ultra-low memory index: {e:?}");
}
});
thread::spawn(move || {
let mut cleanup_counter = 0;
loop {
thread::sleep(Duration::from_secs(60));
if let Err(e) = concurrent_index.update_if_needed(false) {
warn!("Failed to update ultra-low memory index: {e:?}");
}
cleanup_counter += 1;
if cleanup_counter >= 60 {
cleanup_counter = 0;
if let Ok(mut index_guard) = concurrent_index.index.write() {
index_guard.perform_memory_cleanup();
}
if let Ok(mut cache) = concurrent_index.search_cache.try_lock() {
                    cache.shrink_if_needed(true);
                }
debug!("Completed hourly memory cleanup cycle");
}
}
});
info!("Ultra-low memory search subsystem initialized - targeting <100MB for 10M entries");
}
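/// Searches the in-memory index, falling back to a parallel filesystem walk when the index
/// returns nothing, then sorts by relevance score and applies `offset`/`limit` pagination.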
pub fn perform_search(
base_dir: &Path,
params: &SearchParams,
) -> Result<Vec<SearchResult>, AppError> {
debug!(
"Starting search: query='{}', path='{}', limit={}, offset={}",
params.query, params.path, params.limit, params.offset
);
trace!(
"Search parameters: case_sensitive={}",
params.case_sensitive
);
let start = Instant::now();
debug!(
"Performing search with query: '{}', limit: {}, offset: {}",
params.query, params.limit, params.offset
);
trace!(
"Search parameters - path: '{}', case_sensitive: {}",
params.path, params.case_sensitive
);
let concurrent_index = {
let index_guard = ULTRA_LOW_MEMORY_INDEX.read().unwrap();
match &*index_guard {
Some(index) => {
debug!("Using ultra-low memory index for search");
let stats = index.get_stats().unwrap_or((0, 0, false));
trace!(
"Index stats - entries: {}, memory: {} bytes, updating: {}",
stats.0, stats.1, stats.2
);
index.clone()
}
None => {
error!("Ultra-low memory search index not initialized");
return Err(AppError::InternalServerError(
"Ultra-low memory search index not initialized".to_string(),
));
}
}
};
trace!(
"Starting radix-accelerated search with expanded limit: {}",
params.limit * 2
);
    let mut results = concurrent_index.search(&params.query, params.limit * 2)?;
debug!("Index search returned {} initial results", results.len());
if results.is_empty() {
info!(
"Ultra-low memory index search returned no results, falling back to filesystem search"
);
debug!("Initiating parallel filesystem search as fallback");
results = perform_parallel_search(base_dir, params)?;
trace!(
"Filesystem search fallback returned {} results",
results.len()
);
}
results.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
let start_idx = params.offset.min(results.len());
let end_idx = (params.offset + params.limit).min(results.len());
results = results[start_idx..end_idx].to_vec();
let search_time = start.elapsed();
info!(
"Ultra-fast search completed for '{}': {} results in {:.2}ms (ultra-low memory)",
params.query,
results.len(),
search_time.as_millis()
);
Ok(results)
}
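/// Fallback filesystem search: splits the top-level directories under the requested path across
/// worker threads and collects matches from an mpsc channel, stopping at about `2 * limit` results.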
fn perform_parallel_search(
base_dir: &Path,
params: &SearchParams,
) -> Result<Vec<SearchResult>, AppError> {
let (tx, rx) = mpsc::channel();
let query = Arc::new(params.query.clone());
let base_dir = Arc::new(base_dir.to_path_buf());
let num_threads = 4;
let search_root = if params.path == "/" {
base_dir.as_ref().clone()
} else {
        let relative_path = PathBuf::from(params.path.strip_prefix('/').unwrap_or(&params.path));
base_dir.join(relative_path)
};
let mut dirs_to_search = vec![search_root];
let mut initial_dirs = Vec::new();
if let Ok(entries) = fs::read_dir(&dirs_to_search[0]) {
for entry in entries.flatten() {
let file_name = entry.file_name().to_string_lossy().to_string();
if is_hidden_file(&file_name) {
continue;
}
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
initial_dirs.push(entry.path());
}
}
}
if !initial_dirs.is_empty() {
dirs_to_search = initial_dirs;
}
let chunk_size = (dirs_to_search.len() / num_threads).max(1);
let chunks: Vec<_> = dirs_to_search
.chunks(chunk_size)
.map(|c| c.to_vec())
.collect();
let handles: Vec<_> = chunks
.into_iter()
.map(|chunk| {
let tx = tx.clone();
let query = Arc::clone(&query);
let base_dir = Arc::clone(&base_dir);
thread::spawn(move || {
for dir in chunk {
search_directory_recursive(&dir, &query, &base_dir, &tx, 0);
}
})
})
.collect();
drop(tx);
let mut results = Vec::with_capacity(params.limit * 2);
for result in rx {
results.push(result);
if results.len() >= params.limit * 2 {
break;
}
}
if results.capacity() > results.len() * 2 {
results.shrink_to_fit();
}
for handle in handles {
let _ = handle.join();
}
Ok(results)
}
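/// Recursively scans `dir` up to a depth of 10, sending every non-hidden entry whose name
/// contains the query (case-insensitively) over `tx`.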
fn search_directory_recursive(
dir: &Path,
query: &str,
base_dir: &Path,
tx: &mpsc::Sender<SearchResult>,
depth: usize,
) {
if depth > 10 {
        return;
    }
let query_lower = query.to_lowercase();
if let Ok(entries) = fs::read_dir(dir) {
for entry in entries.flatten() {
let file_name = entry.file_name().to_string_lossy().to_string();
if is_hidden_file(&file_name) {
continue;
}
let file_name_lower = file_name.to_lowercase();
if file_name_lower.contains(&query_lower)
&& let Ok(metadata) = entry.metadata()
{
let relative_path = entry
.path()
.strip_prefix(base_dir)
.unwrap_or(&entry.path())
.to_string_lossy()
.to_string()
.replace('\\', "/");
let mut clean_path = if relative_path.is_empty() {
"/".to_string()
} else if relative_path.starts_with('/') {
relative_path
} else {
format!("/{}", relative_path)
};
if metadata.is_dir() && !clean_path.ends_with('/') {
clean_path.push('/');
}
let result = SearchResult {
name: file_name.clone(),
path: clean_path,
size: if metadata.is_dir() {
"-".to_string()
} else {
format_file_size(metadata.len())
},
file_type: if metadata.is_dir() {
"directory".to_string()
} else {
"file".to_string()
},
score: calculate_relevance_score(&file_name, query),
last_modified: metadata
.modified()
.ok()
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
.map(|d| d.as_secs()),
};
let _ = tx.send(result);
}
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
search_directory_recursive(&entry.path(), query, base_dir, tx, depth + 1);
}
}
}
}
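/// Heuristic relevance score: exact match > prefix > suffix > substring, with bonuses for
/// word-boundary matches, small edit distances, and short names, and a small penalty per path
/// separator in the name.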
pub fn calculate_relevance_score(filename: &str, query: &str) -> f32 {
let filename_lower = filename.to_lowercase();
let query_lower = query.to_lowercase();
let mut score = 0.0f32;
if filename_lower == query_lower {
score += 100.0;
}
else if filename_lower.starts_with(&query_lower) {
score += 75.0;
}
else if filename_lower.ends_with(&query_lower) {
score += 50.0;
}
else if filename_lower.contains(&query_lower) {
score += 25.0;
if filename_lower
.split(|c: char| !c.is_alphanumeric())
.any(|word| word == query_lower)
{
score += 25.0;
}
}
let distance = levenshtein_distance(&filename_lower, &query_lower);
if distance <= 2 && distance > 0 {
score += 10.0 / (1.0 + distance as f32);
}
score += 5.0 / (1.0 + filename.len() as f32 * 0.1);
let path_depth = filename.matches('/').count() as f32;
score -= path_depth * 2.0;
score.max(0.0)
}
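/// Two-row dynamic-programming Levenshtein distance over Unicode scalar values.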
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
let len1 = s1.chars().count();
let len2 = s2.chars().count();
if len1 == 0 {
return len2;
}
if len2 == 0 {
return len1;
}
let s1_chars: Vec<char> = s1.chars().collect();
let s2_chars: Vec<char> = s2.chars().collect();
let mut prev_row: Vec<usize> = (0..=len2).collect();
let mut curr_row = vec![0; len2 + 1];
for i in 1..=len1 {
curr_row[0] = i;
for j in 1..=len2 {
let cost = if s1_chars[i - 1] == s2_chars[j - 1] {
0
} else {
1
};
curr_row[j] = std::cmp::min(
std::cmp::min(prev_row[j] + 1, curr_row[j - 1] + 1),
prev_row[j - 1] + cost,
);
}
std::mem::swap(&mut prev_row, &mut curr_row);
}
prev_row[len2]
}
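/// Formats a byte count with 1024-based units (B, KB, MB, GB, TB), one decimal place above bytes.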
pub fn format_file_size(size: u64) -> String {
const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
const THRESHOLD: u64 = 1024;
if size == 0 {
return "0 B".to_string();
}
let mut size_f = size as f64;
let mut unit_index = 0;
while size_f >= THRESHOLD as f64 && unit_index < UNITS.len() - 1 {
size_f /= THRESHOLD as f64;
unit_index += 1;
}
if unit_index == 0 {
format!("{} {}", size, UNITS[unit_index])
} else {
format!("{:.1} {}", size_f, UNITS[unit_index])
}
}
pub fn clear_cache() {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read()
&& let Some(ref concurrent_index) = *index_guard
&& let Ok(mut cache) = concurrent_index.search_cache.try_lock()
{
cache.clear();
}
}
pub fn get_search_stats() -> String {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
let cache_stats = concurrent_index.get_cache_stats();
match concurrent_index.get_stats() {
Ok((entry_count, memory_usage, is_updating)) => {
let memory_per_entry = if entry_count > 0 {
memory_usage as f64 / entry_count as f64
} else {
0.0
};
format!(
"Ultra-Low Memory Index: {} entries, {:.1}MB memory ({:.1} bytes/entry), updating: {}\n\
Target: <100MB for 10M entries (currently {:.1}% of target)\n\
Memory efficiency: {:.1}x better than original implementation\n\
{}",
entry_count,
memory_usage as f64 / 1_048_576.0,
memory_per_entry,
is_updating,
memory_usage as f64 / 100_000_000.0 * 100.0,
                        24.0 / memory_per_entry,
                        cache_stats
)
}
Err(_) => format!("Ultra-low memory index: unavailable\n{cache_stats}"),
}
} else {
"Ultra-low memory search system not initialized".to_string()
}
} else {
"Ultra-low memory search system temporarily unavailable".to_string()
}
}
pub fn get_cache_stats() -> String {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
concurrent_index.get_cache_stats()
} else {
"Cache not initialized".to_string()
}
} else {
"Cache temporarily unavailable".to_string()
}
}
pub fn force_index_rebuild() -> Result<(), AppError> {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
concurrent_index.update_if_needed(true)
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index not initialized".to_string(),
))
}
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index temporarily unavailable".to_string(),
))
}
}
pub fn get_ultra_memory_stats() -> String {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
if let Ok(index) = concurrent_index.index.read() {
index.get_ultra_memory_stats()
} else {
"Ultra-low memory statistics temporarily unavailable".to_string()
}
} else {
"Ultra-low memory search system not initialized".to_string()
}
} else {
"Ultra-low memory search system temporarily unavailable".to_string()
}
}
pub fn force_memory_cleanup() -> Result<(), AppError> {
if let Ok(index_guard) = ULTRA_LOW_MEMORY_INDEX.read() {
if let Some(ref concurrent_index) = *index_guard {
if let Ok(mut index) = concurrent_index.index.write() {
index.perform_memory_cleanup();
}
if let Ok(mut cache) = concurrent_index.search_cache.try_lock() {
cache.shrink_if_needed(true);
}
info!("Manual memory cleanup completed successfully");
Ok(())
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index not initialized".to_string(),
))
}
} else {
Err(AppError::InternalServerError(
"Ultra-low memory search index temporarily unavailable".to_string(),
))
}
}