colgrep 1.5.0 - Docs.rs

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use anyhow::{Context, Result};
use colored::Colorize;

use colgrep::{
    acquire_index_lock, bre_to_ere, ensure_model, escape_literal_braces, find_parent_index,
    get_index_dir_for_project, get_vector_index_path, index_exists, is_text_format,
    path_contains_ignored_dir, Config, IndexBuilder, IndexState, Searcher, DEFAULT_MODEL,
};

use crate::display::{
    calc_display_ranges, find_representative_lines, group_results_by_file,
    print_highlighted_content, print_highlighted_ranges,
};
use crate::scoring::should_search_from_root;

/// Pre-compiled pattern matcher for efficient repeated matching.
/// Compiling regex is expensive (~microseconds), so we do it once and reuse.
///
/// Backed by `fancy-regex` so lookaround (`(?=...)`, `(?<=...)`) and
/// backreferences (`\1`) work. Standard patterns still go through the
/// fast `regex`-crate engine internally; the fancy backtracking NFA only
/// kicks in for features `regex` cannot handle.
struct PatternMatcher {
    kind: PatternKind,
    case_sensitive: bool,
}

enum PatternKind {
    /// Compiled regex (case-insensitivity is baked into the pattern via
    /// an inline `(?i)` flag — `case_sensitive` is informational here).
    Regex(fancy_regex::Regex),
    /// Pre-normalized literal needle. When `case_sensitive` is false this
    /// has been lowercased and lines are lowercased before `.contains`.
    Literal(String),
}

impl PatternMatcher {
    /// Create a new pattern matcher based on the matching mode.
    /// - `extended_regexp`: Use ERE (extended regular expressions)
    /// - `fixed_strings`: Treat pattern as literal (overrides extended_regexp)
    /// - `word_regexp`: Add word boundaries
    /// - `case_sensitive`: When false, prefixes `(?i)` so matching is
    ///   case-insensitive (colgrep's historical default).
    fn new(
        pattern: &str,
        extended_regexp: bool,
        fixed_strings: bool,
        word_regexp: bool,
        case_sensitive: bool,
    ) -> Self {
        let effective_use_regex = extended_regexp && !fixed_strings;
        let case_prefix = if case_sensitive { "" } else { "(?i)" };
        let literal_needle = || {
            if case_sensitive {
                pattern.to_string()
            } else {
                pattern.to_lowercase()
            }
        };

        if effective_use_regex {
            let ere_pattern = escape_literal_braces(&bre_to_ere(pattern));
            let body = if word_regexp {
                format!(r"\b{}\b", ere_pattern)
            } else {
                ere_pattern
            };
            let regex_pattern = format!("{}{}", case_prefix, body);
            match fancy_regex::Regex::new(&regex_pattern) {
                Ok(re) => PatternMatcher {
                    kind: PatternKind::Regex(re),
                    case_sensitive,
                },
                Err(_) => PatternMatcher {
                    kind: PatternKind::Literal(literal_needle()),
                    case_sensitive,
                },
            }
        } else if word_regexp {
            let escaped = regex::escape(pattern);
            let regex_pattern = format!(r"{}\b{}\b", case_prefix, escaped);
            match fancy_regex::Regex::new(&regex_pattern) {
                Ok(re) => PatternMatcher {
                    kind: PatternKind::Regex(re),
                    case_sensitive,
                },
                Err(_) => PatternMatcher {
                    kind: PatternKind::Literal(literal_needle()),
                    case_sensitive,
                },
            }
        } else {
            PatternMatcher {
                kind: PatternKind::Literal(literal_needle()),
                case_sensitive,
            }
        }
    }

    /// Test whether a single line matches.
    fn line_matches(&self, line: &str) -> bool {
        match &self.kind {
            // `is_match` is `Result<bool>` under fancy-regex (the backtracking
            // engine can fail with `backtrack_limit_exceeded` on adversarial
            // patterns). Treat any such failure as "no match" so a single
            // pathological line cannot abort the whole scan.
            PatternKind::Regex(re) => re.is_match(line).unwrap_or(false),
            PatternKind::Literal(needle) => {
                if self.case_sensitive {
                    line.contains(needle.as_str())
                } else {
                    line.to_lowercase().contains(needle)
                }
            }
        }
    }

    /// Find matching line numbers within a code unit's content.
    /// Returns 1-indexed line numbers where matches were found.
    fn find_matches_in_unit(&self, unit: &colgrep::CodeUnit) -> Vec<usize> {
        let matches: Vec<usize> = unit
            .code
            .lines()
            .enumerate()
            .filter_map(|(i, line)| {
                if self.line_matches(line) {
                    Some(unit.line + i)
                } else {
                    None
                }
            })
            .collect();

        if matches.is_empty() {
            if let PatternKind::Regex(_) = &self.kind {
                return self.literal_fallback(unit);
            }
        }
        matches
    }

    /// Literal fallback for when regex mode produces no matches.
    fn literal_fallback(&self, unit: &colgrep::CodeUnit) -> Vec<usize> {
        let pattern_str = match &self.kind {
            PatternKind::Regex(re) => re.as_str().to_string(),
            PatternKind::Literal(p) => p.clone(),
        };
        let needle = if self.case_sensitive {
            pattern_str
        } else {
            pattern_str.to_lowercase()
        };

        unit.code
            .lines()
            .enumerate()
            .filter_map(|(i, line)| {
                let haystack = if self.case_sensitive {
                    line.to_string()
                } else {
                    line.to_lowercase()
                };
                if haystack.contains(&needle) {
                    Some(unit.line + i)
                } else {
                    None
                }
            })
            .collect()
    }

    /// Scan a file from disk for matching lines. Used by compact regex mode
    /// so we don't miss matches in chunks that were merged into a leader
    /// by `collapse_by_file` (the leader's `unit.code` only carries one
    /// chunk's text). Returns 1-indexed `(line_number, line_content)`
    /// pairs in source order.
    fn find_matches_in_file(&self, file: &Path) -> Vec<(usize, String)> {
        let content = match std::fs::read_to_string(file) {
            Ok(c) => c,
            Err(_) => return Vec::new(),
        };

        let matches: Vec<(usize, String)> = content
            .lines()
            .enumerate()
            .filter_map(|(i, line)| {
                if self.line_matches(line) {
                    Some((i + 1, line.to_string()))
                } else {
                    None
                }
            })
            .collect();

        if !matches.is_empty() {
            return matches;
        }

        // Literal fallback when the regex (often regex metacharacters
        // a user meant literally) finds nothing.
        if let PatternKind::Regex(re) = &self.kind {
            let needle_raw = re.as_str();
            let needle = if self.case_sensitive {
                needle_raw.to_string()
            } else {
                needle_raw.to_lowercase()
            };
            return content
                .lines()
                .enumerate()
                .filter_map(|(i, line)| {
                    let haystack = if self.case_sensitive {
                        line.to_string()
                    } else {
                        line.to_lowercase()
                    };
                    if haystack.contains(&needle) {
                        Some((i + 1, line.to_string()))
                    } else {
                        None
                    }
                })
                .collect();
        }

        matches
    }
}

/// Strip regex special characters from a pattern for use in semantic queries.
///
/// When combining a regex pattern with a semantic query, the regex metacharacters
/// (like `\s`, `\w`, `+`, `*`, etc.) have no semantic meaning and could negatively
/// affect the embedding quality. This function extracts only the meaningful text
/// content from a regex pattern.
///
/// Examples:
/// - `fn\s+\w+` -> `fn`
/// - `async\s+fn` -> `async fn`
/// - `Result<.*>` -> `Result`
/// - `foo|bar` -> `foo bar`
fn strip_regex_for_semantic(pattern: &str) -> String {
    let mut result = String::with_capacity(pattern.len());
    let mut chars = pattern.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            // Handle backslash escapes
            '\\' => {
                if let Some(&next) = chars.peek() {
                    match next {
                        // Common regex character classes - skip both backslash and class char
                        's' | 'S' | 'w' | 'W' | 'd' | 'D' | 'b' | 'B' | 'n' | 'r' | 't' => {
                            chars.next();
                            // Add a space to separate tokens (e.g., "fn\s+bar" -> "fn bar")
                            if !result.ends_with(' ') && !result.is_empty() {
                                result.push(' ');
                            }
                        }
                        // Escaped literal characters - keep the literal
                        '.' | '*' | '+' | '?' | '[' | ']' | '(' | ')' | '{' | '}' | '^' | '$'
                        | '|' | '\\' => {
                            chars.next();
                            result.push(next);
                        }
                        // Other escapes - skip the backslash, keep the char
                        _ => {
                            chars.next();
                            result.push(next);
                        }
                    }
                }
            }
            // Quantifiers and metacharacters - skip them
            '*' | '+' | '?' => {}
            // Anchors - skip them
            '^' | '$' => {}
            // Character class - skip entire [...] block
            #[allow(clippy::while_let_on_iterator)]
            '[' => {
                // Skip until we find the closing ]
                // Note: using while let because we need to call chars.next() inside for escaped chars
                let mut depth = 1;
                while let Some(inner) = chars.next() {
                    if inner == '\\' {
                        // Skip escaped char inside character class
                        chars.next();
                    } else if inner == '[' {
                        depth += 1;
                    } else if inner == ']' {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                }
            }
            // Grouping - skip the parens but process contents
            '(' | ')' => {}
            // Alternation - convert to space
            '|' => {
                if !result.ends_with(' ') && !result.is_empty() {
                    result.push(' ');
                }
            }
            // Quantifier ranges {n,m} - skip entire block
            '{' => {
                for inner in chars.by_ref() {
                    if inner == '}' {
                        break;
                    }
                }
            }
            // Dot (any char) - skip
            '.' => {}
            // Regular characters - keep them
            _ => {
                result.push(c);
            }
        }
    }

    // Clean up multiple spaces and trim
    let cleaned: String = result.split_whitespace().collect::<Vec<_>>().join(" ");

    cleaned
}

/// Merge a semantic query with a sanitized regex pattern, removing duplicate tokens.
///
/// This prevents redundant tokens in the final query when the regex pattern
/// contains words that are already in the semantic query.
///
/// Example:
/// - query: "async function", pattern: "async fn" -> "async function fn"
/// - query: "error handling", pattern: "error" -> "error handling"
fn merge_query_with_pattern(query: &str, sanitized_pattern: &str) -> String {
    if sanitized_pattern.is_empty() {
        return query.to_string();
    }

    // Collect query tokens (lowercase for comparison)
    let query_tokens: std::collections::HashSet<String> =
        query.split_whitespace().map(|s| s.to_lowercase()).collect();

    // Filter pattern tokens to only include those not already in the query
    let new_tokens: Vec<&str> = sanitized_pattern
        .split_whitespace()
        .filter(|token| !query_tokens.contains(&token.to_lowercase()))
        .collect();

    if new_tokens.is_empty() {
        query.to_string()
    } else {
        format!("{} {}", query, new_tokens.join(" "))
    }
}

/// Resolve the model to use: CLI arg > saved config > default
pub fn resolve_model(cli_model: Option<&str>) -> String {
    if let Some(model) = cli_model {
        return model.to_string();
    }

    // Try to load from config
    if let Ok(config) = Config::load() {
        if let Some(model) = config.get_default_model() {
            return model.to_string();
        }
    }

    // Fall back to default
    DEFAULT_MODEL.to_string()
}

/// Resolve top_k: CLI arg > saved config > default
pub fn resolve_top_k(cli_k: Option<usize>, default: usize) -> usize {
    if let Some(k) = cli_k {
        return k;
    }

    // Try to load from config
    if let Ok(config) = Config::load() {
        if let Some(k) = config.get_default_k() {
            return k;
        }
    }

    default
}

/// Resolve context_lines (n): CLI arg > saved config > default
pub fn resolve_context_lines(cli_n: Option<usize>, default: usize) -> usize {
    if let Some(n) = cli_n {
        return n;
    }

    // Try to load from config
    if let Ok(config) = Config::load() {
        if let Some(n) = config.get_default_n() {
            return n;
        }
    }

    default
}

/// Resolve relative_paths: saved config > default (true = relative paths)
pub fn resolve_relative_paths() -> bool {
    if let Ok(config) = Config::load() {
        return config.use_relative_paths();
    }
    true
}

/// Format a path for display, using relative or absolute based on config.
fn display_path(path: &Path, use_relative: bool) -> String {
    let current_dir = std::env::current_dir().ok();
    display_path_with_cwd(path, current_dir.as_deref(), use_relative)
}

fn display_path_with_cwd(path: &Path, cwd: Option<&Path>, use_relative: bool) -> String {
    let normalized_path = normalize_windows_path(path);

    if use_relative {
        if let Some(cwd) = cwd {
            let normalized_cwd = normalize_windows_path(cwd);
            return normalized_path
                .strip_prefix(&normalized_cwd)
                .unwrap_or(&normalized_path)
                .display()
                .to_string();
        }
    }

    normalized_path.display().to_string()
}

fn is_external_project_path(search_path: &Path, cwd: Option<&Path>) -> bool {
    let Some(cwd) = cwd else {
        return false;
    };

    let normalized_search_path = normalize_windows_path(search_path);
    let normalized_cwd = normalize_windows_path(cwd);
    !normalized_search_path.starts_with(&normalized_cwd)
}

fn normalize_windows_path(path: &Path) -> PathBuf {
    #[cfg(windows)]
    {
        use std::path::{Component, Prefix};

        let mut components = path.components();
        match components.next() {
            Some(Component::Prefix(prefix_component)) => match prefix_component.kind() {
                Prefix::VerbatimDisk(drive) => {
                    let mut normalized = PathBuf::from(format!("{}:\\", char::from(drive)));
                    normalized.push(components.as_path());
                    normalized
                }
                Prefix::VerbatimUNC(server, share) => {
                    let mut normalized = PathBuf::from(format!(
                        r"\\{}\{}",
                        server.to_string_lossy(),
                        share.to_string_lossy()
                    ));
                    normalized.push(components.as_path());
                    normalized
                }
                _ => path.to_path_buf(),
            },
            _ => path.to_path_buf(),
        }
    }
    #[cfg(not(windows))]
    {
        path.to_path_buf()
    }
}

/// Resolve verbose: saved config > default (false)
pub fn resolve_verbose() -> bool {
    if let Ok(config) = Config::load() {
        return config.is_verbose();
    }
    false
}

/// Resolve pool_factor: --no-pool > --pool-factor > config > default (2)
pub fn resolve_pool_factor(cli_pool_factor: Option<usize>, no_pool: bool) -> Option<usize> {
    if no_pool {
        return Some(1); // Disable pooling
    }

    if let Some(factor) = cli_pool_factor {
        return Some(factor.max(1)); // Minimum is 1
    }

    // Try to load from config
    if let Ok(config) = Config::load() {
        return Some(config.get_pool_factor());
    }

    // Default pool factor
    Some(colgrep::DEFAULT_POOL_FACTOR)
}

#[allow(clippy::too_many_arguments)]
pub fn cmd_search(
    query: &str,
    paths: &[PathBuf],
    top_k: usize,
    top_k_explicit: bool,
    cli_model: Option<&str>,
    json: bool,
    include_patterns: &[String],
    files_only: bool,
    show_content: bool,
    cli_context_lines: Option<usize>,
    text_pattern: Option<&str>,
    extended_regexp: bool,
    fixed_strings: bool,
    word_regexp: bool,
    case_sensitive: bool,
    exclude_patterns: &[String],
    exclude_dirs: &[String],
    code_only: bool,
    no_fts: bool,
    alpha: Option<f32>,
    pool_factor: Option<usize>,
    auto_confirm: bool,
    static_batch: bool,
) -> Result<()> {
    // Resolve context_lines: CLI > config > default (20)
    let context_lines = resolve_context_lines(cli_context_lines, 20);
    // Resolve relative paths: config > default (false = absolute)
    let use_relative = resolve_relative_paths();

    // When -e is used and the user didn't explicitly pass -k, return *all*
    // matching lines (parity with grep, which has no implicit cap). We keep
    // a finite ceiling so the upstream `top_k * 4` / `top_k * 20` multipliers
    // can't overflow; `usize::MAX / 1024` is still ~1.8e16, effectively
    // unbounded for any real index.
    let regex_unbounded = text_pattern.is_some() && !top_k_explicit;
    let effective_top_k = if regex_unbounded {
        usize::MAX / 1024
    } else {
        top_k
    };

    // Collect results from all paths
    let mut all_results: Vec<colgrep::SearchResult> = Vec::new();
    let mut path_errors: Vec<String> = Vec::new();

    for path in paths {
        match search_single_path(
            query,
            path,
            effective_top_k,
            cli_model,
            json,
            include_patterns,
            files_only,
            text_pattern,
            extended_regexp,
            fixed_strings,
            word_regexp,
            case_sensitive,
            exclude_patterns,
            exclude_dirs,
            code_only,
            no_fts,
            alpha,
            pool_factor,
            auto_confirm,
            static_batch,
        ) {
            Ok(results) => all_results.extend(results),
            Err(e) => {
                let err_msg = format!("{}", e);
                // Check if this is a "path does not exist" error
                if err_msg.contains("Path does not exist:") {
                    // Store error message for later display, continue with other paths
                    path_errors.push(err_msg);
                } else {
                    // For other errors, fail immediately
                    return Err(e);
                }
            }
        }
    }

    // If ALL paths failed, return error with all messages
    if all_results.is_empty() && !path_errors.is_empty() {
        anyhow::bail!("{}", path_errors.join("\n\n"));
    }

    // Print warnings for failed paths (but we have some results)
    if !path_errors.is_empty() && !json && !files_only {
        for err in &path_errors {
            eprintln!("⚠️  {}\n", err);
        }
    }

    // Sort all results by score and take top_k
    all_results.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    // Filter out text/config files if --code-only is enabled
    let filtered_results: Vec<_> = if code_only {
        all_results
            .into_iter()
            .filter(|r| !is_text_format(r.unit.language))
            .collect()
    } else {
        all_results
    };
    let results: Vec<_> = filtered_results.into_iter().take(effective_top_k).collect();

    // When -e is used without -F, automatically enable regex mode (ERE)
    let effective_extended_regexp = extended_regexp || (text_pattern.is_some() && !fixed_strings);

    // Output
    if files_only {
        // -l mode: show only unique filenames
        let mut seen_files = std::collections::HashSet::new();
        for result in &results {
            let file_str = display_path(&result.unit.file, use_relative);
            if seen_files.insert(file_str.clone()) {
                println!("{}", file_str);
            }
        }
    } else if json {
        println!("{}", serde_json::to_string_pretty(&results)?);
    } else {
        if results.is_empty() {
            println!("No results found for: {}", query);
            return Ok(());
        }

        // Resolve verbose mode from config, but force verbose if -c or -n > 0 is used
        let verbose = if show_content || cli_context_lines.is_some_and(|n| n > 0) {
            true // Force verbose when user explicitly requests content display
        } else {
            resolve_verbose()
        };

        // Maximum characters of matching line content to show in compact mode
        const COMPACT_LINE_MAX_CHARS: usize = 120;

        if !verbose {
            let compact_matcher = text_pattern.map(|p| {
                PatternMatcher::new(
                    p,
                    effective_extended_regexp,
                    fixed_strings,
                    word_regexp,
                    case_sensitive,
                )
            });

            if let Some(ref matcher) = compact_matcher {
                // Regex mode: emit *every* matching line in each result file,
                // not just the lines that happen to fall inside the leader
                // chunk's `unit.code`. `collapse_by_file` merges multiple
                // matching chunks of the same file into one leader and only
                // keeps the leader's text, so a per-`unit.code` scan silently
                // drops matches from the merged-in chunks (a `def foo` line
                // at line 150 plus three comment hits at line 800+ would
                // collapse to just one of those, depending on which chunk
                // led the file).
                //
                // Scanning the file from disk keeps the line-count consistent
                // with `grep` while preserving colgrep's score-based file
                // ordering: results are already sorted by score, so the first
                // occurrence of each file wins.
                use std::collections::HashSet;

                let mut seen: HashSet<PathBuf> = HashSet::new();
                let mut per_file: Vec<(&colgrep::SearchResult, Vec<(usize, String)>)> = Vec::new();

                for result in &results {
                    if !seen.insert(result.unit.file.clone()) {
                        continue;
                    }
                    let file_matches = matcher.find_matches_in_file(&result.unit.file);
                    if !file_matches.is_empty() {
                        per_file.push((result, file_matches));
                    }
                }

                // `-k` caps the number of *documents* (files), not lines.
                // Every match in the kept documents is emitted.
                for (result, matching_lines) in &per_file {
                    let file_path = display_path(&result.unit.file, use_relative);
                    for (line_num, raw_line) in matching_lines {
                        let trimmed = raw_line.trim();
                        let truncated: String =
                            trimmed.chars().take(COMPACT_LINE_MAX_CHARS).collect();
                        let suffix = if trimmed.chars().count() > COMPACT_LINE_MAX_CHARS {
                            "..."
                        } else {
                            ""
                        };
                        println!("{}:{}:{}{}", file_path, line_num, truncated, suffix);
                    }
                }
                if !regex_unbounded {
                    eprintln!(
                        "\nShowing matches from top {} document(s); omit -k to see every match.",
                        top_k
                    );
                }
            } else {
                // Semantic-only mode: show filepath:start-end ordered by score
                for result in &results {
                    let file_path = display_path(&result.unit.file, use_relative);
                    println!(
                        "{}:{}-{}",
                        file_path, result.unit.line, result.unit.end_line
                    );
                }
            }
        } else {
            // Verbose mode: full content grouped by file with syntax highlighting

            // Pre-compile pattern matchers ONCE before the display loop.
            // This avoids expensive regex compilation on every result.
            let text_pattern_matcher = text_pattern.map(|p| {
                PatternMatcher::new(
                    p,
                    effective_extended_regexp,
                    fixed_strings,
                    word_regexp,
                    case_sensitive,
                )
            });

            // For query matching (when no -e pattern), use literal matching
            let query_matcher = PatternMatcher::new(query, false, true, false, case_sensitive);

            // Separate results into code files and documents/config files
            let (code_results, doc_results): (Vec<_>, Vec<_>) = results
                .iter()
                .partition(|r| !is_text_format(r.unit.language));

            let half_context = context_lines / 2;
            let has_text_pattern = text_pattern.is_some();

            // Calculate max line number across all results for consistent alignment
            let max_line_num = results.iter().map(|r| r.unit.end_line).max().unwrap_or(1);
            let line_num_width = max_line_num.to_string().len().max(4);

            // Display code results first, grouped by file
            if !code_results.is_empty() {
                let grouped = group_results_by_file(&code_results);
                for (file, file_results) in grouped {
                    // Print file header with relative path
                    println!("file: {}", display_path(&file, use_relative).cyan());
                    for result in file_results {
                        let file_to_read = &result.unit.file;
                        if let Ok(content) = std::fs::read_to_string(file_to_read) {
                            let lines: Vec<&str> = content.lines().collect();
                            let end = result.unit.end_line.min(lines.len());
                            let max_lines = if show_content {
                                usize::MAX
                            } else {
                                context_lines
                            };

                            if has_text_pattern {
                                let file_matches = text_pattern_matcher
                                    .as_ref()
                                    .unwrap()
                                    .find_matches_in_unit(&result.unit);
                                let ranges = calc_display_ranges(
                                    &file_matches,
                                    result.unit.line,
                                    end,
                                    half_context,
                                    max_lines,
                                    true,
                                );
                                print_highlighted_ranges(
                                    file_to_read,
                                    &lines,
                                    &ranges,
                                    end,
                                    line_num_width,
                                );
                            } else {
                                let query_matches =
                                    query_matcher.find_matches_in_unit(&result.unit);
                                if !query_matches.is_empty() {
                                    let ranges = calc_display_ranges(
                                        &query_matches,
                                        result.unit.line,
                                        end,
                                        half_context,
                                        max_lines,
                                        true,
                                    );
                                    print_highlighted_ranges(
                                        file_to_read,
                                        &lines,
                                        &ranges,
                                        end,
                                        line_num_width,
                                    );
                                } else {
                                    // No exact match - find most representative line(s) based on token overlap
                                    let representative_lines = find_representative_lines(
                                        &result.unit.code,
                                        result.unit.line,
                                        query,
                                    );
                                    if !representative_lines.is_empty() {
                                        let ranges = calc_display_ranges(
                                            &representative_lines,
                                            result.unit.line,
                                            end,
                                            half_context,
                                            max_lines,
                                            true,
                                        );
                                        print_highlighted_ranges(
                                            file_to_read,
                                            &lines,
                                            &ranges,
                                            end,
                                            line_num_width,
                                        );
                                    } else {
                                        // Final fallback: show from beginning
                                        let start = result.unit.line.saturating_sub(1);
                                        if start < lines.len() {
                                            print_highlighted_content(
                                                file_to_read,
                                                &lines,
                                                start,
                                                max_lines,
                                                end,
                                                line_num_width,
                                            );
                                        }
                                    }
                                }
                            }
                        }
                    }
                    println!();
                }
            }

            // Display document/config results after, grouped by file
            if !doc_results.is_empty() {
                let grouped = group_results_by_file(&doc_results);
                for (file, file_results) in grouped {
                    println!("file: {}", display_path(&file, use_relative).cyan());
                    for result in file_results {
                        let file_to_read = &result.unit.file;
                        if let Ok(content) = std::fs::read_to_string(file_to_read) {
                            let lines: Vec<&str> = content.lines().collect();
                            let end = result.unit.end_line.min(lines.len());
                            let max_lines = if show_content { 250 } else { context_lines };

                            if has_text_pattern {
                                let file_matches = text_pattern_matcher
                                    .as_ref()
                                    .unwrap()
                                    .find_matches_in_unit(&result.unit);
                                let ranges = calc_display_ranges(
                                    &file_matches,
                                    result.unit.line,
                                    end,
                                    half_context,
                                    max_lines,
                                    true,
                                );
                                print_highlighted_ranges(
                                    file_to_read,
                                    &lines,
                                    &ranges,
                                    end,
                                    line_num_width,
                                );
                            } else {
                                let query_matches =
                                    query_matcher.find_matches_in_unit(&result.unit);
                                if !query_matches.is_empty() {
                                    let ranges = calc_display_ranges(
                                        &query_matches,
                                        result.unit.line,
                                        end,
                                        half_context,
                                        max_lines,
                                        true,
                                    );
                                    print_highlighted_ranges(
                                        file_to_read,
                                        &lines,
                                        &ranges,
                                        end,
                                        line_num_width,
                                    );
                                } else {
                                    // No exact match - find most representative line(s) based on token overlap
                                    let representative_lines = find_representative_lines(
                                        &result.unit.code,
                                        result.unit.line,
                                        query,
                                    );
                                    if !representative_lines.is_empty() {
                                        let ranges = calc_display_ranges(
                                            &representative_lines,
                                            result.unit.line,
                                            end,
                                            half_context,
                                            max_lines,
                                            true,
                                        );
                                        print_highlighted_ranges(
                                            file_to_read,
                                            &lines,
                                            &ranges,
                                            end,
                                            line_num_width,
                                        );
                                    } else {
                                        // Final fallback: show from beginning
                                        let start = result.unit.line.saturating_sub(1);
                                        if start < lines.len() {
                                            print_highlighted_content(
                                                file_to_read,
                                                &lines,
                                                start,
                                                max_lines,
                                                end,
                                                line_num_width,
                                            );
                                        }
                                    }
                                }
                            }
                        }
                    }
                    println!();
                }
            }
        }
    }

    Ok(())
}

/// Find the lowest existing parent directory and list its contents
fn find_existing_parent_and_list(path: &Path) -> String {
    let mut current = path.to_path_buf();

    // Walk up the path to find the first existing directory
    while !current.exists() {
        if let Some(parent) = current.parent() {
            current = parent.to_path_buf();
        } else {
            break;
        }
    }

    // If we found an existing directory, list its contents
    if current.exists() && current.is_dir() {
        let mut entries: Vec<String> = Vec::new();
        if let Ok(dir_entries) = std::fs::read_dir(&current) {
            for entry in dir_entries.take(30).flatten() {
                let name = entry.file_name().to_string_lossy().to_string();
                let is_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);
                if is_dir {
                    entries.push(format!("  {}/", name));
                } else {
                    entries.push(format!("  {}", name));
                }
            }
        }
        entries.sort();

        let suffix = if entries.len() >= 30 {
            "\n  ... (truncated)"
        } else {
            ""
        };

        format!(
            "Closest existing directory: {}\nContents:\n{}{}",
            current.display(),
            entries.join("\n"),
            suffix
        )
    } else {
        "Could not find any existing parent directory.".to_string()
    }
}

/// Search a single path and return results with absolute file paths
#[allow(clippy::too_many_arguments)]
fn search_single_path(
    query: &str,
    path: &PathBuf,
    top_k: usize,
    cli_model: Option<&str>,
    json: bool,
    include_patterns: &[String],
    files_only: bool,
    text_pattern: Option<&str>,
    extended_regexp: bool,
    fixed_strings: bool,
    word_regexp: bool,
    case_sensitive: bool,
    exclude_patterns: &[String],
    exclude_dirs: &[String],
    code_only: bool,
    no_fts: bool,
    alpha: Option<f32>,
    pool_factor: Option<usize>,
    auto_confirm: bool,
    static_batch: bool,
) -> Result<Vec<colgrep::SearchResult>> {
    let path = match std::fs::canonicalize(path) {
        Ok(p) => p,
        Err(_) => {
            let help = find_existing_parent_and_list(path);
            anyhow::bail!("Path does not exist: {}\n\n{}", path.display(), help);
        }
    };

    // Check if path is a file (not a directory)
    // If so, we'll use the parent directory for indexing and filter to this specific file
    let (search_path, specific_file): (PathBuf, Option<PathBuf>) = if path.is_file() {
        let parent = path
            .parent()
            .ok_or_else(|| anyhow::anyhow!("File has no parent directory: {}", path.display()))?
            .to_path_buf();
        (parent, Some(path.clone()))
    } else {
        (path.clone(), None)
    };

    // When -e is used without -F, automatically enable regex mode (ERE)
    // This makes -e imply -E by default, with -F as the opt-out
    let effective_extended_regexp = extended_regexp || (text_pattern.is_some() && !fixed_strings);

    // Resolve model: CLI > config > default
    let model = resolve_model(cli_model);

    // Load config for settings
    let config = Config::load().unwrap_or_default();

    // Resolve quantized setting from config (default: false = use FP32)
    let quantized = !config.use_fp32();

    let parallel_sessions = config.configured_parallel_sessions();
    let batch_size = config.configured_batch_size();

    // Check if index already exists for this model (suppress model output if so)
    let has_existing_index =
        index_exists(&search_path, &model) || find_parent_index(&search_path, &model)?.is_some();

    // Ensure model is downloaded (quiet if we already have an index)
    let model_path = ensure_model(Some(&model), has_existing_index)?;

    // Check for parent index (scoped to current model) unless the resolved path
    // is outside the current directory (external project)
    let parent_info = {
        let current_dir = std::env::current_dir().ok();
        let is_external_project = is_external_project_path(&search_path, current_dir.as_deref());

        if is_external_project {
            None
        } else {
            find_parent_index(&search_path, &model)?
        }
    };

    // Determine effective project root and subdirectory filter
    let (effective_root, subdir_filter): (PathBuf, Option<PathBuf>) = match &parent_info {
        Some(info) => (
            info.project_path.clone(),
            Some(info.relative_subdir.clone()),
        ),
        None => (search_path.clone(), None),
    };

    // Check if --include patterns would escape the subdirectory
    // If so, search the full project (still within the same index - effective_root)
    // This does NOT escape to a different or parent index, it only removes the subdir restriction
    let subdir_filter = if let Some(ref subdir) = subdir_filter {
        if should_search_from_root(include_patterns, subdir, &effective_root) {
            if !json && !files_only {
                eprintln!("📂 Pattern escapes subdirectory, searching full project");
            }
            None // Skip subdir filter, search full index (still bounded by effective_root)
        } else {
            Some(subdir.clone())
        }
    } else {
        None
    };

    // Get files matching include patterns (for file-type filtering)
    // BUG FIX: Don't scan filesystem for --include patterns.
    // The filesystem scan finds files that aren't in the index, causing
    // filter_by_files() to return empty results. Instead, let the code
    // fall through to filter_by_file_patterns() which queries the index directly.
    let include_files: Option<Vec<String>> = None;

    // Auto-index: try incremental update without blocking on the lock.
    // If another process is indexing, skip the update and search the existing index.
    let mut index_locked = false;
    {
        let mut builder = IndexBuilder::with_options(
            &effective_root,
            &model,
            &model_path,
            quantized,
            pool_factor,
            parallel_sessions,
            batch_size,
        )?;
        builder.set_auto_confirm(auto_confirm);
        builder.set_dynamic_batch(!static_batch);

        // Try non-blocking index update
        match builder.try_index(None, false) {
            Ok(Some(stats)) => {
                let changes = stats.added + stats.changed + stats.deleted;
                if changes > 0 && !json && !files_only {
                    if let Some(ref info) = parent_info {
                        eprintln!(
                            "📂 Using index: {} (subdir: {}): indexed {} files\n",
                            display_path(&info.project_path, false),
                            info.relative_subdir.display(),
                            changes
                        );
                    } else {
                        eprintln!(
                            "📂 Using index: {}: indexed {} files\n",
                            display_path(&effective_root, false),
                            changes
                        );
                    }
                }
            }
            Ok(None) => {
                // Lock held by another process — search existing index
                index_locked = true;
                if !json && !files_only {
                    eprintln!(
                        "📂 Index is being updated by another process, searching existing index..."
                    );
                }
            }
            Err(e) => {
                let err_str = format!("{}", e);
                let err_debug = format!("{:?}", e);
                if err_str.contains("Indexing cancelled by user") {
                    return Err(e);
                }
                if err_str.contains("No data to merge")
                    || err_debug.contains("No data to merge")
                    || err_str.contains("Index load failed")
                {
                    // Index is corrupted - clear and rebuild
                    if !json && !files_only {
                        eprintln!("⚠️  Index corrupted, rebuilding...");
                    }

                    let index_dir = get_index_dir_for_project(&effective_root, &model)?;
                    if index_dir.exists() {
                        let _lock = acquire_index_lock(&index_dir)?;
                        std::fs::remove_dir_all(&index_dir)?;
                    }

                    let mut new_builder = IndexBuilder::with_options(
                        &effective_root,
                        &model,
                        &model_path,
                        quantized,
                        pool_factor,
                        parallel_sessions,
                        batch_size,
                    )?;
                    new_builder.set_auto_confirm(auto_confirm);
                    new_builder.set_dynamic_batch(!static_batch);
                    new_builder.index(None, false)?;
                } else {
                    return Err(e);
                }
            }
        }
    }

    // Verify index exists (at least partially)
    let index_dir = get_index_dir_for_project(&effective_root, &model)?;
    let vector_index_path = get_vector_index_path(&index_dir);
    if !vector_index_path.join("metadata.json").exists() {
        if index_locked {
            // Index is being created for the first time by another process — nothing to search yet
            anyhow::bail!("colgrep index is currently being built, rely on grep for now.");
        }
        // Check if the path contains an ignored directory pattern
        if let Some(ignored_pattern) = path_contains_ignored_dir(&effective_root) {
            anyhow::bail!(
                "No files indexed. The path contains '{}' which is in the default ignore list.\n\
                 Ignored directories: tmp, temp, vendor, node_modules, target, build, dist, .git, etc.\n\
                 Try searching from a different directory or project root.",
                ignored_pattern
            );
        }
        anyhow::bail!("No index found. Index building may have failed (no indexable files found).");
    }

    // Load searcher (from parent index if applicable)
    // If loading fails while another process holds the lock, retry a few times in case
    // the failure is due to a transient mid-write state.
    // If loading fails without a concurrent updater, clear and rebuild the index.
    let load_searcher = || -> Result<Searcher> {
        match &parent_info {
            Some(info) => Searcher::load_from_index_dir_with_quantized(
                &info.index_dir,
                &model_path,
                quantized,
            ),
            None => Searcher::load_with_quantized(&effective_root, &model, &model_path, quantized),
        }
    };

    let searcher = match load_searcher() {
        Ok(s) => s,
        Err(e) if index_locked => {
            // Another process is updating the index — the load failure is likely
            // due to a transient mid-write state. Retry a few times with short delays
            // rather than blocking on the lock (the updater may run for minutes).
            if !json && !files_only {
                eprintln!("⏳ Index load failed during update, retrying...");
            }
            const MAX_RETRIES: u32 = 3;
            const RETRY_DELAY: std::time::Duration = std::time::Duration::from_millis(500);
            let mut last_err = e;
            let mut loaded = None;
            for _ in 0..MAX_RETRIES {
                std::thread::sleep(RETRY_DELAY);
                match load_searcher() {
                    Ok(s) => {
                        loaded = Some(s);
                        break;
                    }
                    Err(e) => last_err = e,
                }
            }
            match loaded {
                Some(s) => s,
                None => {
                    return Err(last_err).with_context(|| {
                        "⏳ Index load failed while another process is updating. \
                         Rely on grep until the update completes."
                    });
                }
            }
        }
        Err(e)
            if {
                let err_debug = format!("{:?}", e);
                let err_display = format!("{}", e);
                err_debug.contains("No data to merge")
                    || err_display.contains("No data to merge")
                    || err_debug.contains("IndexLoad")
                    || err_display.contains("Index load failed")
            } =>
        {
            // Index is corrupted or empty (no concurrent updater) - clear and rebuild
            if !json && !files_only {
                eprintln!("⚠️  Index corrupted, rebuilding...");
            }

            let target_index_dir = match &parent_info {
                Some(info) => &info.index_dir,
                None => &index_dir,
            };
            if target_index_dir.exists() {
                let _lock = acquire_index_lock(target_index_dir)?;
                std::fs::remove_dir_all(target_index_dir)?;
            }

            let mut builder = IndexBuilder::with_options(
                &effective_root,
                &model,
                &model_path,
                quantized,
                pool_factor,
                parallel_sessions,
                batch_size,
            )?;
            builder.set_auto_confirm(auto_confirm);
            builder.set_dynamic_batch(!static_batch);
            builder.index(None, false)?;

            load_searcher()?
        }
        Err(e) => return Err(e),
    };

    // Build subset combining subdirectory filter, text pattern filter, and include patterns
    let subset = {
        let mut combined_ids: Option<Vec<i64>> = None;

        // Apply subdirectory filter first if using parent index
        if let Some(ref subdir) = subdir_filter {
            let subdir_ids = searcher.filter_by_path_prefix(subdir)?;
            if subdir_ids.is_empty() {
                if !json && !files_only {
                    eprintln!(
                        "No indexed code units in subdirectory: {}",
                        subdir.display()
                    );
                }
                return Ok(vec![]);
            }
            combined_ids = Some(subdir_ids);
        }

        // Apply text pattern filter: search indexed code directly (much faster than grep)
        if let Some(pattern) = text_pattern {
            // Use regex-based filtering with full grep flag support:
            // -e now implies ERE by default (no need for -E flag)
            // -F (fixed_strings): literal string matching, disables regex mode
            // -w (word_regexp): whole word matching with \b boundaries
            let pattern_ids = searcher.filter_by_text_pattern_with_options(
                pattern,
                effective_extended_regexp,
                fixed_strings,
                word_regexp,
                case_sensitive,
            )?;

            if pattern_ids.is_empty() {
                if !json && !files_only {
                    eprintln!("No indexed code units contain pattern: {}", pattern);
                }
                return Ok(vec![]);
            }

            combined_ids = match combined_ids {
                Some(existing) => {
                    let existing_set: std::collections::HashSet<_> = existing.into_iter().collect();
                    Some(
                        pattern_ids
                            .into_iter()
                            .filter(|id| existing_set.contains(id))
                            .collect(),
                    )
                }
                None => Some(pattern_ids),
            };
        }

        // Apply include pattern filter (file type filtering)
        // Only use filesystem-scanned files if non-empty; otherwise fall back to index-based pattern matching
        if let Some(files) = include_files.as_ref().filter(|f| !f.is_empty()) {
            let file_ids = searcher.filter_by_files(files)?;
            combined_ids = match combined_ids {
                Some(existing) => {
                    let existing_set: std::collections::HashSet<_> = existing.into_iter().collect();
                    Some(
                        file_ids
                            .into_iter()
                            .filter(|id| existing_set.contains(id))
                            .collect(),
                    )
                }
                None => Some(file_ids),
            };
        } else if !include_patterns.is_empty() {
            let pattern_ids = searcher.filter_by_file_patterns(include_patterns)?;
            combined_ids = match combined_ids {
                Some(existing) => {
                    let existing_set: std::collections::HashSet<_> = existing.into_iter().collect();
                    Some(
                        pattern_ids
                            .into_iter()
                            .filter(|id| existing_set.contains(id))
                            .collect(),
                    )
                }
                None => Some(pattern_ids),
            };
        }

        // Apply specific file filter (when user passes a file path instead of directory)
        if let Some(ref file_path) = specific_file {
            // Convert absolute file path to relative path (relative to effective_root)
            let rel_path = file_path
                .strip_prefix(&effective_root)
                .unwrap_or(file_path)
                .to_string_lossy()
                .to_string();
            let file_ids = searcher.filter_by_files(std::slice::from_ref(&rel_path))?;
            if file_ids.is_empty() {
                if !json && !files_only {
                    eprintln!("No indexed code units in file: {}", file_path.display());
                }
                return Ok(vec![]);
            }
            combined_ids = match combined_ids {
                Some(existing) => {
                    let existing_set: std::collections::HashSet<_> = existing.into_iter().collect();
                    Some(
                        file_ids
                            .into_iter()
                            .filter(|id| existing_set.contains(id))
                            .collect(),
                    )
                }
                None => Some(file_ids),
            };
        }

        // Apply exclude pattern filter (SQL-based: returns IDs that DON'T match patterns)
        if !exclude_patterns.is_empty() {
            let included_ids = searcher.filter_exclude_by_patterns(exclude_patterns)?;
            let included_set: std::collections::HashSet<_> = included_ids.into_iter().collect();
            combined_ids = match combined_ids {
                Some(existing) => Some(
                    existing
                        .into_iter()
                        .filter(|id| included_set.contains(id))
                        .collect(),
                ),
                None => Some(included_set.into_iter().collect()),
            };
        }

        // Apply exclude-dir filter (SQL-based: returns IDs NOT in excluded directories)
        if !exclude_dirs.is_empty() {
            let included_ids = searcher.filter_exclude_by_dirs(exclude_dirs)?;
            let included_set: std::collections::HashSet<_> = included_ids.into_iter().collect();
            combined_ids = match combined_ids {
                Some(existing) => Some(
                    existing
                        .into_iter()
                        .filter(|id| included_set.contains(id))
                        .collect(),
                ),
                None => Some(included_set.into_iter().collect()),
            };
        }

        // Check if subset is empty after combining
        if let Some(ref ids) = combined_ids {
            if ids.is_empty() {
                if !json && !files_only {
                    eprintln!("No indexed code units match the specified filters");
                }
                return Ok(vec![]);
            }
        }

        combined_ids
    };

    // Search with optional filtering
    // Request more results to allow for re-ranking with query boost and test function demotion
    let search_top_k = if code_only { top_k * 4 } else { top_k * 3 };

    // Resolve hybrid search: --semantic-only CLI flag overrides, then config, default is enabled
    let config = colgrep::Config::load().unwrap_or_default();
    let hybrid_disabled = if no_fts {
        true
    } else {
        !config.use_hybrid_search()
    };

    // CLI --alpha overrides config, config overrides default (0.55)
    let hybrid_alpha = alpha.unwrap_or_else(|| config.get_hybrid_alpha());

    // When no -e flag is provided, run BOTH semantic/hybrid search and text-pattern search
    // This ensures exact matches are found even if the vector database doesn't rank them highly
    let results = if let Some(pattern) = &text_pattern {
        // -e flag provided: use existing hybrid search logic
        // Enhance semantic query with -e pattern (strip regex metacharacters and dedupe tokens)
        let sanitized_pattern = strip_regex_for_semantic(pattern);
        let enhanced_query = merge_query_with_pattern(query, &sanitized_pattern);
        if hybrid_disabled {
            searcher.search(&enhanced_query, search_top_k, subset.as_deref())?
        } else {
            searcher.search_hybrid(
                &enhanced_query,
                search_top_k,
                subset.as_deref(),
                hybrid_alpha,
            )?
        }
    } else {
        // Encode query once and reuse across both searches
        let query_emb = searcher.encode_query(query)?;

        // Run FTS5 once and reuse across both searches
        let fts5_results = if hybrid_disabled {
            None
        } else {
            searcher.fts5_search(query, search_top_k * 3, subset.as_deref())
        };

        // 1. Run semantic search (with FTS5 fusion if enabled)
        let semantic_results = if hybrid_disabled {
            searcher.search_with_embedding(&query_emb, search_top_k, subset.as_deref())?
        } else {
            searcher.search_hybrid_with_embedding(
                &query_emb,
                query,
                search_top_k,
                subset.as_deref(),
                hybrid_alpha,
                fts5_results.as_ref(),
            )?
        };

        // 2. Run hybrid search: filter by query text, then semantic rank
        // Use fixed_strings mode to treat the query as a literal pattern.
        // The semantic-side query is *always* case-insensitive — ColBERT
        // embeddings handle case fuzzily and we want broad recall here.
        let text_filtered_ids =
            searcher.filter_by_text_pattern_with_options(query, false, true, false, false)?;

        let hybrid_results = if !text_filtered_ids.is_empty() {
            // Intersect with existing subset if any
            let hybrid_subset: Vec<i64> = match &subset {
                Some(existing) => {
                    let existing_set: std::collections::HashSet<_> =
                        existing.iter().copied().collect();
                    text_filtered_ids
                        .into_iter()
                        .filter(|id| existing_set.contains(id))
                        .collect()
                }
                None => text_filtered_ids,
            };

            if !hybrid_subset.is_empty() {
                // Reuse cached embedding and FTS5 results (filtered to subset)
                if hybrid_disabled {
                    searcher.search_with_embedding(
                        &query_emb,
                        search_top_k,
                        Some(&hybrid_subset),
                    )?
                } else {
                    // Pass `None` so FTS5 is refetched *within* the subset.
                    // Reusing the global `fts5_results` here would carry
                    // BM25 hits from outside the subset; they'd get filtered
                    // down to a tiny intersection, hurting recall.
                    searcher.search_hybrid_with_embedding(
                        &query_emb,
                        query,
                        search_top_k,
                        Some(&hybrid_subset),
                        hybrid_alpha,
                        None,
                    )?
                }
            } else {
                vec![]
            }
        } else {
            vec![]
        };

        // 3. Merge results: one entry per file, span covers every matched
        //    unit, score is the max across both calls.
        //
        // The previous `(file, line)` dedup was buggy: both
        // `search_hybrid_with_embedding` calls run `collapse_by_file`
        // internally, which sets `unit.line = min(line_i)` *across that
        // call's candidate pool*. Two pools → two different mins for the
        // same file → same file occupied two top-K slots.
        use std::collections::hash_map::Entry;
        let mut merged: HashMap<PathBuf, colgrep::SearchResult> = HashMap::new();
        for result in semantic_results.into_iter().chain(hybrid_results) {
            let key = result.unit.file.clone();
            match merged.entry(key) {
                Entry::Occupied(mut e) => {
                    let existing = e.get_mut();
                    let new_start = existing.unit.line.min(result.unit.line);
                    let new_end = existing.unit.end_line.max(result.unit.end_line);
                    if result.score > existing.score {
                        *existing = result;
                    }
                    existing.unit.line = new_start;
                    existing.unit.end_line = new_end;
                }
                Entry::Vacant(e) => {
                    e.insert(result);
                }
            }
        }
        merged.into_values().collect::<Vec<_>>()
    };

    // Note: When -e is used, results are already filtered to units containing the pattern
    // via filter_by_text_pattern_with_options() above, which supports -E, -F, -w flags.
    //
    // The legacy `compute_final_score` test-name demotion was removed; the
    // hybrid pipeline now applies `ranking::file_path_penalty` (a much more
    // complete language-aware test/bench/example/compat penalty) inside
    // `Searcher::search_hybrid_with_embedding`.
    let mut results: Vec<_> = results;
    results.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    // Increment search count
    let index_dir = get_index_dir_for_project(&effective_root, &model)?;
    if let Ok(mut state) = IndexState::load(&index_dir) {
        state.increment_search_count();
        let _ = state.save(&index_dir);
    }

    // Convert file paths to absolute for proper display when merging results from multiple paths
    let results: Vec<colgrep::SearchResult> = results
        .into_iter()
        .map(|mut r| {
            if !r.unit.file.is_absolute() {
                r.unit.file = effective_root.join(&r.unit.file);
            }
            r
        })
        .collect();

    Ok(results)
}

#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(windows)]
    use tempfile::tempdir;

    // Test resolve_top_k function
    #[test]
    fn test_resolve_top_k_cli_provided() {
        // CLI value should take precedence
        assert_eq!(resolve_top_k(Some(30), 15), 30);
        assert_eq!(resolve_top_k(Some(1), 20), 1);
        assert_eq!(resolve_top_k(Some(100), 15), 100);
    }

    #[test]
    fn test_resolve_top_k_fallback_to_default() {
        // When CLI not provided and no config, should use default
        // Note: This test may be affected by actual config file
        let result = resolve_top_k(None, 15);
        // Should be either 25 (default) or whatever is in config
        assert!(result > 0);
    }

    // Test resolve_context_lines function
    #[test]
    fn test_resolve_context_lines_cli_provided() {
        // CLI value should take precedence
        assert_eq!(resolve_context_lines(Some(10), 20), 10);
        assert_eq!(resolve_context_lines(Some(0), 20), 0);
        assert_eq!(resolve_context_lines(Some(30), 20), 30);
    }

    #[test]
    fn test_resolve_context_lines_fallback_to_default() {
        // When CLI not provided and no config, should use default
        let result = resolve_context_lines(None, 20);
        // Should be either 20 (default) or whatever is in config
        assert!(result <= 100); // sanity check
    }

    // Test strip_regex_for_semantic function
    #[test]
    fn test_strip_regex_basic_patterns() {
        // Character classes should be stripped, leaving meaningful text
        assert_eq!(strip_regex_for_semantic(r"fn\s+\w+"), "fn");
        assert_eq!(strip_regex_for_semantic(r"async\s+fn"), "async fn");
        assert_eq!(strip_regex_for_semantic(r"\btest\b"), "test");
    }

    #[test]
    fn test_strip_regex_quantifiers() {
        // Quantifiers should be stripped
        assert_eq!(strip_regex_for_semantic("foo+"), "foo");
        assert_eq!(strip_regex_for_semantic("bar*"), "bar");
        assert_eq!(strip_regex_for_semantic("baz?"), "baz");
        assert_eq!(strip_regex_for_semantic("qux{2,5}"), "qux");
    }

    #[test]
    fn test_strip_regex_alternation() {
        // Alternation should become space-separated
        assert_eq!(strip_regex_for_semantic("foo|bar"), "foo bar");
        assert_eq!(strip_regex_for_semantic("a|b|c"), "a b c");
    }

    #[test]
    fn test_strip_regex_anchors() {
        // Anchors should be stripped
        assert_eq!(strip_regex_for_semantic("^start"), "start");
        assert_eq!(strip_regex_for_semantic("end$"), "end");
        assert_eq!(strip_regex_for_semantic("^both$"), "both");
    }

    #[test]
    fn test_strip_regex_character_classes() {
        // Character class brackets should be stripped entirely
        assert_eq!(strip_regex_for_semantic("[abc]"), "");
        assert_eq!(strip_regex_for_semantic("pre[abc]post"), "prepost");
        assert_eq!(strip_regex_for_semantic("[a-z]+"), "");
    }

    #[test]
    fn test_strip_regex_groups() {
        // Grouping parens should be stripped but contents kept
        assert_eq!(strip_regex_for_semantic("(foo)"), "foo");
        assert_eq!(strip_regex_for_semantic("(foo)(bar)"), "foobar");
    }

    #[test]
    fn test_strip_regex_escaped_literals() {
        // Escaped metacharacters should become literals
        assert_eq!(strip_regex_for_semantic(r"foo\.bar"), "foo.bar");
        assert_eq!(strip_regex_for_semantic(r"a\*b"), "a*b");
        assert_eq!(strip_regex_for_semantic(r"Result\<T\>"), "Result<T>");
    }

    #[test]
    fn test_strip_regex_dots() {
        // Dots (any char) should be stripped
        assert_eq!(strip_regex_for_semantic("a.b"), "ab");
        assert_eq!(strip_regex_for_semantic("Result<.*>"), "Result<>");
    }

    #[test]
    fn test_strip_regex_plain_text() {
        // Plain text should pass through unchanged
        assert_eq!(strip_regex_for_semantic("hello"), "hello");
        assert_eq!(strip_regex_for_semantic("hello world"), "hello world");
        assert_eq!(strip_regex_for_semantic("foo_bar"), "foo_bar");
    }

    #[test]
    fn test_strip_regex_complex_patterns() {
        // Complex real-world patterns
        assert_eq!(strip_regex_for_semantic(r"impl\s+\w+\s+for"), "impl for");
        assert_eq!(strip_regex_for_semantic(r"fn\s+test_\w+"), "fn test_");
        assert_eq!(
            strip_regex_for_semantic(r"pub\s+(async\s+)?fn"),
            "pub async fn"
        );
    }

    #[test]
    fn test_strip_regex_empty_result() {
        // Patterns that result in empty string
        assert_eq!(strip_regex_for_semantic(r"\s+"), "");
        assert_eq!(strip_regex_for_semantic(r"\w+"), "");
        assert_eq!(strip_regex_for_semantic(r".*"), "");
        assert_eq!(strip_regex_for_semantic(r"[a-z]+"), "");
    }

    // Test merge_query_with_pattern function
    #[test]
    fn test_merge_query_no_duplicates() {
        // No duplicates - all pattern tokens added
        assert_eq!(
            merge_query_with_pattern("error handling", "Result"),
            "error handling Result"
        );
        assert_eq!(
            merge_query_with_pattern("function", "async fn"),
            "function async fn"
        );
    }

    #[test]
    fn test_merge_query_with_duplicates() {
        // Duplicates should be removed (case-insensitive)
        assert_eq!(
            merge_query_with_pattern("async function", "async fn"),
            "async function fn"
        );
        assert_eq!(
            merge_query_with_pattern("error handling", "error"),
            "error handling"
        );
        assert_eq!(
            merge_query_with_pattern("Error Handling", "error handling"),
            "Error Handling"
        );
    }

    #[test]
    fn test_merge_query_all_duplicates() {
        // All pattern tokens are duplicates - just return query
        assert_eq!(merge_query_with_pattern("foo bar", "foo bar"), "foo bar");
        assert_eq!(merge_query_with_pattern("FOO BAR", "foo bar"), "FOO BAR");
    }

    #[test]
    fn test_merge_query_empty_pattern() {
        // Empty pattern - just return query
        assert_eq!(merge_query_with_pattern("query", ""), "query");
    }

    #[test]
    fn test_merge_query_partial_duplicates() {
        // Mix of duplicate and new tokens
        assert_eq!(
            merge_query_with_pattern("impl trait", "impl for"),
            "impl trait for"
        );
        assert_eq!(
            merge_query_with_pattern("pub fn", "pub async fn test"),
            "pub fn async test"
        );
    }

    #[cfg(windows)]
    #[test]
    fn test_display_path_uses_relative_output_for_verbatim_windows_paths() {
        let temp_dir = tempdir().unwrap();
        let canonical_root = std::fs::canonicalize(temp_dir.path()).unwrap();
        let search_result_path = canonical_root.join("src").join("main.rs");
        // Pass the canonical root as cwd so both sides have the same path form
        let display = display_path_with_cwd(&search_result_path, Some(&canonical_root), true);

        assert_eq!(
            display,
            PathBuf::from("src").join("main.rs").display().to_string()
        );
    }

    #[cfg(windows)]
    #[test]
    fn test_display_path_strips_verbatim_windows_prefix_in_absolute_mode() {
        let temp_dir = tempdir().unwrap();
        let canonical_root = std::fs::canonicalize(temp_dir.path()).unwrap();
        let search_result_path = canonical_root.join("src").join("main.rs");

        let display = display_path(&search_result_path, false);

        assert!(!display.starts_with(r"\\?\"));
        assert!(display.ends_with(r"src\main.rs"));
    }

    #[cfg(windows)]
    #[test]
    fn test_is_external_project_path_ignores_windows_verbatim_prefixes() {
        let temp_dir = tempdir().unwrap();
        let canonical_root = std::fs::canonicalize(temp_dir.path()).unwrap();
        // Both paths canonical so the only difference is the verbatim prefix
        assert!(!is_external_project_path(
            &canonical_root,
            Some(&canonical_root)
        ));
    }

    #[cfg(windows)]
    #[test]
    fn test_normalize_windows_path_strips_verbatim_unc_prefix() {
        let raw_path = PathBuf::from(r"\\?\UNC\server\share\repo\src\main.rs");

        assert_eq!(
            normalize_windows_path(&raw_path),
            PathBuf::from(r"\\server\share\repo\src\main.rs")
        );
    }
}