asrch 0.1.1

Agent-safe bounded code search CLI
use std::collections::BTreeMap;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::process::{Command, Stdio};

use crate::cli::{Args, SearchMode};

/// Number of parsed matches after which `search` kills ripgrep and flags the
/// scan as limited.
const MAX_SCAN_MATCHES: usize = 50_000;
/// Maximum number of matches `search` keeps in `SearchResult::matches`;
/// matches beyond this are still counted but not stored.
const MAX_STORED_MATCHES: usize = 5_000;

/// Glob filters that `rg_command` hands to ripgrep, each behind its own
/// `--glob` flag and in the order listed here.
///
/// Every entry begins with `!`, which ripgrep's `--glob` treats as an
/// exclusion, so the directories and file types below are left out of the
/// search.
const EXCLUDES: &[&str] = &[
    // Directory trees (version control, build output, dependencies, scratch space).
    "!.git/**",
    "!target/**",
    "!node_modules/**",
    "!vendor/**",
    "!dist/**",
    "!build/**",
    "!coverage/**",
    "!scratch/**",
    "!tmp/**",
    "!generated/**",
    // File types excluded by extension.
    "!*.log",
    "!*.jsonl",
    "!*.xml",
    "!*.min.js",
    "!*.map",
];

/// One matching line, as parsed from a single record of ripgrep output by
/// `parse_match`.
#[derive(Clone, Debug)]
pub(crate) struct Match {
    /// File path as printed by ripgrep, decoded lossily as UTF-8.
    pub(crate) path: String,
    /// Line number as reported by ripgrep.
    pub(crate) line: usize,
    /// Column number as reported by ripgrep.
    pub(crate) column: usize,
    /// Text of the record after the `line:column:` prefix, without the
    /// trailing line terminator.
    pub(crate) text: String,
}

/// Outcome of a single `search` run.
#[derive(Debug)]
pub(crate) struct SearchResult {
    /// Matches kept in output order, capped at `MAX_STORED_MATCHES`.
    pub(crate) matches: Vec<Match>,
    /// Number of scanned matches per file path, including matches that were
    /// not stored in `matches`.
    pub(crate) counts: BTreeMap<String, usize>,
    /// Total number of output records that parsed as a match.
    pub(crate) scanned_matches: usize,
    /// `true` when scanning stopped early because `MAX_SCAN_MATCHES` was reached.
    pub(crate) scan_limited: bool,
}

/// Assembles the `rg` invocation for one search without spawning it.
///
/// The argument list is, in order: the fixed output-format flags, the flags
/// selected by `args.mode`, one `--glob` pair per entry of `EXCLUDES`, a `--`
/// separator, the pattern, and finally `path`. In identifier mode the pattern
/// is `identifier_pattern(query)`; in every other mode it is `query` verbatim.
/// Both stdout and stderr of the child are configured as pipes.
fn rg_command(args: &Args, query: &str, path: &Path) -> Command {
    // Output-format flags that do not depend on the search mode.
    const BASE_FLAGS: &[&str] = &[
        "--null",
        "--line-number",
        "--column",
        "--no-heading",
        "--with-filename",
        "--color",
        "never",
        "--no-messages",
        "--sort",
        "path",
        "--max-columns",
        "300",
        "--max-columns-preview",
    ];

    // Identifier and regex modes pass no extra flags: both hand ripgrep a regex.
    let mode_flags: &[&str] = match args.mode {
        SearchMode::Fixed => &["--fixed-strings"],
        SearchMode::Word => &["--fixed-strings", "--word-regexp"],
        SearchMode::Identifier | SearchMode::Regex => &[],
    };

    let mut rg = Command::new("rg");
    rg.args(BASE_FLAGS);
    rg.args(mode_flags);
    for glob in EXCLUDES {
        rg.arg("--glob").arg(glob);
    }

    // Everything after `--` is positional: pattern first, then the search root.
    rg.arg("--");
    if matches!(args.mode, SearchMode::Identifier) {
        rg.arg(identifier_pattern(query));
    } else {
        rg.arg(query);
    }
    rg.arg(path);

    rg.stdout(Stdio::piped());
    rg.stderr(Stdio::piped());
    rg
}

/// Builds a regex that matches `value` literally, but only where it is not
/// adjacent to an ASCII identifier character (`A-Z`, `a-z`, `0-9`, `_`).
///
/// Each side must be either a line edge or a non-identifier character. The
/// boundary groups consume that neighbouring character, so a match may span
/// one character on either side of `value`.
fn identifier_pattern(value: &str) -> String {
    let mut pattern = String::from("(^|[^A-Za-z0-9_])");
    pattern.push_str(&regex_escape(value));
    pattern.push_str("([^A-Za-z0-9_]|$)");
    pattern
}

/// Returns `value` with a backslash inserted before each of the characters
/// `\ . + * ? ( ) | [ ] { } ^ $`; all other characters are copied unchanged.
fn regex_escape(value: &str) -> String {
    // Characters that receive a leading backslash.
    const META: &str = r"\.+*?()|[]{}^$";

    value
        .chars()
        .fold(String::with_capacity(value.len()), |mut out, ch| {
            if META.contains(ch) {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}

/// Runs ripgrep for `query` under `path` and collects a bounded set of matches.
///
/// The command comes from `rg_command`. Its stdout is read one
/// newline-terminated record at a time; records that `parse_match` rejects
/// are skipped. Every parsed match is counted in total and per path, but only
/// the first `MAX_STORED_MATCHES` are kept. After `MAX_SCAN_MATCHES` parsed
/// matches the child is killed and the result is flagged `scan_limited`.
///
/// # Errors
///
/// Returns a message when ripgrep cannot be started, its output cannot be
/// read, it cannot be waited on, or (unless the scan was cut short) it exits
/// with a status other than 0 or 1. In the last case the message is the first
/// line of ripgrep's stderr when there is one, otherwise the exit status.
pub(crate) fn search(args: &Args, query: &str, path: &Path) -> Result<SearchResult, String> {
    let mut child = rg_command(args, query, path)
        .spawn()
        .map_err(|error| format!("failed to start ripgrep: {error}"))?;
    let stdout = match child.stdout.take() {
        Some(stdout) => stdout,
        None => {
            // Dropping a `Child` neither stops nor reaps the process, so do
            // both before bailing out.
            let _ = child.kill();
            let _ = child.wait();
            return Err("failed to read ripgrep output".to_string());
        }
    };
    let mut reader = BufReader::new(stdout);
    // Reused for every record to avoid a fresh allocation per line.
    let mut raw = Vec::new();
    let mut matches = Vec::new();
    let mut counts = BTreeMap::new();
    let mut scanned_matches = 0;
    let mut scan_limited = false;

    loop {
        raw.clear();
        let read = match reader.read_until(b'\n', &mut raw) {
            Ok(read) => read,
            Err(error) => {
                // Same reasoning as above: never return with the child still
                // running or unreaped.
                let _ = child.kill();
                let _ = child.wait();
                return Err(format!("failed to read ripgrep output: {error}"));
            }
        };
        if read == 0 {
            // End of ripgrep's output.
            break;
        }
        if let Some(item) = parse_match(&raw) {
            scanned_matches += 1;
            *counts.entry(item.path.clone()).or_insert(0) += 1;
            if matches.len() < MAX_STORED_MATCHES {
                matches.push(item);
            }
            if scanned_matches >= MAX_SCAN_MATCHES {
                scan_limited = true;
                // Best effort: the child may already have exited.
                let _ = child.kill();
                break;
            }
        }
    }

    let status = child
        .wait()
        .map_err(|error| format!("failed to wait for ripgrep: {error}"))?;
    // A deliberately killed child has no meaningful exit status, so the
    // status is only checked when the scan ran to completion.
    if !scan_limited && !matches!(status.code(), Some(0 | 1)) {
        let stderr = child
            .stderr
            .take()
            .map(|stream| {
                // Only the first line of stderr is surfaced to the caller.
                let mut text = String::new();
                let _ = BufReader::new(stream).read_line(&mut text);
                text.trim().to_string()
            })
            .unwrap_or_default();
        return Err(if stderr.is_empty() {
            format!("ripgrep exited with {status}")
        } else {
            stderr
        });
    }

    Ok(SearchResult {
        matches,
        counts,
        scanned_matches,
        scan_limited,
    })
}

/// Parses one raw output record of the form `path NUL line:column:text`.
///
/// The bytes before the first NUL become the path; the remainder is split at
/// its first two `:` into line number, column number and text, after any
/// trailing `\r`/`\n` characters are stripped. Both parts are decoded lossily
/// as UTF-8. Returns `None` when the record has no NUL, has fewer than two
/// `:` after it, or when the line or column is not a valid `usize`.
fn parse_match(raw: &[u8]) -> Option<Match> {
    // Split once at the first NUL: path on the left, the match record on the right.
    let mut halves = raw.splitn(2, |byte| *byte == 0);
    let path_bytes = halves.next()?;
    let record_bytes = halves.next()?;

    let record = String::from_utf8_lossy(record_bytes);
    let record = record.trim_end_matches(['\r', '\n']);

    // Only the first two colons are separators; later ones belong to the text.
    let (line, after_line) = record.split_once(':')?;
    let (column, text) = after_line.split_once(':')?;

    Some(Match {
        path: String::from_utf8_lossy(path_bytes).into_owned(),
        line: line.parse().ok()?,
        column: column.parse().ok()?,
        text: text.to_owned(),
    })
}