// srcwalk 0.1.0 - Docs.rs

pub mod imports;
pub mod outline;

use std::fs;
use std::path::Path;

use memmap2::Mmap;

use crate::cache::OutlineCache;
use crate::error::SrcwalkError;
use crate::format;
use crate::lang::detect_file_type;
use crate::lang::outline::get_outline_entries as lang_get_outline_entries;
use crate::types::{estimate_tokens, FileType, OutlineEntry, ViewMode};

/// Estimated-token ceiling for returning a file verbatim in default read mode.
/// `read_file` returns full content when the estimate is at or below this value,
/// and `would_outline` reports `true` when it is above it.
pub(crate) const TOKEN_THRESHOLD: u64 = 6_000;
/// Byte size above which `read_file` passes `capped = true` to `outline::generate`.
const FILE_SIZE_CAP: u64 = 500_000; // 500KB

/// Soft token limit for a single section read.
///
/// A section whose estimated token count is above this value is rendered as an
/// outline of the requested range instead of verbatim text. The value comes from
/// the `SRCWALK_SECTION_SOFT_LIMIT` environment variable; when the variable is
/// unset or does not parse as an unsigned integer, the default of 5000 is used.
fn section_token_limit() -> u64 {
    const DEFAULT_LIMIT: u64 = 5_000;

    match std::env::var("SRCWALK_SECTION_SOFT_LIMIT") {
        Ok(raw) => raw.parse().unwrap_or(DEFAULT_LIMIT),
        Err(_) => DEFAULT_LIMIT,
    }
}

/// Byte-size ceiling for `full=true` reads.
///
/// Files larger than this get a warning header, a head excerpt and an outline
/// rather than their raw content, so that a single read cannot produce a
/// multi-megabyte response (which causes MCP client timeouts).
///
/// Reads `SRCWALK_FULL_SIZE_CAP` (bytes); falls back to 2MB when the variable
/// is unset or is not a valid `u64`.
fn full_read_size_cap() -> u64 {
    const DEFAULT_CAP_BYTES: u64 = 2_000_000;

    if let Ok(raw) = std::env::var("SRCWALK_FULL_SIZE_CAP") {
        if let Ok(bytes) = raw.parse::<u64>() {
            return bytes;
        }
    }
    DEFAULT_CAP_BYTES
}

/// Main entry point for read mode. Routes through the decision tree.
///
/// The checks run in this order, and the first one that applies produces the result:
///   1. `fs::metadata` failure → `NotFound` (with a did-you-mean suggestion),
///      `PermissionDenied`, or `IoError`.
///   2. directory → directory listing.
///   3. zero-byte file → header only (`ViewMode::Empty`).
///   4. `section` given → delegated to `read_section`.
///   5. binary content → binary header with a MIME guess from the extension.
///   6. generated file (by name or content) → header only (`ViewMode::Generated`).
///   7. `full` and larger than `full_read_size_cap()` → capped head + outline.
///   8. `full`, or estimated tokens at or below `TOKEN_THRESHOLD` → numbered full content.
///   9. otherwise → outline (`ViewMode::Keys` for structured data, `ViewMode::Outline` else).
///
/// # Errors
/// Returns a `SrcwalkError` when the metadata lookup, open, or mmap fails, or
/// whatever `read_section` / `list_directory` return on their paths.
pub fn read_file(
    path: &Path,
    section: Option<&str>,
    full: bool,
    cache: &OutlineCache,
) -> Result<String, SrcwalkError> {
    // Map the metadata failure onto the crate's error variants; only NotFound
    // carries a "did you mean" suggestion.
    let meta = match fs::metadata(path) {
        Ok(m) => m,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            return Err(SrcwalkError::NotFound {
                path: path.to_path_buf(),
                suggestion: suggest_similar(path),
            });
        }
        Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
            return Err(SrcwalkError::PermissionDenied {
                path: path.to_path_buf(),
            });
        }
        Err(e) => {
            return Err(SrcwalkError::IoError {
                path: path.to_path_buf(),
                source: e,
            });
        }
    };

    // Directory → list contents
    if meta.is_dir() {
        return list_directory(path);
    }

    let byte_len = meta.len();

    // Empty check before mmap — mmap on 0-byte file may fail on some platforms
    if byte_len == 0 {
        return Ok(format::file_header(path, 0, 0, ViewMode::Empty));
    }

    // Section param → handled entirely by `read_section`. Note this runs before
    // the binary/generated checks below, so those do not apply to section reads.
    if let Some(range) = section {
        return read_section(path, range, cache);
    }

    // Binary detection
    let file = fs::File::open(path).map_err(|e| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source: e,
    })?;
    // NOTE(review): no SAFETY justification is recorded for this mapping. The
    // buffer is only read within this call; soundness presumably relies on the
    // file not being truncated or rewritten meanwhile — confirm against
    // memmap2's contract.
    let mmap = unsafe { Mmap::map(&file) }.map_err(|e| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source: e,
    })?;
    let buf = &mmap[..];

    if crate::lang::detection::is_binary(buf) {
        let mime = mime_from_ext(path);
        return Ok(format::binary_header(path, byte_len, mime));
    }

    // Non-UTF-8 or missing file names are treated as the empty string.
    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");

    // Generated
    if crate::lang::detection::is_generated_by_name(name)
        || crate::lang::detection::is_generated_by_content(buf)
    {
        // Newline count + 1: a file that ends with '\n' is counted with one
        // extra (empty) trailing line.
        let line_count = memchr::memchr_iter(b'\n', buf).count() as u32 + 1;
        return Ok(format::file_header(
            path,
            byte_len,
            line_count,
            ViewMode::Generated,
        ));
    }

    // The token estimate is derived from the byte length, not from the decoded text.
    let tokens = estimate_tokens(byte_len);
    let content = String::from_utf8_lossy(buf);
    let line_count = memchr::memchr_iter(b'\n', buf).count() as u32 + 1;

    // Guard: full=true on very large files. Return first-N numbered lines +
    // outline + section continue hint instead of dead-ending. This lets the
    // agent see head content immediately and paginate via `section`.
    let cap = full_read_size_cap();
    if full && byte_len > cap {
        const PROGRESSIVE_LINES: u32 = 200;
        let file_type = detect_file_type(path);
        // Fall back to the epoch when the platform cannot report an mtime.
        let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
        #[allow(clippy::cast_precision_loss)] // cap and file sizes fit in f64 mantissa for display
        let cap_mb = cap as f64 / 1_000_000.0;
        #[allow(clippy::cast_precision_loss)]
        let file_mb = byte_len as f64 / 1_000_000.0;

        // Take the first PROGRESSIVE_LINES via memchr — avoids allocating the full content split.
        // NOTE(review): when the file has fewer than PROGRESSIVE_LINES newlines
        // (e.g. one very long line), `head_end` falls back to `buf.len()` and the
        // head is the entire file, so the size cap is not enforced in that case.
        let head_end = memchr::memchr_iter(b'\n', buf)
            .nth(PROGRESSIVE_LINES as usize - 1)
            .map_or(buf.len(), |p| p + 1);
        let head = String::from_utf8_lossy(&buf[..head_end]);
        let numbered_head = format::number_lines(&head, 1);

        // The outline is always requested in capped form on this path.
        let outline = cache.get_or_compute(path, mtime, || {
            outline::generate(path, file_type, &content, buf, true)
        });

        let header = format::file_header(path, byte_len, line_count, ViewMode::Full);
        let shown = PROGRESSIVE_LINES.min(line_count);
        // First line the caller has not seen yet; used in the continuation hint.
        let next_start = shown + 1;
        return Ok(format!(
            "{header}\n\n> **full=true capped**: file is {file_mb:.1}MB (cap: {cap_mb:.1}MB). \
             Showing first {shown} of {line_count} lines. \
             Continue with `section=\"{next_start}-<end>\"` or set SRCWALK_FULL_SIZE_CAP={byte_len} to override.\n\n\
             {numbered_head}\n\n## Outline\n\n{outline}"
        ));
    }

    // Full mode or small file → return full content (skip smart view)
    if full || tokens <= TOKEN_THRESHOLD {
        let header = format::file_header(path, byte_len, line_count, ViewMode::Full);
        let numbered = format::number_lines(&content, 1);
        return Ok(format!("{header}\n\n{numbered}"));
    }

    // Large file → smart view by file type
    let file_type = detect_file_type(path);
    let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);

    // Files above FILE_SIZE_CAP ask the outline generator for its capped form.
    let capped = byte_len > FILE_SIZE_CAP;

    let outline = cache.get_or_compute(path, mtime, || {
        outline::generate(path, file_type, &content, buf, capped)
    });

    // Structured data is labelled as a key listing; everything else as an outline.
    let mode = match file_type {
        FileType::StructuredData => ViewMode::Keys,
        _ => ViewMode::Outline,
    };
    let header = format::file_header(path, byte_len, line_count, mode);
    Ok(format!("{header}\n\n{outline}"))
}

/// Reports whether default read mode would show an outline for `path` instead
/// of its full content.
///
/// True only for an existing non-directory whose estimated token count exceeds
/// `TOKEN_THRESHOLD`; any metadata error yields `false`. The MCP layer uses this
/// to decide whether to append related-file hints.
pub fn would_outline(path: &Path) -> bool {
    match std::fs::metadata(path) {
        Ok(meta) if !meta.is_dir() => estimate_tokens(meta.len()) > TOKEN_THRESHOLD,
        _ => false,
    }
}

/// Budget-aware wrapper around `read_file` for `--full` requests with `--budget`.
///
/// Rather than letting the post-hoc `budget::apply` cut the body mid-function
/// under a misleading `[full]` header, an over-budget full read steps down
/// through progressively smaller views:
///   1. full file        → returned as-is when it fits.
///   2. outline           → labelled `[outline (full requested, over budget)]` + note.
///   3. signatures only   → labelled `[signatures (...)]` + note (outline still overflows).
///   4. header + advice   → file too large at any granularity for this budget.
///
/// When `budget` is `None`, `full` is false, or a `section` is given, this is
/// exactly `read_file` (the caller still applies `budget::apply` if needed).
///
/// # Errors
/// Propagates errors from `read_file` and the outline/signature renderers, and
/// returns `SrcwalkError::IoError` if the final metadata lookup fails.
pub fn read_file_with_budget(
    path: &Path,
    section: Option<&str>,
    full: bool,
    budget: Option<u64>,
    cache: &OutlineCache,
) -> Result<String, SrcwalkError> {
    // Only a budgeted, whole-file `full` request enters the cascade.
    let token_budget = match budget {
        Some(limit) if full && section.is_none() => limit,
        _ => return read_file(path, section, full, cache),
    };
    let fits = |rendered: &str| estimate_tokens(rendered.len() as u64) <= token_budget;

    // Step 1: the full rendering, if it fits.
    let full_out = read_file(path, section, full, cache)?;
    if fits(&full_out) {
        return Ok(full_out);
    }

    // Step 2: outline, annotated with the size of the full body that overflowed.
    let outline_out = render_outline_view(path, cache, ViewMode::OutlineCascade)?;
    let outline_noted =
        append_cascade_note(&outline_out, "full body", full_out.len(), token_budget);
    if fits(&outline_noted) {
        return Ok(outline_noted);
    }

    // Step 3: signatures only, annotated with the size of the outline that overflowed.
    let sig_out = render_signatures_view(path, cache)?;
    let sig_noted = append_cascade_note(&sig_out, "outline", outline_out.len(), token_budget);
    if fits(&sig_noted) {
        return Ok(sig_noted);
    }

    // Step 4: nothing fits — emit the header and advice only.
    let meta = std::fs::metadata(path).map_err(|source| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source,
    })?;
    // A failed re-read degrades to a line count of 0 rather than an error.
    let line_count = match std::fs::read(path) {
        Ok(bytes) => memchr::memchr_iter(b'\n', &bytes).count() as u32 + 1,
        Err(_) => 0,
    };
    let header = format::file_header(path, meta.len(), line_count, ViewMode::Signatures);
    Ok(format!(
        "{header}\n\n> File too large for budget {token_budget} tokens at any granularity. \
         Drill: `--section <fn-name>` or raise `--budget`."
    ))
}

fn render_outline_view(
    path: &Path,
    cache: &OutlineCache,
    mode: ViewMode,
) -> Result<String, SrcwalkError> {
    let meta = std::fs::metadata(path).map_err(|e| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source: e,
    })?;
    let buf = std::fs::read(path).map_err(|e| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source: e,
    })?;
    let content = String::from_utf8_lossy(&buf);
    let line_count = memchr::memchr_iter(b'\n', &buf).count() as u32 + 1;
    let file_type = detect_file_type(path);
    let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
    let outline = cache.get_or_compute(path, mtime, || {
        outline::generate(path, file_type, &content, &buf, true)
    });
    let header = format::file_header(path, meta.len(), line_count, mode);
    Ok(format!("{header}\n\n{outline}"))
}

/// Signatures-only view: the outline with deeply indented lines removed.
///
/// Heuristic over the rendered outline text: the first line (the header) is
/// always kept, blank lines are kept as separators, and any other line is kept
/// only when it starts with at most two spaces of indentation.
///
/// # Errors
/// Propagates any error from `render_outline_view`.
fn render_signatures_view(path: &Path, cache: &OutlineCache) -> Result<String, SrcwalkError> {
    let rendered = render_outline_view(path, cache, ViewMode::Signatures)?;

    let kept: Vec<&str> = rendered
        .lines()
        .enumerate()
        .filter(|&(idx, line)| {
            // Number of leading space characters (tabs are not counted).
            let indent = line.len() - line.trim_start_matches(' ').len();
            idx == 0 || line.is_empty() || indent <= 2
        })
        .map(|(_, line)| line)
        .collect();

    Ok(kept.join("\n"))
}

/// Appends a blockquote note to `body` explaining why a larger view was dropped.
///
/// `prev_kind` names the view that overflowed, `prev_bytes` is its rendered
/// length in bytes (converted to a token estimate for display), and `budget`
/// is the token budget it exceeded.
fn append_cascade_note(body: &str, prev_kind: &str, prev_bytes: usize, budget: u64) -> String {
    let note = format!(
        "> Note: {prev_kind} ({tokens} tokens) exceeded budget ({budget}). \
         Drill: `--section <fn-name>` for specific symbol, or raise `--budget`.",
        tokens = estimate_tokens(prev_bytes as u64)
    );
    [body, note.as_str()].join("\n\n")
}

/// Number of leading `#` characters on a line (0 when the line is not a heading).
fn atx_level(line: &str) -> usize {
    line.bytes().take_while(|&b| b == b'#').count()
}

/// Iterates the lines of `buf` that lie outside fenced code blocks.
///
/// Yields `(line_number, text)` with 1-indexed line numbers and trailing
/// whitespace removed. Lines that start with ``` toggle the fence state and are
/// not yielded; lines that are not valid UTF-8 are skipped without affecting
/// the fence state. A buffer ending in `\n` yields one final empty line.
fn prose_lines<'a>(buf: &'a [u8]) -> impl Iterator<Item = (usize, &'a str)> + 'a {
    let mut in_code_block = false;
    buf.split(|&b| b == b'\n')
        .enumerate()
        .filter_map(move |(idx, raw)| {
            let line = std::str::from_utf8(raw).ok()?.trim_end();
            if line.starts_with("```") {
                in_code_block = !in_code_block;
                return None;
            }
            if in_code_block {
                None
            } else {
                Some((idx + 1, line))
            }
        })
}

/// Resolve a heading address to a line range in a markdown file.
///
/// `heading` is the heading as written, including its `#` markers
/// (e.g. `"## Install"`). Headings inside fenced code blocks are ignored.
///
/// Matching prefers the first line that is the heading itself, optionally
/// followed only by an attribute block or an ATX closing sequence
/// (`"## Foo"`, `"## Foo {#anchor}"`, `"## Foo {:.class}"`, `"## Foo ##"`).
/// Only when no such line exists does it fall back to the first line that
/// merely starts with the heading followed by a space, tab, `{` or `#`
/// (so `"## Foo"` can still address `"## Foo Bar"` when nothing better exists).
///
/// The section ends on the line before the next heading of the same or a
/// higher level, or at the last line of the file.
///
/// Returns `(start_line, end_line)` as a 1-indexed inclusive range, or `None`
/// when `heading` has no leading `#` or no matching heading is found.
fn resolve_heading(buf: &[u8], heading: &str) -> Option<(usize, usize)> {
    let target = heading.trim_end();
    let target_level = atx_level(target);
    if target_level == 0 {
        return None;
    }

    let mut exact: Option<usize> = None;
    let mut prefix: Option<usize> = None;
    for (line_no, line) in prose_lines(buf) {
        let rest = match line.strip_prefix(target) {
            Some(rest) => rest,
            None => continue,
        };
        // The heading text must end here or be followed by a separator;
        // this rejects e.g. "## Foobar" for the query "## Foo".
        match rest.chars().next() {
            None | Some(' ' | '\t' | '{' | '#') => {}
            Some(_) => continue,
        }
        let tail = rest.trim_start();
        let decorated_only =
            tail.is_empty() || tail.starts_with('{') || tail.bytes().all(|b| b == b'#');
        if decorated_only {
            exact = Some(line_no);
            break;
        }
        // Remember the first loose match, but keep looking for an exact one.
        if prefix.is_none() {
            prefix = Some(line_no);
        }
    }
    let start_line = exact.or(prefix)?;

    // The start line was seen outside a code block, so rescanning from the top
    // reaches the following line with the same (closed) fence state.
    let next_heading = prose_lines(buf)
        .skip_while(|&(line_no, _)| line_no <= start_line)
        .find(|&(_, line)| {
            let level = atx_level(line);
            level != 0 && level <= target_level
        });
    if let Some((line_no, _)) = next_heading {
        return Some((start_line, line_no - 1));
    }

    // No next heading: the section runs to the end of the file. A trailing
    // newline does not start another line.
    let newlines = buf.iter().filter(|&&b| b == b'\n').count();
    let total_lines = if buf.last() == Some(&b'\n') {
        newlines
    } else {
        newlines + 1
    };
    Some((start_line, total_lines))
}

/// Collect up to `top_n` headings whose text is closest (by edit distance)
/// to the queried heading.
///
/// The comparison is ASCII-case-insensitive and ignores the `#` markers, any
/// `{...}` attribute suffix, and a trailing ATX closing sequence. Headings are
/// returned as they appear in the file (e.g. "## Foo Bar"), closest first;
/// ties keep file order. Headings inside fenced code blocks and lines that are
/// not valid UTF-8 are ignored. Returns an empty list when the query has no
/// text besides `#` markers and whitespace.
fn suggest_headings(buf: &[u8], query: &str, top_n: usize) -> Vec<String> {
    // Normalise the query once, outside the per-line loop.
    let wanted = query
        .trim_end()
        .trim_start_matches('#')
        .trim()
        .to_ascii_lowercase();
    if wanted.is_empty() {
        return Vec::new();
    }

    let mut in_code_block = false;
    let mut scored: Vec<(usize, String)> = Vec::new();
    for raw in buf.split(|&b| b == b'\n') {
        let Ok(text) = std::str::from_utf8(raw) else {
            continue;
        };
        let line = text.trim_end();
        if line.starts_with("```") {
            in_code_block = !in_code_block;
            continue;
        }
        if in_code_block || !line.starts_with('#') {
            continue;
        }
        let title = line.trim_start_matches('#').trim();
        if title.is_empty() {
            continue;
        }
        // Strip kramdown attr / ATX-close trailing markers from comparison text.
        let comparable = title
            .split('{')
            .next()
            .unwrap_or(title)
            .trim_end_matches('#')
            .trim()
            .to_ascii_lowercase();
        scored.push((edit_distance(&wanted, &comparable), line.to_string()));
    }

    // Stable sort: equally distant headings stay in file order.
    scored.sort_by_key(|(dist, _)| *dist);
    scored.into_iter().take(top_n).map(|(_, h)| h).collect()
}

/// Read one section of a file, addressed by line range, markdown heading, or symbol name.
///
/// `range` is interpreted in this order:
///   - starts with `#` → markdown heading, resolved with `resolve_heading`;
///   - `"A-B"` → 1-indexed inclusive line range (`parse_range`);
///   - otherwise → symbol name looked up in the file's outline (`resolve_symbol`).
///
/// The resolved range is clamped to the file's line count. The empty "line"
/// after a trailing newline is not counted, so it cannot be selected and does
/// not inflate the reported line counts.
///
/// Sections whose estimated token count exceeds `section_token_limit()` are not
/// returned verbatim: for code files the outline entries overlapping the range
/// are listed, otherwise only the header and advice are returned.
///
/// Uses memchr to find line offsets and slices the mmap buffer directly
/// instead of collecting all lines into a Vec. `_cache` is currently unused.
///
/// # Errors
/// `SrcwalkError::IoError` when the file cannot be opened or mapped;
/// `SrcwalkError::InvalidQuery` when the address cannot be resolved or the
/// range lies outside the file.
fn read_section(path: &Path, range: &str, _cache: &OutlineCache) -> Result<String, SrcwalkError> {
    let file = fs::File::open(path).map_err(|e| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source: e,
    })?;
    // SAFETY (review): the mapping is only read within this call; soundness
    // presumably relies on the file not being truncated or rewritten meanwhile —
    // confirm against memmap2's contract.
    let mmap = unsafe { Mmap::map(&file) }.map_err(|e| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source: e,
    })?;
    let buf = &mmap[..];

    // Resolve section address: line range, heading, or symbol name
    let (start, end) = if range.starts_with('#') {
        // Markdown heading
        resolve_heading(buf, range).ok_or_else(|| {
            let suggestions = suggest_headings(buf, range, 5);
            let reason = if suggestions.is_empty() {
                "heading not found in file".to_string()
            } else {
                format!(
                    "heading not found in file. Closest matches:\n  {}",
                    suggestions.join("\n  ")
                )
            };
            SrcwalkError::InvalidQuery {
                query: range.to_string(),
                reason,
            }
        })?
    } else if let Some(r) = parse_range(range) {
        // Line range like "45-89"
        r
    } else if let Some(r) = resolve_symbol(buf, path, range) {
        // Symbol name like "isCustomization" or "handleRequest"
        r
    } else {
        return Err(SrcwalkError::InvalidQuery {
            query: range.to_string(),
            reason:
                "not a valid line range (e.g. \"45-89\"), heading (e.g. \"## Foo\"), or symbol name in this file"
                    .to_string(),
        });
    };

    // Find line offsets using memchr — no full-file Vec<&str> allocation
    let mut line_offsets: Vec<usize> = vec![0];
    for pos in memchr::memchr_iter(b'\n', buf) {
        line_offsets.push(pos + 1);
    }
    // A trailing newline leaves a final offset equal to `buf.len()`; that is
    // not a real line, so exclude it from the count (as `resolve_heading` does).
    let total = if buf.last() == Some(&b'\n') {
        line_offsets.len() - 1
    } else {
        line_offsets.len()
    };

    // `s` is the 0-indexed first line, `e` the exclusive end, both clamped.
    let s = (start.saturating_sub(1)).min(total);
    let e = end.min(total);

    if s >= e {
        return Err(SrcwalkError::InvalidQuery {
            query: range.to_string(),
            reason: format!("range out of bounds (file has {total} lines)"),
        });
    }

    let start_byte = line_offsets[s];
    // `line_offsets[e]` is the start of the line after the section; when the
    // section reaches the last line of a file without a trailing newline there
    // is no such offset, so the section runs to the end of the buffer.
    let end_byte = if e < line_offsets.len() {
        line_offsets[e]
    } else {
        buf.len()
    };

    let selected = String::from_utf8_lossy(&buf[start_byte..end_byte]);
    let byte_len = selected.len() as u64;
    let line_count = (e - s) as u32;
    let tok_est = estimate_tokens(byte_len);
    let limit = section_token_limit();

    if tok_est > limit {
        // Degrade: render outline entries within the section range
        let file_type = detect_file_type(path);
        let content = String::from_utf8_lossy(buf);
        let header = format::file_header(path, byte_len, line_count, ViewMode::SectionOutline);

        // The outline filter works on the requested (unclamped) range.
        let start32 = start as u32;
        let end32 = end as u32;

        if let crate::types::FileType::Code(lang) = file_type {
            let entries = lang_get_outline_entries(&content, lang);
            let filtered = filter_entries_in_range(&entries, start32, end32);
            if !filtered.is_empty() {
                let body = format_section_outline(&filtered);
                return Ok(format!(
                    "{header}\n\n{body}\n\n\
                     > Section spans ~{tok_est} tokens (limit {limit}). Showing outline of {start}-{end}.\n\
                     > Drill: `--section <fn-name>` for a specific symbol."
                ));
            }
        }

        // Fallback: no structured outline available — return header + advice only
        return Ok(format!(
            "{header}\n\n\
             > Section spans ~{tok_est} tokens (limit {limit}).\n\
             > Drill: `--section <fn-name>` for a specific symbol, or use a narrower line range."
        ));
    }

    let header = format::file_header(path, byte_len, line_count, ViewMode::Section);
    let formatted = format::number_lines(&selected, start as u32);
    Ok(format!("{header}\n\n{formatted}"))
}

/// Select the outline entries that overlap the inclusive line range
/// `[range_start, range_end]`.
///
/// An entry that has children and extends beyond the range on either side is
/// not returned itself; its direct children that overlap the range are returned
/// in its place (one level only). Every other entry is returned when it
/// overlaps the range.
fn filter_entries_in_range(
    entries: &[OutlineEntry],
    range_start: u32,
    range_end: u32,
) -> Vec<&OutlineEntry> {
    let overlaps =
        |entry: &OutlineEntry| entry.start_line <= range_end && entry.end_line >= range_start;

    let mut selected = Vec::new();
    for entry in entries {
        let spills_out = entry.start_line < range_start || entry.end_line > range_end;
        if !entry.children.is_empty() && spills_out {
            // Container wider than the range: surface its matching children instead.
            selected.extend(entry.children.iter().filter(|&child| overlaps(child)));
        } else if overlaps(entry) {
            selected.push(entry);
        }
    }
    selected
}

/// Render filtered outline entries as text for the degraded section view.
///
/// Each entry becomes one line holding its right-aligned line span and its
/// signature (or its name when it has no signature), followed by one more
/// deeply indented line per direct child. Lines are joined with `\n`.
fn format_section_outline(entries: &[&OutlineEntry]) -> String {
    /// `[N]` for a single line, `[A-B]` for a span.
    fn span_label(first: u32, last: u32) -> String {
        if first == last {
            format!("[{}]", first)
        } else {
            format!("[{}-{}]", first, last)
        }
    }

    let mut rows: Vec<String> = Vec::new();
    for entry in entries {
        let label = entry.signature.as_deref().unwrap_or(&entry.name);
        rows.push(format!(
            "  {:>14}    {}",
            span_label(entry.start_line, entry.end_line),
            label
        ));
        // All direct children are listed, one level deep.
        rows.extend(entry.children.iter().map(|child| {
            let child_label = child.signature.as_deref().unwrap_or(&child.name);
            format!(
                "    {:>12}    {}",
                span_label(child.start_line, child.end_line),
                child_label
            )
        }));
    }
    rows.join("\n")
}

/// Parse a 1-indexed inclusive line range such as "45-89" into `(45, 89)`.
///
/// The text is split at the first `-`; whitespace around either number is
/// ignored. Returns `None` when there is no `-`, when either side is not an
/// unsigned integer, when the start is 0, or when the end precedes the start.
fn parse_range(s: &str) -> Option<(usize, usize)> {
    let mut halves = s.splitn(2, '-');
    let first = halves.next()?.trim().parse::<usize>().ok()?;
    let last = halves.next()?.trim().parse::<usize>().ok()?;

    let valid = first >= 1 && last >= first;
    valid.then_some((first, last))
}

/// Look up `symbol` in the outline of a code file and return its line range.
///
/// Returns (`start_line`, `end_line`) for the first outline entry whose name
/// equals `symbol`. Returns `None` when `buf` is not valid UTF-8, when `path`
/// is not detected as a code file, or when no entry has that name.
fn resolve_symbol(buf: &[u8], path: &Path, symbol: &str) -> Option<(usize, usize)> {
    let content = std::str::from_utf8(buf).ok()?;
    match detect_file_type(path) {
        FileType::Code(lang) => {
            let entries = lang_get_outline_entries(content, lang);
            find_symbol_in_entries(&entries, symbol)
        }
        _ => None,
    }
}

/// Depth-first search for an outline entry named exactly `symbol`.
///
/// Each entry is checked before its children (methods inside a class, etc.),
/// and its children before the next sibling. Returns the first match's
/// `(start_line, end_line)`, or `None` when nothing matches.
fn find_symbol_in_entries(entries: &[OutlineEntry], symbol: &str) -> Option<(usize, usize)> {
    entries.iter().find_map(|entry| {
        if entry.name == symbol {
            Some((entry.start_line as usize, entry.end_line as usize))
        } else {
            find_symbol_in_entries(&entry.children, symbol)
        }
    })
}

/// List the entries of a directory, sorted by file name.
///
/// Output is a `# <path> (<n> items)` header followed by one indented line per
/// entry: directories get a trailing `/`, symlinks a trailing ` →`, and
/// everything else a token estimate derived from its size (omitted when the
/// metadata is unavailable). Entries that fail to read are left out.
///
/// # Errors
/// Returns `SrcwalkError::IoError` when the directory cannot be opened.
fn list_directory(path: &Path) -> Result<String, SrcwalkError> {
    let reader = fs::read_dir(path).map_err(|source| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source,
    })?;

    let mut items: Vec<fs::DirEntry> = reader.flatten().collect();
    items.sort_by_key(|item| item.file_name());

    let rows: Vec<String> = items
        .iter()
        .map(|item| {
            let kind = item.file_type().ok();
            let raw_name = item.file_name();
            let name = raw_name.to_string_lossy();
            let meta = item.metadata().ok();

            let suffix = if kind.is_some_and(|t| t.is_dir()) {
                "/".to_string()
            } else if kind.is_some_and(|t| t.is_symlink()) {
                " →".to_string()
            } else if let Some(m) = meta {
                let tokens = estimate_tokens(m.len());
                format!("  ({tokens} tokens)")
            } else {
                String::new()
            };
            format!("  {name}{suffix}")
        })
        .collect();

    let header = format!("# {} ({} items)", path.display(), items.len());
    Ok(format!("{header}\n\n{}", rows.join("\n")))
}

/// Did-you-mean lookup for path-like queries that matched nothing.
///
/// Joins `query` onto `scope` and searches the resulting path's parent
/// directory for a similarly named entry.
pub fn suggest_similar_file(scope: &Path, query: &str) -> Option<String> {
    suggest_similar(&scope.join(query))
}

/// Find the entry in `path`'s parent directory whose name is closest to
/// `path`'s file name.
///
/// Only names within an edit distance of 3 are considered; among equally close
/// names the first one returned by the directory listing wins. Returns `None`
/// when the path has no parent or no UTF-8 file name, when the parent cannot
/// be listed, or when nothing is close enough.
fn suggest_similar(path: &Path) -> Option<String> {
    let parent = path.parent()?;
    let wanted = path.file_name()?.to_str()?;

    fs::read_dir(parent)
        .ok()?
        .flatten()
        .filter_map(|entry| {
            let candidate = entry.file_name().to_string_lossy().into_owned();
            let dist = edit_distance(wanted, &candidate);
            (dist <= 3).then_some((dist, candidate))
        })
        // `min_by_key` keeps the first of several equally small elements.
        .min_by_key(|(dist, _)| *dist)
        .map(|(_, name)| name)
}

/// Levenshtein distance between `a` and `b`, counted in Unicode scalar values.
///
/// Each insertion, deletion, or substitution of one `char` costs 1, so a
/// non-ASCII character counts as a single edit rather than one per UTF-8 byte.
/// Uses two rolling rows, i.e. memory proportional to the length of `b`.
/// Intended for short inputs such as file names and headings.
pub(crate) fn edit_distance(a: &str, b: &str) -> usize {
    let b: Vec<char> = b.chars().collect();
    // `prev[j]` is the distance between the processed prefix of `a` and `b[..j]`.
    let mut prev: Vec<usize> = (0..=b.len()).collect();
    let mut curr = vec![0; b.len() + 1];

    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b.iter().enumerate() {
            let cost = usize::from(ca != cb);
            // substitution, deletion, insertion
            curr[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(curr[j] + 1);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[b.len()]
}

/// Guess a MIME type from the file extension, for binary file headers.
///
/// The extension is matched ASCII-case-insensitively (`photo.JPG` is
/// `image/jpeg`). Unknown, missing, or non-UTF-8 extensions yield
/// `application/octet-stream`.
fn mime_from_ext(path: &Path) -> &'static str {
    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_ascii_lowercase);

    match ext.as_deref() {
        Some("png") => "image/png",
        Some("jpg" | "jpeg") => "image/jpeg",
        Some("gif") => "image/gif",
        Some("svg") => "image/svg+xml",
        Some("webp") => "image/webp",
        Some("ico") => "image/x-icon",
        Some("pdf") => "application/pdf",
        Some("zip") => "application/zip",
        Some("gz" | "tgz") => "application/gzip",
        Some("tar") => "application/x-tar",
        Some("wasm") => "application/wasm",
        // Each font format has its own registered type.
        Some("woff") => "font/woff",
        Some("woff2") => "font/woff2",
        Some("ttf") => "font/ttf",
        Some("otf") => "font/otf",
        Some("mp3") => "audio/mpeg",
        Some("mp4") => "video/mp4",
        _ => "application/octet-stream",
    }
}

// Unit tests for heading resolution, the `full=true` size cap, and the
// budget cascade.
// NOTE(review): the file-based tests write to fixed names under the system
// temp directory, and one of them mutates a process-wide environment variable;
// they may interfere with each other or with concurrent runs when the test
// harness executes tests in parallel.
#[cfg(test)]
mod tests {
    use super::*;

    // A level-2 heading that is the last section runs to the last real line.
    #[test]
    fn heading_found() {
        let input = b"# Title\nSome content\n## Section\nSection content\n";
        let result = resolve_heading(input, "## Section");

        assert_eq!(result, Some((3, 4)));
    }

    // A heading that does not occur in the file resolves to `None`.
    #[test]
    fn heading_not_found() {
        let input = b"# Title\nContent\n";
        let result = resolve_heading(input, "## Missing");

        assert_eq!(result, None);
    }

    // Heading-like lines inside a fenced code block are not headings.
    #[test]
    fn heading_in_code_block() {
        let input = b"# Real\n```\n## Fake\n```\n";
        let result = resolve_heading(input, "## Fake");

        // Heading inside code block should be skipped
        assert_eq!(result, None);
    }

    // With duplicate headings, the first one wins and ends before the second.
    #[test]
    fn duplicate_headings() {
        let input = b"## First\ntext\n## First\ntext\n";
        let result = resolve_heading(input, "## First");

        // Should return the first occurrence
        assert_eq!(result, Some((1, 2)));
    }

    // The trailing newline must not add a phantom fifth line to the range.
    #[test]
    fn last_heading_to_eof() {
        let input = b"# Start\ntext\n## End\nfinal line\n";
        let result = resolve_heading(input, "## End");

        // Last heading should extend to total_lines (4)
        assert_eq!(result, Some((3, 4)));
    }

    // Deeper headings belong to the enclosing section.
    #[test]
    fn nested_sections() {
        let input = b"## A\ncontent\n### B\nmore\n## C\ntext\n";
        let result = resolve_heading(input, "## A");

        // ## A should include ### B, ending when ## C starts (line 5)
        // So range is [1, 4]
        assert_eq!(result, Some((1, 4)));
    }

    // Queries without a leading `#` are never resolved as headings.
    #[test]
    fn no_hashes() {
        let input = b"# Heading\ntext\n";

        // Empty string
        assert_eq!(resolve_heading(input, ""), None);

        // String without hashes
        assert_eq!(resolve_heading(input, "hello"), None);
    }

    // `full=true` on a file above SRCWALK_FULL_SIZE_CAP yields the capped
    // head + outline response instead of the whole file.
    #[test]
    fn full_true_size_cap_returns_outline() {
        use std::io::Write;

        // Create a temp file larger than our small cap (100 bytes)
        let path = std::env::temp_dir().join("srcwalk_test_large.rs");
        let mut f = std::fs::File::create(&path).unwrap();
        // Write enough to exceed the cap — 20 one-line functions, well over 100 bytes in total
        for i in 0..20 {
            writeln!(f, "pub fn func_{i}() {{ println!(\"hello\"); }}").unwrap();
        }
        drop(f);

        // Set a tiny cap so the guard triggers
        // (process-wide: visible to any other test running at the same time)
        std::env::set_var("SRCWALK_FULL_SIZE_CAP", "100");

        let cache = OutlineCache::new();
        let result = read_file(&path, None, true, &cache).unwrap();

        // Should contain the progressive-read warning, not the full file content
        assert!(
            result.contains("full=true capped"),
            "expected size cap warning, got: {result}"
        );
        assert!(
            result.contains("func_0"),
            "expected head/outline content in output"
        );

        // Cleanup; skipped if an assertion above panics.
        std::env::remove_var("SRCWALK_FULL_SIZE_CAP");
        let _ = std::fs::remove_file(&path);
    }

    // An over-budget `--full` read steps down to an outline or signatures view
    // that fits the budget and carries the cascade note.
    #[test]
    fn budget_cascade_full_to_outline() {
        // Build a file large enough that --full would emit ~5k tokens.
        let mut body = String::from("<?php\nclass Big {\n");
        for i in 0..120 {
            body.push_str(&format!(
                "    public function method_{i}() {{\n        $x = {i}; // padding line {i}\n        return $x * 2;\n    }}\n"
            ));
        }
        body.push_str("}\n");
        let path = std::env::temp_dir().join("srcwalk_p11_cascade.php");
        std::fs::write(&path, body.as_bytes()).unwrap();

        let cache = OutlineCache::new();
        let out = read_file_with_budget(&path, None, true, Some(800), &cache).unwrap();

        // Budget honored.
        let tokens = estimate_tokens(out.len() as u64);
        assert!(tokens <= 800, "cascade overshot budget: {tokens} tokens");
        // Header relabelled, not [full].
        assert!(
            out.contains("[outline (full requested, over budget)]")
                || out.contains("[signatures"),
            "expected cascade header label, got: {}",
            &out[..out.len().min(200)]
        );
        // Cascade note present.
        assert!(out.contains("exceeded budget"), "missing cascade note");

        let _ = std::fs::remove_file(&path);
    }

    // A file that fits the budget is returned unchanged, with no cascade note.
    #[test]
    fn budget_cascade_passthrough_when_fits() {
        // Tiny file fits in budget → unchanged behavior (full content).
        let path = std::env::temp_dir().join("srcwalk_p11_tiny.php");
        std::fs::write(&path, b"<?php\nclass Tiny { public function f() {} }\n").unwrap();

        let cache = OutlineCache::new();
        let out = read_file_with_budget(&path, None, true, Some(2000), &cache).unwrap();

        assert!(out.contains("[full]"), "expected [full] label, got header in: {out}");
        assert!(!out.contains("exceeded budget"), "no cascade note for fitting file");

        let _ = std::fs::remove_file(&path);
    }
}