pub mod imports;
pub mod outline;
use std::fs;
use std::path::Path;
use memmap2::Mmap;
use crate::cache::OutlineCache;
use crate::error::SrcwalkError;
use crate::format;
use crate::lang::detect_file_type;
use crate::lang::outline::get_outline_entries as lang_get_outline_entries;
use crate::types::{estimate_tokens, FileType, OutlineEntry, ViewMode};
/// Token estimate at or below which `read_file` returns the numbered file body
/// instead of an outline; `would_outline` applies the same cut-off.
pub(crate) const TOKEN_THRESHOLD: u64 = 6_000;
/// Byte length above which `read_file` passes `capped = true` to `outline::generate`.
const FILE_SIZE_CAP: u64 = 500_000;
/// Returns the token estimate above which `read_section` shows an outline of the
/// requested span instead of its body.
///
/// The value is read from the `SRCWALK_SECTION_SOFT_LIMIT` environment variable.
/// When the variable is unset, not valid Unicode, or not a `u64`, the default of
/// 5 000 is used.
fn section_token_limit() -> u64 {
    const DEFAULT_LIMIT: u64 = 5_000;
    match std::env::var("SRCWALK_SECTION_SOFT_LIMIT") {
        Ok(raw) => raw.parse().unwrap_or(DEFAULT_LIMIT),
        Err(_) => DEFAULT_LIMIT,
    }
}
/// Returns the byte size above which a `full` read in `read_file` is truncated to
/// a head excerpt plus outline.
///
/// The value is read from the `SRCWALK_FULL_SIZE_CAP` environment variable. When
/// the variable is unset, not valid Unicode, or not a `u64`, the default of
/// 2 000 000 bytes is used.
fn full_read_size_cap() -> u64 {
    const DEFAULT_CAP: u64 = 2_000_000;
    match std::env::var("SRCWALK_FULL_SIZE_CAP") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(DEFAULT_CAP),
        Err(_) => DEFAULT_CAP,
    }
}
pub fn read_file(
path: &Path,
section: Option<&str>,
full: bool,
cache: &OutlineCache,
) -> Result<String, SrcwalkError> {
let meta = match fs::metadata(path) {
Ok(m) => m,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
return Err(SrcwalkError::NotFound {
path: path.to_path_buf(),
suggestion: suggest_similar(path),
});
}
Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
return Err(SrcwalkError::PermissionDenied {
path: path.to_path_buf(),
});
}
Err(e) => {
return Err(SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
});
}
};
if meta.is_dir() {
return list_directory(path);
}
let byte_len = meta.len();
if byte_len == 0 {
return Ok(format::file_header(path, 0, 0, ViewMode::Empty));
}
if let Some(range) = section {
return read_section(path, range, cache);
}
let file = fs::File::open(path).map_err(|e| SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
})?;
let mmap = unsafe { Mmap::map(&file) }.map_err(|e| SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
})?;
let buf = &mmap[..];
if crate::lang::detection::is_binary(buf) {
let mime = mime_from_ext(path);
return Ok(format::binary_header(path, byte_len, mime));
}
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if crate::lang::detection::is_generated_by_name(name)
|| crate::lang::detection::is_generated_by_content(buf)
{
let line_count = memchr::memchr_iter(b'\n', buf).count() as u32 + 1;
return Ok(format::file_header(
path,
byte_len,
line_count,
ViewMode::Generated,
));
}
let tokens = estimate_tokens(byte_len);
let content = String::from_utf8_lossy(buf);
let line_count = memchr::memchr_iter(b'\n', buf).count() as u32 + 1;
let cap = full_read_size_cap();
if full && byte_len > cap {
const PROGRESSIVE_LINES: u32 = 200;
let file_type = detect_file_type(path);
let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
#[allow(clippy::cast_precision_loss)] let cap_mb = cap as f64 / 1_000_000.0;
#[allow(clippy::cast_precision_loss)]
let file_mb = byte_len as f64 / 1_000_000.0;
let head_end = memchr::memchr_iter(b'\n', buf)
.nth(PROGRESSIVE_LINES as usize - 1)
.map_or(buf.len(), |p| p + 1);
let head = String::from_utf8_lossy(&buf[..head_end]);
let numbered_head = format::number_lines(&head, 1);
let outline = cache.get_or_compute(path, mtime, || {
outline::generate(path, file_type, &content, buf, true)
});
let header = format::file_header(path, byte_len, line_count, ViewMode::Full);
let shown = PROGRESSIVE_LINES.min(line_count);
let next_start = shown + 1;
return Ok(format!(
"{header}\n\n> **full=true capped**: file is {file_mb:.1}MB (cap: {cap_mb:.1}MB). \
Showing first {shown} of {line_count} lines. \
Continue with `section=\"{next_start}-<end>\"` or set SRCWALK_FULL_SIZE_CAP={byte_len} to override.\n\n\
{numbered_head}\n\n## Outline\n\n{outline}"
));
}
if full || tokens <= TOKEN_THRESHOLD {
let header = format::file_header(path, byte_len, line_count, ViewMode::Full);
let numbered = format::number_lines(&content, 1);
return Ok(format!("{header}\n\n{numbered}"));
}
let file_type = detect_file_type(path);
let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let capped = byte_len > FILE_SIZE_CAP;
let outline = cache.get_or_compute(path, mtime, || {
outline::generate(path, file_type, &content, buf, capped)
});
let mode = match file_type {
FileType::StructuredData => ViewMode::Keys,
_ => ViewMode::Outline,
};
let header = format::file_header(path, byte_len, line_count, mode);
Ok(format!("{header}\n\n{outline}"))
}
/// Reports whether `path` is a non-directory whose size-based token estimate exceeds
/// `TOKEN_THRESHOLD`.
///
/// Returns `false` when the metadata cannot be read.
pub fn would_outline(path: &Path) -> bool {
    match std::fs::metadata(path) {
        Ok(meta) => !meta.is_dir() && estimate_tokens(meta.len()) > TOKEN_THRESHOLD,
        Err(_) => false,
    }
}
/// Like `read_file`, but degrades a `full` read step by step until it fits `budget` tokens.
///
/// The cascade only runs when a budget is given, `full` is set, and no `section` is
/// requested; every other combination is forwarded to `read_file` unchanged.
/// Cascade steps, each accepted as soon as its token estimate is within budget:
/// 1. the plain `read_file` output;
/// 2. the outline view plus a note that the full body exceeded the budget;
/// 3. the signatures view plus a note that the outline exceeded the budget;
/// 4. a bare header with a message that nothing fits.
///
/// # Errors
///
/// Propagates errors from `read_file` and the outline/signature renderers, and returns
/// `SrcwalkError::IoError` when the metadata for the final fallback cannot be read.
pub fn read_file_with_budget(
    path: &Path,
    section: Option<&str>,
    full: bool,
    budget: Option<u64>,
    cache: &OutlineCache,
) -> Result<String, SrcwalkError> {
    let b = match budget {
        Some(limit) if full && section.is_none() => limit,
        _ => return read_file(path, section, full, cache),
    };
    let fits = |text: &str| estimate_tokens(text.len() as u64) <= b;

    let full_out = read_file(path, section, full, cache)?;
    if fits(&full_out) {
        return Ok(full_out);
    }

    let outline_out = render_outline_view(path, cache, ViewMode::OutlineCascade)?;
    let with_note = append_cascade_note(&outline_out, "full body", full_out.len(), b);
    if fits(&with_note) {
        return Ok(with_note);
    }

    let sig_out = render_signatures_view(path, cache)?;
    let sig_with_note = append_cascade_note(&sig_out, "outline", outline_out.len(), b);
    if fits(&sig_with_note) {
        return Ok(sig_with_note);
    }

    // Nothing fits: emit just a header. A failed re-read only degrades the
    // reported line count to 0 rather than failing the call.
    let meta = std::fs::metadata(path).map_err(|source| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source,
    })?;
    let line_count = match std::fs::read(path) {
        Ok(bytes) => memchr::memchr_iter(b'\n', &bytes).count() as u32 + 1,
        Err(_) => 0,
    };
    let header = format::file_header(path, meta.len(), line_count, ViewMode::Signatures);
    Ok(format!(
        "{header}\n\n> File too large for budget {b} tokens at any granularity. \
         Drill: `--section <fn-name>` or raise `--budget`."
    ))
}
fn render_outline_view(
path: &Path,
cache: &OutlineCache,
mode: ViewMode,
) -> Result<String, SrcwalkError> {
let meta = std::fs::metadata(path).map_err(|e| SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
})?;
let buf = std::fs::read(path).map_err(|e| SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
})?;
let content = String::from_utf8_lossy(&buf);
let line_count = memchr::memchr_iter(b'\n', &buf).count() as u32 + 1;
let file_type = detect_file_type(path);
let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let outline = cache.get_or_compute(path, mtime, || {
outline::generate(path, file_type, &content, &buf, true)
});
let header = format::file_header(path, meta.len(), line_count, mode);
Ok(format!("{header}\n\n{outline}"))
}
/// Renders the outline view and strips it down to shallow entries.
///
/// The first line (the header) is always kept. Of the remaining lines, only those
/// indented by at most two leading spaces survive, which includes empty lines.
///
/// # Errors
///
/// Propagates any error from `render_outline_view`.
fn render_signatures_view(path: &Path, cache: &OutlineCache) -> Result<String, SrcwalkError> {
    let outline_full = render_outline_view(path, cache, ViewMode::Signatures)?;

    let mut rest = outline_full.lines();
    let header = rest.next().unwrap_or("");
    let shallow = rest.filter(|line| line.bytes().take_while(|&b| b == b' ').count() <= 2);

    let kept: Vec<&str> = std::iter::once(header).chain(shallow).collect();
    Ok(kept.join("\n"))
}
/// Appends a blockquote note to `body` explaining that the previous, richer view
/// did not fit the token budget.
///
/// `prev_kind` names the view that was dropped, `prev_bytes` is its size in bytes
/// (converted to a token estimate for the message), and `budget` is the limit it exceeded.
fn append_cascade_note(body: &str, prev_kind: &str, prev_bytes: usize, budget: u64) -> String {
    format!(
        "{body}\n\n> Note: {prev_kind} ({prev_tokens} tokens) exceeded budget ({budget}). \
         Drill: `--section <fn-name>` for specific symbol, or raise `--budget`.",
        prev_tokens = estimate_tokens(prev_bytes as u64),
    )
}
/// Locates a markdown heading in `buf` and returns the 1-based, inclusive line span
/// of its section as `(first_line, last_line)`.
///
/// `heading` must start with one or more `#`; otherwise `None` is returned. A line
/// matches when, ignoring trailing whitespace, it equals `heading` or continues it with
/// a space, tab, `{`, or `#`. The first match wins. The section runs until the line
/// before the next heading of the same or a shallower level, or to the last line of the
/// file (a trailing newline does not count as an extra line).
///
/// Lines inside ``` fences are ignored both when searching and when looking for the end
/// of the section. Lines that are not valid UTF-8 are skipped entirely.
fn resolve_heading(buf: &[u8], heading: &str) -> Option<(usize, usize)> {
    let wanted = heading.trim_end();
    let wanted_level = wanted.chars().take_while(|&c| c == '#').count();
    if wanted_level == 0 {
        return None;
    }

    let mut in_fence = false;
    let mut first_line: Option<usize> = None;
    let mut pieces = 0usize;

    for (idx, raw) in buf.split(|&b| b == b'\n').enumerate() {
        pieces = idx + 1;
        let Ok(text) = std::str::from_utf8(raw) else {
            continue;
        };
        let text = text.trim_end();

        if text.starts_with("```") {
            in_fence = !in_fence;
            continue;
        }
        if in_fence {
            continue;
        }

        match first_line {
            // Still searching for the requested heading.
            None => {
                let is_match = text == wanted
                    || (text.starts_with(wanted)
                        && text[wanted.len()..]
                            .chars()
                            .next()
                            .is_none_or(|c| matches!(c, ' ' | '\t' | '{' | '#')));
                if is_match {
                    first_line = Some(idx + 1);
                }
            }
            // Heading found: stop at the next heading that is not nested below it.
            // `idx` is this line's 0-based index, i.e. the 1-based number of the
            // line just before it.
            Some(first) => {
                if text.starts_with('#') {
                    let level = text.chars().take_while(|&c| c == '#').count();
                    if level <= wanted_level {
                        return Some((first, idx));
                    }
                }
            }
        }
    }

    let first = first_line?;
    // Splitting on '\n' yields a final empty piece for newline-terminated input;
    // that piece is not a real line.
    let last = if buf.last() == Some(&b'\n') {
        pieces - 1
    } else {
        pieces
    };
    Some((first, last))
}
/// Returns up to `top_n` heading lines from `buf` that are closest to `query`,
/// nearest first.
///
/// Both the query and each candidate are compared as ASCII-lowercased text with the
/// leading `#` markers removed; candidates additionally drop anything from the first
/// `{` onward and any trailing `#`. Closeness is `edit_distance`, and ties keep file
/// order. Headings inside ``` fences, headings with no text, and lines that are not
/// valid UTF-8 are ignored. A query with no text after its `#` markers yields no
/// suggestions.
fn suggest_headings(buf: &[u8], query: &str, top_n: usize) -> Vec<String> {
    let needle = query
        .trim_end()
        .trim_start_matches('#')
        .trim()
        .to_ascii_lowercase();
    if needle.is_empty() {
        return Vec::new();
    }

    let mut in_fence = false;
    let mut ranked: Vec<(usize, String)> = buf
        .split(|&b| b == b'\n')
        .filter_map(|raw| {
            let line = std::str::from_utf8(raw).ok()?.trim_end();
            if line.starts_with("```") {
                in_fence = !in_fence;
                return None;
            }
            if in_fence || !line.starts_with('#') {
                return None;
            }
            let title = line.trim_start_matches('#').trim();
            if title.is_empty() {
                return None;
            }
            let cleaned = title
                .split('{')
                .next()
                .unwrap_or(title)
                .trim_end_matches('#')
                .trim();
            let dist = edit_distance(&needle, &cleaned.to_ascii_lowercase());
            Some((dist, line.to_string()))
        })
        .collect();

    // Stable sort, so equally distant headings stay in file order.
    ranked.sort_by_key(|(dist, _)| *dist);
    ranked.truncate(top_n);
    ranked.into_iter().map(|(_, heading)| heading).collect()
}
fn read_section(path: &Path, range: &str, _cache: &OutlineCache) -> Result<String, SrcwalkError> {
let file = fs::File::open(path).map_err(|e| SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
})?;
let mmap = unsafe { Mmap::map(&file) }.map_err(|e| SrcwalkError::IoError {
path: path.to_path_buf(),
source: e,
})?;
let buf = &mmap[..];
let (start, end) = if range.starts_with('#') {
resolve_heading(buf, range).ok_or_else(|| {
let suggestions = suggest_headings(buf, range, 5);
let reason = if suggestions.is_empty() {
"heading not found in file".to_string()
} else {
format!(
"heading not found in file. Closest matches:\n {}",
suggestions.join("\n ")
)
};
SrcwalkError::InvalidQuery {
query: range.to_string(),
reason,
}
})?
} else if let Some(r) = parse_range(range) {
r
} else if let Some(r) = resolve_symbol(buf, path, range) {
r
} else {
return Err(SrcwalkError::InvalidQuery {
query: range.to_string(),
reason:
"not a valid line range (e.g. \"45-89\"), heading (e.g. \"## Foo\"), or symbol name in this file"
.to_string(),
});
};
let mut line_offsets: Vec<usize> = vec![0];
for pos in memchr::memchr_iter(b'\n', buf) {
line_offsets.push(pos + 1);
}
let total = line_offsets.len();
let s = (start.saturating_sub(1)).min(total);
let e = end.min(total);
if s >= e {
return Err(SrcwalkError::InvalidQuery {
query: range.to_string(),
reason: format!("range out of bounds (file has {total} lines)"),
});
}
let start_byte = line_offsets[s];
let end_byte = if e < line_offsets.len() {
line_offsets[e]
} else {
buf.len()
};
let selected = String::from_utf8_lossy(&buf[start_byte..end_byte]);
let byte_len = selected.len() as u64;
let line_count = (e - s) as u32;
let tok_est = estimate_tokens(byte_len);
let limit = section_token_limit();
if tok_est > limit {
let file_type = detect_file_type(path);
let content = String::from_utf8_lossy(buf);
let header = format::file_header(path, byte_len, line_count, ViewMode::SectionOutline);
let start32 = start as u32;
let end32 = end as u32;
if let crate::types::FileType::Code(lang) = file_type {
let entries = lang_get_outline_entries(&content, lang);
let filtered = filter_entries_in_range(&entries, start32, end32);
if !filtered.is_empty() {
let body = format_section_outline(&filtered);
return Ok(format!(
"{header}\n\n{body}\n\n\
> Section spans ~{tok_est} tokens (limit {limit}). Showing outline of {start}-{end}.\n\
> Drill: `--section <fn-name>` for a specific symbol."
));
}
}
return Ok(format!(
"{header}\n\n\
> Section spans ~{tok_est} tokens (limit {limit}).\n\
> Drill: `--section <fn-name>` for a specific symbol, or use a narrower line range."
));
}
let header = format::file_header(path, byte_len, line_count, ViewMode::Section);
let formatted = format::number_lines(&selected, start as u32);
Ok(format!("{header}\n\n{formatted}"))
}
/// Selects the outline entries that overlap the inclusive line window
/// `range_start..=range_end`.
///
/// An entry that has children and extends beyond the window on either side is
/// replaced by those of its children that overlap the window. Any other entry is
/// kept as-is when it overlaps.
fn filter_entries_in_range(
    entries: &[OutlineEntry],
    range_start: u32,
    range_end: u32,
) -> Vec<&OutlineEntry> {
    let overlaps =
        |item: &OutlineEntry| item.start_line <= range_end && item.end_line >= range_start;

    let mut selected = Vec::new();
    for entry in entries {
        let spills_outside = entry.start_line < range_start || entry.end_line > range_end;
        if spills_outside && !entry.children.is_empty() {
            selected.extend(entry.children.iter().filter(|&child| overlaps(child)));
        } else if overlaps(entry) {
            selected.push(entry);
        }
    }
    selected
}
fn format_section_outline(entries: &[&OutlineEntry]) -> String {
let mut lines = Vec::new();
for e in entries {
let range = if e.start_line == e.end_line {
format!("[{}]", e.start_line)
} else {
format!("[{}-{}]", e.start_line, e.end_line)
};
let sig = e.signature.as_deref().unwrap_or(&e.name);
lines.push(format!(" {range:>14} {sig}"));
for c in &e.children {
let cr = if c.start_line == c.end_line {
format!("[{}]", c.start_line)
} else {
format!("[{}-{}]", c.start_line, c.end_line)
};
let csig = c.signature.as_deref().unwrap_or(&c.name);
lines.push(format!(" {cr:>12} {csig}"));
}
}
lines.join("\n")
}
/// Parses a `start-end` line range such as `45-89`.
///
/// The text is split at the first `-`; both halves are trimmed and parsed as
/// `usize`. Returns `None` when there is no `-`, when either half is not a number,
/// when `start` is 0, or when `end` is smaller than `start`.
fn parse_range(s: &str) -> Option<(usize, usize)> {
    let dash = s.find('-')?;
    let first = s[..dash].trim().parse::<usize>().ok()?;
    let last = s[dash + 1..].trim().parse::<usize>().ok()?;
    (first >= 1 && last >= first).then_some((first, last))
}
/// Looks up `symbol` in the outline of a code file and returns its line span.
///
/// Returns `None` when `buf` is not valid UTF-8, when `path` is not detected as a
/// code file, or when no outline entry (at any nesting depth) has that exact name.
fn resolve_symbol(buf: &[u8], path: &Path, symbol: &str) -> Option<(usize, usize)> {
    let content = std::str::from_utf8(buf).ok()?;
    match detect_file_type(path) {
        FileType::Code(lang) => {
            let entries = lang_get_outline_entries(content, lang);
            find_symbol_in_entries(&entries, symbol)
        }
        _ => None,
    }
}
/// Depth-first search for the first outline entry named exactly `symbol`.
///
/// Each entry is checked before its children, and entries are visited in order.
/// Returns the entry's `(start_line, end_line)`, or `None` when nothing matches.
fn find_symbol_in_entries(entries: &[OutlineEntry], symbol: &str) -> Option<(usize, usize)> {
    entries.iter().find_map(|entry| {
        if entry.name == symbol {
            Some((entry.start_line as usize, entry.end_line as usize))
        } else {
            find_symbol_in_entries(&entry.children, symbol)
        }
    })
}
/// Renders a listing of the directory at `path`, sorted by file name.
///
/// Directories are suffixed with `/`, symlinks with ` →`, and every other entry with
/// a token estimate derived from its size (no suffix when its metadata is unavailable).
/// Entries that fail to read are left out of the listing and of the item count.
///
/// # Errors
///
/// Returns `SrcwalkError::IoError` when the directory itself cannot be read.
fn list_directory(path: &Path) -> Result<String, SrcwalkError> {
    let read_dir = fs::read_dir(path).map_err(|source| SrcwalkError::IoError {
        path: path.to_path_buf(),
        source,
    })?;

    let mut items: Vec<fs::DirEntry> = read_dir.flatten().collect();
    items.sort_by_key(fs::DirEntry::file_name);

    let entries: Vec<String> = items
        .iter()
        .map(|entry| {
            let name = entry.file_name();
            let name = name.to_string_lossy();
            let suffix = match entry.file_type() {
                Ok(kind) if kind.is_dir() => "/".to_string(),
                Ok(kind) if kind.is_symlink() => " →".to_string(),
                // Plain files, and entries whose type is unknown, show a size estimate.
                _ => match entry.metadata() {
                    Ok(m) => {
                        let tokens = estimate_tokens(m.len());
                        format!(" ({tokens} tokens)")
                    }
                    Err(_) => String::new(),
                },
            };
            format!(" {name}{suffix}")
        })
        .collect();

    let header = format!("# {} ({} items)", path.display(), items.len());
    Ok(format!("{header}\n\n{}", entries.join("\n")))
}
/// Suggests an existing file name close to `query`, resolved relative to `scope`.
///
/// Joins `query` onto `scope` and delegates to `suggest_similar`; returns `None` when
/// that finds no candidate.
pub fn suggest_similar_file(scope: &Path, query: &str) -> Option<String> {
    suggest_similar(&scope.join(query))
}
/// Suggests the entry in `path`'s directory whose name is closest to `path`'s file name.
///
/// Only names within an edit distance of 3 are considered. The closest one wins, and
/// among equally close names the first one returned by the directory iterator is kept.
///
/// Returns `None` when `path` has no parent or no UTF-8 file name, when the directory
/// cannot be read, or when no name is close enough.
fn suggest_similar(path: &Path) -> Option<String> {
    let name = path.file_name()?.to_str()?;
    // `Path::parent` yields an empty path for a bare relative name such as `foo.rs`,
    // and `read_dir("")` fails; search the current directory in that case.
    let parent = match path.parent()? {
        dir if dir.as_os_str().is_empty() => Path::new("."),
        dir => dir,
    };

    fs::read_dir(parent)
        .ok()?
        .flatten()
        .filter_map(|entry| {
            let candidate = entry.file_name().to_string_lossy().into_owned();
            let dist = edit_distance(name, &candidate);
            (dist <= 3).then_some((dist, candidate))
        })
        // `min_by_key` keeps the first of several equally close candidates.
        .min_by_key(|(dist, _)| *dist)
        .map(|(_, candidate)| candidate)
}
/// Computes the Levenshtein distance between `a` and `b`.
///
/// The distance is the minimum number of single-character insertions, deletions, and
/// substitutions that turn `a` into `b`. Comparison is per Unicode scalar value
/// (`char`), so one differing non-ASCII character costs one edit rather than one per
/// UTF-8 byte. Case is significant.
pub(crate) fn edit_distance(a: &str, b: &str) -> usize {
    let b_chars: Vec<char> = b.chars().collect();

    // Rolling rows of the DP table: `prev` is the row for the previous character of
    // `a`, `curr` the one being filled in.
    let mut prev: Vec<usize> = (0..=b_chars.len()).collect();
    let mut curr = vec![0; b_chars.len() + 1];

    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let substitution = prev[j] + usize::from(ca != cb);
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            curr[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[b_chars.len()]
}
/// Guesses a MIME type from the extension of `path`.
///
/// The extension is matched case-insensitively (ASCII). Unknown, missing, or
/// non-UTF-8 extensions map to `application/octet-stream`. Font types follow the
/// `font/*` registrations of RFC 8081, so `.woff` and `.woff2`, and `.ttf` and `.otf`,
/// are reported separately.
fn mime_from_ext(path: &Path) -> &'static str {
    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_ascii_lowercase);

    match ext.as_deref() {
        Some("png") => "image/png",
        Some("jpg" | "jpeg") => "image/jpeg",
        Some("gif") => "image/gif",
        Some("svg") => "image/svg+xml",
        Some("webp") => "image/webp",
        Some("ico") => "image/x-icon",
        Some("pdf") => "application/pdf",
        Some("zip") => "application/zip",
        Some("gz" | "tgz") => "application/gzip",
        Some("tar") => "application/x-tar",
        Some("wasm") => "application/wasm",
        Some("woff") => "font/woff",
        Some("woff2") => "font/woff2",
        Some("ttf") => "font/ttf",
        Some("otf") => "font/otf",
        Some("mp3") => "audio/mpeg",
        Some("mp4") => "video/mp4",
        _ => "application/octet-stream",
    }
}
// Unit tests for heading resolution, the full-read size cap, and the budget cascade.
#[cfg(test)]
mod tests {
use super::*;
// A heading's section runs from its own line to the last line of the file.
#[test]
fn heading_found() {
let input = b"# Title\nSome content\n## Section\nSection content\n";
let result = resolve_heading(input, "## Section");
assert_eq!(result, Some((3, 4)));
}
// A heading that does not occur in the file resolves to `None`.
#[test]
fn heading_not_found() {
let input = b"# Title\nContent\n";
let result = resolve_heading(input, "## Missing");
assert_eq!(result, None);
}
// Heading-like lines inside a ``` fence are not matched.
#[test]
fn heading_in_code_block() {
let input = b"# Real\n```\n## Fake\n```\n";
let result = resolve_heading(input, "## Fake");
assert_eq!(result, None);
}
// With duplicate headings the first occurrence wins and ends before the second.
#[test]
fn duplicate_headings() {
let input = b"## First\ntext\n## First\ntext\n";
let result = resolve_heading(input, "## First");
assert_eq!(result, Some((1, 2)));
}
// The last heading's section extends to the end of the file.
#[test]
fn last_heading_to_eof() {
let input = b"# Start\ntext\n## End\nfinal line\n";
let result = resolve_heading(input, "## End");
assert_eq!(result, Some((3, 4)));
}
// Deeper headings stay inside the section; a same-level heading ends it.
#[test]
fn nested_sections() {
let input = b"## A\ncontent\n### B\nmore\n## C\ntext\n";
let result = resolve_heading(input, "## A");
assert_eq!(result, Some((1, 4)));
}
// Queries without a leading `#` are never treated as headings.
#[test]
fn no_hashes() {
let input = b"# Heading\ntext\n";
assert_eq!(resolve_heading(input, ""), None);
assert_eq!(resolve_heading(input, "hello"), None);
}
// A `full` read of a file larger than SRCWALK_FULL_SIZE_CAP is capped with a warning.
// NOTE(review): this sets a process-wide environment variable that `read_file` consults
// on every call, so tests running in parallel that pass `full = true` may observe the
// lowered cap — confirm whether these tests need to be serialized.
#[test]
fn full_true_size_cap_returns_outline() {
use std::io::Write;
let path = std::env::temp_dir().join("srcwalk_test_large.rs");
let mut f = std::fs::File::create(&path).unwrap();
for i in 0..20 {
writeln!(f, "pub fn func_{i}() {{ println!(\"hello\"); }}").unwrap();
}
drop(f);
std::env::set_var("SRCWALK_FULL_SIZE_CAP", "100");
let cache = OutlineCache::new();
let result = read_file(&path, None, true, &cache).unwrap();
assert!(
result.contains("full=true capped"),
"expected size cap warning, got: {result}"
);
assert!(
result.contains("func_0"),
"expected head/outline content in output"
);
std::env::remove_var("SRCWALK_FULL_SIZE_CAP");
let _ = std::fs::remove_file(&path);
}
// A full read that exceeds the budget must fall back to an outline or signatures view
// that fits, and must carry the cascade note.
#[test]
fn budget_cascade_full_to_outline() {
let mut body = String::from("<?php\nclass Big {\n");
for i in 0..120 {
body.push_str(&format!(
" public function method_{i}() {{\n $x = {i}; // padding line {i}\n return $x * 2;\n }}\n"
));
}
body.push_str("}\n");
let path = std::env::temp_dir().join("srcwalk_p11_cascade.php");
std::fs::write(&path, body.as_bytes()).unwrap();
let cache = OutlineCache::new();
let out = read_file_with_budget(&path, None, true, Some(800), &cache).unwrap();
let tokens = estimate_tokens(out.len() as u64);
assert!(tokens <= 800, "cascade overshot budget: {tokens} tokens");
assert!(
out.contains("[outline (full requested, over budget)]") || out.contains("[signatures"),
"expected cascade header label, got: {}",
&out[..out.len().min(200)]
);
assert!(out.contains("exceeded budget"), "missing cascade note");
let _ = std::fs::remove_file(&path);
}
// A file that fits the budget is returned as a plain full read with no cascade note.
#[test]
fn budget_cascade_passthrough_when_fits() {
let path = std::env::temp_dir().join("srcwalk_p11_tiny.php");
std::fs::write(&path, b"<?php\nclass Tiny { public function f() {} }\n").unwrap();
let cache = OutlineCache::new();
let out = read_file_with_budget(&path, None, true, Some(2000), &cache).unwrap();
assert!(
out.contains("[full]"),
"expected [full] label, got header in: {out}"
);
assert!(
!out.contains("exceeded budget"),
"no cascade note for fitting file"
);
let _ = std::fs::remove_file(&path);
}
}