use std::fs;
use std::path::{Path, PathBuf};
use crate::types::CodeChunk;
/// Maximum number of source lines placed in a single chunk.
const WINDOW_LINES: usize = 40;
/// Number of lines the window start advances between consecutive chunks.
/// Being smaller than `WINDOW_LINES`, it makes neighbouring chunks overlap.
const STEP_LINES: usize = 20;
/// Splits the file at `path` into overlapping, line-based chunks.
///
/// Each chunk covers at most `WINDOW_LINES` lines and successive chunks start
/// `STEP_LINES` lines apart. `start_line` / `end_line` are 1-based and
/// inclusive, and the chunk `id` has the form `<file>:<start_line>:<end_line>`.
/// A file with no lines produces an empty vector.
///
/// # Errors
///
/// Returns an error when the file cannot be read into a `String`.
pub(crate) fn chunks_from_file(path: &Path) -> anyhow::Result<Vec<CodeChunk>> {
    let text = fs::read_to_string(path)?;
    let rows: Vec<&str> = text.lines().collect();
    let total = rows.len();
    let file_name = path.to_string_lossy().into_owned();

    let mut chunks = Vec::new();
    // Candidate window starts are 0, STEP_LINES, 2 * STEP_LINES, ... below `total`.
    for offset in (0..total).step_by(STEP_LINES) {
        let stop = total.min(offset + WINDOW_LINES);
        let start_line = offset + 1;
        let end_line = stop;
        let id = format!("{file_name}:{start_line}:{end_line}");
        let content = rows[offset..stop].join("\n");

        chunks.push(CodeChunk {
            id,
            file: file_name.clone(),
            start_line,
            end_line,
            content,
            function_name: None,
            score: 0.0,
            compact_snippet: None,
            match_reason: String::new(),
        });

        // Once a window reaches the final line, any later window would only
        // repeat a tail that is already covered, so stop here.
        if stop == total {
            break;
        }
    }
    Ok(chunks)
}
/// Walks `dir` recursively and returns the chunks of every regular file whose
/// path ends with `ext` (for example `".rs"`).
///
/// Directories named `target`, `.git` or `node_modules` are not descended
/// into. Directories and entries that cannot be read are skipped silently, as
/// are files whose path is not valid UTF-8. Entries that are neither a
/// directory nor a regular file according to `DirEntry::file_type` are ignored.
///
/// The traversal uses an explicit stack instead of recursion, so the order of
/// the returned chunks follows the order in which the OS lists entries.
///
/// # Errors
///
/// Returns the first error produced by [`chunks_from_file`] for a matching
/// file; the walk stops at that point.
pub(crate) fn chunks_from_dir(dir: &Path, ext: &str) -> anyhow::Result<Vec<CodeChunk>> {
    let mut stack: Vec<PathBuf> = vec![dir.to_path_buf()];
    let mut out: Vec<CodeChunk> = Vec::new();

    while let Some(current) = stack.pop() {
        // Best effort: an unreadable directory must not abort the whole walk.
        let entries = match fs::read_dir(&current) {
            Ok(it) => it,
            Err(_) => continue,
        };

        // `flatten` drops individual entries that failed to be read.
        for entry in entries.flatten() {
            let path = entry.path();
            let file_type = match entry.file_type() {
                Ok(ft) => ft,
                Err(_) => continue,
            };

            if file_type.is_dir() {
                // Skip build output, VCS metadata and vendored dependencies.
                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                    if matches!(name, "target" | ".git" | "node_modules") {
                        continue;
                    }
                }
                stack.push(path);
            } else if file_type.is_file() {
                // Suffix match on the whole path; non-UTF-8 paths never match.
                let wanted = path.to_str().map_or(false, |s| s.ends_with(ext));
                if wanted {
                    out.extend(chunks_from_file(&path)?);
                }
            }
        }
    }
    Ok(out)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use tempfile::TempDir;

    /// A 100-line file must yield windows that start at line 1, span at least
    /// 40 lines for the first chunk, and carry pairwise distinct ids.
    #[test]
    fn chunks_from_file_produces_windows() {
        let scratch = TempDir::new().unwrap();
        let source_path = scratch.path().join("a.rs");
        let text: String = (0..100).map(|i| format!("let x{i} = {i};\n")).collect();
        fs::write(&source_path, text.as_bytes()).unwrap();

        let produced = chunks_from_file(&source_path).unwrap();

        assert!(!produced.is_empty());
        let first = &produced[0];
        assert_eq!(first.start_line, 1);
        assert!(first.end_line >= 40);

        let distinct_ids: HashSet<&str> = produced.iter().map(|c| c.id.as_str()).collect();
        assert_eq!(distinct_ids.len(), produced.len());
    }

    /// The directory walk must pick up `.rs` files at the root and in a
    /// sub-directory while ignoring files with another extension.
    #[test]
    fn chunks_from_dir_collects_rs_files() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path();
        let nested = root.join("sub");
        fs::create_dir(&nested).unwrap();

        fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();
        fs::write(nested.join("b.rs"), "fn b() {}\n").unwrap();
        fs::write(root.join("c.txt"), "not rust\n").unwrap();

        let produced = chunks_from_dir(root, ".rs").unwrap();
        let files: HashSet<&str> = produced.iter().map(|c| c.file.as_str()).collect();
        assert_eq!(files.len(), 2, "expected 2 rust files, got {files:?}");
    }
}