use super::lightweight::*;
use crate::parsers::parse_file;
use anyhow::Result;
use std::path::{Path, PathBuf};
/// Parses a single file and condenses the result into a [`LightweightFileInfo`].
///
/// The line count handed to `LightweightFileInfo::from_parse_result` is taken
/// from the global cache's line listing for `path` when one is available.
/// Otherwise it is approximated by the largest `line_end` among the parsed
/// functions and classes, or `1` when the parse result contains neither.
///
/// # Errors
///
/// Returns whatever error [`parse_file`] reports for `path`.
pub fn parse_file_lightweight(path: &Path) -> Result<LightweightFileInfo> {
    let language = Language::from_path(path);
    let result = parse_file(path)?;

    let loc = crate::cache::global_cache()
        .get_lines(path)
        // Saturate rather than wrap with `as`, so an enormous line listing can
        // never be reported as a tiny one.
        .map(|lines| u32::try_from(lines.len()).unwrap_or(u32::MAX))
        .unwrap_or_else(|| {
            // No cached lines: the furthest line any definition reaches is the
            // best lower bound available from the parse result alone.
            result
                .functions
                .iter()
                .map(|f| f.line_end)
                .chain(result.classes.iter().map(|c| c.line_end))
                .max()
                .unwrap_or(1)
        });

    Ok(LightweightFileInfo::from_parse_result(
        &result,
        path.to_path_buf(),
        language,
        loc,
    ))
}
pub fn parse_files_streaming<'a>(
files: &'a [PathBuf],
) -> impl Iterator<Item = Result<LightweightFileInfo>> + 'a {
files.iter().map(|path| parse_file_lightweight(path))
}
/// Parses `files` one after another and collects every successful result.
///
/// `progress`, when supplied, is invoked as `progress(index, total)` with the
/// zero-based index of the file about to be parsed, on every 100th index and
/// on the final index.
///
/// Files that fail to parse are logged through `tracing::warn!`, counted in
/// `parse_errors`, and left out of the returned vector.
pub fn parse_files_sequential_collect(
    files: &[PathBuf],
    progress: Option<&dyn Fn(usize, usize)>,
) -> (Vec<LightweightFileInfo>, LightweightParseStats) {
    let file_count = files.len();
    let mut stats = LightweightParseStats {
        total_files: file_count,
        ..Default::default()
    };
    let mut parsed = Vec::with_capacity(file_count);

    for (position, file) in files.iter().enumerate() {
        // Report at each 100-file boundary and once more for the last file.
        let due = position % 100 == 0 || position + 1 == file_count;
        match progress {
            Some(report) if due => report(position, file_count),
            _ => {}
        }

        let info = match parse_file_lightweight(file) {
            Ok(info) => info,
            Err(err) => {
                stats.parse_errors += 1;
                tracing::warn!("Failed to parse {}: {}", file.display(), err);
                continue;
            }
        };
        stats.add_file(&info);
        parsed.push(info);
    }

    (parsed, stats)
}
/// Parses `files` in parallel, one batch at a time, and gathers the results.
///
/// The input is cut into slices of at most `batch_size` paths. Each slice is
/// parsed through rayon's `par_iter`, and its successful results are folded
/// into the statistics and appended to the output before the next slice
/// starts. A `batch_size` of `0` is treated as `1` (it used to panic inside
/// `slice::chunks`).
///
/// `progress`, when supplied, is invoked as `progress(started, total)` where
/// `started` is the number of files picked up before the current one; it fires
/// whenever that number is a multiple of 200. The call happens inside the
/// parallel closure, which is why the callback must be `Sync`.
///
/// Files that fail to parse are logged through `tracing::warn!`, counted in
/// `parse_errors`, and left out of the returned vector.
pub fn parse_files_parallel_streaming(
    files: &[PathBuf],
    batch_size: usize,
    progress: Option<&(dyn Fn(usize, usize) + Sync)>,
) -> (Vec<LightweightFileInfo>, LightweightParseStats) {
    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // `slice::chunks` panics on a chunk size of zero; clamp so a degenerate
    // argument degrades to one-file batches instead of aborting the run.
    let batch_size = batch_size.max(1);

    let total = files.len();
    let mut all_results = Vec::with_capacity(total);
    let mut stats = LightweightParseStats {
        total_files: total,
        ..Default::default()
    };
    // Shared across batches so progress and error counts span the whole run.
    let counter = AtomicUsize::new(0);
    let errors = AtomicUsize::new(0);

    for chunk in files.chunks(batch_size) {
        let batch_results: Vec<Option<LightweightFileInfo>> = chunk
            .par_iter()
            .map(|path| {
                // `fetch_add` yields the value before the increment.
                let count = counter.fetch_add(1, Ordering::Relaxed);
                if let Some(cb) = progress {
                    if count.is_multiple_of(200) {
                        cb(count, total);
                    }
                }
                match parse_file_lightweight(path) {
                    Ok(info) => Some(info),
                    Err(e) => {
                        errors.fetch_add(1, Ordering::Relaxed);
                        tracing::warn!("Failed to parse {}: {}", path.display(), e);
                        None
                    }
                }
            })
            .collect();

        // `stats` is only touched here, outside the parallel closure.
        for info in batch_results.into_iter().flatten() {
            stats.add_file(&info);
            all_results.push(info);
        }
    }

    stats.parse_errors = errors.load(Ordering::Relaxed);
    (all_results, stats)
}
pub fn stream_parse_with_callback<F>(
files: &[PathBuf],
mut on_file: F,
progress: Option<&dyn Fn(usize, usize)>,
) -> LightweightParseStats
where
F: FnMut(LightweightFileInfo) -> Result<()>,
{
let total = files.len();
let mut stats = LightweightParseStats {
total_files: total,
..Default::default()
};
for (idx, path) in files.iter().enumerate() {
if let Some(cb) = progress {
if idx % 100 == 0 || idx == total - 1 {
cb(idx, total);
}
}
match parse_file_lightweight(path) {
Ok(info) => {
stats.add_file(&info);
if let Err(e) = on_file(info) {
tracing::warn!("Callback error for {}: {}", path.display(), e);
}
}
Err(e) => {
stats.parse_errors += 1;
tracing::warn!("Failed to parse {}: {}", path.display(), e);
}
}
}
stats
}
/// Counts the lines of the UTF-8 text file at `path`.
///
/// Lines are delimited the way [`str::lines`] delimits them, so a trailing
/// newline does not add an extra empty line and an empty file has zero lines.
/// A count that does not fit in a `u32` saturates at `u32::MAX` instead of
/// silently wrapping.
///
/// # Errors
///
/// Fails if the file cannot be read or its contents are not valid UTF-8.
fn count_lines(path: &Path) -> Result<u32> {
    let content = std::fs::read_to_string(path)?;
    let line_total = content.lines().count();
    Ok(u32::try_from(line_total).unwrap_or(u32::MAX))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// A small Python file parses successfully, is tagged as Python, and
    /// reports at least one function.
    #[test]
    fn test_parse_file_lightweight() {
        let mut source = NamedTempFile::with_suffix(".py").unwrap();
        writeln!(
            source,
            "def hello(name):\n print(f'Hello {{name}}')\n\ndef world():\n pass"
        )
        .unwrap();

        let outcome = parse_file_lightweight(source.path());
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        assert_eq!(parsed.language, Language::Python);
        assert!(!parsed.functions.is_empty());
    }

    /// The lazy iterator yields exactly one successful item for one input path.
    #[test]
    fn test_streaming_iterator() {
        let mut source = NamedTempFile::with_suffix(".py").unwrap();
        writeln!(source, "x = 1").unwrap();

        let inputs = vec![source.path().to_path_buf()];
        let mut collected: Vec<_> = parse_files_streaming(&inputs).collect();

        assert_eq!(collected.len(), 1);
        assert!(collected.pop().unwrap().is_ok());
    }

    /// The callback variant invokes the callback once per parsed file and
    /// records that file in the returned statistics.
    #[test]
    fn test_callback_streaming() {
        let mut source = NamedTempFile::with_suffix(".py").unwrap();
        writeln!(source, "def test(): pass").unwrap();

        let inputs = vec![source.path().to_path_buf()];
        let mut invocations = 0;
        let on_file = |_info| {
            invocations += 1;
            Ok(())
        };

        let stats = stream_parse_with_callback(&inputs, on_file, None);

        assert_eq!(invocations, 1);
        assert_eq!(stats.parsed_files, 1);
    }
}