use std::path::Path;
use std::sync::Arc;
use synwire_chunker::{ChunkOptions, Chunker};
use synwire_core::embeddings::Embeddings;
use synwire_core::vectorstores::VectorStore;
use synwire_core::vfs::IndexOptions;
use tracing::{debug, warn};
use crate::hashes::{self, HashRegistry};
/// Walks `root`, chunks every file whose content changed, and adds the chunks to `store`.
///
/// For each path returned by `crate::walker::walk(root, opts)`:
///
/// 1. The file is read as UTF-8 text; if reading fails, a warning is logged and the file
///    is skipped.
/// 2. The content's xxh128 hex digest is compared with the entry stored in
///    `hash_registry.files` under the (lossily converted) path string. On a match the
///    file is skipped as unchanged.
/// 3. The content is chunked with a [`Chunker`] configured from `chunk_size` and
///    `chunk_overlap`. A file that yields no chunks only has its hash recorded.
/// 4. The chunks are passed to `store.add_documents` together with `embeddings`. On
///    success the hash is recorded and the counters are advanced; on failure a warning is
///    logged and the hash is left untouched, so a later run sees the file as changed again.
///
/// # Returns
///
/// `(files_indexed, chunks_produced)`: the number of files whose chunks were successfully
/// added to the store, and the total number of chunks in those files. Files whose
/// `add_documents` call failed are not counted.
///
/// # Errors
///
/// Every per-file failure visible in this function is logged with `warn!` and skipped
/// rather than propagated, so this body itself never constructs an `Err`.
pub async fn run(
    root: &Path,
    opts: &IndexOptions,
    embeddings: &Arc<dyn Embeddings>,
    store: &Arc<dyn VectorStore>,
    chunk_size: usize,
    chunk_overlap: usize,
    hash_registry: &mut HashRegistry,
) -> Result<(usize, usize), Box<dyn std::error::Error + Send + Sync>> {
    let files = crate::walker::walk(root, opts);
    let mut files_indexed = 0usize;
    let mut chunks_produced = 0usize;

    // Named `chunk_opts` so it does not shadow the `opts` (index options) parameter.
    let mut chunk_opts = ChunkOptions::default();
    chunk_opts.chunk_size = chunk_size;
    chunk_opts.overlap = chunk_overlap;
    let chunker = Chunker::with_options(chunk_opts);

    for file_path in &files {
        // NOTE(review): this is a synchronous read inside an async fn; if files can be
        // large or the filesystem slow, consider moving it off the async executor.
        let content = match std::fs::read_to_string(file_path) {
            Ok(c) => c,
            Err(e) => {
                warn!("Skipping {}: {e}", file_path.display());
                continue;
            }
        };

        let path_str = file_path.to_string_lossy().to_string();
        let new_hash = hashes::xxh128_hex(content.as_bytes());

        // Unchanged since the last recorded run: nothing to re-index.
        let unchanged = hash_registry
            .files
            .get(&path_str)
            .is_some_and(|old_hash| *old_hash == new_hash);
        if unchanged {
            debug!("Skipping {} (unchanged, xxh128 match)", file_path.display());
            continue;
        }

        let chunks = chunker.chunk_file(&path_str, &content);
        if chunks.is_empty() {
            // Nothing to store, but remember the hash so the file is not re-chunked
            // on the next run while its content stays the same.
            let _ = hash_registry.files.insert(path_str, new_hash);
            continue;
        }

        debug!("Indexing {} ({} chunks)", file_path.display(), chunks.len());
        match store.add_documents(&chunks, embeddings.as_ref()).await {
            Ok(_ids) => {
                // Count only what actually reached the store, so the returned totals
                // are not inflated by files whose indexing failed.
                files_indexed += 1;
                chunks_produced += chunks.len();
                let _ = hash_registry.files.insert(path_str, new_hash);
            }
            // Hash is deliberately not recorded: the file will be retried next run.
            Err(e) => warn!("Failed to index {}: {e}", file_path.display()),
        }
    }

    Ok((files_indexed, chunks_produced))
}