1use std::collections::BTreeMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20use std::time::{SystemTime, UNIX_EPOCH};
21
22use anyhow::{Context, Result, bail};
23use serde::{Deserialize, Serialize};
24
25use crate::index::dense::DenseIndex;
26use crate::index::sparse::Bm25Index;
27use crate::symbols::Symbol;
28use crate::types::Chunk;
29
/// Name of the directory, joined directly onto the repository root, that
/// holds every persisted index file.
pub const INDEX_DIR_NAME: &str = ".veles";

/// On-disk format version stamped into each newly created manifest; `load`
/// refuses an index whose manifest carries a different value.
pub const FORMAT_VERSION: u32 = 2;

// File names of the individual artifacts inside the index directory.
const MANIFEST_FILE: &str = "manifest.json";
const CHUNKS_FILE: &str = "chunks.bin";
const BM25_FILE: &str = "bm25.bin";
const DENSE_FILE: &str = "dense.bin";
// Treated as optional by `load`: when this file is absent the symbol list is empty.
const SYMBOLS_FILE: &str = "symbols.bin";
42
43#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
48pub struct FileFingerprint {
49 pub size: u64,
51 pub mtime_secs: i64,
53 pub chunk_count: usize,
55}
56
57impl FileFingerprint {
58 pub fn from_path(path: &Path, chunk_count: usize) -> Result<Self> {
61 let meta = fs::metadata(path)
62 .with_context(|| format!("stat {}", path.display()))?;
63 let mtime = meta.modified().unwrap_or(UNIX_EPOCH);
64 let mtime_secs = mtime
65 .duration_since(UNIX_EPOCH)
66 .map(|d| d.as_secs() as i64)
67 .unwrap_or(0);
68 Ok(Self {
69 size: meta.len(),
70 mtime_secs,
71 chunk_count,
72 })
73 }
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct Manifest {
82 pub veles_version: String,
84 pub format_version: u32,
87 pub model_name: String,
90 pub embedding_dim: usize,
92 pub include_text_files: bool,
95 pub indexed_at: i64,
97 pub files: BTreeMap<String, FileFingerprint>,
99 pub total_chunks: usize,
101}
102
103impl Manifest {
104 pub fn new(
105 model_name: &str,
106 embedding_dim: usize,
107 include_text_files: bool,
108 ) -> Self {
109 Self {
110 veles_version: env!("CARGO_PKG_VERSION").to_string(),
111 format_version: FORMAT_VERSION,
112 model_name: model_name.to_string(),
113 embedding_dim,
114 include_text_files,
115 indexed_at: now_secs(),
116 files: BTreeMap::new(),
117 total_chunks: 0,
118 }
119 }
120
121 pub fn touch(&mut self) {
122 self.indexed_at = now_secs();
123 }
124}
125
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch.
fn now_secs() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs() as i64,
        Err(_) => 0,
    }
}
132
133pub fn index_dir_for(repo_root: &Path) -> PathBuf {
135 repo_root.join(INDEX_DIR_NAME)
136}
137
138pub fn index_exists(repo_root: &Path) -> bool {
140 let dir = index_dir_for(repo_root);
141 dir.join(MANIFEST_FILE).is_file()
142 && dir.join(CHUNKS_FILE).is_file()
143 && dir.join(BM25_FILE).is_file()
144 && dir.join(DENSE_FILE).is_file()
145}
146
/// Everything `load` reads back from an index directory.
pub struct PersistedIndex {
    /// Parsed contents of the manifest file.
    pub manifest: Manifest,
    /// Chunks decoded from the chunks file.
    pub chunks: Vec<Chunk>,
    /// BM25 index decoded from its file.
    pub bm25: Bm25Index,
    /// Dense index decoded from its file.
    pub dense: DenseIndex,
    /// Symbols decoded from the symbols file; `load` leaves this empty when
    /// that file does not exist.
    pub symbols: Vec<Symbol>,
}
156
157pub fn save(
159 repo_root: &Path,
160 manifest: &Manifest,
161 chunks: &[Chunk],
162 bm25: &Bm25Index,
163 dense: &DenseIndex,
164 symbols: &[Symbol],
165) -> Result<()> {
166 let dir = index_dir_for(repo_root);
167 fs::create_dir_all(&dir)
168 .with_context(|| format!("create index dir {}", dir.display()))?;
169
170 write_json(&dir.join(MANIFEST_FILE), manifest)?;
171 write_bincode(&dir.join(CHUNKS_FILE), &chunks.to_vec())?;
172 write_bincode(&dir.join(BM25_FILE), bm25)?;
173 write_bincode(&dir.join(DENSE_FILE), dense)?;
174 write_bincode(&dir.join(SYMBOLS_FILE), &symbols.to_vec())?;
175 Ok(())
176}
177
178pub fn load(repo_root: &Path) -> Result<PersistedIndex> {
180 let dir = index_dir_for(repo_root);
181 if !dir.is_dir() {
182 bail!("No index found at {}", dir.display());
183 }
184
185 let manifest: Manifest = read_json(&dir.join(MANIFEST_FILE))?;
186 if manifest.format_version != FORMAT_VERSION {
187 bail!(
188 "Index format version {} is incompatible (expected {}). Run `veles index --force` to rebuild.",
189 manifest.format_version,
190 FORMAT_VERSION
191 );
192 }
193 let chunks: Vec<Chunk> = read_bincode(&dir.join(CHUNKS_FILE))?;
194 let bm25: Bm25Index = read_bincode(&dir.join(BM25_FILE))?;
195 let dense: DenseIndex = read_bincode(&dir.join(DENSE_FILE))?;
196 let symbols: Vec<Symbol> = if dir.join(SYMBOLS_FILE).is_file() {
198 read_bincode(&dir.join(SYMBOLS_FILE))?
199 } else {
200 Vec::new()
201 };
202
203 Ok(PersistedIndex {
204 manifest,
205 chunks,
206 bm25,
207 dense,
208 symbols,
209 })
210}
211
212pub fn load_manifest(repo_root: &Path) -> Result<Manifest> {
214 let dir = index_dir_for(repo_root);
215 read_json(&dir.join(MANIFEST_FILE))
216}
217
218pub fn clean(repo_root: &Path) -> Result<bool> {
220 let dir = index_dir_for(repo_root);
221 if dir.is_dir() {
222 fs::remove_dir_all(&dir)
223 .with_context(|| format!("remove {}", dir.display()))?;
224 return Ok(true);
225 }
226 Ok(false)
227}
228
229fn write_json<T: Serialize>(path: &Path, value: &T) -> Result<()> {
230 let f = fs::File::create(path)
231 .with_context(|| format!("create {}", path.display()))?;
232 serde_json::to_writer_pretty(f, value)
233 .with_context(|| format!("write {}", path.display()))?;
234 Ok(())
235}
236
237fn read_json<T: for<'de> Deserialize<'de>>(path: &Path) -> Result<T> {
238 let f = fs::File::open(path)
239 .with_context(|| format!("open {}", path.display()))?;
240 let value = serde_json::from_reader(std::io::BufReader::new(f))
241 .with_context(|| format!("parse {}", path.display()))?;
242 Ok(value)
243}
244
245fn write_bincode<T: Serialize>(path: &Path, value: &T) -> Result<()> {
246 let f = fs::File::create(path)
247 .with_context(|| format!("create {}", path.display()))?;
248 let mut w = std::io::BufWriter::new(f);
249 bincode::serialize_into(&mut w, value)
250 .with_context(|| format!("encode {}", path.display()))?;
251 Ok(())
252}
253
254fn read_bincode<T: for<'de> Deserialize<'de>>(path: &Path) -> Result<T> {
255 let f = fs::File::open(path)
256 .with_context(|| format!("open {}", path.display()))?;
257 let r = std::io::BufReader::new(f);
258 let value = bincode::deserialize_from(r)
259 .with_context(|| format!("decode {}", path.display()))?;
260 Ok(value)
261}
262
/// Counters summarizing the outcome of an index update.
#[derive(Debug, Default, Clone)]
pub struct UpdateReport {
    /// Number of files reported as added.
    pub added_files: usize,
    /// Number of files reported as modified.
    pub modified_files: usize,
    /// Number of files reported as removed.
    pub removed_files: usize,
    /// Number of chunks reported as kept.
    pub kept_chunks: usize,
    /// Number of chunks reported as new.
    pub new_chunks: usize,
    /// Total number of chunks reported.
    pub total_chunks: usize,
}

impl UpdateReport {
    /// Returns `true` when no file was added, modified or removed.
    ///
    /// The chunk counters play no part in this check.
    pub fn is_noop(&self) -> bool {
        matches!(
            (self.added_files, self.modified_files, self.removed_files),
            (0, 0, 0)
        )
    }
}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// A manifest serialized to JSON and parsed back keeps its model name,
    /// embedding dimension and file entries.
    #[test]
    fn manifest_roundtrip_via_json() {
        let mut original = Manifest::new("test-model", 64, false);
        let fingerprint = FileFingerprint {
            size: 100,
            mtime_secs: 1_000_000,
            chunk_count: 2,
        };
        original.files.insert(String::from("src/lib.rs"), fingerprint);
        original.total_chunks = 2;

        let json = serde_json::to_string(&original).unwrap();
        let restored: Manifest = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.model_name, "test-model");
        assert_eq!(restored.embedding_dim, 64);
        assert_eq!(restored.files.len(), 1);
        assert_eq!(restored.files["src/lib.rs"].size, 100);
    }
}