1use open_kioku_core::{
2 AnalysisFact, CodeChunk, EvidenceSourceType, File, FileId, FileProvenance, GitCochangeEdge,
3 GitCommitRecord, GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, HistorySnapshot,
4 HistorySummary, ImpactReport, Import, IndexManifest, SearchResult, Symbol, SymbolId,
5 SymbolOccurrence, SymbolProvenance, TestTarget,
6};
7use open_kioku_errors::{OkError, Result};
8use std::path::Path;
9
10pub trait MetadataStore: Send + Sync {
11 fn initialize(&self) -> Result<()>;
12 fn put_manifest(&self, manifest: &IndexManifest) -> Result<()>;
13 fn manifest(&self) -> Result<Option<IndexManifest>>;
14 fn replace_index(&self, data: IndexData<'_>) -> Result<()>;
15 fn replace_files_index(&self, _update: PartialIndexUpdate<'_>) -> Result<()> {
16 Err(OkError::Unsupported(
17 "partial index replacement is not implemented by this metadata store".into(),
18 ))
19 }
20 fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>>;
21 fn get_file_by_path(&self, path: &Path) -> Result<Option<File>>;
22 fn list_symbols(&self, query: Option<&str>, limit: usize, offset: usize)
23 -> Result<Vec<Symbol>>;
24 fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>>;
25 fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>>;
26 fn all_chunks(&self) -> Result<Vec<CodeChunk>>;
27 fn tests(&self) -> Result<Vec<TestTarget>>;
28 fn imports(&self) -> Result<Vec<Import>>;
29 fn analysis_facts(
30 &self,
31 _source_type: Option<EvidenceSourceType>,
32 _limit: usize,
33 ) -> Result<Vec<AnalysisFact>> {
34 Ok(Vec::new())
35 }
36 fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>>;
37 fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>>;
38 fn symbols_for_file(&self, _file_id: &FileId) -> Result<Vec<Symbol>> {
39 Ok(Vec::new())
40 }
41 fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
42 let chunks = self.all_chunks()?;
43 let mut results = Vec::new();
44 for chunk in chunks {
45 if chunk.text.contains(query) {
46 results.push(chunk);
47 if results.len() >= limit {
48 break;
49 }
50 }
51 }
52 Ok(results)
53 }
54 fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
55 let files = self.list_files(usize::MAX, 0)?;
56 let lower_pattern = pattern.to_ascii_lowercase();
57 Ok(files
58 .into_iter()
59 .filter(|f| {
60 f.path
61 .to_string_lossy()
62 .to_ascii_lowercase()
63 .contains(&lower_pattern)
64 })
65 .collect())
66 }
67 fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
68 let tests = self.tests()?;
69 let set = file_ids.iter().collect::<std::collections::HashSet<_>>();
70 Ok(tests
71 .into_iter()
72 .filter(|t| set.contains(&t.file_id))
73 .collect())
74 }
75}
76
77pub struct IndexData<'a> {
78 pub manifest: &'a IndexManifest,
79 pub files: &'a [File],
80 pub symbols: &'a [Symbol],
81 pub chunks: &'a [CodeChunk],
82 pub tests: &'a [TestTarget],
83 pub imports: &'a [Import],
84 pub occurrences: &'a [SymbolOccurrence],
85 pub analysis_facts: &'a [AnalysisFact],
86}
87
88pub struct PartialIndexUpdate<'a> {
89 pub manifest: &'a IndexManifest,
90 pub changed_files: &'a [File],
91 pub deleted_file_ids: &'a [FileId],
92 pub symbols: &'a [Symbol],
93 pub chunks: &'a [CodeChunk],
94 pub tests: &'a [TestTarget],
95 pub imports: &'a [Import],
96 pub occurrences: &'a [SymbolOccurrence],
97 pub analysis_facts: &'a [AnalysisFact],
98 pub graph_nodes: &'a [GraphNode],
99 pub graph_edges: &'a [GraphEdge],
100}
101
/// How a single file differs between the previous index and the next one, as
/// reported by `classify_file_changes_with_parser_version`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IndexChangeKind {
    /// Same id, same path and same content hash as before.
    Unchanged,
    /// Same id and path as before, but the content hash differs.
    Modified,
    /// The file id was not present in the previous file list.
    Added,
    /// The file id was present before and is absent from the next file list.
    Deleted,
    /// Same id as before, but the path differs.
    Renamed,
    /// Assigned to every next file when the manifests' index modes differ.
    ModeSkipped,
    /// Assigned to every next file when both parser versions are known and differ.
    ParserVersionStale,
    /// Assigned to every next file when the manifests' schema versions differ.
    SchemaVersionStale,
}
113
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub struct IndexChange {
116 pub old_path: Option<std::path::PathBuf>,
117 pub new_path: Option<std::path::PathBuf>,
118 pub file_id: Option<FileId>,
119 pub kind: IndexChangeKind,
120}
121
122pub fn classify_file_changes(
123 previous_manifest: Option<&IndexManifest>,
124 next_manifest: &IndexManifest,
125 previous_files: &[File],
126 next_files: &[File],
127) -> Vec<IndexChange> {
128 classify_file_changes_with_parser_version(
129 previous_manifest,
130 next_manifest,
131 previous_files,
132 next_files,
133 None,
134 None,
135 )
136}
137
138pub fn classify_file_changes_with_parser_version(
139 previous_manifest: Option<&IndexManifest>,
140 next_manifest: &IndexManifest,
141 previous_files: &[File],
142 next_files: &[File],
143 previous_parser_version: Option<&str>,
144 next_parser_version: Option<&str>,
145) -> Vec<IndexChange> {
146 if previous_manifest
147 .is_some_and(|manifest| manifest.schema_version != next_manifest.schema_version)
148 {
149 return next_files
150 .iter()
151 .map(|file| IndexChange {
152 old_path: Some(file.path.clone()),
153 new_path: Some(file.path.clone()),
154 file_id: Some(file.id.clone()),
155 kind: IndexChangeKind::SchemaVersionStale,
156 })
157 .collect();
158 }
159 if previous_parser_version
160 .zip(next_parser_version)
161 .is_some_and(|(previous, next)| previous != next)
162 {
163 return next_files
164 .iter()
165 .map(|file| IndexChange {
166 old_path: Some(file.path.clone()),
167 new_path: Some(file.path.clone()),
168 file_id: Some(file.id.clone()),
169 kind: IndexChangeKind::ParserVersionStale,
170 })
171 .collect();
172 }
173 if previous_manifest.is_some_and(|manifest| manifest.index_mode != next_manifest.index_mode) {
174 return next_files
175 .iter()
176 .map(|file| IndexChange {
177 old_path: Some(file.path.clone()),
178 new_path: Some(file.path.clone()),
179 file_id: Some(file.id.clone()),
180 kind: IndexChangeKind::ModeSkipped,
181 })
182 .collect();
183 }
184
185 let previous_by_id = previous_files
186 .iter()
187 .map(|file| (&file.id, file))
188 .collect::<std::collections::BTreeMap<_, _>>();
189 let next_by_id = next_files
190 .iter()
191 .map(|file| (&file.id, file))
192 .collect::<std::collections::BTreeMap<_, _>>();
193 let mut changes = Vec::new();
194 for file in next_files {
195 let kind = match previous_by_id.get(&file.id) {
196 None => IndexChangeKind::Added,
197 Some(previous) if previous.path != file.path => IndexChangeKind::Renamed,
198 Some(previous) if previous.content_hash != file.content_hash => {
199 IndexChangeKind::Modified
200 }
201 Some(_) => IndexChangeKind::Unchanged,
202 };
203 let old_path = previous_by_id.get(&file.id).map(|file| file.path.clone());
204 changes.push(IndexChange {
205 old_path,
206 new_path: Some(file.path.clone()),
207 file_id: Some(file.id.clone()),
208 kind,
209 });
210 }
211 for file in previous_files {
212 if !next_by_id.contains_key(&file.id) {
213 changes.push(IndexChange {
214 old_path: Some(file.path.clone()),
215 new_path: None,
216 file_id: Some(file.id.clone()),
217 kind: IndexChangeKind::Deleted,
218 });
219 }
220 }
221 changes.sort_by(|left, right| {
222 left.new_path
223 .as_ref()
224 .or(left.old_path.as_ref())
225 .cmp(&right.new_path.as_ref().or(right.old_path.as_ref()))
226 });
227 changes
228}
229
230pub fn partial_index_supported(previous: Option<&IndexManifest>, next: &IndexManifest) -> bool {
231 previous.is_some_and(|previous| {
232 previous.schema_version == next.schema_version && previous.index_mode == next.index_mode
233 })
234}
235
236pub fn partial_index_supported_for_versions(
237 previous: Option<&IndexManifest>,
238 next: &IndexManifest,
239 previous_parser_version: Option<&str>,
240 next_parser_version: Option<&str>,
241) -> bool {
242 partial_index_supported(previous, next)
243 && previous_parser_version
244 .zip(next_parser_version)
245 .map(|(previous, next)| previous == next)
246 .unwrap_or(true)
247}
248
#[cfg(test)]
mod tests {
    use super::{
        classify_file_changes, classify_file_changes_with_parser_version, IndexChange,
        IndexChangeKind,
    };
    use chrono::Utc;
    use open_kioku_core::{
        File, FileId, IndexManifest, IndexQuality, Language, Repository, RepositoryId,
    };
    use std::path::{Path, PathBuf};

    /// True when some change has `kind` and matches every path expectation that
    /// is given; a `None` expectation leaves that side unchecked.
    fn has_change(
        changes: &[IndexChange],
        kind: IndexChangeKind,
        old: Option<&str>,
        new: Option<&str>,
    ) -> bool {
        changes.iter().any(|change| {
            change.kind == kind
                && old.map_or(true, |path| {
                    change.old_path.as_deref() == Some(Path::new(path))
                })
                && new.map_or(true, |path| {
                    change.new_path.as_deref() == Some(Path::new(path))
                })
        })
    }

    #[test]
    fn classifies_added_modified_deleted_and_renamed_files() {
        let previous = vec![
            file("stable", "src/stable.rs", "a"),
            file("modified", "src/modified.rs", "a"),
            file("renamed", "src/old.rs", "a"),
            file("deleted", "src/deleted.rs", "a"),
        ];
        let next = vec![
            file("stable", "src/stable.rs", "a"),
            file("modified", "src/modified.rs", "b"),
            file("renamed", "src/new.rs", "a"),
            file("added", "src/added.rs", "a"),
        ];

        let changes = classify_file_changes(Some(&manifest(1)), &manifest(1), &previous, &next);

        assert!(has_change(
            &changes,
            IndexChangeKind::Unchanged,
            None,
            Some("src/stable.rs"),
        ));
        assert!(has_change(
            &changes,
            IndexChangeKind::Modified,
            None,
            Some("src/modified.rs"),
        ));
        assert!(has_change(
            &changes,
            IndexChangeKind::Renamed,
            Some("src/old.rs"),
            Some("src/new.rs"),
        ));
        assert!(has_change(
            &changes,
            IndexChangeKind::Added,
            None,
            Some("src/added.rs"),
        ));
        assert!(has_change(
            &changes,
            IndexChangeKind::Deleted,
            Some("src/deleted.rs"),
            None,
        ));
    }

    #[test]
    fn schema_and_parser_version_changes_force_stale_classification() {
        let previous = vec![file("f1", "src/lib.rs", "a")];
        let next = vec![file("f1", "src/lib.rs", "b")];

        // A schema bump wins even though the content hash changed as well.
        let schema_changes =
            classify_file_changes(Some(&manifest(1)), &manifest(2), &previous, &next);
        assert_eq!(schema_changes[0].kind, IndexChangeKind::SchemaVersionStale);

        // Same schema, different parser versions.
        let parser_changes = classify_file_changes_with_parser_version(
            Some(&manifest(1)),
            &manifest(1),
            &previous,
            &next,
            Some("parser-a"),
            Some("parser-b"),
        );
        assert_eq!(parser_changes[0].kind, IndexChangeKind::ParserVersionStale);
    }

    /// Minimal manifest whose only interesting field is `schema_version`.
    fn manifest(schema_version: u32) -> IndexManifest {
        let repository = Repository {
            id: RepositoryId::new("repo"),
            name: "repo".into(),
            root: PathBuf::from("."),
            branch: None,
            commit: None,
            indexed_at: Some(Utc::now()),
        };
        IndexManifest {
            repository,
            file_count: 0,
            symbol_count: 0,
            chunk_count: 0,
            indexed_at: Utc::now(),
            schema_version,
            index_mode: Default::default(),
            phase_reports: Vec::new(),
            quality: IndexQuality::default(),
        }
    }

    /// Rust file fixture identified by `id`, located at `path`, hashed as `hash`.
    fn file(id: &str, path: &str, hash: &str) -> File {
        File {
            id: FileId::new(id),
            repository_id: RepositoryId::new("repo"),
            path: PathBuf::from(path),
            language: Language::Rust,
            size_bytes: 10,
            content_hash: hash.into(),
            is_generated: false,
            is_vendor: false,
        }
    }
}
354
/// Total number of nodes and edges, as returned by `GraphStore::graph_counts`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct GraphCounts {
    /// Number of nodes.
    pub nodes: usize,
    /// Number of edges.
    pub edges: usize,
}
360
/// Per-type counts, as returned by `GraphStore::graph_schema_counts`.
///
/// Both maps are keyed by a type name and ordered by that key.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct GraphSchemaCounts {
    /// Count per node type name.
    pub node_types: std::collections::BTreeMap<String, usize>,
    /// Count per edge type name.
    pub edge_types: std::collections::BTreeMap<String, usize>,
}
366
/// Statistics for one node or edge type, used as the map value returned by
/// `GraphStore::node_type_stats` and `GraphStore::edge_type_stats`.
#[derive(Clone, Debug, Default)]
pub struct TypeStats {
    /// Number of items of the type.
    pub count: usize,
    /// Whether evidence is available for the type.
    pub evidence_available: bool,
    /// Freshness value, when known.
    // NOTE(review): the unit of this number is not visible in this file — confirm with producers.
    pub freshness: Option<u64>,
}
373
374pub trait GraphStore: Send + Sync {
375 fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()>;
376 fn node_by_id(&self, _id: &str) -> Result<Option<GraphNode>> {
377 Err(OkError::Unsupported(
378 "node_by_id is not implemented by this graph store".into(),
379 ))
380 }
381 fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)>;
382 fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>>;
383
384 fn node_type_stats(&self) -> Result<std::collections::HashMap<String, TypeStats>> {
385 Ok(std::collections::HashMap::new())
386 }
387
388 fn edge_type_stats(&self) -> Result<std::collections::HashMap<String, TypeStats>> {
389 Ok(std::collections::HashMap::new())
390 }
391
392 fn nodes_by_type(
393 &self,
394 _node_type: GraphNodeType,
395 _limit: usize,
396 _offset: usize,
397 ) -> Result<Vec<GraphNode>> {
398 Err(OkError::Unsupported(
399 "nodes_by_type is not implemented by this graph store".into(),
400 ))
401 }
402
403 fn all_graph_nodes(&self) -> Result<Vec<GraphNode>> {
404 Err(OkError::Unsupported(
405 "all_graph_nodes is not implemented by this graph store".into(),
406 ))
407 }
408
409 fn edges_by_type(
410 &self,
411 _edge_type: GraphEdgeType,
412 _limit: usize,
413 _offset: usize,
414 ) -> Result<Vec<GraphEdge>> {
415 Err(OkError::Unsupported(
416 "edges_by_type is not implemented by this graph store".into(),
417 ))
418 }
419
420 fn graph_counts(&self) -> Result<GraphCounts> {
421 Err(OkError::Unsupported(
422 "graph_counts is not implemented by this graph store".into(),
423 ))
424 }
425
426 fn graph_schema_counts(&self) -> Result<GraphSchemaCounts> {
427 Err(OkError::Unsupported(
428 "graph_schema_counts is not implemented by this graph store".into(),
429 ))
430 }
431
432 fn graph_edges_between(&self, _from: &str, _to: &str, _limit: usize) -> Result<Vec<GraphEdge>> {
433 Err(OkError::Unsupported(
434 "graph_edges_between is not implemented by this graph store".into(),
435 ))
436 }
437}
438
439pub trait HistoryStore: Send + Sync {
440 fn put_history_snapshot(&self, snapshot: &HistorySnapshot) -> Result<()>;
441 fn history_for_file(&self, path: &Path, limit: usize) -> Result<HistorySummary>;
442 fn provenance_for_path(&self, _path: &Path, _limit: usize) -> Result<FileProvenance> {
443 Err(OkError::Unsupported(
444 "file provenance lookup is not implemented by this history store".into(),
445 ))
446 }
447 fn provenance_for_symbol(
448 &self,
449 _symbol_id: &SymbolId,
450 _limit: usize,
451 ) -> Result<SymbolProvenance> {
452 Err(OkError::Unsupported(
453 "symbol provenance lookup is not implemented by this history store".into(),
454 ))
455 }
456 fn cochange_neighbors(&self, path: &Path, limit: usize) -> Result<Vec<GitCochangeEdge>>;
457 fn recent_commits(&self, limit: usize) -> Result<Vec<GitCommitRecord>>;
458}
459
460pub trait SearchIndex: Send + Sync {
461 fn rebuild(&mut self, chunks: &[CodeChunk], files: &[File], symbols: &[Symbol]) -> Result<()>;
462 fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>>;
463}
464
465pub trait ImpactStore: Send + Sync {
466 fn impact_for_file(&self, path: &Path) -> Result<ImpactReport>;
467}
468
469pub trait OkStore: MetadataStore + GraphStore {}
471impl<T: MetadataStore + GraphStore> OkStore for T {}