// open_kioku_storage/lib.rs

1use open_kioku_core::{
2    AnalysisFact, CodeChunk, EvidenceSourceType, File, FileId, FileProvenance, GitCochangeEdge,
3    GitCommitRecord, GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, HistorySnapshot,
4    HistorySummary, ImpactReport, Import, IndexManifest, SearchResult, Symbol, SymbolId,
5    SymbolOccurrence, SymbolProvenance, TestTarget,
6};
7use open_kioku_errors::{OkError, Result};
8use std::path::Path;
9
10pub trait MetadataStore: Send + Sync {
11    fn initialize(&self) -> Result<()>;
12    fn put_manifest(&self, manifest: &IndexManifest) -> Result<()>;
13    fn manifest(&self) -> Result<Option<IndexManifest>>;
14    fn replace_index(&self, data: IndexData<'_>) -> Result<()>;
15    fn replace_files_index(&self, _update: PartialIndexUpdate<'_>) -> Result<()> {
16        Err(OkError::Unsupported(
17            "partial index replacement is not implemented by this metadata store".into(),
18        ))
19    }
20    fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>>;
21    fn get_file_by_path(&self, path: &Path) -> Result<Option<File>>;
22    fn list_symbols(&self, query: Option<&str>, limit: usize, offset: usize)
23        -> Result<Vec<Symbol>>;
24    fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>>;
25    fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>>;
26    fn all_chunks(&self) -> Result<Vec<CodeChunk>>;
27    fn tests(&self) -> Result<Vec<TestTarget>>;
28    fn imports(&self) -> Result<Vec<Import>>;
29    fn analysis_facts(
30        &self,
31        _source_type: Option<EvidenceSourceType>,
32        _limit: usize,
33    ) -> Result<Vec<AnalysisFact>> {
34        Ok(Vec::new())
35    }
36    fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>>;
37    fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>>;
38    fn symbols_for_file(&self, _file_id: &FileId) -> Result<Vec<Symbol>> {
39        Ok(Vec::new())
40    }
41    fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
42        let chunks = self.all_chunks()?;
43        let mut results = Vec::new();
44        for chunk in chunks {
45            if chunk.text.contains(query) {
46                results.push(chunk);
47                if results.len() >= limit {
48                    break;
49                }
50            }
51        }
52        Ok(results)
53    }
54    fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
55        let files = self.list_files(usize::MAX, 0)?;
56        let lower_pattern = pattern.to_ascii_lowercase();
57        Ok(files
58            .into_iter()
59            .filter(|f| {
60                f.path
61                    .to_string_lossy()
62                    .to_ascii_lowercase()
63                    .contains(&lower_pattern)
64            })
65            .collect())
66    }
67    fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
68        let tests = self.tests()?;
69        let set = file_ids.iter().collect::<std::collections::HashSet<_>>();
70        Ok(tests
71            .into_iter()
72            .filter(|t| set.contains(&t.file_id))
73            .collect())
74    }
75}
76
77pub struct IndexData<'a> {
78    pub manifest: &'a IndexManifest,
79    pub files: &'a [File],
80    pub symbols: &'a [Symbol],
81    pub chunks: &'a [CodeChunk],
82    pub tests: &'a [TestTarget],
83    pub imports: &'a [Import],
84    pub occurrences: &'a [SymbolOccurrence],
85    pub analysis_facts: &'a [AnalysisFact],
86}
87
88pub struct PartialIndexUpdate<'a> {
89    pub manifest: &'a IndexManifest,
90    pub changed_files: &'a [File],
91    pub deleted_file_ids: &'a [FileId],
92    pub symbols: &'a [Symbol],
93    pub chunks: &'a [CodeChunk],
94    pub tests: &'a [TestTarget],
95    pub imports: &'a [Import],
96    pub occurrences: &'a [SymbolOccurrence],
97    pub analysis_facts: &'a [AnalysisFact],
98    pub graph_nodes: &'a [GraphNode],
99    pub graph_edges: &'a [GraphEdge],
100}
101
/// How a single file differs between a previous and a next index, as assigned by
/// `classify_file_changes_with_parser_version`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexChangeKind {
    /// Same id, same path and same content hash in both indexes.
    Unchanged,
    /// Same id and path, but the content hash differs.
    Modified,
    /// The file id is absent from the previous index.
    Added,
    /// The file id is absent from the next index.
    Deleted,
    /// Same id, but the path differs (checked before the content hash).
    Renamed,
    /// Assigned to every next file when the manifests' `index_mode` values differ.
    ModeSkipped,
    /// Assigned to every next file when both parser versions are known and differ.
    ParserVersionStale,
    /// Assigned to every next file when the manifests' `schema_version` values differ.
    SchemaVersionStale,
}
113
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub struct IndexChange {
116    pub old_path: Option<std::path::PathBuf>,
117    pub new_path: Option<std::path::PathBuf>,
118    pub file_id: Option<FileId>,
119    pub kind: IndexChangeKind,
120}
121
122pub fn classify_file_changes(
123    previous_manifest: Option<&IndexManifest>,
124    next_manifest: &IndexManifest,
125    previous_files: &[File],
126    next_files: &[File],
127) -> Vec<IndexChange> {
128    classify_file_changes_with_parser_version(
129        previous_manifest,
130        next_manifest,
131        previous_files,
132        next_files,
133        None,
134        None,
135    )
136}
137
138pub fn classify_file_changes_with_parser_version(
139    previous_manifest: Option<&IndexManifest>,
140    next_manifest: &IndexManifest,
141    previous_files: &[File],
142    next_files: &[File],
143    previous_parser_version: Option<&str>,
144    next_parser_version: Option<&str>,
145) -> Vec<IndexChange> {
146    if previous_manifest
147        .is_some_and(|manifest| manifest.schema_version != next_manifest.schema_version)
148    {
149        return next_files
150            .iter()
151            .map(|file| IndexChange {
152                old_path: Some(file.path.clone()),
153                new_path: Some(file.path.clone()),
154                file_id: Some(file.id.clone()),
155                kind: IndexChangeKind::SchemaVersionStale,
156            })
157            .collect();
158    }
159    if previous_parser_version
160        .zip(next_parser_version)
161        .is_some_and(|(previous, next)| previous != next)
162    {
163        return next_files
164            .iter()
165            .map(|file| IndexChange {
166                old_path: Some(file.path.clone()),
167                new_path: Some(file.path.clone()),
168                file_id: Some(file.id.clone()),
169                kind: IndexChangeKind::ParserVersionStale,
170            })
171            .collect();
172    }
173    if previous_manifest.is_some_and(|manifest| manifest.index_mode != next_manifest.index_mode) {
174        return next_files
175            .iter()
176            .map(|file| IndexChange {
177                old_path: Some(file.path.clone()),
178                new_path: Some(file.path.clone()),
179                file_id: Some(file.id.clone()),
180                kind: IndexChangeKind::ModeSkipped,
181            })
182            .collect();
183    }
184
185    let previous_by_id = previous_files
186        .iter()
187        .map(|file| (&file.id, file))
188        .collect::<std::collections::BTreeMap<_, _>>();
189    let next_by_id = next_files
190        .iter()
191        .map(|file| (&file.id, file))
192        .collect::<std::collections::BTreeMap<_, _>>();
193    let mut changes = Vec::new();
194    for file in next_files {
195        let kind = match previous_by_id.get(&file.id) {
196            None => IndexChangeKind::Added,
197            Some(previous) if previous.path != file.path => IndexChangeKind::Renamed,
198            Some(previous) if previous.content_hash != file.content_hash => {
199                IndexChangeKind::Modified
200            }
201            Some(_) => IndexChangeKind::Unchanged,
202        };
203        let old_path = previous_by_id.get(&file.id).map(|file| file.path.clone());
204        changes.push(IndexChange {
205            old_path,
206            new_path: Some(file.path.clone()),
207            file_id: Some(file.id.clone()),
208            kind,
209        });
210    }
211    for file in previous_files {
212        if !next_by_id.contains_key(&file.id) {
213            changes.push(IndexChange {
214                old_path: Some(file.path.clone()),
215                new_path: None,
216                file_id: Some(file.id.clone()),
217                kind: IndexChangeKind::Deleted,
218            });
219        }
220    }
221    changes.sort_by(|left, right| {
222        left.new_path
223            .as_ref()
224            .or(left.old_path.as_ref())
225            .cmp(&right.new_path.as_ref().or(right.old_path.as_ref()))
226    });
227    changes
228}
229
230pub fn partial_index_supported(previous: Option<&IndexManifest>, next: &IndexManifest) -> bool {
231    previous.is_some_and(|previous| {
232        previous.schema_version == next.schema_version && previous.index_mode == next.index_mode
233    })
234}
235
236pub fn partial_index_supported_for_versions(
237    previous: Option<&IndexManifest>,
238    next: &IndexManifest,
239    previous_parser_version: Option<&str>,
240    next_parser_version: Option<&str>,
241) -> bool {
242    partial_index_supported(previous, next)
243        && previous_parser_version
244            .zip(next_parser_version)
245            .map(|(previous, next)| previous == next)
246            .unwrap_or(true)
247}
248
#[cfg(test)]
mod tests {
    use super::{
        classify_file_changes, classify_file_changes_with_parser_version, IndexChangeKind,
    };
    use chrono::Utc;
    use open_kioku_core::{
        File, FileId, IndexManifest, IndexQuality, Language, Repository, RepositoryId,
    };
    use std::path::{Path, PathBuf};

    /// Returns true when `expected` is `None` (no constraint) or equals `actual`.
    fn path_matches(actual: &Option<PathBuf>, expected: Option<&str>) -> bool {
        match expected {
            None => true,
            Some(path) => actual.as_deref() == Some(Path::new(path)),
        }
    }

    /// One file per outcome: unchanged, modified, renamed, deleted and added.
    #[test]
    fn classifies_added_modified_deleted_and_renamed_files() {
        let previous = [
            file("stable", "src/stable.rs", "a"),
            file("modified", "src/modified.rs", "a"),
            file("renamed", "src/old.rs", "a"),
            file("deleted", "src/deleted.rs", "a"),
        ];
        let next = [
            file("stable", "src/stable.rs", "a"),
            file("modified", "src/modified.rs", "b"),
            file("renamed", "src/new.rs", "a"),
            file("added", "src/added.rs", "a"),
        ];

        let changes = classify_file_changes(Some(&manifest(1)), &manifest(1), &previous, &next);

        // `None` for a path means that side is not constrained by the assertion.
        let has_change = |kind: IndexChangeKind, old: Option<&str>, new: Option<&str>| {
            changes.iter().any(|change| {
                change.kind == kind
                    && path_matches(&change.old_path, old)
                    && path_matches(&change.new_path, new)
            })
        };

        assert!(has_change(
            IndexChangeKind::Unchanged,
            None,
            Some("src/stable.rs")
        ));
        assert!(has_change(
            IndexChangeKind::Modified,
            None,
            Some("src/modified.rs")
        ));
        assert!(has_change(
            IndexChangeKind::Renamed,
            Some("src/old.rs"),
            Some("src/new.rs")
        ));
        assert!(has_change(
            IndexChangeKind::Added,
            None,
            Some("src/added.rs")
        ));
        assert!(has_change(
            IndexChangeKind::Deleted,
            Some("src/deleted.rs"),
            None
        ));
    }

    /// A schema or parser version bump overrides the per-file comparison.
    #[test]
    fn schema_and_parser_version_changes_force_stale_classification() {
        let previous = [file("f1", "src/lib.rs", "a")];
        let next = [file("f1", "src/lib.rs", "b")];

        let schema_changes =
            classify_file_changes(Some(&manifest(1)), &manifest(2), &previous, &next);
        assert_eq!(
            schema_changes.first().map(|change| change.kind),
            Some(IndexChangeKind::SchemaVersionStale)
        );

        let parser_changes = classify_file_changes_with_parser_version(
            Some(&manifest(1)),
            &manifest(1),
            &previous,
            &next,
            Some("parser-a"),
            Some("parser-b"),
        );
        assert_eq!(
            parser_changes.first().map(|change| change.kind),
            Some(IndexChangeKind::ParserVersionStale)
        );
    }

    /// Builds an otherwise empty manifest with the given schema version.
    fn manifest(schema_version: u32) -> IndexManifest {
        let repository = Repository {
            id: RepositoryId::new("repo"),
            name: "repo".into(),
            root: PathBuf::from("."),
            branch: None,
            commit: None,
            indexed_at: Some(Utc::now()),
        };
        IndexManifest {
            repository,
            file_count: 0,
            symbol_count: 0,
            chunk_count: 0,
            indexed_at: Utc::now(),
            schema_version,
            index_mode: Default::default(),
            phase_reports: Vec::new(),
            quality: IndexQuality::default(),
        }
    }

    /// Builds a Rust file record with the given id, path and content hash.
    fn file(id: &str, path: &str, hash: &str) -> File {
        File {
            id: FileId::new(id),
            repository_id: RepositoryId::new("repo"),
            path: PathBuf::from(path),
            language: Language::Rust,
            size_bytes: 10,
            content_hash: hash.into(),
            is_generated: false,
            is_vendor: false,
        }
    }
}
354
/// Total number of nodes and edges in a graph, as returned by `GraphStore::graph_counts`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GraphCounts {
    /// Number of graph nodes.
    pub nodes: usize,
    /// Number of graph edges.
    pub edges: usize,
}
360
/// Per-type node and edge counts, as returned by `GraphStore::graph_schema_counts`.
///
/// Both maps are keyed by a type name string and iterate in key order.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GraphSchemaCounts {
    /// Count per node type name.
    pub node_types: std::collections::BTreeMap<String, usize>,
    /// Count per edge type name.
    pub edge_types: std::collections::BTreeMap<String, usize>,
}
366
/// Statistics for one node or edge type, as returned by `GraphStore::node_type_stats` and
/// `GraphStore::edge_type_stats`.
#[derive(Debug, Clone, Default)]
pub struct TypeStats {
    /// Number of items of this type.
    pub count: usize,
    /// Whether evidence is available for this type.
    pub evidence_available: bool,
    /// Optional freshness value.
    // NOTE(review): unit and reference point are not visible in this file (timestamp vs.
    // age) - confirm with the implementations.
    pub freshness: Option<u64>,
}
373
374pub trait GraphStore: Send + Sync {
375    fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()>;
376    fn node_by_id(&self, _id: &str) -> Result<Option<GraphNode>> {
377        Err(OkError::Unsupported(
378            "node_by_id is not implemented by this graph store".into(),
379        ))
380    }
381    fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)>;
382    fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>>;
383
384    fn node_type_stats(&self) -> Result<std::collections::HashMap<String, TypeStats>> {
385        Ok(std::collections::HashMap::new())
386    }
387
388    fn edge_type_stats(&self) -> Result<std::collections::HashMap<String, TypeStats>> {
389        Ok(std::collections::HashMap::new())
390    }
391
392    fn nodes_by_type(
393        &self,
394        _node_type: GraphNodeType,
395        _limit: usize,
396        _offset: usize,
397    ) -> Result<Vec<GraphNode>> {
398        Err(OkError::Unsupported(
399            "nodes_by_type is not implemented by this graph store".into(),
400        ))
401    }
402
403    fn all_graph_nodes(&self) -> Result<Vec<GraphNode>> {
404        Err(OkError::Unsupported(
405            "all_graph_nodes is not implemented by this graph store".into(),
406        ))
407    }
408
409    fn edges_by_type(
410        &self,
411        _edge_type: GraphEdgeType,
412        _limit: usize,
413        _offset: usize,
414    ) -> Result<Vec<GraphEdge>> {
415        Err(OkError::Unsupported(
416            "edges_by_type is not implemented by this graph store".into(),
417        ))
418    }
419
420    fn graph_counts(&self) -> Result<GraphCounts> {
421        Err(OkError::Unsupported(
422            "graph_counts is not implemented by this graph store".into(),
423        ))
424    }
425
426    fn graph_schema_counts(&self) -> Result<GraphSchemaCounts> {
427        Err(OkError::Unsupported(
428            "graph_schema_counts is not implemented by this graph store".into(),
429        ))
430    }
431
432    fn graph_edges_between(&self, _from: &str, _to: &str, _limit: usize) -> Result<Vec<GraphEdge>> {
433        Err(OkError::Unsupported(
434            "graph_edges_between is not implemented by this graph store".into(),
435        ))
436    }
437}
438
439pub trait HistoryStore: Send + Sync {
440    fn put_history_snapshot(&self, snapshot: &HistorySnapshot) -> Result<()>;
441    fn history_for_file(&self, path: &Path, limit: usize) -> Result<HistorySummary>;
442    fn provenance_for_path(&self, _path: &Path, _limit: usize) -> Result<FileProvenance> {
443        Err(OkError::Unsupported(
444            "file provenance lookup is not implemented by this history store".into(),
445        ))
446    }
447    fn provenance_for_symbol(
448        &self,
449        _symbol_id: &SymbolId,
450        _limit: usize,
451    ) -> Result<SymbolProvenance> {
452        Err(OkError::Unsupported(
453            "symbol provenance lookup is not implemented by this history store".into(),
454        ))
455    }
456    fn cochange_neighbors(&self, path: &Path, limit: usize) -> Result<Vec<GitCochangeEdge>>;
457    fn recent_commits(&self, limit: usize) -> Result<Vec<GitCommitRecord>>;
458}
459
460pub trait SearchIndex: Send + Sync {
461    fn rebuild(&mut self, chunks: &[CodeChunk], files: &[File], symbols: &[Symbol]) -> Result<()>;
462    fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>>;
463}
464
465pub trait ImpactStore: Send + Sync {
466    fn impact_for_file(&self, path: &Path) -> Result<ImpactReport>;
467}
468
469/// Combined store trait for types that implement both metadata and graph storage.
470pub trait OkStore: MetadataStore + GraphStore {}
471impl<T: MetadataStore + GraphStore> OkStore for T {}