// maproom 0.1.0
//
// Semantic code search powered by embeddings and SQLite
//! Store trait hierarchy for backend-agnostic database operations.
//!
//! Defines 9 sub-traits grouped by functional responsibility, combined into
//! a single `Store` supertrait via blanket impl. This enables both `SqliteStore`
//! and future `PostgresStore` to implement the same interface.
//!
//! # Sub-traits
//!
//! - [`StoreCore`] - Repository, worktree, commit, file CRUD and stats queries
//! - [`StoreChunks`] - Chunk CRUD and worktree associations
//! - [`StoreSearch`] - FTS, vector, and hybrid search
//! - [`StoreGraph`] - Graph traversal and importance scoring
//! - [`StoreEmbeddings`] - Embedding storage and retrieval
//! - [`StoreMigration`] - Schema migration management
//! - [`StoreCleanup`] - Stale worktree detection and cleanup
//! - [`StoreIndexState`] - Index state tracking
//! - [`StoreEncoding`] - Encoding run lifecycle

use std::collections::{HashMap, HashSet};

use async_trait::async_trait;

use crate::config::EdgeQualityWeights;
use crate::db::index_state::UpdateStats;
use crate::db::types::{
    ChunkMetadata, EmbeddingRecord, EncodingRunRow, GraphResult, HybridResult, HybridWeights,
    ImportDirection, RankedSearchHit, SemanticRanking,
};
use crate::db::{
    ChunkContext, ChunkForEmbedding, ChunkFull, ChunkRecord, ChunkSummary, FileRecord, RepoInfo,
    SearchHit, StaleWorktree, WorktreeCleanupResult, WorktreeInfo,
};

// =============================================================================
// StoreCore - Repository, worktree, commit, file CRUD and stats
// =============================================================================

#[async_trait]
pub trait StoreCore: Send + Sync {
    /// Check if sqlite-vec (or equivalent vector extension) is available.
    ///
    /// Returns `true` if the backend supports vector similarity search. When
    /// `false`, vector search degrades gracefully (FTS-only results).
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// fn choose_search_mode(store: &dyn Store) -> &'static str {
    ///     if store.has_vector_extension() {
    ///         "hybrid"
    ///     } else {
    ///         "fts"
    ///     }
    /// }
    /// ```
    fn has_vector_extension(&self) -> bool;

    /// Get or create a repository by name and root path. Returns repo ID.
    ///
    /// If a repository with the given name already exists, its ID is returned
    /// without creating a duplicate. This is the standard entry point for
    /// registering a repo before indexing.
    ///
    /// # Arguments
    ///
    /// * `name` - Short name for the repository (e.g., `"crewchief"`)
    /// * `root_path` - Absolute filesystem path to the repository root
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// async fn setup_repo(store: &dyn Store) -> anyhow::Result<i64> {
    ///     let repo_id = store
    ///         .get_or_create_repo("my-project", "/home/user/repos/my-project")
    ///         .await?;
    ///     // repo_id can now be used with get_or_create_worktree, etc.
    ///     Ok(repo_id)
    /// }
    /// ```
    async fn get_or_create_repo(&self, name: &str, root_path: &str) -> anyhow::Result<i64>;

    /// Get or create a worktree for a repository. Returns worktree ID.
    ///
    /// # Arguments
    ///
    /// * `repo_id` - ID of the owning repository (see `get_or_create_repo`)
    /// * `name` - Worktree name, used to look the worktree up later via
    ///   `get_worktree_by_name`
    /// * `abs_path` - Absolute filesystem path to the worktree
    async fn get_or_create_worktree(
        &self,
        repo_id: i64,
        name: &str,
        abs_path: &str,
    ) -> anyhow::Result<i64>;

    /// Get or create a commit record. Returns commit ID.
    ///
    /// # Arguments
    ///
    /// * `repo_id` - ID of the repository the commit belongs to
    /// * `sha` - Commit SHA identifying the commit
    /// * `committed_at` - Optional commit timestamp; `None` when unknown
    async fn get_or_create_commit(
        &self,
        repo_id: i64,
        sha: &str,
        committed_at: Option<chrono::DateTime<chrono::Utc>>,
    ) -> anyhow::Result<i64>;

    /// Look up a repository by name.
    ///
    /// Returns `None` if no repository with that name is registered.
    async fn get_repo_by_name(&self, name: &str) -> anyhow::Result<Option<RepoInfo>>;

    /// Look up a worktree by repo ID and name.
    ///
    /// Returns `None` if the repository has no worktree with that name.
    async fn get_worktree_by_name(
        &self,
        repo_id: i64,
        name: &str,
    ) -> anyhow::Result<Option<WorktreeInfo>>;

    /// List all repositories.
    async fn list_repos(&self) -> anyhow::Result<Vec<RepoInfo>>;

    /// List all worktrees for a repository.
    async fn list_worktrees(&self, repo_id: i64) -> anyhow::Result<Vec<WorktreeInfo>>;

    /// Upsert a file record. Returns file ID.
    ///
    /// Inserts the record, or updates the existing row for the same file.
    async fn upsert_file(&self, file: &FileRecord) -> anyhow::Result<i64>;

    /// Delete a file record by ID. Returns true if the file was found and deleted.
    async fn delete_file(&self, file_id: i64) -> anyhow::Result<bool>;

    /// Look up a file ID by relative path and worktree ID.
    ///
    /// Returns `None` if no file with that relative path exists in the worktree.
    async fn get_file_id_by_relpath(
        &self,
        relpath: &str,
        worktree_id: i64,
    ) -> anyhow::Result<Option<i64>>;

    /// Get the count of chunks associated with a worktree.
    async fn get_worktree_chunk_count(&self, worktree_id: i64) -> anyhow::Result<i64>;

    /// Get the count of files in a worktree.
    async fn get_worktree_file_count(&self, worktree_id: i64) -> anyhow::Result<i64>;

    /// Get the count of chunks with embeddings in a worktree.
    async fn get_worktree_embedding_count(&self, worktree_id: i64) -> anyhow::Result<i64>;

    /// Get the language breakdown for files in a worktree.
    ///
    /// Returns `(language, file_count)` pairs.
    async fn get_worktree_language_breakdown(
        &self,
        worktree_id: i64,
    ) -> anyhow::Result<Vec<(String, i64)>>;

    /// Get the last scan timestamp for a worktree.
    ///
    /// Returns `None` when no scan timestamp has been recorded for the worktree.
    async fn get_worktree_last_scan(&self, worktree_id: i64) -> anyhow::Result<Option<String>>;

    /// Get the global count of distinct chunks (by blob_sha).
    async fn get_global_chunk_count(&self) -> anyhow::Result<i64>;

    /// Get the global count of embeddings.
    async fn get_global_embedding_count(&self) -> anyhow::Result<i64>;

    /// Get the count of distinct chunks for a specific repo by name.
    async fn get_repo_chunk_count(&self, repo_name: &str) -> anyhow::Result<i64>;

    /// Get the count of embeddings for a specific repo by name.
    async fn get_repo_embedding_count(&self, repo_name: &str) -> anyhow::Result<i64>;
}

// =============================================================================
// StoreChunks - Chunk CRUD and worktree associations
// =============================================================================

#[async_trait]
pub trait StoreChunks: Send + Sync {
    /// Insert a single chunk. Returns chunk ID.
    ///
    /// Stores a parsed code chunk (function, class, etc.) extracted by
    /// tree-sitter. The chunk is associated with a file via `chunk.file_id`
    /// and a worktree via `chunk.worktree_id`.
    ///
    /// # Arguments
    ///
    /// * `chunk` - The chunk record containing symbol metadata, line range,
    ///   preview text, and content hash (`blob_sha`)
    ///
    /// # Returns
    ///
    /// The database ID of the inserted chunk.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::{ChunkRecord, Store};
    ///
    /// async fn index_chunk(store: &dyn Store, file_id: i64, worktree_id: i64) -> anyhow::Result<i64> {
    ///     let chunk = ChunkRecord {
    ///         file_id,
    ///         blob_sha: "abc123".to_string(),
    ///         symbol_name: Some("process_request".to_string()),
    ///         kind: "function".to_string(),
    ///         signature: Some("fn process_request(req: Request) -> Response".to_string()),
    ///         docstring: None,
    ///         start_line: 10,
    ///         end_line: 25,
    ///         preview: "fn process_request(req: Request) -> Response { ... }".to_string(),
    ///         ts_doc_text: "process_request function request response".to_string(),
    ///         recency_score: 1.0,
    ///         churn_score: 0.5,
    ///         metadata: None,
    ///         worktree_id,
    ///     };
    ///     store.insert_chunk(&chunk).await
    /// }
    /// ```
    async fn insert_chunk(&self, chunk: &ChunkRecord) -> anyhow::Result<i64>;

    /// Insert multiple chunks in a batch. Returns chunk IDs.
    ///
    /// Batched counterpart of `insert_chunk` for bulk indexing.
    async fn insert_chunks_batch(&self, chunks: &[ChunkRecord]) -> anyhow::Result<Vec<i64>>;

    /// Insert a directed edge between two chunks.
    ///
    /// # Arguments
    ///
    /// * `src_chunk_id` - Source chunk of the edge
    /// * `dst_chunk_id` - Destination chunk of the edge
    /// * `edge_type` - Relationship label (e.g., `"calls"`; see `StoreGraph`
    ///   traversals which follow these edges)
    async fn insert_chunk_edge(
        &self,
        src_chunk_id: i64,
        dst_chunk_id: i64,
        edge_type: &str,
    ) -> anyhow::Result<()>;

    /// Get full chunk data by ID.
    ///
    /// Returns `None` if no chunk with that ID exists.
    async fn get_chunk_by_id(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkFull>>;

    /// Get all chunks for a file, ordered by start line.
    async fn get_file_chunks(&self, file_id: i64) -> anyhow::Result<Vec<ChunkSummary>>;

    /// Get a chunk with surrounding context.
    ///
    /// Returns the full chunk data along with neighboring chunks from the
    /// same file. The `surrounding` parameter controls how many chunks
    /// before and after are included, providing spatial context for code
    /// navigation and LLM context assembly.
    ///
    /// # Arguments
    ///
    /// * `chunk_id` - The database ID of the target chunk
    /// * `surrounding` - Number of neighboring chunks to include on each side
    ///
    /// # Returns
    ///
    /// `None` if the chunk ID does not exist; otherwise a [`ChunkContext`]
    /// containing the chunk and its neighbors.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// async fn show_context(store: &dyn Store, chunk_id: i64) -> anyhow::Result<()> {
    ///     if let Some(ctx) = store.get_chunk_context(chunk_id, 2).await? {
    ///         println!("File: {}", ctx.file_path);
    ///         println!("Chunk: {} (lines {}-{})",
    ///             ctx.chunk.kind, ctx.chunk.start_line, ctx.chunk.end_line);
    ///         println!("Surrounding chunks: {}", ctx.surrounding_chunks.len());
    ///     }
    ///     Ok(())
    /// }
    /// ```
    async fn get_chunk_context(
        &self,
        chunk_id: i64,
        surrounding: usize,
    ) -> anyhow::Result<Option<ChunkContext>>;

    /// Find a chunk by symbol name, returning the chunk ID.
    ///
    /// The lookup is scoped to `repo_id` and may be narrowed further by an
    /// optional worktree and an optional file relative path (useful when the
    /// same symbol name appears in multiple files). Returns `None` when no
    /// match is found.
    async fn find_chunk_by_symbol(
        &self,
        repo_id: i64,
        worktree_id: Option<i64>,
        symbol_name: &str,
        relpath: Option<&str>,
    ) -> anyhow::Result<Option<i64>>;

    /// Delete all chunks associated with a file. Returns count of chunks deleted.
    async fn delete_chunks_by_file(&self, file_id: i64) -> anyhow::Result<u64>;

    /// Delete chunks by their IDs. Returns count of chunks deleted.
    ///
    /// Deletion is scoped to the given worktree ID.
    async fn delete_chunks_by_ids(
        &self,
        worktree_id: i64,
        chunk_ids: &[i64],
    ) -> anyhow::Result<usize>;

    /// Get all chunks for a worktree with their file paths.
    /// Returns (chunk_id, file_relpath) tuples.
    async fn get_chunks_for_worktree(&self, worktree_id: i64)
        -> anyhow::Result<Vec<(i64, String)>>;

    /// Get chunks by blob SHA.
    ///
    /// Looks up chunks by content hash; multiple chunks can share a blob SHA
    /// when identical content appears in more than one place.
    async fn get_chunks_by_blob_sha(&self, blob_sha: &str) -> anyhow::Result<Vec<ChunkSummary>>;

    /// Add a chunk to an additional worktree.
    ///
    /// Chunk-to-worktree is a many-to-many association; this records that an
    /// existing chunk also belongs to `worktree_id`.
    async fn add_chunk_to_worktree(&self, chunk_id: i64, worktree_id: i64) -> anyhow::Result<()>;

    /// Get all worktree IDs containing a chunk.
    async fn get_chunk_worktrees(&self, chunk_id: i64) -> anyhow::Result<Vec<i64>>;
}

// =============================================================================
// StoreSearch - FTS, vector, and hybrid search
// =============================================================================

#[allow(clippy::too_many_arguments)] // Trait methods mirror database query parameters; changing signatures would break all implementors
#[async_trait]
pub trait StoreSearch: Send + Sync {
    /// Full-text search for chunks, resolving repo/worktree by name.
    ///
    /// Performs FTS5 keyword search across indexed chunks in the specified
    /// repository and optional worktree. Results are ranked by BM25 with
    /// optional semantic ranking adjustments.
    ///
    /// # Arguments
    ///
    /// * `repo` - Repository name (e.g., `"crewchief"`)
    /// * `worktree` - Optional worktree name to scope the search
    /// * `query` - FTS query string (supports SQLite FTS5 syntax)
    /// * `k` - Maximum number of results to return
    /// * `debug` - If `true`, populate debug fields (`base_score`, `kind_mult`, etc.)
    /// * `kind_filter` - Optional filter to restrict chunk kinds (e.g., `["function", "method"]`)
    /// * `lang_filter` - Optional filter to restrict languages (e.g., `["rust", "typescript"]`)
    ///
    /// # Returns
    ///
    /// A tuple of `(hits, total_count)` where `total_count` is the total number
    /// of matching chunks before the `k` limit is applied. This allows callers
    /// to distinguish "all results shown" from "results truncated by k limit."
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// async fn find_functions(store: &dyn Store) -> anyhow::Result<()> {
    ///     let (hits, total_count) = store.search_chunks_fts(
    ///         "my-project",
    ///         Some("main"),
    ///         "authentication",
    ///         10,
    ///         false,
    ///         Some(&["function".to_string(), "method".to_string()]),
    ///         None,
    ///     ).await?;
    ///
    ///     println!("Showing {} of {} total matches", hits.len(), total_count);
    ///     for hit in &hits {
    ///         println!("{} ({}) score={:.3}", hit.file_relpath, hit.kind, hit.score);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    async fn search_chunks_fts(
        &self,
        repo: &str,
        worktree: Option<&str>,
        query: &str,
        k: i64,
        debug: bool,
        kind_filter: Option<&[String]>,
        lang_filter: Option<&[String]>,
    ) -> anyhow::Result<Vec<SearchHit>>; // NOTE(review): see search_chunks_fts above -- signature kept verbatim below

    /// FTS search by repo_id and worktree_id (for search executors).
    ///
    /// Lower-level variant that skips name resolution; callers already hold
    /// the numeric repo/worktree IDs. `normalized_query` is a pre-processed
    /// form of `query` (normalization rules are implementation-defined --
    /// see the concrete store for details).
    async fn search_fts_by_id(
        &self,
        repo_id: i64,
        worktree_id: Option<i64>,
        query: &str,
        normalized_query: &str,
        k: i64,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Vector search for chunks, resolving repo/worktree by name.
    ///
    /// Semantic similarity search over the pre-computed `embedding` vector.
    /// Parameters mirror `search_chunks_fts`, with `embedding` replacing the
    /// FTS query string.
    async fn search_chunks_vector(
        &self,
        repo: &str,
        worktree: Option<&str>,
        embedding: &[f32],
        k: i64,
        debug: bool,
        kind_filter: Option<&[String]>,
        lang_filter: Option<&[String]>,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Vector search by repo_id and worktree_id (for search executors).
    ///
    /// Lower-level variant of `search_chunks_vector` that skips name
    /// resolution.
    async fn search_vector_by_id(
        &self,
        repo_id: i64,
        worktree_id: Option<i64>,
        query_embedding: &[f32],
        k: i64,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Hybrid search combining FTS and vector, resolving repo/worktree by name.
    ///
    /// Takes both the FTS `query` string and its pre-computed `embedding`;
    /// remaining parameters mirror `search_chunks_fts`.
    async fn search_chunks_hybrid(
        &self,
        repo: &str,
        worktree: Option<&str>,
        query: &str,
        embedding: &[f32],
        k: i64,
        debug: bool,
        kind_filter: Option<&[String]>,
        lang_filter: Option<&[String]>,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Hybrid search using RRF to combine FTS and vector results.
    ///
    /// Executes both FTS and vector searches, then merges them using
    /// Reciprocal Rank Fusion (RRF). Each result's final score blends
    /// keyword relevance and semantic similarity according to `weights`.
    ///
    /// # Arguments
    ///
    /// * `repo` - Repository name
    /// * `worktree` - Optional worktree name to scope the search
    /// * `query` - FTS query string for the keyword component
    /// * `query_embedding` - Pre-computed embedding vector for the semantic component
    /// * `limit` - Maximum number of results to return
    /// * `weights` - Relative importance of FTS vs. vector scores
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::{HybridWeights, Store};
    ///
    /// async fn hybrid_search(
    ///     store: &dyn Store,
    ///     query: &str,
    ///     embedding: &[f32],
    /// ) -> anyhow::Result<()> {
    ///     let results = store.search_hybrid(
    ///         "my-project",
    ///         Some("main"),
    ///         query,
    ///         embedding,
    ///         20,
    ///         HybridWeights::default(), // 0.3 FTS, 0.7 vector
    ///     ).await?;
    ///
    ///     for r in &results {
    ///         println!("chunk {} score={:.3} source={}", r.chunk_id, r.score, r.source);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    async fn search_hybrid(
        &self,
        repo: &str,
        worktree: Option<&str>,
        query: &str,
        query_embedding: &[f32],
        limit: usize,
        weights: HybridWeights,
    ) -> anyhow::Result<Vec<HybridResult>>;

    /// Hybrid search with semantic ranking applied.
    ///
    /// Like `search_hybrid`, but additionally applies the given
    /// [`SemanticRanking`] adjustments and returns [`RankedSearchHit`]s.
    async fn search_hybrid_ranked(
        &self,
        repo: &str,
        worktree: Option<&str>,
        query: &str,
        query_embedding: &[f32],
        limit: usize,
        weights: HybridWeights,
        ranking: SemanticRanking,
    ) -> anyhow::Result<Vec<RankedSearchHit>>;

    /// Get metadata for multiple chunks (batch query for semantic ranking).
    ///
    /// Returns a map keyed by chunk ID.
    async fn get_chunks_metadata(
        &self,
        chunk_ids: &[i64],
    ) -> anyhow::Result<HashMap<i64, ChunkMetadata>>;
}

// =============================================================================
// StoreGraph - Graph traversal and importance scoring
// =============================================================================

#[async_trait]
pub trait StoreGraph: Send + Sync {
    /// Find all chunks that call the target chunk (directly or transitively).
    ///
    /// Performs a recursive graph traversal following incoming "calls" edges
    /// to discover callers up to `max_depth` levels. Uses cycle detection to
    /// avoid infinite loops. Default depth is 3; hard maximum is 10.
    ///
    /// # Arguments
    ///
    /// * `target_chunk_id` - The chunk whose callers to find
    /// * `max_depth` - Maximum traversal depth (`None` for default of 3)
    ///
    /// # Returns
    ///
    /// A list of [`GraphResult`] entries, each containing the caller's chunk ID,
    /// traversal depth, and the edge path from source to target.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// async fn who_calls(store: &dyn Store, chunk_id: i64) -> anyhow::Result<()> {
    ///     let callers = store.find_callers(chunk_id, Some(2)).await?;
    ///     for caller in &callers {
    ///         println!(
    ///             "chunk {} at depth {} via {:?}",
    ///             caller.chunk_id, caller.depth, caller.path
    ///         );
    ///     }
    ///     Ok(())
    /// }
    /// ```
    async fn find_callers(
        &self,
        target_chunk_id: i64,
        max_depth: Option<usize>,
    ) -> anyhow::Result<Vec<GraphResult>>;

    /// Find all chunks called by the source chunk (directly or transitively).
    ///
    /// Mirror image of `find_callers`: follows outgoing "calls" edges from
    /// `source_chunk_id` up to `max_depth` levels.
    async fn find_callees(
        &self,
        source_chunk_id: i64,
        max_depth: Option<usize>,
    ) -> anyhow::Result<Vec<GraphResult>>;

    /// Find import relationships for a chunk.
    ///
    /// Traverses import edges in the given [`ImportDirection`] up to
    /// `max_depth` levels (`None` for the default depth).
    async fn find_imports(
        &self,
        chunk_id: i64,
        direction: ImportDirection,
        max_depth: Option<usize>,
    ) -> anyhow::Result<Vec<GraphResult>>;

    /// Find extension/inheritance relationships for a chunk.
    ///
    /// Same traversal shape as `find_imports`, but over extension/inheritance
    /// edges.
    async fn find_extensions(
        &self,
        chunk_id: i64,
        direction: ImportDirection,
        max_depth: Option<usize>,
    ) -> anyhow::Result<Vec<GraphResult>>;

    /// Get all direct edges from or to a chunk (without recursion).
    ///
    /// Returns only depth-1 relationships (no transitive traversal). Useful
    /// for building local dependency views or populating UI adjacency lists.
    ///
    /// # Arguments
    ///
    /// * `chunk_id` - The chunk to query edges for
    /// * `direction` - [`ImportDirection::Outgoing`] for edges leaving the chunk,
    ///   [`ImportDirection::Incoming`] for edges arriving at the chunk
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::{ImportDirection, Store};
    ///
    /// async fn list_dependencies(store: &dyn Store, chunk_id: i64) -> anyhow::Result<()> {
    ///     let outgoing = store.get_direct_edges(chunk_id, ImportDirection::Outgoing).await?;
    ///     println!("Chunk {} has {} outgoing edges", chunk_id, outgoing.len());
    ///
    ///     let incoming = store.get_direct_edges(chunk_id, ImportDirection::Incoming).await?;
    ///     println!("Chunk {} has {} incoming edges", chunk_id, incoming.len());
    ///     Ok(())
    /// }
    /// ```
    async fn get_direct_edges(
        &self,
        chunk_id: i64,
        direction: ImportDirection,
    ) -> anyhow::Result<Vec<GraphResult>>;

    /// Calculate graph importance scores for chunks in a repo/worktree.
    ///
    /// # Arguments
    ///
    /// * `repo_id` - Repository to score
    /// * `worktree_id` - Optional worktree scope
    /// * `limit` - Maximum number of scored chunks to return
    /// * `enable_quality` - Whether to apply edge-quality weighting
    /// * `weights` - Edge-quality weight configuration (used when
    ///   `enable_quality` is set)
    async fn calculate_graph_importance(
        &self,
        repo_id: i64,
        worktree_id: Option<i64>,
        limit: usize,
        enable_quality: bool,
        weights: &EdgeQualityWeights,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Calculate graph importance for specific chunk IDs.
    ///
    /// Restricted variant of `calculate_graph_importance` scoring only the
    /// supplied `chunk_ids`.
    async fn calculate_graph_importance_for_chunks(
        &self,
        chunk_ids: &[i64],
        repo_id: i64,
        worktree_id: Option<i64>,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Calculate signal scores (recency + churn) for chunks in a repo/worktree.
    ///
    /// `recency_weight` and `churn_weight` control the relative contribution
    /// of each signal; `limit` caps the number of results.
    async fn calculate_signal_scores(
        &self,
        repo_id: i64,
        worktree_id: Option<i64>,
        recency_weight: f32,
        churn_weight: f32,
        limit: usize,
    ) -> anyhow::Result<Vec<SearchHit>>;

    /// Calculate signal scores for specific chunk IDs.
    ///
    /// Restricted variant of `calculate_signal_scores` scoring only the
    /// supplied `chunk_ids`.
    async fn calculate_signal_scores_for_chunks(
        &self,
        chunk_ids: &[i64],
        repo_id: i64,
        worktree_id: Option<i64>,
        recency_weight: f32,
        churn_weight: f32,
    ) -> anyhow::Result<Vec<SearchHit>>;
}

// =============================================================================
// StoreEmbeddings - Embedding storage and retrieval
// =============================================================================

#[async_trait]
pub trait StoreEmbeddings: Send + Sync {
    /// Store or update an embedding by content hash. Returns embedding ID.
    ///
    /// Embeddings are deduplicated by `blob_sha` -- if the same content hash
    /// already has an embedding, it is updated in place. This avoids
    /// redundant storage when the same chunk appears in multiple worktrees.
    ///
    /// # Arguments
    ///
    /// * `blob_sha` - Content hash identifying the chunk text
    /// * `embedding` - The embedding vector (dimension must match the configured table)
    /// * `model_version` - Identifier for the model that produced the embedding
    ///   (e.g., `"mxbai-embed-large"`)
    ///
    /// # Returns
    ///
    /// The database ID of the upserted embedding row.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// async fn store_embedding(
    ///     store: &dyn Store,
    ///     blob_sha: &str,
    ///     vector: &[f32],
    /// ) -> anyhow::Result<i64> {
    ///     store.upsert_embedding(blob_sha, vector, "mxbai-embed-large").await
    /// }
    /// ```
    async fn upsert_embedding(
        &self,
        blob_sha: &str,
        embedding: &[f32],
        model_version: &str,
    ) -> anyhow::Result<i64>;

    /// Batch upsert embeddings with deduplication.
    ///
    /// Batched counterpart of `upsert_embedding`; each record is deduplicated
    /// by its content hash.
    async fn upsert_embeddings_batch_new(
        &self,
        embeddings: &[EmbeddingRecord],
    ) -> anyhow::Result<()>;

    /// Check if an embedding exists for a blob SHA.
    async fn has_embedding(&self, blob_sha: &str) -> anyhow::Result<bool>;

    /// Get an embedding vector by blob SHA.
    ///
    /// Returns `None` when no embedding is stored for that content hash.
    async fn get_embedding(&self, blob_sha: &str) -> anyhow::Result<Option<Vec<f32>>>;

    /// Sync a single embedding to the vector search table.
    async fn sync_embedding_to_vec(
        &self,
        embedding_id: i64,
        embedding: &[f32],
    ) -> anyhow::Result<()>;

    /// Sync all un-synced embeddings to the vector search table.
    /// Returns the number of embeddings synced.
    async fn sync_all_embeddings_to_vec(&self) -> anyhow::Result<usize>;

    /// Count chunks that need embeddings generated.
    async fn get_chunks_needing_embeddings_count(&self) -> anyhow::Result<i64>;

    /// Copy existing embeddings from cache (no-op for SQLite).
    ///
    /// Returns a count of copied embeddings (presumably 0 for the SQLite
    /// no-op case -- confirm against the concrete implementation).
    async fn copy_existing_embeddings_from_cache(&self) -> anyhow::Result<i64>;

    /// Fetch chunks that need embeddings generated.
    ///
    /// Returns chunks whose `blob_sha` does not yet have a corresponding
    /// embedding record. Used by the embedding pipeline to discover work.
    ///
    /// # Arguments
    ///
    /// * `incremental` - If `true`, only return chunks added since the last
    ///   encoding run; if `false`, return all chunks without embeddings
    /// * `sample_size` - Optional cap on the number of chunks returned
    ///   (useful for batched processing)
    ///
    /// # Example
    ///
    /// ```no_run
    /// use maproom::db::Store;
    ///
    /// async fn embedding_pipeline(store: &dyn Store) -> anyhow::Result<()> {
    ///     let chunks = store.fetch_chunks_needing_embeddings(true, Some(500)).await?;
    ///     println!("{} chunks need embeddings", chunks.len());
    ///
    ///     for chunk in &chunks {
    ///         // Generate embedding for chunk.preview + chunk.signature ...
    ///         println!("chunk {} blob_sha={}", chunk.id, chunk.blob_sha);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    async fn fetch_chunks_needing_embeddings(
        &self,
        incremental: bool,
        sample_size: Option<usize>,
    ) -> anyhow::Result<Vec<ChunkForEmbedding>>;
}

// =============================================================================
// StoreMigration - Schema migration management
// =============================================================================

/// Migration management for database schema versioning.
///
/// # Version-Based Migration Tracking
///
/// This trait uses integer-based migration versions (`i32`) for tracking
/// applied migrations. This design aligns with SQLite's simple migration
/// system where migrations are ordered by version number.
///
/// # PostgreSQL Compatibility Note
///
/// PostgreSQL's `sqlx` uses string-based migration identifiers (e.g.,
/// `"20240101_initial_schema.sql"`). Future `PostgresStore` implementations
/// may need an adapter layer to convert between string-based sqlx migration
/// names and integer-based version numbers used by this trait.
#[async_trait]
pub trait StoreMigration: Send + Sync {
    /// Run all pending database migrations.
    ///
    /// Applies every migration not yet recorded as applied (see
    /// `get_applied_migrations`).
    async fn migrate(&self) -> anyhow::Result<()>;

    /// Get the set of already-applied migration versions.
    ///
    /// Versions are integer-based; see the trait-level docs for notes on
    /// mapping string-based sqlx migration names onto these versions.
    async fn get_applied_migrations(&self) -> anyhow::Result<HashSet<i32>>;
}

// =============================================================================
// StoreCleanup - Stale worktree detection and cleanup
// =============================================================================

#[async_trait]
pub trait StoreCleanup: Send + Sync {
    /// Detect worktrees whose paths no longer exist on disk.
    ///
    /// Returns one [`StaleWorktree`] per missing path; callers typically pass
    /// each stale worktree's ID to `delete_worktree_data`.
    async fn detect_stale_worktrees(&self) -> anyhow::Result<Vec<StaleWorktree>>;

    /// Delete all data associated with a worktree.
    ///
    /// Returns a [`WorktreeCleanupResult`] describing what was removed.
    async fn delete_worktree_data(&self, worktree_id: i64)
        -> anyhow::Result<WorktreeCleanupResult>;
}

// =============================================================================
// StoreIndexState - Index state tracking
// =============================================================================

#[async_trait]
pub trait StoreIndexState: Send + Sync {
    /// Get the last indexed tree SHA for a worktree.
    /// Returns "init" if never indexed.
    ///
    /// The sentinel string `"init"` (rather than `None`) signals a fresh
    /// worktree, so callers can always diff against a concrete value.
    async fn get_last_indexed_tree(&self, worktree_id: i64) -> anyhow::Result<String>;

    /// Update the index state for a worktree.
    ///
    /// Records `tree_sha` as the last indexed tree and stores the per-run
    /// [`UpdateStats`] alongside it.
    async fn update_index_state(
        &self,
        worktree_id: i64,
        tree_sha: &str,
        stats: &UpdateStats,
    ) -> anyhow::Result<()>;
}

// =============================================================================
// StoreEncoding - Encoding run lifecycle
// =============================================================================

#[async_trait]
pub trait StoreEncoding: Send + Sync {
    /// Create a new encoding run record. Returns the run ID.
    ///
    /// # Arguments
    ///
    /// * `total_chunks` - Number of chunks the run is expected to process
    /// * `provider` - Optional identifier of the embedding provider
    /// * `dimension` - Optional embedding vector dimension
    async fn create_encoding_run(
        &self,
        total_chunks: i64,
        provider: Option<&str>,
        dimension: Option<i32>,
    ) -> anyhow::Result<i64>;

    /// Update the progress of an encoding run.
    ///
    /// `chunks_per_second` is an optional throughput sample recorded with
    /// the progress update.
    async fn update_encoding_run_progress(
        &self,
        run_id: i64,
        chunks_completed: i64,
        chunks_per_second: Option<f64>,
    ) -> anyhow::Result<()>;

    /// Complete an encoding run, setting its final status.
    ///
    /// NOTE(review): `status` is stringly-typed; the set of valid values is
    /// defined by the concrete implementation -- confirm before passing
    /// arbitrary strings.
    async fn complete_encoding_run(&self, run_id: i64, status: &str) -> anyhow::Result<()>;

    /// Mark all running encoding runs as failed (cleanup on startup).
    ///
    /// Intended for process start, when any run still marked as running must
    /// have been orphaned by a previous process.
    async fn mark_stale_runs_as_failed(&self) -> anyhow::Result<()>;

    /// Get the currently active (running) encoding run, if any.
    ///
    /// Returns `None` when no run is currently active.
    async fn get_active_encoding_run(&self) -> anyhow::Result<Option<EncodingRunRow>>;
}

// =============================================================================
// Store - Supertrait combining all 9 sub-traits
// =============================================================================

/// The unified Store supertrait combining all database operation categories.
///
/// Any type implementing all 9 sub-traits automatically implements `Store`
/// via the blanket impl below.
/// The unified Store supertrait combining all database operation categories.
///
/// Any type implementing all 9 sub-traits automatically implements `Store`
/// via the blanket impl below. Callers should depend on `&dyn Store` (or a
/// narrower sub-trait) rather than a concrete backend type.
pub trait Store:
    StoreCore
    + StoreChunks
    + StoreSearch
    + StoreGraph
    + StoreEmbeddings
    + StoreMigration
    + StoreCleanup
    + StoreIndexState
    + StoreEncoding
{
}

/// Blanket implementation: any type implementing all 9 sub-traits is a Store.
/// Blanket implementation: any type implementing all 9 sub-traits is a Store.
/// No manual `impl Store for SqliteStore` (or any future backend) is needed.
impl<T> Store for T where
    T: StoreCore
        + StoreChunks
        + StoreSearch
        + StoreGraph
        + StoreEmbeddings
        + StoreMigration
        + StoreCleanup
        + StoreIndexState
        + StoreEncoding
{
}

// =============================================================================
// Object safety verification tests
// =============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    // Compile-time object-safety guards. Each helper takes a `&dyn Trait`
    // parameter; if any trait ever stops being object-safe (e.g. a generic
    // method is added), this module fails to compile. The helpers are never
    // called -- their existence is the assertion.

    fn _dyn_core_is_usable(_store: &dyn StoreCore) {}
    fn _dyn_chunks_is_usable(_store: &dyn StoreChunks) {}
    fn _dyn_search_is_usable(_store: &dyn StoreSearch) {}
    fn _dyn_graph_is_usable(_store: &dyn StoreGraph) {}
    fn _dyn_embeddings_is_usable(_store: &dyn StoreEmbeddings) {}
    fn _dyn_migration_is_usable(_store: &dyn StoreMigration) {}
    fn _dyn_cleanup_is_usable(_store: &dyn StoreCleanup) {}
    fn _dyn_index_state_is_usable(_store: &dyn StoreIndexState) {}
    fn _dyn_encoding_is_usable(_store: &dyn StoreEncoding) {}
    fn _dyn_store_is_usable(_store: &dyn Store) {}
}