Skip to main content

roder_api/
code_index.rs

1use std::path::PathBuf;
2
3use serde::{Deserialize, Serialize};
4use time::OffsetDateTime;
5
6use crate::events::{ThreadId, TurnId};
7use crate::extension::EmbeddingProviderId;
8
// Identifier and hash aliases used throughout the code-index API. Every alias
// is a plain `String`: they document intent only and provide no type safety
// (any `String` is accepted wherever one of them is expected).

/// Identifier returned by [`CodeIndexProvider::id`].
pub type CodeIndexProviderId = String;
/// Identifier returned by [`CodeIndexStore::id`].
pub type CodeIndexStoreId = String;
/// Identifier of an [`IndexGeneration`]; also carried by proofs and event contexts.
pub type CodeIndexGenerationId = String;
/// Identifier used as `query_id` on search requests, results, and proof-filtered drops.
pub type CodeIndexQueryId = String;
/// Hash string used for Merkle nodes, their children, and workspace root hashes.
pub type MerkleHash = String;
/// Hash string used for the `content_hash` of chunks, proofs, and drops.
pub type ContentHash = String;
/// Hash string identifying a [`CodeChunk`] and its [`ChunkEmbedding`].
pub type ChunkHash = String;
/// Hash string used for the `path_hash` fields in this module.
pub type PathHash = String;
17
18#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
19#[serde(rename_all = "snake_case")]
20pub enum CodeIndexStatus {
21    Disabled,
22    Missing,
23    Building,
24    Chunking,
25    Embedding,
26    Ready,
27    Stale,
28    Failed,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
32#[serde(rename_all = "snake_case")]
33pub enum CodeIndexNodeKind {
34    File,
35    Directory,
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
39#[serde(rename_all = "camelCase")]
40pub struct WorkspaceSimilarityHash {
41    pub algorithm: String,
42    pub value: String,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
46#[serde(rename_all = "camelCase")]
47pub struct WorkspaceMerkleNode {
48    pub path: PathBuf,
49    pub path_hash: PathHash,
50    pub content_hash: MerkleHash,
51    pub kind: CodeIndexNodeKind,
52    #[serde(default)]
53    pub children: Vec<MerkleHash>,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
57#[serde(rename_all = "camelCase")]
58pub struct WorkspaceMerkleTree {
59    pub workspace_root: PathBuf,
60    pub root_hash: MerkleHash,
61    pub similarity_hash: WorkspaceSimilarityHash,
62    pub nodes: Vec<WorkspaceMerkleNode>,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
66#[serde(rename_all = "camelCase")]
67pub struct CodeByteRange {
68    pub start: u64,
69    pub end: u64,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
73#[serde(rename_all = "camelCase")]
74pub struct CodeLineRange {
75    pub start: u32,
76    pub end: u32,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
80#[serde(rename_all = "camelCase")]
81pub struct CodeChunk {
82    pub chunk_hash: ChunkHash,
83    pub path: PathBuf,
84    pub path_hash: PathHash,
85    pub byte_range: CodeByteRange,
86    pub line_range: CodeLineRange,
87    pub content_hash: ContentHash,
88    pub language: Option<String>,
89    pub symbol_hint: Option<String>,
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
93#[serde(rename_all = "camelCase")]
94pub struct ChunkEmbedding {
95    pub chunk_hash: ChunkHash,
96    pub provider: EmbeddingProviderId,
97    pub model: String,
98    pub dimensions: usize,
99    #[serde(default)]
100    pub vector: Vec<f32>,
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
104#[serde(rename_all = "camelCase")]
105pub struct ContentProof {
106    pub path_hash: PathHash,
107    pub content_hash: ContentHash,
108    pub workspace_root_hash: MerkleHash,
109    pub generation_id: CodeIndexGenerationId,
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
113#[serde(rename_all = "camelCase")]
114pub struct CodeIndexStats {
115    pub file_count: u64,
116    pub chunk_count: u64,
117    pub embedded_chunk_count: u64,
118    pub cached_embedding_count: u64,
119    pub index_bytes: u64,
120}
121
122#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
123#[serde(rename_all = "camelCase")]
124pub struct IndexGeneration {
125    pub id: CodeIndexGenerationId,
126    pub status: CodeIndexStatus,
127    pub workspace_root: PathBuf,
128    pub root_hash: Option<MerkleHash>,
129    pub config_hash: String,
130    pub stats: CodeIndexStats,
131    #[serde(with = "time::serde::rfc3339")]
132    pub created_at: OffsetDateTime,
133    #[serde(with = "time::serde::rfc3339::option")]
134    pub updated_at: Option<OffsetDateTime>,
135    pub stale_reason: Option<String>,
136}
137
138#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
139#[serde(rename_all = "camelCase")]
140pub struct CodeIndexSearchRequest {
141    pub query_id: CodeIndexQueryId,
142    pub query: String,
143    pub workspace_root: PathBuf,
144    pub limit: usize,
145}
146
147#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
148#[serde(rename_all = "camelCase")]
149pub struct CodeIndexSearchResult {
150    pub query_id: CodeIndexQueryId,
151    pub chunk: CodeChunk,
152    pub score: f32,
153    pub proof: ContentProof,
154    pub proof_verified: bool,
155    #[serde(default, skip_serializing_if = "Option::is_none")]
156    pub snippet: Option<String>,
157}
158
159#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
160#[serde(rename_all = "camelCase")]
161pub struct CodeIndexSearchResponse {
162    pub generation: IndexGeneration,
163    pub results: Vec<CodeIndexSearchResult>,
164    pub dropped_results: Vec<ProofFilteredDrop>,
165}
166
167#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
168#[serde(rename_all = "camelCase")]
169pub struct ProofFilteredDrop {
170    pub query_id: CodeIndexQueryId,
171    pub path_hash: PathHash,
172    pub content_hash: ContentHash,
173    pub reason: String,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
177#[serde(rename_all = "camelCase")]
178pub struct CodeIndexEventContext {
179    pub workspace_root: PathBuf,
180    pub generation_id: Option<CodeIndexGenerationId>,
181    pub thread_id: Option<ThreadId>,
182    pub turn_id: Option<TurnId>,
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
186#[serde(rename_all = "camelCase")]
187pub struct CodeIndexingStarted {
188    pub context: CodeIndexEventContext,
189    pub config_hash: String,
190    #[serde(with = "time::serde::rfc3339")]
191    pub timestamp: OffsetDateTime,
192}
193
194#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
195#[serde(rename_all = "camelCase")]
196pub struct CodeIndexChunked {
197    pub context: CodeIndexEventContext,
198    pub file_count: u64,
199    pub chunk_count: u64,
200    pub changed_chunk_count: u64,
201    #[serde(with = "time::serde::rfc3339")]
202    pub timestamp: OffsetDateTime,
203}
204
205#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
206#[serde(rename_all = "camelCase")]
207pub struct CodeIndexEmbedded {
208    pub context: CodeIndexEventContext,
209    pub provider: EmbeddingProviderId,
210    pub model: String,
211    pub embedded_chunk_count: u64,
212    pub cached_embedding_count: u64,
213    #[serde(with = "time::serde::rfc3339")]
214    pub timestamp: OffsetDateTime,
215}
216
217#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
218#[serde(rename_all = "camelCase")]
219pub struct CodeIndexReady {
220    pub generation: IndexGeneration,
221    #[serde(with = "time::serde::rfc3339")]
222    pub timestamp: OffsetDateTime,
223}
224
225#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
226#[serde(rename_all = "camelCase")]
227pub struct CodeIndexStale {
228    pub context: CodeIndexEventContext,
229    pub reason: String,
230    #[serde(with = "time::serde::rfc3339")]
231    pub timestamp: OffsetDateTime,
232}
233
234#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
235#[serde(rename_all = "camelCase")]
236pub struct CodeIndexFailed {
237    pub context: CodeIndexEventContext,
238    pub error: String,
239    #[serde(with = "time::serde::rfc3339")]
240    pub timestamp: OffsetDateTime,
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
244#[serde(rename_all = "camelCase")]
245pub struct CodeIndexProofFilteredResultDropped {
246    pub context: CodeIndexEventContext,
247    pub drop: ProofFilteredDrop,
248    #[serde(with = "time::serde::rfc3339")]
249    pub timestamp: OffsetDateTime,
250}
251
252#[async_trait::async_trait]
253pub trait CodeIndexStore: Send + Sync + 'static {
254    fn id(&self) -> CodeIndexStoreId;
255
256    async fn status(&self, workspace_root: PathBuf) -> anyhow::Result<IndexGeneration>;
257
258    async fn search(
259        &self,
260        request: CodeIndexSearchRequest,
261    ) -> anyhow::Result<CodeIndexSearchResponse>;
262
263    async fn read_chunk(
264        &self,
265        proof: ContentProof,
266        byte_range: Option<CodeByteRange>,
267    ) -> anyhow::Result<Option<String>>;
268
269    async fn list_proofs(&self, workspace_root: PathBuf) -> anyhow::Result<Vec<ContentProof>>;
270}
271
272#[async_trait::async_trait]
273pub trait CodeIndexProvider: Send + Sync + 'static {
274    fn id(&self) -> CodeIndexProviderId;
275
276    async fn rebuild(&self, workspace_root: PathBuf) -> anyhow::Result<IndexGeneration>;
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// A result whose `snippet` is `None` must serialize without a `snippet`
    /// key, while status, proof flag, and drop reason use their wire names.
    #[test]
    fn code_index_search_result_serializes_without_source_by_default() {
        let generation = generation();
        let response = CodeIndexSearchResponse {
            generation,
            results: vec![CodeIndexSearchResult {
                query_id: "query-1".to_string(),
                chunk: chunk(),
                score: 0.82,
                proof: proof(),
                proof_verified: true,
                snippet: None,
            }],
            dropped_results: vec![ProofFilteredDrop {
                query_id: "query-1".to_string(),
                path_hash: "path-denied".to_string(),
                content_hash: "content-denied".to_string(),
                reason: "content proof missing".to_string(),
            }],
        };

        let value = serde_json::to_value(&response).unwrap();
        assert_eq!(value["generation"]["status"], "ready");
        assert_eq!(value["results"][0]["proofVerified"], true);
        assert!(value["results"][0].get("snippet").is_none());
        assert_eq!(
            value["droppedResults"][0]["reason"],
            "content proof missing"
        );
    }

    /// A drop event must survive a JSON round trip unchanged.
    #[test]
    fn code_index_events_round_trip_context_and_proof_drop() {
        let event = CodeIndexProofFilteredResultDropped {
            context: CodeIndexEventContext {
                workspace_root: PathBuf::from("/repo"),
                generation_id: Some("gen-1".to_string()),
                thread_id: Some("thread-1".to_string()),
                turn_id: Some("turn-1".to_string()),
            },
            drop: ProofFilteredDrop {
                query_id: "query-1".to_string(),
                path_hash: "path-x".to_string(),
                content_hash: "content-x".to_string(),
                reason: "path outside workspace scope".to_string(),
            },
            timestamp: OffsetDateTime::UNIX_EPOCH,
        };

        let json = serde_json::to_string(&event).unwrap();
        let round_trip: CodeIndexProofFilteredResultDropped = serde_json::from_str(&json).unwrap();

        assert_eq!(round_trip.context.thread_id.as_deref(), Some("thread-1"));
        assert_eq!(round_trip.drop.reason, "path outside workspace scope");
        // Spot checks above give readable failures; this catches any field
        // (including the timestamp) that does not round-trip.
        assert_eq!(round_trip, event);
    }

    /// Fixture: a `Ready` generation for `/repo` with epoch timestamps.
    fn generation() -> IndexGeneration {
        IndexGeneration {
            id: "gen-1".to_string(),
            status: CodeIndexStatus::Ready,
            workspace_root: PathBuf::from("/repo"),
            root_hash: Some("root-hash".to_string()),
            config_hash: "config-hash".to_string(),
            stats: CodeIndexStats {
                file_count: 2,
                chunk_count: 4,
                embedded_chunk_count: 4,
                cached_embedding_count: 1,
                index_bytes: 128,
            },
            created_at: OffsetDateTime::UNIX_EPOCH,
            updated_at: Some(OffsetDateTime::UNIX_EPOCH),
            stale_reason: None,
        }
    }

    /// Fixture: a chunk of `src/lib.rs` whose hashes match `proof()`.
    fn chunk() -> CodeChunk {
        CodeChunk {
            chunk_hash: "chunk-1".to_string(),
            path: PathBuf::from("src/lib.rs"),
            path_hash: "path-1".to_string(),
            byte_range: CodeByteRange { start: 0, end: 42 },
            line_range: CodeLineRange { start: 1, end: 3 },
            content_hash: "content-1".to_string(),
            language: Some("rust".to_string()),
            symbol_hint: Some("CodeIndex".to_string()),
        }
    }

    /// Fixture: a proof whose hashes match `chunk()` and `generation()`.
    fn proof() -> ContentProof {
        ContentProof {
            path_hash: "path-1".to_string(),
            content_hash: "content-1".to_string(),
            workspace_root_hash: "root-hash".to_string(),
            generation_id: "gen-1".to_string(),
        }
    }
}