1use std::path::PathBuf;
2
3use serde::{Deserialize, Serialize};
4use time::OffsetDateTime;
5
6use crate::events::{ThreadId, TurnId};
7use crate::extension::EmbeddingProviderId;
8
/// Identifier returned by [`CodeIndexProvider::id`]; a plain `String` alias.
pub type CodeIndexProviderId = String;
/// Identifier returned by [`CodeIndexStore::id`]; a plain `String` alias.
pub type CodeIndexStoreId = String;
/// Identifier of an [`IndexGeneration`]; a plain `String` alias.
pub type CodeIndexGenerationId = String;
/// Identifier carried by search requests, results and drops; a plain `String` alias.
pub type CodeIndexQueryId = String;
/// Hash used for Merkle-tree nodes and roots, stored as a `String`.
///
/// The hash algorithm and text encoding are not defined in this file.
pub type MerkleHash = String;
/// Hash of content, stored as a `String` (algorithm not defined in this file).
pub type ContentHash = String;
/// Hash identifying a [`CodeChunk`], stored as a `String` (algorithm not defined in this file).
pub type ChunkHash = String;
/// Hash of a path, stored as a `String` (algorithm not defined in this file).
pub type PathHash = String;
17
18#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
19#[serde(rename_all = "snake_case")]
20pub enum CodeIndexStatus {
21 Disabled,
22 Missing,
23 Building,
24 Chunking,
25 Embedding,
26 Ready,
27 Stale,
28 Failed,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
32#[serde(rename_all = "snake_case")]
33pub enum CodeIndexNodeKind {
34 File,
35 Directory,
36}
37
38#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
39#[serde(rename_all = "camelCase")]
40pub struct WorkspaceSimilarityHash {
41 pub algorithm: String,
42 pub value: String,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
46#[serde(rename_all = "camelCase")]
47pub struct WorkspaceMerkleNode {
48 pub path: PathBuf,
49 pub path_hash: PathHash,
50 pub content_hash: MerkleHash,
51 pub kind: CodeIndexNodeKind,
52 #[serde(default)]
53 pub children: Vec<MerkleHash>,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
57#[serde(rename_all = "camelCase")]
58pub struct WorkspaceMerkleTree {
59 pub workspace_root: PathBuf,
60 pub root_hash: MerkleHash,
61 pub similarity_hash: WorkspaceSimilarityHash,
62 pub nodes: Vec<WorkspaceMerkleNode>,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
66#[serde(rename_all = "camelCase")]
67pub struct CodeByteRange {
68 pub start: u64,
69 pub end: u64,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
73#[serde(rename_all = "camelCase")]
74pub struct CodeLineRange {
75 pub start: u32,
76 pub end: u32,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
80#[serde(rename_all = "camelCase")]
81pub struct CodeChunk {
82 pub chunk_hash: ChunkHash,
83 pub path: PathBuf,
84 pub path_hash: PathHash,
85 pub byte_range: CodeByteRange,
86 pub line_range: CodeLineRange,
87 pub content_hash: ContentHash,
88 pub language: Option<String>,
89 pub symbol_hint: Option<String>,
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
93#[serde(rename_all = "camelCase")]
94pub struct ChunkEmbedding {
95 pub chunk_hash: ChunkHash,
96 pub provider: EmbeddingProviderId,
97 pub model: String,
98 pub dimensions: usize,
99 #[serde(default)]
100 pub vector: Vec<f32>,
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
104#[serde(rename_all = "camelCase")]
105pub struct ContentProof {
106 pub path_hash: PathHash,
107 pub content_hash: ContentHash,
108 pub workspace_root_hash: MerkleHash,
109 pub generation_id: CodeIndexGenerationId,
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
113#[serde(rename_all = "camelCase")]
114pub struct CodeIndexStats {
115 pub file_count: u64,
116 pub chunk_count: u64,
117 pub embedded_chunk_count: u64,
118 pub cached_embedding_count: u64,
119 pub index_bytes: u64,
120}
121
122#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
123#[serde(rename_all = "camelCase")]
124pub struct IndexGeneration {
125 pub id: CodeIndexGenerationId,
126 pub status: CodeIndexStatus,
127 pub workspace_root: PathBuf,
128 pub root_hash: Option<MerkleHash>,
129 pub config_hash: String,
130 pub stats: CodeIndexStats,
131 #[serde(with = "time::serde::rfc3339")]
132 pub created_at: OffsetDateTime,
133 #[serde(with = "time::serde::rfc3339::option")]
134 pub updated_at: Option<OffsetDateTime>,
135 pub stale_reason: Option<String>,
136}
137
138#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
139#[serde(rename_all = "camelCase")]
140pub struct CodeIndexSearchRequest {
141 pub query_id: CodeIndexQueryId,
142 pub query: String,
143 pub workspace_root: PathBuf,
144 pub limit: usize,
145}
146
147#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
148#[serde(rename_all = "camelCase")]
149pub struct CodeIndexSearchResult {
150 pub query_id: CodeIndexQueryId,
151 pub chunk: CodeChunk,
152 pub score: f32,
153 pub proof: ContentProof,
154 pub proof_verified: bool,
155 #[serde(default, skip_serializing_if = "Option::is_none")]
156 pub snippet: Option<String>,
157}
158
159#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
160#[serde(rename_all = "camelCase")]
161pub struct CodeIndexSearchResponse {
162 pub generation: IndexGeneration,
163 pub results: Vec<CodeIndexSearchResult>,
164 pub dropped_results: Vec<ProofFilteredDrop>,
165}
166
167#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
168#[serde(rename_all = "camelCase")]
169pub struct ProofFilteredDrop {
170 pub query_id: CodeIndexQueryId,
171 pub path_hash: PathHash,
172 pub content_hash: ContentHash,
173 pub reason: String,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
177#[serde(rename_all = "camelCase")]
178pub struct CodeIndexEventContext {
179 pub workspace_root: PathBuf,
180 pub generation_id: Option<CodeIndexGenerationId>,
181 pub thread_id: Option<ThreadId>,
182 pub turn_id: Option<TurnId>,
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
186#[serde(rename_all = "camelCase")]
187pub struct CodeIndexingStarted {
188 pub context: CodeIndexEventContext,
189 pub config_hash: String,
190 #[serde(with = "time::serde::rfc3339")]
191 pub timestamp: OffsetDateTime,
192}
193
194#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
195#[serde(rename_all = "camelCase")]
196pub struct CodeIndexChunked {
197 pub context: CodeIndexEventContext,
198 pub file_count: u64,
199 pub chunk_count: u64,
200 pub changed_chunk_count: u64,
201 #[serde(with = "time::serde::rfc3339")]
202 pub timestamp: OffsetDateTime,
203}
204
205#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
206#[serde(rename_all = "camelCase")]
207pub struct CodeIndexEmbedded {
208 pub context: CodeIndexEventContext,
209 pub provider: EmbeddingProviderId,
210 pub model: String,
211 pub embedded_chunk_count: u64,
212 pub cached_embedding_count: u64,
213 #[serde(with = "time::serde::rfc3339")]
214 pub timestamp: OffsetDateTime,
215}
216
217#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
218#[serde(rename_all = "camelCase")]
219pub struct CodeIndexReady {
220 pub generation: IndexGeneration,
221 #[serde(with = "time::serde::rfc3339")]
222 pub timestamp: OffsetDateTime,
223}
224
225#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
226#[serde(rename_all = "camelCase")]
227pub struct CodeIndexStale {
228 pub context: CodeIndexEventContext,
229 pub reason: String,
230 #[serde(with = "time::serde::rfc3339")]
231 pub timestamp: OffsetDateTime,
232}
233
234#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
235#[serde(rename_all = "camelCase")]
236pub struct CodeIndexFailed {
237 pub context: CodeIndexEventContext,
238 pub error: String,
239 #[serde(with = "time::serde::rfc3339")]
240 pub timestamp: OffsetDateTime,
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
244#[serde(rename_all = "camelCase")]
245pub struct CodeIndexProofFilteredResultDropped {
246 pub context: CodeIndexEventContext,
247 pub drop: ProofFilteredDrop,
248 #[serde(with = "time::serde::rfc3339")]
249 pub timestamp: OffsetDateTime,
250}
251
252#[async_trait::async_trait]
253pub trait CodeIndexStore: Send + Sync + 'static {
254 fn id(&self) -> CodeIndexStoreId;
255
256 async fn status(&self, workspace_root: PathBuf) -> anyhow::Result<IndexGeneration>;
257
258 async fn search(
259 &self,
260 request: CodeIndexSearchRequest,
261 ) -> anyhow::Result<CodeIndexSearchResponse>;
262
263 async fn read_chunk(
264 &self,
265 proof: ContentProof,
266 byte_range: Option<CodeByteRange>,
267 ) -> anyhow::Result<Option<String>>;
268
269 async fn list_proofs(&self, workspace_root: PathBuf) -> anyhow::Result<Vec<ContentProof>>;
270}
271
272#[async_trait::async_trait]
273pub trait CodeIndexProvider: Send + Sync + 'static {
274 fn id(&self) -> CodeIndexProviderId;
275
276 async fn rebuild(&self, workspace_root: PathBuf) -> anyhow::Result<IndexGeneration>;
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// A result whose `snippet` is `None` must omit the `snippet` key, while
    /// the response uses camelCase keys and a snake_case status string.
    #[test]
    fn code_index_search_result_serializes_without_source_by_default() {
        let generation = generation();
        let response = CodeIndexSearchResponse {
            generation,
            results: vec![CodeIndexSearchResult {
                query_id: "query-1".to_string(),
                chunk: chunk(),
                score: 0.82,
                proof: proof(),
                proof_verified: true,
                snippet: None,
            }],
            dropped_results: vec![ProofFilteredDrop {
                query_id: "query-1".to_string(),
                path_hash: "path-denied".to_string(),
                content_hash: "content-denied".to_string(),
                reason: "content proof missing".to_string(),
            }],
        };

        let value = serde_json::to_value(&response).unwrap();
        assert_eq!(value["generation"]["status"], "ready");
        assert_eq!(value["results"][0]["proofVerified"], true);
        assert!(value["results"][0].get("snippet").is_none());
        assert_eq!(
            value["droppedResults"][0]["reason"],
            "content proof missing"
        );
    }

    /// Serializes a proof-drop event to JSON and back and checks that no field
    /// is lost on the way.
    #[test]
    fn code_index_events_round_trip_context_and_proof_drop() {
        let event = CodeIndexProofFilteredResultDropped {
            context: CodeIndexEventContext {
                workspace_root: PathBuf::from("/repo"),
                generation_id: Some("gen-1".to_string()),
                thread_id: Some("thread-1".to_string()),
                turn_id: Some("turn-1".to_string()),
            },
            drop: ProofFilteredDrop {
                query_id: "query-1".to_string(),
                path_hash: "path-x".to_string(),
                content_hash: "content-x".to_string(),
                reason: "path outside workspace scope".to_string(),
            },
            timestamp: OffsetDateTime::UNIX_EPOCH,
        };

        let json = serde_json::to_string(&event).unwrap();
        let round_trip: CodeIndexProofFilteredResultDropped = serde_json::from_str(&json).unwrap();

        assert_eq!(round_trip.context.thread_id.as_deref(), Some("thread-1"));
        assert_eq!(round_trip.drop.reason, "path outside workspace scope");
        // Spot checks above give readable failures; the whole-value comparison
        // also covers the workspace root, the remaining ids, the hashes and the
        // RFC 3339 timestamp, which were previously unchecked.
        assert_eq!(round_trip, event);
    }

    /// Fixture: a `Ready` generation with fixed ids, counters and epoch timestamps.
    fn generation() -> IndexGeneration {
        IndexGeneration {
            id: "gen-1".to_string(),
            status: CodeIndexStatus::Ready,
            workspace_root: PathBuf::from("/repo"),
            root_hash: Some("root-hash".to_string()),
            config_hash: "config-hash".to_string(),
            stats: CodeIndexStats {
                file_count: 2,
                chunk_count: 4,
                embedded_chunk_count: 4,
                cached_embedding_count: 1,
                index_bytes: 128,
            },
            created_at: OffsetDateTime::UNIX_EPOCH,
            updated_at: Some(OffsetDateTime::UNIX_EPOCH),
            stale_reason: None,
        }
    }

    /// Fixture: a chunk of `src/lib.rs` whose hashes match those in `proof()`.
    fn chunk() -> CodeChunk {
        CodeChunk {
            chunk_hash: "chunk-1".to_string(),
            path: PathBuf::from("src/lib.rs"),
            path_hash: "path-1".to_string(),
            byte_range: CodeByteRange { start: 0, end: 42 },
            line_range: CodeLineRange { start: 1, end: 3 },
            content_hash: "content-1".to_string(),
            language: Some("rust".to_string()),
            symbol_hint: Some("CodeIndex".to_string()),
        }
    }

    /// Fixture: a proof whose hashes and generation id match `chunk()` and
    /// `generation()`.
    fn proof() -> ContentProof {
        ContentProof {
            path_hash: "path-1".to_string(),
            content_hash: "content-1".to_string(),
            workspace_root_hash: "root-hash".to_string(),
            generation_id: "gen-1".to_string(),
        }
    }
}