1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum CkError {
7 #[error("IO error: {0}")]
8 Io(#[from] std::io::Error),
9
10 #[error("Regex error: {0}")]
11 Regex(#[from] regex::Error),
12
13 #[error("Serialization error: {0}")]
14 Serialization(#[from] bincode::Error),
15
16 #[error("JSON error: {0}")]
17 Json(#[from] serde_json::Error),
18
19 #[error("Index error: {0}")]
20 Index(String),
21
22 #[error("Search error: {0}")]
23 Search(String),
24
25 #[error("Embedding error: {0}")]
26 Embedding(String),
27
28 #[error("Other error: {0}")]
29 Other(String),
30}
31
32pub type Result<T> = std::result::Result<T, CkError>;
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
35pub enum Language {
36 Rust,
37 Python,
38 JavaScript,
39 TypeScript,
40 Haskell,
41 Go,
42 Java,
43 C,
44 Cpp,
45 CSharp,
46 Ruby,
47 Php,
48 Swift,
49 Kotlin,
50}
51
52impl Language {
53 pub fn from_extension(ext: &str) -> Option<Self> {
54 match ext {
55 "rs" => Some(Language::Rust),
56 "py" => Some(Language::Python),
57 "js" => Some(Language::JavaScript),
58 "ts" | "tsx" => Some(Language::TypeScript),
59 "hs" | "lhs" => Some(Language::Haskell),
60 "go" => Some(Language::Go),
61 "java" => Some(Language::Java),
62 "c" => Some(Language::C),
63 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
64 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
66 "rb" => Some(Language::Ruby),
67 "php" => Some(Language::Php),
68 "swift" => Some(Language::Swift),
69 "kt" | "kts" => Some(Language::Kotlin),
70 _ => None,
71 }
72 }
73
74 pub fn from_path(path: &Path) -> Option<Self> {
75 path.extension()
76 .and_then(|ext| ext.to_str())
77 .and_then(Self::from_extension)
78 }
79}
80
81impl std::fmt::Display for Language {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let name = match self {
84 Language::Rust => "rust",
85 Language::Python => "python",
86 Language::JavaScript => "javascript",
87 Language::TypeScript => "typescript",
88 Language::Haskell => "haskell",
89 Language::Go => "go",
90 Language::Java => "java",
91 Language::C => "c",
92 Language::Cpp => "cpp",
93 Language::CSharp => "csharp",
94 Language::Ruby => "ruby",
95 Language::Php => "php",
96 Language::Swift => "swift",
97 Language::Kotlin => "kotlin",
98 };
99 write!(f, "{}", name)
100 }
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct Span {
105 pub byte_start: usize,
106 pub byte_end: usize,
107 pub line_start: usize,
108 pub line_end: usize,
109}
110
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct FileMetadata {
113 pub path: PathBuf,
114 pub hash: String,
115 pub last_modified: u64,
116 pub size: u64,
117}
118
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct SearchResult {
121 pub file: PathBuf,
122 pub span: Span,
123 pub score: f32,
124 pub preview: String,
125 #[serde(skip_serializing_if = "Option::is_none")]
126 pub lang: Option<Language>,
127 #[serde(skip_serializing_if = "Option::is_none")]
128 pub symbol: Option<String>,
129 #[serde(skip_serializing_if = "Option::is_none")]
130 pub chunk_hash: Option<String>,
131 #[serde(skip_serializing_if = "Option::is_none")]
132 pub index_epoch: Option<u64>,
133}
134
135#[derive(Debug, Clone)]
137pub struct SearchResults {
138 pub matches: Vec<SearchResult>,
139 pub closest_below_threshold: Option<SearchResult>,
141}
142
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct JsonSearchResult {
145 pub file: String,
146 pub span: Span,
147 pub lang: Option<Language>,
148 pub symbol: Option<String>,
149 pub score: f32,
150 pub signals: SearchSignals,
151 pub preview: String,
152 pub model: String,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct JsonlSearchResult {
157 pub path: String,
158 pub span: Span,
159 pub language: Option<String>,
160 #[serde(skip_serializing_if = "Option::is_none")]
161 pub snippet: Option<String>,
162 #[serde(skip_serializing_if = "Option::is_none")]
163 pub score: Option<f32>,
164 #[serde(skip_serializing_if = "Option::is_none")]
165 pub chunk_hash: Option<String>,
166 #[serde(skip_serializing_if = "Option::is_none")]
167 pub index_epoch: Option<u64>,
168}
169
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct SearchSignals {
172 pub lex_rank: Option<usize>,
173 pub vec_rank: Option<usize>,
174 pub rrf_score: f32,
175}
176
/// Selects which search strategy a query runs with.
///
/// The behaviour of each mode is implemented by the search code outside this
/// definition; the variant notes below only restate the names.
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    /// Regex mode; this is the mode chosen by `SearchOptions::default()`.
    Regex,
    /// Lexical mode.
    Lexical,
    /// Semantic mode.
    Semantic,
    /// Hybrid mode (presumably combining lexical and semantic — confirm).
    Hybrid,
}
184
185#[derive(Debug, Clone)]
186pub struct SearchOptions {
187 pub mode: SearchMode,
188 pub query: String,
189 pub path: PathBuf,
190 pub top_k: Option<usize>,
191 pub threshold: Option<f32>,
192 pub case_insensitive: bool,
193 pub whole_word: bool,
194 pub fixed_string: bool,
195 pub line_numbers: bool,
196 pub context_lines: usize,
197 pub before_context_lines: usize,
198 pub after_context_lines: usize,
199 pub recursive: bool,
200 pub json_output: bool,
201 pub jsonl_output: bool,
202 pub no_snippet: bool,
203 pub reindex: bool,
204 pub show_scores: bool,
205 pub show_filenames: bool,
206 pub files_with_matches: bool,
207 pub files_without_matches: bool,
208 pub exclude_patterns: Vec<String>,
209 pub respect_gitignore: bool,
210 pub full_section: bool,
211 pub rerank: bool,
213 pub rerank_model: Option<String>,
214}
215
216impl JsonlSearchResult {
217 pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
218 Self {
219 path: result.file.to_string_lossy().to_string(),
220 span: result.span.clone(),
221 language: result.lang.as_ref().map(|l| l.to_string()),
222 snippet: if include_snippet {
223 Some(result.preview.clone())
224 } else {
225 None
226 },
227 score: if result.score >= 0.0 {
228 Some(result.score)
229 } else {
230 None
231 },
232 chunk_hash: result.chunk_hash.clone(),
233 index_epoch: result.index_epoch,
234 }
235 }
236}
237
238impl Default for SearchOptions {
239 fn default() -> Self {
240 Self {
241 mode: SearchMode::Regex,
242 query: String::new(),
243 path: PathBuf::from("."),
244 top_k: None,
245 threshold: None,
246 case_insensitive: false,
247 whole_word: false,
248 fixed_string: false,
249 line_numbers: false,
250 context_lines: 0,
251 before_context_lines: 0,
252 after_context_lines: 0,
253 recursive: true,
254 json_output: false,
255 jsonl_output: false,
256 no_snippet: false,
257 reindex: false,
258 show_scores: false,
259 show_filenames: false,
260 files_with_matches: false,
261 files_without_matches: false,
262 exclude_patterns: get_default_exclude_patterns(),
263 respect_gitignore: true,
264 full_section: false,
265 rerank: false,
267 rerank_model: None,
268 }
269 }
270}
271
/// Returns the exclude patterns used by `SearchOptions::default()`.
///
/// A fresh `Vec<String>` is allocated on every call, so callers may extend or
/// trim it freely. The order of the entries is fixed.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_PATTERNS: &[&str] = &[
        // This tool's own sidecar directory and caches.
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version-control metadata.
        ".git",
        ".svn",
        ".hg",
        // Build output and dependency directories.
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Python virtual environments.
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // Editor / IDE state.
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories.
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_PATTERNS.iter().copied().map(String::from).collect()
}
311
/// Computes where the sidecar file for `file_path` lives inside `repo_root`.
///
/// The sidecar mirrors the file's path relative to `repo_root` underneath
/// `<repo_root>/.ck`, with `.ck` appended to the file name:
/// `src/main.rs` becomes `.ck/src/main.rs.ck` and `README` becomes
/// `.ck/README.ck`.
///
/// The original extension is carried over as raw OS-string bytes, so file
/// names whose extension is not valid UTF-8 keep their exact bytes instead of
/// being rewritten with replacement characters (which could make distinct
/// files share one sidecar).
///
/// NOTE: when `file_path` is not located under `repo_root` it is used as-is;
/// if it is absolute, `PathBuf::push` replaces the `.ck` prefix and the result
/// is simply `file_path` with `.ck` appended.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    let mut sidecar = repo_root.join(".ck");
    sidecar.push(relative);

    // `set_extension` swaps the last extension, so pass "<ext>.ck" to keep the
    // original extension in front of the sidecar suffix.
    let sidecar_ext = match relative.extension() {
        Some(ext) => {
            let mut combined = ext.to_os_string();
            combined.push(".ck");
            combined
        }
        None => std::ffi::OsString::from("ck"),
    };
    sidecar.set_extension(sidecar_ext);
    sidecar
}
323
324pub fn compute_file_hash(path: &Path) -> Result<String> {
325 let data = std::fs::read(path)?;
326 let hash = blake3::hash(&data);
327 Ok(hash.to_hex().to_string())
328}
329
/// Unit tests for the core types and helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A `Span` stores exactly the values it was built with.
    #[test]
    fn test_span_creation() {
        let span = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 10);
        assert_eq!(span.line_start, 1);
        assert_eq!(span.line_end, 2);
    }

    /// Every field of `SearchOptions::default()` has its documented value.
    #[test]
    fn test_search_options_default() {
        let options = SearchOptions::default();
        assert!(matches!(options.mode, SearchMode::Regex));
        assert_eq!(options.query, "");
        assert_eq!(options.path, PathBuf::from("."));
        assert_eq!(options.top_k, None);
        assert_eq!(options.threshold, None);
        assert!(!options.case_insensitive);
        assert!(!options.whole_word);
        assert!(!options.fixed_string);
        assert!(!options.line_numbers);
        assert_eq!(options.context_lines, 0);
        assert_eq!(options.before_context_lines, 0);
        assert_eq!(options.after_context_lines, 0);
        assert!(options.recursive);
        assert!(!options.json_output);
        assert!(!options.jsonl_output);
        assert!(!options.no_snippet);
        assert!(!options.reindex);
        assert!(!options.show_scores);
        assert!(!options.show_filenames);
        assert!(!options.files_with_matches);
        assert!(!options.files_without_matches);
        assert_eq!(options.exclude_patterns, get_default_exclude_patterns());
        assert!(options.respect_gitignore);
        assert!(!options.full_section);
        assert!(!options.rerank);
        assert_eq!(options.rerank_model, None);
    }

    /// `FileMetadata` survives a JSON round trip unchanged.
    #[test]
    fn test_file_metadata_serialization() {
        let metadata = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let json = serde_json::to_string(&metadata).unwrap();
        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();

        assert_eq!(metadata.path, deserialized.path);
        assert_eq!(metadata.hash, deserialized.hash);
        assert_eq!(metadata.last_modified, deserialized.last_modified);
        assert_eq!(metadata.size, deserialized.size);
    }

    /// `SearchResult` survives a JSON round trip unchanged.
    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("main".to_string()),
            chunk_hash: Some("abc123".to_string()),
            index_epoch: Some(1699123456),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
        assert_eq!(result.chunk_hash, deserialized.chunk_hash);
        assert_eq!(result.index_epoch, deserialized.index_epoch);
    }

    /// Conversion to `JsonlSearchResult` copies the fields, honours the
    /// snippet flag and drops negative scores.
    #[test]
    fn test_jsonl_search_result_conversion() {
        let result = SearchResult {
            file: PathBuf::from("src/auth.rs"),
            span: Span {
                byte_start: 1203,
                byte_end: 1456,
                line_start: 42,
                line_end: 58,
            },
            score: 0.89,
            preview: "function authenticate(user) {...}".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("authenticate".to_string()),
            chunk_hash: Some("abc123def456".to_string()),
            index_epoch: Some(1699123456),
        };

        let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
        assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
        assert_eq!(jsonl_with_snippet.span.line_start, 42);
        assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
        assert_eq!(
            jsonl_with_snippet.snippet,
            Some("function authenticate(user) {...}".to_string())
        );
        assert_eq!(jsonl_with_snippet.score, Some(0.89));
        assert_eq!(
            jsonl_with_snippet.chunk_hash,
            Some("abc123def456".to_string())
        );
        assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));

        let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
        assert_eq!(jsonl_no_snippet.snippet, None);
        assert_eq!(jsonl_no_snippet.path, "src/auth.rs");

        // A negative score is treated as "no score" and left out.
        let unscored = SearchResult {
            score: -1.0,
            ..result
        };
        let jsonl_unscored = JsonlSearchResult::from_search_result(&unscored, true);
        assert_eq!(jsonl_unscored.score, None);
    }

    /// The sidecar mirrors the relative path under `.ck` and appends `.ck`.
    #[test]
    fn test_get_sidecar_path() {
        let repo_root = PathBuf::from("/home/user/project");
        let file_path = PathBuf::from("/home/user/project/src/main.rs");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");

        assert_eq!(sidecar, expected);
    }

    /// Files without an extension simply gain a `.ck` extension.
    #[test]
    fn test_get_sidecar_path_no_extension() {
        let repo_root = PathBuf::from("/project");
        let file_path = PathBuf::from("/project/README");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/project/.ck/README.ck");

        assert_eq!(sidecar, expected);
    }

    /// Hashing is deterministic and sensitive to content changes.
    #[test]
    fn test_compute_file_hash() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");

        fs::write(&file_path, "hello world").unwrap();

        let hash1 = compute_file_hash(&file_path).unwrap();
        let hash2 = compute_file_hash(&file_path).unwrap();

        assert_eq!(hash1, hash2);
        assert!(!hash1.is_empty());

        fs::write(&file_path, "hello rust").unwrap();
        let hash3 = compute_file_hash(&file_path).unwrap();
        assert_ne!(hash1, hash3);
    }

    /// Hashing a missing file reports an error instead of panicking.
    #[test]
    fn test_compute_file_hash_nonexistent() {
        let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
        assert!(result.is_err());
    }

    /// `JsonSearchResult` survives a JSON round trip unchanged.
    #[test]
    fn test_json_search_result_serialization() {
        let signals = SearchSignals {
            lex_rank: Some(1),
            vec_rank: Some(2),
            rrf_score: 0.85,
        };

        let result = JsonSearchResult {
            file: "test.txt".to_string(),
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            lang: None,
            symbol: None,
            score: 0.95,
            signals,
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
        assert_eq!(result.model, deserialized.model);
    }

    /// Every extension in the lookup table maps to its language.
    #[test]
    fn test_language_from_extension() {
        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension("py"), Some(Language::Python));
        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
        assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
        assert_eq!(Language::from_extension("go"), Some(Language::Go));
        assert_eq!(Language::from_extension("java"), Some(Language::Java));
        assert_eq!(Language::from_extension("c"), Some(Language::C));
        assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("cc"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("cxx"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("c++"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("h"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("hpp"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
        assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
        assert_eq!(Language::from_extension("php"), Some(Language::Php));
        assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
        assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
        assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
        assert_eq!(Language::from_extension("unknown"), None);
    }

    /// Detection from a path uses the extension and tolerates its absence.
    #[test]
    fn test_language_from_path() {
        assert_eq!(
            Language::from_path(&PathBuf::from("test.rs")),
            Some(Language::Rust)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.py")),
            Some(Language::Python)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.js")),
            Some(Language::JavaScript)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.hs")),
            Some(Language::Haskell)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.lhs")),
            Some(Language::Haskell)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.go")),
            Some(Language::Go)
        );
        assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None);
        assert_eq!(Language::from_path(&PathBuf::from("noext")), None);
    }

    /// Every variant renders as its lowercase name.
    #[test]
    fn test_language_display() {
        assert_eq!(Language::Rust.to_string(), "rust");
        assert_eq!(Language::Python.to_string(), "python");
        assert_eq!(Language::JavaScript.to_string(), "javascript");
        assert_eq!(Language::TypeScript.to_string(), "typescript");
        assert_eq!(Language::Haskell.to_string(), "haskell");
        assert_eq!(Language::Go.to_string(), "go");
        assert_eq!(Language::Java.to_string(), "java");
        assert_eq!(Language::C.to_string(), "c");
        assert_eq!(Language::Cpp.to_string(), "cpp");
        assert_eq!(Language::CSharp.to_string(), "csharp");
        assert_eq!(Language::Ruby.to_string(), "ruby");
        assert_eq!(Language::Php.to_string(), "php");
        assert_eq!(Language::Swift.to_string(), "swift");
        assert_eq!(Language::Kotlin.to_string(), "kotlin");
    }
}