1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
/// Error type used by the [`Result`] alias in this file.
///
/// Variants marked `#[from]` get a `From` implementation generated by
/// `thiserror`, so the wrapped error converts automatically when propagated
/// with `?`. The `String` variants carry only a caller-supplied message.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a `regex::Error`.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a `bincode::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a `serde_json::Error`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Message-only error rendered with the `Index error: ` prefix.
    #[error("Index error: {0}")]
    Index(String),

    /// Message-only error rendered with the `Search error: ` prefix.
    #[error("Search error: {0}")]
    Search(String),

    /// Message-only error rendered with the `Embedding error: ` prefix.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// Message-only error rendered with the `Other error: ` prefix.
    #[error("Other error: {0}")]
    Other(String),
}
31
/// Shorthand for a [`std::result::Result`] whose error type is [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
33
/// Programming languages that [`Language::from_extension`] can identify.
///
/// The `Display` implementation in this file renders each variant as a
/// lowercase name (for example `Cpp` as `cpp` and `CSharp` as `csharp`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Haskell,
    Go,
    Java,
    C,
    /// C++; also used for the header extensions `h` and `hpp`.
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
}
51
52impl Language {
53 pub fn from_extension(ext: &str) -> Option<Self> {
54 match ext {
55 "rs" => Some(Language::Rust),
56 "py" => Some(Language::Python),
57 "js" => Some(Language::JavaScript),
58 "ts" | "tsx" => Some(Language::TypeScript),
59 "hs" | "lhs" => Some(Language::Haskell),
60 "go" => Some(Language::Go),
61 "java" => Some(Language::Java),
62 "c" => Some(Language::C),
63 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
64 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
66 "rb" => Some(Language::Ruby),
67 "php" => Some(Language::Php),
68 "swift" => Some(Language::Swift),
69 "kt" | "kts" => Some(Language::Kotlin),
70 _ => None,
71 }
72 }
73
74 pub fn from_path(path: &Path) -> Option<Self> {
75 path.extension()
76 .and_then(|ext| ext.to_str())
77 .and_then(Self::from_extension)
78 }
79}
80
81impl std::fmt::Display for Language {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let name = match self {
84 Language::Rust => "rust",
85 Language::Python => "python",
86 Language::JavaScript => "javascript",
87 Language::TypeScript => "typescript",
88 Language::Haskell => "haskell",
89 Language::Go => "go",
90 Language::Java => "java",
91 Language::C => "c",
92 Language::Cpp => "cpp",
93 Language::CSharp => "csharp",
94 Language::Ruby => "ruby",
95 Language::Php => "php",
96 Language::Swift => "swift",
97 Language::Kotlin => "kotlin",
98 };
99 write!(f, "{}", name)
100 }
101}
102
/// A region of a file, described both by byte offsets and by line numbers.
///
/// NOTE(review): nothing in this file shows whether the `*_end` bounds are
/// inclusive or exclusive, or whether lines are counted from 0 or 1 (the tests
/// here use `byte_start: 0` and `line_start: 1`) — confirm against the code
/// that produces spans.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset at which the region starts.
    pub byte_start: usize,
    /// Byte offset at which the region ends.
    pub byte_end: usize,
    /// Line on which the region starts.
    pub line_start: usize,
    /// Line on which the region ends.
    pub line_end: usize,
}
110
/// Serializable record describing one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file this record describes.
    pub path: PathBuf,
    /// Content hash as a string; presumably the value produced by
    /// `compute_file_hash` — TODO confirm against the code that fills it in.
    pub hash: String,
    /// Modification timestamp. NOTE(review): unit and epoch are not visible
    /// in this file — confirm (seconds since the Unix epoch?).
    pub last_modified: u64,
    /// File size; presumably in bytes — TODO confirm.
    pub size: u64,
}
118
/// One search hit: where it was found, how it scored, and a text preview.
///
/// NOTE(review): `lang` and `symbol` use `skip_serializing_if`, which drops
/// the field entirely when it is `None`. That round-trips with self-describing
/// formats such as JSON, but not with positional formats such as bincode —
/// confirm this type is never written with bincode.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File in which the hit was found.
    pub file: PathBuf,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Score assigned to the hit. NOTE(review): range and ordering (higher is
    /// better?) are not visible in this file — confirm.
    pub score: f32,
    /// Text shown for the hit.
    pub preview: String,
    /// Language of the hit, if known; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, if any; omitted from serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
}
130
/// Search hit with extra ranking detail, presumably the shape emitted for
/// JSON output — TODO confirm against the code that builds it.
///
/// Unlike [`SearchResult`], the file is a plain `String`, and `lang`/`symbol`
/// are always serialized (a `None` becomes `null` in JSON).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File in which the hit was found, as a string.
    pub file: String,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Language of the hit, if known.
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, if any.
    pub symbol: Option<String>,
    /// Score assigned to the hit.
    pub score: f32,
    /// Per-ranker detail behind `score`.
    pub signals: SearchSignals,
    /// Text shown for the hit.
    pub preview: String,
    /// Model identifier; presumably the embedding model that was used (the
    /// tests in this file use `"bge-small"`) — TODO confirm.
    pub model: String,
}
142
/// Ranking detail attached to a [`JsonSearchResult`].
///
/// NOTE(review): the field meanings below are inferred from their names only;
/// confirm against the ranking code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Presumably the hit's rank in the lexical ranking, `None` if absent from it.
    pub lex_rank: Option<usize>,
    /// Presumably the hit's rank in the vector (semantic) ranking, `None` if
    /// absent from it.
    pub vec_rank: Option<usize>,
    /// Presumably the fused score ("rrf" likely stands for reciprocal rank
    /// fusion) — TODO confirm.
    pub rrf_score: f32,
}
149
/// Search strategy selected through [`SearchOptions::mode`].
///
/// How each mode is executed is decided by the search code that consumes it,
/// which is not part of this file; `SearchOptions::default()` selects
/// [`SearchMode::Regex`].
///
/// The enum has no payload, so it is `Copy`, `Eq` and `Hash`, matching the
/// derives on [`Language`]. It can be passed by value and used as a map or
/// set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
157
/// Settings for a single search invocation.
///
/// This struct only carries values; how each one is interpreted is decided by
/// the search code that consumes it, which is not part of this file. The
/// per-field notes are therefore inferred from the field names and should be
/// confirmed against that code. Defaults are listed in the `Default` impl.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy to run.
    pub mode: SearchMode,
    /// Query text.
    pub query: String,
    /// Path to search; defaults to `"."`.
    pub path: PathBuf,
    /// Presumably the maximum number of results to return; `None` for no limit
    /// — TODO confirm.
    pub top_k: Option<usize>,
    /// Presumably the minimum score a result must reach; `None` for no cutoff
    /// — TODO confirm.
    pub threshold: Option<f32>,
    /// Presumably match without regard to letter case.
    pub case_insensitive: bool,
    /// Presumably match whole words only.
    pub whole_word: bool,
    /// Presumably treat `query` as a literal string rather than a pattern.
    pub fixed_string: bool,
    /// Presumably include line numbers in the output.
    pub line_numbers: bool,
    /// Presumably the number of context lines shown around each match.
    pub context_lines: usize,
    /// Presumably the number of context lines shown before each match.
    pub before_context_lines: usize,
    /// Presumably the number of context lines shown after each match.
    pub after_context_lines: usize,
    /// Presumably descend into subdirectories of `path`.
    pub recursive: bool,
    /// Presumably emit results as JSON.
    pub json_output: bool,
    /// Presumably rebuild the index before searching.
    pub reindex: bool,
    /// Presumably include scores in the output.
    pub show_scores: bool,
    /// Presumably include file names in the output.
    pub show_filenames: bool,
    /// Presumably list only the files that contain a match.
    pub files_with_matches: bool,
    /// Presumably list only the files that contain no match.
    pub files_without_matches: bool,
    /// Patterns to exclude; defaults to `get_default_exclude_patterns()`.
    /// NOTE(review): what the patterns are matched against is not visible here.
    pub exclude_patterns: Vec<String>,
    /// Presumably skip files ignored by `.gitignore`.
    pub respect_gitignore: bool,
    /// NOTE(review): meaning not evident from this file — confirm with the
    /// consumer (possibly "return the whole enclosing section of a match").
    pub full_section: bool,
}
183
184impl Default for SearchOptions {
185 fn default() -> Self {
186 Self {
187 mode: SearchMode::Regex,
188 query: String::new(),
189 path: PathBuf::from("."),
190 top_k: None,
191 threshold: None,
192 case_insensitive: false,
193 whole_word: false,
194 fixed_string: false,
195 line_numbers: false,
196 context_lines: 0,
197 before_context_lines: 0,
198 after_context_lines: 0,
199 recursive: true,
200 json_output: false,
201 reindex: false,
202 show_scores: false,
203 show_filenames: false,
204 files_with_matches: false,
205 files_without_matches: false,
206 exclude_patterns: get_default_exclude_patterns(),
207 respect_gitignore: true,
208 full_section: false,
209 }
210 }
211}
212
/// Returns the exclusion patterns that `SearchOptions::default()` starts with.
///
/// A fresh `Vec` is built on every call, so callers may extend or prune it
/// freely. NOTE(review): what the patterns are matched against (directory
/// names, path components, globs) is decided by the consumer and is not
/// visible here.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // Sidecar directory (see `get_sidecar_path`) and cache directories.
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version-control metadata.
        ".git",
        ".svn",
        ".hg",
        // Build output and dependency directories.
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Python virtual environments.
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // Editor / IDE settings.
        ".vscode",
        ".idea",
        ".eclipse",
        // Scratch directories.
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES.iter().copied().map(String::from).collect()
}
252
/// Computes the sidecar path for `file_path` inside `repo_root/.ck`.
///
/// The file's path relative to `repo_root` is mirrored under `.ck`, and `.ck`
/// is appended to the file name: `src/main.rs` becomes `.ck/src/main.rs.ck`
/// and `README` becomes `.ck/README.ck`.
///
/// The extension is handled as an `OsString`, so extensions that are not valid
/// UTF-8 are preserved byte-for-byte. A lossy string conversion would map
/// distinct files onto the same sidecar.
///
/// If `file_path` is not located under `repo_root`, it is used as given. An
/// absolute `file_path` then replaces the `.ck` base entirely (that is how
/// [`PathBuf::push`] treats absolute paths), so the result sits next to the
/// file rather than under `repo_root/.ck`.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    let mut sidecar = repo_root.join(".ck");
    sidecar.push(relative);

    // `set_extension` swaps out the current extension, so the replacement has
    // to repeat it: `rs` -> `rs.ck`. Files without one simply gain `.ck`.
    let sidecar_ext = match relative.extension() {
        Some(existing) => {
            let mut ext = existing.to_os_string();
            ext.push(".ck");
            ext
        }
        None => std::ffi::OsString::from("ck"),
    };
    sidecar.set_extension(sidecar_ext);
    sidecar
}
264
265pub fn compute_file_hash(path: &Path) -> Result<String> {
266 let data = std::fs::read(path)?;
267 let hash = blake3::hash(&data);
268 Ok(hash.to_hex().to_string())
269}
270
/// Unit tests for the types and helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A `Span` stores exactly the values it was built with.
    #[test]
    fn test_span_creation() {
        let span = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 10);
        assert_eq!(span.line_start, 1);
        assert_eq!(span.line_end, 2);
    }

    /// Every field of `SearchOptions::default()` has its documented value.
    #[test]
    fn test_search_options_default() {
        let options = SearchOptions::default();
        assert_eq!(options.mode, SearchMode::Regex);
        assert_eq!(options.query, "");
        assert_eq!(options.path, PathBuf::from("."));
        assert_eq!(options.top_k, None);
        assert_eq!(options.threshold, None);
        assert!(!options.case_insensitive);
        assert!(!options.whole_word);
        assert!(!options.fixed_string);
        assert!(!options.line_numbers);
        assert_eq!(options.context_lines, 0);
        assert_eq!(options.before_context_lines, 0);
        assert_eq!(options.after_context_lines, 0);
        assert!(options.recursive);
        assert!(!options.json_output);
        assert!(!options.reindex);
        assert!(!options.show_scores);
        assert!(!options.show_filenames);
        assert!(!options.files_with_matches);
        assert!(!options.files_without_matches);
        assert_eq!(options.exclude_patterns, get_default_exclude_patterns());
        assert!(options.respect_gitignore);
        assert!(!options.full_section);
    }

    /// The default exclusion list covers the sidecar directory and common
    /// VCS / build directories.
    #[test]
    fn test_default_exclude_patterns() {
        let patterns = get_default_exclude_patterns();
        for expected in [".ck", ".git", "target", "node_modules", "__pycache__"] {
            assert!(
                patterns.iter().any(|p| p == expected),
                "missing default exclude pattern: {expected}"
            );
        }
    }

    /// `FileMetadata` survives a JSON round trip unchanged.
    #[test]
    fn test_file_metadata_serialization() {
        let metadata = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let json = serde_json::to_string(&metadata).unwrap();
        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();

        assert_eq!(metadata.path, deserialized.path);
        assert_eq!(metadata.hash, deserialized.hash);
        assert_eq!(metadata.last_modified, deserialized.last_modified);
        assert_eq!(metadata.size, deserialized.size);
    }

    /// `SearchResult` survives a JSON round trip, including its span.
    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("main".to_string()),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.span.byte_start, deserialized.span.byte_start);
        assert_eq!(result.span.byte_end, deserialized.span.byte_end);
        assert_eq!(result.span.line_start, deserialized.span.line_start);
        assert_eq!(result.span.line_end, deserialized.span.line_end);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
    }

    /// Absent `lang`/`symbol` are left out of the JSON and read back as `None`.
    #[test]
    fn test_search_result_omits_absent_optionals() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.5,
            preview: "hello world".to_string(),
            lang: None,
            symbol: None,
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(!json.contains("\"lang\""));
        assert!(!json.contains("\"symbol\""));

        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.lang, None);
        assert_eq!(deserialized.symbol, None);
    }

    /// A file under the repo root maps to the mirrored path below `.ck`.
    #[test]
    fn test_get_sidecar_path() {
        let repo_root = PathBuf::from("/home/user/project");
        let file_path = PathBuf::from("/home/user/project/src/main.rs");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");

        assert_eq!(sidecar, expected);
    }

    /// A file without an extension simply gains `.ck`.
    #[test]
    fn test_get_sidecar_path_no_extension() {
        let repo_root = PathBuf::from("/project");
        let file_path = PathBuf::from("/project/README");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/project/.ck/README.ck");

        assert_eq!(sidecar, expected);
    }

    /// Hashing is deterministic, hex-encoded, and sensitive to content changes.
    #[test]
    fn test_compute_file_hash() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");

        fs::write(&file_path, "hello world").unwrap();

        let hash1 = compute_file_hash(&file_path).unwrap();
        let hash2 = compute_file_hash(&file_path).unwrap();

        assert_eq!(hash1, hash2);
        assert!(!hash1.is_empty());
        // A BLAKE3 digest is 32 bytes, i.e. 64 hexadecimal characters.
        assert_eq!(hash1.len(), 64);
        assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));

        fs::write(&file_path, "hello rust").unwrap();
        let hash3 = compute_file_hash(&file_path).unwrap();
        assert_ne!(hash1, hash3);
    }

    /// Hashing a missing file reports an I/O error.
    #[test]
    fn test_compute_file_hash_nonexistent() {
        // Resolve the missing file inside a fresh temp dir so the outcome does
        // not depend on the process's current working directory.
        let temp_dir = TempDir::new().unwrap();
        let missing = temp_dir.path().join("nonexistent.txt");

        let result = compute_file_hash(&missing);
        assert!(matches!(result, Err(CkError::Io(_))));
    }

    /// `JsonSearchResult` survives a JSON round trip, including `None` fields.
    #[test]
    fn test_json_search_result_serialization() {
        let signals = SearchSignals {
            lex_rank: Some(1),
            vec_rank: Some(2),
            rrf_score: 0.85,
        };

        let result = JsonSearchResult {
            file: "test.txt".to_string(),
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            lang: None,
            symbol: None,
            score: 0.95,
            signals,
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.signals.lex_rank, deserialized.signals.lex_rank);
        assert_eq!(result.signals.vec_rank, deserialized.signals.vec_rank);
        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.model, deserialized.model);
    }

    /// Each supported extension maps to its language; unknown ones map to `None`.
    #[test]
    fn test_language_from_extension() {
        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension("py"), Some(Language::Python));
        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
        assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
        assert_eq!(Language::from_extension("go"), Some(Language::Go));
        assert_eq!(Language::from_extension("java"), Some(Language::Java));
        assert_eq!(Language::from_extension("c"), Some(Language::C));
        assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("cc"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("cxx"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("c++"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("h"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("hpp"), Some(Language::Cpp));
        assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
        assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
        assert_eq!(Language::from_extension("php"), Some(Language::Php));
        assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
        assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
        assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
        assert_eq!(Language::from_extension("unknown"), None);
        assert_eq!(Language::from_extension(""), None);
    }

    /// Detection from a path looks only at the final extension.
    #[test]
    fn test_language_from_path() {
        assert_eq!(
            Language::from_path(&PathBuf::from("test.rs")),
            Some(Language::Rust)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.py")),
            Some(Language::Python)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.js")),
            Some(Language::JavaScript)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.hs")),
            Some(Language::Haskell)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.lhs")),
            Some(Language::Haskell)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("test.go")),
            Some(Language::Go)
        );
        assert_eq!(
            Language::from_path(&PathBuf::from("nested/dir/lib.test.ts")),
            Some(Language::TypeScript)
        );
        assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None);
        assert_eq!(Language::from_path(&PathBuf::from("noext")), None);
        assert_eq!(Language::from_path(&PathBuf::from(".bashrc")), None);
    }

    /// Every variant renders as its lowercase name.
    #[test]
    fn test_language_display() {
        assert_eq!(Language::Rust.to_string(), "rust");
        assert_eq!(Language::Python.to_string(), "python");
        assert_eq!(Language::JavaScript.to_string(), "javascript");
        assert_eq!(Language::TypeScript.to_string(), "typescript");
        assert_eq!(Language::Haskell.to_string(), "haskell");
        assert_eq!(Language::Go.to_string(), "go");
        assert_eq!(Language::Java.to_string(), "java");
        assert_eq!(Language::C.to_string(), "c");
        assert_eq!(Language::Cpp.to_string(), "cpp");
        assert_eq!(Language::CSharp.to_string(), "csharp");
        assert_eq!(Language::Ruby.to_string(), "ruby");
        assert_eq!(Language::Php.to_string(), "php");
        assert_eq!(Language::Swift.to_string(), "swift");
        assert_eq!(Language::Kotlin.to_string(), "kotlin");
    }
}