1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum CkError {
7 #[error("IO error: {0}")]
8 Io(#[from] std::io::Error),
9
10 #[error("Regex error: {0}")]
11 Regex(#[from] regex::Error),
12
13 #[error("Serialization error: {0}")]
14 Serialization(#[from] bincode::Error),
15
16 #[error("JSON error: {0}")]
17 Json(#[from] serde_json::Error),
18
19 #[error("Index error: {0}")]
20 Index(String),
21
22 #[error("Search error: {0}")]
23 Search(String),
24
25 #[error("Embedding error: {0}")]
26 Embedding(String),
27
28 #[error("Span validation error: {0}")]
29 SpanValidation(String),
30
31 #[error("Other error: {0}")]
32 Other(String),
33}
34
35pub type Result<T> = std::result::Result<T, CkError>;
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
38pub enum Language {
39 Rust,
40 Python,
41 JavaScript,
42 TypeScript,
43 Haskell,
44 Go,
45 Java,
46 C,
47 Cpp,
48 CSharp,
49 Ruby,
50 Php,
51 Swift,
52 Kotlin,
53 Pdf,
54}
55
56impl Language {
57 pub fn from_extension(ext: &str) -> Option<Self> {
58 match ext.to_lowercase().as_str() {
60 "rs" => Some(Language::Rust),
61 "py" => Some(Language::Python),
62 "js" => Some(Language::JavaScript),
63 "ts" | "tsx" => Some(Language::TypeScript),
64 "hs" | "lhs" => Some(Language::Haskell),
65 "go" => Some(Language::Go),
66 "java" => Some(Language::Java),
67 "c" => Some(Language::C),
68 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
69 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
71 "rb" => Some(Language::Ruby),
72 "php" => Some(Language::Php),
73 "swift" => Some(Language::Swift),
74 "kt" | "kts" => Some(Language::Kotlin),
75 "pdf" => Some(Language::Pdf),
76 _ => None,
77 }
78 }
79
80 pub fn from_path(path: &Path) -> Option<Self> {
81 path.extension()
82 .and_then(|ext| ext.to_str())
83 .and_then(Self::from_extension)
84 }
85}
86
87impl std::fmt::Display for Language {
88 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89 let name = match self {
90 Language::Rust => "rust",
91 Language::Python => "python",
92 Language::JavaScript => "javascript",
93 Language::TypeScript => "typescript",
94 Language::Haskell => "haskell",
95 Language::Go => "go",
96 Language::Java => "java",
97 Language::C => "c",
98 Language::Cpp => "cpp",
99 Language::CSharp => "csharp",
100 Language::Ruby => "ruby",
101 Language::Php => "php",
102 Language::Swift => "swift",
103 Language::Kotlin => "kotlin",
104 Language::Pdf => "pdf",
105 };
106 write!(f, "{}", name)
107 }
108}
109
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct Span {
112 pub byte_start: usize,
113 pub byte_end: usize,
114 pub line_start: usize,
115 pub line_end: usize,
116}
117
118impl Span {
119 pub fn new(
121 byte_start: usize,
122 byte_end: usize,
123 line_start: usize,
124 line_end: usize,
125 ) -> Result<Self> {
126 let span = Self {
127 byte_start,
128 byte_end,
129 line_start,
130 line_end,
131 };
132 span.validate()?;
133 Ok(span)
134 }
135
136 pub fn new_unchecked(
142 byte_start: usize,
143 byte_end: usize,
144 line_start: usize,
145 line_end: usize,
146 ) -> Self {
147 Self {
148 byte_start,
149 byte_end,
150 line_start,
151 line_end,
152 }
153 }
154
155 pub fn validate(&self) -> Result<()> {
157 if self.line_start == 0 {
159 return Err(CkError::SpanValidation(
160 "Line start cannot be zero (lines are 1-indexed)".to_string(),
161 ));
162 }
163
164 if self.line_end == 0 {
165 return Err(CkError::SpanValidation(
166 "Line end cannot be zero (lines are 1-indexed)".to_string(),
167 ));
168 }
169
170 if self.byte_start > self.byte_end {
172 return Err(CkError::SpanValidation(format!(
173 "Invalid byte range: start ({}) > end ({})",
174 self.byte_start, self.byte_end
175 )));
176 }
177
178 if self.line_start > self.line_end {
180 return Err(CkError::SpanValidation(format!(
181 "Invalid line range: start ({}) > end ({})",
182 self.line_start, self.line_end
183 )));
184 }
185
186 Ok(())
187 }
188
189 pub fn is_valid(&self) -> bool {
191 self.validate().is_ok()
192 }
193
194 pub fn byte_len(&self) -> usize {
196 self.byte_end.saturating_sub(self.byte_start)
197 }
198
199 pub fn line_count(&self) -> usize {
201 self.line_end.saturating_sub(self.line_start) + 1
202 }
203}
204
205#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct FileMetadata {
207 pub path: PathBuf,
208 pub hash: String,
209 pub last_modified: u64,
210 pub size: u64,
211}
212
213#[derive(Debug, Clone, Serialize, Deserialize)]
214pub struct SearchResult {
215 pub file: PathBuf,
216 pub span: Span,
217 pub score: f32,
218 pub preview: String,
219 #[serde(skip_serializing_if = "Option::is_none")]
220 pub lang: Option<Language>,
221 #[serde(skip_serializing_if = "Option::is_none")]
222 pub symbol: Option<String>,
223 #[serde(skip_serializing_if = "Option::is_none")]
224 pub chunk_hash: Option<String>,
225 #[serde(skip_serializing_if = "Option::is_none")]
226 pub index_epoch: Option<u64>,
227}
228
229#[derive(Debug, Clone)]
231pub struct SearchResults {
232 pub matches: Vec<SearchResult>,
233 pub closest_below_threshold: Option<SearchResult>,
235}
236
237#[derive(Debug, Clone, Serialize, Deserialize)]
238pub struct JsonSearchResult {
239 pub file: String,
240 pub span: Span,
241 pub lang: Option<Language>,
242 pub symbol: Option<String>,
243 pub score: f32,
244 pub signals: SearchSignals,
245 pub preview: String,
246 pub model: String,
247}
248
249#[derive(Debug, Clone, Serialize, Deserialize)]
250pub struct JsonlSearchResult {
251 pub path: String,
252 pub span: Span,
253 pub language: Option<String>,
254 #[serde(skip_serializing_if = "Option::is_none")]
255 pub snippet: Option<String>,
256 #[serde(skip_serializing_if = "Option::is_none")]
257 pub score: Option<f32>,
258 #[serde(skip_serializing_if = "Option::is_none")]
259 pub chunk_hash: Option<String>,
260 #[serde(skip_serializing_if = "Option::is_none")]
261 pub index_epoch: Option<u64>,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct SearchSignals {
266 pub lex_rank: Option<usize>,
267 pub vec_rank: Option<usize>,
268 pub rrf_score: f32,
269}
270
/// Search strategy selected through [`SearchOptions::mode`].
///
/// [`SearchOptions::default`] selects [`SearchMode::Regex`]. What each mode
/// does is implemented by the search code, which is outside this file.
#[derive(Clone, Debug, PartialEq)]
pub enum SearchMode {
    /// Regex mode (the default).
    Regex,
    /// Lexical mode.
    Lexical,
    /// Semantic mode.
    Semantic,
    /// Hybrid mode.
    Hybrid,
}
278
279#[derive(Debug, Clone)]
280pub struct SearchOptions {
281 pub mode: SearchMode,
282 pub query: String,
283 pub path: PathBuf,
284 pub top_k: Option<usize>,
285 pub threshold: Option<f32>,
286 pub case_insensitive: bool,
287 pub whole_word: bool,
288 pub fixed_string: bool,
289 pub line_numbers: bool,
290 pub context_lines: usize,
291 pub before_context_lines: usize,
292 pub after_context_lines: usize,
293 pub recursive: bool,
294 pub json_output: bool,
295 pub jsonl_output: bool,
296 pub no_snippet: bool,
297 pub reindex: bool,
298 pub show_scores: bool,
299 pub show_filenames: bool,
300 pub files_with_matches: bool,
301 pub files_without_matches: bool,
302 pub exclude_patterns: Vec<String>,
303 pub respect_gitignore: bool,
304 pub full_section: bool,
305 pub rerank: bool,
307 pub rerank_model: Option<String>,
308 pub embedding_model: Option<String>,
309}
310
311impl JsonlSearchResult {
312 pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
313 Self {
314 path: result.file.to_string_lossy().to_string(),
315 span: result.span.clone(),
316 language: result.lang.as_ref().map(|l| l.to_string()),
317 snippet: if include_snippet {
318 Some(result.preview.clone())
319 } else {
320 None
321 },
322 score: if result.score >= 0.0 {
323 Some(result.score)
324 } else {
325 None
326 },
327 chunk_hash: result.chunk_hash.clone(),
328 index_epoch: result.index_epoch,
329 }
330 }
331}
332
333impl Default for SearchOptions {
334 fn default() -> Self {
335 Self {
336 mode: SearchMode::Regex,
337 query: String::new(),
338 path: PathBuf::from("."),
339 top_k: None,
340 threshold: None,
341 case_insensitive: false,
342 whole_word: false,
343 fixed_string: false,
344 line_numbers: false,
345 context_lines: 0,
346 before_context_lines: 0,
347 after_context_lines: 0,
348 recursive: true,
349 json_output: false,
350 jsonl_output: false,
351 no_snippet: false,
352 reindex: false,
353 show_scores: false,
354 show_filenames: false,
355 files_with_matches: false,
356 files_without_matches: false,
357 exclude_patterns: get_default_exclude_patterns(),
358 respect_gitignore: true,
359 full_section: false,
360 rerank: false,
362 rerank_model: None,
363 embedding_model: None,
364 }
365 }
366}
367
/// Returns the exclusion patterns used by `SearchOptions::default()`.
///
/// The list is returned as freshly allocated `String`s, in a fixed order.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck data and cache directories
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // version-control metadata
        ".git",
        ".svn",
        ".hg",
        // build output and dependency directories
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // editor / IDE directories
        ".vscode",
        ".idea",
        ".eclipse",
        // temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|pattern| String::from(*pattern))
        .collect()
}
407
/// Returns the text written into a newly created `.ckignore` file.
///
/// The content is a static string: a short header, grouped glob patterns
/// (images, video, audio, binaries, archives, data files, config formats) and
/// a trailing marker line under which users can add their own patterns.
pub fn get_default_ckignore_content() -> &'static str {
    // Kept as a single raw string so the file on disk matches this text exactly.
    const DEFAULT_CKIGNORE: &str = r#"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
"#;

    DEFAULT_CKIGNORE
}
477
478pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
480 let ckignore_path = repo_root.join(".ckignore");
481
482 if !ckignore_path.exists() {
483 return Ok(Vec::new());
484 }
485
486 let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
487
488 let patterns: Vec<String> = content
489 .lines()
490 .map(|line| line.trim())
491 .filter(|line| !line.is_empty() && !line.starts_with('#'))
492 .map(|line| line.to_string())
493 .collect();
494
495 Ok(patterns)
496}
497
498pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
500 let ckignore_path = repo_root.join(".ckignore");
501
502 if ckignore_path.exists() {
503 return Ok(false); }
505
506 std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
507
508 Ok(true) }
510
/// Computes the sidecar path for `file_path` under `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored below `.ck`, and `.ck`
/// is appended to its extension (`src/main.rs` -> `.ck/src/main.rs.ck`); a
/// file without an extension simply gets the extension `ck`.
///
/// When `file_path` is not located under `repo_root` it is used as given.
/// NOTE: because of `PathBuf::push` semantics, an absolute `file_path` outside
/// `repo_root` then replaces the `<repo_root>/.ck` prefix entirely.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = match file_path.strip_prefix(repo_root) {
        Ok(inside_repo) => inside_repo,
        Err(_) => file_path,
    };

    let sidecar_ext = match relative.extension() {
        Some(original_ext) => format!("{}.ck", original_ext.to_string_lossy()),
        None => String::from("ck"),
    };

    let mut sidecar = repo_root.join(".ck").join(relative);
    sidecar.set_extension(sidecar_ext);
    sidecar
}
522
523pub fn compute_file_hash(path: &Path) -> Result<String> {
524 use std::io::Read;
525
526 let mut file = std::fs::File::open(path)?;
527 let mut hasher = blake3::Hasher::new();
528
529 let mut buffer = [0u8; 65536]; loop {
532 let bytes_read = file.read(&mut buffer)?;
533 if bytes_read == 0 {
534 break;
535 }
536 hasher.update(&buffer[..bytes_read]);
537 }
538
539 let hash = hasher.finalize();
540 Ok(hash.to_hex().to_string())
541}
542
/// Helpers for PDF files: extension detection and the location of the cached
/// text content under `<repo_root>/.ck/content`.
pub mod pdf {
    use std::path::{Path, PathBuf};

    /// Returns `true` when `path` has the extension `pdf`, compared
    /// ASCII-case-insensitively. Paths without an extension (including a file
    /// literally named `pdf`) or with a non-UTF-8 extension yield `false`.
    pub fn is_pdf_file(path: &Path) -> bool {
        matches!(
            path.extension().and_then(|ext| ext.to_str()),
            Some(ext) if ext.eq_ignore_ascii_case("pdf")
        )
    }

    /// Computes where the cached text content for `file_path` lives.
    ///
    /// The file's path relative to `repo_root` is mirrored below
    /// `<repo_root>/.ck/content`, and `.txt` is appended to its extension
    /// (`docs/manual.pdf` -> `.ck/content/docs/manual.pdf.txt`); a file without
    /// an extension gets the extension `txt`. A `file_path` that is not under
    /// `repo_root` is used as given.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = match file_path.strip_prefix(repo_root) {
            Ok(inside_repo) => inside_repo,
            Err(_) => file_path,
        };

        let cache_ext = match relative.extension() {
            Some(original_ext) => format!("{}.txt", original_ext.to_string_lossy()),
            None => String::from("txt"),
        };

        let mut cache_path = repo_root.join(".ck").join("content").join(relative);
        cache_path.set_extension(cache_ext);
        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            // Any casing of the extension counts.
            for name in ["test.pdf", "test.PDF", "test.Pdf"].iter() {
                assert!(is_pdf_file(&PathBuf::from(name)), "{:?} should be a PDF", name);
            }
            // Other extensions, no extension, and a bare file named "pdf" do not.
            for name in ["test.txt", "test", "pdf"].iter() {
                assert!(!is_pdf_file(&PathBuf::from(name)), "{:?} should not be a PDF", name);
            }
        }

        #[test]
        fn test_get_content_cache_path() {
            let root = PathBuf::from("/project");
            let pdf = PathBuf::from("/project/docs/manual.pdf");

            assert_eq!(
                get_content_cache_path(&root, &pdf),
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let root = PathBuf::from("/project");
            let plain = PathBuf::from("/project/docs/manual");

            assert_eq!(
                get_content_cache_path(&root, &plain),
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let root = PathBuf::from("/project");
            // Already relative: used as given below the cache directory.
            let relative_pdf = PathBuf::from("docs/manual.pdf");

            assert_eq!(
                get_content_cache_path(&root, &relative_pdf),
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }
    }
}
623
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Asserts that `result` is a `SpanValidation` error whose message
    /// contains every one of `fragments`.
    fn assert_span_rejected(result: Result<Span>, fragments: &[&str]) {
        assert!(result.is_err());
        match result {
            Err(CkError::SpanValidation(msg)) => {
                for &fragment in fragments {
                    assert!(msg.contains(fragment));
                }
            }
            _ => panic!("Expected SpanValidation error"),
        }
    }

    #[test]
    fn test_span_valid_creation() {
        let span = Span::new(0, 10, 1, 2).unwrap();

        assert_eq!((span.byte_start, span.byte_end), (0, 10));
        assert_eq!((span.line_start, span.line_end), (1, 2));
        assert!(span.is_valid());
    }

    #[test]
    fn test_span_validation_valid_cases() {
        // ((byte_start, byte_end, line_start, line_end), byte_len, line_count)
        let cases = [
            // Zero-length span on a single line.
            ((10, 10, 1, 1), 0, 1),
            // Multi-line span.
            ((0, 100, 1, 10), 100, 10),
            // Non-empty span inside one line.
            ((5, 25, 3, 3), 20, 1),
        ];

        for &((byte_start, byte_end, line_start, line_end), byte_len, line_count) in &cases {
            let span = Span::new(byte_start, byte_end, line_start, line_end).unwrap();
            assert!(span.is_valid());
            assert_eq!(span.byte_len(), byte_len);
            assert_eq!(span.line_count(), line_count);
        }
    }

    #[test]
    fn test_span_validation_invalid_byte_range() {
        // byte_start > byte_end
        assert_span_rejected(
            Span::new(10, 5, 1, 2),
            &["Invalid byte range", "start (10) > end (5)"],
        );
    }

    #[test]
    fn test_span_validation_invalid_line_range() {
        // line_start > line_end
        assert_span_rejected(
            Span::new(0, 10, 5, 2),
            &["Invalid line range", "start (5) > end (2)"],
        );
    }

    #[test]
    fn test_span_validation_zero_line_numbers() {
        // Lines are 1-indexed, so zero is rejected for either bound.
        assert_span_rejected(Span::new(0, 10, 0, 2), &["Line start cannot be zero"]);
        assert_span_rejected(Span::new(0, 10, 1, 0), &["Line end cannot be zero"]);
    }

    #[test]
    fn test_span_unchecked_creation() {
        // Deliberately invalid: reversed byte range and a zero line number.
        let span = Span::new_unchecked(10, 5, 0, 1);

        assert_eq!(
            (span.byte_start, span.byte_end, span.line_start, span.line_end),
            (10, 5, 0, 1)
        );
        assert!(!span.is_valid());
    }

    #[test]
    fn test_span_validation_methods() {
        let well_formed = Span::new_unchecked(0, 10, 1, 2);
        assert!(well_formed.validate().is_ok());
        assert!(well_formed.is_valid());

        // Reversed byte range, then a zero line number.
        let rejected = [
            Span::new_unchecked(10, 5, 1, 2),
            Span::new_unchecked(0, 10, 0, 1),
        ];
        for span in rejected.iter() {
            assert!(span.validate().is_err());
            assert!(!span.is_valid());
        }
    }

    #[test]
    fn test_span_utility_methods() {
        let multi_line = Span::new(10, 25, 5, 8).unwrap();
        assert_eq!(multi_line.byte_len(), 15);
        // Lines 5, 6, 7 and 8.
        assert_eq!(multi_line.line_count(), 4);

        let single_line = Span::new(0, 5, 1, 1).unwrap();
        assert_eq!(single_line.line_count(), 1);
        assert_eq!(single_line.byte_len(), 5);

        let empty = Span::new(10, 10, 3, 3).unwrap();
        assert_eq!(empty.byte_len(), 0);
        assert_eq!(empty.line_count(), 1);
    }

    #[test]
    fn test_span_legacy_struct_literal_still_works() {
        // Direct construction through the public fields must keep compiling.
        let span = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!((span.byte_start, span.byte_end), (0, 10));
        assert_eq!((span.line_start, span.line_end), (1, 2));
        assert!(span.is_valid());
    }

    #[test]
    fn test_search_options_default() {
        let defaults = SearchOptions::default();

        assert!(matches!(defaults.mode, SearchMode::Regex));
        assert_eq!(defaults.query, "");
        assert_eq!(defaults.path, PathBuf::from("."));
        assert_eq!(defaults.top_k, None);
        assert_eq!(defaults.threshold, None);
        assert_eq!(defaults.context_lines, 0);
        assert!(defaults.recursive);

        let off_by_default = [
            ("case_insensitive", defaults.case_insensitive),
            ("whole_word", defaults.whole_word),
            ("fixed_string", defaults.fixed_string),
            ("line_numbers", defaults.line_numbers),
            ("json_output", defaults.json_output),
            ("reindex", defaults.reindex),
            ("show_scores", defaults.show_scores),
            ("show_filenames", defaults.show_filenames),
        ];
        for &(name, enabled) in &off_by_default {
            assert!(!enabled, "{} should default to false", name);
        }
    }

    #[test]
    fn test_file_metadata_serialization() {
        let original = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: FileMetadata = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.path, decoded.path);
        assert_eq!(original.hash, decoded.hash);
        assert_eq!(original.last_modified, decoded.last_modified);
        assert_eq!(original.size, decoded.size);
    }

    #[test]
    fn test_search_result_serialization() {
        let original = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("main".to_string()),
            chunk_hash: Some("abc123".to_string()),
            index_epoch: Some(1699123456),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SearchResult = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.file, decoded.file);
        assert_eq!(original.score, decoded.score);
        assert_eq!(original.preview, decoded.preview);
        assert_eq!(original.lang, decoded.lang);
        assert_eq!(original.symbol, decoded.symbol);
        assert_eq!(original.chunk_hash, decoded.chunk_hash);
        assert_eq!(original.index_epoch, decoded.index_epoch);
    }

    #[test]
    fn test_jsonl_search_result_conversion() {
        let hit = SearchResult {
            file: PathBuf::from("src/auth.rs"),
            span: Span {
                byte_start: 1203,
                byte_end: 1456,
                line_start: 42,
                line_end: 58,
            },
            score: 0.89,
            preview: "function authenticate(user) {...}".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("authenticate".to_string()),
            chunk_hash: Some("abc123def456".to_string()),
            index_epoch: Some(1699123456),
        };

        // With the snippet included.
        let with_snippet = JsonlSearchResult::from_search_result(&hit, true);
        assert_eq!(with_snippet.path, "src/auth.rs");
        assert_eq!(with_snippet.span.line_start, 42);
        assert_eq!(with_snippet.language, Some("rust".to_string()));
        assert_eq!(
            with_snippet.snippet,
            Some("function authenticate(user) {...}".to_string())
        );
        assert_eq!(with_snippet.score, Some(0.89));
        assert_eq!(with_snippet.chunk_hash, Some("abc123def456".to_string()));
        assert_eq!(with_snippet.index_epoch, Some(1699123456));

        // With the snippet left out.
        let without_snippet = JsonlSearchResult::from_search_result(&hit, false);
        assert_eq!(without_snippet.snippet, None);
        assert_eq!(without_snippet.path, "src/auth.rs");
    }

    #[test]
    fn test_get_sidecar_path() {
        let root = PathBuf::from("/home/user/project");
        let source = PathBuf::from("/home/user/project/src/main.rs");

        assert_eq!(
            get_sidecar_path(&root, &source),
            PathBuf::from("/home/user/project/.ck/src/main.rs.ck")
        );
    }

    #[test]
    fn test_get_sidecar_path_no_extension() {
        let root = PathBuf::from("/project");
        let source = PathBuf::from("/project/README");

        assert_eq!(
            get_sidecar_path(&root, &source),
            PathBuf::from("/project/.ck/README.ck")
        );
    }

    #[test]
    fn test_compute_file_hash() {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join("test.txt");

        fs::write(&target, "hello world").unwrap();
        let first = compute_file_hash(&target).unwrap();
        let second = compute_file_hash(&target).unwrap();

        // Hashing the same content twice gives the same, non-empty result.
        assert_eq!(first, second);
        assert!(!first.is_empty());

        // Different content gives a different hash.
        fs::write(&target, "hello rust").unwrap();
        let after_change = compute_file_hash(&target).unwrap();
        assert_ne!(first, after_change);
    }

    #[test]
    fn test_compute_file_hash_nonexistent() {
        let outcome = compute_file_hash(&PathBuf::from("nonexistent.txt"));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_compute_file_hash_large_file() {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join("large_test.txt");

        // Bigger than the 64 KiB read buffer, so hashing needs several reads.
        let payload = "a".repeat(100_000);
        fs::write(&target, &payload).unwrap();

        let first = compute_file_hash(&target).unwrap();
        let second = compute_file_hash(&target).unwrap();
        assert_eq!(first, second);
        assert!(!first.is_empty());

        fs::write(&target, "small content").unwrap();
        let after_change = compute_file_hash(&target).unwrap();
        assert_ne!(first, after_change);
    }

    #[test]
    fn test_json_search_result_serialization() {
        let original = JsonSearchResult {
            file: "test.txt".to_string(),
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            lang: None,
            symbol: None,
            score: 0.95,
            signals: SearchSignals {
                lex_rank: Some(1),
                vec_rank: Some(2),
                rrf_score: 0.85,
            },
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: JsonSearchResult = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.file, decoded.file);
        assert_eq!(original.score, decoded.score);
        assert_eq!(original.signals.rrf_score, decoded.signals.rrf_score);
        assert_eq!(original.model, decoded.model);
    }

    #[test]
    fn test_language_from_extension() {
        let expectations = [
            ("rs", Some(Language::Rust)),
            ("py", Some(Language::Python)),
            ("js", Some(Language::JavaScript)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("hs", Some(Language::Haskell)),
            ("lhs", Some(Language::Haskell)),
            ("go", Some(Language::Go)),
            ("java", Some(Language::Java)),
            ("c", Some(Language::C)),
            ("cpp", Some(Language::Cpp)),
            ("cs", Some(Language::CSharp)),
            ("rb", Some(Language::Ruby)),
            ("php", Some(Language::Php)),
            ("swift", Some(Language::Swift)),
            ("kt", Some(Language::Kotlin)),
            ("kts", Some(Language::Kotlin)),
            ("unknown", None),
        ];

        for &(ext, expected) in &expectations {
            assert_eq!(Language::from_extension(ext), expected, "extension {:?}", ext);
        }
    }

    #[test]
    fn test_language_from_extension_case_insensitive() {
        let expectations = [
            // All uppercase.
            ("RS", Some(Language::Rust)),
            ("PY", Some(Language::Python)),
            ("JS", Some(Language::JavaScript)),
            ("TS", Some(Language::TypeScript)),
            ("TSX", Some(Language::TypeScript)),
            ("HS", Some(Language::Haskell)),
            ("LHS", Some(Language::Haskell)),
            ("GO", Some(Language::Go)),
            ("JAVA", Some(Language::Java)),
            ("C", Some(Language::C)),
            ("CPP", Some(Language::Cpp)),
            ("CC", Some(Language::Cpp)),
            ("CXX", Some(Language::Cpp)),
            ("H", Some(Language::Cpp)),
            ("HPP", Some(Language::Cpp)),
            ("CS", Some(Language::CSharp)),
            ("RB", Some(Language::Ruby)),
            ("PHP", Some(Language::Php)),
            ("SWIFT", Some(Language::Swift)),
            ("KT", Some(Language::Kotlin)),
            ("KTS", Some(Language::Kotlin)),
            ("PDF", Some(Language::Pdf)),
            // Mixed case.
            ("Rs", Some(Language::Rust)),
            ("Py", Some(Language::Python)),
            ("Js", Some(Language::JavaScript)),
            ("Ts", Some(Language::TypeScript)),
            ("TsX", Some(Language::TypeScript)),
            ("Hs", Some(Language::Haskell)),
            ("Go", Some(Language::Go)),
            ("Java", Some(Language::Java)),
            ("Cpp", Some(Language::Cpp)),
            ("Rb", Some(Language::Ruby)),
            ("Php", Some(Language::Php)),
            ("Swift", Some(Language::Swift)),
            ("Kt", Some(Language::Kotlin)),
            ("Pdf", Some(Language::Pdf)),
            // Unknown extensions stay unknown in any casing.
            ("UNKNOWN", None),
            ("Unknown", None),
        ];

        for &(ext, expected) in &expectations {
            assert_eq!(Language::from_extension(ext), expected, "extension {:?}", ext);
        }
    }

    #[test]
    fn test_language_from_path() {
        let expectations = [
            ("test.rs", Some(Language::Rust)),
            ("test.py", Some(Language::Python)),
            ("test.js", Some(Language::JavaScript)),
            ("test.hs", Some(Language::Haskell)),
            ("test.lhs", Some(Language::Haskell)),
            ("test.go", Some(Language::Go)),
            // Unknown extension.
            ("test.unknown", None),
            // No extension at all.
            ("noext", None),
        ];

        for &(file_name, expected) in &expectations {
            assert_eq!(
                Language::from_path(&PathBuf::from(file_name)),
                expected,
                "path {:?}",
                file_name
            );
        }
    }

    #[test]
    fn test_language_from_path_case_insensitive() {
        let expectations = [
            // Uppercase extensions.
            ("MAIN.RS", Some(Language::Rust)),
            ("app.PY", Some(Language::Python)),
            ("script.JS", Some(Language::JavaScript)),
            ("types.TS", Some(Language::TypeScript)),
            ("Component.TSX", Some(Language::TypeScript)),
            ("module.HS", Some(Language::Haskell)),
            ("server.GO", Some(Language::Go)),
            ("App.JAVA", Some(Language::Java)),
            ("main.C", Some(Language::C)),
            ("utils.CPP", Some(Language::Cpp)),
            ("Program.CS", Some(Language::CSharp)),
            ("script.RB", Some(Language::Ruby)),
            ("index.PHP", Some(Language::Php)),
            ("App.SWIFT", Some(Language::Swift)),
            ("Main.KT", Some(Language::Kotlin)),
            ("document.PDF", Some(Language::Pdf)),
            // Mixed-case extensions.
            ("config.Rs", Some(Language::Rust)),
            ("helper.Py", Some(Language::Python)),
            ("utils.Js", Some(Language::JavaScript)),
            ("interfaces.Ts", Some(Language::TypeScript)),
            ("Component.TsX", Some(Language::TypeScript)),
            ("main.Cpp", Some(Language::Cpp)),
            ("report.Pdf", Some(Language::Pdf)),
            // Unknown extensions stay unknown in any casing.
            ("test.UNKNOWN", None),
            ("test.Unknown", None),
        ];

        for &(file_name, expected) in &expectations {
            assert_eq!(
                Language::from_path(&PathBuf::from(file_name)),
                expected,
                "path {:?}",
                file_name
            );
        }
    }

    #[test]
    fn test_language_display() {
        let expectations = [
            (Language::Rust, "rust"),
            (Language::Python, "python"),
            (Language::JavaScript, "javascript"),
            (Language::TypeScript, "typescript"),
            (Language::Go, "go"),
            (Language::Java, "java"),
        ];

        for &(language, name) in &expectations {
            assert_eq!(language.to_string(), name);
        }
    }

    #[test]
    fn test_create_ckignore_if_missing() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path();

        // First call creates the file.
        assert!(create_ckignore_if_missing(root).unwrap());

        let ckignore = root.join(".ckignore");
        assert!(ckignore.exists());

        // The file holds the default patterns and section headers.
        let written = fs::read_to_string(&ckignore).unwrap();
        for expected in ["*.png", "*.json", "*.yaml", "# Images", "# Config formats"].iter() {
            assert!(written.contains(expected), "missing {:?}", expected);
        }

        // Second call finds the file and does not recreate it.
        assert!(!create_ckignore_if_missing(root).unwrap());
    }

    #[test]
    fn test_read_ckignore_patterns() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path();

        // No .ckignore yet: nothing to read.
        assert_eq!(read_ckignore_patterns(root).unwrap().len(), 0);

        fs::write(
            root.join(".ckignore"),
            "# Comment line\n*.png\n*.jpg\n\n# Another comment\n*.json\n*.yaml\n",
        )
        .unwrap();

        let patterns = read_ckignore_patterns(root).unwrap();
        assert_eq!(patterns.len(), 4);
        for expected in ["*.png", "*.jpg", "*.json", "*.yaml"].iter() {
            assert!(patterns.contains(&expected.to_string()), "missing {:?}", expected);
        }
        // Comment lines never come back as patterns.
        assert!(!patterns.iter().any(|p| p.starts_with('#')));
    }

    #[test]
    fn test_read_ckignore_patterns_with_empty_lines() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path();

        fs::write(root.join(".ckignore"), "\n*.png\n\n*.jpg\n\n\n*.json\n").unwrap();

        let patterns = read_ckignore_patterns(root).unwrap();
        assert_eq!(patterns.len(), 3);
        for expected in ["*.png", "*.jpg", "*.json"].iter() {
            assert!(patterns.contains(&expected.to_string()), "missing {:?}", expected);
        }
    }

    #[test]
    fn test_get_default_ckignore_content() {
        let content = get_default_ckignore_content();

        // One representative pattern per category, plus the config formats.
        let expected_patterns = [
            "*.png", "*.jpg", "*.mp4", "*.mp3", "*.exe", "*.zip", "*.db", "*.json", "*.yaml",
        ];
        for pattern in expected_patterns.iter() {
            assert!(content.contains(pattern), "missing pattern {:?}", pattern);
        }

        // Section headers.
        for header in ["# Images", "# Video", "# Audio", "# Config formats"].iter() {
            assert!(content.contains(header), "missing header {:?}", header);
        }

        // The config section references its tracking issue.
        assert!(content.contains("issue #27"));
    }
}