1pub mod heatmap;
2
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use thiserror::Error;
6
/// Error type shared by the fallible operations declared in this module.
///
/// Variants marked `#[from]` get a `From` conversion generated by `thiserror`,
/// so the wrapped error can be propagated with `?`. The remaining variants
/// carry a free-form message that is rendered after the variant's prefix.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a `regex::Error`.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a `bincode::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a `serde_json::Error`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Message-only error, displayed with the "Index error: " prefix.
    #[error("Index error: {0}")]
    Index(String),

    /// Message-only error, displayed with the "Search error: " prefix.
    #[error("Search error: {0}")]
    Search(String),

    /// Message-only error, displayed with the "Embedding error: " prefix.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// Produced by [`Span::validate`] when a span's line or byte range is malformed.
    #[error("Span validation error: {0}")]
    SpanValidation(String),

    /// Message-only error for anything not covered by the other variants.
    #[error("Other error: {0}")]
    Other(String),
}
36
/// Shorthand for a [`std::result::Result`] whose error type is [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
38
/// File kinds that can be identified from a file extension.
///
/// See [`Language::from_extension`] for the extension mapping and the
/// `Display` implementation for the lowercase name of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    /// `rs`
    Rust,
    /// `py`
    Python,
    /// `js`
    JavaScript,
    /// `ts`, `tsx`
    TypeScript,
    /// `hs`, `lhs`
    Haskell,
    /// `go`
    Go,
    /// `java`
    Java,
    /// `c`
    C,
    /// `cpp`, `cc`, `cxx`, `c++`, and the header extensions `h`, `hpp`
    Cpp,
    /// `cs`
    CSharp,
    /// `rb`
    Ruby,
    /// `php`
    Php,
    /// `swift`
    Swift,
    /// `kt`, `kts`
    Kotlin,
    /// `zig`
    Zig,
    /// `pdf` (a document format rather than a programming language)
    Pdf,
}
58
59impl Language {
60 pub fn from_extension(ext: &str) -> Option<Self> {
61 match ext.to_lowercase().as_str() {
63 "rs" => Some(Language::Rust),
64 "py" => Some(Language::Python),
65 "js" => Some(Language::JavaScript),
66 "ts" | "tsx" => Some(Language::TypeScript),
67 "hs" | "lhs" => Some(Language::Haskell),
68 "go" => Some(Language::Go),
69 "java" => Some(Language::Java),
70 "c" => Some(Language::C),
71 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
72 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
74 "rb" => Some(Language::Ruby),
75 "php" => Some(Language::Php),
76 "swift" => Some(Language::Swift),
77 "kt" | "kts" => Some(Language::Kotlin),
78 "zig" => Some(Language::Zig),
79 "pdf" => Some(Language::Pdf),
80 _ => None,
81 }
82 }
83
84 pub fn from_path(path: &Path) -> Option<Self> {
85 path.extension()
86 .and_then(|ext| ext.to_str())
87 .and_then(Self::from_extension)
88 }
89}
90
91impl std::fmt::Display for Language {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 let name = match self {
94 Language::Rust => "rust",
95 Language::Python => "python",
96 Language::JavaScript => "javascript",
97 Language::TypeScript => "typescript",
98 Language::Haskell => "haskell",
99 Language::Go => "go",
100 Language::Java => "java",
101 Language::C => "c",
102 Language::Cpp => "cpp",
103 Language::CSharp => "csharp",
104 Language::Ruby => "ruby",
105 Language::Php => "php",
106 Language::Swift => "swift",
107 Language::Kotlin => "kotlin",
108 Language::Zig => "zig",
109 Language::Pdf => "pdf",
110 };
111 write!(f, "{}", name)
112 }
113}
114
/// A region of a file expressed both as a byte range and as a line range.
///
/// The fields are public, so a `Span` can be built with a struct literal
/// without any checks; use [`Span::new`] or [`Span::validate`] to enforce
/// the invariants described on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the span starts; a valid span has `byte_start <= byte_end`.
    pub byte_start: usize,
    /// Byte offset where the span ends; [`Span::byte_len`] is `byte_end - byte_start`.
    pub byte_end: usize,
    /// First line of the span; lines are 1-indexed, so a valid span never has 0 here.
    pub line_start: usize,
    /// Last line of the span (counted by [`Span::line_count`]); never 0 in a valid span.
    pub line_end: usize,
}
122
123impl Span {
124 pub fn new(
126 byte_start: usize,
127 byte_end: usize,
128 line_start: usize,
129 line_end: usize,
130 ) -> Result<Self> {
131 let span = Self {
132 byte_start,
133 byte_end,
134 line_start,
135 line_end,
136 };
137 span.validate()?;
138 Ok(span)
139 }
140
141 pub fn new_unchecked(
147 byte_start: usize,
148 byte_end: usize,
149 line_start: usize,
150 line_end: usize,
151 ) -> Self {
152 Self {
153 byte_start,
154 byte_end,
155 line_start,
156 line_end,
157 }
158 }
159
160 pub fn validate(&self) -> Result<()> {
162 if self.line_start == 0 {
164 return Err(CkError::SpanValidation(
165 "Line start cannot be zero (lines are 1-indexed)".to_string(),
166 ));
167 }
168
169 if self.line_end == 0 {
170 return Err(CkError::SpanValidation(
171 "Line end cannot be zero (lines are 1-indexed)".to_string(),
172 ));
173 }
174
175 if self.byte_start > self.byte_end {
177 return Err(CkError::SpanValidation(format!(
178 "Invalid byte range: start ({}) > end ({})",
179 self.byte_start, self.byte_end
180 )));
181 }
182
183 if self.line_start > self.line_end {
185 return Err(CkError::SpanValidation(format!(
186 "Invalid line range: start ({}) > end ({})",
187 self.line_start, self.line_end
188 )));
189 }
190
191 Ok(())
192 }
193
194 pub fn is_valid(&self) -> bool {
196 self.validate().is_ok()
197 }
198
199 pub fn byte_len(&self) -> usize {
201 self.byte_end.saturating_sub(self.byte_start)
202 }
203
204 pub fn line_count(&self) -> usize {
206 self.line_end.saturating_sub(self.line_start) + 1
207 }
208}
209
/// Bookkeeping record describing a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Location of the file.
    pub path: PathBuf,
    /// Content hash of the file — presumably the hex digest produced by
    /// `compute_file_hash`; confirm against the code that fills this in.
    pub hash: String,
    /// Modification time as an integer — NOTE(review): the unit/epoch is not
    /// visible here (likely seconds since the Unix epoch); confirm.
    pub last_modified: u64,
    /// File size — presumably in bytes; confirm against the producer.
    pub size: u64,
}
217
/// A single search hit.
///
/// The optional fields are left out of the serialized form entirely when they
/// are `None` (`skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File the hit was found in.
    pub file: PathBuf,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Score of the hit. `JsonlSearchResult::from_search_result` treats a
    /// negative value as "no score".
    pub score: f32,
    /// Text shown for the hit; used as the snippet in JSONL output.
    pub preview: String,
    /// Language of the file, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
    /// Hash of the matched chunk, when available — presumably produced by
    /// `compute_chunk_hash`; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch the hit came from, when available — NOTE(review): exact
    /// meaning is not visible in this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
233
/// The outcome of a search: the accepted hits plus an optional near miss.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// Hits returned by the search.
    pub matches: Vec<SearchResult>,
    /// Presumably the best-scoring hit that fell short of the score
    /// threshold, kept so callers can report it — confirm against the search code.
    pub closest_below_threshold: Option<SearchResult>,
}
241
/// Search hit in the shape used for JSON output.
///
/// Unlike [`SearchResult`], optional fields here are always serialized
/// (as `null` when absent) and the file is carried as a plain string.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File the hit was found in, as a string.
    pub file: String,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Language of the file, when known.
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, when available.
    pub symbol: Option<String>,
    /// Score of the hit.
    pub score: f32,
    /// Ranking signals that accompany the score.
    pub signals: SearchSignals,
    /// Text shown for the hit.
    pub preview: String,
    /// Name of the model associated with the result — presumably the
    /// embedding model used for the search; confirm.
    pub model: String,
}
253
/// Search hit in the shape used for JSONL output.
///
/// Built from a [`SearchResult`] by `JsonlSearchResult::from_search_result`.
/// Fields marked `skip_serializing_if` are omitted from the output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonlSearchResult {
    /// File path as a (lossily converted) string.
    pub path: String,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Display name of the language (e.g. `"rust"`), when known. Always serialized.
    pub language: Option<String>,
    /// Preview text of the hit; `None` when snippets were not requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
    /// Score of the hit; `None` when the source score was negative (or NaN).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub score: Option<f32>,
    /// Hash of the matched chunk, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch copied from the source result, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
268
/// Per-hit ranking signals reported alongside a [`JsonSearchResult`].
///
/// NOTE(review): the field meanings below are inferred from their names
/// (lexical rank, vector rank, reciprocal-rank-fusion score); confirm against
/// the code that populates them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Rank from the lexical search, if the hit appeared there (presumed).
    pub lex_rank: Option<usize>,
    /// Rank from the vector/semantic search, if the hit appeared there (presumed).
    pub vec_rank: Option<usize>,
    /// Fused score combining the ranks (presumed RRF).
    pub rrf_score: f32,
}
275
/// Search strategy selected in [`SearchOptions`].
///
/// `SearchOptions::default()` uses [`SearchMode::Regex`].
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    /// Regular-expression search (the default mode).
    Regex,
    /// Lexical search — NOTE(review): implementation lives outside this file.
    Lexical,
    /// Semantic search — NOTE(review): implementation lives outside this file.
    Semantic,
    /// Hybrid search — presumably combines lexical and semantic results; confirm.
    Hybrid,
}
283
/// A path explicitly included in a search, tagged with whether it is a directory.
#[derive(Debug, Clone)]
pub struct IncludePattern {
    /// The included path.
    pub path: PathBuf,
    /// `true` when `path` refers to a directory — presumably determined by the
    /// caller when the pattern is built; confirm.
    pub is_dir: bool,
}
289
/// All settings for a single search invocation.
///
/// Defaults are provided by the `Default` implementation.
///
/// NOTE(review): this struct only carries the values; how each flag is
/// interpreted is decided by the search code outside this file. The field
/// descriptions below are inferred from the field names and should be
/// confirmed against that code.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy to use.
    pub mode: SearchMode,
    /// The query text or pattern.
    pub query: String,
    /// Where to search; defaults to `"."`.
    pub path: PathBuf,
    /// Optional cap on the number of results.
    pub top_k: Option<usize>,
    /// Optional minimum score for a result to be reported.
    pub threshold: Option<f32>,
    /// Match without regard to letter case.
    pub case_insensitive: bool,
    /// Match whole words only.
    pub whole_word: bool,
    /// Treat the query as a literal string rather than a pattern.
    pub fixed_string: bool,
    /// Show line numbers in the output.
    pub line_numbers: bool,
    /// Lines of context to show around each match.
    pub context_lines: usize,
    /// Lines of context to show before each match.
    pub before_context_lines: usize,
    /// Lines of context to show after each match.
    pub after_context_lines: usize,
    /// Descend into subdirectories; defaults to `true`.
    pub recursive: bool,
    /// Emit results as JSON.
    pub json_output: bool,
    /// Emit results as JSONL.
    pub jsonl_output: bool,
    /// Leave snippets out of the output.
    pub no_snippet: bool,
    /// Force the index to be rebuilt.
    pub reindex: bool,
    /// Show scores in the output.
    pub show_scores: bool,
    /// Show file names in the output.
    pub show_filenames: bool,
    /// Report only the files that contain a match.
    pub files_with_matches: bool,
    /// Report only the files that contain no match.
    pub files_without_matches: bool,
    /// Patterns to exclude; defaults to `get_default_exclude_patterns()`.
    pub exclude_patterns: Vec<String>,
    /// Paths explicitly included in the search.
    pub include_patterns: Vec<IncludePattern>,
    /// Honour `.gitignore` rules; defaults to `true`.
    pub respect_gitignore: bool,
    /// Return the full enclosing section for each match.
    pub full_section: bool,
    /// Re-rank results after the initial search.
    pub rerank: bool,
    /// Model to use for re-ranking, when overridden.
    pub rerank_model: Option<String>,
    /// Model to use for embeddings, when overridden.
    pub embedding_model: Option<String>,
}
322
323impl JsonlSearchResult {
324 pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
325 Self {
326 path: result.file.to_string_lossy().to_string(),
327 span: result.span.clone(),
328 language: result.lang.as_ref().map(|l| l.to_string()),
329 snippet: if include_snippet {
330 Some(result.preview.clone())
331 } else {
332 None
333 },
334 score: if result.score >= 0.0 {
335 Some(result.score)
336 } else {
337 None
338 },
339 chunk_hash: result.chunk_hash.clone(),
340 index_epoch: result.index_epoch,
341 }
342 }
343}
344
345impl Default for SearchOptions {
346 fn default() -> Self {
347 Self {
348 mode: SearchMode::Regex,
349 query: String::new(),
350 path: PathBuf::from("."),
351 top_k: None,
352 threshold: None,
353 case_insensitive: false,
354 whole_word: false,
355 fixed_string: false,
356 line_numbers: false,
357 context_lines: 0,
358 before_context_lines: 0,
359 after_context_lines: 0,
360 recursive: true,
361 json_output: false,
362 jsonl_output: false,
363 no_snippet: false,
364 reindex: false,
365 show_scores: false,
366 show_filenames: false,
367 files_with_matches: false,
368 files_without_matches: false,
369 exclude_patterns: get_default_exclude_patterns(),
370 include_patterns: Vec::new(),
371 respect_gitignore: true,
372 full_section: false,
373 rerank: false,
375 rerank_model: None,
376 embedding_model: None,
377 }
378 }
379}
380
/// Returns the built-in list of directory names that are excluded by default.
///
/// The list is returned as freshly allocated `String`s, in a fixed order.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // Tool state and caches.
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version-control metadata.
        ".git",
        ".svn",
        ".hg",
        // Build outputs and dependency directories.
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Python virtual environments.
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // Editor / IDE settings.
        ".vscode",
        ".idea",
        ".eclipse",
        // Scratch directories.
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|pattern| (*pattern).to_string())
        .collect()
}
420
/// Returns the text written into a newly created `.ckignore` file.
///
/// The content uses `.gitignore`-style patterns, grouped by file category,
/// and ends with a marker line below which users add their own patterns.
pub fn get_default_ckignore_content() -> &'static str {
    const DEFAULT_CKIGNORE: &str = r#"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
"#;

    DEFAULT_CKIGNORE
}
490
491pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
493 let ckignore_path = repo_root.join(".ckignore");
494
495 if !ckignore_path.exists() {
496 return Ok(Vec::new());
497 }
498
499 let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
500
501 let patterns: Vec<String> = content
502 .lines()
503 .map(|line| line.trim())
504 .filter(|line| !line.is_empty() && !line.starts_with('#'))
505 .map(|line| line.to_string())
506 .collect();
507
508 Ok(patterns)
509}
510
511pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
513 let ckignore_path = repo_root.join(".ckignore");
514
515 if ckignore_path.exists() {
516 return Ok(false); }
518
519 std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
520
521 Ok(true) }
523
524pub fn build_exclude_patterns(
543 repo_root: Option<&Path>,
544 additional_excludes: &[String],
545 use_ckignore: bool,
546 use_defaults: bool,
547) -> Vec<String> {
548 let mut patterns = Vec::new();
549
550 if use_ckignore
552 && let Some(root) = repo_root
553 && let Ok(ckignore_patterns) = read_ckignore_patterns(root)
554 && !ckignore_patterns.is_empty()
555 {
556 patterns.extend(ckignore_patterns);
557 }
558
559 patterns.extend(additional_excludes.iter().cloned());
561
562 if use_defaults {
564 patterns.extend(get_default_exclude_patterns());
565 }
566
567 patterns
568}
569
/// Maps a source file to its sidecar file under `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored below `.ck`, and `.ck`
/// is appended to the file name: `src/main.rs` becomes `.ck/src/main.rs.ck`,
/// and an extension-less `README` becomes `.ck/README.ck`.
///
/// When `file_path` is not located under `repo_root` it is used as given.
/// Note that, per `PathBuf::push`, an absolute `file_path` in that situation
/// replaces the `.ck` base entirely.
///
/// The extension is handled as an `OsString`, so extensions that are not
/// valid UTF-8 are preserved byte-for-byte instead of being replaced with
/// U+FFFD (which would let distinct files share one sidecar path).
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
    let mut sidecar = repo_root.join(".ck");
    sidecar.push(relative);

    // `set_extension` replaces the current extension, so the new one has to
    // repeat the original extension followed by ".ck".
    let ext = match relative.extension() {
        Some(existing) => {
            let mut ext = existing.to_os_string();
            ext.push(".ck");
            ext
        }
        None => std::ffi::OsString::from("ck"),
    };
    sidecar.set_extension(ext);
    sidecar
}
581
582pub fn compute_file_hash(path: &Path) -> Result<String> {
583 use std::io::Read;
584
585 let mut file = std::fs::File::open(path)?;
586 let mut hasher = blake3::Hasher::new();
587
588 let mut buffer = [0u8; 65536]; loop {
591 let bytes_read = file.read(&mut buffer)?;
592 if bytes_read == 0 {
593 break;
594 }
595 hasher.update(&buffer[..bytes_read]);
596 }
597
598 let hash = hasher.finalize();
599 Ok(hash.to_hex().to_string())
600}
601
602pub fn compute_chunk_hash(
610 text: &str,
611 leading_trivia: &[String],
612 trailing_trivia: &[String],
613) -> String {
614 let mut hasher = blake3::Hasher::new();
615
616 hasher.update(text.as_bytes());
618
619 for trivia in leading_trivia {
621 hasher.update(trivia.as_bytes());
622 }
623
624 for trivia in trailing_trivia {
626 hasher.update(trivia.as_bytes());
627 }
628
629 hasher.finalize().to_hex().to_string()
630}
631
/// Helpers for handling PDF files.
pub mod pdf {
    use std::path::{Path, PathBuf};

    /// Returns `true` when `path` has a `pdf` extension, compared
    /// ASCII-case-insensitively.
    ///
    /// Paths without an extension, or whose extension is not valid UTF-8,
    /// are not considered PDFs.
    pub fn is_pdf_file(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("pdf"))
    }

    /// Maps a file to the location of its cached text content under
    /// `<repo_root>/.ck/content`.
    ///
    /// The file's path relative to `repo_root` is mirrored below that
    /// directory and `.txt` is appended to the file name: `docs/manual.pdf`
    /// becomes `.ck/content/docs/manual.pdf.txt`, and an extension-less
    /// `docs/manual` becomes `.ck/content/docs/manual.txt`.
    ///
    /// When `file_path` is not located under `repo_root` it is used as given.
    /// The extension is handled as an `OsString`, so extensions that are not
    /// valid UTF-8 are preserved instead of being replaced with U+FFFD (which
    /// would let distinct files share one cache path).
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
        let mut cache_path = repo_root.join(".ck").join("content");
        cache_path.push(relative);

        // `set_extension` replaces the current extension, so the new one has
        // to repeat the original extension followed by ".txt".
        let ext = match relative.extension() {
            Some(existing) => {
                let mut ext = existing.to_os_string();
                ext.push(".txt");
                ext
            }
            None => std::ffi::OsString::from("txt"),
        };
        cache_path.set_extension(ext);

        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            assert!(is_pdf_file(&PathBuf::from("test.pdf")));
            assert!(is_pdf_file(&PathBuf::from("test.PDF")));
            assert!(is_pdf_file(&PathBuf::from("test.Pdf")));
            assert!(!is_pdf_file(&PathBuf::from("test.txt")));
            assert!(!is_pdf_file(&PathBuf::from("test")));
            assert!(!is_pdf_file(&PathBuf::from("pdf")));
        }

        #[test]
        fn test_get_content_cache_path() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }
    }
}
712
713#[cfg(test)]
714mod tests {
715 use super::*;
716 use std::fs;
717 use tempfile::TempDir;
718
719 #[test]
720 fn test_span_valid_creation() {
721 let span = Span::new(0, 10, 1, 2).unwrap();
723 assert_eq!(span.byte_start, 0);
724 assert_eq!(span.byte_end, 10);
725 assert_eq!(span.line_start, 1);
726 assert_eq!(span.line_end, 2);
727 assert!(span.is_valid());
728 }
729
730 #[test]
731 fn test_span_validation_valid_cases() {
732 let span = Span::new(10, 10, 1, 1).unwrap();
734 assert!(span.is_valid());
735 assert_eq!(span.byte_len(), 0);
736 assert_eq!(span.line_count(), 1);
737
738 let span = Span::new(0, 100, 1, 10).unwrap();
740 assert!(span.is_valid());
741 assert_eq!(span.byte_len(), 100);
742 assert_eq!(span.line_count(), 10);
743
744 let span = Span::new(5, 25, 3, 3).unwrap();
746 assert!(span.is_valid());
747 assert_eq!(span.byte_len(), 20);
748 assert_eq!(span.line_count(), 1);
749 }
750
751 #[test]
752 fn test_span_validation_invalid_byte_range() {
753 let result = Span::new(10, 5, 1, 2);
755 assert!(result.is_err());
756 if let Err(CkError::SpanValidation(msg)) = result {
757 assert!(msg.contains("Invalid byte range"));
758 assert!(msg.contains("start (10) > end (5)"));
759 } else {
760 panic!("Expected SpanValidation error");
761 }
762 }
763
764 #[test]
765 fn test_span_validation_invalid_line_range() {
766 let result = Span::new(0, 10, 5, 2);
768 assert!(result.is_err());
769 if let Err(CkError::SpanValidation(msg)) = result {
770 assert!(msg.contains("Invalid line range"));
771 assert!(msg.contains("start (5) > end (2)"));
772 } else {
773 panic!("Expected SpanValidation error");
774 }
775 }
776
777 #[test]
778 fn test_span_validation_zero_line_numbers() {
779 let result = Span::new(0, 10, 0, 2);
781 assert!(result.is_err());
782 if let Err(CkError::SpanValidation(msg)) = result {
783 assert!(msg.contains("Line start cannot be zero"));
784 } else {
785 panic!("Expected SpanValidation error");
786 }
787
788 let result = Span::new(0, 10, 1, 0);
790 assert!(result.is_err());
791 if let Err(CkError::SpanValidation(msg)) = result {
792 assert!(msg.contains("Line end cannot be zero"));
793 } else {
794 panic!("Expected SpanValidation error");
795 }
796 }
797
798 #[test]
799 fn test_span_unchecked_creation() {
800 let span = Span::new_unchecked(10, 5, 0, 1);
802 assert_eq!(span.byte_start, 10);
803 assert_eq!(span.byte_end, 5);
804 assert_eq!(span.line_start, 0);
805 assert_eq!(span.line_end, 1);
806 assert!(!span.is_valid()); }
808
809 #[test]
810 fn test_span_validation_methods() {
811 let valid_span = Span::new_unchecked(0, 10, 1, 2);
813 assert!(valid_span.validate().is_ok());
814 assert!(valid_span.is_valid());
815
816 let invalid_span = Span::new_unchecked(10, 5, 1, 2);
818 assert!(invalid_span.validate().is_err());
819 assert!(!invalid_span.is_valid());
820
821 let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
823 assert!(zero_line_span.validate().is_err());
824 assert!(!zero_line_span.is_valid());
825 }
826
827 #[test]
828 fn test_span_utility_methods() {
829 let span = Span::new(10, 25, 5, 8).unwrap();
830
831 assert_eq!(span.byte_len(), 15);
833
834 assert_eq!(span.line_count(), 4); let single_line = Span::new(0, 5, 1, 1).unwrap();
839 assert_eq!(single_line.line_count(), 1);
840 assert_eq!(single_line.byte_len(), 5);
841
842 let empty = Span::new(10, 10, 3, 3).unwrap();
844 assert_eq!(empty.byte_len(), 0);
845 assert_eq!(empty.line_count(), 1);
846 }
847
848 #[test]
849 fn test_span_legacy_struct_literal_still_works() {
850 let span = Span {
852 byte_start: 0,
853 byte_end: 10,
854 line_start: 1,
855 line_end: 2,
856 };
857
858 assert_eq!(span.byte_start, 0);
859 assert_eq!(span.byte_end, 10);
860 assert_eq!(span.line_start, 1);
861 assert_eq!(span.line_end, 2);
862 assert!(span.is_valid());
863 }
864
865 #[test]
866 fn test_search_options_default() {
867 let options = SearchOptions::default();
868 assert!(matches!(options.mode, SearchMode::Regex));
869 assert_eq!(options.query, "");
870 assert_eq!(options.path, PathBuf::from("."));
871 assert_eq!(options.top_k, None);
872 assert_eq!(options.threshold, None);
873 assert!(!options.case_insensitive);
874 assert!(!options.whole_word);
875 assert!(!options.fixed_string);
876 assert!(!options.line_numbers);
877 assert_eq!(options.context_lines, 0);
878 assert!(options.recursive);
879 assert!(!options.json_output);
880 assert!(!options.reindex);
881 assert!(!options.show_scores);
882 assert!(!options.show_filenames);
883 }
884
885 #[test]
886 fn test_file_metadata_serialization() {
887 let metadata = FileMetadata {
888 path: PathBuf::from("test.txt"),
889 hash: "abc123".to_string(),
890 last_modified: 1234567890,
891 size: 1024,
892 };
893
894 let json = serde_json::to_string(&metadata).unwrap();
895 let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
896
897 assert_eq!(metadata.path, deserialized.path);
898 assert_eq!(metadata.hash, deserialized.hash);
899 assert_eq!(metadata.last_modified, deserialized.last_modified);
900 assert_eq!(metadata.size, deserialized.size);
901 }
902
903 #[test]
904 fn test_search_result_serialization() {
905 let result = SearchResult {
906 file: PathBuf::from("test.txt"),
907 span: Span {
908 byte_start: 0,
909 byte_end: 10,
910 line_start: 1,
911 line_end: 1,
912 },
913 score: 0.95,
914 preview: "hello world".to_string(),
915 lang: Some(Language::Rust),
916 symbol: Some("main".to_string()),
917 chunk_hash: Some("abc123".to_string()),
918 index_epoch: Some(1699123456),
919 };
920
921 let json = serde_json::to_string(&result).unwrap();
922 let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
923
924 assert_eq!(result.file, deserialized.file);
925 assert_eq!(result.score, deserialized.score);
926 assert_eq!(result.preview, deserialized.preview);
927 assert_eq!(result.lang, deserialized.lang);
928 assert_eq!(result.symbol, deserialized.symbol);
929 assert_eq!(result.chunk_hash, deserialized.chunk_hash);
930 assert_eq!(result.index_epoch, deserialized.index_epoch);
931 }
932
933 #[test]
934 fn test_jsonl_search_result_conversion() {
935 let result = SearchResult {
936 file: PathBuf::from("src/auth.rs"),
937 span: Span {
938 byte_start: 1203,
939 byte_end: 1456,
940 line_start: 42,
941 line_end: 58,
942 },
943 score: 0.89,
944 preview: "function authenticate(user) {...}".to_string(),
945 lang: Some(Language::Rust),
946 symbol: Some("authenticate".to_string()),
947 chunk_hash: Some("abc123def456".to_string()),
948 index_epoch: Some(1699123456),
949 };
950
951 let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
953 assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
954 assert_eq!(jsonl_with_snippet.span.line_start, 42);
955 assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
956 assert_eq!(
957 jsonl_with_snippet.snippet,
958 Some("function authenticate(user) {...}".to_string())
959 );
960 assert_eq!(jsonl_with_snippet.score, Some(0.89));
961 assert_eq!(
962 jsonl_with_snippet.chunk_hash,
963 Some("abc123def456".to_string())
964 );
965 assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
966
967 let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
969 assert_eq!(jsonl_no_snippet.snippet, None);
970 assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
971 }
972
973 #[test]
974 fn test_get_sidecar_path() {
975 let repo_root = PathBuf::from("/home/user/project");
976 let file_path = PathBuf::from("/home/user/project/src/main.rs");
977
978 let sidecar = get_sidecar_path(&repo_root, &file_path);
979 let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
980
981 assert_eq!(sidecar, expected);
982 }
983
984 #[test]
985 fn test_get_sidecar_path_no_extension() {
986 let repo_root = PathBuf::from("/project");
987 let file_path = PathBuf::from("/project/README");
988
989 let sidecar = get_sidecar_path(&repo_root, &file_path);
990 let expected = PathBuf::from("/project/.ck/README.ck");
991
992 assert_eq!(sidecar, expected);
993 }
994
995 #[test]
996 fn test_compute_file_hash() {
997 let temp_dir = TempDir::new().unwrap();
998 let file_path = temp_dir.path().join("test.txt");
999
1000 fs::write(&file_path, "hello world").unwrap();
1001
1002 let hash1 = compute_file_hash(&file_path).unwrap();
1003 let hash2 = compute_file_hash(&file_path).unwrap();
1004
1005 assert_eq!(hash1, hash2);
1007 assert!(!hash1.is_empty());
1008
1009 fs::write(&file_path, "hello rust").unwrap();
1011 let hash3 = compute_file_hash(&file_path).unwrap();
1012 assert_ne!(hash1, hash3);
1013 }
1014
1015 #[test]
1016 fn test_compute_file_hash_nonexistent() {
1017 let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
1018 assert!(result.is_err());
1019 }
1020
1021 #[test]
1022 fn test_compute_file_hash_large_file() {
1023 let temp_dir = TempDir::new().unwrap();
1024 let file_path = temp_dir.path().join("large_test.txt");
1025
1026 let large_content = "a".repeat(100_000); fs::write(&file_path, &large_content).unwrap();
1029
1030 let hash1 = compute_file_hash(&file_path).unwrap();
1031 let hash2 = compute_file_hash(&file_path).unwrap();
1032
1033 assert_eq!(hash1, hash2);
1035 assert!(!hash1.is_empty());
1036
1037 fs::write(&file_path, "small content").unwrap();
1039 let hash3 = compute_file_hash(&file_path).unwrap();
1040 assert_ne!(hash1, hash3);
1041 }
1042
1043 #[test]
1044 fn test_json_search_result_serialization() {
1045 let signals = SearchSignals {
1046 lex_rank: Some(1),
1047 vec_rank: Some(2),
1048 rrf_score: 0.85,
1049 };
1050
1051 let result = JsonSearchResult {
1052 file: "test.txt".to_string(),
1053 span: Span {
1054 byte_start: 0,
1055 byte_end: 5,
1056 line_start: 1,
1057 line_end: 1,
1058 },
1059 lang: None, symbol: None,
1061 score: 0.95,
1062 signals,
1063 preview: "hello".to_string(),
1064 model: "bge-small".to_string(),
1065 };
1066
1067 let json = serde_json::to_string(&result).unwrap();
1068 let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
1069
1070 assert_eq!(result.file, deserialized.file);
1071 assert_eq!(result.score, deserialized.score);
1072 assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
1073 assert_eq!(result.model, deserialized.model);
1074 }
1075
1076 #[test]
1077 fn test_language_from_extension() {
1078 assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
1079 assert_eq!(Language::from_extension("py"), Some(Language::Python));
1080 assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
1081 assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
1082 assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
1083 assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
1084 assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
1085 assert_eq!(Language::from_extension("go"), Some(Language::Go));
1086 assert_eq!(Language::from_extension("java"), Some(Language::Java));
1087 assert_eq!(Language::from_extension("c"), Some(Language::C));
1088 assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
1089 assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
1090 assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
1091 assert_eq!(Language::from_extension("php"), Some(Language::Php));
1092 assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
1093 assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
1094 assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
1095 assert_eq!(Language::from_extension("unknown"), None);
1096 }
1097
1098 #[test]
1099 fn test_language_from_extension_case_insensitive() {
1100 assert_eq!(Language::from_extension("RS"), Some(Language::Rust));
1102 assert_eq!(Language::from_extension("PY"), Some(Language::Python));
1103 assert_eq!(Language::from_extension("JS"), Some(Language::JavaScript));
1104 assert_eq!(Language::from_extension("TS"), Some(Language::TypeScript));
1105 assert_eq!(Language::from_extension("TSX"), Some(Language::TypeScript));
1106 assert_eq!(Language::from_extension("HS"), Some(Language::Haskell));
1107 assert_eq!(Language::from_extension("LHS"), Some(Language::Haskell));
1108 assert_eq!(Language::from_extension("GO"), Some(Language::Go));
1109 assert_eq!(Language::from_extension("JAVA"), Some(Language::Java));
1110 assert_eq!(Language::from_extension("C"), Some(Language::C));
1111 assert_eq!(Language::from_extension("CPP"), Some(Language::Cpp));
1112 assert_eq!(Language::from_extension("CC"), Some(Language::Cpp));
1113 assert_eq!(Language::from_extension("CXX"), Some(Language::Cpp));
1114 assert_eq!(Language::from_extension("H"), Some(Language::Cpp));
1115 assert_eq!(Language::from_extension("HPP"), Some(Language::Cpp));
1116 assert_eq!(Language::from_extension("CS"), Some(Language::CSharp));
1117 assert_eq!(Language::from_extension("RB"), Some(Language::Ruby));
1118 assert_eq!(Language::from_extension("PHP"), Some(Language::Php));
1119 assert_eq!(Language::from_extension("SWIFT"), Some(Language::Swift));
1120 assert_eq!(Language::from_extension("KT"), Some(Language::Kotlin));
1121 assert_eq!(Language::from_extension("KTS"), Some(Language::Kotlin));
1122 assert_eq!(Language::from_extension("PDF"), Some(Language::Pdf));
1123
1124 assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
1126 assert_eq!(Language::from_extension("Py"), Some(Language::Python));
1127 assert_eq!(Language::from_extension("Js"), Some(Language::JavaScript));
1128 assert_eq!(Language::from_extension("Ts"), Some(Language::TypeScript));
1129 assert_eq!(Language::from_extension("TsX"), Some(Language::TypeScript));
1130 assert_eq!(Language::from_extension("Hs"), Some(Language::Haskell));
1131 assert_eq!(Language::from_extension("Go"), Some(Language::Go));
1132 assert_eq!(Language::from_extension("Java"), Some(Language::Java));
1133 assert_eq!(Language::from_extension("Cpp"), Some(Language::Cpp));
1134 assert_eq!(Language::from_extension("Rb"), Some(Language::Ruby));
1135 assert_eq!(Language::from_extension("Php"), Some(Language::Php));
1136 assert_eq!(Language::from_extension("Swift"), Some(Language::Swift));
1137 assert_eq!(Language::from_extension("Kt"), Some(Language::Kotlin));
1138 assert_eq!(Language::from_extension("Pdf"), Some(Language::Pdf));
1139
1140 assert_eq!(Language::from_extension("UNKNOWN"), None);
1142 assert_eq!(Language::from_extension("Unknown"), None);
1143 }
1144
1145 #[test]
1146 fn test_language_from_path() {
1147 assert_eq!(
1148 Language::from_path(&PathBuf::from("test.rs")),
1149 Some(Language::Rust)
1150 );
1151 assert_eq!(
1152 Language::from_path(&PathBuf::from("test.py")),
1153 Some(Language::Python)
1154 );
1155 assert_eq!(
1156 Language::from_path(&PathBuf::from("test.js")),
1157 Some(Language::JavaScript)
1158 );
1159 assert_eq!(
1160 Language::from_path(&PathBuf::from("test.hs")),
1161 Some(Language::Haskell)
1162 );
1163 assert_eq!(
1164 Language::from_path(&PathBuf::from("test.lhs")),
1165 Some(Language::Haskell)
1166 );
1167 assert_eq!(
1168 Language::from_path(&PathBuf::from("test.go")),
1169 Some(Language::Go)
1170 );
1171 assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None); assert_eq!(Language::from_path(&PathBuf::from("noext")), None); }
1174
1175 #[test]
1176 fn test_language_from_path_case_insensitive() {
1177 assert_eq!(
1179 Language::from_path(&PathBuf::from("MAIN.RS")),
1180 Some(Language::Rust)
1181 );
1182 assert_eq!(
1183 Language::from_path(&PathBuf::from("app.PY")),
1184 Some(Language::Python)
1185 );
1186 assert_eq!(
1187 Language::from_path(&PathBuf::from("script.JS")),
1188 Some(Language::JavaScript)
1189 );
1190 assert_eq!(
1191 Language::from_path(&PathBuf::from("types.TS")),
1192 Some(Language::TypeScript)
1193 );
1194 assert_eq!(
1195 Language::from_path(&PathBuf::from("Component.TSX")),
1196 Some(Language::TypeScript)
1197 );
1198 assert_eq!(
1199 Language::from_path(&PathBuf::from("module.HS")),
1200 Some(Language::Haskell)
1201 );
1202 assert_eq!(
1203 Language::from_path(&PathBuf::from("server.GO")),
1204 Some(Language::Go)
1205 );
1206 assert_eq!(
1207 Language::from_path(&PathBuf::from("App.JAVA")),
1208 Some(Language::Java)
1209 );
1210 assert_eq!(
1211 Language::from_path(&PathBuf::from("main.C")),
1212 Some(Language::C)
1213 );
1214 assert_eq!(
1215 Language::from_path(&PathBuf::from("utils.CPP")),
1216 Some(Language::Cpp)
1217 );
1218 assert_eq!(
1219 Language::from_path(&PathBuf::from("Program.CS")),
1220 Some(Language::CSharp)
1221 );
1222 assert_eq!(
1223 Language::from_path(&PathBuf::from("script.RB")),
1224 Some(Language::Ruby)
1225 );
1226 assert_eq!(
1227 Language::from_path(&PathBuf::from("index.PHP")),
1228 Some(Language::Php)
1229 );
1230 assert_eq!(
1231 Language::from_path(&PathBuf::from("App.SWIFT")),
1232 Some(Language::Swift)
1233 );
1234 assert_eq!(
1235 Language::from_path(&PathBuf::from("Main.KT")),
1236 Some(Language::Kotlin)
1237 );
1238 assert_eq!(
1239 Language::from_path(&PathBuf::from("document.PDF")),
1240 Some(Language::Pdf)
1241 );
1242
1243 assert_eq!(
1245 Language::from_path(&PathBuf::from("config.Rs")),
1246 Some(Language::Rust)
1247 );
1248 assert_eq!(
1249 Language::from_path(&PathBuf::from("helper.Py")),
1250 Some(Language::Python)
1251 );
1252 assert_eq!(
1253 Language::from_path(&PathBuf::from("utils.Js")),
1254 Some(Language::JavaScript)
1255 );
1256 assert_eq!(
1257 Language::from_path(&PathBuf::from("interfaces.Ts")),
1258 Some(Language::TypeScript)
1259 );
1260 assert_eq!(
1261 Language::from_path(&PathBuf::from("Component.TsX")),
1262 Some(Language::TypeScript)
1263 );
1264 assert_eq!(
1265 Language::from_path(&PathBuf::from("main.Cpp")),
1266 Some(Language::Cpp)
1267 );
1268 assert_eq!(
1269 Language::from_path(&PathBuf::from("report.Pdf")),
1270 Some(Language::Pdf)
1271 );
1272
1273 assert_eq!(Language::from_path(&PathBuf::from("test.UNKNOWN")), None);
1275 assert_eq!(Language::from_path(&PathBuf::from("test.Unknown")), None);
1276 }
1277
1278 #[test]
1279 fn test_language_display() {
1280 assert_eq!(Language::Rust.to_string(), "rust");
1281 assert_eq!(Language::Python.to_string(), "python");
1282 assert_eq!(Language::JavaScript.to_string(), "javascript");
1283 assert_eq!(Language::TypeScript.to_string(), "typescript");
1284 assert_eq!(Language::Go.to_string(), "go");
1285 assert_eq!(Language::Java.to_string(), "java");
1286 }
1287
1288 #[test]
1289 fn test_create_ckignore_if_missing() {
1290 let temp_dir = TempDir::new().unwrap();
1291 let test_path = temp_dir.path();
1292
1293 let created = create_ckignore_if_missing(test_path).unwrap();
1295 assert!(created);
1296
1297 let ckignore_path = test_path.join(".ckignore");
1299 assert!(ckignore_path.exists());
1300
1301 let content = fs::read_to_string(&ckignore_path).unwrap();
1303 assert!(content.contains("*.png"));
1304 assert!(content.contains("*.json"));
1305 assert!(content.contains("*.yaml"));
1306 assert!(content.contains("# Images"));
1307 assert!(content.contains("# Config formats"));
1308
1309 let created_again = create_ckignore_if_missing(test_path).unwrap();
1311 assert!(!created_again);
1312 }
1313
1314 #[test]
1315 fn test_read_ckignore_patterns() {
1316 let temp_dir = TempDir::new().unwrap();
1317 let test_path = temp_dir.path();
1318
1319 let patterns = read_ckignore_patterns(test_path).unwrap();
1321 assert_eq!(patterns.len(), 0);
1322
1323 let ckignore_path = test_path.join(".ckignore");
1325 fs::write(
1326 &ckignore_path,
1327 r#"# Comment line
1328*.png
1329*.jpg
1330
1331# Another comment
1332*.json
1333*.yaml
1334"#,
1335 )
1336 .unwrap();
1337
1338 let patterns = read_ckignore_patterns(test_path).unwrap();
1340 assert_eq!(patterns.len(), 4);
1341 assert!(patterns.contains(&"*.png".to_string()));
1342 assert!(patterns.contains(&"*.jpg".to_string()));
1343 assert!(patterns.contains(&"*.json".to_string()));
1344 assert!(patterns.contains(&"*.yaml".to_string()));
1345 assert!(!patterns.iter().any(|p| p.starts_with('#')));
1347 }
1348
1349 #[test]
1350 fn test_read_ckignore_patterns_with_empty_lines() {
1351 let temp_dir = TempDir::new().unwrap();
1352 let test_path = temp_dir.path();
1353
1354 let ckignore_path = test_path.join(".ckignore");
1355 fs::write(
1356 &ckignore_path,
1357 r#"
1358*.png
1359
1360*.jpg
1361
1362
1363*.json
1364"#,
1365 )
1366 .unwrap();
1367
1368 let patterns = read_ckignore_patterns(test_path).unwrap();
1369 assert_eq!(patterns.len(), 3);
1370 assert!(patterns.contains(&"*.png".to_string()));
1371 assert!(patterns.contains(&"*.jpg".to_string()));
1372 assert!(patterns.contains(&"*.json".to_string()));
1373 }
1374
1375 #[test]
1376 fn test_get_default_ckignore_content() {
1377 let content = get_default_ckignore_content();
1378
1379 assert!(content.contains("*.png"));
1381 assert!(content.contains("*.jpg"));
1382 assert!(content.contains("*.mp4"));
1383 assert!(content.contains("*.mp3"));
1384 assert!(content.contains("*.exe"));
1385 assert!(content.contains("*.zip"));
1386 assert!(content.contains("*.db"));
1387 assert!(content.contains("*.json"));
1388 assert!(content.contains("*.yaml"));
1389
1390 assert!(content.contains("# Images"));
1392 assert!(content.contains("# Video"));
1393 assert!(content.contains("# Audio"));
1394 assert!(content.contains("# Config formats"));
1395
1396 assert!(content.contains("issue #27"));
1398 }
1399}