1pub mod heatmap;
2
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use thiserror::Error;
6
7#[derive(Debug, Error)]
8pub enum CkError {
9 #[error("IO error: {0}")]
10 Io(#[from] std::io::Error),
11
12 #[error("Regex error: {0}")]
13 Regex(#[from] regex::Error),
14
15 #[error("Serialization error: {0}")]
16 Serialization(#[from] bincode::Error),
17
18 #[error("JSON error: {0}")]
19 Json(#[from] serde_json::Error),
20
21 #[error("Index error: {0}")]
22 Index(String),
23
24 #[error("Search error: {0}")]
25 Search(String),
26
27 #[error("Embedding error: {0}")]
28 Embedding(String),
29
30 #[error("Span validation error: {0}")]
31 SpanValidation(String),
32
33 #[error("Other error: {0}")]
34 Other(String),
35}
36
37pub type Result<T> = std::result::Result<T, CkError>;
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
40pub enum Language {
41 Rust,
42 Python,
43 JavaScript,
44 TypeScript,
45 Haskell,
46 Go,
47 Java,
48 C,
49 Cpp,
50 CSharp,
51 Ruby,
52 Php,
53 Swift,
54 Kotlin,
55 Zig,
56 Dart,
57 Elixir,
58 Pdf,
59}
60
61impl Language {
62 pub fn from_extension(ext: &str) -> Option<Self> {
63 match ext.to_lowercase().as_str() {
65 "rs" => Some(Language::Rust),
66 "py" => Some(Language::Python),
67 "js" => Some(Language::JavaScript),
68 "ts" | "tsx" => Some(Language::TypeScript),
69 "hs" | "lhs" => Some(Language::Haskell),
70 "go" => Some(Language::Go),
71 "java" => Some(Language::Java),
72 "c" => Some(Language::C),
73 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
74 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
76 "rb" => Some(Language::Ruby),
77 "php" => Some(Language::Php),
78 "swift" => Some(Language::Swift),
79 "kt" | "kts" => Some(Language::Kotlin),
80 "zig" => Some(Language::Zig),
81 "dart" => Some(Language::Dart),
82 "ex" | "exs" => Some(Language::Elixir),
83 "pdf" => Some(Language::Pdf),
84 _ => None,
85 }
86 }
87
88 pub fn from_path(path: &Path) -> Option<Self> {
89 path.extension()
90 .and_then(|ext| ext.to_str())
91 .and_then(Self::from_extension)
92 }
93}
94
95impl std::fmt::Display for Language {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 let name = match self {
98 Language::Rust => "rust",
99 Language::Python => "python",
100 Language::JavaScript => "javascript",
101 Language::TypeScript => "typescript",
102 Language::Haskell => "haskell",
103 Language::Go => "go",
104 Language::Java => "java",
105 Language::C => "c",
106 Language::Cpp => "cpp",
107 Language::CSharp => "csharp",
108 Language::Ruby => "ruby",
109 Language::Php => "php",
110 Language::Swift => "swift",
111 Language::Kotlin => "kotlin",
112 Language::Zig => "zig",
113 Language::Dart => "dart",
114 Language::Elixir => "elixir",
115 Language::Pdf => "pdf",
116 };
117 write!(f, "{}", name)
118 }
119}
120
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct Span {
123 pub byte_start: usize,
124 pub byte_end: usize,
125 pub line_start: usize,
126 pub line_end: usize,
127}
128
129impl Span {
130 pub fn new(
132 byte_start: usize,
133 byte_end: usize,
134 line_start: usize,
135 line_end: usize,
136 ) -> Result<Self> {
137 let span = Self {
138 byte_start,
139 byte_end,
140 line_start,
141 line_end,
142 };
143 span.validate()?;
144 Ok(span)
145 }
146
147 pub fn new_unchecked(
153 byte_start: usize,
154 byte_end: usize,
155 line_start: usize,
156 line_end: usize,
157 ) -> Self {
158 Self {
159 byte_start,
160 byte_end,
161 line_start,
162 line_end,
163 }
164 }
165
166 pub fn validate(&self) -> Result<()> {
168 if self.line_start == 0 {
170 return Err(CkError::SpanValidation(
171 "Line start cannot be zero (lines are 1-indexed)".to_string(),
172 ));
173 }
174
175 if self.line_end == 0 {
176 return Err(CkError::SpanValidation(
177 "Line end cannot be zero (lines are 1-indexed)".to_string(),
178 ));
179 }
180
181 if self.byte_start > self.byte_end {
183 return Err(CkError::SpanValidation(format!(
184 "Invalid byte range: start ({}) > end ({})",
185 self.byte_start, self.byte_end
186 )));
187 }
188
189 if self.line_start > self.line_end {
191 return Err(CkError::SpanValidation(format!(
192 "Invalid line range: start ({}) > end ({})",
193 self.line_start, self.line_end
194 )));
195 }
196
197 Ok(())
198 }
199
200 pub fn is_valid(&self) -> bool {
202 self.validate().is_ok()
203 }
204
205 pub fn byte_len(&self) -> usize {
207 self.byte_end.saturating_sub(self.byte_start)
208 }
209
210 pub fn line_count(&self) -> usize {
212 self.line_end.saturating_sub(self.line_start) + 1
213 }
214}
215
216#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct FileMetadata {
218 pub path: PathBuf,
219 pub hash: String,
220 pub last_modified: u64,
221 pub size: u64,
222}
223
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct SearchResult {
226 pub file: PathBuf,
227 pub span: Span,
228 pub score: f32,
229 pub preview: String,
230 #[serde(skip_serializing_if = "Option::is_none")]
231 pub lang: Option<Language>,
232 #[serde(skip_serializing_if = "Option::is_none")]
233 pub symbol: Option<String>,
234 #[serde(skip_serializing_if = "Option::is_none")]
235 pub chunk_hash: Option<String>,
236 #[serde(skip_serializing_if = "Option::is_none")]
237 pub index_epoch: Option<u64>,
238}
239
240#[derive(Debug, Clone)]
242pub struct SearchResults {
243 pub matches: Vec<SearchResult>,
244 pub closest_below_threshold: Option<SearchResult>,
246}
247
248#[derive(Debug, Clone, Serialize, Deserialize)]
249pub struct JsonSearchResult {
250 pub file: String,
251 pub span: Span,
252 pub lang: Option<Language>,
253 pub symbol: Option<String>,
254 pub score: f32,
255 pub signals: SearchSignals,
256 pub preview: String,
257 pub model: String,
258}
259
260#[derive(Debug, Clone, Serialize, Deserialize)]
261pub struct JsonlSearchResult {
262 pub path: String,
263 pub span: Span,
264 pub language: Option<String>,
265 #[serde(skip_serializing_if = "Option::is_none")]
266 pub snippet: Option<String>,
267 #[serde(skip_serializing_if = "Option::is_none")]
268 pub score: Option<f32>,
269 #[serde(skip_serializing_if = "Option::is_none")]
270 pub chunk_hash: Option<String>,
271 #[serde(skip_serializing_if = "Option::is_none")]
272 pub index_epoch: Option<u64>,
273}
274
275#[derive(Debug, Clone, Serialize, Deserialize)]
276pub struct SearchSignals {
277 pub lex_rank: Option<usize>,
278 pub vec_rank: Option<usize>,
279 pub rrf_score: f32,
280}
281
/// Search strategy selected through [`SearchOptions::mode`].
///
/// `Regex` is what `SearchOptions::default()` uses.
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
289
/// One entry of `SearchOptions::include_patterns`: a path plus a flag
/// recording whether it refers to a directory.
#[derive(Debug, Clone)]
pub struct IncludePattern {
    /// Path to include.
    pub path: PathBuf,
    /// `true` when `path` designates a directory.
    pub is_dir: bool,
}
295
/// Ignore-related settings for file collection.
///
/// Can be derived from a [`SearchOptions`] via `From<&SearchOptions>`.
#[derive(Debug, Clone)]
pub struct FileCollectionOptions {
    /// Whether `.gitignore` rules are honoured.
    pub respect_gitignore: bool,
    /// Whether `.ckignore` rules are honoured.
    pub use_ckignore: bool,
    /// Additional patterns to exclude.
    pub exclude_patterns: Vec<String>,
}
308
309impl From<&SearchOptions> for FileCollectionOptions {
310 fn from(opts: &SearchOptions) -> Self {
311 Self {
312 respect_gitignore: opts.respect_gitignore,
313 use_ckignore: true, exclude_patterns: opts.exclude_patterns.clone(),
315 }
316 }
317}
318
319#[derive(Debug, Clone)]
320pub struct SearchOptions {
321 pub mode: SearchMode,
322 pub query: String,
323 pub path: PathBuf,
324 pub top_k: Option<usize>,
325 pub threshold: Option<f32>,
326 pub case_insensitive: bool,
327 pub whole_word: bool,
328 pub fixed_string: bool,
329 pub line_numbers: bool,
330 pub context_lines: usize,
331 pub before_context_lines: usize,
332 pub after_context_lines: usize,
333 pub recursive: bool,
334 pub json_output: bool,
335 pub jsonl_output: bool,
336 pub no_snippet: bool,
337 pub reindex: bool,
338 pub show_scores: bool,
339 pub show_filenames: bool,
340 pub files_with_matches: bool,
341 pub files_without_matches: bool,
342 pub exclude_patterns: Vec<String>,
343 pub include_patterns: Vec<IncludePattern>,
344 pub respect_gitignore: bool,
345 pub use_ckignore: bool,
346 pub full_section: bool,
347 pub rerank: bool,
349 pub rerank_model: Option<String>,
350 pub embedding_model: Option<String>,
351}
352
353impl JsonlSearchResult {
354 pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
355 Self {
356 path: result.file.to_string_lossy().to_string(),
357 span: result.span.clone(),
358 language: result.lang.as_ref().map(|l| l.to_string()),
359 snippet: if include_snippet {
360 Some(result.preview.clone())
361 } else {
362 None
363 },
364 score: if result.score >= 0.0 {
365 Some(result.score)
366 } else {
367 None
368 },
369 chunk_hash: result.chunk_hash.clone(),
370 index_epoch: result.index_epoch,
371 }
372 }
373}
374
375impl Default for SearchOptions {
376 fn default() -> Self {
377 Self {
378 mode: SearchMode::Regex,
379 query: String::new(),
380 path: PathBuf::from("."),
381 top_k: None,
382 threshold: None,
383 case_insensitive: false,
384 whole_word: false,
385 fixed_string: false,
386 line_numbers: false,
387 context_lines: 0,
388 before_context_lines: 0,
389 after_context_lines: 0,
390 recursive: true,
391 json_output: false,
392 jsonl_output: false,
393 no_snippet: false,
394 reindex: false,
395 show_scores: false,
396 show_filenames: false,
397 files_with_matches: false,
398 files_without_matches: false,
399 exclude_patterns: get_default_exclude_patterns(),
400 include_patterns: Vec::new(),
401 respect_gitignore: true,
402 use_ckignore: true,
403 full_section: false,
404 rerank: false,
406 rerank_model: None,
407 embedding_model: None,
408 }
409 }
410}
411
/// Returns the built-in list of directory names excluded by default.
///
/// The order is fixed; the list is rebuilt as owned `String`s on each call.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: [&str; 26] = [
        // Tool data and caches
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build output and dependencies
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Python environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // Editor / IDE state
        ".vscode",
        ".idea",
        ".eclipse",
        // Scratch directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| (*name).to_string())
        .collect()
}
451
/// Returns the text written into a freshly created `.ckignore` file.
///
/// The content uses `.gitignore` syntax and lists media, binary, archive,
/// data and config file globs, ending with a marker line for user additions.
pub fn get_default_ckignore_content() -> &'static str {
    const DEFAULT_CKIGNORE: &str = r#"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
"#;

    DEFAULT_CKIGNORE
}
521
522pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
524 let ckignore_path = repo_root.join(".ckignore");
525
526 if !ckignore_path.exists() {
527 return Ok(Vec::new());
528 }
529
530 let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
531
532 let patterns: Vec<String> = content
533 .lines()
534 .map(|line| line.trim())
535 .filter(|line| !line.is_empty() && !line.starts_with('#'))
536 .map(|line| line.to_string())
537 .collect();
538
539 Ok(patterns)
540}
541
542pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
544 let ckignore_path = repo_root.join(".ckignore");
545
546 if ckignore_path.exists() {
547 return Ok(false); }
549
550 std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
551
552 Ok(true) }
554
555pub fn build_exclude_patterns(additional_excludes: &[String], use_defaults: bool) -> Vec<String> {
577 let mut patterns = Vec::new();
578
579 patterns.extend(additional_excludes.iter().cloned());
581
582 if use_defaults {
585 patterns.extend(get_default_exclude_patterns());
586 }
587
588 patterns
589}
590
/// Computes where the sidecar file for `file_path` lives under
/// `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored below `.ck`, and
/// `.ck` is appended to the file name (`src/main.rs` becomes
/// `.ck/src/main.rs.ck`, `README` becomes `.ck/README.ck`).
///
/// When `file_path` is not located under `repo_root`, its root and prefix
/// components are dropped before joining. Pushing an absolute path onto a
/// `PathBuf` replaces the whole buffer, which would otherwise place the
/// sidecar next to the source file instead of inside `.ck`.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative: PathBuf = match file_path.strip_prefix(repo_root) {
        Ok(inside_repo) => inside_repo.to_path_buf(),
        // Outside the repo root: keep the sidecar under `.ck` anyway.
        Err(_) => file_path
            .components()
            .filter(|part| {
                !matches!(
                    part,
                    std::path::Component::Prefix(_) | std::path::Component::RootDir
                )
            })
            .collect(),
    };

    let mut sidecar = repo_root.join(".ck");
    sidecar.push(&relative);

    // Append ".ck" to the existing extension rather than replacing it.
    let ext = match relative.extension() {
        Some(existing) => format!("{}.ck", existing.to_string_lossy()),
        None => "ck".to_string(),
    };
    sidecar.set_extension(ext);
    sidecar
}
602
603pub fn compute_file_hash(path: &Path) -> Result<String> {
604 use std::io::Read;
605
606 let mut file = std::fs::File::open(path)?;
607 let mut hasher = blake3::Hasher::new();
608
609 let mut buffer = [0u8; 65536]; loop {
612 let bytes_read = file.read(&mut buffer)?;
613 if bytes_read == 0 {
614 break;
615 }
616 hasher.update(&buffer[..bytes_read]);
617 }
618
619 let hash = hasher.finalize();
620 Ok(hash.to_hex().to_string())
621}
622
623pub fn compute_chunk_hash(
631 text: &str,
632 leading_trivia: &[String],
633 trailing_trivia: &[String],
634) -> String {
635 let mut hasher = blake3::Hasher::new();
636
637 hasher.update(text.as_bytes());
639
640 for trivia in leading_trivia {
642 hasher.update(trivia.as_bytes());
643 }
644
645 for trivia in trailing_trivia {
647 hasher.update(trivia.as_bytes());
648 }
649
650 hasher.finalize().to_hex().to_string()
651}
652
/// Helpers for recognising PDF files and locating their content cache.
pub mod pdf {
    use std::path::{Component, Path, PathBuf};

    /// Returns `true` when `path` has a `pdf` extension, compared
    /// ASCII-case-insensitively. A file literally named `pdf` (no
    /// extension) does not count.
    pub fn is_pdf_file(path: &Path) -> bool {
        match path.extension().and_then(|ext| ext.to_str()) {
            Some(ext) => ext.eq_ignore_ascii_case("pdf"),
            None => false,
        }
    }

    /// Computes the content-cache path for `file_path` under
    /// `<repo_root>/.ck/content`.
    ///
    /// The file's path relative to `repo_root` is mirrored below the cache
    /// directory and `.txt` is appended to the file name
    /// (`docs/manual.pdf` becomes `.ck/content/docs/manual.pdf.txt`).
    ///
    /// When `file_path` is not located under `repo_root`, its root and
    /// prefix components are dropped before joining. Pushing an absolute
    /// path onto a `PathBuf` replaces the whole buffer, which would
    /// otherwise place the cache file outside `.ck/content`.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative: PathBuf = match file_path.strip_prefix(repo_root) {
            Ok(inside_repo) => inside_repo.to_path_buf(),
            // Outside the repo root: keep the cache entry under `.ck/content`.
            Err(_) => file_path
                .components()
                .filter(|part| !matches!(part, Component::Prefix(_) | Component::RootDir))
                .collect(),
        };

        let mut cache_path = repo_root.join(".ck").join("content");
        cache_path.push(&relative);

        // Append ".txt" to the existing extension rather than replacing it.
        let ext = match relative.extension() {
            Some(existing) => format!("{}.txt", existing.to_string_lossy()),
            None => "txt".to_string(),
        };
        cache_path.set_extension(ext);

        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            assert!(is_pdf_file(&PathBuf::from("test.pdf")));
            assert!(is_pdf_file(&PathBuf::from("test.PDF")));
            assert!(is_pdf_file(&PathBuf::from("test.Pdf")));
            assert!(!is_pdf_file(&PathBuf::from("test.txt")));
            assert!(!is_pdf_file(&PathBuf::from("test")));
            assert!(!is_pdf_file(&PathBuf::from("pdf")));
        }

        #[test]
        fn test_get_content_cache_path() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_outside_repo_stays_in_cache() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/elsewhere/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/elsewhere/manual.pdf.txt")
            );
        }
    }
}
733
734#[cfg(test)]
735mod tests {
736 use super::*;
737 use std::fs;
738 use tempfile::TempDir;
739
740 #[test]
741 fn test_span_valid_creation() {
742 let span = Span::new(0, 10, 1, 2).unwrap();
744 assert_eq!(span.byte_start, 0);
745 assert_eq!(span.byte_end, 10);
746 assert_eq!(span.line_start, 1);
747 assert_eq!(span.line_end, 2);
748 assert!(span.is_valid());
749 }
750
751 #[test]
752 fn test_span_validation_valid_cases() {
753 let span = Span::new(10, 10, 1, 1).unwrap();
755 assert!(span.is_valid());
756 assert_eq!(span.byte_len(), 0);
757 assert_eq!(span.line_count(), 1);
758
759 let span = Span::new(0, 100, 1, 10).unwrap();
761 assert!(span.is_valid());
762 assert_eq!(span.byte_len(), 100);
763 assert_eq!(span.line_count(), 10);
764
765 let span = Span::new(5, 25, 3, 3).unwrap();
767 assert!(span.is_valid());
768 assert_eq!(span.byte_len(), 20);
769 assert_eq!(span.line_count(), 1);
770 }
771
772 #[test]
773 fn test_span_validation_invalid_byte_range() {
774 let result = Span::new(10, 5, 1, 2);
776 assert!(result.is_err());
777 if let Err(CkError::SpanValidation(msg)) = result {
778 assert!(msg.contains("Invalid byte range"));
779 assert!(msg.contains("start (10) > end (5)"));
780 } else {
781 panic!("Expected SpanValidation error");
782 }
783 }
784
785 #[test]
786 fn test_span_validation_invalid_line_range() {
787 let result = Span::new(0, 10, 5, 2);
789 assert!(result.is_err());
790 if let Err(CkError::SpanValidation(msg)) = result {
791 assert!(msg.contains("Invalid line range"));
792 assert!(msg.contains("start (5) > end (2)"));
793 } else {
794 panic!("Expected SpanValidation error");
795 }
796 }
797
798 #[test]
799 fn test_span_validation_zero_line_numbers() {
800 let result = Span::new(0, 10, 0, 2);
802 assert!(result.is_err());
803 if let Err(CkError::SpanValidation(msg)) = result {
804 assert!(msg.contains("Line start cannot be zero"));
805 } else {
806 panic!("Expected SpanValidation error");
807 }
808
809 let result = Span::new(0, 10, 1, 0);
811 assert!(result.is_err());
812 if let Err(CkError::SpanValidation(msg)) = result {
813 assert!(msg.contains("Line end cannot be zero"));
814 } else {
815 panic!("Expected SpanValidation error");
816 }
817 }
818
819 #[test]
820 fn test_span_unchecked_creation() {
821 let span = Span::new_unchecked(10, 5, 0, 1);
823 assert_eq!(span.byte_start, 10);
824 assert_eq!(span.byte_end, 5);
825 assert_eq!(span.line_start, 0);
826 assert_eq!(span.line_end, 1);
827 assert!(!span.is_valid()); }
829
830 #[test]
831 fn test_span_validation_methods() {
832 let valid_span = Span::new_unchecked(0, 10, 1, 2);
834 assert!(valid_span.validate().is_ok());
835 assert!(valid_span.is_valid());
836
837 let invalid_span = Span::new_unchecked(10, 5, 1, 2);
839 assert!(invalid_span.validate().is_err());
840 assert!(!invalid_span.is_valid());
841
842 let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
844 assert!(zero_line_span.validate().is_err());
845 assert!(!zero_line_span.is_valid());
846 }
847
848 #[test]
849 fn test_span_utility_methods() {
850 let span = Span::new(10, 25, 5, 8).unwrap();
851
852 assert_eq!(span.byte_len(), 15);
854
855 assert_eq!(span.line_count(), 4); let single_line = Span::new(0, 5, 1, 1).unwrap();
860 assert_eq!(single_line.line_count(), 1);
861 assert_eq!(single_line.byte_len(), 5);
862
863 let empty = Span::new(10, 10, 3, 3).unwrap();
865 assert_eq!(empty.byte_len(), 0);
866 assert_eq!(empty.line_count(), 1);
867 }
868
869 #[test]
870 fn test_span_legacy_struct_literal_still_works() {
871 let span = Span {
873 byte_start: 0,
874 byte_end: 10,
875 line_start: 1,
876 line_end: 2,
877 };
878
879 assert_eq!(span.byte_start, 0);
880 assert_eq!(span.byte_end, 10);
881 assert_eq!(span.line_start, 1);
882 assert_eq!(span.line_end, 2);
883 assert!(span.is_valid());
884 }
885
886 #[test]
887 fn test_search_options_default() {
888 let options = SearchOptions::default();
889 assert!(matches!(options.mode, SearchMode::Regex));
890 assert_eq!(options.query, "");
891 assert_eq!(options.path, PathBuf::from("."));
892 assert_eq!(options.top_k, None);
893 assert_eq!(options.threshold, None);
894 assert!(!options.case_insensitive);
895 assert!(!options.whole_word);
896 assert!(!options.fixed_string);
897 assert!(!options.line_numbers);
898 assert_eq!(options.context_lines, 0);
899 assert!(options.recursive);
900 assert!(!options.json_output);
901 assert!(!options.reindex);
902 assert!(!options.show_scores);
903 assert!(!options.show_filenames);
904 }
905
906 #[test]
907 fn test_file_metadata_serialization() {
908 let metadata = FileMetadata {
909 path: PathBuf::from("test.txt"),
910 hash: "abc123".to_string(),
911 last_modified: 1234567890,
912 size: 1024,
913 };
914
915 let json = serde_json::to_string(&metadata).unwrap();
916 let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
917
918 assert_eq!(metadata.path, deserialized.path);
919 assert_eq!(metadata.hash, deserialized.hash);
920 assert_eq!(metadata.last_modified, deserialized.last_modified);
921 assert_eq!(metadata.size, deserialized.size);
922 }
923
924 #[test]
925 fn test_search_result_serialization() {
926 let result = SearchResult {
927 file: PathBuf::from("test.txt"),
928 span: Span {
929 byte_start: 0,
930 byte_end: 10,
931 line_start: 1,
932 line_end: 1,
933 },
934 score: 0.95,
935 preview: "hello world".to_string(),
936 lang: Some(Language::Rust),
937 symbol: Some("main".to_string()),
938 chunk_hash: Some("abc123".to_string()),
939 index_epoch: Some(1699123456),
940 };
941
942 let json = serde_json::to_string(&result).unwrap();
943 let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
944
945 assert_eq!(result.file, deserialized.file);
946 assert_eq!(result.score, deserialized.score);
947 assert_eq!(result.preview, deserialized.preview);
948 assert_eq!(result.lang, deserialized.lang);
949 assert_eq!(result.symbol, deserialized.symbol);
950 assert_eq!(result.chunk_hash, deserialized.chunk_hash);
951 assert_eq!(result.index_epoch, deserialized.index_epoch);
952 }
953
954 #[test]
955 fn test_jsonl_search_result_conversion() {
956 let result = SearchResult {
957 file: PathBuf::from("src/auth.rs"),
958 span: Span {
959 byte_start: 1203,
960 byte_end: 1456,
961 line_start: 42,
962 line_end: 58,
963 },
964 score: 0.89,
965 preview: "function authenticate(user) {...}".to_string(),
966 lang: Some(Language::Rust),
967 symbol: Some("authenticate".to_string()),
968 chunk_hash: Some("abc123def456".to_string()),
969 index_epoch: Some(1699123456),
970 };
971
972 let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
974 assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
975 assert_eq!(jsonl_with_snippet.span.line_start, 42);
976 assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
977 assert_eq!(
978 jsonl_with_snippet.snippet,
979 Some("function authenticate(user) {...}".to_string())
980 );
981 assert_eq!(jsonl_with_snippet.score, Some(0.89));
982 assert_eq!(
983 jsonl_with_snippet.chunk_hash,
984 Some("abc123def456".to_string())
985 );
986 assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
987
988 let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
990 assert_eq!(jsonl_no_snippet.snippet, None);
991 assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
992 }
993
994 #[test]
995 fn test_get_sidecar_path() {
996 let repo_root = PathBuf::from("/home/user/project");
997 let file_path = PathBuf::from("/home/user/project/src/main.rs");
998
999 let sidecar = get_sidecar_path(&repo_root, &file_path);
1000 let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
1001
1002 assert_eq!(sidecar, expected);
1003 }
1004
1005 #[test]
1006 fn test_get_sidecar_path_no_extension() {
1007 let repo_root = PathBuf::from("/project");
1008 let file_path = PathBuf::from("/project/README");
1009
1010 let sidecar = get_sidecar_path(&repo_root, &file_path);
1011 let expected = PathBuf::from("/project/.ck/README.ck");
1012
1013 assert_eq!(sidecar, expected);
1014 }
1015
1016 #[test]
1017 fn test_compute_file_hash() {
1018 let temp_dir = TempDir::new().unwrap();
1019 let file_path = temp_dir.path().join("test.txt");
1020
1021 fs::write(&file_path, "hello world").unwrap();
1022
1023 let hash1 = compute_file_hash(&file_path).unwrap();
1024 let hash2 = compute_file_hash(&file_path).unwrap();
1025
1026 assert_eq!(hash1, hash2);
1028 assert!(!hash1.is_empty());
1029
1030 fs::write(&file_path, "hello rust").unwrap();
1032 let hash3 = compute_file_hash(&file_path).unwrap();
1033 assert_ne!(hash1, hash3);
1034 }
1035
1036 #[test]
1037 fn test_compute_file_hash_nonexistent() {
1038 let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
1039 assert!(result.is_err());
1040 }
1041
1042 #[test]
1043 fn test_compute_file_hash_large_file() {
1044 let temp_dir = TempDir::new().unwrap();
1045 let file_path = temp_dir.path().join("large_test.txt");
1046
1047 let large_content = "a".repeat(100_000); fs::write(&file_path, &large_content).unwrap();
1050
1051 let hash1 = compute_file_hash(&file_path).unwrap();
1052 let hash2 = compute_file_hash(&file_path).unwrap();
1053
1054 assert_eq!(hash1, hash2);
1056 assert!(!hash1.is_empty());
1057
1058 fs::write(&file_path, "small content").unwrap();
1060 let hash3 = compute_file_hash(&file_path).unwrap();
1061 assert_ne!(hash1, hash3);
1062 }
1063
1064 #[test]
1065 fn test_json_search_result_serialization() {
1066 let signals = SearchSignals {
1067 lex_rank: Some(1),
1068 vec_rank: Some(2),
1069 rrf_score: 0.85,
1070 };
1071
1072 let result = JsonSearchResult {
1073 file: "test.txt".to_string(),
1074 span: Span {
1075 byte_start: 0,
1076 byte_end: 5,
1077 line_start: 1,
1078 line_end: 1,
1079 },
1080 lang: None, symbol: None,
1082 score: 0.95,
1083 signals,
1084 preview: "hello".to_string(),
1085 model: "bge-small".to_string(),
1086 };
1087
1088 let json = serde_json::to_string(&result).unwrap();
1089 let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
1090
1091 assert_eq!(result.file, deserialized.file);
1092 assert_eq!(result.score, deserialized.score);
1093 assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
1094 assert_eq!(result.model, deserialized.model);
1095 }
1096
1097 #[test]
1098 fn test_language_from_extension() {
1099 assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
1100 assert_eq!(Language::from_extension("py"), Some(Language::Python));
1101 assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
1102 assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
1103 assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
1104 assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
1105 assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
1106 assert_eq!(Language::from_extension("go"), Some(Language::Go));
1107 assert_eq!(Language::from_extension("java"), Some(Language::Java));
1108 assert_eq!(Language::from_extension("c"), Some(Language::C));
1109 assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
1110 assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
1111 assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
1112 assert_eq!(Language::from_extension("php"), Some(Language::Php));
1113 assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
1114 assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
1115 assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
1116 assert_eq!(Language::from_extension("ex"), Some(Language::Elixir));
1117 assert_eq!(Language::from_extension("exs"), Some(Language::Elixir));
1118 assert_eq!(Language::from_extension("unknown"), None);
1119 }
1120
1121 #[test]
1122 fn test_language_from_extension_case_insensitive() {
1123 assert_eq!(Language::from_extension("RS"), Some(Language::Rust));
1125 assert_eq!(Language::from_extension("PY"), Some(Language::Python));
1126 assert_eq!(Language::from_extension("JS"), Some(Language::JavaScript));
1127 assert_eq!(Language::from_extension("TS"), Some(Language::TypeScript));
1128 assert_eq!(Language::from_extension("TSX"), Some(Language::TypeScript));
1129 assert_eq!(Language::from_extension("HS"), Some(Language::Haskell));
1130 assert_eq!(Language::from_extension("LHS"), Some(Language::Haskell));
1131 assert_eq!(Language::from_extension("GO"), Some(Language::Go));
1132 assert_eq!(Language::from_extension("JAVA"), Some(Language::Java));
1133 assert_eq!(Language::from_extension("C"), Some(Language::C));
1134 assert_eq!(Language::from_extension("CPP"), Some(Language::Cpp));
1135 assert_eq!(Language::from_extension("CC"), Some(Language::Cpp));
1136 assert_eq!(Language::from_extension("CXX"), Some(Language::Cpp));
1137 assert_eq!(Language::from_extension("H"), Some(Language::Cpp));
1138 assert_eq!(Language::from_extension("HPP"), Some(Language::Cpp));
1139 assert_eq!(Language::from_extension("CS"), Some(Language::CSharp));
1140 assert_eq!(Language::from_extension("RB"), Some(Language::Ruby));
1141 assert_eq!(Language::from_extension("PHP"), Some(Language::Php));
1142 assert_eq!(Language::from_extension("SWIFT"), Some(Language::Swift));
1143 assert_eq!(Language::from_extension("KT"), Some(Language::Kotlin));
1144 assert_eq!(Language::from_extension("KTS"), Some(Language::Kotlin));
1145 assert_eq!(Language::from_extension("EX"), Some(Language::Elixir));
1146 assert_eq!(Language::from_extension("EXS"), Some(Language::Elixir));
1147 assert_eq!(Language::from_extension("PDF"), Some(Language::Pdf));
1148
1149 assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
1151 assert_eq!(Language::from_extension("Py"), Some(Language::Python));
1152 assert_eq!(Language::from_extension("Js"), Some(Language::JavaScript));
1153 assert_eq!(Language::from_extension("Ts"), Some(Language::TypeScript));
1154 assert_eq!(Language::from_extension("TsX"), Some(Language::TypeScript));
1155 assert_eq!(Language::from_extension("Hs"), Some(Language::Haskell));
1156 assert_eq!(Language::from_extension("Go"), Some(Language::Go));
1157 assert_eq!(Language::from_extension("Java"), Some(Language::Java));
1158 assert_eq!(Language::from_extension("Cpp"), Some(Language::Cpp));
1159 assert_eq!(Language::from_extension("Rb"), Some(Language::Ruby));
1160 assert_eq!(Language::from_extension("Php"), Some(Language::Php));
1161 assert_eq!(Language::from_extension("Swift"), Some(Language::Swift));
1162 assert_eq!(Language::from_extension("Kt"), Some(Language::Kotlin));
1163 assert_eq!(Language::from_extension("Ex"), Some(Language::Elixir));
1164 assert_eq!(Language::from_extension("Pdf"), Some(Language::Pdf));
1165
1166 assert_eq!(Language::from_extension("UNKNOWN"), None);
1168 assert_eq!(Language::from_extension("Unknown"), None);
1169 }
1170
1171 #[test]
1172 fn test_language_from_path() {
1173 assert_eq!(
1174 Language::from_path(&PathBuf::from("test.rs")),
1175 Some(Language::Rust)
1176 );
1177 assert_eq!(
1178 Language::from_path(&PathBuf::from("test.py")),
1179 Some(Language::Python)
1180 );
1181 assert_eq!(
1182 Language::from_path(&PathBuf::from("test.js")),
1183 Some(Language::JavaScript)
1184 );
1185 assert_eq!(
1186 Language::from_path(&PathBuf::from("test.hs")),
1187 Some(Language::Haskell)
1188 );
1189 assert_eq!(
1190 Language::from_path(&PathBuf::from("test.lhs")),
1191 Some(Language::Haskell)
1192 );
1193 assert_eq!(
1194 Language::from_path(&PathBuf::from("test.go")),
1195 Some(Language::Go)
1196 );
1197 assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None); assert_eq!(Language::from_path(&PathBuf::from("noext")), None); }
1200
/// Verifies that `Language::from_path` ignores the letter case of the file
/// extension: all-uppercase and mixed-case extensions must resolve to the
/// same language as their lowercase form, while unknown extensions stay
/// unrecognised regardless of case.
#[test]
fn test_language_from_path_case_insensitive() {
    // Shared checker so every table below is asserted the same way.
    let check = |file_name: &str, expected: Option<Language>| {
        assert_eq!(
            Language::from_path(&PathBuf::from(file_name)),
            expected,
            "unexpected language for path {:?}",
            file_name
        );
    };

    // Fully uppercase extensions.
    let uppercase = [
        ("MAIN.RS", Language::Rust),
        ("app.PY", Language::Python),
        ("script.JS", Language::JavaScript),
        ("types.TS", Language::TypeScript),
        ("Component.TSX", Language::TypeScript),
        ("module.HS", Language::Haskell),
        ("server.GO", Language::Go),
        ("App.JAVA", Language::Java),
        ("main.C", Language::C),
        ("utils.CPP", Language::Cpp),
        ("Program.CS", Language::CSharp),
        ("script.RB", Language::Ruby),
        ("index.PHP", Language::Php),
        ("App.SWIFT", Language::Swift),
        ("Main.KT", Language::Kotlin),
        ("document.PDF", Language::Pdf),
    ];
    for &(file_name, language) in uppercase.iter() {
        check(file_name, Some(language));
    }

    // Mixed-case extensions.
    let mixed_case = [
        ("config.Rs", Language::Rust),
        ("helper.Py", Language::Python),
        ("utils.Js", Language::JavaScript),
        ("interfaces.Ts", Language::TypeScript),
        ("Component.TsX", Language::TypeScript),
        ("main.Cpp", Language::Cpp),
        ("report.Pdf", Language::Pdf),
    ];
    for &(file_name, language) in mixed_case.iter() {
        check(file_name, Some(language));
    }

    // Unknown extensions must not match in any casing.
    for &file_name in ["test.UNKNOWN", "test.Unknown"].iter() {
        check(file_name, None);
    }
}
1303
/// Verifies the string form produced by `to_string()` for a sample of
/// `Language` variants: each is expected to render as its lowercase name.
#[test]
fn test_language_display() {
    // (variant, expected `to_string()` output)
    let expectations = [
        (Language::Rust, "rust"),
        (Language::Python, "python"),
        (Language::JavaScript, "javascript"),
        (Language::TypeScript, "typescript"),
        (Language::Go, "go"),
        (Language::Java, "java"),
    ];

    for &(language, rendered) in expectations.iter() {
        assert_eq!(language.to_string(), rendered);
    }
}
1313
/// Verifies `create_ckignore_if_missing` against a fresh temporary
/// directory: the first call reports that it created `.ckignore`, the file
/// exists with the expected default entries, and a second call reports that
/// nothing was created.
#[test]
fn test_create_ckignore_if_missing() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // First invocation on a fresh directory must report creation.
    assert!(create_ckignore_if_missing(root).unwrap());

    let ignore_file = root.join(".ckignore");
    assert!(ignore_file.exists());

    // The generated file must carry these patterns and section headers.
    let body = fs::read_to_string(&ignore_file).unwrap();
    let expected_fragments = [
        "*.png",
        "*.json",
        "*.yaml",
        "# Images",
        "# Config formats",
    ];
    for &fragment in expected_fragments.iter() {
        assert!(body.contains(fragment));
    }

    // Second invocation sees the existing file and must report `false`.
    assert!(!create_ckignore_if_missing(root).unwrap());
}
1339
/// Verifies `read_ckignore_patterns`: with no `.ckignore` present it yields
/// no patterns, and with a file containing comments and a blank line it
/// yields only the four real patterns.
#[test]
fn test_read_ckignore_patterns() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // No .ckignore has been written yet, so nothing should be returned.
    let before = read_ckignore_patterns(root).unwrap();
    assert_eq!(before.len(), 0);

    let contents = r#"# Comment line
*.png
*.jpg

# Another comment
*.json
*.yaml
"#;
    fs::write(root.join(".ckignore"), contents).unwrap();

    let after = read_ckignore_patterns(root).unwrap();
    assert_eq!(after.len(), 4);
    for &wanted in ["*.png", "*.jpg", "*.json", "*.yaml"].iter() {
        assert!(after.iter().any(|p| p.as_str() == wanted));
    }
    // Comment lines must never surface as patterns.
    assert!(after.iter().all(|p| !p.starts_with('#')));
}
1374
/// Verifies that `read_ckignore_patterns` skips blank lines: a file with
/// three patterns separated by runs of empty lines yields exactly those
/// three patterns.
#[test]
fn test_read_ckignore_patterns_with_empty_lines() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let contents = r#"
*.png

*.jpg


*.json
"#;
    fs::write(root.join(".ckignore"), contents).unwrap();

    let found = read_ckignore_patterns(root).unwrap();
    assert_eq!(found.len(), 3);
    for &wanted in ["*.png", "*.jpg", "*.json"].iter() {
        assert!(found.iter().any(|p| p.as_str() == wanted));
    }
}
1400
/// Verifies that the text returned by `get_default_ckignore_content`
/// contains the expected file patterns, section headers, and the
/// "issue #27" reference.
#[test]
fn test_get_default_ckignore_content() {
    let defaults = get_default_ckignore_content();

    // File patterns that must be part of the defaults.
    let patterns = [
        "*.png", "*.jpg", "*.mp4", "*.mp3", "*.exe", "*.zip", "*.db", "*.json", "*.yaml",
    ];
    for &pattern in patterns.iter() {
        assert!(defaults.contains(pattern));
    }

    // Section headers that group the patterns.
    let headers = ["# Images", "# Video", "# Audio", "# Config formats"];
    for &header in headers.iter() {
        assert!(defaults.contains(header));
    }

    // The defaults are expected to mention the originating issue.
    assert!(defaults.contains("issue #27"));
}
1425
/// Verifies `build_exclude_patterns` with defaults enabled: the caller's
/// patterns are present, default entries mentioning `.git` and
/// `node_modules` are present, and the caller's patterns come before the
/// defaults.
#[test]
fn test_build_exclude_patterns_with_defaults() {
    let extra = vec!["*.custom".to_string(), "temp/".to_string()];
    let merged = build_exclude_patterns(&extra, true);

    // Caller-supplied entries are kept.
    assert!(merged.iter().any(|p| p == "*.custom"));
    assert!(merged.iter().any(|p| p == "temp/"));

    // Default entries are appended.
    assert!(merged.iter().any(|p| p.contains(".git")));
    assert!(merged.iter().any(|p| p.contains("node_modules")));

    // Ordering: the first custom entry precedes the first default entry.
    let first_custom = merged.iter().position(|p| p == "*.custom").unwrap();
    let first_default = merged.iter().position(|p| p.contains(".git")).unwrap();
    assert!(first_custom < first_default);
}
1445
/// Verifies `build_exclude_patterns` with defaults disabled: the result
/// holds exactly the two caller-supplied patterns and nothing mentioning
/// `.git` or `node_modules`.
#[test]
fn test_build_exclude_patterns_without_defaults() {
    let extra = vec!["*.custom".to_string(), "temp/".to_string()];
    let only_extra = build_exclude_patterns(&extra, false);

    // Caller-supplied entries are kept.
    assert!(only_extra.iter().any(|p| p == "*.custom"));
    assert!(only_extra.iter().any(|p| p == "temp/"));

    // No default entries leak in.
    assert!(only_extra.iter().all(|p| !p.contains(".git")));
    assert!(only_extra.iter().all(|p| !p.contains("node_modules")));

    assert_eq!(only_extra.len(), 2);
}
1463
/// Verifies `build_exclude_patterns` when no additional patterns are given:
/// with defaults enabled the result is non-empty and mentions `.git`; with
/// defaults disabled the result is empty.
#[test]
fn test_build_exclude_patterns_empty_additional() {
    // Defaults only.
    let defaults_only = build_exclude_patterns(&[], true);
    assert!(defaults_only.iter().any(|p| p.contains(".git")));
    assert!(!defaults_only.is_empty());

    // Neither defaults nor additional patterns.
    let nothing = build_exclude_patterns(&[], false);
    assert!(nothing.is_empty());
}
1479
/// Exercises `read_ckignore_patterns` on degenerate `.ckignore` contents:
/// an empty file, a comments-only file, a whitespace-only file, and a file
/// mixing comments, blank lines and patterns padded with spaces.
#[test]
fn test_read_ckignore_edge_cases() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let ignore_file = root.join(".ckignore");

    // Overwrite .ckignore with `contents`, then read the patterns back.
    let patterns_for = |contents: &str| {
        fs::write(&ignore_file, contents).unwrap();
        read_ckignore_patterns(root).unwrap()
    };

    // Empty file.
    assert_eq!(patterns_for("").len(), 0);

    // Only comments.
    assert_eq!(
        patterns_for("# Comment 1\n# Comment 2\n# Comment 3\n").len(),
        0
    );

    // Only whitespace.
    assert_eq!(patterns_for(" \n\t\n \t \n").len(), 0);

    // Comments, blank lines and space-padded patterns mixed together.
    let mixed = patterns_for("# Comment\n\n \n*.tmp \n *.log\n\n# Another comment\n");
    assert_eq!(mixed.len(), 2);
    assert!(mixed.iter().any(|p| p == "*.tmp"));
    assert!(mixed.iter().any(|p| p == "*.log"));
    // Returned patterns must carry no leading or trailing spaces.
    assert!(mixed.iter().all(|p| !p.starts_with(' ')));
    assert!(mixed.iter().all(|p| !p.ends_with(' ')));
}
1515}