1pub mod heatmap;
2
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use thiserror::Error;
6
/// Error type shared by the fallible operations in this crate.
///
/// Variants marked `#[from]` are produced automatically by the `?` operator
/// from the wrapped error type; the `String` variants carry a free-form message.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a [`regex::Error`].
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a [`bincode::Error`].
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a [`serde_json::Error`].
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Index-related failure described by the message.
    #[error("Index error: {0}")]
    Index(String),

    /// Search-related failure described by the message.
    #[error("Search error: {0}")]
    Search(String),

    /// Embedding-related failure described by the message.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// A [`Span`] failed [`Span::validate`]; the message names the violated rule.
    #[error("Span validation error: {0}")]
    SpanValidation(String),

    /// Any failure that does not fit the other variants.
    #[error("Other error: {0}")]
    Other(String),
}
36
/// Crate-wide result alias whose error type is [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
38
/// Languages and document formats that can be recognised from a file extension.
///
/// See [`Language::from_extension`] for the extension mapping and the
/// `Display` impl for the lowercase name of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Haskell,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
    Zig,
    /// PDF documents (extension `pdf`).
    Pdf,
}
58
59impl Language {
60 pub fn from_extension(ext: &str) -> Option<Self> {
61 match ext.to_lowercase().as_str() {
63 "rs" => Some(Language::Rust),
64 "py" => Some(Language::Python),
65 "js" => Some(Language::JavaScript),
66 "ts" | "tsx" => Some(Language::TypeScript),
67 "hs" | "lhs" => Some(Language::Haskell),
68 "go" => Some(Language::Go),
69 "java" => Some(Language::Java),
70 "c" => Some(Language::C),
71 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
72 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
74 "rb" => Some(Language::Ruby),
75 "php" => Some(Language::Php),
76 "swift" => Some(Language::Swift),
77 "kt" | "kts" => Some(Language::Kotlin),
78 "zig" => Some(Language::Zig),
79 "pdf" => Some(Language::Pdf),
80 _ => None,
81 }
82 }
83
84 pub fn from_path(path: &Path) -> Option<Self> {
85 path.extension()
86 .and_then(|ext| ext.to_str())
87 .and_then(Self::from_extension)
88 }
89}
90
91impl std::fmt::Display for Language {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 let name = match self {
94 Language::Rust => "rust",
95 Language::Python => "python",
96 Language::JavaScript => "javascript",
97 Language::TypeScript => "typescript",
98 Language::Haskell => "haskell",
99 Language::Go => "go",
100 Language::Java => "java",
101 Language::C => "c",
102 Language::Cpp => "cpp",
103 Language::CSharp => "csharp",
104 Language::Ruby => "ruby",
105 Language::Php => "php",
106 Language::Swift => "swift",
107 Language::Kotlin => "kotlin",
108 Language::Zig => "zig",
109 Language::Pdf => "pdf",
110 };
111 write!(f, "{}", name)
112 }
113}
114
/// A region of a file expressed both as a byte range and as a line range.
///
/// The fields are public, so a `Span` can be built with a struct literal
/// without any checks; use [`Span::new`] or [`Span::validate`] to enforce the
/// invariants (`byte_start <= byte_end`, `1 <= line_start <= line_end`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the span begins.
    pub byte_start: usize,
    /// Byte offset where the span ends; `byte_len` is `byte_end - byte_start`,
    /// which suggests an exclusive end — TODO confirm with producers.
    pub byte_end: usize,
    /// First line of the span (1-indexed).
    pub line_start: usize,
    /// Last line of the span (1-indexed, inclusive per `line_count`).
    pub line_end: usize,
}
122
123impl Span {
124 pub fn new(
126 byte_start: usize,
127 byte_end: usize,
128 line_start: usize,
129 line_end: usize,
130 ) -> Result<Self> {
131 let span = Self {
132 byte_start,
133 byte_end,
134 line_start,
135 line_end,
136 };
137 span.validate()?;
138 Ok(span)
139 }
140
141 pub fn new_unchecked(
147 byte_start: usize,
148 byte_end: usize,
149 line_start: usize,
150 line_end: usize,
151 ) -> Self {
152 Self {
153 byte_start,
154 byte_end,
155 line_start,
156 line_end,
157 }
158 }
159
160 pub fn validate(&self) -> Result<()> {
162 if self.line_start == 0 {
164 return Err(CkError::SpanValidation(
165 "Line start cannot be zero (lines are 1-indexed)".to_string(),
166 ));
167 }
168
169 if self.line_end == 0 {
170 return Err(CkError::SpanValidation(
171 "Line end cannot be zero (lines are 1-indexed)".to_string(),
172 ));
173 }
174
175 if self.byte_start > self.byte_end {
177 return Err(CkError::SpanValidation(format!(
178 "Invalid byte range: start ({}) > end ({})",
179 self.byte_start, self.byte_end
180 )));
181 }
182
183 if self.line_start > self.line_end {
185 return Err(CkError::SpanValidation(format!(
186 "Invalid line range: start ({}) > end ({})",
187 self.line_start, self.line_end
188 )));
189 }
190
191 Ok(())
192 }
193
194 pub fn is_valid(&self) -> bool {
196 self.validate().is_ok()
197 }
198
199 pub fn byte_len(&self) -> usize {
201 self.byte_end.saturating_sub(self.byte_start)
202 }
203
204 pub fn line_count(&self) -> usize {
206 self.line_end.saturating_sub(self.line_start) + 1
207 }
208}
209
/// Bookkeeping data recorded for a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// Content hash — presumably the hex digest from `compute_file_hash`; verify against callers.
    pub hash: String,
    /// Last-modified timestamp; the unit/epoch is not established here — TODO confirm.
    pub last_modified: u64,
    /// File size — presumably in bytes; TODO confirm.
    pub size: u64,
}
217
/// A single search hit.
///
/// The optional fields are left out of the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File in which the hit was found.
    pub file: PathBuf,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Score of the hit; `JsonlSearchResult::from_search_result` treats
    /// negative values as "no score".
    pub score: f32,
    /// Text shown for the hit.
    pub preview: String,
    /// Detected language of `file`, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
    /// Hash of the matched chunk, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch the hit came from, when available — exact meaning is
    /// defined by the indexer; TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
233
/// Outcome of a search: the hits plus an optional near miss.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// Hits returned by the search.
    pub matches: Vec<SearchResult>,
    /// Presumably the best candidate that scored below the threshold — confirm
    /// against the search implementation.
    pub closest_below_threshold: Option<SearchResult>,
}
241
/// Search hit in the shape used for JSON output.
///
/// Unlike [`SearchResult`], `None` fields are serialized (no `skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File path as a string.
    pub file: String,
    /// Location of the hit.
    pub span: Span,
    /// Detected language, when known.
    pub lang: Option<Language>,
    /// Symbol name, when known.
    pub symbol: Option<String>,
    /// Score of the hit.
    pub score: f32,
    /// Per-ranker signals behind the score.
    pub signals: SearchSignals,
    /// Text shown for the hit.
    pub preview: String,
    /// Name of the model used — presumably the embedding model; TODO confirm.
    pub model: String,
}
253
/// Search hit in the shape used for JSONL output.
///
/// Built from a [`SearchResult`] by `JsonlSearchResult::from_search_result`.
/// Optional fields other than `language` are omitted from the output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonlSearchResult {
    /// File path as a string.
    pub path: String,
    /// Location of the hit.
    pub span: Span,
    /// Lowercase language name, when known.
    pub language: Option<String>,
    /// Preview text; absent when snippets were not requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
    /// Score of the hit; absent when the source score was negative.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub score: Option<f32>,
    /// Hash of the matched chunk, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch the hit came from, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
268
/// Ranking signals attached to a [`JsonSearchResult`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Rank from the lexical ranker, if it returned this hit (presumed from the name).
    pub lex_rank: Option<usize>,
    /// Rank from the vector ranker, if it returned this hit (presumed from the name).
    pub vec_rank: Option<usize>,
    /// Fused score — presumably reciprocal rank fusion; TODO confirm.
    pub rrf_score: f32,
}
275
/// Search strategy selected through [`SearchOptions::mode`].
///
/// `Regex` is the mode used by `SearchOptions::default()`.
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
283
/// A path to include in a search, together with whether it is a directory.
#[derive(Debug, Clone)]
pub struct IncludePattern {
    /// The included path.
    pub path: PathBuf,
    /// `true` when `path` refers to a directory.
    pub is_dir: bool,
}
289
/// Full set of options describing one search invocation.
///
/// `SearchOptions::default()` gives regex mode over `"."`, recursive, with
/// gitignore respected, the default exclude patterns, and every other flag
/// off / empty / `None`.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy to use.
    pub mode: SearchMode,
    /// The query text.
    pub query: String,
    /// Path to search in.
    pub path: PathBuf,
    /// Optional cap on the number of results.
    pub top_k: Option<usize>,
    /// Optional score threshold.
    pub threshold: Option<f32>,
    pub case_insensitive: bool,
    pub whole_word: bool,
    pub fixed_string: bool,
    pub line_numbers: bool,
    // Context sizes — presumably lines shown around / before / after each
    // match, as in grep's -C / -B / -A; verify against the consumer.
    pub context_lines: usize,
    pub before_context_lines: usize,
    pub after_context_lines: usize,
    pub recursive: bool,
    // Output format and display toggles.
    pub json_output: bool,
    pub jsonl_output: bool,
    pub no_snippet: bool,
    pub reindex: bool,
    pub show_scores: bool,
    pub show_filenames: bool,
    pub files_with_matches: bool,
    pub files_without_matches: bool,
    /// Patterns to exclude from the search.
    pub exclude_patterns: Vec<String>,
    /// Paths explicitly included in the search.
    pub include_patterns: Vec<IncludePattern>,
    pub respect_gitignore: bool,
    pub full_section: bool,
    pub rerank: bool,
    /// Optional reranking model name.
    pub rerank_model: Option<String>,
    /// Optional embedding model name.
    pub embedding_model: Option<String>,
}
322
323impl JsonlSearchResult {
324 pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
325 Self {
326 path: result.file.to_string_lossy().to_string(),
327 span: result.span.clone(),
328 language: result.lang.as_ref().map(|l| l.to_string()),
329 snippet: if include_snippet {
330 Some(result.preview.clone())
331 } else {
332 None
333 },
334 score: if result.score >= 0.0 {
335 Some(result.score)
336 } else {
337 None
338 },
339 chunk_hash: result.chunk_hash.clone(),
340 index_epoch: result.index_epoch,
341 }
342 }
343}
344
345impl Default for SearchOptions {
346 fn default() -> Self {
347 Self {
348 mode: SearchMode::Regex,
349 query: String::new(),
350 path: PathBuf::from("."),
351 top_k: None,
352 threshold: None,
353 case_insensitive: false,
354 whole_word: false,
355 fixed_string: false,
356 line_numbers: false,
357 context_lines: 0,
358 before_context_lines: 0,
359 after_context_lines: 0,
360 recursive: true,
361 json_output: false,
362 jsonl_output: false,
363 no_snippet: false,
364 reindex: false,
365 show_scores: false,
366 show_filenames: false,
367 files_with_matches: false,
368 files_without_matches: false,
369 exclude_patterns: get_default_exclude_patterns(),
370 include_patterns: Vec::new(),
371 respect_gitignore: true,
372 full_section: false,
373 rerank: false,
375 rerank_model: None,
376 embedding_model: None,
377 }
378 }
379}
380
/// Returns the built-in directory names that are excluded from searches.
///
/// The list covers ck's own data, caches, VCS metadata, build output,
/// Python virtual environments, IDE folders and temp directories, in that order.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck data and caches
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build output and dependencies
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE / editor folders
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| String::from(*name))
        .collect()
}
420
/// Returns the text written to a freshly created `.ckignore` file.
///
/// The content uses `.gitignore` syntax and lists media, binary, archive,
/// data and config-format globs, followed by a marker line for user additions.
pub fn get_default_ckignore_content() -> &'static str {
    concat!(
        "# .ckignore - Default patterns for ck semantic search\n",
        "# Created automatically during first index\n",
        "# Syntax: same as .gitignore (glob patterns, ! for negation)\n",
        "\n",
        "# Images\n",
        "*.png\n",
        "*.jpg\n",
        "*.jpeg\n",
        "*.gif\n",
        "*.bmp\n",
        "*.svg\n",
        "*.ico\n",
        "*.webp\n",
        "*.tiff\n",
        "\n",
        "# Video\n",
        "*.mp4\n",
        "*.avi\n",
        "*.mov\n",
        "*.mkv\n",
        "*.wmv\n",
        "*.flv\n",
        "*.webm\n",
        "\n",
        "# Audio\n",
        "*.mp3\n",
        "*.wav\n",
        "*.flac\n",
        "*.aac\n",
        "*.ogg\n",
        "*.m4a\n",
        "\n",
        "# Binary/Compiled\n",
        "*.exe\n",
        "*.dll\n",
        "*.so\n",
        "*.dylib\n",
        "*.a\n",
        "*.lib\n",
        "*.obj\n",
        "*.o\n",
        "\n",
        "# Archives\n",
        "*.zip\n",
        "*.tar\n",
        "*.tar.gz\n",
        "*.tgz\n",
        "*.rar\n",
        "*.7z\n",
        "*.bz2\n",
        "*.gz\n",
        "\n",
        "# Data files\n",
        "*.db\n",
        "*.sqlite\n",
        "*.sqlite3\n",
        "*.parquet\n",
        "*.arrow\n",
        "\n",
        "# Config formats (issue #27)\n",
        "*.json\n",
        "*.yaml\n",
        "*.yml\n",
        "\n",
        "# Add your custom patterns below this line\n",
    )
}
490
491pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
493 let ckignore_path = repo_root.join(".ckignore");
494
495 if !ckignore_path.exists() {
496 return Ok(Vec::new());
497 }
498
499 let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
500
501 let patterns: Vec<String> = content
502 .lines()
503 .map(|line| line.trim())
504 .filter(|line| !line.is_empty() && !line.starts_with('#'))
505 .map(|line| line.to_string())
506 .collect();
507
508 Ok(patterns)
509}
510
511pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
513 let ckignore_path = repo_root.join(".ckignore");
514
515 if ckignore_path.exists() {
516 return Ok(false); }
518
519 std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
520
521 Ok(true) }
523
524pub fn build_exclude_patterns(
543 repo_root: Option<&Path>,
544 additional_excludes: &[String],
545 use_ckignore: bool,
546 use_defaults: bool,
547) -> Vec<String> {
548 let mut patterns = Vec::new();
549
550 if use_ckignore
552 && let Some(root) = repo_root
553 && let Ok(ckignore_patterns) = read_ckignore_patterns(root)
554 && !ckignore_patterns.is_empty()
555 {
556 patterns.extend(ckignore_patterns);
557 }
558
559 patterns.extend(additional_excludes.iter().cloned());
561
562 if use_defaults {
564 patterns.extend(get_default_exclude_patterns());
565 }
566
567 patterns
568}
569
/// Computes where the sidecar file for `file_path` lives under `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored below `.ck`, and `.ck`
/// is appended to the file name (`src/main.rs` -> `.ck/src/main.rs.ck`,
/// `README` -> `.ck/README.ck`).
///
/// When `file_path` is not below `repo_root` it is used as-is. If that path is
/// absolute, its root/prefix components are dropped first, because
/// `PathBuf::push` with an absolute path would replace the whole buffer and
/// place the sidecar outside `.ck`.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    use std::path::Component;

    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
    let mut sidecar = repo_root.join(".ck");

    let is_anchor = |c: &Component<'_>| matches!(c, Component::Prefix(_) | Component::RootDir);
    if relative.components().any(|c| is_anchor(&c)) {
        // Outside the repo root: re-root the path under `.ck`.
        sidecar.extend(relative.components().filter(|c| !is_anchor(c)));
    } else {
        sidecar.push(relative);
    }

    // Keep the original extension and append ".ck" to it.
    let ext = relative
        .extension()
        .map(|e| format!("{}.ck", e.to_string_lossy()))
        .unwrap_or_else(|| "ck".to_string());
    sidecar.set_extension(ext);
    sidecar
}
581
582pub fn compute_file_hash(path: &Path) -> Result<String> {
583 use std::io::Read;
584
585 let mut file = std::fs::File::open(path)?;
586 let mut hasher = blake3::Hasher::new();
587
588 let mut buffer = [0u8; 65536]; loop {
591 let bytes_read = file.read(&mut buffer)?;
592 if bytes_read == 0 {
593 break;
594 }
595 hasher.update(&buffer[..bytes_read]);
596 }
597
598 let hash = hasher.finalize();
599 Ok(hash.to_hex().to_string())
600}
601
/// Helpers for PDF files and the cache of their extracted text.
pub mod pdf {
    use std::path::{Component, Path, PathBuf};

    /// Returns `true` when `path` has a `pdf` extension (ASCII case-insensitive).
    ///
    /// Only the extension is inspected; the file is never opened. A path with
    /// no extension, or one that is not valid UTF-8, is not a PDF.
    pub fn is_pdf_file(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("pdf"))
    }

    /// Computes where the cached text for `file_path` lives under
    /// `<repo_root>/.ck/content`.
    ///
    /// The file's path relative to `repo_root` is mirrored below the cache
    /// directory and `.txt` is appended to the file name
    /// (`docs/manual.pdf` -> `.ck/content/docs/manual.pdf.txt`).
    ///
    /// When `file_path` is not below `repo_root` it is used as-is. If that
    /// path is absolute, its root/prefix components are dropped first, because
    /// `PathBuf::push` with an absolute path would replace the whole buffer
    /// and place the cache file outside `.ck/content`.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
        let mut cache_path = repo_root.join(".ck").join("content");

        let is_anchor =
            |c: &Component<'_>| matches!(c, Component::Prefix(_) | Component::RootDir);
        if relative.components().any(|c| is_anchor(&c)) {
            // Outside the repo root: re-root the path under the cache directory.
            cache_path.extend(relative.components().filter(|c| !is_anchor(c)));
        } else {
            cache_path.push(relative);
        }

        // Keep the original extension and append ".txt" to it.
        let ext = relative
            .extension()
            .map(|e| format!("{}.txt", e.to_string_lossy()))
            .unwrap_or_else(|| "txt".to_string());
        cache_path.set_extension(ext);

        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            assert!(is_pdf_file(&PathBuf::from("test.pdf")));
            assert!(is_pdf_file(&PathBuf::from("test.PDF")));
            assert!(is_pdf_file(&PathBuf::from("test.Pdf")));
            assert!(!is_pdf_file(&PathBuf::from("test.txt")));
            assert!(!is_pdf_file(&PathBuf::from("test")));
            assert!(!is_pdf_file(&PathBuf::from("pdf")));
        }

        #[test]
        fn test_get_content_cache_path() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("docs/manual.pdf");
            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_outside_repo_root() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/elsewhere/docs/manual.pdf");
            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/elsewhere/docs/manual.pdf.txt")
            );
        }
    }
}
682
683#[cfg(test)]
684mod tests {
685 use super::*;
686 use std::fs;
687 use tempfile::TempDir;
688
689 #[test]
690 fn test_span_valid_creation() {
691 let span = Span::new(0, 10, 1, 2).unwrap();
693 assert_eq!(span.byte_start, 0);
694 assert_eq!(span.byte_end, 10);
695 assert_eq!(span.line_start, 1);
696 assert_eq!(span.line_end, 2);
697 assert!(span.is_valid());
698 }
699
700 #[test]
701 fn test_span_validation_valid_cases() {
702 let span = Span::new(10, 10, 1, 1).unwrap();
704 assert!(span.is_valid());
705 assert_eq!(span.byte_len(), 0);
706 assert_eq!(span.line_count(), 1);
707
708 let span = Span::new(0, 100, 1, 10).unwrap();
710 assert!(span.is_valid());
711 assert_eq!(span.byte_len(), 100);
712 assert_eq!(span.line_count(), 10);
713
714 let span = Span::new(5, 25, 3, 3).unwrap();
716 assert!(span.is_valid());
717 assert_eq!(span.byte_len(), 20);
718 assert_eq!(span.line_count(), 1);
719 }
720
721 #[test]
722 fn test_span_validation_invalid_byte_range() {
723 let result = Span::new(10, 5, 1, 2);
725 assert!(result.is_err());
726 if let Err(CkError::SpanValidation(msg)) = result {
727 assert!(msg.contains("Invalid byte range"));
728 assert!(msg.contains("start (10) > end (5)"));
729 } else {
730 panic!("Expected SpanValidation error");
731 }
732 }
733
734 #[test]
735 fn test_span_validation_invalid_line_range() {
736 let result = Span::new(0, 10, 5, 2);
738 assert!(result.is_err());
739 if let Err(CkError::SpanValidation(msg)) = result {
740 assert!(msg.contains("Invalid line range"));
741 assert!(msg.contains("start (5) > end (2)"));
742 } else {
743 panic!("Expected SpanValidation error");
744 }
745 }
746
747 #[test]
748 fn test_span_validation_zero_line_numbers() {
749 let result = Span::new(0, 10, 0, 2);
751 assert!(result.is_err());
752 if let Err(CkError::SpanValidation(msg)) = result {
753 assert!(msg.contains("Line start cannot be zero"));
754 } else {
755 panic!("Expected SpanValidation error");
756 }
757
758 let result = Span::new(0, 10, 1, 0);
760 assert!(result.is_err());
761 if let Err(CkError::SpanValidation(msg)) = result {
762 assert!(msg.contains("Line end cannot be zero"));
763 } else {
764 panic!("Expected SpanValidation error");
765 }
766 }
767
768 #[test]
769 fn test_span_unchecked_creation() {
770 let span = Span::new_unchecked(10, 5, 0, 1);
772 assert_eq!(span.byte_start, 10);
773 assert_eq!(span.byte_end, 5);
774 assert_eq!(span.line_start, 0);
775 assert_eq!(span.line_end, 1);
776 assert!(!span.is_valid()); }
778
779 #[test]
780 fn test_span_validation_methods() {
781 let valid_span = Span::new_unchecked(0, 10, 1, 2);
783 assert!(valid_span.validate().is_ok());
784 assert!(valid_span.is_valid());
785
786 let invalid_span = Span::new_unchecked(10, 5, 1, 2);
788 assert!(invalid_span.validate().is_err());
789 assert!(!invalid_span.is_valid());
790
791 let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
793 assert!(zero_line_span.validate().is_err());
794 assert!(!zero_line_span.is_valid());
795 }
796
797 #[test]
798 fn test_span_utility_methods() {
799 let span = Span::new(10, 25, 5, 8).unwrap();
800
801 assert_eq!(span.byte_len(), 15);
803
804 assert_eq!(span.line_count(), 4); let single_line = Span::new(0, 5, 1, 1).unwrap();
809 assert_eq!(single_line.line_count(), 1);
810 assert_eq!(single_line.byte_len(), 5);
811
812 let empty = Span::new(10, 10, 3, 3).unwrap();
814 assert_eq!(empty.byte_len(), 0);
815 assert_eq!(empty.line_count(), 1);
816 }
817
818 #[test]
819 fn test_span_legacy_struct_literal_still_works() {
820 let span = Span {
822 byte_start: 0,
823 byte_end: 10,
824 line_start: 1,
825 line_end: 2,
826 };
827
828 assert_eq!(span.byte_start, 0);
829 assert_eq!(span.byte_end, 10);
830 assert_eq!(span.line_start, 1);
831 assert_eq!(span.line_end, 2);
832 assert!(span.is_valid());
833 }
834
835 #[test]
836 fn test_search_options_default() {
837 let options = SearchOptions::default();
838 assert!(matches!(options.mode, SearchMode::Regex));
839 assert_eq!(options.query, "");
840 assert_eq!(options.path, PathBuf::from("."));
841 assert_eq!(options.top_k, None);
842 assert_eq!(options.threshold, None);
843 assert!(!options.case_insensitive);
844 assert!(!options.whole_word);
845 assert!(!options.fixed_string);
846 assert!(!options.line_numbers);
847 assert_eq!(options.context_lines, 0);
848 assert!(options.recursive);
849 assert!(!options.json_output);
850 assert!(!options.reindex);
851 assert!(!options.show_scores);
852 assert!(!options.show_filenames);
853 }
854
855 #[test]
856 fn test_file_metadata_serialization() {
857 let metadata = FileMetadata {
858 path: PathBuf::from("test.txt"),
859 hash: "abc123".to_string(),
860 last_modified: 1234567890,
861 size: 1024,
862 };
863
864 let json = serde_json::to_string(&metadata).unwrap();
865 let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
866
867 assert_eq!(metadata.path, deserialized.path);
868 assert_eq!(metadata.hash, deserialized.hash);
869 assert_eq!(metadata.last_modified, deserialized.last_modified);
870 assert_eq!(metadata.size, deserialized.size);
871 }
872
873 #[test]
874 fn test_search_result_serialization() {
875 let result = SearchResult {
876 file: PathBuf::from("test.txt"),
877 span: Span {
878 byte_start: 0,
879 byte_end: 10,
880 line_start: 1,
881 line_end: 1,
882 },
883 score: 0.95,
884 preview: "hello world".to_string(),
885 lang: Some(Language::Rust),
886 symbol: Some("main".to_string()),
887 chunk_hash: Some("abc123".to_string()),
888 index_epoch: Some(1699123456),
889 };
890
891 let json = serde_json::to_string(&result).unwrap();
892 let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
893
894 assert_eq!(result.file, deserialized.file);
895 assert_eq!(result.score, deserialized.score);
896 assert_eq!(result.preview, deserialized.preview);
897 assert_eq!(result.lang, deserialized.lang);
898 assert_eq!(result.symbol, deserialized.symbol);
899 assert_eq!(result.chunk_hash, deserialized.chunk_hash);
900 assert_eq!(result.index_epoch, deserialized.index_epoch);
901 }
902
903 #[test]
904 fn test_jsonl_search_result_conversion() {
905 let result = SearchResult {
906 file: PathBuf::from("src/auth.rs"),
907 span: Span {
908 byte_start: 1203,
909 byte_end: 1456,
910 line_start: 42,
911 line_end: 58,
912 },
913 score: 0.89,
914 preview: "function authenticate(user) {...}".to_string(),
915 lang: Some(Language::Rust),
916 symbol: Some("authenticate".to_string()),
917 chunk_hash: Some("abc123def456".to_string()),
918 index_epoch: Some(1699123456),
919 };
920
921 let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
923 assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
924 assert_eq!(jsonl_with_snippet.span.line_start, 42);
925 assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
926 assert_eq!(
927 jsonl_with_snippet.snippet,
928 Some("function authenticate(user) {...}".to_string())
929 );
930 assert_eq!(jsonl_with_snippet.score, Some(0.89));
931 assert_eq!(
932 jsonl_with_snippet.chunk_hash,
933 Some("abc123def456".to_string())
934 );
935 assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
936
937 let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
939 assert_eq!(jsonl_no_snippet.snippet, None);
940 assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
941 }
942
943 #[test]
944 fn test_get_sidecar_path() {
945 let repo_root = PathBuf::from("/home/user/project");
946 let file_path = PathBuf::from("/home/user/project/src/main.rs");
947
948 let sidecar = get_sidecar_path(&repo_root, &file_path);
949 let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
950
951 assert_eq!(sidecar, expected);
952 }
953
954 #[test]
955 fn test_get_sidecar_path_no_extension() {
956 let repo_root = PathBuf::from("/project");
957 let file_path = PathBuf::from("/project/README");
958
959 let sidecar = get_sidecar_path(&repo_root, &file_path);
960 let expected = PathBuf::from("/project/.ck/README.ck");
961
962 assert_eq!(sidecar, expected);
963 }
964
965 #[test]
966 fn test_compute_file_hash() {
967 let temp_dir = TempDir::new().unwrap();
968 let file_path = temp_dir.path().join("test.txt");
969
970 fs::write(&file_path, "hello world").unwrap();
971
972 let hash1 = compute_file_hash(&file_path).unwrap();
973 let hash2 = compute_file_hash(&file_path).unwrap();
974
975 assert_eq!(hash1, hash2);
977 assert!(!hash1.is_empty());
978
979 fs::write(&file_path, "hello rust").unwrap();
981 let hash3 = compute_file_hash(&file_path).unwrap();
982 assert_ne!(hash1, hash3);
983 }
984
985 #[test]
986 fn test_compute_file_hash_nonexistent() {
987 let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
988 assert!(result.is_err());
989 }
990
991 #[test]
992 fn test_compute_file_hash_large_file() {
993 let temp_dir = TempDir::new().unwrap();
994 let file_path = temp_dir.path().join("large_test.txt");
995
996 let large_content = "a".repeat(100_000); fs::write(&file_path, &large_content).unwrap();
999
1000 let hash1 = compute_file_hash(&file_path).unwrap();
1001 let hash2 = compute_file_hash(&file_path).unwrap();
1002
1003 assert_eq!(hash1, hash2);
1005 assert!(!hash1.is_empty());
1006
1007 fs::write(&file_path, "small content").unwrap();
1009 let hash3 = compute_file_hash(&file_path).unwrap();
1010 assert_ne!(hash1, hash3);
1011 }
1012
1013 #[test]
1014 fn test_json_search_result_serialization() {
1015 let signals = SearchSignals {
1016 lex_rank: Some(1),
1017 vec_rank: Some(2),
1018 rrf_score: 0.85,
1019 };
1020
1021 let result = JsonSearchResult {
1022 file: "test.txt".to_string(),
1023 span: Span {
1024 byte_start: 0,
1025 byte_end: 5,
1026 line_start: 1,
1027 line_end: 1,
1028 },
1029 lang: None, symbol: None,
1031 score: 0.95,
1032 signals,
1033 preview: "hello".to_string(),
1034 model: "bge-small".to_string(),
1035 };
1036
1037 let json = serde_json::to_string(&result).unwrap();
1038 let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
1039
1040 assert_eq!(result.file, deserialized.file);
1041 assert_eq!(result.score, deserialized.score);
1042 assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
1043 assert_eq!(result.model, deserialized.model);
1044 }
1045
1046 #[test]
1047 fn test_language_from_extension() {
1048 assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
1049 assert_eq!(Language::from_extension("py"), Some(Language::Python));
1050 assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
1051 assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
1052 assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
1053 assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
1054 assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
1055 assert_eq!(Language::from_extension("go"), Some(Language::Go));
1056 assert_eq!(Language::from_extension("java"), Some(Language::Java));
1057 assert_eq!(Language::from_extension("c"), Some(Language::C));
1058 assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
1059 assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
1060 assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
1061 assert_eq!(Language::from_extension("php"), Some(Language::Php));
1062 assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
1063 assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
1064 assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
1065 assert_eq!(Language::from_extension("unknown"), None);
1066 }
1067
1068 #[test]
1069 fn test_language_from_extension_case_insensitive() {
1070 assert_eq!(Language::from_extension("RS"), Some(Language::Rust));
1072 assert_eq!(Language::from_extension("PY"), Some(Language::Python));
1073 assert_eq!(Language::from_extension("JS"), Some(Language::JavaScript));
1074 assert_eq!(Language::from_extension("TS"), Some(Language::TypeScript));
1075 assert_eq!(Language::from_extension("TSX"), Some(Language::TypeScript));
1076 assert_eq!(Language::from_extension("HS"), Some(Language::Haskell));
1077 assert_eq!(Language::from_extension("LHS"), Some(Language::Haskell));
1078 assert_eq!(Language::from_extension("GO"), Some(Language::Go));
1079 assert_eq!(Language::from_extension("JAVA"), Some(Language::Java));
1080 assert_eq!(Language::from_extension("C"), Some(Language::C));
1081 assert_eq!(Language::from_extension("CPP"), Some(Language::Cpp));
1082 assert_eq!(Language::from_extension("CC"), Some(Language::Cpp));
1083 assert_eq!(Language::from_extension("CXX"), Some(Language::Cpp));
1084 assert_eq!(Language::from_extension("H"), Some(Language::Cpp));
1085 assert_eq!(Language::from_extension("HPP"), Some(Language::Cpp));
1086 assert_eq!(Language::from_extension("CS"), Some(Language::CSharp));
1087 assert_eq!(Language::from_extension("RB"), Some(Language::Ruby));
1088 assert_eq!(Language::from_extension("PHP"), Some(Language::Php));
1089 assert_eq!(Language::from_extension("SWIFT"), Some(Language::Swift));
1090 assert_eq!(Language::from_extension("KT"), Some(Language::Kotlin));
1091 assert_eq!(Language::from_extension("KTS"), Some(Language::Kotlin));
1092 assert_eq!(Language::from_extension("PDF"), Some(Language::Pdf));
1093
1094 assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
1096 assert_eq!(Language::from_extension("Py"), Some(Language::Python));
1097 assert_eq!(Language::from_extension("Js"), Some(Language::JavaScript));
1098 assert_eq!(Language::from_extension("Ts"), Some(Language::TypeScript));
1099 assert_eq!(Language::from_extension("TsX"), Some(Language::TypeScript));
1100 assert_eq!(Language::from_extension("Hs"), Some(Language::Haskell));
1101 assert_eq!(Language::from_extension("Go"), Some(Language::Go));
1102 assert_eq!(Language::from_extension("Java"), Some(Language::Java));
1103 assert_eq!(Language::from_extension("Cpp"), Some(Language::Cpp));
1104 assert_eq!(Language::from_extension("Rb"), Some(Language::Ruby));
1105 assert_eq!(Language::from_extension("Php"), Some(Language::Php));
1106 assert_eq!(Language::from_extension("Swift"), Some(Language::Swift));
1107 assert_eq!(Language::from_extension("Kt"), Some(Language::Kotlin));
1108 assert_eq!(Language::from_extension("Pdf"), Some(Language::Pdf));
1109
1110 assert_eq!(Language::from_extension("UNKNOWN"), None);
1112 assert_eq!(Language::from_extension("Unknown"), None);
1113 }
1114
1115 #[test]
1116 fn test_language_from_path() {
1117 assert_eq!(
1118 Language::from_path(&PathBuf::from("test.rs")),
1119 Some(Language::Rust)
1120 );
1121 assert_eq!(
1122 Language::from_path(&PathBuf::from("test.py")),
1123 Some(Language::Python)
1124 );
1125 assert_eq!(
1126 Language::from_path(&PathBuf::from("test.js")),
1127 Some(Language::JavaScript)
1128 );
1129 assert_eq!(
1130 Language::from_path(&PathBuf::from("test.hs")),
1131 Some(Language::Haskell)
1132 );
1133 assert_eq!(
1134 Language::from_path(&PathBuf::from("test.lhs")),
1135 Some(Language::Haskell)
1136 );
1137 assert_eq!(
1138 Language::from_path(&PathBuf::from("test.go")),
1139 Some(Language::Go)
1140 );
1141 assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None); assert_eq!(Language::from_path(&PathBuf::from("noext")), None); }
1144
/// Verifies that `Language::from_path` ignores the case of the file extension.
///
/// Each supported language is checked with an all-uppercase extension
/// (including `Zig` and the secondary `lhs` / `kts` extensions), a subset is
/// checked with mixed case, and unrecognised extensions must still yield
/// `None` whatever their case.
#[test]
fn test_language_from_path_case_insensitive() {
    // (file name, expected detection result)
    let cases = [
        // All-uppercase extensions.
        ("MAIN.RS", Some(Language::Rust)),
        ("app.PY", Some(Language::Python)),
        ("script.JS", Some(Language::JavaScript)),
        ("types.TS", Some(Language::TypeScript)),
        ("Component.TSX", Some(Language::TypeScript)),
        ("module.HS", Some(Language::Haskell)),
        ("Literate.LHS", Some(Language::Haskell)),
        ("server.GO", Some(Language::Go)),
        ("App.JAVA", Some(Language::Java)),
        ("main.C", Some(Language::C)),
        ("utils.CPP", Some(Language::Cpp)),
        ("Program.CS", Some(Language::CSharp)),
        ("script.RB", Some(Language::Ruby)),
        ("index.PHP", Some(Language::Php)),
        ("App.SWIFT", Some(Language::Swift)),
        ("Main.KT", Some(Language::Kotlin)),
        ("settings.KTS", Some(Language::Kotlin)),
        ("build.ZIG", Some(Language::Zig)),
        ("document.PDF", Some(Language::Pdf)),
        // Mixed-case extensions.
        ("config.Rs", Some(Language::Rust)),
        ("helper.Py", Some(Language::Python)),
        ("utils.Js", Some(Language::JavaScript)),
        ("interfaces.Ts", Some(Language::TypeScript)),
        ("Component.TsX", Some(Language::TypeScript)),
        ("main.Cpp", Some(Language::Cpp)),
        ("build.Zig", Some(Language::Zig)),
        ("report.Pdf", Some(Language::Pdf)),
        // Unknown extensions stay unknown regardless of case.
        ("test.UNKNOWN", None),
        ("test.Unknown", None),
    ];

    for (file_name, expected) in cases {
        assert_eq!(
            Language::from_path(&PathBuf::from(file_name)),
            expected,
            "unexpected language detected for path {:?}",
            file_name
        );
    }
}
1247
/// Checks the textual form produced by `Language`'s `Display` implementation
/// for a selection of variants.
#[test]
fn test_language_display() {
    // (variant, expected `to_string()` output)
    let expectations = [
        (Language::Rust, "rust"),
        (Language::Python, "python"),
        (Language::JavaScript, "javascript"),
        (Language::TypeScript, "typescript"),
        (Language::Go, "go"),
        (Language::Java, "java"),
    ];

    for (language, rendered) in expectations {
        assert_eq!(language.to_string(), rendered);
    }
}
1257
/// Exercises `create_ckignore_if_missing` against a fresh temporary directory.
///
/// The first call must report that a file was created and leave a `.ckignore`
/// containing the expected entries; a second call on the same directory must
/// report that nothing was created.
#[test]
fn test_create_ckignore_if_missing() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // First call: the directory has no `.ckignore` yet.
    assert!(create_ckignore_if_missing(root).unwrap());

    let ignore_file = root.join(".ckignore");
    assert!(ignore_file.exists());

    let written = fs::read_to_string(&ignore_file).unwrap();
    for expected in ["*.png", "*.json", "*.yaml", "# Images", "# Config formats"] {
        assert!(
            written.contains(expected),
            "generated .ckignore is missing {:?}",
            expected
        );
    }

    // Second call: the file already exists.
    assert!(!create_ckignore_if_missing(root).unwrap());
}
1283
/// Exercises `read_ckignore_patterns` with and without a `.ckignore` present.
///
/// With no file the result must be empty; with a file mixing comments, a blank
/// line and four patterns, exactly those four patterns must be returned and
/// none of the returned entries may start with `#`.
#[test]
fn test_read_ckignore_patterns() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // No `.ckignore` written yet.
    let before = read_ckignore_patterns(root).unwrap();
    assert!(before.is_empty());

    // One fixture line per `concat!` argument.
    let fixture = concat!(
        "# Comment line\n",
        "*.png\n",
        "*.jpg\n",
        "\n",
        "# Another comment\n",
        "*.json\n",
        "*.yaml\n",
    );
    fs::write(root.join(".ckignore"), fixture).unwrap();

    let after = read_ckignore_patterns(root).unwrap();
    assert_eq!(after.len(), 4);
    for expected in ["*.png", "*.jpg", "*.json", "*.yaml"] {
        assert!(
            after.iter().any(|pattern| pattern == expected),
            "pattern {:?} was not returned",
            expected
        );
    }
    assert!(after.iter().all(|pattern| !pattern.starts_with('#')));
}
1318
/// Checks that `read_ckignore_patterns` returns only the three real patterns
/// from a `.ckignore` that is padded with blank lines.
#[test]
fn test_read_ckignore_patterns_with_empty_lines() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // One fixture line per `concat!` argument; the file starts with a blank line.
    let fixture = concat!(
        "\n",
        "*.png\n",
        "\n",
        "*.jpg\n",
        "\n",
        "\n",
        "*.json\n",
    );
    fs::write(root.join(".ckignore"), fixture).unwrap();

    let found = read_ckignore_patterns(root).unwrap();
    assert_eq!(found.len(), 3);
    for expected in ["*.png", "*.jpg", "*.json"] {
        assert!(
            found.iter().any(|pattern| pattern == expected),
            "pattern {:?} was not returned",
            expected
        );
    }
}
1344
/// Checks that the text returned by `get_default_ckignore_content` contains
/// the expected ignore patterns, section headers and issue reference.
#[test]
fn test_get_default_ckignore_content() {
    let defaults = get_default_ckignore_content();

    // Ignore patterns that must be present.
    let patterns = [
        "*.png", "*.jpg", "*.mp4", "*.mp3", "*.exe", "*.zip", "*.db", "*.json", "*.yaml",
    ];
    for pattern in patterns {
        assert!(
            defaults.contains(pattern),
            "default .ckignore is missing pattern {:?}",
            pattern
        );
    }

    // Section headers that must be present.
    let headers = ["# Images", "# Video", "# Audio", "# Config formats"];
    for header in headers {
        assert!(
            defaults.contains(header),
            "default .ckignore is missing section {:?}",
            header
        );
    }

    // Reference to the originating issue.
    assert!(defaults.contains("issue #27"));
}
1369}