1pub mod heatmap;
2
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use thiserror::Error;
6
/// Error type shared by the fallible operations in this module.
///
/// Variants marked `#[from]` are produced automatically by `?` from the
/// wrapped library error; the `String` variants carry a free-form message
/// that is interpolated into the `Display` text.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a `regex::Error`.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a `bincode::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a `serde_json::Error`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Index-related failure described by a message.
    #[error("Index error: {0}")]
    Index(String),

    /// Search-related failure described by a message.
    #[error("Search error: {0}")]
    Search(String),

    /// Embedding-related failure described by a message.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// Returned by [`Span::validate`] when a span's coordinates are inconsistent.
    #[error("Span validation error: {0}")]
    SpanValidation(String),

    /// Catch-all for failures that fit no other variant.
    #[error("Other error: {0}")]
    Other(String),
}
36
/// Convenience alias for a [`std::result::Result`] whose error type is [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
38
/// Source languages and document formats that can be recognised from a file
/// extension (see [`Language::from_extension`]).
///
/// Besides programming languages this also covers the `Markdown` and `Pdf`
/// document formats.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Haskell,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
    Zig,
    Dart,
    Elixir,
    Markdown,
    Pdf,
}
61
62impl Language {
63 pub fn from_extension(ext: &str) -> Option<Self> {
64 match ext.to_lowercase().as_str() {
66 "rs" => Some(Language::Rust),
67 "py" => Some(Language::Python),
68 "js" => Some(Language::JavaScript),
69 "ts" | "tsx" => Some(Language::TypeScript),
70 "hs" | "lhs" => Some(Language::Haskell),
71 "go" => Some(Language::Go),
72 "java" => Some(Language::Java),
73 "c" => Some(Language::C),
74 "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
75 "h" | "hpp" => Some(Language::Cpp), "cs" => Some(Language::CSharp),
77 "rb" => Some(Language::Ruby),
78 "php" => Some(Language::Php),
79 "swift" => Some(Language::Swift),
80 "kt" | "kts" => Some(Language::Kotlin),
81 "zig" => Some(Language::Zig),
82 "dart" => Some(Language::Dart),
83 "ex" | "exs" => Some(Language::Elixir),
84 "md" | "markdown" | "mdx" => Some(Language::Markdown),
85 "pdf" => Some(Language::Pdf),
86 _ => None,
87 }
88 }
89
90 pub fn from_path(path: &Path) -> Option<Self> {
91 path.extension()
92 .and_then(|ext| ext.to_str())
93 .and_then(Self::from_extension)
94 }
95}
96
97impl std::fmt::Display for Language {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 let name = match self {
100 Language::Rust => "rust",
101 Language::Python => "python",
102 Language::JavaScript => "javascript",
103 Language::TypeScript => "typescript",
104 Language::Haskell => "haskell",
105 Language::Go => "go",
106 Language::Java => "java",
107 Language::C => "c",
108 Language::Cpp => "cpp",
109 Language::CSharp => "csharp",
110 Language::Ruby => "ruby",
111 Language::Php => "php",
112 Language::Swift => "swift",
113 Language::Kotlin => "kotlin",
114 Language::Zig => "zig",
115 Language::Dart => "dart",
116 Language::Elixir => "elixir",
117 Language::Markdown => "markdown",
118 Language::Pdf => "pdf",
119 };
120 write!(f, "{name}")
121 }
122}
123
/// A region of a file described both by byte offsets and by line numbers.
///
/// [`Span::validate`] requires line numbers to be 1-indexed (non-zero) and
/// each start to be no greater than its end. The fields are public, so a
/// struct literal can bypass that validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the span starts.
    pub byte_start: usize,
    /// Byte offset where the span ends; `Span::byte_len` is `byte_end - byte_start`.
    pub byte_end: usize,
    /// First line of the span (1-indexed).
    pub line_start: usize,
    /// Last line of the span; `Span::line_count` is `line_end - line_start + 1`.
    pub line_end: usize,
}
131
132impl Span {
133 pub fn new(
135 byte_start: usize,
136 byte_end: usize,
137 line_start: usize,
138 line_end: usize,
139 ) -> Result<Self> {
140 let span = Self {
141 byte_start,
142 byte_end,
143 line_start,
144 line_end,
145 };
146 span.validate()?;
147 Ok(span)
148 }
149
150 pub fn new_unchecked(
156 byte_start: usize,
157 byte_end: usize,
158 line_start: usize,
159 line_end: usize,
160 ) -> Self {
161 Self {
162 byte_start,
163 byte_end,
164 line_start,
165 line_end,
166 }
167 }
168
169 pub fn validate(&self) -> Result<()> {
171 if self.line_start == 0 {
173 return Err(CkError::SpanValidation(
174 "Line start cannot be zero (lines are 1-indexed)".to_string(),
175 ));
176 }
177
178 if self.line_end == 0 {
179 return Err(CkError::SpanValidation(
180 "Line end cannot be zero (lines are 1-indexed)".to_string(),
181 ));
182 }
183
184 if self.byte_start > self.byte_end {
186 return Err(CkError::SpanValidation(format!(
187 "Invalid byte range: start ({}) > end ({})",
188 self.byte_start, self.byte_end
189 )));
190 }
191
192 if self.line_start > self.line_end {
194 return Err(CkError::SpanValidation(format!(
195 "Invalid line range: start ({}) > end ({})",
196 self.line_start, self.line_end
197 )));
198 }
199
200 Ok(())
201 }
202
203 pub fn is_valid(&self) -> bool {
205 self.validate().is_ok()
206 }
207
208 pub fn byte_len(&self) -> usize {
210 self.byte_end.saturating_sub(self.byte_start)
211 }
212
213 pub fn line_count(&self) -> usize {
215 self.line_end.saturating_sub(self.line_start) + 1
216 }
217}
218
/// Serializable bookkeeping record describing one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Location of the file.
    pub path: PathBuf,
    /// Content hash as a string (presumably the hex digest from
    /// `compute_file_hash` — confirm against the indexer).
    pub hash: String,
    /// Modification timestamp (NOTE(review): unit/epoch not visible here — confirm).
    pub last_modified: u64,
    /// File size (presumably in bytes — confirm).
    pub size: u64,
}
226
/// A single search hit.
///
/// The optional fields are omitted from the serialized form when they are
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File containing the hit.
    pub file: PathBuf,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Score of the hit (`JsonlSearchResult::from_search_result` treats
    /// negative values as "no score").
    pub score: f32,
    /// Text shown for the hit.
    pub preview: String,
    /// Detected language of the file, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
    /// Hash of the matching chunk, if any (presumably from
    /// `compute_chunk_hash` — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch the hit belongs to, if any (NOTE(review): exact meaning not
    /// visible here — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
242
/// Outcome of a search: the hits plus an optional near miss.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// The hits that were returned.
    pub matches: Vec<SearchResult>,
    /// Presumably the best-scoring candidate that fell below the threshold,
    /// kept so callers can report it — confirm against the search code.
    pub closest_below_threshold: Option<SearchResult>,
}
250
/// Search hit in the shape used for JSON output.
///
/// Unlike [`SearchResult`], `lang` and `symbol` are always serialized (as
/// `null` when absent) because they carry no `skip_serializing_if`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File path as a string.
    pub file: String,
    /// Location of the hit.
    pub span: Span,
    /// Detected language, if any.
    pub lang: Option<Language>,
    /// Symbol name, if any.
    pub symbol: Option<String>,
    /// Score of the hit.
    pub score: f32,
    /// Ranking details behind the score.
    pub signals: SearchSignals,
    /// Text shown for the hit.
    pub preview: String,
    /// Name of the model involved (e.g. an embedding model — confirm).
    pub model: String,
}
262
/// Search hit in the shape used for JSONL output; built from a
/// [`SearchResult`] by `JsonlSearchResult::from_search_result`.
///
/// `language` is always serialized; the remaining optional fields are
/// omitted when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonlSearchResult {
    /// File path as a string.
    pub path: String,
    /// Location of the hit.
    pub span: Span,
    /// Language name as text, if known.
    pub language: Option<String>,
    /// Preview text, when snippets are requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
    /// Score, when one is available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub score: Option<f32>,
    /// Hash of the matching chunk, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch of the hit, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
277
/// Ranking details attached to a [`JsonSearchResult`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Rank in the lexical ranking, if the hit appeared there (presumed from
    /// the name — confirm).
    pub lex_rank: Option<usize>,
    /// Rank in the vector ranking, if the hit appeared there (presumed from
    /// the name — confirm).
    pub vec_rank: Option<usize>,
    /// Combined score (presumably reciprocal rank fusion — confirm).
    pub rrf_score: f32,
}
284
/// Search strategy selected via [`SearchOptions::mode`].
/// `SearchOptions::default()` uses [`SearchMode::Regex`].
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
292
/// A path to include in a search, with a flag for whether it is a directory.
#[derive(Debug, Clone)]
pub struct IncludePattern {
    /// The included path.
    pub path: PathBuf,
    /// `true` when `path` refers to a directory.
    pub is_dir: bool,
}
298
/// The subset of options that governs which files are collected.
///
/// Can be derived from a [`SearchOptions`] via `From<&SearchOptions>`.
#[derive(Debug, Clone)]
pub struct FileCollectionOptions {
    /// Whether `.gitignore` rules are honoured.
    pub respect_gitignore: bool,
    /// Whether `.ckignore` rules are honoured.
    pub use_ckignore: bool,
    /// Additional patterns to exclude.
    pub exclude_patterns: Vec<String>,
}
311
312impl From<&SearchOptions> for FileCollectionOptions {
313 fn from(opts: &SearchOptions) -> Self {
314 Self {
315 respect_gitignore: opts.respect_gitignore,
316 use_ckignore: true, exclude_patterns: opts.exclude_patterns.clone(),
318 }
319 }
320}
321
/// Full set of options describing one search invocation.
///
/// `SearchOptions::default()` yields a recursive regex search of `"."` with
/// an empty query, the default exclude patterns, `.gitignore` and
/// `.ckignore` handling enabled, and every other flag off or `None`.
///
/// NOTE(review): the per-field notes below are inferred from the field names
/// and the `Default` impl; confirm details against the search implementation.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy.
    pub mode: SearchMode,
    /// The query text or pattern.
    pub query: String,
    /// Root path to search.
    pub path: PathBuf,
    /// Optional cap on the number of results.
    pub top_k: Option<usize>,
    /// Optional minimum score.
    pub threshold: Option<f32>,
    // Matching flags.
    pub case_insensitive: bool,
    pub whole_word: bool,
    pub fixed_string: bool,
    // Output context controls.
    pub line_numbers: bool,
    pub context_lines: usize,
    pub before_context_lines: usize,
    pub after_context_lines: usize,
    /// Whether to descend into subdirectories.
    pub recursive: bool,
    // Output format and content flags.
    pub json_output: bool,
    pub jsonl_output: bool,
    pub no_snippet: bool,
    /// Whether to rebuild the index.
    pub reindex: bool,
    pub show_scores: bool,
    pub show_filenames: bool,
    pub files_with_matches: bool,
    pub files_without_matches: bool,
    /// Patterns to exclude from file collection.
    pub exclude_patterns: Vec<String>,
    /// Explicit paths to include.
    pub include_patterns: Vec<IncludePattern>,
    /// Whether `.gitignore` rules are honoured.
    pub respect_gitignore: bool,
    /// Whether `.ckignore` rules are honoured.
    pub use_ckignore: bool,
    pub full_section: bool,
    /// Whether results are reranked.
    pub rerank: bool,
    /// Optional name of the reranking model.
    pub rerank_model: Option<String>,
    /// Optional name of the embedding model.
    pub embedding_model: Option<String>,
}
355
356impl JsonlSearchResult {
357 pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
358 Self {
359 path: result.file.to_string_lossy().to_string(),
360 span: result.span.clone(),
361 language: result.lang.as_ref().map(std::string::ToString::to_string),
362 snippet: if include_snippet {
363 Some(result.preview.clone())
364 } else {
365 None
366 },
367 score: if result.score >= 0.0 {
368 Some(result.score)
369 } else {
370 None
371 },
372 chunk_hash: result.chunk_hash.clone(),
373 index_epoch: result.index_epoch,
374 }
375 }
376}
377
378impl Default for SearchOptions {
379 fn default() -> Self {
380 Self {
381 mode: SearchMode::Regex,
382 query: String::new(),
383 path: PathBuf::from("."),
384 top_k: None,
385 threshold: None,
386 case_insensitive: false,
387 whole_word: false,
388 fixed_string: false,
389 line_numbers: false,
390 context_lines: 0,
391 before_context_lines: 0,
392 after_context_lines: 0,
393 recursive: true,
394 json_output: false,
395 jsonl_output: false,
396 no_snippet: false,
397 reindex: false,
398 show_scores: false,
399 show_filenames: false,
400 files_with_matches: false,
401 files_without_matches: false,
402 exclude_patterns: get_default_exclude_patterns(),
403 include_patterns: Vec::new(),
404 respect_gitignore: true,
405 use_ckignore: true,
406 full_section: false,
407 rerank: false,
409 rerank_model: None,
410 embedding_model: None,
411 }
412 }
413}
414
/// Returns the built-in list of directory names excluded from searches.
///
/// The list covers tool caches, version-control metadata, build output,
/// virtual environments, editor settings and temp directories, in a fixed
/// order.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // Tool caches
        ".ck",
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build output and dependencies
        "target",
        "build",
        "dist",
        "node_modules",
        ".gradle",
        ".mvn",
        "bin",
        "obj",
        // Virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // Editor / IDE settings
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| (*name).to_string())
        .collect()
}
454
/// Returns the text written to a freshly created `.ckignore` file.
///
/// The content uses `.gitignore` syntax and lists media, binary, archive,
/// data and config-file globs, followed by a marker line for user additions.
pub fn get_default_ckignore_content() -> &'static str {
    const DEFAULT_CKIGNORE: &str = r"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
";

    DEFAULT_CKIGNORE
}
524
525pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
527 let ckignore_path = repo_root.join(".ckignore");
528
529 if !ckignore_path.exists() {
530 return Ok(Vec::new());
531 }
532
533 let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
534
535 let patterns: Vec<String> = content
536 .lines()
537 .map(str::trim)
538 .filter(|line| !line.is_empty() && !line.starts_with('#'))
539 .map(std::string::ToString::to_string)
540 .collect();
541
542 Ok(patterns)
543}
544
545pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
547 let ckignore_path = repo_root.join(".ckignore");
548
549 if ckignore_path.exists() {
550 return Ok(false); }
552
553 std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
554
555 Ok(true) }
557
558pub fn build_exclude_patterns(additional_excludes: &[String], use_defaults: bool) -> Vec<String> {
580 let mut patterns = Vec::new();
581
582 patterns.extend(additional_excludes.iter().cloned());
584
585 if use_defaults {
588 patterns.extend(get_default_exclude_patterns());
589 }
590
591 patterns
592}
593
/// Computes where the sidecar file for `file_path` lives under
/// `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored below `.ck`, and
/// `.ck` is appended to the file name (`src/main.rs` becomes
/// `.ck/src/main.rs.ck`, `README` becomes `.ck/README.ck`).
///
/// If `file_path` is not under `repo_root`, its components are still placed
/// beneath `.ck`. Root and prefix components are dropped, because pushing an
/// absolute path onto a `PathBuf` replaces it and would put the sidecar next
/// to the source file, outside the index directory.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    use std::ffi::OsString;
    use std::path::Component;

    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    let mut sidecar = repo_root.join(".ck");
    sidecar.extend(
        relative
            .components()
            .filter(|part| !matches!(part, Component::Prefix(_) | Component::RootDir)),
    );

    // Build the new extension as an `OsString` so non-UTF-8 extensions are
    // preserved instead of being lossily rewritten.
    let ext = match relative.extension() {
        Some(original) => {
            let mut combined = original.to_os_string();
            combined.push(".ck");
            combined
        }
        None => OsString::from("ck"),
    };
    sidecar.set_extension(ext);

    sidecar
}
605
606pub fn compute_file_hash(path: &Path) -> Result<String> {
607 use std::io::Read;
608
609 let mut file = std::fs::File::open(path)?;
610 let mut hasher = blake3::Hasher::new();
611
612 let mut buffer = [0u8; 65536]; loop {
615 let bytes_read = file.read(&mut buffer)?;
616 if bytes_read == 0 {
617 break;
618 }
619 hasher.update(&buffer[..bytes_read]);
620 }
621
622 let hash = hasher.finalize();
623 Ok(hash.to_hex().to_string())
624}
625
626pub fn compute_chunk_hash(
634 text: &str,
635 leading_trivia: &[String],
636 trailing_trivia: &[String],
637) -> String {
638 let mut hasher = blake3::Hasher::new();
639
640 hasher.update(text.as_bytes());
642
643 for trivia in leading_trivia {
645 hasher.update(trivia.as_bytes());
646 }
647
648 for trivia in trailing_trivia {
650 hasher.update(trivia.as_bytes());
651 }
652
653 hasher.finalize().to_hex().to_string()
654}
655
/// Helpers for PDF files and their extracted-text cache.
pub mod pdf {
    use std::path::{Component, Path, PathBuf};

    /// Returns `true` when `path` has a `pdf` extension (ASCII
    /// case-insensitive). A file merely named `pdf` has no extension and is
    /// not treated as a PDF.
    pub fn is_pdf_file(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| ext.eq_ignore_ascii_case("pdf"))
    }

    /// Computes where the extracted text of `file_path` is cached under
    /// `<repo_root>/.ck/content`.
    ///
    /// The file's path relative to `repo_root` is mirrored below the cache
    /// directory, and `.txt` is appended to the file name
    /// (`docs/manual.pdf` becomes `.ck/content/docs/manual.pdf.txt`).
    ///
    /// If `file_path` is not under `repo_root`, its components are still
    /// placed beneath the cache directory. Root and prefix components are
    /// dropped, because pushing an absolute path onto a `PathBuf` replaces it
    /// and would put the cache file next to the source document.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

        let mut cache_path = repo_root.join(".ck").join("content");
        cache_path.extend(
            relative
                .components()
                .filter(|part| !matches!(part, Component::Prefix(_) | Component::RootDir)),
        );

        // Build the new extension as an `OsString` so non-UTF-8 extensions
        // are preserved instead of being lossily rewritten.
        let ext = match relative.extension() {
            Some(original) => {
                let mut combined = original.to_os_string();
                combined.push(".txt");
                combined
            }
            None => std::ffi::OsString::from("txt"),
        };
        cache_path.set_extension(ext);

        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            assert!(is_pdf_file(&PathBuf::from("test.pdf")));
            assert!(is_pdf_file(&PathBuf::from("test.PDF")));
            assert!(is_pdf_file(&PathBuf::from("test.Pdf")));
            assert!(!is_pdf_file(&PathBuf::from("test.txt")));
            assert!(!is_pdf_file(&PathBuf::from("test")));
            assert!(!is_pdf_file(&PathBuf::from("pdf")));
        }

        #[test]
        fn test_get_content_cache_path() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_outside_repo() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/elsewhere/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/elsewhere/manual.pdf.txt")
            );
        }
    }
}
736
737#[cfg(test)]
738mod tests {
739 use super::*;
740 use std::fs;
741 use tempfile::TempDir;
742
#[test]
fn test_span_valid_creation() {
    // A well-formed span keeps exactly the coordinates it was built from.
    let span = Span::new(0, 10, 1, 2).unwrap();

    let coordinates = (
        span.byte_start,
        span.byte_end,
        span.line_start,
        span.line_end,
    );
    assert_eq!(coordinates, (0, 10, 1, 2));
    assert!(span.is_valid());
}
753
#[test]
fn test_span_validation_valid_cases() {
    // ((byte_start, byte_end, line_start, line_end), byte_len, line_count)
    let cases = [
        ((10, 10, 1, 1), 0, 1),   // empty span on a single line
        ((0, 100, 1, 10), 100, 10), // multi-line span
        ((5, 25, 3, 3), 20, 1),   // non-empty span on a single line
    ];

    for ((byte_start, byte_end, line_start, line_end), byte_len, line_count) in cases {
        let span = Span::new(byte_start, byte_end, line_start, line_end).unwrap();
        assert!(span.is_valid());
        assert_eq!(span.byte_len(), byte_len);
        assert_eq!(span.line_count(), line_count);
    }
}
774
#[test]
fn test_span_validation_invalid_byte_range() {
    // byte_start > byte_end must be rejected with a descriptive message.
    let outcome = Span::new(10, 5, 1, 2);
    assert!(outcome.is_err());

    match outcome {
        Err(CkError::SpanValidation(message)) => {
            assert!(message.contains("Invalid byte range"));
            assert!(message.contains("start (10) > end (5)"));
        }
        _ => panic!("Expected SpanValidation error"),
    }
}
787
#[test]
fn test_span_validation_invalid_line_range() {
    // line_start > line_end must be rejected with a descriptive message.
    let outcome = Span::new(0, 10, 5, 2);
    assert!(outcome.is_err());

    match outcome {
        Err(CkError::SpanValidation(message)) => {
            assert!(message.contains("Invalid line range"));
            assert!(message.contains("start (5) > end (2)"));
        }
        _ => panic!("Expected SpanValidation error"),
    }
}
800
#[test]
fn test_span_validation_zero_line_numbers() {
    // (line_start, line_end, expected message fragment)
    let cases = [
        (0, 2, "Line start cannot be zero"),
        (1, 0, "Line end cannot be zero"),
    ];

    for (line_start, line_end, fragment) in cases {
        let outcome = Span::new(0, 10, line_start, line_end);
        assert!(outcome.is_err());

        match outcome {
            Err(CkError::SpanValidation(message)) => assert!(message.contains(fragment)),
            _ => panic!("Expected SpanValidation error"),
        }
    }
}
821
#[test]
fn test_span_unchecked_creation() {
    // `new_unchecked` stores invalid coordinates verbatim.
    let span = Span::new_unchecked(10, 5, 0, 1);

    let coordinates = (
        span.byte_start,
        span.byte_end,
        span.line_start,
        span.line_end,
    );
    assert_eq!(coordinates, (10, 5, 0, 1));
    assert!(!span.is_valid());
}
832
#[test]
fn test_span_validation_methods() {
    // `validate` and `is_valid` must agree for valid and invalid spans alike.
    let cases = [
        (Span::new_unchecked(0, 10, 1, 2), true),  // well-formed
        (Span::new_unchecked(10, 5, 1, 2), false), // inverted byte range
        (Span::new_unchecked(0, 10, 0, 1), false), // zero line number
    ];

    for (span, expected_valid) in cases {
        assert_eq!(span.validate().is_ok(), expected_valid);
        assert_eq!(span.is_valid(), expected_valid);
    }
}
850
#[test]
fn test_span_utility_methods() {
    // (span, expected byte_len, expected line_count)
    let cases = [
        (Span::new(10, 25, 5, 8).unwrap(), 15, 4), // lines 5..=8
        (Span::new(0, 5, 1, 1).unwrap(), 5, 1),    // single line
        (Span::new(10, 10, 3, 3).unwrap(), 0, 1),  // empty span
    ];

    for (span, byte_len, line_count) in cases {
        assert_eq!(span.byte_len(), byte_len);
        assert_eq!(span.line_count(), line_count);
    }
}
871
#[test]
fn test_span_legacy_struct_literal_still_works() {
    // Building a span through a struct literal must keep compiling.
    let span = Span {
        byte_start: 0,
        byte_end: 10,
        line_start: 1,
        line_end: 2,
    };

    let coordinates = (
        span.byte_start,
        span.byte_end,
        span.line_start,
        span.line_end,
    );
    assert_eq!(coordinates, (0, 10, 1, 2));
    assert!(span.is_valid());
}
888
#[test]
fn test_search_options_default() {
    let options = SearchOptions::default();

    assert_eq!(options.mode, SearchMode::Regex);
    assert!(options.query.is_empty());
    assert_eq!(options.path, PathBuf::from("."));
    assert!(options.top_k.is_none());
    assert!(options.threshold.is_none());
    assert_eq!(options.context_lines, 0);
    assert!(options.recursive);

    // Every flag below is off by default.
    let disabled_flags = [
        ("case_insensitive", options.case_insensitive),
        ("whole_word", options.whole_word),
        ("fixed_string", options.fixed_string),
        ("line_numbers", options.line_numbers),
        ("json_output", options.json_output),
        ("reindex", options.reindex),
        ("show_scores", options.show_scores),
        ("show_filenames", options.show_filenames),
    ];
    for (name, enabled) in disabled_flags {
        assert!(!enabled, "{name} should default to false");
    }
}
908
#[test]
fn test_file_metadata_serialization() {
    // Round-trip through JSON and compare every field.
    let original = FileMetadata {
        path: PathBuf::from("test.txt"),
        hash: "abc123".to_string(),
        last_modified: 1234567890,
        size: 1024,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: FileMetadata = serde_json::from_str(&encoded).unwrap();

    assert_eq!(
        (&original.path, &original.hash, original.last_modified, original.size),
        (&decoded.path, &decoded.hash, decoded.last_modified, decoded.size)
    );
}
926
#[test]
fn test_search_result_serialization() {
    // Round-trip a fully populated result through JSON.
    let original = SearchResult {
        file: PathBuf::from("test.txt"),
        span: Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 1,
        },
        score: 0.95,
        preview: "hello world".to_string(),
        lang: Some(Language::Rust),
        symbol: Some("main".to_string()),
        chunk_hash: Some("abc123".to_string()),
        index_epoch: Some(1699123456),
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: SearchResult = serde_json::from_str(&encoded).unwrap();

    assert_eq!(
        (
            &original.file,
            original.score,
            &original.preview,
            original.lang,
            &original.symbol,
            &original.chunk_hash,
            original.index_epoch,
        ),
        (
            &decoded.file,
            decoded.score,
            &decoded.preview,
            decoded.lang,
            &decoded.symbol,
            &decoded.chunk_hash,
            decoded.index_epoch,
        )
    );
}
956
#[test]
fn test_jsonl_search_result_conversion() {
    let preview = "function authenticate(user) {...}";
    let source = SearchResult {
        file: PathBuf::from("src/auth.rs"),
        span: Span {
            byte_start: 1203,
            byte_end: 1456,
            line_start: 42,
            line_end: 58,
        },
        score: 0.89,
        preview: preview.to_string(),
        lang: Some(Language::Rust),
        symbol: Some("authenticate".to_string()),
        chunk_hash: Some("abc123def456".to_string()),
        index_epoch: Some(1699123456),
    };

    // With snippets enabled every field is carried over.
    let with_snippet = JsonlSearchResult::from_search_result(&source, true);
    assert_eq!(with_snippet.path, "src/auth.rs");
    assert_eq!(with_snippet.span.line_start, 42);
    assert_eq!(with_snippet.language.as_deref(), Some("rust"));
    assert_eq!(with_snippet.snippet.as_deref(), Some(preview));
    assert_eq!(with_snippet.score, Some(0.89));
    assert_eq!(with_snippet.chunk_hash.as_deref(), Some("abc123def456"));
    assert_eq!(with_snippet.index_epoch, Some(1699123456));

    // With snippets disabled only the snippet is dropped.
    let without_snippet = JsonlSearchResult::from_search_result(&source, false);
    assert!(without_snippet.snippet.is_none());
    assert_eq!(without_snippet.path, "src/auth.rs");
}
996
#[test]
fn test_get_sidecar_path() {
    // A file inside the repo is mirrored under `.ck` with `.ck` appended.
    let root = Path::new("/home/user/project");
    let source_file = root.join("src/main.rs");

    assert_eq!(
        get_sidecar_path(root, &source_file),
        PathBuf::from("/home/user/project/.ck/src/main.rs.ck")
    );
}
1007
#[test]
fn test_get_sidecar_path_no_extension() {
    // A file without an extension simply gains `.ck`.
    let root = Path::new("/project");
    let source_file = root.join("README");

    assert_eq!(
        get_sidecar_path(root, &source_file),
        PathBuf::from("/project/.ck/README.ck")
    );
}
1018
#[test]
fn test_compute_file_hash() {
    let workspace = TempDir::new().unwrap();
    let target = workspace.path().join("test.txt");
    let digest = || compute_file_hash(&target).unwrap();

    fs::write(&target, "hello world").unwrap();
    let first = digest();
    let second = digest();

    // Hashing is deterministic and never yields an empty string.
    assert_eq!(first, second);
    assert!(!first.is_empty());

    // Different content must produce a different hash.
    fs::write(&target, "hello rust").unwrap();
    let after_change = digest();
    assert_ne!(first, after_change);
}
1038
#[test]
fn test_compute_file_hash_nonexistent() {
    // Hashing a missing file reports an error instead of panicking.
    let missing = PathBuf::from("nonexistent.txt");
    assert!(compute_file_hash(&missing).is_err());
}
1044
#[test]
fn test_compute_file_hash_large_file() {
    let workspace = TempDir::new().unwrap();
    let target = workspace.path().join("large_test.txt");
    let digest = || compute_file_hash(&target).unwrap();

    // 100_000 bytes is larger than the 65_536-byte read buffer, so the file
    // takes more than one read.
    let payload = "a".repeat(100_000);
    fs::write(&target, &payload).unwrap();

    let first = digest();
    let second = digest();
    assert_eq!(first, second);
    assert!(!first.is_empty());

    // Different content must produce a different hash.
    fs::write(&target, "small content").unwrap();
    let after_change = digest();
    assert_ne!(first, after_change);
}
1066
#[test]
fn test_json_search_result_serialization() {
    // Round-trip a result whose optional fields are absent.
    let original = JsonSearchResult {
        file: "test.txt".to_string(),
        span: Span {
            byte_start: 0,
            byte_end: 5,
            line_start: 1,
            line_end: 1,
        },
        lang: None,
        symbol: None,
        score: 0.95,
        signals: SearchSignals {
            lex_rank: Some(1),
            vec_rank: Some(2),
            rrf_score: 0.85,
        },
        preview: "hello".to_string(),
        model: "bge-small".to_string(),
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: JsonSearchResult = serde_json::from_str(&encoded).unwrap();

    assert_eq!(
        (
            &original.file,
            original.score,
            original.signals.rrf_score,
            &original.model,
        ),
        (
            &decoded.file,
            decoded.score,
            decoded.signals.rrf_score,
            &decoded.model,
        )
    );
}
1099
#[test]
fn test_language_from_extension() {
    // (extension, expected language)
    let cases = [
        ("rs", Some(Language::Rust)),
        ("py", Some(Language::Python)),
        ("js", Some(Language::JavaScript)),
        ("ts", Some(Language::TypeScript)),
        ("tsx", Some(Language::TypeScript)),
        ("hs", Some(Language::Haskell)),
        ("lhs", Some(Language::Haskell)),
        ("go", Some(Language::Go)),
        ("java", Some(Language::Java)),
        ("c", Some(Language::C)),
        ("cpp", Some(Language::Cpp)),
        ("cs", Some(Language::CSharp)),
        ("rb", Some(Language::Ruby)),
        ("php", Some(Language::Php)),
        ("swift", Some(Language::Swift)),
        ("kt", Some(Language::Kotlin)),
        ("kts", Some(Language::Kotlin)),
        ("ex", Some(Language::Elixir)),
        ("exs", Some(Language::Elixir)),
        ("unknown", None),
    ];

    for (ext, expected) in cases {
        assert_eq!(Language::from_extension(ext), expected);
    }
}
1123
#[test]
fn test_language_from_extension_case_insensitive() {
    // Upper-case extensions.
    let upper_case = [
        ("RS", Language::Rust),
        ("PY", Language::Python),
        ("JS", Language::JavaScript),
        ("TS", Language::TypeScript),
        ("TSX", Language::TypeScript),
        ("HS", Language::Haskell),
        ("LHS", Language::Haskell),
        ("GO", Language::Go),
        ("JAVA", Language::Java),
        ("C", Language::C),
        ("CPP", Language::Cpp),
        ("CC", Language::Cpp),
        ("CXX", Language::Cpp),
        ("H", Language::Cpp),
        ("HPP", Language::Cpp),
        ("CS", Language::CSharp),
        ("RB", Language::Ruby),
        ("PHP", Language::Php),
        ("SWIFT", Language::Swift),
        ("KT", Language::Kotlin),
        ("KTS", Language::Kotlin),
        ("EX", Language::Elixir),
        ("EXS", Language::Elixir),
        ("PDF", Language::Pdf),
    ];
    for (ext, expected) in upper_case {
        assert_eq!(Language::from_extension(ext), Some(expected));
    }

    // Mixed-case extensions.
    let mixed_case = [
        ("Rs", Language::Rust),
        ("Py", Language::Python),
        ("Js", Language::JavaScript),
        ("Ts", Language::TypeScript),
        ("TsX", Language::TypeScript),
        ("Hs", Language::Haskell),
        ("Go", Language::Go),
        ("Java", Language::Java),
        ("Cpp", Language::Cpp),
        ("Rb", Language::Ruby),
        ("Php", Language::Php),
        ("Swift", Language::Swift),
        ("Kt", Language::Kotlin),
        ("Ex", Language::Elixir),
        ("Pdf", Language::Pdf),
    ];
    for (ext, expected) in mixed_case {
        assert_eq!(Language::from_extension(ext), Some(expected));
    }

    // Unknown extensions stay unknown regardless of case.
    for ext in ["UNKNOWN", "Unknown"] {
        assert_eq!(Language::from_extension(ext), None);
    }
}
1173
#[test]
fn test_language_from_path() {
    // (file name, expected language)
    let cases = [
        ("test.rs", Some(Language::Rust)),
        ("test.py", Some(Language::Python)),
        ("test.js", Some(Language::JavaScript)),
        ("test.hs", Some(Language::Haskell)),
        ("test.lhs", Some(Language::Haskell)),
        ("test.go", Some(Language::Go)),
        ("test.unknown", None), // unrecognised extension
        ("noext", None),        // no extension at all
    ];

    for (file_name, expected) in cases {
        assert_eq!(Language::from_path(&PathBuf::from(file_name)), expected);
    }
}
1203
/// Checks that `Language::from_path` ignores the letter case of the file
/// extension: fully upper-case and mixed-case extensions must map to the same
/// language as their lowercase form, and unknown extensions stay `None`
/// regardless of case.
#[test]
fn test_language_from_path_case_insensitive() {
    // Extensions written entirely in upper case.
    let upper_case = [
        ("MAIN.RS", Language::Rust),
        ("app.PY", Language::Python),
        ("script.JS", Language::JavaScript),
        ("types.TS", Language::TypeScript),
        ("Component.TSX", Language::TypeScript),
        ("module.HS", Language::Haskell),
        ("server.GO", Language::Go),
        ("App.JAVA", Language::Java),
        ("main.C", Language::C),
        ("utils.CPP", Language::Cpp),
        ("Program.CS", Language::CSharp),
        ("script.RB", Language::Ruby),
        ("index.PHP", Language::Php),
        ("App.SWIFT", Language::Swift),
        ("Main.KT", Language::Kotlin),
        ("document.PDF", Language::Pdf),
    ];

    // Extensions mixing upper- and lower-case letters.
    let mixed_case = [
        ("config.Rs", Language::Rust),
        ("helper.Py", Language::Python),
        ("utils.Js", Language::JavaScript),
        ("interfaces.Ts", Language::TypeScript),
        ("Component.TsX", Language::TypeScript),
        ("main.Cpp", Language::Cpp),
        ("report.Pdf", Language::Pdf),
    ];

    for &(file_name, language) in upper_case.iter().chain(mixed_case.iter()) {
        let path = PathBuf::from(file_name);
        assert_eq!(
            Language::from_path(&path),
            Some(language),
            "path: {}",
            file_name
        );
    }

    // Unknown extensions are rejected whatever their case.
    for &file_name in &["test.UNKNOWN", "test.Unknown"] {
        let path = PathBuf::from(file_name);
        assert_eq!(Language::from_path(&path), None, "path: {}", file_name);
    }
}
1306
/// Checks the string form produced by `to_string()` for a sample of
/// `Language` variants.
#[test]
fn test_language_display() {
    let rendered = [
        (Language::Rust, "rust"),
        (Language::Python, "python"),
        (Language::JavaScript, "javascript"),
        (Language::TypeScript, "typescript"),
        (Language::Go, "go"),
        (Language::Java, "java"),
    ];

    for &(language, expected) in &rendered {
        assert_eq!(language.to_string(), expected);
    }
}
1316
/// Checks `create_ckignore_if_missing` against a fresh temporary directory:
/// the first call must report `true` and leave a `.ckignore` file containing
/// the expected default entries, and a second call must report `false`.
#[test]
fn test_create_ckignore_if_missing() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Nothing exists yet, so the first call is expected to create the file.
    assert!(create_ckignore_if_missing(root).unwrap());

    let ignore_file = root.join(".ckignore");
    assert!(ignore_file.exists());

    // Spot-check a few patterns and section headers of the written content.
    let written = fs::read_to_string(&ignore_file).unwrap();
    for &fragment in &["*.png", "*.json", "*.yaml", "# Images", "# Config formats"] {
        assert!(written.contains(fragment), "missing fragment: {}", fragment);
    }

    // The file is now present, so a repeat call must report "not created".
    assert!(!create_ckignore_if_missing(root).unwrap());
}
1342
/// Checks `read_ckignore_patterns`: with no `.ckignore` present it returns an
/// empty list, and with a file mixing comments, blank lines and patterns it
/// returns only the four patterns.
#[test]
fn test_read_ckignore_patterns() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // No `.ckignore` has been written yet.
    assert!(read_ckignore_patterns(root).unwrap().is_empty());

    let file_body = "# Comment line\n*.png\n*.jpg\n\n# Another comment\n*.json\n*.yaml\n";
    fs::write(root.join(".ckignore"), file_body).unwrap();

    let loaded = read_ckignore_patterns(root).unwrap();
    assert_eq!(loaded.len(), 4);
    for &wanted in &["*.png", "*.jpg", "*.json", "*.yaml"] {
        assert!(
            loaded.iter().any(|pattern| pattern == wanted),
            "missing pattern: {}",
            wanted
        );
    }

    // Comment lines must never be returned as patterns.
    assert!(loaded.iter().all(|pattern| !pattern.starts_with('#')));
}
1377
/// Checks that `read_ckignore_patterns` returns exactly the three patterns
/// from a `.ckignore` whose patterns are separated by runs of blank lines.
#[test]
fn test_read_ckignore_patterns_with_empty_lines() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Leading blank line, then patterns separated by one or two blank lines.
    let file_body = "\n*.png\n\n*.jpg\n\n\n*.json\n";
    fs::write(root.join(".ckignore"), file_body).unwrap();

    let loaded = read_ckignore_patterns(root).unwrap();
    assert_eq!(loaded.len(), 3);
    for &wanted in &["*.png", "*.jpg", "*.json"] {
        assert!(
            loaded.iter().any(|pattern| pattern == wanted),
            "missing pattern: {}",
            wanted
        );
    }
}
1403
/// Checks that the text returned by `get_default_ckignore_content` contains a
/// sample of expected glob patterns, the expected section headers, and a
/// mention of "issue #27".
#[test]
fn test_get_default_ckignore_content() {
    let defaults = get_default_ckignore_content();

    let glob_patterns = [
        "*.png", "*.jpg", "*.mp4", "*.mp3", "*.exe", "*.zip", "*.db", "*.json", "*.yaml",
    ];
    for &glob in &glob_patterns {
        assert!(defaults.contains(glob), "missing pattern: {}", glob);
    }

    let section_headers = ["# Images", "# Video", "# Audio", "# Config formats"];
    for &header in &section_headers {
        assert!(defaults.contains(header), "missing header: {}", header);
    }

    assert!(defaults.contains("issue #27"));
}
1428
/// Checks `build_exclude_patterns` with defaults enabled: the caller's
/// patterns are present, patterns mentioning `.git` and `node_modules` are
/// present, and the caller's pattern appears before the first `.git` one.
#[test]
fn test_build_exclude_patterns_with_defaults() {
    let extras: Vec<String> = ["*.custom", "temp/"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let merged = build_exclude_patterns(&extras, true);

    // Caller-supplied entries survive the merge.
    for extra in &extras {
        assert!(merged.contains(extra), "missing pattern: {}", extra);
    }

    // Entries mentioning these markers are expected when defaults are on.
    for &marker in &[".git", "node_modules"] {
        assert!(
            merged.iter().any(|pattern| pattern.contains(marker)),
            "no pattern mentions: {}",
            marker
        );
    }

    // Ordering: the caller's pattern precedes the first `.git` entry.
    let first_custom = merged
        .iter()
        .position(|pattern| pattern == "*.custom")
        .unwrap();
    let first_default = merged
        .iter()
        .position(|pattern| pattern.contains(".git"))
        .unwrap();
    assert!(first_custom < first_default);
}
1448
/// Checks `build_exclude_patterns` with defaults disabled: the result holds
/// the two caller-supplied patterns and nothing mentioning `.git` or
/// `node_modules`.
#[test]
fn test_build_exclude_patterns_without_defaults() {
    let extras: Vec<String> = ["*.custom", "temp/"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let merged = build_exclude_patterns(&extras, false);

    for extra in &extras {
        assert!(merged.contains(extra), "missing pattern: {}", extra);
    }

    // With defaults off, no entry may mention these markers.
    for &marker in &[".git", "node_modules"] {
        assert!(
            merged.iter().all(|pattern| !pattern.contains(marker)),
            "unexpected pattern mentioning: {}",
            marker
        );
    }

    assert_eq!(merged.len(), 2);
}
1466
/// Checks `build_exclude_patterns` with no caller-supplied patterns: with
/// defaults enabled the result is non-empty and mentions `.git`; with
/// defaults disabled the result is empty.
#[test]
fn test_build_exclude_patterns_empty_additional() {
    let defaults_only = build_exclude_patterns(&[], true);
    assert!(defaults_only.iter().any(|pattern| pattern.contains(".git")));
    assert!(!defaults_only.is_empty());

    // No extras and no defaults leaves nothing to exclude.
    let nothing = build_exclude_patterns(&[], false);
    assert!(nothing.is_empty());
}
1482
/// Checks `read_ckignore_patterns` on degenerate `.ckignore` contents: an
/// empty file, a comments-only file and a whitespace-only file all yield no
/// patterns, while a mixed file yields only its two patterns with no leading
/// or trailing spaces.
#[test]
fn test_read_ckignore_edge_cases() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let ignore_file = root.join(".ckignore");

    // Each of these bodies (empty, comments only, whitespace only) must
    // produce an empty pattern list.
    let pattern_free_bodies = [
        "",
        "# Comment 1\n# Comment 2\n# Comment 3\n",
        " \n\t\n \t \n",
    ];
    for &body in &pattern_free_bodies {
        fs::write(&ignore_file, body).unwrap();
        let loaded = read_ckignore_patterns(root).unwrap();
        assert!(loaded.is_empty(), "unexpected patterns for body {:?}", body);
    }

    // Comments, blank lines and space-padded patterns mixed together.
    fs::write(
        &ignore_file,
        "# Comment\n\n \n*.tmp \n *.log\n\n# Another comment\n",
    )
    .unwrap();
    let loaded = read_ckignore_patterns(root).unwrap();
    assert_eq!(loaded.len(), 2);
    for &wanted in &["*.tmp", "*.log"] {
        assert!(
            loaded.iter().any(|pattern| pattern == wanted),
            "missing pattern: {}",
            wanted
        );
    }

    // Surrounding spaces must not survive in the returned patterns.
    assert!(loaded
        .iter()
        .all(|pattern| !pattern.starts_with(' ') && !pattern.ends_with(' ')));
}
1518}