ck_core/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
/// Unified error type for this crate.
///
/// Variants carrying `#[from]` wrap an underlying library error and can be
/// produced with the `?` operator; the remaining variants carry a free-form
/// message supplied by the caller.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a `std::io::Error`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a `regex::Error`.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a `bincode::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a `serde_json::Error`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Message-only error rendered with the "Index error" prefix.
    #[error("Index error: {0}")]
    Index(String),

    /// Message-only error rendered with the "Search error" prefix.
    #[error("Search error: {0}")]
    Search(String),

    /// Message-only error rendered with the "Embedding error" prefix.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// Produced by `Span::validate` when a span violates its invariants.
    #[error("Span validation error: {0}")]
    SpanValidation(String),

    /// Catch-all message-only error.
    #[error("Other error: {0}")]
    Other(String),
}
34
/// Crate-wide result alias whose error type is fixed to [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
36
/// File kinds recognised by extension (see `Language::from_extension`).
///
/// `Display` renders each variant as a lowercase identifier such as `"rust"`
/// or `"csharp"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Haskell,
    Go,
    Java,
    C,
    /// Also selected for `.h` / `.hpp` headers.
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
    /// Selected for the `pdf` extension.
    Pdf,
}
55
56impl Language {
57    pub fn from_extension(ext: &str) -> Option<Self> {
58        // Convert to lowercase for case-insensitive matching
59        match ext.to_lowercase().as_str() {
60            "rs" => Some(Language::Rust),
61            "py" => Some(Language::Python),
62            "js" => Some(Language::JavaScript),
63            "ts" | "tsx" => Some(Language::TypeScript),
64            "hs" | "lhs" => Some(Language::Haskell),
65            "go" => Some(Language::Go),
66            "java" => Some(Language::Java),
67            "c" => Some(Language::C),
68            "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
69            "h" | "hpp" => Some(Language::Cpp), // Assume C++ for headers
70            "cs" => Some(Language::CSharp),
71            "rb" => Some(Language::Ruby),
72            "php" => Some(Language::Php),
73            "swift" => Some(Language::Swift),
74            "kt" | "kts" => Some(Language::Kotlin),
75            "pdf" => Some(Language::Pdf),
76            _ => None,
77        }
78    }
79
80    pub fn from_path(path: &Path) -> Option<Self> {
81        path.extension()
82            .and_then(|ext| ext.to_str())
83            .and_then(Self::from_extension)
84    }
85}
86
87impl std::fmt::Display for Language {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        let name = match self {
90            Language::Rust => "rust",
91            Language::Python => "python",
92            Language::JavaScript => "javascript",
93            Language::TypeScript => "typescript",
94            Language::Haskell => "haskell",
95            Language::Go => "go",
96            Language::Java => "java",
97            Language::C => "c",
98            Language::Cpp => "cpp",
99            Language::CSharp => "csharp",
100            Language::Ruby => "ruby",
101            Language::Php => "php",
102            Language::Swift => "swift",
103            Language::Kotlin => "kotlin",
104            Language::Pdf => "pdf",
105        };
106        write!(f, "{}", name)
107    }
108}
109
/// A region of a file described by a byte range and a line range.
///
/// `Span::validate` requires `byte_start <= byte_end`, both line numbers to be
/// non-zero (lines are 1-indexed) and `line_start <= line_end`. The fields are
/// public, so a span built as a struct literal is not checked automatically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the span begins.
    pub byte_start: usize,
    /// Byte offset where the span ends; `byte_len()` is `byte_end - byte_start`.
    pub byte_end: usize,
    /// First line of the span (1-indexed).
    pub line_start: usize,
    /// Last line of the span (1-indexed); `line_count()` counts it inclusively.
    pub line_end: usize,
}
117
118impl Span {
119    /// Create a new Span with validation
120    pub fn new(
121        byte_start: usize,
122        byte_end: usize,
123        line_start: usize,
124        line_end: usize,
125    ) -> Result<Self> {
126        let span = Self {
127            byte_start,
128            byte_end,
129            line_start,
130            line_end,
131        };
132        span.validate()?;
133        Ok(span)
134    }
135
136    /// Create a new Span without validation (for backward compatibility)
137    ///
138    /// # Safety
139    ///
140    /// The caller must ensure the span is valid. Use `new()` for validated construction.
141    pub fn new_unchecked(
142        byte_start: usize,
143        byte_end: usize,
144        line_start: usize,
145        line_end: usize,
146    ) -> Self {
147        Self {
148            byte_start,
149            byte_end,
150            line_start,
151            line_end,
152        }
153    }
154
155    /// Validate span invariants
156    pub fn validate(&self) -> Result<()> {
157        // Check for zero line numbers first (lines should be 1-indexed)
158        if self.line_start == 0 {
159            return Err(CkError::SpanValidation(
160                "Line start cannot be zero (lines are 1-indexed)".to_string(),
161            ));
162        }
163
164        if self.line_end == 0 {
165            return Err(CkError::SpanValidation(
166                "Line end cannot be zero (lines are 1-indexed)".to_string(),
167            ));
168        }
169
170        // Check byte range validity
171        if self.byte_start > self.byte_end {
172            return Err(CkError::SpanValidation(format!(
173                "Invalid byte range: start ({}) > end ({})",
174                self.byte_start, self.byte_end
175            )));
176        }
177
178        // Check line range validity
179        if self.line_start > self.line_end {
180            return Err(CkError::SpanValidation(format!(
181                "Invalid line range: start ({}) > end ({})",
182                self.line_start, self.line_end
183            )));
184        }
185
186        Ok(())
187    }
188
189    /// Check if this span is valid
190    pub fn is_valid(&self) -> bool {
191        self.validate().is_ok()
192    }
193
194    /// Get byte length of the span
195    pub fn byte_len(&self) -> usize {
196        self.byte_end.saturating_sub(self.byte_start)
197    }
198
199    /// Get line count of the span
200    pub fn line_count(&self) -> usize {
201        self.line_end.saturating_sub(self.line_start) + 1
202    }
203}
204
/// Serializable record describing one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// Content hash as a string.
    /// NOTE(review): presumably the hex digest from `compute_file_hash` — confirm against callers.
    pub hash: String,
    /// Last-modified timestamp. NOTE(review): unit/epoch is not visible here — confirm.
    pub last_modified: u64,
    /// File size. NOTE(review): presumably in bytes — confirm.
    pub size: u64,
}
212
/// A single search hit.
///
/// The optional fields are left out of the serialized form when they are
/// `None` (`skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File the hit belongs to.
    pub file: PathBuf,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Score of the hit. `JsonlSearchResult::from_search_result` treats a
    /// negative score as "no score".
    pub score: f32,
    /// Text shown for the hit; becomes the JSONL `snippet` when requested.
    pub preview: String,
    /// Detected language of the file, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// NOTE(review): presumably the name of the enclosing symbol — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
    /// NOTE(review): presumably a hash identifying the indexed chunk — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// NOTE(review): presumably a timestamp/version of the index — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
228
/// Enhanced search results that include near-miss information for threshold queries.
///
/// Unlike `SearchResult`, this wrapper does not derive `Serialize` or
/// `Deserialize`.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// Hits returned for the query.
    pub matches: Vec<SearchResult>,
    /// The highest scoring result below the threshold (if any)
    pub closest_below_threshold: Option<SearchResult>,
}
236
/// JSON-oriented form of a search hit.
///
/// Unlike `SearchResult`, the path is a `String` and `None` options are still
/// serialized (there is no `skip_serializing_if` here).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File path as a string.
    pub file: String,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Detected language of the file, if any.
    pub lang: Option<Language>,
    /// NOTE(review): presumably the name of the enclosing symbol — confirm.
    pub symbol: Option<String>,
    /// Score of the hit.
    pub score: f32,
    /// Per-signal ranking details (see `SearchSignals`).
    pub signals: SearchSignals,
    /// Text shown for the hit.
    pub preview: String,
    /// NOTE(review): presumably the embedding model name (tests use "bge-small") — confirm.
    pub model: String,
}
248
/// JSONL-oriented form of a search hit, built by
/// `JsonlSearchResult::from_search_result`.
///
/// Fields marked `skip_serializing_if` are left out of the output when `None`;
/// `language` is always emitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonlSearchResult {
    /// File path, converted lossily to a string.
    pub path: String,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Language name as rendered by `Language`'s `Display` impl.
    pub language: Option<String>,
    /// Preview text; `None` when the snippet was not requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
    /// Score of the hit; `None` when the source score was negative.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub score: Option<f32>,
    /// Copied from `SearchResult::chunk_hash`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Copied from `SearchResult::index_epoch`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
263
/// Ranking signals attached to a `JsonSearchResult`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// NOTE(review): presumably the hit's rank in the lexical ranking — confirm.
    pub lex_rank: Option<usize>,
    /// NOTE(review): presumably the hit's rank in the vector ranking — confirm.
    pub vec_rank: Option<usize>,
    /// NOTE(review): presumably a reciprocal-rank-fusion score — confirm.
    pub rrf_score: f32,
}
270
/// Search strategy selected through `SearchOptions::mode`.
///
/// `SearchOptions::default()` uses `SearchMode::Regex`.
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
278
/// Settings for one search invocation.
///
/// The defaults quoted on each field are the ones produced by the `Default`
/// impl in this file. Field meanings that this file does not establish are
/// marked as assumptions for review.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy; defaults to `SearchMode::Regex`.
    pub mode: SearchMode,
    /// Query text; defaults to the empty string.
    pub query: String,
    /// Location to search; defaults to `"."`.
    pub path: PathBuf,
    /// Defaults to `None`. NOTE(review): presumably caps the number of results — confirm.
    pub top_k: Option<usize>,
    /// Defaults to `None`. NOTE(review): presumably a minimum score — confirm.
    pub threshold: Option<f32>,
    /// Defaults to `false`.
    pub case_insensitive: bool,
    /// Defaults to `false`.
    pub whole_word: bool,
    /// Defaults to `false`. NOTE(review): presumably treats the query literally — confirm.
    pub fixed_string: bool,
    /// Defaults to `false`.
    pub line_numbers: bool,
    /// Defaults to `0`.
    pub context_lines: usize,
    /// Defaults to `0`.
    pub before_context_lines: usize,
    /// Defaults to `0`.
    pub after_context_lines: usize,
    /// Defaults to `true`.
    pub recursive: bool,
    /// Defaults to `false`.
    pub json_output: bool,
    /// Defaults to `false`.
    pub jsonl_output: bool,
    /// Defaults to `false`.
    pub no_snippet: bool,
    /// Defaults to `false`.
    pub reindex: bool,
    /// Defaults to `false`.
    pub show_scores: bool,
    /// Defaults to `false`.
    pub show_filenames: bool,
    /// Defaults to `false`.
    pub files_with_matches: bool,
    /// Defaults to `false`.
    pub files_without_matches: bool,
    /// Defaults to the list returned by `get_default_exclude_patterns()`.
    pub exclude_patterns: Vec<String>,
    /// Defaults to `true`.
    pub respect_gitignore: bool,
    /// Defaults to `false`.
    pub full_section: bool,
    // Enhanced embedding options (search-time only)
    /// Defaults to `false`.
    pub rerank: bool,
    /// Defaults to `None`.
    pub rerank_model: Option<String>,
    /// Defaults to `None`.
    pub embedding_model: Option<String>,
}
310
311impl JsonlSearchResult {
312    pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
313        Self {
314            path: result.file.to_string_lossy().to_string(),
315            span: result.span.clone(),
316            language: result.lang.as_ref().map(|l| l.to_string()),
317            snippet: if include_snippet {
318                Some(result.preview.clone())
319            } else {
320                None
321            },
322            score: if result.score >= 0.0 {
323                Some(result.score)
324            } else {
325                None
326            },
327            chunk_hash: result.chunk_hash.clone(),
328            index_epoch: result.index_epoch,
329        }
330    }
331}
332
333impl Default for SearchOptions {
334    fn default() -> Self {
335        Self {
336            mode: SearchMode::Regex,
337            query: String::new(),
338            path: PathBuf::from("."),
339            top_k: None,
340            threshold: None,
341            case_insensitive: false,
342            whole_word: false,
343            fixed_string: false,
344            line_numbers: false,
345            context_lines: 0,
346            before_context_lines: 0,
347            after_context_lines: 0,
348            recursive: true,
349            json_output: false,
350            jsonl_output: false,
351            no_snippet: false,
352            reindex: false,
353            show_scores: false,
354            show_filenames: false,
355            files_with_matches: false,
356            files_without_matches: false,
357            exclude_patterns: get_default_exclude_patterns(),
358            respect_gitignore: true,
359            full_section: false,
360            // Enhanced embedding options (search-time only)
361            rerank: false,
362            rerank_model: None,
363            embedding_model: None,
364        }
365    }
366}
367
/// Returns the directory names that are skipped by default during search.
///
/// The list covers common cache, build, tooling and temporary directories that
/// rarely contain user code. A fresh `Vec` is allocated on every call.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| (*name).to_string())
        .collect()
}
407
/// Returns the sidecar path for `file_path` inside `<repo_root>/.ck`.
///
/// The file's location relative to `repo_root` is mirrored under `.ck` and
/// `.ck` is appended to the file name (`src/main.rs` becomes
/// `.ck/src/main.rs.ck`, `README` becomes `.ck/README.ck`).
///
/// A `file_path` that is not under `repo_root` is used as-is, except that any
/// root or drive-prefix component is dropped so the result always stays below
/// `<repo_root>/.ck`.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    // `PathBuf::push` replaces the whole path when handed an absolute one, which
    // would place the sidecar next to the source file. Dropping the root/prefix
    // components keeps an out-of-tree file anchored inside `.ck`.
    let anchored: PathBuf = relative
        .components()
        .filter(|part| {
            !matches!(
                part,
                std::path::Component::Prefix(_) | std::path::Component::RootDir
            )
        })
        .collect();

    let mut sidecar = repo_root.join(".ck");
    sidecar.push(anchored);

    // Keep the original extension and add `.ck` after it.
    let ext = match relative.extension() {
        Some(original) => format!("{}.ck", original.to_string_lossy()),
        None => "ck".to_string(),
    };
    sidecar.set_extension(ext);
    sidecar
}
419
420pub fn compute_file_hash(path: &Path) -> Result<String> {
421    use std::io::Read;
422
423    let mut file = std::fs::File::open(path)?;
424    let mut hasher = blake3::Hasher::new();
425
426    // Stream the file in 64KB chunks to avoid loading entire file into memory
427    let mut buffer = [0u8; 65536]; // 64KB buffer
428    loop {
429        let bytes_read = file.read(&mut buffer)?;
430        if bytes_read == 0 {
431            break;
432        }
433        hasher.update(&buffer[..bytes_read]);
434    }
435
436    let hash = hasher.finalize();
437    Ok(hash.to_hex().to_string())
438}
439
/// PDF-specific utilities
pub mod pdf {
    use std::path::{Component, Path, PathBuf};

    /// Check if a file is a PDF by extension (optimized to avoid allocations).
    ///
    /// The comparison is ASCII case-insensitive. Paths without an extension, or
    /// whose extension is not valid UTF-8, are not PDFs.
    pub fn is_pdf_file(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .map(|ext| ext.eq_ignore_ascii_case("pdf")) // Avoids allocation vs to_lowercase()
            .unwrap_or(false)
    }

    /// Get path for cached PDF content.
    ///
    /// The file's location relative to `repo_root` is mirrored under
    /// `<repo_root>/.ck/content` and `.txt` is appended to the file name
    /// (`docs/manual.pdf` becomes `.ck/content/docs/manual.pdf.txt`).
    ///
    /// A `file_path` that is not under `repo_root` is used as-is, except that
    /// any root or drive-prefix component is dropped so the result always stays
    /// below `<repo_root>/.ck/content`.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

        // `PathBuf::push` replaces the whole path when handed an absolute one,
        // which would write the cache file next to the source PDF. Dropping the
        // root/prefix components keeps out-of-tree files inside the cache dir.
        let anchored: PathBuf = relative
            .components()
            .filter(|part| !matches!(part, Component::Prefix(_) | Component::RootDir))
            .collect();

        let mut cache_path = repo_root.join(".ck").join("content");
        cache_path.push(anchored);

        // Add .txt extension to the cached file
        let ext = match relative.extension() {
            Some(original) => format!("{}.txt", original.to_string_lossy()),
            None => "txt".to_string(),
        };
        cache_path.set_extension(ext);

        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            assert!(is_pdf_file(&PathBuf::from("test.pdf")));
            assert!(is_pdf_file(&PathBuf::from("test.PDF"))); // Case insensitive
            assert!(is_pdf_file(&PathBuf::from("test.Pdf")));
            assert!(!is_pdf_file(&PathBuf::from("test.txt")));
            assert!(!is_pdf_file(&PathBuf::from("test"))); // No extension
            assert!(!is_pdf_file(&PathBuf::from("pdf"))); // Just "pdf", no extension
        }

        #[test]
        fn test_get_content_cache_path() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("docs/manual.pdf"); // Relative path

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_outside_repo() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/elsewhere/manual.pdf"); // Absolute, not under repo_root

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/elsewhere/manual.pdf.txt")
            );
        }
    }
}
520
521#[cfg(test)]
522mod tests {
523    use super::*;
524    use std::fs;
525    use tempfile::TempDir;
526
527    #[test]
528    fn test_span_valid_creation() {
529        // Test valid span creation
530        let span = Span::new(0, 10, 1, 2).unwrap();
531        assert_eq!(span.byte_start, 0);
532        assert_eq!(span.byte_end, 10);
533        assert_eq!(span.line_start, 1);
534        assert_eq!(span.line_end, 2);
535        assert!(span.is_valid());
536    }
537
538    #[test]
539    fn test_span_validation_valid_cases() {
540        // Same byte positions (empty span)
541        let span = Span::new(10, 10, 1, 1).unwrap();
542        assert!(span.is_valid());
543        assert_eq!(span.byte_len(), 0);
544        assert_eq!(span.line_count(), 1);
545
546        // Multi-line span
547        let span = Span::new(0, 100, 1, 10).unwrap();
548        assert!(span.is_valid());
549        assert_eq!(span.byte_len(), 100);
550        assert_eq!(span.line_count(), 10);
551
552        // Single line span
553        let span = Span::new(5, 25, 3, 3).unwrap();
554        assert!(span.is_valid());
555        assert_eq!(span.byte_len(), 20);
556        assert_eq!(span.line_count(), 1);
557    }
558
559    #[test]
560    fn test_span_validation_invalid_byte_range() {
561        // Reversed byte range
562        let result = Span::new(10, 5, 1, 2);
563        assert!(result.is_err());
564        if let Err(CkError::SpanValidation(msg)) = result {
565            assert!(msg.contains("Invalid byte range"));
566            assert!(msg.contains("start (10) > end (5)"));
567        } else {
568            panic!("Expected SpanValidation error");
569        }
570    }
571
572    #[test]
573    fn test_span_validation_invalid_line_range() {
574        // Reversed line range
575        let result = Span::new(0, 10, 5, 2);
576        assert!(result.is_err());
577        if let Err(CkError::SpanValidation(msg)) = result {
578            assert!(msg.contains("Invalid line range"));
579            assert!(msg.contains("start (5) > end (2)"));
580        } else {
581            panic!("Expected SpanValidation error");
582        }
583    }
584
585    #[test]
586    fn test_span_validation_zero_line_numbers() {
587        // Zero line start
588        let result = Span::new(0, 10, 0, 2);
589        assert!(result.is_err());
590        if let Err(CkError::SpanValidation(msg)) = result {
591            assert!(msg.contains("Line start cannot be zero"));
592        } else {
593            panic!("Expected SpanValidation error");
594        }
595
596        // Zero line end
597        let result = Span::new(0, 10, 1, 0);
598        assert!(result.is_err());
599        if let Err(CkError::SpanValidation(msg)) = result {
600            assert!(msg.contains("Line end cannot be zero"));
601        } else {
602            panic!("Expected SpanValidation error");
603        }
604    }
605
606    #[test]
607    fn test_span_unchecked_creation() {
608        // Test backward compatibility with unchecked creation
609        let span = Span::new_unchecked(10, 5, 0, 1);
610        assert_eq!(span.byte_start, 10);
611        assert_eq!(span.byte_end, 5);
612        assert_eq!(span.line_start, 0);
613        assert_eq!(span.line_end, 1);
614        assert!(!span.is_valid()); // Should be invalid
615    }
616
617    #[test]
618    fn test_span_validation_methods() {
619        // Valid span
620        let valid_span = Span::new_unchecked(0, 10, 1, 2);
621        assert!(valid_span.validate().is_ok());
622        assert!(valid_span.is_valid());
623
624        // Invalid span (reversed bytes)
625        let invalid_span = Span::new_unchecked(10, 5, 1, 2);
626        assert!(invalid_span.validate().is_err());
627        assert!(!invalid_span.is_valid());
628
629        // Invalid span (zero lines)
630        let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
631        assert!(zero_line_span.validate().is_err());
632        assert!(!zero_line_span.is_valid());
633    }
634
635    #[test]
636    fn test_span_utility_methods() {
637        let span = Span::new(10, 25, 5, 8).unwrap();
638
639        // Test byte_len
640        assert_eq!(span.byte_len(), 15);
641
642        // Test line_count
643        assert_eq!(span.line_count(), 4); // lines 5, 6, 7, 8
644
645        // Test with single-line span
646        let single_line = Span::new(0, 5, 1, 1).unwrap();
647        assert_eq!(single_line.line_count(), 1);
648        assert_eq!(single_line.byte_len(), 5);
649
650        // Test with empty span
651        let empty = Span::new(10, 10, 3, 3).unwrap();
652        assert_eq!(empty.byte_len(), 0);
653        assert_eq!(empty.line_count(), 1);
654    }
655
656    #[test]
657    fn test_span_legacy_struct_literal_still_works() {
658        // Ensure backward compatibility for existing code using struct literals
659        let span = Span {
660            byte_start: 0,
661            byte_end: 10,
662            line_start: 1,
663            line_end: 2,
664        };
665
666        assert_eq!(span.byte_start, 0);
667        assert_eq!(span.byte_end, 10);
668        assert_eq!(span.line_start, 1);
669        assert_eq!(span.line_end, 2);
670        assert!(span.is_valid());
671    }
672
673    #[test]
674    fn test_search_options_default() {
675        let options = SearchOptions::default();
676        assert!(matches!(options.mode, SearchMode::Regex));
677        assert_eq!(options.query, "");
678        assert_eq!(options.path, PathBuf::from("."));
679        assert_eq!(options.top_k, None);
680        assert_eq!(options.threshold, None);
681        assert!(!options.case_insensitive);
682        assert!(!options.whole_word);
683        assert!(!options.fixed_string);
684        assert!(!options.line_numbers);
685        assert_eq!(options.context_lines, 0);
686        assert!(options.recursive);
687        assert!(!options.json_output);
688        assert!(!options.reindex);
689        assert!(!options.show_scores);
690        assert!(!options.show_filenames);
691    }
692
693    #[test]
694    fn test_file_metadata_serialization() {
695        let metadata = FileMetadata {
696            path: PathBuf::from("test.txt"),
697            hash: "abc123".to_string(),
698            last_modified: 1234567890,
699            size: 1024,
700        };
701
702        let json = serde_json::to_string(&metadata).unwrap();
703        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
704
705        assert_eq!(metadata.path, deserialized.path);
706        assert_eq!(metadata.hash, deserialized.hash);
707        assert_eq!(metadata.last_modified, deserialized.last_modified);
708        assert_eq!(metadata.size, deserialized.size);
709    }
710
711    #[test]
712    fn test_search_result_serialization() {
713        let result = SearchResult {
714            file: PathBuf::from("test.txt"),
715            span: Span {
716                byte_start: 0,
717                byte_end: 10,
718                line_start: 1,
719                line_end: 1,
720            },
721            score: 0.95,
722            preview: "hello world".to_string(),
723            lang: Some(Language::Rust),
724            symbol: Some("main".to_string()),
725            chunk_hash: Some("abc123".to_string()),
726            index_epoch: Some(1699123456),
727        };
728
729        let json = serde_json::to_string(&result).unwrap();
730        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
731
732        assert_eq!(result.file, deserialized.file);
733        assert_eq!(result.score, deserialized.score);
734        assert_eq!(result.preview, deserialized.preview);
735        assert_eq!(result.lang, deserialized.lang);
736        assert_eq!(result.symbol, deserialized.symbol);
737        assert_eq!(result.chunk_hash, deserialized.chunk_hash);
738        assert_eq!(result.index_epoch, deserialized.index_epoch);
739    }
740
741    #[test]
742    fn test_jsonl_search_result_conversion() {
743        let result = SearchResult {
744            file: PathBuf::from("src/auth.rs"),
745            span: Span {
746                byte_start: 1203,
747                byte_end: 1456,
748                line_start: 42,
749                line_end: 58,
750            },
751            score: 0.89,
752            preview: "function authenticate(user) {...}".to_string(),
753            lang: Some(Language::Rust),
754            symbol: Some("authenticate".to_string()),
755            chunk_hash: Some("abc123def456".to_string()),
756            index_epoch: Some(1699123456),
757        };
758
759        // Test with snippet
760        let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
761        assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
762        assert_eq!(jsonl_with_snippet.span.line_start, 42);
763        assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
764        assert_eq!(
765            jsonl_with_snippet.snippet,
766            Some("function authenticate(user) {...}".to_string())
767        );
768        assert_eq!(jsonl_with_snippet.score, Some(0.89));
769        assert_eq!(
770            jsonl_with_snippet.chunk_hash,
771            Some("abc123def456".to_string())
772        );
773        assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
774
775        // Test without snippet
776        let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
777        assert_eq!(jsonl_no_snippet.snippet, None);
778        assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
779    }
780
781    #[test]
782    fn test_get_sidecar_path() {
783        let repo_root = PathBuf::from("/home/user/project");
784        let file_path = PathBuf::from("/home/user/project/src/main.rs");
785
786        let sidecar = get_sidecar_path(&repo_root, &file_path);
787        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
788
789        assert_eq!(sidecar, expected);
790    }
791
792    #[test]
793    fn test_get_sidecar_path_no_extension() {
794        let repo_root = PathBuf::from("/project");
795        let file_path = PathBuf::from("/project/README");
796
797        let sidecar = get_sidecar_path(&repo_root, &file_path);
798        let expected = PathBuf::from("/project/.ck/README.ck");
799
800        assert_eq!(sidecar, expected);
801    }
802
803    #[test]
804    fn test_compute_file_hash() {
805        let temp_dir = TempDir::new().unwrap();
806        let file_path = temp_dir.path().join("test.txt");
807
808        fs::write(&file_path, "hello world").unwrap();
809
810        let hash1 = compute_file_hash(&file_path).unwrap();
811        let hash2 = compute_file_hash(&file_path).unwrap();
812
813        // Same content should produce same hash
814        assert_eq!(hash1, hash2);
815        assert!(!hash1.is_empty());
816
817        // Different content should produce different hash
818        fs::write(&file_path, "hello rust").unwrap();
819        let hash3 = compute_file_hash(&file_path).unwrap();
820        assert_ne!(hash1, hash3);
821    }
822
823    #[test]
824    fn test_compute_file_hash_nonexistent() {
825        let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
826        assert!(result.is_err());
827    }
828
829    #[test]
830    fn test_compute_file_hash_large_file() {
831        let temp_dir = TempDir::new().unwrap();
832        let file_path = temp_dir.path().join("large_test.txt");
833
834        // Create a file larger than the buffer size (64KB) to test streaming
835        let large_content = "a".repeat(100_000); // 100KB content
836        fs::write(&file_path, &large_content).unwrap();
837
838        let hash1 = compute_file_hash(&file_path).unwrap();
839        let hash2 = compute_file_hash(&file_path).unwrap();
840
841        // Streaming hash should be consistent
842        assert_eq!(hash1, hash2);
843        assert!(!hash1.is_empty());
844
845        // Verify it's different from smaller content
846        fs::write(&file_path, "small content").unwrap();
847        let hash3 = compute_file_hash(&file_path).unwrap();
848        assert_ne!(hash1, hash3);
849    }
850
851    #[test]
852    fn test_json_search_result_serialization() {
853        let signals = SearchSignals {
854            lex_rank: Some(1),
855            vec_rank: Some(2),
856            rrf_score: 0.85,
857        };
858
859        let result = JsonSearchResult {
860            file: "test.txt".to_string(),
861            span: Span {
862                byte_start: 0,
863                byte_end: 5,
864                line_start: 1,
865                line_end: 1,
866            },
867            lang: None, // txt is not a supported language
868            symbol: None,
869            score: 0.95,
870            signals,
871            preview: "hello".to_string(),
872            model: "bge-small".to_string(),
873        };
874
875        let json = serde_json::to_string(&result).unwrap();
876        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
877
878        assert_eq!(result.file, deserialized.file);
879        assert_eq!(result.score, deserialized.score);
880        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
881        assert_eq!(result.model, deserialized.model);
882    }
883
884    #[test]
885    fn test_language_from_extension() {
886        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
887        assert_eq!(Language::from_extension("py"), Some(Language::Python));
888        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
889        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
890        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
891        assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
892        assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
893        assert_eq!(Language::from_extension("go"), Some(Language::Go));
894        assert_eq!(Language::from_extension("java"), Some(Language::Java));
895        assert_eq!(Language::from_extension("c"), Some(Language::C));
896        assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
897        assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
898        assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
899        assert_eq!(Language::from_extension("php"), Some(Language::Php));
900        assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
901        assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
902        assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
903        assert_eq!(Language::from_extension("unknown"), None);
904    }
905
#[test]
fn test_language_from_extension_case_insensitive() {
    // Fully upper-case spellings, restricted to languages that are supported.
    let upper_case = [
        ("RS", Language::Rust),
        ("PY", Language::Python),
        ("JS", Language::JavaScript),
        ("TS", Language::TypeScript),
        ("TSX", Language::TypeScript),
        ("HS", Language::Haskell),
        ("LHS", Language::Haskell),
        ("GO", Language::Go),
        ("JAVA", Language::Java),
        ("C", Language::C),
        ("CPP", Language::Cpp),
        ("CC", Language::Cpp),
        ("CXX", Language::Cpp),
        ("H", Language::Cpp),
        ("HPP", Language::Cpp),
        ("CS", Language::CSharp),
        ("RB", Language::Ruby),
        ("PHP", Language::Php),
        ("SWIFT", Language::Swift),
        ("KT", Language::Kotlin),
        ("KTS", Language::Kotlin),
        ("PDF", Language::Pdf),
    ];

    // Spellings that mix upper- and lower-case letters.
    let mixed_case = [
        ("Rs", Language::Rust),
        ("Py", Language::Python),
        ("Js", Language::JavaScript),
        ("Ts", Language::TypeScript),
        ("TsX", Language::TypeScript),
        ("Hs", Language::Haskell),
        ("Go", Language::Go),
        ("Java", Language::Java),
        ("Cpp", Language::Cpp),
        ("Rb", Language::Ruby),
        ("Php", Language::Php),
        ("Swift", Language::Swift),
        ("Kt", Language::Kotlin),
        ("Pdf", Language::Pdf),
    ];

    for &(ext, expected) in upper_case.iter().chain(mixed_case.iter()) {
        assert_eq!(
            Language::from_extension(ext),
            Some(expected),
            "extension {:?} resolved to the wrong language",
            ext
        );
    }

    // Changing the case of an unsupported extension must not make it match.
    for &ext in ["UNKNOWN", "Unknown"].iter() {
        assert_eq!(
            Language::from_extension(ext),
            None,
            "extension {:?} should not be recognised",
            ext
        );
    }
}
952
#[test]
fn test_language_from_path() {
    // File name -> expected detection result. `None` rows cover a file with an
    // unrecognised extension and a file with no extension at all.
    let cases = [
        ("test.rs", Some(Language::Rust)),
        ("test.py", Some(Language::Python)),
        ("test.js", Some(Language::JavaScript)),
        ("test.hs", Some(Language::Haskell)),
        ("test.lhs", Some(Language::Haskell)),
        ("test.go", Some(Language::Go)),
        ("test.unknown", None),
        ("noext", None),
    ];

    for &(file_name, expected) in cases.iter() {
        let detected = Language::from_path(&PathBuf::from(file_name));
        assert_eq!(
            detected, expected,
            "unexpected language for path {:?}",
            file_name
        );
    }
}
982
#[test]
fn test_language_from_path_case_insensitive() {
    // Single place where the path is built and handed to the detector.
    let detect = |file_name: &str| Language::from_path(&PathBuf::from(file_name));

    // Paths whose extension is entirely upper-case (supported languages only).
    let upper_case = [
        ("MAIN.RS", Language::Rust),
        ("app.PY", Language::Python),
        ("script.JS", Language::JavaScript),
        ("types.TS", Language::TypeScript),
        ("Component.TSX", Language::TypeScript),
        ("module.HS", Language::Haskell),
        ("server.GO", Language::Go),
        ("App.JAVA", Language::Java),
        ("main.C", Language::C),
        ("utils.CPP", Language::Cpp),
        ("Program.CS", Language::CSharp),
        ("script.RB", Language::Ruby),
        ("index.PHP", Language::Php),
        ("App.SWIFT", Language::Swift),
        ("Main.KT", Language::Kotlin),
        ("document.PDF", Language::Pdf),
    ];

    // Paths whose extension mixes upper- and lower-case letters.
    let mixed_case = [
        ("config.Rs", Language::Rust),
        ("helper.Py", Language::Python),
        ("utils.Js", Language::JavaScript),
        ("interfaces.Ts", Language::TypeScript),
        ("Component.TsX", Language::TypeScript),
        ("main.Cpp", Language::Cpp),
        ("report.Pdf", Language::Pdf),
    ];

    for &(file_name, expected) in upper_case.iter().chain(mixed_case.iter()) {
        assert_eq!(
            detect(file_name),
            Some(expected),
            "unexpected language for path {:?}",
            file_name
        );
    }

    // An unsupported extension stays unsupported whatever its case.
    for &file_name in ["test.UNKNOWN", "test.Unknown"].iter() {
        assert_eq!(
            detect(file_name),
            None,
            "path {:?} should not map to a language",
            file_name
        );
    }
}
1085
#[test]
fn test_language_display() {
    // Each variant paired with the text its string conversion must produce.
    let rendered = [
        (Language::Rust, "rust"),
        (Language::Python, "python"),
        (Language::JavaScript, "javascript"),
        (Language::TypeScript, "typescript"),
        (Language::Go, "go"),
        (Language::Java, "java"),
    ];

    for &(language, expected) in rendered.iter() {
        assert_eq!(language.to_string(), expected);
    }
}
1095}