ck_core/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
/// Error type shared by the fallible functions in this crate (see the [`Result`] alias).
///
/// Variants marked `#[from]` wrap a library error and can therefore be produced
/// with `?`; the remaining variants carry a free-form message.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a `std::io::Error`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a `regex::Error`.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a `bincode::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a `serde_json::Error`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Message-only variant, rendered with an "Index error" prefix.
    #[error("Index error: {0}")]
    Index(String),

    /// Message-only variant, rendered with a "Search error" prefix.
    #[error("Search error: {0}")]
    Search(String),

    /// Message-only variant, rendered with an "Embedding error" prefix.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// Message-only variant; `Span::validate` returns it for malformed spans.
    #[error("Span validation error: {0}")]
    SpanValidation(String),

    /// Message-only variant, rendered with an "Other error" prefix.
    #[error("Other error: {0}")]
    Other(String),
}
34
/// Shorthand for a `std::result::Result` whose error type is [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
36
/// File types that `Language::from_extension` can recognise from a file extension.
///
/// The `Display` implementation renders each variant as a lowercase identifier
/// (for example `rust` or `csharp`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Haskell,
    Go,
    Java,
    C,
    // Also assigned to `h`/`hpp` header files by `from_extension`.
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
    // Not a programming language: marks PDF documents (`pdf` extension).
    Pdf,
}
55
56impl Language {
57    pub fn from_extension(ext: &str) -> Option<Self> {
58        // Convert to lowercase for case-insensitive matching
59        match ext.to_lowercase().as_str() {
60            "rs" => Some(Language::Rust),
61            "py" => Some(Language::Python),
62            "js" => Some(Language::JavaScript),
63            "ts" | "tsx" => Some(Language::TypeScript),
64            "hs" | "lhs" => Some(Language::Haskell),
65            "go" => Some(Language::Go),
66            "java" => Some(Language::Java),
67            "c" => Some(Language::C),
68            "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
69            "h" | "hpp" => Some(Language::Cpp), // Assume C++ for headers
70            "cs" => Some(Language::CSharp),
71            "rb" => Some(Language::Ruby),
72            "php" => Some(Language::Php),
73            "swift" => Some(Language::Swift),
74            "kt" | "kts" => Some(Language::Kotlin),
75            "pdf" => Some(Language::Pdf),
76            _ => None,
77        }
78    }
79
80    pub fn from_path(path: &Path) -> Option<Self> {
81        path.extension()
82            .and_then(|ext| ext.to_str())
83            .and_then(Self::from_extension)
84    }
85}
86
87impl std::fmt::Display for Language {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        let name = match self {
90            Language::Rust => "rust",
91            Language::Python => "python",
92            Language::JavaScript => "javascript",
93            Language::TypeScript => "typescript",
94            Language::Haskell => "haskell",
95            Language::Go => "go",
96            Language::Java => "java",
97            Language::C => "c",
98            Language::Cpp => "cpp",
99            Language::CSharp => "csharp",
100            Language::Ruby => "ruby",
101            Language::Php => "php",
102            Language::Swift => "swift",
103            Language::Kotlin => "kotlin",
104            Language::Pdf => "pdf",
105        };
106        write!(f, "{}", name)
107    }
108}
109
/// A region of a file described by a byte range and a line range.
///
/// `Span::validate` requires `byte_start <= byte_end`, `line_start <= line_end`
/// and non-zero (1-indexed) line numbers. The fields are public, so a span built
/// with a struct literal or by deserialization is not validated automatically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the span starts.
    pub byte_start: usize,
    /// Byte offset where the span ends; `byte_len()` is `byte_end - byte_start`.
    pub byte_end: usize,
    /// First line of the span (1-indexed).
    pub line_start: usize,
    /// Last line of the span (1-indexed); `line_count()` counts it inclusively.
    pub line_end: usize,
}
117
118impl Span {
119    /// Create a new Span with validation
120    pub fn new(
121        byte_start: usize,
122        byte_end: usize,
123        line_start: usize,
124        line_end: usize,
125    ) -> Result<Self> {
126        let span = Self {
127            byte_start,
128            byte_end,
129            line_start,
130            line_end,
131        };
132        span.validate()?;
133        Ok(span)
134    }
135
136    /// Create a new Span without validation (for backward compatibility)
137    ///
138    /// # Safety
139    ///
140    /// The caller must ensure the span is valid. Use `new()` for validated construction.
141    pub fn new_unchecked(
142        byte_start: usize,
143        byte_end: usize,
144        line_start: usize,
145        line_end: usize,
146    ) -> Self {
147        Self {
148            byte_start,
149            byte_end,
150            line_start,
151            line_end,
152        }
153    }
154
155    /// Validate span invariants
156    pub fn validate(&self) -> Result<()> {
157        // Check for zero line numbers first (lines should be 1-indexed)
158        if self.line_start == 0 {
159            return Err(CkError::SpanValidation(
160                "Line start cannot be zero (lines are 1-indexed)".to_string(),
161            ));
162        }
163
164        if self.line_end == 0 {
165            return Err(CkError::SpanValidation(
166                "Line end cannot be zero (lines are 1-indexed)".to_string(),
167            ));
168        }
169
170        // Check byte range validity
171        if self.byte_start > self.byte_end {
172            return Err(CkError::SpanValidation(format!(
173                "Invalid byte range: start ({}) > end ({})",
174                self.byte_start, self.byte_end
175            )));
176        }
177
178        // Check line range validity
179        if self.line_start > self.line_end {
180            return Err(CkError::SpanValidation(format!(
181                "Invalid line range: start ({}) > end ({})",
182                self.line_start, self.line_end
183            )));
184        }
185
186        Ok(())
187    }
188
189    /// Check if this span is valid
190    pub fn is_valid(&self) -> bool {
191        self.validate().is_ok()
192    }
193
194    /// Get byte length of the span
195    pub fn byte_len(&self) -> usize {
196        self.byte_end.saturating_sub(self.byte_start)
197    }
198
199    /// Get line count of the span
200    pub fn line_count(&self) -> usize {
201        self.line_end.saturating_sub(self.line_start) + 1
202    }
203}
204
/// Serializable record describing one file: its path, a content hash,
/// a modification timestamp and its size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// Content hash as a string (presumably produced by `compute_file_hash` — confirm at call sites).
    pub hash: String,
    /// Modification time as an integer; the unit/epoch is not visible here — TODO confirm.
    pub last_modified: u64,
    /// File size (presumably in bytes — confirm).
    pub size: u64,
}
212
/// A single search hit.
///
/// Optional fields are omitted from serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File in which the match was found.
    pub file: PathBuf,
    /// Location of the match inside `file`.
    pub span: Span,
    /// Score of the match; `JsonlSearchResult::from_search_result` omits negative scores.
    pub score: f32,
    /// Text shown for the match (used as the JSONL `snippet`).
    pub preview: String,
    /// Detected language of the file, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// Symbol name associated with the match, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
    /// Hash of the matched chunk, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch associated with the result, if available (semantics defined by the indexer — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
228
/// Enhanced search results that include near-miss information for threshold queries.
///
/// Unlike [`SearchResult`], this wrapper does not derive `Serialize`/`Deserialize`.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// Results that matched the query.
    pub matches: Vec<SearchResult>,
    /// The highest scoring result below the threshold (if any)
    pub closest_below_threshold: Option<SearchResult>,
}
236
/// Serializable shape of a search hit for JSON output.
///
/// Unlike [`SearchResult`], the file is a plain `String` and optional fields are
/// always serialized (there are no `skip_serializing_if` attributes here).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File path as a string.
    pub file: String,
    /// Location of the match.
    pub span: Span,
    /// Detected language, if any.
    pub lang: Option<Language>,
    /// Symbol name associated with the match, if any.
    pub symbol: Option<String>,
    /// Score of the match.
    pub score: f32,
    /// Per-signal ranking details.
    pub signals: SearchSignals,
    /// Text shown for the match.
    pub preview: String,
    /// Model name (presumably the embedding model that produced the result — confirm).
    pub model: String,
}
248
/// Serializable shape of a search hit for JSONL output.
///
/// Built from a [`SearchResult`] by `JsonlSearchResult::from_search_result`.
/// Fields carrying `skip_serializing_if` are left out of the output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonlSearchResult {
    /// File path as a (lossily converted) string.
    pub path: String,
    /// Location of the match.
    pub span: Span,
    /// Language name as rendered by `Language`'s `Display` impl; always serialized, even when `None`.
    pub language: Option<String>,
    /// Preview text, present only when a snippet was requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
    /// Score, present only when the source score was `>= 0.0`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub score: Option<f32>,
    /// Hash of the matched chunk, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch copied from the source result, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
263
/// Ranking signals attached to a [`JsonSearchResult`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Rank from the lexical ranker, if it returned this result (presumed from the name — confirm).
    pub lex_rank: Option<usize>,
    /// Rank from the vector ranker, if it returned this result (presumed from the name — confirm).
    pub vec_rank: Option<usize>,
    /// Combined score (presumably reciprocal rank fusion — confirm against the search code).
    pub rrf_score: f32,
}
270
/// Search strategy selected in [`SearchOptions::mode`].
///
/// `SearchOptions::default()` uses [`SearchMode::Regex`].
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
278
/// Options describing one search invocation.
///
/// See the `Default` implementation for the default value of every field.
/// The exact meaning of most flags is defined by the search code that consumes
/// them, which is not part of this file.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy to use.
    pub mode: SearchMode,
    /// The query text (empty by default).
    pub query: String,
    /// Path to search (defaults to `.`).
    pub path: PathBuf,
    /// Optional cap on the number of results (presumed from the name — confirm).
    pub top_k: Option<usize>,
    /// Optional minimum score (presumed from the name — confirm).
    pub threshold: Option<f32>,
    pub case_insensitive: bool,
    pub whole_word: bool,
    pub fixed_string: bool,
    pub line_numbers: bool,
    pub context_lines: usize,
    pub before_context_lines: usize,
    pub after_context_lines: usize,
    pub recursive: bool,
    pub json_output: bool,
    pub jsonl_output: bool,
    pub no_snippet: bool,
    pub reindex: bool,
    pub show_scores: bool,
    pub show_filenames: bool,
    pub files_with_matches: bool,
    pub files_without_matches: bool,
    /// Patterns to exclude; defaults to `get_default_exclude_patterns()`.
    pub exclude_patterns: Vec<String>,
    pub respect_gitignore: bool,
    pub full_section: bool,
    // Enhanced embedding options (search-time only)
    pub rerank: bool,
    pub rerank_model: Option<String>,
    pub embedding_model: Option<String>,
}
310
311impl JsonlSearchResult {
312    pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
313        Self {
314            path: result.file.to_string_lossy().to_string(),
315            span: result.span.clone(),
316            language: result.lang.as_ref().map(|l| l.to_string()),
317            snippet: if include_snippet {
318                Some(result.preview.clone())
319            } else {
320                None
321            },
322            score: if result.score >= 0.0 {
323                Some(result.score)
324            } else {
325                None
326            },
327            chunk_hash: result.chunk_hash.clone(),
328            index_epoch: result.index_epoch,
329        }
330    }
331}
332
333impl Default for SearchOptions {
334    fn default() -> Self {
335        Self {
336            mode: SearchMode::Regex,
337            query: String::new(),
338            path: PathBuf::from("."),
339            top_k: None,
340            threshold: None,
341            case_insensitive: false,
342            whole_word: false,
343            fixed_string: false,
344            line_numbers: false,
345            context_lines: 0,
346            before_context_lines: 0,
347            after_context_lines: 0,
348            recursive: true,
349            json_output: false,
350            jsonl_output: false,
351            no_snippet: false,
352            reindex: false,
353            show_scores: false,
354            show_filenames: false,
355            files_with_matches: false,
356            files_without_matches: false,
357            exclude_patterns: get_default_exclude_patterns(),
358            respect_gitignore: true,
359            full_section: false,
360            // Enhanced embedding options (search-time only)
361            rerank: false,
362            rerank_model: None,
363            embedding_model: None,
364        }
365    }
366}
367
/// Returns the directory names that are skipped by default during search.
///
/// The list covers ck's own index directory, model caches, version-control
/// metadata, build output, Python virtual environments, editor settings and
/// temporary directories. A fresh `Vec` is allocated on every call.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| String::from(*name))
        .collect()
}
407
/// Returns the text written into a freshly created `.ckignore` file.
///
/// The content is a commented, gitignore-style pattern list covering images,
/// video, audio, binaries, archives, data files and config formats, followed
/// by a marker line below which users can add their own patterns.
pub fn get_default_ckignore_content() -> &'static str {
    const DEFAULT_CKIGNORE: &str = r#"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
"#;

    DEFAULT_CKIGNORE
}
477
478/// Read and parse .ckignore file, returning patterns
479pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
480    let ckignore_path = repo_root.join(".ckignore");
481
482    if !ckignore_path.exists() {
483        return Ok(Vec::new());
484    }
485
486    let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
487
488    let patterns: Vec<String> = content
489        .lines()
490        .map(|line| line.trim())
491        .filter(|line| !line.is_empty() && !line.starts_with('#'))
492        .map(|line| line.to_string())
493        .collect();
494
495    Ok(patterns)
496}
497
498/// Create .ckignore file with default content if it doesn't exist
499pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
500    let ckignore_path = repo_root.join(".ckignore");
501
502    if ckignore_path.exists() {
503        return Ok(false); // Already exists
504    }
505
506    std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
507
508    Ok(true) // Created new file
509}
510
/// Computes the sidecar path for `file_path` inside `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored under `.ck`, and `.ck`
/// is appended to its extension (`src/main.rs` becomes `.ck/src/main.rs.ck`;
/// a file without an extension simply gains `.ck`).
///
/// When `file_path` is not located under `repo_root` it is used unchanged.
/// Note that joining an absolute path replaces the `.ck` base entirely.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = match file_path.strip_prefix(repo_root) {
        Ok(inside_repo) => inside_repo,
        Err(_) => file_path,
    };

    let sidecar_ext = match relative.extension() {
        Some(existing) => format!("{}.ck", existing.to_string_lossy()),
        None => String::from("ck"),
    };

    let mut sidecar = repo_root.join(".ck").join(relative);
    sidecar.set_extension(sidecar_ext);
    sidecar
}
522
523pub fn compute_file_hash(path: &Path) -> Result<String> {
524    use std::io::Read;
525
526    let mut file = std::fs::File::open(path)?;
527    let mut hasher = blake3::Hasher::new();
528
529    // Stream the file in 64KB chunks to avoid loading entire file into memory
530    let mut buffer = [0u8; 65536]; // 64KB buffer
531    loop {
532        let bytes_read = file.read(&mut buffer)?;
533        if bytes_read == 0 {
534            break;
535        }
536        hasher.update(&buffer[..bytes_read]);
537    }
538
539    let hash = hasher.finalize();
540    Ok(hash.to_hex().to_string())
541}
542
/// Helpers for PDF files: extension detection and the location of cached
/// extracted text.
pub mod pdf {
    use std::path::{Path, PathBuf};

    /// Returns `true` when the extension of `path` is `pdf`, ignoring ASCII case.
    ///
    /// The comparison is done in place, without allocating a lowercased copy.
    /// Paths without an extension, or with a non-UTF-8 extension, yield `false`.
    pub fn is_pdf_file(path: &Path) -> bool {
        match path.extension().and_then(|ext| ext.to_str()) {
            Some(ext) => ext.eq_ignore_ascii_case("pdf"),
            None => false,
        }
    }

    /// Computes where the cached text content of `file_path` is stored.
    ///
    /// The file's path relative to `repo_root` is mirrored under
    /// `<repo_root>/.ck/content`, and `.txt` is appended to its extension
    /// (`docs/manual.pdf` becomes `.ck/content/docs/manual.pdf.txt`). When
    /// `file_path` is not located under `repo_root` it is used unchanged.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = match file_path.strip_prefix(repo_root) {
            Ok(inside_repo) => inside_repo,
            Err(_) => file_path,
        };

        // Keep the original extension and add `.txt` after it.
        let cached_ext = match relative.extension() {
            Some(existing) => format!("{}.txt", existing.to_string_lossy()),
            None => String::from("txt"),
        };

        let mut cache_path = repo_root.join(".ck").join("content").join(relative);
        cache_path.set_extension(cached_ext);
        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::Path;

        #[test]
        fn test_is_pdf_file() {
            // Accepted regardless of case.
            for accepted in ["test.pdf", "test.PDF", "test.Pdf"].iter() {
                assert!(is_pdf_file(Path::new(accepted)));
            }
            // Wrong extension, no extension, and a bare "pdf" file name.
            for rejected in ["test.txt", "test", "pdf"].iter() {
                assert!(!is_pdf_file(Path::new(rejected)));
            }
        }

        #[test]
        fn test_get_content_cache_path() {
            let cache_path = get_content_cache_path(
                Path::new("/project"),
                Path::new("/project/docs/manual.pdf"),
            );
            assert_eq!(
                cache_path,
                Path::new("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let cache_path =
                get_content_cache_path(Path::new("/project"), Path::new("/project/docs/manual"));
            assert_eq!(
                cache_path,
                Path::new("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            // The file path is already relative to the repository root.
            let cache_path =
                get_content_cache_path(Path::new("/project"), Path::new("docs/manual.pdf"));
            assert_eq!(
                cache_path,
                Path::new("/project/.ck/content/docs/manual.pdf.txt")
            );
        }
    }
}
623
624#[cfg(test)]
625mod tests {
626    use super::*;
627    use std::fs;
628    use tempfile::TempDir;
629
630    #[test]
631    fn test_span_valid_creation() {
632        // Test valid span creation
633        let span = Span::new(0, 10, 1, 2).unwrap();
634        assert_eq!(span.byte_start, 0);
635        assert_eq!(span.byte_end, 10);
636        assert_eq!(span.line_start, 1);
637        assert_eq!(span.line_end, 2);
638        assert!(span.is_valid());
639    }
640
641    #[test]
642    fn test_span_validation_valid_cases() {
643        // Same byte positions (empty span)
644        let span = Span::new(10, 10, 1, 1).unwrap();
645        assert!(span.is_valid());
646        assert_eq!(span.byte_len(), 0);
647        assert_eq!(span.line_count(), 1);
648
649        // Multi-line span
650        let span = Span::new(0, 100, 1, 10).unwrap();
651        assert!(span.is_valid());
652        assert_eq!(span.byte_len(), 100);
653        assert_eq!(span.line_count(), 10);
654
655        // Single line span
656        let span = Span::new(5, 25, 3, 3).unwrap();
657        assert!(span.is_valid());
658        assert_eq!(span.byte_len(), 20);
659        assert_eq!(span.line_count(), 1);
660    }
661
662    #[test]
663    fn test_span_validation_invalid_byte_range() {
664        // Reversed byte range
665        let result = Span::new(10, 5, 1, 2);
666        assert!(result.is_err());
667        if let Err(CkError::SpanValidation(msg)) = result {
668            assert!(msg.contains("Invalid byte range"));
669            assert!(msg.contains("start (10) > end (5)"));
670        } else {
671            panic!("Expected SpanValidation error");
672        }
673    }
674
675    #[test]
676    fn test_span_validation_invalid_line_range() {
677        // Reversed line range
678        let result = Span::new(0, 10, 5, 2);
679        assert!(result.is_err());
680        if let Err(CkError::SpanValidation(msg)) = result {
681            assert!(msg.contains("Invalid line range"));
682            assert!(msg.contains("start (5) > end (2)"));
683        } else {
684            panic!("Expected SpanValidation error");
685        }
686    }
687
688    #[test]
689    fn test_span_validation_zero_line_numbers() {
690        // Zero line start
691        let result = Span::new(0, 10, 0, 2);
692        assert!(result.is_err());
693        if let Err(CkError::SpanValidation(msg)) = result {
694            assert!(msg.contains("Line start cannot be zero"));
695        } else {
696            panic!("Expected SpanValidation error");
697        }
698
699        // Zero line end
700        let result = Span::new(0, 10, 1, 0);
701        assert!(result.is_err());
702        if let Err(CkError::SpanValidation(msg)) = result {
703            assert!(msg.contains("Line end cannot be zero"));
704        } else {
705            panic!("Expected SpanValidation error");
706        }
707    }
708
709    #[test]
710    fn test_span_unchecked_creation() {
711        // Test backward compatibility with unchecked creation
712        let span = Span::new_unchecked(10, 5, 0, 1);
713        assert_eq!(span.byte_start, 10);
714        assert_eq!(span.byte_end, 5);
715        assert_eq!(span.line_start, 0);
716        assert_eq!(span.line_end, 1);
717        assert!(!span.is_valid()); // Should be invalid
718    }
719
720    #[test]
721    fn test_span_validation_methods() {
722        // Valid span
723        let valid_span = Span::new_unchecked(0, 10, 1, 2);
724        assert!(valid_span.validate().is_ok());
725        assert!(valid_span.is_valid());
726
727        // Invalid span (reversed bytes)
728        let invalid_span = Span::new_unchecked(10, 5, 1, 2);
729        assert!(invalid_span.validate().is_err());
730        assert!(!invalid_span.is_valid());
731
732        // Invalid span (zero lines)
733        let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
734        assert!(zero_line_span.validate().is_err());
735        assert!(!zero_line_span.is_valid());
736    }
737
738    #[test]
739    fn test_span_utility_methods() {
740        let span = Span::new(10, 25, 5, 8).unwrap();
741
742        // Test byte_len
743        assert_eq!(span.byte_len(), 15);
744
745        // Test line_count
746        assert_eq!(span.line_count(), 4); // lines 5, 6, 7, 8
747
748        // Test with single-line span
749        let single_line = Span::new(0, 5, 1, 1).unwrap();
750        assert_eq!(single_line.line_count(), 1);
751        assert_eq!(single_line.byte_len(), 5);
752
753        // Test with empty span
754        let empty = Span::new(10, 10, 3, 3).unwrap();
755        assert_eq!(empty.byte_len(), 0);
756        assert_eq!(empty.line_count(), 1);
757    }
758
759    #[test]
760    fn test_span_legacy_struct_literal_still_works() {
761        // Ensure backward compatibility for existing code using struct literals
762        let span = Span {
763            byte_start: 0,
764            byte_end: 10,
765            line_start: 1,
766            line_end: 2,
767        };
768
769        assert_eq!(span.byte_start, 0);
770        assert_eq!(span.byte_end, 10);
771        assert_eq!(span.line_start, 1);
772        assert_eq!(span.line_end, 2);
773        assert!(span.is_valid());
774    }
775
776    #[test]
777    fn test_search_options_default() {
778        let options = SearchOptions::default();
779        assert!(matches!(options.mode, SearchMode::Regex));
780        assert_eq!(options.query, "");
781        assert_eq!(options.path, PathBuf::from("."));
782        assert_eq!(options.top_k, None);
783        assert_eq!(options.threshold, None);
784        assert!(!options.case_insensitive);
785        assert!(!options.whole_word);
786        assert!(!options.fixed_string);
787        assert!(!options.line_numbers);
788        assert_eq!(options.context_lines, 0);
789        assert!(options.recursive);
790        assert!(!options.json_output);
791        assert!(!options.reindex);
792        assert!(!options.show_scores);
793        assert!(!options.show_filenames);
794    }
795
796    #[test]
797    fn test_file_metadata_serialization() {
798        let metadata = FileMetadata {
799            path: PathBuf::from("test.txt"),
800            hash: "abc123".to_string(),
801            last_modified: 1234567890,
802            size: 1024,
803        };
804
805        let json = serde_json::to_string(&metadata).unwrap();
806        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
807
808        assert_eq!(metadata.path, deserialized.path);
809        assert_eq!(metadata.hash, deserialized.hash);
810        assert_eq!(metadata.last_modified, deserialized.last_modified);
811        assert_eq!(metadata.size, deserialized.size);
812    }
813
814    #[test]
815    fn test_search_result_serialization() {
816        let result = SearchResult {
817            file: PathBuf::from("test.txt"),
818            span: Span {
819                byte_start: 0,
820                byte_end: 10,
821                line_start: 1,
822                line_end: 1,
823            },
824            score: 0.95,
825            preview: "hello world".to_string(),
826            lang: Some(Language::Rust),
827            symbol: Some("main".to_string()),
828            chunk_hash: Some("abc123".to_string()),
829            index_epoch: Some(1699123456),
830        };
831
832        let json = serde_json::to_string(&result).unwrap();
833        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
834
835        assert_eq!(result.file, deserialized.file);
836        assert_eq!(result.score, deserialized.score);
837        assert_eq!(result.preview, deserialized.preview);
838        assert_eq!(result.lang, deserialized.lang);
839        assert_eq!(result.symbol, deserialized.symbol);
840        assert_eq!(result.chunk_hash, deserialized.chunk_hash);
841        assert_eq!(result.index_epoch, deserialized.index_epoch);
842    }
843
844    #[test]
845    fn test_jsonl_search_result_conversion() {
846        let result = SearchResult {
847            file: PathBuf::from("src/auth.rs"),
848            span: Span {
849                byte_start: 1203,
850                byte_end: 1456,
851                line_start: 42,
852                line_end: 58,
853            },
854            score: 0.89,
855            preview: "function authenticate(user) {...}".to_string(),
856            lang: Some(Language::Rust),
857            symbol: Some("authenticate".to_string()),
858            chunk_hash: Some("abc123def456".to_string()),
859            index_epoch: Some(1699123456),
860        };
861
862        // Test with snippet
863        let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
864        assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
865        assert_eq!(jsonl_with_snippet.span.line_start, 42);
866        assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
867        assert_eq!(
868            jsonl_with_snippet.snippet,
869            Some("function authenticate(user) {...}".to_string())
870        );
871        assert_eq!(jsonl_with_snippet.score, Some(0.89));
872        assert_eq!(
873            jsonl_with_snippet.chunk_hash,
874            Some("abc123def456".to_string())
875        );
876        assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
877
878        // Test without snippet
879        let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
880        assert_eq!(jsonl_no_snippet.snippet, None);
881        assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
882    }
883
884    #[test]
885    fn test_get_sidecar_path() {
886        let repo_root = PathBuf::from("/home/user/project");
887        let file_path = PathBuf::from("/home/user/project/src/main.rs");
888
889        let sidecar = get_sidecar_path(&repo_root, &file_path);
890        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
891
892        assert_eq!(sidecar, expected);
893    }
894
895    #[test]
896    fn test_get_sidecar_path_no_extension() {
897        let repo_root = PathBuf::from("/project");
898        let file_path = PathBuf::from("/project/README");
899
900        let sidecar = get_sidecar_path(&repo_root, &file_path);
901        let expected = PathBuf::from("/project/.ck/README.ck");
902
903        assert_eq!(sidecar, expected);
904    }
905
906    #[test]
907    fn test_compute_file_hash() {
908        let temp_dir = TempDir::new().unwrap();
909        let file_path = temp_dir.path().join("test.txt");
910
911        fs::write(&file_path, "hello world").unwrap();
912
913        let hash1 = compute_file_hash(&file_path).unwrap();
914        let hash2 = compute_file_hash(&file_path).unwrap();
915
916        // Same content should produce same hash
917        assert_eq!(hash1, hash2);
918        assert!(!hash1.is_empty());
919
920        // Different content should produce different hash
921        fs::write(&file_path, "hello rust").unwrap();
922        let hash3 = compute_file_hash(&file_path).unwrap();
923        assert_ne!(hash1, hash3);
924    }
925
926    #[test]
927    fn test_compute_file_hash_nonexistent() {
928        let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
929        assert!(result.is_err());
930    }
931
932    #[test]
933    fn test_compute_file_hash_large_file() {
934        let temp_dir = TempDir::new().unwrap();
935        let file_path = temp_dir.path().join("large_test.txt");
936
937        // Create a file larger than the buffer size (64KB) to test streaming
938        let large_content = "a".repeat(100_000); // 100KB content
939        fs::write(&file_path, &large_content).unwrap();
940
941        let hash1 = compute_file_hash(&file_path).unwrap();
942        let hash2 = compute_file_hash(&file_path).unwrap();
943
944        // Streaming hash should be consistent
945        assert_eq!(hash1, hash2);
946        assert!(!hash1.is_empty());
947
948        // Verify it's different from smaller content
949        fs::write(&file_path, "small content").unwrap();
950        let hash3 = compute_file_hash(&file_path).unwrap();
951        assert_ne!(hash1, hash3);
952    }
953
954    #[test]
955    fn test_json_search_result_serialization() {
956        let signals = SearchSignals {
957            lex_rank: Some(1),
958            vec_rank: Some(2),
959            rrf_score: 0.85,
960        };
961
962        let result = JsonSearchResult {
963            file: "test.txt".to_string(),
964            span: Span {
965                byte_start: 0,
966                byte_end: 5,
967                line_start: 1,
968                line_end: 1,
969            },
970            lang: None, // txt is not a supported language
971            symbol: None,
972            score: 0.95,
973            signals,
974            preview: "hello".to_string(),
975            model: "bge-small".to_string(),
976        };
977
978        let json = serde_json::to_string(&result).unwrap();
979        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
980
981        assert_eq!(result.file, deserialized.file);
982        assert_eq!(result.score, deserialized.score);
983        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
984        assert_eq!(result.model, deserialized.model);
985    }
986
987    #[test]
988    fn test_language_from_extension() {
989        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
990        assert_eq!(Language::from_extension("py"), Some(Language::Python));
991        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
992        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
993        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
994        assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
995        assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
996        assert_eq!(Language::from_extension("go"), Some(Language::Go));
997        assert_eq!(Language::from_extension("java"), Some(Language::Java));
998        assert_eq!(Language::from_extension("c"), Some(Language::C));
999        assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
1000        assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
1001        assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
1002        assert_eq!(Language::from_extension("php"), Some(Language::Php));
1003        assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
1004        assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
1005        assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
1006        assert_eq!(Language::from_extension("unknown"), None);
1007    }
1008
1009    #[test]
1010    fn test_language_from_extension_case_insensitive() {
1011        // Test uppercase extensions - only for actually supported languages
1012        assert_eq!(Language::from_extension("RS"), Some(Language::Rust));
1013        assert_eq!(Language::from_extension("PY"), Some(Language::Python));
1014        assert_eq!(Language::from_extension("JS"), Some(Language::JavaScript));
1015        assert_eq!(Language::from_extension("TS"), Some(Language::TypeScript));
1016        assert_eq!(Language::from_extension("TSX"), Some(Language::TypeScript));
1017        assert_eq!(Language::from_extension("HS"), Some(Language::Haskell));
1018        assert_eq!(Language::from_extension("LHS"), Some(Language::Haskell));
1019        assert_eq!(Language::from_extension("GO"), Some(Language::Go));
1020        assert_eq!(Language::from_extension("JAVA"), Some(Language::Java));
1021        assert_eq!(Language::from_extension("C"), Some(Language::C));
1022        assert_eq!(Language::from_extension("CPP"), Some(Language::Cpp));
1023        assert_eq!(Language::from_extension("CC"), Some(Language::Cpp));
1024        assert_eq!(Language::from_extension("CXX"), Some(Language::Cpp));
1025        assert_eq!(Language::from_extension("H"), Some(Language::Cpp));
1026        assert_eq!(Language::from_extension("HPP"), Some(Language::Cpp));
1027        assert_eq!(Language::from_extension("CS"), Some(Language::CSharp));
1028        assert_eq!(Language::from_extension("RB"), Some(Language::Ruby));
1029        assert_eq!(Language::from_extension("PHP"), Some(Language::Php));
1030        assert_eq!(Language::from_extension("SWIFT"), Some(Language::Swift));
1031        assert_eq!(Language::from_extension("KT"), Some(Language::Kotlin));
1032        assert_eq!(Language::from_extension("KTS"), Some(Language::Kotlin));
1033        assert_eq!(Language::from_extension("PDF"), Some(Language::Pdf));
1034
1035        // Test mixed case extensions
1036        assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
1037        assert_eq!(Language::from_extension("Py"), Some(Language::Python));
1038        assert_eq!(Language::from_extension("Js"), Some(Language::JavaScript));
1039        assert_eq!(Language::from_extension("Ts"), Some(Language::TypeScript));
1040        assert_eq!(Language::from_extension("TsX"), Some(Language::TypeScript));
1041        assert_eq!(Language::from_extension("Hs"), Some(Language::Haskell));
1042        assert_eq!(Language::from_extension("Go"), Some(Language::Go));
1043        assert_eq!(Language::from_extension("Java"), Some(Language::Java));
1044        assert_eq!(Language::from_extension("Cpp"), Some(Language::Cpp));
1045        assert_eq!(Language::from_extension("Rb"), Some(Language::Ruby));
1046        assert_eq!(Language::from_extension("Php"), Some(Language::Php));
1047        assert_eq!(Language::from_extension("Swift"), Some(Language::Swift));
1048        assert_eq!(Language::from_extension("Kt"), Some(Language::Kotlin));
1049        assert_eq!(Language::from_extension("Pdf"), Some(Language::Pdf));
1050
1051        // Unknown extensions should still return None
1052        assert_eq!(Language::from_extension("UNKNOWN"), None);
1053        assert_eq!(Language::from_extension("Unknown"), None);
1054    }
1055
1056    #[test]
1057    fn test_language_from_path() {
1058        assert_eq!(
1059            Language::from_path(&PathBuf::from("test.rs")),
1060            Some(Language::Rust)
1061        );
1062        assert_eq!(
1063            Language::from_path(&PathBuf::from("test.py")),
1064            Some(Language::Python)
1065        );
1066        assert_eq!(
1067            Language::from_path(&PathBuf::from("test.js")),
1068            Some(Language::JavaScript)
1069        );
1070        assert_eq!(
1071            Language::from_path(&PathBuf::from("test.hs")),
1072            Some(Language::Haskell)
1073        );
1074        assert_eq!(
1075            Language::from_path(&PathBuf::from("test.lhs")),
1076            Some(Language::Haskell)
1077        );
1078        assert_eq!(
1079            Language::from_path(&PathBuf::from("test.go")),
1080            Some(Language::Go)
1081        );
1082        assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None); // unknown extensions return None
1083        assert_eq!(Language::from_path(&PathBuf::from("noext")), None); // no extension
1084    }
1085
1086    #[test]
1087    fn test_language_from_path_case_insensitive() {
1088        // Test uppercase extensions in file paths - only supported languages
1089        assert_eq!(
1090            Language::from_path(&PathBuf::from("MAIN.RS")),
1091            Some(Language::Rust)
1092        );
1093        assert_eq!(
1094            Language::from_path(&PathBuf::from("app.PY")),
1095            Some(Language::Python)
1096        );
1097        assert_eq!(
1098            Language::from_path(&PathBuf::from("script.JS")),
1099            Some(Language::JavaScript)
1100        );
1101        assert_eq!(
1102            Language::from_path(&PathBuf::from("types.TS")),
1103            Some(Language::TypeScript)
1104        );
1105        assert_eq!(
1106            Language::from_path(&PathBuf::from("Component.TSX")),
1107            Some(Language::TypeScript)
1108        );
1109        assert_eq!(
1110            Language::from_path(&PathBuf::from("module.HS")),
1111            Some(Language::Haskell)
1112        );
1113        assert_eq!(
1114            Language::from_path(&PathBuf::from("server.GO")),
1115            Some(Language::Go)
1116        );
1117        assert_eq!(
1118            Language::from_path(&PathBuf::from("App.JAVA")),
1119            Some(Language::Java)
1120        );
1121        assert_eq!(
1122            Language::from_path(&PathBuf::from("main.C")),
1123            Some(Language::C)
1124        );
1125        assert_eq!(
1126            Language::from_path(&PathBuf::from("utils.CPP")),
1127            Some(Language::Cpp)
1128        );
1129        assert_eq!(
1130            Language::from_path(&PathBuf::from("Program.CS")),
1131            Some(Language::CSharp)
1132        );
1133        assert_eq!(
1134            Language::from_path(&PathBuf::from("script.RB")),
1135            Some(Language::Ruby)
1136        );
1137        assert_eq!(
1138            Language::from_path(&PathBuf::from("index.PHP")),
1139            Some(Language::Php)
1140        );
1141        assert_eq!(
1142            Language::from_path(&PathBuf::from("App.SWIFT")),
1143            Some(Language::Swift)
1144        );
1145        assert_eq!(
1146            Language::from_path(&PathBuf::from("Main.KT")),
1147            Some(Language::Kotlin)
1148        );
1149        assert_eq!(
1150            Language::from_path(&PathBuf::from("document.PDF")),
1151            Some(Language::Pdf)
1152        );
1153
1154        // Test mixed case extensions in file paths
1155        assert_eq!(
1156            Language::from_path(&PathBuf::from("config.Rs")),
1157            Some(Language::Rust)
1158        );
1159        assert_eq!(
1160            Language::from_path(&PathBuf::from("helper.Py")),
1161            Some(Language::Python)
1162        );
1163        assert_eq!(
1164            Language::from_path(&PathBuf::from("utils.Js")),
1165            Some(Language::JavaScript)
1166        );
1167        assert_eq!(
1168            Language::from_path(&PathBuf::from("interfaces.Ts")),
1169            Some(Language::TypeScript)
1170        );
1171        assert_eq!(
1172            Language::from_path(&PathBuf::from("Component.TsX")),
1173            Some(Language::TypeScript)
1174        );
1175        assert_eq!(
1176            Language::from_path(&PathBuf::from("main.Cpp")),
1177            Some(Language::Cpp)
1178        );
1179        assert_eq!(
1180            Language::from_path(&PathBuf::from("report.Pdf")),
1181            Some(Language::Pdf)
1182        );
1183
1184        // Unknown extensions should still return None regardless of case
1185        assert_eq!(Language::from_path(&PathBuf::from("test.UNKNOWN")), None);
1186        assert_eq!(Language::from_path(&PathBuf::from("test.Unknown")), None);
1187    }
1188
1189    #[test]
1190    fn test_language_display() {
1191        assert_eq!(Language::Rust.to_string(), "rust");
1192        assert_eq!(Language::Python.to_string(), "python");
1193        assert_eq!(Language::JavaScript.to_string(), "javascript");
1194        assert_eq!(Language::TypeScript.to_string(), "typescript");
1195        assert_eq!(Language::Go.to_string(), "go");
1196        assert_eq!(Language::Java.to_string(), "java");
1197    }
1198
1199    #[test]
1200    fn test_create_ckignore_if_missing() {
1201        let temp_dir = TempDir::new().unwrap();
1202        let test_path = temp_dir.path();
1203
1204        // First creation should succeed
1205        let created = create_ckignore_if_missing(test_path).unwrap();
1206        assert!(created);
1207
1208        // Check that file exists
1209        let ckignore_path = test_path.join(".ckignore");
1210        assert!(ckignore_path.exists());
1211
1212        // Check content contains expected patterns
1213        let content = fs::read_to_string(&ckignore_path).unwrap();
1214        assert!(content.contains("*.png"));
1215        assert!(content.contains("*.json"));
1216        assert!(content.contains("*.yaml"));
1217        assert!(content.contains("# Images"));
1218        assert!(content.contains("# Config formats"));
1219
1220        // Second creation should return false (already exists)
1221        let created_again = create_ckignore_if_missing(test_path).unwrap();
1222        assert!(!created_again);
1223    }
1224
1225    #[test]
1226    fn test_read_ckignore_patterns() {
1227        let temp_dir = TempDir::new().unwrap();
1228        let test_path = temp_dir.path();
1229
1230        // Test with no .ckignore file
1231        let patterns = read_ckignore_patterns(test_path).unwrap();
1232        assert_eq!(patterns.len(), 0);
1233
1234        // Create a .ckignore file
1235        let ckignore_path = test_path.join(".ckignore");
1236        fs::write(
1237            &ckignore_path,
1238            r#"# Comment line
1239*.png
1240*.jpg
1241
1242# Another comment
1243*.json
1244*.yaml
1245"#,
1246        )
1247        .unwrap();
1248
1249        // Read patterns
1250        let patterns = read_ckignore_patterns(test_path).unwrap();
1251        assert_eq!(patterns.len(), 4);
1252        assert!(patterns.contains(&"*.png".to_string()));
1253        assert!(patterns.contains(&"*.jpg".to_string()));
1254        assert!(patterns.contains(&"*.json".to_string()));
1255        assert!(patterns.contains(&"*.yaml".to_string()));
1256        // Comments should be filtered out
1257        assert!(!patterns.iter().any(|p| p.starts_with('#')));
1258    }
1259
1260    #[test]
1261    fn test_read_ckignore_patterns_with_empty_lines() {
1262        let temp_dir = TempDir::new().unwrap();
1263        let test_path = temp_dir.path();
1264
1265        let ckignore_path = test_path.join(".ckignore");
1266        fs::write(
1267            &ckignore_path,
1268            r#"
1269*.png
1270
1271*.jpg
1272
1273
1274*.json
1275"#,
1276        )
1277        .unwrap();
1278
1279        let patterns = read_ckignore_patterns(test_path).unwrap();
1280        assert_eq!(patterns.len(), 3);
1281        assert!(patterns.contains(&"*.png".to_string()));
1282        assert!(patterns.contains(&"*.jpg".to_string()));
1283        assert!(patterns.contains(&"*.json".to_string()));
1284    }
1285
1286    #[test]
1287    fn test_get_default_ckignore_content() {
1288        let content = get_default_ckignore_content();
1289
1290        // Check that default content includes key patterns
1291        assert!(content.contains("*.png"));
1292        assert!(content.contains("*.jpg"));
1293        assert!(content.contains("*.mp4"));
1294        assert!(content.contains("*.mp3"));
1295        assert!(content.contains("*.exe"));
1296        assert!(content.contains("*.zip"));
1297        assert!(content.contains("*.db"));
1298        assert!(content.contains("*.json"));
1299        assert!(content.contains("*.yaml"));
1300
1301        // Check that it has comments
1302        assert!(content.contains("# Images"));
1303        assert!(content.contains("# Video"));
1304        assert!(content.contains("# Audio"));
1305        assert!(content.contains("# Config formats"));
1306
1307        // Check for issue reference
1308        assert!(content.contains("issue #27"));
1309    }
1310}