ck_core/
lib.rs

1pub mod heatmap;
2
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use thiserror::Error;
6
7#[derive(Debug, Error)]
8pub enum CkError {
9    #[error("IO error: {0}")]
10    Io(#[from] std::io::Error),
11
12    #[error("Regex error: {0}")]
13    Regex(#[from] regex::Error),
14
15    #[error("Serialization error: {0}")]
16    Serialization(#[from] bincode::Error),
17
18    #[error("JSON error: {0}")]
19    Json(#[from] serde_json::Error),
20
21    #[error("Index error: {0}")]
22    Index(String),
23
24    #[error("Search error: {0}")]
25    Search(String),
26
27    #[error("Embedding error: {0}")]
28    Embedding(String),
29
30    #[error("Span validation error: {0}")]
31    SpanValidation(String),
32
33    #[error("Other error: {0}")]
34    Other(String),
35}
36
37pub type Result<T> = std::result::Result<T, CkError>;
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
40pub enum Language {
41    Rust,
42    Python,
43    JavaScript,
44    TypeScript,
45    Haskell,
46    Go,
47    Java,
48    C,
49    Cpp,
50    CSharp,
51    Ruby,
52    Php,
53    Swift,
54    Kotlin,
55    Zig,
56    Pdf,
57}
58
59impl Language {
60    pub fn from_extension(ext: &str) -> Option<Self> {
61        // Convert to lowercase for case-insensitive matching
62        match ext.to_lowercase().as_str() {
63            "rs" => Some(Language::Rust),
64            "py" => Some(Language::Python),
65            "js" => Some(Language::JavaScript),
66            "ts" | "tsx" => Some(Language::TypeScript),
67            "hs" | "lhs" => Some(Language::Haskell),
68            "go" => Some(Language::Go),
69            "java" => Some(Language::Java),
70            "c" => Some(Language::C),
71            "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
72            "h" | "hpp" => Some(Language::Cpp), // Assume C++ for headers
73            "cs" => Some(Language::CSharp),
74            "rb" => Some(Language::Ruby),
75            "php" => Some(Language::Php),
76            "swift" => Some(Language::Swift),
77            "kt" | "kts" => Some(Language::Kotlin),
78            "zig" => Some(Language::Zig),
79            "pdf" => Some(Language::Pdf),
80            _ => None,
81        }
82    }
83
84    pub fn from_path(path: &Path) -> Option<Self> {
85        path.extension()
86            .and_then(|ext| ext.to_str())
87            .and_then(Self::from_extension)
88    }
89}
90
91impl std::fmt::Display for Language {
92    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93        let name = match self {
94            Language::Rust => "rust",
95            Language::Python => "python",
96            Language::JavaScript => "javascript",
97            Language::TypeScript => "typescript",
98            Language::Haskell => "haskell",
99            Language::Go => "go",
100            Language::Java => "java",
101            Language::C => "c",
102            Language::Cpp => "cpp",
103            Language::CSharp => "csharp",
104            Language::Ruby => "ruby",
105            Language::Php => "php",
106            Language::Swift => "swift",
107            Language::Kotlin => "kotlin",
108            Language::Zig => "zig",
109            Language::Pdf => "pdf",
110        };
111        write!(f, "{}", name)
112    }
113}
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct Span {
117    pub byte_start: usize,
118    pub byte_end: usize,
119    pub line_start: usize,
120    pub line_end: usize,
121}
122
123impl Span {
124    /// Create a new Span with validation
125    pub fn new(
126        byte_start: usize,
127        byte_end: usize,
128        line_start: usize,
129        line_end: usize,
130    ) -> Result<Self> {
131        let span = Self {
132            byte_start,
133            byte_end,
134            line_start,
135            line_end,
136        };
137        span.validate()?;
138        Ok(span)
139    }
140
141    /// Create a new Span without validation (for backward compatibility)
142    ///
143    /// # Safety
144    ///
145    /// The caller must ensure the span is valid. Use `new()` for validated construction.
146    pub fn new_unchecked(
147        byte_start: usize,
148        byte_end: usize,
149        line_start: usize,
150        line_end: usize,
151    ) -> Self {
152        Self {
153            byte_start,
154            byte_end,
155            line_start,
156            line_end,
157        }
158    }
159
160    /// Validate span invariants
161    pub fn validate(&self) -> Result<()> {
162        // Check for zero line numbers first (lines should be 1-indexed)
163        if self.line_start == 0 {
164            return Err(CkError::SpanValidation(
165                "Line start cannot be zero (lines are 1-indexed)".to_string(),
166            ));
167        }
168
169        if self.line_end == 0 {
170            return Err(CkError::SpanValidation(
171                "Line end cannot be zero (lines are 1-indexed)".to_string(),
172            ));
173        }
174
175        // Check byte range validity
176        if self.byte_start > self.byte_end {
177            return Err(CkError::SpanValidation(format!(
178                "Invalid byte range: start ({}) > end ({})",
179                self.byte_start, self.byte_end
180            )));
181        }
182
183        // Check line range validity
184        if self.line_start > self.line_end {
185            return Err(CkError::SpanValidation(format!(
186                "Invalid line range: start ({}) > end ({})",
187                self.line_start, self.line_end
188            )));
189        }
190
191        Ok(())
192    }
193
194    /// Check if this span is valid
195    pub fn is_valid(&self) -> bool {
196        self.validate().is_ok()
197    }
198
199    /// Get byte length of the span
200    pub fn byte_len(&self) -> usize {
201        self.byte_end.saturating_sub(self.byte_start)
202    }
203
204    /// Get line count of the span
205    pub fn line_count(&self) -> usize {
206        self.line_end.saturating_sub(self.line_start) + 1
207    }
208}
209
210#[derive(Debug, Clone, Serialize, Deserialize)]
211pub struct FileMetadata {
212    pub path: PathBuf,
213    pub hash: String,
214    pub last_modified: u64,
215    pub size: u64,
216}
217
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct SearchResult {
220    pub file: PathBuf,
221    pub span: Span,
222    pub score: f32,
223    pub preview: String,
224    #[serde(skip_serializing_if = "Option::is_none")]
225    pub lang: Option<Language>,
226    #[serde(skip_serializing_if = "Option::is_none")]
227    pub symbol: Option<String>,
228    #[serde(skip_serializing_if = "Option::is_none")]
229    pub chunk_hash: Option<String>,
230    #[serde(skip_serializing_if = "Option::is_none")]
231    pub index_epoch: Option<u64>,
232}
233
234/// Enhanced search results that include near-miss information for threshold queries
235#[derive(Debug, Clone)]
236pub struct SearchResults {
237    pub matches: Vec<SearchResult>,
238    /// The highest scoring result below the threshold (if any)
239    pub closest_below_threshold: Option<SearchResult>,
240}
241
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub struct JsonSearchResult {
244    pub file: String,
245    pub span: Span,
246    pub lang: Option<Language>,
247    pub symbol: Option<String>,
248    pub score: f32,
249    pub signals: SearchSignals,
250    pub preview: String,
251    pub model: String,
252}
253
254#[derive(Debug, Clone, Serialize, Deserialize)]
255pub struct JsonlSearchResult {
256    pub path: String,
257    pub span: Span,
258    pub language: Option<String>,
259    #[serde(skip_serializing_if = "Option::is_none")]
260    pub snippet: Option<String>,
261    #[serde(skip_serializing_if = "Option::is_none")]
262    pub score: Option<f32>,
263    #[serde(skip_serializing_if = "Option::is_none")]
264    pub chunk_hash: Option<String>,
265    #[serde(skip_serializing_if = "Option::is_none")]
266    pub index_epoch: Option<u64>,
267}
268
269#[derive(Debug, Clone, Serialize, Deserialize)]
270pub struct SearchSignals {
271    pub lex_rank: Option<usize>,
272    pub vec_rank: Option<usize>,
273    pub rrf_score: f32,
274}
275
/// Search strategy selected through [`SearchOptions::mode`].
///
/// `SearchOptions::default()` uses [`SearchMode::Regex`]. The behaviour of
/// each mode is implemented outside this file.
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    /// Regular-expression search; the default mode.
    Regex,
    /// Lexical search.
    Lexical,
    /// Semantic search.
    Semantic,
    /// Hybrid search (presumably combines lexical and semantic ranking; confirm).
    Hybrid,
}
283
/// One entry of [`SearchOptions::include_patterns`].
#[derive(Debug, Clone)]
pub struct IncludePattern {
    /// Path to include.
    pub path: PathBuf,
    /// Whether `path` is a directory (presumably decided by whoever builds the
    /// pattern; confirm against the caller).
    pub is_dir: bool,
}
289
290#[derive(Debug, Clone)]
291pub struct SearchOptions {
292    pub mode: SearchMode,
293    pub query: String,
294    pub path: PathBuf,
295    pub top_k: Option<usize>,
296    pub threshold: Option<f32>,
297    pub case_insensitive: bool,
298    pub whole_word: bool,
299    pub fixed_string: bool,
300    pub line_numbers: bool,
301    pub context_lines: usize,
302    pub before_context_lines: usize,
303    pub after_context_lines: usize,
304    pub recursive: bool,
305    pub json_output: bool,
306    pub jsonl_output: bool,
307    pub no_snippet: bool,
308    pub reindex: bool,
309    pub show_scores: bool,
310    pub show_filenames: bool,
311    pub files_with_matches: bool,
312    pub files_without_matches: bool,
313    pub exclude_patterns: Vec<String>,
314    pub include_patterns: Vec<IncludePattern>,
315    pub respect_gitignore: bool,
316    pub full_section: bool,
317    // Enhanced embedding options (search-time only)
318    pub rerank: bool,
319    pub rerank_model: Option<String>,
320    pub embedding_model: Option<String>,
321}
322
323impl JsonlSearchResult {
324    pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
325        Self {
326            path: result.file.to_string_lossy().to_string(),
327            span: result.span.clone(),
328            language: result.lang.as_ref().map(|l| l.to_string()),
329            snippet: if include_snippet {
330                Some(result.preview.clone())
331            } else {
332                None
333            },
334            score: if result.score >= 0.0 {
335                Some(result.score)
336            } else {
337                None
338            },
339            chunk_hash: result.chunk_hash.clone(),
340            index_epoch: result.index_epoch,
341        }
342    }
343}
344
345impl Default for SearchOptions {
346    fn default() -> Self {
347        Self {
348            mode: SearchMode::Regex,
349            query: String::new(),
350            path: PathBuf::from("."),
351            top_k: None,
352            threshold: None,
353            case_insensitive: false,
354            whole_word: false,
355            fixed_string: false,
356            line_numbers: false,
357            context_lines: 0,
358            before_context_lines: 0,
359            after_context_lines: 0,
360            recursive: true,
361            json_output: false,
362            jsonl_output: false,
363            no_snippet: false,
364            reindex: false,
365            show_scores: false,
366            show_filenames: false,
367            files_with_matches: false,
368            files_without_matches: false,
369            exclude_patterns: get_default_exclude_patterns(),
370            include_patterns: Vec::new(),
371            respect_gitignore: true,
372            full_section: false,
373            // Enhanced embedding options (search-time only)
374            rerank: false,
375            rerank_model: None,
376            embedding_model: None,
377        }
378    }
379}
380
/// Returns the default exclusion patterns: names of cache, version-control,
/// build, virtual-environment, editor and temporary directories that rarely
/// contain user code.
///
/// A fresh `Vec` is allocated on every call; the order of the entries is fixed.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
420
/// Returns the text written to a freshly created `.ckignore` file.
///
/// The content uses `.gitignore` syntax and lists glob patterns for images,
/// video, audio, binaries, archives, data files and config formats, followed
/// by a marker line below which users can add their own patterns.
pub fn get_default_ckignore_content() -> &'static str {
    const DEFAULT_CKIGNORE: &str = r#"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
"#;

    DEFAULT_CKIGNORE
}
490
491/// Read and parse .ckignore file, returning patterns
492pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
493    let ckignore_path = repo_root.join(".ckignore");
494
495    if !ckignore_path.exists() {
496        return Ok(Vec::new());
497    }
498
499    let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
500
501    let patterns: Vec<String> = content
502        .lines()
503        .map(|line| line.trim())
504        .filter(|line| !line.is_empty() && !line.starts_with('#'))
505        .map(|line| line.to_string())
506        .collect();
507
508    Ok(patterns)
509}
510
511/// Create .ckignore file with default content if it doesn't exist
512pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
513    let ckignore_path = repo_root.join(".ckignore");
514
515    if ckignore_path.exists() {
516        return Ok(false); // Already exists
517    }
518
519    std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
520
521    Ok(true) // Created new file
522}
523
524/// Build exclusion patterns with proper priority ordering
525///
526/// This centralizes the pattern building logic used across CLI, TUI, and MCP interfaces
527/// to prevent drift and ensure consistent behavior.
528///
529/// Priority order (highest to lowest):
530/// 1. .ckignore patterns (if use_ckignore is true)
531/// 2. Additional excludes (from command-line or API calls)
532/// 3. Default patterns (if use_defaults is true)
533///
534/// # Arguments
535/// * `repo_root` - Optional repository root for loading .ckignore file
536/// * `additional_excludes` - Additional exclusion patterns (e.g., from CLI flags)
537/// * `use_ckignore` - Whether to load and include .ckignore patterns
538/// * `use_defaults` - Whether to include default exclusion patterns
539///
540/// # Returns
541/// Combined list of exclusion patterns in priority order
542pub fn build_exclude_patterns(
543    repo_root: Option<&Path>,
544    additional_excludes: &[String],
545    use_ckignore: bool,
546    use_defaults: bool,
547) -> Vec<String> {
548    let mut patterns = Vec::new();
549
550    // 1. Load .ckignore patterns (highest priority among additional patterns)
551    if use_ckignore
552        && let Some(root) = repo_root
553        && let Ok(ckignore_patterns) = read_ckignore_patterns(root)
554        && !ckignore_patterns.is_empty()
555    {
556        patterns.extend(ckignore_patterns);
557    }
558
559    // 2. Add additional exclude patterns (e.g., from command-line)
560    patterns.extend(additional_excludes.iter().cloned());
561
562    // 3. Add defaults (lowest priority)
563    if use_defaults {
564        patterns.extend(get_default_exclude_patterns());
565    }
566
567    patterns
568}
569
/// Maps a source file to the path of its sidecar file under `<repo_root>/.ck`.
///
/// The location of `file_path` relative to `repo_root` is mirrored below
/// `.ck`, and `.ck` is appended after the existing extension:
/// `src/main.rs` becomes `.ck/src/main.rs.ck` and `README` becomes
/// `.ck/README.ck`.
///
/// When `file_path` does not start with `repo_root` it is pushed as given, so
/// a relative path is mirrored below `.ck`, whereas an absolute path outside
/// the repository replaces the `.ck` prefix (`PathBuf::push` semantics).
///
/// The extension is handled as an `OsString`, so file names that are not
/// valid UTF-8 keep their exact bytes and distinct files never share a
/// sidecar path.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
    let mut sidecar = repo_root.join(".ck");
    sidecar.push(relative);

    // New extension: "<old>.ck" when the file has an extension, otherwise "ck".
    let mut extension = std::ffi::OsString::new();
    if let Some(existing) = relative.extension() {
        extension.push(existing);
        extension.push(".");
    }
    extension.push("ck");

    sidecar.set_extension(extension);
    sidecar
}
581
582pub fn compute_file_hash(path: &Path) -> Result<String> {
583    use std::io::Read;
584
585    let mut file = std::fs::File::open(path)?;
586    let mut hasher = blake3::Hasher::new();
587
588    // Stream the file in 64KB chunks to avoid loading entire file into memory
589    let mut buffer = [0u8; 65536]; // 64KB buffer
590    loop {
591        let bytes_read = file.read(&mut buffer)?;
592        if bytes_read == 0 {
593            break;
594        }
595        hasher.update(&buffer[..bytes_read]);
596    }
597
598    let hash = hasher.finalize();
599    Ok(hash.to_hex().to_string())
600}
601
602/// Compute blake3 hash of chunk content for incremental indexing
603/// This enables us to detect which chunks have changed and only re-embed those
604///
605/// Hashes all fields that affect the chunk's display and meaning:
606/// - text: the main chunk content
607/// - leading_trivia: doc comments and comments before the chunk
608/// - trailing_trivia: comments after the chunk
609pub fn compute_chunk_hash(
610    text: &str,
611    leading_trivia: &[String],
612    trailing_trivia: &[String],
613) -> String {
614    let mut hasher = blake3::Hasher::new();
615
616    // Hash the main text
617    hasher.update(text.as_bytes());
618
619    // Hash leading trivia (doc comments, preceding comments)
620    for trivia in leading_trivia {
621        hasher.update(trivia.as_bytes());
622    }
623
624    // Hash trailing trivia (following comments)
625    for trivia in trailing_trivia {
626        hasher.update(trivia.as_bytes());
627    }
628
629    hasher.finalize().to_hex().to_string()
630}
631
/// PDF-specific utilities
pub mod pdf {
    use std::ffi::OsString;
    use std::path::{Path, PathBuf};

    /// Returns `true` when the extension of `path` is `pdf`, ignoring ASCII case.
    ///
    /// Paths without an extension (including a file literally named `pdf`)
    /// are not PDFs. The comparison does not allocate.
    pub fn is_pdf_file(path: &Path) -> bool {
        path.extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("pdf"))
    }

    /// Returns the path of the cached text extracted from a PDF.
    ///
    /// The location of `file_path` relative to `repo_root` is mirrored below
    /// `<repo_root>/.ck/content`, and `.txt` is appended after the existing
    /// extension (`docs/manual.pdf` becomes `.ck/content/docs/manual.pdf.txt`).
    /// A path that does not start with `repo_root` is pushed as given.
    ///
    /// The extension is handled as an `OsString`, so file names that are not
    /// valid UTF-8 keep their exact bytes and distinct files never share a
    /// cache path.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
        let mut cache_path = repo_root.join(".ck").join("content");
        cache_path.push(relative);

        // New extension: "<old>.txt" when the file has an extension, otherwise "txt".
        let mut extension = OsString::new();
        if let Some(existing) = relative.extension() {
            extension.push(existing);
            extension.push(".");
        }
        extension.push("txt");
        cache_path.set_extension(extension);

        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            assert!(is_pdf_file(&PathBuf::from("test.pdf")));
            assert!(is_pdf_file(&PathBuf::from("test.PDF"))); // Case insensitive
            assert!(is_pdf_file(&PathBuf::from("test.Pdf")));
            assert!(!is_pdf_file(&PathBuf::from("test.txt")));
            assert!(!is_pdf_file(&PathBuf::from("test"))); // No extension
            assert!(!is_pdf_file(&PathBuf::from("pdf"))); // Just "pdf", no extension
        }

        #[test]
        fn test_get_content_cache_path() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual.pdf");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("/project/docs/manual");

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let repo_root = PathBuf::from("/project");
            let file_path = PathBuf::from("docs/manual.pdf"); // Relative path

            let cache_path = get_content_cache_path(&repo_root, &file_path);
            assert_eq!(
                cache_path,
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }
    }
}
712
713#[cfg(test)]
714mod tests {
715    use super::*;
716    use std::fs;
717    use tempfile::TempDir;
718
719    #[test]
720    fn test_span_valid_creation() {
721        // Test valid span creation
722        let span = Span::new(0, 10, 1, 2).unwrap();
723        assert_eq!(span.byte_start, 0);
724        assert_eq!(span.byte_end, 10);
725        assert_eq!(span.line_start, 1);
726        assert_eq!(span.line_end, 2);
727        assert!(span.is_valid());
728    }
729
730    #[test]
731    fn test_span_validation_valid_cases() {
732        // Same byte positions (empty span)
733        let span = Span::new(10, 10, 1, 1).unwrap();
734        assert!(span.is_valid());
735        assert_eq!(span.byte_len(), 0);
736        assert_eq!(span.line_count(), 1);
737
738        // Multi-line span
739        let span = Span::new(0, 100, 1, 10).unwrap();
740        assert!(span.is_valid());
741        assert_eq!(span.byte_len(), 100);
742        assert_eq!(span.line_count(), 10);
743
744        // Single line span
745        let span = Span::new(5, 25, 3, 3).unwrap();
746        assert!(span.is_valid());
747        assert_eq!(span.byte_len(), 20);
748        assert_eq!(span.line_count(), 1);
749    }
750
751    #[test]
752    fn test_span_validation_invalid_byte_range() {
753        // Reversed byte range
754        let result = Span::new(10, 5, 1, 2);
755        assert!(result.is_err());
756        if let Err(CkError::SpanValidation(msg)) = result {
757            assert!(msg.contains("Invalid byte range"));
758            assert!(msg.contains("start (10) > end (5)"));
759        } else {
760            panic!("Expected SpanValidation error");
761        }
762    }
763
764    #[test]
765    fn test_span_validation_invalid_line_range() {
766        // Reversed line range
767        let result = Span::new(0, 10, 5, 2);
768        assert!(result.is_err());
769        if let Err(CkError::SpanValidation(msg)) = result {
770            assert!(msg.contains("Invalid line range"));
771            assert!(msg.contains("start (5) > end (2)"));
772        } else {
773            panic!("Expected SpanValidation error");
774        }
775    }
776
777    #[test]
778    fn test_span_validation_zero_line_numbers() {
779        // Zero line start
780        let result = Span::new(0, 10, 0, 2);
781        assert!(result.is_err());
782        if let Err(CkError::SpanValidation(msg)) = result {
783            assert!(msg.contains("Line start cannot be zero"));
784        } else {
785            panic!("Expected SpanValidation error");
786        }
787
788        // Zero line end
789        let result = Span::new(0, 10, 1, 0);
790        assert!(result.is_err());
791        if let Err(CkError::SpanValidation(msg)) = result {
792            assert!(msg.contains("Line end cannot be zero"));
793        } else {
794            panic!("Expected SpanValidation error");
795        }
796    }
797
798    #[test]
799    fn test_span_unchecked_creation() {
800        // Test backward compatibility with unchecked creation
801        let span = Span::new_unchecked(10, 5, 0, 1);
802        assert_eq!(span.byte_start, 10);
803        assert_eq!(span.byte_end, 5);
804        assert_eq!(span.line_start, 0);
805        assert_eq!(span.line_end, 1);
806        assert!(!span.is_valid()); // Should be invalid
807    }
808
809    #[test]
810    fn test_span_validation_methods() {
811        // Valid span
812        let valid_span = Span::new_unchecked(0, 10, 1, 2);
813        assert!(valid_span.validate().is_ok());
814        assert!(valid_span.is_valid());
815
816        // Invalid span (reversed bytes)
817        let invalid_span = Span::new_unchecked(10, 5, 1, 2);
818        assert!(invalid_span.validate().is_err());
819        assert!(!invalid_span.is_valid());
820
821        // Invalid span (zero lines)
822        let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
823        assert!(zero_line_span.validate().is_err());
824        assert!(!zero_line_span.is_valid());
825    }
826
827    #[test]
828    fn test_span_utility_methods() {
829        let span = Span::new(10, 25, 5, 8).unwrap();
830
831        // Test byte_len
832        assert_eq!(span.byte_len(), 15);
833
834        // Test line_count
835        assert_eq!(span.line_count(), 4); // lines 5, 6, 7, 8
836
837        // Test with single-line span
838        let single_line = Span::new(0, 5, 1, 1).unwrap();
839        assert_eq!(single_line.line_count(), 1);
840        assert_eq!(single_line.byte_len(), 5);
841
842        // Test with empty span
843        let empty = Span::new(10, 10, 3, 3).unwrap();
844        assert_eq!(empty.byte_len(), 0);
845        assert_eq!(empty.line_count(), 1);
846    }
847
848    #[test]
849    fn test_span_legacy_struct_literal_still_works() {
850        // Ensure backward compatibility for existing code using struct literals
851        let span = Span {
852            byte_start: 0,
853            byte_end: 10,
854            line_start: 1,
855            line_end: 2,
856        };
857
858        assert_eq!(span.byte_start, 0);
859        assert_eq!(span.byte_end, 10);
860        assert_eq!(span.line_start, 1);
861        assert_eq!(span.line_end, 2);
862        assert!(span.is_valid());
863    }
864
865    #[test]
866    fn test_search_options_default() {
867        let options = SearchOptions::default();
868        assert!(matches!(options.mode, SearchMode::Regex));
869        assert_eq!(options.query, "");
870        assert_eq!(options.path, PathBuf::from("."));
871        assert_eq!(options.top_k, None);
872        assert_eq!(options.threshold, None);
873        assert!(!options.case_insensitive);
874        assert!(!options.whole_word);
875        assert!(!options.fixed_string);
876        assert!(!options.line_numbers);
877        assert_eq!(options.context_lines, 0);
878        assert!(options.recursive);
879        assert!(!options.json_output);
880        assert!(!options.reindex);
881        assert!(!options.show_scores);
882        assert!(!options.show_filenames);
883    }
884
885    #[test]
886    fn test_file_metadata_serialization() {
887        let metadata = FileMetadata {
888            path: PathBuf::from("test.txt"),
889            hash: "abc123".to_string(),
890            last_modified: 1234567890,
891            size: 1024,
892        };
893
894        let json = serde_json::to_string(&metadata).unwrap();
895        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
896
897        assert_eq!(metadata.path, deserialized.path);
898        assert_eq!(metadata.hash, deserialized.hash);
899        assert_eq!(metadata.last_modified, deserialized.last_modified);
900        assert_eq!(metadata.size, deserialized.size);
901    }
902
903    #[test]
904    fn test_search_result_serialization() {
905        let result = SearchResult {
906            file: PathBuf::from("test.txt"),
907            span: Span {
908                byte_start: 0,
909                byte_end: 10,
910                line_start: 1,
911                line_end: 1,
912            },
913            score: 0.95,
914            preview: "hello world".to_string(),
915            lang: Some(Language::Rust),
916            symbol: Some("main".to_string()),
917            chunk_hash: Some("abc123".to_string()),
918            index_epoch: Some(1699123456),
919        };
920
921        let json = serde_json::to_string(&result).unwrap();
922        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
923
924        assert_eq!(result.file, deserialized.file);
925        assert_eq!(result.score, deserialized.score);
926        assert_eq!(result.preview, deserialized.preview);
927        assert_eq!(result.lang, deserialized.lang);
928        assert_eq!(result.symbol, deserialized.symbol);
929        assert_eq!(result.chunk_hash, deserialized.chunk_hash);
930        assert_eq!(result.index_epoch, deserialized.index_epoch);
931    }
932
933    #[test]
934    fn test_jsonl_search_result_conversion() {
935        let result = SearchResult {
936            file: PathBuf::from("src/auth.rs"),
937            span: Span {
938                byte_start: 1203,
939                byte_end: 1456,
940                line_start: 42,
941                line_end: 58,
942            },
943            score: 0.89,
944            preview: "function authenticate(user) {...}".to_string(),
945            lang: Some(Language::Rust),
946            symbol: Some("authenticate".to_string()),
947            chunk_hash: Some("abc123def456".to_string()),
948            index_epoch: Some(1699123456),
949        };
950
951        // Test with snippet
952        let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
953        assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
954        assert_eq!(jsonl_with_snippet.span.line_start, 42);
955        assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
956        assert_eq!(
957            jsonl_with_snippet.snippet,
958            Some("function authenticate(user) {...}".to_string())
959        );
960        assert_eq!(jsonl_with_snippet.score, Some(0.89));
961        assert_eq!(
962            jsonl_with_snippet.chunk_hash,
963            Some("abc123def456".to_string())
964        );
965        assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
966
967        // Test without snippet
968        let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
969        assert_eq!(jsonl_no_snippet.snippet, None);
970        assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
971    }
972
973    #[test]
974    fn test_get_sidecar_path() {
975        let repo_root = PathBuf::from("/home/user/project");
976        let file_path = PathBuf::from("/home/user/project/src/main.rs");
977
978        let sidecar = get_sidecar_path(&repo_root, &file_path);
979        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
980
981        assert_eq!(sidecar, expected);
982    }
983
984    #[test]
985    fn test_get_sidecar_path_no_extension() {
986        let repo_root = PathBuf::from("/project");
987        let file_path = PathBuf::from("/project/README");
988
989        let sidecar = get_sidecar_path(&repo_root, &file_path);
990        let expected = PathBuf::from("/project/.ck/README.ck");
991
992        assert_eq!(sidecar, expected);
993    }
994
995    #[test]
996    fn test_compute_file_hash() {
997        let temp_dir = TempDir::new().unwrap();
998        let file_path = temp_dir.path().join("test.txt");
999
1000        fs::write(&file_path, "hello world").unwrap();
1001
1002        let hash1 = compute_file_hash(&file_path).unwrap();
1003        let hash2 = compute_file_hash(&file_path).unwrap();
1004
1005        // Same content should produce same hash
1006        assert_eq!(hash1, hash2);
1007        assert!(!hash1.is_empty());
1008
1009        // Different content should produce different hash
1010        fs::write(&file_path, "hello rust").unwrap();
1011        let hash3 = compute_file_hash(&file_path).unwrap();
1012        assert_ne!(hash1, hash3);
1013    }
1014
1015    #[test]
1016    fn test_compute_file_hash_nonexistent() {
1017        let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
1018        assert!(result.is_err());
1019    }
1020
1021    #[test]
1022    fn test_compute_file_hash_large_file() {
1023        let temp_dir = TempDir::new().unwrap();
1024        let file_path = temp_dir.path().join("large_test.txt");
1025
1026        // Create a file larger than the buffer size (64KB) to test streaming
1027        let large_content = "a".repeat(100_000); // 100KB content
1028        fs::write(&file_path, &large_content).unwrap();
1029
1030        let hash1 = compute_file_hash(&file_path).unwrap();
1031        let hash2 = compute_file_hash(&file_path).unwrap();
1032
1033        // Streaming hash should be consistent
1034        assert_eq!(hash1, hash2);
1035        assert!(!hash1.is_empty());
1036
1037        // Verify it's different from smaller content
1038        fs::write(&file_path, "small content").unwrap();
1039        let hash3 = compute_file_hash(&file_path).unwrap();
1040        assert_ne!(hash1, hash3);
1041    }
1042
1043    #[test]
1044    fn test_json_search_result_serialization() {
1045        let signals = SearchSignals {
1046            lex_rank: Some(1),
1047            vec_rank: Some(2),
1048            rrf_score: 0.85,
1049        };
1050
1051        let result = JsonSearchResult {
1052            file: "test.txt".to_string(),
1053            span: Span {
1054                byte_start: 0,
1055                byte_end: 5,
1056                line_start: 1,
1057                line_end: 1,
1058            },
1059            lang: None, // txt is not a supported language
1060            symbol: None,
1061            score: 0.95,
1062            signals,
1063            preview: "hello".to_string(),
1064            model: "bge-small".to_string(),
1065        };
1066
1067        let json = serde_json::to_string(&result).unwrap();
1068        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
1069
1070        assert_eq!(result.file, deserialized.file);
1071        assert_eq!(result.score, deserialized.score);
1072        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
1073        assert_eq!(result.model, deserialized.model);
1074    }
1075
1076    #[test]
1077    fn test_language_from_extension() {
1078        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
1079        assert_eq!(Language::from_extension("py"), Some(Language::Python));
1080        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
1081        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
1082        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
1083        assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
1084        assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
1085        assert_eq!(Language::from_extension("go"), Some(Language::Go));
1086        assert_eq!(Language::from_extension("java"), Some(Language::Java));
1087        assert_eq!(Language::from_extension("c"), Some(Language::C));
1088        assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
1089        assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
1090        assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
1091        assert_eq!(Language::from_extension("php"), Some(Language::Php));
1092        assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
1093        assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
1094        assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
1095        assert_eq!(Language::from_extension("unknown"), None);
1096    }
1097
1098    #[test]
1099    fn test_language_from_extension_case_insensitive() {
1100        // Test uppercase extensions - only for actually supported languages
1101        assert_eq!(Language::from_extension("RS"), Some(Language::Rust));
1102        assert_eq!(Language::from_extension("PY"), Some(Language::Python));
1103        assert_eq!(Language::from_extension("JS"), Some(Language::JavaScript));
1104        assert_eq!(Language::from_extension("TS"), Some(Language::TypeScript));
1105        assert_eq!(Language::from_extension("TSX"), Some(Language::TypeScript));
1106        assert_eq!(Language::from_extension("HS"), Some(Language::Haskell));
1107        assert_eq!(Language::from_extension("LHS"), Some(Language::Haskell));
1108        assert_eq!(Language::from_extension("GO"), Some(Language::Go));
1109        assert_eq!(Language::from_extension("JAVA"), Some(Language::Java));
1110        assert_eq!(Language::from_extension("C"), Some(Language::C));
1111        assert_eq!(Language::from_extension("CPP"), Some(Language::Cpp));
1112        assert_eq!(Language::from_extension("CC"), Some(Language::Cpp));
1113        assert_eq!(Language::from_extension("CXX"), Some(Language::Cpp));
1114        assert_eq!(Language::from_extension("H"), Some(Language::Cpp));
1115        assert_eq!(Language::from_extension("HPP"), Some(Language::Cpp));
1116        assert_eq!(Language::from_extension("CS"), Some(Language::CSharp));
1117        assert_eq!(Language::from_extension("RB"), Some(Language::Ruby));
1118        assert_eq!(Language::from_extension("PHP"), Some(Language::Php));
1119        assert_eq!(Language::from_extension("SWIFT"), Some(Language::Swift));
1120        assert_eq!(Language::from_extension("KT"), Some(Language::Kotlin));
1121        assert_eq!(Language::from_extension("KTS"), Some(Language::Kotlin));
1122        assert_eq!(Language::from_extension("PDF"), Some(Language::Pdf));
1123
1124        // Test mixed case extensions
1125        assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
1126        assert_eq!(Language::from_extension("Py"), Some(Language::Python));
1127        assert_eq!(Language::from_extension("Js"), Some(Language::JavaScript));
1128        assert_eq!(Language::from_extension("Ts"), Some(Language::TypeScript));
1129        assert_eq!(Language::from_extension("TsX"), Some(Language::TypeScript));
1130        assert_eq!(Language::from_extension("Hs"), Some(Language::Haskell));
1131        assert_eq!(Language::from_extension("Go"), Some(Language::Go));
1132        assert_eq!(Language::from_extension("Java"), Some(Language::Java));
1133        assert_eq!(Language::from_extension("Cpp"), Some(Language::Cpp));
1134        assert_eq!(Language::from_extension("Rb"), Some(Language::Ruby));
1135        assert_eq!(Language::from_extension("Php"), Some(Language::Php));
1136        assert_eq!(Language::from_extension("Swift"), Some(Language::Swift));
1137        assert_eq!(Language::from_extension("Kt"), Some(Language::Kotlin));
1138        assert_eq!(Language::from_extension("Pdf"), Some(Language::Pdf));
1139
1140        // Unknown extensions should still return None
1141        assert_eq!(Language::from_extension("UNKNOWN"), None);
1142        assert_eq!(Language::from_extension("Unknown"), None);
1143    }
1144
1145    #[test]
1146    fn test_language_from_path() {
1147        assert_eq!(
1148            Language::from_path(&PathBuf::from("test.rs")),
1149            Some(Language::Rust)
1150        );
1151        assert_eq!(
1152            Language::from_path(&PathBuf::from("test.py")),
1153            Some(Language::Python)
1154        );
1155        assert_eq!(
1156            Language::from_path(&PathBuf::from("test.js")),
1157            Some(Language::JavaScript)
1158        );
1159        assert_eq!(
1160            Language::from_path(&PathBuf::from("test.hs")),
1161            Some(Language::Haskell)
1162        );
1163        assert_eq!(
1164            Language::from_path(&PathBuf::from("test.lhs")),
1165            Some(Language::Haskell)
1166        );
1167        assert_eq!(
1168            Language::from_path(&PathBuf::from("test.go")),
1169            Some(Language::Go)
1170        );
1171        assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None); // unknown extensions return None
1172        assert_eq!(Language::from_path(&PathBuf::from("noext")), None); // no extension
1173    }
1174
1175    #[test]
1176    fn test_language_from_path_case_insensitive() {
1177        // Test uppercase extensions in file paths - only supported languages
1178        assert_eq!(
1179            Language::from_path(&PathBuf::from("MAIN.RS")),
1180            Some(Language::Rust)
1181        );
1182        assert_eq!(
1183            Language::from_path(&PathBuf::from("app.PY")),
1184            Some(Language::Python)
1185        );
1186        assert_eq!(
1187            Language::from_path(&PathBuf::from("script.JS")),
1188            Some(Language::JavaScript)
1189        );
1190        assert_eq!(
1191            Language::from_path(&PathBuf::from("types.TS")),
1192            Some(Language::TypeScript)
1193        );
1194        assert_eq!(
1195            Language::from_path(&PathBuf::from("Component.TSX")),
1196            Some(Language::TypeScript)
1197        );
1198        assert_eq!(
1199            Language::from_path(&PathBuf::from("module.HS")),
1200            Some(Language::Haskell)
1201        );
1202        assert_eq!(
1203            Language::from_path(&PathBuf::from("server.GO")),
1204            Some(Language::Go)
1205        );
1206        assert_eq!(
1207            Language::from_path(&PathBuf::from("App.JAVA")),
1208            Some(Language::Java)
1209        );
1210        assert_eq!(
1211            Language::from_path(&PathBuf::from("main.C")),
1212            Some(Language::C)
1213        );
1214        assert_eq!(
1215            Language::from_path(&PathBuf::from("utils.CPP")),
1216            Some(Language::Cpp)
1217        );
1218        assert_eq!(
1219            Language::from_path(&PathBuf::from("Program.CS")),
1220            Some(Language::CSharp)
1221        );
1222        assert_eq!(
1223            Language::from_path(&PathBuf::from("script.RB")),
1224            Some(Language::Ruby)
1225        );
1226        assert_eq!(
1227            Language::from_path(&PathBuf::from("index.PHP")),
1228            Some(Language::Php)
1229        );
1230        assert_eq!(
1231            Language::from_path(&PathBuf::from("App.SWIFT")),
1232            Some(Language::Swift)
1233        );
1234        assert_eq!(
1235            Language::from_path(&PathBuf::from("Main.KT")),
1236            Some(Language::Kotlin)
1237        );
1238        assert_eq!(
1239            Language::from_path(&PathBuf::from("document.PDF")),
1240            Some(Language::Pdf)
1241        );
1242
1243        // Test mixed case extensions in file paths
1244        assert_eq!(
1245            Language::from_path(&PathBuf::from("config.Rs")),
1246            Some(Language::Rust)
1247        );
1248        assert_eq!(
1249            Language::from_path(&PathBuf::from("helper.Py")),
1250            Some(Language::Python)
1251        );
1252        assert_eq!(
1253            Language::from_path(&PathBuf::from("utils.Js")),
1254            Some(Language::JavaScript)
1255        );
1256        assert_eq!(
1257            Language::from_path(&PathBuf::from("interfaces.Ts")),
1258            Some(Language::TypeScript)
1259        );
1260        assert_eq!(
1261            Language::from_path(&PathBuf::from("Component.TsX")),
1262            Some(Language::TypeScript)
1263        );
1264        assert_eq!(
1265            Language::from_path(&PathBuf::from("main.Cpp")),
1266            Some(Language::Cpp)
1267        );
1268        assert_eq!(
1269            Language::from_path(&PathBuf::from("report.Pdf")),
1270            Some(Language::Pdf)
1271        );
1272
1273        // Unknown extensions should still return None regardless of case
1274        assert_eq!(Language::from_path(&PathBuf::from("test.UNKNOWN")), None);
1275        assert_eq!(Language::from_path(&PathBuf::from("test.Unknown")), None);
1276    }
1277
1278    #[test]
1279    fn test_language_display() {
1280        assert_eq!(Language::Rust.to_string(), "rust");
1281        assert_eq!(Language::Python.to_string(), "python");
1282        assert_eq!(Language::JavaScript.to_string(), "javascript");
1283        assert_eq!(Language::TypeScript.to_string(), "typescript");
1284        assert_eq!(Language::Go.to_string(), "go");
1285        assert_eq!(Language::Java.to_string(), "java");
1286    }
1287
1288    #[test]
1289    fn test_create_ckignore_if_missing() {
1290        let temp_dir = TempDir::new().unwrap();
1291        let test_path = temp_dir.path();
1292
1293        // First creation should succeed
1294        let created = create_ckignore_if_missing(test_path).unwrap();
1295        assert!(created);
1296
1297        // Check that file exists
1298        let ckignore_path = test_path.join(".ckignore");
1299        assert!(ckignore_path.exists());
1300
1301        // Check content contains expected patterns
1302        let content = fs::read_to_string(&ckignore_path).unwrap();
1303        assert!(content.contains("*.png"));
1304        assert!(content.contains("*.json"));
1305        assert!(content.contains("*.yaml"));
1306        assert!(content.contains("# Images"));
1307        assert!(content.contains("# Config formats"));
1308
1309        // Second creation should return false (already exists)
1310        let created_again = create_ckignore_if_missing(test_path).unwrap();
1311        assert!(!created_again);
1312    }
1313
1314    #[test]
1315    fn test_read_ckignore_patterns() {
1316        let temp_dir = TempDir::new().unwrap();
1317        let test_path = temp_dir.path();
1318
1319        // Test with no .ckignore file
1320        let patterns = read_ckignore_patterns(test_path).unwrap();
1321        assert_eq!(patterns.len(), 0);
1322
1323        // Create a .ckignore file
1324        let ckignore_path = test_path.join(".ckignore");
1325        fs::write(
1326            &ckignore_path,
1327            r#"# Comment line
1328*.png
1329*.jpg
1330
1331# Another comment
1332*.json
1333*.yaml
1334"#,
1335        )
1336        .unwrap();
1337
1338        // Read patterns
1339        let patterns = read_ckignore_patterns(test_path).unwrap();
1340        assert_eq!(patterns.len(), 4);
1341        assert!(patterns.contains(&"*.png".to_string()));
1342        assert!(patterns.contains(&"*.jpg".to_string()));
1343        assert!(patterns.contains(&"*.json".to_string()));
1344        assert!(patterns.contains(&"*.yaml".to_string()));
1345        // Comments should be filtered out
1346        assert!(!patterns.iter().any(|p| p.starts_with('#')));
1347    }
1348
1349    #[test]
1350    fn test_read_ckignore_patterns_with_empty_lines() {
1351        let temp_dir = TempDir::new().unwrap();
1352        let test_path = temp_dir.path();
1353
1354        let ckignore_path = test_path.join(".ckignore");
1355        fs::write(
1356            &ckignore_path,
1357            r#"
1358*.png
1359
1360*.jpg
1361
1362
1363*.json
1364"#,
1365        )
1366        .unwrap();
1367
1368        let patterns = read_ckignore_patterns(test_path).unwrap();
1369        assert_eq!(patterns.len(), 3);
1370        assert!(patterns.contains(&"*.png".to_string()));
1371        assert!(patterns.contains(&"*.jpg".to_string()));
1372        assert!(patterns.contains(&"*.json".to_string()));
1373    }
1374
1375    #[test]
1376    fn test_get_default_ckignore_content() {
1377        let content = get_default_ckignore_content();
1378
1379        // Check that default content includes key patterns
1380        assert!(content.contains("*.png"));
1381        assert!(content.contains("*.jpg"));
1382        assert!(content.contains("*.mp4"));
1383        assert!(content.contains("*.mp3"));
1384        assert!(content.contains("*.exe"));
1385        assert!(content.contains("*.zip"));
1386        assert!(content.contains("*.db"));
1387        assert!(content.contains("*.json"));
1388        assert!(content.contains("*.yaml"));
1389
1390        // Check that it has comments
1391        assert!(content.contains("# Images"));
1392        assert!(content.contains("# Video"));
1393        assert!(content.contains("# Audio"));
1394        assert!(content.contains("# Config formats"));
1395
1396        // Check for issue reference
1397        assert!(content.contains("issue #27"));
1398    }
1399}