ck_core/
lib.rs

1pub mod heatmap;
2
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use thiserror::Error;
6
/// Crate-wide error type.
///
/// Variants marked `#[from]` get a `From` conversion generated by `thiserror`,
/// so the `?` operator converts those source errors automatically. The
/// remaining variants carry a free-form message that is interpolated into the
/// `Display` output after a fixed prefix.
#[derive(Debug, Error)]
pub enum CkError {
    /// Wraps a [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a `regex::Error`.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),

    /// Wraps a `bincode::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),

    /// Wraps a `serde_json::Error`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Message-only error, displayed with the "Index error" prefix.
    #[error("Index error: {0}")]
    Index(String),

    /// Message-only error, displayed with the "Search error" prefix.
    #[error("Search error: {0}")]
    Search(String),

    /// Message-only error, displayed with the "Embedding error" prefix.
    #[error("Embedding error: {0}")]
    Embedding(String),

    /// Returned by [`Span::validate`] when a span breaks its invariants.
    #[error("Span validation error: {0}")]
    SpanValidation(String),

    /// Message-only error for anything not covered by the other variants.
    #[error("Other error: {0}")]
    Other(String),
}
36
/// Shorthand for a `std::result::Result` whose error type is [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
38
/// File types that can be recognised from a file extension.
///
/// See `Language::from_extension` for the extension mapping and the
/// `Display` impl for the lowercase name of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Haskell,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
    Zig,
    /// PDF documents (a document format rather than a programming language).
    Pdf,
}
58
59impl Language {
60    pub fn from_extension(ext: &str) -> Option<Self> {
61        // Convert to lowercase for case-insensitive matching
62        match ext.to_lowercase().as_str() {
63            "rs" => Some(Language::Rust),
64            "py" => Some(Language::Python),
65            "js" => Some(Language::JavaScript),
66            "ts" | "tsx" => Some(Language::TypeScript),
67            "hs" | "lhs" => Some(Language::Haskell),
68            "go" => Some(Language::Go),
69            "java" => Some(Language::Java),
70            "c" => Some(Language::C),
71            "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
72            "h" | "hpp" => Some(Language::Cpp), // Assume C++ for headers
73            "cs" => Some(Language::CSharp),
74            "rb" => Some(Language::Ruby),
75            "php" => Some(Language::Php),
76            "swift" => Some(Language::Swift),
77            "kt" | "kts" => Some(Language::Kotlin),
78            "zig" => Some(Language::Zig),
79            "pdf" => Some(Language::Pdf),
80            _ => None,
81        }
82    }
83
84    pub fn from_path(path: &Path) -> Option<Self> {
85        path.extension()
86            .and_then(|ext| ext.to_str())
87            .and_then(Self::from_extension)
88    }
89}
90
91impl std::fmt::Display for Language {
92    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93        let name = match self {
94            Language::Rust => "rust",
95            Language::Python => "python",
96            Language::JavaScript => "javascript",
97            Language::TypeScript => "typescript",
98            Language::Haskell => "haskell",
99            Language::Go => "go",
100            Language::Java => "java",
101            Language::C => "c",
102            Language::Cpp => "cpp",
103            Language::CSharp => "csharp",
104            Language::Ruby => "ruby",
105            Language::Php => "php",
106            Language::Swift => "swift",
107            Language::Kotlin => "kotlin",
108            Language::Zig => "zig",
109            Language::Pdf => "pdf",
110        };
111        write!(f, "{}", name)
112    }
113}
114
/// A region of a file described by both a byte range and a line range.
///
/// The fields are public, so a `Span` can be built with a struct literal
/// without any checks; `Span::new` and `Span::validate` enforce the
/// invariants (`byte_start <= byte_end`, `line_start <= line_end`, and
/// non-zero line numbers).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the span starts.
    pub byte_start: usize,
    /// Byte offset where the span ends; `byte_len()` is `byte_end - byte_start`.
    pub byte_end: usize,
    /// First line of the span (1-indexed).
    pub line_start: usize,
    /// Last line of the span (1-indexed); `line_count()` treats it as inclusive.
    pub line_end: usize,
}
122
123impl Span {
124    /// Create a new Span with validation
125    pub fn new(
126        byte_start: usize,
127        byte_end: usize,
128        line_start: usize,
129        line_end: usize,
130    ) -> Result<Self> {
131        let span = Self {
132            byte_start,
133            byte_end,
134            line_start,
135            line_end,
136        };
137        span.validate()?;
138        Ok(span)
139    }
140
141    /// Create a new Span without validation (for backward compatibility)
142    ///
143    /// # Safety
144    ///
145    /// The caller must ensure the span is valid. Use `new()` for validated construction.
146    pub fn new_unchecked(
147        byte_start: usize,
148        byte_end: usize,
149        line_start: usize,
150        line_end: usize,
151    ) -> Self {
152        Self {
153            byte_start,
154            byte_end,
155            line_start,
156            line_end,
157        }
158    }
159
160    /// Validate span invariants
161    pub fn validate(&self) -> Result<()> {
162        // Check for zero line numbers first (lines should be 1-indexed)
163        if self.line_start == 0 {
164            return Err(CkError::SpanValidation(
165                "Line start cannot be zero (lines are 1-indexed)".to_string(),
166            ));
167        }
168
169        if self.line_end == 0 {
170            return Err(CkError::SpanValidation(
171                "Line end cannot be zero (lines are 1-indexed)".to_string(),
172            ));
173        }
174
175        // Check byte range validity
176        if self.byte_start > self.byte_end {
177            return Err(CkError::SpanValidation(format!(
178                "Invalid byte range: start ({}) > end ({})",
179                self.byte_start, self.byte_end
180            )));
181        }
182
183        // Check line range validity
184        if self.line_start > self.line_end {
185            return Err(CkError::SpanValidation(format!(
186                "Invalid line range: start ({}) > end ({})",
187                self.line_start, self.line_end
188            )));
189        }
190
191        Ok(())
192    }
193
194    /// Check if this span is valid
195    pub fn is_valid(&self) -> bool {
196        self.validate().is_ok()
197    }
198
199    /// Get byte length of the span
200    pub fn byte_len(&self) -> usize {
201        self.byte_end.saturating_sub(self.byte_start)
202    }
203
204    /// Get line count of the span
205    pub fn line_count(&self) -> usize {
206        self.line_end.saturating_sub(self.line_start) + 1
207    }
208}
209
/// Serializable record describing a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// Hash string for the file.
    // NOTE(review): presumably the hex digest from `compute_file_hash` — confirm.
    pub hash: String,
    /// Last-modified timestamp.
    // NOTE(review): unit/epoch is not visible here (presumably Unix seconds) — confirm.
    pub last_modified: u64,
    /// File size.
    // NOTE(review): presumably in bytes — confirm.
    pub size: u64,
}
217
/// A single search hit.
///
/// The optional fields are left out of the serialized form entirely when they
/// are `None` (`skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File the hit was found in.
    pub file: PathBuf,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Score of the hit.
    // NOTE(review): `JsonlSearchResult::from_search_result` drops scores below 0.0,
    // so negative values look like a "no score" marker — confirm.
    pub score: f32,
    /// Preview text for the hit.
    pub preview: String,
    /// Detected language of the file, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<Language>,
    /// Symbol name associated with the hit, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
    /// Hash of the chunk the hit came from, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch associated with the hit, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
233
/// Enhanced search results that include near-miss information for threshold queries
///
/// Unlike [`SearchResult`], this type does not derive `Serialize`/`Deserialize`.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// The results that matched the query.
    pub matches: Vec<SearchResult>,
    /// The highest scoring result below the threshold (if any)
    pub closest_below_threshold: Option<SearchResult>,
}
241
/// Serializable search hit that carries ranking signals and a model name.
// NOTE(review): presumably the record emitted for JSON output — confirm against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File path as a string.
    pub file: String,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Detected language, if any (serialized even when `None`).
    pub lang: Option<Language>,
    /// Symbol name, if any (serialized even when `None`).
    pub symbol: Option<String>,
    /// Score of the hit.
    pub score: f32,
    /// Per-signal ranking details.
    pub signals: SearchSignals,
    /// Preview text for the hit.
    pub preview: String,
    /// Model name.
    // NOTE(review): presumably the embedding model that produced the score — confirm.
    pub model: String,
}
253
/// Serializable search hit built from a [`SearchResult`] via
/// `JsonlSearchResult::from_search_result`.
///
/// Fields marked `skip_serializing_if` are omitted from the output when `None`;
/// `language` is always written.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonlSearchResult {
    /// File path as a (lossily converted) string.
    pub path: String,
    /// Location of the hit inside the file.
    pub span: Span,
    /// Lowercase language name, if a language was detected.
    pub language: Option<String>,
    /// Preview text; `None` when snippets were not requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
    /// Score of the hit; `None` when the source score was not `>= 0.0`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub score: Option<f32>,
    /// Hash of the chunk the hit came from, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chunk_hash: Option<String>,
    /// Index epoch associated with the hit, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index_epoch: Option<u64>,
}
268
/// Ranking signals attached to a [`JsonSearchResult`].
// NOTE(review): field names suggest lexical rank, vector rank, and a reciprocal-rank-fusion
// score — confirm against the code that fills them in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Rank from the lexical signal, if present.
    pub lex_rank: Option<usize>,
    /// Rank from the vector signal, if present.
    pub vec_rank: Option<usize>,
    /// Combined score.
    pub rrf_score: f32,
}
275
/// Search strategy selected in [`SearchOptions::mode`].
///
/// `SearchOptions::default()` uses [`SearchMode::Regex`].
#[derive(Debug, Clone, PartialEq)]
pub enum SearchMode {
    Regex,
    Lexical,
    Semantic,
    Hybrid,
}
283
/// An entry of [`SearchOptions::include_patterns`]: a path plus a directory flag.
#[derive(Debug, Clone)]
pub struct IncludePattern {
    /// Path to include.
    pub path: PathBuf,
    /// Whether `path` refers to a directory.
    // NOTE(review): presumably precomputed by the caller — confirm.
    pub is_dir: bool,
}
289
/// Options describing a single search request.
///
/// Use `SearchOptions::default()` and override the fields you need; the
/// `Default` impl documents the starting values.
// NOTE(review): the per-field behaviour is implemented by the search code outside
// this file; the field names mirror grep-style flags, but confirm there before
// relying on exact semantics.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy to use.
    pub mode: SearchMode,
    /// Query text.
    pub query: String,
    /// Path to search.
    pub path: PathBuf,
    /// Optional cap on the number of results.
    pub top_k: Option<usize>,
    /// Optional score threshold.
    pub threshold: Option<f32>,
    pub case_insensitive: bool,
    pub whole_word: bool,
    pub fixed_string: bool,
    pub line_numbers: bool,
    pub context_lines: usize,
    pub before_context_lines: usize,
    pub after_context_lines: usize,
    pub recursive: bool,
    pub json_output: bool,
    pub jsonl_output: bool,
    pub no_snippet: bool,
    pub reindex: bool,
    pub show_scores: bool,
    pub show_filenames: bool,
    pub files_with_matches: bool,
    pub files_without_matches: bool,
    /// Exclusion patterns; defaults to `get_default_exclude_patterns()`.
    pub exclude_patterns: Vec<String>,
    /// Explicit include entries; empty by default.
    pub include_patterns: Vec<IncludePattern>,
    pub respect_gitignore: bool,
    pub full_section: bool,
    // Enhanced embedding options (search-time only)
    pub rerank: bool,
    pub rerank_model: Option<String>,
    pub embedding_model: Option<String>,
}
322
323impl JsonlSearchResult {
324    pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
325        Self {
326            path: result.file.to_string_lossy().to_string(),
327            span: result.span.clone(),
328            language: result.lang.as_ref().map(|l| l.to_string()),
329            snippet: if include_snippet {
330                Some(result.preview.clone())
331            } else {
332                None
333            },
334            score: if result.score >= 0.0 {
335                Some(result.score)
336            } else {
337                None
338            },
339            chunk_hash: result.chunk_hash.clone(),
340            index_epoch: result.index_epoch,
341        }
342    }
343}
344
impl Default for SearchOptions {
    /// Returns the baseline options: regex mode, an empty query, the current
    /// directory (`.`) as the search path, recursion enabled, `.gitignore`
    /// respected, and the default exclusion patterns. Every other flag is
    /// `false`, every count is `0`, and every optional value is `None`.
    fn default() -> Self {
        Self {
            mode: SearchMode::Regex,
            query: String::new(),
            path: PathBuf::from("."),
            top_k: None,
            threshold: None,
            case_insensitive: false,
            whole_word: false,
            fixed_string: false,
            line_numbers: false,
            context_lines: 0,
            before_context_lines: 0,
            after_context_lines: 0,
            recursive: true,
            json_output: false,
            jsonl_output: false,
            no_snippet: false,
            reindex: false,
            show_scores: false,
            show_filenames: false,
            files_with_matches: false,
            files_without_matches: false,
            // Start from the built-in exclusion list rather than an empty one.
            exclude_patterns: get_default_exclude_patterns(),
            include_patterns: Vec::new(),
            respect_gitignore: true,
            full_section: false,
            // Enhanced embedding options (search-time only)
            rerank: false,
            rerank_model: None,
            embedding_model: None,
        }
    }
}
380
/// Returns the built-in list of directory names that are skipped during search.
///
/// The list covers ck's own index directory, model caches, version-control
/// metadata, build output, Python virtual environments, editor settings, and
/// temporary directories. A fresh `Vec` is allocated on every call, in the
/// order shown below.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES.iter().copied().map(String::from).collect()
}
420
/// Returns the text written into a newly created `.ckignore` file.
///
/// The template uses `.gitignore` syntax and excludes common image, video,
/// audio, binary, archive, data, and config file extensions. It ends with a
/// marker comment under which users can append their own patterns.
pub fn get_default_ckignore_content() -> &'static str {
    const DEFAULT_CKIGNORE: &str = r#"# .ckignore - Default patterns for ck semantic search
# Created automatically during first index
# Syntax: same as .gitignore (glob patterns, ! for negation)

# Images
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.svg
*.ico
*.webp
*.tiff

# Video
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv
*.webm

# Audio
*.mp3
*.wav
*.flac
*.aac
*.ogg
*.m4a

# Binary/Compiled
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.obj
*.o

# Archives
*.zip
*.tar
*.tar.gz
*.tgz
*.rar
*.7z
*.bz2
*.gz

# Data files
*.db
*.sqlite
*.sqlite3
*.parquet
*.arrow

# Config formats (issue #27)
*.json
*.yaml
*.yml

# Add your custom patterns below this line
"#;

    DEFAULT_CKIGNORE
}
490
491/// Read and parse .ckignore file, returning patterns
492pub fn read_ckignore_patterns(repo_root: &Path) -> Result<Vec<String>> {
493    let ckignore_path = repo_root.join(".ckignore");
494
495    if !ckignore_path.exists() {
496        return Ok(Vec::new());
497    }
498
499    let content = std::fs::read_to_string(&ckignore_path).map_err(CkError::Io)?;
500
501    let patterns: Vec<String> = content
502        .lines()
503        .map(|line| line.trim())
504        .filter(|line| !line.is_empty() && !line.starts_with('#'))
505        .map(|line| line.to_string())
506        .collect();
507
508    Ok(patterns)
509}
510
511/// Create .ckignore file with default content if it doesn't exist
512pub fn create_ckignore_if_missing(repo_root: &Path) -> Result<bool> {
513    let ckignore_path = repo_root.join(".ckignore");
514
515    if ckignore_path.exists() {
516        return Ok(false); // Already exists
517    }
518
519    std::fs::write(&ckignore_path, get_default_ckignore_content()).map_err(CkError::Io)?;
520
521    Ok(true) // Created new file
522}
523
524/// Build exclusion patterns with proper priority ordering
525///
526/// This centralizes the pattern building logic used across CLI, TUI, and MCP interfaces
527/// to prevent drift and ensure consistent behavior.
528///
529/// Priority order (highest to lowest):
530/// 1. .ckignore patterns (if use_ckignore is true)
531/// 2. Additional excludes (from command-line or API calls)
532/// 3. Default patterns (if use_defaults is true)
533///
534/// # Arguments
535/// * `repo_root` - Optional repository root for loading .ckignore file
536/// * `additional_excludes` - Additional exclusion patterns (e.g., from CLI flags)
537/// * `use_ckignore` - Whether to load and include .ckignore patterns
538/// * `use_defaults` - Whether to include default exclusion patterns
539///
540/// # Returns
541/// Combined list of exclusion patterns in priority order
542pub fn build_exclude_patterns(
543    repo_root: Option<&Path>,
544    additional_excludes: &[String],
545    use_ckignore: bool,
546    use_defaults: bool,
547) -> Vec<String> {
548    let mut patterns = Vec::new();
549
550    // 1. Load .ckignore patterns (highest priority among additional patterns)
551    if use_ckignore
552        && let Some(root) = repo_root
553        && let Ok(ckignore_patterns) = read_ckignore_patterns(root)
554        && !ckignore_patterns.is_empty()
555    {
556        patterns.extend(ckignore_patterns);
557    }
558
559    // 2. Add additional exclude patterns (e.g., from command-line)
560    patterns.extend(additional_excludes.iter().cloned());
561
562    // 3. Add defaults (lowest priority)
563    if use_defaults {
564        patterns.extend(get_default_exclude_patterns());
565    }
566
567    patterns
568}
569
/// Computes the sidecar path for `file_path` inside `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored under `.ck`, and `.ck`
/// is appended to its extension (`src/main.rs` -> `.ck/src/main.rs.ck`). A file
/// without an extension simply gets the `ck` extension. When `file_path` is
/// not located under `repo_root`, it is used as given.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    let sidecar_ext = relative.extension().map_or_else(
        || "ck".to_string(),
        |ext| format!("{}.ck", ext.to_string_lossy()),
    );

    let mut sidecar = repo_root.join(".ck").join(relative);
    sidecar.set_extension(sidecar_ext);
    sidecar
}
581
582pub fn compute_file_hash(path: &Path) -> Result<String> {
583    use std::io::Read;
584
585    let mut file = std::fs::File::open(path)?;
586    let mut hasher = blake3::Hasher::new();
587
588    // Stream the file in 64KB chunks to avoid loading entire file into memory
589    let mut buffer = [0u8; 65536]; // 64KB buffer
590    loop {
591        let bytes_read = file.read(&mut buffer)?;
592        if bytes_read == 0 {
593            break;
594        }
595        hasher.update(&buffer[..bytes_read]);
596    }
597
598    let hash = hasher.finalize();
599    Ok(hash.to_hex().to_string())
600}
601
/// PDF-specific utilities
pub mod pdf {
    use std::path::{Path, PathBuf};

    /// Returns `true` when `path` has a `pdf` extension, ignoring ASCII case.
    ///
    /// The comparison is done in place, so no lowercase copy of the extension
    /// is allocated. Paths without an extension (or with a non-UTF-8 one) are
    /// not considered PDFs.
    pub fn is_pdf_file(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("pdf"))
    }

    /// Computes where the extracted text of a PDF is cached.
    ///
    /// The file's path relative to `repo_root` is mirrored under
    /// `<repo_root>/.ck/content`, with `.txt` appended to the extension
    /// (`docs/manual.pdf` -> `.ck/content/docs/manual.pdf.txt`). When
    /// `file_path` is not located under `repo_root`, it is used as given.
    pub fn get_content_cache_path(repo_root: &Path, file_path: &Path) -> PathBuf {
        let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

        // Append `.txt` to the existing extension, or use it alone if there is none.
        let cached_ext = relative.extension().map_or_else(
            || "txt".to_string(),
            |ext| format!("{}.txt", ext.to_string_lossy()),
        );

        let mut cache_path = repo_root.join(".ck").join("content").join(relative);
        cache_path.set_extension(cached_ext);
        cache_path
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::path::PathBuf;

        #[test]
        fn test_is_pdf_file() {
            // Any ASCII casing of the extension is accepted.
            for name in ["test.pdf", "test.PDF", "test.Pdf"] {
                assert!(is_pdf_file(&PathBuf::from(name)));
            }

            // Other extension, no extension, and a bare "pdf" file name.
            for name in ["test.txt", "test", "pdf"] {
                assert!(!is_pdf_file(&PathBuf::from(name)));
            }
        }

        #[test]
        fn test_get_content_cache_path() {
            let root = PathBuf::from("/project");
            let pdf = PathBuf::from("/project/docs/manual.pdf");

            assert_eq!(
                get_content_cache_path(&root, &pdf),
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_no_extension() {
            let root = PathBuf::from("/project");
            let bare = PathBuf::from("/project/docs/manual");

            assert_eq!(
                get_content_cache_path(&root, &bare),
                PathBuf::from("/project/.ck/content/docs/manual.txt")
            );
        }

        #[test]
        fn test_get_content_cache_path_relative() {
            let root = PathBuf::from("/project");
            let relative_pdf = PathBuf::from("docs/manual.pdf"); // Relative path

            assert_eq!(
                get_content_cache_path(&root, &relative_pdf),
                PathBuf::from("/project/.ck/content/docs/manual.pdf.txt")
            );
        }
    }
}
682
683#[cfg(test)]
684mod tests {
685    use super::*;
686    use std::fs;
687    use tempfile::TempDir;
688
689    #[test]
690    fn test_span_valid_creation() {
691        // Test valid span creation
692        let span = Span::new(0, 10, 1, 2).unwrap();
693        assert_eq!(span.byte_start, 0);
694        assert_eq!(span.byte_end, 10);
695        assert_eq!(span.line_start, 1);
696        assert_eq!(span.line_end, 2);
697        assert!(span.is_valid());
698    }
699
700    #[test]
701    fn test_span_validation_valid_cases() {
702        // Same byte positions (empty span)
703        let span = Span::new(10, 10, 1, 1).unwrap();
704        assert!(span.is_valid());
705        assert_eq!(span.byte_len(), 0);
706        assert_eq!(span.line_count(), 1);
707
708        // Multi-line span
709        let span = Span::new(0, 100, 1, 10).unwrap();
710        assert!(span.is_valid());
711        assert_eq!(span.byte_len(), 100);
712        assert_eq!(span.line_count(), 10);
713
714        // Single line span
715        let span = Span::new(5, 25, 3, 3).unwrap();
716        assert!(span.is_valid());
717        assert_eq!(span.byte_len(), 20);
718        assert_eq!(span.line_count(), 1);
719    }
720
721    #[test]
722    fn test_span_validation_invalid_byte_range() {
723        // Reversed byte range
724        let result = Span::new(10, 5, 1, 2);
725        assert!(result.is_err());
726        if let Err(CkError::SpanValidation(msg)) = result {
727            assert!(msg.contains("Invalid byte range"));
728            assert!(msg.contains("start (10) > end (5)"));
729        } else {
730            panic!("Expected SpanValidation error");
731        }
732    }
733
734    #[test]
735    fn test_span_validation_invalid_line_range() {
736        // Reversed line range
737        let result = Span::new(0, 10, 5, 2);
738        assert!(result.is_err());
739        if let Err(CkError::SpanValidation(msg)) = result {
740            assert!(msg.contains("Invalid line range"));
741            assert!(msg.contains("start (5) > end (2)"));
742        } else {
743            panic!("Expected SpanValidation error");
744        }
745    }
746
747    #[test]
748    fn test_span_validation_zero_line_numbers() {
749        // Zero line start
750        let result = Span::new(0, 10, 0, 2);
751        assert!(result.is_err());
752        if let Err(CkError::SpanValidation(msg)) = result {
753            assert!(msg.contains("Line start cannot be zero"));
754        } else {
755            panic!("Expected SpanValidation error");
756        }
757
758        // Zero line end
759        let result = Span::new(0, 10, 1, 0);
760        assert!(result.is_err());
761        if let Err(CkError::SpanValidation(msg)) = result {
762            assert!(msg.contains("Line end cannot be zero"));
763        } else {
764            panic!("Expected SpanValidation error");
765        }
766    }
767
768    #[test]
769    fn test_span_unchecked_creation() {
770        // Test backward compatibility with unchecked creation
771        let span = Span::new_unchecked(10, 5, 0, 1);
772        assert_eq!(span.byte_start, 10);
773        assert_eq!(span.byte_end, 5);
774        assert_eq!(span.line_start, 0);
775        assert_eq!(span.line_end, 1);
776        assert!(!span.is_valid()); // Should be invalid
777    }
778
779    #[test]
780    fn test_span_validation_methods() {
781        // Valid span
782        let valid_span = Span::new_unchecked(0, 10, 1, 2);
783        assert!(valid_span.validate().is_ok());
784        assert!(valid_span.is_valid());
785
786        // Invalid span (reversed bytes)
787        let invalid_span = Span::new_unchecked(10, 5, 1, 2);
788        assert!(invalid_span.validate().is_err());
789        assert!(!invalid_span.is_valid());
790
791        // Invalid span (zero lines)
792        let zero_line_span = Span::new_unchecked(0, 10, 0, 1);
793        assert!(zero_line_span.validate().is_err());
794        assert!(!zero_line_span.is_valid());
795    }
796
797    #[test]
798    fn test_span_utility_methods() {
799        let span = Span::new(10, 25, 5, 8).unwrap();
800
801        // Test byte_len
802        assert_eq!(span.byte_len(), 15);
803
804        // Test line_count
805        assert_eq!(span.line_count(), 4); // lines 5, 6, 7, 8
806
807        // Test with single-line span
808        let single_line = Span::new(0, 5, 1, 1).unwrap();
809        assert_eq!(single_line.line_count(), 1);
810        assert_eq!(single_line.byte_len(), 5);
811
812        // Test with empty span
813        let empty = Span::new(10, 10, 3, 3).unwrap();
814        assert_eq!(empty.byte_len(), 0);
815        assert_eq!(empty.line_count(), 1);
816    }
817
818    #[test]
819    fn test_span_legacy_struct_literal_still_works() {
820        // Ensure backward compatibility for existing code using struct literals
821        let span = Span {
822            byte_start: 0,
823            byte_end: 10,
824            line_start: 1,
825            line_end: 2,
826        };
827
828        assert_eq!(span.byte_start, 0);
829        assert_eq!(span.byte_end, 10);
830        assert_eq!(span.line_start, 1);
831        assert_eq!(span.line_end, 2);
832        assert!(span.is_valid());
833    }
834
835    #[test]
836    fn test_search_options_default() {
837        let options = SearchOptions::default();
838        assert!(matches!(options.mode, SearchMode::Regex));
839        assert_eq!(options.query, "");
840        assert_eq!(options.path, PathBuf::from("."));
841        assert_eq!(options.top_k, None);
842        assert_eq!(options.threshold, None);
843        assert!(!options.case_insensitive);
844        assert!(!options.whole_word);
845        assert!(!options.fixed_string);
846        assert!(!options.line_numbers);
847        assert_eq!(options.context_lines, 0);
848        assert!(options.recursive);
849        assert!(!options.json_output);
850        assert!(!options.reindex);
851        assert!(!options.show_scores);
852        assert!(!options.show_filenames);
853    }
854
855    #[test]
856    fn test_file_metadata_serialization() {
857        let metadata = FileMetadata {
858            path: PathBuf::from("test.txt"),
859            hash: "abc123".to_string(),
860            last_modified: 1234567890,
861            size: 1024,
862        };
863
864        let json = serde_json::to_string(&metadata).unwrap();
865        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();
866
867        assert_eq!(metadata.path, deserialized.path);
868        assert_eq!(metadata.hash, deserialized.hash);
869        assert_eq!(metadata.last_modified, deserialized.last_modified);
870        assert_eq!(metadata.size, deserialized.size);
871    }
872
873    #[test]
874    fn test_search_result_serialization() {
875        let result = SearchResult {
876            file: PathBuf::from("test.txt"),
877            span: Span {
878                byte_start: 0,
879                byte_end: 10,
880                line_start: 1,
881                line_end: 1,
882            },
883            score: 0.95,
884            preview: "hello world".to_string(),
885            lang: Some(Language::Rust),
886            symbol: Some("main".to_string()),
887            chunk_hash: Some("abc123".to_string()),
888            index_epoch: Some(1699123456),
889        };
890
891        let json = serde_json::to_string(&result).unwrap();
892        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
893
894        assert_eq!(result.file, deserialized.file);
895        assert_eq!(result.score, deserialized.score);
896        assert_eq!(result.preview, deserialized.preview);
897        assert_eq!(result.lang, deserialized.lang);
898        assert_eq!(result.symbol, deserialized.symbol);
899        assert_eq!(result.chunk_hash, deserialized.chunk_hash);
900        assert_eq!(result.index_epoch, deserialized.index_epoch);
901    }
902
903    #[test]
904    fn test_jsonl_search_result_conversion() {
905        let result = SearchResult {
906            file: PathBuf::from("src/auth.rs"),
907            span: Span {
908                byte_start: 1203,
909                byte_end: 1456,
910                line_start: 42,
911                line_end: 58,
912            },
913            score: 0.89,
914            preview: "function authenticate(user) {...}".to_string(),
915            lang: Some(Language::Rust),
916            symbol: Some("authenticate".to_string()),
917            chunk_hash: Some("abc123def456".to_string()),
918            index_epoch: Some(1699123456),
919        };
920
921        // Test with snippet
922        let jsonl_with_snippet = JsonlSearchResult::from_search_result(&result, true);
923        assert_eq!(jsonl_with_snippet.path, "src/auth.rs");
924        assert_eq!(jsonl_with_snippet.span.line_start, 42);
925        assert_eq!(jsonl_with_snippet.language, Some("rust".to_string()));
926        assert_eq!(
927            jsonl_with_snippet.snippet,
928            Some("function authenticate(user) {...}".to_string())
929        );
930        assert_eq!(jsonl_with_snippet.score, Some(0.89));
931        assert_eq!(
932            jsonl_with_snippet.chunk_hash,
933            Some("abc123def456".to_string())
934        );
935        assert_eq!(jsonl_with_snippet.index_epoch, Some(1699123456));
936
937        // Test without snippet
938        let jsonl_no_snippet = JsonlSearchResult::from_search_result(&result, false);
939        assert_eq!(jsonl_no_snippet.snippet, None);
940        assert_eq!(jsonl_no_snippet.path, "src/auth.rs");
941    }
942
943    #[test]
944    fn test_get_sidecar_path() {
945        let repo_root = PathBuf::from("/home/user/project");
946        let file_path = PathBuf::from("/home/user/project/src/main.rs");
947
948        let sidecar = get_sidecar_path(&repo_root, &file_path);
949        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");
950
951        assert_eq!(sidecar, expected);
952    }
953
954    #[test]
955    fn test_get_sidecar_path_no_extension() {
956        let repo_root = PathBuf::from("/project");
957        let file_path = PathBuf::from("/project/README");
958
959        let sidecar = get_sidecar_path(&repo_root, &file_path);
960        let expected = PathBuf::from("/project/.ck/README.ck");
961
962        assert_eq!(sidecar, expected);
963    }
964
965    #[test]
966    fn test_compute_file_hash() {
967        let temp_dir = TempDir::new().unwrap();
968        let file_path = temp_dir.path().join("test.txt");
969
970        fs::write(&file_path, "hello world").unwrap();
971
972        let hash1 = compute_file_hash(&file_path).unwrap();
973        let hash2 = compute_file_hash(&file_path).unwrap();
974
975        // Same content should produce same hash
976        assert_eq!(hash1, hash2);
977        assert!(!hash1.is_empty());
978
979        // Different content should produce different hash
980        fs::write(&file_path, "hello rust").unwrap();
981        let hash3 = compute_file_hash(&file_path).unwrap();
982        assert_ne!(hash1, hash3);
983    }
984
985    #[test]
986    fn test_compute_file_hash_nonexistent() {
987        let result = compute_file_hash(&PathBuf::from("nonexistent.txt"));
988        assert!(result.is_err());
989    }
990
991    #[test]
992    fn test_compute_file_hash_large_file() {
993        let temp_dir = TempDir::new().unwrap();
994        let file_path = temp_dir.path().join("large_test.txt");
995
996        // Create a file larger than the buffer size (64KB) to test streaming
997        let large_content = "a".repeat(100_000); // 100KB content
998        fs::write(&file_path, &large_content).unwrap();
999
1000        let hash1 = compute_file_hash(&file_path).unwrap();
1001        let hash2 = compute_file_hash(&file_path).unwrap();
1002
1003        // Streaming hash should be consistent
1004        assert_eq!(hash1, hash2);
1005        assert!(!hash1.is_empty());
1006
1007        // Verify it's different from smaller content
1008        fs::write(&file_path, "small content").unwrap();
1009        let hash3 = compute_file_hash(&file_path).unwrap();
1010        assert_ne!(hash1, hash3);
1011    }
1012
1013    #[test]
1014    fn test_json_search_result_serialization() {
1015        let signals = SearchSignals {
1016            lex_rank: Some(1),
1017            vec_rank: Some(2),
1018            rrf_score: 0.85,
1019        };
1020
1021        let result = JsonSearchResult {
1022            file: "test.txt".to_string(),
1023            span: Span {
1024                byte_start: 0,
1025                byte_end: 5,
1026                line_start: 1,
1027                line_end: 1,
1028            },
1029            lang: None, // txt is not a supported language
1030            symbol: None,
1031            score: 0.95,
1032            signals,
1033            preview: "hello".to_string(),
1034            model: "bge-small".to_string(),
1035        };
1036
1037        let json = serde_json::to_string(&result).unwrap();
1038        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();
1039
1040        assert_eq!(result.file, deserialized.file);
1041        assert_eq!(result.score, deserialized.score);
1042        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
1043        assert_eq!(result.model, deserialized.model);
1044    }
1045
1046    #[test]
1047    fn test_language_from_extension() {
1048        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
1049        assert_eq!(Language::from_extension("py"), Some(Language::Python));
1050        assert_eq!(Language::from_extension("js"), Some(Language::JavaScript));
1051        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
1052        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
1053        assert_eq!(Language::from_extension("hs"), Some(Language::Haskell));
1054        assert_eq!(Language::from_extension("lhs"), Some(Language::Haskell));
1055        assert_eq!(Language::from_extension("go"), Some(Language::Go));
1056        assert_eq!(Language::from_extension("java"), Some(Language::Java));
1057        assert_eq!(Language::from_extension("c"), Some(Language::C));
1058        assert_eq!(Language::from_extension("cpp"), Some(Language::Cpp));
1059        assert_eq!(Language::from_extension("cs"), Some(Language::CSharp));
1060        assert_eq!(Language::from_extension("rb"), Some(Language::Ruby));
1061        assert_eq!(Language::from_extension("php"), Some(Language::Php));
1062        assert_eq!(Language::from_extension("swift"), Some(Language::Swift));
1063        assert_eq!(Language::from_extension("kt"), Some(Language::Kotlin));
1064        assert_eq!(Language::from_extension("kts"), Some(Language::Kotlin));
1065        assert_eq!(Language::from_extension("unknown"), None);
1066    }
1067
1068    #[test]
1069    fn test_language_from_extension_case_insensitive() {
1070        // Test uppercase extensions - only for actually supported languages
1071        assert_eq!(Language::from_extension("RS"), Some(Language::Rust));
1072        assert_eq!(Language::from_extension("PY"), Some(Language::Python));
1073        assert_eq!(Language::from_extension("JS"), Some(Language::JavaScript));
1074        assert_eq!(Language::from_extension("TS"), Some(Language::TypeScript));
1075        assert_eq!(Language::from_extension("TSX"), Some(Language::TypeScript));
1076        assert_eq!(Language::from_extension("HS"), Some(Language::Haskell));
1077        assert_eq!(Language::from_extension("LHS"), Some(Language::Haskell));
1078        assert_eq!(Language::from_extension("GO"), Some(Language::Go));
1079        assert_eq!(Language::from_extension("JAVA"), Some(Language::Java));
1080        assert_eq!(Language::from_extension("C"), Some(Language::C));
1081        assert_eq!(Language::from_extension("CPP"), Some(Language::Cpp));
1082        assert_eq!(Language::from_extension("CC"), Some(Language::Cpp));
1083        assert_eq!(Language::from_extension("CXX"), Some(Language::Cpp));
1084        assert_eq!(Language::from_extension("H"), Some(Language::Cpp));
1085        assert_eq!(Language::from_extension("HPP"), Some(Language::Cpp));
1086        assert_eq!(Language::from_extension("CS"), Some(Language::CSharp));
1087        assert_eq!(Language::from_extension("RB"), Some(Language::Ruby));
1088        assert_eq!(Language::from_extension("PHP"), Some(Language::Php));
1089        assert_eq!(Language::from_extension("SWIFT"), Some(Language::Swift));
1090        assert_eq!(Language::from_extension("KT"), Some(Language::Kotlin));
1091        assert_eq!(Language::from_extension("KTS"), Some(Language::Kotlin));
1092        assert_eq!(Language::from_extension("PDF"), Some(Language::Pdf));
1093
1094        // Test mixed case extensions
1095        assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
1096        assert_eq!(Language::from_extension("Py"), Some(Language::Python));
1097        assert_eq!(Language::from_extension("Js"), Some(Language::JavaScript));
1098        assert_eq!(Language::from_extension("Ts"), Some(Language::TypeScript));
1099        assert_eq!(Language::from_extension("TsX"), Some(Language::TypeScript));
1100        assert_eq!(Language::from_extension("Hs"), Some(Language::Haskell));
1101        assert_eq!(Language::from_extension("Go"), Some(Language::Go));
1102        assert_eq!(Language::from_extension("Java"), Some(Language::Java));
1103        assert_eq!(Language::from_extension("Cpp"), Some(Language::Cpp));
1104        assert_eq!(Language::from_extension("Rb"), Some(Language::Ruby));
1105        assert_eq!(Language::from_extension("Php"), Some(Language::Php));
1106        assert_eq!(Language::from_extension("Swift"), Some(Language::Swift));
1107        assert_eq!(Language::from_extension("Kt"), Some(Language::Kotlin));
1108        assert_eq!(Language::from_extension("Pdf"), Some(Language::Pdf));
1109
1110        // Unknown extensions should still return None
1111        assert_eq!(Language::from_extension("UNKNOWN"), None);
1112        assert_eq!(Language::from_extension("Unknown"), None);
1113    }
1114
1115    #[test]
1116    fn test_language_from_path() {
1117        assert_eq!(
1118            Language::from_path(&PathBuf::from("test.rs")),
1119            Some(Language::Rust)
1120        );
1121        assert_eq!(
1122            Language::from_path(&PathBuf::from("test.py")),
1123            Some(Language::Python)
1124        );
1125        assert_eq!(
1126            Language::from_path(&PathBuf::from("test.js")),
1127            Some(Language::JavaScript)
1128        );
1129        assert_eq!(
1130            Language::from_path(&PathBuf::from("test.hs")),
1131            Some(Language::Haskell)
1132        );
1133        assert_eq!(
1134            Language::from_path(&PathBuf::from("test.lhs")),
1135            Some(Language::Haskell)
1136        );
1137        assert_eq!(
1138            Language::from_path(&PathBuf::from("test.go")),
1139            Some(Language::Go)
1140        );
1141        assert_eq!(Language::from_path(&PathBuf::from("test.unknown")), None); // unknown extensions return None
1142        assert_eq!(Language::from_path(&PathBuf::from("noext")), None); // no extension
1143    }
1144
1145    #[test]
1146    fn test_language_from_path_case_insensitive() {
1147        // Test uppercase extensions in file paths - only supported languages
1148        assert_eq!(
1149            Language::from_path(&PathBuf::from("MAIN.RS")),
1150            Some(Language::Rust)
1151        );
1152        assert_eq!(
1153            Language::from_path(&PathBuf::from("app.PY")),
1154            Some(Language::Python)
1155        );
1156        assert_eq!(
1157            Language::from_path(&PathBuf::from("script.JS")),
1158            Some(Language::JavaScript)
1159        );
1160        assert_eq!(
1161            Language::from_path(&PathBuf::from("types.TS")),
1162            Some(Language::TypeScript)
1163        );
1164        assert_eq!(
1165            Language::from_path(&PathBuf::from("Component.TSX")),
1166            Some(Language::TypeScript)
1167        );
1168        assert_eq!(
1169            Language::from_path(&PathBuf::from("module.HS")),
1170            Some(Language::Haskell)
1171        );
1172        assert_eq!(
1173            Language::from_path(&PathBuf::from("server.GO")),
1174            Some(Language::Go)
1175        );
1176        assert_eq!(
1177            Language::from_path(&PathBuf::from("App.JAVA")),
1178            Some(Language::Java)
1179        );
1180        assert_eq!(
1181            Language::from_path(&PathBuf::from("main.C")),
1182            Some(Language::C)
1183        );
1184        assert_eq!(
1185            Language::from_path(&PathBuf::from("utils.CPP")),
1186            Some(Language::Cpp)
1187        );
1188        assert_eq!(
1189            Language::from_path(&PathBuf::from("Program.CS")),
1190            Some(Language::CSharp)
1191        );
1192        assert_eq!(
1193            Language::from_path(&PathBuf::from("script.RB")),
1194            Some(Language::Ruby)
1195        );
1196        assert_eq!(
1197            Language::from_path(&PathBuf::from("index.PHP")),
1198            Some(Language::Php)
1199        );
1200        assert_eq!(
1201            Language::from_path(&PathBuf::from("App.SWIFT")),
1202            Some(Language::Swift)
1203        );
1204        assert_eq!(
1205            Language::from_path(&PathBuf::from("Main.KT")),
1206            Some(Language::Kotlin)
1207        );
1208        assert_eq!(
1209            Language::from_path(&PathBuf::from("document.PDF")),
1210            Some(Language::Pdf)
1211        );
1212
1213        // Test mixed case extensions in file paths
1214        assert_eq!(
1215            Language::from_path(&PathBuf::from("config.Rs")),
1216            Some(Language::Rust)
1217        );
1218        assert_eq!(
1219            Language::from_path(&PathBuf::from("helper.Py")),
1220            Some(Language::Python)
1221        );
1222        assert_eq!(
1223            Language::from_path(&PathBuf::from("utils.Js")),
1224            Some(Language::JavaScript)
1225        );
1226        assert_eq!(
1227            Language::from_path(&PathBuf::from("interfaces.Ts")),
1228            Some(Language::TypeScript)
1229        );
1230        assert_eq!(
1231            Language::from_path(&PathBuf::from("Component.TsX")),
1232            Some(Language::TypeScript)
1233        );
1234        assert_eq!(
1235            Language::from_path(&PathBuf::from("main.Cpp")),
1236            Some(Language::Cpp)
1237        );
1238        assert_eq!(
1239            Language::from_path(&PathBuf::from("report.Pdf")),
1240            Some(Language::Pdf)
1241        );
1242
1243        // Unknown extensions should still return None regardless of case
1244        assert_eq!(Language::from_path(&PathBuf::from("test.UNKNOWN")), None);
1245        assert_eq!(Language::from_path(&PathBuf::from("test.Unknown")), None);
1246    }
1247
1248    #[test]
1249    fn test_language_display() {
1250        assert_eq!(Language::Rust.to_string(), "rust");
1251        assert_eq!(Language::Python.to_string(), "python");
1252        assert_eq!(Language::JavaScript.to_string(), "javascript");
1253        assert_eq!(Language::TypeScript.to_string(), "typescript");
1254        assert_eq!(Language::Go.to_string(), "go");
1255        assert_eq!(Language::Java.to_string(), "java");
1256    }
1257
1258    #[test]
1259    fn test_create_ckignore_if_missing() {
1260        let temp_dir = TempDir::new().unwrap();
1261        let test_path = temp_dir.path();
1262
1263        // First creation should succeed
1264        let created = create_ckignore_if_missing(test_path).unwrap();
1265        assert!(created);
1266
1267        // Check that file exists
1268        let ckignore_path = test_path.join(".ckignore");
1269        assert!(ckignore_path.exists());
1270
1271        // Check content contains expected patterns
1272        let content = fs::read_to_string(&ckignore_path).unwrap();
1273        assert!(content.contains("*.png"));
1274        assert!(content.contains("*.json"));
1275        assert!(content.contains("*.yaml"));
1276        assert!(content.contains("# Images"));
1277        assert!(content.contains("# Config formats"));
1278
1279        // Second creation should return false (already exists)
1280        let created_again = create_ckignore_if_missing(test_path).unwrap();
1281        assert!(!created_again);
1282    }
1283
1284    #[test]
1285    fn test_read_ckignore_patterns() {
1286        let temp_dir = TempDir::new().unwrap();
1287        let test_path = temp_dir.path();
1288
1289        // Test with no .ckignore file
1290        let patterns = read_ckignore_patterns(test_path).unwrap();
1291        assert_eq!(patterns.len(), 0);
1292
1293        // Create a .ckignore file
1294        let ckignore_path = test_path.join(".ckignore");
1295        fs::write(
1296            &ckignore_path,
1297            r#"# Comment line
1298*.png
1299*.jpg
1300
1301# Another comment
1302*.json
1303*.yaml
1304"#,
1305        )
1306        .unwrap();
1307
1308        // Read patterns
1309        let patterns = read_ckignore_patterns(test_path).unwrap();
1310        assert_eq!(patterns.len(), 4);
1311        assert!(patterns.contains(&"*.png".to_string()));
1312        assert!(patterns.contains(&"*.jpg".to_string()));
1313        assert!(patterns.contains(&"*.json".to_string()));
1314        assert!(patterns.contains(&"*.yaml".to_string()));
1315        // Comments should be filtered out
1316        assert!(!patterns.iter().any(|p| p.starts_with('#')));
1317    }
1318
1319    #[test]
1320    fn test_read_ckignore_patterns_with_empty_lines() {
1321        let temp_dir = TempDir::new().unwrap();
1322        let test_path = temp_dir.path();
1323
1324        let ckignore_path = test_path.join(".ckignore");
1325        fs::write(
1326            &ckignore_path,
1327            r#"
1328*.png
1329
1330*.jpg
1331
1332
1333*.json
1334"#,
1335        )
1336        .unwrap();
1337
1338        let patterns = read_ckignore_patterns(test_path).unwrap();
1339        assert_eq!(patterns.len(), 3);
1340        assert!(patterns.contains(&"*.png".to_string()));
1341        assert!(patterns.contains(&"*.jpg".to_string()));
1342        assert!(patterns.contains(&"*.json".to_string()));
1343    }
1344
1345    #[test]
1346    fn test_get_default_ckignore_content() {
1347        let content = get_default_ckignore_content();
1348
1349        // Check that default content includes key patterns
1350        assert!(content.contains("*.png"));
1351        assert!(content.contains("*.jpg"));
1352        assert!(content.contains("*.mp4"));
1353        assert!(content.contains("*.mp3"));
1354        assert!(content.contains("*.exe"));
1355        assert!(content.contains("*.zip"));
1356        assert!(content.contains("*.db"));
1357        assert!(content.contains("*.json"));
1358        assert!(content.contains("*.yaml"));
1359
1360        // Check that it has comments
1361        assert!(content.contains("# Images"));
1362        assert!(content.contains("# Video"));
1363        assert!(content.contains("# Audio"));
1364        assert!(content.contains("# Config formats"));
1365
1366        // Check for issue reference
1367        assert!(content.contains("issue #27"));
1368    }
1369}