codeprism_core/content/
search.rs

1//! High-level content search interface
2//!
3//! This module provides a unified interface for searching content across
4//! all file types including documentation, configuration, comments, and source code.
5
6use super::{
7    extractors::CommentExtractor,
8    index::{ContentIndex, ContentUpdateListener},
9    parsers::DocumentParser,
10    CommentContext, ConfigFormat, ContentChunk, ContentNode, ContentStats, ContentType,
11    DocumentFormat, SearchQuery, SearchResult,
12};
13use crate::ast::{Language, NodeId};
14use crate::graph::GraphStore;
15use anyhow::Result;
16
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19use tree_sitter::Tree;
20
21/// High-level content search manager
22pub struct ContentSearchManager {
23    /// Content index for fast search
24    index: Arc<ContentIndex>,
25    /// Document parser for non-code files
26    document_parser: DocumentParser,
27    /// Comment extractor for source files
28    comment_extractor: CommentExtractor,
29    /// Graph store reference for AST integration
30    graph_store: Option<Arc<GraphStore>>,
31}
32
33impl ContentSearchManager {
34    /// Create a new content search manager
35    pub fn new() -> Self {
36        Self {
37            index: Arc::new(ContentIndex::new()),
38            document_parser: DocumentParser::new(),
39            comment_extractor: CommentExtractor::new(),
40            graph_store: None,
41        }
42    }
43
44    /// Create with graph store integration
45    pub fn with_graph_store(graph_store: Arc<GraphStore>) -> Self {
46        let mut manager = Self::new();
47        manager.graph_store = Some(graph_store);
48        manager
49    }
50
51    /// Index a file's content
52    pub fn index_file(&self, file_path: &Path, content: &str) -> Result<()> {
53        let language = self.detect_language(file_path);
54
55        let content_node = match language {
56            Some(lang) if self.is_source_code_language(lang) => {
57                self.index_source_file(file_path, content, lang)?
58            }
59            _ => {
60                // Handle as document/config file
61                self.document_parser.parse_file(file_path, content)?
62            }
63        };
64
65        self.index.add_node(content_node)?;
66        Ok(())
67    }
68
69    /// Index a source code file with comments
70    pub fn index_source_file_with_tree(
71        &self,
72        file_path: &Path,
73        content: &str,
74        tree: &Tree,
75        language: Language,
76        ast_nodes: &[NodeId],
77    ) -> Result<()> {
78        let mut content_node = self.index_source_file(file_path, content, language)?;
79
80        // Extract comments from the parse tree
81        if self.comment_extractor.supports_language(language) {
82            let comment_chunks = self
83                .comment_extractor
84                .extract_comments(language, tree, content, file_path, ast_nodes)?;
85
86            for chunk in comment_chunks {
87                content_node.add_chunk(chunk);
88            }
89        }
90
91        // Link AST nodes
92        for node_id in ast_nodes {
93            content_node.add_ast_node(*node_id);
94        }
95
96        self.index.add_node(content_node)?;
97        Ok(())
98    }
99
100    /// Remove a file from the index
101    pub fn remove_file(&self, file_path: &Path) -> Result<()> {
102        self.index.remove_node(file_path)
103    }
104
105    /// Search for content
106    pub fn search(&self, query: &SearchQuery) -> Result<Vec<SearchResult>> {
107        self.index.search(query)
108    }
109
110    /// Search with simple text query
111    pub fn simple_search(
112        &self,
113        query: &str,
114        max_results: Option<usize>,
115    ) -> Result<Vec<SearchResult>> {
116        let search_query = SearchQuery {
117            query: query.to_string(),
118            max_results: max_results.unwrap_or(50),
119            ..Default::default()
120        };
121
122        self.search(&search_query)
123    }
124
125    /// Search only in documentation
126    pub fn search_documentation(
127        &self,
128        query: &str,
129        max_results: Option<usize>,
130    ) -> Result<Vec<SearchResult>> {
131        let search_query = SearchQuery {
132            query: query.to_string(),
133            content_types: vec![
134                ContentType::Documentation {
135                    format: DocumentFormat::Markdown,
136                },
137                ContentType::Documentation {
138                    format: DocumentFormat::PlainText,
139                },
140                ContentType::Documentation {
141                    format: DocumentFormat::RestructuredText,
142                },
143                ContentType::Documentation {
144                    format: DocumentFormat::AsciiDoc,
145                },
146                ContentType::Documentation {
147                    format: DocumentFormat::Html,
148                },
149            ],
150            max_results: max_results.unwrap_or(50),
151            ..Default::default()
152        };
153
154        self.search(&search_query)
155    }
156
157    /// Search only in comments
158    pub fn search_comments(
159        &self,
160        query: &str,
161        language: Option<Language>,
162        max_results: Option<usize>,
163    ) -> Result<Vec<SearchResult>> {
164        let content_types = if let Some(lang) = language {
165            vec![
166                ContentType::Comment {
167                    language: lang,
168                    context: CommentContext::Block,
169                },
170                ContentType::Comment {
171                    language: lang,
172                    context: CommentContext::Inline,
173                },
174                ContentType::Comment {
175                    language: lang,
176                    context: CommentContext::Documentation,
177                },
178            ]
179        } else {
180            vec![
181                ContentType::Comment {
182                    language: Language::Unknown,
183                    context: CommentContext::Block,
184                },
185                ContentType::Comment {
186                    language: Language::Unknown,
187                    context: CommentContext::Inline,
188                },
189                ContentType::Comment {
190                    language: Language::Unknown,
191                    context: CommentContext::Documentation,
192                },
193            ]
194        };
195
196        let search_query = SearchQuery {
197            query: query.to_string(),
198            content_types,
199            max_results: max_results.unwrap_or(50),
200            ..Default::default()
201        };
202
203        self.search(&search_query)
204    }
205
206    /// Search only in configuration files
207    pub fn search_configuration(
208        &self,
209        query: &str,
210        max_results: Option<usize>,
211    ) -> Result<Vec<SearchResult>> {
212        let search_query = SearchQuery {
213            query: query.to_string(),
214            content_types: vec![
215                ContentType::Configuration {
216                    format: ConfigFormat::Json,
217                },
218                ContentType::Configuration {
219                    format: ConfigFormat::Yaml,
220                },
221                ContentType::Configuration {
222                    format: ConfigFormat::Toml,
223                },
224                ContentType::Configuration {
225                    format: ConfigFormat::Ini,
226                },
227                ContentType::Configuration {
228                    format: ConfigFormat::Properties,
229                },
230                ContentType::Configuration {
231                    format: ConfigFormat::Env,
232                },
233                ContentType::Configuration {
234                    format: ConfigFormat::Xml,
235                },
236            ],
237            max_results: max_results.unwrap_or(50),
238            ..Default::default()
239        };
240
241        self.search(&search_query)
242    }
243
244    /// Find files by pattern
245    pub fn find_files(&self, pattern: &str) -> Result<Vec<PathBuf>> {
246        self.index.find_files(pattern)
247    }
248
249    /// Get content statistics
250    pub fn get_stats(&self) -> ContentStats {
251        self.index.get_stats()
252    }
253
254    /// Get a specific content node
255    pub fn get_node(&self, file_path: &Path) -> Option<ContentNode> {
256        self.index.get_node(file_path)
257    }
258
259    /// Add an update listener
260    pub fn add_update_listener(&self, listener: Box<dyn ContentUpdateListener>) {
261        self.index.add_update_listener(listener);
262    }
263
264    /// Clear all indexed content
265    pub fn clear(&self) {
266        self.index.clear();
267    }
268
269    /// Search with regex pattern
270    pub fn regex_search(
271        &self,
272        pattern: &str,
273        max_results: Option<usize>,
274    ) -> Result<Vec<SearchResult>> {
275        let search_query = SearchQuery {
276            query: pattern.to_string(),
277            use_regex: true,
278            max_results: max_results.unwrap_or(50),
279            ..Default::default()
280        };
281
282        self.search(&search_query)
283    }
284
285    /// Search within specific file types
286    pub fn search_in_files(
287        &self,
288        query: &str,
289        file_patterns: Vec<String>,
290        max_results: Option<usize>,
291    ) -> Result<Vec<SearchResult>> {
292        let search_query = SearchQuery {
293            query: query.to_string(),
294            file_patterns,
295            max_results: max_results.unwrap_or(50),
296            ..Default::default()
297        };
298
299        self.search(&search_query)
300    }
301
302    /// Get supported languages for comment extraction
303    pub fn supported_comment_languages(&self) -> Vec<Language> {
304        self.comment_extractor.supported_languages()
305    }
306
307    /// Check if a language is supported for comment extraction
308    pub fn supports_comment_extraction(&self, language: Language) -> bool {
309        self.comment_extractor.supports_language(language)
310    }
311
312    // Private helper methods
313
314    /// Detect programming language from file extension
315    fn detect_language(&self, file_path: &Path) -> Option<Language> {
316        let extension = file_path.extension()?.to_str()?;
317        let lang = Language::from_extension(extension);
318        if matches!(lang, Language::Unknown) {
319            None
320        } else {
321            Some(lang)
322        }
323    }
324
325    /// Check if a language is a source code language
326    fn is_source_code_language(&self, language: Language) -> bool {
327        matches!(
328            language,
329            Language::JavaScript
330                | Language::TypeScript
331                | Language::Python
332                | Language::Rust
333                | Language::Java
334                | Language::Cpp
335                | Language::C
336                | Language::Go
337        )
338    }
339
340    /// Index a source code file (without tree-sitter integration)
341    fn index_source_file(
342        &self,
343        file_path: &Path,
344        content: &str,
345        language: Language,
346    ) -> Result<ContentNode> {
347        // Currently creating a simple code content node
348        // In the future, this could be enhanced with basic syntax highlighting
349        let content_type = ContentType::Code { language };
350        let mut node = ContentNode::new(file_path.to_path_buf(), content_type.clone());
351
352        // Create a single chunk for the entire file
353        let span = crate::ast::Span::new(
354            0,
355            content.len(),
356            1,
357            content.lines().count(),
358            1,
359            content.lines().last().map(|l| l.len()).unwrap_or(0),
360        );
361
362        let chunk = ContentChunk::new(
363            file_path.to_path_buf(),
364            content_type,
365            content.to_string(),
366            span,
367            0,
368        )
369        .with_metadata(serde_json::json!({
370            "language": format!("{:?}", language),
371            "content_type": "source_code"
372        }));
373
374        node.add_chunk(chunk);
375        node.file_size = content.len();
376
377        Ok(node)
378    }
379}
380
381impl Default for ContentSearchManager {
382    fn default() -> Self {
383        Self::new()
384    }
385}
386
387/// Builder for creating search queries
388#[derive(Debug, Clone)]
389pub struct SearchQueryBuilder {
390    query: SearchQuery,
391}
392
393impl SearchQueryBuilder {
394    /// Create a new search query builder
395    pub fn new(query: impl Into<String>) -> Self {
396        Self {
397            query: SearchQuery {
398                query: query.into(),
399                ..Default::default()
400            },
401        }
402    }
403
404    /// Set content types to search in
405    pub fn content_types(mut self, types: Vec<ContentType>) -> Self {
406        self.query.content_types = types;
407        self
408    }
409
410    /// Add file patterns to include
411    pub fn include_files(mut self, patterns: Vec<String>) -> Self {
412        self.query.file_patterns = patterns;
413        self
414    }
415
416    /// Add file patterns to exclude
417    pub fn exclude_files(mut self, patterns: Vec<String>) -> Self {
418        self.query.exclude_patterns = patterns;
419        self
420    }
421
422    /// Set maximum number of results
423    pub fn max_results(mut self, max: usize) -> Self {
424        self.query.max_results = max;
425        self
426    }
427
428    /// Enable case sensitive search
429    pub fn case_sensitive(mut self) -> Self {
430        self.query.case_sensitive = true;
431        self
432    }
433
434    /// Enable regex pattern matching
435    pub fn use_regex(mut self) -> Self {
436        self.query.use_regex = true;
437        self
438    }
439
440    /// Include context around matches
441    pub fn with_context(mut self, lines: usize) -> Self {
442        self.query.include_context = true;
443        self.query.context_lines = lines;
444        self
445    }
446
447    /// Disable context around matches
448    pub fn without_context(mut self) -> Self {
449        self.query.include_context = false;
450        self
451    }
452
453    /// Build the search query
454    pub fn build(self) -> SearchQuery {
455        self.query
456    }
457}
458
459/// Convenience functions for common search patterns
460impl SearchQueryBuilder {
461    /// Search only in markdown documentation
462    pub fn markdown_docs(query: impl Into<String>) -> Self {
463        Self::new(query).content_types(vec![ContentType::Documentation {
464            format: DocumentFormat::Markdown,
465        }])
466    }
467
468    /// Search only in JavaScript/TypeScript comments
469    pub fn js_comments(query: impl Into<String>) -> Self {
470        Self::new(query).content_types(vec![
471            ContentType::Comment {
472                language: Language::JavaScript,
473                context: CommentContext::Block,
474            },
475            ContentType::Comment {
476                language: Language::JavaScript,
477                context: CommentContext::Documentation,
478            },
479            ContentType::Comment {
480                language: Language::TypeScript,
481                context: CommentContext::Block,
482            },
483            ContentType::Comment {
484                language: Language::TypeScript,
485                context: CommentContext::Documentation,
486            },
487        ])
488    }
489
490    /// Search only in Python docstrings and comments
491    pub fn python_docs(query: impl Into<String>) -> Self {
492        Self::new(query).content_types(vec![
493            ContentType::Comment {
494                language: Language::Python,
495                context: CommentContext::Documentation,
496            },
497            ContentType::Comment {
498                language: Language::Python,
499                context: CommentContext::Inline,
500            },
501        ])
502    }
503
504    /// Search only in JSON configuration files
505    pub fn json_config(query: impl Into<String>) -> Self {
506        Self::new(query).content_types(vec![ContentType::Configuration {
507            format: ConfigFormat::Json,
508        }])
509    }
510
511    /// Search only in YAML configuration files
512    pub fn yaml_config(query: impl Into<String>) -> Self {
513        Self::new(query).content_types(vec![ContentType::Configuration {
514            format: ConfigFormat::Yaml,
515        }])
516    }
517}
518
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Index `content` under `path`, failing the test immediately if the
    /// fixture cannot be indexed instead of silently continuing.
    fn index(manager: &ContentSearchManager, path: &str, content: &str) {
        manager
            .index_file(Path::new(path), content)
            .expect("Indexing test fixture should succeed");
    }

    #[test]
    fn test_content_search_manager_creation() {
        let manager = ContentSearchManager::new();
        assert!(
            manager.graph_store.is_none(),
            "Default search manager should not have graph store initialized"
        );

        // `Default` must behave like `new`.
        let manager_default = ContentSearchManager::default();
        assert!(
            manager_default.graph_store.is_none(),
            "Default-created search manager should not have graph store"
        );

        // Both managers start with an empty index.
        assert_eq!(
            manager.index.get_stats().total_files,
            0,
            "New manager should start with no files"
        );
        assert_eq!(
            manager_default.index.get_stats().total_files,
            0,
            "Default manager should start with no files"
        );
    }

    #[test]
    fn test_with_graph_store() {
        let graph_store = Arc::new(GraphStore::new());
        let manager = ContentSearchManager::with_graph_store(Arc::clone(&graph_store));

        let graph_store = manager
            .graph_store
            .as_ref()
            .expect("Search manager should have graph store after enabling");

        // Basic accessibility check through the public API, since the
        // store's internal fields are private.
        use crate::{NodeKind, Span};
        assert!(
            graph_store
                .get_node(&NodeId::new(
                    "test",
                    Path::new("test.rs"),
                    &Span::new(0, 1, 1, 1, 1, 2),
                    &NodeKind::Function
                ))
                .is_none(),
            "New graph store should start empty"
        );
    }

    #[test]
    fn test_language_detection() {
        let manager = ContentSearchManager::new();

        // Known extensions
        assert_eq!(
            manager.detect_language(Path::new("test.js")),
            Some(Language::JavaScript)
        );
        assert_eq!(
            manager.detect_language(Path::new("test.py")),
            Some(Language::Python)
        );
        assert_eq!(
            manager.detect_language(Path::new("test.rs")),
            Some(Language::Rust)
        );
        assert_eq!(
            manager.detect_language(Path::new("test.java")),
            Some(Language::Java)
        );
        assert_eq!(
            manager.detect_language(Path::new("test.ts")),
            Some(Language::TypeScript)
        );

        // Unknown or missing extensions
        assert_eq!(manager.detect_language(Path::new("test.unknown")), None);
        assert_eq!(manager.detect_language(Path::new("README")), None);
    }

    #[test]
    fn test_is_source_code_language() {
        let manager = ContentSearchManager::new();

        assert!(manager.is_source_code_language(Language::JavaScript));
        assert!(manager.is_source_code_language(Language::Python));
        assert!(manager.is_source_code_language(Language::Rust));
        assert!(!manager.is_source_code_language(Language::Unknown));
    }

    #[test]
    fn test_index_markdown_file() {
        let manager = ContentSearchManager::new();
        let file_path = Path::new("test.md");
        let content = "# Title\n\nThis is a test document.";

        let result = manager.index_file(file_path, content);
        assert!(result.is_ok(), "Indexing a markdown file should succeed");

        // Verify the file was indexed
        let node = manager
            .get_node(file_path)
            .expect("Should find content node");
        assert_eq!(node.file_path, file_path);
        assert!(!node.chunks.is_empty(), "Node should have content chunks");
    }

    #[test]
    fn test_index_javascript_file() {
        let manager = ContentSearchManager::new();
        let file_path = Path::new("test.js");
        let content = "// Comment\nfunction test() { return 42; }";

        let result = manager.index_file(file_path, content);
        assert!(result.is_ok(), "Indexing a JavaScript file should succeed");

        // Verify the file was indexed as source code
        let node = manager
            .get_node(file_path)
            .expect("Should find content node");
        assert_eq!(node.file_path, file_path);

        // The whole source file is stored as exactly one code chunk.
        assert_eq!(node.chunks.len(), 1, "Should have exactly 1 chunk");
        match &node.chunks[0].content_type {
            ContentType::Code { language } => assert_eq!(*language, Language::JavaScript),
            _ => panic!("Expected code content type"),
        }
    }

    #[test]
    fn test_simple_search() {
        let manager = ContentSearchManager::new();

        index(
            &manager,
            "test1.md",
            "# Hello World\n\nThis is a test document about programming.",
        );
        index(
            &manager,
            "test2.md",
            "# Testing\n\nAnother document for testing purposes.",
        );

        // Search for content
        let results = manager.simple_search("test", Some(10)).unwrap();
        assert!(!results.is_empty(), "Should find matches for 'test'");

        // Search with max results
        let results = manager.simple_search("test", Some(1)).unwrap();
        assert!(results.len() <= 1, "Should respect the result cap");

        // Search for non-existent content
        let results = manager.simple_search("nonexistent", Some(10)).unwrap();
        assert!(
            results.is_empty(),
            "Should be empty for non-existent content"
        );
    }

    #[test]
    fn test_search_documentation() {
        let manager = ContentSearchManager::new();

        index(
            &manager,
            "doc.md",
            "# API Documentation\n\nThis describes the API.",
        );
        index(&manager, "readme.txt", "README file with API information.");
        index(
            &manager,
            "code.js",
            "// This is not documentation\nfunction api() {}",
        );

        let results = manager.search_documentation("API", Some(10)).unwrap();

        // Should only find documentation files, not source code
        assert!(!results.is_empty(), "Should find documentation matches");
        for result in &results {
            match &result.chunk.content_type {
                ContentType::Documentation { .. } => {} // Expected
                _ => panic!("Found non-documentation content in documentation search"),
            }
        }
    }

    #[test]
    fn test_search_configuration() {
        let manager = ContentSearchManager::new();

        index(&manager, "config.json", r#"{"database": "localhost"}"#);
        index(&manager, "settings.yaml", "database:\n  host: localhost");
        index(&manager, "readme.md", "Database configuration info");

        let results = manager.search_configuration("database", Some(10)).unwrap();

        // Should only find configuration files
        assert!(!results.is_empty(), "Should find configuration matches");
        for result in &results {
            match &result.chunk.content_type {
                ContentType::Configuration { .. } => {} // Expected
                _ => panic!("Found non-configuration content in configuration search"),
            }
        }
    }

    #[test]
    fn test_regex_search() {
        let manager = ContentSearchManager::new();

        index(
            &manager,
            "test.md",
            "Email: user@example.com\nAnother: admin@test.org",
        );

        // Search with regex pattern
        let results = manager.regex_search(r"\b\w+@\w+\.\w+\b", Some(10)).unwrap();
        assert!(!results.is_empty(), "Should find regex matches");

        // Invalid regex should return error
        let invalid_result = manager.regex_search("[invalid", Some(10));
        assert!(invalid_result.is_err(), "Invalid regex should be rejected");
    }

    #[test]
    fn test_search_in_files() {
        let manager = ContentSearchManager::new();

        index(&manager, "test.md", "markdown content");
        index(&manager, "test.txt", "text content");
        index(&manager, "config.json", r#"{"content": "json"}"#);

        // Search only in markdown files
        let results = manager
            .search_in_files("content", vec!["*.md".to_string()], Some(10))
            .unwrap();
        assert!(!results.is_empty(), "Should find matches in markdown files");
    }

    #[test]
    fn test_file_removal() {
        let manager = ContentSearchManager::new();
        let file_path = Path::new("temp.md");

        index(&manager, "temp.md", "# Temporary\n\nThis will be removed.");
        let node = manager
            .get_node(file_path)
            .expect("Node should exist after indexing");
        assert_eq!(
            node.file_path, file_path,
            "Node should have correct file path"
        );
        assert!(!node.chunks.is_empty(), "Node should have content chunks");

        // Remove the file
        let result = manager.remove_file(file_path);
        assert!(result.is_ok(), "Removing an indexed file should succeed");
        assert!(
            manager.get_node(file_path).is_none(),
            "Node should be gone after removal"
        );
    }

    #[test]
    fn test_clear() {
        let manager = ContentSearchManager::new();

        index(&manager, "test1.md", "Content 1");
        index(&manager, "test2.md", "Content 2");

        // Verify files are indexed
        let node1 = manager
            .get_node(Path::new("test1.md"))
            .expect("First file should be indexed");
        assert_eq!(
            node1.chunks[0].content, "Content 1",
            "First file should have correct content"
        );

        let node2 = manager
            .get_node(Path::new("test2.md"))
            .expect("Second file should be indexed");
        assert_eq!(
            node2.chunks[0].content, "Content 2",
            "Second file should have correct content"
        );

        // Clear all content
        manager.clear();

        // Verify files are removed
        assert!(manager.get_node(Path::new("test1.md")).is_none());
        assert!(manager.get_node(Path::new("test2.md")).is_none());
    }

    #[test]
    fn test_get_stats() {
        let manager = ContentSearchManager::new();

        // Initially should have empty stats
        let stats = manager.get_stats();
        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.total_chunks, 0);

        index(&manager, "test.md", "# Title\n\nContent");

        // Stats should be updated
        let stats = manager.get_stats();
        assert!(stats.total_files > 0);
        assert!(stats.total_chunks > 0);
    }

    #[test]
    fn test_find_files() {
        let manager = ContentSearchManager::new();

        index(&manager, "test_one.md", "Content 1");
        index(&manager, "test_two.md", "Content 2");
        index(&manager, "other.txt", "Content 3");

        // Find markdown files
        let md_files = manager.find_files(r"\.md$").unwrap();
        assert_eq!(md_files.len(), 2, "Should find 2 markdown files");

        // Find all test files
        let test_files = manager.find_files(r"test_").unwrap();
        assert_eq!(test_files.len(), 2, "Should find 2 test_ files");
    }

    #[test]
    fn test_supported_comment_languages() {
        let manager = ContentSearchManager::new();

        let supported = manager.supported_comment_languages();
        assert!(supported.contains(&Language::JavaScript));
        assert!(supported.contains(&Language::Python));

        assert!(manager.supports_comment_extraction(Language::JavaScript));
        assert!(manager.supports_comment_extraction(Language::Python));
        assert!(!manager.supports_comment_extraction(Language::Unknown));
    }

    #[test]
    fn test_search_query_builder() {
        // Basic builder
        let query = SearchQueryBuilder::new("test query")
            .max_results(10)
            .case_sensitive()
            .build();

        assert_eq!(query.query, "test query");
        assert_eq!(query.max_results, 10);
        assert!(query.case_sensitive);

        // Content types
        let query = SearchQueryBuilder::new("search")
            .content_types(vec![ContentType::Documentation {
                format: DocumentFormat::Markdown,
            }])
            .build();

        assert_eq!(query.content_types.len(), 1, "Should have 1 content type");

        // File patterns
        let query = SearchQueryBuilder::new("search")
            .include_files(vec!["*.md".to_string()])
            .exclude_files(vec!["*.tmp".to_string()])
            .build();

        assert_eq!(query.file_patterns.len(), 1, "Should have 1 include pattern");
        assert_eq!(
            query.exclude_patterns.len(),
            1,
            "Should have 1 exclude pattern"
        );

        // Regex and context
        let query = SearchQueryBuilder::new("pattern")
            .use_regex()
            .with_context(3)
            .build();

        assert!(query.use_regex);
        assert!(query.include_context);
        assert_eq!(query.context_lines, 3);

        // Without context
        let query = SearchQueryBuilder::new("pattern").without_context().build();

        assert!(!query.include_context);
    }

    #[test]
    fn test_search_query_builder_convenience_methods() {
        // Markdown docs builder
        let query = SearchQueryBuilder::markdown_docs("test").build();
        assert_eq!(query.content_types.len(), 1, "Should have 1 content type");
        match &query.content_types[0] {
            ContentType::Documentation {
                format: DocumentFormat::Markdown,
            } => {}
            _ => panic!("Expected markdown documentation type"),
        }

        // JS comments builder: JS + TS, Block + Documentation
        let query = SearchQueryBuilder::js_comments("test").build();
        assert_eq!(query.content_types.len(), 4, "Should have 4 content types");

        // Python docs builder: Documentation + Inline
        let query = SearchQueryBuilder::python_docs("test").build();
        assert_eq!(query.content_types.len(), 2, "Should have 2 content types");

        // JSON config builder
        let query = SearchQueryBuilder::json_config("test").build();
        assert_eq!(query.content_types.len(), 1, "Should have 1 content type");
        match &query.content_types[0] {
            ContentType::Configuration {
                format: ConfigFormat::Json,
            } => {}
            _ => panic!("Expected JSON configuration type"),
        }

        // YAML config builder
        let query = SearchQueryBuilder::yaml_config("test").build();
        assert_eq!(query.content_types.len(), 1, "Should have 1 content type");
        match &query.content_types[0] {
            ContentType::Configuration {
                format: ConfigFormat::Yaml,
            } => {}
            _ => panic!("Expected YAML configuration type"),
        }
    }
}