1use super::{
7 extractors::CommentExtractor,
8 index::{ContentIndex, ContentUpdateListener},
9 parsers::DocumentParser,
10 CommentContext, ConfigFormat, ContentChunk, ContentNode, ContentStats, ContentType,
11 DocumentFormat, SearchQuery, SearchResult,
12};
13use crate::ast::{Language, NodeId};
14use crate::graph::GraphStore;
15use anyhow::Result;
16
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19use tree_sitter::Tree;
20
21pub struct ContentSearchManager {
23 index: Arc<ContentIndex>,
25 document_parser: DocumentParser,
27 comment_extractor: CommentExtractor,
29 graph_store: Option<Arc<GraphStore>>,
31}
32
33impl ContentSearchManager {
34 pub fn new() -> Self {
36 Self {
37 index: Arc::new(ContentIndex::new()),
38 document_parser: DocumentParser::new(),
39 comment_extractor: CommentExtractor::new(),
40 graph_store: None,
41 }
42 }
43
44 pub fn with_graph_store(graph_store: Arc<GraphStore>) -> Self {
46 let mut manager = Self::new();
47 manager.graph_store = Some(graph_store);
48 manager
49 }
50
51 pub fn index_file(&self, file_path: &Path, content: &str) -> Result<()> {
53 let language = self.detect_language(file_path);
54
55 let content_node = match language {
56 Some(lang) if self.is_source_code_language(lang) => {
57 self.index_source_file(file_path, content, lang)?
58 }
59 _ => {
60 self.document_parser.parse_file(file_path, content)?
62 }
63 };
64
65 self.index.add_node(content_node)?;
66 Ok(())
67 }
68
69 pub fn index_source_file_with_tree(
71 &self,
72 file_path: &Path,
73 content: &str,
74 tree: &Tree,
75 language: Language,
76 ast_nodes: &[NodeId],
77 ) -> Result<()> {
78 let mut content_node = self.index_source_file(file_path, content, language)?;
79
80 if self.comment_extractor.supports_language(language) {
82 let comment_chunks = self
83 .comment_extractor
84 .extract_comments(language, tree, content, file_path, ast_nodes)?;
85
86 for chunk in comment_chunks {
87 content_node.add_chunk(chunk);
88 }
89 }
90
91 for node_id in ast_nodes {
93 content_node.add_ast_node(*node_id);
94 }
95
96 self.index.add_node(content_node)?;
97 Ok(())
98 }
99
100 pub fn remove_file(&self, file_path: &Path) -> Result<()> {
102 self.index.remove_node(file_path)
103 }
104
105 pub fn search(&self, query: &SearchQuery) -> Result<Vec<SearchResult>> {
107 self.index.search(query)
108 }
109
110 pub fn simple_search(
112 &self,
113 query: &str,
114 max_results: Option<usize>,
115 ) -> Result<Vec<SearchResult>> {
116 let search_query = SearchQuery {
117 query: query.to_string(),
118 max_results: max_results.unwrap_or(50),
119 ..Default::default()
120 };
121
122 self.search(&search_query)
123 }
124
125 pub fn search_documentation(
127 &self,
128 query: &str,
129 max_results: Option<usize>,
130 ) -> Result<Vec<SearchResult>> {
131 let search_query = SearchQuery {
132 query: query.to_string(),
133 content_types: vec![
134 ContentType::Documentation {
135 format: DocumentFormat::Markdown,
136 },
137 ContentType::Documentation {
138 format: DocumentFormat::PlainText,
139 },
140 ContentType::Documentation {
141 format: DocumentFormat::RestructuredText,
142 },
143 ContentType::Documentation {
144 format: DocumentFormat::AsciiDoc,
145 },
146 ContentType::Documentation {
147 format: DocumentFormat::Html,
148 },
149 ],
150 max_results: max_results.unwrap_or(50),
151 ..Default::default()
152 };
153
154 self.search(&search_query)
155 }
156
157 pub fn search_comments(
159 &self,
160 query: &str,
161 language: Option<Language>,
162 max_results: Option<usize>,
163 ) -> Result<Vec<SearchResult>> {
164 let content_types = if let Some(lang) = language {
165 vec![
166 ContentType::Comment {
167 language: lang,
168 context: CommentContext::Block,
169 },
170 ContentType::Comment {
171 language: lang,
172 context: CommentContext::Inline,
173 },
174 ContentType::Comment {
175 language: lang,
176 context: CommentContext::Documentation,
177 },
178 ]
179 } else {
180 vec![
181 ContentType::Comment {
182 language: Language::Unknown,
183 context: CommentContext::Block,
184 },
185 ContentType::Comment {
186 language: Language::Unknown,
187 context: CommentContext::Inline,
188 },
189 ContentType::Comment {
190 language: Language::Unknown,
191 context: CommentContext::Documentation,
192 },
193 ]
194 };
195
196 let search_query = SearchQuery {
197 query: query.to_string(),
198 content_types,
199 max_results: max_results.unwrap_or(50),
200 ..Default::default()
201 };
202
203 self.search(&search_query)
204 }
205
206 pub fn search_configuration(
208 &self,
209 query: &str,
210 max_results: Option<usize>,
211 ) -> Result<Vec<SearchResult>> {
212 let search_query = SearchQuery {
213 query: query.to_string(),
214 content_types: vec![
215 ContentType::Configuration {
216 format: ConfigFormat::Json,
217 },
218 ContentType::Configuration {
219 format: ConfigFormat::Yaml,
220 },
221 ContentType::Configuration {
222 format: ConfigFormat::Toml,
223 },
224 ContentType::Configuration {
225 format: ConfigFormat::Ini,
226 },
227 ContentType::Configuration {
228 format: ConfigFormat::Properties,
229 },
230 ContentType::Configuration {
231 format: ConfigFormat::Env,
232 },
233 ContentType::Configuration {
234 format: ConfigFormat::Xml,
235 },
236 ],
237 max_results: max_results.unwrap_or(50),
238 ..Default::default()
239 };
240
241 self.search(&search_query)
242 }
243
244 pub fn find_files(&self, pattern: &str) -> Result<Vec<PathBuf>> {
246 self.index.find_files(pattern)
247 }
248
249 pub fn get_stats(&self) -> ContentStats {
251 self.index.get_stats()
252 }
253
254 pub fn get_node(&self, file_path: &Path) -> Option<ContentNode> {
256 self.index.get_node(file_path)
257 }
258
259 pub fn add_update_listener(&self, listener: Box<dyn ContentUpdateListener>) {
261 self.index.add_update_listener(listener);
262 }
263
264 pub fn clear(&self) {
266 self.index.clear();
267 }
268
269 pub fn regex_search(
271 &self,
272 pattern: &str,
273 max_results: Option<usize>,
274 ) -> Result<Vec<SearchResult>> {
275 let search_query = SearchQuery {
276 query: pattern.to_string(),
277 use_regex: true,
278 max_results: max_results.unwrap_or(50),
279 ..Default::default()
280 };
281
282 self.search(&search_query)
283 }
284
285 pub fn search_in_files(
287 &self,
288 query: &str,
289 file_patterns: Vec<String>,
290 max_results: Option<usize>,
291 ) -> Result<Vec<SearchResult>> {
292 let search_query = SearchQuery {
293 query: query.to_string(),
294 file_patterns,
295 max_results: max_results.unwrap_or(50),
296 ..Default::default()
297 };
298
299 self.search(&search_query)
300 }
301
302 pub fn supported_comment_languages(&self) -> Vec<Language> {
304 self.comment_extractor.supported_languages()
305 }
306
307 pub fn supports_comment_extraction(&self, language: Language) -> bool {
309 self.comment_extractor.supports_language(language)
310 }
311
312 fn detect_language(&self, file_path: &Path) -> Option<Language> {
316 let extension = file_path.extension()?.to_str()?;
317 let lang = Language::from_extension(extension);
318 if matches!(lang, Language::Unknown) {
319 None
320 } else {
321 Some(lang)
322 }
323 }
324
325 fn is_source_code_language(&self, language: Language) -> bool {
327 matches!(
328 language,
329 Language::JavaScript
330 | Language::TypeScript
331 | Language::Python
332 | Language::Rust
333 | Language::Java
334 | Language::Cpp
335 | Language::C
336 | Language::Go
337 )
338 }
339
340 fn index_source_file(
342 &self,
343 file_path: &Path,
344 content: &str,
345 language: Language,
346 ) -> Result<ContentNode> {
347 let content_type = ContentType::Code { language };
350 let mut node = ContentNode::new(file_path.to_path_buf(), content_type.clone());
351
352 let span = crate::ast::Span::new(
354 0,
355 content.len(),
356 1,
357 content.lines().count(),
358 1,
359 content.lines().last().map(|l| l.len()).unwrap_or(0),
360 );
361
362 let chunk = ContentChunk::new(
363 file_path.to_path_buf(),
364 content_type,
365 content.to_string(),
366 span,
367 0,
368 )
369 .with_metadata(serde_json::json!({
370 "language": format!("{:?}", language),
371 "content_type": "source_code"
372 }));
373
374 node.add_chunk(chunk);
375 node.file_size = content.len();
376
377 Ok(node)
378 }
379}
380
381impl Default for ContentSearchManager {
382 fn default() -> Self {
383 Self::new()
384 }
385}
386
387#[derive(Debug, Clone)]
389pub struct SearchQueryBuilder {
390 query: SearchQuery,
391}
392
393impl SearchQueryBuilder {
394 pub fn new(query: impl Into<String>) -> Self {
396 Self {
397 query: SearchQuery {
398 query: query.into(),
399 ..Default::default()
400 },
401 }
402 }
403
404 pub fn content_types(mut self, types: Vec<ContentType>) -> Self {
406 self.query.content_types = types;
407 self
408 }
409
410 pub fn include_files(mut self, patterns: Vec<String>) -> Self {
412 self.query.file_patterns = patterns;
413 self
414 }
415
416 pub fn exclude_files(mut self, patterns: Vec<String>) -> Self {
418 self.query.exclude_patterns = patterns;
419 self
420 }
421
422 pub fn max_results(mut self, max: usize) -> Self {
424 self.query.max_results = max;
425 self
426 }
427
428 pub fn case_sensitive(mut self) -> Self {
430 self.query.case_sensitive = true;
431 self
432 }
433
434 pub fn use_regex(mut self) -> Self {
436 self.query.use_regex = true;
437 self
438 }
439
440 pub fn with_context(mut self, lines: usize) -> Self {
442 self.query.include_context = true;
443 self.query.context_lines = lines;
444 self
445 }
446
447 pub fn without_context(mut self) -> Self {
449 self.query.include_context = false;
450 self
451 }
452
453 pub fn build(self) -> SearchQuery {
455 self.query
456 }
457}
458
459impl SearchQueryBuilder {
461 pub fn markdown_docs(query: impl Into<String>) -> Self {
463 Self::new(query).content_types(vec![ContentType::Documentation {
464 format: DocumentFormat::Markdown,
465 }])
466 }
467
468 pub fn js_comments(query: impl Into<String>) -> Self {
470 Self::new(query).content_types(vec![
471 ContentType::Comment {
472 language: Language::JavaScript,
473 context: CommentContext::Block,
474 },
475 ContentType::Comment {
476 language: Language::JavaScript,
477 context: CommentContext::Documentation,
478 },
479 ContentType::Comment {
480 language: Language::TypeScript,
481 context: CommentContext::Block,
482 },
483 ContentType::Comment {
484 language: Language::TypeScript,
485 context: CommentContext::Documentation,
486 },
487 ])
488 }
489
490 pub fn python_docs(query: impl Into<String>) -> Self {
492 Self::new(query).content_types(vec![
493 ContentType::Comment {
494 language: Language::Python,
495 context: CommentContext::Documentation,
496 },
497 ContentType::Comment {
498 language: Language::Python,
499 context: CommentContext::Inline,
500 },
501 ])
502 }
503
504 pub fn json_config(query: impl Into<String>) -> Self {
506 Self::new(query).content_types(vec![ContentType::Configuration {
507 format: ConfigFormat::Json,
508 }])
509 }
510
511 pub fn yaml_config(query: impl Into<String>) -> Self {
513 Self::new(query).content_types(vec![ContentType::Configuration {
514 format: ConfigFormat::Yaml,
515 }])
516 }
517}
518
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Builds a manager and indexes each `(path, content)` pair in order.
    ///
    /// Indexing results are discarded on purpose: the tests using this helper
    /// only assert on what can be queried afterwards.
    fn manager_with_files(files: &[(&str, &str)]) -> ContentSearchManager {
        let manager = ContentSearchManager::new();
        for &(path, content) in files {
            let _ = manager.index_file(Path::new(path), content);
        }
        manager
    }

    /// Both constructors start without a graph store.
    #[test]
    fn test_content_search_manager_creation() {
        let from_new = ContentSearchManager::new();
        assert!(from_new.graph_store.is_none());

        let from_default = ContentSearchManager::default();
        assert!(from_default.graph_store.is_none());
    }

    /// `with_graph_store` keeps the store it was given.
    #[test]
    fn test_with_graph_store() {
        let store = Arc::new(GraphStore::new());
        let manager = ContentSearchManager::with_graph_store(Arc::clone(&store));
        assert!(manager.graph_store.is_some());
    }

    /// Known extensions map to their language; unknown or missing ones to `None`.
    #[test]
    fn test_language_detection() {
        let manager = ContentSearchManager::new();

        let expectations = [
            ("test.js", Some(Language::JavaScript)),
            ("test.py", Some(Language::Python)),
            ("test.rs", Some(Language::Rust)),
            ("test.java", Some(Language::Java)),
            ("test.ts", Some(Language::TypeScript)),
            ("test.unknown", None),
            ("README", None),
        ];

        for (file_name, expected) in expectations.iter() {
            assert_eq!(manager.detect_language(Path::new(file_name)), *expected);
        }
    }

    /// Programming languages count as source code; `Unknown` does not.
    #[test]
    fn test_is_source_code_language() {
        let manager = ContentSearchManager::new();

        for lang in [Language::JavaScript, Language::Python, Language::Rust].iter() {
            assert!(manager.is_source_code_language(*lang));
        }
        assert!(!manager.is_source_code_language(Language::Unknown));
    }

    /// A Markdown file is indexed and retrievable with at least one chunk.
    #[test]
    fn test_index_markdown_file() {
        let manager = ContentSearchManager::new();
        let file_path = Path::new("test.md");

        let outcome = manager.index_file(file_path, "# Title\n\nThis is a test document.");
        assert!(outcome.is_ok());

        let indexed = manager.get_node(file_path);
        assert!(indexed.is_some());
        let node = indexed.unwrap();
        assert_eq!(node.file_path, file_path);
        assert!(!node.chunks.is_empty());
    }

    /// A JavaScript file becomes exactly one code chunk tagged as JavaScript.
    #[test]
    fn test_index_javascript_file() {
        let manager = ContentSearchManager::new();
        let file_path = Path::new("test.js");

        let outcome = manager.index_file(file_path, "// Comment\nfunction test() { return 42; }");
        assert!(outcome.is_ok());

        let indexed = manager.get_node(file_path);
        assert!(indexed.is_some());
        let node = indexed.unwrap();
        assert_eq!(node.file_path, file_path);
        assert!(!node.chunks.is_empty());

        assert_eq!(node.chunks.len(), 1);
        match &node.chunks[0].content_type {
            ContentType::Code { language } => assert_eq!(*language, Language::JavaScript),
            _ => panic!("Expected code content type"),
        }
    }

    /// Plain search finds matches, honours the result limit, and returns
    /// nothing for absent terms.
    #[test]
    fn test_simple_search() {
        let manager = manager_with_files(&[
            (
                "test1.md",
                "# Hello World\n\nThis is a test document about programming.",
            ),
            (
                "test2.md",
                "# Testing\n\nAnother document for testing purposes.",
            ),
        ]);

        let found = manager.simple_search("test", Some(10)).unwrap();
        assert!(!found.is_empty());

        let limited = manager.simple_search("test", Some(1)).unwrap();
        assert!(limited.len() <= 1);

        let missing = manager.simple_search("nonexistent", Some(10)).unwrap();
        assert!(missing.is_empty());
    }

    /// Documentation search only returns documentation chunks.
    #[test]
    fn test_search_documentation() {
        let manager = manager_with_files(&[
            ("doc.md", "# API Documentation\n\nThis describes the API."),
            ("readme.txt", "README file with API information."),
            ("code.js", "// This is not documentation\nfunction api() {}"),
        ]);

        let results = manager.search_documentation("API", Some(10)).unwrap();

        assert!(!results.is_empty());
        for result in &results {
            assert!(
                matches!(
                    &result.chunk.content_type,
                    ContentType::Documentation { .. }
                ),
                "Found non-documentation content in documentation search"
            );
        }
    }

    /// Configuration search only returns configuration chunks.
    #[test]
    fn test_search_configuration() {
        let manager = manager_with_files(&[
            ("config.json", r#"{"database": "localhost"}"#),
            ("settings.yaml", "database:\n host: localhost"),
            ("readme.md", "Database configuration info"),
        ]);

        let results = manager.search_configuration("database", Some(10)).unwrap();

        assert!(!results.is_empty());
        for result in &results {
            assert!(
                matches!(
                    &result.chunk.content_type,
                    ContentType::Configuration { .. }
                ),
                "Found non-configuration content in configuration search"
            );
        }
    }

    /// A valid regex finds matches; a malformed one is reported as an error.
    #[test]
    fn test_regex_search() {
        let manager = manager_with_files(&[(
            "test.md",
            "Email: user@example.com\nAnother: admin@test.org",
        )]);

        let emails = manager.regex_search(r"\b\w+@\w+\.\w+\b", Some(10)).unwrap();
        assert!(!emails.is_empty());

        let malformed = manager.regex_search("[invalid", Some(10));
        assert!(malformed.is_err());
    }

    /// Searching with an include pattern still finds the Markdown match.
    #[test]
    fn test_search_in_files() {
        let manager = manager_with_files(&[
            ("test.md", "markdown content"),
            ("test.txt", "text content"),
            ("config.json", r#"{"content": "json"}"#),
        ]);

        let patterns = vec!["*.md".to_string()];
        let results = manager
            .search_in_files("content", patterns, Some(10))
            .unwrap();
        assert!(!results.is_empty());
    }

    /// A removed file can no longer be looked up.
    #[test]
    fn test_file_removal() {
        let file_path = Path::new("temp.md");
        let manager = manager_with_files(&[("temp.md", "# Temporary\n\nThis will be removed.")]);
        assert!(manager.get_node(file_path).is_some());

        let removal = manager.remove_file(file_path);
        assert!(removal.is_ok());
        assert!(manager.get_node(file_path).is_none());
    }

    /// `clear` drops every indexed file.
    #[test]
    fn test_clear() {
        let paths = ["test1.md", "test2.md"];
        let manager = manager_with_files(&[("test1.md", "Content 1"), ("test2.md", "Content 2")]);

        for path in paths.iter() {
            assert!(manager.get_node(Path::new(path)).is_some());
        }

        manager.clear();

        for path in paths.iter() {
            assert!(manager.get_node(Path::new(path)).is_none());
        }
    }

    /// Stats start at zero and grow once a file is indexed.
    #[test]
    fn test_get_stats() {
        let manager = ContentSearchManager::new();

        let before = manager.get_stats();
        assert_eq!(before.total_files, 0);
        assert_eq!(before.total_chunks, 0);

        let _ = manager.index_file(Path::new("test.md"), "# Title\n\nContent");

        let after = manager.get_stats();
        assert!(after.total_files > 0);
        assert!(after.total_chunks > 0);
    }

    /// File lookup by pattern matches on extension and on name prefix.
    #[test]
    fn test_find_files() {
        let manager = manager_with_files(&[
            ("test_one.md", "Content 1"),
            ("test_two.md", "Content 2"),
            ("other.txt", "Content 3"),
        ]);

        let by_extension = manager.find_files(r"\.md$").unwrap();
        assert_eq!(by_extension.len(), 2);

        let by_prefix = manager.find_files(r"test_").unwrap();
        assert_eq!(by_prefix.len(), 2);
    }

    /// Comment extraction is available for JavaScript and Python, not `Unknown`.
    #[test]
    fn test_supported_comment_languages() {
        let manager = ContentSearchManager::new();
        let expected = [Language::JavaScript, Language::Python];

        let supported = manager.supported_comment_languages();
        for lang in expected.iter() {
            assert!(supported.contains(lang));
        }

        for lang in expected.iter() {
            assert!(manager.supports_comment_extraction(*lang));
        }
        assert!(!manager.supports_comment_extraction(Language::Unknown));
    }

    /// Each builder setter is reflected in the built query.
    #[test]
    fn test_search_query_builder() {
        let limited = SearchQueryBuilder::new("test query")
            .max_results(10)
            .case_sensitive()
            .build();
        assert_eq!(limited.query, "test query");
        assert_eq!(limited.max_results, 10);
        assert!(limited.case_sensitive);

        let markdown_only = vec![ContentType::Documentation {
            format: DocumentFormat::Markdown,
        }];
        let typed = SearchQueryBuilder::new("search")
            .content_types(markdown_only)
            .build();
        assert_eq!(typed.content_types.len(), 1);

        let filtered = SearchQueryBuilder::new("search")
            .include_files(vec!["*.md".to_string()])
            .exclude_files(vec!["*.tmp".to_string()])
            .build();
        assert_eq!(filtered.file_patterns.len(), 1);
        assert_eq!(filtered.exclude_patterns.len(), 1);

        let with_context = SearchQueryBuilder::new("pattern")
            .use_regex()
            .with_context(3)
            .build();
        assert!(with_context.use_regex);
        assert!(with_context.include_context);
        assert_eq!(with_context.context_lines, 3);

        let without_context = SearchQueryBuilder::new("pattern").without_context().build();
        assert!(!without_context.include_context);
    }

    /// Each preset constructor yields the expected content types.
    #[test]
    fn test_search_query_builder_convenience_methods() {
        let markdown = SearchQueryBuilder::markdown_docs("test").build();
        assert_eq!(markdown.content_types.len(), 1);
        assert!(
            matches!(
                &markdown.content_types[0],
                ContentType::Documentation {
                    format: DocumentFormat::Markdown,
                }
            ),
            "Expected markdown documentation type"
        );

        let js = SearchQueryBuilder::js_comments("test").build();
        assert_eq!(js.content_types.len(), 4);

        let python = SearchQueryBuilder::python_docs("test").build();
        assert_eq!(python.content_types.len(), 2);

        let json = SearchQueryBuilder::json_config("test").build();
        assert_eq!(json.content_types.len(), 1);
        assert!(
            matches!(
                &json.content_types[0],
                ContentType::Configuration {
                    format: ConfigFormat::Json,
                }
            ),
            "Expected JSON configuration type"
        );

        let yaml = SearchQueryBuilder::yaml_config("test").build();
        assert_eq!(yaml.content_types.len(), 1);
        assert!(
            matches!(
                &yaml.content_types[0],
                ContentType::Configuration {
                    format: ConfigFormat::Yaml,
                }
            ),
            "Expected YAML configuration type"
        );
    }
}