1use super::{
7 ChunkId, ContentChunk, ContentNode, ContentStats, ContentType, ContentUpdate,
8 ContentUpdateKind, SearchMatch, SearchQuery, SearchResult,
9};
10
11use anyhow::Result;
12use dashmap::DashMap;
13use regex::Regex;
14use std::collections::HashSet;
15use std::path::{Path, PathBuf};
16use std::sync::{Arc, RwLock};
17use std::time::SystemTime;
18
/// Thread-safe, in-memory index over extracted file content.
///
/// Supports token (AND-semantics) and regex search, file-pattern
/// filtering, cached statistics, and synchronous update notification.
pub struct ContentIndex {
    /// File path -> content node owning that file's chunks.
    nodes: DashMap<PathBuf, ContentNode>,
    /// Chunk id -> chunk payload, for direct lookup.
    chunks: DashMap<ChunkId, ContentChunk>,
    /// Token -> ids of chunks containing that token.
    /// NOTE(review): queried with lowercased query terms (see
    /// `search_by_tokens`); assumes `chunk.tokens` are stored
    /// lowercased by the tokenizer — confirm upstream.
    token_index: DashMap<String, HashSet<ChunkId>>,
    /// File-name / `*.ext` / path-component key -> matching paths.
    file_index: DashMap<String, HashSet<PathBuf>>,
    /// Stringified content type -> ids of chunks of that type.
    type_index: DashMap<String, HashSet<ChunkId>>,
    /// Lazily computed stats; `None` means invalidated.
    stats_cache: Arc<RwLock<Option<ContentStats>>>,
    /// Observers notified synchronously on every mutation.
    update_listeners: Arc<RwLock<Vec<Box<dyn ContentUpdateListener>>>>,
}
36
impl ContentIndex {
    /// Creates an empty index with no content and no listeners.
    pub fn new() -> Self {
        Self {
            nodes: DashMap::new(),
            chunks: DashMap::new(),
            token_index: DashMap::new(),
            file_index: DashMap::new(),
            type_index: DashMap::new(),
            stats_cache: Arc::new(RwLock::new(None)),
            update_listeners: Arc::new(RwLock::new(Vec::new())),
        }
    }

    /// Inserts `node`, replacing any node previously stored for the
    /// same path.
    ///
    /// Old chunks are unindexed first, the new chunks indexed, the
    /// stats cache invalidated, and listeners notified with a
    /// `Modified` update (also for first-time inserts).
    pub fn add_node(&self, node: ContentNode) -> Result<()> {
        let file_path = node.file_path.clone();

        // Unindex the previous version of this file, if any. The
        // `nodes` read guard stays held while *other* maps are
        // mutated; no `nodes` access happens inside, so no deadlock.
        if let Some(old_node) = self.nodes.get(&file_path) {
            for chunk in &old_node.chunks {
                self.remove_chunk_from_indexes(&chunk.id);
            }
        }

        for chunk in &node.chunks {
            self.add_chunk_to_indexes(chunk.clone())?;
        }

        self.index_file_pattern(&file_path);

        self.nodes.insert(file_path.clone(), node);

        // Invalidate; stats are recomputed lazily by `get_stats`.
        *self.stats_cache.write().unwrap() = None;

        self.notify_update(ContentUpdate {
            file_path,
            update_kind: ContentUpdateKind::Modified,
            timestamp: SystemTime::now(),
        });

        Ok(())
    }

    /// Removes the node for `file_path` and all of its chunks from
    /// every index; a no-op (still `Ok`) if the path is unknown.
    /// Notifies listeners with a `Deleted` update on actual removal.
    pub fn remove_node(&self, file_path: &Path) -> Result<()> {
        if let Some((_, node)) = self.nodes.remove(file_path) {
            for chunk in &node.chunks {
                self.remove_chunk_from_indexes(&chunk.id);
            }

            self.remove_file_pattern(file_path);

            *self.stats_cache.write().unwrap() = None;

            self.notify_update(ContentUpdate {
                file_path: file_path.to_path_buf(),
                update_kind: ContentUpdateKind::Deleted,
                timestamp: SystemTime::now(),
            });
        }

        Ok(())
    }

    /// Returns a clone of the node stored for `file_path`, if any.
    pub fn get_node(&self, file_path: &Path) -> Option<ContentNode> {
        self.nodes.get(file_path).map(|entry| entry.value().clone())
    }

    /// Returns a clone of the chunk with `chunk_id`, if any.
    pub fn get_chunk(&self, chunk_id: &ChunkId) -> Option<ContentChunk> {
        self.chunks.get(chunk_id).map(|entry| entry.value().clone())
    }

    /// Runs `query` against the index and returns scored results,
    /// highest score first.
    ///
    /// Regex queries scan every chunk; plain queries intersect the
    /// token index (AND over whitespace-separated terms). Candidates
    /// are then filtered by content type and file patterns, and the
    /// concrete matches located within each surviving chunk.
    ///
    /// # Errors
    /// Fails if `query.query` is an invalid regex (when `use_regex`)
    /// or a file pattern does not compile.
    ///
    /// NOTE(review): the `max_results` cut happens while iterating
    /// candidates, *before* the score sort below — so the result set
    /// is the first N candidates in map-iteration order, not the
    /// global top N by score. Confirm whether that is intended.
    pub fn search(&self, query: &SearchQuery) -> Result<Vec<SearchResult>> {
        let mut results = Vec::new();
        let mut seen_chunks = HashSet::new();

        let search_regex = if query.use_regex {
            Some(Regex::new(&query.query)?)
        } else {
            None
        };

        let candidate_chunks = if query.use_regex {
            self.search_by_regex(search_regex.as_ref().unwrap(), query)?
        } else {
            self.search_by_tokens(&query.query, query)?
        };

        for chunk_id in candidate_chunks {
            // De-duplicate candidates (regex scan can't repeat ids,
            // but token intersection is defensive about it).
            if seen_chunks.contains(&chunk_id) {
                continue;
            }
            seen_chunks.insert(chunk_id);

            if let Some(chunk) = self.get_chunk(&chunk_id) {
                // Empty `content_types` means "no type filter".
                if !query.content_types.is_empty()
                    && !self.matches_content_type(&chunk.content_type, &query.content_types)
                {
                    continue;
                }

                if !self.matches_file_patterns(
                    &chunk.file_path,
                    &query.file_patterns,
                    &query.exclude_patterns,
                )? {
                    continue;
                }

                let matches = self.find_matches_in_chunk(&chunk, query, &search_regex)?;
                if !matches.is_empty() {
                    let score = self.calculate_relevance_score(&chunk, &matches, query);
                    results.push(SearchResult {
                        chunk: chunk.clone(),
                        score,
                        matches,
                        related_nodes: chunk.related_nodes.clone(),
                    });
                }
            }

            if results.len() >= query.max_results {
                break;
            }
        }

        // Descending by score; NaN scores compare Equal (stable-ish).
        results.sort_by(|a, b| {
            b.score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(results)
    }

    /// Returns all indexed paths whose string form matches `pattern`
    /// (a plain regex, not a glob).
    ///
    /// # Errors
    /// Fails if `pattern` is not a valid regex.
    pub fn find_files(&self, pattern: &str) -> Result<Vec<PathBuf>> {
        let pattern_regex = Regex::new(pattern)?;
        let mut matching_files = Vec::new();

        for entry in self.nodes.iter() {
            let file_path = entry.key();
            if pattern_regex.is_match(&file_path.to_string_lossy()) {
                matching_files.push(file_path.clone());
            }
        }

        Ok(matching_files)
    }

    /// Returns index statistics, recomputing and re-caching them if
    /// the cache was invalidated.
    ///
    /// NOTE(review): between the read and the write two threads may
    /// both recompute; benign (last writer wins), just redundant work.
    pub fn get_stats(&self) -> ContentStats {
        if let Ok(cache) = self.stats_cache.read() {
            if let Some(stats) = cache.as_ref() {
                return stats.clone();
            }
        }

        let stats = self.compute_stats();

        if let Ok(mut cache) = self.stats_cache.write() {
            *cache = Some(stats.clone());
        }

        stats
    }

    /// Registers `listener` for future update notifications.
    /// Silently dropped if the listener lock is poisoned.
    pub fn add_update_listener(&self, listener: Box<dyn ContentUpdateListener>) {
        if let Ok(mut listeners) = self.update_listeners.write() {
            listeners.push(listener);
        }
    }

    /// Empties every index and invalidates cached stats.
    /// Listeners are kept, and — unlike the other mutators — are not
    /// notified of the wipe.
    pub fn clear(&self) {
        self.nodes.clear();
        self.chunks.clear();
        self.token_index.clear();
        self.file_index.clear();
        self.type_index.clear();
        *self.stats_cache.write().unwrap() = None;
    }

    /// Registers `chunk` in the chunk store plus the token and type
    /// indexes. Currently infallible; returns `Result` for symmetry.
    fn add_chunk_to_indexes(&self, chunk: ContentChunk) -> Result<()> {
        let chunk_id = chunk.id;

        for token in &chunk.tokens {
            self.token_index
                .entry(token.clone())
                .or_default()
                .insert(chunk_id);
        }

        let type_key = self.content_type_to_string(&chunk.content_type);
        self.type_index
            .entry(type_key)
            .or_default()
            .insert(chunk_id);

        self.chunks.insert(chunk_id, chunk);

        Ok(())
    }

    /// Removes `chunk_id` from the chunk store and prunes it from the
    /// token and type indexes, dropping key entries that become empty.
    fn remove_chunk_from_indexes(&self, chunk_id: &ChunkId) {
        if let Some((_, chunk)) = self.chunks.remove(chunk_id) {
            for token in &chunk.tokens {
                if let Some(mut token_set) = self.token_index.get_mut(token) {
                    token_set.remove(chunk_id);
                    if token_set.is_empty() {
                        // Guard must be released before `remove` on the
                        // same DashMap shard, or we would deadlock.
                        drop(token_set);
                        self.token_index.remove(token);
                    }
                }
            }

            let type_key = self.content_type_to_string(&chunk.content_type);
            if let Some(mut type_set) = self.type_index.get_mut(&type_key) {
                type_set.remove(chunk_id);
                if type_set.is_empty() {
                    // Same guard-before-remove dance as above.
                    drop(type_set);
                    self.type_index.remove(&type_key);
                }
            }
        }
    }

    /// Indexes `file_path` under its lowercased file name, a
    /// `*.ext` key, and every lowercased path component.
    fn index_file_pattern(&self, file_path: &Path) {
        let file_name = file_path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("");

        let extension = file_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("");

        self.file_index
            .entry(file_name.to_lowercase())
            .or_default()
            .insert(file_path.to_path_buf());

        if !extension.is_empty() {
            self.file_index
                .entry(format!("*.{}", extension.to_lowercase()))
                .or_default()
                .insert(file_path.to_path_buf());
        }

        for component in file_path.components() {
            if let Some(component_str) = component.as_os_str().to_str() {
                self.file_index
                    .entry(component_str.to_lowercase())
                    .or_default()
                    .insert(file_path.to_path_buf());
            }
        }
    }

    /// Removes the file-name and `*.ext` keys for `file_path` from
    /// the file index.
    ///
    /// NOTE(review): `index_file_pattern` also indexes every path
    /// *component*, but those keys are not removed here, so component
    /// entries go stale after a removal — confirm whether intended.
    fn remove_file_pattern(&self, file_path: &Path) {
        let file_name = file_path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("");

        let extension = file_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("");

        if let Some(mut file_set) = self.file_index.get_mut(&file_name.to_lowercase()) {
            file_set.remove(file_path);
            if file_set.is_empty() {
                // Release guard before removing the key (DashMap).
                drop(file_set);
                self.file_index.remove(&file_name.to_lowercase());
            }
        }

        if !extension.is_empty() {
            let ext_key = format!("*.{}", extension.to_lowercase());
            if let Some(mut ext_set) = self.file_index.get_mut(&ext_key) {
                ext_set.remove(file_path);
                if ext_set.is_empty() {
                    drop(ext_set);
                    self.file_index.remove(&ext_key);
                }
            }
        }
    }

    /// Token-index lookup: lowercases `query`, splits on whitespace,
    /// and intersects the chunk sets of every term (AND semantics).
    /// Any term with no indexed chunks short-circuits to an empty
    /// result.
    fn search_by_tokens(&self, query: &str, _search_query: &SearchQuery) -> Result<Vec<ChunkId>> {
        let query_tokens: Vec<String> = query
            .to_lowercase()
            .split_whitespace()
            .map(|s| s.to_string())
            .collect();

        if query_tokens.is_empty() {
            return Ok(Vec::new());
        }

        // None until the first term seeds the running intersection.
        let mut result_chunks: Option<HashSet<ChunkId>> = None;

        for token in &query_tokens {
            if let Some(chunk_set) = self.token_index.get(token) {
                let chunk_ids: HashSet<ChunkId> = chunk_set.iter().copied().collect();
                result_chunks = Some(match result_chunks {
                    None => chunk_ids,
                    Some(existing) => existing.intersection(&chunk_ids).copied().collect(),
                });
            } else {
                return Ok(Vec::new());
            }
        }

        Ok(result_chunks.unwrap_or_default().into_iter().collect())
    }

    /// Full scan of all chunks, returning ids whose content matches
    /// `regex`.
    ///
    /// NOTE(review): when `case_sensitive` is false the *content* is
    /// lowercased but the pattern was compiled verbatim, so a pattern
    /// containing uppercase literals can never match — an inline
    /// `(?i)` flag on the pattern would behave more predictably.
    fn search_by_regex(&self, regex: &Regex, search_query: &SearchQuery) -> Result<Vec<ChunkId>> {
        let mut matching_chunks = Vec::new();

        for entry in self.chunks.iter() {
            let chunk = entry.value();
            let content = if search_query.case_sensitive {
                &chunk.content
            } else {
                &chunk.content.to_lowercase()
            };

            if regex.is_match(content) {
                matching_chunks.push(chunk.id);
            }
        }

        Ok(matching_chunks)
    }

    /// Locates every occurrence of the query inside `chunk`, with
    /// optional surrounding context lines.
    ///
    /// Positions are byte offsets into the (possibly lowercased)
    /// content; for ASCII content these equal offsets into the
    /// original text — non-ASCII lowercasing can change byte lengths,
    /// so offsets may then drift. NOTE(review): the literal path
    /// advances by `absolute_pos + 1`, which can land inside a UTF-8
    /// sequence and panic on slicing for non-ASCII content — confirm
    /// inputs are ASCII or advance by the match length instead.
    fn find_matches_in_chunk(
        &self,
        chunk: &ContentChunk,
        query: &SearchQuery,
        regex: &Option<Regex>,
    ) -> Result<Vec<SearchMatch>> {
        let mut matches = Vec::new();
        let content = if query.case_sensitive {
            chunk.content.clone()
        } else {
            chunk.content.to_lowercase()
        };

        let search_term = if query.case_sensitive {
            query.query.clone()
        } else {
            query.query.to_lowercase()
        };

        if let Some(regex) = regex {
            // Regex path: non-overlapping matches via find_iter.
            for regex_match in regex.find_iter(&content) {
                let line_info = self.calculate_line_info(&content, regex_match.start());
                let search_match = SearchMatch {
                    text: regex_match.as_str().to_string(),
                    position: regex_match.start(),
                    line_number: line_info.0,
                    column_number: line_info.1,
                    context_before: if query.include_context {
                        self.get_context_before(&content, regex_match.start(), query.context_lines)
                    } else {
                        None
                    },
                    context_after: if query.include_context {
                        self.get_context_after(&content, regex_match.end(), query.context_lines)
                    } else {
                        None
                    },
                };
                matches.push(search_match);
            }
        } else {
            // Literal path: advancing by one byte also finds
            // overlapping occurrences (e.g. "aa" in "aaa" twice).
            let mut start = 0;
            while let Some(pos) = content[start..].find(&search_term) {
                let absolute_pos = start + pos;
                let line_info = self.calculate_line_info(&content, absolute_pos);
                let search_match = SearchMatch {
                    text: search_term.clone(),
                    position: absolute_pos,
                    line_number: line_info.0,
                    column_number: line_info.1,
                    context_before: if query.include_context {
                        self.get_context_before(&content, absolute_pos, query.context_lines)
                    } else {
                        None
                    },
                    context_after: if query.include_context {
                        self.get_context_after(
                            &content,
                            absolute_pos + search_term.len(),
                            query.context_lines,
                        )
                    } else {
                        None
                    },
                };
                matches.push(search_match);
                start = absolute_pos + 1;
            }
        }

        Ok(matches)
    }

    /// Maps a byte `position` to a (line, column) pair, both derived
    /// from the prefix before `position`.
    ///
    /// NOTE(review): `lines().count()` on the prefix yields 0 for
    /// position 0 and, for a position sitting at the start of a line
    /// (just after '\n'), reports the *previous* line and a column of
    /// `prev_line.len() + 1` — callers appear to rely on these 1-based
    /// semantics; confirm before changing.
    fn calculate_line_info(&self, content: &str, position: usize) -> (usize, usize) {
        let before_position = &content[..position.min(content.len())];
        let line_number = before_position.lines().count();
        let column_number = before_position
            .lines()
            .last()
            .map(|line| line.len() + 1)
            .unwrap_or(1);
        (line_number, column_number)
    }

    /// Returns up to `context_lines` lines preceding the line that
    /// contains `position`, joined by '\n'; `None` when there is no
    /// preceding context or `context_lines` is 0.
    fn get_context_before(
        &self,
        content: &str,
        position: usize,
        context_lines: usize,
    ) -> Option<String> {
        if context_lines == 0 {
            return None;
        }

        let lines: Vec<&str> = content.lines().collect();
        let (line_number, _) = self.calculate_line_info(content, position);

        // Position at the very start of the content: nothing before.
        if line_number == 0 {
            return None;
        }

        // Convert the 1-based line count to 0-based slice indices of
        // the lines strictly before the match's line.
        let start_line = line_number.saturating_sub(context_lines + 1);
        let end_line = line_number.saturating_sub(1);

        if start_line >= lines.len() || end_line >= lines.len() || start_line > end_line {
            return None;
        }

        Some(lines[start_line..=end_line].join("\n"))
    }

    /// Returns up to `context_lines` lines starting at the line that
    /// contains `position` (typically called with the match's *end*),
    /// joined by '\n'; `None` when out of range or `context_lines` is 0.
    fn get_context_after(
        &self,
        content: &str,
        position: usize,
        context_lines: usize,
    ) -> Option<String> {
        if context_lines == 0 {
            return None;
        }

        let lines: Vec<&str> = content.lines().collect();
        let (line_number, _) = self.calculate_line_info(content, position);

        let start_line = line_number;
        let end_line = (start_line + context_lines).min(lines.len().saturating_sub(1));

        if start_line >= lines.len() || start_line > end_line {
            return None;
        }

        Some(lines[start_line..=end_line].join("\n"))
    }

    /// Scores a result in [0, 1]: a base weight from the content type
    /// plus 0.1 per match, capped at 1.0. Zero for no matches.
    fn calculate_relevance_score(
        &self,
        chunk: &ContentChunk,
        matches: &[SearchMatch],
        _query: &SearchQuery,
    ) -> f32 {
        if matches.is_empty() {
            return 0.0;
        }

        // Documentation ranks highest, plain text lowest.
        let type_score = match &chunk.content_type {
            ContentType::Documentation { .. } => 0.8,
            ContentType::Comment { context, .. } => match context {
                super::CommentContext::Documentation => 0.7,
                super::CommentContext::Function { .. } => 0.6,
                super::CommentContext::Class { .. } => 0.6,
                _ => 0.4,
            },
            ContentType::Code { .. } => 0.5,
            ContentType::Configuration { .. } => 0.4,
            ContentType::PlainText => 0.2,
        };

        let match_bonus = matches.len() as f32 * 0.1;

        (type_score + match_bonus).min(1.0)
    }

    /// True if `content_type` has the same variant as any allowed
    /// type. Only the variant is compared — payloads (language,
    /// format, context) are deliberately ignored via `discriminant`.
    fn matches_content_type(
        &self,
        content_type: &ContentType,
        allowed_types: &[ContentType],
    ) -> bool {
        allowed_types
            .iter()
            .any(|allowed| std::mem::discriminant(content_type) == std::mem::discriminant(allowed))
    }

    /// Applies glob-style include/exclude filters to `file_path`.
    /// Exclusions win; an empty include list matches everything.
    ///
    /// NOTE(review): each pattern is converted and recompiled on every
    /// call (i.e. per candidate chunk during a search) — hoisting the
    /// compiled regexes would avoid repeated work.
    fn matches_file_patterns(
        &self,
        file_path: &Path,
        include_patterns: &[String],
        exclude_patterns: &[String],
    ) -> Result<bool> {
        let path_str = file_path.to_string_lossy();

        for pattern in exclude_patterns {
            let regex_pattern = self.glob_to_regex(pattern);
            let regex = Regex::new(&regex_pattern)?;
            if regex.is_match(&path_str) {
                return Ok(false);
            }
        }

        if include_patterns.is_empty() {
            return Ok(true);
        }

        for pattern in include_patterns {
            let regex_pattern = self.glob_to_regex(pattern);
            let regex = Regex::new(&regex_pattern)?;
            if regex.is_match(&path_str) {
                return Ok(true);
            }
        }

        Ok(false)
    }

    /// Converts a glob into an anchored regex: `*` -> `.*`, `?` -> `.`,
    /// regex metacharacters escaped, everything else literal.
    /// NOTE(review): `*` becomes `.*`, which crosses path separators
    /// (unlike shell globs) — `*.md` matches `dir/sub/x.md`.
    fn glob_to_regex(&self, glob: &str) -> String {
        let mut regex = String::new();
        regex.push('^');

        for ch in glob.chars() {
            match ch {
                '*' => regex.push_str(".*"),
                '?' => regex.push('.'),
                '.' => regex.push_str("\\."),
                '+' => regex.push_str("\\+"),
                '^' => regex.push_str("\\^"),
                '$' => regex.push_str("\\$"),
                '(' => regex.push_str("\\("),
                ')' => regex.push_str("\\)"),
                '[' => regex.push_str("\\["),
                ']' => regex.push_str("\\]"),
                '{' => regex.push_str("\\{"),
                '}' => regex.push_str("\\}"),
                '|' => regex.push_str("\\|"),
                '\\' => regex.push_str("\\\\"),
                c => regex.push(c),
            }
        }

        regex.push('$');
        regex
    }

    /// Produces the type-index key for a content type, embedding the
    /// Debug form of the variant payloads (e.g. "code:Rust").
    fn content_type_to_string(&self, content_type: &ContentType) -> String {
        match content_type {
            ContentType::Code { language } => format!("code:{:?}", language),
            ContentType::Documentation { format } => format!("doc:{:?}", format),
            ContentType::Configuration { format } => format!("config:{:?}", format),
            ContentType::Comment { language, context } => {
                format!("comment:{:?}:{:?}", language, context)
            }
            ContentType::PlainText => "text".to_string(),
        }
    }

    /// Builds a fresh `ContentStats` snapshot by scanning all maps.
    /// `total_tokens` is the number of *distinct* indexed tokens, not
    /// total token occurrences.
    fn compute_stats(&self) -> ContentStats {
        let mut stats = ContentStats::new();

        stats.total_files = self.nodes.len();
        stats.total_chunks = self.chunks.len();

        stats.total_tokens = self.token_index.len();

        for entry in self.type_index.iter() {
            let type_name = entry.key().clone();
            let chunk_count = entry.value().len();
            stats.content_by_type.insert(type_name, chunk_count);
        }

        // Bucket files into coarse size ranges by recorded file size.
        for entry in self.nodes.iter() {
            let node = entry.value();
            let size_bucket = match node.file_size {
                0..=1024 => "small (0-1KB)",
                1025..=10240 => "medium (1-10KB)",
                10241..=102400 => "large (10-100KB)",
                _ => "very_large (>100KB)",
            };
            *stats
                .size_distribution
                .entry(size_bucket.to_string())
                .or_insert(0) += 1;
        }

        stats.computed_at = SystemTime::now();
        stats
    }

    /// Invokes every registered listener synchronously with `update`.
    /// Silently does nothing if the listener lock is poisoned.
    fn notify_update(&self, update: ContentUpdate) {
        if let Ok(listeners) = self.update_listeners.read() {
            for listener in listeners.iter() {
                listener.on_content_update(&update);
            }
        }
    }
}
725
726impl Default for ContentIndex {
727 fn default() -> Self {
728 Self::new()
729 }
730}
731
/// Observer interface for index mutations. `Send + Sync` because the
/// index (and thus its listener list) is shared across threads.
pub trait ContentUpdateListener: Send + Sync {
    /// Called synchronously for each update while the index holds the
    /// listener list's read lock — implementations should return
    /// quickly and must not call `add_update_listener` re-entrantly
    /// (that would take the write lock and deadlock).
    fn on_content_update(&self, update: &ContentUpdate);
}
737
/// Minimal listener that writes each content update to stderr.
pub struct LoggingUpdateListener;
740
741impl ContentUpdateListener for LoggingUpdateListener {
742 fn on_content_update(&self, update: &ContentUpdate) {
743 eprintln!(
744 "Content updated: {:?} at {:?}",
745 update.file_path, update.timestamp
746 );
747 }
748}
749
750#[cfg(test)]
751mod tests {
752 use super::*;
753 use crate::ast::Span;
754 use crate::content::ChunkId;
755 use crate::{ConfigFormat, DocumentFormat};
756 use std::path::Path;
757
758 fn create_test_chunk(
759 file_path: &Path,
760 content: &str,
761 content_type: ContentType,
762 chunk_index: usize,
763 ) -> ContentChunk {
764 let span = Span::new(0, content.len(), 1, 1, 1, content.len());
765 ContentChunk::new(
766 file_path.to_path_buf(),
767 content_type,
768 content.to_string(),
769 span,
770 chunk_index,
771 )
772 }
773
774 fn create_test_node(file_path: &Path, chunks: Vec<ContentChunk>) -> ContentNode {
775 let mut node = ContentNode::new(file_path.to_path_buf(), chunks[0].content_type.clone());
776 for chunk in chunks {
777 node.add_chunk(chunk);
778 }
779 node.file_size = 1000; node
781 }
782
783 #[test]
784 fn test_content_index_creation() {
785 let index = ContentIndex::new();
786
787 let _index_default = ContentIndex::default();
789
790 let stats = index.get_stats();
792 assert_eq!(stats.total_files, 0);
793 assert_eq!(stats.total_chunks, 0);
794 }
795
796 #[test]
797 fn test_add_and_get_node() {
798 let index = ContentIndex::new();
799 let file_path = Path::new("test.md");
800
801 let chunk = create_test_chunk(
803 file_path,
804 "# Test Document\n\nThis is a test.",
805 ContentType::Documentation {
806 format: DocumentFormat::Markdown,
807 },
808 0,
809 );
810 let node = create_test_node(file_path, vec![chunk]);
811
812 let result = index.add_node(node.clone());
814 assert!(result.is_ok());
815
816 let retrieved_node = index.get_node(file_path);
818 assert!(retrieved_node.is_some());
819 let retrieved_node = retrieved_node.unwrap();
820 assert_eq!(retrieved_node.file_path, file_path);
821 assert_eq!(retrieved_node.chunks.len(), 1);
822 }
823
824 #[test]
825 fn test_add_node_replaces_existing() {
826 let index = ContentIndex::new();
827 let file_path = Path::new("test.md");
828
829 let chunk1 = create_test_chunk(
831 file_path,
832 "Original content",
833 ContentType::Documentation {
834 format: DocumentFormat::Markdown,
835 },
836 0,
837 );
838 let node1 = create_test_node(file_path, vec![chunk1]);
839 let _ = index.add_node(node1);
840
841 let chunk2 = create_test_chunk(
843 file_path,
844 "Updated content",
845 ContentType::Documentation {
846 format: DocumentFormat::Markdown,
847 },
848 1,
849 );
850 let node2 = create_test_node(file_path, vec![chunk2]);
851 let _ = index.add_node(node2);
852
853 let retrieved_node = index.get_node(file_path).unwrap();
855 assert_eq!(retrieved_node.chunks[0].content, "Updated content");
856 }
857
858 #[test]
859 fn test_remove_node() {
860 let index = ContentIndex::new();
861 let file_path = Path::new("test.md");
862
863 let chunk = create_test_chunk(
865 file_path,
866 "Test content",
867 ContentType::Documentation {
868 format: DocumentFormat::Markdown,
869 },
870 0,
871 );
872 let node = create_test_node(file_path, vec![chunk]);
873 let _ = index.add_node(node);
874
875 assert!(index.get_node(file_path).is_some());
877
878 let result = index.remove_node(file_path);
880 assert!(result.is_ok());
881
882 assert!(index.get_node(file_path).is_none());
884 }
885
886 #[test]
887 fn test_get_chunk() {
888 let index = ContentIndex::new();
889 let file_path = Path::new("test.md");
890
891 let chunk = create_test_chunk(
892 file_path,
893 "Test content",
894 ContentType::Documentation {
895 format: DocumentFormat::Markdown,
896 },
897 42,
898 );
899 let chunk_id = chunk.id;
900 let node = create_test_node(file_path, vec![chunk]);
901
902 let _ = index.add_node(node);
903
904 let retrieved_chunk = index.get_chunk(&chunk_id);
906 assert!(retrieved_chunk.is_some());
907 assert_eq!(retrieved_chunk.unwrap().content, "Test content");
908
909 let fake_chunk_id = ChunkId::new(Path::new("nonexistent.md"), 9999, &[0u8; 32]);
911 let non_existent = index.get_chunk(&fake_chunk_id);
912 assert!(non_existent.is_none());
913 }
914
915 #[test]
916 fn test_simple_text_search() {
917 let index = ContentIndex::new();
918
919 let file1 = Path::new("doc1.md");
921 let chunk1 = create_test_chunk(
922 file1,
923 "This is a test document about programming",
924 ContentType::Documentation {
925 format: DocumentFormat::Markdown,
926 },
927 1,
928 );
929 let node1 = create_test_node(file1, vec![chunk1]);
930 let _ = index.add_node(node1);
931
932 let file2 = Path::new("doc2.md");
933 let chunk2 = create_test_chunk(
934 file2,
935 "Another document for testing purposes",
936 ContentType::Documentation {
937 format: DocumentFormat::Markdown,
938 },
939 2,
940 );
941 let node2 = create_test_node(file2, vec![chunk2]);
942 let _ = index.add_node(node2);
943
944 let search_query = SearchQuery {
946 query: "document".to_string(),
947 max_results: 10,
948 ..Default::default()
949 };
950
951 let results = index.search(&search_query).unwrap();
952 assert!(!results.is_empty());
953
954 let result_contents: Vec<_> = results.iter().map(|r| &r.chunk.content).collect();
956 assert!(result_contents
957 .iter()
958 .any(|content| content.contains("programming")));
959 assert!(result_contents
960 .iter()
961 .any(|content| content.contains("testing")));
962 }
963
964 #[test]
965 fn test_regex_search() {
966 let index = ContentIndex::new();
967
968 let file_path = Path::new("contacts.md");
970 let chunk = create_test_chunk(
971 file_path,
972 "Contact John at john@example.com or Mary at mary@test.org",
973 ContentType::Documentation {
974 format: DocumentFormat::Markdown,
975 },
976 1,
977 );
978 let node = create_test_node(file_path, vec![chunk]);
979 let _ = index.add_node(node);
980
981 let search_query = SearchQuery {
983 query: r"\b\w+@\w+\.\w+\b".to_string(),
984 use_regex: true,
985 max_results: 10,
986 ..Default::default()
987 };
988
989 let results = index.search(&search_query).unwrap();
990 assert!(!results.is_empty());
991
992 let result = &results[0];
994 assert!(!result.matches.is_empty());
995 }
996
997 #[test]
998 fn test_search_with_content_type_filter() {
999 let index = ContentIndex::new();
1000
1001 let md_file = Path::new("doc.md");
1003 let md_chunk = create_test_chunk(
1004 md_file,
1005 "Documentation content",
1006 ContentType::Documentation {
1007 format: DocumentFormat::Markdown,
1008 },
1009 1,
1010 );
1011 let md_node = create_test_node(md_file, vec![md_chunk]);
1012 let _ = index.add_node(md_node);
1013
1014 let json_file = Path::new("config.json");
1015 let json_chunk = create_test_chunk(
1016 json_file,
1017 r#"{"config": "content"}"#,
1018 ContentType::Configuration {
1019 format: ConfigFormat::Json,
1020 },
1021 2,
1022 );
1023 let json_node = create_test_node(json_file, vec![json_chunk]);
1024 let _ = index.add_node(json_node);
1025
1026 let search_query = SearchQuery {
1028 query: "content".to_string(),
1029 content_types: vec![ContentType::Documentation {
1030 format: DocumentFormat::Markdown,
1031 }],
1032 max_results: 10,
1033 ..Default::default()
1034 };
1035
1036 let results = index.search(&search_query).unwrap();
1037 assert_eq!(results.len(), 1);
1038 assert!(results[0].chunk.content.contains("Documentation"));
1039 }
1040
1041 #[test]
1042 fn test_search_with_file_patterns() {
1043 let index = ContentIndex::new();
1044
1045 let md_file = Path::new("test.md");
1047 let md_chunk = create_test_chunk(
1048 md_file,
1049 "Markdown content",
1050 ContentType::Documentation {
1051 format: DocumentFormat::Markdown,
1052 },
1053 1,
1054 );
1055 let md_node = create_test_node(md_file, vec![md_chunk]);
1056 let _ = index.add_node(md_node);
1057
1058 let txt_file = Path::new("test.txt");
1059 let txt_chunk = create_test_chunk(
1060 txt_file,
1061 "Text content",
1062 ContentType::Documentation {
1063 format: DocumentFormat::PlainText,
1064 },
1065 2,
1066 );
1067 let txt_node = create_test_node(txt_file, vec![txt_chunk]);
1068 let _ = index.add_node(txt_node);
1069
1070 let search_query = SearchQuery {
1072 query: "content".to_string(),
1073 file_patterns: vec!["*.md".to_string()],
1074 max_results: 10,
1075 ..Default::default()
1076 };
1077
1078 let results = index.search(&search_query).unwrap();
1079 assert_eq!(results.len(), 1);
1080 assert!(results[0].chunk.content.contains("Markdown"));
1081 }
1082
1083 #[test]
1084 fn test_search_with_exclude_patterns() {
1085 let index = ContentIndex::new();
1086
1087 let md_file = Path::new("test.md");
1089 let md_chunk = create_test_chunk(
1090 md_file,
1091 "Markdown content",
1092 ContentType::Documentation {
1093 format: DocumentFormat::Markdown,
1094 },
1095 1,
1096 );
1097 let md_node = create_test_node(md_file, vec![md_chunk]);
1098 let _ = index.add_node(md_node);
1099
1100 let tmp_file = Path::new("temp.tmp");
1101 let tmp_chunk = create_test_chunk(
1102 tmp_file,
1103 "Temporary content",
1104 ContentType::Documentation {
1105 format: DocumentFormat::PlainText,
1106 },
1107 2,
1108 );
1109 let tmp_node = create_test_node(tmp_file, vec![tmp_chunk]);
1110 let _ = index.add_node(tmp_node);
1111
1112 let search_query = SearchQuery {
1114 query: "content".to_string(),
1115 exclude_patterns: vec!["*.tmp".to_string()],
1116 max_results: 10,
1117 ..Default::default()
1118 };
1119
1120 let results = index.search(&search_query).unwrap();
1121 assert_eq!(results.len(), 1);
1122 assert!(results[0].chunk.content.contains("Markdown"));
1123 }
1124
1125 #[test]
1126 fn test_search_with_context() {
1127 let index = ContentIndex::new();
1128
1129 let file_path = Path::new("test.md");
1130 let content = "Line 1\nLine 2 with target\nLine 3\nLine 4";
1131 let chunk = create_test_chunk(
1132 file_path,
1133 content,
1134 ContentType::Documentation {
1135 format: DocumentFormat::Markdown,
1136 },
1137 1,
1138 );
1139 let node = create_test_node(file_path, vec![chunk]);
1140 let _ = index.add_node(node);
1141
1142 let search_query = SearchQuery {
1144 query: "target".to_string(),
1145 include_context: true,
1146 context_lines: 1,
1147 max_results: 10,
1148 ..Default::default()
1149 };
1150
1151 let results = index.search(&search_query).unwrap();
1152 assert!(!results.is_empty());
1153
1154 let result = &results[0];
1155 assert!(!result.matches.is_empty());
1156
1157 let search_match = &result.matches[0];
1159 assert!(search_match.context_before.is_some());
1160 assert!(search_match.context_after.is_some());
1161 }
1162
1163 #[test]
1164 fn test_search_case_sensitive() {
1165 let index = ContentIndex::new();
1166
1167 let file_path = Path::new("test.md");
1168 let chunk = create_test_chunk(
1169 file_path,
1170 "Test with UPPERCASE and lowercase",
1171 ContentType::Documentation {
1172 format: DocumentFormat::Markdown,
1173 },
1174 1,
1175 );
1176 let node = create_test_node(file_path, vec![chunk]);
1177 let _ = index.add_node(node);
1178
1179 let search_query = SearchQuery {
1181 query: "UPPERCASE".to_string(),
1182 case_sensitive: true,
1183 max_results: 10,
1184 ..Default::default()
1185 };
1186
1187 let results = index.search(&search_query).unwrap();
1188 assert!(!results.is_empty());
1189
1190 let search_query_lower = SearchQuery {
1192 query: "uppercase".to_string(),
1193 case_sensitive: true,
1194 max_results: 10,
1195 ..Default::default()
1196 };
1197
1198 let results_lower = index.search(&search_query_lower).unwrap();
1199 assert!(results_lower.is_empty());
1200 }
1201
1202 #[test]
1203 fn test_search_max_results() {
1204 let index = ContentIndex::new();
1205
1206 for i in 0..10 {
1208 let file_path = PathBuf::from(format!("doc{}.md", i));
1209 let chunk = create_test_chunk(
1210 &file_path,
1211 &format!("Document {} contains the search term", i),
1212 ContentType::Documentation {
1213 format: DocumentFormat::Markdown,
1214 },
1215 i,
1216 );
1217 let node = create_test_node(&file_path, vec![chunk]);
1218 let _ = index.add_node(node);
1219 }
1220
1221 let search_query = SearchQuery {
1223 query: "search".to_string(),
1224 max_results: 3,
1225 ..Default::default()
1226 };
1227
1228 let results = index.search(&search_query).unwrap();
1229 assert_eq!(results.len(), 3);
1230 }
1231
1232 #[test]
1233 fn test_find_files() {
1234 let index = ContentIndex::new();
1235
1236 let files = ["test_one.md", "test_two.md", "other.txt", "config.json"];
1238 for (i, file_name) in files.iter().enumerate() {
1239 let file_path = Path::new(file_name);
1240 let chunk = create_test_chunk(
1241 file_path,
1242 &format!("Content {}", i),
1243 ContentType::Documentation {
1244 format: DocumentFormat::Markdown,
1245 },
1246 i,
1247 );
1248 let node = create_test_node(file_path, vec![chunk]);
1249 let _ = index.add_node(node);
1250 }
1251
1252 let md_files = index.find_files(r"\.md$").unwrap();
1254 assert_eq!(md_files.len(), 2);
1255
1256 let test_files = index.find_files(r"test_").unwrap();
1258 assert_eq!(test_files.len(), 2);
1259
1260 let all_files = index.find_files(r".*").unwrap();
1262 assert_eq!(all_files.len(), 4);
1263 }
1264
1265 #[test]
1266 fn test_content_stats() {
1267 let index = ContentIndex::new();
1268
1269 let stats = index.get_stats();
1271 assert_eq!(stats.total_files, 0);
1272 assert_eq!(stats.total_chunks, 0);
1273
1274 let file1 = Path::new("doc1.md");
1276 let chunk1 = create_test_chunk(
1277 file1,
1278 "First document",
1279 ContentType::Documentation {
1280 format: DocumentFormat::Markdown,
1281 },
1282 1,
1283 );
1284 let node1 = create_test_node(file1, vec![chunk1]);
1285 let _ = index.add_node(node1);
1286
1287 let file2 = Path::new("doc2.md");
1288 let chunk2a = create_test_chunk(
1289 file2,
1290 "Second document first chunk",
1291 ContentType::Documentation {
1292 format: DocumentFormat::Markdown,
1293 },
1294 2,
1295 );
1296 let chunk2b = create_test_chunk(
1297 file2,
1298 "Second document second chunk",
1299 ContentType::Documentation {
1300 format: DocumentFormat::Markdown,
1301 },
1302 3,
1303 );
1304 let node2 = create_test_node(file2, vec![chunk2a, chunk2b]);
1305 let _ = index.add_node(node2);
1306
1307 let stats = index.get_stats();
1309 assert_eq!(stats.total_files, 2);
1310 assert_eq!(stats.total_chunks, 3);
1311 }
1312
1313 #[test]
1314 fn test_content_update_listeners() {
1315 struct TestListener {
1316 updates: Arc<std::sync::Mutex<Vec<ContentUpdate>>>,
1317 }
1318
1319 impl ContentUpdateListener for TestListener {
1320 fn on_content_update(&self, update: &ContentUpdate) {
1321 self.updates.lock().unwrap().push(update.clone());
1322 }
1323 }
1324
1325 let index = ContentIndex::new();
1326 let updates = Arc::new(std::sync::Mutex::new(Vec::new()));
1327 let listener = TestListener {
1328 updates: updates.clone(),
1329 };
1330
1331 index.add_update_listener(Box::new(listener));
1332
1333 let file_path = Path::new("test.md");
1335 let chunk = create_test_chunk(
1336 file_path,
1337 "Test content",
1338 ContentType::Documentation {
1339 format: DocumentFormat::Markdown,
1340 },
1341 1,
1342 );
1343 let node = create_test_node(file_path, vec![chunk]);
1344 let _ = index.add_node(node);
1345
1346 let updates = updates.lock().unwrap();
1348 assert_eq!(updates.len(), 1);
1349 assert_eq!(updates[0].file_path, file_path);
1350 assert!(matches!(
1351 updates[0].update_kind,
1352 ContentUpdateKind::Modified
1353 ));
1354 }
1355
1356 #[test]
1357 fn test_clear() {
1358 let index = ContentIndex::new();
1359
1360 let file_path = Path::new("test.md");
1362 let chunk = create_test_chunk(
1363 file_path,
1364 "Test content",
1365 ContentType::Documentation {
1366 format: DocumentFormat::Markdown,
1367 },
1368 1,
1369 );
1370 let node = create_test_node(file_path, vec![chunk]);
1371 let _ = index.add_node(node);
1372
1373 assert!(index.get_node(file_path).is_some());
1375 let stats = index.get_stats();
1376 assert!(stats.total_files > 0);
1377
1378 index.clear();
1380
1381 assert!(index.get_node(file_path).is_none());
1383 let stats = index.get_stats();
1384 assert_eq!(stats.total_files, 0);
1385 assert_eq!(stats.total_chunks, 0);
1386 }
1387
1388 #[test]
1389 fn test_invalid_regex_search() {
1390 let index = ContentIndex::new();
1391
1392 let file_path = Path::new("test.md");
1394 let chunk = create_test_chunk(
1395 file_path,
1396 "Test content",
1397 ContentType::Documentation {
1398 format: DocumentFormat::Markdown,
1399 },
1400 1,
1401 );
1402 let node = create_test_node(file_path, vec![chunk]);
1403 let _ = index.add_node(node);
1404
1405 let search_query = SearchQuery {
1407 query: "[invalid".to_string(),
1408 use_regex: true,
1409 max_results: 10,
1410 ..Default::default()
1411 };
1412
1413 let result = index.search(&search_query);
1414 assert!(result.is_err());
1415 }
1416
1417 #[test]
1418 fn test_logging_update_listener() {
1419 let listener = LoggingUpdateListener;
1420 let update = ContentUpdate {
1421 file_path: PathBuf::from("test.md"),
1422 update_kind: ContentUpdateKind::Modified,
1423 timestamp: SystemTime::now(),
1424 };
1425
1426 listener.on_content_update(&update);
1428 }
1429
1430 #[test]
1431 fn test_line_info_calculation() {
1432 let index = ContentIndex::new();
1433
1434 let content = "Line 1\nLine 2\nLine 3 with text\nLine 4";
1435 let position = content.find("text").unwrap();
1436
1437 let (line, column) = index.calculate_line_info(content, position);
1438 assert_eq!(line, 3); assert!(column > 1); }
1441
1442 #[test]
1443 fn test_context_extraction() {
1444 let index = ContentIndex::new();
1445
1446 let content = "Line 1\nLine 2\nLine 3 target\nLine 4\nLine 5";
1447 let position = content.find("target").unwrap();
1448
1449 let context_before = index.get_context_before(content, position, 1);
1451 assert!(context_before.is_some());
1452 assert!(context_before.unwrap().contains("Line 2"));
1453
1454 let context_after = index.get_context_after(content, position + 6, 1);
1456 assert!(context_after.is_some());
1457 assert!(context_after.unwrap().contains("Line 4"));
1458
1459 let no_context = index.get_context_before(content, position, 0);
1461 assert!(no_context.is_none());
1462 }
1463
1464 #[test]
1465 fn test_relevance_score_calculation() {
1466 let index = ContentIndex::new();
1467
1468 let file_path = Path::new("test.md");
1469 let chunk = create_test_chunk(
1470 file_path,
1471 "Test document with multiple test occurrences",
1472 ContentType::Documentation {
1473 format: DocumentFormat::Markdown,
1474 },
1475 1,
1476 );
1477
1478 let matches = vec![
1479 SearchMatch {
1480 text: "test".to_string(),
1481 position: 0,
1482 line_number: 1,
1483 column_number: 1,
1484 context_before: None,
1485 context_after: None,
1486 },
1487 SearchMatch {
1488 text: "test".to_string(),
1489 position: 30,
1490 line_number: 1,
1491 column_number: 31,
1492 context_before: None,
1493 context_after: None,
1494 },
1495 ];
1496
1497 let query = SearchQuery {
1498 query: "test".to_string(),
1499 ..Default::default()
1500 };
1501
1502 let score = index.calculate_relevance_score(&chunk, &matches, &query);
1503 assert!(score > 0.0);
1504
1505 let single_match = vec![matches[0].clone()];
1507 let single_score = index.calculate_relevance_score(&chunk, &single_match, &query);
1508 assert!(score > single_score);
1509 }
1510}