1use super::{
7 ChunkId, ContentChunk, ContentNode, ContentStats, ContentType, ContentUpdate,
8 ContentUpdateKind, SearchMatch, SearchQuery, SearchResult,
9};
10
11use anyhow::Result;
12use dashmap::DashMap;
13use regex::Regex;
14use std::collections::HashSet;
15use std::path::{Path, PathBuf};
16use std::sync::{Arc, RwLock};
17use std::time::SystemTime;
18
/// Thread-safe, in-memory index of file content supporting token and regex
/// search over `ContentChunk`s.
///
/// All collections are `DashMap`s so lookups and mutations can proceed
/// concurrently; derived statistics are cached behind an `RwLock`.
pub struct ContentIndex {
    /// Indexed content nodes, keyed by file path.
    nodes: DashMap<PathBuf, ContentNode>,
    /// Every chunk across all nodes, keyed by its `ChunkId`.
    chunks: DashMap<ChunkId, ContentChunk>,
    /// Inverted index: token -> ids of chunks containing that token.
    token_index: DashMap<String, HashSet<ChunkId>>,
    /// File lookup keys (lowercased file name, "*.ext", path components) -> paths.
    file_index: DashMap<String, HashSet<PathBuf>>,
    /// Serialized content-type key -> ids of chunks of that type.
    type_index: DashMap<String, HashSet<ChunkId>>,
    /// Lazily computed stats; set to `None` on every mutation to invalidate.
    stats_cache: Arc<RwLock<Option<ContentStats>>>,
    /// Listeners notified synchronously after each add/remove.
    update_listeners: Arc<RwLock<Vec<Box<dyn ContentUpdateListener>>>>,
}
36
37impl ContentIndex {
38 pub fn new() -> Self {
40 Self {
41 nodes: DashMap::new(),
42 chunks: DashMap::new(),
43 token_index: DashMap::new(),
44 file_index: DashMap::new(),
45 type_index: DashMap::new(),
46 stats_cache: Arc::new(RwLock::new(None)),
47 update_listeners: Arc::new(RwLock::new(Vec::new())),
48 }
49 }
50
51 pub fn add_node(&self, node: ContentNode) -> Result<()> {
53 let file_path = node.file_path.clone();
54
55 if let Some(old_node) = self.nodes.get(&file_path) {
57 for chunk in &old_node.chunks {
58 self.remove_chunk_from_indexes(&chunk.id);
59 }
60 }
61
62 for chunk in &node.chunks {
64 self.add_chunk_to_indexes(chunk.clone())?;
65 }
66
67 self.index_file_pattern(&file_path);
69
70 self.nodes.insert(file_path.clone(), node);
72
73 *self.stats_cache.write().unwrap() = None;
75
76 self.notify_update(ContentUpdate {
78 file_path,
79 update_kind: ContentUpdateKind::Modified,
80 timestamp: SystemTime::now(),
81 });
82
83 Ok(())
84 }
85
86 pub fn remove_node(&self, file_path: &Path) -> Result<()> {
88 if let Some((_, node)) = self.nodes.remove(file_path) {
89 for chunk in &node.chunks {
91 self.remove_chunk_from_indexes(&chunk.id);
92 }
93
94 self.remove_file_pattern(file_path);
96
97 *self.stats_cache.write().unwrap() = None;
99
100 self.notify_update(ContentUpdate {
102 file_path: file_path.to_path_buf(),
103 update_kind: ContentUpdateKind::Deleted,
104 timestamp: SystemTime::now(),
105 });
106 }
107
108 Ok(())
109 }
110
111 pub fn get_node(&self, file_path: &Path) -> Option<ContentNode> {
113 self.nodes.get(file_path).map(|entry| entry.value().clone())
114 }
115
116 pub fn get_chunk(&self, chunk_id: &ChunkId) -> Option<ContentChunk> {
118 self.chunks.get(chunk_id).map(|entry| entry.value().clone())
119 }
120
121 pub fn search(&self, query: &SearchQuery) -> Result<Vec<SearchResult>> {
123 let mut results = Vec::new();
124 let mut seen_chunks = HashSet::new();
125
126 let search_regex = if query.use_regex {
128 Some(Regex::new(&query.query)?)
129 } else {
130 None
131 };
132
133 let candidate_chunks = if query.use_regex {
135 self.search_by_regex(search_regex.as_ref().unwrap(), query)?
136 } else {
137 self.search_by_tokens(&query.query, query)?
138 };
139
140 for chunk_id in candidate_chunks {
142 if seen_chunks.contains(&chunk_id) {
143 continue;
144 }
145 seen_chunks.insert(chunk_id);
146
147 if let Some(chunk) = self.get_chunk(&chunk_id) {
148 if !query.content_types.is_empty()
150 && !self.matches_content_type(&chunk.content_type, &query.content_types)
151 {
152 continue;
153 }
154
155 if !self.matches_file_patterns(
157 &chunk.file_path,
158 &query.file_patterns,
159 &query.exclude_patterns,
160 )? {
161 continue;
162 }
163
164 let matches = self.find_matches_in_chunk(&chunk, query, &search_regex)?;
166 if !matches.is_empty() {
167 let score = self.calculate_relevance_score(&chunk, &matches, query);
168 results.push(SearchResult {
169 chunk: chunk.clone(),
170 score,
171 matches,
172 related_nodes: chunk.related_nodes.clone(),
173 });
174 }
175 }
176
177 if results.len() >= query.max_results {
178 break;
179 }
180 }
181
182 results.sort_by(|a, b| {
184 b.score
185 .partial_cmp(&a.score)
186 .unwrap_or(std::cmp::Ordering::Equal)
187 });
188
189 Ok(results)
190 }
191
192 pub fn find_files(&self, pattern: &str) -> Result<Vec<PathBuf>> {
194 let pattern_regex = Regex::new(pattern)?;
195 let mut matching_files = Vec::new();
196
197 for entry in self.nodes.iter() {
198 let file_path = entry.key();
199 if pattern_regex.is_match(&file_path.to_string_lossy()) {
200 matching_files.push(file_path.clone());
201 }
202 }
203
204 Ok(matching_files)
205 }
206
207 pub fn get_stats(&self) -> ContentStats {
209 if let Ok(cache) = self.stats_cache.read() {
211 if let Some(stats) = cache.as_ref() {
212 return stats.clone();
213 }
214 }
215
216 let stats = self.compute_stats();
218
219 if let Ok(mut cache) = self.stats_cache.write() {
221 *cache = Some(stats.clone());
222 }
223
224 stats
225 }
226
227 pub fn add_update_listener(&self, listener: Box<dyn ContentUpdateListener>) {
229 if let Ok(mut listeners) = self.update_listeners.write() {
230 listeners.push(listener);
231 }
232 }
233
234 pub fn clear(&self) {
236 self.nodes.clear();
237 self.chunks.clear();
238 self.token_index.clear();
239 self.file_index.clear();
240 self.type_index.clear();
241 *self.stats_cache.write().unwrap() = None;
242 }
243
244 fn add_chunk_to_indexes(&self, chunk: ContentChunk) -> Result<()> {
248 let chunk_id = chunk.id;
249
250 for token in &chunk.tokens {
252 self.token_index
253 .entry(token.clone())
254 .or_default()
255 .insert(chunk_id);
256 }
257
258 let type_key = self.content_type_to_string(&chunk.content_type);
260 self.type_index
261 .entry(type_key)
262 .or_default()
263 .insert(chunk_id);
264
265 self.chunks.insert(chunk_id, chunk);
267
268 Ok(())
269 }
270
271 fn remove_chunk_from_indexes(&self, chunk_id: &ChunkId) {
273 if let Some((_, chunk)) = self.chunks.remove(chunk_id) {
275 for token in &chunk.tokens {
277 if let Some(mut token_set) = self.token_index.get_mut(token) {
278 token_set.remove(chunk_id);
279 if token_set.is_empty() {
280 drop(token_set);
281 self.token_index.remove(token);
282 }
283 }
284 }
285
286 let type_key = self.content_type_to_string(&chunk.content_type);
288 if let Some(mut type_set) = self.type_index.get_mut(&type_key) {
289 type_set.remove(chunk_id);
290 if type_set.is_empty() {
291 drop(type_set);
292 self.type_index.remove(&type_key);
293 }
294 }
295 }
296 }
297
298 fn index_file_pattern(&self, file_path: &Path) {
300 let file_name = file_path
301 .file_name()
302 .and_then(|name| name.to_str())
303 .unwrap_or("");
304
305 let extension = file_path
306 .extension()
307 .and_then(|ext| ext.to_str())
308 .unwrap_or("");
309
310 self.file_index
312 .entry(file_name.to_lowercase())
313 .or_default()
314 .insert(file_path.to_path_buf());
315
316 if !extension.is_empty() {
318 self.file_index
319 .entry(format!("*.{}", extension.to_lowercase()))
320 .or_default()
321 .insert(file_path.to_path_buf());
322 }
323
324 for component in file_path.components() {
326 if let Some(component_str) = component.as_os_str().to_str() {
327 self.file_index
328 .entry(component_str.to_lowercase())
329 .or_default()
330 .insert(file_path.to_path_buf());
331 }
332 }
333 }
334
335 fn remove_file_pattern(&self, file_path: &Path) {
337 let file_name = file_path
338 .file_name()
339 .and_then(|name| name.to_str())
340 .unwrap_or("");
341
342 let extension = file_path
343 .extension()
344 .and_then(|ext| ext.to_str())
345 .unwrap_or("");
346
347 if let Some(mut file_set) = self.file_index.get_mut(&file_name.to_lowercase()) {
349 file_set.remove(file_path);
350 if file_set.is_empty() {
351 drop(file_set);
352 self.file_index.remove(&file_name.to_lowercase());
353 }
354 }
355
356 if !extension.is_empty() {
358 let ext_key = format!("*.{}", extension.to_lowercase());
359 if let Some(mut ext_set) = self.file_index.get_mut(&ext_key) {
360 ext_set.remove(file_path);
361 if ext_set.is_empty() {
362 drop(ext_set);
363 self.file_index.remove(&ext_key);
364 }
365 }
366 }
367 }
368
369 fn search_by_tokens(&self, query: &str, _search_query: &SearchQuery) -> Result<Vec<ChunkId>> {
371 let query_tokens: Vec<String> = query
372 .to_lowercase()
373 .split_whitespace()
374 .map(|s| s.to_string())
375 .collect();
376
377 if query_tokens.is_empty() {
378 return Ok(Vec::new());
379 }
380
381 let mut result_chunks: Option<HashSet<ChunkId>> = None;
382
383 for token in &query_tokens {
385 if let Some(chunk_set) = self.token_index.get(token) {
386 let chunk_ids: HashSet<ChunkId> = chunk_set.iter().copied().collect();
387 result_chunks = Some(match result_chunks {
388 None => chunk_ids,
389 Some(existing) => existing.intersection(&chunk_ids).copied().collect(),
390 });
391 } else {
392 return Ok(Vec::new());
394 }
395 }
396
397 Ok(result_chunks.unwrap_or_default().into_iter().collect())
398 }
399
400 fn search_by_regex(&self, regex: &Regex, search_query: &SearchQuery) -> Result<Vec<ChunkId>> {
402 let mut matching_chunks = Vec::new();
403
404 for entry in self.chunks.iter() {
405 let chunk = entry.value();
406 let content = if search_query.case_sensitive {
407 &chunk.content
408 } else {
409 &chunk.content.to_lowercase()
410 };
411
412 if regex.is_match(content) {
413 matching_chunks.push(chunk.id);
414 }
415 }
416
417 Ok(matching_chunks)
418 }
419
    /// Extracts every occurrence of the query inside one chunk, with
    /// line/column coordinates and optional surrounding context lines.
    ///
    /// NOTE(review): for case-insensitive queries, positions and context are
    /// computed against the *lowercased* copy of the content; lowercasing
    /// can change byte lengths for some Unicode characters, so offsets may
    /// not map exactly back onto the original text — confirm acceptable.
    fn find_matches_in_chunk(
        &self,
        chunk: &ContentChunk,
        query: &SearchQuery,
        regex: &Option<Regex>,
    ) -> Result<Vec<SearchMatch>> {
        let mut matches = Vec::new();
        // Normalize content and query casing together when case-insensitive.
        let content = if query.case_sensitive {
            chunk.content.clone()
        } else {
            chunk.content.to_lowercase()
        };

        let search_term = if query.case_sensitive {
            query.query.clone()
        } else {
            query.query.to_lowercase()
        };

        if let Some(regex) = regex {
            // Regex path: non-overlapping matches from find_iter.
            for regex_match in regex.find_iter(&content) {
                let line_info = self.calculate_line_info(&content, regex_match.start());
                let search_match = SearchMatch {
                    text: regex_match.as_str().to_string(),
                    position: regex_match.start(),
                    line_number: line_info.0,
                    column_number: line_info.1,
                    context_before: if query.include_context {
                        self.get_context_before(&content, regex_match.start(), query.context_lines)
                    } else {
                        None
                    },
                    context_after: if query.include_context {
                        self.get_context_after(&content, regex_match.end(), query.context_lines)
                    } else {
                        None
                    },
                };
                matches.push(search_match);
            }
        } else {
            // Literal path: repeated `find` over the remaining suffix.
            // Advancing by 1 (not by the match length) means overlapping
            // occurrences are all reported.
            // NOTE(review): `text` stores the (possibly lowercased) search
            // term, not the original-cased matched slice — confirm intended.
            let mut start = 0;
            while let Some(pos) = content[start..].find(&search_term) {
                let absolute_pos = start + pos;
                let line_info = self.calculate_line_info(&content, absolute_pos);
                let search_match = SearchMatch {
                    text: search_term.clone(),
                    position: absolute_pos,
                    line_number: line_info.0,
                    column_number: line_info.1,
                    context_before: if query.include_context {
                        self.get_context_before(&content, absolute_pos, query.context_lines)
                    } else {
                        None
                    },
                    context_after: if query.include_context {
                        self.get_context_after(
                            &content,
                            absolute_pos + search_term.len(),
                            query.context_lines,
                        )
                    } else {
                        None
                    },
                };
                matches.push(search_match);
                start = absolute_pos + 1;
            }
        }

        Ok(matches)
    }
495
496 fn calculate_line_info(&self, content: &str, position: usize) -> (usize, usize) {
498 let before_position = &content[..position.min(content.len())];
499 let line_number = before_position.lines().count();
500 let column_number = before_position
501 .lines()
502 .last()
503 .map(|line| line.len() + 1)
504 .unwrap_or(1);
505 (line_number, column_number)
506 }
507
    /// Returns up to `context_lines` lines preceding the line that contains
    /// byte `position`, joined with `'\n'`, or `None` when no context
    /// applies.
    ///
    /// NOTE(review): because `calculate_line_info` counts the partial last
    /// line of the prefix, `end_line` here is the zero-based index of the
    /// match's *own* line, so the "before" context actually includes the
    /// matching line itself — confirm this is the intended window.
    fn get_context_before(
        &self,
        content: &str,
        position: usize,
        context_lines: usize,
    ) -> Option<String> {
        if context_lines == 0 {
            return None;
        }

        let lines: Vec<&str> = content.lines().collect();
        let (line_number, _) = self.calculate_line_info(content, position);

        // line_number 0 means the position sits at the very start.
        if line_number == 0 {
            return None;
        }

        let start_line = line_number.saturating_sub(context_lines + 1);
        let end_line = line_number.saturating_sub(1);

        // Reject windows that fall outside the collected lines.
        if start_line >= lines.len() || end_line >= lines.len() || start_line > end_line {
            return None;
        }

        Some(lines[start_line..=end_line].join("\n"))
    }
535
    /// Returns up to `context_lines` lines following the line that contains
    /// byte `position`, joined with `'\n'`, or `None` when no context
    /// applies.
    ///
    /// NOTE(review): `line_number` from `calculate_line_info` doubles as the
    /// zero-based index of the *next* line here (it counts the partial line
    /// containing `position` as a full line) — this only holds for mid-line
    /// positions; confirm behavior at exact line starts.
    fn get_context_after(
        &self,
        content: &str,
        position: usize,
        context_lines: usize,
    ) -> Option<String> {
        if context_lines == 0 {
            return None;
        }

        let lines: Vec<&str> = content.lines().collect();
        let (line_number, _) = self.calculate_line_info(content, position);

        let start_line = line_number;
        // Clamp the window to the last available line.
        let end_line = (start_line + context_lines).min(lines.len().saturating_sub(1));

        if start_line >= lines.len() || start_line > end_line {
            return None;
        }

        Some(lines[start_line..=end_line].join("\n"))
    }
559
560 fn calculate_relevance_score(
562 &self,
563 chunk: &ContentChunk,
564 matches: &[SearchMatch],
565 _query: &SearchQuery,
566 ) -> f32 {
567 if matches.is_empty() {
568 return 0.0;
569 }
570
571 let type_score = match &chunk.content_type {
573 ContentType::Documentation { .. } => 0.8,
574 ContentType::Comment { context, .. } => match context {
575 super::CommentContext::Documentation => 0.7,
576 super::CommentContext::Function { .. } => 0.6,
577 super::CommentContext::Class { .. } => 0.6,
578 _ => 0.4,
579 },
580 ContentType::Code { .. } => 0.5,
581 ContentType::Configuration { .. } => 0.4,
582 ContentType::PlainText => 0.2,
583 };
584
585 let match_bonus = matches.len() as f32 * 0.1;
587
588 (type_score + match_bonus).min(1.0)
590 }
591
592 fn matches_content_type(
594 &self,
595 content_type: &ContentType,
596 allowed_types: &[ContentType],
597 ) -> bool {
598 allowed_types
599 .iter()
600 .any(|allowed| std::mem::discriminant(content_type) == std::mem::discriminant(allowed))
601 }
602
603 fn matches_file_patterns(
605 &self,
606 file_path: &Path,
607 include_patterns: &[String],
608 exclude_patterns: &[String],
609 ) -> Result<bool> {
610 let path_str = file_path.to_string_lossy();
611
612 for pattern in exclude_patterns {
614 let regex_pattern = self.glob_to_regex(pattern);
615 let regex = Regex::new(®ex_pattern)?;
616 if regex.is_match(&path_str) {
617 return Ok(false);
618 }
619 }
620
621 if include_patterns.is_empty() {
623 return Ok(true);
624 }
625
626 for pattern in include_patterns {
628 let regex_pattern = self.glob_to_regex(pattern);
629 let regex = Regex::new(®ex_pattern)?;
630 if regex.is_match(&path_str) {
631 return Ok(true);
632 }
633 }
634
635 Ok(false)
636 }
637
638 fn glob_to_regex(&self, glob: &str) -> String {
640 let mut regex = String::new();
641 regex.push('^');
642
643 for ch in glob.chars() {
644 match ch {
645 '*' => regex.push_str(".*"),
646 '?' => regex.push('.'),
647 '.' => regex.push_str("\\."),
648 '+' => regex.push_str("\\+"),
649 '^' => regex.push_str("\\^"),
650 '$' => regex.push_str("\\$"),
651 '(' => regex.push_str("\\("),
652 ')' => regex.push_str("\\)"),
653 '[' => regex.push_str("\\["),
654 ']' => regex.push_str("\\]"),
655 '{' => regex.push_str("\\{"),
656 '}' => regex.push_str("\\}"),
657 '|' => regex.push_str("\\|"),
658 '\\' => regex.push_str("\\\\"),
659 c => regex.push(c),
660 }
661 }
662
663 regex.push('$');
664 regex
665 }
666
667 fn content_type_to_string(&self, content_type: &ContentType) -> String {
669 match content_type {
670 ContentType::Code { language } => format!("code:{language:?}"),
671 ContentType::Documentation { format } => format!("doc:{format:?}"),
672 ContentType::Configuration { format } => format!("config:{format:?}"),
673 ContentType::Comment { language, context } => {
674 format!("comment:{language:?}:{context:?}")
675 }
676 ContentType::PlainText => "text".to_string(),
677 }
678 }
679
680 fn compute_stats(&self) -> ContentStats {
682 let mut stats = ContentStats::new();
683
684 stats.total_files = self.nodes.len();
685 stats.total_chunks = self.chunks.len();
686
687 stats.total_tokens = self.token_index.len();
689
690 for entry in self.type_index.iter() {
692 let type_name = entry.key().clone();
693 let chunk_count = entry.value().len();
694 stats.content_by_type.insert(type_name, chunk_count);
695 }
696
697 for entry in self.nodes.iter() {
699 let node = entry.value();
700 let size_bucket = match node.file_size {
701 0..=1024 => "small (0-1KB)",
702 1025..=10240 => "medium (1-10KB)",
703 10241..=102400 => "large (10-100KB)",
704 _ => "very_large (>100KB)",
705 };
706 *stats
707 .size_distribution
708 .entry(size_bucket.to_string())
709 .or_insert(0) += 1;
710 }
711
712 stats.computed_at = SystemTime::now();
713 stats
714 }
715
716 fn notify_update(&self, update: ContentUpdate) {
718 if let Ok(listeners) = self.update_listeners.read() {
719 for listener in listeners.iter() {
720 listener.on_content_update(&update);
721 }
722 }
723 }
724}
725
/// `Default` yields an empty index, delegating to [`ContentIndex::new`].
impl Default for ContentIndex {
    fn default() -> Self {
        Self::new()
    }
}
731
/// Callback interface for observing index mutations.
///
/// Implementations must be `Send + Sync` because notifications are
/// delivered synchronously from whichever thread mutates the index.
pub trait ContentUpdateListener: Send + Sync {
    /// Invoked after a node is added, modified, or removed.
    fn on_content_update(&self, update: &ContentUpdate);
}
737
738pub struct LoggingUpdateListener;
740
741impl ContentUpdateListener for LoggingUpdateListener {
742 fn on_content_update(&self, update: &ContentUpdate) {
743 eprintln!(
744 "Content updated: {:?} at {:?}",
745 update.file_path, update.timestamp
746 );
747 }
748}
749
750#[cfg(test)]
751mod tests {
752 use super::*;
753 use crate::ast::Span;
754 use crate::content::ChunkId;
755 use crate::{ConfigFormat, DocumentFormat};
756 use std::path::Path;
757
758 fn create_test_chunk(
759 file_path: &Path,
760 content: &str,
761 content_type: ContentType,
762 chunk_index: usize,
763 ) -> ContentChunk {
764 let span = Span::new(0, content.len(), 1, 1, 1, content.len());
765 ContentChunk::new(
766 file_path.to_path_buf(),
767 content_type,
768 content.to_string(),
769 span,
770 chunk_index,
771 )
772 }
773
774 fn create_test_node(file_path: &Path, chunks: Vec<ContentChunk>) -> ContentNode {
775 let mut node = ContentNode::new(file_path.to_path_buf(), chunks[0].content_type.clone());
776 for chunk in chunks {
777 node.add_chunk(chunk);
778 }
779 node.file_size = 1000; node
781 }
782
783 #[test]
784 fn test_content_index_creation() {
785 let index = ContentIndex::new();
786
787 let _index_default = ContentIndex::default();
789
790 let stats = index.get_stats();
792 assert_eq!(stats.total_files, 0);
793 assert_eq!(stats.total_chunks, 0);
794 }
795
796 #[test]
797 fn test_add_and_get_node() {
798 let index = ContentIndex::new();
799 let file_path = Path::new("test.md");
800
801 let chunk = create_test_chunk(
803 file_path,
804 "# Test Document\n\nThis is a test.",
805 ContentType::Documentation {
806 format: DocumentFormat::Markdown,
807 },
808 0,
809 );
810 let node = create_test_node(file_path, vec![chunk]);
811
812 let result = index.add_node(node.clone());
814 assert!(result.is_ok(), "Adding valid content node should succeed");
815
816 let retrieved_node = index.get_node(file_path);
818 assert!(
819 retrieved_node.is_some(),
820 "Should be able to retrieve added node"
821 );
822 let retrieved_node = retrieved_node.unwrap();
823 assert_eq!(
824 retrieved_node.file_path, file_path,
825 "Retrieved node should have correct file path"
826 );
827 assert_eq!(
828 retrieved_node.chunks.len(),
829 1,
830 "Retrieved node should have 1 chunk"
831 );
832
833 assert_eq!(
835 retrieved_node.chunks[0].content, "# Test Document\n\nThis is a test.",
836 "Chunk content should be preserved"
837 );
838 assert!(
839 matches!(
840 retrieved_node.chunks[0].content_type,
841 ContentType::Documentation { .. }
842 ),
843 "Content type should be preserved"
844 );
845
846 let stats = index.get_stats();
848 assert_eq!(stats.total_files, 1, "Stats should show 1 file");
849 assert_eq!(stats.total_chunks, 1, "Stats should show 1 chunk");
850 }
851
852 #[test]
853 fn test_add_node_replaces_existing() {
854 let index = ContentIndex::new();
855 let file_path = Path::new("test.md");
856
857 let chunk1 = create_test_chunk(
859 file_path,
860 "Original content",
861 ContentType::Documentation {
862 format: DocumentFormat::Markdown,
863 },
864 0,
865 );
866 let node1 = create_test_node(file_path, vec![chunk1]);
867 let _ = index.add_node(node1);
868
869 let chunk2 = create_test_chunk(
871 file_path,
872 "Updated content",
873 ContentType::Documentation {
874 format: DocumentFormat::Markdown,
875 },
876 1,
877 );
878 let node2 = create_test_node(file_path, vec![chunk2]);
879 let _ = index.add_node(node2);
880
881 let retrieved_node = index.get_node(file_path).unwrap();
883 assert_eq!(retrieved_node.chunks[0].content, "Updated content");
884 }
885
886 #[test]
887 fn test_remove_node() {
888 let index = ContentIndex::new();
889 let file_path = Path::new("test.md");
890
891 let chunk = create_test_chunk(
893 file_path,
894 "Test content",
895 ContentType::Documentation {
896 format: DocumentFormat::Markdown,
897 },
898 0,
899 );
900 let node = create_test_node(file_path, vec![chunk]);
901 let _ = index.add_node(node);
902
903 assert!(
905 index.get_node(file_path).is_some(),
906 "Node should exist after adding"
907 );
908 let retrieved_node = index.get_node(file_path).unwrap();
909 assert_eq!(
910 retrieved_node.file_path, file_path,
911 "Retrieved node should have correct path"
912 );
913 assert!(
914 !retrieved_node.chunks.is_empty(),
915 "Retrieved node should have chunks"
916 );
917
918 let result = index.remove_node(file_path);
920 assert!(result.is_ok(), "Operation should succeed");
921
922 assert!(index.get_node(file_path).is_none());
924 }
925
926 #[test]
927 fn test_get_chunk() {
928 let index = ContentIndex::new();
929 let file_path = Path::new("test.md");
930
931 let chunk = create_test_chunk(
932 file_path,
933 "Test content",
934 ContentType::Documentation {
935 format: DocumentFormat::Markdown,
936 },
937 42,
938 );
939 let chunk_id = chunk.id;
940 let node = create_test_node(file_path, vec![chunk]);
941
942 let _ = index.add_node(node);
943
944 let retrieved_chunk = index.get_chunk(&chunk_id);
946 assert!(retrieved_chunk.is_some(), "Should have value");
947 assert_eq!(retrieved_chunk.unwrap().content, "Test content");
948
949 let fake_chunk_id = ChunkId::new(Path::new("nonexistent.md"), 9999, &[0u8; 32]);
951 let non_existent = index.get_chunk(&fake_chunk_id);
952 assert!(non_existent.is_none(), "Should be none");
953 }
954
955 #[test]
956 fn test_simple_text_search() {
957 let index = ContentIndex::new();
958
959 let file1 = Path::new("doc1.md");
961 let chunk1 = create_test_chunk(
962 file1,
963 "This is a test document about programming",
964 ContentType::Documentation {
965 format: DocumentFormat::Markdown,
966 },
967 1,
968 );
969 let node1 = create_test_node(file1, vec![chunk1]);
970 let _ = index.add_node(node1);
971
972 let file2 = Path::new("doc2.md");
973 let chunk2 = create_test_chunk(
974 file2,
975 "Another document for testing purposes",
976 ContentType::Documentation {
977 format: DocumentFormat::Markdown,
978 },
979 2,
980 );
981 let node2 = create_test_node(file2, vec![chunk2]);
982 let _ = index.add_node(node2);
983
984 let search_query = SearchQuery {
986 query: "document".to_string(),
987 max_results: 10,
988 ..Default::default()
989 };
990
991 let results = index.search(&search_query).unwrap();
992 assert!(!results.is_empty(), "Should not be empty");
993
994 let result_contents: Vec<_> = results.iter().map(|r| &r.chunk.content).collect();
996 assert!(result_contents
997 .iter()
998 .any(|content| content.contains("programming")));
999 assert!(result_contents
1000 .iter()
1001 .any(|content| content.contains("testing")));
1002 }
1003
1004 #[test]
1005 fn test_regex_search() {
1006 let index = ContentIndex::new();
1007
1008 let file_path = Path::new("contacts.md");
1010 let chunk = create_test_chunk(
1011 file_path,
1012 "Contact John at john@example.com or Mary at mary@test.org",
1013 ContentType::Documentation {
1014 format: DocumentFormat::Markdown,
1015 },
1016 1,
1017 );
1018 let node = create_test_node(file_path, vec![chunk]);
1019 let _ = index.add_node(node);
1020
1021 let search_query = SearchQuery {
1023 query: r"\b\w+@\w+\.\w+\b".to_string(),
1024 use_regex: true,
1025 max_results: 10,
1026 ..Default::default()
1027 };
1028
1029 let results = index.search(&search_query).unwrap();
1030 assert!(!results.is_empty(), "Should not be empty");
1031
1032 let result = &results[0];
1034 assert!(!result.matches.is_empty(), "Should not be empty");
1035 }
1036
1037 #[test]
1038 fn test_search_with_content_type_filter() {
1039 let index = ContentIndex::new();
1040
1041 let md_file = Path::new("doc.md");
1043 let md_chunk = create_test_chunk(
1044 md_file,
1045 "Documentation content",
1046 ContentType::Documentation {
1047 format: DocumentFormat::Markdown,
1048 },
1049 1,
1050 );
1051 let md_node = create_test_node(md_file, vec![md_chunk]);
1052 let _ = index.add_node(md_node);
1053
1054 let json_file = Path::new("config.json");
1055 let json_chunk = create_test_chunk(
1056 json_file,
1057 r#"{"config": "content"}"#,
1058 ContentType::Configuration {
1059 format: ConfigFormat::Json,
1060 },
1061 2,
1062 );
1063 let json_node = create_test_node(json_file, vec![json_chunk]);
1064 let _ = index.add_node(json_node);
1065
1066 let search_query = SearchQuery {
1068 query: "content".to_string(),
1069 content_types: vec![ContentType::Documentation {
1070 format: DocumentFormat::Markdown,
1071 }],
1072 max_results: 10,
1073 ..Default::default()
1074 };
1075
1076 let results = index.search(&search_query).unwrap();
1077 assert_eq!(results.len(), 1, "Should have 1 items");
1078 assert!(results[0].chunk.content.contains("Documentation"));
1079 }
1080
1081 #[test]
1082 fn test_search_with_file_patterns() {
1083 let index = ContentIndex::new();
1084
1085 let md_file = Path::new("test.md");
1087 let md_chunk = create_test_chunk(
1088 md_file,
1089 "Markdown content",
1090 ContentType::Documentation {
1091 format: DocumentFormat::Markdown,
1092 },
1093 1,
1094 );
1095 let md_node = create_test_node(md_file, vec![md_chunk]);
1096 let _ = index.add_node(md_node);
1097
1098 let txt_file = Path::new("test.txt");
1099 let txt_chunk = create_test_chunk(
1100 txt_file,
1101 "Text content",
1102 ContentType::Documentation {
1103 format: DocumentFormat::PlainText,
1104 },
1105 2,
1106 );
1107 let txt_node = create_test_node(txt_file, vec![txt_chunk]);
1108 let _ = index.add_node(txt_node);
1109
1110 let search_query = SearchQuery {
1112 query: "content".to_string(),
1113 file_patterns: vec!["*.md".to_string()],
1114 max_results: 10,
1115 ..Default::default()
1116 };
1117
1118 let results = index.search(&search_query).unwrap();
1119 assert_eq!(results.len(), 1, "Should have 1 items");
1120 assert!(results[0].chunk.content.contains("Markdown"));
1121 }
1122
1123 #[test]
1124 fn test_search_with_exclude_patterns() {
1125 let index = ContentIndex::new();
1126
1127 let md_file = Path::new("test.md");
1129 let md_chunk = create_test_chunk(
1130 md_file,
1131 "Markdown content",
1132 ContentType::Documentation {
1133 format: DocumentFormat::Markdown,
1134 },
1135 1,
1136 );
1137 let md_node = create_test_node(md_file, vec![md_chunk]);
1138 let _ = index.add_node(md_node);
1139
1140 let tmp_file = Path::new("temp.tmp");
1141 let tmp_chunk = create_test_chunk(
1142 tmp_file,
1143 "Temporary content",
1144 ContentType::Documentation {
1145 format: DocumentFormat::PlainText,
1146 },
1147 2,
1148 );
1149 let tmp_node = create_test_node(tmp_file, vec![tmp_chunk]);
1150 let _ = index.add_node(tmp_node);
1151
1152 let search_query = SearchQuery {
1154 query: "content".to_string(),
1155 exclude_patterns: vec!["*.tmp".to_string()],
1156 max_results: 10,
1157 ..Default::default()
1158 };
1159
1160 let results = index.search(&search_query).unwrap();
1161 assert_eq!(results.len(), 1, "Should have 1 items");
1162 assert!(results[0].chunk.content.contains("Markdown"));
1163 }
1164
1165 #[test]
1166 fn test_search_with_context() {
1167 let index = ContentIndex::new();
1168
1169 let file_path = Path::new("test.md");
1170 let content = "Line 1\nLine 2 with target\nLine 3\nLine 4";
1171 let chunk = create_test_chunk(
1172 file_path,
1173 content,
1174 ContentType::Documentation {
1175 format: DocumentFormat::Markdown,
1176 },
1177 1,
1178 );
1179 let node = create_test_node(file_path, vec![chunk]);
1180 let _ = index.add_node(node);
1181
1182 let search_query = SearchQuery {
1184 query: "target".to_string(),
1185 include_context: true,
1186 context_lines: 1,
1187 max_results: 10,
1188 ..Default::default()
1189 };
1190
1191 let results = index.search(&search_query).unwrap();
1192 assert!(!results.is_empty(), "Should not be empty");
1193
1194 let result = &results[0];
1195 assert!(!result.matches.is_empty(), "Should not be empty");
1196
1197 let search_match = &result.matches[0];
1199 assert!(search_match.context_before.is_some(), "Should have value");
1200 assert!(search_match.context_after.is_some(), "Should have value");
1201 }
1202
1203 #[test]
1204 fn test_search_case_sensitive() {
1205 let index = ContentIndex::new();
1206
1207 let file_path = Path::new("test.md");
1208 let chunk = create_test_chunk(
1209 file_path,
1210 "Test with UPPERCASE and lowercase",
1211 ContentType::Documentation {
1212 format: DocumentFormat::Markdown,
1213 },
1214 1,
1215 );
1216 let node = create_test_node(file_path, vec![chunk]);
1217 let _ = index.add_node(node);
1218
1219 let search_query = SearchQuery {
1221 query: "UPPERCASE".to_string(),
1222 case_sensitive: true,
1223 max_results: 10,
1224 ..Default::default()
1225 };
1226
1227 let results = index.search(&search_query).unwrap();
1228 assert!(!results.is_empty(), "Should not be empty");
1229
1230 let search_query_lower = SearchQuery {
1232 query: "uppercase".to_string(),
1233 case_sensitive: true,
1234 max_results: 10,
1235 ..Default::default()
1236 };
1237
1238 let results_lower = index.search(&search_query_lower).unwrap();
1239 assert!(
1240 results_lower.is_empty(),
1241 "Should be empty for case mismatch"
1242 );
1243 }
1244
1245 #[test]
1246 fn test_search_max_results() {
1247 let index = ContentIndex::new();
1248
1249 for i in 0..10 {
1251 let file_path = PathBuf::from(format!("doc{i}.md"));
1252 let chunk = create_test_chunk(
1253 &file_path,
1254 &format!("Document {i} contains the search term"),
1255 ContentType::Documentation {
1256 format: DocumentFormat::Markdown,
1257 },
1258 i,
1259 );
1260 let node = create_test_node(&file_path, vec![chunk]);
1261 let _ = index.add_node(node);
1262 }
1263
1264 let search_query = SearchQuery {
1266 query: "search".to_string(),
1267 max_results: 3,
1268 ..Default::default()
1269 };
1270
1271 let results = index.search(&search_query).unwrap();
1272 assert_eq!(results.len(), 3, "Should have 3 items");
1273 }
1274
1275 #[test]
1276 fn test_find_files() {
1277 let index = ContentIndex::new();
1278
1279 let files = ["test_one.md", "test_two.md", "other.txt", "config.json"];
1281 for (i, file_name) in files.iter().enumerate() {
1282 let file_path = Path::new(file_name);
1283 let chunk = create_test_chunk(
1284 file_path,
1285 &format!("Content {i}"),
1286 ContentType::Documentation {
1287 format: DocumentFormat::Markdown,
1288 },
1289 i,
1290 );
1291 let node = create_test_node(file_path, vec![chunk]);
1292 let _ = index.add_node(node);
1293 }
1294
1295 let md_files = index.find_files(r"\.md$").unwrap();
1297 assert_eq!(md_files.len(), 2, "Should have 2 items");
1298
1299 let test_files = index.find_files(r"test_").unwrap();
1301 assert_eq!(test_files.len(), 2, "Should have 2 items");
1302
1303 let all_files = index.find_files(r".*").unwrap();
1305 assert_eq!(all_files.len(), 4, "Should have 4 items");
1306 }
1307
1308 #[test]
1309 fn test_content_stats() {
1310 let index = ContentIndex::new();
1311
1312 let stats = index.get_stats();
1314 assert_eq!(stats.total_files, 0);
1315 assert_eq!(stats.total_chunks, 0);
1316
1317 let file1 = Path::new("doc1.md");
1319 let chunk1 = create_test_chunk(
1320 file1,
1321 "First document",
1322 ContentType::Documentation {
1323 format: DocumentFormat::Markdown,
1324 },
1325 1,
1326 );
1327 let node1 = create_test_node(file1, vec![chunk1]);
1328 let _ = index.add_node(node1);
1329
1330 let file2 = Path::new("doc2.md");
1331 let chunk2a = create_test_chunk(
1332 file2,
1333 "Second document first chunk",
1334 ContentType::Documentation {
1335 format: DocumentFormat::Markdown,
1336 },
1337 2,
1338 );
1339 let chunk2b = create_test_chunk(
1340 file2,
1341 "Second document second chunk",
1342 ContentType::Documentation {
1343 format: DocumentFormat::Markdown,
1344 },
1345 3,
1346 );
1347 let node2 = create_test_node(file2, vec![chunk2a, chunk2b]);
1348 let _ = index.add_node(node2);
1349
1350 let stats = index.get_stats();
1352 assert_eq!(stats.total_files, 2);
1353 assert_eq!(stats.total_chunks, 3);
1354 }
1355
1356 #[test]
1357 fn test_content_update_listeners() {
1358 struct TestListener {
1359 updates: Arc<std::sync::Mutex<Vec<ContentUpdate>>>,
1360 }
1361
1362 impl ContentUpdateListener for TestListener {
1363 fn on_content_update(&self, update: &ContentUpdate) {
1364 self.updates.lock().unwrap().push(update.clone());
1365 }
1366 }
1367
1368 let index = ContentIndex::new();
1369 let updates = Arc::new(std::sync::Mutex::new(Vec::new()));
1370 let listener = TestListener {
1371 updates: updates.clone(),
1372 };
1373
1374 index.add_update_listener(Box::new(listener));
1375
1376 let file_path = Path::new("test.md");
1378 let chunk = create_test_chunk(
1379 file_path,
1380 "Test content",
1381 ContentType::Documentation {
1382 format: DocumentFormat::Markdown,
1383 },
1384 1,
1385 );
1386 let node = create_test_node(file_path, vec![chunk]);
1387 let _ = index.add_node(node);
1388
1389 let updates = updates.lock().unwrap();
1391 assert_eq!(updates.len(), 1, "Should have 1 items");
1392 assert_eq!(updates[0].file_path, file_path);
1393 assert!(matches!(
1394 updates[0].update_kind,
1395 ContentUpdateKind::Modified
1396 ));
1397 }
1398
1399 #[test]
1400 fn test_clear() {
1401 let index = ContentIndex::new();
1402
1403 let file_path = Path::new("test.md");
1405 let chunk = create_test_chunk(
1406 file_path,
1407 "Test content",
1408 ContentType::Documentation {
1409 format: DocumentFormat::Markdown,
1410 },
1411 1,
1412 );
1413 let node = create_test_node(file_path, vec![chunk]);
1414 let _ = index.add_node(node);
1415
1416 assert!(
1418 index.get_node(file_path).is_some(),
1419 "Node should exist after adding"
1420 );
1421 let retrieved_node = index.get_node(file_path).unwrap();
1422 assert_eq!(
1423 retrieved_node.file_path, file_path,
1424 "Retrieved node should have correct file path"
1425 );
1426 assert!(
1427 !retrieved_node.chunks.is_empty(),
1428 "Retrieved node should have chunks"
1429 );
1430 assert_eq!(
1431 retrieved_node.chunks[0].content, "Test content",
1432 "Chunk should have correct content"
1433 );
1434
1435 let stats = index.get_stats();
1436 assert!(
1437 stats.total_files > 0,
1438 "Stats should show files after adding content"
1439 );
1440 assert_eq!(stats.total_files, 1, "Should have exactly 1 file");
1441
1442 index.clear();
1444
1445 assert!(index.get_node(file_path).is_none());
1447 let stats = index.get_stats();
1448 assert_eq!(stats.total_files, 0);
1449 assert_eq!(stats.total_chunks, 0);
1450 }
1451
1452 #[test]
1453 fn test_invalid_regex_search() {
1454 let index = ContentIndex::new();
1455
1456 let file_path = Path::new("test.md");
1458 let chunk = create_test_chunk(
1459 file_path,
1460 "Test content",
1461 ContentType::Documentation {
1462 format: DocumentFormat::Markdown,
1463 },
1464 1,
1465 );
1466 let node = create_test_node(file_path, vec![chunk]);
1467 let _ = index.add_node(node);
1468
1469 let search_query = SearchQuery {
1471 query: "[invalid".to_string(),
1472 use_regex: true,
1473 max_results: 10,
1474 ..Default::default()
1475 };
1476
1477 let result = index.search(&search_query);
1478 assert!(result.is_err());
1479 }
1480
1481 #[test]
1482 fn test_logging_update_listener() {
1483 let listener = LoggingUpdateListener;
1484 let update = ContentUpdate {
1485 file_path: PathBuf::from("test.md"),
1486 update_kind: ContentUpdateKind::Modified,
1487 timestamp: SystemTime::now(),
1488 };
1489
1490 listener.on_content_update(&update);
1492 }
1493
1494 #[test]
1495 fn test_line_info_calculation() {
1496 let index = ContentIndex::new();
1497
1498 let content = "Line 1\nLine 2\nLine 3 with text\nLine 4";
1499 let position = content.find("text").unwrap();
1500
1501 let (line, column) = index.calculate_line_info(content, position);
1502 assert_eq!(line, 3); assert!(column > 1); }
1505
1506 #[test]
1507 fn test_context_extraction() {
1508 let index = ContentIndex::new();
1509
1510 let content = "Line 1\nLine 2\nLine 3 target\nLine 4\nLine 5";
1511 let position = content.find("target").unwrap();
1512
1513 let context_before = index.get_context_before(content, position, 1);
1515 assert!(context_before.is_some(), "Should have value");
1516 assert!(context_before.unwrap().contains("Line 2"));
1517
1518 let context_after = index.get_context_after(content, position + 6, 1);
1520 assert!(context_after.is_some(), "Should have value");
1521 assert!(context_after.unwrap().contains("Line 4"));
1522
1523 let no_context = index.get_context_before(content, position, 0);
1525 assert!(no_context.is_none(), "Should be none");
1526 }
1527
1528 #[test]
1529 fn test_relevance_score_calculation() {
1530 let index = ContentIndex::new();
1531
1532 let file_path = Path::new("test.md");
1533 let chunk = create_test_chunk(
1534 file_path,
1535 "Test document with multiple test occurrences",
1536 ContentType::Documentation {
1537 format: DocumentFormat::Markdown,
1538 },
1539 1,
1540 );
1541
1542 let matches = vec![
1543 SearchMatch {
1544 text: "test".to_string(),
1545 position: 0,
1546 line_number: 1,
1547 column_number: 1,
1548 context_before: None,
1549 context_after: None,
1550 },
1551 SearchMatch {
1552 text: "test".to_string(),
1553 position: 30,
1554 line_number: 1,
1555 column_number: 31,
1556 context_before: None,
1557 context_after: None,
1558 },
1559 ];
1560
1561 let query = SearchQuery {
1562 query: "test".to_string(),
1563 ..Default::default()
1564 };
1565
1566 let score = index.calculate_relevance_score(&chunk, &matches, &query);
1567 assert!(score > 0.0);
1568
1569 let single_match = vec![matches[0].clone()];
1571 let single_score = index.calculate_relevance_score(&chunk, &single_match, &query);
1572 assert!(score > single_score);
1573 }
1574}