1use crate::profiling::{ComponentTimings, OperationTimer, PerformanceMetrics};
2use crate::{Error, HeadingBlock, Result, SearchHit};
3use base64::{Engine, engine::general_purpose::STANDARD as B64};
4use sha2::{Digest, Sha256};
5use std::path::Path;
6use tantivy::collector::TopDocs;
7use tantivy::query::QueryParser;
8use tantivy::schema::{Field, STORED, STRING, Schema, TEXT, Value};
9use tantivy::{Index, IndexReader, doc};
10use tracing::{Level, debug, info};
11
/// Full-text search index over documentation heading blocks, backed by tantivy.
///
/// Instances are obtained through `create`, `open`, or `create_or_open`; the
/// field handles below are resolved once at construction time and reused for
/// every indexing and search call.
pub struct SearchIndex {
    /// The underlying tantivy index (created or opened on disk).
    index: Index,
    // Stored alongside the index but not read by any code visible in this file,
    // hence the lint allowance.
    #[allow(dead_code)]
    schema: Schema,
    /// Handle of the `content` field holding each block's text.
    content_field: Field,
    /// Handle of the `path` field holding the source file path.
    path_field: Field,
    /// Handle of the `heading_path` field (heading segments joined by `" > "`).
    heading_path_field: Field,
    /// Handle of the `lines` field, stored as `"start-end"`.
    lines_field: Field,
    /// Handle of the `alias` field used to replace and filter documents per source.
    alias_field: Field,
    /// Handle of the `anchor` field; `None` when an opened index's schema lacks it.
    anchor_field: Option<Field>,
    /// Reader used to obtain searchers; reloaded explicitly after each commit.
    reader: IndexReader,
    /// Optional metrics sink handed to the operation timers.
    metrics: Option<PerformanceMetrics>,
}
26
27impl SearchIndex {
28 #[must_use]
30 pub fn with_metrics(mut self, metrics: PerformanceMetrics) -> Self {
31 self.metrics = Some(metrics);
32 self
33 }
34
35 #[must_use]
37 pub const fn metrics(&self) -> Option<&PerformanceMetrics> {
38 self.metrics.as_ref()
39 }
40 pub fn create(index_path: &Path) -> Result<Self> {
42 let mut schema_builder = Schema::builder();
43
44 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
45 let path_field = schema_builder.add_text_field("path", STRING | STORED);
46 let heading_path_field = schema_builder.add_text_field("heading_path", TEXT | STORED);
47 let lines_field = schema_builder.add_text_field("lines", STRING | STORED);
48 let alias_field = schema_builder.add_text_field("alias", STRING | STORED);
49 let anchor_field = schema_builder.add_text_field("anchor", STRING | STORED);
50
51 let schema = schema_builder.build();
52
53 std::fs::create_dir_all(index_path)
54 .map_err(|e| Error::Index(format!("Failed to create index directory: {e}")))?;
55
56 let index = Index::create_in_dir(index_path, schema.clone())
57 .map_err(|e| Error::Index(format!("Failed to create index: {e}")))?;
58
59 let reader = index
60 .reader_builder()
61 .reload_policy(tantivy::ReloadPolicy::OnCommitWithDelay)
62 .try_into()
63 .map_err(|e| Error::Index(format!("Failed to create reader: {e}")))?;
64
65 Ok(Self {
66 index,
67 schema,
68 content_field,
69 path_field,
70 heading_path_field,
71 lines_field,
72 alias_field,
73 reader,
74 anchor_field: Some(anchor_field),
75 metrics: None,
76 })
77 }
78
79 pub fn create_or_open(index_path: &Path) -> Result<Self> {
81 if index_path.exists() {
82 Self::open(index_path)
83 } else {
84 Self::create(index_path)
85 }
86 }
87
88 pub fn open(index_path: &Path) -> Result<Self> {
90 let index = Index::open_in_dir(index_path)
91 .map_err(|e| Error::Index(format!("Failed to open index: {e}")))?;
92
93 let schema = index.schema();
94
95 let content_field = schema
96 .get_field("content")
97 .map_err(|_| Error::Index("Missing content field".into()))?;
98 let path_field = schema
99 .get_field("path")
100 .map_err(|_| Error::Index("Missing path field".into()))?;
101 let heading_path_field = schema
102 .get_field("heading_path")
103 .map_err(|_| Error::Index("Missing heading_path field".into()))?;
104 let lines_field = schema
105 .get_field("lines")
106 .map_err(|_| Error::Index("Missing lines field".into()))?;
107 let alias_field = schema
108 .get_field("alias")
109 .map_err(|_| Error::Index("Missing alias field".into()))?;
110
111 let anchor_field = schema.get_field("anchor").ok();
113
114 let reader = index
115 .reader_builder()
116 .reload_policy(tantivy::ReloadPolicy::OnCommitWithDelay)
117 .try_into()
118 .map_err(|e| Error::Index(format!("Failed to create reader: {e}")))?;
119
120 Ok(Self {
121 index,
122 schema,
123 content_field,
124 path_field,
125 heading_path_field,
126 lines_field,
127 alias_field,
128 reader,
129 anchor_field,
130 metrics: None,
131 })
132 }
133
134 pub fn index_blocks(
136 &self,
137 alias: &str,
138 file_path: &str,
139 blocks: &[HeadingBlock],
140 ) -> Result<()> {
141 let timer = self.metrics.as_ref().map_or_else(
142 || OperationTimer::new(&format!("index_{alias}")),
143 |metrics| OperationTimer::with_metrics(&format!("index_{alias}"), metrics.clone()),
144 );
145
146 let mut timings = ComponentTimings::new();
147
148 let mut writer = timings.time("writer_creation", || {
149 self.index
150 .writer(50_000_000)
151 .map_err(|e| Error::Index(format!("Failed to create writer: {e}")))
152 })?;
153
154 let _deleted = timings.time("delete_existing", || {
155 writer.delete_term(tantivy::Term::from_field_text(self.alias_field, alias))
156 });
157
158 let mut total_content_bytes = 0usize;
159
160 timings.time("document_creation", || {
161 for block in blocks {
162 total_content_bytes += block.content.len();
163 let heading_path_str = block.path.join(" > ");
164 let lines_str = format!("{}-{}", block.start_line, block.end_line);
165 let anchor = block.path.last().map(|h| Self::compute_anchor(h));
167
168 let mut doc = doc!(
169 self.content_field => block.content.as_str(), self.path_field => file_path,
171 self.heading_path_field => heading_path_str,
172 self.lines_field => lines_str,
173 self.alias_field => alias
174 );
175 if let (Some(f), Some(a)) = (self.anchor_field, anchor) {
176 doc.add_text(f, a);
177 }
178
179 writer
180 .add_document(doc)
181 .map_err(|e| Error::Index(format!("Failed to add document: {e}")))?;
182 }
183 Ok::<(), Error>(())
184 })?;
185
186 timings.time("commit", || {
187 writer
188 .commit()
189 .map_err(|e| Error::Index(format!("Failed to commit: {e}")))
190 })?;
191
192 timings.time("reader_reload", || {
193 self.reader
194 .reload()
195 .map_err(|e| Error::Index(format!("Failed to reload reader: {e}")))
196 })?;
197
198 let duration = timer.finish_index(total_content_bytes);
199
200 if tracing::enabled!(Level::DEBUG) {
202 timings.print_breakdown();
203 }
204
205 info!(
206 "Indexed {} blocks ({} bytes) for {} in {:.2}ms",
207 blocks.len(),
208 total_content_bytes,
209 alias,
210 duration.as_millis()
211 );
212
213 Ok(())
214 }
215
216 #[allow(clippy::too_many_lines)] pub fn search(
219 &self,
220 query_str: &str,
221 alias: Option<&str>,
222 limit: usize,
223 ) -> Result<Vec<SearchHit>> {
224 let timer = self.metrics.as_ref().map_or_else(
225 || OperationTimer::new(&format!("search_{query_str}")),
226 |metrics| OperationTimer::with_metrics(&format!("search_{query_str}"), metrics.clone()),
227 );
228
229 let mut timings = ComponentTimings::new();
230 let mut lines_searched = 0usize;
231
232 let searcher = timings.time("searcher_creation", || self.reader.searcher());
233
234 let query_parser = timings.time("query_parser_creation", || {
235 QueryParser::for_index(
236 &self.index,
237 vec![self.content_field, self.heading_path_field],
238 )
239 });
240
241 let needs_escaping = query_str.chars().any(|c| {
243 matches!(
244 c,
245 '\\' | '"' | '(' | ')' | '[' | ']' | '{' | '}' | '^' | '~'
246 )
247 });
248
249 let full_query_str = if needs_escaping {
250 let mut sanitized = String::with_capacity(query_str.len() * 2);
252
253 for ch in query_str.chars() {
254 match ch {
255 '\\' => sanitized.push_str("\\\\"),
256 '"' => sanitized.push_str("\\\""),
257 '(' => sanitized.push_str("\\("),
258 ')' => sanitized.push_str("\\)"),
259 '[' => sanitized.push_str("\\["),
260 ']' => sanitized.push_str("\\]"),
261 '{' => sanitized.push_str("\\{"),
262 '}' => sanitized.push_str("\\}"),
263 '^' => sanitized.push_str("\\^"),
264 '~' => sanitized.push_str("\\~"),
265 _ => sanitized.push(ch),
266 }
267 }
268
269 if let Some(alias) = alias {
270 format!("alias:{alias} AND ({sanitized})")
272 } else {
273 sanitized
274 }
275 } else {
276 alias.map_or_else(
278 || query_str.to_string(),
279 |alias| format!("alias:{alias} AND ({query_str})"),
280 )
281 };
282
283 let query = timings.time("query_parsing", || {
284 query_parser
285 .parse_query(&full_query_str)
286 .map_err(|e| Error::Index(format!("Failed to parse query: {e}")))
287 })?;
288
289 let top_docs = timings.time("tantivy_search", || {
290 searcher
291 .search(&query, &TopDocs::with_limit(limit))
292 .map_err(|e| Error::Index(format!("Search failed: {e}")))
293 })?;
294
295 let mut hits = Vec::new();
296
297 timings.time("result_processing", || {
298 for (score, doc_address) in top_docs {
299 let doc = searcher
300 .doc(doc_address)
301 .map_err(|e| Error::Index(format!("Failed to retrieve doc: {e}")))?;
302
303 let alias = Self::get_field_text(&doc, self.alias_field)?;
304 let file = Self::get_field_text(&doc, self.path_field)?;
305 let heading_path_str = Self::get_field_text(&doc, self.heading_path_field)?;
306 let lines = Self::get_field_text(&doc, self.lines_field)?;
307 let content = Self::get_field_text(&doc, self.content_field)?;
308 let anchor = self.anchor_field.and_then(|f| {
309 doc.get_first(f)
310 .and_then(|v| v.as_str())
311 .map(std::string::ToString::to_string)
312 });
313
314 lines_searched += content.lines().count();
316
317 let heading_path: Vec<String> = heading_path_str
318 .split(" > ")
319 .map(std::string::ToString::to_string)
320 .collect();
321
322 let snippet = Self::extract_snippet(&content, query_str, 100);
323
324 let exact_lines = Self::compute_match_lines(&content, query_str, &lines)
326 .unwrap_or_else(|| lines.clone());
327
328 let line_numbers = Self::parse_lines_range(&exact_lines);
330
331 hits.push(SearchHit {
332 alias: alias.clone(),
333 source: alias,
334 file,
335 heading_path,
336 lines: exact_lines,
337 line_numbers,
338 snippet,
339 score,
340 source_url: None,
341 checksum: String::new(),
342 anchor,
343 });
344 }
345 Ok::<(), Error>(())
346 })?;
347
348 let duration = timer.finish_search(lines_searched);
349
350 if tracing::enabled!(Level::DEBUG) {
352 timings.print_breakdown();
353 }
354
355 debug!(
356 "Found {} hits for query '{}' in {:.2}ms (searched {} lines)",
357 hits.len(),
358 query_str,
359 duration.as_millis(),
360 lines_searched
361 );
362
363 Ok(hits)
364 }
365
366 fn compute_anchor(heading_text: &str) -> String {
367 let mut hasher = Sha256::new();
368 hasher.update(heading_text.trim().to_lowercase().as_bytes());
369 let digest = hasher.finalize();
370 let full = B64.encode(digest);
371 full[..22.min(full.len())].to_string()
372 }
373
374 fn get_field_text(doc: &tantivy::TantivyDocument, field: Field) -> Result<String> {
375 doc.get_first(field)
376 .and_then(|v| v.as_str())
377 .map(std::string::ToString::to_string)
378 .ok_or_else(|| Error::Index("Field not found in document".into()))
379 }
380
/// Locates the line on which the earliest query term occurs inside a block.
///
/// `block_lines` is the block's range (`"start-end"` or `"start:end"`); only
/// its start is used. The query is split on whitespace and each term is
/// searched case-insensitively as a substring of `content`. The line of the
/// earliest occurrence is returned as `"N-N"`, counted from the block start.
///
/// Returns `None` when the range start cannot be parsed or no term occurs.
fn compute_match_lines(content: &str, query: &str, block_lines: &str) -> Option<String> {
    let block_start: usize = block_lines
        .split(['-', ':'])
        .next()
        .and_then(|s| s.trim().parse::<usize>().ok())?;

    // Match case-insensitively, like the snippet extraction does. Lowercasing
    // never adds or removes '\n', so counting newlines in the lowered text
    // yields the same line index as in the original content.
    let lowered = content.to_lowercase();
    let pos = query
        .split_whitespace()
        .filter_map(|token| lowered.find(&token.to_lowercase()))
        .min()?;

    let local_line = lowered[..pos].bytes().filter(|&b| b == b'\n').count();
    let abs_line = block_start.saturating_add(local_line);
    Some(format!("{abs_line}-{abs_line}"))
}
411
/// Parses a `"start-end"` (or `"start:end"`) range into `[start, end]`.
///
/// Whitespace around each number is ignored and anything after the second
/// number is disregarded. Returns `None` when either bound is missing or is
/// not an unsigned integer.
fn parse_lines_range(range: &str) -> Option<Vec<usize>> {
    let mut bounds = range
        .split(|c: char| c == '-' || c == ':')
        .map(|piece| piece.trim().parse::<usize>().ok());

    match (bounds.next(), bounds.next()) {
        (Some(Some(first)), Some(Some(last))) => Some(vec![first, last]),
        _ => None,
    }
}
420
/// Builds a display snippet of roughly `max_len` characters around the match.
///
/// The whole query is first searched as a case-insensitive phrase. If it does
/// not occur, the earliest case-insensitive occurrence of any
/// whitespace-separated query term is used instead, so multi-word queries
/// whose terms are not adjacent still get a snippet around a real match.
/// The window is centred on the match and `"..."` marks each truncated side.
///
/// When nothing matches, the first `max_len` characters of `content` are
/// returned (followed by `"..."` if the content is longer).
///
/// All positions are counted in characters, never bytes, so the result is
/// always cut on character boundaries.
fn extract_snippet(content: &str, query: &str, max_len: usize) -> String {
    // Char index of the first case-insensitive occurrence of `needle`.
    fn find_ignoring_case(haystack: &[char], needle: &[char]) -> Option<usize> {
        if needle.is_empty() {
            return None;
        }
        haystack.windows(needle.len()).position(|window| {
            window
                .iter()
                .zip(needle)
                .all(|(have, want)| have.to_lowercase().eq(want.to_lowercase()))
        })
    }

    let content_chars: Vec<char> = content.chars().collect();
    let total_chars = content_chars.len();

    // Phrase match first; otherwise the earliest single-term match.
    let phrase: Vec<char> = query.to_lowercase().chars().collect();
    let located = find_ignoring_case(&content_chars, &phrase)
        .map(|pos| (pos, phrase.len()))
        .or_else(|| {
            query
                .split_whitespace()
                .filter_map(|token| {
                    let token_chars: Vec<char> = token.to_lowercase().chars().collect();
                    find_ignoring_case(&content_chars, &token_chars)
                        .map(|pos| (pos, token_chars.len()))
                })
                .min_by_key(|&(pos, _)| pos)
        });

    let Some((match_pos, match_len)) = located else {
        // No match at all: show the head of the content.
        if total_chars <= max_len {
            return content.to_string();
        }
        let mut head: String = content_chars[..max_len].iter().collect();
        head.push_str("...");
        return head;
    };

    let ctx_each_side = max_len.saturating_sub(match_len) / 2;
    let start_char = match_pos.saturating_sub(ctx_each_side);
    // Cap the window at `max_len` characters; this only bites when the match
    // itself is longer than `max_len`.
    let end_char = (match_pos + match_len + ctx_each_side)
        .min(total_chars)
        .min(start_char + max_len);

    let mut snippet = String::with_capacity((end_char - start_char) * 4 + 6);
    if start_char > 0 {
        snippet.push_str("...");
    }
    snippet.extend(&content_chars[start_char..end_char]);
    if end_char < total_chars {
        snippet.push_str("...");
    }
    snippet
}
500}
501
502#[cfg(test)]
503mod tests {
504 #![allow(clippy::panic)]
505 #![allow(clippy::disallowed_macros)]
506 #![allow(clippy::unwrap_used)]
507 use super::*;
508 use crate::HeadingBlock;
509 use std::time::Instant;
510 use tempfile::TempDir;
511
512 fn create_test_blocks() -> Vec<HeadingBlock> {
513 vec![
514 HeadingBlock {
515 path: vec!["React".to_string(), "Hooks".to_string()],
516 content: "useState is a React hook that lets you add state to functional components. It returns an array with the current state value and a function to update it.".to_string(),
517 start_line: 100,
518 end_line: 120,
519 },
520 HeadingBlock {
521 path: vec!["React".to_string(), "Components".to_string()],
522 content: "Components are the building blocks of React applications. They can be function components or class components.".to_string(),
523 start_line: 50,
524 end_line: 75,
525 },
526 HeadingBlock {
527 path: vec!["Next.js".to_string(), "Routing".to_string()],
528 content: "App Router is the new routing system in Next.js 13+. It provides better performance and developer experience.".to_string(),
529 start_line: 200,
530 end_line: 250,
531 },
532 ]
533 }
534
/// Creating an index succeeds and leaves the index directory on disk.
#[test]
fn test_index_creation() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");

    let created = SearchIndex::create(&index_path);

    assert!(created.is_ok(), "Should create index successfully");
    assert!(index_path.exists());
}
546
/// Opening a path that was never indexed reports an error.
#[test]
fn test_index_open_nonexistent() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let missing_path = workspace.path().join("nonexistent");

    let opened = SearchIndex::open(&missing_path);

    assert!(opened.is_err(), "Should fail to open non-existent index");
}
555
/// Indexed content can be found again, and the hit reports its alias and file.
#[test]
fn test_index_and_search_basic() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    index
        .index_blocks("test", "test.md", &create_test_blocks())
        .expect("Should index blocks");

    let hits = index
        .search("useState", Some("test"), 10)
        .expect("Should search");

    assert!(!hits.is_empty(), "Should find results for useState");
    let top = &hits[0];
    assert!(
        top.snippet.contains("useState"),
        "Result should contain useState"
    );
    assert_eq!(top.alias, "test");
    assert_eq!(top.file, "test.md");
}
582
/// The number of returned hits never exceeds the requested limit.
#[test]
fn test_search_limit() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    index
        .index_blocks("test", "test.md", &create_test_blocks())
        .expect("Should index blocks");

    // "React" occurs in several blocks, so the limit is what bounds the result.
    let hits = index
        .search("React", Some("test"), 1)
        .expect("Should search");

    assert!(!hits.is_empty(), "Should find results");
    assert!(hits.len() <= 1, "Should respect limit");
}
603
/// Hits carry the anchor computed from the last heading segment of the block.
#[test]
fn test_search_includes_anchor() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    let api_block = HeadingBlock {
        path: vec![String::from("API"), String::from("Reference")],
        content: String::from("token auth key"),
        start_line: 10,
        end_line: 20,
    };

    index
        .index_blocks("test", "api.md", &[api_block])
        .expect("Should index blocks");

    let hits = index
        .search("token", Some("test"), 10)
        .expect("Should search");

    assert!(!hits.is_empty());
    assert!(hits[0].anchor.is_some(), "anchor should be present in hits");

    let expected = SearchIndex::compute_anchor("Reference");
    assert_eq!(hits[0].anchor.as_deref(), Some(expected.as_str()));
}
632
/// A term that occurs nowhere yields an empty result rather than an error.
#[test]
fn test_search_no_results() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    index
        .index_blocks("test", "test.md", &create_test_blocks())
        .expect("Should index blocks");

    let hits = index
        .search("nonexistentterm12345", Some("test"), 10)
        .expect("Should search");

    assert!(
        hits.is_empty(),
        "Should find no results for non-existent term"
    );
}
655
/// Searching an index of 100 blocks finishes within 100 ms.
#[test]
fn test_search_performance() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    let blocks: Vec<HeadingBlock> = (0..100)
        .map(|i| HeadingBlock {
            path: vec![format!("Section{}", i)],
            content: format!("This is content block {i} with various keywords like React, hooks, components, and performance testing."),
            start_line: i * 10,
            end_line: i * 10 + 5,
        })
        .collect();

    index
        .index_blocks("perftest", "large.md", &blocks)
        .expect("Should index many blocks");

    // Only the search itself is timed, not the indexing above.
    let started = Instant::now();
    let hits = index
        .search("React", Some("perftest"), 50)
        .expect("Should search");
    let duration = started.elapsed();

    assert!(!hits.is_empty(), "Should find results");
    assert!(
        duration.as_millis() < 100,
        "Search should be fast (<100ms), took {}ms",
        duration.as_millis()
    );
}
692
/// Hits come back in descending score order with the tightest match first.
#[test]
fn test_search_scoring() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    let exact = HeadingBlock {
        path: vec![String::from("Exact Match")],
        content: String::from("React hooks"),
        start_line: 1,
        end_line: 5,
    };
    let partial = HeadingBlock {
        path: vec![String::from("Partial Match")],
        content: String::from("React components and hooks are useful features"),
        start_line: 10,
        end_line: 15,
    };
    let distant = HeadingBlock {
        path: vec![String::from("Distant Match")],
        content: String::from("In React, you can use various hooks for different purposes"),
        start_line: 20,
        end_line: 25,
    };

    index
        .index_blocks("test", "test.md", &[exact, partial, distant])
        .expect("Should index blocks");

    let hits = index
        .search("React hooks", Some("test"), 10)
        .expect("Should search");

    assert!(!hits.is_empty(), "Should find results");

    // Every adjacent pair must be ordered by non-increasing score.
    for pair in hits.windows(2) {
        assert!(
            pair[0].score >= pair[1].score,
            "Results should be ordered by descending score"
        );
    }

    assert!(
        hits[0].snippet.contains("React hooks"),
        "Highest scored result should contain exact match"
    );
}
745
/// Hits expose the full heading path, the file, and a line range that starts
/// at the matching line.
#[test]
fn test_heading_path_in_results() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let index_path = workspace.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    let hooks_block = HeadingBlock {
        path: vec![
            String::from("API"),
            String::from("Reference"),
            String::from("Hooks"),
        ],
        content: String::from("useState hook documentation"),
        start_line: 100,
        end_line: 120,
    };

    index
        .index_blocks("test", "api.md", &[hooks_block])
        .expect("Should index blocks");

    let hits = index
        .search("useState", Some("test"), 10)
        .expect("Should search");

    assert!(!hits.is_empty(), "Should find results");
    let top = &hits[0];
    assert_eq!(top.heading_path, vec!["API", "Reference", "Hooks"]);
    assert_eq!(top.file, "api.md");
    // The term sits on the block's first line, so the range starts at 100.
    assert!(
        top.lines.starts_with("100-"),
        "Expected match to start at line 100, got {}",
        top.lines
    );
}
782
/// Snippets built from emoji, Chinese and Japanese content are valid strings.
///
/// The literals restore multi-byte text that had been damaged by a wrong
/// character-set conversion. The CJK text is reconstructed from the surviving
/// bytes; the original emoji could not be recovered, so representative ones
/// are written as escapes.
#[test]
fn test_unicode_snippet_extraction() {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let index_path = temp_dir.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    let unicode_blocks = vec![
        HeadingBlock {
            path: vec!["Unicode".to_string(), "Emoji".to_string()],
            content: "This is a test with emojis: \u{1F600} Hello \u{1F30D} World! \u{1F680} Let's go! \u{1F389}"
                .to_string(),
            start_line: 1,
            end_line: 10,
        },
        HeadingBlock {
            path: vec!["Unicode".to_string(), "Chinese".to_string()],
            content: "这是中文测试。Hello 世界！Programming 编程 is 很有趣。".to_string(),
            start_line: 20,
            end_line: 30,
        },
        HeadingBlock {
            path: vec!["Unicode".to_string(), "Mixed".to_string()],
            // U+1F1EF U+1F1F5 is the two-code-point flag of Japan.
            content: "日本語 テスト \u{1F1EF}\u{1F1F5} with mixed content".to_string(),
            start_line: 40,
            end_line: 50,
        },
    ];

    index
        .index_blocks("unicode_test", "test.md", &unicode_blocks)
        .expect("Should index blocks");

    let test_cases = vec![
        ("emoji", "\u{1F600}"),
        ("中文", "测试"),
        ("programming", "编程"),
    ];

    for (query, _expected_content) in test_cases {
        let results = index
            .search(query, Some("unicode_test"), 10)
            .unwrap_or_else(|_| panic!("Should search for '{query}'"));

        // A query may legitimately return nothing; only returned snippets are checked.
        if !results.is_empty() {
            let hit = &results[0];
            assert!(hit.snippet.is_char_boundary(0));
            assert!(hit.snippet.is_char_boundary(hit.snippet.len()));

            // Walking the characters must not panic on a badly cut snippet.
            let _char_count = hit.snippet.chars().count();
        }
    }
}
835
/// A snippet cut out of content packed with multi-code-point emoji still
/// contains the match and is a valid string.
///
/// The emoji are zero-width-joiner sequences written as escapes so that the
/// invisible joiners survive any re-encoding of this file.
#[test]
fn test_edge_case_unicode_truncation() {
    // Family: man, woman, girl, boy joined by U+200D.
    const FAMILY: &str = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
    // Rainbow flag: white flag, variation selector, U+200D, rainbow.
    const RAINBOW_FLAG: &str = "\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}";

    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let index_path = temp_dir.path().join("test_index");
    let index = SearchIndex::create(&index_path).expect("Should create index");

    // Surround the marker with enough emoji that the snippet must truncate
    // on both sides.
    let mut long_content = String::new();
    for _ in 0..20 {
        long_content.push_str(FAMILY);
    }
    long_content.push_str(" MARKER ");
    for _ in 0..20 {
        long_content.push_str(RAINBOW_FLAG);
    }

    let blocks = vec![HeadingBlock {
        path: vec!["Test".to_string()],
        content: long_content,
        start_line: 1,
        end_line: 10,
    }];

    index
        .index_blocks("edge_test", "test.md", &blocks)
        .expect("Should index blocks");

    let results = index
        .search("MARKER", Some("edge_test"), 10)
        .expect("Should search");

    assert!(!results.is_empty());
    let snippet = &results[0].snippet;

    assert!(snippet.is_char_boundary(0));
    assert!(snippet.is_char_boundary(snippet.len()));
    assert!(snippet.contains("MARKER"));

    let char_count = snippet.chars().count();
    assert!(char_count > 0);
}
879}