ass_editor/utils/
search.rs

1//! Search functionality for ASS documents
2//!
3//! Provides the `DocumentSearch` trait for efficient text search with
4//! FST-based indexing, regex support, and incremental updates. Targets
5//! <10ms initial indexing and <1ms query response times.
6
7use crate::core::{EditorDocument, Position, Range, Result};
8
9#[cfg(feature = "std")]
10use std::borrow::Cow;
11
12#[cfg(not(feature = "std"))]
13use alloc::borrow::Cow;
14
15#[cfg(feature = "std")]
16use std::collections::HashMap;
17
18#[cfg(not(feature = "std"))]
19use alloc::{
20    boxed::Box,
21    format,
22    string::{String, ToString},
23    vec::Vec,
24};
25
26#[cfg(not(feature = "std"))]
27use hashbrown::HashMap;
28
29#[cfg(feature = "std")]
30use std::time::Instant;
31
32#[cfg(not(feature = "std"))]
33#[allow(dead_code)]
34type Instant = u64;
35
36#[cfg(feature = "search-index")]
37use fst::{automaton, IntoStreamer, Set, SetBuilder, Streamer};
38
39#[cfg(all(feature = "formats", feature = "std"))]
40use regex::Regex;
41
42/// Search options for document queries
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub struct SearchOptions {
45    /// Whether to perform case-sensitive search
46    pub case_sensitive: bool,
47
48    /// Whether to match whole words only
49    pub whole_words: bool,
50
51    /// Maximum number of results to return (0 = unlimited)
52    pub max_results: usize,
53
54    /// Whether to use regular expressions
55    pub use_regex: bool,
56
57    /// Search scope (e.g., specific sections or lines)
58    pub scope: SearchScope,
59}
60
61impl Default for SearchOptions {
62    fn default() -> Self {
63        Self {
64            case_sensitive: false,
65            whole_words: false,
66            max_results: 100,
67            use_regex: false,
68            scope: SearchScope::All,
69        }
70    }
71}
72
73/// Defines the scope of a search operation
74#[derive(Debug, Clone, PartialEq, Eq)]
75pub enum SearchScope {
76    /// Search entire document
77    All,
78
79    /// Search within specific line range
80    Lines { start: usize, end: usize },
81
82    /// Search within specific sections (e.g., Events, Styles)
83    Sections(Vec<String>),
84
85    /// Search within a character range
86    Range(Range),
87}
88
89/// A single search result
90#[derive(Debug, Clone, PartialEq, Eq)]
91pub struct SearchResult<'a> {
92    /// Position where the match starts
93    pub start: Position,
94
95    /// Position where the match ends
96    pub end: Position,
97
98    /// The matched text
99    pub text: Cow<'a, str>,
100
101    /// Context around the match (for display purposes)
102    pub context: Cow<'a, str>,
103
104    /// Line number where the match occurs (0-based)
105    pub line: usize,
106
107    /// Column where the match starts (0-based)
108    pub column: usize,
109}
110
111/// Statistics about search operations
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub struct SearchStats {
114    /// Number of matches found
115    pub match_count: usize,
116
117    /// Time taken for the search in microseconds
118    pub search_time_us: u64,
119
120    /// Whether the search hit the result limit
121    pub hit_limit: bool,
122
123    /// Size of the search index in bytes
124    pub index_size: usize,
125}
126
127/// Error types specific to search operations
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub enum SearchError {
130    /// Invalid regular expression
131    InvalidRegex { pattern: String, error: String },
132
133    /// Search index is corrupted or outdated
134    IndexCorrupted,
135
136    /// Feature not available (e.g., FST not compiled)
137    FeatureNotAvailable { feature: String },
138
139    /// Search operation timed out
140    Timeout { duration_ms: u64 },
141}
142
143/// Main trait for document search functionality
144///
145/// Provides unified search capabilities with optional FST-based indexing for
146/// fast substring searches and regex support for complex pattern matching.
147///
148/// # Examples
149///
150/// ```
151/// use ass_editor::{EditorDocument, utils::search::*};
152///
153/// let mut doc = EditorDocument::from_content(r#"
154/// [Events]
155/// Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Hello World
156/// Dialogue: 0,0:00:05.00,0:00:10.00,Default,,0,0,0,,Goodbye World
157/// "#).unwrap();
158///
159/// // Create and use a search instance
160/// let mut search = DocumentSearchImpl::new();
161/// search.build_index(&doc).unwrap();
162///
163/// // Basic text search
164/// let options = SearchOptions::default();
165/// let results = search.search("World", &options).unwrap();
166/// assert_eq!(results.len(), 2);
167///
168/// // Case-insensitive search with options
169/// let options = SearchOptions {
170///     case_sensitive: false,
171///     max_results: 10,
172///     ..Default::default()
173/// };
174/// ```
175pub trait DocumentSearch {
176    /// Build or rebuild the search index for the document
177    fn build_index(&mut self, document: &EditorDocument) -> Result<()>;
178
179    /// Update the search index incrementally after document changes
180    fn update_index(&mut self, document: &EditorDocument, changes: &[Range]) -> Result<()>;
181
182    /// Search for a pattern in the document
183    fn search<'a>(
184        &'a self,
185        pattern: &str,
186        options: &SearchOptions,
187    ) -> Result<Vec<SearchResult<'a>>>;
188
189    /// Find and replace text in the document
190    fn find_replace<'a>(
191        &'a self,
192        document: &mut EditorDocument,
193        pattern: &str,
194        replacement: &str,
195        options: &SearchOptions,
196    ) -> Result<Vec<SearchResult<'a>>>;
197
198    /// Get search statistics
199    fn stats(&self) -> SearchStats;
200
201    /// Clear the search index to free memory
202    fn clear_index(&mut self);
203}
204
205/// Document search implementation with optional FST indexing
206#[derive(Debug)]
207pub struct DocumentSearchImpl {
208    /// FST set for fast prefix/substring searches (when feature enabled)
209    #[cfg(feature = "search-index")]
210    word_index: Option<Set<Vec<u8>>>,
211
212    /// Mapping from words to their positions in the document
213    #[cfg(feature = "search-index")]
214    word_positions: HashMap<String, Vec<Position>>,
215
216    /// Last known document version for incremental updates
217    document_version: u64,
218
219    /// Cache of recent search results (owned to persist across calls)
220    result_cache: HashMap<String, Vec<SearchResult<'static>>>,
221
222    /// Maximum cache size
223    max_cache_entries: usize,
224
225    /// Search statistics
226    stats: SearchStats,
227
228    /// Cached document text for regex and basic searches
229    cached_text: String,
230}
231
232impl DocumentSearchImpl {
233    /// Create a new unified search instance
234    ///
235    /// # Examples
236    ///
237    /// ```
238    /// use ass_editor::utils::search::DocumentSearchImpl;
239    ///
240    /// let mut search = DocumentSearchImpl::new();
241    /// // Use search with documents...
242    /// ```
243    pub fn new() -> Self {
244        Self {
245            #[cfg(feature = "search-index")]
246            word_index: None,
247            #[cfg(feature = "search-index")]
248            word_positions: HashMap::new(),
249            document_version: 0,
250            result_cache: HashMap::new(),
251            max_cache_entries: 100,
252            stats: SearchStats {
253                match_count: 0,
254                search_time_us: 0,
255                hit_limit: false,
256                index_size: 0,
257            },
258            cached_text: String::new(),
259        }
260    }
261
262    /// Set maximum number of cached search results
263    pub fn set_cache_size(&mut self, size: usize) {
264        self.max_cache_entries = size;
265        self.trim_cache();
266    }
267
268    /// Extract words from document text for indexing
269    #[cfg(feature = "search-index")]
270    fn extract_words<'a>(&self, text: &'a str) -> Vec<(Cow<'a, str>, Vec<Position>)> {
271        let mut words: HashMap<Cow<'a, str>, Vec<Position>> = HashMap::new();
272        let mut word_start_byte = None;
273        let mut current_word = String::new();
274        let mut word_start_idx = 0;
275
276        for (byte_idx, ch) in text.char_indices() {
277            if ch.is_alphanumeric() || ch == '_' {
278                if word_start_byte.is_none() {
279                    word_start_byte = Some(byte_idx);
280                    word_start_idx = byte_idx;
281                }
282                current_word.push(ch);
283            } else {
284                if let Some(start_byte) = word_start_byte {
285                    if !current_word.is_empty() {
286                        // Use Cow to avoid allocation when possible
287                        let word_slice = &text[word_start_idx..byte_idx];
288                        let word_key = if word_slice.chars().all(|c| c.is_lowercase()) {
289                            Cow::Borrowed(word_slice)
290                        } else {
291                            Cow::Owned(current_word.to_lowercase())
292                        };
293
294                        words
295                            .entry(word_key)
296                            .or_default()
297                            .push(Position::new(start_byte));
298                    }
299                }
300                current_word.clear();
301                word_start_byte = None;
302            }
303        }
304
305        // Handle final word
306        if let Some(start_byte) = word_start_byte {
307            if !current_word.is_empty() {
308                let word_slice = &text[word_start_idx..];
309                let word_key = if word_slice.chars().all(|c| c.is_lowercase()) {
310                    Cow::Borrowed(word_slice)
311                } else {
312                    Cow::Owned(current_word.to_lowercase())
313                };
314
315                words
316                    .entry(word_key)
317                    .or_default()
318                    .push(Position::new(start_byte));
319            }
320        }
321
322        words.into_iter().collect()
323    }
324
325    /// Build FST from word list
326    #[cfg(feature = "search-index")]
327    fn build_fst(&self, words: &[(Cow<str>, Vec<Position>)]) -> Result<Set<Vec<u8>>> {
328        let mut builder = SetBuilder::memory();
329        let mut word_list: Vec<&str> = words.iter().map(|(word, _)| word.as_ref()).collect();
330        word_list.sort();
331        word_list.dedup();
332
333        for word in word_list {
334            builder.insert(word.as_bytes()).map_err(|e| {
335                crate::core::EditorError::CommandFailed {
336                    message: format!("FST build error: {e}"),
337                }
338            })?;
339        }
340
341        Ok(builder.into_set())
342    }
343
344    /// Trim cache to maximum size
345    fn trim_cache(&mut self) {
346        if self.result_cache.len() > self.max_cache_entries {
347            // Simple LRU-like eviction: remove half of the entries
348            let keys_to_remove: Vec<String> = self
349                .result_cache
350                .keys()
351                .take(self.result_cache.len() / 2)
352                .cloned()
353                .collect();
354
355            for key in keys_to_remove {
356                self.result_cache.remove(&key);
357            }
358        }
359    }
360
361    /// Generate cache key for search parameters
362    fn cache_key(&self, pattern: &str, options: &SearchOptions) -> String {
363        format!(
364            "{}|{}|{}|{}|{:?}",
365            pattern,
366            options.case_sensitive,
367            options.whole_words,
368            options.max_results,
369            options.scope
370        )
371    }
372
373    /// Perform basic text search without FST indexing
374    fn basic_search(
375        &self,
376        pattern: &str,
377        options: &SearchOptions,
378    ) -> Result<Vec<SearchResult<'static>>> {
379        let mut results = Vec::new();
380        let search_pattern = if options.case_sensitive {
381            pattern.to_string()
382        } else {
383            pattern.to_lowercase()
384        };
385
386        let search_text = if options.case_sensitive {
387            Cow::Borrowed(&self.cached_text)
388        } else {
389            Cow::Owned(self.cached_text.to_lowercase())
390        };
391
392        // Apply search scope
393        let (search_start, search_end) = match &options.scope {
394            SearchScope::All => (0, search_text.len()),
395            SearchScope::Range(range) => {
396                (range.start.offset, range.end.offset.min(search_text.len()))
397            }
398            SearchScope::Lines { start, end } => {
399                // Calculate byte offsets for line range
400                let mut line_num = 0;
401                let mut start_offset = 0;
402                let mut end_offset = search_text.len();
403
404                for (i, ch) in search_text.char_indices() {
405                    if line_num == *start && start_offset == 0 {
406                        start_offset = i;
407                    }
408                    if ch == '\n' {
409                        line_num += 1;
410                        if line_num > *end {
411                            end_offset = i;
412                            break;
413                        }
414                    }
415                }
416                (start_offset, end_offset)
417            }
418            SearchScope::Sections(_) => {
419                // For sections, would need to parse and identify section boundaries
420                // For now, search entire document
421                (0, search_text.len())
422            }
423        };
424
425        let search_region = &search_text[search_start..search_end];
426        let mut region_offset = 0;
427
428        while let Some(match_idx) = search_region[region_offset..].find(&search_pattern) {
429            let absolute_idx = search_start + region_offset + match_idx;
430            let match_end = absolute_idx + search_pattern.len();
431
432            // Calculate line and column
433            let mut line = 0;
434            let mut line_start = 0;
435
436            for (i, ch) in self.cached_text[..absolute_idx].char_indices() {
437                if ch == '\n' {
438                    line += 1;
439                    line_start = i + 1;
440                }
441            }
442
443            let column = absolute_idx - line_start;
444
445            // Extract context (line containing the match)
446            let context_start = self.cached_text[..absolute_idx]
447                .rfind('\n')
448                .map(|i| i + 1)
449                .unwrap_or(0);
450            let context_end = self.cached_text[absolute_idx..]
451                .find('\n')
452                .map(|i| absolute_idx + i)
453                .unwrap_or(self.cached_text.len());
454            let context = self.cached_text[context_start..context_end].to_string();
455
456            results.push(SearchResult {
457                start: Position::new(absolute_idx),
458                end: Position::new(match_end),
459                text: Cow::Owned(self.cached_text[absolute_idx..match_end].to_string()),
460                context: Cow::Owned(context),
461                line,
462                column,
463            });
464
465            if options.max_results > 0 && results.len() >= options.max_results {
466                break;
467            }
468
469            region_offset += match_idx + 1;
470        }
471
472        Ok(results)
473    }
474
475    /// Perform regex-based search
476    #[cfg(all(feature = "formats", feature = "std"))]
477    fn regex_search(
478        &self,
479        pattern: &str,
480        options: &SearchOptions,
481    ) -> Result<Vec<SearchResult<'static>>> {
482        use crate::core::EditorError;
483
484        // Compile regex
485        let regex_pattern = if options.case_sensitive {
486            Regex::new(pattern)
487        } else {
488            Regex::new(&format!("(?i){pattern}"))
489        };
490
491        let regex = regex_pattern.map_err(|e| EditorError::CommandFailed {
492            message: format!("Invalid regex pattern: {e}"),
493        })?;
494
495        let mut results = Vec::new();
496        let text = &self.cached_text;
497
498        // Apply search scope
499        let (search_start, search_end) = match &options.scope {
500            SearchScope::All => (0, text.len()),
501            SearchScope::Range(range) => (range.start.offset, range.end.offset.min(text.len())),
502            SearchScope::Lines { start, end } => {
503                // Calculate byte offsets for line range
504                let mut line_num = 0;
505                let mut start_offset = 0;
506                let mut end_offset = text.len();
507
508                for (i, ch) in text.char_indices() {
509                    if line_num == *start && start_offset == 0 {
510                        start_offset = i;
511                    }
512                    if ch == '\n' {
513                        line_num += 1;
514                        if line_num > *end {
515                            end_offset = i;
516                            break;
517                        }
518                    }
519                }
520                (start_offset, end_offset)
521            }
522            SearchScope::Sections(_sections) => {
523                // For sections, would need to parse and identify section boundaries
524                // For now, search entire document
525                (0, text.len())
526            }
527        };
528
529        let search_text = &text[search_start..search_end];
530
531        // Find all matches
532        for mat in regex.find_iter(search_text) {
533            let match_start = search_start + mat.start();
534            let match_end = search_start + mat.end();
535
536            // Calculate line and column
537            let mut line = 0;
538            let mut line_start = 0;
539            for (i, ch) in text[..match_start].char_indices() {
540                if ch == '\n' {
541                    line += 1;
542                    line_start = i + 1;
543                }
544            }
545            let column = match_start - line_start;
546
547            // Extract context (line containing the match)
548            let context_start = text[..match_start].rfind('\n').map(|i| i + 1).unwrap_or(0);
549            let context_end = text[match_start..]
550                .find('\n')
551                .map(|i| match_start + i)
552                .unwrap_or(text.len());
553            let context = text[context_start..context_end].to_string();
554
555            results.push(SearchResult {
556                start: Position::new(match_start),
557                end: Position::new(match_end),
558                text: Cow::Owned(text[match_start..match_end].to_string()),
559                context: Cow::Owned(context),
560                line,
561                column,
562            });
563
564            if options.max_results > 0 && results.len() >= options.max_results {
565                break;
566            }
567        }
568
569        Ok(results)
570    }
571}
572
573impl Default for DocumentSearchImpl {
574    fn default() -> Self {
575        Self::new()
576    }
577}
578
579impl DocumentSearch for DocumentSearchImpl {
580    fn build_index(&mut self, document: &EditorDocument) -> Result<()> {
581        #[cfg(feature = "std")]
582        let _start_time = Instant::now();
583
584        let text = document.text();
585        self.cached_text = text.clone(); // Cache for regex and basic searches
586
587        #[cfg(feature = "search-index")]
588        {
589            let words = self.extract_words(&text);
590
591            // Build FST index
592            let fst = self.build_fst(&words)?;
593            self.word_index = Some(fst);
594
595            // Store word positions
596            self.word_positions.clear();
597            for (word, positions) in words {
598                self.word_positions.insert(word.into_owned(), positions);
599            }
600
601            // Update statistics
602            self.stats.index_size = self.word_positions.len() * 50; // Rough estimate
603        }
604
605        #[cfg(not(feature = "search-index"))]
606        {
607            self.stats.index_size = self.cached_text.len();
608        }
609
610        self.document_version += 1; // Simple increment for cache invalidation
611
612        // Clear cache since index changed
613        self.result_cache.clear();
614
615        Ok(())
616    }
617
618    fn update_index(&mut self, document: &EditorDocument, changes: &[Range]) -> Result<()> {
619        if changes.is_empty() {
620            return Ok(());
621        }
622
623        // Update cached text first
624        self.cached_text = document.text();
625
626        // For non-FST search, we're done after updating the text
627        #[cfg(not(feature = "search-index"))]
628        {
629            self.document_version += 1;
630            self.result_cache.clear();
631            Ok(())
632        }
633
634        // For FST search, perform incremental updates
635        #[cfg(feature = "search-index")]
636        {
637            // Calculate total change size
638            let total_change_size: usize = changes
639                .iter()
640                .map(|r| r.end.offset.saturating_sub(r.start.offset))
641                .sum();
642
643            // If too many changes or large changes, rebuild entirely
644            if changes.len() > 10 || total_change_size > 1000 {
645                return self.build_index(document);
646            }
647
648            // Sort changes by position to process them in order
649            let mut sorted_changes = changes.to_vec();
650            sorted_changes.sort_by_key(|r| r.start.offset);
651
652            // Calculate cumulative offset adjustments
653            let mut offset_adjustments: Vec<(usize, isize)> = Vec::new();
654            let mut cumulative_adjustment = 0isize;
655
656            for change in &sorted_changes {
657                let old_len = change.end.offset - change.start.offset;
658                // Find the actual new text in this region
659                let new_text_region = &self.cached_text[change
660                    .start
661                    .offset
662                    .saturating_add_signed(cumulative_adjustment)..];
663
664                // Estimate new length by finding next unchanged content
665                let new_len = if let Some(next_change) = sorted_changes
666                    .iter()
667                    .find(|c| c.start.offset > change.end.offset)
668                {
669                    let expected_gap = next_change.start.offset - change.end.offset;
670                    new_text_region.len().min(expected_gap)
671                } else {
672                    // No more changes, estimate based on word boundaries
673                    new_text_region
674                        .find(['\n', '\r'])
675                        .unwrap_or(new_text_region.len().min(old_len * 2))
676                };
677
678                let adjustment = new_len as isize - old_len as isize;
679                offset_adjustments.push((change.start.offset, adjustment));
680                cumulative_adjustment += adjustment;
681            }
682
683            // Update word positions
684            let mut updated_positions = HashMap::new();
685
686            for (word, positions) in &self.word_positions {
687                let mut new_positions = Vec::new();
688
689                for &pos in positions {
690                    let mut adjusted_offset = pos.offset;
691                    let mut should_remove = false;
692
693                    // Check each change to see how it affects this position
694                    for (i, change) in sorted_changes.iter().enumerate() {
695                        if pos.offset >= change.start.offset && pos.offset < change.end.offset {
696                            // Word was in changed region - remove it
697                            should_remove = true;
698                            break;
699                        } else if pos.offset >= change.end.offset {
700                            // Word is after this change - apply adjustment
701                            let (_, adjustment) = offset_adjustments[i];
702                            adjusted_offset = adjusted_offset.saturating_add_signed(adjustment);
703                        }
704                    }
705
706                    if !should_remove {
707                        new_positions.push(Position::new(adjusted_offset));
708                    }
709                }
710
711                if !new_positions.is_empty() {
712                    updated_positions.insert(word.clone(), new_positions);
713                }
714            }
715
716            // Extract new words from changed regions
717            for (i, change) in sorted_changes.iter().enumerate() {
718                let start_adjustment: isize =
719                    offset_adjustments[..i].iter().map(|(_, adj)| adj).sum();
720                let adjusted_start = change.start.offset.saturating_add_signed(start_adjustment);
721
722                // Get text around the change
723                let extract_start = adjusted_start.saturating_sub(50);
724                let extract_end = (adjusted_start + 100).min(self.cached_text.len());
725
726                if extract_start < extract_end {
727                    let region_text = &self.cached_text[extract_start..extract_end];
728                    let new_words = self.extract_words(region_text);
729
730                    // Add new words with adjusted positions
731                    for (word, positions) in new_words {
732                        let entry = updated_positions.entry(word.into_owned()).or_default();
733                        for pos in positions {
734                            let global_pos = Position::new(pos.offset + extract_start);
735                            if !entry.contains(&global_pos) {
736                                entry.push(global_pos);
737                            }
738                        }
739                        entry.sort_by_key(|p| p.offset);
740                    }
741                }
742            }
743
744            // Update the word positions
745            self.word_positions = updated_positions;
746
747            // Rebuild FST with updated word list
748            if !self.word_positions.is_empty() {
749                let words: Vec<(Cow<str>, Vec<Position>)> = self
750                    .word_positions
751                    .iter()
752                    .map(|(k, v)| (Cow::Borrowed(k.as_str()), v.clone()))
753                    .collect();
754                self.word_index = Some(self.build_fst(&words)?);
755            } else {
756                self.word_index = None;
757            }
758
759            // Update statistics
760            self.stats.index_size = self.word_positions.len() * 50;
761            self.document_version += 1;
762            self.result_cache.clear();
763
764            Ok(())
765        }
766    }
767
768    fn search(&self, pattern: &str, options: &SearchOptions) -> Result<Vec<SearchResult>> {
769        #[cfg(feature = "std")]
770        let _start_time = Instant::now();
771
772        // Check cache first
773        let cache_key = self.cache_key(pattern, options);
774        if let Some(cached_results) = self.result_cache.get(&cache_key) {
775            return Ok(cached_results.clone());
776        }
777
778        let results = if options.use_regex {
779            // Use regex search
780            #[cfg(all(feature = "formats", feature = "std"))]
781            {
782                self.regex_search(pattern, options)?
783            }
784            #[cfg(not(all(feature = "formats", feature = "std")))]
785            {
786                return Err(crate::core::EditorError::CommandFailed {
787                    message: "Regex search requires the 'formats' feature to be enabled"
788                        .to_string(),
789                });
790            }
791        } else {
792            // Check if we should use FST or basic search
793            #[cfg(feature = "search-index")]
794            {
795                if let Some(ref fst) = self.word_index {
796                    // Use FST for search
797                    let search_pattern = if options.case_sensitive {
798                        pattern.to_string()
799                    } else {
800                        pattern.to_lowercase()
801                    };
802
803                    let mut results = Vec::new();
804
805                    // Create automaton for prefix search
806                    let mut stream = fst
807                        .search(automaton::Str::new(&search_pattern))
808                        .into_stream();
809
810                    while let Some(key) = stream.next() {
811                        let word = String::from_utf8_lossy(key);
812                        if let Some(positions) = self.word_positions.get(word.as_ref()) {
813                            for &pos in positions {
814                                results.push(SearchResult {
815                                    start: pos,
816                                    end: Position::new(pos.offset + pattern.len()),
817                                    text: std::borrow::Cow::Owned(pattern.to_string()),
818                                    context: std::borrow::Cow::Owned(format!(
819                                        "Offset {}",
820                                        pos.offset
821                                    )),
822                                    line: 0, // Would need document context to calculate actual line
823                                    column: pos.offset,
824                                });
825
826                                if options.max_results > 0 && results.len() >= options.max_results {
827                                    break;
828                                }
829                            }
830                        }
831
832                        if options.max_results > 0 && results.len() >= options.max_results {
833                            break;
834                        }
835                    }
836
837                    results
838                } else {
839                    // FST index not built yet, use basic search
840                    self.basic_search(pattern, options)?
841                }
842            }
843            #[cfg(not(feature = "search-index"))]
844            {
845                // Use basic text search when FST is not available
846                self.basic_search(pattern, options)?
847            }
848        };
849
850        // Update statistics (would need mutable access in real implementation)
851        // self.stats.match_count = results.len();
852        // self.stats.search_time_us = search_time;
853        // self.stats.hit_limit = options.max_results > 0 && results.len() >= options.max_results;
854
855        Ok(results)
856    }
857
858    fn find_replace(
859        &self,
860        document: &mut EditorDocument,
861        pattern: &str,
862        replacement: &str,
863        options: &SearchOptions,
864    ) -> Result<Vec<SearchResult>> {
865        let results = self.search(pattern, options)?;
866        let mut replaced = Vec::new();
867
868        // Apply replacements in reverse order to maintain position validity
869        for result in results.iter().rev() {
870            let range = Range::new(result.start, result.end);
871            document.delete(range)?;
872            document.insert(result.start, replacement)?;
873            replaced.push(result.clone());
874        }
875
876        replaced.reverse(); // Restore original order
877        Ok(replaced)
878    }
879
880    fn stats(&self) -> SearchStats {
881        self.stats.clone()
882    }
883
884    fn clear_index(&mut self) {
885        #[cfg(feature = "search-index")]
886        {
887            self.word_index = None;
888            self.word_positions.clear();
889        }
890        self.result_cache.clear();
891        self.cached_text.clear();
892        self.stats.index_size = 0;
893    }
894}
895
896/// Factory function to create a document search implementation
897pub fn create_search() -> Box<dyn DocumentSearch> {
898    Box::new(DocumentSearchImpl::new())
899}
900
901#[cfg(test)]
902mod tests {
903    use super::*;
904    #[cfg(not(feature = "std"))]
905    use alloc::{borrow::Cow, string::ToString, vec};
906
907    #[test]
908    fn search_options_default() {
909        let options = SearchOptions::default();
910        assert!(!options.case_sensitive);
911        assert!(!options.whole_words);
912        assert_eq!(options.max_results, 100);
913        assert!(!options.use_regex);
914        assert_eq!(options.scope, SearchScope::All);
915    }
916
917    #[test]
918    fn search_result_creation() {
919        let result = SearchResult {
920            start: Position::new(0),
921            end: Position::new(5),
922            text: Cow::Borrowed("hello"),
923            context: Cow::Borrowed("hello world"),
924            line: 0,
925            column: 0,
926        };
927
928        assert_eq!(result.text, "hello");
929        assert_eq!(result.line, 0);
930        assert_eq!(result.column, 0);
931    }
932
933    #[test]
934    fn document_search_creation() {
935        let search = DocumentSearchImpl::new();
936        let stats = search.stats();
937        assert_eq!(stats.index_size, 0);
938        assert_eq!(search.document_version, 0);
939    }
940
941    #[test]
942    fn search_scope_variants() {
943        let scope_all = SearchScope::All;
944        let scope_lines = SearchScope::Lines { start: 0, end: 10 };
945        let scope_sections = SearchScope::Sections(vec!["Events".to_string()]);
946
947        assert_eq!(scope_all, SearchScope::All);
948        assert!(matches!(scope_lines, SearchScope::Lines { .. }));
949        assert!(matches!(scope_sections, SearchScope::Sections(_)));
950    }
951
952    #[test]
953    fn search_cache_settings() {
954        let mut search = DocumentSearchImpl::new();
955        assert_eq!(search.max_cache_entries, 100);
956        search.set_cache_size(50);
957        assert_eq!(search.max_cache_entries, 50);
958    }
959
960    #[test]
961    fn create_search_factory() {
962        let search = create_search();
963        let stats = search.stats();
964        assert_eq!(stats.match_count, 0);
965        assert_eq!(stats.search_time_us, 0);
966        assert!(!stats.hit_limit);
967    }
968
969    #[test]
970    #[cfg(feature = "search-index")]
971    fn test_incremental_index_updates() {
972        use crate::core::{EditorDocument, Range};
973
974        // Create a document with initial content
975        let mut doc = EditorDocument::from_content(
976            "[Script Info]\nTitle: Test Search\n\n[Events]\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Hello world"
977        ).unwrap();
978
979        // Create a search index and build initial index
980        let mut search = DocumentSearchImpl::new();
981        search.build_index(&doc).unwrap();
982
983        // Search for "hello" - should find it
984        let results = search.search("hello", &SearchOptions::default()).unwrap();
985        assert_eq!(results.len(), 1);
986
987        // Actually modify the document - change "Hello" to "Goodbye"
988        let hello_pos = doc.text().find("Hello").unwrap();
989        let change_range = Range::new(Position::new(hello_pos), Position::new(hello_pos + 5));
990
991        // Delete "Hello" and insert "Goodbye"
992        doc.delete(change_range).unwrap();
993        doc.insert(Position::new(hello_pos), "Goodbye").unwrap();
994
995        // Update the index incrementally with the change
996        search.update_index(&doc, &[change_range]).unwrap();
997
998        // Search for "hello" - should not find it anymore
999        let results = search.search("hello", &SearchOptions::default()).unwrap();
1000        assert_eq!(results.len(), 0);
1001
1002        // Search for "goodbye" - should find it
1003        let results = search.search("goodbye", &SearchOptions::default()).unwrap();
1004        assert_eq!(results.len(), 1);
1005    }
1006
1007    #[test]
1008    fn test_simple_document_search_rebuild() {
1009        use crate::core::{EditorDocument, Range};
1010
1011        let mut doc =
1012            EditorDocument::from_content("The quick brown fox jumps over the lazy dog.").unwrap();
1013
1014        let mut search = DocumentSearchImpl::new();
1015        search.build_index(&doc).unwrap();
1016
1017        // Verify initial search works
1018        let results = search.search("fox", &SearchOptions::default()).unwrap();
1019        assert_eq!(results.len(), 1);
1020
1021        // Modify document
1022        let fox_pos = doc.text().find("fox").unwrap();
1023        let change_range = Range::new(Position::new(fox_pos), Position::new(fox_pos + 3));
1024        doc.replace(change_range, "cat").unwrap();
1025
1026        // Update index with the change
1027        search.update_index(&doc, &[change_range]).unwrap();
1028
1029        // Now search for "fox" should find nothing
1030        let results = search.search("fox", &SearchOptions::default()).unwrap();
1031        assert_eq!(results.len(), 0);
1032
1033        // And "cat" should be found
1034        let results = search.search("cat", &SearchOptions::default()).unwrap();
1035        assert_eq!(results.len(), 1);
1036    }
1037
1038    #[test]
1039    #[cfg(all(feature = "formats", feature = "std"))]
1040    fn test_regex_search_basic() {
1041        use crate::core::EditorDocument;
1042
1043        let doc = EditorDocument::from_content(
1044            "[Script Info]\nTitle: Test123\nPlayResX: 1920\nPlayResY: 1080",
1045        )
1046        .unwrap();
1047
1048        let mut search = DocumentSearchImpl::new();
1049        search.build_index(&doc).unwrap();
1050
1051        // Test basic regex pattern
1052        let options = SearchOptions {
1053            use_regex: true,
1054            ..Default::default()
1055        };
1056
1057        // Search for numbers
1058        let results = search.search(r"\d+", &options).unwrap();
1059        assert_eq!(results.len(), 3); // 123, 1920, 1080
1060
1061        // Search for "Play" followed by any word characters
1062        let results = search.search(r"Play\w+", &options).unwrap();
1063        assert_eq!(results.len(), 2); // PlayResX, PlayResY
1064    }
1065
1066    #[test]
1067    #[cfg(all(feature = "formats", feature = "std"))]
1068    fn test_regex_search_case_insensitive() {
1069        use crate::core::EditorDocument;
1070
1071        let doc = EditorDocument::from_content("Hello WORLD\nhello world\nHeLLo WoRlD").unwrap();
1072
1073        let mut search = DocumentSearchImpl::new();
1074        search.build_index(&doc).unwrap();
1075
1076        let options = SearchOptions {
1077            use_regex: true,
1078            case_sensitive: false,
1079            ..Default::default()
1080        };
1081
1082        // Case-insensitive regex search
1083        let results = search.search(r"hello\s+world", &options).unwrap();
1084        assert_eq!(results.len(), 3);
1085    }
1086
1087    #[test]
1088    #[cfg(all(feature = "formats", feature = "std", feature = "search-index"))]
1089    fn test_fst_regex_search() {
1090        use crate::core::EditorDocument;
1091
1092        let doc = EditorDocument::from_content(
1093            "[Events]\nDialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,Test dialogue",
1094        )
1095        .unwrap();
1096
1097        let mut search = DocumentSearchImpl::new();
1098        search.build_index(&doc).unwrap();
1099
1100        let options = SearchOptions {
1101            use_regex: true,
1102            ..Default::default()
1103        };
1104
1105        // Search for time codes pattern
1106        let results = search.search(r"\d:\d{2}:\d{2}\.\d{2}", &options).unwrap();
1107        assert_eq!(results.len(), 2); // Two time codes
1108
1109        // Verify the matches are correct
1110        assert_eq!(results[0].text, "0:00:00.00");
1111        assert_eq!(results[1].text, "0:00:05.00");
1112    }
1113
1114    #[test]
1115    #[cfg(all(feature = "formats", feature = "std"))]
1116    fn test_regex_search_with_scope() {
1117        use crate::core::EditorDocument;
1118
1119        let doc =
1120            EditorDocument::from_content("Line 1: ABC\nLine 2: DEF\nLine 3: ABC\nLine 4: GHI")
1121                .unwrap();
1122
1123        let mut search = DocumentSearchImpl::new();
1124        search.build_index(&doc).unwrap();
1125
1126        let options = SearchOptions {
1127            use_regex: true,
1128            scope: SearchScope::Lines { start: 1, end: 2 },
1129            ..Default::default()
1130        };
1131
1132        // Search for ABC in lines 1-2 only (0-based: lines at index 1 and 2)
1133        let results = search.search("ABC", &options).unwrap();
1134        assert_eq!(results.len(), 1); // ABC is on line 3 (index 2)
1135
1136        // Search for DEF in lines 1-2
1137        let results = search.search("DEF", &options).unwrap();
1138        assert_eq!(results.len(), 1); // DEF is on line 2 (index 1)
1139    }
1140
1141    #[test]
1142    #[cfg(not(all(feature = "formats", feature = "std")))]
1143    fn test_regex_search_feature_disabled() {
1144        use crate::core::EditorDocument;
1145
1146        let doc = EditorDocument::from_content("Test content").unwrap();
1147
1148        let mut search = DocumentSearchImpl::new();
1149        search.build_index(&doc).unwrap();
1150
1151        let options = SearchOptions {
1152            use_regex: true,
1153            ..Default::default()
1154        };
1155
1156        // Should return error when regex feature is not enabled
1157        let result = search.search("test", &options);
1158        assert!(result.is_err());
1159        let error_msg = result.unwrap_err().to_string();
1160        assert!(
1161            error_msg.contains("Regex") || error_msg.contains("regex"),
1162            "Expected error to contain 'regex', but got: {error_msg}"
1163        );
1164    }
1165}