1use crate::core::{EditorDocument, Position, Range, Result};
7#[cfg(feature = "search-index")]
8use crate::utils::search::SearchScope;
9use crate::utils::search::{DocumentSearch, SearchOptions, SearchResult, SearchStats};
10
11#[cfg(feature = "search-index")]
12use fst::{automaton, IntoStreamer, Set, SetBuilder, Streamer};
13
14#[cfg(feature = "std")]
15use std::{borrow::Cow, collections::HashMap};
16
17#[cfg(not(feature = "std"))]
18use alloc::{borrow::Cow, collections::BTreeMap as HashMap, string::String};
19
20#[cfg(not(feature = "std"))]
21use alloc::{boxed::Box, string::ToString, vec::Vec};
22
23#[cfg(feature = "std")]
24use std::time::Instant;
25
26#[cfg(not(feature = "std"))]
27type Instant = u64;
28
29#[cfg(feature = "search-index")]
31pub struct FstSearchIndex {
32 fst_set: Option<Set<Vec<u8>>>,
34
35 position_map: HashMap<String, Vec<IndexEntry>>,
37
38 content_hash: u64,
40
41 build_time: Instant,
43
44 index_size: usize,
46}
47
48pub struct LinearSearchIndex {
50 word_positions: HashMap<String, Vec<IndexEntry>>,
52
53 content_hash: u64,
55
56 build_time: Instant,
58}
59
60#[derive(Debug, Clone)]
62pub struct IndexEntry {
63 pub position: Position,
65
66 pub context: String,
68
69 pub line: usize,
71
72 pub column: usize,
74
75 pub section_type: Option<String>,
77}
78
79#[cfg(feature = "search-index")]
80impl Default for FstSearchIndex {
81 fn default() -> Self {
82 Self::new()
83 }
84}
85
86#[cfg(feature = "search-index")]
87impl FstSearchIndex {
88 pub fn new() -> Self {
90 Self {
91 fst_set: None,
92 position_map: HashMap::new(),
93 content_hash: 0,
94 build_time: {
95 #[cfg(feature = "std")]
96 {
97 Instant::now()
98 }
99 #[cfg(not(feature = "std"))]
100 {
101 0
102 }
103 },
104 index_size: 0,
105 }
106 }
107
108 fn extract_words(&self, content: &str) -> Vec<(String, IndexEntry)> {
110 let mut words = Vec::new();
111 let mut line_start = 0;
112 let mut current_section = None;
113
114 for (current_line, line) in content.lines().enumerate() {
115 if line.starts_with('[') && line.ends_with(']') {
117 current_section = Some(line[1..line.len() - 1].to_string());
118 }
119
120 let mut word_start = 0;
122 let mut in_word = false;
123
124 for (char_offset, ch) in line.char_indices() {
125 if ch.is_alphanumeric() || ch == '_' {
126 if !in_word {
127 word_start = char_offset;
128 in_word = true;
129 }
130 } else if in_word {
131 let word = &line[word_start..char_offset];
133 if word.len() >= 2 {
134 let entry = IndexEntry {
136 position: Position::new(line_start + word_start),
137 context: line.to_string(),
138 line: current_line,
139 column: word_start,
140 section_type: current_section.clone(),
141 };
142 words.push((word.to_lowercase(), entry));
143 }
144 in_word = false;
145 }
146 }
147
148 if in_word {
150 let word = &line[word_start..];
151 if word.len() >= 2 {
152 let entry = IndexEntry {
153 position: Position::new(line_start + word_start),
154 context: line.to_string(),
155 line: current_line,
156 column: word_start,
157 section_type: current_section.clone(),
158 };
159 words.push((word.to_lowercase(), entry));
160 }
161 }
162
163 line_start += line.len() + 1; }
165
166 words
167 }
168
169 fn build_fst(&mut self, words: Vec<(String, IndexEntry)>) -> Result<()> {
171 let mut word_map: HashMap<String, Vec<IndexEntry>> = HashMap::new();
173 for (word, entry) in words {
174 word_map.entry(word).or_default().push(entry);
175 }
176
177 let mut set_builder = SetBuilder::memory();
179 let mut keys: Vec<_> = word_map.keys().cloned().collect();
180 keys.sort();
181
182 for key in &keys {
183 set_builder
184 .insert(key)
185 .map_err(|e| crate::EditorError::SearchIndexError {
186 message: format!("FST insert error: {e}"),
187 })?;
188 }
189
190 let fst_bytes =
191 set_builder
192 .into_inner()
193 .map_err(|e| crate::EditorError::SearchIndexError {
194 message: format!("FST build error: {e}"),
195 })?;
196 self.index_size = fst_bytes.len();
197 self.fst_set =
198 Some(
199 Set::new(fst_bytes).map_err(|e| crate::EditorError::SearchIndexError {
200 message: format!("FST creation error: {e}"),
201 })?,
202 );
203 self.position_map = word_map;
204
205 Ok(())
206 }
207}
208
209#[cfg(feature = "search-index")]
210impl DocumentSearch for FstSearchIndex {
211 fn build_index(&mut self, document: &EditorDocument) -> Result<()> {
212 let content = document.text();
213 self.content_hash = calculate_hash(&content);
214 self.build_time = {
215 #[cfg(feature = "std")]
216 {
217 Instant::now()
218 }
219 #[cfg(not(feature = "std"))]
220 {
221 0
222 }
223 };
224
225 let words = self.extract_words(&content);
226 self.build_fst(words)?;
227
228 Ok(())
229 }
230
231 fn update_index(&mut self, document: &EditorDocument, _changes: &[Range]) -> Result<()> {
232 self.build_index(document)
235 }
236
237 fn search(&self, pattern: &str, options: &SearchOptions) -> Result<Vec<SearchResult>> {
238 #[cfg(feature = "std")]
239 let _start_time = Instant::now();
240 let mut results = Vec::new();
241
242 if let Some(ref fst_set) = self.fst_set {
243 let query = if options.case_sensitive {
244 pattern.to_string()
245 } else {
246 pattern.to_lowercase()
247 };
248
249 let automaton = automaton::Subsequence::new(&query);
252 let mut stream = fst_set.search(automaton).into_stream();
253 let mut count = 0;
254
255 while let Some(key) = stream.next() {
256 if options.max_results > 0 && count >= options.max_results {
257 break;
258 }
259
260 let key_str = String::from_utf8_lossy(key);
261 if let Some(entries) = self.position_map.get(key_str.as_ref()) {
262 for entry in entries {
263 if self.matches_scope(entry, &options.scope) {
265 results.push(SearchResult {
266 start: entry.position,
267 end: Position::new(entry.position.offset + pattern.len()),
268 text: std::borrow::Cow::Owned(pattern.to_string()),
269 context: std::borrow::Cow::Owned(entry.context.clone()),
270 line: entry.line,
271 column: entry.column,
272 });
273 count += 1;
274
275 if options.max_results > 0 && count >= options.max_results {
276 break;
277 }
278 }
279 }
280 }
281 }
282 }
283
284 Ok(results)
285 }
286
287 fn find_replace(
288 &self,
289 document: &mut EditorDocument,
290 pattern: &str,
291 replacement: &str,
292 options: &SearchOptions,
293 ) -> Result<Vec<SearchResult>> {
294 let results = self.search(pattern, options)?;
295
296 let mut sorted_results = results.clone();
298 sorted_results.sort_by(|a, b| b.start.offset.cmp(&a.start.offset));
299
300 for result in &sorted_results {
301 let range = Range::new(result.start, result.end);
302 document.replace(range, replacement)?;
303 }
304
305 Ok(results)
306 }
307
308 fn stats(&self) -> SearchStats {
309 SearchStats {
310 match_count: self.position_map.len(),
311 search_time_us: {
312 #[cfg(feature = "std")]
313 {
314 self.build_time.elapsed().as_micros() as u64
315 }
316 #[cfg(not(feature = "std"))]
317 {
318 0
319 }
320 },
321 hit_limit: false,
322 index_size: self.index_size,
323 }
324 }
325
326 fn clear_index(&mut self) {
327 self.fst_set = None;
328 self.position_map.clear();
329 self.index_size = 0;
330 }
331}
332
333#[cfg(feature = "search-index")]
334impl FstSearchIndex {
335 fn matches_scope(&self, entry: &IndexEntry, scope: &SearchScope) -> bool {
336 match scope {
337 SearchScope::All => true,
338 SearchScope::Lines { start, end } => entry.line >= *start && entry.line <= *end,
339 SearchScope::Sections(sections) => {
340 if let Some(ref section) = entry.section_type {
341 sections.contains(section)
342 } else {
343 false
344 }
345 }
346 SearchScope::Range(range) => {
347 entry.position.offset >= range.start.offset
348 && entry.position.offset <= range.end.offset
349 }
350 }
351 }
352}
353
354impl Default for LinearSearchIndex {
356 fn default() -> Self {
357 Self::new()
358 }
359}
360
361impl LinearSearchIndex {
362 pub fn new() -> Self {
364 Self {
365 word_positions: HashMap::new(),
366 content_hash: 0,
367 build_time: {
368 #[cfg(feature = "std")]
369 {
370 Instant::now()
371 }
372 #[cfg(not(feature = "std"))]
373 {
374 0
375 }
376 },
377 }
378 }
379}
380
381impl DocumentSearch for LinearSearchIndex {
382 fn build_index(&mut self, document: &EditorDocument) -> Result<()> {
383 let content = document.text();
384 self.content_hash = calculate_hash(&content);
385 self.build_time = {
386 #[cfg(feature = "std")]
387 {
388 Instant::now()
389 }
390 #[cfg(not(feature = "std"))]
391 {
392 0
393 }
394 };
395
396 self.word_positions.clear();
398
399 let mut line_start = 0;
400
401 for (current_line, line) in content.lines().enumerate() {
402 for (word_start, word) in line.split_whitespace().enumerate() {
403 let entry = IndexEntry {
404 position: Position::new(line_start + word_start),
405 context: line.to_string(),
406 line: current_line,
407 column: word_start,
408 section_type: None,
409 };
410 self.word_positions
411 .entry(word.to_lowercase())
412 .or_default()
413 .push(entry);
414 }
415 line_start += line.len() + 1;
416 }
417
418 Ok(())
419 }
420
421 fn update_index(&mut self, document: &EditorDocument, _changes: &[Range]) -> Result<()> {
422 self.build_index(document)
423 }
424
425 fn search(&self, pattern: &str, _options: &SearchOptions) -> Result<Vec<SearchResult>> {
426 let query = pattern.to_lowercase();
427 let mut results = Vec::new();
428
429 for (word, entries) in &self.word_positions {
431 if word.contains(&query) {
432 for entry in entries {
433 results.push(SearchResult {
434 start: entry.position,
435 end: Position::new(entry.position.offset + pattern.len()),
436 text: Cow::Owned(pattern.to_string()),
437 context: Cow::Owned(entry.context.clone()),
438 line: entry.line,
439 column: entry.column,
440 });
441 }
442 }
443 }
444
445 Ok(results)
446 }
447
448 fn find_replace(
449 &self,
450 document: &mut EditorDocument,
451 pattern: &str,
452 replacement: &str,
453 options: &SearchOptions,
454 ) -> Result<Vec<SearchResult>> {
455 let results = self.search(pattern, options)?;
456
457 for result in &results {
458 let range = Range::new(result.start, result.end);
459 document.replace(range, replacement)?;
460 }
461
462 Ok(results)
463 }
464
465 fn stats(&self) -> SearchStats {
466 SearchStats {
467 match_count: self.word_positions.len(),
468 search_time_us: {
469 #[cfg(feature = "std")]
470 {
471 self.build_time.elapsed().as_micros() as u64
472 }
473 #[cfg(not(feature = "std"))]
474 {
475 0
476 }
477 },
478 hit_limit: false,
479 index_size: self.word_positions.len() * 64, }
481 }
482
483 fn clear_index(&mut self) {
484 self.word_positions.clear();
485 }
486}
487
488pub fn create_search_index() -> Box<dyn DocumentSearch> {
490 #[cfg(feature = "search-index")]
491 {
492 Box::new(FstSearchIndex::new())
493 }
494 #[cfg(not(feature = "search-index"))]
495 {
496 Box::new(LinearSearchIndex::new())
497 }
498}
499
500fn calculate_hash(content: &str) -> u64 {
502 let mut hash = 0xcbf29ce484222325u64;
504 for byte in content.bytes() {
505 hash ^= byte as u64;
506 hash = hash.wrapping_mul(0x100000001b3);
507 }
508 hash
509}
510
511#[cfg(test)]
512mod tests {
513 use super::*;
514 use crate::utils::search::SearchScope;
515 #[cfg(not(feature = "std"))]
516 use crate::EditorDocument;
517
518 #[test]
519 fn test_linear_search_index() {
520 let content = r#"[Script Info]
521Title: Test Movie
522Author: Test Author
523
524[Events]
525Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
526Dialogue: 0,0:00:05.00,0:00:10.00,Default,John,0,0,0,,Hello world
527Dialogue: 0,0:00:12.00,0:00:15.00,Default,Jane,0,0,0,,How are you"#;
528
529 let document = EditorDocument::from_content(content).unwrap();
530 let mut search_index = LinearSearchIndex::new();
531
532 search_index.build_index(&document).unwrap();
533
534 let results = search_index
535 .search("Hello", &SearchOptions::default())
536 .unwrap();
537 assert!(!results.is_empty());
538
539 let stats = search_index.stats();
540 assert!(stats.match_count > 0);
541 }
542
543 #[cfg(feature = "search-index")]
544 #[test]
545 fn test_fst_search_index() {
546 let content = r#"[Script Info]
547Title: Test Movie
548
549[Events]
550Dialogue: 0,0:00:05.00,0:00:10.00,Default,John,0,0,0,,Hello world"#;
551
552 let document = EditorDocument::from_content(content).unwrap();
553 let mut search_index = FstSearchIndex::new();
554
555 search_index.build_index(&document).unwrap();
556
557 let results = search_index
558 .search("hello", &SearchOptions::default())
559 .unwrap();
560 assert!(!results.is_empty());
561
562 let stats = search_index.stats();
563 assert!(stats.index_size > 0);
564 }
565
566 #[test]
567 fn test_search_factory() {
568 let index = create_search_index();
569
570 let content = "[Script Info]\nTitle: Test";
571 let _document = EditorDocument::from_content(content).unwrap();
572
573 let stats = index.stats();
575 assert_eq!(stats.match_count, 0); }
577
578 #[test]
579 fn test_scope_filtering() {
580 let content = r#"[Script Info]
581Title: Test
582
583[Events]
584Dialogue: Hello world"#;
585
586 let document = EditorDocument::from_content(content).unwrap();
587 let mut index = LinearSearchIndex::new();
588 index.build_index(&document).unwrap();
589
590 let line_scope = SearchOptions {
592 scope: SearchScope::Lines { start: 0, end: 2 },
593 ..Default::default()
594 };
595
596 let _results = index.search("Test", &line_scope).unwrap();
597 }
599}