1#![forbid(unsafe_code)]
2
3#[derive(Debug, Clone, PartialEq, Eq)]
20pub struct SearchResult {
21 pub range: std::ops::Range<usize>,
23}
24
25impl SearchResult {
26 #[must_use]
28 pub fn new(start: usize, end: usize) -> Self {
29 Self { range: start..end }
30 }
31
32 #[must_use]
34 pub fn text<'a>(&self, source: &'a str) -> &'a str {
35 &source[self.range.clone()]
36 }
37}
38
39#[must_use]
43pub fn search_exact(haystack: &str, needle: &str) -> Vec<SearchResult> {
44 if needle.is_empty() {
45 return Vec::new();
46 }
47 let mut results = Vec::new();
48 let mut start = 0;
49 while let Some(pos) = haystack[start..].find(needle) {
50 let abs_pos = start + pos;
51 results.push(SearchResult::new(abs_pos, abs_pos + needle.len()));
52 start = abs_pos + needle.len();
53 }
54 results
55}
56
57#[must_use]
61pub fn search_exact_overlapping(haystack: &str, needle: &str) -> Vec<SearchResult> {
62 if needle.is_empty() {
63 return Vec::new();
64 }
65 let mut results = Vec::new();
66 let mut start = 0;
67 while let Some(pos) = haystack[start..].find(needle) {
68 let abs_pos = start + pos;
69 results.push(SearchResult::new(abs_pos, abs_pos + needle.len()));
70 start = abs_pos + 1;
72 while start < haystack.len() && !haystack.is_char_boundary(start) {
74 start += 1;
75 }
76 }
77 results
78}
79
80#[must_use]
85pub fn search_ascii_case_insensitive(haystack: &str, needle: &str) -> Vec<SearchResult> {
86 if needle.is_empty() {
87 return Vec::new();
88 }
89 let haystack_lower = haystack.to_ascii_lowercase();
90 let needle_lower = needle.to_ascii_lowercase();
91 let mut results = Vec::new();
92 let mut start = 0;
93 while let Some(pos) = haystack_lower[start..].find(&needle_lower) {
94 let abs_pos = start + pos;
95 results.push(SearchResult::new(abs_pos, abs_pos + needle.len()));
96 start = abs_pos + needle.len();
97 }
98 results
99}
100
101#[cfg(feature = "normalization")]
106#[must_use]
107pub fn search_case_insensitive(haystack: &str, needle: &str) -> Vec<SearchResult> {
108 if needle.is_empty() {
109 return Vec::new();
110 }
111 let needle_norm = crate::normalization::normalize_for_search(needle);
112 if needle_norm.is_empty() {
113 return Vec::new();
114 }
115
116 use unicode_segmentation::UnicodeSegmentation;
117
118 let mut norm_start_map: Vec<usize> = Vec::new();
123 let mut norm_end_map: Vec<usize> = Vec::new();
124 let mut normalized = String::new();
125
126 for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
127 let chunk = crate::normalization::normalize_for_search(grapheme);
128 if chunk.is_empty() {
129 continue;
130 }
131 let orig_end = orig_byte + grapheme.len();
132 for _ in chunk.bytes() {
133 norm_start_map.push(orig_byte);
134 norm_end_map.push(orig_end);
135 }
136 normalized.push_str(&chunk);
137 }
138 if normalized.is_empty() {
139 return Vec::new();
140 }
141
142 let mut results = Vec::new();
143 let mut start = 0;
144 while let Some(pos) = normalized[start..].find(&needle_norm) {
145 let norm_start = start + pos;
146 let norm_end = norm_start + needle_norm.len();
147
148 let orig_start = norm_start_map
149 .get(norm_start)
150 .copied()
151 .unwrap_or(haystack.len());
152 let orig_end = if norm_end == 0 {
153 orig_start
154 } else {
155 norm_end_map
156 .get(norm_end - 1)
157 .copied()
158 .unwrap_or(haystack.len())
159 };
160
161 if results
164 .last()
165 .is_some_and(|r: &SearchResult| r.range.start == orig_start && r.range.end == orig_end)
166 {
167 start = norm_end;
168 continue;
169 }
170
171 results.push(SearchResult::new(orig_start, orig_end));
172 start = norm_end;
173 }
174 results
175}
176
177#[cfg(feature = "normalization")]
182#[must_use]
183pub fn search_normalized(
184 haystack: &str,
185 needle: &str,
186 form: crate::normalization::NormForm,
187) -> Vec<SearchResult> {
188 use crate::normalization::normalize;
189 use unicode_segmentation::UnicodeSegmentation;
190
191 if needle.is_empty() {
192 return Vec::new();
193 }
194 let needle_norm = normalize(needle, form);
195 if needle_norm.is_empty() {
196 return Vec::new();
197 }
198
199 let mut norm_start_map: Vec<usize> = Vec::new();
203 let mut norm_end_map: Vec<usize> = Vec::new();
204 let mut normalized = String::new();
205
206 for (orig_byte, grapheme) in haystack.grapheme_indices(true) {
207 let chunk = normalize(grapheme, form);
208 if chunk.is_empty() {
209 continue;
210 }
211 let orig_end = orig_byte + grapheme.len();
212 for _ in chunk.bytes() {
213 norm_start_map.push(orig_byte);
214 norm_end_map.push(orig_end);
215 }
216 normalized.push_str(&chunk);
217 }
218 if normalized.is_empty() {
219 return Vec::new();
220 }
221
222 let mut results = Vec::new();
223 let mut start = 0;
224 while let Some(pos) = normalized[start..].find(&needle_norm) {
225 let norm_start = start + pos;
226 let norm_end = norm_start + needle_norm.len();
227
228 let orig_start = norm_start_map
229 .get(norm_start)
230 .copied()
231 .unwrap_or(haystack.len());
232 let orig_end = if norm_end == 0 {
233 orig_start
234 } else {
235 norm_end_map
236 .get(norm_end - 1)
237 .copied()
238 .unwrap_or(haystack.len())
239 };
240
241 if results
242 .last()
243 .is_some_and(|r: &SearchResult| r.range.start == orig_start && r.range.end == orig_end)
244 {
245 start = norm_end;
246 continue;
247 }
248
249 results.push(SearchResult::new(orig_start, orig_end));
250 start = norm_end;
251 }
252 results
253}
254
255#[cfg(test)]
256mod tests {
257 use super::*;
258
259 #[test]
264 fn exact_basic() {
265 let results = search_exact("hello world hello", "hello");
266 assert_eq!(results.len(), 2);
267 assert_eq!(results[0].range, 0..5);
268 assert_eq!(results[1].range, 12..17);
269 }
270
271 #[test]
272 fn exact_no_match() {
273 let results = search_exact("hello world", "xyz");
274 assert!(results.is_empty());
275 }
276
277 #[test]
278 fn exact_empty_needle() {
279 let results = search_exact("hello", "");
280 assert!(results.is_empty());
281 }
282
283 #[test]
284 fn exact_empty_haystack() {
285 let results = search_exact("", "hello");
286 assert!(results.is_empty());
287 }
288
289 #[test]
290 fn exact_needle_equals_haystack() {
291 let results = search_exact("hello", "hello");
292 assert_eq!(results.len(), 1);
293 assert_eq!(results[0].range, 0..5);
294 }
295
296 #[test]
297 fn exact_needle_longer() {
298 let results = search_exact("hi", "hello");
299 assert!(results.is_empty());
300 }
301
302 #[test]
303 fn exact_adjacent_matches() {
304 let results = search_exact("aaa", "a");
305 assert_eq!(results.len(), 3);
306 }
307
308 #[test]
309 fn exact_text_extraction() {
310 let haystack = "foo bar baz";
311 let results = search_exact(haystack, "bar");
312 assert_eq!(results[0].text(haystack), "bar");
313 }
314
315 #[test]
316 fn exact_unicode() {
317 let results = search_exact("café résumé café", "café");
318 assert_eq!(results.len(), 2);
319 }
320
321 #[test]
322 fn exact_cjk() {
323 let results = search_exact("你好世界你好", "你好");
324 assert_eq!(results.len(), 2);
325 }
326
327 #[test]
332 fn overlapping_basic() {
333 let results = search_exact_overlapping("aaa", "aa");
334 assert_eq!(results.len(), 2);
335 assert_eq!(results[0].range, 0..2);
336 assert_eq!(results[1].range, 1..3);
337 }
338
339 #[test]
340 fn overlapping_no_overlap() {
341 let results = search_exact_overlapping("abcabc", "abc");
342 assert_eq!(results.len(), 2);
343 }
344
345 #[test]
346 fn overlapping_empty_needle() {
347 let results = search_exact_overlapping("abc", "");
348 assert!(results.is_empty());
349 }
350
351 #[test]
356 fn ascii_ci_basic() {
357 let results = search_ascii_case_insensitive("Hello World HELLO", "hello");
358 assert_eq!(results.len(), 2);
359 }
360
361 #[test]
362 fn ascii_ci_mixed_case() {
363 let results = search_ascii_case_insensitive("FoO BaR fOo", "foo");
364 assert_eq!(results.len(), 2);
365 }
366
367 #[test]
368 fn ascii_ci_no_match() {
369 let results = search_ascii_case_insensitive("hello", "xyz");
370 assert!(results.is_empty());
371 }
372
373 #[test]
378 fn results_have_valid_ranges() {
379 let test_cases = [
380 ("hello world", "o"),
381 ("aaaa", "aa"),
382 ("", "x"),
383 ("x", ""),
384 ("café", "é"),
385 ("🌍 world 🌍", "🌍"),
386 ];
387 for (haystack, needle) in test_cases {
388 let results = search_exact(haystack, needle);
389 for r in &results {
390 assert!(
391 r.range.start <= r.range.end,
392 "Invalid range for '{needle}' in '{haystack}'"
393 );
394 assert!(
395 r.range.end <= haystack.len(),
396 "Out of bounds for '{needle}' in '{haystack}'"
397 );
398 assert!(
399 haystack.is_char_boundary(r.range.start),
400 "Not char boundary at start"
401 );
402 assert!(
403 haystack.is_char_boundary(r.range.end),
404 "Not char boundary at end"
405 );
406 }
407 }
408 }
409
410 #[test]
411 fn emoji_search() {
412 let results = search_exact("hello 🌍 world 🌍 end", "🌍");
413 assert_eq!(results.len(), 2);
414 for r in &results {
415 assert_eq!(&"hello 🌍 world 🌍 end"[r.range.clone()], "🌍");
416 }
417 }
418}
419
420#[cfg(all(test, feature = "normalization"))]
421mod normalization_tests {
422 use super::*;
423
424 #[test]
425 fn case_insensitive_unicode() {
426 let results = search_case_insensitive("Straße Strasse", "strasse");
429 assert!(
430 !results.is_empty(),
431 "Should find literal case-insensitive match"
432 );
433 }
434
435 #[test]
436 fn case_insensitive_expansion_range_maps_to_grapheme() {
437 let haystack = "STRAßE";
440 let results = search_case_insensitive(haystack, "straße");
441 assert_eq!(results.len(), 1);
442 let result = &results[0];
443 assert_eq!(result.text(haystack), "STRAßE");
444 assert!(haystack.is_char_boundary(result.range.start));
445 assert!(haystack.is_char_boundary(result.range.end));
446 }
447
448 #[test]
449 fn case_insensitive_accented() {
450 let results = search_case_insensitive("CAFÉ café Café", "café");
451 assert_eq!(results.len(), 3);
453 }
454
455 #[test]
456 fn case_insensitive_empty() {
457 let results = search_case_insensitive("hello", "");
458 assert!(results.is_empty());
459 }
460
461 #[test]
462 fn case_insensitive_fullwidth() {
463 let results = search_case_insensitive("\u{FF28}\u{FF25}\u{FF2C}\u{FF2C}\u{FF2F}", "hello");
465 assert!(!results.is_empty(), "Fullwidth should match via NFKC");
466 }
467
468 #[test]
469 fn normalized_composed_vs_decomposed() {
470 use crate::normalization::NormForm;
471 let haystack = "caf\u{0065}\u{0301}"; let needle = "caf\u{00E9}"; let results = search_normalized(haystack, needle, NormForm::Nfc);
475 assert_eq!(results.len(), 1, "Should find NFC-equivalent match");
476 }
477
478 #[test]
479 fn normalized_no_false_positive() {
480 use crate::normalization::NormForm;
481 let results = search_normalized("hello", "world", NormForm::Nfc);
482 assert!(results.is_empty());
483 }
484
485 #[test]
486 fn normalized_result_ranges_valid() {
487 use crate::normalization::NormForm;
488 let haystack = "café résumé café";
489 let needle = "café";
490 let results = search_normalized(haystack, needle, NormForm::Nfc);
491 for r in &results {
492 assert!(r.range.start <= r.range.end);
493 assert!(r.range.end <= haystack.len());
494 assert!(haystack.is_char_boundary(r.range.start));
495 assert!(haystack.is_char_boundary(r.range.end));
496 }
497 }
498
499 #[test]
500 fn case_insensitive_result_ranges_valid() {
501 let haystack = "Hello WORLD hello";
502 let results = search_case_insensitive(haystack, "hello");
503 for r in &results {
504 assert!(r.range.start <= r.range.end);
505 assert!(r.range.end <= haystack.len());
506 assert!(haystack.is_char_boundary(r.range.start));
507 assert!(haystack.is_char_boundary(r.range.end));
508 }
509 }
510}