1use super::{Regex, Region, SearchOptions};
2use std::iter::FusedIterator;
3
4impl Regex {
5 pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
9 let mut region = Region::new();
10 self.search_with_options(
11 text,
12 0,
13 text.len(),
14 SearchOptions::SEARCH_OPTION_NONE,
15 Some(&mut region),
16 )
17 .map(|pos| Captures {
18 text,
19 region,
20 offset: pos,
21 })
22 }
23
24 pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
47 FindMatches {
48 regex: self,
49 region: Region::new(),
50 text,
51 last_end: 0,
52 last_match_end: None,
53 }
54 }
55
56 pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> FindCaptures<'r, 't> {
81 FindCaptures {
82 regex: self,
83 text,
84 last_end: 0,
85 last_match_end: None,
86 }
87 }
88
89 pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
109 RegexSplits {
110 finder: self.find_iter(text),
111 last: 0,
112 }
113 }
114
115 pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: usize) -> RegexSplitsN<'r, 't> {
138 RegexSplitsN {
139 splits: self.split(text),
140 n: limit,
141 }
142 }
143
144 pub fn scan_with_region<F>(
147 &self,
148 to_search: &str,
149 region: &mut Region,
150 options: SearchOptions,
151 mut callback: F,
152 ) -> i32
153 where
154 F: Fn(i32, i32, &Region) -> bool,
155 {
156 use onig_sys::{onig_scan, OnigRegion};
157 use std::os::raw::{c_int, c_void};
158
159 let start = to_search.as_ptr();
161 let end = to_search[to_search.len()..].as_ptr();
162
163 unsafe extern "C" fn scan_cb<F>(
164 i: c_int,
165 j: c_int,
166 r: *mut OnigRegion,
167 ud: *mut c_void,
168 ) -> c_int
169 where
170 F: Fn(i32, i32, &Region) -> bool,
171 {
172 let region = Region::clone_from_raw(r);
173 let callback = &*(ud as *mut F);
174 if callback(i, j, ®ion) {
175 0
176 } else {
177 -1
178 }
179 }
180
181 unsafe {
182 onig_scan(
183 self.raw,
184 start,
185 end,
186 (&mut region.raw) as *mut ::onig_sys::OnigRegion,
187 options.bits(),
188 Some(scan_cb::<F>),
189 &mut callback as *mut F as *mut c_void,
190 )
191 }
192 }
193
194 pub fn scan<'t, CB>(&self, to_search: &'t str, callback: CB)
199 where
200 CB: Fn(i32, Captures<'t>) -> bool,
201 {
202 let mut region = Region::new();
203 self.scan_with_region(
204 to_search,
205 &mut region,
206 SearchOptions::SEARCH_OPTION_NONE,
207 |n, s, region| {
208 let captures = Captures {
209 text: to_search,
210 region: region.clone(),
211 offset: s as usize,
212 };
213 callback(n, captures)
214 },
215 );
216 }
217}
218
219#[derive(Debug)]
227pub struct Captures<'t> {
228 text: &'t str,
229 region: Region,
230 offset: usize,
231}
232
233impl<'t> Captures<'t> {
234 pub(crate) fn new(text: &'t str, region: Region, offset: usize) -> Self {
237 Captures {
238 text,
239 region,
240 offset,
241 }
242 }
243
244 pub fn pos(&self, pos: usize) -> Option<(usize, usize)> {
249 self.region.pos(pos)
250 }
251
252 pub fn at(&self, pos: usize) -> Option<&'t str> {
255 self.pos(pos).map(|(beg, end)| &self.text[beg..end])
256 }
257
258 pub fn len(&self) -> usize {
260 self.region.len()
261 }
262
263 pub fn is_empty(&self) -> bool {
265 self.len() == 0
266 }
267
268 pub fn iter(&'t self) -> SubCaptures<'t> {
271 SubCaptures { idx: 0, caps: self }
272 }
273
274 pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
278 SubCapturesPos { idx: 0, caps: self }
279 }
280
281 pub fn offset(&self) -> usize {
283 self.offset
284 }
285}
286
287pub struct SubCaptures<'t> {
292 idx: usize,
293 caps: &'t Captures<'t>,
294}
295
296impl<'t> Iterator for SubCaptures<'t> {
297 type Item = Option<&'t str>;
298
299 fn next(&mut self) -> Option<Option<&'t str>> {
300 if self.idx < self.caps.len() {
301 self.idx += 1;
302 Some(self.caps.at(self.idx - 1))
303 } else {
304 None
305 }
306 }
307
308 fn size_hint(&self) -> (usize, Option<usize>) {
309 let size = self.caps.len();
310 (size, Some(size))
311 }
312
313 fn count(self) -> usize {
314 self.caps.len()
315 }
316}
317
318impl<'t> FusedIterator for SubCaptures<'t> {}
319
320impl<'t> ExactSizeIterator for SubCaptures<'t> {}
321
322pub struct SubCapturesPos<'t> {
328 idx: usize,
329 caps: &'t Captures<'t>,
330}
331
332impl<'t> Iterator for SubCapturesPos<'t> {
333 type Item = Option<(usize, usize)>;
334
335 fn next(&mut self) -> Option<Option<(usize, usize)>> {
336 if self.idx < self.caps.len() {
337 self.idx += 1;
338 Some(self.caps.pos(self.idx - 1))
339 } else {
340 None
341 }
342 }
343
344 fn size_hint(&self) -> (usize, Option<usize>) {
345 let size = self.caps.len();
346 (size, Some(size))
347 }
348
349 fn count(self) -> usize {
350 self.caps.len()
351 }
352}
353
354impl<'t> FusedIterator for SubCapturesPos<'t> {}
355
356impl<'t> ExactSizeIterator for SubCapturesPos<'t> {}
357
358pub struct FindMatches<'r, 't> {
367 regex: &'r Regex,
368 region: Region,
369 text: &'t str,
370 last_end: usize,
371 last_match_end: Option<usize>,
372}
373
374impl<'r, 't> Iterator for FindMatches<'r, 't> {
375 type Item = (usize, usize);
376
377 fn next(&mut self) -> Option<(usize, usize)> {
378 if self.last_end > self.text.len() {
379 return None;
380 }
381 self.region.clear();
382 self.regex.search_with_options(
383 self.text,
384 self.last_end,
385 self.text.len(),
386 SearchOptions::SEARCH_OPTION_NONE,
387 Some(&mut self.region),
388 )?;
389 let (s, e) = self.region.pos(0).unwrap();
390
391 if e == s && self.last_match_end.map_or(false, |l| l == e) {
394 self.last_end += self.text[self.last_end..]
395 .chars()
396 .next()
397 .map(|c| c.len_utf8())
398 .unwrap_or(1);
399 return self.next();
400 } else {
401 self.last_end = e;
402 self.last_match_end = Some(e);
403 }
404
405 Some((s, e))
406 }
407}
408
409impl<'r, 't> FusedIterator for FindMatches<'r, 't> {}
410
411pub struct FindCaptures<'r, 't> {
419 regex: &'r Regex,
420 text: &'t str,
421 last_end: usize,
422 last_match_end: Option<usize>,
423}
424
425impl<'r, 't> Iterator for FindCaptures<'r, 't> {
426 type Item = Captures<'t>;
427
428 fn next(&mut self) -> Option<Captures<'t>> {
429 if self.last_end > self.text.len() {
430 return None;
431 }
432
433 let mut region = Region::new();
434 let r = self.regex.search_with_options(
435 self.text,
436 self.last_end,
437 self.text.len(),
438 SearchOptions::SEARCH_OPTION_NONE,
439 Some(&mut region),
440 )?;
441 let (s, e) = region.pos(0).unwrap();
442
443 if e == s && self.last_match_end.map_or(false, |l| l == e) {
446 self.last_end += self.text[self.last_end..]
447 .chars()
448 .next()
449 .map(|c| c.len_utf8())
450 .unwrap_or(1);
451 return self.next();
452 } else {
453 self.last_end = e;
454 self.last_match_end = Some(e);
455 }
456 Some(Captures {
457 text: self.text,
458 region,
459 offset: r,
460 })
461 }
462}
463
464impl<'r, 't> FusedIterator for FindCaptures<'r, 't> {}
465
466pub struct RegexSplits<'r, 't> {
471 finder: FindMatches<'r, 't>,
472 last: usize,
473}
474
475impl<'r, 't> Iterator for RegexSplits<'r, 't> {
476 type Item = &'t str;
477
478 fn next(&mut self) -> Option<&'t str> {
479 let text = self.finder.text;
480 match self.finder.next() {
481 None => {
482 if self.last >= text.len() {
483 None
484 } else {
485 let s = &text[self.last..];
486 self.last = text.len();
487 Some(s)
488 }
489 }
490 Some((s, e)) => {
491 let matched = &text[self.last..s];
492 self.last = e;
493 Some(matched)
494 }
495 }
496 }
497}
498
499impl<'r, 't> FusedIterator for RegexSplits<'r, 't> {}
500
501pub struct RegexSplitsN<'r, 't> {
508 splits: RegexSplits<'r, 't>,
509 n: usize,
510}
511
512impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
513 type Item = &'t str;
514
515 fn next(&mut self) -> Option<&'t str> {
516 if self.n == 0 {
517 return None;
518 }
519 self.n -= 1;
520 if self.n == 0 {
521 let text = self.splits.finder.text;
522 Some(&text[self.splits.last..])
523 } else {
524 self.splits.next()
525 }
526 }
527
528 fn size_hint(&self) -> (usize, Option<usize>) {
529 (0, Some(self.n))
530 }
531}
532
533impl<'r, 't> FusedIterator for RegexSplitsN<'r, 't> {}
534
#[cfg(test)]
mod tests {
    use super::super::*;

    /// Only the first alternative matches, so the second group has no
    /// position and no text.
    #[test]
    fn test_regex_captures() {
        let re = Regex::new("e(l+)|(r+)").unwrap();
        let caps = re.captures("hello").unwrap();
        assert_eq!(caps.len(), 3);
        assert!(!caps.is_empty());

        let whole_pos = caps.pos(0).unwrap();
        let first_pos = caps.pos(1).unwrap();
        let second_pos = caps.pos(2);
        assert_eq!(whole_pos, (1, 4));
        assert_eq!(first_pos, (2, 4));
        assert_eq!(second_pos, None);

        let whole_text = caps.at(0).unwrap();
        let first_text = caps.at(1).unwrap();
        let second_text = caps.at(2);
        assert_eq!(whole_text, "ell");
        assert_eq!(first_text, "ll");
        assert_eq!(second_text, None);
    }

    /// `iter` yields the text of the whole match followed by each group.
    #[test]
    fn test_regex_subcaptures() {
        let re = Regex::new("e(l+)").unwrap();
        let caps = re.captures("hello").unwrap();
        let groups: Vec<_> = caps.iter().collect();
        assert_eq!(groups[0], Some("ell"));
        assert_eq!(groups[1], Some("ll"));
        assert_eq!(groups.len(), 2);
    }

    /// `iter_pos` yields the span of the whole match followed by each group.
    #[test]
    fn test_regex_subcapturespos() {
        let re = Regex::new("e(l+)").unwrap();
        let caps = re.captures("hello").unwrap();
        let spans: Vec<_> = caps.iter_pos().collect();
        assert_eq!(spans[0], Some((1, 4)));
        assert_eq!(spans[1], Some((2, 4)));
        assert_eq!(spans.len(), 2);
    }

    #[test]
    fn test_find_iter() {
        let digits = Regex::new(r"\d+").unwrap();
        let found: Vec<_> = digits.find_iter("a12b2").collect();
        assert_eq!(found, vec![(1, 3), (4, 5)]);
    }

    #[test]
    fn test_find_iter_one_zero_length() {
        let digits = Regex::new(r"\d*").unwrap();
        let found: Vec<_> = digits.find_iter("a1b2").collect();
        assert_eq!(found, vec![(0, 0), (1, 2), (3, 4)]);
    }

    #[test]
    fn test_find_iter_many_zero_length() {
        let digits = Regex::new(r"\d*").unwrap();
        let found: Vec<_> = digits.find_iter("a1bbb2").collect();
        assert_eq!(found, vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)]);
    }

    /// An empty match is still reported when it does not sit directly at the
    /// end of the previous match.
    #[test]
    fn test_find_iter_empty_after_match() {
        let re = Regex::new(r"b|(?=,)").unwrap();
        let found: Vec<_> = re.find_iter("ba,").collect();
        assert_eq!(found, vec![(0, 1), (2, 2)]);
    }

    #[test]
    fn test_zero_length_matches_jumps_past_match_location() {
        let boundary = Regex::new(r"\b").unwrap();
        let found: Vec<_> = boundary.find_iter("test string").collect();
        assert_eq!(found, [(0, 0), (4, 4), (5, 5), (11, 11)]);
    }

    #[test]
    fn test_captures_iter() {
        let digits = Regex::new(r"\d+").unwrap();
        let all: Vec<_> = digits.captures_iter("a12b2").collect();
        assert_eq!(all[0].pos(0).unwrap(), (1, 3));
        assert_eq!(all[1].pos(0).unwrap(), (4, 5));
    }

    /// Both `captures` and `captures_iter` record where each match was found.
    #[test]
    fn test_captures_stores_match_offset() {
        let decimal = Regex::new(r"\d+\.(\d+)").unwrap();

        let first = decimal.captures("100 - 3.1415 / 2.0").unwrap();
        assert_eq!(6, first.offset());

        let offsets: Vec<_> = decimal
            .captures_iter("1 - 3234.3 * 123.2 - 100")
            .map(|caps| caps.offset())
            .collect();
        assert_eq!(vec![4, 13], offsets);
    }
}