use crate::DIGIT_CHARS;
use aho_corasick::AhoCorasick;
use aho_corasick::FindIter;
use core::ops::Range;
/// Iterator that cuts a haystack into consecutive pieces around Aho-Corasick
/// matches, keeping the matched pieces as well as the text between them.
///
/// Every item is the piece itself together with the byte range it occupies in
/// the haystack.
#[derive(Debug)]
pub struct SplitKeepWithPos<'a> {
/// The text being split.
haystack: &'a str,
/// Underlying match iterator over `haystack`.
finder: FindIter<'a, 'a>,
/// Byte offset up to which the haystack has already been yielded.
last: usize,
/// `(start, end)` of a match that was pulled from `finder` but not yet
/// yielded, because the unmatched text in front of it had to go out first.
pending: Option<(usize, usize)>, }
impl<'a> SplitKeepWithPos<'a> {
    /// Creates a splitter over `haystack` driven by the matches `ac` finds in it.
    ///
    /// The iterator starts at byte offset 0 with no match stashed.
    #[inline]
    pub fn new(ac: &'a AhoCorasick, haystack: &'a str) -> Self {
        let finder = ac.find_iter(haystack);
        Self {
            haystack,
            finder,
            last: 0,
            pending: None,
        }
    }
}
impl<'a> Iterator for SplitKeepWithPos<'a> {
    type Item = (&'a str, Range<usize>);

    /// Yields the next piece of the haystack and its byte range.
    ///
    /// Pieces alternate between unmatched text and matches as they occur; once
    /// the matcher is exhausted, the remaining tail is yielded as a final piece.
    /// Returns `None` as soon as the cursor has reached the end of the haystack
    /// (so an empty haystack yields nothing).
    fn next(&mut self) -> Option<Self::Item> {
        let text = self.haystack;
        let cursor = self.last;
        if cursor >= text.len() {
            return None;
        }

        // Prefer the match stashed by the previous call; only ask the matcher
        // for a new one when nothing is waiting.
        let upcoming = self
            .pending
            .take()
            .or_else(|| self.finder.next().map(|m| (m.start(), m.end())));

        let span = match upcoming {
            // Unmatched text sits in front of the match: emit that gap now and
            // keep the match for the following call.
            Some((from, to)) if from > cursor => {
                self.pending = Some((from, to));
                cursor..from
            }
            // The match begins at the cursor: emit the match itself.
            Some((from, to)) => from..to,
            // No matches left: everything up to the end is the last piece.
            None => cursor..text.len(),
        };

        self.last = span.end;
        Some((&text[span.clone()], span))
    }
}
/// Generates the crate-internal `EndsWithExt` trait together with its
/// implementation for `str`.
///
/// Each `name => byte` pair in the invocation becomes a method
/// `fn name(&self) -> bool` that is `true` when the final byte of the string
/// equals `byte`. `ends_with_ascii_digit` is always generated in addition to
/// the listed methods. All methods return `false` for an empty string.
macro_rules! define_ends_with_methods {
    ($($name:ident => $expected:literal),* $(,)?) => {
        pub(crate) trait EndsWithExt {
            $(
                fn $name(&self) -> bool;
            )*
            fn ends_with_ascii_digit(&self) -> bool;
        }

        impl EndsWithExt for str {
            $(
                #[inline]
                fn $name(&self) -> bool {
                    // Compare on the raw last byte; no UTF-8 decoding needed.
                    self.as_bytes().last().copied() == Some($expected)
                }
            )*

            #[inline]
            fn ends_with_ascii_digit(&self) -> bool {
                self.as_bytes()
                    .last()
                    .map_or(false, |tail| tail.is_ascii_digit())
            }
        }
    };
}
// Instantiate `EndsWithExt` for `str`. Each `name => byte` entry below becomes a
// method reporting whether the string's last byte equals that byte; the macro
// additionally generates `ends_with_ascii_digit`.
define_ends_with_methods! {
// Whitespace and punctuation separators.
ends_with_space => b' ',
ends_with_dot => b'.',
ends_with_comma => b',',
ends_with_slash => b'/',
ends_with_colon => b':',
// Sign characters.
ends_with_plus => b'+',
ends_with_minus => b'-',
// Square brackets.
ends_with_lbracket => b'[',
ends_with_rbracket => b']',
}
/// Maps a decimal digit character to the `DIGIT_CHARS` entry for its value.
///
/// ASCII digits are resolved through `char::to_digit(10)`. Any other character
/// is looked up in a fixed list of ten-character digit blocks from various
/// scripts; its offset inside the block is the digit value. Characters outside
/// all of these blocks yield `None`.
///
/// NOTE(review): `DIGIT_CHARS` is declared elsewhere in the crate; presumably
/// it holds `'0'..='9'` in order — confirm.
pub(crate) fn to_ascii_digit(ch: char) -> Option<char> {
    // Code point of the zero digit of every supported block. Each block covers
    // ten consecutive code points (zero through nine) and no two blocks overlap.
    const ZERO_CODEPOINTS: &[u32] = &[
        0xFF10, // Fullwidth
        0x0660, // Arabic-Indic
        0x06F0, // Extended Arabic-Indic
        0x0966, // Devanagari
        0x09E6, // Bengali
        0x0A66, // Gurmukhi
        0x0AE6, // Gujarati
        0x0B66, // Oriya
        0x0BE6, // Tamil
        0x0C66, // Telugu
        0x0CE6, // Kannada
        0x0D66, // Malayalam
        0x0DE6, // Sinhala Lith
        0x0E50, // Thai
        0x0ED0, // Lao
        0x1040, // Myanmar
        0x17E0, // Khmer
        0x07C0, // NKo
        0x0F20, // Tibetan
        0x1810, // Mongolian
        0x19D0, // New Tai Lue
        0x1A80, // Tai Tham Hora
        0x1A90, // Tai Tham Tham
        0x1B50, // Balinese
        0x1BB0, // Sundanese
        0xA9D0, // Javanese
        0xAA50, // Cham
        0xABF0, // Meetei Mayek
    ];

    match ch.to_digit(10) {
        Some(value) => Some(DIGIT_CHARS[value as usize]),
        None => {
            let cp = u32::from(ch);
            ZERO_CODEPOINTS
                .iter()
                .find(|&&zero| (zero..=zero + 9).contains(&cp))
                .map(|&zero| DIGIT_CHARS[(cp - zero) as usize])
        }
    }
}