use core::cmp;
use tables::grapheme::GraphemeCat;
/// Iterator over the grapheme clusters of a string slice, paired with their
/// byte offsets.
///
/// Every item is `(offset, cluster)`: `cluster` comes from the wrapped
/// `Graphemes` iterator and `offset` is the distance in bytes between the
/// cluster's first byte and the address stored in `start_offset`.
#[derive(Clone)]
pub struct GraphemeIndices<'a> {
    /// Address that every yielded offset is measured against.
    start_offset: usize,
    /// Wrapped cluster iterator that performs the actual segmentation.
    iter: Graphemes<'a>,
}
impl<'a> Iterator for GraphemeIndices<'a> {
    type Item = (usize, &'a str);

    /// Yields the next cluster from the front together with its byte offset.
    ///
    /// The offset is derived from pointer arithmetic: the address of the
    /// cluster's first byte minus the recorded `start_offset`.
    #[inline]
    fn next(&mut self) -> Option<(usize, &'a str)> {
        let base = self.start_offset;
        self.iter.next().map(|cluster| {
            let offset = cluster.as_ptr() as usize - base;
            (offset, cluster)
        })
    }

    /// Forwards the size hint of the wrapped cluster iterator unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let inner = &self.iter;
        inner.size_hint()
    }
}
impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
    /// Yields the next cluster from the back together with its byte offset.
    ///
    /// The offset is computed the same way as in forward iteration: the
    /// address of the cluster's first byte minus the recorded `start_offset`.
    #[inline]
    fn next_back(&mut self) -> Option<(usize, &'a str)> {
        let base = self.start_offset;
        self.iter.next_back().map(|cluster| {
            let offset = cluster.as_ptr() as usize - base;
            (offset, cluster)
        })
    }
}
/// Double-ended iterator over the grapheme clusters of a string slice.
///
/// Clusters can be pulled from the front and from the back; each call
/// shrinks `string` by the cluster that was returned.
#[derive(Clone)]
pub struct Graphemes<'a> {
    /// Part of the input that has not been yielded from either end yet.
    string: &'a str,
    /// Enables the rules guarded by this flag in the scanners
    /// (attachment of SpacingMark and Prepend characters).
    extended: bool,
    /// Category of the first remaining character, cached by forward
    /// iteration when a scan stopped on a character without consuming it.
    cat: Option<GraphemeCat>,
    /// Category of the last remaining character, cached by backward
    /// iteration when a scan stopped on a character without consuming it.
    catb: Option<GraphemeCat>,
    /// Count of Regional_Indicator characters cached by backward iteration
    /// so that consecutive calls do not rescan the same run.
    regional_count_back: Option<usize>,
}
/// States of the scanning state machine used by `Graphemes`.
///
/// Forward iteration reads a state as "what has just been consumed to the
/// left"; backward iteration reads it as "what lies immediately to the
/// right of the character being examined".
#[derive(Clone, Copy, Eq, PartialEq)]
enum GraphemeState {
    /// Nothing has been examined yet for the current cluster.
    Start,
    /// Only extending characters may still join the cluster.
    FindExtend,
    /// A Hangul leading consonant (L) is adjacent.
    HangulL,
    /// A Hangul vowel (V, or an LV syllable when scanning forward) is adjacent.
    HangulLV,
    /// A Hangul trailing consonant (T, or an LVT syllable when scanning
    /// forward) is adjacent.
    HangulLVT,
    /// A Prepend character was consumed; only entered by forward iteration.
    Prepend,
    /// A Regional_Indicator character is adjacent.
    Regional,
    /// Forward: an emoji base was consumed. Backward: an emoji modifier lies
    /// to the right.
    Emoji,
    /// Forward: a ZWJ was consumed. Backward: a character that may follow a
    /// ZWJ lies to the right.
    Zwj,
}
impl<'a> Iterator for Graphemes<'a> {
    type Item = &'a str;

    /// Bounds the number of remaining clusters.
    ///
    /// A non-empty remainder holds at least one cluster and at most one per
    /// byte; an empty remainder holds none.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let slen = self.string.len();
        (cmp::min(slen, 1), Some(slen))
    }

    /// Returns the next grapheme cluster from the front of the remaining
    /// string, or `None` once the string is exhausted.
    ///
    /// The remaining string is scanned left to right with a small state
    /// machine. The scan stops at the first character that must start a new
    /// cluster; that character's category is cached in `self.cat` so the next
    /// call does not have to look it up again.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        use self::GraphemeState::*;
        use tables::grapheme as gr;

        if self.string.is_empty() {
            return None;
        }

        // `take_curr` records whether the character at `idx` belongs to the
        // cluster being returned (true) or starts the next one (false).
        let mut take_curr = true;
        let mut idx = 0;
        let mut state = Start;
        let mut cat = gr::GC_Any;

        // Caches maintained by `next_back()` describe the back of the string
        // relative to a front that is about to move; invalidate them.
        self.regional_count_back = None;
        self.catb = None;

        for (curr, ch) in self.string.char_indices() {
            idx = curr;

            // Use the category cached by the previous call, if any; it
            // belongs to the first character of the remaining string.
            cat = match self.cat.take() {
                Some(cached) => cached,
                None => gr::grapheme_category(ch),
            };

            // GB10: Extend characters between an emoji base and its modifier
            // are skipped without leaving the Emoji state.
            if (state, cat) == (Emoji, gr::GC_Extend) {
                continue;
            }

            // GB9 / GB9a / GB11: Extend, SpacingMark (extended mode only) and
            // ZWJ attach to whatever precedes them, regardless of state.
            if let Some(new_state) = match cat {
                gr::GC_Extend => Some(FindExtend),
                gr::GC_SpacingMark if self.extended => Some(FindExtend),
                gr::GC_ZWJ => Some(Zwj),
                _ => None,
            } {
                state = new_state;
                continue;
            }

            state = match state {
                Start if '\r' == ch => {
                    // GB3: keep CR LF together. '\r' is one byte long, so the
                    // following character (if any) starts at `idx + 1`.
                    let nidx = idx + 1;
                    if self.string[nidx..].starts_with('\n') {
                        idx = nidx;
                    }
                    // GB4: always break after a control character.
                    break;
                }
                Start | Prepend => match cat {
                    gr::GC_Control => {
                        // GB5: a control character is a cluster of its own
                        // unless something (Prepend) has already been consumed.
                        take_curr = state == Start;
                        break;
                    }
                    gr::GC_L => HangulL,
                    gr::GC_LV | gr::GC_V => HangulLV,
                    gr::GC_LVT | gr::GC_T => HangulLVT,
                    gr::GC_Prepend if self.extended => Prepend,
                    gr::GC_Regional_Indicator => Regional,
                    gr::GC_E_Base | gr::GC_E_Base_GAZ => Emoji,
                    _ => FindExtend,
                },
                FindExtend => {
                    // A non-extending character ends the cluster.
                    take_curr = false;
                    break;
                }
                HangulL => match cat {
                    // GB6: L x (L | V | LV | LVT)
                    gr::GC_L => continue,
                    gr::GC_LV | gr::GC_V => HangulLV,
                    gr::GC_LVT => HangulLVT,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                HangulLV => match cat {
                    // GB7: (LV | V) x (V | T)
                    gr::GC_V => continue,
                    gr::GC_T => HangulLVT,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                HangulLVT => match cat {
                    // GB8: (LVT | T) x T
                    gr::GC_T => continue,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                Regional => match cat {
                    // GB12 / GB13: regional indicators join in pairs only.
                    gr::GC_Regional_Indicator => FindExtend,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                Emoji => match cat {
                    // GB10: (E_Base | EBG) Extend* x E_Modifier.
                    // Only one modifier may attach to a base, so leave the
                    // Emoji state after consuming it; staying in it would
                    // wrongly absorb any further modifiers.
                    gr::GC_E_Modifier => FindExtend,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                Zwj => match cat {
                    // GB11: ZWJ x (Glue_After_Zwj | EBG).
                    // After the glued character only extending characters may
                    // follow; staying in the Zwj state would wrongly glue a
                    // second character that has no ZWJ in front of it.
                    gr::GC_Glue_After_Zwj => FindExtend,
                    // An EBG may still take an emoji modifier (GB10).
                    gr::GC_E_Base_GAZ => Emoji,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
            }
        }

        self.cat = if take_curr {
            // The character at `idx` is part of this cluster: step past it.
            idx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
            None
        } else {
            // The character at `idx` starts the next cluster; remember its
            // category for the next call.
            Some(cat)
        };

        let retstr = &self.string[..idx];
        self.string = &self.string[idx..];
        Some(retstr)
    }
}
impl<'a> DoubleEndedIterator for Graphemes<'a> {
    /// Returns the last grapheme cluster of the remaining string, or `None`
    /// once the string is exhausted.
    ///
    /// The state machine runs right to left, so each state describes the
    /// character immediately to the *right* of the one being examined:
    ///
    /// * `HangulL`   - the right neighbour behaves like a leading consonant,
    /// * `HangulLV`  - the right neighbour is a vowel,
    /// * `HangulLVT` - the right neighbour is a trailing consonant,
    /// * `Emoji`     - the right neighbour is an emoji modifier,
    /// * `Zwj`       - the right neighbour is a character that may follow ZWJ.
    ///
    /// When the scan stops on a character that is not part of the returned
    /// cluster, its category is cached in `self.catb` for the next call.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        use self::GraphemeState::*;
        use tables::grapheme as gr;

        if self.string.is_empty() {
            return None;
        }

        // `idx` is the start of the character being examined and `previdx`
        // the start of the character examined just before it (its right
        // neighbour). `take_curr` records whether the character at `idx`
        // belongs to the returned cluster.
        let mut take_curr = true;
        let mut idx = self.string.len();
        let mut previdx = idx;
        let mut state = Start;
        let mut cat = gr::GC_Any;

        // The cache maintained by `next()` describes the front of the string;
        // it is invalidated whenever iteration proceeds from the back.
        self.cat = None;

        'outer: for (curr, ch) in self.string.char_indices().rev() {
            previdx = idx;
            idx = curr;

            // Use the category cached by the previous call, if any; it
            // belongs to the last character of the remaining string.
            cat = match self.catb.take() {
                Some(cached) => cached,
                None => gr::grapheme_category(ch),
            };

            state = match state {
                Start if '\n' == ch => {
                    // GB3: keep CR LF together.
                    if self.string[..idx].ends_with('\r') {
                        idx -= 1;
                    }
                    // GB4: always break before a control character.
                    break;
                }
                Start | FindExtend => match cat {
                    // GB9 / GB9a: extending characters attach to their left
                    // neighbour, so keep scanning leftwards.
                    gr::GC_Extend => FindExtend,
                    gr::GC_SpacingMark if self.extended => FindExtend,
                    gr::GC_ZWJ => FindExtend,
                    gr::GC_E_Modifier => Emoji,
                    gr::GC_Glue_After_Zwj | gr::GC_E_Base_GAZ => Zwj,
                    gr::GC_L | gr::GC_LV | gr::GC_LVT => HangulL,
                    gr::GC_V => HangulLV,
                    gr::GC_T => HangulLVT,
                    gr::GC_Regional_Indicator => Regional,
                    gr::GC_Control => {
                        // GB5: a control character never takes the extending
                        // characters that follow it.
                        take_curr = Start == state;
                        break;
                    }
                    _ => break,
                },
                HangulL => match cat {
                    // Only L x L can extend further to the left.
                    gr::GC_L => continue,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                HangulLV => match cat {
                    // V x V: the right neighbour is still a vowel.
                    gr::GC_V => continue,
                    // (L | LV) x V: now behaves like a leading consonant.
                    gr::GC_L | gr::GC_LV => HangulL,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                HangulLVT => match cat {
                    // T x T: the right neighbour is still a trailing consonant.
                    gr::GC_T => continue,
                    // V x T
                    gr::GC_V => HangulLV,
                    // (LV | LVT) x T
                    gr::GC_LV | gr::GC_LVT => HangulL,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
                Prepend => {
                    // This state is never entered during reverse iteration.
                    unreachable!()
                }
                Regional => {
                    // GB12 / GB13: whether to break depends on how many
                    // Regional_Indicator characters precede the right
                    // neighbour, so count the run (or reuse the cached count).
                    let count = match self.regional_count_back {
                        Some(count) => count,
                        None => self.string[..previdx]
                            .chars()
                            .rev()
                            .take_while(|c| {
                                gr::grapheme_category(*c) == gr::GC_Regional_Indicator
                            })
                            .count(),
                    };
                    // Cache the count for the next step so the same run is
                    // not rescanned.
                    self.regional_count_back = count.checked_sub(1);
                    if count % 2 == 0 {
                        take_curr = false;
                        break;
                    }
                    continue;
                }
                Emoji => {
                    // GB10: decide whether the modifier to the right attaches
                    // to something on the left by scanning back past Extend
                    // characters for (E_Base | (ZWJ? EBG)).
                    let mut ebg_idx = None;
                    for (startidx, prev) in self.string[..previdx].char_indices().rev() {
                        match (ebg_idx, gr::grapheme_category(prev)) {
                            (None, gr::GC_Extend) => continue,
                            (None, gr::GC_E_Base) => {
                                // Found the base: return the whole sequence.
                                idx = startidx;
                                break 'outer;
                            }
                            (None, gr::GC_E_Base_GAZ) => {
                                // Keep looking: a ZWJ may precede this EBG.
                                ebg_idx = Some(startidx);
                            }
                            (Some(_), gr::GC_ZWJ) => {
                                // GB11: ZWJ x EBG, include the ZWJ as well.
                                idx = startidx;
                                break 'outer;
                            }
                            _ => break,
                        }
                    }
                    if let Some(ebg_idx) = ebg_idx {
                        // An EBG without a ZWJ in front of it.
                        idx = ebg_idx;
                        break;
                    }
                    // The modifier has no base to attach to: break before it.
                    take_curr = false;
                    break;
                }
                Zwj => match cat {
                    // GB11: ZWJ x (Glue_After_Zwj | EBG). The ZWJ itself
                    // attaches to whatever precedes it (GB9), so carry on as
                    // for any other extending character. Staying in the Zwj
                    // state here would split off the character before the ZWJ
                    // and disagree with forward iteration.
                    gr::GC_ZWJ => FindExtend,
                    _ => {
                        take_curr = false;
                        break;
                    }
                },
            }
        }

        self.catb = if take_curr {
            None
        } else {
            // The character at `idx` is not part of this cluster: the cluster
            // starts at its right neighbour. Remember its category.
            idx = previdx;
            Some(cat)
        };

        if self.extended && cat != gr::GC_Control {
            // GB9b: pull in any Prepend characters directly before the
            // cluster, caching the category of the first character that is
            // not a Prepend.
            for (i, c) in self.string[..idx].char_indices().rev() {
                match gr::grapheme_category(c) {
                    gr::GC_Prepend => idx = i,
                    other => {
                        self.catb = Some(other);
                        break;
                    }
                }
            }
        }

        let retstr = &self.string[idx..];
        self.string = &self.string[..idx];
        Some(retstr)
    }
}
/// Builds a `Graphemes` iterator over `s`.
///
/// `is_extended` is stored in the iterator's `extended` flag; all caches
/// start out empty.
#[inline]
pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
    Graphemes {
        regional_count_back: None,
        catb: None,
        cat: None,
        extended: is_extended,
        string: s,
    }
}
/// Builds a `GraphemeIndices` iterator over `s`.
///
/// The address of the first byte of `s` is recorded so that each yielded
/// cluster can be reported with its byte offset relative to that address.
#[inline]
pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
    let base_addr = s.as_ptr() as usize;
    let clusters = new_graphemes(s, is_extended);
    GraphemeIndices {
        start_offset: base_addr,
        iter: clusters,
    }
}