use core::cmp;
use tables::grapheme::GraphemeCat;
/// Iterator that pairs every slice produced by a `Graphemes` iterator with
/// the byte offset of that slice inside the string the iterator was created
/// from.
///
/// Offsets are obtained by subtracting `start_offset` from the address of
/// each yielded slice.
#[derive(Clone)]
pub struct GraphemeIndices<'a> {
    /// Address of the first byte of the source string, kept as an integer.
    start_offset: usize,
    /// Wrapped iterator that performs the actual segmentation.
    iter: Graphemes<'a>,
}
impl<'a> Iterator for GraphemeIndices<'a> {
    type Item = (usize, &'a str);

    /// Advances the wrapped iterator and returns the slice it produced
    /// together with that slice's distance, in bytes, from `start_offset`.
    ///
    /// Returns `None` when the wrapped iterator is exhausted.
    #[inline]
    fn next(&mut self) -> Option<(usize, &'a str)> {
        match self.iter.next() {
            Some(grapheme) => {
                // The slice borrows from the original string, so its address
                // minus the string's base address is its byte offset.
                let offset = grapheme.as_ptr() as usize - self.start_offset;
                Some((offset, grapheme))
            }
            None => None,
        }
    }

    /// Forwards the wrapped iterator's bounds unchanged: one item is yielded
    /// here for every item yielded there.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.iter.size_hint();
        (lower, upper)
    }
}
impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
    /// Takes the next slice from the back of the wrapped iterator and returns
    /// it together with its distance, in bytes, from `start_offset`.
    ///
    /// Returns `None` when the wrapped iterator is exhausted.
    #[inline]
    fn next_back(&mut self) -> Option<(usize, &'a str)> {
        match self.iter.next_back() {
            Some(grapheme) => {
                // Offsets are always measured from the start of the original
                // string, regardless of the direction of iteration.
                let offset = grapheme.as_ptr() as usize - self.start_offset;
                Some((offset, grapheme))
            }
            None => None,
        }
    }
}
/// Double-ended iterator state for splitting a string slice into grapheme
/// clusters.
///
/// Clusters are removed from the front by `next` and from the back by
/// `next_back`; whatever has not been yielded yet stays in `string`.
#[derive(Clone)]
pub struct Graphemes<'a> {
    /// Text that has not been yielded yet.
    string: &'a str,
    /// When `true`, `GC_SpacingMark` characters are attached to the cluster
    /// they follow in the same way `GC_Extend` characters are.
    extended: bool,
    /// Category saved by `next` for the first character of `string` when that
    /// character was classified but not consumed; spares a second lookup.
    cat: Option<GraphemeCat>,
    /// Category saved by `next_back` for the last character of `string` when
    /// that character was classified but not consumed.
    catb: Option<GraphemeCat>,
}
/// Scanner states used by the `Graphemes` iterators while they look for the
/// edge of the current cluster.
///
/// The Hangul states are entered from different categories depending on the
/// direction of the scan; each variant lists both.
#[derive(PartialEq, Eq)]
enum GraphemeState {
    /// Nothing has been classified for the current cluster yet.
    Start,
    /// Forward: the cluster can only grow by `GC_Extend` (or, in extended
    /// mode, `GC_SpacingMark`) characters. Backward: only such characters
    /// have been seen so far and the character they attach to is still
    /// being sought.
    FindExtend,
    /// Forward: entered after `GC_L`. Backward: entered after `GC_L`,
    /// `GC_LV` or `GC_LVT`, where only a further `GC_L` may be prepended.
    HangulL,
    /// Forward: entered after `GC_LV` or `GC_V`. Backward: entered after
    /// `GC_V`.
    HangulLV,
    /// Forward: entered after `GC_LVT` or `GC_T`. Backward: entered after
    /// `GC_T`.
    HangulLVT,
    /// Inside a run of `GC_Regional_Indicator` characters.
    Regional,
}
/// Forward iteration: every call to `next` splits one cluster off the front
/// of the text that has not been yielded yet.
impl<'a> Iterator for Graphemes<'a> {
type Item = &'a str;
/// Bounds on the number of slices still to come, derived from the byte
/// length `len` of the remaining text: `min(len, 1)` as the lower bound
/// (a non-empty remainder yields at least one slice) and `len` as the upper
/// bound.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let slen = self.string.len();
(cmp::min(slen, 1), Some(slen))
}
/// Removes the next cluster from the front of the remaining text and
/// returns it, or returns `None` once the text is empty.
///
/// Characters are classified one at a time and fed to a small state machine
/// (`GraphemeState`) until one is found that does not belong to the current
/// cluster. The category of that character is stored in `self.cat` so the
/// following call does not have to look it up again.
///
/// NOTE(review): the transitions look like the grapheme cluster boundary
/// rules of UAX #29 (CR LF, Control, Hangul syllables, Regional_Indicator
/// runs, trailing Extend/SpacingMark) — confirm against the Unicode version
/// the `tables` module was generated from.
#[inline]
fn next(&mut self) -> Option<&'a str> {
use self::GraphemeState::*;
use tables::grapheme as gr;
if self.string.len() == 0 {
return None;
}
// `take_curr` tells the code after the loop whether the character at
// `idx` belongs to the cluster being returned (true) or is the first
// character of the next cluster (false).
let mut take_curr = true;
// Byte offset of the character currently being examined.
let mut idx = 0;
let mut state = Start;
// Placeholder only: the string is non-empty, so the loop body runs at
// least once and overwrites it.
let mut cat = gr::GC_Any;
for (curr, ch) in self.string.char_indices() {
idx = curr;
// Use the category cached by the previous call if there is one
// (consuming it); otherwise look the character up in the table.
cat = match self.cat {
None => gr::grapheme_category(ch),
_ => self.cat.take().unwrap()
};
// Extend characters, and SpacingMark characters when `extended` is set,
// are absorbed into the current cluster from any state; afterwards only
// further such characters can follow.
if match cat {
gr::GC_Extend => true,
gr::GC_SpacingMark if self.extended => true,
_ => false
} {
state = FindExtend; continue;
}
state = match state {
// A carriage return as the first character ends the cluster at once,
// taking a directly following '\n' with it.
Start if '\r' == ch => {
let slen = self.string.len();
// '\r' occupies one byte, so the next character starts at idx + 1.
let nidx = idx + 1;
if nidx != slen && self.string[nidx..].chars().next().unwrap() == '\n' {
idx = nidx; }
break; }
// First character of the cluster: pick the state from its category.
// A Control character forms a cluster by itself.
Start => match cat {
gr::GC_Control => break,
gr::GC_L => HangulL,
gr::GC_LV | gr::GC_V => HangulLV,
gr::GC_LVT | gr::GC_T => HangulLVT,
gr::GC_Regional_Indicator => Regional,
_ => FindExtend
},
// A non-extending character was found while only extending ones could
// continue the cluster: it starts the next cluster.
FindExtend => { take_curr = false;
break;
},
// After L: another L keeps the state, LV/V and LVT move on, anything
// else ends the cluster before the current character.
HangulL => match cat { gr::GC_L => continue,
gr::GC_LV | gr::GC_V => HangulLV,
gr::GC_LVT => HangulLVT,
_ => {
take_curr = false;
break;
}
},
// After LV or V: only V (same state) or T may follow.
HangulLV => match cat { gr::GC_V => continue,
gr::GC_T => HangulLVT,
_ => {
take_curr = false;
break;
}
},
// After LVT or T: only further T characters may follow.
HangulLVT => match cat { gr::GC_T => continue,
_ => {
take_curr = false;
break;
}
},
// A run of Regional_Indicator characters stays in one cluster.
Regional => match cat { gr::GC_Regional_Indicator => continue,
_ => {
take_curr = false;
break;
}
}
}
}
// If the character at `idx` is part of the cluster, move `idx` past it and
// leave the cache empty. Otherwise the cluster ends in front of it and its
// category is cached for the next call. Running off the end of the string
// leaves `take_curr` true with `idx` at the last character.
self.cat = if take_curr {
idx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
None
} else {
Some(cat)
};
// Split at `idx`: the head is returned, the tail stays for later calls.
let retstr = &self.string[..idx];
self.string = &self.string[idx..];
Some(retstr)
}
}
/// Backward iteration: every call to `next_back` splits one cluster off the
/// end of the text that has not been yielded yet.
impl<'a> DoubleEndedIterator for Graphemes<'a> {
/// Removes the last cluster from the remaining text and returns it, or
/// returns `None` once the text is empty.
///
/// The characters are scanned right to left through the same
/// `GraphemeState` machine used by `next`, but here the Hangul states
/// describe the character to the *right* of the one being examined, because
/// that is what decides whether the current character may be prepended.
/// The category of a character that was classified but not consumed is
/// stored in `self.catb` for the following call.
///
/// NOTE(review): the transitions look like the UAX #29 grapheme cluster
/// boundary rules applied in reverse — confirm against the Unicode version
/// the `tables` module was generated from.
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
use self::GraphemeState::*;
use tables::grapheme as gr;
if self.string.len() == 0 {
return None;
}
// `take_curr` tells the code after the loop whether the character at
// `idx` belongs to the cluster being returned (true) or not (false).
let mut take_curr = true;
// Byte offset of the character currently being examined.
let mut idx = self.string.len();
// Byte offset of the character examined just before (the one to the
// right); this is where the cluster starts when the current character is
// rejected.
let mut previdx = idx;
let mut state = Start;
// Placeholder only: the string is non-empty, so the loop body runs at
// least once and overwrites it.
let mut cat = gr::GC_Any;
for (curr, ch) in self.string.char_indices().rev() {
previdx = idx;
idx = curr;
// Use the category cached by the previous call if there is one
// (consuming it); otherwise look the character up in the table.
cat = match self.catb {
None => gr::grapheme_category(ch),
_ => self.catb.take().unwrap()
};
state = match state {
// A line feed as the last character ends the cluster at once, taking a
// directly preceding '\r' (one byte, hence `idx -= 1`) with it.
Start if '\n' == ch => {
if idx > 0 && '\r' == self.string[..idx].chars().next_back().unwrap() {
idx -= 1; }
break; },
// Either nothing has been seen yet or only extending characters have:
// keep collecting extending characters, switch to a Hangul/Regional
// state, or take any other character as the start of the cluster.
Start | FindExtend => match cat {
gr::GC_Extend => FindExtend,
gr::GC_SpacingMark if self.extended => FindExtend,
gr::GC_L | gr::GC_LV | gr::GC_LVT => HangulL,
gr::GC_V => HangulLV,
gr::GC_T => HangulLVT,
gr::GC_Regional_Indicator => Regional,
// A Control character is taken only when it is the first character
// examined; after extending characters it is left for the next call.
gr::GC_Control => {
take_curr = Start == state;
break;
},
_ => break
},
// Character to the right was L, LV or LVT: only L may be prepended.
HangulL => match cat { gr::GC_L => continue, _ => {
take_curr = false;
break;
}
},
// Character to the right was V: V, L or LV may be prepended.
HangulLV => match cat { gr::GC_V => continue, gr::GC_L | gr::GC_LV => HangulL, _ => {
take_curr = false;
break;
}
},
// Character to the right was T: T, V, LV or LVT may be prepended.
HangulLVT => match cat { gr::GC_T => continue, gr::GC_V => HangulLV, gr::GC_LV | gr::GC_LVT => HangulL, _ => {
take_curr = false;
break;
}
},
// A run of Regional_Indicator characters stays in one cluster.
Regional => match cat { gr::GC_Regional_Indicator => continue,
_ => {
take_curr = false;
break;
}
}
}
}
// If the current character was rejected, the cluster starts at the
// character to its right (`previdx`) and the rejected character's category
// is cached for the next call; otherwise the cluster starts at `idx`.
self.catb = if take_curr {
None
} else {
idx = previdx;
Some(cat)
};
// Split at `idx`: the tail is returned, the head stays for later calls.
let retstr = &self.string[idx..];
self.string = &self.string[..idx];
Some(retstr)
}
}
/// Creates a `Graphemes` iterator positioned over the whole of `s`.
///
/// `is_extended` becomes the iterator's `extended` flag; both category
/// caches start out empty.
#[inline]
pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
    Graphemes {
        cat: None,
        catb: None,
        extended: is_extended,
        string: s,
    }
}
/// Creates a `GraphemeIndices` iterator over `s`.
///
/// The address of the first byte of `s` is recorded as `start_offset`, and
/// the wrapped iterator is built by `new_graphemes` with the same arguments.
#[inline]
pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
    let base_address = s.as_ptr() as usize;
    let graphemes = new_graphemes(s, is_extended);
    GraphemeIndices {
        start_offset: base_address,
        iter: graphemes,
    }
}