use crate::complex::{Dictionary, LstmPayloads};
use crate::provider::RuleBreakDataV1;
use crate::symbols::*;
/// Customization points that let [`RuleBreakIterator`] run over different
/// kinds of input.
///
/// An implementor supplies the underlying iterator type, the character type
/// it yields, and two hooks that the iterator's `next()` calls back into.
pub trait RuleBreakType<'l, 's> {
/// The underlying iterator. It yields `(position, character)` pairs; the
/// position component is what `RuleBreakIterator::next` reports as a break.
/// It must be `Clone` because `next` snapshots it so it can rewind after
/// looking ahead through a multi-character rule.
type IterAttr: Iterator<Item = (usize, Self::CharType)> + Clone;
/// The character type yielded by [`Self::IterAttr`]. It is converted to
/// `u32` for the property-table lookup.
type CharType: Copy + Into<u32>;
/// Returns a length for the character at the iterator's current position.
///
/// `RuleBreakIterator::next` sums these lengths while stepping forward and
/// compares the running total against the offsets stored in `result_cache`.
// NOTE(review): the unit (bytes vs. code units) depends on the implementor — confirm.
fn get_current_position_character_len(iter: &RuleBreakIterator<'l, 's, Self>) -> usize;
/// Hook invoked by `RuleBreakIterator::next` when both the left and the
/// right character carry the data's `complex_property`.
///
/// `left_codepoint` is the character on the left side of the candidate
/// boundary. Returning `Some(pos)` makes `next` return `pos` immediately;
/// returning `None` lets `next` fall through to the normal table lookup.
/// The iterator is passed mutably, so implementations may change its state.
fn handle_complex_language(
iter: &mut RuleBreakIterator<'l, 's, Self>,
left_codepoint: Self::CharType,
) -> Option<usize>;
}
/// Iterator that reports break positions by driving a rule table over the
/// `(position, character)` pairs produced by `Y::IterAttr`.
///
/// See the `Iterator` implementation for the exact stepping logic.
pub struct RuleBreakIterator<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> {
/// Underlying `(position, character)` iterator; cloned to mark rewind points.
pub(crate) iter: Y::IterAttr,
/// Value returned as the final break once `iter` is exhausted.
// NOTE(review): presumably the total input length — confirm against the constructor.
pub(crate) len: usize,
/// The most recently read `(position, character)` pair, or `None` before
/// the first read and after `iter` has been exhausted.
pub(crate) current_pos_data: Option<(usize, Y::CharType)>,
/// Pending break offsets that `next()` serves before doing any rule
/// matching. Each entry is compared against a running sum of character
/// lengths; the remaining entries are rebased after each one is served.
// NOTE(review): not filled in this section — presumably by `handle_complex_language`; confirm.
pub(crate) result_cache: alloc::vec::Vec<usize>,
/// Rule data: property table, break-state table and the special property
/// values (`sot_property`, `eot_property`, `complex_property`, ...).
pub(crate) data: &'l RuleBreakDataV1<'l>,
/// Not read by the code in this section.
// NOTE(review): presumably consumed by `handle_complex_language` implementations — confirm.
pub(crate) dictionary: &'l Dictionary,
/// Not read by the code in this section.
// NOTE(review): presumably consumed by `handle_complex_language` implementations — confirm.
pub(crate) lstm: &'l LstmPayloads,
}
impl<'l, 's, Y: RuleBreakType<'l, 's>> Iterator for RuleBreakIterator<'l, 's, Y> {
    type Item = usize;

    /// Returns the next break position, or `None` when the underlying
    /// iterator has nothing left to start from.
    ///
    /// The work happens in three phases:
    /// 1. If `result_cache` holds pending offsets, step forward until the
    ///    first one is reached and return the position found there.
    /// 2. If no character has been read yet, read the first one and test it
    ///    against `sot_property`.
    /// 3. Otherwise walk character pairs through the break-state table,
    ///    looking ahead (and rewinding if necessary) for multi-character rules.
    ///
    /// # Panics
    ///
    /// Panics if `current_pos_data` is `None` at a point where a position or
    /// character has to be read from it.
    fn next(&mut self) -> Option<Self::Item> {
        // Phase 1: serve breaks queued in `result_cache`.
        if let Some(&target) = self.result_cache.first() {
            let mut consumed = 0;
            while consumed != target {
                consumed += Y::get_current_position_character_len(self);
                self.current_pos_data = self.iter.next();
                if self.current_pos_data.is_none() {
                    // Input ran out before the cached offset was reached.
                    self.result_cache.clear();
                    return Some(self.len);
                }
            }
            // Drop the entry just served and rebase the rest on the new position.
            self.result_cache = self
                .result_cache
                .iter()
                .skip(1)
                .map(|offset| offset - consumed)
                .collect();
            return Some(self.current_pos_data.unwrap().0);
        }

        // Phase 2: nothing read yet, so pair `sot_property` with the first character.
        if self.current_pos_data.is_none() {
            let first = self.iter.next()?;
            self.current_pos_data = Some(first);
            let first_prop = self.get_current_break_property();
            if self.is_break_from_table(self.data.sot_property, first_prop) {
                return Some(first.0);
            }
        }

        // Phase 3: examine successive (left, right) character pairs.
        loop {
            let left_codepoint = self.get_current_codepoint();
            let left_prop = self.get_break_property(left_codepoint);

            self.current_pos_data = self.iter.next();
            let (right_pos, right_char) = match self.current_pos_data {
                Some(pair) => pair,
                // No right-hand character: the end of input is the last break.
                None => return Some(self.len),
            };
            let right_prop = self.get_break_property(right_char);

            if right_prop == self.data.complex_property {
                if left_prop != self.data.complex_property {
                    return Some(right_pos);
                }
                if let Some(offset) = Y::handle_complex_language(self, left_codepoint) {
                    return Some(offset);
                }
                // The hook received `&mut self`; from here on, position data
                // is re-read from `self` rather than from the bindings above.
            }

            let mut break_state = self.get_break_state_from_table(left_prop, right_prop);

            if break_state < 0 {
                // Negative table entry: the pair alone decides the outcome.
                if self.is_break_from_table(left_prop, right_prop) {
                    return Some(self.current_pos_data.unwrap().0);
                }
                continue;
            }

            // Non-negative entry: the state is fed back into the table as the
            // left-hand index together with following characters. Keep a
            // marker so the iterator can be rewound if the rule fails.
            let mut marker_iter = self.iter.clone();
            let mut marker_pos = self.current_pos_data;

            loop {
                self.current_pos_data = self.iter.next();
                let lookahead_char = match self.current_pos_data {
                    Some((_, ch)) => ch,
                    None => {
                        // Input exhausted mid-rule: consult the table with `eot_property`.
                        let end_state = self
                            .get_break_state_from_table(break_state as u8, self.data.eot_property);
                        if end_state == NOT_MATCH_RULE {
                            self.iter = marker_iter;
                            self.current_pos_data = marker_pos;
                            return Some(marker_pos.unwrap().0);
                        }
                        return Some(self.len);
                    }
                };

                let state_before = break_state;
                let lookahead_prop = self.get_break_property(lookahead_char);
                break_state = self.get_break_state_from_table(break_state as u8, lookahead_prop);
                if break_state < 0 {
                    break;
                }

                if state_before >= 0 && state_before <= self.data.last_codepoint_property {
                    // Move the marker up to the current position.
                    marker_iter = self.iter.clone();
                    marker_pos = self.current_pos_data;
                }
                if (break_state & INTERMEDIATE_MATCH_RULE) != 0 {
                    // Strip the flag and move the marker up to the current position.
                    break_state -= INTERMEDIATE_MATCH_RULE;
                    marker_iter = self.iter.clone();
                    marker_pos = self.current_pos_data;
                }
            }

            if break_state == KEEP_RULE {
                continue;
            }
            if break_state == NOT_MATCH_RULE {
                // The rule did not match: rewind to the marker and break there.
                self.iter = marker_iter;
                self.current_pos_data = marker_pos;
                return Some(marker_pos.unwrap().0);
            }
            return Some(self.current_pos_data.unwrap().0);
        }
    }
}
impl<'l, 's, Y: RuleBreakType<'l, 's>> RuleBreakIterator<'l, 's, Y> {
    /// Break property of the character at the current position.
    ///
    /// # Panics
    ///
    /// Panics if `current_pos_data` is `None`.
    pub(crate) fn get_current_break_property(&self) -> u8 {
        let codepoint = self.get_current_codepoint();
        self.get_break_property(codepoint)
    }

    /// Character at the current position.
    ///
    /// # Panics
    ///
    /// Panics if `current_pos_data` is `None`.
    fn get_current_codepoint(&self) -> Y::CharType {
        let (_, codepoint) = self
            .current_pos_data
            .expect("Not at the end of the string!");
        codepoint
    }

    /// Looks up the break property of `codepoint` in the data's property table.
    fn get_break_property(&self, codepoint: Y::CharType) -> u8 {
        let table = &self.data.property_table.0;
        table.get(codepoint.into())
    }

    /// Reads the break-state table entry at row `left`, column `right`.
    ///
    /// The table is addressed as a flat array with `property_count` columns
    /// per row. An index outside the table yields `KEEP_RULE`.
    fn get_break_state_from_table(&self, left: u8, right: u8) -> i8 {
        let row_start = left as usize * self.data.property_count as usize;
        let idx = row_start + right as usize;
        match self.data.break_state_table.0.get(idx) {
            Some(state) => state,
            None => KEEP_RULE,
        }
    }

    /// Whether the table entry for `(left, right)` denotes a break: the entry
    /// must be negative and must not be `KEEP_RULE`.
    fn is_break_from_table(&self, left: u8, right: u8) -> bool {
        let rule = self.get_break_state_from_table(left, right);
        rule != KEEP_RULE && rule < 0
    }
}