use crate::dictionary::connector::Connector;
use crate::dictionary::mapper::{ConnIdCounter, ConnIdProbs};
use crate::sentence::Sentence;
use crate::token::{Token, TokenIter};
use crate::tokenizer::lattice::{Lattice, Node};
use crate::tokenizer::Tokenizer;
/// A reusable tokenization context that borrows a [`Tokenizer`].
///
/// The worker owns the intermediate buffers (`sent`, `lattice`, `top_nodes`)
/// so they can be reused across sentences instead of being reallocated per
/// call (see `reset_sentence()` / `tokenize()`).
pub struct Worker<'t> {
// Borrowed tokenizer; supplies the dictionary and builds the lattice.
pub(crate) tokenizer: &'t Tokenizer,
// Input sentence buffer; cleared and refilled by `reset_sentence()`.
pub(crate) sent: Sentence,
// Scratch lattice rebuilt by `tokenize()` for the current sentence.
pub(crate) lattice: Lattice,
// Resulting top nodes of the lattice; one entry per output token.
pub(crate) top_nodes: Vec<(usize, Node)>,
// Connection-id counter; `None` until `init_connid_counter()` is called.
pub(crate) counter: Option<ConnIdCounter>,
}
impl<'t> Worker<'t> {
    /// Creates a new worker bound to `tokenizer`, with empty buffers and no
    /// connection-id counter.
    pub(crate) fn new(tokenizer: &'t Tokenizer) -> Self {
        Self {
            tokenizer,
            sent: Sentence::new(),
            lattice: Lattice::default(),
            top_nodes: vec![],
            counter: None,
        }
    }

    /// Resets the worker with a new input sentence.
    ///
    /// Clears the previous sentence and token results. When `input` is
    /// non-empty, the sentence is stored and compiled against the
    /// dictionary's character properties so that `tokenize()` can run.
    pub fn reset_sentence<S>(&mut self, input: S)
    where
        S: AsRef<str>,
    {
        self.sent.clear();
        self.top_nodes.clear();
        let input = input.as_ref();
        if !input.is_empty() {
            self.sent.set_sentence(input);
            self.sent.compile(self.tokenizer.dictionary().char_prop());
        }
    }

    /// Tokenizes the sentence set by `reset_sentence()`.
    ///
    /// Does nothing when the sentence is empty. Otherwise builds the lattice
    /// and appends its top nodes to `top_nodes`.
    ///
    /// NOTE(review): `top_nodes` is only cleared in `reset_sentence()`, so
    /// calling `tokenize()` twice without a reset appends duplicate results —
    /// presumably callers always pair the two; verify at call sites.
    pub fn tokenize(&mut self) {
        if self.sent.chars().is_empty() {
            return;
        }
        self.tokenizer.build_lattice(&self.sent, &mut self.lattice);
        self.lattice.append_top_nodes(&mut self.top_nodes);
    }

    /// Returns the number of tokens produced by the last `tokenize()` call.
    #[inline(always)]
    pub fn num_tokens(&self) -> usize {
        self.top_nodes.len()
    }

    /// Returns the `i`-th token (in sentence order).
    ///
    /// # Panics
    ///
    /// Panics when `i >= self.num_tokens()`.
    #[inline(always)]
    pub fn token<'w>(&'w self, i: usize) -> Token<'w, 't> {
        let num = self.num_tokens();
        // Explicit precondition: the unchecked subtraction below would wrap
        // silently in release builds, deferring the failure to a confusing
        // out-of-bounds access later.
        assert!(i < num, "token index out of range: i = {}, num_tokens() = {}", i, num);
        // `top_nodes` appears to be stored back-to-front (hence the reversed
        // index) — TODO(review): confirm against `append_top_nodes()`.
        Token::new(self, num - i - 1)
    }

    /// Returns an iterator over all tokens of the last tokenization.
    #[inline(always)]
    pub const fn token_iter<'w>(&'w self) -> TokenIter<'w, 't> {
        TokenIter::new(self, 0)
    }

    /// Initializes the connection-id counter, sized from the dictionary's
    /// connector dimensions. Must be called before
    /// `update_connid_counts()` / `compute_connid_probs()`.
    pub fn init_connid_counter(&mut self) {
        let connector = self.tokenizer.dictionary().connector();
        self.counter = Some(ConnIdCounter::new(
            connector.num_left(),
            connector.num_right(),
        ));
    }

    /// Accumulates connection-id counts from the current lattice.
    ///
    /// # Panics
    ///
    /// Panics when `init_connid_counter()` has not been called.
    pub fn update_connid_counts(&mut self) {
        let counter = self
            .counter
            .as_mut()
            .expect("init_connid_counter() must be called before update_connid_counts()");
        self.lattice.add_connid_counts(counter);
    }

    /// Computes (left, right) connection-id probabilities from the
    /// accumulated counts.
    ///
    /// # Panics
    ///
    /// Panics when `init_connid_counter()` has not been called.
    pub fn compute_connid_probs(&self) -> (ConnIdProbs, ConnIdProbs) {
        self.counter
            .as_ref()
            .expect("init_connid_counter() must be called before compute_connid_probs()")
            .compute_probs()
    }
}