1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
use crate::dictionary::mapper::{ConnIdCounter, ConnIdProbs};
use crate::errors::Result;
use crate::sentence::Sentence;
use crate::token::{Token, TokenIter};
use crate::tokenizer::lattice::{Lattice, Node};
use crate::tokenizer::Tokenizer;
/// Per-sentence working state for tokenizing with a borrowed [`Tokenizer`].
///
/// The worker owns the sentence buffer, the lattice and the result list, and
/// clears them in place when a new sentence is set, so one instance can be
/// used for many sentences in turn.
pub struct Worker<'a> {
/// Tokenizer that supplies the dictionary and builds the lattice.
pub(crate) tokenizer: &'a Tokenizer,
/// Sentence currently being processed; replaced by `reset_sentence`.
pub(crate) sent: Sentence,
/// Lattice filled by the tokenizer during `tokenize`.
pub(crate) lattice: Lattice,
/// Result nodes collected from the lattice after tokenization.
/// `token(i)` reads the entry at `len - i - 1`, so entries are presumably
/// stored last-token-first. NOTE(review): the meaning of the `u16`
/// component is not visible here -- confirm against `Lattice`.
pub(crate) top_nodes: Vec<(u16, Node)>,
/// Connection-id counter; `None` until `init_connid_counter` is called.
pub(crate) counter: Option<ConnIdCounter>,
}
impl<'a> Worker<'a> {
    /// Creates a worker bound to `tokenizer`, starting with an empty sentence,
    /// a default lattice, no results, and no connection-id counter.
    pub(crate) fn new(tokenizer: &'a Tokenizer) -> Self {
        Self {
            tokenizer,
            sent: Sentence::new(),
            lattice: Lattice::default(),
            top_nodes: vec![],
            counter: None,
        }
    }

    /// Replaces the sentence to be tokenized and discards any previous results.
    ///
    /// When `input` is empty the worker is left with a cleared sentence, and a
    /// following [`Self::tokenize`] call produces no tokens.
    ///
    /// # Errors
    ///
    /// Returns the error produced while compiling the sentence against the
    /// dictionary's character properties.
    pub fn reset_sentence<S>(&mut self, input: S) -> Result<()>
    where
        S: AsRef<str>,
    {
        self.sent.clear();
        self.top_nodes.clear();
        let input = input.as_ref();
        if !input.is_empty() {
            self.sent.set_sentence(input);
            self.sent.compile(self.tokenizer.dictionary().char_prop())?;
        }
        Ok(())
    }

    /// Tokenizes the sentence set by [`Self::reset_sentence`], storing the
    /// result in the worker.
    ///
    /// Results from an earlier call are discarded first, so calling this
    /// repeatedly without resetting the sentence does not accumulate
    /// duplicated tokens. If the sentence is empty, no tokens are produced.
    pub fn tokenize(&mut self) {
        // Drop stale results up front so the result list always reflects
        // only the most recent run.
        self.top_nodes.clear();
        if self.sent.chars().is_empty() {
            return;
        }
        self.tokenizer.build_lattice(&self.sent, &mut self.lattice);
        self.lattice.append_top_nodes(&mut self.top_nodes);
    }

    /// Returns the number of tokens produced by the last tokenization.
    #[inline(always)]
    pub fn num_tokens(&self) -> usize {
        self.top_nodes.len()
    }

    /// Returns the `i`-th resultant token.
    ///
    /// # Panics
    ///
    /// Panics if `i >= self.num_tokens()`.
    #[inline(always)]
    pub fn token(&self, i: usize) -> Token {
        let num_tokens = self.num_tokens();
        // Check explicitly: the subtraction below would otherwise overflow
        // (debug) or silently wrap to a bogus index (release).
        assert!(
            i < num_tokens,
            "token index out of bounds: the number of tokens is {} but the index is {}",
            num_tokens,
            i
        );
        // Caller position `i` maps to the mirrored internal index.
        Token::new(self, num_tokens - i - 1)
    }

    /// Creates an iterator over the resultant tokens, starting at position 0.
    #[inline(always)]
    pub const fn token_iter(&'a self) -> TokenIter<'a> {
        TokenIter::new(self, 0)
    }

    /// Initializes (or replaces) the connection-id counter, sized from the
    /// dictionary connector's left and right id counts.
    pub fn init_connid_counter(&mut self) {
        let connector = self.tokenizer.dictionary().connector();
        self.counter = Some(ConnIdCounter::new(
            connector.num_left(),
            connector.num_right(),
        ));
    }

    /// Adds the connection-id counts of the current lattice to the counter.
    ///
    /// # Panics
    ///
    /// Panics if [`Self::init_connid_counter`] has never been called.
    pub fn update_connid_counts(&mut self) {
        let counter = self
            .counter
            .as_mut()
            .expect("init_connid_counter() must be called before update_connid_counts()");
        self.lattice.add_connid_counts(counter);
    }

    /// Computes probabilities from the accumulated connection-id counts and
    /// returns the pair produced by the counter.
    /// NOTE(review): presumably (left-id probs, right-id probs) -- confirm
    /// against `ConnIdCounter::compute_probs`.
    ///
    /// # Panics
    ///
    /// Panics if [`Self::init_connid_counter`] has never been called.
    pub fn compute_connid_probs(&self) -> (ConnIdProbs, ConnIdProbs) {
        self.counter
            .as_ref()
            .expect("init_connid_counter() must be called before compute_connid_probs()")
            .compute_probs()
    }
}