1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
pub mod extended;
mod trie;
use std::collections::HashMap;
use std::hash::Hash;
use dictionary::trie::PrefixMatches;
pub use dictionary::trie::{Error, Trie};
use language::Language;
use parse::Parse;
/// A compact `(index, value)` pair of bytes.
///
/// NOTE(review): presumably `index` is a position within a pattern or word
/// and `value` is the score attached to that position — confirm against the
/// pattern parser.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Locus {
    /// First component of the pair.
    pub index : u8,
    /// Second component of the pair.
    pub value : u8,
}
/// A pattern dictionary: a table of distinct tallies plus a trie over the
/// pattern strings.
///
/// `Patterns::from_iter` builds both halves together: the trie is constructed
/// from `(pattern, id)` pairs, where each id is the position of the pattern's
/// tally inside `tallies`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Patterns {
    /// Distinct tallies, addressed by the ids stored in `automaton`.
    tallies : Vec<Vec<Locus>>,
    /// Trie built from the `(pattern, tally id)` pairs.
    automaton : Trie,
}
impl Patterns {
    /// Builds a pattern dictionary from `(pattern, tally)` pairs.
    ///
    /// The pairs are first passed through `uniques`, which produces the table
    /// of tallies and a list of `(pattern, tally id)` pairs; the trie is then
    /// built from that list.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `Trie::from_iter`.
    pub fn from_iter<I>(iter : I) -> Result<Self, trie::Error>
        where I : IntoIterator<Item = (String, <Patterns as Parse>::Tally)>
    {
        let (keyed_ids, tallies) = uniques(iter.into_iter());
        Ok(Patterns { automaton : Trie::from_iter(keyed_ids.into_iter())?,
                      tallies })
    }
}
/// A map from a string key to a list of `usize` values.
///
/// NOTE(review): presumably the key is a word and the values are the
/// positions at which it may be broken — confirm against the callers.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Exceptions(
    /// The underlying map, exposed directly.
    pub HashMap<String, Vec<usize>>,
);
/// A complete dictionary: a language together with its patterns, its
/// exceptions and a pair of minima.
///
/// The `From<Builder>` conversion in this file constructs it, taking `minima`
/// from `Language::minima`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Standard {
    /// The language this dictionary belongs to; read through `language()`.
    language : Language,
    /// The patterns queried by `prefix_tallies`.
    patterns : Patterns,
    /// The exceptions attached to this dictionary.
    pub exceptions : Exceptions,
    /// A pair of minima.
    ///
    /// NOTE(review): presumably the minimum number of characters kept before
    /// and after a break — confirm against `Language::minima`.
    pub minima : (usize, usize),
}
impl Standard {
    /// The language of this dictionary.
    pub fn language(&self) -> Language {
        self.language
    }

    /// Returns an iterator over the tallies of every pattern that the trie
    /// reports for `query` through `get_prefixes`.
    ///
    /// NOTE(review): presumably these are the patterns that are prefixes of
    /// `query` — confirm against `Trie::get_prefixes`.
    pub fn prefix_tallies<'f, 'q>(&'f self, query : &'q [u8]) -> PrefixTallies<'f, 'q, Vec<Locus>> {
        let patterns = &self.patterns;
        let matches = patterns.automaton.get_prefixes(query);
        let tallies = &patterns.tallies;
        PrefixTallies { tallies, matches }
    }
}
/// Iterator adapter that turns the ids produced by a `PrefixMatches` into
/// references to the corresponding entries of a tally table.
pub struct PrefixTallies<'f, 'q, T> {
    /// The table that the matched ids index into.
    tallies : &'f [T],
    /// The underlying source of matched ids.
    matches : PrefixMatches<'f, 'q>,
}
impl<'f, 'q, T> Iterator for PrefixTallies<'f, 'q, T> {
    type Item = &'f T;

    /// Yields the tally addressed by the next matched id.
    ///
    /// Returns `None` once the underlying matches are exhausted, and also
    /// when a matched id lies outside the tally table.
    fn next(&mut self) -> Option<Self::Item> {
        let id = self.matches.next()?;
        self.tallies.get(id as usize)
    }
}
/// The ingredients of a `Standard` dictionary, all publicly assignable.
///
/// Convert it with `Standard::from`; the conversion fills in the minima from
/// the language.
#[derive(Debug)]
pub struct Builder {
    /// The language the dictionary is built for.
    pub language : Language,
    /// The patterns of the dictionary.
    pub patterns : Patterns,
    /// The exceptions of the dictionary.
    pub exceptions : Exceptions,
}
impl From<Builder> for Standard {
fn from(b : Builder) -> Standard {
Standard { language : b.language,
patterns : b.patterns,
exceptions : b.exceptions,
minima : b.language.minima(), }
}
}
/// Deduplicates the tallies of `(pattern, tally)` pairs and replaces each
/// tally by a numeric id.
///
/// Returns `(pairs, tallies)` where:
///
/// - `tallies` holds every distinct tally once, in order of first appearance;
/// - `pairs` holds `(pattern, id)` with `id` being the position of the
///   pattern's tally in `tallies`, sorted by pattern.
///
/// If the same pattern occurs more than once, only its first occurrence (in
/// input order) is kept in `pairs`. A tally that was introduced solely by a
/// dropped occurrence nevertheless remains in `tallies`.
pub fn uniques<I, T>(iter : I) -> (Vec<(String, u64)>, Vec<T>)
    where T : Eq + Clone + Hash,
          I : Iterator<Item = (String, T)>
{
    let mut pairs = Vec::new();
    let mut tally_ids = HashMap::new();
    let mut tallies : Vec<T> = Vec::with_capacity(256);

    for (pattern, tally) in iter {
        // A single map lookup per item; the tally is cloned only when it has
        // not been seen before.
        let id = match tally_ids.entry(tally) {
            ::std::collections::hash_map::Entry::Occupied(known) => *known.get(),
            ::std::collections::hash_map::Entry::Vacant(fresh) => {
                let id = tallies.len() as u64;
                tallies.push(fresh.key().clone());
                fresh.insert(id);
                id
            }
        };
        pairs.push((pattern, id));
    }

    // The sort must be stable: together with `dedup_by`, which keeps the
    // first element of each run, it guarantees that the earliest occurrence
    // of a repeated pattern is the one that survives.
    pairs.sort_by(|a, b| a.0.cmp(&b.0));
    pairs.dedup_by(|a, b| a.0 == b.0);
    (pairs, tallies)
}