1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
use std::collections::HashMap;
use crate::dictionary::trie::{self, Trie};
use crate::dictionary::{uniques, Locus, PrefixTallies};
use crate::language::Language;
use crate::parse::Parse;
/// Tally type stored in `Patterns` and yielded by `Extended::prefix_tallies`.
///
/// Derives `Default`, which gives an empty `standard` list and no `subregion`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Tally {
/// List of loci for this tally.
/// NOTE(review): presumably the ordinary hyphenation scores that do not alter
/// the word's spelling — confirm against the definition of `Locus`.
pub standard : Vec<Locus>,
/// At most one locus paired with the `Subregion` it belongs to.
/// NOTE(review): presumably the spelling-altering hyphenation opportunity — confirm.
pub subregion : Option<(Locus, Subregion)>,
}
/// Four-field record that accompanies a locus in `Tally::subregion` and an
/// exception entry in `Exceptions`.
///
/// NOTE(review): the field meanings below are inferred from their names only;
/// confirm against the code that parses and applies subregions.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Subregion {
/// presumably the extent of the region to the left of the break — TODO confirm
pub left : usize,
/// presumably the extent of the region to the right of the break — TODO confirm
pub right : usize,
/// Owned replacement text; presumably substituted for the region when the
/// break is taken — TODO confirm
pub substitution : String,
/// presumably the offset of the break inside `substitution` — TODO confirm
pub breakpoint : usize,
}
/// Pattern store: a table of tallies plus the trie that is queried for
/// prefixes of a word (see `Extended::prefix_tallies`).
///
/// Both fields are private; values are built with `Patterns::from_iter` or
/// obtained through `Default`.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Patterns {
// Tally table, lent out by reference to `PrefixTallies`.
// NOTE(review): presumably indexed by the values stored in `automaton` — confirm
// against `uniques` and `PrefixTallies`.
tallies : Vec<Tally>,
// Trie queried with `get_prefixes` during lookup.
automaton : Trie,
}
impl Patterns {
pub fn from_iter<I>(iter : I) -> Result<Self, trie::Error>
where I : IntoIterator<Item = (String, <Patterns as Parse>::Tally)>
{
let (kvs, tallies) = uniques(iter.into_iter());
let automaton = Trie::from_iter(kvs.into_iter())?;
Ok(Patterns { tallies, automaton })
}
}
/// Newtype around a map from a string key to a list of
/// `(usize, Option<Subregion>)` entries; the inner map is public.
///
/// NOTE(review): presumably keyed by word, each entry being a break position
/// with an optional spelling change — confirm against the exception parser.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Exceptions(pub HashMap<String, Vec<(usize, Option<Subregion>)>>);
/// Dictionary bundling a language, its patterns, its exceptions, and a pair
/// of minima.
///
/// `language` and `patterns` are private; read the language through
/// `Extended::language` and query the patterns through
/// `Extended::prefix_tallies`. Values are produced from a `Builder` via `From`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Extended {
// Language this dictionary is for; returned by value from `language()`.
language : Language,
// Pattern table and trie consulted by `prefix_tallies()`.
patterns : Patterns,
/// Exception table, carried over unchanged from the `Builder`.
pub exceptions : Exceptions,
/// Pair of sizes initialised from `Language::minima()` during conversion
/// from a `Builder`.
/// NOTE(review): presumably the minimum number of characters required
/// before and after a break — confirm against `Language::minima`.
pub minima : (usize, usize),
}
impl Extended {
    /// Returns the language this dictionary was built for.
    pub fn language(&self) -> Language {
        self.language
    }

    /// Looks up `query` in the pattern trie and pairs the resulting prefix
    /// matches with a borrow of the tally table.
    ///
    /// The returned `PrefixTallies` borrows from both the dictionary (`'f`)
    /// and the query bytes (`'q`).
    pub fn prefix_tallies<'f, 'q>(&'f self, query: &'q [u8]) -> PrefixTallies<'f, 'q, Tally> {
        let patterns = &self.patterns;
        let matches = patterns.automaton.get_prefixes(query);
        PrefixTallies {
            matches,
            tallies: &patterns.tallies,
        }
    }
}
/// Plain, fully public set of parts from which an `Extended` dictionary is
/// made; convert it with `Extended::from(builder)` / `builder.into()`.
#[derive(Debug)]
pub struct Builder {
/// Language of the dictionary being built; also the source of the
/// resulting dictionary's `minima`.
pub language : Language,
/// Pattern table and trie, moved into the dictionary as-is.
pub patterns : Patterns,
/// Exception table, moved into the dictionary as-is.
pub exceptions : Exceptions,
}
impl From<Builder> for Extended {
fn from(b : Builder) -> Extended {
Extended { language : b.language,
patterns : b.patterns,
exceptions : b.exceptions,
minima : b.language.minima(), }
}
}