hyphenation_commons/dictionary/
mod.rs1pub mod extended;
4mod trie;
5
6use std::collections::HashMap;
7use std::hash::Hash;
8
9use crate::dictionary::trie::PrefixMatches;
10pub use crate::dictionary::trie::{Error, Trie};
11use crate::language::Language;
12use crate::parse::Parse;
13
14
15#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
16pub struct Locus {
17 pub index : u8,
18 pub value : u8,
19}
20
21#[derive(Clone, Debug, Default, Serialize, Deserialize)]
23pub struct Patterns {
24 tallies : Vec<Vec<Locus>>,
25 automaton : Trie,
26}
27
28impl Patterns {
29 pub fn from_iter<I>(iter : I) -> Result<Self, trie::Error>
30 where I : IntoIterator<Item = (String, <Patterns as Parse>::Tally)>
31 {
32 let (kvs, tallies) = uniques(iter.into_iter());
33 let automaton = Trie::from_iter(kvs.into_iter())?;
34 Ok(Patterns { tallies, automaton })
35 }
36}
37
38#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
40pub struct Exceptions(pub HashMap<String, Vec<usize>>);
41
42#[derive(Clone, Debug, Serialize, Deserialize)]
47pub struct Standard {
48 language : Language,
49 patterns : Patterns,
50 pub exceptions : Exceptions,
51 pub minima : (usize, usize),
54}
55
56
57impl Standard {
58 pub fn language(&self) -> Language { self.language }
60
61 pub fn prefix_tallies<'f, 'q>(&'f self, query : &'q [u8]) -> PrefixTallies<'f, 'q, Vec<Locus>> {
64 PrefixTallies { matches : self.patterns.automaton.get_prefixes(query),
65 tallies : &self.patterns.tallies, }
66 }
67}
68
69pub struct PrefixTallies<'f, 'q, T> {
70 tallies : &'f [T],
71 matches : PrefixMatches<'f, 'q>,
72}
73
74impl<'f, 'q, T> Iterator for PrefixTallies<'f, 'q, T> {
75 type Item = &'f T;
76
77 fn next(&mut self) -> Option<Self::Item> {
78 self.matches
79 .next()
80 .and_then(|i| self.tallies.get(i as usize))
81 }
82}
83
84
85#[derive(Debug)]
88pub struct Builder {
89 pub language : Language,
90 pub patterns : Patterns,
91 pub exceptions : Exceptions,
92}
93
94impl From<Builder> for Standard {
95 fn from(b : Builder) -> Standard {
96 Standard { language : b.language,
97 patterns : b.patterns,
98 exceptions : b.exceptions,
99 minima : b.language.minima(), }
100 }
101}
102
103
/// Replaces every tally by a numeric identifier, storing each distinct
/// tally only once.
///
/// Returns a pair `(pairs, tallies)`:
///
/// * `tallies` holds the distinct tallies in order of first appearance; the
///   identifier of a tally is its index in this vector.
/// * `pairs` holds `(pattern, identifier)` entries sorted by pattern, with
///   at most one entry per pattern. When a pattern occurs several times in
///   the input, the entry of its first occurrence is kept, because the sort
///   is stable and `dedup_by` retains the first element of each run.
///
/// A tally that was only referenced by a dropped duplicate pattern still
/// remains in `tallies`.
pub fn uniques<I, T>(iter: I) -> (Vec<(String, u64)>, Vec<T>)
where
    T: Eq + Clone + Hash,
    I: Iterator<Item = (String, T)>,
{
    let mut pairs = Vec::new();
    let mut tally_ids: HashMap<T, u64> = HashMap::new();
    let mut tallies: Vec<T> = Vec::with_capacity(256);

    for (pattern, tally) in iter {
        // The entry API hashes the tally once, whether or not it is new.
        let id = match tally_ids.entry(tally) {
            std::collections::hash_map::Entry::Occupied(known) => *known.get(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                let id = tallies.len() as u64;
                // The map owns the tally as its key; the table gets a clone.
                tallies.push(slot.key().clone());
                slot.insert(id);
                id
            }
        };
        pairs.push((pattern, id));
    }

    // Stable sort, so equal patterns keep their input order and the
    // deduplication below retains the earliest one.
    pairs.sort_by(|a, b| a.0.cmp(&b.0));
    pairs.dedup_by(|a, b| a.0 == b.0);
    (pairs, tallies)
}