unicode_collation/
table.rs1use pest::{self, Parser};
2use multistage::MultiStage;
3use std::{usize, u16, u32};
4use std::fs::File;
5use std::io::Read;
6use std::fmt;
7
8#[derive(Parser)]
9#[grammar = "syntax.pest"]
10struct TableParser;
11
12type Nodes<'a> = pest::iterators::Pairs<'a, Rule>;
13type Node<'a> = pest::iterators::Pair<'a, Rule>;
14
/// One collation element: a fixed group of four 16-bit weights.
///
/// The type is plain data, so it is cheap to copy and can be compared for
/// equality. `Debug` is implemented by hand elsewhere in this file and is
/// therefore not derived here.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct WeightEntry {
    /// The weights in the order they appear in the source table. Positions
    /// for which the table lists no value are stored as zero.
    pub weights: [u16; 4],
}
19
20impl fmt::Debug for WeightEntry {
21 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
22 write!(f, "[")?;
23 for weight in &self.weights {
24 write!(f, ".{:04X}", weight)?;
25 }
26 write!(f, "]")?;
27 Ok(())
28 }
29}
30
31pub struct CollationTable {
32 weights: Vec<WeightEntry>,
34 table: MultiStage<PrimaryEntry>,
35}
36
37impl CollationTable {
38 fn new() -> Self {
39 Self {
40 weights: Vec::new(),
41 table: MultiStage::new(8),
42 }
43 }
44
45 pub fn from_text(data: &str) -> CollationTable {
46 let mut table: Nodes = TableParser::parse(Rule::table, data).unwrap();
47 let table = table.next().unwrap();
48 let entries = table
49 .into_inner()
50 .skip_while(|p| p.as_rule() != Rule::entry);
51
52 let mut max_contr = 0;
53
54 let mut table = CollationTable::new();
55 for entry in entries {
56 let mut it = entry.into_inner();
57 let mut codepoints = it.next().unwrap().into_inner();
58 let elements = it.next().unwrap();
59
60 let weights_idx = table.weights.len();
62 table.weights.extend(elements.into_inner().map(scan_weight));
63
64 let first = u32::from_str_radix(codepoints.next().unwrap().as_str(), 16).unwrap();
65 let entry = table.table.entry(first);
66
67 if codepoints.next().is_some() {
68 entry.contraction += 1;
69 max_contr = max_contr.max(entry.contraction);
70 } else {
71 debug_assert_eq!(entry.weights, usize::MAX);
73 entry.weights = weights_idx;
74 entry.len = (table.weights.len() - weights_idx) as u8;
75 }
76 }
77
78 table
79 }
80
81 pub fn from_text_file(path: &str) -> CollationTable {
82 let mut file = File::open(path).unwrap();
83 let mut buf = String::new();
84 file.read_to_string(&mut buf).unwrap();
85 CollationTable::from_text(&buf)
86 }
87
88 pub fn resolve(&self, c: char) -> &[WeightEntry] {
89 self.table
90 .get(c as u32)
91 .and_then(|e| {
92 if e.weights == usize::MAX {
93 None
94 } else {
95 Some(&self.weights[e.weights..e.weights + (e.len as usize)])
96 }
97 })
98 .unwrap_or(&[])
99 }
100}
101
/// Lookup record kept per code point by the collation table.
#[derive(Clone)]
struct PrimaryEntry {
    /// Index of the first collation element for this code point in the
    /// table's weight storage, or `usize::MAX` while none has been assigned.
    weights: usize,
    /// Number of consecutive collation elements starting at `weights`.
    len: u8,
    /// How many table entries keyed by more than one code point begin with
    /// this code point.
    contraction: u16,
}

impl Default for PrimaryEntry {
    /// Returns an unassigned record: no weights, zero length and no
    /// contractions.
    fn default() -> PrimaryEntry {
        PrimaryEntry {
            contraction: 0,
            len: 0,
            weights: usize::MAX,
        }
    }
}
124
125fn scan_weight(node: Node) -> WeightEntry {
126 debug_assert_eq!(node.as_rule(), Rule::element);
127
128 let mut it = node.into_inner();
129 let _alt = it.next().unwrap();
130
131 let mut weights = [0; 4];
132 for i in 0..4 {
133 weights[i] = it.next()
134 .map(|r| u16::from_str_radix(r.as_str(), 16).unwrap())
135 .unwrap_or(0);
136 }
137
138 WeightEntry { weights }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: loading `data/allkeys.txt` must complete without
    /// panicking. The contents of the resulting table are not inspected.
    #[test]
    pub fn parse() {
        let _ = CollationTable::from_text_file("data/allkeys.txt");
    }
}
149}