1extern crate pest;
29#[macro_use]
30extern crate pest_derive;
31extern crate unicode_normalization;
32
33use unicode_normalization::UnicodeNormalization;
34use std::fmt;
35use std::ops::Deref;
36
37mod table;
38mod multistage;
39
/// A collation sort key: a flat sequence of 16-bit weights.
///
/// Keys produced by `collate` hold the non-zero weights of each level in
/// turn, with a `0` weight separating consecutive levels and no trailing
/// zeros. The wrapped vector is reachable through `Deref`, and the `Debug`
/// output prints weights as four-digit hex with `|` for each separator.
pub struct SortKey(Vec<u16>);
41pub use table::CollationTable;
42
43impl Deref for SortKey {
44 type Target = Vec<u16>;
45
46 fn deref(&self) -> &Self::Target {
47 &self.0
48 }
49}
50
51impl fmt::Debug for SortKey {
52 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53 write!(f, "[")?;
54 for weight in &self.0 {
55 if *weight == 0 {
56 write!(f, "| ")?;
57 } else {
58 write!(f, "{:04X} ", weight)?;
59 }
60 }
61 write!(f, "|")?;
62 write!(f, "]")?;
63 Ok(())
64 }
65}
66
67
68pub fn collate(text: &str, table: &table::CollationTable) -> SortKey {
69 let mut weights = Vec::new();
70 for c in text.nfd() {
71 weights.extend(table.resolve(c));
72 }
73
74 let mut sort_key = Vec::with_capacity(weights.len());
75 for level in 0..4 {
77 for entry in &weights {
78 let weight = entry.weights[level];
79 if weight != 0 {
80 sort_key.push(weight);
81 }
82 }
83 sort_key.push(0);
84 }
85 while sort_key.last() == Some(&0) {
86 sort_key.pop();
87 }
88
89 SortKey(sort_key)
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::{BufRead, BufReader};
    use std::{char, u32};

    /// Runs the non-ignorable conformance file against `collate`.
    ///
    /// Every data line has the form `<hex code points>;<comment>`, where the
    /// comment contains the expected key in the same bracketed layout that
    /// `SortKey`'s `Debug` implementation prints. Comment lines (`#`) and
    /// blank lines are ignored.
    #[test]
    fn test() {
        let table = CollationTable::from_text_file("data/allkeys.txt");

        let file = File::open("data/CollationTest/CollationTest_NON_IGNORABLE.txt").unwrap();
        let file = BufReader::new(&file);
        for (index, line) in file.lines().enumerate() {
            // `enumerate` counts from zero; report human-friendly 1-based line numbers.
            let line_num = index + 1;
            let line = line.unwrap();
            let line = line.trim();
            if line.starts_with('#') || line.is_empty() {
                continue;
            }
            let mut parts = line.split(';');

            let codes = parts.next().unwrap();
            // `split_whitespace` tolerates repeated or stray spaces between code points.
            let text = codes
                .split_whitespace()
                .map(|s| u32::from_str_radix(s, 16).unwrap())
                .map(char::from_u32)
                .collect::<Option<String>>();
            // The conformance files are documented to include ill-formed
            // strings (surrogate code points). Those cannot be stored in a
            // Rust `String`, so such lines are skipped instead of panicking.
            let text = match text {
                Some(text) => text,
                None => continue,
            };

            let sort_key = collate(&text, &table);

            let comment = parts.next().unwrap();
            let from = comment.find('[').unwrap();
            let to = comment.rfind(']').unwrap();
            let expected = &comment[from..to + 1];

            let actual = format!("{:?}", sort_key);
            assert_eq!(expected, actual, "failed on line '{}': {}", line_num, line);
        }
    }
}