1#![warn(clippy::all)]
21#![warn(missing_docs)]
22
23mod alternative_parser;
24mod data_int;
25mod hyph_dict;
26mod pyphen;
27
28use std::cell::RefCell;
29use std::collections::HashMap;
30use std::rc::Rc;
31use std::thread_local;
32
33use alternative_parser::AlternativeParser;
34use data_int::DataInt;
35use hyph_dict::HyphDict;
36pub use pyphen::{builder::Builder, iter::Iter, Pyphen};
37
38#[macro_use]
39extern crate lazy_static;
40
41use regex::Regex;
42
43lazy_static! {
45 static ref PARSE_HEX: Regex = Regex::new(r"\^{2}([0-9a-f]{2})").unwrap();
46 static ref PARSE: Regex = Regex::new(r"(\d?)(\D?)").unwrap();
47}
48
49thread_local! {
50 static HD_CACHE: RefCell<HashMap<String, Rc<HyphDict>>> = RefCell::new(HashMap::new());
52
53 pub static LANGUAGES: RefCell<HashMap<String, Rc<String>>> = {
55 let mut dict = HashMap::new();
56 let dir = format!("{}/dictionaries", env!("CARGO_MANIFEST_DIR"));
57
58 if let Ok(read_dir) = std::fs::read_dir(dir) {
59 for entry in read_dir {
60 if let Ok(entry) = entry {
61 if let Some(filepath) = entry.path().to_str() {
62 let filename = entry.file_name();
63 let filename = filename
64 .to_str()
65 .unwrap()
66 .trim_start_matches("hyph_")
67 .trim_end_matches(".dic");
68 dict.insert(filename.to_string(), Rc::new(filepath.to_string()));
69 }
70 }
71 }
72 }
73
74 RefCell::new(dict)
75 }
76}
77
78pub fn language_fallback(language: &str) -> Option<String> {
85 let language = language.replace('-', "_");
86 let mut parts: Vec<_> = language.split('_').collect();
87
88 while !parts.is_empty() {
89 let language = parts.join("_");
90 let mut flag = false;
91 LANGUAGES.with(|l| {
92 if l.borrow().contains_key(&language) {
93 flag = true;
94 }
95 });
96 if flag {
97 return Some(language);
98 }
99
100 parts.pop();
101 }
102
103 None
104}
105
#[cfg(test)]
mod tests {
    use super::*;
    use std::ops::Deref;

    /// Assert that the two halves of `tup1` dereference to `s1` and `s2`.
    fn match_tuple<T, U>(tup1: (T, U), s1: &str, s2: &str)
    where
        T: Deref<Target = str>,
        U: Deref<Target = str>,
    {
        let (head, tail) = tup1;

        assert_eq!(head.deref(), s1);
        assert_eq!(tail.deref(), s2);
    }

    /// Assert that `iter` holds a pair and that the pair equals (`s1`, `s2`).
    fn match_iter<T>(iter: Option<(T, T)>, s1: &str, s2: &str)
    where
        T: Deref<Target = str>,
    {
        match iter {
            Some(pair) => match_tuple(pair, s1, s2),
            None => panic!("assertion failed: iter.is_some()"),
        }
    }

    /// Assert that `a` holds a language code equal to `b`.
    fn test_lang(a: Option<String>, b: &str) {
        match a {
            Some(found) => assert_eq!(found, b),
            None => panic!("assertion failed: a.is_some()"),
        }
    }

    /// `inserted` marks every hyphenation point of a Dutch word.
    #[test]
    fn test_inserted() {
        let hyphenator = Builder::lang("nl_NL").build().unwrap();
        let hyphenated = hyphenator.inserted("lettergrepen");
        assert_eq!(hyphenated, "let-ter-gre-pen");
    }

    /// `wrap` splits a word so that the first part fits the given width.
    #[test]
    fn test_wrap() {
        let hyphenator = Builder::lang("nl_NL").build().unwrap();
        let halves = hyphenator.wrap("autobandventieldopje", 11).unwrap();
        match_tuple(halves, "autoband-", "ventieldopje");
    }

    /// `iterate` yields the possible splits, last hyphenation point first.
    #[test]
    fn test_iterate() {
        let hyphenator = Builder::lang("nl_NL").build().unwrap();
        let mut splits = hyphenator.iterate("Amsterdam");
        match_iter(splits.next(), "Amster", "dam");
        match_iter(splits.next(), "Am", "sterdam");
        assert_eq!(splits.next(), None);
    }

    /// A language code with an unknown suffix falls back to the `nl_NL`
    /// dictionary.
    #[test]
    fn test_fallback_dict() {
        let hyphenator = Builder::lang("nl_NL-variant").build().unwrap();
        let mut splits = hyphenator.iterate("Amsterdam");
        match_iter(splits.next(), "Amster", "dam");
        match_iter(splits.next(), "Am", "sterdam");
        assert_eq!(splits.next(), None);
    }

    /// Building for a language without a dictionary is an error.
    #[test]
    fn test_missing_dict() {
        let outcome = Builder::lang("mi_SS").build();
        assert!(outcome.is_err());
    }

    /// Repointing the `fr` entry of `LANGUAGES` at the Dutch dictionary makes
    /// a freshly built `fr` hyphenator hyphenate like Dutch.
    #[test]
    fn test_personal_dict() {
        let word = "autobandventieldopje";
        let dutch_result = "au-to-band-ven-tiel-dop-je";

        let french = Builder::lang("fr").build().unwrap();
        assert_ne!(french.inserted(word), dutch_result);

        LANGUAGES.with(|languages| {
            // Finish the shared borrow before taking the mutable one.
            let dutch_path = languages.borrow()["nl_NL"].clone();
            *languages.borrow_mut().get_mut("fr").unwrap() = dutch_path;
        });

        let overridden = Builder::lang("fr").build().unwrap();
        assert_eq!(overridden.inserted(word), dutch_result);
    }

    /// `left` and `right` suppress hyphenation points too close to the
    /// start or end of the word.
    #[test]
    fn test_left_right() {
        let word = "lettergrepen";

        let default_margins = Builder::lang("nl_NL").build().unwrap();
        assert_eq!(default_margins.inserted(word), "let-ter-gre-pen");

        let wide_left = Builder::lang("nl_NL").left(4).build().unwrap();
        assert_eq!(wide_left.inserted(word), "letter-gre-pen");

        let wide_right = Builder::lang("nl_NL").right(4).build().unwrap();
        assert_eq!(wide_right.inserted(word), "let-ter-grepen");

        let wide_both = Builder::lang("nl_NL").left(4).right(4).build().unwrap();
        assert_eq!(wide_both.inserted(word), "letter-grepen");
    }

    /// A hyphenator can be built directly from a dictionary file path.
    #[test]
    fn test_filename() {
        LANGUAGES.with(|languages| {
            let languages = languages.borrow();
            let dutch_path = languages["nl_NL"].clone();

            let hyphenator = Builder::filename(dutch_path).build().unwrap();
            assert_eq!(hyphenator.inserted("lettergrepen"), "let-ter-gre-pen");
        });
    }

    /// Hungarian hyphenation that changes the spelling at the break
    /// (`kulissza` splits into `kulisz` and `sza`).
    #[test]
    fn test_alternative() {
        let hyphenator = Builder::lang("hu").left(1).right(1).build().unwrap();
        let mut splits = hyphenator.iterate("kulissza");
        match_iter(splits.next(), "kulisz", "sza");
        match_iter(splits.next(), "ku", "lissza");
        assert_eq!(splits.next(), None);
        assert_eq!(hyphenator.inserted("kulissza"), "ku-lisz-sza");
    }

    /// Upper-case input is hyphenated and keeps its case.
    #[test]
    fn test_upper() {
        let hyphenator = Builder::lang("nl_NL").build().unwrap();
        let hyphenated = hyphenator.inserted("LETTERGREPEN");
        assert_eq!(hyphenated, "LET-TER-GRE-PEN");
    }

    /// Spelling-changing hyphenation also keeps upper-case input in upper
    /// case.
    #[test]
    fn test_upper_alternative() {
        let hyphenator = Builder::lang("hu").left(1).right(1).build().unwrap();
        let mut splits = hyphenator.iterate("KULISSZA");
        match_iter(splits.next(), "KULISZ", "SZA");
        match_iter(splits.next(), "KU", "LISSZA");
        assert_eq!(splits.next(), None);
        assert_eq!(hyphenator.inserted("KULISSZA"), "KU-LISZ-SZA");
    }

    /// Every language listed in `LANGUAGES` can be built.
    #[test]
    fn test_all_dictionaries() {
        LANGUAGES.with(|languages| {
            languages.borrow().keys().for_each(|lang| {
                Builder::lang(lang).build().unwrap();
            });
        });
    }

    /// `language_fallback` resolves codes to the most specific known
    /// language.
    #[test]
    fn test_fallback() {
        let cases = [
            ("en", "en"),
            ("en_US", "en_US"),
            ("en_FR", "en"),
            ("en-Latn-US", "en_Latn_US"),
            ("en-Cyrl-US", "en"),
            ("fr-Latn-FR", "fr"),
            ("en-US_variant1-x", "en_US"),
        ];

        for &(requested, expected) in cases.iter() {
            test_lang(language_fallback(requested), expected);
        }
    }
}