1use cranpose_ui::text::TextStyle;
2#[cfg(feature = "text-hyphenation")]
3use hyphenation::{Hyphenator, Language, Load, Standard};
4#[cfg(feature = "text-hyphenation")]
5use std::collections::HashMap;
6#[cfg(feature = "text-hyphenation")]
7use std::path::Path;
8#[cfg(feature = "text-hyphenation")]
9use std::sync::RwLock;
10
/// Minimum number of characters that must lie between the start of the current
/// segment and an automatic hyphen break.
///
/// `HyphenationDictionaryStore::choose_auto_hyphen_break` adds this to the
/// segment start (relative to the word being split) to obtain the smallest
/// break offset it will accept.
#[cfg(feature = "text-hyphenation")]
const MIN_SEGMENT_CHARS: usize = 2;
13
/// Errors reported while registering a hyphenation dictionary with a
/// `HyphenationDictionaryStore`.
#[cfg(feature = "text-hyphenation")]
#[derive(thiserror::Error, Debug)]
pub enum HyphenationDictionaryError {
    /// The locale tag could not be mapped to a supported hyphenation language.
    /// Carries the locale string exactly as the caller supplied it.
    #[error("Unsupported hyphenation locale: {0}")]
    UnsupportedLocale(String),
    /// Loading the dictionary from a path or reader failed. `locale` is the
    /// caller-supplied tag and `message` is the stringified loader error.
    #[error("Failed to load hyphenation dictionary for {locale}: {message}")]
    LoadFailed { locale: String, message: String },
    /// The dictionary cache could not be locked for writing (the
    /// `RwLock::write` call returned an error).
    #[error("Hyphenation dictionary cache is unavailable")]
    CacheUnavailable,
}
24
/// Cache of loaded hyphenation dictionaries, keyed by language.
///
/// The map sits behind an `RwLock`, so registration and lookup both work
/// through `&self`.
#[cfg(feature = "text-hyphenation")]
pub struct HyphenationDictionaryStore {
    /// Dictionaries registered by callers or, when the
    /// `text-hyphenation-embedded` feature is on, loaded from embedded data
    /// the first time a language is requested.
    dictionaries: RwLock<HashMap<Language, Standard>>,
}
29
#[cfg(feature = "text-hyphenation")]
impl Default for HyphenationDictionaryStore {
    /// Builds an empty store by delegating to `HyphenationDictionaryStore::new`.
    fn default() -> Self {
        HyphenationDictionaryStore::new()
    }
}
36
#[cfg(feature = "text-hyphenation")]
impl HyphenationDictionaryStore {
    /// Creates a store with no dictionaries registered.
    pub fn new() -> Self {
        Self {
            dictionaries: RwLock::new(HashMap::new()),
        }
    }

    /// Loads the dictionary file at `path` and registers it for `locale`,
    /// replacing any dictionary already stored for the resolved language.
    ///
    /// # Errors
    ///
    /// * [`HyphenationDictionaryError::UnsupportedLocale`] if `locale` does not
    ///   resolve to a supported language.
    /// * [`HyphenationDictionaryError::LoadFailed`] if the file cannot be loaded.
    /// * [`HyphenationDictionaryError::CacheUnavailable`] if the cache cannot be
    ///   locked for writing.
    pub fn register_dictionary_path(
        &self,
        locale: &str,
        path: impl AsRef<Path>,
    ) -> Result<(), HyphenationDictionaryError> {
        let language = Self::language_for_registration(locale)?;
        let dictionary =
            Standard::from_path(language, path).map_err(|err| Self::load_failed(locale, err))?;
        self.store_dictionary(language, dictionary)
    }

    /// Reads a dictionary from `reader` and registers it for `locale`,
    /// replacing any dictionary already stored for the resolved language.
    ///
    /// # Errors
    ///
    /// * [`HyphenationDictionaryError::UnsupportedLocale`] if `locale` does not
    ///   resolve to a supported language.
    /// * [`HyphenationDictionaryError::LoadFailed`] if the data cannot be loaded.
    /// * [`HyphenationDictionaryError::CacheUnavailable`] if the cache cannot be
    ///   locked for writing.
    pub fn register_dictionary_reader(
        &self,
        locale: &str,
        reader: &mut impl std::io::Read,
    ) -> Result<(), HyphenationDictionaryError> {
        let language = Self::language_for_registration(locale)?;
        let dictionary = Standard::from_reader(language, reader)
            .map_err(|err| Self::load_failed(locale, err))?;
        self.store_dictionary(language, dictionary)
    }

    /// Resolves `locale` to a language, or reports it as unsupported.
    fn language_for_registration(locale: &str) -> Result<Language, HyphenationDictionaryError> {
        resolve_language_tag(locale)
            .ok_or_else(|| HyphenationDictionaryError::UnsupportedLocale(locale.to_string()))
    }

    /// Wraps a loader error in [`HyphenationDictionaryError::LoadFailed`].
    fn load_failed(locale: &str, err: impl std::fmt::Display) -> HyphenationDictionaryError {
        HyphenationDictionaryError::LoadFailed {
            locale: locale.to_string(),
            message: err.to_string(),
        }
    }

    /// Inserts `dictionary` under `language`, overwriting any previous entry.
    fn store_dictionary(
        &self,
        language: Language,
        dictionary: Standard,
    ) -> Result<(), HyphenationDictionaryError> {
        let mut write_guard = self
            .dictionaries
            .write()
            .map_err(|_| HyphenationDictionaryError::CacheUnavailable)?;
        write_guard.insert(language, dictionary);
        Ok(())
    }

    /// Runs `visit` against the dictionary for `language` and returns its result,
    /// or `None` when no dictionary is available.
    ///
    /// The cached dictionary is borrowed under the read lock rather than cloned,
    /// so a lookup never copies the pattern tables. With the
    /// `text-hyphenation-embedded` feature, a cache miss falls back to the
    /// embedded dictionary, which is then stored for later calls.
    fn with_dictionary<R>(
        &self,
        language: Language,
        visit: impl FnOnce(&Standard) -> R,
    ) -> Option<R> {
        if let Ok(read_guard) = self.dictionaries.read() {
            if let Some(dictionary) = read_guard.get(&language) {
                return Some(visit(dictionary));
            }
        }

        #[cfg(feature = "text-hyphenation-embedded")]
        {
            if let Ok(dictionary) = Standard::from_embedded(language) {
                let outcome = visit(&dictionary);
                // Caching is best-effort: a failed store only means the next
                // call loads the embedded dictionary again.
                let _ = self.store_dictionary(language, dictionary);
                return Some(outcome);
            }
        }

        None
    }

    /// Picks a dictionary-approved hyphenation point at or before a measured
    /// line break.
    ///
    /// All indices are character indices into `line`.
    ///
    /// * `line` - the text being broken.
    /// * `style` - its `span_style.locale_list` selects the hyphenation language.
    /// * `segment_start_char` - where the current segment starts.
    /// * `measured_break_char` - where the measured break falls; it must lie
    ///   strictly inside a whitespace-delimited word.
    ///
    /// Returns the character index of the latest dictionary break that is no
    /// later than `measured_break_char` and leaves at least `MIN_SEGMENT_CHARS`
    /// characters after the segment start. Returns `None` when the language is
    /// unsupported, no dictionary is available, the break is not inside a word,
    /// or no dictionary break falls in the allowed range.
    pub fn choose_auto_hyphen_break(
        &self,
        line: &str,
        style: &TextStyle,
        segment_start_char: usize,
        measured_break_char: usize,
    ) -> Option<usize> {
        // This also rejects `measured_break_char == 0`.
        if line.is_empty() || measured_break_char <= segment_start_char {
            return None;
        }

        let language = resolve_hyphenation_language(style)?;

        let boundaries = char_boundaries(line);
        let char_count = boundaries.len().saturating_sub(1);
        if measured_break_char >= char_count {
            return None;
        }
        if !is_break_inside_word(line, &boundaries, measured_break_char) {
            return None;
        }

        let (word_start, word_end) = word_bounds(line, &boundaries, measured_break_char);
        let word = &line[boundaries[word_start]..boundaries[word_end]];
        if word.is_empty() {
            return None;
        }

        // Allowed break offsets, in characters relative to the start of `word`.
        let max_local_break = measured_break_char.saturating_sub(word_start);
        let min_local_break = segment_start_char
            .saturating_sub(word_start)
            .saturating_add(MIN_SEGMENT_CHARS);
        if min_local_break > max_local_break {
            return None;
        }

        // The dictionary is consulted last so the cheap checks above can reject
        // a request without taking the lock or loading anything.
        self.with_dictionary(language, |dictionary| {
            let hyphenated = dictionary.hyphenate(word);
            hyphenated
                .breaks
                .into_iter()
                // Walk from the end so the latest acceptable break wins.
                .rev()
                .filter(|&break_byte| word.is_char_boundary(break_byte))
                .map(|break_byte| word[..break_byte].chars().count())
                .find(|local_break| (min_local_break..=max_local_break).contains(local_break))
                .map(|local_break| word_start + local_break)
        })
        .flatten()
    }
}
162
/// Placeholder store compiled when the `text-hyphenation` feature is disabled.
///
/// It holds no data; its `choose_auto_hyphen_break` method always returns
/// `None`.
#[cfg(not(feature = "text-hyphenation"))]
#[derive(Default)]
pub struct HyphenationDictionaryStore;
166
167#[cfg(not(feature = "text-hyphenation"))]
168impl HyphenationDictionaryStore {
169 pub fn new() -> Self {
170 Self
171 }
172
173 pub fn choose_auto_hyphen_break(
174 &self,
175 line: &str,
176 _style: &TextStyle,
177 segment_start_char: usize,
178 measured_break_char: usize,
179 ) -> Option<usize> {
180 let _ = (self, line, segment_start_char, measured_break_char);
181 None
182 }
183}
184
185pub fn choose_auto_hyphen_break(
186 line: &str,
187 style: &TextStyle,
188 segment_start_char: usize,
189 measured_break_char: usize,
190) -> Option<usize> {
191 HyphenationDictionaryStore::new().choose_auto_hyphen_break(
192 line,
193 style,
194 segment_start_char,
195 measured_break_char,
196 )
197}
198
/// Chooses the hyphenation language for `style`.
///
/// A missing or empty locale list defaults to US English. Otherwise only the
/// first locale is considered, and `None` is returned when that locale has no
/// supported hyphenation language.
#[cfg(feature = "text-hyphenation")]
fn resolve_hyphenation_language(style: &TextStyle) -> Option<Language> {
    match style.span_style.locale_list.as_ref() {
        Some(list) if !list.is_empty() => {
            let primary = list.locales().first()?;
            resolve_language_tag(primary)
        }
        _ => Some(Language::EnglishUS),
    }
}
211
/// Maps a locale tag such as `en-GB`, `fr_FR` or `de` to a hyphenation language.
///
/// Surrounding whitespace is ignored, `_` is accepted as a subtag separator,
/// and matching is ASCII case-insensitive. A blank tag and the exact tag `und`
/// resolve to US English. Returns `None` for every other unrecognised language.
///
/// The primary language subtag is compared exactly, not by prefix, so an
/// unrelated language that merely starts with the same letters (for example
/// `est`, `csb` or `run`) is not sent to the wrong dictionary. Both two-letter
/// and three-letter ISO 639 codes are accepted for the supported languages.
#[cfg(feature = "text-hyphenation")]
fn resolve_language_tag(locale: &str) -> Option<Language> {
    let normalized = locale.trim().replace('_', "-").to_ascii_lowercase();
    if normalized.is_empty() || normalized == "und" {
        return Some(Language::EnglishUS);
    }

    let mut subtags = normalized.split('-');
    let primary = subtags.next()?;

    let language = match primary {
        "en" | "eng" => {
            // British English is selected by a `GB` subtag anywhere after the
            // language, which covers `en-GB` as well as `en-Latn-GB`.
            if subtags.any(|subtag| subtag == "gb") {
                Language::EnglishGB
            } else {
                Language::EnglishUS
            }
        }
        "fr" | "fra" | "fre" => Language::French,
        "de" | "deu" | "ger" => Language::German1996,
        "es" | "spa" => Language::Spanish,
        "it" | "ita" => Language::Italian,
        "ru" | "rus" => Language::Russian,
        "pt" | "por" => Language::Portuguese,
        "nl" | "nld" | "dut" => Language::Dutch,
        "pl" | "pol" => Language::Polish,
        "sv" | "swe" => Language::Swedish,
        "da" | "dan" => Language::Danish,
        "cs" | "ces" | "cze" => Language::Czech,
        "sk" | "slk" | "slo" => Language::Slovak,
        "uk" | "ukr" => Language::Ukrainian,
        _ => return None,
    };
    Some(language)
}
268
269#[cfg(feature = "text-hyphenation")]
/// Returns the byte offset of every character boundary in `text`.
///
/// The result holds the starting byte of each character followed by
/// `text.len()`, so it always has one more entry than `text` has characters,
/// and entry `i` is the byte offset of the `i`-th character. An empty string
/// yields `[0]`.
fn char_boundaries(text: &str) -> Vec<usize> {
    let mut offsets = Vec::with_capacity(text.chars().count() + 1);
    offsets.extend(text.char_indices().map(|(offset, _)| offset));
    // Closing boundary, so the last character can be sliced like the others.
    offsets.push(text.len());
    offsets
}
281
282#[cfg(feature = "text-hyphenation")]
/// Reports whether a break at character index `break_idx` falls strictly
/// inside a word, meaning the characters on both sides of it are
/// non-whitespace.
///
/// `boundaries` holds the byte offsets of the character boundaries of `line`,
/// ending with `line.len()`. Returns `false` when `break_idx` is `0`, when it is
/// at or past the last boundary, or when `boundaries` is empty.
fn is_break_inside_word(line: &str, boundaries: &[usize], break_idx: usize) -> bool {
    // `break_idx + 1` must be a valid index. Phrasing the check this way avoids
    // subtracting from the length, which underflows for an empty slice.
    if break_idx == 0 || break_idx.saturating_add(1) >= boundaries.len() {
        return false;
    }

    let is_blank = |piece: &str| piece.chars().all(char::is_whitespace);
    let before = &line[boundaries[break_idx - 1]..boundaries[break_idx]];
    let after = &line[boundaries[break_idx]..boundaries[break_idx + 1]];
    !is_blank(before) && !is_blank(after)
}
291
292#[cfg(feature = "text-hyphenation")]
/// Finds the whitespace-delimited word around character index `anchor`.
///
/// `boundaries` holds the byte offsets of the character boundaries of `line`,
/// ending with `line.len()`. Returns `(start, end)` as character indices:
/// `start` is the index just after the nearest whitespace character before
/// `anchor` (or `0`), and `end` is the index of the first whitespace character
/// at or after `anchor` (or the last boundary index).
fn word_bounds(line: &str, boundaries: &[usize], anchor: usize) -> (usize, usize) {
    // True when the character at index `idx` is whitespace.
    let is_blank = |idx: usize| {
        line[boundaries[idx]..boundaries[idx + 1]]
            .chars()
            .all(char::is_whitespace)
    };

    // Search backwards for the whitespace that precedes the word.
    let start = (0..anchor)
        .rev()
        .find(|&idx| is_blank(idx))
        .map_or(0, |idx| idx + 1);

    // Search forwards for the whitespace that ends the word.
    let last = boundaries.len() - 1;
    let end = (anchor..last).find(|&idx| is_blank(idx)).unwrap_or(last);

    (start, end)
}
313
#[cfg(all(test, not(feature = "text-hyphenation")))]
mod disabled_tests {
    use super::*;

    /// Without the `text-hyphenation` feature the entry point never proposes a
    /// break, even for a word that would normally be hyphenated.
    #[test]
    fn auto_hyphenation_without_dictionary_feature_returns_none() {
        let style = TextStyle::default();
        let break_idx = choose_auto_hyphen_break("Transformation", &style, 8, 12);
        assert!(break_idx.is_none());
    }
}
324
#[cfg(all(test, feature = "text-hyphenation-embedded"))]
mod tests {
    use super::*;
    use cranpose_ui::text::{LocaleList, SpanStyle, TextStyle};

    /// Builds a default text style whose locale list is parsed from `tags`.
    fn style_with_locale(tags: &str) -> TextStyle {
        let span_style = SpanStyle {
            locale_list: Some(LocaleList::from_language_tags(tags)),
            ..Default::default()
        };
        TextStyle {
            span_style,
            ..Default::default()
        }
    }

    /// A style without a locale list uses the default English dictionary.
    #[test]
    fn dictionary_breaks_transformation_like_compose_contract() {
        let style = TextStyle::default();
        let break_idx = choose_auto_hyphen_break("Transformation", &style, 8, 12);
        assert_eq!(break_idx, Some(10));
    }

    /// A French locale hyphenates an accented word; the result is a character
    /// index, not a byte offset.
    #[test]
    fn locale_gate_uses_french_dictionary() {
        let style = style_with_locale("fr-FR");
        let break_idx = choose_auto_hyphen_break("éléphant", &style, 0, 7);
        assert_eq!(break_idx, Some(3));
    }

    /// A German locale finds some break inside a long compound word.
    #[test]
    fn locale_gate_uses_german_dictionary() {
        let style = style_with_locale("de-DE");
        let break_idx = choose_auto_hyphen_break("Geschwindigkeitsbegrenzung", &style, 10, 20);
        assert!(break_idx.is_some());
    }

    /// A locale with no supported dictionary yields no break.
    #[test]
    fn unknown_locale_disables_hyphenation() {
        let style = style_with_locale("ja-JP");
        let break_idx = choose_auto_hyphen_break("Transformation", &style, 8, 12);
        assert_eq!(break_idx, None);
    }

    /// An underscore-separated tag is accepted like its hyphenated form.
    #[test]
    fn dictionary_uses_english_locale_alias() {
        let style = style_with_locale("en_GB");
        let break_idx = choose_auto_hyphen_break("Transformation", &style, 8, 12);
        assert_eq!(break_idx, Some(10));
    }

    /// A measured break that sits next to whitespace is not inside a word.
    #[test]
    fn ignores_breaks_outside_words() {
        let style = TextStyle::default();
        let break_idx = choose_auto_hyphen_break("ab cd", &style, 0, 2);
        assert_eq!(break_idx, None);
    }
}