Skip to main content

cranpose_render_common/
text_hyphenation.rs

1use cranpose_ui::text::TextStyle;
2#[cfg(feature = "text-hyphenation")]
3use hyphenation::{Hyphenator, Language, Load, Standard};
4#[cfg(feature = "text-hyphenation")]
5use std::collections::HashMap;
6#[cfg(feature = "text-hyphenation")]
7use std::path::Path;
8#[cfg(feature = "text-hyphenation")]
9use std::sync::RwLock;
10
#[cfg(feature = "text-hyphenation")]
/// Smallest number of characters that must sit between the start of the
/// current segment (or the start of the word, whichever is later) and an
/// accepted hyphen break. It is added to the lower bound of the break search
/// in `HyphenationDictionaryStore::choose_auto_hyphen_break`.
const MIN_SEGMENT_CHARS: usize = 2;
13
#[cfg(feature = "text-hyphenation")]
/// Failures reported when registering a hyphenation dictionary with a
/// `HyphenationDictionaryStore`.
#[derive(Debug, thiserror::Error)]
pub enum HyphenationDictionaryError {
    /// The locale tag could not be mapped to a supported hyphenation
    /// language. Carries the tag exactly as the caller supplied it.
    #[error("Unsupported hyphenation locale: {0}")]
    UnsupportedLocale(String),

    /// The dictionary data could not be loaded from the given path or reader.
    /// `message` is the loader error rendered as text.
    #[error("Failed to load hyphenation dictionary for {locale}: {message}")]
    LoadFailed { locale: String, message: String },

    /// The lock guarding the dictionary cache could not be acquired for
    /// writing, so the dictionary was not stored.
    #[error("Hyphenation dictionary cache is unavailable")]
    CacheUnavailable,
}
24
#[cfg(feature = "text-hyphenation")]
/// Cache of loaded hyphenation dictionaries, keyed by language.
///
/// The map sits behind an `RwLock` so dictionaries can be registered and
/// looked up through a shared `&self` reference.
pub struct HyphenationDictionaryStore {
    /// One loaded dictionary per language; a later registration for the same
    /// language replaces the earlier entry.
    dictionaries: RwLock<HashMap<Language, Standard>>,
}

#[cfg(feature = "text-hyphenation")]
impl Default for HyphenationDictionaryStore {
    /// Equivalent to [`HyphenationDictionaryStore::new`]: an empty store.
    fn default() -> Self {
        HyphenationDictionaryStore::new()
    }
}
36
#[cfg(feature = "text-hyphenation")]
impl HyphenationDictionaryStore {
    /// Creates a store with no registered dictionaries.
    pub fn new() -> Self {
        Self {
            dictionaries: RwLock::new(HashMap::new()),
        }
    }

    /// Loads the dictionary file at `path` and registers it for `locale`.
    ///
    /// A dictionary already registered for the same resolved language is
    /// replaced.
    ///
    /// # Errors
    ///
    /// * [`HyphenationDictionaryError::UnsupportedLocale`] if `locale` does
    ///   not resolve to a supported language.
    /// * [`HyphenationDictionaryError::LoadFailed`] if the file cannot be
    ///   loaded as a dictionary.
    /// * [`HyphenationDictionaryError::CacheUnavailable`] if the cache lock
    ///   cannot be acquired for writing.
    pub fn register_dictionary_path(
        &self,
        locale: &str,
        path: impl AsRef<Path>,
    ) -> Result<(), HyphenationDictionaryError> {
        self.register_loaded(locale, |language| Standard::from_path(language, path))
    }

    /// Reads a dictionary from `reader` and registers it for `locale`.
    ///
    /// A dictionary already registered for the same resolved language is
    /// replaced.
    ///
    /// # Errors
    ///
    /// Same conditions as [`Self::register_dictionary_path`], with
    /// [`HyphenationDictionaryError::LoadFailed`] reported when the reader's
    /// contents cannot be loaded as a dictionary.
    pub fn register_dictionary_reader(
        &self,
        locale: &str,
        reader: &mut impl std::io::Read,
    ) -> Result<(), HyphenationDictionaryError> {
        self.register_loaded(locale, |language| Standard::from_reader(language, reader))
    }

    /// Shared registration path: resolves `locale`, runs `load` for the
    /// resolved language, and stores the resulting dictionary.
    fn register_loaded<E: std::fmt::Display>(
        &self,
        locale: &str,
        load: impl FnOnce(Language) -> Result<Standard, E>,
    ) -> Result<(), HyphenationDictionaryError> {
        let language = resolve_language_tag(locale)
            .ok_or_else(|| HyphenationDictionaryError::UnsupportedLocale(locale.to_string()))?;
        let dictionary = load(language).map_err(|err| HyphenationDictionaryError::LoadFailed {
            locale: locale.to_string(),
            message: err.to_string(),
        })?;
        self.store_dictionary(language, dictionary)
    }

    /// Inserts `dictionary` into the cache under `language`, replacing any
    /// previous entry.
    ///
    /// # Errors
    ///
    /// [`HyphenationDictionaryError::CacheUnavailable`] if the write lock
    /// cannot be acquired.
    fn store_dictionary(
        &self,
        language: Language,
        dictionary: Standard,
    ) -> Result<(), HyphenationDictionaryError> {
        let mut write_guard = self
            .dictionaries
            .write()
            .map_err(|_| HyphenationDictionaryError::CacheUnavailable)?;
        write_guard.insert(language, dictionary);
        Ok(())
    }

    /// Runs `use_dictionary` against the dictionary for `language` and
    /// returns its result, or `None` when no dictionary is available.
    ///
    /// The dictionary is lent to the closure instead of being cloned out of
    /// the cache, so a lookup does not copy the whole dictionary.
    fn with_dictionary<R>(
        &self,
        language: Language,
        use_dictionary: impl FnOnce(&Standard) -> R,
    ) -> Option<R> {
        // Fast path: borrow the cached dictionary for the duration of the
        // closure. The read guard is released when this block ends.
        if let Ok(cache) = self.dictionaries.read() {
            if let Some(dictionary) = cache.get(&language) {
                return Some(use_dictionary(dictionary));
            }
        }

        #[cfg(feature = "text-hyphenation-embedded")]
        {
            // Not cached: try the embedded dictionary, use it, then move it
            // into the cache for later calls. Caching is best-effort, so a
            // failed store is deliberately ignored.
            if let Ok(dictionary) = Standard::from_embedded(language) {
                let outcome = use_dictionary(&dictionary);
                let _ = self.store_dictionary(language, dictionary);
                return Some(outcome);
            }
        }

        None
    }

    /// Picks a dictionary hyphenation point for the word that straddles
    /// `measured_break_char`.
    ///
    /// `segment_start_char` and `measured_break_char` are character (not
    /// byte) indices into `line`. The language comes from the first locale in
    /// `style`, defaulting to US English when the style has no locales.
    ///
    /// Returns the character index in `line` of the latest dictionary break
    /// that is no later than `measured_break_char` and leaves at least
    /// `MIN_SEGMENT_CHARS` characters after the later of the segment start and
    /// the word start. Returns `None` when:
    ///
    /// * `line` is empty, or `measured_break_char` is not after
    ///   `segment_start_char` or not strictly inside the line;
    /// * the break position is adjacent to whitespace (not inside a word);
    /// * the locale is unsupported or no dictionary is available for it;
    /// * the dictionary offers no break in the allowed range.
    pub fn choose_auto_hyphen_break(
        &self,
        line: &str,
        style: &TextStyle,
        segment_start_char: usize,
        measured_break_char: usize,
    ) -> Option<usize> {
        if line.is_empty() || measured_break_char <= segment_start_char {
            return None;
        }

        let language = resolve_hyphenation_language(style)?;

        // All positional checks run before the dictionary is touched, so
        // requests that cannot produce a break never trigger a dictionary load.
        let boundaries = char_boundaries(line);
        let char_count = boundaries.len().saturating_sub(1);

        if measured_break_char == 0 || measured_break_char >= char_count {
            return None;
        }
        if !is_break_inside_word(line, &boundaries, measured_break_char) {
            return None;
        }

        let (word_start, word_end) = word_bounds(line, &boundaries, measured_break_char);
        let word = &line[boundaries[word_start]..boundaries[word_end]];
        if word.is_empty() {
            return None;
        }

        // Allowed break range, in characters relative to the word start.
        let max_local_break = measured_break_char.saturating_sub(word_start);
        let min_local_break = segment_start_char
            .saturating_sub(word_start)
            .saturating_add(MIN_SEGMENT_CHARS);

        if min_local_break > max_local_break {
            return None;
        }

        self.with_dictionary(language, |dictionary| {
            let hyphenated = dictionary.hyphenate(word);
            // Scan from the end so the latest break that still fits wins.
            hyphenated.breaks.into_iter().rev().find_map(|break_byte| {
                // Skip offsets that would split a multi-byte character (or
                // lie past the end of the word) before slicing.
                if !word.is_char_boundary(break_byte) {
                    return None;
                }
                let local_break_chars = word[..break_byte].chars().count();
                (min_local_break..=max_local_break)
                    .contains(&local_break_chars)
                    .then(|| word_start + local_break_chars)
            })
        })
        .flatten()
    }
}
162
163#[cfg(not(feature = "text-hyphenation"))]
164#[derive(Default)]
165pub struct HyphenationDictionaryStore;
166
167#[cfg(not(feature = "text-hyphenation"))]
168impl HyphenationDictionaryStore {
169    pub fn new() -> Self {
170        Self
171    }
172
173    pub fn choose_auto_hyphen_break(
174        &self,
175        line: &str,
176        _style: &TextStyle,
177        segment_start_char: usize,
178        measured_break_char: usize,
179    ) -> Option<usize> {
180        let _ = (self, line, segment_start_char, measured_break_char);
181        None
182    }
183}
184
185pub fn choose_auto_hyphen_break(
186    line: &str,
187    style: &TextStyle,
188    segment_start_char: usize,
189    measured_break_char: usize,
190) -> Option<usize> {
191    HyphenationDictionaryStore::new().choose_auto_hyphen_break(
192        line,
193        style,
194        segment_start_char,
195        measured_break_char,
196    )
197}
198
#[cfg(feature = "text-hyphenation")]
/// Determines the hyphenation language for `style`.
///
/// A style with no locale list, or with an empty one, falls back to US
/// English. Otherwise only the first locale is consulted, and `None` is
/// returned when that locale is not supported.
fn resolve_hyphenation_language(style: &TextStyle) -> Option<Language> {
    match style.span_style.locale_list.as_ref() {
        Some(locales) if !locales.is_empty() => {
            let primary = locales.locales().first()?;
            resolve_language_tag(primary)
        }
        // Missing or empty locale list.
        _ => Some(Language::EnglishUS),
    }
}
211
#[cfg(feature = "text-hyphenation")]
/// Maps a locale tag such as `"en_GB"` or `"fr-FR"` to a hyphenation
/// language.
///
/// Matching is case-insensitive and accepts `_` as well as `-` between
/// subtags. The decision is made on the primary language subtag as a whole,
/// so unrelated languages that merely share a two-letter prefix (for example
/// `frr`, `csb` or `rue`) are not mistaken for French, Czech or Russian.
/// ISO 639-2 three-letter codes of the supported languages are accepted as
/// aliases.
///
/// * A blank tag and `und` resolve to US English.
/// * English resolves to British English when any later subtag is `gb`,
///   otherwise to US English.
/// * Returns `None` for every language without a mapping.
fn resolve_language_tag(locale: &str) -> Option<Language> {
    let trimmed = locale.trim();
    if trimmed.is_empty() {
        return Some(Language::EnglishUS);
    }

    let normalized = trimmed.to_ascii_lowercase();
    // Splitting on every non-alphanumeric character treats `-` and `_` alike
    // and also tolerates POSIX-style suffixes such as `.utf-8`.
    let mut subtags = normalized.split(|c: char| !c.is_ascii_alphanumeric());
    let primary = subtags.next()?;

    let language = match primary {
        "en" | "eng" => {
            // The region may follow a script subtag (`en-Latn-GB`), so look
            // at every remaining subtag rather than only the second one.
            if subtags.any(|subtag| subtag == "gb") {
                Language::EnglishGB
            } else {
                Language::EnglishUS
            }
        }
        "und" => Language::EnglishUS,
        "fr" | "fra" | "fre" => Language::French,
        "de" | "deu" | "ger" => Language::German1996,
        "es" | "spa" => Language::Spanish,
        "it" | "ita" => Language::Italian,
        "ru" | "rus" => Language::Russian,
        "pt" | "por" => Language::Portuguese,
        "nl" | "nld" | "dut" => Language::Dutch,
        "pl" | "pol" => Language::Polish,
        "sv" | "swe" => Language::Swedish,
        "da" | "dan" => Language::Danish,
        "cs" | "ces" | "cze" => Language::Czech,
        "sk" | "slk" | "slo" => Language::Slovak,
        "uk" | "ukr" => Language::Ukrainian,
        _ => return None,
    };
    Some(language)
}
268
269#[cfg(feature = "text-hyphenation")]
/// Returns the byte offset at which each character of `text` starts,
/// followed by `text.len()`.
///
/// Entry `i` is the byte offset of character `i`, so characters `a..b`
/// occupy bytes `result[a]..result[b]`. The vector always begins with `0`
/// and ends with `text.len()`; for an empty string that yields `[0, 0]`.
fn char_boundaries(text: &str) -> Vec<usize> {
    // The first character always starts at byte 0, which is emitted
    // explicitly, so it is skipped here.
    let interior_starts = text.char_indices().map(|(offset, _)| offset).skip(1);

    std::iter::once(0)
        .chain(interior_starts)
        .chain(std::iter::once(text.len()))
        .collect()
}
281
282#[cfg(feature = "text-hyphenation")]
/// Reports whether a break before character `break_idx` falls strictly
/// inside a word, i.e. neither neighbouring character is whitespace.
///
/// `boundaries` holds the byte offset of every character of `line` plus the
/// final length, so character `i` is `line[boundaries[i]..boundaries[i + 1]]`.
/// A break at the very start or at/after the end of the line is never inside
/// a word.
fn is_break_inside_word(line: &str, boundaries: &[usize], break_idx: usize) -> bool {
    fn is_blank(piece: &str) -> bool {
        piece.chars().all(char::is_whitespace)
    }

    let strictly_interior = break_idx > 0 && break_idx < boundaries.len() - 1;
    if !strictly_interior {
        return false;
    }

    let before = &line[boundaries[break_idx - 1]..boundaries[break_idx]];
    let after = &line[boundaries[break_idx]..boundaries[break_idx + 1]];
    !(is_blank(before) || is_blank(after))
}
291
292#[cfg(feature = "text-hyphenation")]
/// Finds the whitespace-delimited word around character index `anchor`.
///
/// `boundaries` holds the byte offset of every character of `line` plus the
/// final length. Returns `(start, end)` as character indices: `start` is the
/// first character of the word and `end` is one past its last character, so
/// the word is `line[boundaries[start]..boundaries[end]]`.
fn word_bounds(line: &str, boundaries: &[usize], anchor: usize) -> (usize, usize) {
    let is_blank_at = |idx: usize| {
        line[boundaries[idx]..boundaries[idx + 1]]
            .chars()
            .all(char::is_whitespace)
    };

    // Walk left from the anchor: the word starts just after the nearest
    // whitespace character, or at the beginning of the line.
    let start = (0..anchor)
        .rev()
        .find(|&idx| is_blank_at(idx))
        .map_or(0, |idx| idx + 1);

    // Walk right from the anchor: the word ends at the first whitespace
    // character, or at the end of the line.
    let char_count = boundaries.len() - 1;
    let end = (anchor..char_count)
        .find(|&idx| is_blank_at(idx))
        .unwrap_or(char_count);

    (start, end)
}
313
#[cfg(all(test, not(feature = "text-hyphenation")))]
mod disabled_tests {
    use super::*;

    /// With the hyphenation feature compiled out, no break is ever proposed.
    #[test]
    fn auto_hyphenation_without_dictionary_feature_returns_none() {
        let style = TextStyle::default();
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            None
        );
    }
}
324
#[cfg(all(test, feature = "text-hyphenation-embedded"))]
mod tests {
    use super::*;
    use cranpose_ui::text::{LocaleList, SpanStyle, TextStyle};

    /// Builds an otherwise-default text style whose locale list is parsed
    /// from `tags`.
    fn style_with_locale(tags: &str) -> TextStyle {
        let span_style = SpanStyle {
            locale_list: Some(LocaleList::from_language_tags(tags)),
            ..Default::default()
        };
        TextStyle {
            span_style,
            ..Default::default()
        }
    }

    /// Default style (no locale) hyphenates with the US English dictionary.
    #[test]
    fn dictionary_breaks_transformation_like_compose_contract() {
        let style = TextStyle::default();
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            Some(10)
        );
    }

    /// A French locale selects the French dictionary; indices are in
    /// characters even though the word contains multi-byte letters.
    #[test]
    fn locale_gate_uses_french_dictionary() {
        let style = style_with_locale("fr-FR");
        assert_eq!(choose_auto_hyphen_break("éléphant", &style, 0, 7), Some(3));
    }

    /// A German locale yields some break inside a long compound word.
    #[test]
    fn locale_gate_uses_german_dictionary() {
        let style = style_with_locale("de-DE");
        let chosen = choose_auto_hyphen_break("Geschwindigkeitsbegrenzung", &style, 10, 20);
        assert!(chosen.is_some());
    }

    /// Locales without a mapped dictionary turn hyphenation off.
    #[test]
    fn unknown_locale_disables_hyphenation() {
        let style = style_with_locale("ja-JP");
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            None
        );
    }

    /// Underscore-separated tags are accepted as aliases of hyphenated ones.
    #[test]
    fn dictionary_uses_english_locale_alias() {
        let style = style_with_locale("en_GB");
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            Some(10)
        );
    }

    /// A measured break that sits next to whitespace is not inside a word.
    #[test]
    fn ignores_breaks_outside_words() {
        let style = TextStyle::default();
        assert_eq!(choose_auto_hyphen_break("ab cd", &style, 0, 2), None);
    }
}