cranpose_render_common/
text_hyphenation.rs1use cranpose_ui::text::TextStyle;
2use hyphenation::{Hyphenator, Language, Load, Standard};
3use std::collections::HashMap;
4use std::sync::{OnceLock, RwLock};
5
6const MIN_SEGMENT_CHARS: usize = 2;
7
8fn get_dictionary(language: Language) -> Option<Standard> {
9 static DICTIONARIES: OnceLock<RwLock<HashMap<Language, Standard>>> = OnceLock::new();
10 let cache = DICTIONARIES.get_or_init(|| RwLock::new(HashMap::new()));
11
12 if let Ok(read_guard) = cache.read() {
13 if let Some(dict) = read_guard.get(&language) {
14 return Some(dict.clone());
15 }
16 }
17
18 match Standard::from_embedded(language) {
20 Ok(dict) => {
21 if let Ok(mut write_guard) = cache.write() {
22 write_guard.insert(language, dict.clone());
23 }
24 Some(dict)
25 }
26 Err(_) => None,
27 }
28}
29
30pub fn choose_auto_hyphen_break(
31 line: &str,
32 style: &TextStyle,
33 segment_start_char: usize,
34 measured_break_char: usize,
35) -> Option<usize> {
36 if line.is_empty() || measured_break_char <= segment_start_char {
37 return None;
38 }
39
40 let language = resolve_hyphenation_language(style)?;
41
42 let dictionary = get_dictionary(language)?;
43 let boundaries = char_boundaries(line);
44 let char_count = boundaries.len().saturating_sub(1);
45
46 if measured_break_char == 0 || measured_break_char >= char_count {
47 return None;
48 }
49 if !is_break_inside_word(line, &boundaries, measured_break_char) {
50 return None;
51 }
52
53 let (word_start, word_end) = word_bounds(line, &boundaries, measured_break_char);
54 let word = &line[boundaries[word_start]..boundaries[word_end]];
55 if word.is_empty() {
56 return None;
57 }
58
59 let max_local_break = measured_break_char.saturating_sub(word_start);
60 let min_local_break = segment_start_char
61 .saturating_sub(word_start)
62 .saturating_add(MIN_SEGMENT_CHARS);
63
64 if min_local_break > max_local_break {
65 return None;
66 }
67
68 let hyphenated = dictionary.hyphenate(word);
69 for break_byte in hyphenated.breaks.into_iter().rev() {
70 if !word.is_char_boundary(break_byte) {
71 continue;
72 }
73 let local_break_chars = word[..break_byte].chars().count();
74 if local_break_chars < min_local_break || local_break_chars > max_local_break {
75 continue;
76 }
77 return Some(word_start + local_break_chars);
78 }
79
80 None
81}
82
83fn resolve_hyphenation_language(style: &TextStyle) -> Option<Language> {
84 let Some(locale_list) = style.span_style.locale_list.as_ref() else {
85 return Some(Language::EnglishUS);
86 };
87 if locale_list.is_empty() {
88 return Some(Language::EnglishUS);
89 }
90
91 let primary_locale = locale_list.locales().first()?;
93 let normalized = primary_locale.trim().replace('_', "-").to_ascii_lowercase();
94
95 if normalized.starts_with("en-gb") {
96 return Some(Language::EnglishGB);
97 }
98 if normalized.starts_with("en") || normalized == "und" {
99 return Some(Language::EnglishUS);
100 }
101 if normalized.starts_with("fr") {
102 return Some(Language::French);
103 }
104 if normalized.starts_with("de") {
105 return Some(Language::German1996);
106 }
107 if normalized.starts_with("es") {
108 return Some(Language::Spanish);
109 }
110 if normalized.starts_with("it") {
111 return Some(Language::Italian);
112 }
113 if normalized.starts_with("ru") {
114 return Some(Language::Russian);
115 }
116 if normalized.starts_with("pt") {
117 return Some(Language::Portuguese);
118 }
119 if normalized.starts_with("nl") {
120 return Some(Language::Dutch);
121 }
122 if normalized.starts_with("pl") {
123 return Some(Language::Polish);
124 }
125 if normalized.starts_with("sv") {
126 return Some(Language::Swedish);
127 }
128 if normalized.starts_with("da") {
129 return Some(Language::Danish);
130 }
131 if normalized.starts_with("cs") {
132 return Some(Language::Czech);
133 }
134 if normalized.starts_with("sk") {
135 return Some(Language::Slovak);
136 }
137 if normalized.starts_with("uk") {
138 return Some(Language::Ukrainian);
139 }
140
141 None
142}
143
/// Returns the byte offset at which each character of `text` starts, followed by
/// `text.len()` as a closing sentinel.
///
/// For a string of `n` characters the result has `n + 1` entries, so entry `i`
/// and entry `i + 1` delimit the bytes of character `i`. An empty string yields
/// just `[0]` (zero characters), never a duplicated `[0, 0]`.
fn char_boundaries(text: &str) -> Vec<usize> {
    let mut out = Vec::with_capacity(text.chars().count() + 1);
    out.extend(text.char_indices().map(|(idx, _)| idx));
    out.push(text.len());
    out
}
155
/// Reports whether a break at character index `break_idx` falls strictly inside a
/// word, i.e. the characters immediately before and after it are both
/// non-whitespace.
///
/// `boundaries` holds the byte offset of each character of `line` plus a final
/// end offset, so consecutive entries delimit one character.
///
/// Returns `false` for a break at the very start, at or past the last boundary,
/// or when `boundaries` is too short to describe any character (including an
/// empty slice, which would otherwise underflow the length computation).
fn is_break_inside_word(line: &str, boundaries: &[usize], break_idx: usize) -> bool {
    let last_boundary = boundaries.len().saturating_sub(1);
    if break_idx == 0 || break_idx >= last_boundary {
        return false;
    }

    let is_blank = |from: usize| {
        line[boundaries[from]..boundaries[from + 1]]
            .chars()
            .all(char::is_whitespace)
    };
    !is_blank(break_idx - 1) && !is_blank(break_idx)
}
164
/// Finds the whitespace-delimited word surrounding character index `anchor`.
///
/// `boundaries` holds the byte offset of each character of `line` plus a final
/// end offset. The result is `(start, end)` in character indices: `start` is the
/// index just after the nearest whitespace character before `anchor` (or `0`),
/// and `end` is the index of the first whitespace character at or after `anchor`
/// (or the last boundary index when there is none).
fn word_bounds(line: &str, boundaries: &[usize], anchor: usize) -> (usize, usize) {
    // True when the character at `idx` consists only of whitespace.
    let blank_at = |idx: usize| {
        line[boundaries[idx]..boundaries[idx + 1]]
            .chars()
            .all(char::is_whitespace)
    };

    // Search backwards from the character just before the anchor.
    let start = (0..anchor)
        .rev()
        .find(|&idx| blank_at(idx))
        .map_or(0, |idx| idx + 1);

    // Search forwards starting with the character at the anchor itself.
    let last = boundaries.len() - 1;
    let end = (anchor..last).find(|&idx| blank_at(idx)).unwrap_or(last);

    (start, end)
}
185
#[cfg(test)]
mod tests {
    use super::*;
    use cranpose_ui::text::{LocaleList, SpanStyle, TextStyle};

    /// Builds an otherwise-default text style carrying the given language tags.
    fn style_with_locale(tags: &str) -> TextStyle {
        let span_style = SpanStyle {
            locale_list: Some(LocaleList::from_language_tags(tags)),
            ..Default::default()
        };
        TextStyle {
            span_style,
            ..Default::default()
        }
    }

    // No locale list: the default (US English) dictionary is used.
    #[test]
    fn dictionary_breaks_transformation_like_compose_contract() {
        let style = TextStyle::default();
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            Some(10)
        );
    }

    // Multi-byte characters: the result is a character index, not a byte offset.
    #[test]
    fn locale_gate_uses_french_dictionary() {
        let style = style_with_locale("fr-FR");
        assert_eq!(choose_auto_hyphen_break("éléphant", &style, 0, 7), Some(3));
    }

    #[test]
    fn locale_gate_uses_german_dictionary() {
        let style = style_with_locale("de-DE");
        let chosen = choose_auto_hyphen_break("Geschwindigkeitsbegrenzung", &style, 10, 20);
        assert!(chosen.is_some());
    }

    // A locale without a mapped dictionary turns hyphenation off entirely.
    #[test]
    fn unknown_locale_disables_hyphenation() {
        let style = style_with_locale("ja-JP");
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            None
        );
    }

    // Underscore-separated tags are accepted as an alias for the hyphenated form.
    #[test]
    fn dictionary_uses_english_locale_alias() {
        let style = style_with_locale("en_GB");
        assert_eq!(
            choose_auto_hyphen_break("Transformation", &style, 8, 12),
            Some(10)
        );
    }

    // A measured break that sits next to whitespace is not a hyphenation candidate.
    #[test]
    fn ignores_breaks_outside_words() {
        let style = TextStyle::default();
        assert_eq!(choose_auto_hyphen_break("ab cd", &style, 0, 2), None);
    }
}