1use fret_runtime::TextBoundaryMode;
15use unicode_segmentation::UnicodeSegmentation;
16
/// Snaps a byte offset onto a UTF-8 char boundary of `text`, rounding down.
///
/// Offsets at or beyond `text.len()` yield `text.len()`. An offset that is
/// already a boundary is returned unchanged; otherwise the closest boundary
/// below `idx` is returned.
pub fn clamp_to_char_boundary(text: &str, idx: usize) -> usize {
    let len = text.len();
    if idx >= len {
        return len;
    }
    // Walk downwards from `idx`; offset 0 is always a boundary, so the search
    // cannot come up empty.
    (0..=idx)
        .rev()
        .find(|&candidate| text.is_char_boundary(candidate))
        .unwrap_or(0)
}
30
31pub fn prev_char_boundary(text: &str, idx: usize) -> usize {
32 let idx = clamp_to_char_boundary(text, idx);
33 if idx == 0 {
34 return 0;
35 }
36
37 let mut i = idx.saturating_sub(1);
39 while i > 0 && !text.is_char_boundary(i) {
40 i = i.saturating_sub(1);
41 }
42 i
43}
44
45pub fn next_char_boundary(text: &str, idx: usize) -> usize {
46 let idx = clamp_to_char_boundary(text, idx);
47 if idx >= text.len() {
48 return text.len();
49 }
50 let ch = text[idx..].chars().next().unwrap_or('\0');
51 idx.saturating_add(ch.len_utf8()).min(text.len())
52}
53
54pub fn is_grapheme_boundary(text: &str, idx: usize) -> bool {
55 let idx = idx.min(text.len());
56 if idx == 0 || idx == text.len() {
57 return true;
58 }
59 text.grapheme_indices(true).any(|(start, _)| start == idx)
60}
61
62pub fn prev_grapheme_boundary(text: &str, idx: usize) -> usize {
63 let idx = idx.min(text.len());
64 if idx == 0 {
65 return 0;
66 }
67
68 let mut prev = 0usize;
69 for (start, _) in text.grapheme_indices(true) {
70 if start >= idx {
71 break;
72 }
73 prev = start;
74 }
75 prev
76}
77
78pub fn next_grapheme_boundary(text: &str, idx: usize) -> usize {
79 let idx = idx.min(text.len());
80 if idx >= text.len() {
81 return text.len();
82 }
83
84 for (start, g) in text.grapheme_indices(true) {
85 let end = start + g.len();
86 if idx < end {
87 return end;
88 }
89 }
90 text.len()
91}
92
93pub fn clamp_to_grapheme_boundary(text: &str, idx: usize) -> usize {
94 let idx = idx.min(text.len());
95 if is_grapheme_boundary(text, idx) {
96 return idx;
97 }
98
99 for (start, g) in text.grapheme_indices(true) {
101 let end = start + g.len();
102 if idx < end {
103 return if idx - start <= end - idx { start } else { end };
104 }
105 }
106
107 text.len()
108}
109
110pub fn clamp_to_grapheme_boundary_down(text: &str, idx: usize) -> usize {
111 let idx = idx.min(text.len());
112 if is_grapheme_boundary(text, idx) {
113 idx
114 } else {
115 prev_grapheme_boundary(text, idx)
116 }
117}
118
119pub fn clamp_to_grapheme_boundary_up(text: &str, idx: usize) -> usize {
120 let idx = idx.min(text.len());
121 if is_grapheme_boundary(text, idx) {
122 idx
123 } else {
124 next_grapheme_boundary(text, idx)
125 }
126}
127
128fn is_identifier_char(ch: char) -> bool {
129 ch == '_' || unicode_ident::is_xid_continue(ch)
130}
131
132fn char_at(text: &str, idx: usize) -> Option<char> {
133 let idx = clamp_to_char_boundary(text, idx);
134 text.get(idx..)?.chars().next()
135}
136
137fn is_unicode_word_char(text: &str, idx: usize) -> bool {
138 let idx = clamp_to_char_boundary(text, idx);
139 text.unicode_word_indices()
140 .any(|(start, word)| (start..start + word.len()).contains(&idx))
141}
142
143fn unicode_word_range_at(text: &str, idx: usize) -> Option<(usize, usize)> {
144 let idx = clamp_to_char_boundary(text, idx);
145 for (start, word) in text.unicode_word_indices() {
146 let end = start + word.len();
147 if (start..end).contains(&idx) {
148 return Some((start, end));
149 }
150 }
151 None
152}
153
154fn identifier_range_at(text: &str, idx: usize) -> Option<(usize, usize)> {
155 let idx = clamp_to_char_boundary(text, idx);
156 let ch = char_at(text, idx)?;
157 if !is_identifier_char(ch) {
158 return None;
159 }
160
161 let mut start = idx;
162 while start > 0 {
163 let prev = prev_char_boundary(text, start);
164 let prev_ch = char_at(text, prev).unwrap_or(' ');
165 if !is_identifier_char(prev_ch) {
166 break;
167 }
168 start = prev;
169 }
170
171 let mut end = next_char_boundary(text, idx);
172 while end < text.len() {
173 let next_ch = char_at(text, end).unwrap_or(' ');
174 if !is_identifier_char(next_ch) {
175 break;
176 }
177 end = next_char_boundary(text, end);
178 }
179
180 Some((start, end))
181}
182
183pub fn select_word_range(text: &str, idx: usize, mode: TextBoundaryMode) -> (usize, usize) {
184 if text.is_empty() {
185 return (0, 0);
186 }
187
188 let mut idx = clamp_to_grapheme_boundary(text, idx).min(text.len());
189 if idx >= text.len() {
190 idx = prev_grapheme_boundary(text, idx);
191 }
192
193 if char_at(text, idx).is_some_and(|c| c.is_whitespace()) && idx > 0 {
195 let prev = prev_grapheme_boundary(text, idx);
196 let prev_is_word = match mode {
197 TextBoundaryMode::UnicodeWord => is_unicode_word_char(text, prev),
198 TextBoundaryMode::Identifier => char_at(text, prev).is_some_and(is_identifier_char),
199 };
200 if prev_is_word {
201 idx = prev;
202 }
203 }
204
205 let Some(ch) = char_at(text, idx) else {
206 return (0, 0);
207 };
208
209 if ch.is_whitespace() {
210 let mut start = idx;
211 while start > 0 {
212 let prev = prev_grapheme_boundary(text, start);
213 if char_at(text, prev).is_some_and(|c| c.is_whitespace()) {
214 start = prev;
215 } else {
216 break;
217 }
218 }
219 let mut end = next_grapheme_boundary(text, idx);
220 while end < text.len() {
221 if char_at(text, end).is_some_and(|c| c.is_whitespace()) {
222 end = next_grapheme_boundary(text, end);
223 } else {
224 break;
225 }
226 }
227 return (
228 clamp_to_grapheme_boundary_down(text, start),
229 clamp_to_grapheme_boundary_up(text, end),
230 );
231 }
232
233 let (start, end) = match mode {
234 TextBoundaryMode::UnicodeWord => {
235 unicode_word_range_at(text, idx).unwrap_or((idx, next_grapheme_boundary(text, idx)))
236 }
237 TextBoundaryMode::Identifier => {
238 identifier_range_at(text, idx).unwrap_or((idx, next_grapheme_boundary(text, idx)))
239 }
240 };
241
242 (
243 clamp_to_grapheme_boundary_down(text, start),
244 clamp_to_grapheme_boundary_up(text, end),
245 )
246}
247
248pub fn select_line_range(text: &str, idx: usize) -> (usize, usize) {
249 if text.is_empty() {
250 return (0, 0);
251 }
252
253 let idx = clamp_to_grapheme_boundary(text, idx).min(text.len());
254 let start = text[..idx]
255 .rfind('\n')
256 .map(|i| (i + 1).min(text.len()))
257 .unwrap_or(0);
258 let end = text[idx..]
259 .find('\n')
260 .map(|i| (idx + i + 1).min(text.len()))
261 .unwrap_or(text.len());
262 (
263 clamp_to_grapheme_boundary_down(text, start),
264 clamp_to_grapheme_boundary_up(text, end),
265 )
266}
267
268pub fn move_word_left(text: &str, idx: usize, mode: TextBoundaryMode) -> usize {
269 let mut i = clamp_to_grapheme_boundary(text, idx);
270 while i > 0 {
271 let prev = prev_grapheme_boundary(text, i);
272 let ch = text[prev..i].chars().next().unwrap_or(' ');
273 if !ch.is_whitespace() {
274 break;
275 }
276 i = prev;
277 }
278
279 if i == 0 {
280 return 0;
281 }
282
283 let anchor = prev_grapheme_boundary(text, i);
286
287 let next = match mode {
288 TextBoundaryMode::UnicodeWord => unicode_word_range_at(text, anchor)
289 .map(|(start, _)| start)
290 .unwrap_or(anchor),
291 TextBoundaryMode::Identifier => identifier_range_at(text, anchor)
292 .map(|(start, _)| start)
293 .unwrap_or(anchor),
294 };
295 clamp_to_grapheme_boundary(text, next)
296}
297
298pub fn move_word_right(text: &str, idx: usize, mode: TextBoundaryMode) -> usize {
299 let mut i = next_grapheme_boundary(text, idx);
300 while i < text.len() {
301 let next = next_grapheme_boundary(text, i);
302 let ch = text[i..next].chars().next().unwrap_or(' ');
303 if !ch.is_whitespace() {
304 break;
305 }
306 i = next;
307 }
308
309 if i >= text.len() {
310 return text.len();
311 }
312
313 let next = match mode {
314 TextBoundaryMode::UnicodeWord => unicode_word_range_at(text, i)
315 .map(|(_, end)| end)
316 .unwrap_or(i),
317 TextBoundaryMode::Identifier => identifier_range_at(text, i)
318 .map(|(_, end)| end)
319 .unwrap_or(i),
320 };
321 clamp_to_grapheme_boundary(text, next)
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    // `UnicodeWord` keeps the apostrophe inside the word, while `Identifier`
    // stops at it.
    #[test]
    fn move_word_right_distinguishes_unicode_word_and_identifier_for_apostrophe() {
        let text = "can't";
        assert_eq!(
            move_word_right(text, 0, TextBoundaryMode::UnicodeWord),
            text.len(),
            "UnicodeWord should treat \"can't\" as a single word"
        );
        assert_eq!(
            move_word_right(text, 0, TextBoundaryMode::Identifier),
            3,
            "Identifier should split \"can't\" around the apostrophe"
        );
    }

    // Offset 1 is inside the first (multi-byte) char of "αβγ", so this also
    // exercises clamping to a boundary.
    #[test]
    fn select_word_range_identifier_uses_xid_continue() {
        assert_eq!(
            select_word_range("αβγ δ", 1, TextBoundaryMode::Identifier),
            (0, "αβγ".len())
        );
        assert_eq!(
            select_word_range("a_b c", 1, TextBoundaryMode::Identifier),
            (0, "a_b".len())
        );
    }

    #[test]
    fn select_word_range_prefers_previous_word_when_clicking_whitespace_after_word() {
        let text = "foo bar";
        let idx = "foo".len();
        assert_eq!(
            select_word_range(text, idx, TextBoundaryMode::UnicodeWord),
            (0, "foo".len())
        );
        assert_eq!(
            select_word_range(text, idx, TextBoundaryMode::Identifier),
            (0, "foo".len())
        );
    }

    #[test]
    fn select_word_range_selects_whitespace_runs() {
        // Three spaces between the words; the hit is on the second space, so
        // the grapheme before it is whitespace rather than a word.
        let text = "foo   bar";
        let idx = "foo ".len();
        assert_eq!(
            select_word_range(text, idx, TextBoundaryMode::UnicodeWord),
            ("foo".len(), "foo   ".len())
        );
        assert_eq!(
            select_word_range(text, idx, TextBoundaryMode::Identifier),
            ("foo".len(), "foo   ".len())
        );
    }

    // Each ideograph is expected to be selected as its own word.
    #[test]
    fn select_word_range_unicode_word_handles_cjk_runs() {
        let text = "世界 hello";
        assert_eq!(
            select_word_range(text, 0, TextBoundaryMode::UnicodeWord),
            (0, "世".len())
        );
        assert_eq!(
            select_word_range(text, "世".len(), TextBoundaryMode::UnicodeWord),
            ("世".len(), "世界".len())
        );
    }

    #[test]
    fn select_word_range_unicode_word_falls_back_to_single_grapheme_on_emoji() {
        let text = "hi😀there";
        let emoji_start = "hi".len();
        let emoji_end = emoji_start + "😀".len();
        assert_eq!(
            select_word_range(text, emoji_start, TextBoundaryMode::UnicodeWord),
            (emoji_start, emoji_end)
        );
    }

    #[test]
    fn select_word_range_identifier_includes_digits_and_underscores() {
        let text = "foo123_bar baz";
        assert_eq!(
            select_word_range(text, 2, TextBoundaryMode::Identifier),
            (0, "foo123_bar".len())
        );
    }

    #[test]
    fn select_word_range_identifier_falls_back_to_single_grapheme_on_punctuation() {
        let text = "foo.bar";
        let dot = "foo".len();
        assert_eq!(
            select_word_range(text, dot, TextBoundaryMode::Identifier),
            (dot, dot + ".".len())
        );
    }

    #[test]
    fn select_word_range_unicode_word_falls_back_to_single_grapheme_on_zwj_emoji() {
        // The zero-width joiner is written as an escape so the invisible char
        // cannot be lost by editors or tooling; it fuses the two emoji into a
        // single grapheme cluster.
        let emoji = "👩\u{200D}💻";
        let text = format!("a{emoji}b");
        let start = "a".len();
        assert_eq!(
            select_word_range(&text, start, TextBoundaryMode::UnicodeWord),
            (start, start + emoji.len())
        );
    }

    #[test]
    fn move_word_identifier_treats_punctuation_as_delimiter() {
        let text = "foo.bar";
        assert_eq!(
            move_word_right(text, 0, TextBoundaryMode::Identifier),
            "foo".len()
        );
        assert_eq!(
            move_word_left(text, text.len(), TextBoundaryMode::Identifier),
            "foo.".len()
        );
    }

    #[test]
    fn move_word_left_skips_whitespace_and_moves_to_word_start() {
        // Three spaces between the words, so "bar" starts at byte 6.
        let text = "foo   bar";
        assert_eq!(
            move_word_left(text, text.len(), TextBoundaryMode::UnicodeWord),
            6
        );
        // Starting inside the whitespace run skips back over it to "foo".
        assert_eq!(
            move_word_left(text, "foo ".len(), TextBoundaryMode::UnicodeWord),
            0
        );
    }

    #[test]
    fn move_word_right_skips_whitespace_and_moves_to_word_end() {
        let text = "foo bar";
        assert_eq!(
            move_word_right(text, 0, TextBoundaryMode::UnicodeWord),
            "foo".len()
        );
        assert_eq!(
            move_word_right(text, "foo".len(), TextBoundaryMode::UnicodeWord),
            text.len()
        );
    }

    #[test]
    fn select_line_range_includes_trailing_newline_when_present() {
        let text = "a\nb\nc";
        assert_eq!(select_line_range(text, 0), (0, "a\n".len()));
        assert_eq!(select_line_range(text, "a".len()), (0, "a\n".len()));

        let b_idx = "a\n".len();
        assert_eq!(select_line_range(text, b_idx), (b_idx, "a\nb\n".len()));
        assert_eq!(
            select_line_range(text, b_idx + "b".len()),
            (b_idx, "a\nb\n".len())
        );

        // The last line has no trailing newline, so it runs to the end.
        let c_idx = "a\nb\n".len();
        assert_eq!(select_line_range(text, c_idx), (c_idx, text.len()));
    }
}