Skip to main content

fret_text_nav/
lib.rs

//! Text navigation helpers shared across Fret surfaces.
//!
//! This crate centralizes v1 "word" and "line" semantics used by:
//! - core text widgets (`TextInput`, `TextArea`, `SelectableText`)
//! - ecosystem code editor surfaces
//!
//! The active word-boundary mode (`UnicodeWord` vs `Identifier`) remains a policy input
//! (`TextBoundaryMode`). This crate only provides deterministic algorithms for those modes.
//!
//! Normative behavior is defined by:
//! - ADR 0179: Text Navigation and Word Boundaries (v1)
//! - ADR 0044: Text editing command vocabulary and UTF-8 byte indices (clamping rules)
13
14use fret_runtime::TextBoundaryMode;
15use unicode_segmentation::UnicodeSegmentation;
16
/// Clamps the byte offset `idx` to a valid UTF-8 character boundary of `text`.
///
/// Offsets at or past `text.len()` clamp to `text.len()`. An offset that falls inside a
/// multi-byte character is moved down to the first byte of that character.
pub fn clamp_to_char_boundary(text: &str, idx: usize) -> usize {
    let capped = idx.min(text.len());
    // Offset 0 is always a char boundary, so the downward search cannot come up empty.
    (0..=capped)
        .rev()
        .find(|&pos| text.is_char_boundary(pos))
        .unwrap_or(0)
}
30
31pub fn prev_char_boundary(text: &str, idx: usize) -> usize {
32    let idx = clamp_to_char_boundary(text, idx);
33    if idx == 0 {
34        return 0;
35    }
36
37    // Avoid scanning from the start (which is O(n)). Back up to the previous UTF-8 char boundary.
38    let mut i = idx.saturating_sub(1);
39    while i > 0 && !text.is_char_boundary(i) {
40        i = i.saturating_sub(1);
41    }
42    i
43}
44
45pub fn next_char_boundary(text: &str, idx: usize) -> usize {
46    let idx = clamp_to_char_boundary(text, idx);
47    if idx >= text.len() {
48        return text.len();
49    }
50    let ch = text[idx..].chars().next().unwrap_or('\0');
51    idx.saturating_add(ch.len_utf8()).min(text.len())
52}
53
54pub fn is_grapheme_boundary(text: &str, idx: usize) -> bool {
55    let idx = idx.min(text.len());
56    if idx == 0 || idx == text.len() {
57        return true;
58    }
59    text.grapheme_indices(true).any(|(start, _)| start == idx)
60}
61
62pub fn prev_grapheme_boundary(text: &str, idx: usize) -> usize {
63    let idx = idx.min(text.len());
64    if idx == 0 {
65        return 0;
66    }
67
68    let mut prev = 0usize;
69    for (start, _) in text.grapheme_indices(true) {
70        if start >= idx {
71            break;
72        }
73        prev = start;
74    }
75    prev
76}
77
78pub fn next_grapheme_boundary(text: &str, idx: usize) -> usize {
79    let idx = idx.min(text.len());
80    if idx >= text.len() {
81        return text.len();
82    }
83
84    for (start, g) in text.grapheme_indices(true) {
85        let end = start + g.len();
86        if idx < end {
87            return end;
88        }
89    }
90    text.len()
91}
92
93pub fn clamp_to_grapheme_boundary(text: &str, idx: usize) -> usize {
94    let idx = idx.min(text.len());
95    if is_grapheme_boundary(text, idx) {
96        return idx;
97    }
98
99    // Prefer the closest grapheme boundary; ties clamp down.
100    for (start, g) in text.grapheme_indices(true) {
101        let end = start + g.len();
102        if idx < end {
103            return if idx - start <= end - idx { start } else { end };
104        }
105    }
106
107    text.len()
108}
109
110pub fn clamp_to_grapheme_boundary_down(text: &str, idx: usize) -> usize {
111    let idx = idx.min(text.len());
112    if is_grapheme_boundary(text, idx) {
113        idx
114    } else {
115        prev_grapheme_boundary(text, idx)
116    }
117}
118
119pub fn clamp_to_grapheme_boundary_up(text: &str, idx: usize) -> usize {
120    let idx = idx.min(text.len());
121    if is_grapheme_boundary(text, idx) {
122        idx
123    } else {
124        next_grapheme_boundary(text, idx)
125    }
126}
127
128fn is_identifier_char(ch: char) -> bool {
129    ch == '_' || unicode_ident::is_xid_continue(ch)
130}
131
132fn char_at(text: &str, idx: usize) -> Option<char> {
133    let idx = clamp_to_char_boundary(text, idx);
134    text.get(idx..)?.chars().next()
135}
136
137fn is_unicode_word_char(text: &str, idx: usize) -> bool {
138    let idx = clamp_to_char_boundary(text, idx);
139    text.unicode_word_indices()
140        .any(|(start, word)| (start..start + word.len()).contains(&idx))
141}
142
143fn unicode_word_range_at(text: &str, idx: usize) -> Option<(usize, usize)> {
144    let idx = clamp_to_char_boundary(text, idx);
145    for (start, word) in text.unicode_word_indices() {
146        let end = start + word.len();
147        if (start..end).contains(&idx) {
148            return Some((start, end));
149        }
150    }
151    None
152}
153
154fn identifier_range_at(text: &str, idx: usize) -> Option<(usize, usize)> {
155    let idx = clamp_to_char_boundary(text, idx);
156    let ch = char_at(text, idx)?;
157    if !is_identifier_char(ch) {
158        return None;
159    }
160
161    let mut start = idx;
162    while start > 0 {
163        let prev = prev_char_boundary(text, start);
164        let prev_ch = char_at(text, prev).unwrap_or(' ');
165        if !is_identifier_char(prev_ch) {
166            break;
167        }
168        start = prev;
169    }
170
171    let mut end = next_char_boundary(text, idx);
172    while end < text.len() {
173        let next_ch = char_at(text, end).unwrap_or(' ');
174        if !is_identifier_char(next_ch) {
175            break;
176        }
177        end = next_char_boundary(text, end);
178    }
179
180    Some((start, end))
181}
182
183pub fn select_word_range(text: &str, idx: usize, mode: TextBoundaryMode) -> (usize, usize) {
184    if text.is_empty() {
185        return (0, 0);
186    }
187
188    let mut idx = clamp_to_grapheme_boundary(text, idx).min(text.len());
189    if idx >= text.len() {
190        idx = prev_grapheme_boundary(text, idx);
191    }
192
193    // Prefer selecting the previous word when clicking just after it.
194    if char_at(text, idx).is_some_and(|c| c.is_whitespace()) && idx > 0 {
195        let prev = prev_grapheme_boundary(text, idx);
196        let prev_is_word = match mode {
197            TextBoundaryMode::UnicodeWord => is_unicode_word_char(text, prev),
198            TextBoundaryMode::Identifier => char_at(text, prev).is_some_and(is_identifier_char),
199        };
200        if prev_is_word {
201            idx = prev;
202        }
203    }
204
205    let Some(ch) = char_at(text, idx) else {
206        return (0, 0);
207    };
208
209    if ch.is_whitespace() {
210        let mut start = idx;
211        while start > 0 {
212            let prev = prev_grapheme_boundary(text, start);
213            if char_at(text, prev).is_some_and(|c| c.is_whitespace()) {
214                start = prev;
215            } else {
216                break;
217            }
218        }
219        let mut end = next_grapheme_boundary(text, idx);
220        while end < text.len() {
221            if char_at(text, end).is_some_and(|c| c.is_whitespace()) {
222                end = next_grapheme_boundary(text, end);
223            } else {
224                break;
225            }
226        }
227        return (
228            clamp_to_grapheme_boundary_down(text, start),
229            clamp_to_grapheme_boundary_up(text, end),
230        );
231    }
232
233    let (start, end) = match mode {
234        TextBoundaryMode::UnicodeWord => {
235            unicode_word_range_at(text, idx).unwrap_or((idx, next_grapheme_boundary(text, idx)))
236        }
237        TextBoundaryMode::Identifier => {
238            identifier_range_at(text, idx).unwrap_or((idx, next_grapheme_boundary(text, idx)))
239        }
240    };
241
242    (
243        clamp_to_grapheme_boundary_down(text, start),
244        clamp_to_grapheme_boundary_up(text, end),
245    )
246}
247
248pub fn select_line_range(text: &str, idx: usize) -> (usize, usize) {
249    if text.is_empty() {
250        return (0, 0);
251    }
252
253    let idx = clamp_to_grapheme_boundary(text, idx).min(text.len());
254    let start = text[..idx]
255        .rfind('\n')
256        .map(|i| (i + 1).min(text.len()))
257        .unwrap_or(0);
258    let end = text[idx..]
259        .find('\n')
260        .map(|i| (idx + i + 1).min(text.len()))
261        .unwrap_or(text.len());
262    (
263        clamp_to_grapheme_boundary_down(text, start),
264        clamp_to_grapheme_boundary_up(text, end),
265    )
266}
267
268pub fn move_word_left(text: &str, idx: usize, mode: TextBoundaryMode) -> usize {
269    let mut i = clamp_to_grapheme_boundary(text, idx);
270    while i > 0 {
271        let prev = prev_grapheme_boundary(text, i);
272        let ch = text[prev..i].chars().next().unwrap_or(' ');
273        if !ch.is_whitespace() {
274            break;
275        }
276        i = prev;
277    }
278
279    if i == 0 {
280        return 0;
281    }
282
283    // `i` is the boundary after any trailing whitespace. Anchor inside the grapheme immediately
284    // to the left so we always query a position inside the word/token.
285    let anchor = prev_grapheme_boundary(text, i);
286
287    let next = match mode {
288        TextBoundaryMode::UnicodeWord => unicode_word_range_at(text, anchor)
289            .map(|(start, _)| start)
290            .unwrap_or(anchor),
291        TextBoundaryMode::Identifier => identifier_range_at(text, anchor)
292            .map(|(start, _)| start)
293            .unwrap_or(anchor),
294    };
295    clamp_to_grapheme_boundary(text, next)
296}
297
298pub fn move_word_right(text: &str, idx: usize, mode: TextBoundaryMode) -> usize {
299    let mut i = next_grapheme_boundary(text, idx);
300    while i < text.len() {
301        let next = next_grapheme_boundary(text, i);
302        let ch = text[i..next].chars().next().unwrap_or(' ');
303        if !ch.is_whitespace() {
304            break;
305        }
306        i = next;
307    }
308
309    if i >= text.len() {
310        return text.len();
311    }
312
313    let next = match mode {
314        TextBoundaryMode::UnicodeWord => unicode_word_range_at(text, i)
315            .map(|(_, end)| end)
316            .unwrap_or(i),
317        TextBoundaryMode::Identifier => identifier_range_at(text, i)
318            .map(|(_, end)| end)
319            .unwrap_or(i),
320    };
321    clamp_to_grapheme_boundary(text, next)
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// An apostrophe stays inside a Unicode word but terminates an identifier.
    #[test]
    fn move_word_right_distinguishes_unicode_word_and_identifier_for_apostrophe() {
        let text = "can't";
        let unicode_stop = move_word_right(text, 0, TextBoundaryMode::UnicodeWord);
        let identifier_stop = move_word_right(text, 0, TextBoundaryMode::Identifier);
        assert_eq!(
            unicode_stop,
            text.len(),
            "UnicodeWord should treat \"can't\" as a single word"
        );
        assert_eq!(
            identifier_stop, 3,
            "Identifier should split \"can't\" around the apostrophe"
        );
    }

    /// Non-ASCII letters and underscores both belong to identifiers.
    #[test]
    fn select_word_range_identifier_uses_xid_continue() {
        let cases = [("αβγ δ", "αβγ"), ("a_b c", "a_b")];
        for (text, word) in cases {
            assert_eq!(
                select_word_range(text, 1, TextBoundaryMode::Identifier),
                (0, word.len())
            );
        }
    }

    /// A hit on the space right after a word selects that word in both modes.
    #[test]
    fn select_word_range_prefers_previous_word_when_clicking_whitespace_after_word() {
        let text = "foo bar";
        let hit = "foo".len();
        let expected = (0, "foo".len());
        for mode in [TextBoundaryMode::UnicodeWord, TextBoundaryMode::Identifier] {
            assert_eq!(select_word_range(text, hit, mode), expected);
        }
    }

    /// A hit inside a whitespace run (not directly after a word) selects the whole run.
    #[test]
    fn select_word_range_selects_whitespace_runs() {
        let text = "foo   bar";
        let hit = "foo ".len();
        let expected = ("foo".len(), "foo   ".len());
        for mode in [TextBoundaryMode::UnicodeWord, TextBoundaryMode::Identifier] {
            assert_eq!(select_word_range(text, hit, mode), expected);
        }
    }

    /// Each CJK character is selected on its own in `UnicodeWord` mode.
    #[test]
    fn select_word_range_unicode_word_handles_cjk_runs() {
        let text = "世界 hello";
        let first = "世".len();
        let both = "世界".len();
        assert_eq!(
            select_word_range(text, 0, TextBoundaryMode::UnicodeWord),
            (0, first)
        );
        assert_eq!(
            select_word_range(text, first, TextBoundaryMode::UnicodeWord),
            (first, both)
        );
    }

    /// An emoji is not a Unicode word, so only its grapheme is selected.
    #[test]
    fn select_word_range_unicode_word_falls_back_to_single_grapheme_on_emoji() {
        let text = "hi😀there";
        let emoji_start = "hi".len();
        let emoji_end = emoji_start + "😀".len();
        let selected = select_word_range(text, emoji_start, TextBoundaryMode::UnicodeWord);
        assert_eq!(selected, (emoji_start, emoji_end));
    }

    /// Digits and underscores extend an identifier.
    #[test]
    fn select_word_range_identifier_includes_digits_and_underscores() {
        let text = "foo123_bar baz";
        let selected = select_word_range(text, 2, TextBoundaryMode::Identifier);
        assert_eq!(selected, (0, "foo123_bar".len()));
    }

    /// Punctuation is not an identifier character, so only the punctuation grapheme is selected.
    #[test]
    fn select_word_range_identifier_falls_back_to_single_grapheme_on_punctuation() {
        let text = "foo.bar";
        let dot = "foo".len();
        let selected = select_word_range(text, dot, TextBoundaryMode::Identifier);
        assert_eq!(selected, (dot, dot + ".".len()));
    }

    /// A ZWJ emoji sequence is selected as one grapheme.
    #[test]
    fn select_word_range_unicode_word_falls_back_to_single_grapheme_on_zwj_emoji() {
        let emoji = "👩‍💻";
        let text = format!("a{emoji}b");
        let start = "a".len();
        let selected = select_word_range(&text, start, TextBoundaryMode::UnicodeWord);
        assert_eq!(selected, (start, start + emoji.len()));
    }

    /// In `Identifier` mode a dot separates two identifiers for word movement.
    #[test]
    fn move_word_identifier_treats_punctuation_as_delimiter() {
        let text = "foo.bar";
        let right = move_word_right(text, 0, TextBoundaryMode::Identifier);
        let left = move_word_left(text, text.len(), TextBoundaryMode::Identifier);
        assert_eq!(right, "foo".len());
        assert_eq!(left, "foo.".len());
    }

    /// Moving left skips whitespace and stops at the start of the preceding word.
    #[test]
    fn move_word_left_skips_whitespace_and_moves_to_word_start() {
        let text = "foo   bar";
        let cases = [(text.len(), 6), ("foo   ".len(), 0)];
        for (from, expected) in cases {
            assert_eq!(
                move_word_left(text, from, TextBoundaryMode::UnicodeWord),
                expected
            );
        }
    }

    /// Moving right skips whitespace and stops at the end of the following word.
    #[test]
    fn move_word_right_skips_whitespace_and_moves_to_word_end() {
        let text = "foo   bar";
        let cases = [(0, "foo".len()), ("foo".len(), text.len())];
        for (from, expected) in cases {
            assert_eq!(
                move_word_right(text, from, TextBoundaryMode::UnicodeWord),
                expected
            );
        }
    }

    /// A selected line includes its terminating newline; the last line ends at the text end.
    #[test]
    fn select_line_range_includes_trailing_newline_when_present() {
        let text = "a\nb\nc";
        let first_line = (0, "a\n".len());
        assert_eq!(select_line_range(text, 0), first_line);
        assert_eq!(select_line_range(text, "a".len()), first_line);

        let b_idx = "a\n".len();
        let second_line = (b_idx, "a\nb\n".len());
        assert_eq!(select_line_range(text, b_idx), second_line);
        assert_eq!(select_line_range(text, b_idx + "b".len()), second_line);

        let c_idx = "a\nb\n".len();
        assert_eq!(select_line_range(text, c_idx), (c_idx, text.len()));
    }
}
492}