Skip to main content

sivtr_core/parse/
unicode.rs

1use unicode_width::UnicodeWidthChar;
2
3/// Compute the display width of each character in the string.
4///
5/// Returns a Vec where each element is the display column width (0, 1, or 2)
6/// of the corresponding character. This is essential for correct cursor
7/// positioning and block selection with CJK/wide characters.
8pub fn compute_display_widths(s: &str) -> Vec<u8> {
9    s.chars()
10        .map(|ch| {
11            if ch == '\t' {
12                // Tab is treated as 8 spaces for display purposes.
13                // This can be made configurable later.
14                8u8
15            } else {
16                ch.width().unwrap_or(0) as u8
17            }
18        })
19        .collect()
20}
21
22/// Compute the total display width of a string.
23pub fn display_width(s: &str) -> usize {
24    compute_display_widths(s).iter().map(|&w| w as usize).sum()
25}
26
27/// Given a string and a display column range [col_start, col_end),
28/// return the char range that covers those display columns.
29pub fn display_col_to_char_range(s: &str, col_start: usize, col_end: usize) -> (usize, usize) {
30    if col_start >= col_end {
31        let char_idx = s.chars().count().min(
32            compute_display_widths(s)
33                .iter()
34                .scan(0usize, |col, width| {
35                    let start = *col;
36                    *col += *width as usize;
37                    Some(start)
38                })
39                .position(|start| start >= col_start)
40                .unwrap_or_else(|| s.chars().count()),
41        );
42        return (char_idx, char_idx);
43    }
44
45    let mut current_col = 0usize;
46    let mut char_start = None;
47    let mut char_end = 0;
48
49    for (i, ch) in s.chars().enumerate() {
50        let w = if ch == '\t' {
51            8
52        } else {
53            ch.width().unwrap_or(0)
54        };
55
56        if char_start.is_none() && current_col + w > col_start {
57            char_start = Some(i);
58        }
59
60        current_col += w;
61
62        if char_start.is_some() {
63            char_end = i + 1;
64        }
65
66        if current_col >= col_end {
67            break;
68        }
69    }
70
71    (char_start.unwrap_or(0), char_end)
72}
73
#[cfg(test)]
mod tests {
    use super::*;

    /// Every ASCII letter takes exactly one column.
    #[test]
    fn test_ascii_widths() {
        assert_eq!(compute_display_widths("hello"), vec![1u8; 5]);
    }

    /// CJK ideographs are double-width.
    #[test]
    fn test_cjk_widths() {
        assert_eq!(compute_display_widths("你好"), vec![2u8; 2]);
    }

    /// Narrow and wide characters can be mixed in one string.
    #[test]
    fn test_mixed_widths() {
        let expected: Vec<u8> = vec![1, 1, 2, 2];
        assert_eq!(compute_display_widths("hi你好"), expected);
    }

    /// The total width is the sum of the per-character widths.
    #[test]
    fn test_display_width() {
        let cases = [("hello", 5usize), ("你好", 4), ("hi你好", 6)];
        for &(text, expected) in cases.iter() {
            assert_eq!(display_width(text), expected, "width of {:?}", text);
        }
    }

    /// A tab is reported as 8 columns.
    #[test]
    fn test_tab_width() {
        assert_eq!(compute_display_widths("\t"), vec![8u8]);
    }

    /// For ASCII text, columns and character indices coincide.
    #[test]
    fn test_display_col_to_char_range_ascii() {
        assert_eq!(display_col_to_char_range("hello", 1, 4), (1, 4));
    }

    /// Four columns of double-width text cover two characters.
    #[test]
    fn test_display_col_to_char_range_cjk() {
        assert_eq!(display_col_to_char_range("你好世界", 0, 4), (0, 2));
    }

    /// A range ending inside a wide character includes that character in full.
    #[test]
    fn test_display_col_to_char_range_mixed() {
        assert_eq!(display_col_to_char_range("hi你好", 1, 5), (1, 4));
    }

    /// An empty column range maps to an empty character range at that column.
    #[test]
    fn test_display_col_to_char_range_empty() {
        assert_eq!(display_col_to_char_range("hello", 0, 0), (0, 0));
        assert_eq!(display_col_to_char_range("hello", 2, 2), (2, 2));
    }
}
135}