ghostscope_ui/utils/
utf8.rs

1use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
2
/// Namespace for UTF-8 aware string helpers.
///
/// The type carries no data; every helper is an associated function that is
/// called as `UTF8Utils::name(...)`. The common traits are derived so the
/// marker can be printed, copied and default-constructed like any other
/// public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct UTF8Utils;
5
6impl UTF8Utils {
7    /// Convert character position to byte position in a UTF-8 string
8    pub fn char_pos_to_byte_pos(text: &str, char_pos: usize) -> usize {
9        text.char_indices()
10            .nth(char_pos)
11            .map_or(text.len(), |(pos, _)| pos)
12    }
13
14    /// Convert byte position to character position in a UTF-8 string
15    pub fn byte_pos_to_char_pos(text: &str, byte_pos: usize) -> usize {
16        text[..byte_pos.min(text.len())].chars().count()
17    }
18
19    /// Get the display width of a string (handling wide characters)
20    pub fn display_width(text: &str) -> usize {
21        text.width()
22    }
23
24    /// Truncate string to fit within a given display width
25    pub fn truncate_to_width(text: &str, max_width: usize) -> String {
26        let mut width = 0;
27        let mut result = String::new();
28
29        for ch in text.chars() {
30            let ch_width = ch.width().unwrap_or(0);
31            if width + ch_width > max_width {
32                break;
33            }
34            width += ch_width;
35            result.push(ch);
36        }
37
38        result
39    }
40
41    /// Split text at character boundaries, not byte boundaries
42    pub fn split_at_char_boundary(text: &str, char_index: usize) -> (&str, &str) {
43        let byte_index = Self::char_pos_to_byte_pos(text, char_index);
44        text.split_at(byte_index)
45    }
46
47    /// Check if a byte position is at a character boundary
48    pub fn is_char_boundary(text: &str, byte_pos: usize) -> bool {
49        text.is_char_boundary(byte_pos)
50    }
51
52    /// Find the next character boundary after a given byte position
53    pub fn next_char_boundary(text: &str, byte_pos: usize) -> usize {
54        let mut pos = byte_pos;
55        while pos < text.len() && !text.is_char_boundary(pos) {
56            pos += 1;
57        }
58        pos
59    }
60
61    /// Find the previous character boundary before a given byte position
62    pub fn prev_char_boundary(text: &str, byte_pos: usize) -> usize {
63        let mut pos = byte_pos;
64        while pos > 0 && !text.is_char_boundary(pos) {
65            pos -= 1;
66        }
67        pos
68    }
69
70    /// Count characters in a string (not bytes)
71    pub fn char_count(text: &str) -> usize {
72        text.chars().count()
73    }
74
75    /// Get the nth character from a string
76    pub fn nth_char(text: &str, n: usize) -> Option<char> {
77        text.chars().nth(n)
78    }
79
80    /// Pad string to a specific display width with spaces
81    pub fn pad_to_width(text: &str, width: usize) -> String {
82        let current_width = Self::display_width(text);
83        if current_width >= width {
84            text.to_string()
85        } else {
86            format!("{}{}", text, " ".repeat(width - current_width))
87        }
88    }
89
90    /// Check if a character is a word boundary
91    pub fn is_word_boundary(ch: char) -> bool {
92        ch.is_whitespace() || ch.is_ascii_punctuation()
93    }
94
95    /// Find the start of the current word at cursor position
96    pub fn find_word_start(text: &str, cursor_pos: usize) -> usize {
97        if cursor_pos == 0 {
98            return 0;
99        }
100
101        let chars: Vec<char> = text.chars().collect();
102        let mut pos = cursor_pos.saturating_sub(1);
103
104        // Skip current character if it's a word boundary
105        while pos > 0 && Self::is_word_boundary(chars[pos]) {
106            pos -= 1;
107        }
108
109        // Find the start of the word
110        while pos > 0 && !Self::is_word_boundary(chars[pos - 1]) {
111            pos -= 1;
112        }
113
114        pos
115    }
116
117    /// Find the end of the current word at cursor position
118    pub fn find_word_end(text: &str, cursor_pos: usize) -> usize {
119        let chars: Vec<char> = text.chars().collect();
120        let mut pos = cursor_pos;
121
122        if pos >= chars.len() {
123            return chars.len();
124        }
125
126        // Skip current character if it's a word boundary
127        while pos < chars.len() && Self::is_word_boundary(chars[pos]) {
128            pos += 1;
129        }
130
131        // Find the end of the word
132        while pos < chars.len() && !Self::is_word_boundary(chars[pos]) {
133            pos += 1;
134        }
135
136        pos
137    }
138}