runefix_core/grapheme.rs
1use unicode_segmentation::UnicodeSegmentation;
2use crate::width::get_display_width;
3
4/// Returns the display width (in columns) of each grapheme cluster in the input string.
5///
6/// This function segments the input string into Unicode grapheme clusters and computes
7/// the display width of each one individually. It is useful for scenarios like monospace
8/// text layout, visual alignment, or rendering terminals where East Asian characters
9/// and emoji take more than one column.
10///
11/// # Arguments
12///
13/// * `s` - The input string to analyze
14///
15/// # Returns
16///
17/// A vector of display widths (`usize`) for each grapheme cluster in order.
18///
19/// # Example
20///
21/// ```
22/// use runefix_core::display_widths;
23///
24/// let widths = display_widths("Hi,世界");
25/// assert_eq!(widths, vec![1, 1, 2, 2, 2]);
26/// ```
27pub fn display_widths(s: &str) -> Vec<usize> {
28 UnicodeSegmentation::graphemes(s, true)
29 .map(|g| get_display_width(g))
30 .collect()
31}
32
33/// Returns the total display width (in columns) of a string, based on grapheme clusters.
34///
35/// This function segments the input string into Unicode grapheme clusters and sums
36/// the display width of each one using [`display_width`]. The result reflects
37/// how much horizontal space the entire string occupies in a monospace terminal,
38/// accounting for wide characters such as CJK ideographs and emoji.
39///
40/// # Arguments
41///
42/// * `s` - The input string to measure
43///
44/// # Returns
45///
46/// The total display width of the string in terminal columns.
47///
48/// # Example
49///
50/// ```
51/// use runefix_core::display_width;
52///
53/// let width = display_width("Hi,世界");
54/// assert_eq!(width, 8); // 1 + 1 + 2 + 2 + 2
55/// ```
56pub fn display_width(s: &str) -> usize {
57 display_widths(s).iter().sum()
58}
59
60/// Returns all grapheme clusters in the input string as a vector of string slices.
61///
62/// This function uses Unicode text segmentation to split the input into grapheme clusters,
63/// which represent user-perceived characters. It correctly preserves multi-codepoint
64/// graphemes such as emoji with skin tone modifiers, ZWJ sequences, and composed Hangul.
65///
66/// Useful when you need to iterate over characters in a way that aligns with human perception.
67///
68/// # Arguments
69///
70/// * `s` - The input string to split
71///
72/// # Returns
73///
74/// A vector of `&str`, each representing a single grapheme cluster in order.
75///
76/// # Example
77///
78/// ```
79/// use runefix_core::split_graphemes;
80///
81/// let clusters = split_graphemes("Love👩❤️💋👨爱");
82/// assert_eq!(clusters, vec!["L", "o", "v", "e", "👩❤️💋👨", "爱"]);
83/// ```
84pub fn split_graphemes(s: &str) -> Vec<&str> {
85 UnicodeSegmentation::graphemes(s, true).collect()
86}
87
88/// Returns the display width of each grapheme cluster in the input string.
89///
90/// This function splits the string into Unicode grapheme clusters and pairs
91/// each one with its terminal display width (in columns). This is useful for
92/// visually aligned rendering, layout calculation, and Unicode debugging,
93/// especially with complex emoji or East Asian characters.
94///
95/// # Arguments
96///
97/// * `s` - The input string to analyze
98///
99/// # Returns
100///
101/// A vector of tuples, where each item is a grapheme cluster and its
102/// corresponding display width: `(&str, usize)`
103///
104/// # Example
105///
106/// ```
107/// use runefix_core::grapheme_widths;
108///
109/// let result = grapheme_widths("Hi,世界");
110/// assert_eq!(
111/// result,
112/// vec![("H", 1), ("i", 1), (",", 2), ("世", 2), ("界", 2)]
113/// );
114/// ```
115pub fn grapheme_widths(s: &str) -> Vec<(&str, usize)> {
116 UnicodeSegmentation::graphemes(s, true)
117 .map(|g| {
118 let width = get_display_width(g);
119 (g, width)
120 })
121 .collect()
122}
123
124/// Truncates a string by display width while preserving grapheme cluster boundaries.
125///
126/// This function ensures that wide characters such as emoji or CJK ideographs are
127/// never split in the middle. It safely cuts off the string so that its total
128/// display width does not exceed the given `max_width`, making it ideal for
129/// terminal or TUI rendering.
130///
131/// # Arguments
132///
133/// * `s` - The input string to truncate
134/// * `max_width` - Maximum allowed display width in terminal columns
135///
136/// # Returns
137///
138/// A string slice that fits within the specified display width without cutting graphemes.
139///
140/// # Example
141///
142/// ```
143/// use runefix_core::truncate_by_width;
144///
145/// let s = "Hi 👋,世界";
146/// let short = truncate_by_width(s, 6);
147/// assert_eq!(short, "Hi 👋");
148/// ```
149pub fn truncate_by_width(s: &str, max_width: usize) -> &str {
150 let mut total_width = 0;
151 let mut end_byte = 0;
152
153 for g in UnicodeSegmentation::graphemes(s, true) {
154 let w = get_display_width(g);
155
156 if total_width + w > max_width {
157 break;
158 }
159
160 total_width += w;
161 end_byte += g.len(); // Byte offset to cut safely
162 }
163
164 &s[..end_byte]
165}
166
167/// Splits a string into lines based on display width, preserving grapheme boundaries.
168///
169/// This function ensures that wide characters such as emoji, CJK ideographs, or
170/// fullwidth punctuation are not split mid-grapheme. It breaks the input string
171/// into a sequence of lines, each with a total display width that does not exceed
172/// the given `max_width`. Ideal for terminal word wrapping and monospace layout.
173///
174/// # Arguments
175///
176/// * `s` - The input string to wrap
177/// * `max_width` - Maximum display width (in columns) for each line
178///
179/// # Returns
180///
181/// A vector of strings, each representing a wrapped line within the given width.
182///
183/// # Example
184///
185/// ```
186/// use runefix_core::split_by_width;
187///
188/// let lines = split_by_width("Hello 👋 世界!", 5);
189/// assert_eq!(lines, vec!["Hello", " 👋 ", "世界", "!"]);
190/// ```
191pub fn split_by_width(s: &str, max_width: usize) -> Vec<String> {
192 let mut result = Vec::new();
193 let mut current_line = String::new();
194 let mut current_width = 0;
195
196 for g in UnicodeSegmentation::graphemes(s, true) {
197 let width: usize = get_display_width(g);
198
199 if current_width + width > max_width && !current_line.is_empty() {
200 result.push(current_line.clone());
201 current_line.clear();
202 current_width = 0;
203 }
204
205 current_line.push_str(g);
206 current_width += width;
207 }
208
209 if !current_line.is_empty() {
210 result.push(current_line);
211 }
212
213 result
214}