runefix_core/
grapheme.rs

1use unicode_segmentation::UnicodeSegmentation;
2use crate::width::get_display_width;
3
4/// Returns the display width (in columns) of each grapheme cluster in the input string.
5///
6/// This function segments the input string into Unicode grapheme clusters and computes
7/// the display width of each one individually. It is useful for scenarios like monospace
8/// text layout, visual alignment, or rendering terminals where East Asian characters
9/// and emoji take more than one column.
10///
11/// # Arguments
12///
13/// * `s` - The input string to analyze
14///
15/// # Returns
16///
17/// A vector of display widths (`usize`) for each grapheme cluster in order.
18///
19/// # Example
20///
21/// ```
22/// use runefix_core::display_widths;
23///
24/// let widths = display_widths("Hi,世界");
25/// assert_eq!(widths, vec![1, 1, 2, 2, 2]);
26/// ```
27pub fn display_widths(s: &str) -> Vec<usize> {
28    UnicodeSegmentation::graphemes(s, true)
29        .map(|g| get_display_width(g))
30        .collect()
31}
32
33/// Returns the total display width (in columns) of a string, based on grapheme clusters.
34///
35/// This function segments the input string into Unicode grapheme clusters and sums
36/// the display width of each one using [`display_width`]. The result reflects
37/// how much horizontal space the entire string occupies in a monospace terminal,
38/// accounting for wide characters such as CJK ideographs and emoji.
39///
40/// # Arguments
41///
42/// * `s` - The input string to measure
43///
44/// # Returns
45///
46/// The total display width of the string in terminal columns.
47///
48/// # Example
49///
50/// ```
51/// use runefix_core::display_width;
52///
53/// let width = display_width("Hi,世界");
54/// assert_eq!(width, 8); // 1 + 1 + 2 + 2 + 2
55/// ```
56pub fn display_width(s: &str) -> usize {
57    display_widths(s).iter().sum()
58}
59
60/// Returns all grapheme clusters in the input string as a vector of string slices.
61///
62/// This function uses Unicode text segmentation to split the input into grapheme clusters,
63/// which represent user-perceived characters. It correctly preserves multi-codepoint
64/// graphemes such as emoji with skin tone modifiers, ZWJ sequences, and composed Hangul.
65///
66/// Useful when you need to iterate over characters in a way that aligns with human perception.
67///
68/// # Arguments
69///
70/// * `s` - The input string to split
71///
72/// # Returns
73///
74/// A vector of `&str`, each representing a single grapheme cluster in order.
75///
76/// # Example
77///
78/// ```
79/// use runefix_core::split_graphemes;
80///
81/// let clusters = split_graphemes("Love👩‍❤️‍💋‍👨爱");
82/// assert_eq!(clusters, vec!["L", "o", "v", "e", "👩‍❤️‍💋‍👨", "爱"]);
83/// ```
84pub fn split_graphemes(s: &str) -> Vec<&str> {
85    UnicodeSegmentation::graphemes(s, true).collect()
86}
87
88/// Returns the display width of each grapheme cluster in the input string.
89///
90/// This function splits the string into Unicode grapheme clusters and pairs
91/// each one with its terminal display width (in columns). This is useful for
92/// visually aligned rendering, layout calculation, and Unicode debugging,
93/// especially with complex emoji or East Asian characters.
94///
95/// # Arguments
96///
97/// * `s` - The input string to analyze
98///
99/// # Returns
100///
101/// A vector of tuples, where each item is a grapheme cluster and its
102/// corresponding display width: `(&str, usize)`
103///
104/// # Example
105///
106/// ```
107/// use runefix_core::grapheme_widths;
108/// 
109/// let result = grapheme_widths("Hi,世界");
110/// assert_eq!(
111///     result,
112///     vec![("H", 1), ("i", 1), (",", 2), ("世", 2), ("界", 2)]
113/// );
114/// ```
115pub fn grapheme_widths(s: &str) -> Vec<(&str, usize)> {
116    UnicodeSegmentation::graphemes(s, true)
117        .map(|g| {
118            let width = get_display_width(g);
119            (g, width)
120        })
121        .collect()
122}
123
124/// Truncates a string by display width while preserving grapheme cluster boundaries.
125///
126/// This function ensures that wide characters such as emoji or CJK ideographs are
127/// never split in the middle. It safely cuts off the string so that its total
128/// display width does not exceed the given `max_width`, making it ideal for
129/// terminal or TUI rendering.
130///
131/// # Arguments
132///
133/// * `s` - The input string to truncate
134/// * `max_width` - Maximum allowed display width in terminal columns
135///
136/// # Returns
137///
138/// A string slice that fits within the specified display width without cutting graphemes.
139///
140/// # Example
141///
142/// ```
143/// use runefix_core::truncate_by_width;
144///
145/// let s = "Hi 👋,世界";
146/// let short = truncate_by_width(s, 6);
147/// assert_eq!(short, "Hi 👋");
148/// ```
149pub fn truncate_by_width(s: &str, max_width: usize) -> &str {
150    let mut total_width = 0;
151    let mut end_byte = 0;
152
153    for g in UnicodeSegmentation::graphemes(s, true) {
154        let w = get_display_width(g);
155
156        if total_width + w > max_width {
157            break;
158        }
159
160        total_width += w;
161        end_byte += g.len(); // Byte offset to cut safely
162    }
163
164    &s[..end_byte]
165}
166
167/// Splits a string into lines based on display width, preserving grapheme boundaries.
168///
169/// This function ensures that wide characters such as emoji, CJK ideographs, or
170/// fullwidth punctuation are not split mid-grapheme. It breaks the input string
171/// into a sequence of lines, each with a total display width that does not exceed
172/// the given `max_width`. Ideal for terminal word wrapping and monospace layout.
173///
174/// # Arguments
175///
176/// * `s` - The input string to wrap
177/// * `max_width` - Maximum display width (in columns) for each line
178///
179/// # Returns
180///
181/// A vector of strings, each representing a wrapped line within the given width.
182///
183/// # Example
184///
185/// ```
186/// use runefix_core::split_by_width;
187///
188/// let lines = split_by_width("Hello 👋 世界!", 5);
189/// assert_eq!(lines, vec!["Hello", " 👋 ", "世界", "!"]);
190/// ```
191pub fn split_by_width(s: &str, max_width: usize) -> Vec<String> {
192    let mut result = Vec::new();
193    let mut current_line = String::new();
194    let mut current_width = 0;
195
196    for g in UnicodeSegmentation::graphemes(s, true) {
197        let width: usize = get_display_width(g);
198
199        if current_width + width > max_width && !current_line.is_empty() {
200            result.push(current_line.clone());
201            current_line.clear();
202            current_width = 0;
203        }
204
205        current_line.push_str(g);
206        current_width += width;
207    }
208
209    if !current_line.is_empty() {
210        result.push(current_line);
211    }
212
213    result
214}