ratatui_core/buffer/cell_width.rs
1use unicode_width::UnicodeWidthStr;
2
/// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK (halfwidth dakuten, `ﾞ`).
///
/// Counted separately by [`count_halfwidth_sound_marks`] so that each occurrence
/// contributes one extra terminal cell to a string's width.
const HALFWIDTH_KATAKANA_VOICED_SOUND_MARK: char = '\u{FF9E}';
/// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK (halfwidth handakuten, `ﾟ`).
///
/// Treated exactly like [`HALFWIDTH_KATAKANA_VOICED_SOUND_MARK`] when computing widths.
const HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK: char = '\u{FF9F}';
7
/// Returns the display width of a value in terminal cells.
///
/// This trait provides a unified way to compute cell widths for both string content
/// and [`Cell`](super::Cell)s:
///
/// - **`str`**: width is derived from [`UnicodeWidthStr`], with a fast path for single-byte ASCII
///   characters and a terminal-compatibility adjustment for halfwidth katakana
///   dakuten/handakuten (`U+FF9E`/`U+FF9F`), each of which is counted as one extra cell.
/// - **[`Cell`](super::Cell)**: returns the
///   [`CellDiffOption::ForcedWidth`](super::CellDiffOption::ForcedWidth) when set, otherwise falls
///   back to the width of the cell's symbol.
pub trait CellWidth {
    /// Returns the display width in terminal cells.
    ///
    /// The unit is terminal columns, not bytes or `char`s: a wide character such as `あ`
    /// reports `2`, and an empty string reports `0`.
    fn cell_width(&self) -> u16;
}
23
24impl CellWidth for str {
25 /// Returns the display width in terminal cells.
26 ///
27 /// ## Note
28 ///
29 /// Control characters are filtered out by `Span::styled_graphemes()` and
30 /// `Buffer::set_stringn()` before reaching this point. `Cell::set_symbol()`
31 /// and `set_char()` do not filter, but those are low-level APIs where the
32 /// caller is responsible for providing valid content. Single-byte control
33 /// characters that slip through will be reported as width 1.
34 fn cell_width(&self) -> u16 {
35 if self.len() == 1 {
36 debug_assert!(
37 !self.as_bytes()[0].is_ascii_control(),
38 "control character passed to cell_width without filtering"
39 );
40 1
41 } else {
42 let width = self.width() as u16;
43 width.saturating_add(count_halfwidth_sound_marks(self))
44 }
45 }
46}
47
48/// Returns how many halfwidth dakuten/handakuten marks are present.
49///
50/// `unicode-width` reports U+FF9E (゙) and U+FF9F (゚) as zero-width because
51/// they have the `Grapheme_Extend` property, but terminals typically render
52/// them as independent halfwidth characters occupying one cell each.
53///
54/// We compensate for that terminal behavior by adding `+1` for each occurrence.
55/// This does not affect the combining variants U+3099 and U+309A, which keep
56/// their normal combining behavior and width handling through `unicode-width`.
57///
58/// # References
59///
60/// - Ruby reline PR [#832](https://github.com/ruby/reline/pull/832): Fix cursor positioning for
61/// invalid halfwidth dakuten/handakuten
62/// - Microsoft Terminal Issue [#18087](https://github.com/microsoft/terminal/issues/18087):
63/// Half-width Katakana and (han)dakuten should not overlap
64/// - [Unicode L2/19-039](https://www.unicode.org/L2/L2019/19039-grapheme-break.pdf): Grapheme break
65/// property for U+FF9E and U+FF9F
66fn count_halfwidth_sound_marks(s: &str) -> u16 {
67 s.chars()
68 .filter(|c| {
69 matches!(
70 *c,
71 HALFWIDTH_KATAKANA_VOICED_SOUND_MARK | HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK
72 )
73 })
74 .count() as u16
75}
76
#[cfg(test)]
mod tests {
    use super::*;

    // Several inputs below differ only in code points that render identically
    // (halfwidth vs. fullwidth katakana, spacing vs. combining sound marks).
    // They are spelled with `\u{...}` escapes so that the exact sequence under
    // test is visible in the source and cannot be altered by Unicode
    // normalization in an editor or tool. Because the source text itself stays
    // NFC-clean, no `clippy::unicode_not_nfc` allowance is required.

    /// Shorthand for `s.cell_width()`.
    fn width(s: &str) -> u16 {
        s.cell_width()
    }

    /// Measures a single `char` by encoding it into a stack buffer first.
    fn width_char(c: char) -> u16 {
        let mut buf = [0; 4];
        width(c.encode_utf8(&mut buf))
    }

    #[test]
    fn wide_char() {
        assert_eq!(width("あ"), 2);
    }

    #[test]
    fn empty() {
        assert_eq!(width(""), 0);
    }

    #[test]
    fn halfwidth_dakuten_alone() {
        assert_eq!(width_char(HALFWIDTH_KATAKANA_VOICED_SOUND_MARK), 1); // U+FF9E
    }

    #[test]
    fn halfwidth_handakuten_alone() {
        assert_eq!(width_char(HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK), 1); // U+FF9F
    }

    #[test]
    fn halfwidth_katakana_with_dakuten() {
        // Valid combinations (halfwidth katakana + non-combining dakuten)
        assert_eq!(width("\u{FF76}\u{FF9E}"), 2); // halfwidth KA + U+FF9E
        assert_eq!(width("\u{FF7B}\u{FF9E}"), 2); // halfwidth SA + U+FF9E
    }

    #[test]
    fn halfwidth_katakana_with_handakuten() {
        // Valid combinations (halfwidth katakana + non-combining handakuten)
        assert_eq!(width("\u{FF8A}\u{FF9F}"), 2); // halfwidth HA + U+FF9F
        assert_eq!(width("\u{FF8B}\u{FF9F}"), 2); // halfwidth HI + U+FF9F
    }

    #[test]
    fn non_katakana_with_halfwidth_dakuten() {
        // Non-katakana characters + halfwidth dakuten.
        // These form valid grapheme clusters but are linguistically incorrect.
        // The dakuten still takes 1 column width regardless.
        assert_eq!(width("a\u{FF9E}"), 2); // ASCII (1) + dakuten (1)
        assert_eq!(width("1\u{FF9F}"), 2); // Digit (1) + handakuten (1)
        assert_eq!(width("あ\u{FF9E}"), 3); // Hiragana (2) + dakuten (1)
        assert_eq!(width("紅\u{FF9E}"), 3); // Kanji (2) + dakuten (1)
    }

    #[test]
    fn combining_dakuten_no_special_handling() {
        // Combining dakuten (U+3099) should follow unicode-width behavior.
        assert_eq!(width("\u{FF76}\u{3099}"), 1); // halfwidth KA + U+3099
        assert_eq!(width("\u{30AB}\u{3099}"), 2); // fullwidth KA + U+3099
    }

    #[test]
    fn combining_handakuten_no_special_handling() {
        // Combining handakuten (U+309A) should follow unicode-width behavior.
        assert_eq!(width("\u{FF8A}\u{309A}"), 1); // halfwidth HA + U+309A
        assert_eq!(width("\u{30CF}\u{309A}"), 2); // fullwidth HA + U+309A
    }

    #[test]
    fn mixed_text_unchanged() {
        assert_eq!(width("a"), 1);
        assert_eq!(width("あ"), 2);
        assert_eq!(width("\u{FF76}"), 1); // halfwidth KA
        assert_eq!(width("\u{30AB}"), 2); // fullwidth KA
        assert_eq!(width("a\u{FF76}\u{FF9E}b"), 4); // a(1) + halfwidth GA(2) + b(1)
        assert_eq!(width("あ\u{FF76}\u{FF9E}"), 4); // あ(2) + halfwidth GA(2)
    }
}