Skip to main content

ratatui_core/buffer/
cell_width.rs

1use unicode_width::UnicodeWidthStr;
2
/// `U+FF9E` HALFWIDTH KATAKANA VOICED SOUND MARK, the standalone halfwidth dakuten.
const HALFWIDTH_KATAKANA_VOICED_SOUND_MARK: char = '\u{FF9E}';
/// `U+FF9F` HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK, the standalone halfwidth handakuten.
const HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK: char = '\u{FF9F}';
7
/// Measures how many terminal cells a value occupies when displayed.
///
/// The trait gives string content and [`Cell`](super::Cell)s a single, shared way to report
/// their cell width:
///
/// - **`str`**: the width comes from [`UnicodeWidthStr`], short-circuited for single-byte ASCII
///   characters and adjusted for terminal compatibility so that the halfwidth katakana
///   dakuten/handakuten (`U+FF9E`/`U+FF9F`) are counted.
/// - **[`Cell`](super::Cell)**: the
///   [`CellDiffOption::ForcedWidth`](super::CellDiffOption::ForcedWidth) value is used when it is
///   set; otherwise the width of the cell's symbol is reported.
pub trait CellWidth {
    /// Returns the number of terminal cells needed to display `self`.
    fn cell_width(&self) -> u16;
}
23
24impl CellWidth for str {
25    /// Returns the display width in terminal cells.
26    ///
27    /// ## Note
28    ///
29    /// Control characters are filtered out by `Span::styled_graphemes()` and
30    /// `Buffer::set_stringn()` before reaching this point. `Cell::set_symbol()`
31    /// and `set_char()` do not filter, but those are low-level APIs where the
32    /// caller is responsible for providing valid content. Single-byte control
33    /// characters that slip through will be reported as width 1.
34    fn cell_width(&self) -> u16 {
35        if self.len() == 1 {
36            debug_assert!(
37                !self.as_bytes()[0].is_ascii_control(),
38                "control character passed to cell_width without filtering"
39            );
40            1
41        } else {
42            let width = self.width() as u16;
43            width.saturating_add(count_halfwidth_sound_marks(self))
44        }
45    }
46}
47
48/// Returns how many halfwidth dakuten/handakuten marks are present.
49///
50/// `unicode-width` reports U+FF9E (゙) and U+FF9F (゚) as zero-width because
51/// they have the `Grapheme_Extend` property, but terminals typically render
52/// them as independent halfwidth characters occupying one cell each.
53///
54/// We compensate for that terminal behavior by adding `+1` for each occurrence.
55/// This does not affect the combining variants U+3099 and U+309A, which keep
56/// their normal combining behavior and width handling through `unicode-width`.
57///
58/// # References
59///
60/// - Ruby reline PR [#832](https://github.com/ruby/reline/pull/832): Fix cursor positioning for
61///   invalid halfwidth dakuten/handakuten
62/// - Microsoft Terminal Issue [#18087](https://github.com/microsoft/terminal/issues/18087):
63///   Half-width Katakana and (han)dakuten should not overlap
64/// - [Unicode L2/19-039](https://www.unicode.org/L2/L2019/19039-grapheme-break.pdf): Grapheme break
65///   property for U+FF9E and U+FF9F
66fn count_halfwidth_sound_marks(s: &str) -> u16 {
67    s.chars()
68        .filter(|c| {
69            matches!(
70                *c,
71                HALFWIDTH_KATAKANA_VOICED_SOUND_MARK | HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK
72            )
73        })
74        .count() as u16
75}
76
#[cfg(test)]
mod tests {
    //! Width tests for `str::cell_width`.
    //!
    //! Every literal whose meaning depends on an exact, un-normalized code point sequence
    //! (halfwidth katakana, halfwidth sound marks, combining sound marks) is written with
    //! `\u{...}` escapes. The glyphs are visually indistinguishable from their precomposed or
    //! fullwidth counterparts and are silently rewritten by Unicode normalization, which would
    //! change what the assertions measure.

    use super::*;

    /// Shorthand for `s.cell_width()`.
    fn width(s: &str) -> u16 {
        s.cell_width()
    }

    /// Measures a single `char` by encoding it as UTF-8 first.
    fn width_char(c: char) -> u16 {
        let mut buf = [0; 4];
        width(c.encode_utf8(&mut buf))
    }

    #[test]
    fn wide_char() {
        assert_eq!("あ".cell_width(), 2);
    }

    #[test]
    fn empty() {
        assert_eq!("".cell_width(), 0);
    }

    #[test]
    fn halfwidth_dakuten_alone() {
        assert_eq!(width_char(HALFWIDTH_KATAKANA_VOICED_SOUND_MARK), 1); // U+FF9E
    }

    #[test]
    fn halfwidth_handakuten_alone() {
        assert_eq!(width_char(HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK), 1); // U+FF9F
    }

    #[test]
    fn halfwidth_katakana_with_dakuten() {
        // Valid combinations (halfwidth katakana + non-combining dakuten)
        assert_eq!(width("\u{FF76}\u{FF9E}"), 2); // halfwidth KA + U+FF9E
        assert_eq!(width("\u{FF7B}\u{FF9E}"), 2); // halfwidth SA + U+FF9E
    }

    #[test]
    fn halfwidth_katakana_with_handakuten() {
        // Valid combinations (halfwidth katakana + non-combining handakuten)
        assert_eq!(width("\u{FF8A}\u{FF9F}"), 2); // halfwidth HA + U+FF9F
        assert_eq!(width("\u{FF8B}\u{FF9F}"), 2); // halfwidth HI + U+FF9F
    }

    #[test]
    fn non_katakana_with_halfwidth_dakuten() {
        // Non-katakana characters + halfwidth dakuten.
        // These form valid grapheme clusters but are linguistically incorrect.
        // The dakuten still takes 1 column width regardless.
        assert_eq!(width("a\u{FF9E}"), 2); // ASCII (1) + dakuten (1)
        assert_eq!(width("1\u{FF9F}"), 2); // Digit (1) + handakuten (1)
        assert_eq!(width("あ\u{FF9E}"), 3); // Hiragana (2) + dakuten (1)
        assert_eq!(width("紅\u{FF9E}"), 3); // Kanji (2) + dakuten (1)
    }

    #[test]
    fn combining_dakuten_no_special_handling() {
        // Combining dakuten (U+3099) should follow unicode-width behavior.
        assert_eq!(width("\u{FF76}\u{3099}"), 1); // halfwidth KA + U+3099
        assert_eq!(width("\u{30AB}\u{3099}"), 2); // fullwidth KA + U+3099
    }

    #[test]
    fn combining_handakuten_no_special_handling() {
        // Combining handakuten (U+309A) should follow unicode-width behavior.
        assert_eq!(width("\u{FF8A}\u{309A}"), 1); // halfwidth HA + U+309A
        assert_eq!(width("\u{30CF}\u{309A}"), 2); // fullwidth HA + U+309A
    }

    #[test]
    fn mixed_text_unchanged() {
        assert_eq!(width("a"), 1);
        assert_eq!(width("あ"), 2);
        assert_eq!(width("\u{FF76}"), 1); // halfwidth KA
        assert_eq!(width("\u{30AB}"), 2); // fullwidth KA
        assert_eq!(width("a\u{FF76}\u{FF9E}b"), 4); // a(1) + halfwidth KA with dakuten(2) + b(1)
        assert_eq!(width("あ\u{FF76}\u{FF9E}"), 4); // あ(2) + halfwidth KA with dakuten(2)
    }
}