ansi_width/
lib.rs

1#![doc = include_str!("../README.md")]
2
/// The escape character (U+001B) that introduces every escape sequence
/// recognised by this crate.
const ESC: char = '\u{1b}';
5
6/// Calculate the width of a string.
7///
8/// See the [crate documentation](crate) for more information.
9pub fn ansi_width(s: &str) -> usize {
10    let mut width = 0;
11    let mut chars = s.chars();
12
13    // This lint is a false positive, because we use the iterator later, leading to
14    // ownership issues if we follow the lint.
15    #[allow(clippy::while_let_on_iterator)]
16    while let Some(c) = chars.next() {
17        // ESC starts escape sequences, so we need to take characters until the
18        // end of the escape sequence.
19        if c == ESC {
20            let Some(c) = chars.next() else {
21                break;
22            };
23            match c {
24                // String terminator character: ends other sequences
25                // We probably won't encounter this but it's here for completeness.
26                // Or for if we get passed invalid codes.
27                '\\' => {
28                    // ignore
29                }
30                // Control Sequence Introducer: continue until `\x40-\x7C`
31                '[' => while !matches!(chars.next(), Some('\x40'..='\x7C') | None) {},
32                // Operating System Command: continue until ST
33                ']' => {
34                    let mut last = c;
35                    while let Some(new) = chars.next() {
36                        if new == '\x07' || (new == '\\' && last == ESC) {
37                            break;
38                        }
39                        last = new;
40                    }
41                }
42                // We don't know what character it is, best bet is to fall back to unicode width
43                // The ESC is assumed to have 0 width in this case.
44                _ => {
45                    width += unicode_width::UnicodeWidthChar::width(c).unwrap_or(0);
46                }
47            }
48        } else {
49            // If it's a normal character outside an escape sequence, use the
50            // unicode width.
51            width += unicode_width::UnicodeWidthChar::width(c).unwrap_or(0);
52        }
53    }
54    width
55}
56
#[cfg(test)]
mod tests {
    use super::ansi_width;

    /// Checks every `(input, expected width)` pair against `ansi_width`.
    fn check(cases: &[(&str, usize)]) {
        for &(input, expected) in cases {
            assert_eq!(ansi_width(input), expected, "input: {input:?}");
        }
    }

    /// Plain ASCII text is one column per character.
    #[test]
    fn ascii() {
        check(&[("", 0), ("hello", 5), ("hello world", 11), ("WOW!", 4)]);
    }

    /// C0 control characters occupy no columns.
    #[test]
    fn c0_characters() {
        check(&[
            // Bell
            ("\x07", 0),
            // Backspace
            ("\x08", 0),
            // Tab
            ("\t", 0),
        ]);
    }

    /// Colour sequences around text do not add to the width.
    #[test]
    fn some_escape_codes() {
        check(&[
            // Simple
            ("\u{1b}[34mHello\u{1b}[0m", 5),
            // Red
            ("\u{1b}[31mRed\u{1b}[0m", 3),
        ]);
    }

    /// An OSC 8 hyperlink terminated with `ESC \` only counts its label.
    #[test]
    fn hyperlink() {
        let link = "\x1b]8;;http://example.com\x1b\\This is a link\x1b]8;;\x1b\\";
        check(&[(link, 14)]);
    }

    /// An OSC 8 hyperlink terminated with BEL instead of `ESC \` only counts
    /// its label as well.
    #[test]
    fn nonstandard_hyperlink() {
        let link = "\x1b]8;;file://coreutils.md\x07coreutils.md\x1b]8;;\x07";
        check(&[(link, 12)]);
    }
}