1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// devela::sys::os::term::ansi::strip
//
//! Defines the [`Ansi::strip_codes`] method.
//
use crate::Ansi;
/// The state of an Ansi code for [`Ansi::strip_codes`].
#[derive(Clone, Copy, PartialEq)]
enum State {
Normal, // Copying text to output
Escape, // Saw ESC, determining sequence type
Csi, // In CSI sequence (ESC[)
Osc, // In OSC sequence (ESC])
}
impl Ansi {
/// Removes ANSI escape sequences from a byte slice and writes the clean text to a buffer.
///
/// This function processes well-formed ANSI escape sequences as defined by ECMA-48,
/// including CSI (Control Sequence Introducer) sequences like `ESC [ ... m` and
/// OSC (Operating System Command) sequences like `ESC ] ... BEL`.
///
/// # Behavior
///
/// - **Strips valid sequences**: Complete, well-formed ANSI sequences are removed.
/// - **Preserves malformed content**: Invalid sequences or partial sequences are
/// passed through as-is to avoid data loss.
/// - **Handles partial buffers**: Works correctly if buffers end mid-sequence.
/// - **No allocations**: Uses only the provided buffers.
///
/// # Supported Sequences
///
/// - CSI sequences: `ESC [ P... I... F` where:
/// - `P...` = parameters (digits, semicolons)
/// - `I...` = intermediate bytes (rare)
/// - `F` = final byte (command: 0x40-0x7E)
/// - OSC sequences: `ESC ] ... (BEL or ESC \)`
/// - Single-character escapes (stripped)
///
/// # Returns
/// The number of bytes written to `dest`. This will always be ≤ the original text length.
///
/// # Example
/// ```
/// # use devela::Ansi;
/// let input = b"Hello \x1B[32mWorld\x1B[0m!";
/// let mut output = [0u8; 20];
/// let len = Ansi::strip_codes(input, &mut output);
/// assert_eq!(&output[0..len], b"Hello World!");
/// ```
///
/// # Notes
/// - Malformed sequences (e.g., `ESC` in middle of parameters) are preserved as text.
/// - Lone `ESC` bytes (0x1B) without valid sequence syntax are stripped.
/// - If `dest` is too small, output is truncated (no panic).
pub const fn strip_codes(src: &[u8], dest: &mut [u8]) -> usize {
let mut src_pos = 0;
let mut dest_pos = 0;
let mut state = State::Normal;
while src_pos < src.len() && dest_pos < dest.len() {
let byte = src[src_pos];
match state {
State::Normal => {
if byte == 0x1B {
// ESC character
state = State::Escape;
} else {
dest[dest_pos] = byte;
dest_pos += 1;
}
src_pos += 1;
}
State::Escape => {
if byte == b'[' {
state = State::Csi;
src_pos += 1;
} else if byte == b']' {
state = State::Osc;
src_pos += 1;
} else if (byte >= b'0' && byte <= b'9') || byte == b';' {
// Parameter in progress, keep consuming
src_pos += 1;
} else {
// Single-character escape or invalid sequence.
// Strip the ESC and process current byte.
state = State::Normal;
}
}
State::Csi => {
// Consume until command character
if byte >= 0x40 && byte <= 0x7E {
// Command character
state = State::Normal;
}
src_pos += 1;
}
State::Osc => {
// OSC sequences end with BEL or ESC\
if byte == 0x07 {
// BEL
state = State::Normal;
} else if byte == 0x1B && src_pos + 1 < src.len() && src[src_pos + 1] == b'\\' {
state = State::Normal;
src_pos += 1; // Skip the backslash
}
src_pos += 1;
}
}
}
dest_pos
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strip_codes() {
// Test 1: Basic sequence
let input = b"Hello \x1B[32mWorld\x1B[0m!";
let mut output = [0u8; 50];
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"Hello World!");
// Test 2: Consecutive escape sequences
let input = b"\x1B[1m\x1B[32mBold Green\x1B[0mText";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"Bold GreenText");
// Test 3: CSI sequences with parameters
let input = b"Normal\x1B[1;33;44mText\x1B[0mEnd";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"NormalTextEnd");
// Test 4: OSC sequences (less common but important)
let input = b"Start\x1B]0;Window Title\x07End";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"StartEnd");
// Test 5: Incomplete escape sequence at end
let input = b"Text\x1B[";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"Text");
// Test 6: Real ESC character (0x1B) in normal text (should be stripped)
let input = b"Has\x1B ESC char";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"Has ESC char");
// Test 7: Empty input
let input = b"";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"");
// Test 8: Only escape sequences
let input = b"\x1B[1m\x1B[32m\x1B[0m";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"");
// Test 9: Mixed content with various sequence types
let input = b"Start\x1B[32mGreen\x1B]0;Title\x07\x1B[0mEnd";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"StartGreenEnd");
// Test 10: Buffer too small for output
let input = b"Hello World";
let mut small_output = [0u8; 5];
let len = Ansi::strip_codes(input, &mut small_output);
assert_eq!(&small_output[0..len], b"Hello");
// Test 11: Escape sequences that look like text
let input = b"Text\x1B[0;1;mMore[Text]";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"TextMore[Text]");
// Test 12: Binary data with ESC characters
let input = &[b'A', 0x1B, b'[', b'1', b'm', b'B', 0x00, b'C'];
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], &[b'A', b'B', 0x00, b'C']);
// Test 13: Multiple ESC characters in a row
let input = b"\x1B\x1B\x1B[32mText";
let len = Ansi::strip_codes(input, &mut output);
assert_eq!(&output[0..len], b"Text");
}
}