1use unicode_segmentation::UnicodeSegmentation;
2use unicode_width::UnicodeWidthStr;
3
/// One fixed-width cell of a rendered row, as produced by `render_line`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Cell {
    /// The leading cell of a displayed character. `width` is the number of
    /// columns the character occupies; when it is greater than 1 the cell is
    /// followed by `width - 1` [`Cell::Continuation`] cells.
    Char { ch: char, width: u8 },
    /// Placeholder for the second and later columns of a wide character.
    Continuation,
    /// Padding used to fill a row out to the configured column count.
    Empty,
}
10
/// Layout settings consumed by the line renderer and the row counter.
#[derive(Debug, Clone)]
pub struct RenderOpts {
    /// Distance between tab stops, in columns. A value of 0 is treated as 1
    /// by the code that expands tabs.
    pub tab_width: u8,
    /// `true` to continue long lines on additional rows, `false` to chop
    /// them so that a line always yields a single row.
    pub wrap: bool,
    /// Width of one row, in cells.
    pub cols: u16,
}

impl Default for RenderOpts {
    /// Classic terminal defaults: 8-column tab stops, wrapping on, 80 columns.
    fn default() -> Self {
        RenderOpts {
            tab_width: 8,
            wrap: true,
            cols: 80,
        }
    }
}
23
24fn decode_cluster(bytes: &[u8], i: usize) -> Option<(&str, usize)> {
28 let max = (i + 4).min(bytes.len());
36 let mut end = i;
37 for try_end in (i + 1)..=max {
38 if std::str::from_utf8(&bytes[i..try_end]).is_ok() {
39 end = try_end;
40 break;
41 }
42 }
43 if end == i {
44 return None;
45 }
46
47 let mut probe_end = end;
52 loop {
53 let probe_max = (probe_end + 4).min(bytes.len());
55 let mut next_end = probe_end;
56 for try_end in (probe_end + 1)..=probe_max {
57 if std::str::from_utf8(&bytes[i..try_end]).is_ok() {
58 next_end = try_end;
59 break;
60 }
61 }
62 if next_end == probe_end {
63 break;
64 }
65 let candidate = std::str::from_utf8(&bytes[i..next_end]).unwrap();
66 let cluster_count = candidate.graphemes(true).count();
67 if cluster_count > 1 {
68 break;
70 }
71 probe_end = next_end;
72 }
73
74 Some((std::str::from_utf8(&bytes[i..probe_end]).unwrap(), probe_end - i))
75}
76
77pub fn render_line(bytes: &[u8], opts: &RenderOpts) -> Vec<Vec<Cell>> {
78 let cols = opts.cols as usize;
79 let mut rows: Vec<Vec<Cell>> = Vec::new();
80 let mut current: Vec<Cell> = Vec::with_capacity(cols);
81
82 fn push(current: &mut Vec<Cell>, rows: &mut Vec<Vec<Cell>>, cell: Cell, opts: &RenderOpts) {
83 if current.len() >= opts.cols as usize {
84 if opts.wrap {
85 let mut full = std::mem::replace(current, Vec::with_capacity(opts.cols as usize));
86 while full.len() < opts.cols as usize { full.push(Cell::Empty); }
87 rows.push(full);
88 } else {
89 return;
90 }
91 }
92 current.push(cell);
93 }
94
95 fn push_str(current: &mut Vec<Cell>, rows: &mut Vec<Vec<Cell>>, s: &str, opts: &RenderOpts) {
96 for c in s.chars() {
97 push(current, rows, Cell::Char { ch: c, width: 1 }, opts);
98 }
99 }
100
101 fn push_wide(
102 current: &mut Vec<Cell>,
103 rows: &mut Vec<Vec<Cell>>,
104 ch: char,
105 width: u8,
106 opts: &RenderOpts,
107 ) {
108 let cols = opts.cols as usize;
109 if current.len() + width as usize > cols {
111 if opts.wrap {
112 let mut full = std::mem::replace(current, Vec::with_capacity(cols));
113 while full.len() < cols { full.push(Cell::Empty); }
114 rows.push(full);
115 } else {
116 return; }
118 }
119 current.push(Cell::Char { ch, width });
120 for _ in 1..width {
121 current.push(Cell::Continuation);
122 }
123 }
124
125 let mut i = 0;
126 while i < bytes.len() {
127 let b = bytes[i];
128 if b == b'\t' {
129 let stop = opts.tab_width.max(1) as usize;
130 let cur_col = current.len();
131 let next_stop = ((cur_col / stop) + 1) * stop;
132 for _ in cur_col..next_stop {
133 push(&mut current, &mut rows, Cell::Char { ch: ' ', width: 1 }, opts);
134 }
135 i += 1;
136 } else if b == b'\n' {
137 i += 1;
138 } else if b < 0x20 || b == 0x7F {
139 let printable = if b == 0x7F { '?' } else { (b ^ 0x40) as char };
140 push(&mut current, &mut rows, Cell::Char { ch: '^', width: 1 }, opts);
141 push(&mut current, &mut rows, Cell::Char { ch: printable, width: 1 }, opts);
142 i += 1;
143 } else {
144 match decode_cluster(bytes, i) {
146 Some((cluster, consumed)) => {
147 let w = UnicodeWidthStr::width(cluster) as u8;
148 let base_char = cluster.chars().next().unwrap_or('\u{FFFD}');
149 if w == 0 {
150 push(&mut current, &mut rows, Cell::Char { ch: '\u{FFFD}', width: 1 }, opts);
152 } else {
153 push_wide(&mut current, &mut rows, base_char, w, opts);
154 }
155 i += consumed;
156 }
157 None => {
158 let s = format!("<{:02X}>", b);
160 push_str(&mut current, &mut rows, &s, opts);
161 i += 1;
162 }
163 }
164 }
165 }
166
167 while current.len() < cols {
168 current.push(Cell::Empty);
169 }
170 rows.push(current);
171 rows
172}
173
174pub fn count_rows(bytes: &[u8], opts: &RenderOpts) -> usize {
175 if !opts.wrap {
176 return 1;
177 }
178 let cols = opts.cols.max(1) as usize;
179 let mut col = 0usize;
180 let mut rows = 1usize;
181
182 let bump = |w: usize, col: &mut usize, rows: &mut usize| {
183 if *col + w > cols {
184 *rows += 1;
185 *col = 0;
186 }
187 *col += w;
188 };
189
190 let mut i = 0;
191 while i < bytes.len() {
192 let b = bytes[i];
193 if b == b'\t' {
194 let stop = opts.tab_width.max(1) as usize;
195 let next_stop = ((col / stop) + 1) * stop;
196 let advance = next_stop - col;
197 for _ in 0..advance {
199 bump(1, &mut col, &mut rows);
200 }
201 i += 1;
202 } else if b == b'\n' {
203 i += 1;
204 } else if b < 0x20 || b == 0x7F {
205 bump(1, &mut col, &mut rows); bump(1, &mut col, &mut rows); i += 1;
208 } else {
209 match decode_cluster(bytes, i) {
210 Some((cluster, consumed)) => {
211 let w = UnicodeWidthStr::width(cluster);
212 let w = if w == 0 { 1 } else { w };
213 bump(w, &mut col, &mut rows);
214 i += consumed;
215 }
216 None => {
217 for _ in 0..4 { bump(1, &mut col, &mut rows); }
219 i += 1;
220 }
221 }
222 }
223 }
224 rows
225}
226
// Unit tests for `render_line` and `count_rows`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds options with the given width and wrap mode and 8-column tabs.
    fn opts(cols: u16, wrap: bool) -> RenderOpts {
        RenderOpts { tab_width: 8, wrap, cols }
    }

    /// Shorthand for a single-column character cell.
    fn ch(c: char) -> Cell { Cell::Char { ch: c, width: 1 } }

    // A line shorter than the row is padded with `Cell::Empty`.
    #[test]
    fn ascii_short_line_pads_to_cols() {
        let rows = render_line(b"hi", &opts(5, true));
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0], vec![ch('h'), ch('i'), Cell::Empty, Cell::Empty, Cell::Empty]);
    }

    // A line that exactly fills the row must not spill into a second row.
    #[test]
    fn ascii_exact_width() {
        let rows = render_line(b"hello", &opts(5, true));
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0], vec![ch('h'), ch('e'), ch('l'), ch('l'), ch('o')]);
    }

    // Empty input still yields one fully padded row.
    #[test]
    fn empty_input_yields_one_empty_row() {
        let rows = render_line(b"", &opts(3, true));
        assert_eq!(rows, vec![vec![Cell::Empty, Cell::Empty, Cell::Empty]]);
    }

    // A tab at column 0 fills columns 0..8, so the next char lands at 8.
    #[test]
    fn tab_at_col_zero_expands_to_eight() {
        let rows = render_line(b"\tx", &opts(20, true));
        for (i, cell) in rows[0].iter().take(8).enumerate() {
            assert_eq!(*cell, ch(' '), "col {i} should be space");
        }
        assert_eq!(rows[0][8], ch('x'));
    }

    // "abc" occupies columns 0..3; the tab fills the five columns 3..8.
    #[test]
    fn tab_at_col_three_advances_to_next_stop() {
        let rows = render_line(b"abc\tx", &opts(20, true));
        assert_eq!(rows[0][0], ch('a'));
        assert_eq!(rows[0][2], ch('c'));
        for cell in rows[0].iter().skip(3).take(5) {
            assert_eq!(*cell, ch(' '));
        }
        assert_eq!(rows[0][8], ch('x'));
    }

    // A tab that starts exactly on a stop advances a full tab width.
    #[test]
    fn tab_at_col_eight_advances_to_sixteen() {
        let mut input = vec![b'a'; 8];
        input.push(b'\t');
        input.push(b'x');
        let rows = render_line(&input, &opts(20, true));
        for cell in rows[0].iter().skip(8).take(8) {
            assert_eq!(*cell, ch(' '));
        }
        assert_eq!(rows[0][16], ch('x'));
    }

    // Control bytes use caret notation: 0x00 -> "^@".
    #[test]
    fn null_renders_as_caret_at() {
        let rows = render_line(b"\0", &opts(5, true));
        assert_eq!(rows[0][0], ch('^'));
        assert_eq!(rows[0][1], ch('@'));
    }

    // 0x1B (ESC) -> "^[".
    #[test]
    fn esc_renders_as_caret_lbracket() {
        let rows = render_line(b"\x1b", &opts(5, true));
        assert_eq!(rows[0][0], ch('^'));
        assert_eq!(rows[0][1], ch('['));
    }

    // 0x7F (DEL) is special-cased to "^?".
    #[test]
    fn del_renders_as_caret_question() {
        let rows = render_line(b"\x7f", &opts(5, true));
        assert_eq!(rows[0][0], ch('^'));
        assert_eq!(rows[0][1], ch('?'));
    }

    // 0xFF can never appear in UTF-8; it is shown as "<FF>".
    #[test]
    fn invalid_utf8_byte_renders_as_angle_hex() {
        let rows = render_line(&[0xFF], &opts(8, true));
        assert_eq!(rows[0][0], ch('<'));
        assert_eq!(rows[0][1], ch('F'));
        assert_eq!(rows[0][2], ch('F'));
        assert_eq!(rows[0][3], ch('>'));
    }

    // 0xC3 is a lead byte with no continuation byte after it, so it is shown
    // as the hex escape "<C3>".
    #[test]
    fn partial_multibyte_each_byte_renders_separately() {
        let rows = render_line(&[0xC3], &opts(8, true));
        assert_eq!(rows[0][0], ch('<'));
        assert_eq!(rows[0][1], ch('C'));
        assert_eq!(rows[0][2], ch('3'));
        assert_eq!(rows[0][3], ch('>'));
    }

    // NOTE: despite the name, 'é' (U+00E9) is a two-byte UTF-8 sequence; what
    // is checked is that the single code point lands in one width-1 cell.
    #[test]
    fn single_byte_utf8_e_acute() {
        let rows = render_line("é".as_bytes(), &opts(5, true));
        assert_eq!(rows[0][0], Cell::Char { ch: 'é', width: 1 });
    }

    // A CJK ideograph is a width-2 cell followed by one continuation cell.
    #[test]
    fn cjk_char_takes_two_columns() {
        let rows = render_line("日".as_bytes(), &opts(5, true));
        assert_eq!(rows[0][0], Cell::Char { ch: '日', width: 2 });
        assert_eq!(rows[0][1], Cell::Continuation);
        assert_eq!(rows[0][2], Cell::Empty);
    }

    // Only the width is pinned here, not the stored char.
    #[test]
    fn emoji_takes_two_columns() {
        let rows = render_line("🦀".as_bytes(), &opts(5, true));
        assert!(matches!(rows[0][0], Cell::Char { width: 2, .. }));
        assert_eq!(rows[0][1], Cell::Continuation);
    }

    // 'e' + U+0301 (combining acute) form one cluster and occupy one cell;
    // the following cell is padding.
    #[test]
    fn combining_mark_folds_into_prior_cell() {
        let rows = render_line("e\u{0301}".as_bytes(), &opts(5, true));
        assert!(matches!(rows[0][0], Cell::Char { width: 1, .. }));
        assert_eq!(rows[0][1], Cell::Empty);
    }

    // Ten chars at four columns: 4 + 4 + 2, with the last row padded.
    #[test]
    fn wrap_long_line_into_multiple_rows() {
        let rows = render_line(b"abcdefghij", &opts(4, true));
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0], vec![ch('a'), ch('b'), ch('c'), ch('d')]);
        assert_eq!(rows[1], vec![ch('e'), ch('f'), ch('g'), ch('h')]);
        assert_eq!(rows[2], vec![ch('i'), ch('j'), Cell::Empty, Cell::Empty]);
    }

    // With wrapping off, everything past the row width is discarded.
    #[test]
    fn chop_long_line_truncates() {
        let rows = render_line(b"abcdefghij", &opts(4, false));
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0], vec![ch('a'), ch('b'), ch('c'), ch('d')]);
    }

    // "ab" leaves one free column in a 3-column row; the 2-column '日' cannot
    // be split, so row 0 is padded and the character starts row 1.
    #[test]
    fn wide_char_at_boundary_pushed_to_next_row() {
        let rows = render_line("ab日".as_bytes(), &opts(3, true));
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], vec![ch('a'), ch('b'), Cell::Empty]);
        assert_eq!(rows[1][0], Cell::Char { ch: '日', width: 2 });
        assert_eq!(rows[1][1], Cell::Continuation);
        assert_eq!(rows[1][2], Cell::Empty);
    }

    // `count_rows` must agree with the number of rows `render_line` emits.
    #[test]
    fn count_rows_matches_render_line_for_short() {
        let o = opts(80, true);
        let bytes = b"hello world";
        assert_eq!(count_rows(bytes, &o), render_line(bytes, &o).len());
    }

    // Same parity check for a line that wraps across several rows.
    #[test]
    fn count_rows_matches_render_line_for_long_wrap() {
        let o = opts(4, true);
        let bytes = b"abcdefghij";
        assert_eq!(count_rows(bytes, &o), render_line(bytes, &o).len());
    }

    // With wrapping off a line is always exactly one row.
    #[test]
    fn count_rows_chop_is_one() {
        let o = opts(4, false);
        let bytes = b"abcdefghij";
        assert_eq!(count_rows(bytes, &o), 1);
    }

    // Parity check for the wide-character-at-row-boundary case.
    #[test]
    fn count_rows_handles_wide_char() {
        let o = opts(3, true);
        let bytes = "ab日".as_bytes();
        assert_eq!(count_rows(bytes, &o), render_line(bytes, &o).len());
    }
}
418}