atomcode_tuix/render/cell.rs
1// crates/atomcode-tuix/src/render/cell.rs
2//
3// Ink-style cell buffer for footer/menu rendering.
4//
5// The row-level diff we had before was correct but coarse: any byte change
6// in a row triggered a full-row re-emit. Combined with UTF-8 rule characters
// (`─` is 3 bytes × ~200 cols × 2 rules ≈ 1200 bytes of rule per redraw) and
8// footer-height oscillation when the slash palette opens/closes, every
9// menu toggle pushed 1800+ bytes to Mac Terminal.app's GUI pipeline — the
10// threshold where its coalesce + repaint latency becomes user-visible.
11//
12// Ink (Claude Code's renderer) works on cells: (char, style) pairs indexed
13// by absolute terminal position. New frame → diff cell-by-cell → emit
14// minimal patches. A row whose status stayed "glm-5 · ~/project" across
15// frames contributes zero bytes. Rule middles stay identical after a
16// single-column input change → zero bytes. This module gives us that
17// primitive.
18//
19// Scope: footer + slash palette only. Body content (streaming text, tool
20// output) keeps the pure-append path — body lines enter scrollback and
21// never need a diff cycle.
22
23use crossterm::style::{Color, SetForegroundColor};
24use std::io::Write as _;
25
/// Visual attributes that can vary per cell in our footer. Kept minimal
/// on purpose: footer uses fg color, bold, reverse-video (for the
/// palette's selected row) and faint. Extending this to bg / underline
/// / italic is a future concern — adding fields is the mechanical part,
/// but every field widens the diff equality surface and the SGR state
/// machine's emit path, so we don't preemptively carry what we don't use.
///
/// `CellStyle::default()` (no colour, every flag off) is also the state
/// `emit_sgr_transition` assumes when it is given no previous style.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CellStyle {
    /// Foreground colour via crossterm SGR. `None` = terminal default
    /// foreground. A `Some` → `None` change is not serialised as
    /// `\x1b[39m`: `emit_sgr_transition` routes it through a full
    /// `\x1b[0m` reset and then re-applies the attributes still wanted.
    pub fg: Option<Color>,
    /// SGR bold (`\x1b[1m`). Parsed input clears it with SGR 22 or
    /// SGR 0; on output it is cleared through a full `\x1b[0m` reset.
    pub bold: bool,
    /// SGR reverse video (`\x1b[7m`). Used for the highlighted menu
    /// row. Parsed input clears it with SGR 27 or SGR 0; on output it
    /// is cleared through a full `\x1b[0m` reset.
    pub reverse: bool,
    /// SGR faint / decreased intensity (`\x1b[2m`). Renders the current
    /// fg at ~50% intensity — terminal-theme-aware muting that adapts
    /// to both light and dark schemes (unlike a fixed DarkGrey which
    /// vanishes on some palettes). SGR 22 is the shared "normal
    /// intensity" code for both bold and faint, so the serialiser never
    /// uses it: a faint→off transition goes through a full reset.
    pub faint: bool,
}
50
/// One screen cell: glyph + its visual attributes. Equality is the
/// derived field-wise comparison of `ch`, `style` and `width`; that is
/// the comparison `diff_cell_frames` uses to decide whether a cell
/// needs a patch.
///
/// `width` is the **display width** in terminal columns: 1 for ASCII
/// and other narrow glyphs, 2 for CJK / emoji / other wide glyphs,
/// and 0 for **continuation cells** — placeholder cells that follow a
/// wide glyph to keep the invariant `cell_index == terminal_column`.
/// Without continuation cells, a row holding "你是谁" (3 wide chars =
/// 6 terminal columns) would be modelled as only 3 cells, so patch
/// columns derived from cell indices would disagree with where the
/// terminal cursor really is, and each newly typed glyph would be
/// painted over part of an earlier one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Cell {
    /// The glyph to draw. Continuation cells carry a space here, but
    /// the serialisers never emit it (they skip `width == 0`).
    pub ch: char,
    /// Attributes the glyph is drawn with.
    pub style: CellStyle,
    /// Display width in terminal columns; 0 marks a continuation cell.
    pub width: u8,
}
71
72impl Default for Cell {
73 /// Default blank cell = ASCII space, width 1, default style.
74 fn default() -> Self {
75 Self {
76 ch: ' ',
77 style: CellStyle::default(),
78 width: 1,
79 }
80 }
81}
82
83impl Cell {
84 /// Blank narrow cell — space, width 1. Used for padding and as
85 /// the diff's "erase" glyph.
86 pub fn blank() -> Self {
87 Self::default()
88 }
89
90 /// Continuation cell — placeholder for the 2nd (or 3rd, if any)
91 /// terminal column occupied by a wide glyph. `width = 0` tells
92 /// `serialize_patches` to skip emit for this cell: the wide
93 /// glyph emitted in the cell immediately before has already
94 /// advanced the terminal cursor past this column.
95 pub fn continuation() -> Self {
96 Self {
97 ch: ' ',
98 style: CellStyle::default(),
99 width: 0,
100 }
101 }
102}
103
/// Fixed soft-tab width — `\t` expands to this many space cells in
/// `push_str_cells` / `push_str_cells_sgr` when a caller pushes a
/// string that slipped past higher-level tab-aware paths. The expansion
/// is a constant count of spaces, not an alignment to the next tab
/// stop. Matches claude-code / CC-style tooling conventions.
const SOFT_TAB_WIDTH: usize = 4;
108
109/// Append each char of `s` as cells, all sharing `style`. Wide chars
110/// (CJK, emoji, etc.) expand to one real cell carrying the glyph +
111/// `(display_width - 1)` continuation cells so `cell_index ==
112/// terminal_column` holds across the row — critical for the cell-diff
113/// to produce correct patches.
114///
115/// Control chars that would mis-align the cell model vs the terminal
116/// are normalised here:
117/// - `\n` / `\r`: dropped. Multi-line content must be split by the
118/// caller (`push_body_text` does this); writing a bare LF under
119/// raw-mode drops a row without CR, and a bare CR returns to col
120/// 0 mid-row — both produce the "staircase" bug.
121/// - `\t`: expanded to SOFT_TAB_WIDTH spaces so cell col == terminal
122/// col. Without this, the terminal jumps to its hardware tab stop
123/// (col 9/17/25/…) while our cell model advances 1 col per `\t`
124/// cell, and subsequent diffs patch the wrong columns.
125pub fn push_str_cells(row: &mut Vec<Cell>, s: &str, style: &CellStyle) {
126 for ch in s.chars() {
127 if ch == '\n' || ch == '\r' {
128 continue;
129 }
130 if ch == '\t' {
131 for _ in 0..SOFT_TAB_WIDTH {
132 row.push(Cell {
133 ch: ' ',
134 style: style.clone(),
135 width: 1,
136 });
137 }
138 continue;
139 }
140 let w = unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1);
141 if w == 0 {
142 // Zero-width (combining marks, control chars). Caller has
143 // already scrubbed real controls; skip here rather than
144 // emit a phantom cell that diff can't align.
145 continue;
146 }
147 row.push(Cell {
148 ch,
149 style: style.clone(),
150 width: w as u8,
151 });
152 for _ in 1..w {
153 row.push(Cell::continuation());
154 }
155 }
156}
157
158/// Like [`push_str_cells`] but parses embedded SGR escape sequences
159/// (`\x1b[...m`) inline, mutating a working `CellStyle` so subsequent
160/// cells pick up the colour / bold / faint / reverse attributes the
161/// terminal would otherwise paint via raw ANSI. Returns the style
162/// state at end-of-input so a caller wrapping a single physical line
163/// into multiple chunks can carry attributes across chunk boundaries
164/// (e.g. `\x1b[31m` on one chunk and `\x1b[39m` on the next).
165///
166/// Why this exists: the retained renderer paints from a cell grid
167/// rather than streaming raw bytes to stdout, so SGR sequences that
168/// survive [`crate::sanitize::scrub_controls_keep_sgr`] would
169/// otherwise land as literal `^[[31m` characters in cells. This
170/// function is the cell-pipeline equivalent of alt-screen's
171/// `truncate_to_width_sgr_aware` — it understands SGR enough to
172/// translate it into `CellStyle` mutations on the way in.
173///
174/// Non-SGR CSI sequences (cursor moves, DSR, etc.) are silently
175/// dropped — they should have been scrubbed upstream; this is
176/// belt-and-suspenders.
177pub fn push_str_cells_sgr(
178 row: &mut Vec<Cell>,
179 s: &str,
180 mut working_style: CellStyle,
181) -> CellStyle {
182 let mut chars = s.chars().peekable();
183 while let Some(ch) = chars.next() {
184 if ch == '\x1b' {
185 if chars.peek() == Some(&'[') {
186 chars.next();
187 let mut params = String::new();
188 let mut final_byte: Option<char> = None;
189 while let Some(&p) = chars.peek() {
190 chars.next();
191 if ('\x40'..='\x7E').contains(&p) {
192 final_byte = Some(p);
193 break;
194 }
195 params.push(p);
196 }
197 if final_byte == Some('m') {
198 apply_sgr_params(¶ms, &mut working_style);
199 }
200 }
201 continue;
202 }
203 if ch == '\n' || ch == '\r' {
204 continue;
205 }
206 if ch == '\t' {
207 for _ in 0..SOFT_TAB_WIDTH {
208 row.push(Cell {
209 ch: ' ',
210 style: working_style.clone(),
211 width: 1,
212 });
213 }
214 continue;
215 }
216 let w = unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1);
217 if w == 0 {
218 continue;
219 }
220 row.push(Cell {
221 ch,
222 style: working_style.clone(),
223 width: w as u8,
224 });
225 for _ in 1..w {
226 row.push(Cell::continuation());
227 }
228 }
229 working_style
230}
231
232/// Parse a `;`-separated SGR parameter list and fold each recognised
233/// code into `style`. The crossterm colour variants chosen here mirror
234/// the SGR-to-name mapping that `serialize_row` uses to emit cells
235/// back to the terminal — so a row built from `\x1b[31m…` round-trips
236/// to `\x1b[31m…` on output, and the terminal's theme palette gets to
237/// pick the actual shade rather than us hard-coding RGB.
238///
239/// Unknown / unsupported codes (256-colour `38;5;N`, RGB `38;2;R;G;B`,
240/// background colours, italic, underline) are silently skipped —
241/// they're outside the cosmetic surface `CellStyle` currently
242/// represents, so picking up an LLM-emitted underline would just be
243/// lost on the retained path. Adding fields to `CellStyle` is the
244/// trigger for extending this match.
245fn apply_sgr_params(params: &str, style: &mut CellStyle) {
246 // `\x1b[m` (empty params) means "reset" — same as `\x1b[0m`.
247 if params.is_empty() {
248 *style = CellStyle::default();
249 return;
250 }
251 for code in params.split(';') {
252 // `\x1b[;31m` (leading empty) also resets before applying.
253 if code.is_empty() {
254 *style = CellStyle::default();
255 continue;
256 }
257 let Ok(n) = code.parse::<u16>() else { continue };
258 match n {
259 0 => *style = CellStyle::default(),
260 1 => style.bold = true,
261 2 => style.faint = true,
262 7 => style.reverse = true,
263 22 => {
264 // SGR 22 = normal intensity, clears BOTH bold and faint.
265 style.bold = false;
266 style.faint = false;
267 }
268 27 => style.reverse = false,
269 30 => style.fg = Some(Color::Black),
270 31 => style.fg = Some(Color::DarkRed),
271 32 => style.fg = Some(Color::DarkGreen),
272 33 => style.fg = Some(Color::DarkYellow),
273 34 => style.fg = Some(Color::DarkBlue),
274 35 => style.fg = Some(Color::DarkMagenta),
275 36 => style.fg = Some(Color::DarkCyan),
276 37 => style.fg = Some(Color::Grey),
277 39 => style.fg = None,
278 90 => style.fg = Some(Color::DarkGrey),
279 91 => style.fg = Some(Color::Red),
280 92 => style.fg = Some(Color::Green),
281 93 => style.fg = Some(Color::Yellow),
282 94 => style.fg = Some(Color::Blue),
283 95 => style.fg = Some(Color::Magenta),
284 96 => style.fg = Some(Color::Cyan),
285 97 => style.fg = Some(Color::White),
286 _ => {}
287 }
288 }
289}
290
/// A single cell's worth of change: "put this cell at absolute position
/// (row, col)". `serialize_patches` only emits a cursor move when a
/// patch does not sit where the previous glyph left the cursor, and only
/// emits SGR when the style changes, so runs of adjacent same-style
/// patches stay cheap. Rows/cols are 1-indexed to match ANSI
/// (`\x1b[row;col H`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Patch {
    /// 1-indexed terminal row.
    pub row: u16,
    /// 1-indexed terminal column.
    pub col: u16,
    /// The cell to paint at that position. A continuation cell
    /// (`width == 0`) may appear here; the serialiser skips it.
    pub cell: Cell,
}
301
302/// Slice-based cell-diff for the retained-mode `Screen` buffer.
303/// Both frames are `[Vec<Cell>]` indexed by **screen row** (0..H-1),
304/// with each inner `Vec<Cell>` indexed by **screen col** (0..W-1).
305/// Unlike `diff_cells`, this variant doesn't allocate a hashmap per
306/// frame — Screen always knows all its rows upfront, so a contiguous
307/// slice is both faster and maps 1:1 onto the 2D `cells[row][col]`
308/// access pattern.
309///
310/// Emits patches with **1-indexed** (row, col) matching ANSI cursor
311/// addressing. When one frame is shorter than the other (shouldn't
312/// happen in practice if both come from the same `Screen`, but keep
313/// the robustness for safety), missing rows / columns are treated as
314/// blank so the "other" frame's content generates explicit patches.
315pub fn diff_cell_frames(prev: &[Vec<Cell>], next: &[Vec<Cell>]) -> Vec<Patch> {
316 let mut patches = Vec::new();
317 let max_rows = prev.len().max(next.len());
318 let blank = Cell::blank();
319 for r in 0..max_rows {
320 let p = prev.get(r).map(Vec::as_slice).unwrap_or(&[]);
321 let n = next.get(r).map(Vec::as_slice).unwrap_or(&[]);
322 let max_cols = p.len().max(n.len());
323 for c in 0..max_cols {
324 let pc = p.get(c).unwrap_or(&blank);
325 let nc = n.get(c).unwrap_or(&blank);
326 if pc != nc {
327 patches.push(Patch {
328 row: (r + 1) as u16,
329 col: (c + 1) as u16,
330 cell: nc.clone(),
331 });
332 }
333 }
334 }
335 patches
336}
337
338/// Serialise patches into ANSI bytes with an SGR state machine: emit
339/// cursor-position only when we're jumping, emit SGR only when the
340/// outgoing cell's style differs from the last one we set, and run-pack
341/// adjacent same-style patches into contiguous character streams.
342///
343/// Ends with `\x1b[0m` so the caller's subsequent writes (body text,
344/// cursor positioning, etc.) start from a clean SGR state — leaving a
345/// bold/reverse bit set across paint boundaries was a class of rare
346/// but hard-to-reproduce "random colour leak" bugs in the old path.
347pub fn serialize_patches(patches: &[Patch]) -> Vec<u8> {
348 if patches.is_empty() {
349 return Vec::new();
350 }
351
352 let mut out = Vec::with_capacity(patches.len() * 8);
353 let mut current_style: Option<CellStyle> = None;
354 let mut expected_cursor: Option<(u16, u16)> = None;
355 let mut emitted_any_sgr = false;
356
357 for patch in patches {
358 // Continuation cell: the wide glyph in the previous cell has
359 // already advanced the terminal cursor past this column. Emit
360 // nothing — writing here would clobber the wide glyph's right
361 // half *and* scramble our cursor model.
362 if patch.cell.width == 0 {
363 continue;
364 }
365
366 if expected_cursor != Some((patch.row, patch.col)) {
367 let _ = write!(out, "\x1b[{};{}H", patch.row, patch.col);
368 expected_cursor = Some((patch.row, patch.col));
369 }
370
371 if current_style.as_ref() != Some(&patch.cell.style) {
372 let before = out.len();
373 emit_sgr_transition(&mut out, current_style.as_ref(), &patch.cell.style);
374 if out.len() > before {
375 emitted_any_sgr = true;
376 }
377 current_style = Some(patch.cell.style.clone());
378 }
379
380 let mut buf = [0u8; 4];
381 let encoded = patch.cell.ch.encode_utf8(&mut buf);
382 out.extend_from_slice(encoded.as_bytes());
383
384 // Cursor advances by the glyph's display width. For narrow
385 // cells this is +1 (the common case), for wide cells (CJK,
386 // emoji) it's +2 — matching what the terminal actually does
387 // so the next patch's `expected_cursor` comparison is sound.
388 if let Some((r, c)) = expected_cursor {
389 expected_cursor = Some((r, c + patch.cell.width as u16));
390 }
391 }
392
393 // Final `\x1b[0m` only if we ever turned an attribute on — otherwise
394 // we'd leak a pointless reset into the stream every time the footer
395 // is pure-default-style (all-blank padding, plain rule without
396 // colour, etc.). The legacy `row_to_bytes` case exercises this in
397 // its tests.
398 if emitted_any_sgr {
399 out.extend_from_slice(b"\x1b[0m");
400 }
401
402 out
403}
404
405/// Serialise a single row of cells into ANSI bytes **without any cursor
406/// positioning**. Used by the scrollback-push path (write row to stdout
407/// at the current cursor, then let `\n` advance). Skips continuation
408/// cells; closes with `\x1b[0m` iff any SGR was emitted so subsequent
409/// writes start from a clean state.
410pub fn serialize_row(row: &[Cell]) -> Vec<u8> {
411 let mut out = Vec::with_capacity(row.len() * 4);
412 let mut current_style: Option<CellStyle> = None;
413 let mut emitted_any_sgr = false;
414 for cell in row {
415 if cell.width == 0 {
416 continue;
417 }
418 if current_style.as_ref() != Some(&cell.style) {
419 let before = out.len();
420 emit_sgr_transition(&mut out, current_style.as_ref(), &cell.style);
421 if out.len() > before {
422 emitted_any_sgr = true;
423 }
424 current_style = Some(cell.style.clone());
425 }
426 let mut buf = [0u8; 4];
427 let encoded = cell.ch.encode_utf8(&mut buf);
428 out.extend_from_slice(encoded.as_bytes());
429 }
430 if emitted_any_sgr {
431 out.extend_from_slice(b"\x1b[0m");
432 }
433 out
434}
435
436/// Emit the minimal SGR sequence to move from `from` style to `to` style.
437/// Uses reset-and-reapply whenever a "sticky" attribute (bold/reverse)
438/// needs clearing; per-attr toggles (`\x1b[22m` for bold off, `\x1b[27m`
439/// for reverse off) are respected by modern terminals but reset+reapply
440/// is shorter when multiple attributes change at once.
441fn emit_sgr_transition(out: &mut Vec<u8>, from: Option<&CellStyle>, to: &CellStyle) {
442 let from_default = CellStyle::default();
443 let from = from.unwrap_or(&from_default);
444
445 // Determine if any attribute is being turned OFF — if so, cheapest
446 // path is reset everything and reapply the ON set. If only
447 // additive, use targeted enables.
448 let bold_off = from.bold && !to.bold;
449 let reverse_off = from.reverse && !to.reverse;
450 // SGR 22 ("normal intensity") clears both bold AND faint — there is
451 // no per-attribute toggle for faint. So a faint→off transition
452 // always goes through full reset to avoid clobbering bold state.
453 let faint_off = from.faint && !to.faint;
454 let fg_change = from.fg != to.fg;
455
456 let needs_reset = bold_off
457 || reverse_off
458 || faint_off
459 || (from.fg.is_some() && to.fg.is_none());
460
461 if needs_reset {
462 out.extend_from_slice(b"\x1b[0m");
463 // After reset, nothing is on — apply `to`'s positive attrs.
464 if to.bold {
465 out.extend_from_slice(b"\x1b[1m");
466 }
467 if to.faint {
468 out.extend_from_slice(b"\x1b[2m");
469 }
470 if to.reverse {
471 out.extend_from_slice(b"\x1b[7m");
472 }
473 if let Some(c) = to.fg {
474 let _ = write!(out, "{}", SetForegroundColor(c));
475 }
476 } else {
477 // Additive path — current attributes stay, just flip on whatever
478 // `to` adds.
479 if !from.bold && to.bold {
480 out.extend_from_slice(b"\x1b[1m");
481 }
482 if !from.faint && to.faint {
483 out.extend_from_slice(b"\x1b[2m");
484 }
485 if !from.reverse && to.reverse {
486 out.extend_from_slice(b"\x1b[7m");
487 }
488 if fg_change {
489 if let Some(c) = to.fg {
490 let _ = write!(out, "{}", SetForegroundColor(c));
491 } else {
492 // Should have been caught by needs_reset, but defensive.
493 out.extend_from_slice(b"\x1b[39m");
494 }
495 }
496 }
497}
498
499#[cfg(test)]
500mod tests {
501 use super::*;
502
503 fn cyan() -> Color {
504 Color::Cyan
505 }
506
507 fn style_bold_cyan() -> CellStyle {
508 CellStyle {
509 fg: Some(cyan()),
510 bold: true,
511 reverse: false,
512 faint: false,
513 }
514 }
515
516 #[test]
517 fn cell_equality_is_field_wise() {
518 let a = Cell {
519 ch: 'x',
520 style: style_bold_cyan(),
521 width: 1,
522 };
523 let b = Cell {
524 ch: 'x',
525 style: style_bold_cyan(),
526 width: 1,
527 };
528 assert_eq!(a, b);
529 let c = Cell {
530 ch: 'y',
531 style: style_bold_cyan(),
532 width: 1,
533 };
534 assert_ne!(a, c);
535 }
536
537 #[test]
538 fn push_str_cells_spreads_one_char_per_cell() {
539 let mut row = Vec::new();
540 push_str_cells(&mut row, "ab", &CellStyle::default());
541 assert_eq!(row.len(), 2);
542 assert_eq!(row[0].ch, 'a');
543 assert_eq!(row[1].ch, 'b');
544 }
545
546 /// Uses 0-indexed slice input, produces 1-indexed (row, col)
547 /// patches matching ANSI cursor addressing convention.
548 #[test]
549 fn diff_cell_frames_produces_one_indexed_coords() {
550 let row: Vec<Cell> = "ab"
551 .chars()
552 .map(|ch| Cell {
553 ch,
554 style: Default::default(),
555 width: 1,
556 })
557 .collect();
558 let mut changed = row.clone();
559 changed[0].ch = 'X';
560 let prev = vec![row.clone()];
561 let next = vec![changed];
562 let patches = diff_cell_frames(&prev, &next);
563 assert_eq!(patches.len(), 1);
564 assert_eq!(patches[0].row, 1, "slice row 0 -> ANSI row 1");
565 assert_eq!(patches[0].col, 1, "slice col 0 -> ANSI col 1");
566 assert_eq!(patches[0].cell.ch, 'X');
567 }
568
569 /// Empty frames produce zero patches.
570 #[test]
571 fn diff_cell_frames_empty_frames() {
572 let patches = diff_cell_frames(&[], &[]);
573 assert!(patches.is_empty());
574 }
575
576 #[test]
577 fn diff_shorter_next_emits_blanks_for_trailing() {
578 // prev has 5 cells, next has 2 — the 3 tail cells in prev need
579 // blanking patches so leftover glyphs get overwritten.
580 let prev_row: Vec<Cell> = "hello"
581 .chars()
582 .map(|ch| Cell {
583 ch,
584 style: Default::default(),
585 width: 1,
586 })
587 .collect();
588 let next_row: Vec<Cell> = "he"
589 .chars()
590 .map(|ch| Cell {
591 ch,
592 style: Default::default(),
593 width: 1,
594 })
595 .collect();
596 let prev = vec![prev_row];
597 let next = vec![next_row];
598 let patches = diff_cell_frames(&prev, &next);
599 assert_eq!(patches.len(), 3);
600 for p in &patches {
601 assert_eq!(p.cell, Cell::blank());
602 }
603 }
604
605 #[test]
606 fn serialize_empty_patches_emits_nothing() {
607 assert!(serialize_patches(&[]).is_empty());
608 }
609
610 #[test]
611 fn serialize_single_patch_emits_cursor_plus_char() {
612 let p = Patch {
613 row: 10,
614 col: 5,
615 cell: Cell {
616 ch: 'x',
617 style: Default::default(),
618 width: 1,
619 },
620 };
621 let bytes = serialize_patches(std::slice::from_ref(&p));
622 let s = String::from_utf8(bytes).unwrap();
623 assert!(s.contains("\x1b[10;5H"));
624 assert!(s.contains('x'));
625 // Default-style cell → no SGR was turned on, so no trailing
626 // \x1b[0m is needed (would be a wasted 4 bytes per emit).
627 assert!(!s.contains("\x1b[0m"));
628 }
629
630 #[test]
631 fn serialize_final_reset_on_styled_patches() {
632 // When a patch carries a non-default style, the emit path MUST
633 // close with \x1b[0m so subsequent writes start clean.
634 let p = Patch {
635 row: 1,
636 col: 1,
637 cell: Cell {
638 ch: 'x',
639 style: style_bold_cyan(),
640 width: 1,
641 },
642 };
643 let bytes = serialize_patches(std::slice::from_ref(&p));
644 let s = String::from_utf8(bytes).unwrap();
645 assert!(s.ends_with("\x1b[0m"));
646 }
647
648 #[test]
649 fn serialize_adjacent_cells_skip_cursor_move() {
650 // Two patches at (5, 1) and (5, 2) with same default style —
651 // second should NOT emit a cursor move (cursor auto-advanced)
652 // AND no final reset (default style, no SGR on).
653 let p1 = Patch {
654 row: 5,
655 col: 1,
656 cell: Cell {
657 ch: 'a',
658 style: Default::default(),
659 width: 1,
660 },
661 };
662 let p2 = Patch {
663 row: 5,
664 col: 2,
665 cell: Cell {
666 ch: 'b',
667 style: Default::default(),
668 width: 1,
669 },
670 };
671 let bytes = serialize_patches(&[p1, p2]);
672 let s = String::from_utf8(bytes).unwrap();
673 // Exactly one CSI: `\x1b[5;1H`. No SGR, no final reset.
674 assert_eq!(s.matches("\x1b[").count(), 1);
675 }
676
677 #[test]
678 fn serialize_style_change_only_emits_sgr_once() {
679 // Two patches at (5,1) and (5,2), second changes to bold —
680 // should emit one SGR transition, not two.
681 let p1 = Patch {
682 row: 5,
683 col: 1,
684 cell: Cell {
685 ch: 'a',
686 style: Default::default(),
687 width: 1,
688 },
689 };
690 let p2 = Patch {
691 row: 5,
692 col: 2,
693 cell: Cell {
694 ch: 'b',
695 style: CellStyle {
696 fg: None,
697 bold: true,
698 reverse: false,
699 faint: false,
700 },
701 width: 1,
702 },
703 };
704 let bytes = serialize_patches(&[p1, p2]);
705 let s = String::from_utf8(bytes).unwrap();
706 assert!(s.contains("\x1b[1m"), "expected bold SGR, got: {:?}", s);
707 }
708
709 /// Faint cells emit SGR 2 — theme-aware muting for hint/status text.
710 /// Final reset must close the run because faint is "sticky" until
711 /// SGR 22 / SGR 0 clears it.
712 #[test]
713 fn serialize_faint_emits_sgr_two_and_final_reset() {
714 let p = Patch {
715 row: 1,
716 col: 1,
717 cell: Cell {
718 ch: 'h',
719 style: CellStyle {
720 fg: None,
721 bold: false,
722 reverse: false,
723 faint: true,
724 },
725 width: 1,
726 },
727 };
728 let bytes = serialize_patches(std::slice::from_ref(&p));
729 let s = String::from_utf8(bytes).unwrap();
730 assert!(s.contains("\x1b[2m"), "expected faint SGR, got: {:?}", s);
731 assert!(s.ends_with("\x1b[0m"), "faint cell must close with reset");
732 }
733
734 /// Faint→non-faint transition routes through full reset (SGR 0)
735 /// rather than per-attribute toggle, because SGR 22 ("normal
736 /// intensity") would also clobber bold if present. Reset path is
737 /// what `emit_sgr_transition` already uses for bold-off and
738 /// reverse-off — extending to faint-off keeps the invariant.
739 #[test]
740 fn serialize_faint_off_goes_through_reset() {
741 let faint = Patch {
742 row: 1,
743 col: 1,
744 cell: Cell {
745 ch: 'a',
746 style: CellStyle {
747 fg: None,
748 bold: false,
749 reverse: false,
750 faint: true,
751 },
752 width: 1,
753 },
754 };
755 let plain = Patch {
756 row: 1,
757 col: 2,
758 cell: Cell {
759 ch: 'b',
760 style: CellStyle::default(),
761 width: 1,
762 },
763 };
764 let bytes = serialize_patches(&[faint, plain]);
765 let s = String::from_utf8(bytes).unwrap();
766 // \x1b[2m for faint cell → \x1b[0m before the plain cell.
767 assert!(s.contains("\x1b[2m"));
768 // The plain cell must be preceded by a reset, not just \x1b[22m.
769 let reset_idx = s
770 .match_indices("\x1b[0m")
771 .map(|(i, _)| i)
772 .find(|&i| i < s.find('b').unwrap())
773 .expect("expected mid-stream reset before plain cell");
774 let _ = reset_idx;
775 }
776
777 // ── Unicode edge-case regression tests ─────────────────────────
778 //
779 // These pin down how `push_str_cells` / `diff_cells` / `serialize_patches`
780 // treat "tricky" Unicode input. For each scenario we either:
781 // (a) assert the behaviour we DO support, or
782 // (b) document a known limitation with `#[ignore]` + a doc
783 // comment explaining what actually happens + why we accept
784 // it for now.
785 //
786 // Run only this group: `cargo test -p atomcode-tuix --lib unicode_`
787 // ──────────────────────────────────────────────────────────────
788
789 /// Baseline: CJK ideographs expand correctly (1 real cell + 1
790 /// continuation per char). Covers "你是谁" → 6 cells total,
791 /// model col matching terminal col exactly.
792 #[test]
793 fn unicode_cjk_ideograph_expands_to_two_cells() {
794 let mut row = Vec::new();
795 push_str_cells(&mut row, "你是谁", &CellStyle::default());
796 assert_eq!(row.len(), 6, "3 CJK chars × (1 real + 1 cont) = 6 cells");
797
798 // Real cells carry the glyph + width 2.
799 assert_eq!(row[0].ch, '你');
800 assert_eq!(row[0].width, 2);
801 assert_eq!(row[2].ch, '是');
802 assert_eq!(row[2].width, 2);
803 assert_eq!(row[4].ch, '谁');
804 assert_eq!(row[4].width, 2);
805
806 // Continuation cells: width 0, glyph = space (harmless if
807 // something ever did try to serialise them).
808 for i in [1, 3, 5] {
809 assert_eq!(row[i].width, 0, "cell {} should be continuation", i);
810 }
811 }
812
813 /// Emoji like 😀 are Unicode "wide" (East Asian Width Wide/Full).
814 /// Single code-point emoji should behave like CJK — 1 real + 1
815 /// continuation.
816 #[test]
817 fn unicode_single_codepoint_emoji_expands_to_two_cells() {
818 let mut row = Vec::new();
819 push_str_cells(&mut row, "😀", &CellStyle::default());
820 assert_eq!(row.len(), 2);
821 assert_eq!(row[0].ch, '😀');
822 assert_eq!(row[0].width, 2);
823 assert_eq!(row[1].width, 0);
824 }
825
826 /// **Known limitation**: ZWJ-sequence emoji (family, profession,
827 /// flag-of-england-style tag sequences, skin-tone modifiers) are
828 /// composed of multiple Unicode code points joined by U+200D (ZWJ)
829 /// or other joiners. Each code point gets its own `unicode_width`
830 /// lookup and our model treats them independently.
831 ///
832 /// Example: "👨👩👧" (man + ZWJ + woman + ZWJ + girl)
833 /// - Terminal displays: 1 glyph occupying 2 columns
834 /// - `unicode_width` per codepoint: 2 + 0 + 2 + 0 + 2 = 6
835 /// - Our model: 3 real cells (w=2 each) + 2 skipped (w=0 ZWJ) = 3 real wide glyph cells
836 /// - Total real cells: 3 with width 2 = 6 terminal cols claimed
837 ///
838 /// If the terminal actually renders the ZWJ sequence as a single
839 /// 2-col glyph, our cursor advances 6 while terminal advances 2,
840 /// drift = 4. Next character lands 4 cols too far right.
841 ///
842 /// This test pins down the **current** behaviour so we notice if
843 /// we ever silently change it; it doesn't prescribe what's "right"
844 /// because the fix requires a grapheme segmenter (unicode-segmentation
845 /// crate) that we're not bringing in yet.
846 #[test]
847 fn unicode_zwj_sequence_is_not_grapheme_aware_known_limitation() {
848 let mut row = Vec::new();
849 // family emoji: man + ZWJ + woman + ZWJ + girl
850 push_str_cells(&mut row, "👨\u{200D}👩\u{200D}👧", &CellStyle::default());
851 // 3 wide base chars → 3 real + 3 continuation = 6
852 // 2 ZWJ (w=0) → skipped
853 // Real cells + continuations only.
854 let real_cells = row.iter().filter(|c| c.width > 0).count();
855 let cont_cells = row.iter().filter(|c| c.width == 0).count();
856 eprintln!(
857 "[UNICODE DIAG] ZWJ family: real={} cont={} total={} (terminal would show 1 glyph = 2 cols)",
858 real_cells, cont_cells, row.len()
859 );
860 // Exact counts: 3 real + 3 continuation (ZWJ are width-0, skipped
861 // by push_str_cells' early `continue`).
862 assert_eq!(real_cells, 3);
863 assert_eq!(cont_cells, 3);
864 assert_eq!(row.len(), 6);
865 // → Known drift: model says 6 cols occupied, terminal shows 2.
866 }
867
868 /// **Known limitation**: skin-tone modifiers (👍🏽) — base emoji
869 /// U+1F44D followed by Fitzpatrick modifier U+1F3FD. Terminal
870 /// typically renders as one 2-col glyph.
871 ///
872 /// Our model:
873 /// - base: width 2 → 1 real + 1 cont
874 /// - modifier: `unicode_width` returns 2 (wide) → 1 real + 1 cont
875 /// - total: 4 cells, model claims 4 cols; terminal uses 2.
876 ///
877 /// Drift 2 per skin-toned emoji. Same grapheme-segmenter fix.
878 #[test]
879 fn unicode_skin_tone_modifier_not_segmented_known_limitation() {
880 let mut row = Vec::new();
881 push_str_cells(&mut row, "👍🏽", &CellStyle::default());
882 let real_cells = row.iter().filter(|c| c.width > 0).count();
883 eprintln!(
884 "[UNICODE DIAG] skin-tone emoji: real={} cells total={}",
885 real_cells,
886 row.len()
887 );
888 // 2 real cells, each advancing cursor by 2 = 4 cols model.
889 // Terminal advances 2. Drift = 2.
890 assert_eq!(real_cells, 2);
891 }
892
893 /// Ambiguous-width chars (East Asian Ambiguous class): `§`, `±`,
894 /// `¶`, Greek letters, box-drawing. `unicode-width` crate defaults
895 /// these to **1** (narrow); CJK-locale terminals may render them
896 /// at **2**. Our model → 1-col advance.
897 ///
898 /// If your Mac Terminal is set to a CJK language, these chars
899 /// will drift left by 1 col per occurrence. Changing `unicode-width`
900 /// to ambiguous-as-wide mode is a per-locale decision we don't
901 /// attempt; we pin the default behaviour here.
902 #[test]
903 fn unicode_ambiguous_width_defaults_narrow() {
904 let mut row = Vec::new();
905 push_str_cells(&mut row, "§±¶", &CellStyle::default());
906 // Each char → 1 cell with width 1 (no continuation).
907 assert_eq!(row.len(), 3);
908 for (i, ch) in "§±¶".chars().enumerate() {
909 assert_eq!(row[i].ch, ch);
910 assert_eq!(row[i].width, 1);
911 }
912 }
913
914 /// Combining marks (NFD form): "e + combining acute" = "e\u{301}"
915 /// displays as "é" (1 col total), but is **2 code points**. Our
916 /// current impl: the combining char has `unicode_width = 0` and
917 /// gets dropped by `push_str_cells`' early-continue — the base 'e'
918 /// survives unadorned.
919 ///
920 /// Effect: an NFD-form string in input loses its accent on screen.
921 /// NFC-form ("é" U+00E9, single codepoint) renders fine.
922 ///
923 /// For CJK-dominant usage this is acceptable. Fixing requires
924 /// either (a) NFC-normalising input before push, or (b) attaching
925 /// combining marks to the preceding cell's char. Neither is in
926 /// scope for Ink Phase 1.
927 #[test]
928 fn unicode_nfd_combining_mark_is_dropped_known_limitation() {
929 let mut row = Vec::new();
930 push_str_cells(&mut row, "e\u{301}", &CellStyle::default());
931 // 'e' kept, combining acute dropped.
932 assert_eq!(row.len(), 1, "combining mark dropped by width=0 guard");
933 assert_eq!(row[0].ch, 'e');
934 assert_eq!(row[0].width, 1);
935 }
936
937 /// NFC precomposed accented chars render correctly as narrow cells.
938 /// Contrast with `unicode_nfd_combining_mark_is_dropped` above.
939 #[test]
940 fn unicode_nfc_precomposed_accent_narrow_cell() {
941 let mut row = Vec::new();
942 push_str_cells(&mut row, "café", &CellStyle::default());
943 assert_eq!(row.len(), 4);
944 for (i, ch) in "café".chars().enumerate() {
945 assert_eq!(row[i].ch, ch);
946 assert_eq!(row[i].width, 1);
947 }
948 }
949
950 /// Zero-width space (U+200B) and BOM (U+FEFF) are width-0 and
951 /// legitimately get dropped — they carry no glyph. This verifies
952 /// the width=0 guard does the right thing rather than accidentally
953 /// inserting a phantom cell that would misalign diff col numbers.
954 #[test]
955 fn unicode_zero_width_invisibles_dropped() {
956 let mut row = Vec::new();
957 push_str_cells(&mut row, "a\u{200B}b\u{FEFF}c", &CellStyle::default());
958 // a, b, c — invisibles silently dropped.
959 assert_eq!(row.len(), 3);
960 assert_eq!(row[0].ch, 'a');
961 assert_eq!(row[1].ch, 'b');
962 assert_eq!(row[2].ch, 'c');
963 }
964
965 /// Mixed-width input: the `cell_index == terminal_column`
966 /// invariant holds across narrow + wide alternation.
967 #[test]
968 fn unicode_mixed_width_cell_indices_match_terminal_cols() {
969 let mut row = Vec::new();
970 push_str_cells(&mut row, "a你b", &CellStyle::default());
971 // Expect: 'a'(w=1) + '你'(w=2) + cont(w=0) + 'b'(w=1) = 4 cells
972 assert_eq!(row.len(), 4);
973 // Verify terminal-col calculation: cell i represents col i+1.
974 // If cursor_advance = cell.width summed, total = 1+2+0+1 = 4,
975 // matching terminal "a你b" = 1+2+1 = 4 cols.
976 let total_advance: u16 = row.iter().map(|c| c.width as u16).sum();
977 assert_eq!(total_advance, 4);
978 }
979
980 /// Diff + serialize round-trip with wide chars — a narrow→wide
981 /// transition at the same cell position must emit the wide glyph
982 /// AND leave the continuation position "covered" so subsequent
983 /// writes don't overwrite the glyph's right half.
984 #[test]
985 fn unicode_diff_narrow_to_wide_at_same_position() {
986 let prev_row: Vec<Cell> = vec![
987 Cell {
988 ch: 'a',
989 style: CellStyle::default(),
990 width: 1,
991 },
992 Cell {
993 ch: 'b',
994 style: CellStyle::default(),
995 width: 1,
996 },
997 ];
998 let next_row: Vec<Cell> = vec![
999 Cell {
1000 ch: '你',
1001 style: CellStyle::default(),
1002 width: 2,
1003 },
1004 Cell::continuation(),
1005 ];
1006 // Row 9 in the slice (index) → ANSI row 10 in the patch.
1007 let prev: Vec<Vec<Cell>> = (0..9)
1008 .map(|_| Vec::new())
1009 .chain(std::iter::once(prev_row))
1010 .collect();
1011 let next: Vec<Vec<Cell>> = (0..9)
1012 .map(|_| Vec::new())
1013 .chain(std::iter::once(next_row))
1014 .collect();
1015
1016 let patches = diff_cell_frames(&prev, &next);
1017 assert_eq!(patches.len(), 2, "both cols changed");
1018
1019 let bytes = serialize_patches(&patches);
1020 let s = String::from_utf8(bytes).unwrap();
1021 assert!(s.contains('你'));
1022 // Exactly one cursor-position CSI (the initial move).
1023 assert_eq!(
1024 s.matches("\x1b[10;").count(),
1025 1,
1026 "continuation must not trigger a cursor move: {:?}",
1027 s
1028 );
1029 }
1030
1031 /// Reverse: wide→narrow at same cell index. The wide char's
1032 /// right-half column needs an explicit blank patch to erase the
1033 /// stale glyph half left over after overwriting the left half.
1034 #[test]
1035 fn unicode_diff_wide_to_narrow_erases_right_half() {
1036 let prev_row: Vec<Cell> = vec![
1037 Cell {
1038 ch: '你',
1039 style: CellStyle::default(),
1040 width: 2,
1041 },
1042 Cell::continuation(),
1043 ];
1044 let next_row: Vec<Cell> = vec![
1045 Cell {
1046 ch: 'a',
1047 style: CellStyle::default(),
1048 width: 1,
1049 },
1050 Cell {
1051 ch: 'b',
1052 style: CellStyle::default(),
1053 width: 1,
1054 },
1055 ];
1056 let prev: Vec<Vec<Cell>> = (0..4)
1057 .map(|_| Vec::new())
1058 .chain(std::iter::once(prev_row))
1059 .collect();
1060 let next: Vec<Vec<Cell>> = (0..4)
1061 .map(|_| Vec::new())
1062 .chain(std::iter::once(next_row))
1063 .collect();
1064
1065 let patches = diff_cell_frames(&prev, &next);
1066 assert_eq!(patches.len(), 2);
1067 assert_eq!(patches[0].col, 1);
1068 assert_eq!(patches[0].cell.ch, 'a');
1069 assert_eq!(patches[1].col, 2);
1070 assert_eq!(patches[1].cell.ch, 'b');
1071
1072 let bytes = serialize_patches(&patches);
1073 let s = String::from_utf8(bytes).unwrap();
1074 assert!(s.contains('a'));
1075 assert!(s.contains('b'));
1076 }
1077}