Skip to main content

atomcode_tuix/render/
cell.rs

1// crates/atomcode-tuix/src/render/cell.rs
2//
3// Ink-style cell buffer for footer/menu rendering.
4//
// The row-level diff we had before was correct but coarse: any byte change
// in a row triggered a full-row re-emit. Combined with UTF-8 rule characters
// (`─` is 3 bytes × 200 cols × 2 rules = 1200 bytes of rule per redraw) and
// footer-height oscillation when the slash palette opens/closes, every
// menu toggle pushed 1800+ bytes to Mac Terminal.app's GUI pipeline — the
// threshold where its coalesce + repaint latency becomes user-visible.
11//
12// Ink (Claude Code's renderer) works on cells: (char, style) pairs indexed
13// by absolute terminal position. New frame → diff cell-by-cell → emit
14// minimal patches. A row whose status stayed "glm-5 · ~/project" across
15// frames contributes zero bytes. Rule middles stay identical after a
16// single-column input change → zero bytes. This module gives us that
17// primitive.
18//
19// Scope: footer + slash palette only. Body content (streaming text, tool
20// output) keeps the pure-append path — body lines enter scrollback and
21// never need a diff cycle.
22
23use crossterm::style::{Color, SetForegroundColor};
24use std::io::Write as _;
25
26/// Visual attributes that can vary per cell in our footer. Kept minimal
27/// on purpose: footer uses fg color, bold, and reverse-video
28/// (for the palette's selected row). Extending this to bg / underline
29/// / italic is a future concern — adding fields is the mechanical part,
30/// but every field widens the diff equality surface and the SGR state
31/// machine's emit path, so we don't preemptively carry what we don't use.
32#[derive(Debug, Clone, Default, PartialEq, Eq)]
33pub struct CellStyle {
34    /// Foreground colour via crossterm SGR. `None` = terminal default
35    /// foreground (emitted as `\x1b[39m` by the serialiser).
36    pub fg: Option<Color>,
37    /// SGR bold (`\x1b[1m` / `\x1b[22m`).
38    pub bold: bool,
39    /// SGR reverse video (`\x1b[7m` / `\x1b[27m`). Used for the
40    /// highlighted menu row.
41    pub reverse: bool,
42    /// SGR faint / decreased intensity (`\x1b[2m`). Renders the current
43    /// fg at ~50% intensity — terminal-theme-aware muting that adapts
44    /// to both light and dark schemes (unlike a fixed DarkGrey which
45    /// vanishes on some palettes). Toggled off via SGR 22, which is the
46    /// shared "normal intensity" reset for both bold and faint, so the
47    /// transition path goes through full reset when faint→off.
48    pub faint: bool,
49}
50
51/// One screen cell: glyph + its visual attributes. Cell equality is
52/// byte-perfect — two cells are equal iff their serialised bytes
53/// would be identical, which is the invariant the diff relies on.
54///
55/// `width` is the **display width** in terminal columns: 1 for ASCII
56/// and other narrow glyphs, 2 for CJK / emoji / other wide glyphs,
57/// and 0 for **continuation cells** — placeholder cells that follow a
58/// wide glyph to keep the invariant `cell_index == terminal_column`.
59/// Without continuation cells, typing "你是谁" (3 wide chars = 6 cols)
60/// into a row model that tracked only char count (3 cells) would emit
61/// patches at model cols 5/6/7 while the terminal had just advanced
62/// to actual col 11 after the first `你`, overwriting each preceding
63/// glyph's right half with the next glyph — the "you3-type-shows-only-
64/// last-char" bug.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Cell {
67    pub ch: char,
68    pub style: CellStyle,
69    pub width: u8,
70}
71
72impl Default for Cell {
73    /// Default blank cell = ASCII space, width 1, default style.
74    fn default() -> Self {
75        Self {
76            ch: ' ',
77            style: CellStyle::default(),
78            width: 1,
79        }
80    }
81}
82
83impl Cell {
84    /// Blank narrow cell — space, width 1. Used for padding and as
85    /// the diff's "erase" glyph.
86    pub fn blank() -> Self {
87        Self::default()
88    }
89
90    /// Continuation cell — placeholder for the 2nd (or 3rd, if any)
91    /// terminal column occupied by a wide glyph. `width = 0` tells
92    /// `serialize_patches` to skip emit for this cell: the wide
93    /// glyph emitted in the cell immediately before has already
94    /// advanced the terminal cursor past this column.
95    pub fn continuation() -> Self {
96        Self {
97            ch: ' ',
98            style: CellStyle::default(),
99            width: 0,
100        }
101    }
102}
103
/// Number of spaces a `\t` is expanded to when a string containing a
/// tab reaches the cell builders without having been handled by a
/// higher-level, tab-aware path. The value follows the convention used
/// by claude-code / CC-style tooling.
const SOFT_TAB_WIDTH: usize = 4;
108
109/// Append each char of `s` as cells, all sharing `style`. Wide chars
110/// (CJK, emoji, etc.) expand to one real cell carrying the glyph +
111/// `(display_width - 1)` continuation cells so `cell_index ==
112/// terminal_column` holds across the row — critical for the cell-diff
113/// to produce correct patches.
114///
115/// Control chars that would mis-align the cell model vs the terminal
116/// are normalised here:
117///   - `\n` / `\r`: dropped. Multi-line content must be split by the
118///     caller (`push_body_text` does this); writing a bare LF under
119///     raw-mode drops a row without CR, and a bare CR returns to col
120///     0 mid-row — both produce the "staircase" bug.
121///   - `\t`: expanded to SOFT_TAB_WIDTH spaces so cell col == terminal
122///     col. Without this, the terminal jumps to its hardware tab stop
123///     (col 9/17/25/…) while our cell model advances 1 col per `\t`
124///     cell, and subsequent diffs patch the wrong columns.
125pub fn push_str_cells(row: &mut Vec<Cell>, s: &str, style: &CellStyle) {
126    for ch in s.chars() {
127        if ch == '\n' || ch == '\r' {
128            continue;
129        }
130        if ch == '\t' {
131            for _ in 0..SOFT_TAB_WIDTH {
132                row.push(Cell {
133                    ch: ' ',
134                    style: style.clone(),
135                    width: 1,
136                });
137            }
138            continue;
139        }
140        let w = unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1);
141        if w == 0 {
142            // Zero-width (combining marks, control chars). Caller has
143            // already scrubbed real controls; skip here rather than
144            // emit a phantom cell that diff can't align.
145            continue;
146        }
147        row.push(Cell {
148            ch,
149            style: style.clone(),
150            width: w as u8,
151        });
152        for _ in 1..w {
153            row.push(Cell::continuation());
154        }
155    }
156}
157
158/// Like [`push_str_cells`] but parses embedded SGR escape sequences
159/// (`\x1b[...m`) inline, mutating a working `CellStyle` so subsequent
160/// cells pick up the colour / bold / faint / reverse attributes the
161/// terminal would otherwise paint via raw ANSI. Returns the style
162/// state at end-of-input so a caller wrapping a single physical line
163/// into multiple chunks can carry attributes across chunk boundaries
164/// (e.g. `\x1b[31m` on one chunk and `\x1b[39m` on the next).
165///
166/// Why this exists: the retained renderer paints from a cell grid
167/// rather than streaming raw bytes to stdout, so SGR sequences that
168/// survive [`crate::sanitize::scrub_controls_keep_sgr`] would
169/// otherwise land as literal `^[[31m` characters in cells. This
170/// function is the cell-pipeline equivalent of alt-screen's
171/// `truncate_to_width_sgr_aware` — it understands SGR enough to
172/// translate it into `CellStyle` mutations on the way in.
173///
174/// Non-SGR CSI sequences (cursor moves, DSR, etc.) are silently
175/// dropped — they should have been scrubbed upstream; this is
176/// belt-and-suspenders.
177pub fn push_str_cells_sgr(
178    row: &mut Vec<Cell>,
179    s: &str,
180    mut working_style: CellStyle,
181) -> CellStyle {
182    let mut chars = s.chars().peekable();
183    while let Some(ch) = chars.next() {
184        if ch == '\x1b' {
185            if chars.peek() == Some(&'[') {
186                chars.next();
187                let mut params = String::new();
188                let mut final_byte: Option<char> = None;
189                while let Some(&p) = chars.peek() {
190                    chars.next();
191                    if ('\x40'..='\x7E').contains(&p) {
192                        final_byte = Some(p);
193                        break;
194                    }
195                    params.push(p);
196                }
197                if final_byte == Some('m') {
198                    apply_sgr_params(&params, &mut working_style);
199                }
200            }
201            continue;
202        }
203        if ch == '\n' || ch == '\r' {
204            continue;
205        }
206        if ch == '\t' {
207            for _ in 0..SOFT_TAB_WIDTH {
208                row.push(Cell {
209                    ch: ' ',
210                    style: working_style.clone(),
211                    width: 1,
212                });
213            }
214            continue;
215        }
216        let w = unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1);
217        if w == 0 {
218            continue;
219        }
220        row.push(Cell {
221            ch,
222            style: working_style.clone(),
223            width: w as u8,
224        });
225        for _ in 1..w {
226            row.push(Cell::continuation());
227        }
228    }
229    working_style
230}
231
232/// Parse a `;`-separated SGR parameter list and fold each recognised
233/// code into `style`. The crossterm colour variants chosen here mirror
234/// the SGR-to-name mapping that `serialize_row` uses to emit cells
235/// back to the terminal — so a row built from `\x1b[31m…` round-trips
236/// to `\x1b[31m…` on output, and the terminal's theme palette gets to
237/// pick the actual shade rather than us hard-coding RGB.
238///
239/// Unknown / unsupported codes (256-colour `38;5;N`, RGB `38;2;R;G;B`,
240/// background colours, italic, underline) are silently skipped —
241/// they're outside the cosmetic surface `CellStyle` currently
242/// represents, so picking up an LLM-emitted underline would just be
243/// lost on the retained path. Adding fields to `CellStyle` is the
244/// trigger for extending this match.
245fn apply_sgr_params(params: &str, style: &mut CellStyle) {
246    // `\x1b[m` (empty params) means "reset" — same as `\x1b[0m`.
247    if params.is_empty() {
248        *style = CellStyle::default();
249        return;
250    }
251    for code in params.split(';') {
252        // `\x1b[;31m` (leading empty) also resets before applying.
253        if code.is_empty() {
254            *style = CellStyle::default();
255            continue;
256        }
257        let Ok(n) = code.parse::<u16>() else { continue };
258        match n {
259            0 => *style = CellStyle::default(),
260            1 => style.bold = true,
261            2 => style.faint = true,
262            7 => style.reverse = true,
263            22 => {
264                // SGR 22 = normal intensity, clears BOTH bold and faint.
265                style.bold = false;
266                style.faint = false;
267            }
268            27 => style.reverse = false,
269            30 => style.fg = Some(Color::Black),
270            31 => style.fg = Some(Color::DarkRed),
271            32 => style.fg = Some(Color::DarkGreen),
272            33 => style.fg = Some(Color::DarkYellow),
273            34 => style.fg = Some(Color::DarkBlue),
274            35 => style.fg = Some(Color::DarkMagenta),
275            36 => style.fg = Some(Color::DarkCyan),
276            37 => style.fg = Some(Color::Grey),
277            39 => style.fg = None,
278            90 => style.fg = Some(Color::DarkGrey),
279            91 => style.fg = Some(Color::Red),
280            92 => style.fg = Some(Color::Green),
281            93 => style.fg = Some(Color::Yellow),
282            94 => style.fg = Some(Color::Blue),
283            95 => style.fg = Some(Color::Magenta),
284            96 => style.fg = Some(Color::Cyan),
285            97 => style.fg = Some(Color::White),
286            _ => {}
287        }
288    }
289}
290
291/// A single cell's worth of change: "put this cell at absolute position
292/// (row, col)". Multiple adjacent patches with the same style serialise
293/// into one cursor move + a run of characters, so small clusters stay
294/// cheap. Rows/cols are 1-indexed to match ANSI (`\x1b[row;col H`).
295#[derive(Debug, Clone, PartialEq, Eq)]
296pub struct Patch {
297    pub row: u16,
298    pub col: u16,
299    pub cell: Cell,
300}
301
302/// Slice-based cell-diff for the retained-mode `Screen` buffer.
303/// Both frames are `[Vec<Cell>]` indexed by **screen row** (0..H-1),
304/// with each inner `Vec<Cell>` indexed by **screen col** (0..W-1).
305/// Unlike `diff_cells`, this variant doesn't allocate a hashmap per
306/// frame — Screen always knows all its rows upfront, so a contiguous
307/// slice is both faster and maps 1:1 onto the 2D `cells[row][col]`
308/// access pattern.
309///
310/// Emits patches with **1-indexed** (row, col) matching ANSI cursor
311/// addressing. When one frame is shorter than the other (shouldn't
312/// happen in practice if both come from the same `Screen`, but keep
313/// the robustness for safety), missing rows / columns are treated as
314/// blank so the "other" frame's content generates explicit patches.
315pub fn diff_cell_frames(prev: &[Vec<Cell>], next: &[Vec<Cell>]) -> Vec<Patch> {
316    let mut patches = Vec::new();
317    let max_rows = prev.len().max(next.len());
318    let blank = Cell::blank();
319    for r in 0..max_rows {
320        let p = prev.get(r).map(Vec::as_slice).unwrap_or(&[]);
321        let n = next.get(r).map(Vec::as_slice).unwrap_or(&[]);
322        let max_cols = p.len().max(n.len());
323        for c in 0..max_cols {
324            let pc = p.get(c).unwrap_or(&blank);
325            let nc = n.get(c).unwrap_or(&blank);
326            if pc != nc {
327                patches.push(Patch {
328                    row: (r + 1) as u16,
329                    col: (c + 1) as u16,
330                    cell: nc.clone(),
331                });
332            }
333        }
334    }
335    patches
336}
337
338/// Serialise patches into ANSI bytes with an SGR state machine: emit
339/// cursor-position only when we're jumping, emit SGR only when the
340/// outgoing cell's style differs from the last one we set, and run-pack
341/// adjacent same-style patches into contiguous character streams.
342///
343/// Ends with `\x1b[0m` so the caller's subsequent writes (body text,
344/// cursor positioning, etc.) start from a clean SGR state — leaving a
345/// bold/reverse bit set across paint boundaries was a class of rare
346/// but hard-to-reproduce "random colour leak" bugs in the old path.
347pub fn serialize_patches(patches: &[Patch]) -> Vec<u8> {
348    if patches.is_empty() {
349        return Vec::new();
350    }
351
352    let mut out = Vec::with_capacity(patches.len() * 8);
353    let mut current_style: Option<CellStyle> = None;
354    let mut expected_cursor: Option<(u16, u16)> = None;
355    let mut emitted_any_sgr = false;
356
357    for patch in patches {
358        // Continuation cell: the wide glyph in the previous cell has
359        // already advanced the terminal cursor past this column. Emit
360        // nothing — writing here would clobber the wide glyph's right
361        // half *and* scramble our cursor model.
362        if patch.cell.width == 0 {
363            continue;
364        }
365
366        if expected_cursor != Some((patch.row, patch.col)) {
367            let _ = write!(out, "\x1b[{};{}H", patch.row, patch.col);
368            expected_cursor = Some((patch.row, patch.col));
369        }
370
371        if current_style.as_ref() != Some(&patch.cell.style) {
372            let before = out.len();
373            emit_sgr_transition(&mut out, current_style.as_ref(), &patch.cell.style);
374            if out.len() > before {
375                emitted_any_sgr = true;
376            }
377            current_style = Some(patch.cell.style.clone());
378        }
379
380        let mut buf = [0u8; 4];
381        let encoded = patch.cell.ch.encode_utf8(&mut buf);
382        out.extend_from_slice(encoded.as_bytes());
383
384        // Cursor advances by the glyph's display width. For narrow
385        // cells this is +1 (the common case), for wide cells (CJK,
386        // emoji) it's +2 — matching what the terminal actually does
387        // so the next patch's `expected_cursor` comparison is sound.
388        if let Some((r, c)) = expected_cursor {
389            expected_cursor = Some((r, c + patch.cell.width as u16));
390        }
391    }
392
393    // Final `\x1b[0m` only if we ever turned an attribute on — otherwise
394    // we'd leak a pointless reset into the stream every time the footer
395    // is pure-default-style (all-blank padding, plain rule without
396    // colour, etc.). The legacy `row_to_bytes` case exercises this in
397    // its tests.
398    if emitted_any_sgr {
399        out.extend_from_slice(b"\x1b[0m");
400    }
401
402    out
403}
404
405/// Serialise a single row of cells into ANSI bytes **without any cursor
406/// positioning**. Used by the scrollback-push path (write row to stdout
407/// at the current cursor, then let `\n` advance). Skips continuation
408/// cells; closes with `\x1b[0m` iff any SGR was emitted so subsequent
409/// writes start from a clean state.
410pub fn serialize_row(row: &[Cell]) -> Vec<u8> {
411    let mut out = Vec::with_capacity(row.len() * 4);
412    let mut current_style: Option<CellStyle> = None;
413    let mut emitted_any_sgr = false;
414    for cell in row {
415        if cell.width == 0 {
416            continue;
417        }
418        if current_style.as_ref() != Some(&cell.style) {
419            let before = out.len();
420            emit_sgr_transition(&mut out, current_style.as_ref(), &cell.style);
421            if out.len() > before {
422                emitted_any_sgr = true;
423            }
424            current_style = Some(cell.style.clone());
425        }
426        let mut buf = [0u8; 4];
427        let encoded = cell.ch.encode_utf8(&mut buf);
428        out.extend_from_slice(encoded.as_bytes());
429    }
430    if emitted_any_sgr {
431        out.extend_from_slice(b"\x1b[0m");
432    }
433    out
434}
435
436/// Emit the minimal SGR sequence to move from `from` style to `to` style.
437/// Uses reset-and-reapply whenever a "sticky" attribute (bold/reverse)
438/// needs clearing; per-attr toggles (`\x1b[22m` for bold off, `\x1b[27m`
439/// for reverse off) are respected by modern terminals but reset+reapply
440/// is shorter when multiple attributes change at once.
441fn emit_sgr_transition(out: &mut Vec<u8>, from: Option<&CellStyle>, to: &CellStyle) {
442    let from_default = CellStyle::default();
443    let from = from.unwrap_or(&from_default);
444
445    // Determine if any attribute is being turned OFF — if so, cheapest
446    // path is reset everything and reapply the ON set. If only
447    // additive, use targeted enables.
448    let bold_off = from.bold && !to.bold;
449    let reverse_off = from.reverse && !to.reverse;
450    // SGR 22 ("normal intensity") clears both bold AND faint — there is
451    // no per-attribute toggle for faint. So a faint→off transition
452    // always goes through full reset to avoid clobbering bold state.
453    let faint_off = from.faint && !to.faint;
454    let fg_change = from.fg != to.fg;
455
456    let needs_reset = bold_off
457        || reverse_off
458        || faint_off
459        || (from.fg.is_some() && to.fg.is_none());
460
461    if needs_reset {
462        out.extend_from_slice(b"\x1b[0m");
463        // After reset, nothing is on — apply `to`'s positive attrs.
464        if to.bold {
465            out.extend_from_slice(b"\x1b[1m");
466        }
467        if to.faint {
468            out.extend_from_slice(b"\x1b[2m");
469        }
470        if to.reverse {
471            out.extend_from_slice(b"\x1b[7m");
472        }
473        if let Some(c) = to.fg {
474            let _ = write!(out, "{}", SetForegroundColor(c));
475        }
476    } else {
477        // Additive path — current attributes stay, just flip on whatever
478        // `to` adds.
479        if !from.bold && to.bold {
480            out.extend_from_slice(b"\x1b[1m");
481        }
482        if !from.faint && to.faint {
483            out.extend_from_slice(b"\x1b[2m");
484        }
485        if !from.reverse && to.reverse {
486            out.extend_from_slice(b"\x1b[7m");
487        }
488        if fg_change {
489            if let Some(c) = to.fg {
490                let _ = write!(out, "{}", SetForegroundColor(c));
491            } else {
492                // Should have been caught by needs_reset, but defensive.
493                out.extend_from_slice(b"\x1b[39m");
494            }
495        }
496    }
497}
498
499#[cfg(test)]
500mod tests {
501    use super::*;
502
503    fn cyan() -> Color {
504        Color::Cyan
505    }
506
507    fn style_bold_cyan() -> CellStyle {
508        CellStyle {
509            fg: Some(cyan()),
510            bold: true,
511            reverse: false,
512            faint: false,
513        }
514    }
515
516    #[test]
517    fn cell_equality_is_field_wise() {
518        let a = Cell {
519            ch: 'x',
520            style: style_bold_cyan(),
521            width: 1,
522        };
523        let b = Cell {
524            ch: 'x',
525            style: style_bold_cyan(),
526            width: 1,
527        };
528        assert_eq!(a, b);
529        let c = Cell {
530            ch: 'y',
531            style: style_bold_cyan(),
532            width: 1,
533        };
534        assert_ne!(a, c);
535    }
536
537    #[test]
538    fn push_str_cells_spreads_one_char_per_cell() {
539        let mut row = Vec::new();
540        push_str_cells(&mut row, "ab", &CellStyle::default());
541        assert_eq!(row.len(), 2);
542        assert_eq!(row[0].ch, 'a');
543        assert_eq!(row[1].ch, 'b');
544    }
545
546    /// Uses 0-indexed slice input, produces 1-indexed (row, col)
547    /// patches matching ANSI cursor addressing convention.
548    #[test]
549    fn diff_cell_frames_produces_one_indexed_coords() {
550        let row: Vec<Cell> = "ab"
551            .chars()
552            .map(|ch| Cell {
553                ch,
554                style: Default::default(),
555                width: 1,
556            })
557            .collect();
558        let mut changed = row.clone();
559        changed[0].ch = 'X';
560        let prev = vec![row.clone()];
561        let next = vec![changed];
562        let patches = diff_cell_frames(&prev, &next);
563        assert_eq!(patches.len(), 1);
564        assert_eq!(patches[0].row, 1, "slice row 0 -> ANSI row 1");
565        assert_eq!(patches[0].col, 1, "slice col 0 -> ANSI col 1");
566        assert_eq!(patches[0].cell.ch, 'X');
567    }
568
569    /// Empty frames produce zero patches.
570    #[test]
571    fn diff_cell_frames_empty_frames() {
572        let patches = diff_cell_frames(&[], &[]);
573        assert!(patches.is_empty());
574    }
575
576    #[test]
577    fn diff_shorter_next_emits_blanks_for_trailing() {
578        // prev has 5 cells, next has 2 — the 3 tail cells in prev need
579        // blanking patches so leftover glyphs get overwritten.
580        let prev_row: Vec<Cell> = "hello"
581            .chars()
582            .map(|ch| Cell {
583                ch,
584                style: Default::default(),
585                width: 1,
586            })
587            .collect();
588        let next_row: Vec<Cell> = "he"
589            .chars()
590            .map(|ch| Cell {
591                ch,
592                style: Default::default(),
593                width: 1,
594            })
595            .collect();
596        let prev = vec![prev_row];
597        let next = vec![next_row];
598        let patches = diff_cell_frames(&prev, &next);
599        assert_eq!(patches.len(), 3);
600        for p in &patches {
601            assert_eq!(p.cell, Cell::blank());
602        }
603    }
604
605    #[test]
606    fn serialize_empty_patches_emits_nothing() {
607        assert!(serialize_patches(&[]).is_empty());
608    }
609
610    #[test]
611    fn serialize_single_patch_emits_cursor_plus_char() {
612        let p = Patch {
613            row: 10,
614            col: 5,
615            cell: Cell {
616                ch: 'x',
617                style: Default::default(),
618                width: 1,
619            },
620        };
621        let bytes = serialize_patches(std::slice::from_ref(&p));
622        let s = String::from_utf8(bytes).unwrap();
623        assert!(s.contains("\x1b[10;5H"));
624        assert!(s.contains('x'));
625        // Default-style cell → no SGR was turned on, so no trailing
626        // \x1b[0m is needed (would be a wasted 4 bytes per emit).
627        assert!(!s.contains("\x1b[0m"));
628    }
629
630    #[test]
631    fn serialize_final_reset_on_styled_patches() {
632        // When a patch carries a non-default style, the emit path MUST
633        // close with \x1b[0m so subsequent writes start clean.
634        let p = Patch {
635            row: 1,
636            col: 1,
637            cell: Cell {
638                ch: 'x',
639                style: style_bold_cyan(),
640                width: 1,
641            },
642        };
643        let bytes = serialize_patches(std::slice::from_ref(&p));
644        let s = String::from_utf8(bytes).unwrap();
645        assert!(s.ends_with("\x1b[0m"));
646    }
647
648    #[test]
649    fn serialize_adjacent_cells_skip_cursor_move() {
650        // Two patches at (5, 1) and (5, 2) with same default style —
651        // second should NOT emit a cursor move (cursor auto-advanced)
652        // AND no final reset (default style, no SGR on).
653        let p1 = Patch {
654            row: 5,
655            col: 1,
656            cell: Cell {
657                ch: 'a',
658                style: Default::default(),
659                width: 1,
660            },
661        };
662        let p2 = Patch {
663            row: 5,
664            col: 2,
665            cell: Cell {
666                ch: 'b',
667                style: Default::default(),
668                width: 1,
669            },
670        };
671        let bytes = serialize_patches(&[p1, p2]);
672        let s = String::from_utf8(bytes).unwrap();
673        // Exactly one CSI: `\x1b[5;1H`. No SGR, no final reset.
674        assert_eq!(s.matches("\x1b[").count(), 1);
675    }
676
677    #[test]
678    fn serialize_style_change_only_emits_sgr_once() {
679        // Two patches at (5,1) and (5,2), second changes to bold —
680        // should emit one SGR transition, not two.
681        let p1 = Patch {
682            row: 5,
683            col: 1,
684            cell: Cell {
685                ch: 'a',
686                style: Default::default(),
687                width: 1,
688            },
689        };
690        let p2 = Patch {
691            row: 5,
692            col: 2,
693            cell: Cell {
694                ch: 'b',
695                style: CellStyle {
696                    fg: None,
697                    bold: true,
698                    reverse: false,
699                    faint: false,
700                },
701                width: 1,
702            },
703        };
704        let bytes = serialize_patches(&[p1, p2]);
705        let s = String::from_utf8(bytes).unwrap();
706        assert!(s.contains("\x1b[1m"), "expected bold SGR, got: {:?}", s);
707    }
708
709    /// Faint cells emit SGR 2 — theme-aware muting for hint/status text.
710    /// Final reset must close the run because faint is "sticky" until
711    /// SGR 22 / SGR 0 clears it.
712    #[test]
713    fn serialize_faint_emits_sgr_two_and_final_reset() {
714        let p = Patch {
715            row: 1,
716            col: 1,
717            cell: Cell {
718                ch: 'h',
719                style: CellStyle {
720                    fg: None,
721                    bold: false,
722                    reverse: false,
723                    faint: true,
724                },
725                width: 1,
726            },
727        };
728        let bytes = serialize_patches(std::slice::from_ref(&p));
729        let s = String::from_utf8(bytes).unwrap();
730        assert!(s.contains("\x1b[2m"), "expected faint SGR, got: {:?}", s);
731        assert!(s.ends_with("\x1b[0m"), "faint cell must close with reset");
732    }
733
734    /// Faint→non-faint transition routes through full reset (SGR 0)
735    /// rather than per-attribute toggle, because SGR 22 ("normal
736    /// intensity") would also clobber bold if present. Reset path is
737    /// what `emit_sgr_transition` already uses for bold-off and
738    /// reverse-off — extending to faint-off keeps the invariant.
739    #[test]
740    fn serialize_faint_off_goes_through_reset() {
741        let faint = Patch {
742            row: 1,
743            col: 1,
744            cell: Cell {
745                ch: 'a',
746                style: CellStyle {
747                    fg: None,
748                    bold: false,
749                    reverse: false,
750                    faint: true,
751                },
752                width: 1,
753            },
754        };
755        let plain = Patch {
756            row: 1,
757            col: 2,
758            cell: Cell {
759                ch: 'b',
760                style: CellStyle::default(),
761                width: 1,
762            },
763        };
764        let bytes = serialize_patches(&[faint, plain]);
765        let s = String::from_utf8(bytes).unwrap();
766        // \x1b[2m for faint cell → \x1b[0m before the plain cell.
767        assert!(s.contains("\x1b[2m"));
768        // The plain cell must be preceded by a reset, not just \x1b[22m.
769        let reset_idx = s
770            .match_indices("\x1b[0m")
771            .map(|(i, _)| i)
772            .find(|&i| i < s.find('b').unwrap())
773            .expect("expected mid-stream reset before plain cell");
774        let _ = reset_idx;
775    }
776
777    // ── Unicode edge-case regression tests ─────────────────────────
778    //
779    // These pin down how `push_str_cells` / `diff_cells` / `serialize_patches`
780    // treat "tricky" Unicode input. For each scenario we either:
781    //   (a) assert the behaviour we DO support, or
782    //   (b) document a known limitation with `#[ignore]` + a doc
783    //       comment explaining what actually happens + why we accept
784    //       it for now.
785    //
786    // Run only this group: `cargo test -p atomcode-tuix --lib unicode_`
787    // ──────────────────────────────────────────────────────────────
788
/// Baseline for wide glyphs: every CJK ideograph pushed through
/// `push_str_cells` must yield one glyph cell of width 2 followed by
/// one width-0 continuation cell, so "你是谁" occupies six cells and
/// the cell index keeps tracking the terminal column.
#[test]
fn unicode_cjk_ideograph_expands_to_two_cells() {
    let style = CellStyle::default();
    let mut cells = Vec::new();
    push_str_cells(&mut cells, "你是谁", &style);
    assert_eq!(cells.len(), 6, "3 CJK chars × (1 real + 1 cont) = 6 cells");

    // Even indices hold the glyphs, each flagged as two columns wide.
    for (idx, glyph) in [(0, '你'), (2, '是'), (4, '谁')] {
        assert_eq!(cells[idx].ch, glyph);
        assert_eq!(cells[idx].width, 2);
    }

    // Odd indices are the zero-width continuation slots that pad the
    // row out to the glyph's second column.
    for i in [1, 3, 5] {
        assert_eq!(cells[i].width, 0, "cell {} should be continuation", i);
    }
}
812
/// An emoji made of a single code point (here 😀) is classified as
/// wide, so it must be laid out exactly like a CJK ideograph: one
/// width-2 glyph cell plus one width-0 continuation cell.
#[test]
fn unicode_single_codepoint_emoji_expands_to_two_cells() {
    let mut cells = Vec::new();
    push_str_cells(&mut cells, "😀", &CellStyle::default());
    assert_eq!(cells.len(), 2);

    // The length check above guarantees both indices exist.
    let (glyph, trailer) = (&cells[0], &cells[1]);
    assert_eq!(glyph.ch, '😀');
    assert_eq!(glyph.width, 2);
    assert_eq!(trailer.width, 0);
}
825
/// **Known limitation**: emoji built from several code points glued
/// together with U+200D (ZWJ) are not treated as one grapheme. Width
/// is looked up per code point, so each wide component claims its own
/// two columns and the zero-width joiners are dropped.
///
/// Worked example, "👨‍👩‍👧" (man + ZWJ + woman + ZWJ + girl):
///   - per-code-point widths: 2 + 0 + 2 + 0 + 2
///   - cells produced: 3 glyph cells (w=2) + 3 continuations (w=0)
///   - columns claimed by the model: 6
///
/// A terminal that composes the sequence into a single 2-column
/// glyph advances only 2 columns, so whatever follows would be placed
/// 4 columns too far to the right.
///
/// The assertions freeze today's behaviour so that a change is
/// noticed; they are not a statement of what is correct. A real fix
/// needs grapheme segmentation (e.g. the unicode-segmentation crate),
/// which is deliberately not a dependency yet.
#[test]
fn unicode_zwj_sequence_is_not_grapheme_aware_known_limitation() {
    // Family emoji spelled out code point by code point:
    // man, ZWJ, woman, ZWJ, girl.
    let family = "👨\u{200D}👩\u{200D}👧";
    let mut cells = Vec::new();
    push_str_cells(&mut cells, family, &CellStyle::default());

    // Tally glyph cells (width > 0) and continuation cells (width == 0)
    // in one pass over the row.
    let mut real_cells = 0usize;
    let mut cont_cells = 0usize;
    for cell in &cells {
        if cell.width > 0 {
            real_cells += 1;
        } else if cell.width == 0 {
            cont_cells += 1;
        }
    }
    eprintln!(
        "[UNICODE DIAG] ZWJ family: real={} cont={} total={} (terminal would show 1 glyph = 2 cols)",
        real_cells,
        cont_cells,
        cells.len()
    );

    // Three wide components, each with a continuation; both joiners
    // vanish, leaving six cells where a composing terminal shows two
    // columns.
    assert_eq!(real_cells, 3);
    assert_eq!(cont_cells, 3);
    assert_eq!(cells.len(), 6);
}
867
/// **Known limitation**: a skin-toned emoji such as 👍🏽 is the base
/// U+1F44D followed by the Fitzpatrick modifier U+1F3FD. Terminals
/// typically draw the pair as one 2-column glyph, but the model
/// handles the two code points separately and emits a glyph cell for
/// each of them.
///
/// The expected consequence is a drift of 2 columns per skin-toned
/// emoji (model advances 4, terminal advances 2). The remedy is the
/// same grapheme segmentation that ZWJ sequences need.
#[test]
fn unicode_skin_tone_modifier_not_segmented_known_limitation() {
    let mut cells = Vec::new();
    push_str_cells(&mut cells, "👍🏽", &CellStyle::default());

    // Count the cells that actually carry a glyph (non-zero width).
    let mut real_cells = 0usize;
    for cell in &cells {
        if cell.width > 0 {
            real_cells += 1;
        }
    }
    eprintln!(
        "[UNICODE DIAG] skin-tone emoji: real={} cells total={}",
        real_cells,
        cells.len()
    );

    // Base and modifier each get their own glyph cell.
    assert_eq!(real_cells, 2);
}
892
/// Characters in the East Asian "Ambiguous" width class (`§`, `±`,
/// `¶` are the samples used here) are laid out as narrow, one-column
/// cells. Terminals configured for a CJK locale may draw them two
/// columns wide instead, in which case the model under-counts by one
/// column per occurrence.
///
/// Treating ambiguous characters as wide is a per-locale choice that
/// the renderer does not attempt; this test pins the narrow default.
#[test]
fn unicode_ambiguous_width_defaults_narrow() {
    let input = "§±¶";
    let mut cells = Vec::new();
    push_str_cells(&mut cells, input, &CellStyle::default());

    // One cell per character, none of them followed by a continuation.
    assert_eq!(cells.len(), 3);
    for (cell, expected) in cells.iter().zip(input.chars()) {
        assert_eq!(cell.ch, expected);
        assert_eq!(cell.width, 1);
    }
}
913
/// **Known limitation**: decomposed (NFD) text loses its accents.
/// "e\u{301}" is two code points that display as a single-column "é",
/// but the combining acute has width 0 and is discarded by
/// `push_str_cells`, so only the bare 'e' reaches the row.
///
/// Precomposed (NFC) input such as U+00E9 is a single code point and
/// is unaffected. Possible fixes are NFC-normalising the input before
/// pushing, or attaching combining marks to the preceding cell;
/// neither is in scope for Ink Phase 1.
#[test]
fn unicode_nfd_combining_mark_is_dropped_known_limitation() {
    let mut cells = Vec::new();
    push_str_cells(&mut cells, "e\u{301}", &CellStyle::default());
    assert_eq!(cells.len(), 1, "combining mark dropped by width=0 guard");

    // Only the unaccented base letter survives.
    let base = &cells[0];
    assert_eq!(base.ch, 'e');
    assert_eq!(base.width, 1);
}
936
/// Precomposed (NFC) accented letters are ordinary narrow cells: each
/// character of "café" lands in its own width-1 cell. This is the
/// counterpart of
/// `unicode_nfd_combining_mark_is_dropped_known_limitation`.
#[test]
fn unicode_nfc_precomposed_accent_narrow_cell() {
    let word = "café";
    let mut cells = Vec::new();
    push_str_cells(&mut cells, word, &CellStyle::default());

    assert_eq!(cells.len(), 4);
    for (cell, expected) in cells.iter().zip(word.chars()) {
        assert_eq!(cell.ch, expected);
        assert_eq!(cell.width, 1);
    }
}
949
/// Zero-width space (U+200B) and BOM (U+FEFF) have width 0 and carry
/// no glyph, so dropping them is the intended outcome. The test makes
/// sure no phantom cell is inserted for them, which would shift every
/// later column number used by the diff.
#[test]
fn unicode_zero_width_invisibles_dropped() {
    let mut cells = Vec::new();
    push_str_cells(&mut cells, "a\u{200B}b\u{FEFF}c", &CellStyle::default());

    // Only the three visible letters remain, in their original order.
    assert_eq!(cells.len(), 3);
    for (cell, expected) in cells.iter().zip(['a', 'b', 'c']) {
        assert_eq!(cell.ch, expected);
    }
}
964
/// Alternating narrow and wide characters must keep the
/// `cell_index == terminal_column` invariant intact.
#[test]
fn unicode_mixed_width_cell_indices_match_terminal_cols() {
    let mut cells = Vec::new();
    push_str_cells(&mut cells, "a你b", &CellStyle::default());

    // 'a'(w=1), '你'(w=2), continuation(w=0), 'b'(w=1).
    assert_eq!(cells.len(), 4);

    // Adding up the per-cell widths gives the cursor advance. It has
    // to equal the 1 + 2 + 1 = 4 columns the terminal uses for "a你b".
    let mut total_advance: u16 = 0;
    for cell in &cells {
        total_advance += cell.width as u16;
    }
    assert_eq!(total_advance, 4);
}
979
/// Diff and serialise a row whose two narrow cells are replaced by
/// one wide glyph plus its continuation. The output must contain the
/// wide glyph, and the continuation must ride along without its own
/// cursor move, so nothing is written over the glyph's right half.
#[test]
fn unicode_diff_narrow_to_wide_at_same_position() {
    // Default-styled single-column cell.
    let narrow = |ch: char| Cell {
        ch,
        style: CellStyle::default(),
        width: 1,
    };
    // Frame of nine empty rows followed by `row`, which therefore sits
    // at slice index 9 and is addressed as ANSI row 10 in the patches.
    let frame_ending_with = |row: Vec<Cell>| -> Vec<Vec<Cell>> {
        let mut frame: Vec<Vec<Cell>> = std::iter::repeat_with(Vec::new).take(9).collect();
        frame.push(row);
        frame
    };

    let prev = frame_ending_with(vec![narrow('a'), narrow('b')]);
    let next = frame_ending_with(vec![
        Cell {
            ch: '你',
            style: CellStyle::default(),
            width: 2,
        },
        Cell::continuation(),
    ]);

    let patches = diff_cell_frames(&prev, &next);
    assert_eq!(patches.len(), 2, "both cols changed");

    let rendered = String::from_utf8(serialize_patches(&patches)).unwrap();
    assert!(rendered.contains('你'));

    // Only the initial positioning sequence for row 10 may appear.
    let cursor_moves = rendered.matches("\x1b[10;").count();
    assert_eq!(
        cursor_moves, 1,
        "continuation must not trigger a cursor move: {:?}",
        rendered
    );
}
1030
/// The reverse transition: a wide glyph and its continuation are
/// replaced by two narrow cells. The column that used to hold the
/// glyph's right half must receive a patch of its own (here the
/// narrow 'b'), otherwise the stale half glyph would stay on screen
/// after the left half is overwritten.
#[test]
fn unicode_diff_wide_to_narrow_erases_right_half() {
    // Default-styled single-column cell.
    let narrow = |ch: char| Cell {
        ch,
        style: CellStyle::default(),
        width: 1,
    };
    // Frame of four empty rows followed by `row` at slice index 4.
    let frame_ending_with = |row: Vec<Cell>| -> Vec<Vec<Cell>> {
        let mut frame: Vec<Vec<Cell>> = std::iter::repeat_with(Vec::new).take(4).collect();
        frame.push(row);
        frame
    };

    let prev = frame_ending_with(vec![
        Cell {
            ch: '你',
            style: CellStyle::default(),
            width: 2,
        },
        Cell::continuation(),
    ]);
    let next = frame_ending_with(vec![narrow('a'), narrow('b')]);

    // Both columns change: col 1 gets 'a', col 2 gets 'b'.
    let patches = diff_cell_frames(&prev, &next);
    assert_eq!(patches.len(), 2);
    for (patch, (col, glyph)) in patches.iter().zip([(1, 'a'), (2, 'b')]) {
        assert_eq!(patch.col, col);
        assert_eq!(patch.cell.ch, glyph);
    }

    // Both replacement glyphs must make it into the byte stream.
    let rendered = String::from_utf8(serialize_patches(&patches)).unwrap();
    assert!(rendered.contains('a'));
    assert!(rendered.contains('b'));
}
1077}