rab/tui/
util.rs

1use unicode_segmentation::UnicodeSegmentation;
2use unicode_width::UnicodeWidthChar;
3
/// Regex pattern matching CJK characters for word-wrapping breaks.
/// Matches pi's `cjkBreakRegex` script extension pattern.
///
/// The character class covers the Han, Hiragana, Katakana, Hangul and
/// Bopomofo script extensions. Only the pattern source is stored here as a
/// string; compiling it is left to whoever uses the constant.
pub const CJK_BREAK_REGEX: &str = r"[\p{Script_Extensions=Han}\p{Script_Extensions=Hiragana}\p{Script_Extensions=Katakana}\p{Script_Extensions=Hangul}\p{Script_Extensions=Bopomofo}]";
7
8/// Calculate the visible width of a string in terminal columns.
9/// Strips ANSI escape codes and counts grapheme cluster widths.
10/// Uses a thread-local LRU cache for non-ASCII strings (matching pi).
11pub fn visible_width(str: &str) -> usize {
12    if str.is_empty() {
13        return 0;
14    }
15
16    // Fast path: pure ASCII printable
17    if is_printable_ascii(str) {
18        return str.len();
19    }
20
21    // Use cache for non-ASCII
22    WIDTH_CACHE.with(|cache| {
23        let mut cache = cache.borrow_mut();
24        if let Some(&w) = cache.get(str) {
25            return w;
26        }
27        let w = compute_visible_width_inner(str);
28        if cache.len() >= WIDTH_CACHE_SIZE {
29            cache.clear();
30        }
31        cache.insert(str.to_string(), w);
32        w
33    })
34}
35
/// Report whether every byte of `str` lies in the printable ASCII range
/// (space `0x20` through tilde `0x7E`). The empty string qualifies.
fn is_printable_ascii(str: &str) -> bool {
    str.as_bytes()
        .iter()
        .all(|byte| matches!(byte, b' '..=b'~'))
}
40
41/// Calculate the terminal width of a single grapheme cluster.
42fn grapheme_width(grapheme: &str) -> usize {
43    if grapheme == "\t" {
44        return 3;
45    }
46
47    // Check for zero-width and combining characters
48    let first_char = grapheme.chars().next();
49    if let Some(c) = first_char {
50        // Zero-width characters
51        if is_zero_width_char(c) {
52            return 0;
53        }
54
55        // Emoji width (most emoji are width 2)
56        if could_be_emoji(grapheme) {
57            return 2;
58        }
59
60        // Regional indicator symbols (U+1F1E6..U+1F1FF) are often wide
61        let _cp = c as u32;
62        if (0x1f1e6..=0x1f1ff).contains(&(c as u32)) {
63            return 2;
64        }
65
66        // Use unicode-width for standard characters
67        if let Some(w) = c.width()
68            && w > 0
69        {
70            return w;
71        }
72
73        // Check trailing characters for halfwidth/fullwidth forms
74        let mut w = 0;
75        for ch in grapheme.chars() {
76            if (0xff00..=0xffef).contains(&(ch as u32)) {
77                w += 2;
78            } else if ch as u32 == 0x0e33 || ch as u32 == 0x0eb3 {
79                w += 1;
80            }
81        }
82        if w > 0 {
83            return w;
84        }
85
86        return 2; // Default wide for unknown
87    }
88    0
89}
90
/// Cheap emoji heuristic for a grapheme cluster.
///
/// Returns `true` when the first code point lies in one of the symbol/emoji
/// ranges below, when the cluster carries the VS16 emoji presentation selector
/// (U+FE0F), or when it consists of more than two `char`s (ZWJ sequences, skin
/// tone modifiers). An empty cluster yields `false`.
fn could_be_emoji(grapheme: &str) -> bool {
    let lead = grapheme.chars().next().map_or(0, u32::from);
    let lead_in_emoji_range = matches!(
        lead,
        0x1f000..=0x1fbff | 0x2300..=0x23ff | 0x2600..=0x27bf | 0x2b50..=0x2b55
    );

    lead_in_emoji_range || grapheme.contains('\u{FE0F}') || grapheme.chars().count() > 2
}
101
102/// Check if a character is zero-width (combining marks, control chars, etc.).
103fn is_zero_width_char(c: char) -> bool {
104    let _cp = c as u32;
105    matches!(
106        c,
107        '\u{200B}'..='\u{200F}' | // Zero-width space, etc.
108        '\u{2028}'..='\u{2029}' | // Line/paragraph separator
109        '\u{202A}'..='\u{202E}' | // Bidi control
110        '\u{2060}'..='\u{2064}' | // Word joiner, etc.
111        '\u{FEFF}'                 // BOM / ZWNBS
112    ) || c.is_control()
113        || (unicode_width::UnicodeWidthChar::width(c) == Some(0))
114}
115
/// Extract the ANSI escape sequence that starts at byte offset `pos` of `str`.
///
/// Recognised forms:
/// - CSI: `ESC [` followed by any bytes up to and including the first byte in
///   `0x40..=0x7E`;
/// - OSC (`ESC ]`) and APC (`ESC _`): terminated by BEL (`0x07`) or by the
///   string terminator `ESC \`.
///
/// Returns the complete sequence as a sub-slice of `str` (its `len()` is the
/// number of bytes to skip), or `None` when `pos` is out of range, the byte at
/// `pos` is not ESC, the introducer is not one of the above, or the sequence
/// is not terminated.
fn extract_ansi_code_at(str: &str, pos: usize) -> Option<&str> {
    let bytes = str.as_bytes();
    if bytes.get(pos) != Some(&0x1b) {
        return None;
    }

    let introducer = *bytes.get(pos + 1)?;
    let body_start = pos + 2;
    let body = &bytes[body_start..];

    match introducer {
        b'[' => {
            let final_offset = body.iter().position(|b| (0x40..=0x7e).contains(b))?;
            Some(&str[pos..=body_start + final_offset])
        }
        b']' | b'_' => {
            for (offset, &byte) in body.iter().enumerate() {
                let at = body_start + offset;
                match byte {
                    0x07 => return Some(&str[pos..=at]),
                    0x1b if bytes.get(at + 1) == Some(&b'\\') => {
                        return Some(&str[pos..=at + 1]);
                    }
                    _ => {}
                }
            }
            None
        }
        _ => None,
    }
}
170
171/// Truncate text to fit within a maximum visible width, adding ellipsis if needed.
172/// Optionally pad with spaces to reach exactly max_width.
173///
174/// Properly handles ANSI escape codes (they don't count toward width).
175pub fn truncate_to_width(text: &str, max_width: usize, ellipsis: &str, pad: bool) -> String {
176    if max_width == 0 {
177        return String::new();
178    }
179
180    if text.is_empty() {
181        return if pad {
182            " ".repeat(max_width)
183        } else {
184            String::new()
185        };
186    }
187
188    let text_width = visible_width(text);
189    let ellipsis_width = visible_width(ellipsis);
190
191    // Text already fits
192    if text_width <= max_width {
193        return if pad {
194            let mut result = text.to_string();
195            result.push_str(&" ".repeat(max_width - text_width));
196            result
197        } else {
198            text.to_string()
199        };
200    }
201
202    // Ellipsis is wider than available space
203    if ellipsis_width >= max_width {
204        return if pad {
205            " ".repeat(max_width)
206        } else {
207            String::new()
208        };
209    }
210
211    let target_width = max_width - ellipsis_width;
212
213    // Simple ASCII fast path
214    if is_printable_ascii(text) {
215        let prefix = &text[..target_width.min(text.len())];
216        let mut result = String::with_capacity(max_width + 20);
217        result.push_str(prefix);
218        result.push_str("\x1b[0m");
219        result.push_str(ellipsis);
220        result.push_str("\x1b[0m");
221        if pad {
222            let visible = target_width.min(text.len()) + ellipsis_width;
223            if visible < max_width {
224                result.push_str(&" ".repeat(max_width - visible));
225            }
226        }
227        return result;
228    }
229
230    // General: grapheme-by-grapheme truncation
231    let mut kept = String::new();
232    let mut kept_width: usize = 0;
233    let mut pending_ansi = String::new();
234    let mut i = 0;
235    let bytes = text.as_bytes();
236
237    while i < bytes.len() {
238        if bytes[i] == 0x1b
239            && let Some(ansi) = extract_ansi_code_at(text, i)
240        {
241            pending_ansi.push_str(ansi);
242            i += ansi.len();
243            continue;
244        }
245
246        // Get the grapheme at this position
247        let rest = &text[i..];
248        let mut _grapheme_end = i;
249        for g in rest.graphemes(true) {
250            _grapheme_end += g.len();
251            let g_width = grapheme_width(g);
252
253            if kept_width + g_width <= target_width {
254                if !pending_ansi.is_empty() {
255                    kept.push_str(&pending_ansi);
256                    pending_ansi.clear();
257                }
258                kept.push_str(g);
259                kept_width += g_width;
260            } else {
261                // Overflow - stop
262                break;
263            }
264        }
265        break;
266    }
267
268    let mut result = String::new();
269    result.push_str(&kept);
270    result.push_str("\x1b[0m");
271    result.push_str(ellipsis);
272    result.push_str("\x1b[0m");
273    if pad {
274        let visible = kept_width + ellipsis_width;
275        if visible < max_width {
276            result.push_str(&" ".repeat(max_width - visible));
277        }
278    }
279    result
280}
281
282/// Word-wrap text preserving ANSI escape codes.
283/// Returns lines where each line is <= width visible chars.
284pub fn wrap_text_with_ansi(text: &str, width: usize) -> Vec<String> {
285    if text.is_empty() {
286        return vec![String::new()];
287    }
288
289    // Handle newlines by processing each line separately
290    let mut result: Vec<String> = Vec::new();
291    let mut active_codes = String::new();
292
293    for (line_idx, input_line) in text.split('\n').enumerate() {
294        let prefix = if line_idx > 0 {
295            active_codes.clone()
296        } else {
297            String::new()
298        };
299        let wrapped = wrap_single_line(&format!("{}{}", prefix, input_line), width);
300        for line in wrapped {
301            result.push(line);
302        }
303        // Update active codes for next line
304        update_tracker_from_text(input_line, &mut active_codes);
305    }
306
307    if result.is_empty() {
308        vec![String::new()]
309    } else {
310        result
311    }
312}
313
/// Wrap one newline-free line to `width` visible columns.
///
/// Returns the line unchanged when it is empty or already fits. Otherwise the
/// line is tokenised with `split_into_tokens` and tokens are appended greedily;
/// a token wider than `width` is split with `break_long_word`. An `AnsiState`
/// tracker follows the escape codes seen so far so that continuation lines can
/// start with the active codes and finished lines can end with the tracker's
/// line-end reset.
fn wrap_single_line(line: &str, width: usize) -> Vec<String> {
    if line.is_empty() {
        return vec![String::new()];
    }

    let visible = visible_width(line);
    if visible <= width {
        return vec![line.to_string()];
    }

    // Split line into tokens (words separated by spaces, plus CJK breaks)
    let tokens = split_into_tokens(line);
    let mut wrapped: Vec<String> = Vec::new();
    let mut current_line = String::new();
    let mut current_width: usize = 0;
    let mut tracker = AnsiState::new();

    for token in &tokens {
        let token_width = visible_width(token);
        let is_space = token.trim().is_empty();

        // Token is wider than available width - break it character by character
        if token_width > width && !is_space {
            // Flush whatever is on the current line first (not trimmed here).
            if !current_line.is_empty() {
                let line_end = tracker.line_end_reset();
                if !line_end.is_empty() {
                    current_line.push_str(&line_end);
                }
                wrapped.push(current_line);
                current_line = String::new();
                current_width = 0;
            }

            // All pieces but the last become finished lines; the last piece
            // stays open so following tokens can be appended to it.
            let broken = break_long_word(token, width, &mut tracker);
            let last = broken.len().saturating_sub(1);
            for (i, line) in broken.iter().enumerate() {
                if i < last {
                    wrapped.push(line.clone());
                } else {
                    current_line = line.clone();
                    current_width = visible_width(line);
                }
            }
            // break_long_word already fed the token's escape codes to the
            // tracker, so the update at the bottom of the loop is skipped.
            continue;
        }

        let total = current_width + token_width;
        if total > width && current_width > 0 {
            // Token does not fit: finish the current line (trailing
            // whitespace trimmed) and start a new one.
            let mut line_to_wrap = current_line.trim_end().to_string();
            let line_end = tracker.line_end_reset();
            if !line_end.is_empty() {
                line_to_wrap.push_str(&line_end);
            }
            wrapped.push(line_to_wrap);
            if is_space {
                // A whitespace token at the break is dropped; the new line
                // starts with just the active codes.
                current_line = tracker.active_codes();
                current_width = 0;
            } else {
                let codes = tracker.active_codes();
                current_line = format!("{}{}", codes, token);
                current_width = token_width;
            }
        } else {
            current_line.push_str(token);
            current_width += token_width;
        }

        tracker.update(token);
    }

    if !current_line.is_empty() {
        wrapped.push(current_line.trim_end().to_string());
    }

    if wrapped.is_empty() {
        vec![String::new()]
    } else {
        wrapped
    }
}
394
395/// Split text into tokens for word wrapping.
396/// Keeps ANSI codes attached to adjacent visible content.
397fn split_into_tokens(text: &str) -> Vec<String> {
398    let mut tokens: Vec<String> = Vec::new();
399    let mut current = String::new();
400    let mut pending_ansi = String::new();
401    let mut current_is_space: Option<bool> = None;
402    let mut i = 0;
403    let bytes = text.as_bytes();
404
405    while i < bytes.len() {
406        if bytes[i] == 0x1b
407            && let Some(ansi) = extract_ansi_code_at(text, i)
408        {
409            pending_ansi.push_str(ansi);
410            i += ansi.len();
411            continue;
412        }
413
414        // Find end of non-ANSI run
415        let mut end = i;
416        while end < bytes.len() && bytes[end] != 0x1b {
417            end += 1;
418        }
419
420        let segment_str = &text[i..end];
421        let mut seg_pos = 0;
422        while seg_pos < segment_str.len() {
423            // Check for paste marker start — treat as single atomic token
424            if segment_str[seg_pos..].starts_with("[paste #") {
425                if !current.is_empty() {
426                    tokens.push(std::mem::take(&mut current));
427                    current_is_space = None;
428                }
429                if let Some(end) = segment_str[seg_pos..].find(']') {
430                    let marker = &segment_str[seg_pos..=seg_pos + end];
431                    let token = format!("{}{}", pending_ansi, marker);
432                    pending_ansi.clear();
433                    tokens.push(token);
434                    seg_pos += end + 1;
435                    continue;
436                }
437            }
438
439            // Get the next grapheme
440            let grapheme = if let Some(g) = segment_str[seg_pos..].graphemes(true).next() {
441                g
442            } else {
443                break;
444            };
445            let g_len = grapheme.len();
446            let is_space = grapheme == " ";
447
448            // CJK characters get their own token
449            if !is_space && is_cjk_break(grapheme) {
450                if !current.is_empty() {
451                    tokens.push(std::mem::take(&mut current));
452                    current_is_space = None;
453                }
454                let token = format!("{}{}", pending_ansi, grapheme);
455                pending_ansi.clear();
456                tokens.push(token);
457                seg_pos += g_len;
458                continue;
459            }
460
461            let segment_is_space = is_space;
462            if current_is_space.is_some_and(|s| s != segment_is_space) && !current.is_empty() {
463                tokens.push(std::mem::take(&mut current));
464            }
465
466            if !pending_ansi.is_empty() {
467                current.push_str(&pending_ansi);
468                pending_ansi.clear();
469            }
470
471            current_is_space = Some(segment_is_space);
472            current.push_str(grapheme);
473            seg_pos += g_len;
474        }
475
476        i = end;
477    }
478
479    // Attach any remaining pending ANSI
480    if !pending_ansi.is_empty() {
481        if !current.is_empty() {
482            current.push_str(&pending_ansi);
483        } else if let Some(last) = tokens.last_mut() {
484            last.push_str(&pending_ansi);
485        } else {
486            current = pending_ansi;
487        }
488    }
489
490    if !current.is_empty() {
491        tokens.push(current);
492    }
493
494    tokens
495}
496
497/// Break a long word (wider than available width) into multiple lines.
498fn break_long_word(word: &str, width: usize, tracker: &mut AnsiState) -> Vec<String> {
499    let mut lines: Vec<String> = Vec::new();
500    let mut current_line = tracker.active_codes();
501    let mut current_width: usize = 0;
502    let mut i = 0;
503    let bytes = word.as_bytes();
504
505    while i < bytes.len() {
506        if bytes[i] == 0x1b
507            && let Some(ansi) = extract_ansi_code_at(word, i)
508        {
509            current_line.push_str(ansi);
510            tracker.update(ansi);
511            i += ansi.len();
512            continue;
513        }
514
515        let rest = &word[i..];
516        let mut grapheme_end = i;
517        for g in rest.graphemes(true) {
518            grapheme_end += g.len();
519            let g_width = grapheme_width(g);
520
521            if current_width + g_width > width && current_width > 0 {
522                let line_end = tracker.line_end_reset();
523                if !line_end.is_empty() {
524                    current_line.push_str(&line_end);
525                }
526                lines.push(std::mem::take(&mut current_line));
527                current_line = tracker.active_codes();
528                current_width = 0;
529            }
530
531            current_line.push_str(g);
532            current_width += g_width;
533        }
534        i = grapheme_end;
535    }
536
537    if !current_line.is_empty() {
538        lines.push(current_line);
539    }
540
541    if lines.is_empty() {
542        vec![String::new()]
543    } else {
544        lines
545    }
546}
547
548/// Extract a range of visible columns from a line. Handles ANSI codes and wide chars.
549pub fn slice_by_column(line: &str, start_col: usize, length: usize) -> String {
550    if length == 0 {
551        return String::new();
552    }
553
554    let end_col = start_col + length;
555    let mut result = String::new();
556    let mut current_col: usize = 0;
557    let mut pending_ansi = String::new();
558    let mut i = 0;
559    let bytes = line.as_bytes();
560
561    while i < bytes.len() {
562        if bytes[i] == 0x1b
563            && let Some(ansi) = extract_ansi_code_at(line, i)
564        {
565            if current_col >= start_col && current_col < end_col {
566                result.push_str(ansi);
567            } else if current_col < start_col {
568                pending_ansi.push_str(ansi);
569            }
570            i += ansi.len();
571            continue;
572        }
573
574        // Find end of non-ANSI run
575        let mut text_end = i;
576        while text_end < bytes.len() && bytes[text_end] != 0x1b {
577            text_end += 1;
578        }
579
580        let segment_str = &line[i..text_end];
581        for grapheme in segment_str.graphemes(true) {
582            let w = grapheme_width(grapheme);
583            let in_range = current_col >= start_col && current_col < end_col;
584
585            if in_range && current_col + w <= end_col {
586                if !pending_ansi.is_empty() {
587                    result.push_str(&pending_ansi);
588                    pending_ansi.clear();
589                }
590                result.push_str(grapheme);
591            }
592
593            current_col += w;
594            if current_col >= end_col {
595                return result;
596            }
597        }
598        i = text_end;
599        if current_col >= end_col {
600            return result;
601        }
602    }
603
604    result
605}
606
607/// Convert a visual column position to a byte offset in the given text.
608/// Handles ANSI escape codes and wide characters correctly.
609pub fn visual_col_to_byte_offset(text: &str, visual_col: usize) -> usize {
610    if text.is_empty() {
611        return 0;
612    }
613
614    let mut vis_so_far: usize = 0;
615    let mut i = 0;
616    let bytes = text.as_bytes();
617
618    while i < bytes.len() {
619        if bytes[i] == 0x1b
620            && let Some(ansi) = extract_ansi_code_at(text, i)
621        {
622            i += ansi.len();
623            continue;
624        }
625
626        let rest = &text[i..];
627        if let Some(g) = rest.graphemes(true).next() {
628            let gw = grapheme_width(g);
629            if vis_so_far + gw > visual_col {
630                return i;
631            }
632            vis_so_far += gw;
633            i += g.len();
634            continue;
635        }
636        break;
637    }
638
639    text.len()
640}
641
/// Simple ANSI state tracker for wrap_text_with_ansi.
///
/// Holds the subset of SGR attributes that the wrapping code carries across
/// line breaks: bold, underline, and the current foreground/background colour.
struct AnsiState {
    // True while bold (SGR 1) is in effect.
    bold: bool,
    // True while underline (SGR 4) is in effect.
    underline: bool,
    // SGR parameter text of the active foreground colour, e.g. "31" or
    // "38;5;196"; None when no foreground colour is set.
    fg_color: Option<String>,
    // SGR parameter text of the active background colour, e.g. "44" or
    // "48;2;0;0;0"; None when no background colour is set.
    bg_color: Option<String>,
}
649
650impl AnsiState {
651    fn new() -> Self {
652        Self {
653            bold: false,
654            underline: false,
655            fg_color: None,
656            bg_color: None,
657        }
658    }
659
660    fn update(&mut self, text: &str) {
661        let mut i = 0;
662        let bytes = text.as_bytes();
663        while i < bytes.len() {
664            if bytes[i] == 0x1b
665                && let Some(ansi) = extract_ansi_code_at(text, i)
666            {
667                self.process_ansi(ansi);
668                i += ansi.len();
669                continue;
670            }
671            i += 1;
672        }
673    }
674
675    fn process_ansi(&mut self, code: &str) {
676        let code_bytes = code.as_bytes();
677        // Check for SGR codes: ESC [ ... m
678        if code_bytes.len() < 4 || code_bytes[code_bytes.len() - 1] != b'm' {
679            return;
680        }
681
682        let inner = &code[2..code.len() - 1]; // Strip ESC[ and m
683        if inner.is_empty() || inner == "0" {
684            self.bold = false;
685            self.underline = false;
686            self.fg_color = None;
687            self.bg_color = None;
688            return;
689        }
690
691        let params: Vec<&str> = inner.split(';').collect();
692        let mut i = 0;
693        while i < params.len() {
694            let Ok(parsed) = params[i].parse::<u8>() else {
695                i += 1;
696                continue;
697            };
698            match parsed {
699                0 => {
700                    self.bold = false;
701                    self.underline = false;
702                    self.fg_color = None;
703                    self.bg_color = None;
704                }
705                1 => self.bold = true,
706                4 => self.underline = true,
707                22 => self.bold = false,
708                24 => self.underline = false,
709                30..=37 | 90..=97 => {
710                    self.fg_color = Some(parsed.to_string());
711                }
712                40..=47 | 100..=107 => {
713                    self.bg_color = Some(parsed.to_string());
714                }
715                38 => {
716                    // Extended foreground color: 38;5;N or 38;2;R;G;B
717                    if i + 1 < params.len() {
718                        match params[i + 1] {
719                            "5" if i + 2 < params.len() => {
720                                self.fg_color = Some(params[i..=i + 2].join(";"));
721                                i += 2;
722                            }
723                            "2" if i + 4 < params.len() => {
724                                self.fg_color = Some(params[i..=i + 4].join(";"));
725                                i += 4;
726                            }
727                            _ => {}
728                        }
729                    }
730                }
731                48 => {
732                    // Extended background color: 48;5;N or 48;2;R;G;B
733                    if i + 1 < params.len() {
734                        match params[i + 1] {
735                            "5" if i + 2 < params.len() => {
736                                self.bg_color = Some(params[i..=i + 2].join(";"));
737                                i += 2;
738                            }
739                            "2" if i + 4 < params.len() => {
740                                self.bg_color = Some(params[i..=i + 4].join(";"));
741                                i += 4;
742                            }
743                            _ => {}
744                        }
745                    }
746                }
747                39 => self.fg_color = None,
748                49 => self.bg_color = None,
749                _ => {}
750            }
751            i += 1;
752        }
753    }
754
755    fn active_codes(&self) -> String {
756        let mut codes: Vec<String> = Vec::new();
757        if self.bold {
758            codes.push("1".to_string());
759        }
760        if self.underline {
761            codes.push("4".to_string());
762        }
763        if let Some(ref fg) = self.fg_color {
764            codes.push(fg.clone());
765        }
766        if let Some(ref bg) = self.bg_color {
767            codes.push(bg.clone());
768        }
769        if codes.is_empty() {
770            String::new()
771        } else {
772            format!("\x1b[{}m", codes.join(";"))
773        }
774    }
775
776    /// Get reset for underline only (preserves background at line end).
777    fn line_end_reset(&self) -> String {
778        if self.underline {
779            "\x1b[24m".to_string()
780        } else {
781            String::new()
782        }
783    }
784}
785
/// Return `line` followed by an SGR reset (`ESC[0m`) and an empty OSC 8
/// hyperlink (`ESC]8;;BEL`), so that any style or hyperlink left open by the
/// line is terminated.
/// Matches pi's normalizeTerminalOutput.
pub fn normalize_terminal_output(line: &str) -> String {
    const TERMINATOR: &str = "\x1b[0m\x1b]8;;\x07";

    let mut normalized = String::with_capacity(line.len() + TERMINATOR.len());
    normalized.push_str(line);
    normalized.push_str(TERMINATOR);
    normalized
}
792
/// Report whether `grapheme` is exactly one space or exactly one tab.
/// Single-char check matching pi's isWhitespaceChar.
pub fn is_whitespace_char(grapheme: &str) -> bool {
    matches!(grapheme, " " | "\t")
}
798
799/// Extract segments from a line for overlay compositing.
800/// Returns (before_text, before_width, after_text, after_width).
801/// The "before" segment is columns [0, before_end).
802/// The "after" segment is columns [after_start, total_width).
803/// Matches pi's extractSegments.
804pub fn extract_segments(
805    line: &str,
806    before_end: usize,
807    after_start: usize,
808    after_len: usize,
809    strict: bool,
810) -> (String, usize, String, usize) {
811    let before = slice_by_column(line, 0, before_end);
812    let before_width = visible_width(&before);
813    let after = slice_by_column(line, after_start, after_len);
814    let after_width = visible_width(&after);
815
816    if strict {
817        // If before_text is wider than expected, use empty before
818        if before_width > before_end {
819            return (String::new(), 0, after, after_width);
820        }
821    }
822
823    (before, before_width, after, after_width)
824}
825
826/// Apply a background color function to a line, padding it to the given width.
827/// Matches pi's `applyBackgroundToLine`.
828pub fn apply_background_to_line(
829    line: &str,
830    width: usize,
831    bg_fn: &dyn Fn(&str) -> String,
832) -> String {
833    let vis = visible_width(line);
834    let padded = if vis < width {
835        let mut result = line.to_string();
836        result.push_str(&" ".repeat(width - vis));
837        result
838    } else {
839        line.to_string()
840    };
841    bg_fn(&padded)
842}
843
/// Check whether a line is an inline image data URL.
///
/// True when the line, ignoring leading whitespace, starts with
/// `data:image/` and contains `;base64,` somewhere.
// NOTE(review): only data URLs are recognised here; Kitty image escape
// sequences are not checked by this function — confirm whether callers
// expect that.
pub fn is_image_line(line: &str) -> bool {
    let starts_as_data_url = line.trim_start().starts_with("data:image/");
    starts_as_data_url && line.contains(";base64,")
}
850
851/// Slice text by visible columns, returning both the extracted text and its width.
852/// Like `slice_by_column` but also returns the actual visible width of the result.
853/// Matches pi's `sliceWithWidth`.
854pub fn slice_with_width(line: &str, start_col: usize, length: usize) -> (String, usize) {
855    let text = slice_by_column(line, start_col, length);
856    let width = visible_width(&text);
857    (text, width)
858}
859
860// Width cache for non-ASCII strings (matching pi's WIDTH_CACHE_SIZE = 512)
861use std::cell::RefCell;
862use std::collections::HashMap;
863
// Entry limit for WIDTH_CACHE: `visible_width` empties the whole map before
// inserting once it already holds this many entries.
const WIDTH_CACHE_SIZE: usize = 512;

thread_local! {
    // Per-thread memo of computed widths, keyed by the measured string.
    // `visible_width` only consults it for strings that are not pure
    // printable ASCII.
    static WIDTH_CACHE: RefCell<HashMap<String, usize>> = RefCell::new(HashMap::new());
}
869
870/// Compute visible width without cache (used by `visible_width` for cache misses).
871fn compute_visible_width_inner(s: &str) -> usize {
872    if s.is_empty() {
873        return 0;
874    }
875    // Normalize: tabs to 3 spaces, strip ANSI escape codes
876    let mut clean = String::with_capacity(s.len());
877    let mut i = 0;
878    let bytes = s.as_bytes();
879    while i < bytes.len() {
880        if bytes[i] == b'\t' {
881            clean.push_str("   ");
882            i += 1;
883            continue;
884        }
885        if bytes[i] == 0x1b
886            && let Some(ansi) = extract_ansi_code_at(s, i)
887        {
888            i += ansi.len();
889            continue;
890        }
891        if let Some(ch) = s[i..].chars().next() {
892            clean.push(ch);
893            i += ch.len_utf8();
894        } else {
895            i += 1;
896        }
897    }
898
899    let mut width = 0;
900    for grapheme in clean.graphemes(true) {
901        width += grapheme_width(grapheme);
902    }
903    width
904}
905
/// Decide whether a grapheme cluster is CJK and therefore gets its own token
/// when wrapping.
///
/// Only the first `char` is inspected; it must fall in one of the CJK Unified
/// Ideographs, Hiragana, Katakana, Hangul Syllables or Bopomofo blocks. An
/// empty cluster is not CJK.
pub fn is_cjk_break(grapheme: &str) -> bool {
    grapheme.chars().next().is_some_and(|lead| {
        matches!(
            lead as u32,
            0x4E00..=0x9FFF   // CJK Unified Ideographs
            | 0x3040..=0x309F // Hiragana
            | 0x30A0..=0x30FF // Katakana
            | 0xAC00..=0xD7AF // Hangul Syllables
            | 0x3100..=0x312F // Bopomofo
        )
    })
}
920
921fn update_tracker_from_text(text: &str, active_codes: &mut String) {
922    // Simple: just re-evaluate ANSI state from scratch for the text
923    let mut tracker = AnsiState::new();
924    tracker.update(text);
925    *active_codes = tracker.active_codes();
926}
927
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII text is one column per character; the empty string is zero wide.
    #[test]
    fn test_visible_width_ascii() {
        for (input, expected) in [("hello", 5), ("", 0)] {
            assert_eq!(visible_width(input), expected);
        }
    }

    /// ANSI escape sequences contribute nothing to the measured width.
    #[test]
    fn test_visible_width_with_ansi() {
        let cases = [
            ("\x1b[31mhello\x1b[0m", 5),
            // A tab measures 3 columns and the CJK glyph 2.
            ("\t\x1b[31m界\x1b[0m", 5),
        ];
        for (input, expected) in cases {
            assert_eq!(visible_width(input), expected);
        }
    }

    /// CJK ideographs occupy two columns each, alone or mixed with ASCII.
    #[test]
    fn test_visible_width_cjk() {
        for (input, expected) in [("世界", 4), ("hello世界", 9)] {
            assert_eq!(visible_width(input), expected);
        }
    }

    /// Single emoji are measured as two columns.
    #[test]
    fn test_visible_width_emoji() {
        for emoji in ["🙂", "👋"] {
            assert_eq!(visible_width(emoji), 2);
        }
    }

    /// Text already within the limit is returned untouched.
    #[test]
    fn test_truncate_to_width_no_truncation() {
        assert_eq!(truncate_to_width("hello", 10, "...", false), "hello");
    }

    /// Over-long text is cut to fit and carries the ellipsis.
    #[test]
    fn test_truncate_to_width_with_ellipsis() {
        let truncated = truncate_to_width("hello world", 8, "...", false);
        assert!(visible_width(&truncated) <= 8);
        assert!(truncated.contains("..."));
    }

    /// With padding enabled, short text is extended to exactly the limit.
    #[test]
    fn test_truncate_to_width_with_pad() {
        let padded = truncate_to_width("hi", 8, "...", true);
        assert_eq!(visible_width(&padded), 8);
    }

    /// Empty input stays empty without padding and becomes all spaces with it.
    #[test]
    fn test_truncate_to_width_empty() {
        let unpadded = truncate_to_width("", 5, "...", false);
        assert_eq!(unpadded, "");

        let padded = truncate_to_width("", 5, "...", true);
        assert_eq!(padded, " ".repeat(5));
    }

    /// A zero-column budget yields the empty string.
    #[test]
    fn test_truncate_to_width_max_zero() {
        let truncated = truncate_to_width("hello", 0, "...", false);
        assert_eq!(truncated, "");
    }

    /// Long text is split into several lines, none wider than the limit.
    #[test]
    fn test_wrap_basic() {
        let lines = wrap_text_with_ansi("hello world this is a test", 10);
        assert!(lines.len() > 1);
        assert!(lines.iter().all(|line| visible_width(line) <= 10));
    }

    /// Text that already fits comes back as a single unchanged line.
    #[test]
    fn test_wrap_no_wrap_needed() {
        let lines = wrap_text_with_ansi("hello", 10);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0], "hello");
    }

    /// Every continuation line re-opens the red colour code.
    #[test]
    fn test_wrap_preserves_ansi() {
        let lines = wrap_text_with_ansi("\x1b[31mhello world this is red\x1b[0m", 10);
        assert!(lines
            .iter()
            .skip(1)
            .all(|line| line.starts_with("\x1b[31m")));
    }

    /// Column slicing of plain ASCII, given as (start column, length, expected).
    #[test]
    fn test_slice_by_column_basic() {
        let line = "hello world";
        let cases = [(0, 5, "hello"), (6, 5, "world"), (3, 4, "lo w")];
        for (start, len, expected) in cases {
            assert_eq!(slice_by_column(line, start, len), expected);
        }
    }

    /// A zero-length slice is empty.
    #[test]
    fn test_slice_by_column_empty() {
        let sliced = slice_by_column("test", 0, 0);
        assert_eq!(sliced, "");
    }

    /// Normalisation appends the SGR reset and the empty OSC 8 sequence.
    #[test]
    fn test_normalize_terminal_output() {
        assert_eq!(
            normalize_terminal_output("hello"),
            "hello\x1b[0m\x1b]8;;\x07"
        );
    }

    /// Space and tab are whitespace; a letter and the empty string are not.
    #[test]
    fn test_is_whitespace_char() {
        for blank in [" ", "\t"] {
            assert!(is_whitespace_char(blank));
        }
        for other in ["a", ""] {
            assert!(!is_whitespace_char(other));
        }
    }

    /// Both segments are cut out of one line by column position.
    #[test]
    fn test_extract_segments_basic() {
        // Columns [0,5) hold "hello"; columns [15,20) hold the space before
        // "world" followed by its first four letters.
        let (head, head_width, tail, tail_width) =
            extract_segments("hello beautiful world", 5, 15, 5, true);

        assert_eq!(head, "hello");
        assert_eq!(head_width, 5);
        assert_eq!(tail, " worl");
        assert_eq!(tail_width, 5);
    }

    /// Requested columns lie beyond the end of a short line.
    #[test]
    fn test_extract_segments_overflow() {
        // The line is only 5 columns wide, so a before_end of 10 returns the
        // whole line as the "before" part and leaves "after" empty.
        let (head, head_width, tail, _tail_width) = extract_segments("short", 10, 15, 5, true);

        assert_eq!(head, "short");
        assert_eq!(head_width, 5);
        assert!(tail.is_empty());
    }
}
1064
/// Wrapping multi-line input where some logical lines fit and others do not:
/// the three logical lines must produce at least three visual lines, and no
/// visual line may be wider than the limit.
#[test]
fn test_wrap_multiline_preserves_line_count() {
    let input = "hello world this is a test\nshort\nanother long line here yes";
    let lines = wrap_text_with_ansi(input, 10);

    // One visual line per logical line is the lower bound.
    let minimum = 3;
    let produced = lines.len();
    assert!(
        produced >= minimum,
        "Expected at least {} lines, got {}",
        minimum,
        produced
    );

    // Every visual line has to fit into the requested width.
    for (index, line) in lines.iter().enumerate() {
        let columns = visible_width(line);
        assert!(
            columns <= 10,
            "Line {}: '{}' has visible_width {} > 10",
            index,
            line,
            columns
        );
    }
}
1093
/// Wrapping a multi-line string yields exactly the wrapped lines of each
/// logical line, and no line shows up twice.
#[test]
fn test_wrap_text_with_ansi_no_duplicate_lines() {
    // At width 5:
    //   "abc def ghi"    -> "abc" / "def" / "ghi"       (3 lines)
    //   "jk lm no pq rs" -> "jk lm" / "no pq" / "rs"   (3 lines)
    let result = wrap_text_with_ansi("abc def ghi\njk lm no pq rs", 5);
    assert_eq!(
        result.len(),
        6,
        "Expected 6 wrapped lines (3+3), got {}: {:?}",
        result.len(),
        result
    );

    // Compare lines without surrounding whitespace; blank lines are ignored.
    let mut seen = std::collections::HashSet::new();
    let non_blank = result
        .iter()
        .map(|line| line.trim())
        .filter(|trimmed| !trimmed.is_empty());
    for trimmed in non_blank {
        assert!(seen.insert(trimmed), "Duplicate line found: '{}'", trimmed);
    }
}
1120
/// Wrapping must not introduce repetitions that the input does not contain.
///
/// The input legitimately holds the same 45-character phrase twice, separated
/// by three spaces. That is a property of the input, not a wrapping bug, so
/// the check is only that the wrapped output never contains the phrase more
/// often than the input does.
#[test]
fn test_wrap_user_text_does_not_introduce_duplicates() {
    let phrase = "ghhh jjj jkkk  jrjrnr jrnr rkr rrkr rmrrkrr k";
    let input = "ghhh jjj jkkk  jrjrnr jrnr rkr rrkr rmrrkrr k   ghhh jjj jkkk  jrjrnr jrnr rkr rrkr rmrrkrr k";

    // Baseline: how often the phrase occurs before any wrapping.
    let baseline = input.matches(phrase).count();
    assert_eq!(baseline, 2, "Input should have 2 occurrences of pattern");

    for width in [40, 50, 60, 80, 100] {
        // Total number of phrase occurrences across all wrapped lines.
        let mut found = 0;
        for line in wrap_text_with_ansi(input, width).iter() {
            found += line.matches(phrase).count();
        }

        assert!(
            found <= baseline,
            "Width {}: wrapped has {} occurrences, input has {}",
            width,
            found,
            baseline
        );
    }
}
rab/tui/util.rs

rab/tui/
util.rs