rab/tui/
util.rs

1use unicode_segmentation::UnicodeSegmentation;
2use unicode_width::UnicodeWidthChar;
3
/// Regex source for a character class that matches one CJK character.
///
/// The class is the union of the Han, Hiragana, Katakana, Hangul and Bopomofo
/// script extensions, mirroring pi's `cjkBreakRegex`; it describes where
/// word-wrapping may break inside CJK text.
pub const CJK_BREAK_REGEX: &str = concat!(
    r"[",
    r"\p{Script_Extensions=Han}",
    r"\p{Script_Extensions=Hiragana}",
    r"\p{Script_Extensions=Katakana}",
    r"\p{Script_Extensions=Hangul}",
    r"\p{Script_Extensions=Bopomofo}",
    r"]",
);
7
8/// Calculate the visible width of a string in terminal columns.
9/// Strips ANSI escape codes and counts grapheme cluster widths.
10/// Uses a thread-local LRU cache for non-ASCII strings (matching pi).
11pub fn visible_width(str: &str) -> usize {
12    if str.is_empty() {
13        return 0;
14    }
15
16    // Fast path: pure ASCII printable
17    if is_printable_ascii(str) {
18        return str.len();
19    }
20
21    // Use cache for non-ASCII
22    WIDTH_CACHE.with(|cache| {
23        let mut cache = cache.borrow_mut();
24        if let Some(&w) = cache.get(str) {
25            return w;
26        }
27        let w = compute_visible_width_inner(str);
28        if cache.len() >= WIDTH_CACHE_SIZE {
29            cache.clear();
30        }
31        cache.insert(str.to_string(), w);
32        w
33    })
34}
35
/// Report whether every byte of `str` is printable ASCII (0x20 through 0x7E).
///
/// The empty string qualifies; tabs, escapes and any non-ASCII byte do not.
fn is_printable_ascii(str: &str) -> bool {
    !str.bytes().any(|byte| !matches!(byte, 0x20..=0x7e))
}
40
41/// Calculate the terminal width of a single grapheme cluster.
42fn grapheme_width(grapheme: &str) -> usize {
43    if grapheme == "\t" {
44        return 3;
45    }
46
47    // Check for zero-width and combining characters
48    let first_char = grapheme.chars().next();
49    if let Some(c) = first_char {
50        // Zero-width characters
51        if is_zero_width_char(c) {
52            return 0;
53        }
54
55        // Emoji width (most emoji are width 2)
56        if could_be_emoji(grapheme) {
57            return 2;
58        }
59
60        // Regional indicator symbols (U+1F1E6..U+1F1FF) are often wide
61        let _cp = c as u32;
62        if (0x1f1e6..=0x1f1ff).contains(&(c as u32)) {
63            return 2;
64        }
65
66        // Use unicode-width for standard characters
67        if let Some(w) = c.width()
68            && w > 0
69        {
70            return w;
71        }
72
73        // Check trailing characters for halfwidth/fullwidth forms
74        let mut w = 0;
75        for ch in grapheme.chars() {
76            if (0xff00..=0xffef).contains(&(ch as u32)) {
77                w += 2;
78            } else if ch as u32 == 0x0e33 || ch as u32 == 0x0eb3 {
79                w += 1;
80            }
81        }
82        if w > 0 {
83            return w;
84        }
85
86        return 2; // Default wide for unknown
87    }
88    0
89}
90
/// Cheap heuristic: could this grapheme cluster be an emoji?
///
/// True when the first scalar lies in one of four symbol/pictograph ranges,
/// when the cluster contains VS16 (U+FE0F, the emoji presentation selector),
/// or when it has more than two scalars (ZWJ sequences, skin-tone modifiers).
///
/// NOTE(review): this is a range test only, so text-style symbols inside the
/// ranges (for example U+2713 CHECK MARK) also answer `true` — confirm that
/// callers want those treated as emoji.
fn could_be_emoji(grapheme: &str) -> bool {
    let lead = grapheme.chars().next().map_or(0, u32::from);
    let in_symbol_range = matches!(
        lead,
        0x1f000..=0x1fbff | 0x2300..=0x23ff | 0x2600..=0x27bf | 0x2b50..=0x2b55
    );

    in_symbol_range || grapheme.contains('\u{FE0F}') || grapheme.chars().count() > 2
}
101
102/// Check if a character is zero-width (combining marks, control chars, etc.).
103fn is_zero_width_char(c: char) -> bool {
104    let _cp = c as u32;
105    matches!(
106        c,
107        '\u{200B}'..='\u{200F}' | // Zero-width space, etc.
108        '\u{2028}'..='\u{2029}' | // Line/paragraph separator
109        '\u{202A}'..='\u{202E}' | // Bidi control
110        '\u{2060}'..='\u{2064}' | // Word joiner, etc.
111        '\u{FEFF}'                 // BOM / ZWNBS
112    ) || c.is_control()
113        || (unicode_width::UnicodeWidthChar::width(c) == Some(0))
114}
115
/// Return the ANSI escape sequence that starts at byte offset `pos`, if any.
///
/// Recognised forms:
/// - CSI: `ESC [` followed by anything up to and including the first byte in
///   `0x40..=0x7E`;
/// - OSC (`ESC ]`) and APC (`ESC _`): everything up to and including the first
///   BEL (0x07) or string terminator (`ESC \`).
///
/// The returned slice borrows from `str` and covers the whole sequence.
/// `None` means `pos` is out of range, the byte there is not ESC, the
/// introducer is not one of the three above, or the sequence is unterminated.
fn extract_ansi_code_at(str: &str, pos: usize) -> Option<&str> {
    let bytes = str.as_bytes();
    if bytes.get(pos) != Some(&0x1b) {
        return None;
    }

    let introducer = *bytes.get(pos + 1)?;
    let body_start = pos + 2;
    let body = &bytes[body_start..];

    match introducer {
        b'[' => {
            let final_offset = body.iter().position(|b| (0x40..=0x7e).contains(b))?;
            Some(&str[pos..=body_start + final_offset])
        }
        b']' | b'_' => {
            for (offset, &byte) in body.iter().enumerate() {
                let at = body_start + offset;
                match byte {
                    0x07 => return Some(&str[pos..=at]),
                    0x1b if bytes.get(at + 1) == Some(&b'\\') => {
                        return Some(&str[pos..=at + 1]);
                    }
                    _ => {}
                }
            }
            None
        }
        _ => None,
    }
}
170
171/// Truncate text to fit within a maximum visible width, adding ellipsis if needed.
172/// Optionally pad with spaces to reach exactly max_width.
173///
174/// Properly handles ANSI escape codes (they don't count toward width).
175pub fn truncate_to_width(text: &str, max_width: usize, ellipsis: &str, pad: bool) -> String {
176    if max_width == 0 {
177        return String::new();
178    }
179
180    if text.is_empty() {
181        return if pad {
182            " ".repeat(max_width)
183        } else {
184            String::new()
185        };
186    }
187
188    let text_width = visible_width(text);
189    let ellipsis_width = visible_width(ellipsis);
190
191    // Text already fits
192    if text_width <= max_width {
193        return if pad {
194            let mut result = text.to_string();
195            result.push_str(&" ".repeat(max_width - text_width));
196            result
197        } else {
198            text.to_string()
199        };
200    }
201
202    // Ellipsis is wider than available space
203    if ellipsis_width >= max_width {
204        return if pad {
205            " ".repeat(max_width)
206        } else {
207            String::new()
208        };
209    }
210
211    let target_width = max_width - ellipsis_width;
212
213    // Simple ASCII fast path
214    if is_printable_ascii(text) {
215        let prefix = &text[..target_width.min(text.len())];
216        let mut result = String::with_capacity(max_width + 20);
217        result.push_str(prefix);
218        result.push_str("\x1b[0m");
219        result.push_str(ellipsis);
220        result.push_str("\x1b[0m");
221        if pad {
222            let visible = target_width.min(text.len()) + ellipsis_width;
223            if visible < max_width {
224                result.push_str(&" ".repeat(max_width - visible));
225            }
226        }
227        return result;
228    }
229
230    // General: grapheme-by-grapheme truncation
231    let mut kept = String::new();
232    let mut kept_width: usize = 0;
233    let mut pending_ansi = String::new();
234    let mut i = 0;
235    let bytes = text.as_bytes();
236
237    while i < bytes.len() {
238        if bytes[i] == 0x1b
239            && let Some(ansi) = extract_ansi_code_at(text, i)
240        {
241            pending_ansi.push_str(ansi);
242            i += ansi.len();
243            continue;
244        }
245
246        // Get the grapheme at this position
247        let rest = &text[i..];
248        let mut _grapheme_end = i;
249        for g in rest.graphemes(true) {
250            _grapheme_end += g.len();
251            let g_width = grapheme_width(g);
252
253            if kept_width + g_width <= target_width {
254                if !pending_ansi.is_empty() {
255                    kept.push_str(&pending_ansi);
256                    pending_ansi.clear();
257                }
258                kept.push_str(g);
259                kept_width += g_width;
260            } else {
261                // Overflow - stop
262                break;
263            }
264        }
265        break;
266    }
267
268    let mut result = String::new();
269    result.push_str(&kept);
270    result.push_str("\x1b[0m");
271    result.push_str(ellipsis);
272    result.push_str("\x1b[0m");
273    if pad {
274        let visible = kept_width + ellipsis_width;
275        if visible < max_width {
276            result.push_str(&" ".repeat(max_width - visible));
277        }
278    }
279    result
280}
281
282/// Word-wrap text preserving ANSI escape codes.
283/// Returns lines where each line is <= width visible chars.
284pub fn wrap_text_with_ansi(text: &str, width: usize) -> Vec<String> {
285    if text.is_empty() {
286        return vec![String::new()];
287    }
288
289    // Handle newlines by processing each line separately
290    let mut result: Vec<String> = Vec::new();
291    let mut active_codes = String::new();
292
293    for (line_idx, input_line) in text.split('\n').enumerate() {
294        let prefix = if line_idx > 0 {
295            active_codes.clone()
296        } else {
297            String::new()
298        };
299        let wrapped = wrap_single_line(&format!("{}{}", prefix, input_line), width);
300        for line in wrapped {
301            result.push(line);
302        }
303        // Update active codes for next line
304        update_tracker_from_text(input_line, &mut active_codes);
305    }
306
307    if result.is_empty() {
308        vec![String::new()]
309    } else {
310        result
311    }
312}
313
314fn wrap_single_line(line: &str, width: usize) -> Vec<String> {
315    if line.is_empty() {
316        return vec![String::new()];
317    }
318
319    let visible = visible_width(line);
320    if visible <= width {
321        return vec![line.to_string()];
322    }
323
324    // Split line into tokens (words separated by spaces, plus CJK breaks)
325    let tokens = split_into_tokens(line);
326    let mut wrapped: Vec<String> = Vec::new();
327    let mut current_line = String::new();
328    let mut current_width: usize = 0;
329    let mut tracker = AnsiState::new();
330
331    for token in &tokens {
332        let token_width = visible_width(token);
333        let is_space = token.trim().is_empty();
334
335        // Token is wider than available width - break it character by character
336        if token_width > width && !is_space {
337            if !current_line.is_empty() {
338                let line_end = tracker.line_end_reset();
339                if !line_end.is_empty() {
340                    current_line.push_str(&line_end);
341                }
342                wrapped.push(current_line);
343                current_line = String::new();
344                current_width = 0;
345            }
346
347            let broken = break_long_word(token, width, &mut tracker);
348            let last = broken.len().saturating_sub(1);
349            for (i, line) in broken.iter().enumerate() {
350                if i < last {
351                    wrapped.push(line.clone());
352                } else {
353                    current_line = line.clone();
354                    current_width = visible_width(line);
355                }
356            }
357            continue;
358        }
359
360        let total = current_width + token_width;
361        if total > width && current_width > 0 {
362            // Don't trim trailing spaces: they are valid content (user-typed spaces)
363            // and the line is already within width (current_width <= width).
364            let mut line_to_wrap = current_line.clone();
365            let line_end = tracker.line_end_reset();
366            if !line_end.is_empty() {
367                line_to_wrap.push_str(&line_end);
368            }
369            wrapped.push(line_to_wrap);
370            if is_space {
371                // Place the whitespace at the start of the next visual line
372                // so it's not lost (space typed at wrap boundary).
373                let codes = tracker.active_codes();
374                current_line = format!("{}{}", codes, token);
375                current_width = token_width;
376            } else {
377                let codes = tracker.active_codes();
378                current_line = format!("{}{}", codes, token);
379                current_width = token_width;
380            }
381        } else {
382            current_line.push_str(token);
383            current_width += token_width;
384        }
385
386        tracker.update(token);
387    }
388
389    if !current_line.is_empty() {
390        // No trim: trailing spaces are valid user-typed content and invisible
391        // in the editor's padding anyway.
392        wrapped.push(current_line);
393    }
394
395    if wrapped.is_empty() {
396        vec![String::new()]
397    } else {
398        wrapped
399    }
400}
401
402/// Split text into tokens for word wrapping.
403/// Keeps ANSI codes attached to adjacent visible content.
404fn split_into_tokens(text: &str) -> Vec<String> {
405    let mut tokens: Vec<String> = Vec::new();
406    let mut current = String::new();
407    let mut pending_ansi = String::new();
408    let mut current_is_space: Option<bool> = None;
409    let mut i = 0;
410    let bytes = text.as_bytes();
411
412    while i < bytes.len() {
413        if bytes[i] == 0x1b
414            && let Some(ansi) = extract_ansi_code_at(text, i)
415        {
416            pending_ansi.push_str(ansi);
417            i += ansi.len();
418            continue;
419        }
420
421        // Find end of non-ANSI run
422        let mut end = i;
423        while end < bytes.len() && bytes[end] != 0x1b {
424            end += 1;
425        }
426
427        let segment_str = &text[i..end];
428        let mut seg_pos = 0;
429        while seg_pos < segment_str.len() {
430            // Check for paste marker start - treat as single atomic token
431            if segment_str[seg_pos..].starts_with("[paste #") {
432                if !current.is_empty() {
433                    tokens.push(std::mem::take(&mut current));
434                    current_is_space = None;
435                }
436                if let Some(end) = segment_str[seg_pos..].find(']') {
437                    let marker = &segment_str[seg_pos..=seg_pos + end];
438                    let token = format!("{}{}", pending_ansi, marker);
439                    pending_ansi.clear();
440                    tokens.push(token);
441                    seg_pos += end + 1;
442                    continue;
443                }
444            }
445
446            // Get the next grapheme
447            let grapheme = if let Some(g) = segment_str[seg_pos..].graphemes(true).next() {
448                g
449            } else {
450                break;
451            };
452            let g_len = grapheme.len();
453            let is_space = grapheme == " ";
454
455            // CJK characters get their own token
456            if !is_space && is_cjk_break(grapheme) {
457                if !current.is_empty() {
458                    tokens.push(std::mem::take(&mut current));
459                    current_is_space = None;
460                }
461                let token = format!("{}{}", pending_ansi, grapheme);
462                pending_ansi.clear();
463                tokens.push(token);
464                seg_pos += g_len;
465                continue;
466            }
467
468            let segment_is_space = is_space;
469            if current_is_space.is_some_and(|s| s != segment_is_space) && !current.is_empty() {
470                tokens.push(std::mem::take(&mut current));
471            }
472
473            if !pending_ansi.is_empty() {
474                current.push_str(&pending_ansi);
475                pending_ansi.clear();
476            }
477
478            current_is_space = Some(segment_is_space);
479            current.push_str(grapheme);
480            seg_pos += g_len;
481        }
482
483        i = end;
484    }
485
486    // Attach any remaining pending ANSI
487    if !pending_ansi.is_empty() {
488        if !current.is_empty() {
489            current.push_str(&pending_ansi);
490        } else if let Some(last) = tokens.last_mut() {
491            last.push_str(&pending_ansi);
492        } else {
493            current = pending_ansi;
494        }
495    }
496
497    if !current.is_empty() {
498        tokens.push(current);
499    }
500
501    tokens
502}
503
504/// Break a long word (wider than available width) into multiple lines.
505fn break_long_word(word: &str, width: usize, tracker: &mut AnsiState) -> Vec<String> {
506    let mut lines: Vec<String> = Vec::new();
507    let mut current_line = tracker.active_codes();
508    let mut current_width: usize = 0;
509    let mut i = 0;
510    let bytes = word.as_bytes();
511
512    while i < bytes.len() {
513        if bytes[i] == 0x1b
514            && let Some(ansi) = extract_ansi_code_at(word, i)
515        {
516            current_line.push_str(ansi);
517            tracker.update(ansi);
518            i += ansi.len();
519            continue;
520        }
521
522        let rest = &word[i..];
523        let mut grapheme_end = i;
524        for g in rest.graphemes(true) {
525            grapheme_end += g.len();
526            let g_width = grapheme_width(g);
527
528            if current_width + g_width > width && current_width > 0 {
529                let line_end = tracker.line_end_reset();
530                if !line_end.is_empty() {
531                    current_line.push_str(&line_end);
532                }
533                lines.push(std::mem::take(&mut current_line));
534                current_line = tracker.active_codes();
535                current_width = 0;
536            }
537
538            current_line.push_str(g);
539            current_width += g_width;
540        }
541        i = grapheme_end;
542    }
543
544    if !current_line.is_empty() {
545        lines.push(current_line);
546    }
547
548    if lines.is_empty() {
549        vec![String::new()]
550    } else {
551        lines
552    }
553}
554
555/// Extract a range of visible columns from a line. Handles ANSI codes and wide chars.
556pub fn slice_by_column(line: &str, start_col: usize, length: usize) -> String {
557    if length == 0 {
558        return String::new();
559    }
560
561    let end_col = start_col + length;
562    let mut result = String::new();
563    let mut current_col: usize = 0;
564    let mut pending_ansi = String::new();
565    let mut i = 0;
566    let bytes = line.as_bytes();
567
568    while i < bytes.len() {
569        if bytes[i] == 0x1b
570            && let Some(ansi) = extract_ansi_code_at(line, i)
571        {
572            if current_col >= start_col && current_col < end_col {
573                result.push_str(ansi);
574            } else if current_col < start_col {
575                pending_ansi.push_str(ansi);
576            }
577            i += ansi.len();
578            continue;
579        }
580
581        // Find end of non-ANSI run
582        let mut text_end = i;
583        while text_end < bytes.len() && bytes[text_end] != 0x1b {
584            text_end += 1;
585        }
586
587        let segment_str = &line[i..text_end];
588        for grapheme in segment_str.graphemes(true) {
589            let w = grapheme_width(grapheme);
590            let in_range = current_col >= start_col && current_col < end_col;
591
592            if in_range && current_col + w <= end_col {
593                if !pending_ansi.is_empty() {
594                    result.push_str(&pending_ansi);
595                    pending_ansi.clear();
596                }
597                result.push_str(grapheme);
598            }
599
600            current_col += w;
601            if current_col >= end_col {
602                return result;
603            }
604        }
605        i = text_end;
606        if current_col >= end_col {
607            return result;
608        }
609    }
610
611    result
612}
613
614/// Convert a visual column position to a byte offset in the given text.
615/// Handles ANSI escape codes and wide characters correctly.
616pub fn visual_col_to_byte_offset(text: &str, visual_col: usize) -> usize {
617    if text.is_empty() {
618        return 0;
619    }
620
621    let mut vis_so_far: usize = 0;
622    let mut i = 0;
623    let bytes = text.as_bytes();
624
625    while i < bytes.len() {
626        if bytes[i] == 0x1b
627            && let Some(ansi) = extract_ansi_code_at(text, i)
628        {
629            i += ansi.len();
630            continue;
631        }
632
633        let rest = &text[i..];
634        if let Some(g) = rest.graphemes(true).next() {
635            let gw = grapheme_width(g);
636            if vis_so_far + gw > visual_col {
637                return i;
638            }
639            vis_so_far += gw;
640            i += g.len();
641            continue;
642        }
643        break;
644    }
645
646    text.len()
647}
648
649/// Simple ANSI state tracker for wrap_text_with_ansi.
650struct AnsiState {
651    bold: bool,
652    underline: bool,
653    fg_color: Option<String>,
654    bg_color: Option<String>,
655}
656
657impl AnsiState {
658    fn new() -> Self {
659        Self {
660            bold: false,
661            underline: false,
662            fg_color: None,
663            bg_color: None,
664        }
665    }
666
667    fn update(&mut self, text: &str) {
668        let mut i = 0;
669        let bytes = text.as_bytes();
670        while i < bytes.len() {
671            if bytes[i] == 0x1b
672                && let Some(ansi) = extract_ansi_code_at(text, i)
673            {
674                self.process_ansi(ansi);
675                i += ansi.len();
676                continue;
677            }
678            i += 1;
679        }
680    }
681
682    fn process_ansi(&mut self, code: &str) {
683        let code_bytes = code.as_bytes();
684        // Check for SGR codes: ESC [ ... m
685        if code_bytes.len() < 4 || code_bytes[code_bytes.len() - 1] != b'm' {
686            return;
687        }
688
689        let inner = &code[2..code.len() - 1]; // Strip ESC[ and m
690        if inner.is_empty() || inner == "0" {
691            self.bold = false;
692            self.underline = false;
693            self.fg_color = None;
694            self.bg_color = None;
695            return;
696        }
697
698        let params: Vec<&str> = inner.split(';').collect();
699        let mut i = 0;
700        while i < params.len() {
701            let Ok(parsed) = params[i].parse::<u8>() else {
702                i += 1;
703                continue;
704            };
705            match parsed {
706                0 => {
707                    self.bold = false;
708                    self.underline = false;
709                    self.fg_color = None;
710                    self.bg_color = None;
711                }
712                1 => self.bold = true,
713                4 => self.underline = true,
714                22 => self.bold = false,
715                24 => self.underline = false,
716                30..=37 | 90..=97 => {
717                    self.fg_color = Some(parsed.to_string());
718                }
719                40..=47 | 100..=107 => {
720                    self.bg_color = Some(parsed.to_string());
721                }
722                38 => {
723                    // Extended foreground color: 38;5;N or 38;2;R;G;B
724                    if i + 1 < params.len() {
725                        match params[i + 1] {
726                            "5" if i + 2 < params.len() => {
727                                self.fg_color = Some(params[i..=i + 2].join(";"));
728                                i += 2;
729                            }
730                            "2" if i + 4 < params.len() => {
731                                self.fg_color = Some(params[i..=i + 4].join(";"));
732                                i += 4;
733                            }
734                            _ => {}
735                        }
736                    }
737                }
738                48 => {
739                    // Extended background color: 48;5;N or 48;2;R;G;B
740                    if i + 1 < params.len() {
741                        match params[i + 1] {
742                            "5" if i + 2 < params.len() => {
743                                self.bg_color = Some(params[i..=i + 2].join(";"));
744                                i += 2;
745                            }
746                            "2" if i + 4 < params.len() => {
747                                self.bg_color = Some(params[i..=i + 4].join(";"));
748                                i += 4;
749                            }
750                            _ => {}
751                        }
752                    }
753                }
754                39 => self.fg_color = None,
755                49 => self.bg_color = None,
756                _ => {}
757            }
758            i += 1;
759        }
760    }
761
762    fn active_codes(&self) -> String {
763        let mut codes: Vec<String> = Vec::new();
764        if self.bold {
765            codes.push("1".to_string());
766        }
767        if self.underline {
768            codes.push("4".to_string());
769        }
770        if let Some(ref fg) = self.fg_color {
771            codes.push(fg.clone());
772        }
773        if let Some(ref bg) = self.bg_color {
774            codes.push(bg.clone());
775        }
776        if codes.is_empty() {
777            String::new()
778        } else {
779            format!("\x1b[{}m", codes.join(";"))
780        }
781    }
782
783    /// Get reset for underline only (preserves background at line end).
784    fn line_end_reset(&self) -> String {
785        if self.underline {
786            "\x1b[24m".to_string()
787        } else {
788            String::new()
789        }
790    }
791}
792
/// Terminate any styling left open on an output line.
///
/// Returns `line` followed by an SGR reset (`ESC[0m`) and an empty OSC 8
/// hyperlink (`ESC]8;;BEL`), which closes an open hyperlink.
/// Matches pi's normalizeTerminalOutput.
pub fn normalize_terminal_output(line: &str) -> String {
    const TERMINATOR: &str = "\x1b[0m\x1b]8;;\x07";

    let mut normalized = String::with_capacity(line.len() + TERMINATOR.len());
    normalized.push_str(line);
    normalized.push_str(TERMINATOR);
    normalized
}
799
/// Report whether a grapheme cluster is exactly one space or one tab.
/// Single-char check matching pi's isWhitespaceChar.
pub fn is_whitespace_char(grapheme: &str) -> bool {
    matches!(grapheme, " " | "\t")
}
805
806/// Extract segments from a line for overlay compositing.
807/// Returns (before_text, before_width, after_text, after_width).
808/// The "before" segment is columns [0, before_end).
809/// The "after" segment is columns [after_start, total_width).
810/// Matches pi's extractSegments.
811pub fn extract_segments(
812    line: &str,
813    before_end: usize,
814    after_start: usize,
815    after_len: usize,
816    strict: bool,
817) -> (String, usize, String, usize) {
818    let before = slice_by_column(line, 0, before_end);
819    let before_width = visible_width(&before);
820    let after = slice_by_column(line, after_start, after_len);
821    let after_width = visible_width(&after);
822
823    if strict {
824        // If before_text is wider than expected, use empty before
825        if before_width > before_end {
826            return (String::new(), 0, after, after_width);
827        }
828    }
829
830    (before, before_width, after, after_width)
831}
832
833/// Slice text by visible columns, returning both the extracted text and its width.
834/// Like `slice_by_column` but also returns the actual visible width of the result.
835/// Matches pi's `sliceWithWidth`.
836pub fn slice_with_width(line: &str, start_col: usize, length: usize) -> (String, usize) {
837    let text = slice_by_column(line, start_col, length);
838    let width = visible_width(&text);
839    (text, width)
840}
841
842// Width cache for non-ASCII strings (matching pi's WIDTH_CACHE_SIZE = 512)
843use std::cell::RefCell;
844use std::collections::HashMap;
845
/// Number of entries the width cache may hold before `visible_width` clears it.
const WIDTH_CACHE_SIZE: usize = 512;

thread_local! {
    /// Per-thread memo from a string to its visible width.
    static WIDTH_CACHE: RefCell<HashMap<String, usize>> = RefCell::default();
}
851
852/// Compute visible width without cache (used by `visible_width` for cache misses).
853fn compute_visible_width_inner(s: &str) -> usize {
854    if s.is_empty() {
855        return 0;
856    }
857    // Normalize: tabs to 3 spaces, strip ANSI escape codes
858    let mut clean = String::with_capacity(s.len());
859    let mut i = 0;
860    let bytes = s.as_bytes();
861    while i < bytes.len() {
862        if bytes[i] == b'\t' {
863            clean.push_str("   ");
864            i += 1;
865            continue;
866        }
867        if bytes[i] == 0x1b
868            && let Some(ansi) = extract_ansi_code_at(s, i)
869        {
870            i += ansi.len();
871            continue;
872        }
873        if let Some(ch) = s[i..].chars().next() {
874            clean.push(ch);
875            i += ch.len_utf8();
876        } else {
877            i += 1;
878        }
879    }
880
881    let mut width = 0;
882    for grapheme in clean.graphemes(true) {
883        width += grapheme_width(grapheme);
884    }
885    width
886}
887
/// Check if a grapheme cluster is CJK (needs its own token for wrapping).
///
/// Only the first scalar of the cluster is inspected. It counts as CJK when
/// it falls in one of the Unicode blocks for Han ideographs (including
/// Extension A, the compatibility ideographs and the supplementary-plane
/// extensions), Hiragana, Katakana, Bopomofo or Hangul (syllables and jamo),
/// or in the halfwidth Katakana/Hangul forms. An empty cluster is not CJK.
pub fn is_cjk_break(grapheme: &str) -> bool {
    let Some(first) = grapheme.chars().next() else {
        return false;
    };

    matches!(
        first as u32,
        0x1100..=0x11FF // Hangul Jamo
            | 0x3040..=0x309F // Hiragana
            | 0x30A0..=0x30FF // Katakana
            | 0x3100..=0x312F // Bopomofo
            | 0x3130..=0x318F // Hangul Compatibility Jamo
            | 0x31A0..=0x31BF // Bopomofo Extended
            | 0x31F0..=0x31FF // Katakana Phonetic Extensions
            | 0x3400..=0x4DBF // CJK Unified Ideographs Extension A
            | 0x4E00..=0x9FFF // CJK Unified Ideographs
            | 0xA960..=0xA97F // Hangul Jamo Extended-A
            | 0xAC00..=0xD7FF // Hangul Syllables, Hangul Jamo Extended-B
            | 0xF900..=0xFAFF // CJK Compatibility Ideographs
            | 0xFF66..=0xFFDC // Halfwidth Katakana and Hangul forms
            | 0x20000..=0x323AF // Supplementary-plane ideograph extensions
    )
}
902
903fn update_tracker_from_text(text: &str, active_codes: &mut String) {
904    // Simple: just re-evaluate ANSI state from scratch for the text
905    let mut tracker = AnsiState::new();
906    tracker.update(text);
907    *active_codes = tracker.active_codes();
908}
909
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain ASCII is one column per character; the empty string has no width.
    #[test]
    fn test_visible_width_ascii() {
        for (input, expected) in [("hello", 5), ("", 0)] {
            assert_eq!(visible_width(input), expected);
        }
    }

    /// ANSI escape sequences contribute no columns.
    #[test]
    fn test_visible_width_with_ansi() {
        let red_hello = "\x1b[31mhello\x1b[0m";
        assert_eq!(visible_width(red_hello), 5);

        // A tab counts as 3 columns and the CJK character as 2.
        let tab_then_cjk = "\t\x1b[31m界\x1b[0m";
        assert_eq!(visible_width(tab_then_cjk), 5);
    }

    /// CJK characters are two columns wide, alone or mixed with ASCII.
    #[test]
    fn test_visible_width_cjk() {
        for (input, expected) in [("世界", 4), ("hello世界", 9)] {
            assert_eq!(visible_width(input), expected);
        }
    }

    /// Single emoji occupy two columns.
    #[test]
    fn test_visible_width_emoji() {
        for emoji in ["🙂", "👋"] {
            assert_eq!(visible_width(emoji), 2);
        }
    }

    /// Text that already fits is returned unchanged.
    #[test]
    fn test_truncate_to_width_no_truncation() {
        assert_eq!(truncate_to_width("hello", 10, "...", false), "hello");
    }

    /// Over-long text is cut to the limit and carries the ellipsis.
    #[test]
    fn test_truncate_to_width_with_ellipsis() {
        let truncated = truncate_to_width("hello world", 8, "...", false);
        let width = visible_width(&truncated);
        assert!(width <= 8);
        assert!(truncated.contains("..."));
    }

    /// With padding enabled, short text is widened to exactly the limit.
    #[test]
    fn test_truncate_to_width_with_pad() {
        let padded = truncate_to_width("hi", 8, "...", true);
        assert_eq!(visible_width(&padded), 8);
    }

    /// Empty input stays empty without padding and becomes spaces with it.
    #[test]
    fn test_truncate_to_width_empty() {
        let unpadded = truncate_to_width("", 5, "...", false);
        assert_eq!(unpadded, "");

        let padded = truncate_to_width("", 5, "...", true);
        assert_eq!(padded, " ".repeat(5));
    }

    /// A zero-column limit produces an empty string.
    #[test]
    fn test_truncate_to_width_max_zero() {
        let truncated = truncate_to_width("hello", 0, "...", false);
        assert_eq!(truncated, "");
    }

    /// Long text is split into several lines, none wider than the limit.
    #[test]
    fn test_wrap_basic() {
        let lines = wrap_text_with_ansi("hello world this is a test", 10);
        assert!(lines.len() > 1);
        assert!(lines.iter().all(|line| visible_width(line) <= 10));
    }

    /// Text that fits on one line comes back as that single line.
    #[test]
    fn test_wrap_no_wrap_needed() {
        let lines = wrap_text_with_ansi("hello", 10);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0], "hello");
    }

    /// Every continuation line re-opens the colour that was active.
    #[test]
    fn test_wrap_preserves_ansi() {
        let lines = wrap_text_with_ansi("\x1b[31mhello world this is red\x1b[0m", 10);
        let continuation_lines_are_red = lines
            .iter()
            .skip(1)
            .all(|line| line.starts_with("\x1b[31m"));
        assert!(continuation_lines_are_red);
    }

    /// Column slices of plain ASCII map directly onto character positions.
    #[test]
    fn test_slice_by_column_basic() {
        let line = "hello world";
        for (start, len, expected) in [(0, 5, "hello"), (6, 5, "world"), (3, 4, "lo w")] {
            assert_eq!(slice_by_column(line, start, len), expected);
        }
    }

    /// A zero-length slice is empty.
    #[test]
    fn test_slice_by_column_empty() {
        let sliced = slice_by_column("test", 0, 0);
        assert_eq!(sliced, "");
    }

    /// Normalisation appends an SGR reset and a hyperlink terminator.
    #[test]
    fn test_normalize_terminal_output() {
        let normalized = normalize_terminal_output("hello");
        assert_eq!(normalized, "hello\x1b[0m\x1b]8;;\x07");
    }

    /// Space and tab are whitespace; a letter and the empty string are not.
    #[test]
    fn test_is_whitespace_char() {
        for whitespace in [" ", "\t"] {
            assert!(is_whitespace_char(whitespace));
        }
        for other in ["a", ""] {
            assert!(!is_whitespace_char(other));
        }
    }

    /// Both segments are cut at the requested column ranges.
    #[test]
    fn test_extract_segments_basic() {
        // Columns [0, 5) hold "hello"; columns [15, 20) hold " worl"
        // (the space followed by the first four letters of "world").
        let (head, head_width, tail, tail_width) =
            extract_segments("hello beautiful world", 5, 15, 5, true);
        assert_eq!(head, "hello");
        assert_eq!(head_width, 5);
        assert_eq!(tail, " worl");
        assert_eq!(tail_width, 5);
    }

    /// Ranges past the end of the line yield the whole line and an empty tail.
    #[test]
    fn test_extract_segments_overflow() {
        // before_end = 10 exceeds the 5-column line, so the "before" segment
        // is the full line and nothing is left for the "after" segment.
        let (head, head_width, tail, _tail_width) = extract_segments("short", 10, 15, 5, true);
        assert_eq!(head, "short");
        assert_eq!(head_width, 5);
        assert!(tail.is_empty());
    }
}
1046
/// Wrapping multi-line input yields at least one visual line per logical
/// line, and no visual line is wider than the requested width.
#[test]
fn test_wrap_multiline_preserves_line_count() {
    // Three logical lines: the first and last need wrapping at width 10,
    // the middle one ("short") already fits.
    let input = "hello world this is a test\nshort\nanother long line here yes";
    let lines = wrap_text_with_ansi(input, 10);

    // Each logical line contributes at least one visual line.
    let minimum = 3;
    let actual = lines.len();
    if actual < minimum {
        panic!("Expected at least {} lines, got {}", minimum, actual);
    }

    // No visual line may exceed the wrap width.
    for (index, line) in lines.iter().enumerate() {
        let width = visible_width(line);
        if width > 10 {
            panic!(
                "Line {}: '{}' has visible_width {} > 10",
                index, line, width
            );
        }
    }
}
1075
/// Wrapping a multi-line string must produce exactly the sum of the wrapped
/// lines of each logical line, with no line emitted twice.
#[test]
fn test_wrap_text_with_ansi_no_duplicate_lines() {
    let text = "abc def ghi\njk lm no pq rs";
    let result = wrap_text_with_ansi(text, 5);
    // "abc def ghi" → ["abc", "def", "ghi"] (3 lines)
    // "jk lm no pq rs" → ["jk lm", "no pq", "rs"] (3 lines)
    // Total expected: 6
    assert_eq!(
        result.len(),
        6,
        "Expected 6 wrapped lines (3+3), got {}: {:?}",
        result.len(),
        result
    );

    // Verify no duplicate lines. The set borrows the trimmed slices straight
    // from `result`, so no per-line allocation or clone is needed.
    let mut seen = std::collections::HashSet::new();
    for line in &result {
        let trimmed = line.trim();
        if !trimmed.is_empty() && !seen.insert(trimmed) {
            panic!("Duplicate line found: '{}'", trimmed);
        }
    }
}
1102
/// Wrapping must never make a repeated phrase appear more often than it does
/// in the input.
#[test]
fn test_wrap_user_text_does_not_introduce_duplicates() {
    // The input legitimately contains the same 45-character phrase twice,
    // separated by three spaces. That repetition is not a wrapping bug; the
    // check below only guards against wrapping adding further copies.
    let pattern = "ghhh jjj jkkk  jrjrnr jrnr rkr rrkr rmrrkrr k";
    let t1 = "ghhh jjj jkkk  jrjrnr jrnr rkr rrkr rmrrkrr k   ghhh jjj jkkk  jrjrnr jrnr rkr rrkr rmrrkrr k";

    let original_count = t1.matches(pattern).count();
    assert_eq!(
        original_count, 2,
        "Input should have 2 occurrences of pattern"
    );

    for width in [40, 50, 60, 80, 100] {
        let wrapped = wrap_text_with_ansi(t1, width);

        // Tally the phrase across every wrapped line.
        let mut wrapped_count: usize = 0;
        for line in &wrapped {
            wrapped_count += line.matches(pattern).count();
        }

        // The output may contain the phrase at most as often as the input.
        if wrapped_count > original_count {
            panic!(
                "Width {}: wrapped has {} occurrences, input has {}",
                width, wrapped_count, original_count
            );
        }
    }
}
rab/tui/util.rs

rab/tui/
util.rs