fancy_table/
ansi.rs

1use std::cmp::min;
2
3// SmallVec for more efficient ANSI code collection (for common cases)
4use smallvec::SmallVec;
5
/// SGR sequence that resets all text attributes; sequences equal to it are treated as a reset.
pub const RST_CODE: &str = "\x1b[0m";
7
8// open SGRs, text slice, total length (excluding ANSI codes), has reset code?, needs reset?
9// Using SmallVec for better performance in the common case of few ANSI codes
10type AnsiSegment<'a> = (SmallVec<[&'a str; 4]>, &'a str, usize, bool, bool);
11
/// A single output line produced by `build_string`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AnsiString<'a> {
    /// Slice of the original input covered by this line (ANSI codes included).
    pub slice: &'a str,
    /// "Codes to continue": concatenated ANSI codes carried over from earlier lines,
    /// or `None` when there is nothing to carry over.
    pub c2c: Option<String>,
    /// Length of the line's text with ANSI codes excluded.
    pub len: usize,
    /// Whether a reset code has to follow this line to close its formatting.
    pub needs_rst: bool,
}
19
/// Next token the SGR scanner expects while walking through a segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AnsiToken {
    /// The escape character (`\x1b`) that starts a sequence.
    Escape,
    /// The opening bracket (`[`) that follows the escape character.
    Opening,
    /// Parameter characters or the terminating `m` of the sequence.
    Code,
}
26
/// Strategy applied to text that does not fit into the available horizontal space.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Overflow {
    /// Break lines between words (the text is split on spaces and newlines).
    WordWrap,
    /// Keep one output line per input line and cut off whatever does not fit.
    Truncate,
}
32
/// A piece of the input handed to the parser: the text plus its byte offset in the input.
#[derive(Debug)]
enum Segment<'a> {
    /// A word that is followed by at least one more segment of the same input line.
    Word(&'a str, usize),
    /// The last (or only) segment of an input line; it terminates the line.
    Term(&'a str, usize),
}
38#[derive(Debug, Default)]
39struct CodeQueue<'a> {
40    codes_to_continue: SmallVec<[&'a str; 4]>, // Most common case: few ANSI codes
41    codes_to_collect: SmallVec<[&'a str; 4]>,
42    reset_after_get: bool,
43}
44
45impl<'a> CodeQueue<'a> {
46    pub fn codes_to_continue(&mut self) -> Option<String> {
47        let code_seq = if self.codes_to_continue.is_empty() {
48            None
49        } else {
50            let capacity = self.codes_to_continue.iter().map(|s| s.len()).sum();
51            let mut codes = String::with_capacity(capacity);
52
53            for s in &self.codes_to_continue {
54                codes.push_str(s);
55            }
56            Some(codes)
57        };
58        if self.reset_after_get {
59            self.codes_to_continue.clear();
60            self.reset_after_get = false;
61        }
62
63        // Codes collected become codes to continue in case of line breaks
64        self.codes_to_continue
65            .extend_from_slice(&self.codes_to_collect);
66        self.codes_to_collect.clear();
67
68        code_seq
69    }
70
71    pub fn collect(&mut self, codes: SmallVec<[&'a str; 4]>) {
72        self.codes_to_collect.extend(codes);
73    }
74
75    pub fn clear(&mut self) {
76        self.reset_after_get = true;
77        self.codes_to_collect.clear();
78    }
79
80    pub fn has_codes_to_continue(&self) -> bool {
81        !self.codes_to_continue.is_empty()
82    }
83}
84
85impl<'a> Segment<'a> {
86    fn text(&self) -> &'a str {
87        match self {
88            Segment::Word(txt, _) | Segment::Term(txt, _) => txt,
89        }
90    }
91    fn pos(&self) -> usize {
92        match self {
93            Segment::Word(_, pos) | Segment::Term(_, pos) => *pos,
94        }
95    }
96}
97
98impl<'a> AnsiString<'a> {
99    pub fn build(c2c: Option<String>, slice: &'a str, len: usize, needs_rst: bool) -> Self {
100        Self {
101            slice,
102            len,
103            c2c,
104            needs_rst,
105        }
106    }
107}
108
109/// Processes input text with ANSI codes into formatted lines that fit within specified dimensions.
110///
111/// Takes raw text containing ANSI escape sequences and breaks it into lines that respect
112/// both horizontal (character) and vertical (line) constraints while preserving ANSI formatting.
113///
114/// # Arguments
115/// * `input` - Raw input text that may contain ANSI escape sequences
116/// * `hspace` - Maximum characters per line (excluding ANSI codes)
117/// * `vspace` - Maximum number of lines to generate
118/// * `overflow` - Strategy for handling text that exceeds horizontal space
119///
120/// # Returns
121/// Vector of `AnsiString` objects, each representing a formatted line with:
122/// - Original text slice (may include ANSI codes)
123/// - Continuation codes needed to maintain formatting across line breaks
124/// - Actual display length (excluding ANSI codes)
125/// - Whether the line needs a reset code for proper formatting
126pub fn build_string<'a>(
127    input: &'a str,
128    hspace: usize,
129    vspace: usize,
130    overflow: &Overflow,
131) -> Vec<AnsiString<'a>> {
132    if hspace == 0 {
133        return vec![];
134    }
135
136    // Tracks whether the current line needs a reset code to properly close ANSI formatting
137    let mut line_reset = false;
138
139    // A queue keeping ANSI codes to be carried over to the next line
140    let mut queue = CodeQueue::default();
141    let mut result = Vec::with_capacity(vspace);
142    let mut str_pos = 0;
143    let mut end_pos = 0;
144    let mut txt_len = 0;
145
146    let mut segments = build_segments_iter(input, overflow).peekable();
147    while let Some(seg) = segments.next() {
148        let is_last_str = segments.peek().is_none();
149        let is_init_str = txt_len == 0;
150        let is_term_str = matches!(seg, Segment::Term(_, _));
151        let eol = is_last_str || is_term_str;
152        let pos = seg.pos();
153
154        let (new_codes, txt, total_len, has_rst, needs_rst) = parse_segment(seg, hspace);
155        let len = min(total_len, hspace);
156
157        // Separator length: 0 for first segment in line, 1 otherwise.
158        let sep_len = (txt_len > 0) as usize;
159
160        if txt_len == 0 {
161            str_pos = pos;
162            end_pos = pos;
163            line_reset = queue.has_codes_to_continue();
164        }
165
166        // If there is no more space for a segment then wrap-or-truncate the line
167        if !is_init_str && txt_len + total_len + sep_len > hspace {
168            result.push(AnsiString::build(
169                queue.codes_to_continue(),
170                &input[str_pos..end_pos],
171                txt_len,
172                line_reset,
173            ));
174            // Constitute current segment as initial in the new line
175            str_pos = pos;
176            end_pos = pos + txt.len();
177            txt_len = len;
178        } else {
179            end_pos += txt.len() + sep_len;
180            txt_len += len + sep_len;
181        }
182
183        // If segment contains reset code at any position wipe out
184        // all ANSI codes collected up to the reset code so far...
185        if has_rst {
186            queue.clear();
187            line_reset = needs_rst;
188        } else {
189            line_reset = line_reset || needs_rst;
190        }
191
192        // ...and collect all the codes coming right after the reset code
193        queue.collect(new_codes);
194
195        if result.len() < vspace && (txt_len == hspace || eol) {
196            result.push(AnsiString::build(
197                queue.codes_to_continue(),
198                &input[str_pos..end_pos],
199                txt_len,
200                line_reset,
201            ));
202            txt_len = 0;
203        }
204
205        // Bail out early if there is no more vertical space available
206        if result.len() == vspace {
207            return result;
208        }
209    }
210    result
211}
212
213/// Splits input string into segments for parsing based on overflow strategy:
214/// - `Overflow::Truncate`: Splits only on newlines, creating one segment per line
215/// - `Overflow::WordWrap`: Splits on both newlines and spaces for word-based wrapping
216fn build_segments_iter<'a>(
217    input: &'a str,
218    overflow: &Overflow,
219) -> Box<dyn Iterator<Item = Segment<'a>> + 'a> {
220    let input_ptr = input.as_ptr();
221    match overflow {
222        Overflow::Truncate => Box::new(
223            input
224                .lines()
225                .map(move |s| Segment::Term(s, s.as_ptr() as usize - input_ptr as usize)),
226        ),
227        Overflow::WordWrap => Box::new(input.lines().flat_map(move |s| {
228            let mut iter = s.split(' ').peekable();
229            std::iter::from_fn(move || {
230                iter.next().map(|slice| {
231                    let pos = slice.as_ptr() as usize - input_ptr as usize;
232                    if iter.peek().is_none() {
233                        Segment::Term(slice, pos)
234                    } else {
235                        Segment::Word(slice, pos)
236                    }
237                })
238            })
239        })),
240    }
241}
242
243/// Parses a single text segment, extracting ANSI codes and enforcing character limits.
244///
245/// Returns a tuple containing:
246/// - SmallVec of ANSI escape sequences found in the segment (optimized for common case of few codes)
247/// - Text slice (including ANSI codes) truncated to fit the character limit
248/// - Actual text length (excluding ANSI codes)  
249/// - Whether a reset code was found in the segment
250/// - Whether the segment needs a reset code (has unclosed ANSI styling)
251fn parse_segment<'a>(segment: Segment<'a>, len: usize) -> AnsiSegment<'a> {
252    let mut codes = SmallVec::new();
253    let mut expected = AnsiToken::Escape;
254    let mut current_code_start = 0;
255
256    let mut txt_len: usize = 0;
257    let mut end_pos = None;
258    let mut has_rst = false;
259    let mut needs_rst = false;
260    let mut stop_collecting = false;
261
262    let input = segment.text();
263
264    // Fast path: if input is empty or very short, avoid expensive processing
265    if input.is_empty() {
266        return (codes, input, 0, false, false);
267    }
268
269    for (pos, ch) in input.char_indices() {
270        match ch {
271            '\x1b' if expected == AnsiToken::Escape => {
272                expected = AnsiToken::Opening;
273                current_code_start = pos;
274            }
275            '[' if expected == AnsiToken::Opening => expected = AnsiToken::Code,
276            'm' if expected == AnsiToken::Code => {
277                // Valid SGR sequence terminator
278                let sequence = &input[current_code_start..pos + 1];
279                let seq_rst = sequence == RST_CODE;
280
281                has_rst = seq_rst;
282
283                if seq_rst {
284                    codes.clear();
285                } else {
286                    codes.push(sequence);
287                }
288                if !stop_collecting {
289                    needs_rst = !has_rst;
290                    if end_pos.is_some() {
291                        end_pos = Some(pos + 1);
292                    }
293                }
294                expected = AnsiToken::Escape
295            }
296            '0'..='9' | ';' | ':' if expected == AnsiToken::Code => {
297                continue;
298            }
299            _ if end_pos.is_none() => {
300                txt_len += 1;
301
302                if txt_len == len {
303                    end_pos = Some(pos + ch.len_utf8());
304                }
305                expected = AnsiToken::Escape;
306            }
307            _ => {
308                stop_collecting = true;
309                expected = AnsiToken::Escape;
310                // consume, do nothing
311            }
312        }
313    }
314    let slice = &input[0..end_pos.unwrap_or(input.len())];
315    (codes, slice, txt_len, has_rst, needs_rst)
316}
317
/// Builds lines from a format string with `build_string` and verifies them.
///
/// Usage: `assert_ansi_string!(string, hspace, vspace, overflow, [segments...])`, where every
/// expected segment is written as `{ len => <expr>, txt => <literal>, rst => <expr> }`
/// (any subset of the fields). An empty list `[]` asserts that no line was produced.
/// Both `string` and the `txt` literals are passed through `format!`.
#[macro_export]
macro_rules! assert_ansi_string {
    // No lines expected at all.
    ($string:expr, $hspace:expr, $vspace:expr, $overflow:expr, []) => {{
        let input = format!($string);
        let segments = $crate::ansi::build_string(&input, $hspace, $vspace, &$overflow);

        assert!(
            segments.is_empty(),
            "Expected no segments, found {}",
            segments.len()
        );
    }};
    // One or more expected lines, verified in order.
    ($string:expr, $hspace:expr, $vspace:expr, $overflow:expr, [$($segment:tt),+ $(,)?]) => {{
        let input = format!($string);
        let segments = $crate::ansi::build_string(&input, $hspace, $vspace, &$overflow);
        let mut segment_index = 0;

        $(
            // Fail with a readable message instead of an index-out-of-bounds panic.
            assert!(
                segment_index < segments.len(),
                "Expected segment #{} is missing, only {} segments were built",
                segment_index,
                segments.len()
            );
            assert_ansi_string!(@verify_segment segments[segment_index], $segment);
            segment_index += 1;
        )+
        assert_eq!(
            segments.len(),
            segment_index,
            "Expected {} segments, found {}",
            segment_index,
            segments.len()
        );
    }};
    // Internal: checks every listed field of a single segment.
    (@verify_segment $seg:expr, { $($field:ident => $value:tt),* $(,)? }) => {{
        let seg = &$seg;
        $(
            assert_ansi_string!(@check_field seg, $field, $value);
        )*
    }};
    // Internal: text length with ANSI codes excluded.
    (@check_field $seg:expr, len, $expected:expr) => {
        assert_eq!($seg.len, $expected);
    };
    // Internal: raw slice compared against the formatted literal.
    (@check_field $seg:expr, txt, $expected:literal) => {{
        let formatted = format!($expected);
        assert_eq!($seg.slice, formatted.as_str());
    }};
    // Internal: whether the line needs a reset code.
    (@check_field $seg:expr, rst, $expected:expr) => {
        assert_eq!($seg.needs_rst, $expected);
    };
}
356
#[cfg(test)]
mod tests {
    use super::*;
    use smallvec::smallvec;

    /// A pending `clear` drops the carried-over codes right after they were read.
    #[test]
    fn test_codes_queue_clear() {
        let mut queue = CodeQueue::default();
        queue.collect(smallvec!["\x1b[31m", "\x1b[1m"]);

        // Nothing is carried over yet; the call promotes the collected codes.
        assert!(queue.codes_to_continue().is_none());
        assert!(queue.has_codes_to_continue());

        // Only marks the carried-over codes for removal.
        queue.clear();

        // The codes are handed out one last time and are gone afterwards.
        assert_eq!(
            queue.codes_to_continue().as_deref(),
            Some("\x1b[31m\x1b[1m")
        );
        assert!(!queue.has_codes_to_continue());
    }

    /// Codes collected between calls accumulate in the order they were collected.
    #[test]
    fn test_codes_queue_multiple_codes_collected() {
        let mut queue = CodeQueue::default();

        // The first code is collected while nothing is carried over yet,
        // so there is nothing to prepend to the current line.
        queue.collect(smallvec!["\x1b[31m"]);
        assert!(queue.codes_to_continue().is_none());

        // The second code joins the queue, but the current line is prepended
        // only with the code collected before it.
        queue.collect(smallvec!["\x1b[1m"]);
        assert_eq!(queue.codes_to_continue().as_deref(), Some("\x1b[31m"));

        // From now on every line is prepended with both codes.
        assert_eq!(
            queue.codes_to_continue().as_deref(),
            Some("\x1b[31m\x1b[1m")
        );
    }

    /// `clear` throws away codes collected before it and retires the carried-over ones.
    #[test]
    fn test_codes_queue_clear_with_new_codes() {
        let mut queue = CodeQueue::default();

        // Make two codes carried over.
        queue.collect(smallvec!["\x1b[31m", "\x1b[1m"]);
        queue.codes_to_continue();

        // A code collected before the clear is discarded by it.
        queue.collect(smallvec!["\x1b[32m"]);
        queue.clear();

        // The old carried-over codes are returned once more, then nothing is left.
        assert_eq!(
            queue.codes_to_continue().as_deref(),
            Some("\x1b[31m\x1b[1m")
        );
        assert!(!queue.has_codes_to_continue());
    }

    /// Operations on an empty queue keep it empty.
    #[test]
    fn test_codes_queue_empty_states() {
        let mut queue = CodeQueue::default();

        // Freshly created queue
        assert!(!queue.has_codes_to_continue());
        assert!(queue.codes_to_continue().is_none());

        // Collecting an empty set of codes
        queue.collect(smallvec![]);
        assert!(queue.codes_to_continue().is_none());
        assert!(!queue.has_codes_to_continue());

        // Clearing an empty queue
        queue.clear();
        assert!(queue.codes_to_continue().is_none());
        assert!(!queue.has_codes_to_continue());
    }
}