ansi_control_codes/
parser.rs

1//! # Parser for ansi-control-codes
2//!
3//! This module contains a parser implementation that can be used to parse string-like types into a sequence
4//! of ansi-control-codes (represented by [`ControlFunction`]s) and strings that do not contain any ansi-control-codes.
5//!
6//! To use the parser module, enable the feature `parser` in your `Cargo.toml`.  
7//!
8//! ```text
9//! cargo add ansi-control-codes --features parser
10//! ```
11//!
12//! ## Example Usage
13//!
14//! ```
15//! use ansi_control_codes::c1::NEL;
16//! use ansi_control_codes::parser::{TokenStream, Token};
17//!
18//! let to_be_parsed = format!("This text{}is spread across{}multiple lines.", NEL, NEL);
19//! let parsed = TokenStream::from(&to_be_parsed);
20//!
21//! let parts: Vec<Token> = parsed.collect();
22//! println!("{:?}", parts);
23//!
24//! assert_eq!(parts[0], Token::String("This text"));
25//! assert_eq!(parts[1], Token::ControlFunction(NEL));
26//! assert_eq!(parts[2], Token::String("is spread across"));
27//! assert_eq!(parts[3], Token::ControlFunction(NEL));
28//! assert_eq!(parts[4], Token::String("multiple lines."));
29//! ```
30//!
31//! ## Parse and Explain
32//!
33//! You can combine the features `parser` and `explain` of this crate to parse text and explain the meaning of
34//! control functions. Also check the [Explain-Module][crate::explain].
35//!
36//! ```
37//! use ansi_control_codes::c1::NEL;
38//! use ansi_control_codes::parser::{TokenStream, Token};
39//! use ansi_control_codes::explain::Explain;
40//!
41//! let example = "\x1b[0u\x1b[62c\x1b[23;6H";
42//! let result = TokenStream::from(&example).collect::<Vec<Token>>();
43//!
44//! for (i, part) in result.iter().enumerate() {
45//!     match part {
46//!         Token::String(string) => println!("{}. Normal String: {}", i, string),
47//!         Token::ControlFunction(control_function) => {
48//!             println!(
49//!                 "{}. Control Function: {} ({})",
50//!                 i,
51//!                 control_function.short_name().unwrap_or_default(),
52//!                 control_function.long_name()
53//!             );
54//!             println!(
55//!                 "Short description: {}",
56//!                 control_function.short_description()
57//!             );
58//!             println!("Long description: {}", control_function.long_description());
59//!         }
60//!     }
61//!     if i < (result.len() - 1) {
62//!         println!("---------------------");
63//!     }
64//! }
65//! ```
//! This will produce the following output:
67//! ```text
68//! 0. Control Function:  (Private Use / Experimental Use)
69//! Short description: Reserved for private use / not standardized.
70//! Long description: Reserved for private use / not standardized.
71//! ---------------------
72//! 1. Control Function: DA (Device Attributes)
73//! Short description: The device sending this identifies as device with code 62.
74//! Long description: The device sending this identifies as device with code 62.
75//! ---------------------
76//! 2. Control Function: CUP (Cursor Position)
77//! Short description: Move the active position to line 23 and character 6.
78//! Long description: Causes the active presentation position to be moved in the presentation component to the 23rd line
79//! position according to the line progression, and to the 6 character position according to the character path.
80//! ```
81
82use crate::{c0::*, c1::*, independent_control_functions::*, ControlFunction};
83
84/// All C0 Codes that can be parsed without any lookahead (all C0 codes except for ESC)
85const C0_CODES: [ControlFunction; 31] = [
86    ACK, BEL, BS, CAN, CR, DC1, DC2, DC3, DC4, DLE, EM, ENQ, EOT, ETB, ETX, FF, HT, IS1, IS2, IS3,
87    IS4, LF, NAK, NUL, SI, SO, SOH, STX, SUB, SYN, VT,
88];
89
90/// All C1 Codes that can be parsed without any lookahead (all C1 codes except for CSI)
91const C1_CODES: [ControlFunction; 27] = [
92    BPH, NBH, NEL, SSA, ESA, HTS, HTJ, VTS, PLD, PLU, RI, SS2, SS3, DCS, PU1, PU2, STS, CCH, MW,
93    SPA, EPA, SOS, SCI, ST, OSC, PM, APC,
94];
95
96/// All independent control codes.
97const INDEPENDENT_CODES: [ControlFunction; 10] =
98    [DMI, INT, EMI, RIS, CMD, LS2, LS3, LS3R, LS2R, LS1R];
99
100/// Lower bound of valid characters for control function values.
101/// Control sequences end with characters between 04/00 and 06/15
102/// (07 / 00 - 07 / 15 is also allowed as private-use area).
103const CONTROL_FUNCTION_LOWER_BOUND: u8 = ascii!(04 / 00).as_bytes()[0];
104
105/// Upper bound of valid characters for control function values.
106/// Control sequences end with characters between 04/00 and 06/15
107/// (07 / 00 - 07 / 15 is also allowed as private-use area).
108const CONTROL_FUNCTION_UPPER_BOUND: u8 = ascii!(07 / 15).as_bytes()[0];
109
/// Lower bound (inclusive) of valid parameter bytes.
/// Parameter bytes can be between 03 / 00 and 03 / 15; the parser rejects a byte below this bound
/// as a parameter byte.
const PARAMETER_LOWER_BOUND: u8 = ascii!(03 / 00).as_bytes()[0];

/// Upper bound (inclusive) of valid parameter bytes.
/// Parameter bytes can be between 03 / 00 and 03 / 15; the parser rejects a byte above this bound
/// as a parameter byte.
const PARAMETER_UPPER_BOUND: u8 = ascii!(03 / 15).as_bytes()[0];

/// Parameter separator byte (03 / 11).
/// The parser splits the parameter portion of a control sequence at every occurrence of this
/// separator to obtain the individual parameters.
const PARAMETER_SEPARATOR: &str = ascii!(03 / 11);
120
/// A Token contains a part of the parsed string. Each part is either a string that does not contain any
/// ansi-control-codes (represented by [`Token::String`]), or an ansi-control-code (represented by
/// [`Token::ControlFunction`]).
///
/// A `Token` can be obtained by creating a [`TokenStream`] and iterating over it.
/// String tokens borrow from the parsed input, so a `Token` cannot outlive the string it was parsed from.
#[derive(Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// A string slice of the input that does not contain any valid ansi-control-code.
    String(&'a str),
    /// A valid ansi-control-code that was found in the parsed string.
    ControlFunction(ControlFunction<'a>),
}
133
134/// A TokenStream is a stream of [`Token`]s that were parsed from an input string.
135/// The TokenStream implements the [`Iterator`] interface, which can be used to extract the result of a parse operation.
136///
137/// The parse operation can never fail. If invalid ansi-control-codes are detected in the input string, they will be
138/// emitted as normal Strings ([`Token::String`]). Only valid ansi-control-codes will be emitted as ControlFunctions
139/// ([`Token::ControlFunction`]).
140#[derive(Debug)]
141pub struct TokenStream<'a> {
142    value: &'a str,
143    position: usize,
144    max_position: usize,
145}
146
147impl<'a> TokenStream<'a> {
148    /// Parse the given string `value` into a [`TokenStream`].
149    ///
150    /// The [`TokenStream`] can be iterated over to inspect the result of the parse operation.
151    pub fn from(value: &'a str) -> Self {
152        TokenStream {
153            value,
154            // invariant: position always points to a valid character boundary inside the string stored in value.
155            position: 0,
156            max_position: value.len(),
157        }
158    }
159
160    fn get_next_char_boundary(&self, position: usize) -> usize {
161        // invariant: position is a valid character boundary. Next character boundary is at least at position + 1
162        // no more boundaries can be discovered, if position >= self.value.len()
163        if position >= self.max_position {
164            return position;
165        }
166
167        let mut next_boundary = position + 1;
168        while !self.value.is_char_boundary(next_boundary) {
169            next_boundary += 1
170        }
171        next_boundary
172    }
173
174    fn emit_current_string(&mut self, position: usize) -> Option<Token<'a>> {
175        let mut emit_token = None;
176        if position != self.position {
177            emit_token = Some(Token::String(&self.value[self.position..position]));
178
179            self.position = position;
180        }
181
182        emit_token
183    }
184}
185impl<'a> Iterator for TokenStream<'a> {
186    type Item = Token<'a>;
187
188    fn next(&mut self) -> Option<Self::Item> {
189        let mut current_position = self.position;
190        while current_position < self.max_position {
191            let next_char_boundary = self.get_next_char_boundary(current_position);
192
193            // invariant: self.value[self.position..next_char_boundary] is a valid sub-string of value, as
194            // current_position and next_char_boundary both point to valid character boundaries.
195            // invariant: self.value[current_position..next_char_boundary] is a valid character.
196
197            let current_char = &self.value[current_position..next_char_boundary];
198            if !current_char.is_ascii() {
199                // all ansi-control-codes are valid ascii. Non-ascii characters can never be part of an
200                // ansi-control-code
201                current_position = next_char_boundary;
202                continue;
203            }
204
205            // when encountering an ASCII character, this might be beginning, or part of an already started
206            // ansi-control-code. It also might just be a normal ascii character that has nothing to do with any
207            // ansi-control-code.
208
209            // All c0 control codes are 1 character long and can be identified directly, except for ESC which might
210            // introduce a longer sequence. All of those, except ESC, are stored in the array C0_CODES
211            if let Some(ansi_control_code) = C0_CODES
212                .into_iter()
213                .find(|c0_code| c0_code == &current_char)
214            {
215                // detected a C0 ansi-control-code. But there might be other data that we need to emit from
216                // previous iterations of this loop that detected string data.
217                return self.emit_current_string(current_position).or_else(|| {
218                    // there was no string to emit before the control function, so we can emit the control function
219                    // instead. We need to change the position of the iterator first.
220                    self.position = next_char_boundary;
221                    Some(Token::ControlFunction(ansi_control_code))
222                });
223            }
224
225            // This is either ESC (maybe introducing a longer sequence of control codes), or a normal string
226            // ESC is a special scenario, as it might introduce longer escape sequences
227            if ESC == current_char {
228                // if we have reached the end of the string, the ESC cannot be part of a longer sequence
229                if self.max_position == next_char_boundary {
230                    // detected an ESC. But there might be other data that we need to emit from
231                    // previous iterations of this loop that detected string data.
232                    return self.emit_current_string(current_position).or_else(|| {
233                        // there was no string to emit before the control function, so we can emit the control function
234                        // instead. We need to change the position of the iterator first.
235                        self.position = next_char_boundary;
236                        Some(Token::ControlFunction(ESC))
237                    });
238                }
239
240                // we need to look-ahead to find if this is part of a longer sequence
241                // possible next character is one of C1, independent control function, or CSI
242                let next_next_char_boundary = self.get_next_char_boundary(next_char_boundary);
243
244                // invariant: self.value[self.position..next_next_char_boundary] is a valid sub-string of value, as
245                // current_position and next_next_char_boundary both point to valid character boundaries.
246                // invariant: self.value[next_char_boundary..next_next_char_boundary] is a valid character.
247                // invariant: self.value[current_position..next_next_char_boundary] is a valid string.
248
249                let current_char = &self.value[next_char_boundary..next_next_char_boundary];
250                let control_sequence = &self.value[current_position..next_next_char_boundary];
251                if !current_char.is_ascii() {
252                    // C1, independent control function, and CSI are valid ascii characters. If we find a non-ascii
253                    // character. this cannot be a control character or sequence. This is a standalone ESC character.
254                    // Emit the ESC. But there might be other data that we need to emit from
255                    // previous iterations of this loop that detected string data.
256                    return self.emit_current_string(current_position).or_else(|| {
257                        // there was no string to emit before the control function, so we can emit the control function
258                        // instead. We need to change the position of the iterator first.
259                        self.position = next_char_boundary;
260                        Some(Token::ControlFunction(ESC))
261                    });
262                }
263
264                // A ASCII character might be a continuation of a longer control sequence, or it might just be normal
265                // text. If it is a continuation of a control function, it needs to be one of the C1 codes, one of
266                // the independent control codes, or a CSI starting a control sequence.
267
268                // Handle C1 Codes
269                // All C1 control codes are 1 character long and can be identified directly, except for CSI which might
270                // introduce a longer sequence. All of those, except CSI, are stored in the array C1_CODES
271                if let Some(ansi_control_code) = C1_CODES
272                    .into_iter()
273                    .find(|c1_code| c1_code == &control_sequence)
274                {
275                    // detected a C1 ansi-control-code. But there might be other data that we need to emit from
276                    // previous iterations of this loop that detected string data.
277                    return self.emit_current_string(current_position).or_else(|| {
278                        // there was no string to emit before the control function, so we can emit the control function
279                        // instead. We need to change the position of the iterator first.
280                        self.position = next_next_char_boundary;
281                        Some(Token::ControlFunction(ansi_control_code))
282                    });
283                }
284
285                // Handle Independent Control Functions
286                // All Independent Control Functions are 1 character long, and can be identified directly.
287                // All Independent control functions are stored in the array INDEPENDENT_CODES
288                if let Some(ansi_control_code) = INDEPENDENT_CODES
289                    .into_iter()
290                    .find(|independent_code| independent_code == &control_sequence)
291                {
292                    // detected a C1 ansi-control-code. But there might be other data that we need to emit from
293                    // previous iterations of this loop that detected string data.
294                    return self.emit_current_string(current_position).or_else(|| {
295                        // there was no string to emit before the control function, so we can emit the control function
296                        // instead.
297                        self.position = next_next_char_boundary;
298                        Some(Token::ControlFunction(ansi_control_code))
299                    });
300                }
301
302                // If the character is CSI, it introduces a control sequence
303                if control_sequence == CSI {
304                    // between the CSI character and the function value. To find the parameter list, we need to find
305                    // the end of the control sequence. Possible end values of the sequence depend on the sequence type.
306                    // Sequences can start with or without an intermediate byte.
307
308                    let control_sequence_position = next_next_char_boundary;
309
310                    let lower_bound = CONTROL_FUNCTION_LOWER_BOUND;
311                    let upper_bound = CONTROL_FUNCTION_UPPER_BOUND;
312                    let parameter_lower_bound = PARAMETER_LOWER_BOUND;
313                    let parameter_upper_bound = PARAMETER_UPPER_BOUND;
314
315                    let mut intermediate_byte = false;
316
317                    // try to find a function value between lower_bound and upper_bound
318                    let mut current_position_cs = control_sequence_position;
319                    let mut next_position_cs =
320                        self.get_next_char_boundary(control_sequence_position);
321                    'control_sequence_loop: loop {
322                        let current_char = &self.value[current_position_cs..next_position_cs];
323
324                        // non-ascii (multi-byte) values are never valid parameters to a control sequence, this is
325                        // invalid!
326                        if current_char.as_bytes().len() != 1 {
327                            break 'control_sequence_loop;
328                        }
329
330                        // does this end the control function?
331                        if current_char.as_bytes()[0] >= lower_bound
332                            && current_char.as_bytes()[0] <= upper_bound
333                        {
334                            // detected the end of a control function
335                            let control_function_value = if intermediate_byte {
336                                &self.value[current_position_cs - 1..next_position_cs]
337                            } else {
338                                current_char
339                            };
340                            let parameters_unparsed = if intermediate_byte {
341                                &self.value[control_sequence_position..(current_position_cs - 1)]
342                            } else {
343                                &self.value[control_sequence_position..current_position_cs]
344                            };
345                            let parameters = parameters_unparsed
346                                .split(PARAMETER_SEPARATOR)
347                                .map(String::from)
348                                .collect();
349
350                            // emit string token (if any) or the control function
351                            return self.emit_current_string(current_position).or_else(|| {
352                                // there was no string to emit before the control function, so we can emit the control
353                                // function instead.
354                                self.position = next_position_cs;
355                                Some(Token::ControlFunction(ControlFunction::new_sequence(
356                                    control_function_value,
357                                    parameters,
358                                )))
359                            });
360                        } else if intermediate_byte {
361                            // we have already seen an intermediate byte, but now the control function is still
362                            // not terminated. This is invalid!
363                            break 'control_sequence_loop;
364                        } else if current_char.as_bytes()[0] < parameter_lower_bound
365                            || current_char.as_bytes()[0] > parameter_upper_bound
366                        {
367                            // this is not a valid function value, and not a valid parameter byte
368                            // if it is not the intermediate byte, this is invalid!
369                            intermediate_byte = current_char == ascii!(02 / 00);
370                            if !intermediate_byte {
371                                break 'control_sequence_loop;
372                            }
373                        }
374
375                        // end of string reached?
376                        // this does not end the control function
377                        // check the next character (or exit, if there are no more characters).
378                        if next_position_cs == self.max_position {
379                            // nothing else to do any more, reached end of string, this can't be valid
380                            // since there was no valid end to this control sequence.
381                            break 'control_sequence_loop;
382                        }
383                        current_position_cs = next_position_cs;
384                        next_position_cs = self.get_next_char_boundary(current_position_cs);
385                    }
386                } else {
387                    // found ESC that did not introduce a longer sequence, emit as-is.
388                    return self.emit_current_string(current_position).or_else(|| {
389                        // there was no string to emit before the control function, so we can emit the control function
390                        // instead.
391                        self.position = next_char_boundary;
392                        Some(Token::ControlFunction(ESC))
393                    });
394                }
395            }
396
397            current_position = next_char_boundary;
398        }
399
400        // reached end of the input string.
401        // emit the last token, if there is still some parts of the input that have not been emitted yet.
402        self.emit_current_string(current_position)
403    }
404}
405
406#[cfg(test)]
407mod tests {
408
409    use crate::{
410        c0::{BEL, CR, ESC, LF},
411        c1::{BPH, CSI, NBH, SOS},
412        control_sequences::{
413            DeviceAttributes, PrintQuality, ReversedString, TabulationControl, CHA, CHT, CTC, CUP,
414            DA, SPQR, SRS, SSW, SU, TCC,
415        },
416        independent_control_functions::{DMI, EMI, RIS},
417        ControlFunction,
418    };
419
420    use super::{Token, TokenStream};
421
422    #[test]
423    fn test_simple_ascii_string() {
424        let simple_ascii_input = "Hello World";
425        let mut token_stream = TokenStream::from(simple_ascii_input);
426
427        let first_element = token_stream.next();
428        let second_element = token_stream.next();
429
430        assert!(first_element.is_some_and(|value| value == Token::String(simple_ascii_input)));
431        assert!(second_element.is_none());
432    }
433
434    #[test]
435    fn test_simple_non_ascii_string() {
436        let simple_non_ascii_input = "Löwe 老虎 Léopard";
437        let mut token_stream = TokenStream::from(simple_non_ascii_input);
438
439        let first_element = token_stream.next();
440        let second_element = token_stream.next();
441
442        assert!(first_element.is_some_and(|value| value == Token::String(simple_non_ascii_input)));
443        assert!(second_element.is_none());
444    }
445
446    #[test]
447    fn test_simple_ascii_string_with_c0() {
448        let simple_ascii_input = "Ring the bell";
449        let input = format!("{}{}", simple_ascii_input, BEL);
450
451        let mut token_stream = TokenStream::from(&input);
452
453        let first_element = token_stream.next();
454        let second_element = token_stream.next();
455        let third_element = token_stream.next();
456
457        assert!(first_element.is_some_and(|value| value == Token::String(simple_ascii_input)));
458        assert!(second_element.is_some_and(|value| value == Token::ControlFunction(BEL)));
459        assert!(third_element.is_none());
460    }
461
462    #[test]
463    fn test_simple_non_ascii_string_with_c0() {
464        let simple_non_ascii_input = "Löwe 老虎 Léopard";
465        let input = format!("{}{}{}", simple_non_ascii_input, CR, LF);
466
467        let mut token_stream = TokenStream::from(&input);
468
469        let first_element = token_stream.next();
470        let second_element = token_stream.next();
471        let third_element = token_stream.next();
472        let forth_element = token_stream.next();
473
474        assert!(first_element.is_some_and(|value| value == Token::String(simple_non_ascii_input)));
475        assert!(second_element.is_some_and(|value| value == Token::ControlFunction(CR)));
476        assert!(third_element.is_some_and(|value| value == Token::ControlFunction(LF)));
477        assert!(forth_element.is_none());
478    }
479
480    #[test]
481    fn test_simple_ascii_string_with_interleaved_c0() {
482        let line1 = "Line1";
483        let line2 = "Line2";
484        let input = format!("{}{}{}", line1, LF, line2);
485
486        let mut token_stream = TokenStream::from(&input);
487
488        let first_element = token_stream.next();
489        let second_element = token_stream.next();
490        let third_element = token_stream.next();
491        let forth_element = token_stream.next();
492
493        assert!(first_element.is_some_and(|value| value == Token::String(line1)));
494        assert!(second_element.is_some_and(|value| value == Token::ControlFunction(LF)));
495        assert!(third_element.is_some_and(|value| value == Token::String(line2)));
496        assert!(forth_element.is_none());
497    }
498
499    #[test]
500    fn test_simple_non_ascii_string_with_interleaved_c0() {
501        let line1 = "Löwe";
502        let line2 = "老虎";
503        let input = format!("{}{}{}", line1, LF, line2);
504
505        let mut token_stream = TokenStream::from(&input);
506
507        let first_element = token_stream.next();
508        let second_element = token_stream.next();
509        let third_element = token_stream.next();
510        let forth_element = token_stream.next();
511
512        assert!(first_element.is_some_and(|value| value == Token::String(line1)));
513        assert!(second_element.is_some_and(|value| value == Token::ControlFunction(LF)));
514        assert!(third_element.is_some_and(|value| value == Token::String(line2)));
515        assert!(forth_element.is_none());
516    }
517
518    #[test]
519    fn test_single_esc() {
520        let esc = ESC.to_string();
521        let mut token_stream = TokenStream::from(&esc);
522
523        let first_element = token_stream.next();
524        let second_element = token_stream.next();
525
526        assert!(first_element.is_some_and(|value| value == Token::ControlFunction(ESC)));
527        assert!(second_element.is_none());
528    }
529
530    #[test]
531    fn test_esc_at_end_of_string() {
532        let text = "I have to escape";
533        let escape = format!("{}{}", text, ESC);
534        let mut token_stream = TokenStream::from(&escape);
535
536        let first_element = token_stream.next();
537        let second_element = token_stream.next();
538        let third_element = token_stream.next();
539
540        assert!(first_element.is_some_and(|value| value == Token::String(text)));
541        assert!(second_element.is_some_and(|value| value == Token::ControlFunction(ESC)));
542        assert!(third_element.is_none());
543    }
544
545    #[test]
546    fn test_esc_at_start_of_non_ascii_string() {
547        let text = "í have to escape";
548        let escape = format!("{}{}", ESC, text);
549        let mut token_stream = TokenStream::from(&escape);
550
551        let first_element = token_stream.next();
552        let second_element = token_stream.next();
553        let third_element = token_stream.next();
554
555        assert!(first_element.is_some_and(|value| value == Token::ControlFunction(ESC)));
556        assert!(second_element.is_some_and(|value| value == Token::String(text)));
557        assert!(third_element.is_none());
558    }
559
560    #[test]
561    fn test_esc_at_start_of_ascii_string() {
562        let text = "i have to escape";
563        let escape = format!("{}{}", ESC, text);
564        let mut token_stream = TokenStream::from(&escape);
565
566        let first_element = token_stream.next();
567        let second_element = token_stream.next();
568        let third_element = token_stream.next();
569
570        println!("{:?}", first_element);
571        println!("{:?}", second_element);
572        println!("{:?}", third_element);
573
574        assert!(first_element.is_some_and(|value| value == Token::ControlFunction(ESC)));
575        assert!(second_element.is_some_and(|value| value == Token::String(text)));
576        assert!(third_element.is_none());
577    }
578
579    #[test]
580    fn test_c1_at_start_of_string() {
581        let text = format!("{}This might be in the next line", BPH);
582        let result = TokenStream::from(&text).collect::<Vec<Token>>();
583
584        assert_eq!(
585            result,
586            vec![
587                Token::ControlFunction(BPH),
588                Token::String("This might be in the next line")
589            ]
590        )
591    }
592
593    #[test]
594    fn test_c1_at_end_of_string() {
595        let text = format!("No break is permitted at the end of this string{}", NBH);
596        let result = TokenStream::from(&text).collect::<Vec<Token>>();
597
598        assert_eq!(
599            result,
600            vec![
601                Token::String("No break is permitted at the end of this string"),
602                Token::ControlFunction(NBH)
603            ]
604        )
605    }
606
607    #[test]
608    fn test_c1_in_between_ascii_strings() {
609        let text = format!("Line1{}Maybe Line2", BPH);
610        let result = TokenStream::from(&text).collect::<Vec<Token>>();
611
612        assert_eq!(
613            result,
614            vec![
615                Token::String("Line1"),
616                Token::ControlFunction(BPH),
617                Token::String("Maybe Line2")
618            ]
619        )
620    }
621
622    #[test]
623    fn test_c1_in_between_non_ascii_strings() {
624        let text = format!("老{}虎", SOS);
625        let result = TokenStream::from(&text).collect::<Vec<Token>>();
626
627        assert_eq!(
628            result,
629            vec![
630                Token::String("老"),
631                Token::ControlFunction(SOS),
632                Token::String("虎")
633            ]
634        )
635    }
636
637    #[test]
638    fn test_independent_code_at_start_of_string() {
639        let text = format!("{}Back to normal", RIS);
640        let result = TokenStream::from(&text).collect::<Vec<Token>>();
641
642        assert_eq!(
643            result,
644            vec![Token::ControlFunction(RIS), Token::String("Back to normal")]
645        )
646    }
647
648    #[test]
649    fn test_independent_code_at_end_of_string() {
650        let text = format!("Now enabling manual input{}", EMI);
651        let result = TokenStream::from(&text).collect::<Vec<Token>>();
652
653        assert_eq!(
654            result,
655            vec![
656                Token::String("Now enabling manual input"),
657                Token::ControlFunction(EMI)
658            ]
659        )
660    }
661
662    #[test]
663    fn test_independent_code_in_between_of_ascii_strings() {
664        let text = format!(
665            "Now enabling manual input{} And now {}disabling it again",
666            EMI, DMI
667        );
668        let result = TokenStream::from(&text).collect::<Vec<Token>>();
669
670        assert_eq!(
671            result,
672            vec![
673                Token::String("Now enabling manual input"),
674                Token::ControlFunction(EMI),
675                Token::String(" And now "),
676                Token::ControlFunction(DMI),
677                Token::String("disabling it again"),
678            ]
679        )
680    }
681
682    #[test]
683    fn test_independent_code_in_between_of_non_ascii_strings() {
684        let text = format!(
685            "Now enabling manual input{} And now 老{}老disabling it again",
686            EMI, DMI
687        );
688        let result = TokenStream::from(&text).collect::<Vec<Token>>();
689
690        assert_eq!(
691            result,
692            vec![
693                Token::String("Now enabling manual input"),
694                Token::ControlFunction(EMI),
695                Token::String(" And now 老"),
696                Token::ControlFunction(DMI),
697                Token::String("老disabling it again"),
698            ]
699        )
700    }
701
702    #[test]
703    fn test_invalid_control_sequence() {
704        let invalid_sequence = format!("{}{}{}", ESC, CSI, "ä");
705        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
706
707        assert_eq!(
708            result,
709            vec![
710                Token::ControlFunction(ESC),
711                Token::String(&format!("{}{}", CSI, "ä")),
712            ]
713        )
714    }
715
716    #[test]
717    fn test_invalid_control_sequence_with_lookalike_arguments() {
718        let invalid_sequence = format!("{}{}{}{}", ESC, CSI, "1;2", "ä");
719        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
720
721        assert_eq!(
722            result,
723            vec![
724                Token::ControlFunction(ESC),
725                Token::String(&format!("{}{}{}", CSI, "1;2", "ä")),
726            ]
727        )
728    }
729
730    #[test]
731    fn test_invalid_control_sequence_with_no_end() {
732        let invalid_sequence = format!("{}{}", ESC, CSI);
733        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
734
735        assert_eq!(
736            result,
737            vec![
738                Token::ControlFunction(ESC),
739                Token::String(&format!("{}", CSI)),
740            ]
741        )
742    }
743
744    #[test]
745    fn test_invalid_control_sequence_with_intermediate() {
746        let invalid_sequence = format!("{}{}{}{}", ESC, CSI, ascii!(02 / 00), "ä");
747        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
748
749        assert_eq!(
750            result,
751            vec![
752                Token::ControlFunction(ESC),
753                Token::String(&format!("{}{}{}", CSI, ascii!(02 / 00), "ä")),
754            ]
755        )
756    }
757
758    #[test]
759    fn test_invalid_control_sequence_with_intermediate_with_lookalike_arguments() {
760        let invalid_sequence = format!("{}{}{}{}{}", ESC, CSI, ascii!(02 / 00), "1;2", "ä");
761        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
762
763        assert_eq!(
764            result,
765            vec![
766                Token::ControlFunction(ESC),
767                Token::String(&format!("{}{}{}{}", CSI, ascii!(02 / 00), "1;2", "ä")),
768            ]
769        )
770    }
771
772    #[test]
773    fn test_invalid_control_sequence_with_intermediate_with_no_end() {
774        let invalid_sequence = format!("{}{}{}", ESC, CSI, ascii!(02 / 00));
775        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
776
777        assert_eq!(
778            result,
779            vec![
780                Token::ControlFunction(ESC),
781                Token::String(&format!("{}{}", CSI, ascii!(02 / 00))),
782            ]
783        )
784    }
785
786    #[test]
787    fn test_invalid_control_sequence_with_no_end_and_parameters() {
788        let invalid_sequence = format!("{}{}{}", ESC, CSI, "1;2");
789        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
790
791        assert_eq!(
792            result,
793            vec![
794                Token::ControlFunction(ESC),
795                Token::String(&format!("{}{}", CSI, "1;2")),
796            ]
797        )
798    }
799
800    #[test]
801    fn test_valid_control_sequence_no_intermediate_standalone() {
802        let valid_sequence = format!("{}", CHA(None));
803        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
804
805        assert_eq!(result, vec![Token::ControlFunction(CHA(None))])
806    }
807
808    #[test]
809    fn test_valid_control_sequence_no_intermediate_beginning_of_string() {
810        let valid_sequence = format!("{}Hello", CHA(None));
811        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
812
813        assert_eq!(
814            result,
815            vec![Token::ControlFunction(CHA(None)), Token::String("Hello")]
816        )
817    }
818
819    #[test]
820    fn test_valid_control_sequence_no_intermediate_end_of_string() {
821        let valid_sequence = format!("Hello{}", CHT(8.into()));
822        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
823
824        assert_eq!(
825            result,
826            vec![
827                Token::String("Hello"),
828                Token::ControlFunction(CHT(8.into()))
829            ]
830        )
831    }
832
833    #[test]
834    fn test_valid_control_sequence_no_intermediate_middle_of_string() {
835        let valid_sequence = format!(
836            "Take control{} over tabulations",
837            CTC(TabulationControl::ClearAllLineTabulationStops.into())
838        );
839        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
840
841        assert_eq!(
842            result,
843            vec![
844                Token::String("Take control"),
845                Token::ControlFunction(CTC(TabulationControl::ClearAllLineTabulationStops.into())),
846                Token::String(" over tabulations")
847            ]
848        )
849    }
850
851    #[test]
852    fn test_valid_control_sequence_with_intermediate_standalone() {
853        let valid_sequence = format!("{}", SPQR(PrintQuality::HighQualityLowSpeed.into()));
854        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
855
856        assert_eq!(result, vec![Token::ControlFunction(SPQR(None))])
857    }
858
859    #[test]
860    fn test_valid_control_sequence_with_intermediate_beginning_of_string() {
861        let valid_sequence = format!(
862            "{}desreveR{}",
863            SRS(ReversedString::Start.into()),
864            SRS(ReversedString::End.into())
865        );
866        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
867
868        assert_eq!(
869            result,
870            vec![
871                Token::ControlFunction(SRS(ReversedString::Start.into())),
872                Token::String("desreveR"),
873                Token::ControlFunction(SRS(ReversedString::End.into()))
874            ]
875        )
876    }
877
878    #[test]
879    fn test_valid_control_sequence_with_intermediate_end_of_string() {
880        let valid_sequence = format!("No more spaces after me!{}", SSW(0));
881        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
882
883        assert_eq!(
884            result,
885            vec![
886                Token::String("No more spaces after me!"),
887                Token::ControlFunction(SSW(0))
888            ]
889        )
890    }
891
892    #[test]
893    fn test_valid_control_sequence_with_intermediate_middle_of_string() {
894        let valid_sequence = format!("Hold tight!{}We are going up!", SU(50.into()));
895        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
896
897        assert_eq!(
898            result,
899            vec![
900                Token::String("Hold tight!"),
901                Token::ControlFunction(SU(50.into())),
902                Token::String("We are going up!")
903            ]
904        )
905    }
906
907    #[test]
908    fn test_valid_control_sequence_with_multiple_parameters() {
909        let valid_sequence = format!("All or nothing@>Ä{}", TCC(6, 12.into()));
910        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
911
912        assert_eq!(
913            result,
914            vec![
915                Token::String("All or nothing@>Ä"),
916                Token::ControlFunction(TCC(6, 12.into())),
917            ]
918        )
919    }
920
921    #[test]
922    fn test_example_a() {
923        let example = "\x1b[0u\x1b[62c\x1b[23;6H";
924        let result = TokenStream::from(&example).collect::<Vec<Token>>();
925
926        assert_eq!(
927            result,
928            vec![
929                Token::ControlFunction(
930                    ControlFunction::private_use("u", vec![String::from("0")]).unwrap()
931                ),
932                Token::ControlFunction(DA(DeviceAttributes::Identify(62).into())),
933                Token::ControlFunction(CUP(23.into(), 6.into()))
934            ]
935        )
936    }
937}