use crate::{c0::*, c1::*, independent_control_functions::*, ControlFunction};
// C0 control functions that `TokenStream::next` recognises by comparing each table entry
// against a single ASCII character of the input.
// ESC is intentionally absent from this table: `next` checks for it separately because it may
// introduce a longer control function.
const C0_CODES: [ControlFunction; 31] = [
ACK, BEL, BS, CAN, CR, DC1, DC2, DC3, DC4, DLE, EM, ENQ, EOT, ETB, ETX, FF, HT, IS1, IS2, IS3,
IS4, LF, NAK, NUL, SI, SO, SOH, STX, SUB, SYN, VT,
];
// C1 control functions that `TokenStream::next` recognises by comparing each table entry
// against the two-character slice made of ESC and the ASCII character following it.
// CSI is absent from this table: `next` compares against it separately because it introduces
// a control sequence whose parameters and final byte still have to be scanned.
const C1_CODES: [ControlFunction; 27] = [
BPH, NBH, NEL, SSA, ESA, HTS, HTJ, VTS, PLD, PLU, RI, SS2, SS3, DCS, PU1, PU2, STS, CCH, MW,
SPA, EPA, SOS, SCI, ST, OSC, PM, APC,
];
// Independent control functions. `TokenStream::next` compares them against the same
// "ESC plus following ASCII character" slice as the C1 table, after the C1 table found no match.
const INDEPENDENT_CODES: [ControlFunction; 10] =
[DMI, INT, EMI, RIS, CMD, LS2, LS3, LS3R, LS2R, LS1R];
/// Lowest bit combination (04/00) accepted as the final byte of a control sequence.
const CONTROL_FUNCTION_LOWER_BOUND: u8 = ascii!(04 / 00).as_bytes()[0];
/// Highest bit combination (07/14) accepted as the final byte of a control sequence.
///
/// ECMA-48 defines final bytes as the bit combinations 04/00 to 07/14 (07/00 to 07/14 being
/// reserved for private use). 07/15 is DEL and must not terminate a control sequence.
const CONTROL_FUNCTION_UPPER_BOUND: u8 = ascii!(07 / 14).as_bytes()[0];
/// Lowest bit combination (03/00) accepted as a parameter byte of a control sequence.
const PARAMETER_LOWER_BOUND: u8 = ascii!(03 / 00).as_bytes()[0];
/// Highest bit combination (03/15) accepted as a parameter byte of a control sequence.
const PARAMETER_UPPER_BOUND: u8 = ascii!(03 / 15).as_bytes()[0];
/// Separator (bit combination 03/11, `;` in ASCII) on which the parameter string of a control
/// sequence is split into individual parameters.
const PARAMETER_SEPARATOR: &str = ascii!(03 / 11);
/// A single element yielded by [`TokenStream`].
#[derive(Debug, PartialEq, Eq)]
pub enum Token<'a> {
/// A run of input that is not part of a recognised control function, borrowed from the
/// string the stream was created from.
String(&'a str),
/// A control function recognised in the input.
ControlFunction(ControlFunction<'a>),
}
/// Iterator that splits a string into [`Token`]s.
#[derive(Debug)]
pub struct TokenStream<'a> {
// The complete input being tokenized.
value: &'a str,
// Byte offset into `value` of the first byte that has not been emitted as part of a token yet.
position: usize,
// Byte length of `value`; scanning stops once this offset is reached.
max_position: usize,
}
impl<'a> TokenStream<'a> {
    /// Creates a token stream positioned at the start of `value`.
    pub fn from(value: &'a str) -> Self {
        let max_position = value.len();
        Self {
            value,
            position: 0,
            max_position,
        }
    }

    /// Returns the byte offset of the first char boundary strictly after `position`.
    ///
    /// A `position` at or beyond the end of the input is returned unchanged.
    fn get_next_char_boundary(&self, position: usize) -> usize {
        if position >= self.max_position {
            return position;
        }
        // The end of the input is always a char boundary, so this search terminates.
        let mut candidate = position + 1;
        loop {
            if self.value.is_char_boundary(candidate) {
                break candidate;
            }
            candidate += 1;
        }
    }

    /// Emits the text between the stream position and `position` as a [`Token::String`] and
    /// advances the stream position to `position`.
    ///
    /// Returns `None` (and leaves the stream untouched) when `position` equals the current
    /// stream position, i.e. when there is no pending text.
    fn emit_current_string(&mut self, position: usize) -> Option<Token<'a>> {
        if position == self.position {
            return None;
        }
        let value = self.value;
        let pending_text = &value[self.position..position];
        self.position = position;
        Some(Token::String(pending_text))
    }
}
impl<'a> Iterator for TokenStream<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
let mut current_position = self.position;
while current_position < self.max_position {
let next_char_boundary = self.get_next_char_boundary(current_position);
let current_char = &self.value[current_position..next_char_boundary];
if !current_char.is_ascii() {
current_position = next_char_boundary;
continue;
}
if let Some(ansi_control_code) = C0_CODES
.into_iter()
.find(|c0_code| c0_code == ¤t_char)
{
return self.emit_current_string(current_position).or_else(|| {
self.position = next_char_boundary;
Some(Token::ControlFunction(ansi_control_code))
});
}
if ESC == current_char {
if self.max_position == next_char_boundary {
return self.emit_current_string(current_position).or_else(|| {
self.position = next_char_boundary;
Some(Token::ControlFunction(ESC))
});
}
let next_next_char_boundary = self.get_next_char_boundary(next_char_boundary);
let current_char = &self.value[next_char_boundary..next_next_char_boundary];
let control_sequence = &self.value[current_position..next_next_char_boundary];
if !current_char.is_ascii() {
return self.emit_current_string(current_position).or_else(|| {
self.position = next_char_boundary;
Some(Token::ControlFunction(ESC))
});
}
if let Some(ansi_control_code) = C1_CODES
.into_iter()
.find(|c1_code| c1_code == &control_sequence)
{
return self.emit_current_string(current_position).or_else(|| {
self.position = next_next_char_boundary;
Some(Token::ControlFunction(ansi_control_code))
});
}
if let Some(ansi_control_code) = INDEPENDENT_CODES
.into_iter()
.find(|independent_code| independent_code == &control_sequence)
{
return self.emit_current_string(current_position).or_else(|| {
self.position = next_next_char_boundary;
Some(Token::ControlFunction(ansi_control_code))
});
}
if control_sequence == CSI {
let control_sequence_position = next_next_char_boundary;
let lower_bound = CONTROL_FUNCTION_LOWER_BOUND;
let upper_bound = CONTROL_FUNCTION_UPPER_BOUND;
let parameter_lower_bound = PARAMETER_LOWER_BOUND;
let parameter_upper_bound = PARAMETER_UPPER_BOUND;
let mut intermediate_byte = false;
let mut current_position_cs = control_sequence_position;
let mut next_position_cs =
self.get_next_char_boundary(control_sequence_position);
'control_sequence_loop: loop {
let current_char = &self.value[current_position_cs..next_position_cs];
if current_char.as_bytes().len() != 1 {
break 'control_sequence_loop;
}
if current_char.as_bytes()[0] >= lower_bound
&& current_char.as_bytes()[0] <= upper_bound
{
let control_function_value = if intermediate_byte {
&self.value[current_position_cs - 1..next_position_cs]
} else {
current_char
};
let parameters_unparsed = if intermediate_byte {
&self.value[control_sequence_position..(current_position_cs - 1)]
} else {
&self.value[control_sequence_position..current_position_cs]
};
let parameters = parameters_unparsed
.split(PARAMETER_SEPARATOR)
.map(String::from)
.collect();
return self.emit_current_string(current_position).or_else(|| {
self.position = next_position_cs;
Some(Token::ControlFunction(ControlFunction::new_sequence(
control_function_value,
parameters,
)))
});
} else if intermediate_byte {
break 'control_sequence_loop;
} else if current_char.as_bytes()[0] < parameter_lower_bound
|| current_char.as_bytes()[0] > parameter_upper_bound
{
intermediate_byte = current_char == ascii!(02 / 00);
if !intermediate_byte {
break 'control_sequence_loop;
}
}
if next_position_cs == self.max_position {
break 'control_sequence_loop;
}
current_position_cs = next_position_cs;
next_position_cs = self.get_next_char_boundary(current_position_cs);
}
} else {
return self.emit_current_string(current_position).or_else(|| {
self.position = next_char_boundary;
Some(Token::ControlFunction(ESC))
});
}
}
current_position = next_char_boundary;
}
self.emit_current_string(current_position)
}
}
#[cfg(test)]
mod tests {
    use crate::{
        c0::{BEL, CR, ESC, LF},
        c1::{BPH, CSI, NBH, SOS},
        control_sequences::{
            DeviceAttributes, PrintQuality, ReversedString, TabulationControl, CHA, CHT, CTC, CUP,
            DA, SPQR, SRS, SSW, SU, TCC,
        },
        independent_control_functions::{DMI, EMI, RIS},
        ControlFunction,
    };

    use super::{Token, TokenStream};

    /// Plain ASCII text is emitted as one string token.
    #[test]
    fn test_simple_ascii_string() {
        let simple_ascii_input = "Hello World";
        let result = TokenStream::from(simple_ascii_input).collect::<Vec<Token>>();
        assert_eq!(result, vec![Token::String(simple_ascii_input)])
    }

    /// Text containing multi-byte characters is emitted as one string token.
    #[test]
    fn test_simple_non_ascii_string() {
        let simple_non_ascii_input = "Löwe 老虎 Léopard";
        let result = TokenStream::from(simple_non_ascii_input).collect::<Vec<Token>>();
        assert_eq!(result, vec![Token::String(simple_non_ascii_input)])
    }

    /// A C0 control function at the end of ASCII text is split off as its own token.
    #[test]
    fn test_simple_ascii_string_with_c0() {
        let simple_ascii_input = "Ring the bell";
        let input = format!("{}{}", simple_ascii_input, BEL);
        let result = TokenStream::from(&input).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String(simple_ascii_input),
                Token::ControlFunction(BEL)
            ]
        )
    }

    /// Consecutive C0 control functions after non-ASCII text are emitted one by one.
    #[test]
    fn test_simple_non_ascii_string_with_c0() {
        let simple_non_ascii_input = "Löwe 老虎 Léopard";
        let input = format!("{}{}{}", simple_non_ascii_input, CR, LF);
        let result = TokenStream::from(&input).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String(simple_non_ascii_input),
                Token::ControlFunction(CR),
                Token::ControlFunction(LF)
            ]
        )
    }

    /// A C0 control function between two ASCII strings separates them.
    #[test]
    fn test_simple_ascii_string_with_interleaved_c0() {
        let line1 = "Line1";
        let line2 = "Line2";
        let input = format!("{}{}{}", line1, LF, line2);
        let result = TokenStream::from(&input).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String(line1),
                Token::ControlFunction(LF),
                Token::String(line2)
            ]
        )
    }

    /// A C0 control function between two non-ASCII strings separates them.
    #[test]
    fn test_simple_non_ascii_string_with_interleaved_c0() {
        let line1 = "Löwe";
        let line2 = "老虎";
        let input = format!("{}{}{}", line1, LF, line2);
        let result = TokenStream::from(&input).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String(line1),
                Token::ControlFunction(LF),
                Token::String(line2)
            ]
        )
    }

    /// An input consisting of ESC only yields a single ESC token.
    #[test]
    fn test_single_esc() {
        let esc = ESC.to_string();
        let result = TokenStream::from(&esc).collect::<Vec<Token>>();
        assert_eq!(result, vec![Token::ControlFunction(ESC)])
    }

    /// An ESC that is the last character of the input is emitted as ESC.
    #[test]
    fn test_esc_at_end_of_string() {
        let text = "I have to escape";
        let escape = format!("{}{}", text, ESC);
        let result = TokenStream::from(&escape).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![Token::String(text), Token::ControlFunction(ESC)]
        )
    }

    /// An ESC followed by a non-ASCII character is emitted as ESC, then the text.
    #[test]
    fn test_esc_at_start_of_non_ascii_string() {
        let text = "í have to escape";
        let escape = format!("{}{}", ESC, text);
        let result = TokenStream::from(&escape).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![Token::ControlFunction(ESC), Token::String(text)]
        )
    }

    /// An ESC followed by an ASCII character that forms no known control function is emitted
    /// as ESC, then the text.
    #[test]
    fn test_esc_at_start_of_ascii_string() {
        let text = "i have to escape";
        let escape = format!("{}{}", ESC, text);
        let result = TokenStream::from(&escape).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![Token::ControlFunction(ESC), Token::String(text)]
        )
    }

    #[test]
    fn test_c1_at_start_of_string() {
        let text = format!("{}This might be in the next line", BPH);
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(BPH),
                Token::String("This might be in the next line")
            ]
        )
    }

    #[test]
    fn test_c1_at_end_of_string() {
        let text = format!("No break is permitted at the end of this string{}", NBH);
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("No break is permitted at the end of this string"),
                Token::ControlFunction(NBH)
            ]
        )
    }

    #[test]
    fn test_c1_in_between_ascii_strings() {
        let text = format!("Line1{}Maybe Line2", BPH);
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Line1"),
                Token::ControlFunction(BPH),
                Token::String("Maybe Line2")
            ]
        )
    }

    #[test]
    fn test_c1_in_between_non_ascii_strings() {
        let text = format!("老{}虎", SOS);
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("老"),
                Token::ControlFunction(SOS),
                Token::String("虎")
            ]
        )
    }

    #[test]
    fn test_independent_code_at_start_of_string() {
        let text = format!("{}Back to normal", RIS);
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![Token::ControlFunction(RIS), Token::String("Back to normal")]
        )
    }

    #[test]
    fn test_independent_code_at_end_of_string() {
        let text = format!("Now enabling manual input{}", EMI);
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Now enabling manual input"),
                Token::ControlFunction(EMI)
            ]
        )
    }

    #[test]
    fn test_independent_code_in_between_of_ascii_strings() {
        let text = format!(
            "Now enabling manual input{} And now {}disabling it again",
            EMI, DMI
        );
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Now enabling manual input"),
                Token::ControlFunction(EMI),
                Token::String(" And now "),
                Token::ControlFunction(DMI),
                Token::String("disabling it again"),
            ]
        )
    }

    #[test]
    fn test_independent_code_in_between_of_non_ascii_strings() {
        let text = format!(
            "Now enabling manual input{} And now 老{}老disabling it again",
            EMI, DMI
        );
        let result = TokenStream::from(&text).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Now enabling manual input"),
                Token::ControlFunction(EMI),
                Token::String(" And now 老"),
                Token::ControlFunction(DMI),
                Token::String("老disabling it again"),
            ]
        )
    }

    // The "invalid control sequence" inputs below all start with a lone ESC followed by the
    // CSI introducer. The expected result is always the ESC token followed by one string token
    // that contains the introducer and everything after it.

    /// CSI directly followed by a non-ASCII character is not a control sequence.
    #[test]
    fn test_invalid_control_sequence() {
        let invalid_sequence = format!("{}{}{}", ESC, CSI, "ä");
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}{}", CSI, "ä")),
            ]
        )
    }

    /// CSI with parameter-like bytes but a non-ASCII character instead of a final byte.
    #[test]
    fn test_invalid_control_sequence_with_lookalike_arguments() {
        let invalid_sequence = format!("{}{}{}{}", ESC, CSI, "1;2", "ä");
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}{}{}", CSI, "1;2", "ä")),
            ]
        )
    }

    /// CSI at the very end of the input.
    #[test]
    fn test_invalid_control_sequence_with_no_end() {
        let invalid_sequence = format!("{}{}", ESC, CSI);
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}", CSI)),
            ]
        )
    }

    /// CSI, the intermediate byte 02/00, then a non-ASCII character instead of a final byte.
    #[test]
    fn test_invalid_control_sequence_with_intermediate() {
        let invalid_sequence = format!("{}{}{}{}", ESC, CSI, ascii!(02 / 00), "ä");
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}{}{}", CSI, ascii!(02 / 00), "ä")),
            ]
        )
    }

    /// Parameter-like bytes after the intermediate byte 02/00 invalidate the sequence.
    #[test]
    fn test_invalid_control_sequence_with_intermediate_with_lookalike_arguments() {
        let invalid_sequence = format!("{}{}{}{}{}", ESC, CSI, ascii!(02 / 00), "1;2", "ä");
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}{}{}{}", CSI, ascii!(02 / 00), "1;2", "ä")),
            ]
        )
    }

    /// CSI and the intermediate byte 02/00 at the very end of the input.
    #[test]
    fn test_invalid_control_sequence_with_intermediate_with_no_end() {
        let invalid_sequence = format!("{}{}{}", ESC, CSI, ascii!(02 / 00));
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}{}", CSI, ascii!(02 / 00))),
            ]
        )
    }

    /// CSI with parameters but the input ends before a final byte.
    #[test]
    fn test_invalid_control_sequence_with_no_end_and_parameters() {
        let invalid_sequence = format!("{}{}{}", ESC, CSI, "1;2");
        let result = TokenStream::from(&invalid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(ESC),
                Token::String(&format!("{}{}", CSI, "1;2")),
            ]
        )
    }

    #[test]
    fn test_valid_control_sequence_no_intermediate_standalone() {
        let valid_sequence = format!("{}", CHA(None));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(result, vec![Token::ControlFunction(CHA(None))])
    }

    #[test]
    fn test_valid_control_sequence_no_intermediate_beginning_of_string() {
        let valid_sequence = format!("{}Hello", CHA(None));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![Token::ControlFunction(CHA(None)), Token::String("Hello")]
        )
    }

    #[test]
    fn test_valid_control_sequence_no_intermediate_end_of_string() {
        let valid_sequence = format!("Hello{}", CHT(8.into()));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Hello"),
                Token::ControlFunction(CHT(8.into()))
            ]
        )
    }

    #[test]
    fn test_valid_control_sequence_no_intermediate_middle_of_string() {
        let valid_sequence = format!(
            "Take control{} over tabulations",
            CTC(TabulationControl::ClearAllLineTabulationStops.into())
        );
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Take control"),
                Token::ControlFunction(CTC(TabulationControl::ClearAllLineTabulationStops.into())),
                Token::String(" over tabulations")
            ]
        )
    }

    // NOTE(review): the input is built from an explicit `HighQualityLowSpeed` but compared
    // against `SPQR(None)`. This presumably relies on `HighQualityLowSpeed` being the default
    // print quality so that both compare equal -- confirm against `control_sequences::SPQR`.
    #[test]
    fn test_valid_control_sequence_with_intermediate_standalone() {
        let valid_sequence = format!("{}", SPQR(PrintQuality::HighQualityLowSpeed.into()));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(result, vec![Token::ControlFunction(SPQR(None))])
    }

    #[test]
    fn test_valid_control_sequence_with_intermediate_beginning_of_string() {
        let valid_sequence = format!(
            "{}desreveR{}",
            SRS(ReversedString::Start.into()),
            SRS(ReversedString::End.into())
        );
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(SRS(ReversedString::Start.into())),
                Token::String("desreveR"),
                Token::ControlFunction(SRS(ReversedString::End.into()))
            ]
        )
    }

    #[test]
    fn test_valid_control_sequence_with_intermediate_end_of_string() {
        let valid_sequence = format!("No more spaces after me!{}", SSW(0));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("No more spaces after me!"),
                Token::ControlFunction(SSW(0))
            ]
        )
    }

    #[test]
    fn test_valid_control_sequence_with_intermediate_middle_of_string() {
        let valid_sequence = format!("Hold tight!{}We are going up!", SU(50.into()));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("Hold tight!"),
                Token::ControlFunction(SU(50.into())),
                Token::String("We are going up!")
            ]
        )
    }

    /// The text before the sequence ends in `@>Ä` and must stay part of the string token.
    #[test]
    fn test_valid_control_sequence_with_multiple_parameters() {
        let valid_sequence = format!("All or nothing@>Ä{}", TCC(6, 12.into()));
        let result = TokenStream::from(&valid_sequence).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::String("All or nothing@>Ä"),
                Token::ControlFunction(TCC(6, 12.into())),
            ]
        )
    }

    /// Three control sequences back to back, written as raw escape codes: one with the final
    /// byte `u` (expected as a private-use function), one DA and one CUP.
    #[test]
    fn test_example_a() {
        let example = "\x1b[0u\x1b[62c\x1b[23;6H";
        let result = TokenStream::from(example).collect::<Vec<Token>>();
        assert_eq!(
            result,
            vec![
                Token::ControlFunction(
                    ControlFunction::private_use("u", vec![String::from("0")]).unwrap()
                ),
                Token::ControlFunction(DA(DeviceAttributes::Identify(62).into())),
                Token::ControlFunction(CUP(23.into(), 6.into()))
            ]
        )
    }
}