#![allow(clippy::upper_case_acronyms)]
use utf8parse::Parser as Utf8Parser;
mod enums;
use crate::enums::*;
mod transitions;
use transitions::{ENTRY, EXIT, TRANSITIONS};
/// Looks up the transition for `state` on input byte `b`.
///
/// Each `TRANSITIONS` cell packs two values into one word: the bits above
/// the low byte are decoded as the action, and the low byte as the next
/// state.
#[inline(always)]
fn lookup(state: State, b: u8) -> (Action, State) {
    let row = state as usize;
    let column = b as usize;
    // SAFETY: relies on `TRANSITIONS` having a row for every `State`
    // discriminant and a column for every possible `u8` value. The table is
    // declared in the `transitions` module -- TODO confirm its dimensions there.
    let cell = unsafe { TRANSITIONS.get_unchecked(row).get_unchecked(column) };
    let action = Action::from_u16(cell >> 8);
    let next_state = State::from_u16(cell & 0xff);
    (action, next_state)
}
/// Returns the action stored in `ENTRY` for `state` (non-test builds).
#[inline(always)]
#[cfg(not(test))]
fn lookup_entry(state: State) -> Action {
    let index = state as usize;
    // SAFETY: assumes `ENTRY` has one slot per `State` discriminant; the
    // `cfg(test)` variant of this function performs the same lookup with an
    // explicit bounds check.
    unsafe { *ENTRY.get_unchecked(index) }
}
/// Test-build variant of the `ENTRY` lookup: bounds-checked, panicking with
/// the offending state when the table has no slot for it.
#[inline(always)]
#[cfg(test)]
fn lookup_entry(state: State) -> Action {
    match ENTRY.get(state as usize) {
        Some(action) => *action,
        None => panic!("State {:?} has no entry in ENTRY", state),
    }
}
/// Test-build variant of the `EXIT` lookup: bounds-checked, panicking with
/// the offending state when the table has no slot for it.
#[inline(always)]
#[cfg(test)]
fn lookup_exit(state: State) -> Action {
    match EXIT.get(state as usize) {
        Some(action) => *action,
        None => panic!("State {:?} has no entry in EXIT", state),
    }
}
/// Returns the action stored in `EXIT` for `state` (non-test builds).
#[inline(always)]
#[cfg(not(test))]
fn lookup_exit(state: State) -> Action {
    let index = state as usize;
    // SAFETY: assumes `EXIT` has one slot per `State` discriminant; the
    // `cfg(test)` variant of this function performs the same lookup with an
    // explicit bounds check.
    unsafe { *EXIT.get_unchecked(index) }
}
/// Receiver for the events produced by [`VTParser`].
///
/// The parser calls one of these methods each time its state machine performs
/// the corresponding action while consuming input bytes.
pub trait VTActor {
/// Called with a character to print: either an input byte converted
/// directly to `char`, or a codepoint decoded from a UTF-8 sequence that
/// started in the ground state.
fn print(&mut self, b: char);
/// Called with the control byte when the state machine selects its
/// "execute" action. The byte may also come from a UTF-8 encoded codepoint
/// in the range `0..=0xff` that the transition table maps to an execute.
fn execute_c0_or_c1(&mut self, control: u8);
/// Called when the state machine performs its "hook" action.
///
/// * `mode` - the input byte that triggered the hook.
/// * `params` - the integer parameters collected before the hook.
/// * `intermediates` - the collected intermediate bytes.
/// * `ignored_excess_intermediates` - the parser's flag recording that some
///   intermediates were dropped for lack of space.
fn dcs_hook(
&mut self,
mode: u8,
params: &[i64],
intermediates: &[u8],
ignored_excess_intermediates: bool,
);
/// Called with each byte for which the state machine selects its "put"
/// action.
fn dcs_put(&mut self, byte: u8);
/// Called when the state machine performs its "unhook" action.
fn dcs_unhook(&mut self);
/// Called when an escape sequence is dispatched.
///
/// * `params` - the integer parameters collected so far.
/// * `intermediates` - the collected intermediate bytes.
/// * `ignored_excess_intermediates` - the parser's flag recording that some
///   intermediates were dropped for lack of space.
/// * `byte` - the input byte that triggered the dispatch.
fn esc_dispatch(
&mut self,
params: &[i64],
intermediates: &[u8],
ignored_excess_intermediates: bool,
byte: u8,
);
/// Called when a CSI sequence is dispatched.
///
/// * `params` - integers, separators and intermediates in the order they
///   were stored (see [`CsiParam`]).
/// * `parameters_truncated` - the parser passes its excess-intermediates
///   flag here.
/// * `byte` - the input byte that triggered the dispatch.
fn csi_dispatch(&mut self, params: &[CsiParam], parameters_truncated: bool, byte: u8);
/// Called when an OSC string ends. `params` holds the payload split at `;`
/// into at most `MAX_OSC` fields; it is empty when no payload was received.
fn osc_dispatch(&mut self, params: &[&[u8]]);
/// Called when an APC string ends, handing over ownership of the bytes
/// accumulated since it started.
fn apc_dispatch(&mut self, data: Vec<u8>);
}
/// Owned record of a single [`VTActor`] callback, as captured by
/// [`CollectingVTActor`]. Each variant mirrors the trait method of the
/// corresponding name and stores owned copies of its arguments.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum VTAction {
/// Recorded from `VTActor::print`.
Print(char),
/// Recorded from `VTActor::execute_c0_or_c1`.
ExecuteC0orC1(u8),
/// Recorded from `VTActor::dcs_hook`; `byte` holds the hook's first
/// argument.
DcsHook {
params: Vec<i64>,
intermediates: Vec<u8>,
ignored_excess_intermediates: bool,
byte: u8,
},
/// Recorded from `VTActor::dcs_put`.
DcsPut(u8),
/// Recorded from `VTActor::dcs_unhook`.
DcsUnhook,
/// Recorded from `VTActor::esc_dispatch`.
EscDispatch {
params: Vec<i64>,
intermediates: Vec<u8>,
ignored_excess_intermediates: bool,
byte: u8,
},
/// Recorded from `VTActor::csi_dispatch`.
CsiDispatch {
params: Vec<CsiParam>,
parameters_truncated: bool,
byte: u8,
},
/// Recorded from `VTActor::osc_dispatch`; one inner vector per field.
OscDispatch(Vec<Vec<u8>>),
/// Recorded from `VTActor::apc_dispatch`.
ApcDispatch(Vec<u8>),
}
/// A [`VTActor`] that records every callback it receives as a [`VTAction`],
/// in call order, so the sequence can be inspected afterwards.
#[derive(Default)]
pub struct CollectingVTActor {
// Recorded callbacks, oldest first.
actions: Vec<VTAction>,
}
/// Consuming iteration over the recorded actions, in the order they were
/// pushed.
impl IntoIterator for CollectingVTActor {
    type Item = VTAction;
    type IntoIter = std::vec::IntoIter<VTAction>;

    fn into_iter(self) -> Self::IntoIter {
        let Self { actions } = self;
        actions.into_iter()
    }
}
impl CollectingVTActor {
    /// Consumes the collector and hands back the recorded actions.
    pub fn into_vec(self) -> Vec<VTAction> {
        let Self { actions: recorded } = self;
        recorded
    }
}
/// Every callback is converted into the matching [`VTAction`] variant, with
/// borrowed slices copied into owned vectors, and appended to the log.
impl VTActor for CollectingVTActor {
    fn print(&mut self, b: char) {
        let recorded = VTAction::Print(b);
        self.actions.push(recorded);
    }

    fn execute_c0_or_c1(&mut self, control: u8) {
        let recorded = VTAction::ExecuteC0orC1(control);
        self.actions.push(recorded);
    }

    fn dcs_hook(
        &mut self,
        byte: u8,
        params: &[i64],
        intermediates: &[u8],
        ignored_excess_intermediates: bool,
    ) {
        let recorded = VTAction::DcsHook {
            params: Vec::from(params),
            intermediates: Vec::from(intermediates),
            ignored_excess_intermediates,
            byte,
        };
        self.actions.push(recorded);
    }

    fn dcs_put(&mut self, byte: u8) {
        let recorded = VTAction::DcsPut(byte);
        self.actions.push(recorded);
    }

    fn dcs_unhook(&mut self) {
        let recorded = VTAction::DcsUnhook;
        self.actions.push(recorded);
    }

    fn esc_dispatch(
        &mut self,
        params: &[i64],
        intermediates: &[u8],
        ignored_excess_intermediates: bool,
        byte: u8,
    ) {
        let recorded = VTAction::EscDispatch {
            params: Vec::from(params),
            intermediates: Vec::from(intermediates),
            ignored_excess_intermediates,
            byte,
        };
        self.actions.push(recorded);
    }

    fn csi_dispatch(&mut self, params: &[CsiParam], parameters_truncated: bool, byte: u8) {
        let recorded = VTAction::CsiDispatch {
            params: Vec::from(params),
            parameters_truncated,
            byte,
        };
        self.actions.push(recorded);
    }

    fn osc_dispatch(&mut self, params: &[&[u8]]) {
        // Copy each borrowed field so the record outlives the parser's buffer.
        let fields: Vec<Vec<u8>> = params.iter().map(|field| field.to_vec()).collect();
        self.actions.push(VTAction::OscDispatch(fields));
    }

    fn apc_dispatch(&mut self, data: Vec<u8>) {
        let recorded = VTAction::ApcDispatch(data);
        self.actions.push(recorded);
    }
}
/// Number of intermediate bytes the parser can store for one sequence.
const MAX_INTERMEDIATES: usize = 2;
/// Number of `;`-separated fields tracked for one OSC string.
const MAX_OSC: usize = 64;
/// Capacity of the parser's parameter array.
const MAX_PARAMS: usize = 32;

/// Accumulates the payload of an OSC string and remembers where each
/// `;`-separated field ends.
struct OscState {
    /// Payload bytes with the `;` separators removed.
    buffer: Vec<u8>,
    /// Entry `i` is the `buffer` length recorded when the separator that
    /// closed field `i` arrived.
    param_indices: [usize; MAX_OSC],
    /// Number of fields seen so far (never advanced past `MAX_OSC`).
    num_params: usize,
    /// Set once a separator arrives while `MAX_OSC` fields already exist;
    /// from then on non-separator input is discarded.
    full: bool,
}

impl OscState {
    /// Feeds one character of OSC payload into the accumulator.
    ///
    /// A `;` closes the current field (or marks the state as full when the
    /// field limit has been reached). Any other character is appended to the
    /// buffer as UTF-8 unless the state is full.
    fn put(&mut self, param: char) {
        if param != ';' {
            if self.full {
                return;
            }
            // The first payload character implicitly opens the first field.
            if self.num_params == 0 {
                self.num_params = 1;
            }
            // A `char` never needs more than four bytes of UTF-8.
            let mut scratch = [0u8; 4];
            let encoded = param.encode_utf8(&mut scratch);
            self.buffer.extend_from_slice(encoded.as_bytes());
            return;
        }

        if self.num_params == MAX_OSC {
            self.full = true;
        } else {
            // `saturating_sub` keeps the slot at 0 when a separator arrives
            // before any payload character.
            let slot = self.num_params.saturating_sub(1);
            self.param_indices[slot] = self.buffer.len();
            self.num_params += 1;
        }
    }
}
/// Incremental parser for terminal escape sequences. Bytes are fed in via
/// `parse` / `parse_byte`, and recognised events are reported to a
/// [`VTActor`].
pub struct VTParser {
// Current state of the state machine.
state: State,
// Intermediate bytes stored by the "collect" action, and how many are valid.
intermediates: [u8; MAX_INTERMEDIATES],
num_intermediates: usize,
// Set when an intermediate had to be dropped for lack of space.
ignored_excess_intermediates: bool,
// Accumulator for the OSC string currently being received.
osc: OscState,
// Stored parameter entries (integers, separators, promoted intermediates)
// and how many are valid.
params: [CsiParam; MAX_PARAMS],
num_params: usize,
// Integer parameter still being built digit by digit, if any.
current_param: Option<CsiParam>,
// Set when a non-digit parameter byte could not be stored; further
// parameter bytes are then ignored until the next clear.
params_full: bool,
// Bytes accumulated for the APC string currently being received.
apc_data: Vec<u8>,
// Incremental UTF-8 decoder used while in the UTF-8 sequence state.
utf8_parser: Utf8Parser,
// State that was current before the most recent state change; the UTF-8
// handling uses it to decide where a decoded codepoint goes.
utf8_return_state: State,
}
/// One entry of a CSI parameter list as delivered to
/// `VTActor::csi_dispatch`.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub enum CsiParam {
    /// A decimal parameter value.
    Integer(i64),
    /// A single non-digit byte (for example a separator or an intermediate)
    /// kept in its original position.
    P(u8),
}

impl Default for CsiParam {
    /// The default entry is the integer zero.
    fn default() -> Self {
        CsiParam::Integer(0)
    }
}

impl CsiParam {
    /// Returns the numeric value for an `Integer` entry and `None` for a
    /// `P` entry.
    pub fn as_integer(&self) -> Option<i64> {
        if let CsiParam::Integer(value) = self {
            Some(*value)
        } else {
            None
        }
    }
}

impl std::fmt::Display for CsiParam {
    /// Writes integers in decimal and `P` bytes as the character with the
    /// same code point.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            Self::Integer(value) => write!(f, "{}", value),
            Self::P(byte) => write!(f, "{}", char::from(byte)),
        }
    }
}
/// State-machine driver for [`VTParser`]: construction, parameter
/// bookkeeping, action dispatch and the public byte-feeding entry points.
impl VTParser {
    /// Creates a parser in the ground state with every buffer empty.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        let param_indices = [0usize; MAX_OSC];
        Self {
            state: State::Ground,
            utf8_return_state: State::Ground,
            intermediates: [0; MAX_INTERMEDIATES],
            num_intermediates: 0,
            ignored_excess_intermediates: false,
            osc: OscState {
                buffer: Vec::new(),
                param_indices,
                num_params: 0,
                full: false,
            },
            params: Default::default(),
            num_params: 0,
            params_full: false,
            current_param: None,
            utf8_parser: Utf8Parser::new(),
            apc_data: vec![],
        }
    }

    /// Returns `true` while the parser is in the ground state.
    pub fn is_ground(&self) -> bool {
        self.state == State::Ground
    }

    /// Extracts the integer entries from the stored parameters.
    ///
    /// Returns the values packed at the front of a fixed-size array together
    /// with how many of them are valid. Non-integer entries (separators and
    /// promoted intermediates) are skipped, so the count can be smaller than
    /// `self.num_params`; callers must slice by the returned count.
    fn as_integer_params(&self) -> ([i64; MAX_PARAMS], usize) {
        let mut values = [0i64; MAX_PARAMS];
        let mut count = 0;
        for entry in &self.params[..self.num_params] {
            if let CsiParam::Integer(value) = entry {
                values[count] = *value;
                count += 1;
            }
        }
        (values, count)
    }

    /// Moves the integer currently being accumulated (if any) into the
    /// parameter array. When the array is already full the value is dropped.
    fn finish_param(&mut self) {
        if let Some(val) = self.current_param.take() {
            if self.num_params < MAX_PARAMS {
                self.params[self.num_params] = val;
                self.num_params += 1;
            }
        }
    }

    /// Appends the collected intermediates to the parameter array as
    /// `CsiParam::P` entries and clears the intermediate list. If the array
    /// fills up, the remaining intermediates are dropped and
    /// `ignored_excess_intermediates` is set.
    fn promote_intermediates_to_params(&mut self) {
        if self.num_intermediates > 0 {
            for &p in &self.intermediates[..self.num_intermediates] {
                if self.num_params >= MAX_PARAMS {
                    self.ignored_excess_intermediates = true;
                    break;
                }
                self.params[self.num_params] = CsiParam::P(p);
                self.num_params += 1;
            }
            self.num_intermediates = 0;
        }
    }

    /// Performs a single state-machine `action`.
    ///
    /// `param` is the byte associated with the action (callers pass `0` for
    /// some entry/exit actions) and `actor` receives any resulting callback.
    fn action(&mut self, action: Action, param: u8, actor: &mut dyn VTActor) {
        match action {
            Action::None | Action::Ignore => {}
            Action::Print => actor.print(param as char),
            Action::Execute => actor.execute_c0_or_c1(param),
            Action::Clear => {
                self.num_intermediates = 0;
                self.ignored_excess_intermediates = false;
                self.osc.num_params = 0;
                self.osc.full = false;
                self.num_params = 0;
                self.params_full = false;
                self.current_param.take();
                self.apc_data.clear();
            }
            Action::Collect => {
                if self.num_intermediates < MAX_INTERMEDIATES {
                    self.intermediates[self.num_intermediates] = param;
                    self.num_intermediates += 1;
                } else {
                    self.ignored_excess_intermediates = true;
                }
            }
            Action::Param => {
                if self.params_full {
                    return;
                }
                self.promote_intermediates_to_params();
                match param {
                    // Digits extend the integer being built, saturating
                    // instead of overflowing.
                    b'0'..=b'9' => match self.current_param.take() {
                        Some(CsiParam::Integer(i)) => {
                            self.current_param.replace(CsiParam::Integer(
                                i.saturating_mul(10).saturating_add((param - b'0') as i64),
                            ));
                        }
                        // `current_param` is only ever assigned `Integer`
                        // values, in the two sibling arms of this match.
                        Some(_) => unreachable!(),
                        None => {
                            self.current_param
                                .replace(CsiParam::Integer((param - b'0') as i64));
                        }
                    },
                    // Any other parameter byte ends the pending integer and
                    // is stored as its own entry.
                    p => {
                        self.finish_param();
                        if self.num_params + 1 > MAX_PARAMS {
                            self.params_full = true;
                        } else {
                            self.params[self.num_params] = CsiParam::P(p);
                            self.num_params += 1;
                        }
                    }
                }
            }
            Action::Hook => {
                self.finish_param();
                // Slice by the number of integers, not `num_params`, which
                // also counts separator entries and would expose padding.
                let (values, count) = self.as_integer_params();
                actor.dcs_hook(
                    param,
                    &values[..count],
                    &self.intermediates[..self.num_intermediates],
                    self.ignored_excess_intermediates,
                );
            }
            Action::Put => actor.dcs_put(param),
            Action::EscDispatch => {
                self.finish_param();
                let (values, count) = self.as_integer_params();
                actor.esc_dispatch(
                    &values[..count],
                    &self.intermediates[..self.num_intermediates],
                    self.ignored_excess_intermediates,
                    param,
                );
            }
            Action::CsiDispatch => {
                self.finish_param();
                self.promote_intermediates_to_params();
                actor.csi_dispatch(
                    &self.params[0..self.num_params],
                    self.ignored_excess_intermediates,
                    param,
                );
            }
            Action::Unhook => actor.dcs_unhook(),
            Action::OscStart => {
                self.osc.buffer.clear();
                self.osc.num_params = 0;
                self.osc.full = false;
            }
            Action::OscPut => self.osc.put(param as char),
            Action::OscEnd => {
                if self.osc.num_params == 0 {
                    actor.osc_dispatch(&[]);
                } else {
                    // Re-split the flat buffer into fields using the end
                    // offsets recorded for each separator; the last field is
                    // whatever remains.
                    let mut params: [&[u8]; MAX_OSC] = [b""; MAX_OSC];
                    let mut offset = 0usize;
                    let mut slice = self.osc.buffer.as_slice();
                    let limit = self.osc.num_params.min(MAX_OSC);
                    #[allow(clippy::needless_range_loop)]
                    for i in 0..limit - 1 {
                        let (a, b) = slice.split_at(self.osc.param_indices[i] - offset);
                        params[i] = a;
                        slice = b;
                        offset = self.osc.param_indices[i];
                    }
                    params[limit - 1] = slice;
                    actor.osc_dispatch(&params[..limit]);
                }
            }
            Action::ApcStart => {
                self.apc_data.clear();
            }
            Action::ApcPut => {
                self.apc_data.push(param);
            }
            Action::ApcEnd => {
                // Hand the accumulated bytes to the actor, leaving an empty
                // vector behind.
                actor.apc_dispatch(std::mem::take(&mut self.apc_data));
            }
            Action::Utf8 => self.next_utf8(actor, param),
        }
    }

    /// Feeds one byte to the UTF-8 decoder and, once a codepoint (or the
    /// replacement character for an invalid sequence) is available, routes
    /// it according to `utf8_return_state`.
    ///
    /// # Panics
    ///
    /// Panics if a codepoint completes while `utf8_return_state` is neither
    /// `Ground` nor `OscString` and the codepoint is not consumed by the
    /// table lookup below.
    fn next_utf8(&mut self, actor: &mut dyn VTActor, byte: u8) {
        struct Decoder {
            codepoint: Option<char>,
        }
        impl utf8parse::Receiver for Decoder {
            fn codepoint(&mut self, c: char) {
                self.codepoint.replace(c);
            }
            fn invalid_sequence(&mut self) {
                self.codepoint(std::char::REPLACEMENT_CHARACTER);
            }
        }

        let mut decoder = Decoder { codepoint: None };
        self.utf8_parser.advance(&mut decoder, byte);

        if let Some(c) = decoder.codepoint {
            // Codepoints that fit in one byte are run back through the
            // transition table of the return state, so that a UTF-8 encoded
            // control can trigger an execute or a state change.
            if c as u32 <= 0xff {
                let byte = ((c as u32) & 0xff) as u8;
                let (action, state) = lookup(self.utf8_return_state, byte);
                if action == Action::Execute
                    || (state != self.utf8_return_state && state != State::Utf8Sequence)
                {
                    self.action(lookup_exit(self.utf8_return_state), 0, actor);
                    self.action(action, byte, actor);
                    self.action(lookup_entry(state), 0, actor);
                    self.utf8_return_state = self.state;
                    self.state = state;
                    return;
                }
            }

            match self.utf8_return_state {
                State::Ground => actor.print(c),
                State::OscString => self.osc.put(c),
                state => panic!("unreachable state {:?}", state),
            };
            self.state = self.utf8_return_state;
        }
    }

    /// Advances the parser by a single input byte, reporting any resulting
    /// event to `actor`.
    ///
    /// While in the UTF-8 sequence state the byte goes straight to the UTF-8
    /// decoder. Otherwise the transition table is consulted; on a state
    /// change the old state's exit action (skipped when entering the UTF-8
    /// sequence state), the transition action and the new state's entry
    /// action run in that order.
    #[inline(always)]
    pub fn parse_byte(&mut self, byte: u8, actor: &mut dyn VTActor) {
        if self.state == State::Utf8Sequence {
            self.next_utf8(actor, byte);
            return;
        }

        let (action, state) = lookup(self.state, byte);
        if state != self.state {
            if state != State::Utf8Sequence {
                self.action(lookup_exit(self.state), 0, actor);
            }
            // NOTE(review): the actions below run before `utf8_return_state`
            // is updated, so a `Utf8` action triggered here still sees the
            // previous value -- confirm this ordering is intended.
            self.action(action, byte, actor);
            self.action(lookup_entry(state), byte, actor);
            self.utf8_return_state = self.state;
            self.state = state;
        } else {
            self.action(action, byte, actor);
        }
    }

    /// Feeds every byte of `bytes` through [`VTParser::parse_byte`] in order.
    pub fn parse(&mut self, bytes: &[u8], actor: &mut dyn VTActor) {
        for &byte in bytes {
            self.parse_byte(byte, actor);
        }
    }
}
// Behavioural tests: each one feeds a byte string through a fresh parser and
// compares the recorded `VTAction`s with the expected sequence.
#[cfg(test)]
mod test {
use super::*;
use k9::assert_equal as assert_eq;
// Runs a fresh parser over `bytes` and returns everything it reported.
fn parse_as_vec(bytes: &[u8]) -> Vec<VTAction> {
let mut parser = VTParser::new();
let mut actor = CollectingVTActor::default();
parser.parse(bytes, &mut actor);
actor.into_vec()
}
// Printable text, a BEL control and two CSI `m` sequences interleaved.
#[test]
fn test_mixed() {
assert_eq!(
parse_as_vec(b"yo\x07\x1b[32mwoot\x1b[0mdone"),
vec![
VTAction::Print('y'),
VTAction::Print('o'),
VTAction::ExecuteC0orC1(0x07,),
VTAction::CsiDispatch {
params: vec![CsiParam::Integer(32)],
parameters_truncated: false,
byte: b'm',
},
VTAction::Print('w',),
VTAction::Print('o',),
VTAction::Print('o',),
VTAction::Print('t',),
VTAction::CsiDispatch {
params: vec![CsiParam::Integer(0)],
parameters_truncated: false,
byte: b'm',
},
VTAction::Print('d',),
VTAction::Print('o',),
VTAction::Print('n',),
VTAction::Print('e',),
]
);
}
// Plain ASCII bytes are reported one `Print` at a time.
#[test]
fn test_print() {
assert_eq!(
parse_as_vec(b"yo"),
vec![VTAction::Print('y'), VTAction::Print('o')]
);
}
// OSC terminated by the raw byte 0x9c.
#[test]
fn test_osc_with_c1_st() {
assert_eq!(
parse_as_vec(b"\x1b]0;there\x9c"),
vec![VTAction::OscDispatch(vec![
b"0".to_vec(),
b"there".to_vec()
])]
);
}
// OSC terminated by BEL (0x07).
#[test]
fn test_osc_with_bel_st() {
assert_eq!(
parse_as_vec(b"\x1b]0;hello\x07"),
vec![VTAction::OscDispatch(vec![
b"0".to_vec(),
b"hello".to_vec()
])]
);
}
// A leading `?` is reported as a `P` entry ahead of the integer.
#[test]
fn test_decset() {
assert_eq!(
parse_as_vec(b"\x1b[?1l"),
vec![VTAction::CsiDispatch {
params: vec![CsiParam::P(b'?'), CsiParam::Integer(1)],
parameters_truncated: false,
byte: b'l',
},]
);
}
// MAX_OSC + 2 fields are supplied; only the first MAX_OSC are dispatched.
#[test]
fn test_osc_too_many_params() {
let fields = (0..MAX_OSC + 2)
.into_iter()
.map(|i| i.to_string())
.collect::<Vec<_>>();
let input = format!("\x1b]{}\x07", fields.join(";"));
let actions = parse_as_vec(input.as_bytes());
assert_eq!(actions.len(), 1);
match &actions[0] {
VTAction::OscDispatch(parsed_fields) => {
let fields: Vec<_> = fields.into_iter().map(|s| s.as_bytes().to_vec()).collect();
assert_eq!(parsed_fields.as_slice(), &fields[0..MAX_OSC]);
}
other => panic!("Expected OscDispatch but got {:?}", other),
}
}
// An OSC with no payload dispatches an empty field list.
#[test]
fn test_osc_with_no_params() {
assert_eq!(
parse_as_vec(b"\x1b]\x07"),
vec![VTAction::OscDispatch(vec![])]
);
}
// OSC terminated by ESC `\`; the terminator also yields an EscDispatch.
#[test]
fn test_osc_with_esc_sequence_st() {
assert_eq!(
parse_as_vec(b"\x1b]woot\x1b\\"),
vec![
VTAction::OscDispatch(vec![b"woot".to_vec()]),
VTAction::EscDispatch {
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
byte: b'\\'
}
]
);
}
// A `:` between integers is reported as its own `P` entry.
#[test]
fn test_fancy_underline() {
assert_eq!(
parse_as_vec(b"\x1b[4m"),
vec![VTAction::CsiDispatch {
params: vec![CsiParam::Integer(4)],
parameters_truncated: false,
byte: b'm'
}]
);
assert_eq!(
parse_as_vec(b"\x1b[4:3m"),
vec![VTAction::CsiDispatch {
params: vec![
CsiParam::Integer(4),
CsiParam::P(b':'),
CsiParam::Integer(3)
],
parameters_truncated: false,
byte: b'm'
}]
);
}
// Consecutive colons produce consecutive `P(b':')` entries.
#[test]
fn test_colon_rgb() {
assert_eq!(
parse_as_vec(b"\x1b[38:2::128:64:192m"),
vec![VTAction::CsiDispatch {
params: vec![
CsiParam::Integer(38),
CsiParam::P(b':'),
CsiParam::Integer(2),
CsiParam::P(b':'),
CsiParam::P(b':'),
CsiParam::Integer(128),
CsiParam::P(b':'),
CsiParam::Integer(64),
CsiParam::P(b':'),
CsiParam::Integer(192),
],
parameters_truncated: false,
byte: b'm'
}]
);
}
// A leading `;` (omitted first parameter) is reported as `P(b';')`.
#[test]
fn test_csi_omitted_param() {
assert_eq!(
parse_as_vec(b"\x1b[;1m"),
vec![VTAction::CsiDispatch {
params: vec![CsiParam::P(b';'), CsiParam::Integer(1)],
parameters_truncated: false,
byte: b'm'
}]
);
}
// 33 entries are supplied; only the first 32 are dispatched. The final `6`
// is dropped and `parameters_truncated` is still expected to be false.
#[test]
fn test_csi_too_many_params() {
assert_eq!(
parse_as_vec(b"\x1b[0;1;2;3;4;5;6;7;8;9;0;1;2;3;4;51;6p"),
vec![VTAction::CsiDispatch {
params: vec![
CsiParam::Integer(0),
CsiParam::P(b';'),
CsiParam::Integer(1),
CsiParam::P(b';'),
CsiParam::Integer(2),
CsiParam::P(b';'),
CsiParam::Integer(3),
CsiParam::P(b';'),
CsiParam::Integer(4),
CsiParam::P(b';'),
CsiParam::Integer(5),
CsiParam::P(b';'),
CsiParam::Integer(6),
CsiParam::P(b';'),
CsiParam::Integer(7),
CsiParam::P(b';'),
CsiParam::Integer(8),
CsiParam::P(b';'),
CsiParam::Integer(9),
CsiParam::P(b';'),
CsiParam::Integer(0),
CsiParam::P(b';'),
CsiParam::Integer(1),
CsiParam::P(b';'),
CsiParam::Integer(2),
CsiParam::P(b';'),
CsiParam::Integer(3),
CsiParam::P(b';'),
CsiParam::Integer(4),
CsiParam::P(b';'),
CsiParam::Integer(51),
CsiParam::P(b';'),
],
parameters_truncated: false,
byte: b'p'
}]
);
}
// Intermediates are appended as `P` entries; a third intermediate is
// dropped and reported through `parameters_truncated`.
#[test]
fn test_csi_intermediates() {
assert_eq!(
parse_as_vec(b"\x1b[1 p"),
vec![VTAction::CsiDispatch {
params: vec![CsiParam::Integer(1), CsiParam::P(b' ')],
parameters_truncated: false,
byte: b'p'
}]
);
assert_eq!(
parse_as_vec(b"\x1b[1 !p"),
vec![VTAction::CsiDispatch {
params: vec![CsiParam::Integer(1), CsiParam::P(b' '), CsiParam::P(b'!')],
parameters_truncated: false,
byte: b'p'
}]
);
assert_eq!(
parse_as_vec(b"\x1b[1 !#p"),
vec![VTAction::CsiDispatch {
params: vec![CsiParam::Integer(1), CsiParam::P(b' '), CsiParam::P(b'!')],
parameters_truncated: true,
byte: b'p'
}]
);
}
// A UTF-8 encoded character inside an OSC is kept as its UTF-8 bytes.
#[test]
fn osc_utf8() {
assert_eq!(
parse_as_vec("\x1b]\u{af}\x07".as_bytes()),
vec![VTAction::OscDispatch(vec!["\u{af}".as_bytes().to_vec()])]
);
}
// OSC introduced by UTF-8 encoded U+009D and terminated by UTF-8 encoded
// U+009C.
#[test]
fn osc_fedora_vte() {
assert_eq!(
parse_as_vec("\u{9d}777;preexec\u{9c}".as_bytes()),
vec![VTAction::OscDispatch(vec![
b"777".to_vec(),
b"preexec".to_vec(),
])]
);
}
// A two-byte UTF-8 sequence yields a single `Print` of the codepoint.
#[test]
fn print_utf8() {
assert_eq!(
parse_as_vec("\u{af}".as_bytes()),
vec![VTAction::Print('\u{af}')]
);
}
// UTF-8 encoded U+008D is delivered as a control byte, not printed.
#[test]
fn utf8_control() {
assert_eq!(
parse_as_vec("\u{8d}".as_bytes()),
vec![VTAction::ExecuteC0orC1(0x8d)]
);
}
// DCS with integer parameter 1000 and final byte `p`, terminated by ESC `\`.
#[test]
fn tmux_control() {
assert_eq!(
parse_as_vec("\x1bP1000phello\x1b\\".as_bytes()),
vec![
VTAction::DcsHook {
byte: b'p',
params: vec![1000],
intermediates: vec![],
ignored_excess_intermediates: false,
},
VTAction::DcsPut(b'h'),
VTAction::DcsPut(b'e'),
VTAction::DcsPut(b'l'),
VTAction::DcsPut(b'l'),
VTAction::DcsPut(b'o'),
VTAction::DcsUnhook,
VTAction::EscDispatch {
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
byte: b'\\',
}
]
);
}
// DCS hooked on `t`; the rest of the payload, including `;`, arrives as
// `DcsPut` bytes.
#[test]
fn tmux_passthru() {
assert_eq!(
parse_as_vec("\x1bPtmux;data\x1b\\".as_bytes()),
vec![
VTAction::DcsHook {
byte: b't',
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
},
VTAction::DcsPut(b'm'),
VTAction::DcsPut(b'u'),
VTAction::DcsPut(b'x'),
VTAction::DcsPut(b';'),
VTAction::DcsPut(b'd'),
VTAction::DcsPut(b'a'),
VTAction::DcsPut(b't'),
VTAction::DcsPut(b'a'),
VTAction::DcsUnhook,
VTAction::EscDispatch {
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
byte: b'\\',
}
]
);
}
// An APC string is delivered as one `ApcDispatch` with the whole payload.
#[test]
fn kitty_img() {
assert_eq!(
parse_as_vec("\x1b_Gf=24,s=10,v=20;payload\x1b\\".as_bytes()),
vec![
VTAction::ApcDispatch(b"Gf=24,s=10,v=20;payload".to_vec()),
VTAction::EscDispatch {
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
byte: b'\\',
}
]
);
}
// DCS hooked on `q` with no parameters, followed by payload bytes.
#[test]
fn sixel() {
assert_eq!(
parse_as_vec("\x1bPqhello\x1b\\".as_bytes()),
vec![
VTAction::DcsHook {
byte: b'q',
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
},
VTAction::DcsPut(b'h'),
VTAction::DcsPut(b'e'),
VTAction::DcsPut(b'l'),
VTAction::DcsPut(b'l'),
VTAction::DcsPut(b'o'),
VTAction::DcsUnhook,
VTAction::EscDispatch {
params: vec![],
intermediates: vec![],
ignored_excess_intermediates: false,
byte: b'\\',
}
]
);
}
}