/// The escape byte (`0x1b`) that begins every escape sequence recognized in this file.
const ESC: u8 = 0x1b;
/// Marker that opens a pasted region (`ESC [ 2 0 0 ~`).
const PASTE_START: &[u8] = b"\x1b[200~";
/// Marker that closes a pasted region (`ESC [ 2 0 1 ~`).
const PASTE_END: &[u8] = b"\x1b[201~";
/// One unit of segmented input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Segment {
/// Bytes outside a paste: a run of plain bytes, a single `0x08`/`0x7f` byte,
/// or one complete escape sequence.
Bytes(Vec<u8>),
/// The bytes found between a paste start marker and the matching paste end
/// marker; the markers themselves are not included.
Paste(Vec<u8>),
}
/// Returns `true` when `byte` lies in `0x30..=0x3f`, the range this parser
/// treats as CSI parameter bytes.
fn is_csi_parameter_byte(byte: u8) -> bool {
    matches!(byte, 0x30..=0x3f)
}
/// Returns `true` when `byte` lies in `0x20..=0x2f`, the range this parser
/// treats as CSI intermediate bytes.
fn is_csi_intermediate_byte(byte: u8) -> bool {
    matches!(byte, 0x20..=0x2f)
}
/// Returns `true` when `byte` lies in `0x40..=0x7e`, the range this parser
/// accepts as the terminating byte of a control sequence.
fn is_csi_final_byte(byte: u8) -> bool {
    matches!(byte, 0x40..=0x7e)
}
/// Outcome of attempting to read a CSI or SS3 control sequence.
enum Parsed {
/// A complete sequence was recognized; `next_index` is the index just past
/// its last byte.
Sequence { next_index: usize },
/// The input ended before the sequence could be completed.
Pending,
/// The inspected bytes do not form a CSI or SS3 sequence.
None,
}
/// Scans the body of a CSI sequence.
///
/// The caller has already seen the `[` introducer at
/// `start_index + prefix_length`; scanning starts on the byte after it.
///
/// * Parameter and intermediate bytes are skipped.
/// * A `[` sitting directly after the introducer is also skipped instead of
///   being taken as the final byte (presumably to support `ESC [ [ x` style
///   keys; confirm against the terminals being targeted).
/// * The first final byte ends the sequence and yields `Parsed::Sequence`
///   pointing just past it.
/// * Any other byte yields `Parsed::None`.
/// * Running out of input first yields `Parsed::Pending`.
fn parse_csi_sequence(input: &[u8], start_index: usize, prefix_length: usize) -> Parsed {
    let payload_start = start_index + prefix_length + 1;
    for (position, &byte) in input.iter().enumerate().skip(payload_start) {
        let doubled_introducer = byte == b'[' && position == payload_start;
        if doubled_introducer || is_csi_parameter_byte(byte) || is_csi_intermediate_byte(byte) {
            continue;
        }
        // First byte that is not part of the body decides the outcome.
        return if is_csi_final_byte(byte) {
            Parsed::Sequence {
                next_index: position + 1,
            }
        } else {
            Parsed::None
        };
    }
    Parsed::Pending
}
/// Reads an SS3 sequence: the `O` at `start_index + prefix_length` followed
/// by exactly one final byte.
///
/// Returns `Parsed::Pending` when that final byte has not arrived yet,
/// `Parsed::None` when the byte is outside the final-byte range, and
/// `Parsed::Sequence` pointing just past the final byte otherwise.
fn parse_ss3_sequence(input: &[u8], start_index: usize, prefix_length: usize) -> Parsed {
    let final_index = start_index + prefix_length + 1;
    match input.get(final_index) {
        None => Parsed::Pending,
        Some(&byte) if is_csi_final_byte(byte) => Parsed::Sequence {
            next_index: final_index + 1,
        },
        Some(_) => Parsed::None,
    }
}
/// Dispatches on the byte at `start_index + prefix_length`, which selects the
/// kind of control sequence.
///
/// `[` is handed to [`parse_csi_sequence`], `O` to [`parse_ss3_sequence`].
/// A missing byte means more input is needed (`Parsed::Pending`); any other
/// byte is not a control sequence (`Parsed::None`).
fn parse_control_sequence(input: &[u8], start_index: usize, prefix_length: usize) -> Parsed {
    match input.get(start_index + prefix_length).copied() {
        None => Parsed::Pending,
        Some(b'[') => parse_csi_sequence(input, start_index, prefix_length),
        Some(b'O') => parse_ss3_sequence(input, start_index, prefix_length),
        Some(_) => Parsed::None,
    }
}
/// Treats the ESC at `escape_index` plus the single code point that follows
/// it as one sequence.
///
/// Returns `ParsedEscape::Pending` while the code point after the ESC is
/// missing or incomplete.
fn parse_escaped_code_point(input: &[u8], escape_index: usize) -> ParsedEscape {
    let code_point_start = escape_index + 1;
    if let Some(length) = utf8_codepoint_len(input, code_point_start) {
        ParsedEscape::Sequence {
            next_index: code_point_start + length,
        }
    } else {
        ParsedEscape::Pending
    }
}
/// Returns how many bytes the code point starting at `input[index]` occupies.
///
/// * `None` — `index` is past the end, or the lead byte announces more
///   continuation bytes than have arrived (and those present are valid).
/// * `Some(1)` — a single-byte value, a byte that is not a recognized lead
///   byte, or a lead byte followed by something that is not a continuation
///   byte; in the last two cases only the one byte is consumed.
/// * `Some(2..=4)` — a lead byte followed by the full set of continuation
///   bytes.
fn utf8_codepoint_len(input: &[u8], index: usize) -> Option<usize> {
    let lead = *input.get(index)?;
    let expected = match lead {
        0x00..=0x7f => 1,
        0xc0..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xf7 => 4,
        // Stray continuation bytes and 0xf8.. are passed through one at a time.
        _ => return Some(1),
    };
    let wanted = expected - 1;
    let trailing = &input[index + 1..];
    let present = &trailing[..wanted.min(trailing.len())];
    // A bad byte among those already received wins over "need more input".
    if present.iter().any(|&byte| byte & 0xc0 != 0x80) {
        return Some(1);
    }
    (present.len() == wanted).then_some(expected)
}
/// Outcome of parsing whatever follows an ESC byte.
enum ParsedEscape {
/// A complete escape sequence was found; `next_index` is the index just
/// past its last byte.
Sequence { next_index: usize },
/// More input is required before the sequence can be delimited.
Pending,
}
/// Delimits the escape sequence that begins with the ESC at `escape_index`.
///
/// Resolution order:
///
/// 1. If nothing follows the ESC yet, the result is `ParsedEscape::Pending`.
/// 2. If the next byte is another ESC, a CSI/SS3 sequence is looked for after
///    the two-byte `ESC ESC` prefix. When none is present, the `ESC ESC` pair
///    on its own is returned as the sequence.
/// 3. Otherwise a CSI/SS3 sequence is looked for after the single ESC. When
///    none is present, the ESC plus the following code point is the sequence.
///
/// In both cases an incomplete control sequence yields
/// `ParsedEscape::Pending`.
///
/// The byte after the ESC is read with a checked lookup, so an empty `input`
/// or an out-of-range `escape_index` reports `Pending` instead of panicking.
fn parse_escape_sequence(input: &[u8], escape_index: usize) -> ParsedEscape {
    let Some(&next) = input.get(escape_index + 1) else {
        return ParsedEscape::Pending;
    };
    let doubled_escape = next == ESC;
    let prefix_length = if doubled_escape { 2 } else { 1 };
    // `parse_control_sequence` itself reports `Pending` when the byte after the
    // prefix is missing, so no separate length check is needed here.
    match parse_control_sequence(input, escape_index, prefix_length) {
        Parsed::Pending => ParsedEscape::Pending,
        Parsed::Sequence { next_index } => ParsedEscape::Sequence { next_index },
        Parsed::None if doubled_escape => ParsedEscape::Sequence {
            next_index: escape_index + 2,
        },
        Parsed::None => parse_escaped_code_point(input, escape_index),
    }
}
/// Appends `text` to `events`, isolating every `0x08` / `0x7f` byte into its
/// own single-byte `Segment::Bytes`.
///
/// Runs of other bytes between those are appended as one `Segment::Bytes`
/// each; empty runs are never emitted.
fn split_backspace_bytes(text: &[u8], events: &mut Vec<Segment>) {
    fn is_backspace(byte: &u8) -> bool {
        matches!(*byte, 0x08 | 0x7f)
    }

    // Every piece ends with a backspace byte, except possibly the last one.
    for piece in text.split_inclusive(is_backspace) {
        match piece.split_last() {
            Some((erase, preceding)) if is_backspace(erase) => {
                if !preceding.is_empty() {
                    events.push(Segment::Bytes(preceding.to_vec()));
                }
                events.push(Segment::Bytes(vec![*erase]));
            }
            Some(_) => events.push(Segment::Bytes(piece.to_vec())),
            None => {}
        }
    }
}
/// Finds the first occurrence of `needle` in `haystack` at or after `from`.
///
/// Returns the absolute index of the match within `haystack`. Returns `None`
/// when there is no match, when `needle` is empty, or when `from` is greater
/// than `haystack.len()`.
fn find_from(haystack: &[u8], needle: &[u8], from: usize) -> Option<usize> {
    if needle.is_empty() {
        return None;
    }
    let tail = haystack.get(from..)?;
    // Pair each window with its absolute position so no offset fix-up is needed.
    (from..)
        .zip(tail.windows(needle.len()))
        .find_map(|(position, window)| (window == needle).then_some(position))
}
/// Splits `input` into segments and returns them together with any trailing
/// bytes that cannot be classified yet.
///
/// Bytes before each ESC are passed through [`split_backspace_bytes`]. Each
/// ESC is delimited with [`parse_escape_sequence`] and emitted as one
/// `Segment::Bytes`. When the sequence equals the paste start marker,
/// everything up to the paste end marker is emitted as `Segment::Paste`
/// instead.
///
/// The second tuple element is empty when all input was consumed. Otherwise
/// it holds the bytes from the first unfinished ESC onward. An unterminated
/// paste is held back in full, including its start marker.
fn parse_keypresses(input: &[u8]) -> (Vec<Segment>, Vec<u8>) {
    let mut segments: Vec<Segment> = Vec::new();
    let mut cursor = 0;
    while cursor < input.len() {
        let Some(escape_at) = find_from(input, &[ESC], cursor) else {
            // No more escapes: the rest is ordinary text.
            split_backspace_bytes(&input[cursor..], &mut segments);
            break;
        };
        if cursor < escape_at {
            split_backspace_bytes(&input[cursor..escape_at], &mut segments);
        }

        let sequence_end = match parse_escape_sequence(input, escape_at) {
            ParsedEscape::Sequence { next_index } => next_index,
            ParsedEscape::Pending => return (segments, input[escape_at..].to_vec()),
        };
        let sequence = &input[escape_at..sequence_end];
        if sequence != PASTE_START {
            segments.push(Segment::Bytes(sequence.to_vec()));
            cursor = sequence_end;
            continue;
        }

        // Paste: hold everything back until the closing marker has arrived.
        let Some(paste_end) = find_from(input, PASTE_END, sequence_end) else {
            return (segments, input[escape_at..].to_vec());
        };
        segments.push(Segment::Paste(input[sequence_end..paste_end].to_vec()));
        cursor = paste_end + PASTE_END.len();
    }
    (segments, Vec::new())
}
/// Incremental segmenter that carries unfinished input from one `push` call
/// to the next.
#[derive(Debug, Default, Clone)]
pub struct Segmenter {
/// Trailing bytes from earlier chunks that could not yet be emitted as a
/// segment.
pending: Vec<u8>,
}
impl Segmenter {
    /// Feeds `chunk` into the segmenter and returns every segment that is now
    /// complete.
    ///
    /// Bytes held back by earlier calls are parsed together with `chunk`.
    /// Whatever still cannot be classified is retained for the next call.
    pub fn push(&mut self, chunk: &[u8]) -> Vec<Segment> {
        self.pending.extend_from_slice(chunk);
        let (segments, leftover) = parse_keypresses(&self.pending);
        self.pending = leftover;
        segments
    }

    /// Reports whether the retained bytes look like an unfinished escape
    /// sequence rather than a paste in progress.
    ///
    /// Returns `true` when the retained bytes start with ESC, unless they
    /// start with the complete paste start marker or are exactly
    /// `ESC [ 2 0 0`.
    pub fn has_pending_escape(&self) -> bool {
        let held = self.pending.as_slice();
        if held.first() != Some(&ESC) {
            return false;
        }
        let paste_in_progress = held.starts_with(PASTE_START);
        let truncated_paste_start = held == b"\x1b[200";
        !(paste_in_progress || truncated_paste_start)
    }

    /// Removes and returns the retained bytes when they start with ESC.
    ///
    /// Returns `None` and leaves the buffer untouched otherwise. Unlike
    /// [`Segmenter::has_pending_escape`], this does not special-case a
    /// buffered paste; callers that care should check that method first.
    pub fn flush_pending_escape(&mut self) -> Option<Vec<u8>> {
        let starts_with_escape = self.pending.first() == Some(&ESC);
        starts_with_escape.then(|| std::mem::take(&mut self.pending))
    }

    /// Discards any retained bytes.
    pub fn reset(&mut self) {
        self.pending.clear();
    }
}
// Unit tests are declared as a separate `tests` module that is only compiled
// in test builds.
#[cfg(test)]
mod tests;