tastty-core 0.1.0

Sans-IO core of the tastty terminal session library: VT parser, screen buffer, and byte encoders.
//! VT parser wrapper that drives a [`Screen`](crate::Screen).

use std::sync::Arc;

use crate::host_profile::HostProfile;
use crate::screen::TerminalSize;

/// Stateful VT parser plus virtual screen.
///
/// The parser itself is pure: feeding bytes into [`Parser::process`]
/// mutates the screen and queues [`ScreenEvent`](crate::ScreenEvent)s, but
/// it never produces wire-reply bytes on its own. To answer host queries
/// (DA1/DA2/DA3, XTVERSION, DSR, DECRQM, DECRQSS, XTGETTCAP, OSC color,
/// Kitty keyboard, XTWINOPS), drain events with
/// [`Screen::drain_events`](crate::Screen::drain_events), pass each one
/// through [`auto_reply_bytes`](crate::host_reply::auto_reply_bytes) with
/// your `HostProfile`, and write the resulting bytes back to the guest
/// program (typically the PTY master). The `tastty::Terminal` wrapper does
/// this automatically from its reader thread, so callers using `Terminal`
/// do not need to.
///
/// Besides the `vte` state machine and the screen, the parser keeps a
/// small carry buffer so that a UTF-8 sequence split across two
/// [`Parser::process`] calls is handed to `vte` in one piece.
///
/// # References
///
/// - [ECMA-48 (Control Functions for Coded Character Sets)][ecma48]: CSI, SGR, DSR, and the standard mode set.
/// - [VT100 User Guide, Chapter 3][vt100-ug-ch3]: DEC private modes and cursor / scroll-region behavior.
/// - [xterm Control Sequences][xterm-ctlseqs]: DA1 / DA2 / DA3, XTVERSION, DECRQM / DECRQSS, XTGETTCAP, XTWINOPS, OSC color queries.
/// - [Kitty keyboard protocol][kitty-kbd]: progressive enhancement flag stack (CSI ? u family).
///
/// [ecma48]: https://ecma-international.org/publications-and-standards/standards/ecma-48/
/// [vt100-ug-ch3]: https://vt100.net/docs/vt100-ug/chapter3.html
/// [xterm-ctlseqs]: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
/// [kitty-kbd]: https://sw.kovidgoyal.net/kitty/keyboard-protocol/
pub struct Parser {
    // Underlying `vte` state machine; every byte reaches the screen
    // through its `advance` method.
    parser: vte::Parser,
    // Virtual screen that the parsed input is applied to.
    screen: crate::screen::Screen,
    // Bytes of a UTF-8 sequence left incomplete at the end of the previous
    // `process` call. Only the first `utf8_carry_len` bytes are meaningful.
    utf8_carry: [u8; 4],
    // Number of bytes currently held in `utf8_carry`; 0 when nothing is
    // pending.
    utf8_carry_len: u8,
}

impl Parser {
    /// Build a parser that uses the default host profile.
    ///
    /// `size` is a [`TerminalSize`], so the non-zero-dimensions
    /// invariant is already established by construction (see
    /// [`TerminalSize::new`]). `scrollback_len` is forwarded unchanged
    /// to the screen constructor.
    #[must_use]
    pub fn new(size: TerminalSize, scrollback_len: usize) -> Self {
        let parser = vte::Parser::new();
        let screen = crate::screen::Screen::new(Self::grid_size(size), scrollback_len);
        Self {
            parser,
            screen,
            utf8_carry: [0; 4],
            utf8_carry_len: 0,
        }
    }

    /// Build a parser whose screen is created with an explicit host
    /// profile.
    ///
    /// `size` is a [`TerminalSize`], so the non-zero-dimensions
    /// invariant is already established by construction (see
    /// [`TerminalSize::new`]). `scrollback_len` and `profile` are
    /// forwarded unchanged to the screen constructor.
    #[must_use]
    pub fn with_profile(
        size: TerminalSize,
        scrollback_len: usize,
        profile: Arc<HostProfile>,
    ) -> Self {
        let parser = vte::Parser::new();
        let screen =
            crate::screen::Screen::with_profile(Self::grid_size(size), scrollback_len, profile);
        Self {
            parser,
            screen,
            utf8_carry: [0; 4],
            utf8_carry_len: 0,
        }
    }

    /// Feed bytes from the child process into the parser.
    ///
    /// Parsing mutates screen state (cursor, attrs, grid, modes,
    /// palette, ...) and queues [`ScreenEvent`](crate::ScreenEvent)s for
    /// the embedder. No wire-reply bytes are produced here; for host
    /// queries (DA1/DA2/DA3, XTVERSION, DSR, DECRQM, DECRQSS,
    /// XTGETTCAP, OSC color, Kitty keyboard, XTWINOPS), drain the
    /// queued events via
    /// [`Screen::drain_events`](crate::Screen::drain_events) and run
    /// each one through
    /// [`auto_reply_bytes`](crate::host_reply::auto_reply_bytes) with
    /// your [`HostProfile`] to obtain the reply bytes to write back to
    /// the guest program.
    ///
    /// The `tastty::Terminal` wrapper does this automatically from its
    /// reader thread, so callers using `Terminal` do not need to.
    ///
    /// # Streaming contract
    ///
    /// For any byte sequence `B` and any partition `B = A ++ C`, the
    /// screen state after `process(&A); process(&C)` is identical to
    /// the screen state after `process(&B)`. PTY reads make no
    /// guarantee that chunk boundaries align with UTF-8 code-point
    /// boundaries, so a trailing incomplete sequence is withheld from
    /// `vte` and prepended to the next call's input.
    pub fn process(&mut self, bytes: &[u8]) {
        // `None` means the whole chunk was absorbed into the carry and
        // nothing is ready for the state machine yet.
        if let Some(rest) = self.resume_carry(bytes) {
            let held = incomplete_utf8_tail_len(rest);
            let (ready, tail) = rest.split_at(rest.len() - held);
            self.feed(ready);
            if held > 0 {
                self.utf8_carry[..held].copy_from_slice(tail);
                self.utf8_carry_len = held as u8;
            }
        }
        self.screen.flush_print_buffer();
    }

    /// Borrow the current virtual screen.
    #[must_use]
    pub fn screen(&self) -> &crate::screen::Screen {
        &self.screen
    }

    /// Mutably borrow the current virtual screen.
    #[must_use]
    pub fn screen_mut(&mut self) -> &mut crate::screen::Screen {
        &mut self.screen
    }

    /// Convert the public terminal size into the grid's size type.
    fn grid_size(size: TerminalSize) -> crate::grid::Size {
        crate::grid::Size {
            rows: size.rows,
            cols: size.cols,
        }
    }

    /// Run `bytes` through the `vte` state machine, applying the
    /// results to the screen via a fresh `PerformScreen`.
    fn feed(&mut self, bytes: &[u8]) {
        self.parser.advance(
            &mut crate::perform::PerformScreen::new(&mut self.screen),
            bytes,
        );
    }

    /// Reconcile a carried UTF-8 prefix with the start of `bytes`.
    ///
    /// Returns the part of `bytes` that still has to go through the
    /// normal path, or `None` when every byte of `bytes` was absorbed
    /// into the carry and the sequence is still incomplete.
    fn resume_carry<'a>(&mut self, bytes: &'a [u8]) -> Option<&'a [u8]> {
        let have = usize::from(self.utf8_carry_len);
        if have == 0 {
            return Some(bytes);
        }

        let total = utf8_expected_len(self.utf8_carry[0]);
        let missing = total - have;
        // Leading continuation bytes of `bytes`, capped at what the
        // carried sequence still lacks.
        let usable = bytes
            .iter()
            .take(missing)
            .take_while(|b| matches!(**b, 0x80..=0xBF))
            .count();

        if usable == missing {
            // The carry can be completed: hand the whole sequence to
            // vte in a single call.
            self.utf8_carry[have..total].copy_from_slice(&bytes[..missing]);
            let whole = self.utf8_carry;
            self.feed(&whole[..total]);
            self.utf8_carry_len = 0;
            Some(&bytes[missing..])
        } else if usable == bytes.len() {
            // Only continuation bytes arrived and the sequence is still
            // short: keep accumulating.
            self.utf8_carry[have..have + usable].copy_from_slice(bytes);
            self.utf8_carry_len = (have + usable) as u8;
            None
        } else {
            // No continuation completes the carry, so vte's buggy
            // advance_partial_utf8 path (the one 53f833e exists to
            // bypass) cannot fire. vte's partial-UTF-8 recovery emits
            // U+FFFD for the orphan once the non-continuation byte
            // arrives with the rest of the chunk.
            let orphan = self.utf8_carry;
            self.feed(&orphan[..have]);
            self.utf8_carry_len = 0;
            Some(bytes)
        }
    }
}

/// Total byte length of the UTF-8 sequence that starts with `lead`.
///
/// Only the lead bytes of 2-, 3- and 4-byte sequences (`0xC2..=0xF4`)
/// map to a length above 1; every other byte value yields 1. Callers
/// are expected to pass a byte that [`incomplete_utf8_tail_len`] has
/// already accepted as a lead.
const fn utf8_expected_len(lead: u8) -> usize {
    // Every `u8` value is covered explicitly, so there is no wildcard arm.
    match lead {
        0x00..=0xC1 | 0xF5..=0xFF => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
    }
}

/// Number of bytes at the end of `bytes` that form an unfinished UTF-8
/// sequence, or 0 when the tail is not such a prefix.
///
/// The tail qualifies when it is a 2/3/4-byte lead (`0xC2..=0xF4`)
/// followed by fewer continuation bytes (`0x80..=0xBF`) than the lead
/// calls for. Only the lead and the continuation count are inspected;
/// the narrower second-byte ranges that apply after `0xE0`, `0xED`,
/// `0xF0` and `0xF4` are not checked here.
fn incomplete_utf8_tail_len(bytes: &[u8]) -> usize {
    // Count continuation bytes from the end. Three is the most a valid
    // sequence can have, so there is no need to look further back.
    let trailing = bytes
        .iter()
        .rev()
        .take(3)
        .take_while(|&&b| (0x80..0xC0).contains(&b))
        .count();

    // The byte just before that run is the candidate lead. There is none
    // when the slice is empty or consists solely of continuation bytes.
    let lead = match bytes.iter().rev().nth(trailing) {
        Some(&b) => b,
        None => return 0,
    };

    let total = match lead {
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => return 0,
    };

    let present = trailing + 1;
    if present >= total {
        // The sequence already has all its bytes (or too many).
        return 0;
    }
    present
}

/// Lets a [`Parser`] be used as a byte sink: everything written is
/// forwarded to [`Parser::process`].
impl std::io::Write for Parser {
    /// Process the whole of `buf` and report every byte as written.
    /// This never returns an error.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let accepted = buf.len();
        self.process(buf);
        Ok(accepted)
    }

    /// No-op: `write` hands its bytes to `process` immediately, so
    /// nothing is buffered at this layer.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::Parser;
    use crate::screen::TerminalSize;

    /// Resizing the screen must bring back the plain ASCII mapping, so
    /// a `q` sent after the resize lands in the grid as a literal `q`.
    #[test]
    fn screen_set_size_restores_ascii_mapping() {
        let initial = TerminalSize { rows: 24, cols: 80 };
        let resized = TerminalSize { rows: 30, cols: 80 };
        let mut parser = Parser::new(initial, 0);

        // `ESC ) 0` followed by SO (0x0E): per xterm's control-sequence
        // reference this selects the DEC special-graphics set through G1.
        parser.process(b"\x1b)0\x0e");
        parser.screen_mut().set_size(resized);
        parser.process(b"q");

        let first_cell = parser.screen().cell(0, 0).unwrap();
        assert_eq!(first_cell.contents(), "q");
    }
}