tastty-core 0.1.0

Sans-IO core of the tastty terminal session library: VT parser, screen buffer, and byte encoders.
//! Accessors for cells in the virtual terminal screen.

use unicode_width::UnicodeWidthChar as _;

/// Capacity, in bytes, of the inline UTF-8 buffer stored in every cell.
const CONTENT_BYTES: usize = 22;

// Layout of the packed `len` byte: the top two bits are width flags and the
// low five bits hold the number of content bytes currently in use.

/// Flag bit: the cell starts a wide character.
const IS_WIDE: u8 = 1 << 7;
/// Flag bit: the cell is the trailing half of a wide character.
const IS_WIDE_CONTINUATION: u8 = 1 << 6;
/// Mask selecting the content length from the packed `len` byte.
const LEN_BITS: u8 = 0x1f;

use std::sync::Arc;

/// Hyperlink target attached to cells by an [OSC 8][osc8] sequence: the
/// link's URI together with its optional explicit id.
///
/// Equality and hashing are derived over both fields, so two `Hyperlink`
/// values are equal only when `id` *and* `uri` match. Neighbouring cells
/// with the same URI but different `id` values are therefore distinct
/// links. This matters for hover-region detection: a host that walks a row
/// and groups cells by `Cell::hyperlink()` has to split groups wherever the
/// `id` changes, not merely where the URI changes.
///
/// [osc8]: https://gist.github.com/egmontkob/eb8d45597f7db55ec41d6c0ffc6f3bb3
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Hyperlink {
    /// Value of the explicit `id=` parameter from the OSC 8 params, or
    /// `None` when the sequence did not supply one.
    pub id: Option<Arc<str>>,
    /// URI the link points to, as given in the OSC 8 sequence.
    pub uri: Arc<str>,
}

/// One cell of the virtual terminal grid: its text, style attributes, and
/// optional hyperlink.
#[derive(Debug, Clone, Eq)]
pub struct Cell {
    // Inline UTF-8 storage; only the first `len & LEN_BITS` bytes are live.
    contents: [u8; CONTENT_BYTES],
    // Packed byte: content length in the low bits, width flags in the top bits.
    len: u8,
    // Style attributes of the cell.
    attrs: crate::attrs::Attrs,
    // Hyperlink the cell belongs to, if any.
    hyperlink: Option<Arc<Hyperlink>>,
}

// Cell is allocated per-grid-cell, so any size growth multiplies by the
// screen area and changes the cache-density tradeoff; lock the current
// representation so a future field addition has to be a deliberate edit.
//
// The check is compiled only for test builds on targets with 64-bit
// pointers and fails the build unless `Cell` is exactly 48 bytes.
#[cfg(all(test, target_pointer_width = "64"))]
const _: () = {
    static_assertions::assert_eq_size!(Cell, [u8; 48]);
};

impl PartialEq<Self> for Cell {
    /// Compare two cells by their packed `len` byte (length and width
    /// flags), attributes, hyperlink, and the live prefix of the content
    /// buffer. Bytes beyond the live length are stale and never compared.
    fn eq(&self, other: &Self) -> bool {
        let used = self.len();
        // Cheap field comparisons come first; `&&` short-circuits, so the
        // byte slices are only compared once everything else agrees.
        self.len == other.len
            && self.attrs == other.attrs
            && self.hyperlink == other.hyperlink
            && self.contents[..used] == other.contents[..used]
    }
}

impl Cell {
    /// Create an empty cell: no text, default attributes, no hyperlink.
    pub(crate) fn new() -> Self {
        Self {
            contents: [0; CONTENT_BYTES],
            len: 0,
            attrs: crate::attrs::Attrs::default(),
            hyperlink: None,
        }
    }

    /// Number of content bytes in use, i.e. the packed `len` byte with the
    /// width flag bits masked off.
    fn len(&self) -> usize {
        usize::from(self.len & LEN_BITS)
    }

    /// Replace the cell's text with the single character `c` and its
    /// attributes with `a`.
    ///
    /// The packed `len` byte is reset first, which clears both width flags;
    /// the wide flag is then recomputed from the display width of `c`
    /// (characters without a defined width count as narrow). The hyperlink
    /// is left as it was.
    pub(crate) fn set(&mut self, c: char, a: crate::attrs::Attrs) {
        self.len = 0;
        self.append_char(0, c);
        self.set_wide(c.width().unwrap_or(1) > 1);
        self.attrs = a;
    }

    /// Append the character `c` to the cell's text.
    ///
    /// An empty cell first receives a space to act as the base character.
    /// The character is dropped, and the cell left untouched, when fewer
    /// than four spare bytes would be guaranteed for it.
    pub(crate) fn append(&mut self, c: char) {
        let len = self.len();
        // Reserve room for the longest possible UTF-8 encoding (4 bytes).
        if len >= CONTENT_BYTES - 4 {
            return;
        }
        if len == 0 {
            self.contents[0] = b' ';
            self.len += 1;
        }
        self.append_char(self.len(), c);
    }

    /// Encode `c` as UTF-8 into the buffer at byte offset `start` and grow
    /// the stored length by the number of bytes written.
    ///
    /// Callers must ensure the encoding fits behind `start`.
    fn append_char(&mut self, start: usize, c: char) {
        let written = c.encode_utf8(&mut self.contents[start..]).len();
        // A UTF-8 encoded char is at most 4 bytes, so the cast is exact.
        debug_assert!(written <= 4);
        self.len += written as u8;
    }

    /// Append the string `s` to the cell's text.
    ///
    /// An empty cell first receives a space to act as the base character.
    /// Text that would not fit in the remaining buffer is dropped silently,
    /// degrading gracefully on oversized input rather than crashing the
    /// host. A dropped append leaves the cell completely unchanged; in
    /// particular, an empty cell stays empty.
    pub(crate) fn append_str(&mut self, s: &str) {
        let bytes = s.as_bytes();
        let len = self.len();
        // In an empty cell the text goes behind the space base at offset 0.
        let start = if len == 0 { 1 } else { len };
        // Check the fit before mutating anything, so a rejected append has
        // no side effects.
        if bytes.len() > CONTENT_BYTES - start {
            return;
        }
        if len == 0 {
            self.contents[0] = b' ';
            self.len += 1;
        }
        self.contents[start..start + bytes.len()].copy_from_slice(bytes);
        // bytes.len() < CONTENT_BYTES (22) here, so the cast is exact.
        debug_assert!(bytes.len() <= CONTENT_BYTES);
        self.len += bytes.len() as u8;
    }

    /// Reset the cell to empty: drops the text, both width flags and the
    /// hyperlink, and installs `attrs` as the cell's attributes.
    pub(crate) fn clear(&mut self, attrs: crate::attrs::Attrs) {
        self.len = 0;
        self.attrs = attrs;
        self.hyperlink = None;
    }

    /// Return the cell's UTF-8 text contents.
    ///
    /// # Panics
    ///
    /// Never in practice. Mutators write through `char::encode_utf8`
    /// (`set`, `append`) or copy a validated `&str` (`append_str`), so
    /// the underlying byte buffer is UTF-8 by construction.
    #[must_use]
    pub fn contents(&self) -> &str {
        std::str::from_utf8(&self.contents[..self.len()])
            .expect("cell contents are always valid UTF-8")
    }

    /// Return whether the cell holds any text, i.e. its content length is
    /// non-zero.
    #[must_use]
    pub fn has_contents(&self) -> bool {
        self.len() > 0
    }

    /// Return whether this cell starts a [wide character](https://www.unicode.org/reports/tr11/).
    #[must_use]
    pub fn is_wide(&self) -> bool {
        self.len & IS_WIDE != 0
    }

    /// Return whether this cell is the trailing half of a [wide character](https://www.unicode.org/reports/tr11/).
    #[must_use]
    pub fn is_wide_continuation(&self) -> bool {
        self.len & IS_WIDE_CONTINUATION != 0
    }

    /// Set or clear the flag marking this cell as the start of a wide
    /// character. The stored text is not touched.
    pub(crate) fn set_wide(&mut self, wide: bool) {
        if wide {
            self.len |= IS_WIDE;
        } else {
            self.len &= !IS_WIDE;
        }
    }

    /// Set or clear the flag marking this cell as the trailing half of a
    /// wide character. The stored text is not touched.
    pub(crate) fn set_wide_continuation(&mut self, wide: bool) {
        if wide {
            self.len |= IS_WIDE_CONTINUATION;
        } else {
            self.len &= !IS_WIDE_CONTINUATION;
        }
    }

    /// Return the complete SGR style attributes for this cell.
    #[must_use]
    pub fn attrs(&self) -> &crate::attrs::Attrs {
        &self.attrs
    }

    /// Return the [OSC 8][osc8] hyperlink association for this cell, if any.
    ///
    /// [osc8]: https://gist.github.com/egmontkob/eb8d45597f7db55ec41d6c0ffc6f3bb3
    #[must_use]
    pub fn hyperlink(&self) -> Option<&Hyperlink> {
        self.hyperlink.as_deref()
    }

    /// Attach `link` to this cell, or detach the current link with `None`.
    pub(crate) fn set_hyperlink(&mut self, link: Option<Arc<Hyperlink>>) {
        self.hyperlink = link;
    }
}

/// Unit tests for `Cell`: text storage, width flags, attributes, hyperlinks.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::attrs::{Attrs, Color};

    #[test]
    fn new_cell_is_empty() {
        let cell = Cell::new();
        assert!(!cell.has_contents());
        assert_eq!(cell.contents(), "");
        assert!(!cell.is_wide());
        assert!(!cell.is_wide_continuation());
    }

    #[test]
    fn set_ascii() {
        let mut cell = Cell::new();
        cell.set('A', Attrs::default());
        assert!(cell.has_contents());
        assert_eq!(cell.contents(), "A");
        assert!(!cell.is_wide());
    }

    #[test]
    fn set_wide_char() {
        // U+4E2D is an East Asian Wide ideograph. It is written as an
        // escape so the literal survives any re-encoding of this file.
        let mut cell = Cell::new();
        cell.set('\u{4E2D}', Attrs::default());
        assert!(cell.has_contents());
        assert_eq!(cell.contents(), "\u{4E2D}");
        assert!(cell.is_wide());
    }

    #[test]
    fn set_narrow_after_wide_clears_wide_flag() {
        let mut cell = Cell::new();
        cell.set('\u{4E2D}', Attrs::default());
        assert!(cell.is_wide());
        cell.set('A', Attrs::default());
        assert!(!cell.is_wide());
        assert_eq!(cell.contents(), "A");
    }

    #[test]
    fn append_combining() {
        let mut cell = Cell::new();
        cell.set('e', Attrs::default());
        cell.append('\u{0301}'); // combining acute accent
        assert_eq!(cell.contents(), "e\u{0301}");
    }

    #[test]
    fn append_to_empty_cell_uses_space_base() {
        let mut cell = Cell::new();
        cell.append('\u{0301}');
        assert_eq!(cell.contents(), " \u{0301}");
    }

    #[test]
    fn append_str_combining() {
        let mut cell = Cell::new();
        cell.set('e', Attrs::default());
        cell.append_str("\u{0301}");
        assert_eq!(cell.contents(), "e\u{0301}");
    }

    #[test]
    fn append_str_drops_text_that_does_not_fit() {
        let mut cell = Cell::new();
        cell.set('e', Attrs::default());
        cell.append_str(&"x".repeat(CONTENT_BYTES));
        assert_eq!(cell.contents(), "e");
    }

    #[test]
    fn clear_cell() {
        let mut cell = Cell::new();
        cell.set('X', Attrs::default());
        let attrs = Attrs {
            fg_color: Color::Index(1),
            ..Attrs::default()
        };
        cell.clear(attrs);
        assert!(!cell.has_contents());
        assert_eq!(cell.attrs().fg_color, Color::Index(1));
    }

    #[test]
    fn cell_equality() {
        let mut a = Cell::new();
        let mut b = Cell::new();
        assert_eq!(a, b);
        a.set('X', Attrs::default());
        assert_ne!(a, b);
        b.set('X', Attrs::default());
        assert_eq!(a, b);
    }

    #[test]
    fn wide_continuation_flag() {
        let mut cell = Cell::new();
        assert!(!cell.is_wide_continuation());
        cell.set_wide_continuation(true);
        assert!(cell.is_wide_continuation());
        cell.set_wide_continuation(false);
        assert!(!cell.is_wide_continuation());
    }

    #[test]
    fn hyperlink_round_trip() {
        let link = std::sync::Arc::new(Hyperlink {
            id: None,
            uri: "https://example.com".into(),
        });
        let mut cell = Cell::new();
        assert!(cell.hyperlink().is_none());
        cell.set_hyperlink(Some(std::sync::Arc::clone(&link)));
        assert_eq!(cell.hyperlink(), Some(&*link));
        cell.clear(Attrs::default());
        assert!(cell.hyperlink().is_none());
    }
}