// alma 0.1.0 - Docs.rs

//! Validated UTF-8 byte stream storage and CRUD operations.

use bevy::prelude::Resource;
use std::{
    error::Error,
    fmt::{Display, Formatter},
    ops::Range,
    str,
};

/// A validated UTF-8 byte stream used as the source of rendered text.
///
/// The contents are held in a [`String`], so the stored bytes are valid UTF-8 at all times.
/// Byte-oriented entry points validate their input and return a [`TextStreamError`] instead of
/// storing invalid data.
///
/// The derived equality compares the revision counter as well as the text, so two streams with
/// identical contents but a different number of applied edits are not equal.
#[derive(Clone, Debug, Default, Eq, PartialEq, Resource)]
pub struct TextByteStream {
    /// Text storage, kept as UTF-8 so Bevy can render it directly.
    text: String,
    /// Monotonic version counter for external caches and persistence.
    ///
    /// Starts at zero and advances by one after every successful mutation. The increment wraps
    /// on `u64` overflow instead of panicking.
    revision: u64,
}

impl TextByteStream {
    /// Creates a new text byte stream from valid UTF-8 text.
    #[must_use]
    pub fn new(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            revision: 0,
        }
    }

    /// Creates a new text byte stream from raw UTF-8 bytes.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError::InvalidUtf8`] when `bytes` are not valid UTF-8.
    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, TextStreamError> {
        String::from_utf8(bytes)
            .map(Self::new)
            .map_err(|error| TextStreamError::InvalidUtf8 {
                valid_up_to: error.utf8_error().valid_up_to(),
            })
    }

    /// Reads the stream as renderable UTF-8 text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.text
    }

    /// Reads the underlying UTF-8 bytes without allocation.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        self.text.as_bytes()
    }

    /// Returns the current stream revision.
    #[must_use]
    pub const fn revision(&self) -> u64 {
        self.revision
    }

    /// Replaces the entire stream with valid UTF-8 text.
    pub fn replace_all(&mut self, text: impl Into<String>) {
        self.text = text.into();
        self.bump_revision();
    }

    /// Replaces the entire stream with raw UTF-8 bytes.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError::InvalidUtf8`] when `bytes` are not valid UTF-8.
    pub fn replace_all_bytes(&mut self, bytes: Vec<u8>) -> Result<(), TextStreamError> {
        self.text = String::from_utf8(bytes).map_err(|error| TextStreamError::InvalidUtf8 {
            valid_up_to: error.utf8_error().valid_up_to(),
        })?;
        self.bump_revision();
        Ok(())
    }

    /// Inserts valid UTF-8 text at a byte index.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError`] when `byte_index` is out of bounds or not a UTF-8 character
    /// boundary.
    pub fn insert_str(&mut self, byte_index: usize, text: &str) -> Result<(), TextStreamError> {
        self.validate_boundary(byte_index)?;
        self.text.insert_str(byte_index, text);
        self.bump_revision();
        Ok(())
    }

    /// Inserts raw UTF-8 bytes at a byte index.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError::InvalidUtf8`] when `bytes` are invalid UTF-8, or another
    /// [`TextStreamError`] when `byte_index` is out of bounds or not a UTF-8 character boundary.
    pub fn insert_bytes(&mut self, byte_index: usize, bytes: &[u8]) -> Result<(), TextStreamError> {
        let text = str::from_utf8(bytes).map_err(|error| TextStreamError::InvalidUtf8 {
            valid_up_to: error.valid_up_to(),
        })?;
        self.insert_str(byte_index, text)
    }

    /// Replaces a byte range with valid UTF-8 text.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError`] when `range` is invalid, out of bounds, or not aligned to UTF-8
    /// character boundaries.
    pub fn replace_range(
        &mut self,
        range: Range<usize>,
        text: &str,
    ) -> Result<(), TextStreamError> {
        self.validate_range(range.clone())?;
        self.text.replace_range(range, text);
        self.bump_revision();
        Ok(())
    }

    /// Replaces a byte range with raw UTF-8 bytes.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError::InvalidUtf8`] when `bytes` are invalid UTF-8, or another
    /// [`TextStreamError`] when `range` is invalid, out of bounds, or not aligned to UTF-8
    /// character boundaries.
    pub fn replace_range_bytes(
        &mut self,
        range: Range<usize>,
        bytes: &[u8],
    ) -> Result<(), TextStreamError> {
        let text = str::from_utf8(bytes).map_err(|error| TextStreamError::InvalidUtf8 {
            valid_up_to: error.valid_up_to(),
        })?;
        self.replace_range(range, text)
    }

    /// Deletes a byte range.
    ///
    /// # Errors
    ///
    /// Returns [`TextStreamError`] when `range` is invalid, out of bounds, or not aligned to UTF-8
    /// character boundaries.
    pub fn delete_range(&mut self, range: Range<usize>) -> Result<(), TextStreamError> {
        self.replace_range(range, "")
    }

    /// Clears the stream.
    pub fn clear(&mut self) {
        self.text.clear();
        self.bump_revision();
    }

    /// Advances the stream revision after a mutation.
    const fn bump_revision(&mut self) {
        self.revision = self.revision.wrapping_add(1);
    }

    /// Validates one byte index against the current stream.
    fn validate_boundary(&self, index: usize) -> Result<(), TextStreamError> {
        if self.text.len() < index {
            return Err(TextStreamError::OutOfBounds {
                index,
                len: self.text.len(),
            });
        }

        if !self.text.is_char_boundary(index) {
            return Err(TextStreamError::NotCharBoundary { index });
        }

        Ok(())
    }

    /// Validates a byte range against the current stream.
    fn validate_range(&self, range: Range<usize>) -> Result<(), TextStreamError> {
        if range.end < range.start {
            return Err(TextStreamError::InvalidRange {
                start: range.start,
                end: range.end,
            });
        }

        self.validate_boundary(range.start)?;
        self.validate_boundary(range.end)
    }
}

/// Errors returned by fallible [`TextByteStream`] constructors and mutations.
///
/// Every variant carries only the offending byte offsets, never the rejected data itself.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum TextStreamError {
    /// Raw bytes were not valid UTF-8.
    InvalidUtf8 {
        /// The byte index up to which the input was valid UTF-8, i.e. the offset of the first
        /// rejected byte within the supplied input (not within the stream).
        valid_up_to: usize,
    },
    /// A byte index exceeded the stream length.
    ///
    /// An index equal to the length is accepted and does not produce this error.
    OutOfBounds {
        /// The rejected byte index.
        index: usize,
        /// The current stream length in bytes.
        len: usize,
    },
    /// A byte index split a UTF-8 scalar value.
    NotCharBoundary {
        /// The rejected byte index.
        index: usize,
    },
    /// A byte range had its end before its start.
    InvalidRange {
        /// The start byte index.
        start: usize,
        /// The end byte index.
        end: usize,
    },
}

impl Display for TextStreamError {
    /// Writes a single-line, human-readable description of the error.
    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
        // All payload fields are `usize`, so matching by value copies them out.
        match *self {
            Self::InvalidUtf8 { valid_up_to } => {
                formatter.write_fmt(format_args!("invalid UTF-8 after byte {valid_up_to}"))
            }
            Self::OutOfBounds { index, len } => formatter.write_fmt(format_args!(
                "byte index {index} is outside stream length {len}"
            )),
            Self::NotCharBoundary { index } => formatter.write_fmt(format_args!(
                "byte index {index} is not a UTF-8 character boundary"
            )),
            Self::InvalidRange { start, end } => {
                formatter.write_fmt(format_args!("invalid byte range {start}..{end}"))
            }
        }
    }
}

// The trait's default methods are used as-is: no variant wraps an underlying error, so there
// is no `source` to report.
impl Error for TextStreamError {}

/// Property and unit tests for UTF-8 byte-stream CRUD operations.
///
/// Successful edits are checked against the equivalent `String` operation. Rejected edits are
/// checked for the exact error variant and for leaving both text and revision untouched.
#[cfg(test)]
mod tests {
    use super::{TextByteStream, TextStreamError};
    use proptest::{
        prelude::{Strategy, any, proptest},
        prop_assert_eq,
    };

    /// Generates Unicode scalar values that occupy more than one UTF-8 byte.
    fn multibyte_char() -> impl Strategy<Value = char> {
        any::<char>().prop_filter("character must use multiple UTF-8 bytes", |character| {
            1 < character.len_utf8()
        })
    }

    proptest! {
        /// Replacing the whole stream round-trips arbitrary valid Unicode.
        #[test]
        fn replace_all_round_trips_arbitrary_unicode(input in any::<String>()) {
            let mut stream = TextByteStream::default();

            stream.replace_all(input.clone());

            prop_assert_eq!(stream.as_str(), input.as_str());
            prop_assert_eq!(stream.as_bytes(), input.as_bytes());
            prop_assert_eq!(stream.revision(), 1);
        }

        /// Creating from valid UTF-8 bytes round-trips arbitrary Unicode.
        #[test]
        fn from_bytes_accepts_valid_utf8(input in any::<String>()) {
            let stream = TextByteStream::from_bytes(input.clone().into_bytes());

            prop_assert_eq!(stream, Ok(TextByteStream::new(input)));
        }

        /// Inserting at a UTF-8 character boundary matches Rust's `String` model.
        #[test]
        fn insert_str_matches_string_model(
            prefix in any::<String>(),
            inserted in any::<String>(),
            suffix in any::<String>(),
        ) {
            let original = format!("{prefix}{suffix}");
            let expected = format!("{prefix}{inserted}{suffix}");
            let mut stream = TextByteStream::new(original);

            prop_assert_eq!(stream.insert_str(prefix.len(), inserted.as_str()), Ok(()));
            prop_assert_eq!(stream.as_str(), expected.as_str());
            prop_assert_eq!(stream.revision(), 1);
        }

        /// Replacing a UTF-8-aligned byte range matches Rust's `String` model.
        #[test]
        fn replace_range_matches_string_model(
            prefix in any::<String>(),
            removed in any::<String>(),
            suffix in any::<String>(),
            replacement in any::<String>(),
        ) {
            let original = format!("{prefix}{removed}{suffix}");
            let expected = format!("{prefix}{replacement}{suffix}");
            let range = prefix.len()..prefix.len() + removed.len();
            let mut stream = TextByteStream::new(original);

            prop_assert_eq!(stream.replace_range(range, replacement.as_str()), Ok(()));
            prop_assert_eq!(stream.as_str(), expected.as_str());
            prop_assert_eq!(stream.revision(), 1);
        }

        /// Deleting a UTF-8-aligned byte range matches Rust's `String` model.
        #[test]
        fn delete_range_matches_string_model(
            prefix in any::<String>(),
            removed in any::<String>(),
            suffix in any::<String>(),
        ) {
            let original = format!("{prefix}{removed}{suffix}");
            let expected = format!("{prefix}{suffix}");
            let range = prefix.len()..prefix.len() + removed.len();
            let mut stream = TextByteStream::new(original);

            prop_assert_eq!(stream.delete_range(range), Ok(()));
            prop_assert_eq!(stream.as_str(), expected.as_str());
            prop_assert_eq!(stream.revision(), 1);
        }

        /// Byte-range edits reject indices that split a Unicode scalar.
        #[test]
        fn delete_range_rejects_split_scalar(
            prefix in any::<String>(),
            character in multibyte_char(),
        ) {
            let mut stream = TextByteStream::new(format!("{prefix}{character}"));
            let split_index = prefix.len() + 1;

            prop_assert_eq!(
                stream.delete_range(split_index..split_index),
                Err(TextStreamError::NotCharBoundary { index: split_index }),
            );
            prop_assert_eq!(stream.revision(), 0);
        }

        /// Raw byte insertion matches string insertion when bytes are valid UTF-8.
        #[test]
        fn insert_bytes_matches_string_model(
            prefix in any::<String>(),
            inserted in any::<String>(),
            suffix in any::<String>(),
        ) {
            let original = format!("{prefix}{suffix}");
            let expected = format!("{prefix}{inserted}{suffix}");
            let mut stream = TextByteStream::new(original);

            prop_assert_eq!(stream.insert_bytes(prefix.len(), inserted.as_bytes()), Ok(()));
            prop_assert_eq!(stream.as_str(), expected.as_str());
            // Delegation to `insert_str` must bump the revision exactly once.
            prop_assert_eq!(stream.revision(), 1);
        }

        /// Insertion past the end of the stream is rejected and changes nothing.
        #[test]
        fn insert_str_rejects_out_of_bounds_index(
            text in any::<String>(),
            overshoot in 1_usize..1024,
        ) {
            let index = text.len() + overshoot;
            let mut stream = TextByteStream::new(text.clone());

            prop_assert_eq!(
                stream.insert_str(index, "x"),
                Err(TextStreamError::OutOfBounds { index, len: text.len() }),
            );
            prop_assert_eq!(stream.as_str(), text.as_str());
            prop_assert_eq!(stream.revision(), 0);
        }

        /// A range whose end precedes its start is rejected before any bounds check.
        #[test]
        fn replace_range_rejects_reversed_range(
            text in any::<String>(),
            end in 0_usize..1024,
            gap in 1_usize..1024,
        ) {
            let start = end + gap;
            let mut stream = TextByteStream::new(text.clone());

            prop_assert_eq!(
                stream.replace_range(start..end, "x"),
                Err(TextStreamError::InvalidRange { start, end }),
            );
            prop_assert_eq!(stream.as_str(), text.as_str());
            prop_assert_eq!(stream.revision(), 0);
        }
    }

    /// Creating a stream from invalid bytes fails without lossy conversion.
    #[test]
    fn from_bytes_rejects_invalid_utf8() {
        assert_eq!(
            TextByteStream::from_bytes(vec![0x66, 0x80]),
            Err(TextStreamError::InvalidUtf8 { valid_up_to: 1 })
        );
    }

    /// Replacing the stream with invalid bytes fails and keeps the previous contents.
    #[test]
    fn replace_all_bytes_rejects_invalid_utf8_without_mutation() {
        let mut stream = TextByteStream::new("ALMA");

        assert_eq!(
            stream.replace_all_bytes(vec![0x66, 0x80]),
            Err(TextStreamError::InvalidUtf8 { valid_up_to: 1 })
        );
        assert_eq!(stream.as_str(), "ALMA");
        assert_eq!(stream.revision(), 0);
    }

    /// Newline bytes remain literal stream data for Bevy layout.
    #[test]
    fn newline_remains_layout_data() {
        let mut stream = TextByteStream::new("ALMA");

        assert_eq!(stream.insert_str(4, "\nΑλμα"), Ok(()));
        assert_eq!(stream.as_str(), "ALMA\nΑλμα");
    }
}