#![warn(missing_docs)]
use std::fmt::{Debug, Display, Formatter};
use std::str::Utf8Error;
use serde::Serialize;
use crate::error::RipressError;
/// Failures that can occur while building or decoding a text payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum TextDataError {
    /// The bytes are not valid UTF-8; carries the standard library's decoding error.
    InvalidUtf8(Utf8Error),
    /// The payload is larger than the permitted size.
    TooLarge {
        /// Actual payload size, in bytes.
        size: usize,
        /// Largest size that was permitted, in bytes.
        limit: usize,
    },
}
impl Display for TextDataError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
TextDataError::InvalidUtf8(e) => write!(f, "Invalid UTF-8: {}", e),
TextDataError::TooLarge { size, limit } => {
write!(f, "Text too large: {} bytes (limit: {} bytes)", size, limit)
}
}
}
}
// Marks `TextDataError` as a standard error type; no trait methods are overridden,
// so the defaults provided by `std::error::Error` apply.
impl std::error::Error for TextDataError {}
/// Text payload held as raw bytes together with an optional charset label.
///
/// The bytes are not required to be valid UTF-8: `from_raw_bytes` stores whatever it is
/// given without checking it, which is why the string accessors are fallible or lossy.
#[derive(Clone, PartialEq, Eq, Serialize)]
pub struct TextData {
/// Raw bytes of the text, stored exactly as supplied.
inner: Vec<u8>,
/// Charset label recorded for `inner`; `None` when no label was provided.
charset: Option<String>,
}
impl TryFrom<TextData> for String {
type Error = RipressError;
fn try_from(value: TextData) -> Result<Self, Self::Error> {
value.into_string()
}
}
impl TextData {
    /// Charset label recorded for content that is known to be UTF-8.
    const UTF8_CHARSET: &'static str = "utf-8";

    /// Length of the longest prefix of `bytes` that is valid UTF-8.
    fn valid_prefix_len(bytes: &[u8]) -> usize {
        match std::str::from_utf8(bytes) {
            Ok(_) => bytes.len(),
            Err(err) => err.valid_up_to(),
        }
    }

    /// Creates text data from anything convertible into a `String`.
    ///
    /// The string's bytes are stored as-is and the charset is set to `"utf-8"`.
    pub fn new<T: Into<String>>(text: T) -> Self {
        Self {
            inner: text.into().into_bytes(),
            charset: Some(Self::UTF8_CHARSET.to_string()),
        }
    }

    /// Creates text data from raw bytes after checking that they are valid UTF-8.
    ///
    /// On success the charset is set to `"utf-8"`.
    ///
    /// # Errors
    ///
    /// Returns an error if `bytes` is not valid UTF-8.
    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, RipressError> {
        std::str::from_utf8(&bytes).map_err(TextDataError::InvalidUtf8)?;
        Ok(Self {
            inner: bytes,
            charset: Some(Self::UTF8_CHARSET.to_string()),
        })
    }

    /// Same as `from_bytes`, but refuses input longer than `limit` bytes.
    ///
    /// The size check runs before the UTF-8 check, so oversized input is rejected
    /// without being scanned.
    ///
    /// # Errors
    ///
    /// Returns an error if `bytes.len() > limit`, or if `bytes` is not valid UTF-8.
    pub fn from_bytes_with_limit(bytes: Vec<u8>, limit: usize) -> Result<Self, RipressError> {
        if bytes.len() > limit {
            return Err(RipressError::from(TextDataError::TooLarge {
                size: bytes.len(),
                limit,
            }));
        }
        Self::from_bytes(bytes)
    }

    /// Wraps raw bytes without any validation, labelled with the given charset.
    ///
    /// Nothing checks that the bytes match `charset` or are valid UTF-8.
    pub fn from_raw_bytes(bytes: Vec<u8>, charset: Option<String>) -> Self {
        Self {
            inner: bytes,
            charset,
        }
    }

    /// Borrows the content as a string slice.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn as_str(&self) -> Result<&str, RipressError> {
        std::str::from_utf8(&self.inner)
            .map_err(|e| RipressError::from(TextDataError::InvalidUtf8(e)))
    }

    /// Returns the content as a string, replacing invalid UTF-8 sequences with U+FFFD.
    ///
    /// Borrows when the bytes are already valid UTF-8 and allocates only otherwise.
    pub fn as_str_lossy(&self) -> std::borrow::Cow<'_, str> {
        String::from_utf8_lossy(&self.inner)
    }

    /// Consumes the value and returns its content as a `String`.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn into_string(self) -> Result<String, RipressError> {
        String::from_utf8(self.inner)
            .map_err(|e| RipressError::from(TextDataError::InvalidUtf8(e.utf8_error())))
    }

    /// Consumes the value and returns its content as a `String`, replacing invalid
    /// UTF-8 sequences with U+FFFD.
    pub fn into_string_lossy(self) -> String {
        // Valid UTF-8 reuses the existing allocation; only invalid input pays for a
        // lossy re-encoding of the bytes handed back by the error.
        String::from_utf8(self.inner)
            .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned())
    }

    /// Borrows the underlying bytes.
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner
    }

    /// Mutably borrows the underlying bytes.
    ///
    /// Writes are not validated, so callers can leave the content as invalid UTF-8.
    pub(crate) fn _as_bytes_mut(&mut self) -> &mut [u8] {
        &mut self.inner
    }

    /// Consumes the value and returns the underlying bytes.
    pub fn into_bytes(self) -> Vec<u8> {
        self.inner
    }

    /// Number of bytes stored.
    pub fn len_bytes(&self) -> usize {
        self.inner.len()
    }

    /// Number of Unicode scalar values (`char`s) in the content.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn len_chars(&self) -> Result<usize, RipressError> {
        Ok(self.as_str()?.chars().count())
    }

    /// Returns `true` when no bytes are stored.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the charset label, if one is recorded.
    pub fn charset(&self) -> Option<&str> {
        self.charset.as_deref()
    }

    /// Replaces the charset label.
    ///
    /// Only the label changes; the stored bytes are neither re-encoded nor validated.
    pub fn set_charset(&mut self, charset: String) {
        self.charset = Some(charset);
    }

    /// Returns `true` when the stored bytes are valid UTF-8.
    pub fn is_valid_utf8(&self) -> bool {
        std::str::from_utf8(&self.inner).is_ok()
    }

    /// Iterates over the lines of the content, as `str::lines` does.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn lines(&self) -> Result<std::str::Lines<'_>, RipressError> {
        Ok(self.as_str()?.lines())
    }

    /// Returns the content with leading and trailing whitespace removed.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn trim(&self) -> Result<&str, RipressError> {
        Ok(self.as_str()?.trim())
    }

    /// Reports whether the content contains `needle`.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn contains(&self, needle: &str) -> Result<bool, RipressError> {
        Ok(self.as_str()?.contains(needle))
    }

    /// Splits the content on every occurrence of `delimiter`.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored bytes are not valid UTF-8.
    pub fn split<'a>(
        &'a self,
        delimiter: &'a str,
    ) -> Result<std::str::Split<'a, &'a str>, RipressError> {
        Ok(self.as_str()?.split(delimiter))
    }

    /// Shortens the content in place to at most `max_len` bytes.
    ///
    /// Content that already fits is left untouched. Otherwise the bytes are cut at
    /// `max_len` and then shortened further to the longest prefix that is valid UTF-8,
    /// so a multi-byte character is never split. For content that is not UTF-8 this
    /// means everything from the first invalid byte onwards is dropped.
    pub fn truncate_bytes(&mut self, max_len: usize) {
        if self.inner.len() <= max_len {
            return;
        }
        // One validation pass over the kept window finds the cut point directly,
        // instead of re-validating the whole buffer after every removed byte.
        let keep = Self::valid_prefix_len(&self.inner[..max_len]);
        self.inner.truncate(keep);
    }

    /// Returns a copy shortened as by `truncate_bytes`, leaving `self` unchanged.
    pub fn truncated_bytes(&self, max_len: usize) -> Self {
        if self.inner.len() <= max_len {
            return self.clone();
        }
        // Copy only the bytes that survive truncation rather than the whole buffer.
        let keep = Self::valid_prefix_len(&self.inner[..max_len]);
        Self {
            inner: self.inner[..keep].to_vec(),
            charset: self.charset.clone(),
        }
    }
}
impl Display for TextData {
    /// Writes the content as text.
    ///
    /// Valid UTF-8 is written verbatim; otherwise the lossy rendering is used, in which
    /// invalid sequences appear as U+FFFD.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let rendered = match self.as_str() {
            Ok(text) => std::borrow::Cow::Borrowed(text),
            Err(_) => self.as_str_lossy(),
        };
        write!(f, "{}", rendered)
    }
}
impl Debug for TextData {
    /// Writes a compact summary instead of dumping the whole payload.
    ///
    /// The summary holds the byte length, the charset label, whether the bytes are valid
    /// UTF-8, and a preview of at most 50 characters of the lossily decoded text. The
    /// preview ends in `...` only when text was actually cut off.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const PREVIEW_CHARS: usize = 50;

        let text = self.as_str_lossy();
        let mut chars = text.chars();
        let mut preview: String = chars.by_ref().take(PREVIEW_CHARS).collect();
        // Anything left in the iterator means the preview is shorter than the text.
        if chars.next().is_some() {
            preview.push_str("...");
        }

        f.debug_struct("TextData")
            .field("len_bytes", &self.len_bytes())
            .field("charset", &self.charset)
            .field("is_valid_utf8", &self.is_valid_utf8())
            .field("preview", &preview)
            .finish()
    }
}
impl From<String> for TextData {
fn from(s: String) -> Self {
Self::new(s)
}
}
impl From<&str> for TextData {
fn from(s: &str) -> Self {
Self::new(s.to_string())
}
}
impl TryFrom<Vec<u8>> for TextData {
type Error = RipressError;
fn try_from(bytes: Vec<u8>) -> Result<Self, Self::Error> {
Self::from_bytes(bytes)
}
}
impl std::ops::Deref for TextData {
    type Target = [u8];

    /// Exposes the stored bytes, so slice methods can be called directly on the value.
    fn deref(&self) -> &Self::Target {
        self.inner.as_slice()
    }
}