bluetape_rs_codec/text.rs
1//! UTF-8 text boundary helpers for codec call sites.
2
3use std::error::Error;
4use std::fmt;
5use std::str::Utf8Error;
6use std::string::FromUtf8Error;
7
/// Typed failure reported when a byte sequence is not valid UTF-8 text.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextDecodeError {
    /// The input bytes do not form valid UTF-8.
    InvalidUtf8 {
        /// Byte offset up to which the input was verified as valid UTF-8.
        valid_up_to: usize,
        /// Length in bytes of the offending sequence, or `None` when the
        /// length is not known (reported as an incomplete sequence).
        error_len: Option<usize>,
    },
}
20
21impl fmt::Display for TextDecodeError {
22 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23 match self {
24 Self::InvalidUtf8 {
25 valid_up_to,
26 error_len: Some(error_len),
27 } => write!(
28 f,
29 "text input contains invalid UTF-8 sequence of length {error_len} after byte {valid_up_to}"
30 ),
31 Self::InvalidUtf8 {
32 valid_up_to,
33 error_len: None,
34 } => write!(
35 f,
36 "text input contains incomplete UTF-8 sequence after byte {valid_up_to}"
37 ),
38 }
39 }
40}
41
// Marker impl: every `Error` method keeps its default, so `source()` reports
// no underlying cause. The variant stores only the offset and length copied
// from the standard-library error, not the error value itself.
impl Error for TextDecodeError {}
43
44impl From<Utf8Error> for TextDecodeError {
45 fn from(error: Utf8Error) -> Self {
46 Self::InvalidUtf8 {
47 valid_up_to: error.valid_up_to(),
48 error_len: error.error_len(),
49 }
50 }
51}
52
53impl From<FromUtf8Error> for TextDecodeError {
54 fn from(error: FromUtf8Error) -> Self {
55 error.utf8_error().into()
56 }
57}
58
/// Copies the UTF-8 bytes of `text` into a freshly allocated `Vec<u8>`.
///
/// Within `bluetape-rs-codec` this is the explicit text-to-byte boundary
/// placed in front of binary encoders; it is not meant as a general-purpose
/// string utility.
///
/// # Examples
///
/// ```
/// use bluetape_rs_codec::{encode_base64_url_unpadded, encode_utf8_text};
///
/// assert_eq!(encode_base64_url_unpadded(encode_utf8_text("Hello")), "SGVsbG8");
/// ```
#[must_use]
pub fn encode_utf8_text(text: impl AsRef<str>) -> Vec<u8> {
    let text = text.as_ref();
    Vec::from(text.as_bytes())
}
75
76/// Decodes owned bytes into UTF-8 text without lossy replacement.
77///
78/// Use this after binary decoders when the service contract requires valid
79/// UTF-8 text. Invalid UTF-8 returns a typed error with byte-position details.
80///
81/// # Examples
82///
83/// ```
84/// use bluetape_rs_codec::{decode_base64_url_unpadded, decode_utf8_text};
85///
86/// let bytes = decode_base64_url_unpadded("SGVsbG8")?;
87///
88/// assert_eq!(decode_utf8_text(bytes)?, "Hello");
89/// # Ok::<(), Box<dyn std::error::Error>>(())
90/// ```
91///
92/// # Errors
93///
94/// Returns [`TextDecodeError::InvalidUtf8`] when the input bytes are not valid
95/// UTF-8.
96pub fn decode_utf8_text(bytes: impl Into<Vec<u8>>) -> Result<String, TextDecodeError> {
97 String::from_utf8(bytes.into()).map_err(Into::into)
98}
99
/// Decodes bytes into text, substituting U+FFFD for every invalid UTF-8
/// sequence.
///
/// The name deliberately contains `lossy` so that callers opt in to data
/// replacement explicitly instead of silently accepting corrupted text.
///
/// # Examples
///
/// ```
/// use bluetape_rs_codec::decode_utf8_text_lossy;
///
/// assert_eq!(decode_utf8_text_lossy([b'a', 0xff, b'z']), "a\u{fffd}z");
/// ```
#[must_use]
pub fn decode_utf8_text_lossy(bytes: impl AsRef<[u8]>) -> String {
    match String::from_utf8_lossy(bytes.as_ref()) {
        // Input was already valid: copy the borrowed text into an owned string.
        std::borrow::Cow::Borrowed(valid) => valid.to_owned(),
        // Input needed repair: the repaired string is already owned.
        std::borrow::Cow::Owned(repaired) => repaired,
    }
}