Skip to main content

basalt_types/
string.rs

1use crate::{Decode, Encode, EncodedSize, Error, Result, VarInt};
2
/// Upper bound, in bytes, on the UTF-8 payload of a Minecraft protocol
/// string. Both the encoder and the decoder in this file reject anything
/// longer.
const MAX_STRING_BYTES: usize = 32_767;
5
6/// Encodes a Rust `String` as a Minecraft protocol string.
7///
8/// Minecraft protocol strings are UTF-8 byte sequences prefixed by a VarInt
9/// indicating the byte length (not character count). They are used for player
10/// names, chat messages, identifiers, server addresses, and many other text
11/// fields. The maximum allowed length is 32767 bytes.
12impl Encode for String {
13    /// Writes a VarInt length prefix followed by the UTF-8 bytes.
14    ///
15    /// Fails with `Error::StringTooLong` if the string exceeds 32767 bytes.
16    fn encode(&self, buf: &mut Vec<u8>) -> Result<()> {
17        let bytes = self.as_bytes();
18        if bytes.len() > MAX_STRING_BYTES {
19            return Err(Error::StringTooLong {
20                len: bytes.len(),
21                max: MAX_STRING_BYTES,
22            });
23        }
24        VarInt(bytes.len() as i32).encode(buf)?;
25        buf.extend_from_slice(bytes);
26        Ok(())
27    }
28}
29
30/// Decodes a Minecraft protocol string into a Rust `String`.
31///
32/// Reads a VarInt byte length, validates it against the 32767-byte limit,
33/// then reads that many bytes and validates them as UTF-8. Multi-byte
34/// UTF-8 characters (accented letters, emoji, CJK) are handled correctly
35/// since the length prefix counts bytes, not characters.
36impl Decode for String {
37    /// Reads the VarInt length prefix, then the UTF-8 payload.
38    ///
39    /// Fails with `Error::StringTooLong` if the declared length exceeds
40    /// 32767 bytes, `Error::BufferUnderflow` if the buffer is shorter than
41    /// the declared length, or `Error::InvalidUtf8` if the bytes are not
42    /// valid UTF-8.
43    fn decode(buf: &mut &[u8]) -> Result<Self> {
44        let raw_len = VarInt::decode(buf)?.0;
45        if raw_len < 0 {
46            return Err(Error::InvalidData(format!(
47                "negative string length: {raw_len}"
48            )));
49        }
50        let len = raw_len as usize;
51        if len > MAX_STRING_BYTES {
52            return Err(Error::StringTooLong {
53                len,
54                max: MAX_STRING_BYTES,
55            });
56        }
57        if buf.len() < len {
58            return Err(Error::BufferUnderflow {
59                needed: len,
60                available: buf.len(),
61            });
62        }
63        let (bytes, rest) = buf.split_at(len);
64        let value = String::from_utf8(bytes.to_vec())?;
65        *buf = rest;
66        Ok(value)
67    }
68}
69
70/// Computes the wire size of a Minecraft protocol string.
71///
72/// The total size is the VarInt-encoded length prefix plus the UTF-8 byte
73/// count. This enables exact buffer pre-allocation before encoding.
74impl EncodedSize for String {
75    /// Returns the VarInt prefix size plus the string's byte length.
76    fn encoded_size(&self) -> usize {
77        let len = self.len();
78        VarInt(len as i32).encoded_size() + len
79    }
80}
81
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `s`, checks the output length against `encoded_size`, then
    /// decodes it again and checks that the whole buffer was consumed and the
    /// value survived unchanged.
    fn roundtrip(s: &str) {
        let original = s.to_string();
        let mut buf = Vec::with_capacity(original.encoded_size());
        original.encode(&mut buf).unwrap();
        assert_eq!(buf.len(), original.encoded_size());

        let mut cursor = buf.as_slice();
        let decoded = String::decode(&mut cursor).unwrap();
        assert!(cursor.is_empty());
        assert_eq!(decoded, original);
    }

    #[test]
    fn empty_string() {
        roundtrip("");
    }

    #[test]
    fn short_string() {
        roundtrip("hello");
    }

    #[test]
    fn unicode_string() {
        roundtrip("héllo wörld 🌍");
    }

    #[test]
    fn max_length_string() {
        let s = "a".repeat(MAX_STRING_BYTES);
        roundtrip(&s);
    }

    #[test]
    fn too_long_encode() {
        let s = "a".repeat(MAX_STRING_BYTES + 1);
        let mut buf = Vec::new();
        assert!(matches!(
            s.encode(&mut buf),
            Err(Error::StringTooLong { .. })
        ));
    }

    #[test]
    fn too_long_decode() {
        let mut buf = Vec::new();
        VarInt(MAX_STRING_BYTES as i32 + 1)
            .encode(&mut buf)
            .unwrap();
        buf.extend_from_slice(&vec![0u8; MAX_STRING_BYTES + 1]);

        let mut cursor = buf.as_slice();
        assert!(matches!(
            String::decode(&mut cursor),
            Err(Error::StringTooLong { .. })
        ));
    }

    /// A negative length prefix must be rejected before it is ever cast to
    /// `usize`.
    #[test]
    fn negative_length_decode() {
        let mut buf = Vec::new();
        VarInt(-1).encode(&mut buf).unwrap();

        let mut cursor = buf.as_slice();
        assert!(matches!(
            String::decode(&mut cursor),
            Err(Error::InvalidData(_))
        ));
    }

    #[test]
    fn truncated_buffer() {
        let mut buf = Vec::new();
        VarInt(10).encode(&mut buf).unwrap();
        buf.extend_from_slice(b"short");

        let mut cursor = buf.as_slice();
        assert!(matches!(
            String::decode(&mut cursor),
            Err(Error::BufferUnderflow { .. })
        ));
    }

    #[test]
    fn invalid_utf8() {
        let mut buf = Vec::new();
        VarInt(2).encode(&mut buf).unwrap();
        buf.extend_from_slice(&[0xFF, 0xFE]);

        let mut cursor = buf.as_slice();
        assert!(matches!(
            String::decode(&mut cursor),
            Err(Error::InvalidUtf8(_))
        ));
    }

    #[test]
    fn encoded_size_accounts_for_varint_prefix() {
        assert_eq!("".to_string().encoded_size(), 1);
        assert_eq!("hi".to_string().encoded_size(), 3);
        // 128 no longer fits in 7 bits, so the prefix grows to two bytes.
        assert_eq!("a".repeat(128).encoded_size(), 130);
    }

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            // Up to 1000 characters is at most 4000 UTF-8 bytes, which stays
            // below the 32767-byte limit.
            #[test]
            fn string_roundtrip(s in ".{0,1000}") {
                roundtrip(&s);
            }
        }
    }
}