luanti_protocol/wire/
util.rs

1//!
2//! The crazy exotic serialization methods Luanti uses
3//!
4
5use std::str::FromStr;
6
7use anyhow::Result;
8use anyhow::bail;
9use miniz_oxide::inflate;
10use miniz_oxide::inflate::core::DecompressorOxide;
11use miniz_oxide::inflate::core::inflate_flags;
12use zstd_safe::InBuffer;
13use zstd_safe::OutBuffer;
14
/// Render an integer as the bytes of its decimal string representation.
///
/// The expansion borrows a freshly allocated `Vec<u8>`, so the result can be
/// used wherever a `&[u8]` is expected.
///
/// For example:
///    123 yields &[49, 50, 51]
///   -100 yields &[45, 49, 48, 48]
///
#[macro_export]
macro_rules! itos {
    ($value: expr) => {
        &::std::string::String::into_bytes(($value).to_string())
    };
}
27
28/// Parse byte slice into an integer. The opposite of itos.
29/// On error (such as `Utf8Error` or `ParseIntError`) this does
30/// `return Err()` implicitly.
31///
32/// Use return type-inference to specify the integer type, e.g:
33///
34/// ```rust
35/// use luanti_protocol::wire::util::stoi;
36/// let val: u16 = stoi(b"123".as_slice()).unwrap();
37/// ```
38pub fn stoi<T: FromStr>(bytes: &[u8]) -> Result<T>
39where
40    <T as FromStr>::Err: std::error::Error + Sync + Send + 'static,
41{
42    let str = std::str::from_utf8(bytes)?;
43    Ok(str.parse::<T>()?)
44}
45/*
46#[macro_export]
47macro_rules! stoi {
48    ($b: expr, $typ: ty) => {{
49        let result: anyhow::Result<$typ> = match std::str::from_utf8($b) {
50            Ok(v) => match v.parse::<$typ>() {
51                Ok(v) => Ok(v),
52                Err(e) => Err(anyhow::Error::from(e)),
53            },
54            Err(e) => Err(anyhow::Error::from(e)),
55        };
56        result
57    }};
58}
59*/
60
61///
62/// Streaming Zstd compress
63pub fn zstd_compress<F>(input: &[u8], mut write: F) -> Result<()>
64where
65    F: FnMut(&[u8]) -> Result<()>,
66{
67    const BUFSIZE: usize = 0x4000;
68    let mut ctx = zstd_safe::CCtx::create();
69    let mut buf = [0_u8; BUFSIZE];
70    let mut input_buffer = InBuffer { src: input, pos: 0 };
71    while input_buffer.pos < input.len() {
72        let mut output_buffer = OutBuffer::around(&mut buf);
73        match ctx.compress_stream(&mut output_buffer, &mut input_buffer) {
74            Ok(_) => {
75                let written = output_buffer.as_slice();
76                if !written.is_empty() {
77                    write(written)?;
78                }
79            }
80            Err(error) => bail!("zstd_compress: {}", zstd_safe::get_error_name(error)),
81        }
82    }
83    loop {
84        let mut output_buffer = OutBuffer::around(&mut buf);
85        match ctx.end_stream(&mut output_buffer) {
86            Ok(code) => {
87                let chunk = output_buffer.as_slice();
88                if !chunk.is_empty() {
89                    write(chunk)?;
90                }
91                if code == 0 {
92                    break;
93                }
94            }
95            Err(ec) => bail!("zstd_compress end: {}", zstd_safe::get_error_name(ec)),
96        }
97    }
98    Ok(())
99}
100
101/// Streaming Zstd decompress
102///
103/// The input is allowed to contain more data than Zstd will consume.
104/// Returns the actual number of bytes consumed from the input.
105///
106pub fn zstd_decompress<F>(input: &[u8], mut write: F) -> Result<usize>
107where
108    F: FnMut(&[u8]) -> Result<()>,
109{
110    const BUFSIZE: usize = 0x4000;
111    let mut buf = [0_u8; BUFSIZE];
112    let mut ctx = zstd_safe::DCtx::create();
113
114    let mut input_buffer = InBuffer { src: input, pos: 0 };
115    loop {
116        let mut output_buffer = OutBuffer::around(&mut buf);
117        match ctx.decompress_stream(&mut output_buffer, &mut input_buffer) {
118            Ok(code) => {
119                let out = output_buffer.as_slice();
120                if !out.is_empty() {
121                    write(out)?;
122                }
123                if code == 0 {
124                    break;
125                }
126            }
127            Err(ec) => bail!("zstd_compress: {}", zstd_safe::get_error_name(ec)),
128        };
129    }
130    Ok(input_buffer.pos())
131}
132
133/// serializeJsonStringIfNeeded
134pub fn serialize_json_string_if_needed<W>(input: &[u8], mut write: W) -> Result<()>
135where
136    W: FnMut(&[u8]) -> Result<()>,
137{
138    if input.is_empty()
139        || input
140            .iter()
141            .any(|&ch| ch <= 0x1f || ch >= 0x7f || ch == b' ' || ch == b'\"')
142    {
143        serialize_json_string(input, write)
144    } else {
145        write(input)
146    }
147}
148
149pub fn serialize_json_string<W>(input: &[u8], mut write: W) -> Result<()>
150where
151    W: FnMut(&[u8]) -> Result<()>,
152{
153    write(b"\"")?;
154    for &ch in input {
155        match ch {
156            b'"' => write(b"\\\"")?,
157            b'\\' => write(b"\\\\")?,
158            0x08 => write(b"\\b")?,
159            0x0C => write(b"\\f")?,
160            b'\n' => write(b"\\n")?,
161            b'\r' => write(b"\\r")?,
162            b'\t' => write(b"\\t")?,
163            other_char => {
164                // TODO use range pattern instead
165                if (32..=126).contains(&other_char) {
166                    write(&[other_char])?;
167                } else {
168                    // \u00XX style escaping
169                    let bytes = &[
170                        b'\\',
171                        b'u',
172                        b'0',
173                        b'0',
174                        to_hex(other_char >> 4),
175                        to_hex(other_char & 0xf),
176                    ];
177                    write(bytes)?;
178                }
179            }
180        }
181    }
182    write(b"\"")?;
183    Ok(())
184}
185
/// Returns the lowercase ASCII hex digit for the low four bits of `index`.
///
/// The upper four bits of `index` are ignored.
#[must_use]
pub fn to_hex(index: u8) -> u8 {
    match index & 0x0f {
        digit @ 0..=9 => b'0' + digit,
        letter => b'a' + (letter - 10),
    }
}
192
193pub fn from_hex(hex_digit: u8) -> Result<u8> {
194    // TODO use functions from std
195    if hex_digit.is_ascii_digit() {
196        Ok(hex_digit - b'0')
197    } else if (b'a'..=b'f').contains(&hex_digit) {
198        Ok(10 + (hex_digit - b'a'))
199    } else if (b'A'..=b'F').contains(&hex_digit) {
200        Ok(10 + (hex_digit - b'A'))
201    } else {
202        bail!("Invalid hex digit: {}", hex_digit);
203    }
204}
205
206// deSerializeJsonStringIfNeeded
207// Returns number of bytes consumed by the "json" string, so that parsing can continue after.
208pub fn deserialize_json_string_if_needed(input: &[u8]) -> Result<(Vec<u8>, usize), anyhow::Error> {
209    if input.is_empty() {
210        Ok((Vec::new(), 0))
211    } else {
212        if input[0] == b'"' {
213            return deserialize_json_string(input);
214        }
215        // Just a normal string, consume up until whitespace or eof
216        let endpos = input
217            .iter()
218            .position(|&ch| ch == b' ' || ch == b'\n')
219            .unwrap_or(input.len());
220        Ok((input[..endpos].to_vec(), endpos))
221    }
222}
223
224struct MiniReader<'input> {
225    input: &'input [u8],
226    pos: usize,
227}
228
229impl<'input> MiniReader<'input> {
230    pub(crate) fn new(input: &'input [u8], pos: usize) -> Self {
231        Self { input, pos }
232    }
233
234    pub(crate) fn remaining(&self) -> usize {
235        self.input.len() - self.pos
236    }
237
238    pub(crate) fn has_remaining(&self) -> bool {
239        self.remaining() > 0
240    }
241
242    pub(crate) fn take(&mut self, count: usize) -> Result<&'input [u8]> {
243        if self.pos + count > self.input.len() {
244            bail!("Luanti JSON string ended prematurely");
245        }
246        let result = &self.input[self.pos..self.pos + count];
247        self.pos += count;
248        Ok(result)
249    }
250
251    pub(crate) fn take1(&mut self) -> Result<u8> {
252        self.take(1).map(|ch| ch[0])
253    }
254}
255
256pub fn deserialize_json_string(input: &[u8]) -> Result<(Vec<u8>, usize), anyhow::Error> {
257    let mut result: Vec<u8> = Vec::new();
258    assert_eq!(input[0], b'"', "unexpected start of string");
259    let mut reader = MiniReader::new(input, 1);
260    while reader.has_remaining() {
261        let ch = reader.take1()?;
262        if ch == b'"' {
263            return Ok((result, reader.pos));
264        } else if ch == b'\\' {
265            let code = reader.take1()?;
266            match code {
267                b'b' => result.push(0x08),
268                b'f' => result.push(0x0C),
269                b'n' => result.push(b'\n'),
270                b'r' => result.push(b'\r'),
271                b't' => result.push(b'\t'),
272                b'u' => {
273                    // "Unicode"
274                    let codepoint = reader.take(4)?;
275                    if codepoint[0] != b'0' || codepoint[1] != b'0' {
276                        bail!("Unsupported unicode in Luanti JSON");
277                    }
278                    let hi = from_hex(codepoint[2])?;
279                    let lo = from_hex(codepoint[3])?;
280                    result.push((hi << 4) | lo);
281                }
282                other_char => result.push(other_char),
283            }
284        } else {
285            result.push(ch);
286        }
287    }
288    bail!("Luanti JSON string ended prematurely");
289}
290
/// Splits `line` on spaces and newlines and returns the non-empty pieces.
///
/// This is needed to handle the crazy inventory parsing.
#[must_use]
pub fn split_by_whitespace(line: &[u8]) -> Vec<&[u8]> {
    let mut words = Vec::new();
    for piece in line.split(|&ch| matches!(ch, b' ' | b'\n')) {
        if !piece.is_empty() {
            words.push(piece);
        }
    }
    words
}
298
/// Returns `line` with all leading spaces and newlines removed.
///
/// The result is empty when `line` contains nothing else.
#[must_use]
pub fn skip_whitespace(line: &[u8]) -> &[u8] {
    let leading = line
        .iter()
        .take_while(|&&ch| ch == b' ' || ch == b'\n')
        .count();
    &line[leading..]
}
306
307/// Returns the next word (non-whitespace chunk) in u8 slice,
308/// and the remainder (which may still have whitespace)
309///
310/// Returns None when the remainder is empty or all whitespace.
311#[must_use]
312pub fn next_word(line: &[u8]) -> Option<(&[u8], &[u8])> {
313    let line = skip_whitespace(line);
314    match line.iter().position(|ch| *ch == b' ' || *ch == b'\n') {
315        Some(endpos) => Some((&line[..endpos], &line[endpos..])),
316        None => {
317            if line.is_empty() {
318                None
319            } else {
320                Some((line, &line[line.len()..]))
321            }
322        }
323    }
324}
325
326#[must_use]
327pub fn compress_zlib(uncompressed: &[u8]) -> Vec<u8> {
328    miniz_oxide::deflate::compress_to_vec_zlib(uncompressed, 6)
329}
330
331/// This method must detect the end of the stream.
332/// 'uncompressed' may have more data past the end of the zlib stream
333/// Returns (`bytes_consumed`, `uncompressed_data`)
334pub fn decompress_zlib(input: &[u8]) -> Result<(usize, Vec<u8>)> {
335    let flags = inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER
336        | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
337    let mut ret: Vec<u8> = vec![0; input.len().saturating_mul(2)];
338
339    let mut decompressor = Box::<DecompressorOxide>::default();
340
341    let mut in_pos = 0;
342    let mut out_pos = 0;
343    loop {
344        // Wrap the whole output slice so we know we have enough of the
345        // decompressed data for matches.
346        let (status, in_consumed, out_consumed) = inflate::core::decompress(
347            &mut decompressor,
348            &input[in_pos..],
349            &mut ret,
350            out_pos,
351            flags,
352        );
353        in_pos += in_consumed;
354        out_pos += out_consumed;
355
356        match status {
357            inflate::TINFLStatus::Done => {
358                ret.truncate(out_pos);
359                return Ok((in_pos, ret));
360            }
361
362            inflate::TINFLStatus::HasMoreOutput => {
363                // if the buffer has already reached the size limit, return an error
364                // calculate the new length, capped at `max_output_size`
365                let new_len = ret.len().saturating_mul(2);
366                ret.resize(new_len, 0);
367            }
368
369            err => bail!(
370                "zlib decompression error: in_pos={}, out_pos={}, {:?}",
371                in_pos,
372                out_pos,
373                err
374            ),
375        }
376    }
377}
378
#[cfg(test)]
mod tests {
    use std::ops::Range;

    use super::*;
    use log::error;
    use rand;
    use rand::Rng;
    use rand::RngCore;
    use rand::rng;

    /// Produces random bytes; the length is drawn at random from `range`.
    fn rand_bytes(range: Range<usize>) -> Vec<u8> {
        let mut generator = rng();
        let size = generator.random_range(range);
        let mut bytes = vec![0_u8; size];
        generator.fill_bytes(bytes.as_mut_slice());
        bytes
    }

    /// Collects the output of `serialize_json_string_if_needed` into a `Vec`.
    fn serialize_to_vec(input: &[u8]) -> Vec<u8> {
        let mut collected = Vec::new();
        serialize_json_string_if_needed(input, |chunk| {
            collected.extend_from_slice(chunk);
            Ok(())
        })
        .unwrap();
        collected
    }

    /// Random byte strings must survive a serialize/deserialize round trip.
    #[test]
    fn json_serialize_deserialize_fuzz() {
        for _ in 0..10000 {
            let input = rand_bytes(0..100);
            let serialized = serialize_to_vec(&input);
            // Add some junk on the end to make sure it doesn't take more than it should
            let mut serialized_plus_junk = serialized.clone();
            serialized_plus_junk.push(32);
            serialized_plus_junk.extend(rand_bytes(0..20));

            let (result, consumed) =
                deserialize_json_string_if_needed(&serialized_plus_junk).unwrap();
            if input != result {
                error!("input = {:?}", input);
                error!("serialized = {:?}", serialized);
                error!("serialized_plus_junk = {:?}", serialized_plus_junk);
                error!("result = {:?}", result);
                error!("consumed = {}", consumed);
                panic!();
            }
            assert_eq!(input, result);
            assert_eq!(consumed, serialized.len());
        }
    }

    /// Spot checks for the `itos!` macro.
    #[test]
    fn itos_test() {
        assert_eq!(itos!(123), &[49, 50, 51]);
        assert_eq!(itos!(-100), &[45, 49, 48, 48]);
        assert_eq!(itos!(0), &[48]);
    }

    /// `stoi` must invert `itos!` for a range of positive and negative values.
    #[test]
    fn itos_stoi_fuzz() {
        for number in -10000..10000 {
            let encoded = itos!(number);
            let decoded: i32 = stoi(encoded).expect("Should not have failed");
            assert_eq!(decoded, number);
        }
    }
}
447}