minetest_protocol/wire/
util.rs

1//!
2//! The crazy exotic serialization methods Minetest uses
3//!
4
5use std::str::FromStr;
6
7use anyhow::bail;
8use anyhow::Result;
9use miniz_oxide::inflate::core::inflate_flags;
10use miniz_oxide::inflate::core::DecompressorOxide;
11use zstd_safe::InBuffer;
12use zstd_safe::OutBuffer;
13
/// Convert an integer into its decimal string representation, as bytes.
///
/// The macro expands to a reference to a freshly allocated `Vec<u8>`, so the
/// result can be used wherever a `&[u8]` is expected.
///
/// For example:
///    123 yields &[49, 50, 51]
///   -100 yields &[45, 49, 48, 48]
///
#[macro_export]
macro_rules! itos {
    ($value: expr) => {
        // The leading `&` is kept outermost so that `let s = itos!(x);`
        // keeps the temporary byte vector alive for as long as `s`.
        &::std::string::ToString::to_string(&$value).into_bytes()
    };
}
26
27/// Parse byte slice into an integer. The opposite of itos.
28/// On error (such as Utf8Error or ParseIntError) this does
29/// `return Err()` implicitly.
30///
31/// Use return type-inference to specify the integer type, e.g:
32
33///     let val: u16 = stoi(&s);
34///
35pub fn stoi<T: FromStr>(b: &[u8]) -> anyhow::Result<T>
36where
37    <T as FromStr>::Err: std::error::Error + std::marker::Sync + std::marker::Send + 'static,
38{
39    let s = std::str::from_utf8(b)?;
40    let n = s.parse::<T>()?;
41    Ok(n)
42}
43/*
44#[macro_export]
45macro_rules! stoi {
46    ($b: expr, $typ: ty) => {{
47        let result: anyhow::Result<$typ> = match std::str::from_utf8($b) {
48            Ok(v) => match v.parse::<$typ>() {
49                Ok(v) => Ok(v),
50                Err(e) => Err(anyhow::Error::from(e)),
51            },
52            Err(e) => Err(anyhow::Error::from(e)),
53        };
54        result
55    }};
56}
57*/
58
59///
60/// Streaming Zstd compress
61pub fn zstd_compress<F>(input: &[u8], mut write: F) -> anyhow::Result<()>
62where
63    F: FnMut(&[u8]) -> anyhow::Result<()>,
64{
65    let mut ctx = zstd_safe::CCtx::create();
66    const BUFSIZE: usize = 16384;
67    let mut buf = [0u8; BUFSIZE];
68    let mut input_buffer = InBuffer {
69        src: &input,
70        pos: 0,
71    };
72    while input_buffer.pos < input.len() {
73        let mut output_buffer = OutBuffer::around(&mut buf);
74        match ctx.compress_stream(&mut output_buffer, &mut input_buffer) {
75            Ok(_) => {
76                let written = output_buffer.as_slice();
77                if written.len() > 0 {
78                    write(&written)?;
79                }
80            }
81            Err(e) => bail!("zstd_compress: {}", zstd_safe::get_error_name(e)),
82        }
83    }
84    loop {
85        let mut output_buffer = OutBuffer::around(&mut buf);
86        match ctx.end_stream(&mut output_buffer) {
87            Ok(code) => {
88                let chunk = output_buffer.as_slice();
89                if chunk.len() != 0 {
90                    write(&chunk)?;
91                }
92                if code == 0 {
93                    break;
94                }
95            }
96            Err(ec) => bail!("zstd_compress end: {}", zstd_safe::get_error_name(ec)),
97        }
98    }
99    Ok(())
100}
101
102/// Streaming Zstd decompress
103///
104/// The input is allowed to contain more data than Zstd will consume.
105/// Returns the actual number of bytes consumed from the input.
106///
107pub fn zstd_decompress<F>(input: &[u8], mut write: F) -> anyhow::Result<usize>
108where
109    F: FnMut(&[u8]) -> anyhow::Result<()>,
110{
111    let mut ctx = zstd_safe::DCtx::create();
112    const BUFSIZE: usize = 16384;
113    let mut buf = [0u8; BUFSIZE];
114
115    let mut input_buffer = InBuffer {
116        src: &input,
117        pos: 0,
118    };
119    loop {
120        let mut output_buffer = OutBuffer::around(&mut buf);
121        match ctx.decompress_stream(&mut output_buffer, &mut input_buffer) {
122            Ok(code) => {
123                let out = output_buffer.as_slice();
124                if out.len() != 0 {
125                    write(&out)?;
126                }
127                if code == 0 {
128                    break;
129                }
130            }
131            Err(ec) => bail!("zstd_compress: {}", zstd_safe::get_error_name(ec)),
132        };
133    }
134    Ok(input_buffer.pos())
135}
136
137/// serializeJsonStringIfNeeded
138pub fn serialize_json_string_if_needed<W>(input: &[u8], mut write: W) -> anyhow::Result<()>
139where
140    W: FnMut(&[u8]) -> anyhow::Result<()>,
141{
142    if input.len() == 0
143        || input
144            .iter()
145            .any(|&ch| ch <= 0x1f || ch >= 0x7f || ch == b' ' || ch == b'\"')
146    {
147        serialize_json_string(&input, write)
148    } else {
149        write(input)
150    }
151}
152
153pub fn serialize_json_string<W>(input: &[u8], mut write: W) -> anyhow::Result<()>
154where
155    W: FnMut(&[u8]) -> anyhow::Result<()>,
156{
157    write(b"\"")?;
158    for ch in input {
159        match *ch {
160            b'"' => write(b"\\\"")?,
161            b'\\' => write(b"\\\\")?,
162            0x08 => write(b"\\b")?,
163            0x0C => write(b"\\f")?,
164            b'\n' => write(b"\\n")?,
165            b'\r' => write(b"\\r")?,
166            b'\t' => write(b"\\t")?,
167            ch => {
168                if ch >= 32 && ch <= 126 {
169                    write(&[ch])?
170                } else {
171                    // \u00XX style escaping
172                    let bytes = &[b'\\', b'u', b'0', b'0', to_hex(ch >> 4), to_hex(ch & 0xf)];
173                    write(bytes)?
174                }
175            }
176        }
177    }
178    write(b"\"")?;
179    Ok(())
180}
181
/// Map a nibble value (0..=15) to its lowercase ASCII hex digit.
///
/// # Panics
///
/// Panics if `index` is greater than 15.
pub fn to_hex(index: u8) -> u8 {
    b"0123456789abcdef"[usize::from(index)]
}
186
187pub fn from_hex(hex_digit: u8) -> anyhow::Result<u8> {
188    if hex_digit >= b'0' && hex_digit <= b'9' {
189        Ok(hex_digit - b'0')
190    } else if hex_digit >= b'a' && hex_digit <= b'f' {
191        Ok(10 + (hex_digit - b'a'))
192    } else if hex_digit >= b'A' && hex_digit <= b'F' {
193        Ok(10 + (hex_digit - b'A'))
194    } else {
195        bail!("Invalid hex digit: {}", hex_digit);
196    }
197}
198
199// deSerializeJsonStringIfNeeded
200// Returns number of bytes consumed by the "json" string, so that parsing can continue after.
201pub fn deserialize_json_string_if_needed(input: &[u8]) -> Result<(Vec<u8>, usize), anyhow::Error> {
202    if input.len() > 0 {
203        if input[0] == b'"' {
204            return deserialize_json_string(input);
205        }
206        // Just a normal string, consume up until whitespace or eof
207        let endpos = input
208            .iter()
209            .position(|&ch| ch == b' ' || ch == b'\n')
210            .unwrap_or(input.len());
211        Ok((input[..endpos].to_vec(), endpos))
212    } else {
213        Ok((Vec::new(), 0))
214    }
215}
216
217struct MiniReader<'a> {
218    input: &'a [u8],
219    pos: usize,
220}
221
222impl<'a> MiniReader<'a> {
223    pub fn new(input: &'a [u8], pos: usize) -> Self {
224        Self { input, pos }
225    }
226
227    pub fn remaining(&self) -> usize {
228        self.input.len() - self.pos
229    }
230
231    pub fn take(&mut self, count: usize) -> anyhow::Result<&'a [u8]> {
232        if self.pos + count > self.input.len() {
233            bail!("Minetest JSON string ended prematurely");
234        }
235        let result = &self.input[self.pos..self.pos + count];
236        self.pos += count;
237        Ok(result)
238    }
239
240    pub fn take1(&mut self) -> anyhow::Result<u8> {
241        self.take(1).map(|ch| ch[0])
242    }
243}
244
245pub fn deserialize_json_string(input: &[u8]) -> Result<(Vec<u8>, usize), anyhow::Error> {
246    let mut result: Vec<u8> = Vec::new();
247    assert!(input[0] == b'"');
248    let mut r = MiniReader::new(input, 1);
249    while r.remaining() > 0 {
250        let ch = r.take1()?;
251        if ch == b'"' {
252            return Ok((result, r.pos));
253        } else if ch == b'\\' {
254            let code = r.take1()?;
255            match code {
256                b'b' => result.push(0x08),
257                b'f' => result.push(0x0C),
258                b'n' => result.push(b'\n'),
259                b'r' => result.push(b'\r'),
260                b't' => result.push(b'\t'),
261                b'u' => {
262                    // "Unicode"
263                    let codepoint = r.take(4)?;
264                    if codepoint[0] != b'0' || codepoint[1] != b'0' {
265                        bail!("Unsupported unicode in Minetest JSON");
266                    }
267                    let hi = from_hex(codepoint[2])?;
268                    let lo = from_hex(codepoint[3])?;
269                    result.push((hi << 4) | lo);
270                }
271                ch => result.push(ch),
272            }
273        } else {
274            result.push(ch);
275        }
276    }
277    bail!("Minetest JSON string ended prematurely");
278}
279
/// Split `line` on spaces and newlines, dropping empty pieces.
///
/// This is needed to handle the crazy inventory parsing.
pub fn split_by_whitespace(line: &[u8]) -> Vec<&[u8]> {
    let mut words = Vec::new();
    for piece in line.split(|&ch| matches!(ch, b' ' | b'\n')) {
        // Consecutive, leading or trailing separators produce empty pieces.
        if !piece.is_empty() {
            words.push(piece);
        }
    }
    words
}
286
/// Return `line` with any leading spaces and newlines removed.
///
/// If `line` is empty or consists only of spaces and newlines, the result
/// is an empty slice.
pub fn skip_whitespace(line: &[u8]) -> &[u8] {
    let leading = line
        .iter()
        .take_while(|&&ch| ch == b' ' || ch == b'\n')
        .count();
    &line[leading..]
}
293
294/// Returns the next word (non-whitespace chunk) in u8 slice,
295/// and the remainder (which may still have whitespace)
296///
297/// Returns None when the remainder is empty or all whitespace.
298pub fn next_word(line: &[u8]) -> Option<(&[u8], &[u8])> {
299    let line = skip_whitespace(line);
300    match line.iter().position(|ch| *ch == b' ' || *ch == b'\n') {
301        Some(endpos) => Some((&line[..endpos], &line[endpos..])),
302        None => {
303            if line.len() == 0 {
304                None
305            } else {
306                Some((line, &line[line.len()..]))
307            }
308        }
309    }
310}
311
312pub fn compress_zlib(uncompressed: &[u8]) -> Vec<u8> {
313    miniz_oxide::deflate::compress_to_vec_zlib(uncompressed, 6)
314}
315
316/// This method must detect the end of the stream.
317/// 'uncompressed' may have more data past the end of the zlib stream
318/// Returns (bytes_consumed, uncompressed_data)
319pub fn decompress_zlib(input: &[u8]) -> Result<(usize, Vec<u8>)> {
320    let flags = inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER
321        | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
322    let mut ret: Vec<u8> = vec![0; input.len().saturating_mul(2)];
323
324    let mut decomp = Box::<DecompressorOxide>::default();
325
326    let mut in_pos = 0;
327    let mut out_pos = 0;
328    loop {
329        // Wrap the whole output slice so we know we have enough of the
330        // decompressed data for matches.
331        let (status, in_consumed, out_consumed) = miniz_oxide::inflate::core::decompress(
332            &mut decomp,
333            &input[in_pos..],
334            &mut ret,
335            out_pos,
336            flags,
337        );
338        in_pos += in_consumed;
339        out_pos += out_consumed;
340
341        match status {
342            miniz_oxide::inflate::TINFLStatus::Done => {
343                ret.truncate(out_pos);
344                return Ok((in_pos, ret));
345            }
346
347            miniz_oxide::inflate::TINFLStatus::HasMoreOutput => {
348                // if the buffer has already reached the size limit, return an error
349                // calculate the new length, capped at `max_output_size`
350                let new_len = ret.len().saturating_mul(2);
351                ret.resize(new_len, 0);
352            }
353
354            err => bail!(
355                "zlib decompression error: in_pos={}, out_pos={}, {:?}",
356                in_pos,
357                out_pos,
358                err
359            ),
360        }
361    }
362}
363
#[cfg(test)]
mod tests {
    use std::ops::Range;

    use super::*;
    use rand::{self, thread_rng, Rng, RngCore};

    /// Random bytes, with a length drawn at random from `range`.
    fn rand_bytes(range: Range<usize>) -> Vec<u8> {
        let mut rng = thread_rng();
        let mut bytes = vec![0u8; rng.gen_range(range)];
        rng.fill_bytes(&mut bytes);
        bytes
    }

    /// Run `serialize_json_string_if_needed` and collect its output.
    fn serialize_to_vec(input: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        assert!(serialize_json_string_if_needed(&input, |chunk| {
            out.extend_from_slice(chunk);
            Ok(())
        })
        .is_ok());
        out
    }

    /// Round-trip random byte strings through serialize + deserialize.
    #[test]
    fn json_serialize_deserialize_fuzz() {
        for _ in 0..10000 {
            let input = rand_bytes(0..100);
            let serialized = serialize_to_vec(&input);
            // Add some junk on the end to make sure it doesn't take more than it should
            let mut serialized_plus_junk = serialized.clone();
            serialized_plus_junk.push(32);
            serialized_plus_junk.extend(rand_bytes(0..20));

            let (result, consumed) =
                deserialize_json_string_if_needed(&serialized_plus_junk).unwrap();
            // Dump the full context before failing, to make a mismatch debuggable.
            if input != result {
                println!("input = {:?}", input);
                println!("serialized = {:?}", serialized);
                println!("serialized_plus_junk = {:?}", serialized_plus_junk);
                println!("result = {:?}", result);
                println!("consumed = {}", consumed);
                assert!(false);
            }
            assert_eq!(input, result);
            assert_eq!(consumed, serialized.len());
        }
    }

    /// Spot-check the byte output of `itos!`.
    #[test]
    fn itos_test() {
        assert_eq!(itos!(123), &[49, 50, 51]);
        assert_eq!(itos!(-100), &[45, 49, 48, 48]);
        assert_eq!(itos!(0), &[48]);
    }

    /// `stoi` must invert `itos!` over a range of positive and negative values.
    #[test]
    fn itos_stoi_fuzz() {
        for expected in -10000..10000 {
            let text = itos!(expected);
            let parsed: i32 = stoi(text).expect("Should not have failed");
            assert_eq!(parsed, expected);
        }
    }
}