Skip to main content

nobility/bin_decode/
mod.rs

//! Decoder for the NBT binary format. This module is based around the
//! idea that you won't store the objects; instead, they will be walked
//! through to build up some other data structure.
4//!
5//! As a result, almost all of the types here use borrows into the
6//! original data buffer, rather than copying into a Vec. The main
7//! exception is bookkeeping where necessary, such as when parsing
8//! Compound tags.
9//!
10//! # Example
11//!
12//! ```rust
13//! # use std::error::Error;
14//! #
15//! # fn main() -> Result<(), Box<dyn Error>> {
16//! use nobility::bin_decode::Document;
17//! use std::fs::File;
18//! use std::io::Read;
19//!
20//! let mut file = File::open("files/hello_world.nbt")?;
21//! let mut data = vec![];
22//! file.read_to_end(&mut data)?;
23//! let cursor = std::io::Cursor::new(data);
24//!
25//! // Either copies the data (plaintext) or decompresses it (gzip).
26//! let doc = Document::load(cursor)?;
27//!
28//! // Returns the root tag's name, and the root tag (always a Compound tag).
29//! // Both of these are borrowing the data inside the Document.
30//! let (name, root) = doc.parse()?;
31//! println!("name: {}", name.decode()?);
32//! println!("{:#?}", root);
33//! #
34//! # Ok(())
35//! # }
36//! ```
37
38use crate::TagType;
39use byteorder::{BigEndian, ByteOrder};
40use flate2::read::GzDecoder;
41use std::fmt;
42use std::io::Error as IoError;
43use std::io::Read;
44
45mod array;
46mod compound;
47mod internal;
48mod list;
49mod string;
50
51pub use array::{IntArray, LongArray, NbtArray, NbtArrayIter};
52pub use compound::{Compound, Entry};
53pub(crate) use internal::{NbtParse, Reader};
54pub use list::{
55    ByteArrayList, CompoundList, DoubleList, FloatList, IntArrayList, IntList, List, ListIter,
56    ListList, LongArrayList, LongList, NbtList, ShortList, StringList,
57};
58pub use string::NbtString;
59
60/// Failures which can occur while parsing an NBT document.
61#[derive(Debug)]
62#[non_exhaustive]
63pub enum ParseError {
64    /// End of file happens when the document is truncated, i.e. we were
65    /// expecting some data to follow after something, and then the file
66    /// ended instead. In particular, this can happen when:
67    ///
68    /// - Any primitive type is not followed by enough bytes to
69    /// construct the primitive type (TAG_Byte, TAG_Short, TAG_Int,
70    /// TAG_Long, TAG_Float, TAG_Double).
71    ///
72    /// - A TAG_Byte_Array, TAG_String, or TAG_Int_Array is not followed
73    /// by as many elements as it says it is.
74    ///
75    /// - A TAG_List does not have as many elements as it says it does, or
76    /// we get an EOF while attempting to parse an element.
77    ///
78    /// - A TAG_Compound does not have a TAG_End to terminate it, or we
79    /// get an EOF while attempting to parse a tag.
80    EOF,
81    /// This happens when there is an unknown tag type in the
82    /// stream. This can happen if Mojang adds new tag types, if a
83    /// document has third party tag types, if the file is corrupted, or
84    /// if there's a bug in the library.
85    UnknownTag { tag: u8, offset: usize },
86    /// This happens when we found a TAG_End where we shouldn't
87    /// have. TAG_End is only supposed to be found after having a
88    /// TAG_Compound, to terminate it. Places we can find this include
89    /// as the root tag of a document and inside of a List.
90    UnexpectedEndTag,
91    /// This library assumes that NBT documents always have a root
92    /// TAG_Compound, and if this invariant fails this error will be
93    /// generated.
94    IncorrectStartTag { tag: TagType },
95}
96
97impl fmt::Display for ParseError {
98    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
99        match self {
100            ParseError::EOF => write!(fmt, "Unexpected end of file"),
101            ParseError::UnknownTag { tag, offset } => {
102                write!(fmt, "Unknown tag {} at offset {:#x}", tag, offset)
103            }
104            ParseError::UnexpectedEndTag => write!(fmt, "Unexpected end tag in document"),
105            ParseError::IncorrectStartTag { tag } => {
106                write!(
107                    fmt,
108                    "Document starts with tag {:?}, it should only start with Compound.",
109                    tag
110                )
111            }
112        }
113    }
114}
115
// Empty impl: every `Error` method keeps its default, so a `ParseError`
// is described only through its `Debug` and `Display` impls.
impl std::error::Error for ParseError {}
117
118/// Representation for all values that a tag can be.
119#[derive(Clone, Debug, PartialEq)]
120#[non_exhaustive]
121pub enum Tag<'a> {
122    /// A small i8 integer.
123    Byte(i8),
124    /// An i16 integer.
125    Short(i16),
126    /// An i32 integer.
127    Int(i32),
128    /// An i64 integer.
129    Long(i64),
130    /// An f32 number.
131    Float(f32),
132    /// An f64 number.
133    Double(f64),
134    /// An array of raw bytes.
135    ByteArray(&'a [u8]),
136    /// A string containing CESU-8 encoded text.
137    String(NbtString<'a>),
138    /// An array of i32.
139    IntArray(IntArray<'a>),
140    /// An array of i64.
141    LongArray(LongArray<'a>),
142    /// An array which can only contain a single type. The type can be
143    /// any tag, including a nested list. When lists are nested, the
144    /// inner lists do not have to be the same type.
145    List(List<'a>),
146    /// A list of key/value pairs, creating a dictionary.
147    Compound(Compound<'a>),
148}
149
150impl<'a> Tag<'a> {
151    pub(crate) fn read(tag: TagType, reader: &mut Reader<'a>) -> Result<Tag<'a>, ParseError> {
152        match tag {
153            TagType::End => Err(ParseError::UnexpectedEndTag),
154            TagType::Byte => Ok(Tag::Byte(reader.advance(1)?[0] as i8)),
155            TagType::Short => Ok(Tag::Short(BigEndian::read_i16(reader.advance(2)?))),
156            TagType::Int => Ok(Tag::Int(BigEndian::read_i32(reader.advance(4)?))),
157            TagType::Long => Ok(Tag::Long(BigEndian::read_i64(reader.advance(8)?))),
158            TagType::Float => Ok(Tag::Float(BigEndian::read_f32(reader.advance(4)?))),
159            TagType::Double => Ok(Tag::Double(BigEndian::read_f64(reader.advance(8)?))),
160            TagType::String => NbtString::read(reader).map(Tag::String),
161            TagType::List => List::read(reader).map(Tag::List),
162            TagType::Compound => Compound::read(reader).map(Tag::Compound),
163            TagType::ByteArray => read_byte_array(reader).map(Tag::ByteArray),
164            TagType::IntArray => IntArray::read(reader).map(Tag::IntArray),
165            TagType::LongArray => LongArray::read(reader).map(Tag::LongArray),
166        }
167    }
168
169    /// Returns the type that represents this tag.
170    pub fn tag_type(&self) -> TagType {
171        match self {
172            Tag::Byte(_) => TagType::Byte,
173            Tag::Short(_) => TagType::Short,
174            Tag::Int(_) => TagType::Int,
175            Tag::Long(_) => TagType::Long,
176            Tag::Float(_) => TagType::Float,
177            Tag::Double(_) => TagType::Double,
178            Tag::ByteArray(_) => TagType::ByteArray,
179            Tag::String(_) => TagType::String,
180            Tag::List(_) => TagType::List,
181            Tag::Compound(_) => TagType::Compound,
182            Tag::IntArray(_) => TagType::IntArray,
183            Tag::LongArray(_) => TagType::LongArray,
184        }
185    }
186
187    /// If this tag is a string, returns it. Otherwise, returns None. No coercion is performed.
188    pub fn as_string(&self) -> Option<NbtString<'a>> {
189        if let Tag::String(value) = self {
190            Some(*value)
191        } else {
192            None
193        }
194    }
195
196    /// If this tag is a byte array, returns it. Otherwise, returns None.
197    pub fn as_byte_array(&self) -> Option<&[u8]> {
198        if let Tag::ByteArray(value) = self {
199            Some(value)
200        } else {
201            None
202        }
203    }
204
205    /// If this tag is a [Compound], returns it. Otherwise, returns None.
206    pub fn as_compound(&self) -> Option<&Compound<'a>> {
207        if let Tag::Compound(value) = self {
208            Some(value)
209        } else {
210            None
211        }
212    }
213
214    /// If this tag is a [List], returns it. Otherwise, returns None.
215    pub fn as_list(&self) -> Option<&List<'a>> {
216        if let Tag::List(value) = self {
217            Some(value)
218        } else {
219            None
220        }
221    }
222
223    /// Attempts to coerce the tag to an integer. Byte, Short, Int, and
224    /// Long will return a value, other tags will return None.
225    pub fn to_i64(&self) -> Option<i64> {
226        match *self {
227            Tag::Byte(value) => Some(value as i64),
228            Tag::Short(value) => Some(value as i64),
229            Tag::Int(value) => Some(value as i64),
230            Tag::Long(value) => Some(value),
231            _ => None,
232        }
233    }
234
235    /// Attempts to coerce the tag to a f64. Byte, Short, Int, Long,
236    /// Float, and Double will return a value, other tags will return
237    /// None.
238    pub fn to_f64(&self) -> Option<f64> {
239        match *self {
240            Tag::Byte(value) => Some(value as f64),
241            Tag::Short(value) => Some(value as f64),
242            Tag::Int(value) => Some(value as f64),
243            Tag::Long(value) => Some(value as f64),
244            Tag::Float(value) => Some(value as f64),
245            Tag::Double(value) => Some(value),
246            _ => None,
247        }
248    }
249
250    /// Attempts to coerce the tag to a f32. Byte, Short, Int, Long,
251    /// Float, and Double will return a value, other tags will return
252    /// None.
253    pub fn to_f32(&self) -> Option<f32> {
254        match *self {
255            Tag::Byte(value) => Some(value as f32),
256            Tag::Short(value) => Some(value as f32),
257            Tag::Int(value) => Some(value as f32),
258            Tag::Long(value) => Some(value as f32),
259            Tag::Float(value) => Some(value),
260            Tag::Double(value) => Some(value as f32),
261            _ => None,
262        }
263    }
264
265    /// If the tag is in the 1.16+ UUID format (IntArray of length 4),
266    /// returns it as big endian bytes. Otherwise, returns None.
267    pub fn to_uuid_bytes(&self) -> Option<[u8; 16]> {
268        if let Tag::IntArray(array) = self {
269            if array.len() == 4 {
270                let mut buf = [0; 16];
271                BigEndian::write_i32(&mut buf[0..4], array.get(0).unwrap());
272                BigEndian::write_i32(&mut buf[4..8], array.get(1).unwrap());
273                BigEndian::write_i32(&mut buf[8..12], array.get(2).unwrap());
274                BigEndian::write_i32(&mut buf[12..16], array.get(3).unwrap());
275                return Some(buf);
276            }
277        }
278        None
279    }
280
281    /// Similar to [Tag::to_uuid_bytes], but returns a [uuid::Uuid]. Requires the `uuid` feature.
282    #[cfg(feature = "uuid")]
283    pub fn to_uuid(&self) -> Option<uuid::Uuid> {
284        self.to_uuid_bytes().map(uuid::Uuid::from_bytes)
285    }
286}
287
288pub(crate) fn read_type(reader: &mut Reader<'_>) -> Result<TagType, ParseError> {
289    let offset = reader.position;
290    match reader.advance(1)?[0] {
291        0 => Ok(TagType::End),
292        1 => Ok(TagType::Byte),
293        2 => Ok(TagType::Short),
294        3 => Ok(TagType::Int),
295        4 => Ok(TagType::Long),
296        5 => Ok(TagType::Float),
297        6 => Ok(TagType::Double),
298        7 => Ok(TagType::ByteArray),
299        8 => Ok(TagType::String),
300        9 => Ok(TagType::List),
301        10 => Ok(TagType::Compound),
302        11 => Ok(TagType::IntArray),
303        12 => Ok(TagType::LongArray),
304        tag => Err(ParseError::UnknownTag { tag, offset }),
305    }
306}
307
308fn read_byte_array<'a>(reader: &mut Reader<'a>) -> Result<&'a [u8], ParseError> {
309    let len = BigEndian::read_u32(reader.advance(4)?);
310    Ok(reader.advance(len as usize)?)
311}
312
/// Represents an NBT document and is the owner of the data contained in
/// it. All other decoder types are borrows of the data stored in this.
///
/// Two documents compare equal when their underlying byte buffers are
/// equal.
///
/// # Example
///
/// ```rust
/// # use std::error::Error;
/// #
/// # fn main() -> Result<(), Box<dyn Error>> {
/// use nobility::bin_decode::Document;
/// # let input = Document::doctest_demo();
///
/// // Either copies the data (plaintext) or decompresses it (gzip). Accepts
/// // any implementation of Read.
/// let doc = Document::load(input)?;
///
/// // Returns the root tag's name, and the root tag (always a Compound tag).
/// // Both of these are borrowing the data inside the Document.
/// let (name, root) = doc.parse()?;
/// # let _ = (name, root);
/// # Ok(())
/// # }
/// ```
#[derive(Clone, PartialEq, Eq)]
pub struct Document {
    /// The complete, already decompressed document bytes.
    data: Vec<u8>,
}
341
342impl Document {
343    #[doc(hidden)]
344    pub fn doctest_demo() -> impl Read + Clone {
345        use std::fs::File;
346
347        let mut file = File::open("files/hello_world.nbt").expect("File should exist");
348        let mut data = vec![];
349        file.read_to_end(&mut data).unwrap();
350        std::io::Cursor::new(data)
351    }
352
353    /// Loads a document from any source implementing Read. Sources that
354    /// are compressed with gzip will be automatically decompressed,
355    /// otherwise the data will just be copied.
356    ///
357    /// # Errors
358    ///
359    /// Errors from this function are either from the input [Read]
360    /// object or from [GzDecoder].
361    pub fn load<R: Read + Clone>(mut input: R) -> Result<Document, IoError> {
362        let mut decoder = GzDecoder::new(input.clone());
363        let mut data = vec![];
364        if decoder.header().is_some() {
365            // Valid gzip stream
366            decoder.read_to_end(&mut data)?;
367        } else {
368            // Not a gzip stream
369            input.read_to_end(&mut data)?;
370        }
371        Ok(Document { data })
372    }
373
374    /// Parses the document and returns the name and contents of the
375    /// root tag.
376    ///
377    /// # Errors
378    ///
379    /// The only cases that this should return an error are:
380    ///
381    /// 1. The input is not an NBT document. This will likely generate
382    ///    [ParseError::IncorrectStartTag].
383    /// 2. The input is an NBT document, but is malformed/corrupted, or
384    ///    in the Bedrock edition version of the format.
385    /// 2. The document is compressed using something other than
386    ///    gzip. This will likely generate
387    ///    [ParseError::IncorrectStartTag].
388    /// 3. The specification has changed due to a new Minecraft version.
389    ///    This will likely generate [ParseError::UnknownTag].
390    /// 4. There's a bug in the parser.
391    pub fn parse(&self) -> Result<(NbtString, Compound), ParseError> {
392        let mut reader = Reader::new(&self.data);
393        let tag = read_type(&mut reader)?;
394        if tag != TagType::Compound {
395            return Err(ParseError::IncorrectStartTag { tag });
396        }
397        let name = NbtString::read(&mut reader)?;
398        let root = Compound::read(&mut reader)?;
399        Ok((name, root))
400    }
401}
402
403impl fmt::Debug for Document {
404    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
405        write!(fmt, "Document({} B buffer)", self.data.len() / 1000)
406    }
407}