nobility/bin_decode/mod.rs
//! Decoder for the NBT binary format. This module is based around the
//! idea that you won't store the objects; instead, they will be walked
//! through to build up some other data structure.
4//!
5//! As a result, almost all of the types here use borrows into the
6//! original data buffer, rather than copying into a Vec. The main
7//! exception is bookkeeping where necessary, such as when parsing
8//! Compound tags.
9//!
10//! # Example
11//!
12//! ```rust
13//! # use std::error::Error;
14//! #
15//! # fn main() -> Result<(), Box<dyn Error>> {
16//! use nobility::bin_decode::Document;
17//! use std::fs::File;
18//! use std::io::Read;
19//!
20//! let mut file = File::open("files/hello_world.nbt")?;
21//! let mut data = vec![];
22//! file.read_to_end(&mut data)?;
23//! let cursor = std::io::Cursor::new(data);
24//!
25//! // Either copies the data (plaintext) or decompresses it (gzip).
26//! let doc = Document::load(cursor)?;
27//!
28//! // Returns the root tag's name, and the root tag (always a Compound tag).
29//! // Both of these are borrowing the data inside the Document.
30//! let (name, root) = doc.parse()?;
31//! println!("name: {}", name.decode()?);
32//! println!("{:#?}", root);
33//! #
34//! # Ok(())
35//! # }
36//! ```
37
38use crate::TagType;
39use byteorder::{BigEndian, ByteOrder};
40use flate2::read::GzDecoder;
41use std::fmt;
42use std::io::Error as IoError;
43use std::io::Read;
44
45mod array;
46mod compound;
47mod internal;
48mod list;
49mod string;
50
51pub use array::{IntArray, LongArray, NbtArray, NbtArrayIter};
52pub use compound::{Compound, Entry};
53pub(crate) use internal::{NbtParse, Reader};
54pub use list::{
55 ByteArrayList, CompoundList, DoubleList, FloatList, IntArrayList, IntList, List, ListIter,
56 ListList, LongArrayList, LongList, NbtList, ShortList, StringList,
57};
58pub use string::NbtString;
59
60/// Failures which can occur while parsing an NBT document.
61#[derive(Debug)]
62#[non_exhaustive]
63pub enum ParseError {
64 /// End of file happens when the document is truncated, i.e. we were
65 /// expecting some data to follow after something, and then the file
66 /// ended instead. In particular, this can happen when:
67 ///
68 /// - Any primitive type is not followed by enough bytes to
69 /// construct the primitive type (TAG_Byte, TAG_Short, TAG_Int,
70 /// TAG_Long, TAG_Float, TAG_Double).
71 ///
72 /// - A TAG_Byte_Array, TAG_String, or TAG_Int_Array is not followed
73 /// by as many elements as it says it is.
74 ///
75 /// - A TAG_List does not have as many elements as it says it does, or
76 /// we get an EOF while attempting to parse an element.
77 ///
78 /// - A TAG_Compound does not have a TAG_End to terminate it, or we
79 /// get an EOF while attempting to parse a tag.
80 EOF,
81 /// This happens when there is an unknown tag type in the
82 /// stream. This can happen if Mojang adds new tag types, if a
83 /// document has third party tag types, if the file is corrupted, or
84 /// if there's a bug in the library.
85 UnknownTag { tag: u8, offset: usize },
86 /// This happens when we found a TAG_End where we shouldn't
87 /// have. TAG_End is only supposed to be found after having a
88 /// TAG_Compound, to terminate it. Places we can find this include
89 /// as the root tag of a document and inside of a List.
90 UnexpectedEndTag,
91 /// This library assumes that NBT documents always have a root
92 /// TAG_Compound, and if this invariant fails this error will be
93 /// generated.
94 IncorrectStartTag { tag: TagType },
95}
96
97impl fmt::Display for ParseError {
98 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
99 match self {
100 ParseError::EOF => write!(fmt, "Unexpected end of file"),
101 ParseError::UnknownTag { tag, offset } => {
102 write!(fmt, "Unknown tag {} at offset {:#x}", tag, offset)
103 }
104 ParseError::UnexpectedEndTag => write!(fmt, "Unexpected end tag in document"),
105 ParseError::IncorrectStartTag { tag } => {
106 write!(
107 fmt,
108 "Document starts with tag {:?}, it should only start with Compound.",
109 tag
110 )
111 }
112 }
113 }
114}
115
116impl std::error::Error for ParseError {}
117
118/// Representation for all values that a tag can be.
119#[derive(Clone, Debug, PartialEq)]
120#[non_exhaustive]
121pub enum Tag<'a> {
122 /// A small i8 integer.
123 Byte(i8),
124 /// An i16 integer.
125 Short(i16),
126 /// An i32 integer.
127 Int(i32),
128 /// An i64 integer.
129 Long(i64),
130 /// An f32 number.
131 Float(f32),
132 /// An f64 number.
133 Double(f64),
134 /// An array of raw bytes.
135 ByteArray(&'a [u8]),
136 /// A string containing CESU-8 encoded text.
137 String(NbtString<'a>),
138 /// An array of i32.
139 IntArray(IntArray<'a>),
140 /// An array of i64.
141 LongArray(LongArray<'a>),
142 /// An array which can only contain a single type. The type can be
143 /// any tag, including a nested list. When lists are nested, the
144 /// inner lists do not have to be the same type.
145 List(List<'a>),
146 /// A list of key/value pairs, creating a dictionary.
147 Compound(Compound<'a>),
148}
149
150impl<'a> Tag<'a> {
151 pub(crate) fn read(tag: TagType, reader: &mut Reader<'a>) -> Result<Tag<'a>, ParseError> {
152 match tag {
153 TagType::End => Err(ParseError::UnexpectedEndTag),
154 TagType::Byte => Ok(Tag::Byte(reader.advance(1)?[0] as i8)),
155 TagType::Short => Ok(Tag::Short(BigEndian::read_i16(reader.advance(2)?))),
156 TagType::Int => Ok(Tag::Int(BigEndian::read_i32(reader.advance(4)?))),
157 TagType::Long => Ok(Tag::Long(BigEndian::read_i64(reader.advance(8)?))),
158 TagType::Float => Ok(Tag::Float(BigEndian::read_f32(reader.advance(4)?))),
159 TagType::Double => Ok(Tag::Double(BigEndian::read_f64(reader.advance(8)?))),
160 TagType::String => NbtString::read(reader).map(Tag::String),
161 TagType::List => List::read(reader).map(Tag::List),
162 TagType::Compound => Compound::read(reader).map(Tag::Compound),
163 TagType::ByteArray => read_byte_array(reader).map(Tag::ByteArray),
164 TagType::IntArray => IntArray::read(reader).map(Tag::IntArray),
165 TagType::LongArray => LongArray::read(reader).map(Tag::LongArray),
166 }
167 }
168
169 /// Returns the type that represents this tag.
170 pub fn tag_type(&self) -> TagType {
171 match self {
172 Tag::Byte(_) => TagType::Byte,
173 Tag::Short(_) => TagType::Short,
174 Tag::Int(_) => TagType::Int,
175 Tag::Long(_) => TagType::Long,
176 Tag::Float(_) => TagType::Float,
177 Tag::Double(_) => TagType::Double,
178 Tag::ByteArray(_) => TagType::ByteArray,
179 Tag::String(_) => TagType::String,
180 Tag::List(_) => TagType::List,
181 Tag::Compound(_) => TagType::Compound,
182 Tag::IntArray(_) => TagType::IntArray,
183 Tag::LongArray(_) => TagType::LongArray,
184 }
185 }
186
187 /// If this tag is a string, returns it. Otherwise, returns None. No coercion is performed.
188 pub fn as_string(&self) -> Option<NbtString<'a>> {
189 if let Tag::String(value) = self {
190 Some(*value)
191 } else {
192 None
193 }
194 }
195
196 /// If this tag is a byte array, returns it. Otherwise, returns None.
197 pub fn as_byte_array(&self) -> Option<&[u8]> {
198 if let Tag::ByteArray(value) = self {
199 Some(value)
200 } else {
201 None
202 }
203 }
204
205 /// If this tag is a [Compound], returns it. Otherwise, returns None.
206 pub fn as_compound(&self) -> Option<&Compound<'a>> {
207 if let Tag::Compound(value) = self {
208 Some(value)
209 } else {
210 None
211 }
212 }
213
214 /// If this tag is a [List], returns it. Otherwise, returns None.
215 pub fn as_list(&self) -> Option<&List<'a>> {
216 if let Tag::List(value) = self {
217 Some(value)
218 } else {
219 None
220 }
221 }
222
223 /// Attempts to coerce the tag to an integer. Byte, Short, Int, and
224 /// Long will return a value, other tags will return None.
225 pub fn to_i64(&self) -> Option<i64> {
226 match *self {
227 Tag::Byte(value) => Some(value as i64),
228 Tag::Short(value) => Some(value as i64),
229 Tag::Int(value) => Some(value as i64),
230 Tag::Long(value) => Some(value),
231 _ => None,
232 }
233 }
234
235 /// Attempts to coerce the tag to a f64. Byte, Short, Int, Long,
236 /// Float, and Double will return a value, other tags will return
237 /// None.
238 pub fn to_f64(&self) -> Option<f64> {
239 match *self {
240 Tag::Byte(value) => Some(value as f64),
241 Tag::Short(value) => Some(value as f64),
242 Tag::Int(value) => Some(value as f64),
243 Tag::Long(value) => Some(value as f64),
244 Tag::Float(value) => Some(value as f64),
245 Tag::Double(value) => Some(value),
246 _ => None,
247 }
248 }
249
250 /// Attempts to coerce the tag to a f32. Byte, Short, Int, Long,
251 /// Float, and Double will return a value, other tags will return
252 /// None.
253 pub fn to_f32(&self) -> Option<f32> {
254 match *self {
255 Tag::Byte(value) => Some(value as f32),
256 Tag::Short(value) => Some(value as f32),
257 Tag::Int(value) => Some(value as f32),
258 Tag::Long(value) => Some(value as f32),
259 Tag::Float(value) => Some(value),
260 Tag::Double(value) => Some(value as f32),
261 _ => None,
262 }
263 }
264
265 /// If the tag is in the 1.16+ UUID format (IntArray of length 4),
266 /// returns it as big endian bytes. Otherwise, returns None.
267 pub fn to_uuid_bytes(&self) -> Option<[u8; 16]> {
268 if let Tag::IntArray(array) = self {
269 if array.len() == 4 {
270 let mut buf = [0; 16];
271 BigEndian::write_i32(&mut buf[0..4], array.get(0).unwrap());
272 BigEndian::write_i32(&mut buf[4..8], array.get(1).unwrap());
273 BigEndian::write_i32(&mut buf[8..12], array.get(2).unwrap());
274 BigEndian::write_i32(&mut buf[12..16], array.get(3).unwrap());
275 return Some(buf);
276 }
277 }
278 None
279 }
280
281 /// Similar to [Tag::to_uuid_bytes], but returns a [uuid::Uuid]. Requires the `uuid` feature.
282 #[cfg(feature = "uuid")]
283 pub fn to_uuid(&self) -> Option<uuid::Uuid> {
284 self.to_uuid_bytes().map(uuid::Uuid::from_bytes)
285 }
286}
287
288pub(crate) fn read_type(reader: &mut Reader<'_>) -> Result<TagType, ParseError> {
289 let offset = reader.position;
290 match reader.advance(1)?[0] {
291 0 => Ok(TagType::End),
292 1 => Ok(TagType::Byte),
293 2 => Ok(TagType::Short),
294 3 => Ok(TagType::Int),
295 4 => Ok(TagType::Long),
296 5 => Ok(TagType::Float),
297 6 => Ok(TagType::Double),
298 7 => Ok(TagType::ByteArray),
299 8 => Ok(TagType::String),
300 9 => Ok(TagType::List),
301 10 => Ok(TagType::Compound),
302 11 => Ok(TagType::IntArray),
303 12 => Ok(TagType::LongArray),
304 tag => Err(ParseError::UnknownTag { tag, offset }),
305 }
306}
307
308fn read_byte_array<'a>(reader: &mut Reader<'a>) -> Result<&'a [u8], ParseError> {
309 let len = BigEndian::read_u32(reader.advance(4)?);
310 Ok(reader.advance(len as usize)?)
311}
312
/// An NBT document. It owns the document's data; all other decoder
/// types are borrows of the data stored in it.
///
/// # Example
///
/// ```rust
/// # use std::error::Error;
/// #
/// # fn main() -> Result<(), Box<dyn Error>> {
/// use nobility::bin_decode::Document;
/// # let input = Document::doctest_demo();
///
/// // Either copies the data (plaintext) or decompresses it (gzip). Accepts
/// // any source that implements Read + Clone.
/// let doc = Document::load(input)?;
///
/// // Returns the root tag's name, and the root tag (always a Compound tag).
/// // Both of these are borrowing the data inside the Document.
/// let (name, root) = doc.parse()?;
/// # let _ = (name, root);
/// # Ok(())
/// # }
/// ```
#[derive(Clone, PartialEq)]
pub struct Document {
    /// The document's bytes as read by `load` (already decompressed when
    /// the source was a gzip stream).
    data: Vec<u8>,
}
341
342impl Document {
343 #[doc(hidden)]
344 pub fn doctest_demo() -> impl Read + Clone {
345 use std::fs::File;
346
347 let mut file = File::open("files/hello_world.nbt").expect("File should exist");
348 let mut data = vec![];
349 file.read_to_end(&mut data).unwrap();
350 std::io::Cursor::new(data)
351 }
352
353 /// Loads a document from any source implementing Read. Sources that
354 /// are compressed with gzip will be automatically decompressed,
355 /// otherwise the data will just be copied.
356 ///
357 /// # Errors
358 ///
359 /// Errors from this function are either from the input [Read]
360 /// object or from [GzDecoder].
361 pub fn load<R: Read + Clone>(mut input: R) -> Result<Document, IoError> {
362 let mut decoder = GzDecoder::new(input.clone());
363 let mut data = vec![];
364 if decoder.header().is_some() {
365 // Valid gzip stream
366 decoder.read_to_end(&mut data)?;
367 } else {
368 // Not a gzip stream
369 input.read_to_end(&mut data)?;
370 }
371 Ok(Document { data })
372 }
373
374 /// Parses the document and returns the name and contents of the
375 /// root tag.
376 ///
377 /// # Errors
378 ///
379 /// The only cases that this should return an error are:
380 ///
381 /// 1. The input is not an NBT document. This will likely generate
382 /// [ParseError::IncorrectStartTag].
383 /// 2. The input is an NBT document, but is malformed/corrupted, or
384 /// in the Bedrock edition version of the format.
385 /// 2. The document is compressed using something other than
386 /// gzip. This will likely generate
387 /// [ParseError::IncorrectStartTag].
388 /// 3. The specification has changed due to a new Minecraft version.
389 /// This will likely generate [ParseError::UnknownTag].
390 /// 4. There's a bug in the parser.
391 pub fn parse(&self) -> Result<(NbtString, Compound), ParseError> {
392 let mut reader = Reader::new(&self.data);
393 let tag = read_type(&mut reader)?;
394 if tag != TagType::Compound {
395 return Err(ParseError::IncorrectStartTag { tag });
396 }
397 let name = NbtString::read(&mut reader)?;
398 let root = Compound::read(&mut reader)?;
399 Ok((name, root))
400 }
401}
402
403impl fmt::Debug for Document {
404 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
405 write!(fmt, "Document({} B buffer)", self.data.len() / 1000)
406 }
407}