mca_parser/
lib.rs

1#![warn(missing_docs)]
2//! # mca-parser
3//!
4//! A library for parsing Minecraft's [Region files](https://minecraft.wiki/w/Region_file_format)
5//!
6//! ## Usage
7//!
8//! This library should be pretty simple to use,
9//!
10//! ```no_run
11//! # use mca_parser::*;
12//! # use std::fs::File;
13//! // Create a Region from an open file
14//! let mut file = File::open("r.0.0.mca")?;
15//! let region = Region::from_reader(&mut file)?;
16//!
17//! // `chunk` is raw chunk data, so we need to parse it
18//! let chunk = region.get_chunk(0, 0)?;
19//! if let Some(chunk) = chunk {
20//!     // Parse the raw chunk data into structured NBT format
21//!     let parsed = chunk.parse()?;
22//!     println!("{:?}", parsed.status);
23//! } else {
24//!     // If the chunk is None, it has not been generated
25//!     println!("Chunk has not been generated.");
26//! }
27//! # Ok::<(), mca_parser::error::Error>(())
28//! ```
29
30// TODO: Figure out a nice way to encode the types of coordinates into the type system:
31//     - i.e. relative vs absolute and region vs chunk vs block
32//     - probably use a trait of some kind so that we can convert between them easily (maybe
33//     just `Into`)
34//
// TODO: Add parsing for singleplayer worlds:
//     - Open the world folder, look for the `region` dir which will be the overworld, look for
//     any dirs that start with `DIM`, those will be the other dimensions
//     - Put it into a struct that allows some handy methods
39//
40// TODO: Add parsing for server worlds:
41//     - Open the server folder
42//     - Open the user-specified worlds (we can't assume since world folder names can be customised
43//     on a server)
44//     - The user will need to specify quite a bit more for the server worlds since they can change
45//     more (dimension id, world folder, etc)
46
47use std::{
48    collections::HashMap,
49    io::{self, Read},
50    ops::Deref,
51    path::{Path, PathBuf},
52};
53
54use bigendian::BigEndian;
55use error::Error;
56
57pub use data::*;
58pub use error::Result;
59
60mod bigendian;
61pub mod data;
62pub mod error;
63pub mod nbt;
64#[macro_use]
65mod util;
66
67#[cfg(test)]
68mod test;
69
/// Represents a region file, with methods to access data within it.
///
/// The struct is unsized: two fixed 1024-entry tables (`locations`, then `timestamps`) are
/// followed by a `[u8]` tail holding the rest of the bytes, which is why a [`Region`] is only
/// ever handled through a reference or a [`Box`].
///
/// <https://minecraft.wiki/w/Region_file_format>
#[derive(Debug, Eq, PartialEq)]
#[repr(C)]
pub struct Region {
    // One entry per chunk, indexed by `Region::chunk_index(x, z)`.
    locations: [Location; 1024],
    // One 4-byte timestamp per chunk, indexed the same way as `locations`.
    timestamps: [BigEndian<4>; 1024],
    // Everything after the two tables; `Region::get_chunk` slices chunk payloads out of this.
    data: [u8],
}
80
81impl Region {
82    /// Parse this slice into a Region.  This does no input validation except confirm that the 8KiB
83    /// header is there, further validation is done in [`Region::get_chunk`] and [`Chunk::parse`]
84    /// to help prevent unnecessary memory allocation.
85    ///
86    /// Note: Changing the data in the slice after calling this method will change the [`Region`]
87    /// returned by this method, so it is advised against
88    pub fn from_slice(slice: &[u8]) -> Result<&Region> {
89        if slice.len() < 8192 {
90            Err(Error::MissingHeader)
91        } else {
92            // SAFETY: `Region` is (1024 * 4 * 2 = 8192) bytes + some extra data.  We have confirmed that we have
93            // the 8192 byte header, so this pointer deref is okay.
94            let ptr = &slice[..slice.len() - 8192] as *const [u8] as *const Region;
95            Ok(unsafe { &*ptr })
96        }
97    }
98
99    /// Create a Region from an array if the size is known at compile time.
100    ///
101    /// # Safety
102    /// - `N` >= 8192
103    /// - Array _should_ contain valid bytes for a region file, though if it doesn't, that issue
104    /// will be caught in [`Region::get_chunk`] and [`Chunk::parse`]
105    ///
106    /// # Usage
107    ///
108    /// The intended usage of this method is as a const value:
109    ///
110    /// ```
111    /// # use mca_parser::Region;
112    /// const REGION: &Region = unsafe { Region::from_array(include_bytes!("../test/r.0.0.mca")) };
113    /// ```
114    ///
115    /// This method will panic if `N` < 8192, thus failing to compile when used as a const value:
116    ///
117    /// ```compile_fail
118    /// const REGION: &Region = unsafe { Region::from_array(&[0; 16]) };
119    /// ```
120    pub const unsafe fn from_array<const N: usize>(arr: &[u8; N]) -> &'static Region {
121        assert!(N >= 8192);
122        &*(std::ptr::slice_from_raw_parts(arr as *const u8, N - 8192) as *const Region)
123    }
124
125    /// A method for ease of use, effectively does the same thing as calling [`Read::read_to_end`]
126    /// and then passing that to [`Region::from_slice`], with the only difference being that it
127    /// returns an owned box rather than a reference.
128    ///
129    /// # Usage
130    ///
131    /// ```
132    /// # use mca_parser::*;
133    /// # use std::fs::File;
134    /// let mut file = File::open("./test/r.0.0.mca")?;
135    /// let region = Region::from_reader(&mut file)?;
136    /// # Ok::<_, error::Error>(())
137    /// ```
138    pub fn from_reader<R>(r: &mut R) -> Result<Box<Region>>
139    where
140        R: Read,
141    {
142        use std::mem::ManuallyDrop;
143
144        let mut vec = ManuallyDrop::new(Vec::new());
145        r.read_to_end(&mut vec)?;
146
147        if vec.len() < 8192 {
148            Err(Error::MissingHeader)
149        } else {
150            // SAFETY: `Region` is (1024 * 4 * 2 = 8192) bytes + some extra data.
151            let slice =
152                unsafe { std::slice::from_raw_parts_mut(vec.as_mut_ptr(), vec.len() - 8192) };
153            // SAFETY: We know that the vec is allocated on the heap, so we can form a box from it
154            Ok(unsafe { Box::from_raw(slice as *mut [u8] as *mut Region) })
155        }
156    }
157
158    /// Convert x and z into the correct index into the `locations` and `timestamps` arrays
159    ///
160    /// # Panics
161    ///
162    /// - If `x` and `z` are not within `0..=31`
163    // This is a simple calculation, and I'm sure the compiler would inline it, but just to make sure
164    #[inline(always)]
165    const fn chunk_index(x: u32, z: u32) -> usize {
166        assert!(x < 32);
167        assert!(z < 32);
168
169        z as usize * 32 + x as usize
170    }
171
172    /// Validate that this Region contains all valid chunks by trying to parse every chunk.
173    ///
174    /// # Important Note
175    ///
176    /// - This method is obviously slow and uses a decent amount of memory.  It is
177    /// recommended to assume the data is correct and validate it as you use the
178    /// [`Region::get_chunk`] and [`Chunk::parse`] methods.
179    /// - This method should only be used when you absolutely _need_ to validate the data is
180    /// correct and can't use the [`Region::get_chunk`] and [`Chunk::parse`] methods
181    pub fn validate(&self) -> Result<()> {
182        for x in 0..32 {
183            for z in 0..32 {
184                if let Some(chunk) = self.get_chunk(x, z)? {
185                    chunk.parse()?;
186                }
187            }
188        }
189        Ok(())
190    }
191
192    /// Get a timestamp for a chunk in this [`Region`]
193    ///
194    /// # Panics
195    ///
196    /// - If `x` and `z` are not within `0..=31`
197    pub const fn get_timestamp(&self, x: u32, z: u32) -> u32 {
198        self.timestamps[Self::chunk_index(x, z)].as_u32()
199    }
200
201    /// Check if the chunk at `x` and `z` have been generated
202    ///
203    /// # Panics
204    ///
205    /// - If `x` and `z` are not within `0..=31`
206    pub const fn has_chunk(&self, x: u32, z: u32) -> bool {
207        !self.locations[Self::chunk_index(x, z)].is_empty()
208    }
209
210    /// Get a chunk from this [`Region`] using relative coordinates within the region
211    ///
212    /// # Return Values
213    ///
214    /// - `Err` if data is invalid
215    /// - `Ok(None)` if the data is valid, but there is no chunk generated
216    /// - `Ok(Some(&Chunk))` if the data is valid and the chunk exists
217    ///
218    /// This will return a `&Chunk` which references this `Region`, if you want an owned
219    /// version, call [`Chunk::boxed`] on the returned chunk.
220    ///
221    /// # Panics
222    ///
223    /// - If `x` and `z` are not within `0..=31`
224    pub fn get_chunk(&self, chunk_x: u32, chunk_z: u32) -> Result<Option<&Chunk>> {
225        let loc = &self.locations[Self::chunk_index(chunk_x, chunk_z)];
226        let offset: u32 = loc.offset.into();
227
228        if loc.is_empty() {
229            return Ok(None);
230        }
231
232        // Subtract 2 from the offset to account for the 2 * 4096 bytes that we took from the
233        // beginning for the location and timestamps
234        let start = (offset - 2) as usize * 4096;
235
236        if self.data.len() < start + 4 {
237            return Err(Error::UnexpectedEof);
238        }
239
240        // SAFETY: We know that we have these bytes because it's checked above and according to the
241        // minecraft wiki, these bytes are the length and since we specifically grab `4`
242        // bytes, we know that `BigEndian<4>` is valid.
243        let len = u32::from(unsafe { *(self.data[start..][..4].as_ptr() as *const BigEndian<4>) })
244            as usize;
245
246        if self.data.len() < start + 4 + len {
247            return Err(Error::UnexpectedEof);
248        }
249
250        // SAFETY: We have checked that we have `len` bytes after the starting point of `start +
251        // 4`, so we can trivially convert that to a Chunk
252        let chunk = unsafe {
253            &*(core::ptr::slice_from_raw_parts(self.data[start + 4..].as_ptr(), len - 1)
254                as *const Chunk)
255        };
256
257        Ok(Some(chunk))
258    }
259
260    /// Get a chunk from this [`Region`] using relative block coordinates within the region
261    ///
262    /// # Return Values
263    ///
264    /// - `Err` if data is invalid
265    /// - `Ok(None)` if the data is valid, but there is no chunk generated
266    /// - `Ok(Some(&Chunk))` if the data is valid and the chunk exists
267    ///
268    /// This will return a `&Chunk` which references this `Region`, if you want an owned
269    /// version, call [`Chunk::boxed`] on the returned chunk.
270    ///
271    /// # Panics
272    ///
273    /// - If `x` and `z` are not within `0..=511`
274    pub fn get_chunk_from_block(&self, block_x: u32, block_z: u32) -> Result<Option<&Chunk>> {
275        self.get_chunk(block_x / 16, block_z / 16)
276    }
277}
278
/// A handle to a region file on disk, identified by its path
#[derive(Clone, Debug)]
pub struct RegionFile {
    /// The path to this region file on disk
    pub path: PathBuf,
}

impl RegionFile {
    /// Create a [`RegionFile`] from anything that can be viewed as a path; the path is copied
    /// into an owned [`PathBuf`].
    pub fn new<P>(path: P) -> Self
    where
        P: AsRef<Path>,
    {
        let path = PathBuf::from(path.as_ref());
        Self { path }
    }
}
297
298/// Create an iterator over the contents of a directory, allowing each region within to be parsed
299pub fn parse_directory<P>(path: P) -> io::Result<impl Iterator<Item = RegionFile>>
300where
301    P: AsRef<Path>,
302{
303    let path = path.as_ref();
304    assert!(path.is_dir());
305
306    let rd = std::fs::read_dir(path)?;
307
308    let iter = rd.filter_map(|de| {
309        let de = de.ok()?;
310
311        let path = de.path();
312        if !path.is_file() {
313            return None;
314        }
315
316        Some(RegionFile::new(path))
317    });
318
319    Ok(iter)
320}
321
/// Minecraft's numeric dimension IDs, with the three built-in dimensions named
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DimensionID {
    /// ID: `0`
    Overworld,
    /// ID: `-1`
    Nether,
    /// ID: `1`
    End,
    /// A custom DimensionID
    Custom(i32),
}

impl DimensionID {
    /// Get the numeric id of this dimension; for [`DimensionID::Custom`] this is the wrapped
    /// value
    pub fn id(&self) -> i32 {
        match *self {
            Self::Custom(raw) => raw,
            Self::Nether => -1,
            Self::Overworld => 0,
            Self::End => 1,
        }
    }
}

impl From<i32> for DimensionID {
    /// Map `-1`, `0` and `1` to the built-in dimensions and anything else to
    /// [`DimensionID::Custom`].
    fn from(value: i32) -> Self {
        match value {
            -1 => Self::Nether,
            0 => Self::Overworld,
            1 => Self::End,
            other => Self::Custom(other),
        }
    }
}
357
/// A wrapper around [`Region`] that holds either a borrowed reference or an owned [`Box`], so
/// that a single return type can cover both cases.
///
/// [`Deref`] is implemented for this enum, so in theory, there should never be a need to match on
/// this.
///
/// This is primarily used in the [`RegionParser`] trait, so that the implementers can return
/// either a reference to or a box of a [`Region`].
#[derive(Debug)]
pub enum RegionRef<'a> {
    /// Borrowed Region (via reference)
    Borrowed(&'a Region),
    /// Owned Region (via box)
    Owned(Box<Region>),
}
373
374impl<'a> From<&'a Region> for RegionRef<'a> {
375    fn from(value: &'a Region) -> Self {
376        Self::Borrowed(value)
377    }
378}
379
380impl From<Box<Region>> for RegionRef<'_> {
381    fn from(value: Box<Region>) -> Self {
382        Self::Owned(value)
383    }
384}
385
386impl Deref for RegionRef<'_> {
387    type Target = Region;
388
389    fn deref(&self) -> &Self::Target {
390        match self {
391            RegionRef::Borrowed(r) => r,
392            RegionRef::Owned(r) => r,
393        }
394    }
395}
396
/// A trait which represents something that can be parsed into a region and optionally contains
/// information about which region in the world it is.
pub trait RegionParser {
    /// Parse this into a [`Region`] and return it through [`RegionRef`], so that implementers
    /// can hand back either an owned region or a reference to one.
    fn parse(&self) -> Result<RegionRef<'_>>;

    /// Get the position in the world (using
    /// [region coordinates](https://minecraft.wiki/w/Region_file_format#Location)) of the region that will
    /// be parsed by this [`RegionParser`].  If there is no information as to which region this
    /// is, then [`None`] should be returned.
    fn position(&self) -> Option<(i32, i32)>;
}
410
411impl RegionParser for RegionFile {
412    fn position(&self) -> Option<(i32, i32)> {
413        let filename = self.path.file_name()?.to_string_lossy();
414        let mut parts = filename.split('.');
415        if parts.next() != Some("r") {
416            return None;
417        }
418
419        let Some(Ok(x)) = parts.next().map(|s| s.parse()) else {
420            return None;
421        };
422
423        let Some(Ok(z)) = parts.next().map(|s| s.parse()) else {
424            return None;
425        };
426
427        if parts.next() != Some("mca") {
428            return None;
429        }
430
431        Some((x, z))
432    }
433
434    fn parse(&self) -> Result<RegionRef<'_>> {
435        let mut file = std::fs::File::open(&self.path)?;
436        Ok(Region::from_reader(&mut file)?.into())
437    }
438}
439
/// Represents a Dimension in a Minecraft world
///
/// `R` is the type used to obtain each region; the methods on this type require it to
/// implement [`RegionParser`].
pub struct Dimension<R> {
    /// The ID for the dimension, see [`DimensionID`]
    pub id: Option<DimensionID>,
    // Region parsers keyed by the `(x, z)` position each one reports.
    regions: HashMap<(i32, i32), R>,
}
446
447impl Dimension<RegionFile> {
448    /// Create a dimension from a path to a directory, the directory's name is used to get the id
449    /// if it is in the form of `DIM{id}`.
450    pub fn from_path<P>(path: P) -> io::Result<Self>
451    where
452        P: AsRef<Path>,
453    {
454        let path = path.as_ref();
455        let file = path.file_name();
456        let id = file
457            .and_then(|n| {
458                n.to_string_lossy()
459                    .strip_prefix("DIM")
460                    .and_then(|n| n.parse().ok())
461            })
462            .map(|n: i32| n.into());
463
464        Ok(Self::from_iter(id, parse_directory(path)?))
465    }
466}
467
468impl<R> Dimension<R>
469where
470    R: RegionParser,
471{
472    /// Construct a [`Dimension`] from an iterator which yields items which implement the
473    /// [`RegionParser`] trait.
474    ///
475    /// Every parser in the iterator must be able to determine a position, otherwise this call will
476    /// panic.
477    ///
478    /// Note: this call consumes the iterator, but does _not_ call [`RegionParser::parse`] on the
479    /// items.
480    pub fn from_iter<I>(id: Option<DimensionID>, iter: I) -> Self
481    where
482        I: Iterator<Item = R>,
483    {
484        Self {
485            id,
486            regions: iter.map(|rf| (rf.position().unwrap(), rf)).collect(),
487        }
488    }
489
490    /// Check if this dimension has a region at this location
491    pub fn has_region(&self, region_x: i32, region_z: i32) -> bool {
492        self.regions.contains_key(&(region_x, region_z))
493    }
494
495    /// Parse a region file at the given location (using [region coordinates](https://minecraft.wiki/w/Region_file_format#Location))
496    ///
497    /// # Panics
498    ///
499    /// If the region does not exist in this Dimension, use [`Dimension::has_region`] to check
500    /// before making a call to this method.
501    pub fn parse_region(&self, region_x: i32, region_z: i32) -> Result<RegionRef> {
502        self.regions[&(region_x, region_z)].parse()
503    }
504
505    /// Get an iterator over the [`RegionParser`]s contained in this [`Dimension`]
506    pub fn regions(&self) -> impl Iterator<Item = &R> {
507        self.regions.values()
508    }
509
510    /// Get an iterator over the locations of regions in this [`Dimension`] in the format of (x, z).
511    pub fn locations(&self) -> impl Iterator<Item = &(i32, i32)> {
512        self.regions.keys()
513    }
514
515    /// Get a region from an absolute chunk location (i.e. the "Chunk:" line in the F3
516    /// screen)
517    ///
518    /// # Return Values
519    ///
520    /// - `Ok(None)` if the region does not exist
521    /// - `Ok(Some(Region))` if the region exists and parsed successfully
522    /// - `Err(_)` if the region failed to parse
523    pub fn get_region_from_chunk(&self, chunk_x: i32, chunk_z: i32) -> Result<Option<RegionRef>> {
524        // self.has_region(chunk_x / 32, chunk_z / 32)
525        //     .then(|| self.parse_region(chunk_x / 32, chunk_z / 32))
526        if self.has_region(chunk_x / 32, chunk_z / 32) {
527            Ok(Some(self.parse_region(chunk_x / 32, chunk_z / 32)?))
528        } else {
529            Ok(None)
530        }
531    }
532
533    /// Get a chunk from an absolute chunk location (i.e. the "Chunk:" line in the F3
534    /// screen)
535    ///
536    /// Note: This is only recommended if you only need one chunk from this region, otherwise, you
537    /// should use [`Dimension::parse_region`], [`Region::get_chunk`], and [`Chunk::parse`].  Using
538    /// those methods over this one also allows for more fine-grained control over error handling.
539    ///
540    /// # Return Values
541    ///
542    /// - `Ok(None)` if the region does not exist
543    /// - `Ok(Some(ParsedChunk))` if everything parsed successfully
544    /// - `Err(_)` if the region/chunk failed to parse
545    pub fn get_chunk_in_world(&self, chunk_x: i32, chunk_z: i32) -> Result<Option<ParsedChunk>> {
546        let region = self.get_region_from_chunk(chunk_x, chunk_z);
547
548        match region {
549            Ok(None) => Ok(None),
550            Ok(Some(region)) => {
551                match region.get_chunk(
552                    positive_mod!(chunk_x, 32) as u32,
553                    positive_mod!(chunk_z, 32) as u32,
554                ) {
555                    Ok(Some(chunk)) => match chunk.parse() {
556                        Ok(p) => Ok(Some(p)),
557                        Err(e) => Err(e),
558                    },
559                    Ok(None) => Ok(None),
560                    Err(e) => Err(e),
561                }
562            }
563            Err(e) => Err(e),
564        }
565    }
566}