mca_parser/lib.rs
1#![warn(missing_docs)]
2//! # mca-parser
3//!
4//! A library for parsing Minecraft's [Region files](https://minecraft.wiki/w/Region_file_format)
5//!
6//! ## Usage
7//!
//! This library should be pretty simple to use:
9//!
10//! ```no_run
11//! # use mca_parser::*;
12//! # use std::fs::File;
13//! // Create a Region from an open file
14//! let mut file = File::open("r.0.0.mca")?;
15//! let region = Region::from_reader(&mut file)?;
16//!
17//! // `chunk` is raw chunk data, so we need to parse it
18//! let chunk = region.get_chunk(0, 0)?;
19//! if let Some(chunk) = chunk {
20//! // Parse the raw chunk data into structured NBT format
21//! let parsed = chunk.parse()?;
22//! println!("{:?}", parsed.status);
23//! } else {
24//! // If the chunk is None, it has not been generated
25//! println!("Chunk has not been generated.");
26//! }
27//! # Ok::<(), mca_parser::error::Error>(())
28//! ```
29
30// TODO: Figure out a nice way to encode the types of coordinates into the type system:
31// - i.e. relative vs absolute and region vs chunk vs block
32// - probably use a trait of some kind so that we can convert between them easily (maybe
33// just `Into`)
34//
// TODO: Add parsing for singleplayer worlds:
//  - Open the world folder, look for the `region` dir which will be the overworld, look for
//    any dirs that start with `DIM`, those will be the other dimensions
38// - Put it into a struct that allows some handy methods
39//
40// TODO: Add parsing for server worlds:
41// - Open the server folder
42// - Open the user-specified worlds (we can't assume since world folder names can be customised
43// on a server)
44// - The user will need to specify quite a bit more for the server worlds since they can change
45// more (dimension id, world folder, etc)
46
47use std::{
48 collections::HashMap,
49 io::{self, Read},
50 ops::Deref,
51 path::{Path, PathBuf},
52};
53
54use bigendian::BigEndian;
55use error::Error;
56
57pub use data::*;
58pub use error::Result;
59
60mod bigendian;
61pub mod data;
62pub mod error;
63pub mod nbt;
64#[macro_use]
65mod util;
66
67#[cfg(test)]
68mod test;
69
70/// Represents a region file, with methods to access data within it.
71///
72/// <https://minecraft.wiki/w/Region_file_format>
73#[derive(Debug, Eq, PartialEq)]
74#[repr(C)]
75pub struct Region {
76 locations: [Location; 1024],
77 timestamps: [BigEndian<4>; 1024],
78 data: [u8],
79}
80
81impl Region {
82 /// Parse this slice into a Region. This does no input validation except confirm that the 8KiB
83 /// header is there, further validation is done in [`Region::get_chunk`] and [`Chunk::parse`]
84 /// to help prevent unnecessary memory allocation.
85 ///
86 /// Note: Changing the data in the slice after calling this method will change the [`Region`]
87 /// returned by this method, so it is advised against
88 pub fn from_slice(slice: &[u8]) -> Result<&Region> {
89 if slice.len() < 8192 {
90 Err(Error::MissingHeader)
91 } else {
92 // SAFETY: `Region` is (1024 * 4 * 2 = 8192) bytes + some extra data. We have confirmed that we have
93 // the 8192 byte header, so this pointer deref is okay.
94 let ptr = &slice[..slice.len() - 8192] as *const [u8] as *const Region;
95 Ok(unsafe { &*ptr })
96 }
97 }
98
99 /// Create a Region from an array if the size is known at compile time.
100 ///
101 /// # Safety
102 /// - `N` >= 8192
103 /// - Array _should_ contain valid bytes for a region file, though if it doesn't, that issue
104 /// will be caught in [`Region::get_chunk`] and [`Chunk::parse`]
105 ///
106 /// # Usage
107 ///
108 /// The intended usage of this method is as a const value:
109 ///
110 /// ```
111 /// # use mca_parser::Region;
112 /// const REGION: &Region = unsafe { Region::from_array(include_bytes!("../test/r.0.0.mca")) };
113 /// ```
114 ///
115 /// This method will panic if `N` < 8192, thus failing to compile when used as a const value:
116 ///
117 /// ```compile_fail
118 /// const REGION: &Region = unsafe { Region::from_array(&[0; 16]) };
119 /// ```
120 pub const unsafe fn from_array<const N: usize>(arr: &[u8; N]) -> &'static Region {
121 assert!(N >= 8192);
122 &*(std::ptr::slice_from_raw_parts(arr as *const u8, N - 8192) as *const Region)
123 }
124
125 /// A method for ease of use, effectively does the same thing as calling [`Read::read_to_end`]
126 /// and then passing that to [`Region::from_slice`], with the only difference being that it
127 /// returns an owned box rather than a reference.
128 ///
129 /// # Usage
130 ///
131 /// ```
132 /// # use mca_parser::*;
133 /// # use std::fs::File;
134 /// let mut file = File::open("./test/r.0.0.mca")?;
135 /// let region = Region::from_reader(&mut file)?;
136 /// # Ok::<_, error::Error>(())
137 /// ```
138 pub fn from_reader<R>(r: &mut R) -> Result<Box<Region>>
139 where
140 R: Read,
141 {
142 use std::mem::ManuallyDrop;
143
144 let mut vec = ManuallyDrop::new(Vec::new());
145 r.read_to_end(&mut vec)?;
146
147 if vec.len() < 8192 {
148 Err(Error::MissingHeader)
149 } else {
150 // SAFETY: `Region` is (1024 * 4 * 2 = 8192) bytes + some extra data.
151 let slice =
152 unsafe { std::slice::from_raw_parts_mut(vec.as_mut_ptr(), vec.len() - 8192) };
153 // SAFETY: We know that the vec is allocated on the heap, so we can form a box from it
154 Ok(unsafe { Box::from_raw(slice as *mut [u8] as *mut Region) })
155 }
156 }
157
158 /// Convert x and z into the correct index into the `locations` and `timestamps` arrays
159 ///
160 /// # Panics
161 ///
162 /// - If `x` and `z` are not within `0..=31`
163 // This is a simple calculation, and I'm sure the compiler would inline it, but just to make sure
164 #[inline(always)]
165 const fn chunk_index(x: u32, z: u32) -> usize {
166 assert!(x < 32);
167 assert!(z < 32);
168
169 z as usize * 32 + x as usize
170 }
171
172 /// Validate that this Region contains all valid chunks by trying to parse every chunk.
173 ///
174 /// # Important Note
175 ///
176 /// - This method is obviously slow and uses a decent amount of memory. It is
177 /// recommended to assume the data is correct and validate it as you use the
178 /// [`Region::get_chunk`] and [`Chunk::parse`] methods.
179 /// - This method should only be used when you absolutely _need_ to validate the data is
180 /// correct and can't use the [`Region::get_chunk`] and [`Chunk::parse`] methods
181 pub fn validate(&self) -> Result<()> {
182 for x in 0..32 {
183 for z in 0..32 {
184 if let Some(chunk) = self.get_chunk(x, z)? {
185 chunk.parse()?;
186 }
187 }
188 }
189 Ok(())
190 }
191
192 /// Get a timestamp for a chunk in this [`Region`]
193 ///
194 /// # Panics
195 ///
196 /// - If `x` and `z` are not within `0..=31`
197 pub const fn get_timestamp(&self, x: u32, z: u32) -> u32 {
198 self.timestamps[Self::chunk_index(x, z)].as_u32()
199 }
200
201 /// Check if the chunk at `x` and `z` have been generated
202 ///
203 /// # Panics
204 ///
205 /// - If `x` and `z` are not within `0..=31`
206 pub const fn has_chunk(&self, x: u32, z: u32) -> bool {
207 !self.locations[Self::chunk_index(x, z)].is_empty()
208 }
209
210 /// Get a chunk from this [`Region`] using relative coordinates within the region
211 ///
212 /// # Return Values
213 ///
214 /// - `Err` if data is invalid
215 /// - `Ok(None)` if the data is valid, but there is no chunk generated
216 /// - `Ok(Some(&Chunk))` if the data is valid and the chunk exists
217 ///
218 /// This will return a `&Chunk` which references this `Region`, if you want an owned
219 /// version, call [`Chunk::boxed`] on the returned chunk.
220 ///
221 /// # Panics
222 ///
223 /// - If `x` and `z` are not within `0..=31`
224 pub fn get_chunk(&self, chunk_x: u32, chunk_z: u32) -> Result<Option<&Chunk>> {
225 let loc = &self.locations[Self::chunk_index(chunk_x, chunk_z)];
226 let offset: u32 = loc.offset.into();
227
228 if loc.is_empty() {
229 return Ok(None);
230 }
231
232 // Subtract 2 from the offset to account for the 2 * 4096 bytes that we took from the
233 // beginning for the location and timestamps
234 let start = (offset - 2) as usize * 4096;
235
236 if self.data.len() < start + 4 {
237 return Err(Error::UnexpectedEof);
238 }
239
240 // SAFETY: We know that we have these bytes because it's checked above and according to the
241 // minecraft wiki, these bytes are the length and since we specifically grab `4`
242 // bytes, we know that `BigEndian<4>` is valid.
243 let len = u32::from(unsafe { *(self.data[start..][..4].as_ptr() as *const BigEndian<4>) })
244 as usize;
245
246 if self.data.len() < start + 4 + len {
247 return Err(Error::UnexpectedEof);
248 }
249
250 // SAFETY: We have checked that we have `len` bytes after the starting point of `start +
251 // 4`, so we can trivially convert that to a Chunk
252 let chunk = unsafe {
253 &*(core::ptr::slice_from_raw_parts(self.data[start + 4..].as_ptr(), len - 1)
254 as *const Chunk)
255 };
256
257 Ok(Some(chunk))
258 }
259
260 /// Get a chunk from this [`Region`] using relative block coordinates within the region
261 ///
262 /// # Return Values
263 ///
264 /// - `Err` if data is invalid
265 /// - `Ok(None)` if the data is valid, but there is no chunk generated
266 /// - `Ok(Some(&Chunk))` if the data is valid and the chunk exists
267 ///
268 /// This will return a `&Chunk` which references this `Region`, if you want an owned
269 /// version, call [`Chunk::boxed`] on the returned chunk.
270 ///
271 /// # Panics
272 ///
273 /// - If `x` and `z` are not within `0..=511`
274 pub fn get_chunk_from_block(&self, block_x: u32, block_z: u32) -> Result<Option<&Chunk>> {
275 self.get_chunk(block_x / 16, block_z / 16)
276 }
277}
278
/// A handle to a region file on disk, identified only by its path.
///
/// Constructing one does not open or read the file.
#[derive(Clone, Debug)]
pub struct RegionFile {
    /// Where the region file lives on disk
    pub path: PathBuf,
}
285
286impl RegionFile {
287 /// Create a [`RegionFile`] from a path to a file
288 pub fn new<P>(path: P) -> Self
289 where
290 P: AsRef<Path>,
291 {
292 Self {
293 path: path.as_ref().to_path_buf(),
294 }
295 }
296}
297
298/// Create an iterator over the contents of a directory, allowing each region within to be parsed
299pub fn parse_directory<P>(path: P) -> io::Result<impl Iterator<Item = RegionFile>>
300where
301 P: AsRef<Path>,
302{
303 let path = path.as_ref();
304 assert!(path.is_dir());
305
306 let rd = std::fs::read_dir(path)?;
307
308 let iter = rd.filter_map(|de| {
309 let de = de.ok()?;
310
311 let path = de.path();
312 if !path.is_file() {
313 return None;
314 }
315
316 Some(RegionFile::new(path))
317 });
318
319 Ok(iter)
320}
321
/// Identifies a dimension using Minecraft's numeric dimension IDs
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DimensionID {
    /// The Overworld, ID `0`
    Overworld,
    /// The Nether, ID `-1`
    Nether,
    /// The End, ID `1`
    End,
    /// Any other dimension, carrying its numeric ID
    Custom(i32),
}
334
335impl DimensionID {
336 /// Get the id of this dimension as a number
337 pub fn id(&self) -> i32 {
338 match self {
339 Self::Overworld => 0,
340 Self::Nether => -1,
341 Self::End => 1,
342 Self::Custom(n) => *n,
343 }
344 }
345}
346
347impl From<i32> for DimensionID {
348 fn from(value: i32) -> Self {
349 match value {
350 0 => Self::Overworld,
351 -1 => Self::Nether,
352 1 => Self::End,
353 n => Self::Custom(n),
354 }
355 }
356}
357
358/// A wrapper around [`Region`] that allows either a reference to be used or a Box
359/// over return types.
360///
361/// [`Deref`] is implemented for this enum, so in theory, there should never be a need to match on
362/// this.
363///
364/// This is primarily used in the [`RegionParser`] trait, so that the implementers can return
365/// either a reference to or a box of a [`Region`].
366#[derive(Debug)]
367pub enum RegionRef<'a> {
368 /// Borrowed Region (via reference)
369 Borrowed(&'a Region),
370 /// Owned Region (via box)
371 Owned(Box<Region>),
372}
373
374impl<'a> From<&'a Region> for RegionRef<'a> {
375 fn from(value: &'a Region) -> Self {
376 Self::Borrowed(value)
377 }
378}
379
380impl From<Box<Region>> for RegionRef<'_> {
381 fn from(value: Box<Region>) -> Self {
382 Self::Owned(value)
383 }
384}
385
386impl Deref for RegionRef<'_> {
387 type Target = Region;
388
389 fn deref(&self) -> &Self::Target {
390 match self {
391 RegionRef::Borrowed(r) => r,
392 RegionRef::Owned(r) => r,
393 }
394 }
395}
396
397/// A trait which represents something that can be parsed into a region and optionally contains
398/// information about which region in the world it is.
399pub trait RegionParser {
400 /// Parse this into a [`Region`] and return it through [`RegionRef`] so that we can have either
401 /// owned or or as a reference
402 fn parse(&self) -> Result<RegionRef<'_>>;
403
404 /// Get the position in the world (using
405 /// [region coordinates](https://minecraft.wiki/w/Region_file_format#Location)) of the region that will
406 /// be parsed by this [`RegionParser`] if there is no information as to which region this is,
407 /// then [`None`] should be returned.
408 fn position(&self) -> Option<(i32, i32)>;
409}
410
411impl RegionParser for RegionFile {
412 fn position(&self) -> Option<(i32, i32)> {
413 let filename = self.path.file_name()?.to_string_lossy();
414 let mut parts = filename.split('.');
415 if parts.next() != Some("r") {
416 return None;
417 }
418
419 let Some(Ok(x)) = parts.next().map(|s| s.parse()) else {
420 return None;
421 };
422
423 let Some(Ok(z)) = parts.next().map(|s| s.parse()) else {
424 return None;
425 };
426
427 if parts.next() != Some("mca") {
428 return None;
429 }
430
431 Some((x, z))
432 }
433
434 fn parse(&self) -> Result<RegionRef<'_>> {
435 let mut file = std::fs::File::open(&self.path)?;
436 Ok(Region::from_reader(&mut file)?.into())
437 }
438}
439
440/// Represents a Dimension in a Minecraft world
441pub struct Dimension<R> {
442 /// The ID for the dimension, see [`DimensionID`]
443 pub id: Option<DimensionID>,
444 regions: HashMap<(i32, i32), R>,
445}
446
447impl Dimension<RegionFile> {
448 /// Create a dimension from a path to a directory, the directory's name is used to get the id
449 /// if it is in the form of `DIM{id}`.
450 pub fn from_path<P>(path: P) -> io::Result<Self>
451 where
452 P: AsRef<Path>,
453 {
454 let path = path.as_ref();
455 let file = path.file_name();
456 let id = file
457 .and_then(|n| {
458 n.to_string_lossy()
459 .strip_prefix("DIM")
460 .and_then(|n| n.parse().ok())
461 })
462 .map(|n: i32| n.into());
463
464 Ok(Self::from_iter(id, parse_directory(path)?))
465 }
466}
467
468impl<R> Dimension<R>
469where
470 R: RegionParser,
471{
472 /// Construct a [`Dimension`] from an iterator which yields items which implement the
473 /// [`RegionParser`] trait.
474 ///
475 /// Every parser in the iterator must be able to determine a position, otherwise this call will
476 /// panic.
477 ///
478 /// Note: this call consumes the iterator, but does _not_ call [`RegionParser::parse`] on the
479 /// items.
480 pub fn from_iter<I>(id: Option<DimensionID>, iter: I) -> Self
481 where
482 I: Iterator<Item = R>,
483 {
484 Self {
485 id,
486 regions: iter.map(|rf| (rf.position().unwrap(), rf)).collect(),
487 }
488 }
489
490 /// Check if this dimension has a region at this location
491 pub fn has_region(&self, region_x: i32, region_z: i32) -> bool {
492 self.regions.contains_key(&(region_x, region_z))
493 }
494
495 /// Parse a region file at the given location (using [region coordinates](https://minecraft.wiki/w/Region_file_format#Location))
496 ///
497 /// # Panics
498 ///
499 /// If the region does not exist in this Dimension, use [`Dimension::has_region`] to check
500 /// before making a call to this method.
501 pub fn parse_region(&self, region_x: i32, region_z: i32) -> Result<RegionRef> {
502 self.regions[&(region_x, region_z)].parse()
503 }
504
505 /// Get an iterator over the [`RegionParser`]s contained in this [`Dimension`]
506 pub fn regions(&self) -> impl Iterator<Item = &R> {
507 self.regions.values()
508 }
509
510 /// Get an iterator over the locations of regions in this [`Dimension`] in the format of (x, z).
511 pub fn locations(&self) -> impl Iterator<Item = &(i32, i32)> {
512 self.regions.keys()
513 }
514
515 /// Get a region from an absolute chunk location (i.e. the "Chunk:" line in the F3
516 /// screen)
517 ///
518 /// # Return Values
519 ///
520 /// - `Ok(None)` if the region does not exist
521 /// - `Ok(Some(Region))` if the region exists and parsed successfully
522 /// - `Err(_)` if the region failed to parse
523 pub fn get_region_from_chunk(&self, chunk_x: i32, chunk_z: i32) -> Result<Option<RegionRef>> {
524 // self.has_region(chunk_x / 32, chunk_z / 32)
525 // .then(|| self.parse_region(chunk_x / 32, chunk_z / 32))
526 if self.has_region(chunk_x / 32, chunk_z / 32) {
527 Ok(Some(self.parse_region(chunk_x / 32, chunk_z / 32)?))
528 } else {
529 Ok(None)
530 }
531 }
532
533 /// Get a chunk from an absolute chunk location (i.e. the "Chunk:" line in the F3
534 /// screen)
535 ///
536 /// Note: This is only recommended if you only need one chunk from this region, otherwise, you
537 /// should use [`Dimension::parse_region`], [`Region::get_chunk`], and [`Chunk::parse`]. Using
538 /// those methods over this one also allows for more fine-grained control over error handling.
539 ///
540 /// # Return Values
541 ///
542 /// - `Ok(None)` if the region does not exist
543 /// - `Ok(Some(ParsedChunk))` if everything parsed successfully
544 /// - `Err(_)` if the region/chunk failed to parse
545 pub fn get_chunk_in_world(&self, chunk_x: i32, chunk_z: i32) -> Result<Option<ParsedChunk>> {
546 let region = self.get_region_from_chunk(chunk_x, chunk_z);
547
548 match region {
549 Ok(None) => Ok(None),
550 Ok(Some(region)) => {
551 match region.get_chunk(
552 positive_mod!(chunk_x, 32) as u32,
553 positive_mod!(chunk_z, 32) as u32,
554 ) {
555 Ok(Some(chunk)) => match chunk.parse() {
556 Ok(p) => Ok(Some(p)),
557 Err(e) => Err(e),
558 },
559 Ok(None) => Ok(None),
560 Err(e) => Err(e),
561 }
562 }
563 Err(e) => Err(e),
564 }
565 }
566}