Skip to main content

petro_meg/reader/
mod.rs

1//! Implements MEGA file reading.
2
3use std::io::{Read, Seek};
4use std::ops::Range;
5use std::{io, usize};
6
7use byteorder::{LE, ReadBytesExt as _};
8use thiserror::Error;
9use tracing::{instrument, trace, warn};
10
11use crate::crypto::{DecryptingReader, Key, round_up_to_block};
12use crate::path::{MegPath, MegPathBuf, MegPathError, WIN_PATH_LIMIT};
13
14mod any_version;
15mod version1;
16mod version2;
17mod version3;
18
/// Magic value for the second file-ID word of a MEGA header.
///
/// NOTE(review): presumably the expected `id2` that the V2/V3 header readers compare against
/// before reporting `MegReadError::InvalidFileId` — confirm against the version modules.
pub(crate) const ID2: u32 = 0x3F7D70A4;
20
/// Parser options for the MEGA file parser.
///
/// All fields are private. [`MegReadOptions::new`] (and [`Default`]) enable every validation and
/// set no key; [`MegReadOptions::set_key`] supplies the key used for encrypted files.
#[derive(Debug, Clone)]
pub struct MegReadOptions {
    /// Whether to validate filename CRCs. If `true`, a mismatched CRC is an error instead of a
    /// warning.
    ///
    /// Default: `true`.
    validate_crc: bool,
    /// Whether to validate file indexes. If `true`, a mismatched file index is an error instead of a
    /// warning.
    ///
    /// Default: `true`.
    validate_index: bool,
    /// Whether to validate that file names do not exceed the Windows path length limit
    /// (`WIN_PATH_LIMIT`). If `true`, an over-long name is an error instead of a warning.
    ///
    /// Default: `true`.
    validate_name_length: bool,
    /// Whether to validate that the number of names and number of files match. If `true`, a
    /// mismatch is an error instead of a warning.
    ///
    /// Default: `true`.
    validate_name_count: bool,
    /// Whether to validate that the names of files are unique. If `true`, a file record that
    /// references a name already claimed by an earlier record is an error; if `false`, the name
    /// is shared between the records.
    ///
    /// Default: `true`.
    validate_names_unique: bool,
    /// Validate that file start is above data start.
    ///
    /// Default: `true`.
    validate_file_start_data_start: bool,
    /// Validate that all files in an encrypted file are also encrypted.
    ///
    /// Default: `true`.
    validate_consistent_encryption: bool,
    /// If true, file readers will be wrapped in a reader which counts bytes read to ensure the
    /// whole file is complete and returns UnexpectedEof if any contents are missing.
    ///
    /// Default: `true`.
    validate_file_complete: bool,
    /// Encryption key and initial vector used for decrypting V3 MEGA files.
    ///
    /// Default: `None`.
    key: Option<Key>,
}
64
65impl MegReadOptions {
66    /// Create a a new default options.
67    pub const fn new() -> Self {
68        Self {
69            validate_crc: true,
70            validate_index: true,
71            validate_name_length: true,
72            validate_names_unique: true,
73            validate_name_count: true,
74            validate_file_start_data_start: true,
75            validate_consistent_encryption: true,
76            validate_file_complete: true,
77            key: None,
78        }
79    }
80
81    /// Set the crypto key used for reading.
82    ///
83    /// The key is unused in V1 and V2 files. V3 files can be encrypted, and require a key to read.
84    pub const fn set_key(&mut self, key: Option<Key>) -> &mut Self {
85        self.key = key;
86        self
87    }
88}
89
90impl Default for MegReadOptions {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
/// Errors that can occur while reading the metadata of a MEGA file.
///
/// Several of these conditions are only fatal while the matching validation in
/// [`MegReadOptions`] is enabled; with the validation disabled they are logged as warnings.
#[derive(Error, Debug)]
pub enum MegReadError {
    /// Encountered an IO error while parsing.
    #[error("Encountered an IO error while parsing: {0}")]
    IoError(#[from] io::Error),
    /// For V2 or V3, the first two words were not recognized as the correct file id.
    #[error("MEGA file header had an unrecognized file ID: 0x{id1:08X} 0x{id2:08X}")]
    InvalidFileId { id1: u32, id2: u32 },
    /// For V3 only, the header had the 'encrypted' id version/flag but no crypto key was available
    /// in the provided reader options.
    #[error(
        "MEGA file header indicated that it was encrypted, but no key was provided to decrypt it"
    )]
    MissingKey,
    /// Name count validation was enabled and the number of files listed differs from the number of
    /// filenames.
    #[error(
        "MEGA file header had a different number of files and file names. \
        num_filenames={num_filenames}, num_files={num_files}"
    )]
    NameFileCountMismatch { num_filenames: u32, num_files: u32 },
    /// Name Length Validation was enabled, and a name was encountered that exceeded the length
    /// limit.
    #[error(
        "The File name at index {name_index} exceeded the Windows 260 character limit for file \
        paths. Actual length: {name_len}"
    )]
    NameTooLong { name_index: u32, name_len: usize },
    /// An invalid MegPath name was encountered.
    #[error("The File name at index {name_index} in the MEGA file was not valid: {path_error}")]
    InvalidName {
        name_index: u32,
        path_error: MegPathError,
    },
    /// A V3 File record had a flags value other than 0 or 1.
    #[error("The file record at index {file_index} had unrecognized flags: 0x{flags:04X}")]
    InvalidFileFlags { file_index: u32, flags: u16 },
    /// A V3 File record had an encryption flag which didn't match the containing MEGA file.
    #[error(
        "The file record at index {file_index} had encryption={record_encrypted} but the \
        containing MEGA file had encryption={meg_encrypted}"
    )]
    MismatchedEncryption {
        file_index: u32,
        /// Whether the MEGA file used encryption.
        meg_encrypted: bool,
        /// Whether this particular file record used encryption.
        record_encrypted: bool,
    },
    /// Index validation was enabled and a file record stored an index that differs from the
    /// record's actual position in the file table.
    #[error(
        "The File record at index {file_index} specified that it should be at index \
        {index_from_record}"
    )]
    InvalidFileIndex {
        /// Position of the record in the file table.
        file_index: u32,
        /// Index value stored inside the record.
        index_from_record: u32,
    },
    /// A file record referenced a name index that is past the end of the names table.
    #[error(
        "The File record at index {file_index} referenced name index {name_index} but there are \
        only {num_names} names defined"
    )]
    NameIndexOutOfRange {
        file_index: u32,
        name_index: u32,
        num_names: u32,
    },
    /// Name uniqueness validation was enabled and a file record referenced a name that an
    /// earlier file record had already claimed.
    #[error(
        "The File record at index {file_index} referenced name index {name_index} but another file \
        already has that name"
    )]
    NameAlreadyUsed {
        file_index: u32,
        name_index: u32,
        /// Total number of names in the names table (not included in the message).
        num_names: u32,
    },
    /// CRC validation was enabled and the CRC stored in a file record did not match the CRC-32
    /// computed from the bytes of the referenced name.
    #[error(
        "The File record at index {file_index} expected its name to have a crc of {expected_crc}, \
        but the name's actual crc was {actual_crc}"
    )]
    InvalidCrc {
        file_index: u32,
        /// CRC stored in the file record.
        expected_crc: u32,
        /// CRC computed from the name.
        actual_crc: u32,
    },
    /// A file record's data position conflicts with the start of the data section listed in the
    /// MEGA header.
    ///
    /// NOTE(review): not raised in this file; presumably produced by the version-specific
    /// readers when file-start validation is enabled — confirm.
    #[error(
        "The File record at index {file_index} expected data at position {file_start}, but the \
        MEGA header listed {data_start} as the start of the file data section"
    )]
    FileBelowDataStart {
        file_index: u32,
        file_start: u32,
        data_start: u32,
    },
}
191
192/// Trait for implementing MegMetaReader for various MEGA file versions.
193#[allow(private_bounds)]
194pub trait ReadMegMeta: Sized + ReadVersion {
195    fn read_meg_meta<R: Read>(self, reader: R) -> Result<Vec<FileEntry>, MegReadError> {
196        const DEFAULT_OPTIONS: &'static MegReadOptions = &MegReadOptions::new();
197        self.read_meg_meta_opt(reader, DEFAULT_OPTIONS)
198    }
199
200    fn read_meg_meta_opt<R: Read>(
201        self,
202        reader: R,
203        options: &MegReadOptions,
204    ) -> Result<Vec<FileEntry>, MegReadError>;
205}
206
/// Private marker trait required as a supertrait by `ReadMegMeta`.
///
/// Because this trait is not public, only the types listed below can satisfy the bound.
trait ReadVersion {}

// Version selector types that are allowed to act as MEGA metadata readers.
impl ReadVersion for crate::version::MegVersion {}
impl ReadVersion for Option<crate::version::MegVersion> {}
impl ReadVersion for crate::version::MegV1 {}
impl ReadVersion for crate::version::MegV2 {}
impl ReadVersion for crate::version::MegV3 {}
impl ReadVersion for crate::version::GuessVersion {}
215
/// Version-specific ReaderState. Provides hooks for version-specific operations.
///
/// The generic `read_meg_meta` function drives the parse and calls back into these hooks for
/// the table sizes, the names table and each file record.
trait ReaderState: std::fmt::Debug + Sized {
    /// Gets the number of filename entries in the filenames table.
    fn num_filenames(&self) -> u32;

    /// Gets the number of files in the files table.
    fn num_files(&self) -> u32;

    /// Read the names from the MEGA file.
    ///
    /// The default implementation reads `self.num_filenames()` entries using the module-level
    /// `read_names` function.
    fn read_names<R: Read>(
        &self,
        reader: &mut R,
        options: &MegReadOptions,
    ) -> Result<Vec<Option<MegPathBuf>>, MegReadError> {
        read_names(reader, self.num_filenames(), options)
    }

    /// Read a single file record from the file.
    ///
    /// Index is provided only for error messages.
    fn read_file_record<R: Read>(
        &self,
        reader: &mut R,
        options: &MegReadOptions,
        index: u32,
    ) -> Result<FileRecord, MegReadError>;
}
243
/// A raw file record, not yet interpreted.
///
/// Records are validated and converted into `FileEntry` values by `read_meg_meta`.
#[derive(Debug)]
struct FileRecord {
    /// Encryption flag. Only used by V3 files.
    encrypted: bool,
    /// CRC-32 of the filename.
    crc: u32,
    /// Index of this record in the records table.
    index: u32,
    /// Size of this file in the data section.
    size: u32,
    /// Start offset of this file's data, relative to the start of the MEGA file.
    start: u32,
    /// Index of the name in the names table.
    name: u32,
}
260
261#[instrument(skip_all)]
262fn read_meg_meta<S: ReaderState, R: Read>(
263    state: S,
264    mut reader: R,
265    options: &MegReadOptions,
266) -> Result<Vec<FileEntry>, MegReadError> {
267    trace!("Read Options: {options:?}");
268    trace!("Header Read: {state:?}");
269    if state.num_filenames() != state.num_files() {
270        let err = MegReadError::NameFileCountMismatch {
271            num_filenames: state.num_filenames(),
272            num_files: state.num_files(),
273        };
274        if options.validate_name_count {
275            return Err(err);
276        }
277        warn!("{err}");
278    }
279    let mut names = state.read_names(&mut reader, options)?;
280
281    let mut files = Vec::with_capacity(state.num_files() as usize);
282    for file_index in 0..state.num_files() {
283        let record = state.read_file_record(&mut reader, options, file_index)?;
284        if record.index != file_index {
285            let err = MegReadError::InvalidFileIndex {
286                file_index,
287                index_from_record: record.index,
288            };
289            if options.validate_index {
290                return Err(err);
291            }
292            warn!("{err}");
293        }
294        let name =
295            names
296                .get_mut(record.name as usize)
297                .ok_or(MegReadError::NameIndexOutOfRange {
298                    file_index,
299                    name_index: record.name,
300                    num_names: state.num_filenames(),
301                })?;
302        let name = if options.validate_names_unique {
303            name.take().ok_or(MegReadError::NameAlreadyUsed {
304                file_index,
305                name_index: record.name,
306                num_names: state.num_filenames(),
307            })?
308        } else {
309            name.clone().unwrap()
310        };
311        let actual_crc = crc32fast::hash(name.as_bytes());
312        if record.crc != actual_crc {
313            let err = MegReadError::InvalidCrc {
314                file_index,
315                expected_crc: record.crc,
316                actual_crc,
317            };
318            if options.validate_crc {
319                return Err(err);
320            }
321            warn!("{err}");
322        }
323        files.push(FileEntry {
324            name,
325            start: record.start,
326            size: record.size,
327            encrypted: record.encrypted,
328        });
329    }
330
331    Ok(files)
332}
333
334/// Read all the file names from the given reader.
335fn read_names<R: Read>(
336    mut reader: R,
337    num_filenames: u32,
338    options: &MegReadOptions,
339) -> Result<Vec<Option<MegPathBuf>>, MegReadError> {
340    let mut names = Vec::with_capacity(num_filenames as usize);
341    // Read the number of entries needed to fill the names table.
342    for name_index in 0..num_filenames {
343        // All versions use the same u16 file name length.
344        let name_len = reader.read_u16::<LE>()? as usize;
345        if name_len > WIN_PATH_LIMIT {
346            let err = MegReadError::NameTooLong {
347                name_index,
348                name_len,
349            };
350            if options.validate_name_length {
351                return Err(err);
352            }
353            warn!("{err}");
354        }
355        let mut raw_name = vec![0u8; name_len];
356        reader.read_exact(&mut raw_name)?;
357        let name = match MegPathBuf::from_bytes(raw_name) {
358            Ok(name) => name,
359            Err(path_error) => {
360                return Err(MegReadError::InvalidName {
361                    name_index,
362                    path_error,
363                });
364            }
365        };
366        trace!("Read name at index {name_index}: {name}");
367        names.push(Some(name));
368    }
369    Ok(names)
370}
371
372/// Common implementation for read_file_record for both V1 and V2 MEGA files.
373///
374/// V1 and V2 are never encrypted and use a 32 bit name field.
375fn read_v1v2_file_record<R: Read>(reader: &mut R) -> Result<FileRecord, MegReadError> {
376    Ok(FileRecord {
377        encrypted: false,
378        crc: reader.read_u32::<LE>()?,
379        index: reader.read_u32::<LE>()?,
380        size: reader.read_u32::<LE>()?,
381        start: reader.read_u32::<LE>()?,
382        name: reader.read_u32::<LE>()?,
383    })
384}
385
/// Entry for a file read from the MEGA file's files table.
///
/// Use `FileEntry::extract_from` to read the file's contents out of the MEGA file.
pub struct FileEntry {
    /// The path/name of the file.
    name: MegPathBuf,
    /// Start offset of this file within the MEGA file.
    start: u32,
    /// Size of this file within the MEGA file.
    size: u32,
    /// Whether this file was encrypted.
    encrypted: bool,
}
397
398impl FileEntry {
399    /// Gets the MEGA file path that the file entry was stored under.
400    pub fn name(&self) -> &MegPath {
401        &self.name
402    }
403
404    /// Get the range of the original MEGA file occupied by this file's contents.
405    pub fn range(&self) -> Range<usize> {
406        let start: usize = self
407            .start
408            .try_into()
409            .expect("start was outside the range of usize");
410        let size: usize = self
411            .size
412            .try_into()
413            .expect("size was outside the range of usize");
414        let end = start
415            .checked_add(size)
416            .expect("start+size overflowed usize");
417        start..end
418    }
419
420    /// Get the size of the file's contents in the MEGA file.
421    pub fn size(&self) -> u32 {
422        self.size
423    }
424
425    /// Returns true if the file was encrypted.
426    pub fn encrypted(&self) -> bool {
427        self.encrypted
428    }
429
430    /// Extract this file from the given reader. The provided reader must represent the same MEGA
431    /// file that this FileEntry belongs to.
432    pub fn extract_from<'read, R: Read + Seek + 'read>(
433        &self,
434        mut reader: R,
435        options: &MegReadOptions,
436    ) -> io::Result<FileSegmentReader<'read>> {
437        let new_position = reader.seek(io::SeekFrom::Start(self.start as u64))?;
438        if new_position != self.start as u64 {
439            return Err(io::Error::new(
440                io::ErrorKind::UnexpectedEof,
441                format!(
442                    "Tried to seek to {} but position after seek was {new_position}",
443                    self.start
444                ),
445            ));
446        }
447
448        #[inline(always)]
449        fn maybe_check_len<'read, R: Read + 'read>(
450            reader: io::Take<R>,
451            options: &MegReadOptions,
452        ) -> Box<dyn Read + 'read> {
453            if options.validate_file_complete {
454                Box::new(ReadLenChecker { inner: reader })
455            } else {
456                Box::new(reader)
457            }
458        }
459
460        let reader = if self.encrypted {
461            let Some(ref key) = options.key else {
462                return Err(io::Error::new(
463                    io::ErrorKind::Unsupported,
464                    MegReadError::MissingKey,
465                ));
466            };
467            let amount_to_read = round_up_to_block(self.size as u64);
468            // The inner 'take' is to grab the correct block contents for the file, rounded up to
469            // the next full block.
470            let reader = reader.take(amount_to_read);
471            // Then we decrypt.
472            let reader = DecryptingReader::new(reader, key);
473            // Then we have to apply another Take to restrict the output to only the portion of the
474            // last block that's actually supposed to be in the file.
475            let reader = reader.take(self.size as u64);
476            maybe_check_len(reader, options)
477        } else {
478            // If its not encrypted we just take the file contents and maybe check the length limit.
479            let reader = reader.take(self.size as u64);
480            maybe_check_len(reader, options)
481        };
482        Ok(FileSegmentReader(reader))
483    }
484}
485
/// Reader over the portion of a larger MEGA file that belongs to a single `FileEntry`.
///
/// This is a thin wrapper around a boxed reader; every `Read` method is forwarded to the
/// wrapped reader's method of the same name.
pub struct FileSegmentReader<'read>(Box<dyn Read + 'read>);

impl Read for FileSegmentReader<'_> {
    fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
        self.0.as_mut().read(out)
    }

    fn read_vectored(&mut self, slices: &mut [io::IoSliceMut<'_>]) -> io::Result<usize> {
        self.0.as_mut().read_vectored(slices)
    }

    fn read_to_end(&mut self, sink: &mut Vec<u8>) -> io::Result<usize> {
        self.0.as_mut().read_to_end(sink)
    }

    fn read_to_string(&mut self, sink: &mut String) -> io::Result<usize> {
        self.0.as_mut().read_to_string(sink)
    }

    fn read_exact(&mut self, out: &mut [u8]) -> io::Result<()> {
        self.0.as_mut().read_exact(out)
    }
}
510
/// Checks that a read returns the full file contents.
///
/// Wraps a size-limited reader and turns a premature end of input — the inner reader running
/// dry while the limit still allows more bytes — into an `UnexpectedEof` error instead of a
/// silent, short result.
struct ReadLenChecker<R> {
    /// The limited reader; its remaining limit is the number of bytes still expected.
    inner: io::Take<R>,
}

impl<R: Read> Read for ReadLenChecker<R> {
    /// Reads from the inner reader.
    ///
    /// # Errors
    ///
    /// Returns `UnexpectedEof` if the inner reader reports end of input (a zero-length read
    /// into a non-empty buffer) while bytes are still expected. Errors from the inner reader
    /// are passed through unchanged.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let read = self.inner.read(buf)?;
        // A zero-length read only signals end of input when the caller actually asked for
        // data; reading into an empty buffer always yields 0 and must not be reported as a
        // truncated file.
        if read == 0 && !buf.is_empty() && self.inner.limit() > 0 {
            Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "Data for this file is incomplete.",
            ))
        } else {
            Ok(read)
        }
    }
}
528}