Skip to main content

ms_pdb_msfz/
lib.rs

1//! Multi-Stream File - Compressed
2//!
3//! This crate allows reading and writing PDZ/MSFZ files. PDZ/MSFZ files are similar to PDB/MSF
4//! files. They contain a set of streams, which are indexed by number. Each stream is a sequence
5//! of bytes, similar to an ordinary file.
6//!
7//! See the [MSFZ Container Specification](https://github.com/microsoft/pdb-rs/blob/main/docs/pdb/msfz.md)
8
9#![forbid(unsafe_code)]
10#![forbid(unused_must_use)]
11#![warn(missing_docs)]
12#![allow(clippy::needless_lifetimes)]
13
14use std::fs::OpenOptions;
15
16use anyhow::Result;
17use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout, LE, U32, U64, Unaligned};
18
19mod compress_utils;
20mod reader;
21mod stream_data;
22#[cfg(test)]
23mod tests;
24mod writer;
25
26pub use reader::{Fragment, FragmentLocation, Msfz, StreamReader};
27pub use stream_data::StreamData;
28pub use writer::*;
29
/// Describes the header at the start of the MSFZ file.
///
/// This describes the on-disk layout of the file header. It is stored at the beginning of the
/// MSFZ file. Every integer field is declared with a little-endian (`LE`) type, so the header is
/// stored in little-endian byte order.
///
/// The struct is `#[repr(C)]`, so the fields are laid out in declaration order. Do not reorder
/// them.
#[derive(IntoBytes, FromBytes, Unaligned, Immutable, KnownLayout)]
#[repr(C)]
pub struct MsfzFileHeader {
    /// Identifies this as an MSFZ file. The value must always be [`MSFZ_FILE_SIGNATURE`].
    pub signature: [u8; 32],

    /// Specifies the version of the MSFZ file layout.
    pub version: U64<LE>,

    /// The file offset of the stream directory.
    pub stream_dir_offset: U64<LE>,

    /// The file offset of the Chunk Table, which has type `[ChunkEntry; num_chunks]`.
    ///
    /// See [`ChunkEntry`] for the layout of each element and [`Self::num_chunks`] for the
    /// element count.
    pub chunk_table_offset: U64<LE>,

    /// The number of streams stored within this MSFZ file.
    pub num_streams: U32<LE>,

    /// The compression algorithm applied to the stream directory.
    // NOTE(review): presumably one of the `COMPRESSION_*` codes defined in this module; confirm
    // against the reader and writer.
    pub stream_dir_compression: U32<LE>,

    /// The size in bytes of the stream directory, compressed (on-disk).
    pub stream_dir_size_compressed: U32<LE>,

    /// The size in bytes of the stream directory after decompression (in-memory).
    pub stream_dir_size_uncompressed: U32<LE>,

    /// The number of compression chunks, i.e. the number of entries in the Chunk Table.
    pub num_chunks: U32<LE>,

    /// The size in bytes of the Chunk Table.
    pub chunk_table_size: U32<LE>,
}
67
/// Describes one compressed chunk.
///
/// This is the element type of the Chunk Table, which is located by
/// [`MsfzFileHeader::chunk_table_offset`]. All fields are declared with little-endian (`LE`)
/// types.
#[derive(IntoBytes, FromBytes, Unaligned, Immutable, KnownLayout)]
#[repr(C)]
pub struct ChunkEntry {
    /// File offset (within the MSFZ file) of the compressed chunk.
    pub file_offset: U64<LE>,

    /// The compression algorithm for this chunk.
    // NOTE(review): presumably one of the `COMPRESSION_*` codes defined in this module; confirm
    // against the reader and writer.
    pub compression: U32<LE>,

    /// Size in bytes of the compressed data on disk.
    ///
    /// This value should be non-zero.
    pub compressed_size: U32<LE>,

    /// Number of bytes after decompression; this is the in-memory size.
    ///
    /// This value should be non-zero.
    pub uncompressed_size: U32<LE>,
}
88
/// The sentinel stream size that marks a stream as nil.
///
/// This is the largest `u32` value (`0xffff_ffff`).
pub const NIL_STREAM_SIZE: u32 = u32::MAX;

/// Compression code meaning that no compression is used.
pub const COMPRESSION_NONE: u32 = 0;

/// Compression code for the [`Zstd`](https://github.com/facebook/zstd) algorithm.
pub const COMPRESSION_ZSTD: u32 = 1;

/// Compression code for the [`Deflate`](https://en.wikipedia.org/wiki/Deflate) algorithm.
///
/// This uses the "raw" Deflate stream. It _does not_ use the GZIP encapsulation header.
pub const COMPRESSION_DEFLATE: u32 = 2;

/// The maximum file offset at which an uncompressed fragment can be stored.
///
/// The MSFZ specification provides 48 bits for storing the file offset of an uncompressed
/// fragment, so this is the value with only the low 48 bits set.
pub const MAX_UNCOMPRESSED_FILE_OFFSET: u64 = u64::MAX >> (u64::BITS - 48);
107
/// Specifies the compression algorithms that are supported by this library.
///
/// "No compression" is not a variant of this enum; APIs that allow it use
/// `Option<Compression>` (see `Compression::try_from_code_opt`).
///
/// This enum is `#[non_exhaustive]`, so code outside this crate must include a wildcard arm
/// when matching on it.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[non_exhaustive]
pub enum Compression {
    /// Identifies the [`Zstd`](https://github.com/facebook/zstd) compression algorithm.
    ///
    /// Its numeric code is [`COMPRESSION_ZSTD`].
    Zstd,
    /// Identifies the [`Deflate`](https://en.wikipedia.org/wiki/Deflate) compression algorithm.
    ///
    /// Its numeric code is [`COMPRESSION_DEFLATE`].
    Deflate,
}
117
118impl Compression {
119    fn to_code(self) -> u32 {
120        match self {
121            Self::Zstd => COMPRESSION_ZSTD,
122            Self::Deflate => COMPRESSION_DEFLATE,
123        }
124    }
125
126    fn try_from_code(code: u32) -> Result<Self, UnsupportedCompressionError> {
127        match code {
128            COMPRESSION_ZSTD => Ok(Self::Zstd),
129            COMPRESSION_DEFLATE => Ok(Self::Deflate),
130            _ => Err(UnsupportedCompressionError),
131        }
132    }
133
134    fn try_from_code_opt(code: u32) -> Result<Option<Self>, UnsupportedCompressionError> {
135        if code != COMPRESSION_NONE {
136            Ok(Some(Self::try_from_code(code)?))
137        } else {
138            Ok(None)
139        }
140    }
141}
142
/// Error reported when a compression code is not recognized or not supported.
///
/// The error carries no payload; its `Display` output is a fixed message.
#[derive(Copy, Clone, Debug)]
struct UnsupportedCompressionError;

impl std::fmt::Display for UnsupportedCompressionError {
    /// Writes the fixed, human-readable description of this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("The specified compression mode is not recognized or supported.")
    }
}

// There is no underlying cause to expose, so the default `Error` methods are sufficient.
impl std::error::Error for UnsupportedCompressionError {}
156
/// The signature of an MSFZ/PDZ file.
///
/// These 32 bytes are the required value of [`MsfzFileHeader::signature`], and they are what
/// [`is_header_msfz`] looks for at the start of the bytes it is given.
pub const MSFZ_FILE_SIGNATURE: [u8; 32] = *b"Microsoft MSFZ Container\r\n\x1aALD\0\0";
159
/// Prints a hex dump of [`MSFZ_FILE_SIGNATURE`] to stdout.
///
/// This test makes no assertions; it exists so the signature bytes can be inspected by eye.
#[test]
fn print_file_signature() {
    use pretty_hex::PrettyHex;

    let signature = MSFZ_FILE_SIGNATURE;
    println!("\n{:?}", signature.hex_dump());
}
165
/// The current version of the PDZ specification being developed.
// NOTE(review): presumably this is the value stored in `MsfzFileHeader::version`; confirm
// against the reader and writer.
pub const MSFZ_FILE_VERSION_V0: u64 = 0;
168
169/// Checks whether the header of a file appears to be a valid MSFZ/PDZ file.
170///
171/// This only looks at the signature; it doens't read anything else in the file.
172pub fn is_header_msfz(header: &[u8]) -> bool {
173    header.starts_with(&MSFZ_FILE_SIGNATURE)
174}
175
/// Configures `options` so that the file is opened with read sharing.
///
/// On Windows this sets the share mode to `FILE_SHARE_READ` (1). On every other platform
/// `options` is returned unchanged.
///
/// Returns the same `OpenOptions` reference so that calls can be chained.
fn open_options_shared(options: &mut OpenOptions) -> &mut OpenOptions {
    // Only compiled on Windows: rebinds `options` to the result of setting the share mode.
    #[cfg(windows)]
    let options = {
        use std::os::windows::fs::OpenOptionsExt;
        const FILE_SHARE_READ: u32 = 1;
        options.share_mode(FILE_SHARE_READ)
    };

    options
}
188
/// Configures `options` so that the file is opened without any sharing.
///
/// On Windows this sets the share mode to `0`. On every other platform `options` is returned
/// unchanged.
///
/// Returns the same `OpenOptions` reference so that calls can be chained.
fn open_options_exclusive(options: &mut OpenOptions) -> &mut OpenOptions {
    // Only compiled on Windows: rebinds `options` to the result of clearing the share mode.
    #[cfg(windows)]
    let options = {
        use std::os::windows::fs::OpenOptionsExt;
        options.share_mode(0)
    };

    options
}