1use super::*;
2use anyhow::anyhow;
3use pow2::Pow2;
4use std::fs::File;
5use std::io::{Seek, SeekFrom, Write};
6use std::path::Path;
7use tracing::{debug, debug_span, error, trace, trace_span};
8use zerocopy::IntoBytes;
9
/// Uncompressed chunk size threshold that a freshly created writer starts with (1 MiB).
pub const DEFAULT_CHUNK_THRESHOLD: u32 = 1 << 20;

/// Smallest uncompressed chunk size threshold that can be configured (4 KiB).
pub const MIN_CHUNK_SIZE: u32 = 1 << 12;

/// Largest uncompressed chunk size threshold that can be configured (1 GiB).
pub const MAX_CHUNK_SIZE: u32 = 1 << 30;
18
19pub struct MsfzWriter<F: Write + Seek = File> {
21 pub(crate) file: MsfzWriterFile<F>,
22
23 pub(crate) streams: Vec<Option<Stream>>,
26}
27
28pub(crate) struct MsfzWriterFile<F: Write + Seek> {
29 uncompressed_chunk_size_threshold: u32,
32
33 uncompressed_chunk_data: Vec<u8>,
35
36 compressed_chunk_buffer: Vec<u8>,
39 chunks: Vec<ChunkEntry>,
41
42 chunk_compression_mode: Compression,
44
45 pub(crate) out: F,
47}
48
49impl std::fmt::Debug for FragmentLocation {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 match self {
52 Self::Uncompressed { file_offset } => {
53 write!(f, "uncompressed at 0x{file_offset:06x}")
54 }
55 Self::Compressed {
56 chunk_index,
57 offset_within_chunk,
58 } => write!(f, "chunk {chunk_index} : 0x{offset_within_chunk:04x}"),
59 }
60 }
61}
62
63#[derive(Clone, Debug)]
65pub(crate) struct Fragment {
66 pub(crate) size: u32,
67 pub(crate) location: FragmentLocation,
68}
69
70#[derive(Default)]
71pub(crate) struct Stream {
72 pub(crate) fragments: Vec<Fragment>,
73}
74
/// Index of the bit, within an encoded 64-bit fragment location, that flags a compressed
/// (chunk-relative) location.
const FRAGMENT_LOCATION_CHUNK_BIT: u32 = 63;
/// Mask with only `FRAGMENT_LOCATION_CHUNK_BIT` set.
const FRAGMENT_LOCATION_CHUNK_MASK: u64 = 1u64 << FRAGMENT_LOCATION_CHUNK_BIT;
77
/// Where the bytes of a fragment are stored.
#[derive(Clone)]
pub(crate) enum FragmentLocation {
    /// Stored directly in the output file.
    Uncompressed {
        /// Offset of the first byte within the output file.
        file_offset: u64,
    },
    /// Stored inside a compressed chunk.
    Compressed {
        /// Index of the chunk in the chunk table.
        chunk_index: u32,
        /// Offset of the first byte within the chunk's uncompressed data.
        offset_within_chunk: u32,
    },
}
88
/// Counts reported after an MSFZ file has been finished.
#[non_exhaustive]
pub struct Summary {
    /// Number of compressed chunks in the chunk table.
    pub num_chunks: u32,
    /// Number of stream slots in the stream directory.
    pub num_streams: u32,
}

impl std::fmt::Display for Summary {
    /// Writes one line per counter, each terminated by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            num_chunks,
            num_streams,
        } = self;
        writeln!(f, "Number of chunks: {}", num_chunks)?;
        writeln!(f, "Number of streams: {}", num_streams)
    }
}
105
106impl MsfzWriter<File> {
107 pub fn create(file_name: &Path) -> Result<Self> {
111 let f = open_options_exclusive(File::options().write(true).create(true).truncate(true))
112 .open(file_name)?;
113 Self::new(f)
114 }
115}
116
117impl<F: Write + Seek> MsfzWriter<F> {
118 pub fn new(mut file: F) -> Result<Self> {
121 let _span = trace_span!("MsfzWriter::new").entered();
122
123 file.seek(SeekFrom::Start(0))?;
124
125 let fake_file_header = MsfzFileHeader::new_zeroed();
128 file.write_all(fake_file_header.as_bytes())?;
129
130 let mut streams = Vec::with_capacity(0x40);
133
134 streams.push(Some(Stream {
137 fragments: Vec::new(),
138 }));
139
140 let mut this = Self {
141 streams,
142 file: MsfzWriterFile {
143 uncompressed_chunk_size_threshold: DEFAULT_CHUNK_THRESHOLD,
144 uncompressed_chunk_data: Vec::with_capacity(DEFAULT_CHUNK_THRESHOLD as usize),
145 compressed_chunk_buffer: Vec::new(),
146 out: file,
147 chunks: Vec::new(),
148 chunk_compression_mode: Compression::Zstd,
149 },
150 };
151 this.file.write_align(Pow2::from_exponent(4))?;
152 Ok(this)
153 }
154
155 pub fn set_chunk_compression_mode(&mut self, compression: Compression) {
157 self.file.chunk_compression_mode = compression;
160 }
161
162 pub fn set_uncompressed_chunk_size_threshold(&mut self, value: u32) {
169 self.file.uncompressed_chunk_size_threshold = value.clamp(MIN_CHUNK_SIZE, MAX_CHUNK_SIZE);
170 }
171
172 pub fn uncompressed_chunk_size_threshold(&self) -> u32 {
174 self.file.uncompressed_chunk_size_threshold
175 }
176
177 pub fn reserve_num_streams(&mut self, num_streams: usize) {
186 if num_streams <= self.streams.len() {
187 return;
188 }
189
190 self.streams.resize_with(num_streams, Option::default);
191 }
192
193 pub fn end_chunk(&mut self) -> std::io::Result<()> {
200 self.file.finish_current_chunk()
201 }
202
203 pub fn stream_writer(&mut self, stream: u32) -> std::io::Result<StreamWriter<'_, F>> {
209 assert!((stream as usize) < self.streams.len());
210
211 Ok(StreamWriter {
212 file: &mut self.file,
213 stream: self.streams[stream as usize].get_or_insert_with(Stream::default),
214 chunked_compression_enabled: true,
215 alignment: Pow2::from_exponent(2), })
217 }
218
219 pub fn new_stream_writer(&mut self) -> Result<(u32, StreamWriter<'_, F>)> {
221 let stream = self.streams.len() as u32;
222 self.streams.push(Some(Stream::default()));
223 let w = self.stream_writer(stream)?;
224 Ok((stream, w))
225 }
226
227 pub fn finish(self) -> Result<(Summary, F)> {
233 self.finish_with_options(MsfzFinishOptions::default())
234 }
235
236 pub fn finish_with_options(mut self, options: MsfzFinishOptions) -> Result<(Summary, F)> {
244 let _span = debug_span!("MsfzWriter::finish").entered();
245
246 self.file.finish_current_chunk()?;
247
248 let directory_offset = self.file.write_align(Pow2::from_exponent(4))?;
250
251 let stream_dir_bytes: Vec<u8> = encode_stream_dir(&self.streams);
252 let stream_dir_size_uncompressed = u32::try_from(stream_dir_bytes.len())
253 .map_err(|_| anyhow!("The stream directory is too large."))?;
254 let stream_dir_size_compressed: u32;
255 let stream_dir_compression: u32;
256 if let Some(compression) = options.stream_dir_compression {
257 stream_dir_compression = compression.to_code();
258 let stream_dir_compressed_bytes =
259 crate::compress_utils::compress_to_vec(compression, &stream_dir_bytes)?;
260 stream_dir_size_compressed = stream_dir_compressed_bytes.len() as u32;
261 self.file.out.write_all(&stream_dir_compressed_bytes)?;
262 } else {
263 self.file.out.write_all(&stream_dir_bytes)?;
264 stream_dir_size_compressed = stream_dir_size_uncompressed;
265 stream_dir_compression = COMPRESSION_NONE;
266 }
267
268 let chunk_table_offset = self.file.write_align(Pow2::from_exponent(4))?;
270 let chunk_table_bytes = self.file.chunks.as_bytes();
271 let chunk_table_size = u32::try_from(chunk_table_bytes.len())
272 .map_err(|_| anyhow!("The chunk index is too large."))?;
273 self.file.out.write_all(chunk_table_bytes)?;
274
275 let file_header = MsfzFileHeader {
277 signature: MSFZ_FILE_SIGNATURE,
278 version: U64::new(MSFZ_FILE_VERSION_V0),
279 num_streams: U32::new(self.streams.len() as u32),
280 stream_dir_compression: U32::new(stream_dir_compression),
281 stream_dir_offset: U64::new(directory_offset),
282 stream_dir_size_compressed: U32::new(stream_dir_size_compressed),
283 stream_dir_size_uncompressed: U32::new(stream_dir_size_uncompressed),
284 num_chunks: U32::new(self.file.chunks.len() as u32),
285 chunk_table_size: U32::new(chunk_table_size),
286 chunk_table_offset: U64::new(chunk_table_offset),
287 };
288 self.file.out.seek(SeekFrom::Start(0))?;
289 self.file.out.write_all(file_header.as_bytes())?;
290
291 if options.min_file_size != 0 {
292 let file_length = self.file.out.seek(SeekFrom::End(0))?;
293 if file_length < options.min_file_size {
294 debug!(
295 file_length,
296 options.min_file_size, "Extending file to meet minimum length requirement"
297 );
298 self.file
301 .out
302 .seek(SeekFrom::Start(options.min_file_size - 1))?;
303 self.file.out.write_all(&[0u8])?;
304 }
305 }
306
307 let summary = Summary {
308 num_chunks: self.file.chunks.len() as u32,
309 num_streams: self.streams.len() as u32,
310 };
311
312 Ok((summary, self.file.out))
313 }
314}
315
/// Position at which a block of data was placed in the chunk buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ChunkAndOffset {
    /// Index the chunk will have in the chunk table.
    chunk: u32,
    /// Offset of the data within the chunk's uncompressed bytes.
    offset: u32,
}
322
323impl<F: Write + Seek> MsfzWriterFile<F> {
324 fn write_to_chunks(&mut self, data: &[u8]) -> std::io::Result<ChunkAndOffset> {
340 let _span = debug_span!("write_to_chunks").entered();
341
342 if data.len() + self.uncompressed_chunk_data.len()
343 >= self.uncompressed_chunk_size_threshold as usize
344 {
345 self.finish_current_chunk()?;
346 }
347
348 let chunk = self.chunks.len() as u32;
353 let offset_within_chunk = self.uncompressed_chunk_data.len();
354
355 self.uncompressed_chunk_data.extend_from_slice(data);
356 Ok(ChunkAndOffset {
357 chunk,
358 offset: offset_within_chunk as u32,
359 })
360 }
361
362 fn bytes_available_in_chunk_buffer(&self) -> usize {
363 (self.uncompressed_chunk_size_threshold as usize)
364 .saturating_sub(self.uncompressed_chunk_data.len())
365 }
366
367 #[inline(never)]
368 fn finish_current_chunk(&mut self) -> std::io::Result<()> {
369 let _span = debug_span!("finish_current_chunk").entered();
370
371 if self.uncompressed_chunk_data.is_empty() {
372 return Ok(());
373 }
374
375 let _span = trace_span!("MsfzWriter::finish_current_chunk").entered();
376
377 {
378 let _span = trace_span!("compress chunk").entered();
379 self.compressed_chunk_buffer.clear();
380 crate::compress_utils::compress_to_vec_mut(
381 self.chunk_compression_mode,
382 &self.uncompressed_chunk_data,
383 &mut self.compressed_chunk_buffer,
384 )?;
385 }
386
387 let file_pos;
388 {
389 let _span = trace_span!("write to disk").entered();
390 file_pos = self.out.stream_position()?;
391 self.out.write_all(&self.compressed_chunk_buffer)?;
392 }
393
394 trace!(
395 file_pos,
396 compressed_size = self.compressed_chunk_buffer.len(),
397 uncompressed_size = self.uncompressed_chunk_data.len()
398 );
399
400 self.chunks.push(ChunkEntry {
401 compressed_size: U32::new(self.compressed_chunk_buffer.len() as u32),
402 uncompressed_size: U32::new(self.uncompressed_chunk_data.len() as u32),
403 file_offset: U64::new(file_pos),
404 compression: U32::new(self.chunk_compression_mode.to_code()),
405 });
406
407 self.uncompressed_chunk_data.clear();
408 self.compressed_chunk_buffer.clear();
409
410 Ok(())
411 }
412
413 fn write_align(&mut self, alignment: Pow2) -> std::io::Result<u64> {
416 let pos = self.out.stream_position()?;
417 if alignment.is_aligned(pos) {
418 return Ok(pos);
419 }
420
421 let Some(aligned_pos) = alignment.align_up(pos) else {
422 return Err(std::io::ErrorKind::InvalidInput.into());
423 };
424
425 self.out.seek(SeekFrom::Start(aligned_pos))?;
426 Ok(aligned_pos)
427 }
428}
429
430pub struct StreamWriter<'a, F: Write + Seek> {
452 file: &'a mut MsfzWriterFile<F>,
453 stream: &'a mut Stream,
454 alignment: Pow2,
455 chunked_compression_enabled: bool,
456}
457
458impl<'a, F: Write + Seek> StreamWriter<'a, F> {
459 pub fn end_chunk(&mut self) -> std::io::Result<()> {
464 self.file.finish_current_chunk()
465 }
466
467 pub fn bytes_available_in_chunk_buffer(&self) -> usize {
470 self.file.bytes_available_in_chunk_buffer()
471 }
472
473 pub fn set_compression_enabled(&mut self, value: bool) {
482 self.chunked_compression_enabled = value;
483 }
484
485 pub fn set_alignment(&mut self, value: Pow2) {
490 self.alignment = value;
491 }
492}
493
494impl<'a, F: Write + Seek> Write for StreamWriter<'a, F> {
495 fn flush(&mut self) -> std::io::Result<()> {
496 Ok(())
497 }
498
499 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
500 let _span = trace_span!("StreamWriter::write").entered();
501 trace!(buf_len = buf.len());
502
503 if buf.is_empty() {
504 return Ok(0);
505 }
506
507 let old_stream_size: u32 = self.stream.fragments.iter().map(|f| f.size).sum();
508 let is_first_write = old_stream_size == 0;
509 let max_new_bytes = NIL_STREAM_SIZE - old_stream_size;
510
511 let buf_len = match u32::try_from(buf.len()) {
515 Ok(buf_len) if buf_len < max_new_bytes => buf_len,
516 _ => {
517 return Err(std::io::Error::new(
518 std::io::ErrorKind::InvalidInput,
519 "The input is too large for an MSFZ stream.",
520 ));
521 }
522 };
523
524 if self.chunked_compression_enabled {
525 let chunk_at = self.file.write_to_chunks(buf)?;
526
527 add_fragment_compressed(
528 &mut self.stream.fragments,
529 buf_len,
530 chunk_at.chunk,
531 chunk_at.offset,
532 );
533 } else {
534 let fragment_file_offset: u64 = if is_first_write {
535 self.file.write_align(self.alignment)?
536 } else {
537 self.file.out.stream_position()?
538 };
539
540 if fragment_file_offset > MAX_UNCOMPRESSED_FILE_OFFSET {
542 error!("The uncompressed file fragment ");
543 return Err(std::io::ErrorKind::FileTooLarge.into());
544 };
545
546 self.file.out.write_all(buf)?;
547
548 add_fragment_uncompressed(&mut self.stream.fragments, buf_len, fragment_file_offset);
549 }
550
551 Ok(buf.len())
552 }
553}
554
555fn add_fragment_compressed(
558 fragments: &mut Vec<Fragment>,
559 new_fragment_size: u32,
560 new_chunk: u32,
561 new_offset_within_chunk: u32,
562) {
563 debug!(
564 new_fragment_size,
565 new_chunk, new_offset_within_chunk, "add_fragment_compressed"
566 );
567
568 match fragments.last_mut() {
570 Some(Fragment {
571 size: last_fragment_size,
572 location:
573 FragmentLocation::Compressed {
574 chunk_index: last_chunk,
575 offset_within_chunk: last_offset_within_chunk,
576 },
577 }) if *last_chunk == new_chunk
578 && *last_offset_within_chunk + new_fragment_size == new_offset_within_chunk =>
579 {
580 *last_fragment_size += new_fragment_size;
581 }
582
583 _ => {
584 fragments.push(Fragment {
586 size: new_fragment_size,
587 location: FragmentLocation::Compressed {
588 chunk_index: new_chunk,
589 offset_within_chunk: new_offset_within_chunk,
590 },
591 });
592 }
593 }
594}
595
596fn add_fragment_uncompressed(
599 fragments: &mut Vec<Fragment>,
600 new_fragment_size: u32,
601 new_file_offset: u64,
602) {
603 debug!(
604 new_fragment_size,
605 new_file_offset, "add_fragment_uncompressed"
606 );
607
608 match fragments.last_mut() {
609 Some(Fragment {
610 size: last_fragment_size,
611 location:
612 FragmentLocation::Uncompressed {
613 file_offset: last_fragment_file_offset,
614 },
615 }) if *last_fragment_file_offset + new_fragment_size as u64 == new_file_offset => {
616 *last_fragment_size += new_fragment_size;
617 }
618
619 _ => {
620 fragments.push(Fragment {
622 size: new_fragment_size,
623 location: FragmentLocation::Uncompressed {
624 file_offset: new_file_offset,
625 },
626 });
627 }
628 }
629}
630
631pub(crate) fn encode_stream_dir(streams: &[Option<Stream>]) -> Vec<u8> {
633 let _span = trace_span!("encode_stream_dir").entered();
634
635 let mut stream_dir_encoded: Vec<u8> = Vec::new();
636 let mut enc = Encoder {
637 vec: &mut stream_dir_encoded,
638 };
639
640 for stream_opt in streams.iter() {
641 if let Some(stream) = stream_opt {
642 for fragment in stream.fragments.iter() {
643 assert_ne!(fragment.size, 0);
644 assert_ne!(fragment.size, NIL_STREAM_SIZE);
645 enc.u32(fragment.size);
646
647 let location: u64 = match fragment.location {
648 FragmentLocation::Compressed {
649 chunk_index,
650 offset_within_chunk,
651 } => {
652 ((chunk_index as u64) << 32)
653 | (offset_within_chunk as u64)
654 | FRAGMENT_LOCATION_CHUNK_MASK
655 }
656 FragmentLocation::Uncompressed { file_offset } => file_offset,
657 };
658
659 enc.u64(location)
660 }
661
662 enc.u32(0);
664 } else {
665 enc.u32(NIL_STREAM_SIZE);
668 }
669 }
670
671 stream_dir_encoded.as_bytes().to_vec()
672}
673
/// Appends little-endian integers to a borrowed byte vector.
struct Encoder<'a> {
    /// Destination buffer.
    vec: &'a mut Vec<u8>,
}

impl<'a> Encoder<'a> {
    /// Appends raw bytes to the destination buffer.
    fn put(&mut self, bytes: &[u8]) {
        self.vec.extend_from_slice(bytes);
    }

    /// Appends `value` as 4 little-endian bytes.
    fn u32(&mut self, value: u32) {
        let bytes: [u8; 4] = value.to_le_bytes();
        self.put(&bytes);
    }

    /// Appends `value` as 8 little-endian bytes.
    fn u64(&mut self, value: u64) {
        let bytes: [u8; 8] = value.to_le_bytes();
        self.put(&bytes);
    }
}
686
687#[derive(Clone, Debug, Default)]
689pub struct MsfzFinishOptions {
690 pub min_file_size: u64,
693
694 pub stream_dir_compression: Option<Compression>,
696}
697
/// A 16 KiB minimum file size, for use as `MsfzFinishOptions::min_file_size`.
pub const MIN_FILE_SIZE_16K: u64 = 16 * 1024;