1use super::*;
4use sync_file::RandomAccessFile;
5use tracing::{error, trace, trace_span, warn};
6use zerocopy::IntoBytes;
7
8#[derive(Clone, Debug)]
10pub struct CreateOptions {
11 pub page_size: PageSize,
13
14 pub max_streams: u32,
21}
22
/// Stream-count limit used by `CreateOptions::default()` and by every `Msf`
/// that is opened from an existing file (65534).
const DEFAULT_MAX_STREAMS: u32 = 0xFFFE;
25
26impl Default for CreateOptions {
27 fn default() -> Self {
28 Self {
29 page_size: DEFAULT_PAGE_SIZE,
30 max_streams: DEFAULT_MAX_STREAMS,
31 }
32 }
33}
34
35impl Msf<RandomAccessFile> {
36 pub fn open(file_name: &Path) -> anyhow::Result<Self> {
38 let file = open_options_shared(File::options().read(true)).open(file_name)?;
39 let random_file = RandomAccessFile::from(file);
40 Self::new_with_access_mode(random_file, AccessMode::Read)
41 }
42
43 pub fn create(file_name: &Path, options: CreateOptions) -> anyhow::Result<Self> {
49 let file = open_options_exclusive(File::options().read(true).write(true).create(true))
50 .open(file_name)?;
51 let random_file = RandomAccessFile::from(file);
52 Self::create_with_file(random_file, options)
53 }
54
55 pub fn modify(file_name: &Path) -> anyhow::Result<Self> {
57 let file =
58 open_options_exclusive(File::options().read(true).write(true)).open(file_name)?;
59 let random_file = RandomAccessFile::from(file);
60 Self::modify_with_file(random_file)
61 }
62}
63
64impl<F: ReadAt> Msf<F> {
65 pub fn open_with_file(file: F) -> anyhow::Result<Self> {
67 Self::new_with_access_mode(file, AccessMode::Read)
68 }
69
70 pub fn create_with_file(file: F, options: CreateOptions) -> anyhow::Result<Self> {
74 Self::create_for(file, options)
75 }
76
77 pub fn modify_with_file(file: F) -> anyhow::Result<Self> {
84 Self::new_with_access_mode(file, AccessMode::ReadWrite)
85 }
86
87 fn new_with_access_mode(file: F, access_mode: AccessMode) -> anyhow::Result<Self> {
94 let _span = trace_span!("Msf::new_with_access_mode").entered();
97
98 const MIN_PAGE_SIZE_USIZE: usize = 1usize << MIN_PAGE_SIZE.exponent();
99
100 let mut page0: [u8; MIN_PAGE_SIZE_USIZE] = [0; MIN_PAGE_SIZE_USIZE];
101
102 file.read_exact_at(&mut page0, 0)?;
104
105 let msf_kind: MsfKind;
106 let page_size: u32;
107 let active_fpm: u32;
108 let num_pages: u32;
109 let stream_dir_size: u32;
110
111 if page0.starts_with(&MSF_BIG_MAGIC) {
112 let (msf_header, _) = MsfHeader::ref_from_prefix(page0.as_slice()).unwrap();
114 page_size = msf_header.page_size.get();
115 active_fpm = msf_header.active_fpm.get();
116 num_pages = msf_header.num_pages.get();
117 stream_dir_size = msf_header.stream_dir_size.get();
118 msf_kind = MsfKind::Big;
119
120 if !matches!(active_fpm, 1 | 2) {
122 bail!("The PDB header is invalid. The active FPM is invalid.");
123 }
124 } else if page0.starts_with(&MSF_SMALL_MAGIC) {
125 let (msf_header, _) = SmallMsfHeader::ref_from_prefix(page0.as_slice()).unwrap();
128 page_size = msf_header.page_size.get();
129 active_fpm = msf_header.active_fpm.get() as u32;
130 num_pages = msf_header.num_pages.get() as u32;
131 stream_dir_size = msf_header.stream_dir_size.get();
132 msf_kind = MsfKind::Small;
133 } else if page0[16..24] == *b"PDB v1.0" {
134 bail!("This file is a Portable PDB, which is not supported.");
135 } else {
136 bail!("PDB file does not have the correct header (magic is wrong).");
137 }
138
139 let Ok(page_size_pow2) = PageSize::try_from(page_size) else {
140 bail!("The PDB header is invalid. The page size ({page_size}) is not a power of 2.",);
141 };
142
143 if num_pages == 0 {
144 bail!("PDB specifies invalid value for num_pages (zero).");
145 }
146
147 let mut stream_sizes: Vec<u32>;
148
149 let stream_dir_num_pages = stream_dir_size.div_round_up(page_size_pow2);
151
152 let mut page_allocator = PageAllocator::new(num_pages as usize, page_size_pow2);
155
156 let mut committed_stream_pages: Vec<Page>;
157 let mut committed_stream_page_starts: Vec<u32>;
158
159 match msf_kind {
160 MsfKind::Big => {
161 if stream_dir_size % 4 != 0 {
179 bail!("MSF Stream Directory has an invalid size; it is not a multiple of 4.");
180 }
181
182 let mut stream_dir: Vec<u32> = vec![0; stream_dir_size as usize / 4];
184
185 let stream_dir_l1_num_pages =
187 num_pages_for_stream_size(4 * stream_dir_num_pages, page_size_pow2) as usize;
188 let Ok((page_map_l1_ptrs, _)) = <[U32<LE>]>::ref_from_prefix_with_elems(
189 &page0[STREAM_DIR_PAGE_MAP_FILE_OFFSET as usize..],
190 stream_dir_l1_num_pages,
191 ) else {
192 bail!("Stream dir size is invalid (exceeds design limits)");
193 };
194
195 let stream_dir_bytes: &mut [u8] = stream_dir.as_mut_bytes();
196 let mut stream_dir_chunks = stream_dir_bytes.chunks_mut(page_size as usize);
197 let mut l1_page: Vec<u8> = vec![0; page_size as usize];
199 'l1_loop: for &page_map_l1_ptr in page_map_l1_ptrs.iter() {
200 let page_map_l1_ptr: u32 = page_map_l1_ptr.get();
201
202 page_allocator.init_mark_stream_dir_page_busy(page_map_l1_ptr)?;
203 if is_special_page_big_msf(page_size_pow2, page_map_l1_ptr) {
204 bail!(
205 "Stream dir contains invalid page number: {page_map_l1_ptr}. \
206 Page points to Page 0 or to an FPM page."
207 );
208 }
209
210 let file_offset = page_to_offset(page_map_l1_ptr, page_size_pow2);
212 file.read_exact_at(l1_page.as_mut_slice(), file_offset)?;
213
214 let l2_page_u32 = <[U32<LE>]>::ref_from_bytes(l1_page.as_slice()).unwrap();
216
217 for &l2_page in l2_page_u32.iter() {
218 let l2_page: u32 = l2_page.get();
219
220 let Some(stream_dir_chunk) = stream_dir_chunks.next() else {
221 break 'l1_loop;
222 };
223
224 page_allocator.init_mark_stream_dir_page_busy(l2_page)?;
225 if is_special_page_big_msf(page_size_pow2, l2_page) {
226 bail!(
227 "Stream dir contains invalid page number: {l2_page}. \
228 Page points to Page 0 or to an FPM page."
229 );
230 }
231
232 let l2_file_offset = page_to_offset(l2_page, page_size_pow2);
233 file.read_exact_at(stream_dir_chunk, l2_file_offset)?;
234 }
235 }
236
237 if stream_dir.is_empty() {
238 bail!("Stream directory is invalid (zero-length)");
239 }
240
241 if !cfg!(target_endian = "little") {
243 for x in stream_dir.iter_mut() {
244 *x = u32::from_le(*x);
245 }
246 }
247
248 let num_streams = stream_dir[0] as usize;
249
250 if num_streams == 0 {
252 bail!("MSF file is invalid, because num_streams = 0.");
253 }
254
255 let Some(stream_sizes_src) = stream_dir.get(1..1 + num_streams) else {
256 bail!("Stream directory is invalid (num_streams is not consistent with size)");
257 };
258 stream_sizes = stream_sizes_src.to_vec();
259
260 let mut stream_pages_iter = &stream_dir[1 + num_streams..];
261
262 committed_stream_pages = Vec::with_capacity(stream_dir.len() - num_streams - 1);
264 committed_stream_page_starts = Vec::with_capacity(num_streams + 1);
265
266 for (stream, &stream_size) in stream_sizes_src.iter().enumerate() {
267 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
268
269 if stream_size != NIL_STREAM_SIZE {
270 let num_stream_pages =
271 num_pages_for_stream_size(stream_size, page_size_pow2) as usize;
272 if num_stream_pages > stream_pages_iter.len() {
273 bail!(
274 "Stream directory is invalid. Stream {stream} has size {stream_size}, \
275 which exceeds the size of the stream directory."
276 );
277 }
278 let (this_stream_pages, next) =
279 stream_pages_iter.split_at(num_stream_pages);
280 stream_pages_iter = next;
281 committed_stream_pages.extend_from_slice(this_stream_pages);
282 }
283 }
284 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
285
286 stream_sizes[0] = 0;
291 }
292
293 MsfKind::Small => {
294 let page_pointers_size_bytes = stream_dir_num_pages * 2;
298
299 let mut pages_u16: Vec<U16<LE>> = vec![U16::new(0); stream_dir_num_pages as usize];
300 if page_pointers_size_bytes + size_of::<SmallMsfHeader>() as u32 > page_size {
301 bail!(
302 "The MSF header is invalid. The page pointers for the stream directory \
303 exceed the range of the first page. \
304 Stream dir size (in bytes): {stream_dir_size} Page size: {page_size}"
305 );
306 }
307
308 file.read_exact_at(pages_u16.as_mut_bytes(), size_of::<SmallMsfHeader>() as u64)?;
309
310 let mut page_iter = pages_u16.iter();
312 let mut old_stream_dir_bytes: Vec<u8> = vec![0; stream_dir_size as usize];
313 for stream_dir_chunk in old_stream_dir_bytes.chunks_mut(page_size as usize) {
314 let page = page_iter.next().unwrap().get() as u32;
317 page_allocator.init_mark_stream_dir_page_busy(page)?;
318 file.read_exact_at(stream_dir_chunk, page_to_offset(page, page_size_pow2))?;
319 }
320
321 let Ok((header, rest)) =
322 OldMsfStreamDirHeader::read_from_prefix(old_stream_dir_bytes.as_slice())
323 else {
324 bail!("Invalid stream directory: too small");
325 };
326
327 let num_streams = header.num_streams.get() as usize;
328 stream_sizes = Vec::with_capacity(num_streams);
329
330 let Ok((entries, mut rest)) =
331 <[OldMsfStreamEntry]>::ref_from_prefix_with_elems(rest, num_streams)
332 else {
333 bail!("Invalid stream directory: too small")
334 };
335
336 for entry in entries.iter() {
337 let stream_size = entry.stream_size.get();
338 stream_sizes.push(stream_size);
339 }
340
341 committed_stream_page_starts = Vec::with_capacity(num_streams + 1);
342 committed_stream_pages = Vec::new(); for &stream_size in stream_sizes.iter() {
345 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
346 if stream_size != NIL_STREAM_SIZE {
347 let num_pages = stream_size.div_round_up(page_size_pow2);
348
349 let Ok((pages, r)) =
350 <[U16<LE>]>::ref_from_prefix_with_elems(rest, num_pages as usize)
351 else {
352 bail!("Invalid stream directory: too small");
353 };
354
355 rest = r; for page in pages.iter() {
357 committed_stream_pages.push(page.get() as u32);
358 }
359 }
360 }
361
362 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
363
364 if !rest.is_empty() {
365 warn!(
366 unused_bytes = rest.len(),
367 "old-style stream dir contained unused bytes"
368 );
369 }
370 }
371 }
372
373 mark_stream_pages_busy(
376 &committed_stream_page_starts,
377 &committed_stream_pages,
378 &mut page_allocator.fpm,
379 page_size_pow2,
380 msf_kind,
381 access_mode,
382 )?;
383
384 page_allocator.check_vector_consistency()?;
387
388 if access_mode == AccessMode::ReadWrite {
398 let fpm_on_disk = read_fpm_big_msf(&file, active_fpm, num_pages, page_size_pow2)?;
399
400 assert_eq!(fpm_on_disk.len(), page_allocator.fpm.len()); if page_allocator.fpm != fpm_on_disk {
403 warn!("FPM computed from Stream Directory is not equal to FPM found on disk.");
404 warn!(
405 "Num pages = {num_pages} (0x{num_pages:x} bytes, bit offset: 0x{:x}:{})",
406 num_pages / 8,
407 num_pages % 8
408 );
409
410 for i in 0..num_pages as usize {
411 if fpm_on_disk[i] != page_allocator.fpm[i] {
412 warn!(
413 " bit 0x{:04x} is different. disk = {}, computed = {}",
414 i, fpm_on_disk[i], page_allocator.fpm[i]
415 );
416 }
417 }
418
419 if access_mode == AccessMode::ReadWrite {
423 bail!(
424 "FPM is corrupted; FPM computed from Stream Directory is not equal to FPM found on disk."
425 );
426 }
427 }
428 }
429
430 page_allocator.assert_invariants();
433
434 match (access_mode, msf_kind) {
435 (AccessMode::ReadWrite, MsfKind::Small) => {
436 bail!(
437 "This PDB file uses the obsolete 'Small MSF' encoding. \
438 This library does not support read-write mode with Small MSF files."
439 );
440 }
441
442 (AccessMode::ReadWrite, MsfKind::Big) => {}
443
444 (AccessMode::Read, _) => {}
445 }
446
447 Ok(Self {
448 file,
449 access_mode,
450 active_fpm,
451 committed_stream_pages,
452 committed_stream_page_starts,
453 stream_sizes,
454 kind: msf_kind,
455 pages: page_allocator,
456 modified_streams: HashMap::new(),
457 max_streams: DEFAULT_MAX_STREAMS,
458 })
459 }
460
461 pub fn create_for(file: F, options: CreateOptions) -> anyhow::Result<Self> {
464 let _span = trace_span!("Msf::create_for").entered();
465
466 assert!(options.page_size >= MIN_PAGE_SIZE);
467 assert!(options.page_size <= MAX_PAGE_SIZE);
468
469 let num_pages: usize = 3;
470
471 let mut this = Self {
472 file,
473 access_mode: AccessMode::ReadWrite,
474 committed_stream_pages: vec![],
475 committed_stream_page_starts: vec![0; 2],
476 kind: MsfKind::Big,
477 pages: PageAllocator::new(num_pages, options.page_size),
478 modified_streams: HashMap::new(),
479 stream_sizes: vec![0],
480 active_fpm: 2,
481 max_streams: options.max_streams,
482 };
483
484 for _ in 1..=4 {
486 let _stream_index = this.nil_stream()?;
487 }
488
489 Ok(this)
490 }
491}
492
493fn read_fpm_big_msf<F: ReadAt>(
498 file: &F,
499 active_fpm: u32,
500 num_pages: u32,
501 page_size: PageSize,
502) -> anyhow::Result<BitVec<u32, Lsb0>> {
503 let _span = trace_span!("read_fpm_big_msf").entered();
504
505 assert!(num_pages > 0);
506
507 let mut free_page_map: BitVec<u32, Lsb0> = BitVec::new();
508 free_page_map.resize(num_pages as usize, false);
509 let fpm_bytes: &mut [u8] = free_page_map.as_raw_mut_slice().as_mut_bytes();
510 let page_size_usize = usize::from(page_size);
511
512 for (interval, fpm_page_bytes) in fpm_bytes.chunks_mut(page_size_usize).enumerate() {
513 let interval_page = interval_to_page(interval as u32, page_size);
514 let file_pos = page_to_offset(interval_page + active_fpm, page_size);
515
516 trace!(
517 interval,
518 interval_page,
519 file_pos,
520 "reading FPM page, interval_page = 0x{interval_page:x}, file_pos = 0x{file_pos:x}"
521 );
522 file.read_exact_at(fpm_page_bytes, file_pos)?;
523 }
524
525 if free_page_map[0] {
531 bail!("FPM is invalid: Page 0 should always be BUSY");
532 }
533
534 let mut interval: u32 = 0;
537 loop {
538 let interval_page = interval_to_page(interval, page_size) as usize;
539 let fpm1_index = interval_page + 1;
540 let fpm2_index = interval_page + 2;
541
542 if fpm1_index < free_page_map.len() {
543 if free_page_map[fpm1_index] {
544 bail!("All FPM pages should be marked BUSY");
545 }
546 }
547
548 if fpm2_index < free_page_map.len() {
549 if free_page_map[fpm2_index] {
550 bail!("All FPM pages should be marked BUSY");
551 }
552 interval += 1;
553 } else {
554 break;
555 }
556 }
557
558 Ok(free_page_map)
559}
560
561fn low_page_mask(page_size: PageSize) -> u32 {
563 (1u32 << page_size.exponent()).wrapping_sub(1u32)
564}
565
566fn is_fpm_page_big_msf(page_size: PageSize, page: u32) -> bool {
568 let page_within_interval = page & low_page_mask(page_size);
569 matches!(page_within_interval, 1 | 2)
570}
571
572fn is_special_page_big_msf(page_size: PageSize, page: u32) -> bool {
574 page == 0 || is_fpm_page_big_msf(page_size, page)
575}
576
577#[derive(Clone, IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable)]
579#[repr(C)]
580struct OldMsfStreamDirHeader {
581 num_streams: U16<LE>,
582 ignored: U16<LE>,
583}
584
585#[derive(Clone, IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable)]
587#[repr(C)]
588struct OldMsfStreamEntry {
589 stream_size: U32<LE>,
590 ignored: U32<LE>,
591}
592
593fn mark_stream_pages_busy(
614 stream_page_starts: &[u32],
615 all_stream_pages: &[u32],
616 fpm: &mut BitVec<u32, Lsb0>,
617 page_size: Pow2,
618 msf_kind: MsfKind,
619 access_mode: AccessMode,
620) -> anyhow::Result<()> {
621 let page_within_interval_mask = low_page_mask(page_size);
622 let strict_mode = access_mode == AccessMode::ReadWrite;
623
624 for (stream, range) in stream_page_starts.windows(2).enumerate().skip(1) {
626 let stream_pages = &all_stream_pages[range[0] as usize..range[1] as usize];
627
628 for (stream_page, &page) in stream_pages.iter().enumerate() {
629 if page == 0 {
630 bail!(
631 "Page cannot be marked busy because it points to the first page of the file. Stream {0} is invalid.",
632 stream
633 );
634 }
635
636 if msf_kind == MsfKind::Big {
643 let page_within_interval = page & page_within_interval_mask;
644 if page_within_interval == 1 || page_within_interval == 2 {
645 warn!(
646 "Page {page} is invalid; it is assigned to a page reserved for the Free Page Map. Stream {0} is invalid.",
647 stream
648 );
649 if strict_mode {
650 bail!(
651 "Page {page} is invalid; it is assigned to a page reserved for the Free Page Map. Stream {0} is invalid.",
652 stream
653 );
654 }
655 continue;
658 }
659 }
660
661 if let Some(mut page_is_free) = fpm.get_mut(page as usize) {
662 if !*page_is_free {
663 error!(
664 page,
665 stream,
666 stream_page,
667 "Page cannot be marked busy, because it is already marked busy. It may be used by more than one stream."
668 );
669 if strict_mode {
670 bail!(
671 "Page {page} cannot be marked busy, because it is already marked busy. It may be used by more than one stream. Stream #{stream}"
672 );
673 } else {
674 continue;
675 }
676 }
677
678 page_is_free.set(false);
679 } else {
680 error!(
681 page,
682 stream, stream_page, "Page is invalid; it is out of range (exceeds num_pages)"
683 );
684 bail!(
685 "Page {} is invalid; it is out of range (exceeds num_pages)",
686 page
687 );
688 }
689 }
690 }
691
692 Ok(())
693}