Skip to main content

ms_pdb_msf/
open.rs

1//! Code for opening or creating MSF files.
2
3use super::*;
4use sync_file::RandomAccessFile;
5use tracing::{error, trace, trace_span, warn};
6use zerocopy::IntoBytes;
7
/// Options for creating a new PDB/MSF file.
///
/// [`CreateOptions::default`] selects `DEFAULT_PAGE_SIZE` and a stream limit of 0xfffe.
#[derive(Clone, Debug)]
pub struct CreateOptions {
    /// The page size to use. This must be in the range [`MIN_PAGE_SIZE`]`..=`[`MAX_PAGE_SIZE`].
    /// `Msf::create_for` asserts this range and panics if the value lies outside of it.
    pub page_size: PageSize,

    /// The maximum number of streams that we will allow to be created using `new_stream` or
    /// `nil_stream`. The default value is 0xfffe, which prevents overflowing the 16-bit stream
    /// indexes that are used in PDB (or confusing them with the "nil" stream index).
    ///
    /// Applications may increase this value beyond the default, but this will produce MSF files
    /// that are not usable by most PDB tools.
    pub max_streams: u32,
}
22
/// The maximum number of streams that PDB can tolerate. This is also the default value of
/// [`CreateOptions::max_streams`] and the limit applied to files that are opened (rather than
/// created) by this module.
const DEFAULT_MAX_STREAMS: u32 = 0xfffe;

impl Default for CreateOptions {
    /// Returns options that use `DEFAULT_PAGE_SIZE` and `DEFAULT_MAX_STREAMS`.
    fn default() -> Self {
        Self {
            page_size: DEFAULT_PAGE_SIZE,
            max_streams: DEFAULT_MAX_STREAMS,
        }
    }
}
34
35impl Msf<RandomAccessFile> {
36    /// Opens an MSF file for read access, given a file name.
37    pub fn open(file_name: &Path) -> anyhow::Result<Self> {
38        let file = open_options_shared(File::options().read(true)).open(file_name)?;
39        let random_file = RandomAccessFile::from(file);
40        Self::new_with_access_mode(random_file, AccessMode::Read)
41    }
42
43    /// Creates a new MSF file on disk (**truncating any existing file!**) and creates a new
44    /// [`Msf`] object in-memory object with read/write access.
45    ///
46    /// This function does not write anything to disk until stream data is written or
47    /// [`Self::commit`] is called.
48    pub fn create(file_name: &Path, options: CreateOptions) -> anyhow::Result<Self> {
49        let file = open_options_exclusive(File::options().read(true).write(true).create(true))
50            .open(file_name)?;
51        let random_file = RandomAccessFile::from(file);
52        Self::create_with_file(random_file, options)
53    }
54
55    /// Opens an existing MSF file for read/write access, given a file name.
56    pub fn modify(file_name: &Path) -> anyhow::Result<Self> {
57        let file =
58            open_options_exclusive(File::options().read(true).write(true)).open(file_name)?;
59        let random_file = RandomAccessFile::from(file);
60        Self::modify_with_file(random_file)
61    }
62}
63
64impl<F: ReadAt> Msf<F> {
    /// Opens an MSF file for read access, given a file object that has already been opened.
    ///
    /// `file` may be any type that implements `ReadAt`; it does not have to be a [`File`].
    ///
    /// Returns an error if the MSF File Header or the Stream Directory cannot be read or is
    /// invalid.
    pub fn open_with_file(file: F) -> anyhow::Result<Self> {
        Self::new_with_access_mode(file, AccessMode::Read)
    }
69
    /// Creates a new MSF file, given a file handle that has already been opened.
    ///
    /// **This function destroys the contents of the existing file.**
    ///
    /// This forwards directly to [`Self::create_for`], which panics if `options.page_size` is
    /// outside of `MIN_PAGE_SIZE..=MAX_PAGE_SIZE`.
    pub fn create_with_file(file: F, options: CreateOptions) -> anyhow::Result<Self> {
        Self::create_for(file, options)
    }
76
    /// Opens an existing MSF file for read/write access, given a file object that has already
    /// been opened.
    ///
    /// The `file` handle will be used for absolute reads and writes. The caller should never use
    /// this same file handle for reads (and especially not for writes) while also using [`Msf`]
    /// because the operating system's read/write file position may be updated by [`Msf`].
    ///
    /// Returns an error if the file fails validation. Read/write mode is stricter than read-only
    /// mode, and files that use the obsolete "Small MSF" encoding are always rejected.
    pub fn modify_with_file(file: F) -> anyhow::Result<Self> {
        Self::new_with_access_mode(file, AccessMode::ReadWrite)
    }
86
87    /// Reads the header of a PDB file and provides access to the streams contained within the
88    /// PDB file.
89    ///
90    /// This function reads the MSF File Header, which is the header for the entire file.
91    /// It also reads the stream directory, so it knows how to find each of the streams
92    /// and the pages of the streams.
93    fn new_with_access_mode(file: F, access_mode: AccessMode) -> anyhow::Result<Self> {
94        // Read the MSF File Header.
95
96        let _span = trace_span!("Msf::new_with_access_mode").entered();
97
98        const MIN_PAGE_SIZE_USIZE: usize = 1usize << MIN_PAGE_SIZE.exponent();
99
100        let mut page0: [u8; MIN_PAGE_SIZE_USIZE] = [0; MIN_PAGE_SIZE_USIZE];
101
102        // If this read fails, then the file is too small to be a valid PDB of any kind.
103        file.read_exact_at(&mut page0, 0)?;
104
105        let msf_kind: MsfKind;
106        let page_size: u32;
107        let active_fpm: u32;
108        let num_pages: u32;
109        let stream_dir_size: u32;
110
111        if page0.starts_with(&MSF_BIG_MAGIC) {
112            // unwrap() cannot fail because page0 has a fixed size that is larger than MsfHeader
113            let (msf_header, _) = MsfHeader::ref_from_prefix(page0.as_slice()).unwrap();
114            page_size = msf_header.page_size.get();
115            active_fpm = msf_header.active_fpm.get();
116            num_pages = msf_header.num_pages.get();
117            stream_dir_size = msf_header.stream_dir_size.get();
118            msf_kind = MsfKind::Big;
119
120            // The active FPM can only be 1 or 2.
121            if !matches!(active_fpm, 1 | 2) {
122                bail!("The PDB header is invalid.  The active FPM is invalid.");
123            }
124        } else if page0.starts_with(&MSF_SMALL_MAGIC) {
125            // Found an "old" MSF header.
126            // unwrap() cannot fail because page0 has a fixed size that is larger than SmallMsfHeader
127            let (msf_header, _) = SmallMsfHeader::ref_from_prefix(page0.as_slice()).unwrap();
128            page_size = msf_header.page_size.get();
129            active_fpm = msf_header.active_fpm.get() as u32;
130            num_pages = msf_header.num_pages.get() as u32;
131            stream_dir_size = msf_header.stream_dir_size.get();
132            msf_kind = MsfKind::Small;
133        } else if page0[16..24] == *b"PDB v1.0" {
134            bail!("This file is a Portable PDB, which is not supported.");
135        } else {
136            bail!("PDB file does not have the correct header (magic is wrong).");
137        }
138
139        let Ok(page_size_pow2) = PageSize::try_from(page_size) else {
140            bail!("The PDB header is invalid. The page size ({page_size}) is not a power of 2.",);
141        };
142
143        if num_pages == 0 {
144            bail!("PDB specifies invalid value for num_pages (zero).");
145        }
146
147        let mut stream_sizes: Vec<u32>;
148
149        // The number of pages in the stream directory.
150        let stream_dir_num_pages = stream_dir_size.div_round_up(page_size_pow2);
151
152        // Create the PageAllocator. This initializes the fpm vector to "everything is free"
153        // and then sets Page 0 and the FPM pages as "free". Nothing is marked as "freed".
154        let mut page_allocator = PageAllocator::new(num_pages as usize, page_size_pow2);
155
156        let mut committed_stream_pages: Vec<Page>;
157        let mut committed_stream_page_starts: Vec<u32>;
158
159        match msf_kind {
160            MsfKind::Big => {
161                // "Big MSF" uses a 3-level hierarchy for the Stream Directory:
162                //
163                // stream_dir_map        <-- contains u32 pages to ↓
164                // stream_dir_pages      <-- contains u32 pages to ↓
165                // stream_dir_bytes      <-- bottom level, stored in pages
166                //
167                // stream_dir_map is an array of u32 page pointers. It is stored directly in
168                // page 0, immediately after MsfHeader. These pointers point to pages that contain
169                // the stream_dir_pages, which is the next level down.
170                // The number of pages allocated to stream_dir_map = ceil(stream_dir_pages.len() * 4 / page_size).
171                // The number of bytes used within stream_dir_map = stream_dir_pages.len() * 4.
172                //
173                // stream_dir_pages is a set of pages. When concatenated, they contain the page
174                // pointers that point to the stream directory bytes.
175                // The number of pages in stream_dir_pages = ceil(stream_dir_size / page_size).
176                // The number of bytes used within stream_dir_pages is stream_dir_pages * 4.
177
178                if stream_dir_size % 4 != 0 {
179                    bail!("MSF Stream Directory has an invalid size; it is not a multiple of 4.");
180                }
181
182                // We are going to read the stream directory into this vector.
183                let mut stream_dir: Vec<u32> = vec![0; stream_dir_size as usize / 4];
184
185                // Read the page map for the stream directory.
186                let stream_dir_l1_num_pages =
187                    num_pages_for_stream_size(4 * stream_dir_num_pages, page_size_pow2) as usize;
188                let Ok((page_map_l1_ptrs, _)) = <[U32<LE>]>::ref_from_prefix_with_elems(
189                    &page0[STREAM_DIR_PAGE_MAP_FILE_OFFSET as usize..],
190                    stream_dir_l1_num_pages,
191                ) else {
192                    bail!("Stream dir size is invalid (exceeds design limits)");
193                };
194
195                let stream_dir_bytes: &mut [u8] = stream_dir.as_mut_bytes();
196                let mut stream_dir_chunks = stream_dir_bytes.chunks_mut(page_size as usize);
197                // Now read the stream pages for the stream dir.
198                let mut l1_page: Vec<u8> = vec![0; page_size as usize];
199                'l1_loop: for &page_map_l1_ptr in page_map_l1_ptrs.iter() {
200                    let page_map_l1_ptr: u32 = page_map_l1_ptr.get();
201
202                    page_allocator.init_mark_stream_dir_page_busy(page_map_l1_ptr)?;
203                    if is_special_page_big_msf(page_size_pow2, page_map_l1_ptr) {
204                        bail!(
205                            "Stream dir contains invalid page number: {page_map_l1_ptr}. \
206                             Page points to Page 0 or to an FPM page."
207                        );
208                    }
209
210                    // Read the page pointers.
211                    let file_offset = page_to_offset(page_map_l1_ptr, page_size_pow2);
212                    file.read_exact_at(l1_page.as_mut_slice(), file_offset)?;
213
214                    // Now read the individual pages, as long as we have more.
215                    let l2_page_u32 = <[U32<LE>]>::ref_from_bytes(l1_page.as_slice()).unwrap();
216
217                    for &l2_page in l2_page_u32.iter() {
218                        let l2_page: u32 = l2_page.get();
219
220                        let Some(stream_dir_chunk) = stream_dir_chunks.next() else {
221                            break 'l1_loop;
222                        };
223
224                        page_allocator.init_mark_stream_dir_page_busy(l2_page)?;
225                        if is_special_page_big_msf(page_size_pow2, l2_page) {
226                            bail!(
227                                "Stream dir contains invalid page number: {l2_page}. \
228                                 Page points to Page 0 or to an FPM page."
229                            );
230                        }
231
232                        let l2_file_offset = page_to_offset(l2_page, page_size_pow2);
233                        file.read_exact_at(stream_dir_chunk, l2_file_offset)?;
234                    }
235                }
236
237                if stream_dir.is_empty() {
238                    bail!("Stream directory is invalid (zero-length)");
239                }
240
241                // Bulk-convert the stream directory to host endian, if necessary.
242                if !cfg!(target_endian = "little") {
243                    for x in stream_dir.iter_mut() {
244                        *x = u32::from_le(*x);
245                    }
246                }
247
248                let num_streams = stream_dir[0] as usize;
249
250                // Stream 0 is special and must exist.
251                if num_streams == 0 {
252                    bail!("MSF file is invalid, because num_streams = 0.");
253                }
254
255                let Some(stream_sizes_src) = stream_dir.get(1..1 + num_streams) else {
256                    bail!("Stream directory is invalid (num_streams is not consistent with size)");
257                };
258                stream_sizes = stream_sizes_src.to_vec();
259
260                let mut stream_pages_iter = &stream_dir[1 + num_streams..];
261
262                // Build committed_stream_pages and committed_stream_page_starts.
263                committed_stream_pages = Vec::with_capacity(stream_dir.len() - num_streams - 1);
264                committed_stream_page_starts = Vec::with_capacity(num_streams + 1);
265
266                for (stream, &stream_size) in stream_sizes_src.iter().enumerate() {
267                    committed_stream_page_starts.push(committed_stream_pages.len() as u32);
268
269                    if stream_size != NIL_STREAM_SIZE {
270                        let num_stream_pages =
271                            num_pages_for_stream_size(stream_size, page_size_pow2) as usize;
272                        if num_stream_pages > stream_pages_iter.len() {
273                            bail!(
274                                "Stream directory is invalid.  Stream {stream} has size {stream_size}, \
275                                 which exceeds the size of the stream directory."
276                            );
277                        }
278                        let (this_stream_pages, next) =
279                            stream_pages_iter.split_at(num_stream_pages);
280                        stream_pages_iter = next;
281                        committed_stream_pages.extend_from_slice(this_stream_pages);
282                    }
283                }
284                committed_stream_page_starts.push(committed_stream_pages.len() as u32);
285
286                // Now that we have finished reading the stream directory, we set the length
287                // of stream 0 (the "Old Stream Directory") to 0. Nothing should ever read Stream 0.
288                // If we modify a PDB/MSF file, then we want to write no pages at all for Stream 0.
289                // Doing this here is the most convenient way to handle this.
290                stream_sizes[0] = 0;
291            }
292
293            MsfKind::Small => {
294                // Before Big MSF files, the stream directory was stored in a set of pages.
295                // These pages were listed directly within page 0. Keep in mind that page numbers
296                // are 16-bit in old MSF files.
297                let page_pointers_size_bytes = stream_dir_num_pages * 2;
298
299                let mut pages_u16: Vec<U16<LE>> = vec![U16::new(0); stream_dir_num_pages as usize];
300                if page_pointers_size_bytes + size_of::<SmallMsfHeader>() as u32 > page_size {
301                    bail!(
302                        "The MSF header is invalid. The page pointers for the stream directory \
303                         exceed the range of the first page. \
304                         Stream dir size (in bytes): {stream_dir_size}  Page size: {page_size}"
305                    );
306                }
307
308                file.read_exact_at(pages_u16.as_mut_bytes(), size_of::<SmallMsfHeader>() as u64)?;
309
310                // Read the pages of the stream directory. Be careful with the last page.
311                let mut page_iter = pages_u16.iter();
312                let mut old_stream_dir_bytes: Vec<u8> = vec![0; stream_dir_size as usize];
313                for stream_dir_chunk in old_stream_dir_bytes.chunks_mut(page_size as usize) {
314                    // This unwrap should succeed because we computed the length of pages_u16
315                    // based on the byte size of the stream directory.
316                    let page = page_iter.next().unwrap().get() as u32;
317                    page_allocator.init_mark_stream_dir_page_busy(page)?;
318                    file.read_exact_at(stream_dir_chunk, page_to_offset(page, page_size_pow2))?;
319                }
320
321                let Ok((header, rest)) =
322                    OldMsfStreamDirHeader::read_from_prefix(old_stream_dir_bytes.as_slice())
323                else {
324                    bail!("Invalid stream directory: too small");
325                };
326
327                let num_streams = header.num_streams.get() as usize;
328                stream_sizes = Vec::with_capacity(num_streams);
329
330                let Ok((entries, mut rest)) =
331                    <[OldMsfStreamEntry]>::ref_from_prefix_with_elems(rest, num_streams)
332                else {
333                    bail!("Invalid stream directory: too small")
334                };
335
336                for entry in entries.iter() {
337                    let stream_size = entry.stream_size.get();
338                    stream_sizes.push(stream_size);
339                }
340
341                committed_stream_page_starts = Vec::with_capacity(num_streams + 1);
342                committed_stream_pages = Vec::new(); // TODO: precompute capacity
343
344                for &stream_size in stream_sizes.iter() {
345                    committed_stream_page_starts.push(committed_stream_pages.len() as u32);
346                    if stream_size != NIL_STREAM_SIZE {
347                        let num_pages = stream_size.div_round_up(page_size_pow2);
348
349                        let Ok((pages, r)) =
350                            <[U16<LE>]>::ref_from_prefix_with_elems(rest, num_pages as usize)
351                        else {
352                            bail!("Invalid stream directory: too small");
353                        };
354
355                        rest = r; // update iterator state
356                        for page in pages.iter() {
357                            committed_stream_pages.push(page.get() as u32);
358                        }
359                    }
360                }
361
362                committed_stream_page_starts.push(committed_stream_pages.len() as u32);
363
364                if !rest.is_empty() {
365                    warn!(
366                        unused_bytes = rest.len(),
367                        "old-style stream dir contained unused bytes"
368                    );
369                }
370            }
371        }
372
373        // Mark the pages in all streams (except for stream 0) as busy. This will also detect
374        // page numbers that are invalid (0 or FPM).
375        mark_stream_pages_busy(
376            &committed_stream_page_starts,
377            &committed_stream_pages,
378            &mut page_allocator.fpm,
379            page_size_pow2,
380            msf_kind,
381            access_mode,
382        )?;
383
384        // We have finished building the in-memory FPM, including both the fpm and fpm_freed
385        // vectors. We expect that every page is either FREE, BUSY, or DELETED. Check that now.
386        page_allocator.check_vector_consistency()?;
387
388        // Read the FPM from disk and compare it to the FPM that we just constructed. They should
389        // be identical.
390        // TODO: implement for small MSF
391
392        // If we are opening this MSF file for read-write access, then check that it meets our
393        // consistency requirements. Some tools generate PDBs that violate some consistency
394        // requirements, mainly the bits set in the FPM. We want to avoid checking checking
395        // requirements so that we can read data from those tools, even if they don't set the FPM
396        // bits correctly.
397        if access_mode == AccessMode::ReadWrite {
398            let fpm_on_disk = read_fpm_big_msf(&file, active_fpm, num_pages, page_size_pow2)?;
399
400            assert_eq!(fpm_on_disk.len(), page_allocator.fpm.len()); // because num_pages defines both
401
402            if page_allocator.fpm != fpm_on_disk {
403                warn!("FPM computed from Stream Directory is not equal to FPM found on disk.");
404                warn!(
405                    "Num pages = {num_pages} (0x{num_pages:x} bytes, bit offset: 0x{:x}:{})",
406                    num_pages / 8,
407                    num_pages % 8
408                );
409
410                for i in 0..num_pages as usize {
411                    if fpm_on_disk[i] != page_allocator.fpm[i] {
412                        warn!(
413                            "  bit 0x{:04x} is different. disk = {}, computed = {}",
414                            i, fpm_on_disk[i], page_allocator.fpm[i]
415                        );
416                    }
417                }
418
419                // Clang's PDB writer sometimes places stream pages at illegal locations,
420                // such as in the pages reserved for the FPM. We tolerate this for reading
421                // but not for writing.
422                if access_mode == AccessMode::ReadWrite {
423                    bail!(
424                        "FPM is corrupted; FPM computed from Stream Directory is not equal to FPM found on disk."
425                    );
426                }
427            }
428        }
429
430        // We have finished checking all the data that we have read from disk.
431        // Now check the consistency of our in-memory data structures.
432        page_allocator.assert_invariants();
433
434        match (access_mode, msf_kind) {
435            (AccessMode::ReadWrite, MsfKind::Small) => {
436                bail!(
437                    "This PDB file uses the obsolete 'Small MSF' encoding. \
438                     This library does not support read-write mode with Small MSF files."
439                );
440            }
441
442            (AccessMode::ReadWrite, MsfKind::Big) => {}
443
444            (AccessMode::Read, _) => {}
445        }
446
447        Ok(Self {
448            file,
449            access_mode,
450            active_fpm,
451            committed_stream_pages,
452            committed_stream_page_starts,
453            stream_sizes,
454            kind: msf_kind,
455            pages: page_allocator,
456            modified_streams: HashMap::new(),
457            max_streams: DEFAULT_MAX_STREAMS,
458        })
459    }
460
461    /// Creates a new MSF object in memory. The on-disk file is not modified until `commit()` is
462    /// called.
463    pub fn create_for(file: F, options: CreateOptions) -> anyhow::Result<Self> {
464        let _span = trace_span!("Msf::create_for").entered();
465
466        assert!(options.page_size >= MIN_PAGE_SIZE);
467        assert!(options.page_size <= MAX_PAGE_SIZE);
468
469        let num_pages: usize = 3;
470
471        let mut this = Self {
472            file,
473            access_mode: AccessMode::ReadWrite,
474            committed_stream_pages: vec![],
475            committed_stream_page_starts: vec![0; 2],
476            kind: MsfKind::Big,
477            pages: PageAllocator::new(num_pages, options.page_size),
478            modified_streams: HashMap::new(),
479            stream_sizes: vec![0],
480            active_fpm: 2,
481            max_streams: options.max_streams,
482        };
483
484        // Set up the 4 fixed-index streams. They are created as nil streams.
485        for _ in 1..=4 {
486            let _stream_index = this.nil_stream()?;
487        }
488
489        Ok(this)
490    }
491}
492
493/// Read each page of the FPM. Each page of the FPM is stored in a different interval;
494/// they are not contiguous.
495///
496/// num_pages is the total number of pages in the FPM.
497fn read_fpm_big_msf<F: ReadAt>(
498    file: &F,
499    active_fpm: u32,
500    num_pages: u32,
501    page_size: PageSize,
502) -> anyhow::Result<BitVec<u32, Lsb0>> {
503    let _span = trace_span!("read_fpm_big_msf").entered();
504
505    assert!(num_pages > 0);
506
507    let mut free_page_map: BitVec<u32, Lsb0> = BitVec::new();
508    free_page_map.resize(num_pages as usize, false);
509    let fpm_bytes: &mut [u8] = free_page_map.as_raw_mut_slice().as_mut_bytes();
510    let page_size_usize = usize::from(page_size);
511
512    for (interval, fpm_page_bytes) in fpm_bytes.chunks_mut(page_size_usize).enumerate() {
513        let interval_page = interval_to_page(interval as u32, page_size);
514        let file_pos = page_to_offset(interval_page + active_fpm, page_size);
515
516        trace!(
517            interval,
518            interval_page,
519            file_pos,
520            "reading FPM page, interval_page = 0x{interval_page:x}, file_pos = 0x{file_pos:x}"
521        );
522        file.read_exact_at(fpm_page_bytes, file_pos)?;
523    }
524
525    // Check our invariants for the FPM. If these checks fail then we return Err because we
526    // are validating data that we read from disk. After these checks succeed, we switch to using
527    // assert_invariants(), which uses assert!(). That verifies that we preserve our invariants.
528
529    // Check that page 0, which stores the MSF File Header, is busy.
530    if free_page_map[0] {
531        bail!("FPM is invalid: Page 0 should always be BUSY");
532    }
533
534    // Check that the pages assigned to the FPM are marked "busy" in all intervals.
535
536    let mut interval: u32 = 0;
537    loop {
538        let interval_page = interval_to_page(interval, page_size) as usize;
539        let fpm1_index = interval_page + 1;
540        let fpm2_index = interval_page + 2;
541
542        if fpm1_index < free_page_map.len() {
543            if free_page_map[fpm1_index] {
544                bail!("All FPM pages should be marked BUSY");
545            }
546        }
547
548        if fpm2_index < free_page_map.len() {
549            if free_page_map[fpm2_index] {
550                bail!("All FPM pages should be marked BUSY");
551            }
552            interval += 1;
553        } else {
554            break;
555        }
556    }
557
558    Ok(free_page_map)
559}
560
561/// Computes the low-bits-on mask for the page mask.
562fn low_page_mask(page_size: PageSize) -> u32 {
563    (1u32 << page_size.exponent()).wrapping_sub(1u32)
564}
565
566/// Tests whether `page` contributes to either FPM1 or FPM2.
567fn is_fpm_page_big_msf(page_size: PageSize, page: u32) -> bool {
568    let page_within_interval = page & low_page_mask(page_size);
569    matches!(page_within_interval, 1 | 2)
570}
571
572/// Tests whether `page` is one of the special pages (Page 0, FPM1, or FPM2)
573fn is_special_page_big_msf(page_size: PageSize, page: u32) -> bool {
574    page == 0 || is_fpm_page_big_msf(page_size, page)
575}
576
/// Describes the "old" MSF Stream Directory Header. This is the fixed-size prefix of the Stream
/// Directory in Small MSF files; the per-stream entries follow it.
#[derive(Clone, IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable)]
#[repr(C)]
struct OldMsfStreamDirHeader {
    /// The number of `OldMsfStreamEntry` records that follow this header.
    num_streams: U16<LE>,
    /// Not used by this library.
    ignored: U16<LE>,
}
584
/// An entry in the "old" MSF Stream Directory. There is one entry per stream.
#[derive(Clone, IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable)]
#[repr(C)]
struct OldMsfStreamEntry {
    /// The size of the stream in bytes, or `NIL_STREAM_SIZE` for a nil stream.
    stream_size: U32<LE>,
    /// Not used by this library.
    ignored: U32<LE>,
}
592
593/// Mark all of the pages that are assigned to streams (except for Stream 0) as being "busy" in
594/// the Free Page Map.
595///
596/// This is used only when loading the Stream Directory.
597///
598/// `all_stream_pages` contains the list of pages assigned to all streams. The pages are listed
599/// in order by stream, then in order by page within stream. The `stream_page_starts` table
600/// gives the starting index within `all_stream_pages` for each stream. `stream_page_starts.len()`
601/// is equal to `num_streams + 1`.
602///
603/// `fpm` is the Free Page Map. `fpm.len()` is equal to `num_pages` from the MSF File Header.
604/// This function validates that all stream page indices are valid, where valid means:
605///
606/// * less than `num_pages`
607/// * not Page 0
608/// * not assigned to the Free Page Map (although this requirement is relaxed for read-only mode)
609/// * not already marked busy
610///
611/// This ignores pages in Stream 0, which is the Old Stream Directory. The corresponding bits
612/// in the Free Page Map are *not* modified.
613fn mark_stream_pages_busy(
614    stream_page_starts: &[u32],
615    all_stream_pages: &[u32],
616    fpm: &mut BitVec<u32, Lsb0>,
617    page_size: Pow2,
618    msf_kind: MsfKind,
619    access_mode: AccessMode,
620) -> anyhow::Result<()> {
621    let page_within_interval_mask = low_page_mask(page_size);
622    let strict_mode = access_mode == AccessMode::ReadWrite;
623
624    // The skip(1) skips the Old Stream Directory.
625    for (stream, range) in stream_page_starts.windows(2).enumerate().skip(1) {
626        let stream_pages = &all_stream_pages[range[0] as usize..range[1] as usize];
627
628        for (stream_page, &page) in stream_pages.iter().enumerate() {
629            if page == 0 {
630                bail!(
631                    "Page cannot be marked busy because it points to the first page of the file. Stream {0} is invalid.",
632                    stream
633                );
634            }
635
636            // Clang's PDB writer currently generates PDBs that assign stream pages to pages
637            // reserved for the FPM. That's illegal, but the MSVC implementation of PDB/MSF
638            // does not detect that problem. We check for it here and report an error (and
639            // refuse to open the PDB/MSF file) if the access mode is read/write. If the access
640            // mode is read-only then we report a warning but still open the file.
641
642            if msf_kind == MsfKind::Big {
643                let page_within_interval = page & page_within_interval_mask;
644                if page_within_interval == 1 || page_within_interval == 2 {
645                    warn!(
646                        "Page {page} is invalid; it is assigned to a page reserved for the Free Page Map. Stream {0} is invalid.",
647                        stream
648                    );
649                    if strict_mode {
650                        bail!(
651                            "Page {page} is invalid; it is assigned to a page reserved for the Free Page Map. Stream {0} is invalid.",
652                            stream
653                        );
654                    }
655                    // The page will already have been marked busy. Skip the code below which
656                    // marks the page as busy, so that don't report two warnings.
657                    continue;
658                }
659            }
660
661            if let Some(mut page_is_free) = fpm.get_mut(page as usize) {
662                if !*page_is_free {
663                    error!(
664                        page,
665                        stream,
666                        stream_page,
667                        "Page cannot be marked busy, because it is already marked busy. It may be used by more than one stream."
668                    );
669                    if strict_mode {
670                        bail!(
671                            "Page {page} cannot be marked busy, because it is already marked busy. It may be used by more than one stream. Stream #{stream}"
672                        );
673                    } else {
674                        continue;
675                    }
676                }
677
678                page_is_free.set(false);
679            } else {
680                error!(
681                    page,
682                    stream, stream_page, "Page is invalid; it is out of range (exceeds num_pages)"
683                );
684                bail!(
685                    "Page {} is invalid; it is out of range (exceeds num_pages)",
686                    page
687                );
688            }
689        }
690    }
691
692    Ok(())
693}