1use super::*;
4use sync_file::RandomAccessFile;
5use tracing::{trace, trace_span, warn};
6use zerocopy::IntoBytes;
7
8#[derive(Clone, Debug)]
10pub struct CreateOptions {
11 pub page_size: PageSize,
13
14 pub max_streams: u32,
21}
22
/// Default value for `CreateOptions::max_streams`; it is also the value assigned to
/// `Msf::max_streams` when an existing file is opened.
// NOTE(review): presumably chosen so that stream indexes stay below 0xffff and fit in
// 16 bits — confirm.
const DEFAULT_MAX_STREAMS: u32 = 0xfffe;
25
26impl Default for CreateOptions {
27 fn default() -> Self {
28 Self {
29 page_size: DEFAULT_PAGE_SIZE,
30 max_streams: DEFAULT_MAX_STREAMS,
31 }
32 }
33}
34
35impl Msf<RandomAccessFile> {
36 pub fn open(file_name: &Path) -> anyhow::Result<Self> {
38 let file = File::open(file_name)?;
39 let random_file = RandomAccessFile::from(file);
40 Self::new_with_access_mode(random_file, AccessMode::Read)
41 }
42
43 pub fn create(file_name: &Path, options: CreateOptions) -> anyhow::Result<Self> {
49 let file = File::create(file_name)?;
50 let random_file = RandomAccessFile::from(file);
51 Self::create_with_file(random_file, options)
52 }
53
54 pub fn modify(file_name: &Path) -> anyhow::Result<Self> {
56 let file = File::options().read(true).write(true).open(file_name)?;
57 let random_file = RandomAccessFile::from(file);
58 Self::modify_with_file(random_file)
59 }
60}
61
62impl<F: ReadAt> Msf<F> {
63 pub fn open_with_file(file: F) -> anyhow::Result<Self> {
65 Self::new_with_access_mode(file, AccessMode::Read)
66 }
67
68 pub fn create_with_file(file: F, options: CreateOptions) -> anyhow::Result<Self> {
72 Self::create_for(file, options)
73 }
74
75 pub fn modify_with_file(file: F) -> anyhow::Result<Self> {
82 Self::new_with_access_mode(file, AccessMode::ReadWrite)
83 }
84
85 fn new_with_access_mode(file: F, access_mode: AccessMode) -> anyhow::Result<Self> {
92 let _span = trace_span!("Msf::new_with_access_mode").entered();
95
96 const MIN_PAGE_SIZE_USIZE: usize = 1usize << MIN_PAGE_SIZE.exponent();
97
98 let mut page0: [u8; MIN_PAGE_SIZE_USIZE] = [0; MIN_PAGE_SIZE_USIZE];
99
100 file.read_exact_at(&mut page0, 0)?;
102
103 let msf_kind: MsfKind;
104 let page_size: u32;
105 let active_fpm: u32;
106 let num_pages: u32;
107 let stream_dir_size: u32;
108
109 if page0.starts_with(&MSF_BIG_MAGIC) {
110 let (msf_header, _) = MsfHeader::ref_from_prefix(page0.as_slice()).unwrap();
112 page_size = msf_header.page_size.get();
113 active_fpm = msf_header.active_fpm.get();
114 num_pages = msf_header.num_pages.get();
115 stream_dir_size = msf_header.stream_dir_size.get();
116 msf_kind = MsfKind::Big;
117
118 if !matches!(active_fpm, 1 | 2) {
120 bail!("The PDB header is invalid. The active FPM is invalid.");
121 }
122 } else if page0.starts_with(&MSF_SMALL_MAGIC) {
123 let (msf_header, _) = SmallMsfHeader::ref_from_prefix(page0.as_slice()).unwrap();
126 page_size = msf_header.page_size.get();
127 active_fpm = msf_header.active_fpm.get() as u32;
128 num_pages = msf_header.num_pages.get() as u32;
129 stream_dir_size = msf_header.stream_dir_size.get();
130 msf_kind = MsfKind::Small;
131 } else if page0[16..24] == *b"PDB v1.0" {
132 bail!("This file is a Portable PDB, which is not supported.");
133 } else {
134 bail!("PDB file does not have the correct header (magic is wrong).");
135 }
136
137 let Ok(page_size_pow2) = PageSize::try_from(page_size) else {
138 bail!("The PDB header is invalid. The page size ({page_size}) is not a power of 2.",);
139 };
140
141 if num_pages == 0 {
142 bail!("PDB specifies invalid value for num_pages (zero).");
143 }
144
145 let mut stream_sizes: Vec<u32>;
146
147 let stream_dir_num_pages = stream_dir_size.div_round_up(page_size_pow2);
149
150 let mut page_allocator = PageAllocator::new(num_pages as usize, page_size_pow2);
153
154 let mut committed_stream_pages: Vec<Page>;
155 let mut committed_stream_page_starts: Vec<u32>;
156
157 match msf_kind {
158 MsfKind::Big => {
159 if stream_dir_size % 4 != 0 {
177 bail!("MSF Stream Directory has an invalid size; it is not a multiple of 4.");
178 }
179
180 let mut stream_dir: Vec<U32<LE>> = vec![U32::new(0); stream_dir_size as usize / 4];
182
183 let stream_dir_l1_num_pages =
185 num_pages_for_stream_size(4 * stream_dir_num_pages, page_size_pow2) as usize;
186 let Ok((page_map_l1_ptrs, _)) = <[U32<LE>]>::ref_from_prefix_with_elems(
187 &page0[STREAM_DIR_PAGE_MAP_FILE_OFFSET as usize..],
188 stream_dir_l1_num_pages,
189 ) else {
190 bail!("Stream dir size is invalid (exceeds design limits)");
191 };
192
193 let stream_dir_bytes: &mut [u8] = stream_dir.as_mut_bytes();
194 let mut stream_dir_chunks = stream_dir_bytes.chunks_mut(page_size as usize);
195 let mut l1_page: Vec<u8> = vec![0; page_size as usize];
197 'l1_loop: for &page_map_l1_ptr in page_map_l1_ptrs.iter() {
198 let page_map_l1_ptr: u32 = page_map_l1_ptr.get();
199
200 page_allocator.init_mark_stream_dir_page_busy(page_map_l1_ptr)?;
201 if is_special_page_big_msf(page_size_pow2, page_map_l1_ptr) {
202 bail!(
203 "Stream dir contains invalid page number: {page_map_l1_ptr}. \
204 Page points to Page 0 or to an FPM page."
205 );
206 }
207
208 let file_offset = page_to_offset(page_map_l1_ptr, page_size_pow2);
210 file.read_exact_at(l1_page.as_mut_slice(), file_offset)?;
211
212 let l2_page_u32 = <[U32<LE>]>::ref_from_bytes(l1_page.as_slice()).unwrap();
214
215 for &l2_page in l2_page_u32.iter() {
216 let l2_page: u32 = l2_page.get();
217
218 let Some(stream_dir_chunk) = stream_dir_chunks.next() else {
219 break 'l1_loop;
220 };
221
222 page_allocator.init_mark_stream_dir_page_busy(l2_page)?;
223 if is_special_page_big_msf(page_size_pow2, l2_page) {
224 bail!(
225 "Stream dir contains invalid page number: {l2_page}. \
226 Page points to Page 0 or to an FPM page."
227 );
228 }
229
230 let l2_file_offset = page_to_offset(l2_page, page_size_pow2);
231 file.read_exact_at(stream_dir_chunk, l2_file_offset)?;
232 }
233 }
234
235 if stream_dir.is_empty() {
236 bail!("Stream directory is invalid (zero-length)");
237 }
238
239 let num_streams = stream_dir[0].get() as usize;
240
241 if num_streams == 0 {
243 bail!("MSF file is invalid, because num_streams = 0.");
244 }
245
246 let Some(stream_sizes_src) = stream_dir.get(1..1 + num_streams) else {
247 bail!("Stream directory is invalid (num_streams is not consistent with size)");
248 };
249 stream_sizes = stream_sizes_src.iter().map(|size| size.get()).collect();
250
251 let mut stream_pages_iter = &stream_dir[1 + num_streams..];
252
253 committed_stream_pages = Vec::with_capacity(stream_dir.len() - num_streams - 1);
255 committed_stream_page_starts = Vec::with_capacity(num_streams + 1);
256
257 for (stream, &stream_size) in stream_sizes_src.iter().enumerate() {
258 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
259
260 let stream_size = stream_size.get();
261 if stream_size != NIL_STREAM_SIZE {
262 let num_stream_pages =
263 num_pages_for_stream_size(stream_size, page_size_pow2) as usize;
264 if num_stream_pages > stream_pages_iter.len() {
265 bail!(
266 "Stream directory is invalid. Stream {stream} has size {stream_size}, \
267 which exceeds the size of the stream directory."
268 );
269 }
270 let (this_stream_pages, next) =
271 stream_pages_iter.split_at(num_stream_pages);
272 stream_pages_iter = next;
273 committed_stream_pages.extend(this_stream_pages.iter().map(|p| p.get()));
274 }
275 }
276 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
277
278 stream_sizes[0] = 0;
283 }
284
285 MsfKind::Small => {
286 let page_pointers_size_bytes = stream_dir_num_pages * 2;
290
291 let mut pages_u16: Vec<U16<LE>> = vec![U16::new(0); stream_dir_num_pages as usize];
292 if page_pointers_size_bytes + size_of::<SmallMsfHeader>() as u32 > page_size {
293 bail!(
294 "The MSF header is invalid. The page pointers for the stream directory \
295 exceed the range of the first page. \
296 Stream dir size (in bytes): {stream_dir_size} Page size: {page_size}"
297 );
298 }
299
300 file.read_exact_at(pages_u16.as_mut_bytes(), size_of::<SmallMsfHeader>() as u64)?;
301
302 let mut page_iter = pages_u16.iter();
304 let mut old_stream_dir_bytes: Vec<u8> = vec![0; stream_dir_size as usize];
305 for stream_dir_chunk in old_stream_dir_bytes.chunks_mut(page_size as usize) {
306 let page = page_iter.next().unwrap().get() as u32;
309 page_allocator.init_mark_stream_dir_page_busy(page)?;
310 file.read_exact_at(stream_dir_chunk, page_to_offset(page, page_size_pow2))?;
311 }
312
313 let Ok((header, rest)) =
314 OldMsfStreamDirHeader::read_from_prefix(old_stream_dir_bytes.as_slice())
315 else {
316 bail!("Invalid stream directory: too small");
317 };
318
319 let num_streams = header.num_streams.get() as usize;
320 stream_sizes = Vec::with_capacity(num_streams);
321
322 let Ok((entries, mut rest)) =
323 <[OldMsfStreamEntry]>::ref_from_prefix_with_elems(rest, num_streams)
324 else {
325 bail!("Invalid stream directory: too small")
326 };
327
328 for i in 0..num_streams {
329 let stream_size = entries[i].stream_size.get();
330 stream_sizes.push(stream_size);
331 }
332
333 committed_stream_page_starts = Vec::with_capacity(num_streams + 1);
334 committed_stream_pages = Vec::new(); for &stream_size in stream_sizes.iter() {
337 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
338 if stream_size != NIL_STREAM_SIZE {
339 let num_pages = stream_size.div_round_up(page_size_pow2);
340
341 let Ok((pages, r)) =
342 <[U16<LE>]>::ref_from_prefix_with_elems(rest, num_pages as usize)
343 else {
344 bail!("Invalid stream directory: too small");
345 };
346
347 rest = r; for page in pages.iter() {
349 committed_stream_pages.push(page.get() as u32);
350 }
351 }
352 }
353
354 committed_stream_page_starts.push(committed_stream_pages.len() as u32);
355
356 if !rest.is_empty() {
357 warn!(
358 unused_bytes = rest.len(),
359 "old-style stream dir contained unused bytes"
360 );
361 }
362 }
363 }
364
365 {
368 let start = committed_stream_page_starts[1] as usize;
370 let pages = &committed_stream_pages[start..];
371 for &page in pages.iter() {
372 page_allocator.init_mark_stream_page_busy(page, 0, 0)?;
373 }
374 }
375
376 page_allocator.check_vector_consistency()?;
379
380 let fpm_on_disk = read_fpm_big_msf(&file, active_fpm, num_pages, page_size_pow2)?;
384
385 assert_eq!(fpm_on_disk.len(), page_allocator.fpm.len()); if page_allocator.fpm != fpm_on_disk {
388 {
389 use tracing::warn;
390
391 warn!("FPM computed from Stream Directory is not equal to FPM found on disk.");
392 warn!(
393 "Num pages = {num_pages} (0x{num_pages:x} bytes, bit offset: 0x{:x}:{})",
394 num_pages / 8,
395 num_pages % 8
396 );
397
398 for i in 0..num_pages as usize {
399 if fpm_on_disk[i] != page_allocator.fpm[i] {
400 warn!(
401 " bit 0x{:04x} is different. disk = {}, computed = {}",
402 i, fpm_on_disk[i], page_allocator.fpm[i]
403 );
404 }
405 }
406 }
407 bail!("FPM is corrupted; FPM computed from Stream Directory is not equal to FPM found on disk.");
408 }
409
410 page_allocator.assert_invariants();
413
414 match (access_mode, msf_kind) {
415 (AccessMode::ReadWrite, MsfKind::Small) => {
416 bail!(
417 "This PDB file uses the obsolete 'Small MSF' encoding. \
418 This library does not support read-write mode with Small MSF files."
419 );
420 }
421
422 (AccessMode::ReadWrite, MsfKind::Big) => {}
423
424 (AccessMode::Read, _) => {}
425 }
426
427 Ok(Self {
428 file,
429 access_mode,
430 active_fpm,
431 committed_stream_pages,
432 committed_stream_page_starts,
433 stream_sizes,
434 kind: msf_kind,
435 pages: page_allocator,
436 modified_streams: HashMap::new(),
437 max_streams: DEFAULT_MAX_STREAMS,
438 })
439 }
440
441 pub fn create_for(file: F, options: CreateOptions) -> anyhow::Result<Self> {
444 let _span = trace_span!("Msf::create_for").entered();
445
446 assert!(options.page_size >= MIN_PAGE_SIZE);
447 assert!(options.page_size <= MAX_PAGE_SIZE);
448
449 let num_pages: usize = 3;
450
451 let mut this = Self {
452 file,
453 access_mode: AccessMode::ReadWrite,
454 committed_stream_pages: vec![],
455 committed_stream_page_starts: vec![0; 2],
456 kind: MsfKind::Big,
457 pages: PageAllocator::new(num_pages, options.page_size),
458 modified_streams: HashMap::new(),
459 stream_sizes: vec![0],
460 active_fpm: 2,
461 max_streams: options.max_streams,
462 };
463
464 for _ in 1..=4 {
466 let _stream_index = this.nil_stream()?;
467 }
468
469 Ok(this)
470 }
471}
472
473fn read_fpm_big_msf<F: ReadAt>(
478 file: &F,
479 active_fpm: u32,
480 num_pages: u32,
481 page_size: PageSize,
482) -> anyhow::Result<BitVec<u32, Lsb0>> {
483 let _span = trace_span!("read_fpm_big_msf").entered();
484
485 assert!(num_pages > 0);
486
487 let mut free_page_map: BitVec<u32, Lsb0> = BitVec::new();
488 free_page_map.resize(num_pages as usize, false);
489 let fpm_bytes: &mut [u8] = free_page_map.as_raw_mut_slice().as_mut_bytes();
490 let page_size_usize = usize::from(page_size);
491
492 for (interval, fpm_page_bytes) in fpm_bytes.chunks_mut(page_size_usize).enumerate() {
493 let interval_page = interval_to_page(interval as u32, page_size);
494 let file_pos = page_to_offset(interval_page + active_fpm, page_size);
495
496 trace!(
497 interval,
498 interval_page,
499 file_pos,
500 "reading FPM page, interval_page = 0x{interval_page:x}, file_pos = 0x{file_pos:x}"
501 );
502 file.read_exact_at(fpm_page_bytes, file_pos)?;
503 }
504
505 if free_page_map[0] {
511 bail!("FPM is invalid: Page 0 should always be BUSY");
512 }
513
514 let mut interval: u32 = 0;
517 loop {
518 let interval_page = interval_to_page(interval, page_size) as usize;
519 let fpm1_index = interval_page + 1;
520 let fpm2_index = interval_page + 2;
521
522 if fpm1_index < free_page_map.len() {
523 if free_page_map[fpm1_index] {
524 bail!("All FPM pages should be marked BUSY");
525 }
526 }
527
528 if fpm2_index < free_page_map.len() {
529 if free_page_map[fpm2_index] {
530 bail!("All FPM pages should be marked BUSY");
531 }
532 interval += 1;
533 } else {
534 break;
535 }
536 }
537
538 Ok(free_page_map)
539}
540
541fn low_page_mask(page_size: PageSize) -> u32 {
543 (1u32 << page_size.exponent()).wrapping_sub(1u32)
544}
545
546fn is_fpm_page_big_msf(page_size: PageSize, page: u32) -> bool {
548 let page_within_interval = page & low_page_mask(page_size);
549 matches!(page_within_interval, 1 | 2)
550}
551
552fn is_special_page_big_msf(page_size: PageSize, page: u32) -> bool {
554 page == 0 || is_fpm_page_big_msf(page_size, page)
555}
556
/// Header found at the very start of a Small MSF stream directory.
///
/// It is followed by `num_streams` [`OldMsfStreamEntry`] records and then by the 16-bit
/// page numbers of each stream.
#[derive(Clone, IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable)]
#[repr(C)]
struct OldMsfStreamDirHeader {
    /// Number of stream entries that follow this header.
    num_streams: U16<LE>,
    /// Not read by the loader.
    ignored: U16<LE>,
}
564
/// One per-stream record in a Small MSF stream directory.
#[derive(Clone, IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable)]
#[repr(C)]
struct OldMsfStreamEntry {
    /// Size of the stream in bytes; `NIL_STREAM_SIZE` marks a stream that has no pages.
    stream_size: U32<LE>,
    /// Not read by the loader.
    ignored: U32<LE>,
}