1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Copyright 2017 pdb Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use std::fmt;
use std::io;

/// A byte range of the source file, described by where it starts and how long it is.
///
/// The multi-stream file implementation (used by `pdb::PDB`) works out which byte ranges are
/// required to satisfy a request and hands them to the source as a `&[SourceSlice]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceSlice {
    /// Byte offset of the start of the range within the source file.
    pub offset: u64,
    /// Length of the range in bytes.
    pub size: usize,
}

/// The abstraction through which the `pdb` crate reads the bytes of a PDB file.
///
/// The library is designed around zero-copy access. A `Source` hands out `SourceView`s which need
/// not outlive their parent, which makes implementations such as memory mapped files possible.
///
/// Under the hood a PDB is a "multi-stream file" (MSF). However many layers of indirection the
/// MSF format adds, every read eventually becomes a request for a `Source` to view a series of
/// [`{ offset, size }` records](struct.SourceSlice.html), which the `Source` exposes as one
/// contiguous `&[u8]`.
///
/// # Default
///
/// Any type that is `std::io::Read` + `std::io::Seek` + `std::fmt::Debug` is a `Source` out of
/// the box, so a `std::fs::File` can be treated as a `pdb::Source`. That implementation builds
/// each view by allocating a buffer, seeking, and reading the contents into the buffer.
///
/// # Alignment
///
/// Requested offsets are always aligned to the MSF's page size, which is always a power of two
/// and is usually (but not always) 4096 bytes. Requested sizes are likewise multiples of the page
/// size, with the exception of the final `SourceSlice`, which may be smaller.
///
/// PDB files are specified as always being a multiple of the page size, so `Source`
/// implementations are free to e.g. map whole pages and return a sub-slice of the requested
/// length.
pub trait Source<'s>: fmt::Debug {
    /// Returns one contiguous view made up of the requested position(s) of the source file.
    ///
    /// `SourceView::as_slice()` has no way to fail, so any IO error has to be raised here, while
    /// the view is being created.
    fn view(&mut self, slices: &[SourceSlice]) -> io::Result<Box<SourceView<'s>>>;
}

/// An owned, droppable, read-only view of the source file which can be referenced as a byte slice.
///
/// Implementations that hold a resource (a mapping, a buffer, ...) release it from their own
/// `Drop` impl when the view goes away. `Drop` is deliberately not a supertrait: every value can
/// be dropped, so such a bound would add no guarantee and would only force implementors without
/// any teardown logic to write an empty `Drop` impl.
pub trait SourceView<'s>: fmt::Debug {
    /// Returns the bytes covered by this view.
    ///
    /// This accessor cannot fail; IO errors have to be reported when the view is created.
    fn as_slice(&self) -> &[u8];
}

/// A view backed by an owned, heap-allocated copy of the requested bytes.
#[derive(Clone)]
struct ReadView {
    /// The bytes that make up the view.
    bytes: Vec<u8>,
}

// Hand-written so that debug output shows only the buffer length (e.g. `ReadView(4096 bytes)`)
// rather than every byte of the buffer.
impl fmt::Debug for ReadView {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ReadView({} bytes)", self.bytes.len())
    }
}

impl<'s> SourceView<'s> for ReadView {
    /// Borrows the whole owned buffer as a byte slice.
    fn as_slice(&self) -> &[u8] {
        &self.bytes[..]
    }
}

impl Drop for ReadView {
    fn drop(&mut self) {
        // Intentionally empty: the only thing the view owns is its `Vec<u8>`, which is freed
        // automatically once this value is dropped.
    }
}

/// Default `Source` implementation for any seekable reader, e.g. `std::fs::File`.
///
/// Every requested slice is fetched by seeking to its offset and reading `size` bytes; the
/// pieces are stored back to back, in request order, in one owned buffer.
impl<'s, T> Source<'s> for T
where
    T: io::Read + io::Seek + fmt::Debug + 's,
{
    /// Reads all `slices` into a freshly allocated buffer and returns it as a view.
    ///
    /// # Errors
    ///
    /// * `io::ErrorKind::InvalidInput` if the sizes of `slices` add up to more than `usize::MAX`.
    /// * Any error reported by the underlying `seek` or `read_exact` calls; in particular
    ///   `io::ErrorKind::UnexpectedEof` when a slice extends past the end of the data.
    fn view(&mut self, slices: &[SourceSlice]) -> Result<Box<SourceView<'s>>, io::Error> {
        // Sum the sizes with overflow checking. A plain `+` would panic in debug builds and wrap
        // in release builds, leaving a buffer too small for the reads below.
        let mut len: usize = 0;
        for slice in slices {
            len = len.checked_add(slice.size).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "total size of the requested slices overflows usize",
                )
            })?;
        }

        // Zero-filled up front so that every `read_exact` can write straight into its final
        // position.
        let mut buffer = vec![0u8; len];

        let mut output_offset: usize = 0;
        for slice in slices {
            // Cannot overflow or go out of bounds: `end` never exceeds the checked total `len`.
            let end = output_offset + slice.size;
            self.seek(io::SeekFrom::Start(slice.offset))?;
            self.read_exact(&mut buffer[output_offset..end])?;
            output_offset = end;
        }

        Ok(Box::new(ReadView { bytes: buffer }))
    }
}

#[cfg(test)]
mod tests {
    /// Exercises the default reader-backed `Source` through an in-memory `Cursor`.
    mod read_view {
        use source::*;
        use std::io::{Cursor, ErrorKind};

        /// A single in-bounds slice is returned verbatim.
        #[test]
        fn test_basic_reading() {
            let mut backing = vec![0u8; 4096];
            backing[42] = 42;

            let mut source: Box<Source> = Box::new(Cursor::new(backing.as_slice()));

            let request = [SourceSlice { offset: 40, size: 4 }];
            let view = source.view(&request).expect("viewing must succeed");
            assert_eq!(&[0u8, 0, 42, 0], view.as_slice());
        }

        /// Slices that are out of order in the file are concatenated in request order.
        #[test]
        fn test_discontinuous_reading() {
            let mut backing = vec![0u8; 4096];
            backing[42] = 42;
            backing[88] = 88;

            let mut source: Box<Source> = Box::new(Cursor::new(backing.as_slice()));

            let request = [
                SourceSlice { offset: 88, size: 1 },
                SourceSlice { offset: 40, size: 4 },
            ];
            let view = source.view(&request).expect("viewing must succeed");
            assert_eq!(&[88u8, 0, 0, 42, 0], view.as_slice());
        }

        /// The same range may be requested more than once within a single view.
        #[test]
        fn test_duplicate_reading() {
            let mut backing = vec![0u8; 4096];
            backing[42] = 42;
            backing[88] = 88;

            let mut source: Box<Source> = Box::new(Cursor::new(backing.as_slice()));

            let request = [
                SourceSlice { offset: 88, size: 1 },
                SourceSlice { offset: 40, size: 4 },
                SourceSlice { offset: 88, size: 1 },
            ];
            let view = source.view(&request).expect("viewing must succeed");
            assert_eq!(&[88u8, 0, 0, 42, 0, 88], view.as_slice());
        }

        /// Asking for bytes beyond the end of the data is reported as `UnexpectedEof`.
        #[test]
        fn test_eof_reading() {
            let backing = vec![0u8; 4096];

            let mut source: Box<Source> = Box::new(Cursor::new(backing.as_slice()));

            // one byte is readable, but we asked for two
            let request = [SourceSlice { offset: 4095, size: 2 }];
            let error = match source.view(&request) {
                Err(e) => e,
                Ok(_) => panic!("should have failed"),
            };
            assert_eq!(ErrorKind::UnexpectedEof, error.kind());
        }
    }
}