chrome_cache_parser/
block_file.rs

1use std::{
2    cell::RefCell,
3    cmp::min,
4    collections::{hash_map::Entry, HashMap},
5    fmt,
6    fs::{self, File},
7    io::{self, BufReader, Read},
8    mem,
9    path::PathBuf,
10    rc::Rc,
11};
12
13use zerocopy::{FromBytes, FromZeroes};
14
15use crate::{
16    cache_address::{CacheAddr, FileType},
17    error::{self, CCPResult},
18    time::WindowsEpochMicroseconds,
19    CCPError,
20};
21use static_assertions as sa;
22
/// Magic number every block file header must carry in its `magic` field.
const BLOCK_MAGIC: u32 = 0xc104cac3;
/// Byte offset of the first block in a block file; everything before it is the header.
const BLOCK_HEADER_SIZE: usize = 8192;
/// Number of bits in the header's allocation bitmap: the header bytes left over after the
/// 80 bytes of fixed fields that precede the bitmap in `BlockFileHeader`, times 8 bits per byte.
const MAX_BLOCKS: usize = (BLOCK_HEADER_SIZE - 80) * 8;
/// Size in bytes of the key storage embedded in `InlineCacheKey`.
const INLINE_KEY_SIZE: usize = 160;
27
/// Allocation bitmap that closes the block file header: `MAX_BLOCKS` bits packed into `u32`
/// words.
#[derive(Debug, FromZeroes, FromBytes)]
#[repr(C)]
struct AllocBitmap {
    // NOTE(review): presumably one bit per block, set while the block is allocated — confirm
    // against the Chromium disk format; nothing in this file reads the bitmap.
    data: [u32; MAX_BLOCKS / 32],
}
33
/// On-disk rankings node, decoded in place from a block file buffer.
///
/// The layout is fixed: the field order and the 4-byte packing must not change, which the size
/// assertion below enforces (36 bytes).
#[derive(Debug, FromZeroes, FromBytes, Clone)]
#[repr(C, packed(4))]
pub struct RankingsNode {
    /// Timestamp of the last use, in microseconds since the Windows epoch.
    pub last_used: WindowsEpochMicroseconds,
    /// Timestamp of the last modification, in microseconds since the Windows epoch.
    pub last_modified: WindowsEpochMicroseconds,
    // NOTE(review): `next`/`prev` presumably link neighbouring nodes of the rankings list and
    // `contents` presumably points back at the owning cache entry — confirm against Chromium.
    pub next: CacheAddr,
    pub prev: CacheAddr,
    pub contents: CacheAddr,
    pub dirty: i32,
    pub self_hash: u32,
}

// Guards the binary layout: two 8-byte timestamps plus five 4-byte fields.
sa::const_assert_eq!(mem::size_of::<RankingsNode>(), 36);
47
// See: https://chromium.googlesource.com/chromium/src/net/+/ddbc6c5954c4bee29902082eb9052405e83abc02/disk_cache/disk_format_base.h
/// Header found at the very start of a block file.
///
/// The fixed fields take 80 bytes and the allocation bitmap fills the rest, so the whole header
/// spans `BLOCK_HEADER_SIZE` bytes. The layout is binary-critical; do not reorder fields.
#[derive(Debug, FromZeroes, FromBytes)]
#[repr(C)]
struct BlockFileHeader {
    /// Must equal `BLOCK_MAGIC`; verified whenever the header is parsed.
    pub magic: u32,
    pub version: u32,
    // NOTE(review): presumably the index of this file and of the next file in the same chain —
    // confirm against the Chromium header; neither is read in this file.
    pub this_file: i16,
    pub next_file: i16,
    /// Size in bytes of one block; used to locate and size entries within the file.
    pub entry_size: i32,
    pub num_entries: i32,
    pub max_entries: i32,
    pub empty: [i32; 4],
    pub hints: [i32; 4],
    pub updating: i32,
    pub user: [i32; 5],
    /// Block allocation bitmap occupying the remainder of the header.
    pub allocation_map: AllocBitmap,
}
65
66#[derive(FromZeroes, FromBytes, Clone)]
67pub struct InlineCacheKey {
68    key: [u8; INLINE_KEY_SIZE],
69}
70
71impl fmt::Debug for InlineCacheKey {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        let key = String::from_utf8_lossy(&self.key);
74        write!(f, "{}", key.trim_end_matches(char::from(0)))
75    }
76}
77
78impl fmt::Display for InlineCacheKey {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        let key = String::from_utf8_lossy(&self.key);
81        let trimmed = key.trim_end_matches(char::from(0));
82        write!(f, "{}", trimmed)
83    }
84}
85
/// State of a block file cache entry, decoded from the raw integer stored on disk.
#[derive(Debug, Clone, PartialEq)]
#[repr(u32)]
pub enum BlockCacheEntryState {
    Normal = 0,
    Evicted = 1,
    Doomed = 2,
    /// Any raw value other than 0, 1 or 2.
    Unknown,
}

impl From<i32> for BlockCacheEntryState {
    /// Maps 0, 1 and 2 to their named states and every other value to `Unknown`.
    fn from(value: i32) -> Self {
        if value == 0 {
            return Self::Normal;
        }
        if value == 1 {
            return Self::Evicted;
        }
        if value == 2 {
            return Self::Doomed;
        }
        Self::Unknown
    }
}
105
106#[derive(Debug, FromZeroes, FromBytes, Clone, Copy)]
107pub struct BlockCacheEntryStateField(i32);
108
109impl BlockCacheEntryStateField {
110    // zerocopy lib doesn't provide a mechanism for decoding enums that don't represent all
111    // states, see: https://github.com/google/zerocopy/issues/1429
112    pub fn kind(&self) -> BlockCacheEntryState {
113        BlockCacheEntryState::from(self.0)
114    }
115}
116
// See: https://chromium.googlesource.com/chromium/src/net/+/ddbc6c5954c4bee29902082eb9052405e83abc02/disk_cache/disk_format.h#101
/// On-disk cache entry record, decoded in place from a block file buffer.
///
/// The layout is binary-critical (256 bytes, enforced by the assertion below); do not reorder or
/// resize fields.
#[derive(Debug, FromZeroes, FromBytes, Clone)]
#[repr(C)]
pub struct BlockFileCacheEntry {
    pub hash: u32,
    /// Address of the next entry in the same chain; the entry iterator follows it while it is
    /// initialized.
    pub next: CacheAddr,
    /// Address of this entry's rankings node.
    pub rankings_node: CacheAddr,
    pub reuse_count: i32,
    pub refetch_count: i32,
    /// Raw entry state; decode with `BlockCacheEntryStateField::kind`.
    pub state: BlockCacheEntryStateField,
    pub creation_time: WindowsEpochMicroseconds,
    pub key_len: i32,
    // NOTE(review): presumably the address of the key when it does not fit inline — confirm
    // against the Chromium header; it is not read in this file.
    pub long_key: CacheAddr,
    /// Size of each data stream, index-aligned with `data_addr`.
    pub data_size: [i32; 4],
    /// Address of each data stream, index-aligned with `data_size`.
    pub data_addr: [CacheAddr; 4],
    pub flags: u32,
    pad: [u32; 4],
    pub self_hash: u32,
    /// Key stored inline in the entry.
    pub key: InlineCacheKey,
}

// Guards the binary layout of the entry record.
sa::const_assert_eq!(mem::size_of::<BlockFileCacheEntry>(), 256);
139
140struct BlockFileStreamReader {
141    addr: CacheAddr,
142    size: usize,
143    data_files: Rc<RefCell<DataFiles>>,
144    read_offset: usize,
145}
146
147impl BlockFileStreamReader {
148    pub fn new(
149        addr: CacheAddr,
150        size: usize,
151        data_files: Rc<RefCell<DataFiles>>,
152    ) -> BlockFileStreamReader {
153        BlockFileStreamReader {
154            addr,
155            size,
156            data_files,
157            read_offset: 0,
158        }
159    }
160}
161
162impl Read for BlockFileStreamReader {
163    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
164        if self.read_offset >= self.size {
165            return Ok(0);
166        }
167
168        let mut data_files = self.data_files.borrow_mut();
169        let data_file = match data_files.get(self.addr.file_number()) {
170            Ok(file) => file,
171            Err(CCPError::Io { source }) => return Err(source),
172            Err(err) => return Err(io::Error::new(io::ErrorKind::Other, err)),
173        };
174
175        let block_size = self
176            .addr
177            .file_type()
178            .block_size()
179            .or(Err(io::ErrorKind::InvalidData))?;
180        let start_addr =
181            BLOCK_HEADER_SIZE + self.addr.start_block() as usize * block_size + self.read_offset;
182        let to_be_read = min(buf.len(), self.size - self.read_offset);
183        let end_addr = start_addr + to_be_read;
184
185        buf[0..to_be_read].copy_from_slice(&data_file.buffer[start_addr..end_addr]);
186
187        self.read_offset += to_be_read;
188
189        Ok(to_be_read)
190    }
191}
192
193struct ExternalFileReader {
194    addr: CacheAddr,
195    file: Option<BufReader<File>>,
196    cache_path: PathBuf,
197}
198
199impl ExternalFileReader {
200    pub fn new(addr: CacheAddr, cache_path: PathBuf) -> ExternalFileReader {
201        ExternalFileReader {
202            addr,
203            file: None,
204            cache_path,
205        }
206    }
207}
208
209impl Read for ExternalFileReader {
210    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
211        if let Some(file) = &mut self.file {
212            file.read(buf)
213        } else {
214            let file_name = format!("f_{:0>6x}", self.addr.file_number());
215            let reader = File::open(self.cache_path.join(file_name))?;
216            self.file.replace(BufReader::new(reader));
217            self.read(buf)
218        }
219    }
220}
221
222/// An iterator over the logical entries in a map of block files. Data files are lazily loaded and
223/// cached. An entry in the chrome cache is a node in a linked list of entries in the block files.
224/// The index file is a hash table that maps keys to the first entry in the linked list.
225///
226/// The next node in a given linked list is not guaranteed to be in the same block file, so each
227/// entry needs needs a reference to all of the data files.
228///
229/// By storing the reference to the data files, we can lazily evaluate the actual entries without
230/// copying the underlying buffer. The iterator yields a parser with a shared reference to the
231/// underlying data required for transmutation.
232///
233/// `LazyBlockFileCacheEntryIterator`` is to be instantiated with the cache address of the first
234/// entry and yields any subsequent entries in the linked list.
235pub struct LazyBlockFileCacheEntryIterator {
236    current: Option<CacheAddr>,
237    data_files: Rc<RefCell<DataFiles>>,
238    cache_path: PathBuf,
239}
240
241impl LazyBlockFileCacheEntryIterator {
242    pub fn new(
243        data_files: Rc<RefCell<DataFiles>>,
244        start: CacheAddr,
245        cache_path: PathBuf,
246    ) -> LazyBlockFileCacheEntryIterator {
247        LazyBlockFileCacheEntryIterator {
248            current: Some(start),
249            data_files,
250            cache_path,
251        }
252    }
253}
254
255/// A map of data files, lazily loaded and cached. Provides a method to get a cache entry from a
256/// cache address, selecting the approapriate data file by the file number in the cache address.
257pub struct DataFiles {
258    data_files: HashMap<u32, LazyBlockFile>,
259    path: PathBuf,
260}
261
262impl DataFiles {
263    pub fn new(data_files: HashMap<u32, LazyBlockFile>, path: PathBuf) -> DataFiles {
264        DataFiles { data_files, path }
265    }
266
267    fn get(&mut self, file_number: u32) -> CCPResult<&LazyBlockFile> {
268        Ok(match self.data_files.entry(file_number) {
269            Entry::Occupied(entry) => entry.into_mut(),
270            Entry::Vacant(entry) => {
271                let file_path = self.path.join(format!("data_{}", file_number));
272                let mut file = fs::File::open(&file_path)?;
273                let mut buf: Vec<u8> = Vec::new();
274                file.read_to_end(&mut buf)?;
275                entry.insert(LazyBlockFile::new(Rc::new(buf)))
276            }
277        })
278    }
279
280    pub fn get_entry(&mut self, addr: &CacheAddr) -> CCPResult<BufferSlice> {
281        let data_file = self.get(addr.file_number())?;
282        data_file.get_buffer(addr)
283    }
284}
285
286impl Iterator for LazyBlockFileCacheEntryIterator {
287    type Item = LazyBlockFileCacheEntry;
288
289    fn next(&mut self) -> Option<Self::Item> {
290        let current = self.current.take()?;
291
292        let mut data_files = (*self.data_files).borrow_mut();
293
294        let current = data_files.get_entry(&current).ok()?;
295        let current = LazyBlockFileCacheEntry::new(
296            current,
297            Rc::clone(&self.data_files),
298            self.cache_path.clone(),
299        );
300
301        if let Ok(current) = current.get() {
302            let next = current.next;
303            if next.is_initialized() {
304                self.current = Some(next);
305            }
306        }
307
308        Some(current)
309    }
310}
311
/// Lazily parsed rankings node: holds the raw bytes and decodes them only when `get` is called.
pub struct LazyRankingsNode {
    /// Bytes of the node inside the shared block file buffer.
    buffer: BufferSlice,
}
315
/// A window into a reference-counted byte buffer. Lets every transmuter share the same
/// underlying buffer instead of copying its part of it.
pub struct BufferSlice {
    /// The shared backing buffer.
    buffer: Rc<Vec<u8>>,
    /// Offset of the first byte of the window.
    start: usize,
    /// Length of the window in bytes.
    size: usize,
}

impl BufferSlice {
    /// Creates a window of `size` bytes beginning at offset `start` of `buffer`. The range is
    /// not validated here.
    pub fn new(buffer: Rc<Vec<u8>>, start: usize, size: usize) -> Self {
        Self {
            buffer,
            start,
            size,
        }
    }

    /// Returns the bytes covered by the window.
    ///
    /// # Panics
    ///
    /// Panics if the window does not lie entirely inside the backing buffer.
    pub fn get(&self) -> &[u8] {
        let end = self.start + self.size;
        &self.buffer.as_slice()[self.start..end]
    }
}
337
338impl LazyRankingsNode {
339    pub fn get(&self) -> CCPResult<&RankingsNode> {
340        RankingsNode::ref_from(self.buffer.get()).ok_or(error::CCPError::DataMisalignment(format!(
341            "rankings node at {}",
342            self.buffer.start
343        )))
344    }
345}
346
347pub struct LazyBlockFileCacheEntry {
348    buffer: BufferSlice,
349    data_files: Rc<RefCell<DataFiles>>,
350    cache_path: PathBuf,
351}
352
353impl LazyBlockFileCacheEntry {
354    pub fn new(
355        buffer: BufferSlice,
356        block_files: Rc<RefCell<DataFiles>>,
357        cache_path: PathBuf,
358    ) -> LazyBlockFileCacheEntry {
359        LazyBlockFileCacheEntry {
360            buffer,
361            data_files: block_files,
362            cache_path,
363        }
364    }
365
366    /// Parse the entry from the buffer and return a reference to it.
367    pub fn get(&self) -> CCPResult<&BlockFileCacheEntry> {
368        BlockFileCacheEntry::ref_from(self.buffer.get()).ok_or(error::CCPError::DataMisalignment(
369            format!("block file cache entry at {}", self.buffer.start),
370        ))
371    }
372
373    /// Return readers for the actual cache data. Typically, this is a header stream followed by
374    /// a content stream.
375    pub fn stream_readers(self) -> CCPResult<Vec<CCPResult<Box<dyn Read>>>> {
376        let entry = self.get().or(Err(CCPError::InvalidState(
377            "Unable to read entry".to_string(),
378        )))?;
379
380        Ok(entry
381            .data_addr
382            .iter()
383            .zip(entry.data_size.iter())
384            .map(|(addr, size)| match addr.file_type() {
385                FileType::External => Ok(Box::new(ExternalFileReader::new(
386                    *addr,
387                    self.cache_path.clone(),
388                )) as Box<dyn Read>),
389                FileType::Block1k | FileType::Block256 | FileType::Block4k => Ok(Box::new(
390                    BlockFileStreamReader::new(*addr, *size as usize, self.data_files.clone()),
391                )
392                    as Box<dyn Read>),
393                _ => Err(CCPError::InvalidState(
394                    format!(
395                        "Requested stream reader of nonsense address type {:?}",
396                        addr.file_type()
397                    )
398                    .to_string(),
399                )),
400            })
401            .collect())
402    }
403
404    pub fn get_rankings_node(&mut self) -> CCPResult<LazyRankingsNode> {
405        let cache_entry = self.get()?;
406
407        if !cache_entry.rankings_node.is_initialized() {
408            return Err(error::CCPError::InvalidData(
409                "rankings node not initialized".to_string(),
410            ));
411        }
412
413        let mut data_files = self.data_files.borrow_mut();
414        let ranking_entry = data_files.get_entry(&cache_entry.rankings_node)?;
415
416        Ok(LazyRankingsNode {
417            buffer: ranking_entry,
418        })
419    }
420}
421
422pub struct LazyBlockFile {
423    buffer: Rc<Vec<u8>>,
424}
425
426/// Represents a block file in the chrome cache. It has a header, providing some metadata about the
427/// file, followed by a series of contiguous blocks of a fixed size, defined by a field within the
428/// header.
429impl LazyBlockFile {
430    pub fn new(buffer: Rc<Vec<u8>>) -> LazyBlockFile {
431        LazyBlockFile { buffer }
432    }
433
434    fn header(&self) -> CCPResult<&BlockFileHeader> {
435        let header = BlockFileHeader::ref_from(&self.buffer[0..mem::size_of::<BlockFileHeader>()])
436            .ok_or(error::CCPError::DataMisalignment(
437                "block file header".to_string(),
438            ))?;
439
440        if header.magic != BLOCK_MAGIC {
441            return Err(error::CCPError::InvalidData(format!(
442                "expected block magic {:x}, got {:x}",
443                BLOCK_MAGIC, header.magic
444            )));
445        }
446        Ok(header)
447    }
448
449    pub fn get_buffer(&self, addr: &CacheAddr) -> CCPResult<BufferSlice> {
450        let header = self.header()?;
451        Ok(BufferSlice::new(
452            Rc::clone(&self.buffer),
453            BLOCK_HEADER_SIZE + addr.start_block() as usize * header.entry_size as usize,
454            header.entry_size as usize,
455        ))
456    }
457}