chrome_cache_parser/
block_file.rs

1use std::{
2    cell::RefCell,
3    cmp::min,
4    collections::HashMap,
5    fmt,
6    fs::{self, File},
7    io::{self, BufReader, Read},
8    mem,
9    path::PathBuf,
10    rc::Rc,
11};
12
13use zerocopy::{FromBytes, FromZeroes};
14
15use crate::{
16    cache_address::{CacheAddr, FileType},
17    error::{self, CCPResult},
18    time::WindowsEpochMicroseconds,
19    CCPError,
20};
21use static_assertions as sa;
22
/// Magic number expected in the `magic` field of every block file header.
const BLOCK_MAGIC: u32 = 0xc104cac3;
/// Size in bytes of a block file header; the first block starts at this offset in the file.
const BLOCK_HEADER_SIZE: usize = 8192;
/// Number of bits in the header's allocation bitmap: the header bytes that remain after the 80
/// bytes of fixed-size header fields, times 8 bits per byte.
const MAX_BLOCKS: usize = (BLOCK_HEADER_SIZE - 80) * 8;
/// Length in bytes of the key buffer embedded in a cache entry.
const INLINE_KEY_SIZE: usize = 160;
27
/// Allocation bitmap that forms the tail of a block file header: `MAX_BLOCKS` bits packed into
/// 32-bit words.
// NOTE(review): presumably one bit per block, set when the block is in use — confirm against
// Chromium's disk_format_base.h.
#[derive(Debug, FromZeroes, FromBytes)]
#[repr(C)]
struct AllocBitmap {
    data: [u32; MAX_BLOCKS / 32],
}
33
/// On-disk rankings node record, reinterpreted in place from block file bytes via `zerocopy`.
///
/// `packed(4)` caps field alignment at 4 bytes; the assertion below pins the resulting size at
/// 36 bytes.
#[derive(Debug, FromZeroes, FromBytes, Clone)]
#[repr(C, packed(4))]
pub struct RankingsNode {
    /// Last-used timestamp of the entry.
    pub last_used: WindowsEpochMicroseconds,
    /// Last-modified timestamp of the entry.
    pub last_modified: WindowsEpochMicroseconds,
    /// Address of the next node — presumably in the cache's ranking list; confirm.
    pub next: CacheAddr,
    /// Address of the previous node — presumably in the cache's ranking list; confirm.
    pub prev: CacheAddr,
    /// Address of the data this node ranks — presumably the owning cache entry; confirm.
    pub contents: CacheAddr,
    pub dirty: i32,
    pub self_hash: u32,
}

// Compile-time guard: the struct must match the 36-byte on-disk record exactly.
sa::const_assert_eq!(mem::size_of::<RankingsNode>(), 36);
47
// See: https://chromium.googlesource.com/chromium/src/net/+/ddbc6c5954c4bee29902082eb9052405e83abc02/disk_cache/disk_format_base.h
/// Header found at the start of every block file, reinterpreted in place via `zerocopy`.
///
/// The fixed fields occupy 80 bytes and the allocation bitmap fills the remainder of the
/// `BLOCK_HEADER_SIZE`-byte header.
#[derive(Debug, FromZeroes, FromBytes)]
#[repr(C)]
struct BlockFileHeader {
    /// Must equal `BLOCK_MAGIC` for the file to be accepted.
    pub magic: u32,
    pub version: u32,
    pub this_file: i16,
    pub next_file: i16,
    /// Size in bytes of each block in this file.
    pub entry_size: i32,
    pub num_entries: i32,
    pub max_entries: i32,
    pub empty: [i32; 4],
    pub hints: [i32; 4],
    pub updating: i32,
    pub user: [i32; 5],
    pub allocation_map: AllocBitmap,
}
65
/// Fixed-size buffer of `INLINE_KEY_SIZE` raw bytes holding the key stored inline in a cache
/// entry. Its `Display` impl renders the bytes as text with trailing NUL bytes trimmed.
#[derive(FromZeroes, FromBytes, Clone)]
pub struct InlineCacheKey {
    key: [u8; INLINE_KEY_SIZE],
}
70
71impl fmt::Debug for InlineCacheKey {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        write!(f, "{}", std::str::from_utf8(&self.key).unwrap())
74    }
75}
76
77impl fmt::Display for InlineCacheKey {
78    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79        let key = std::str::from_utf8(&self.key)
80            .map_err(|_| fmt::Error)?
81            .trim_end_matches(char::from(0));
82        write!(f, "{}", key)?;
83        Ok(())
84    }
85}
86
/// Decoded state of a block file cache entry. Any raw value other than 0, 1 or 2 decodes to
/// `Unknown`.
#[derive(Debug, Clone, PartialEq)]
#[repr(u32)]
pub enum BlockCacheEntryState {
    Normal = 0,
    Evicted = 1,
    Doomed = 2,
    Unknown,
}

impl From<i32> for BlockCacheEntryState {
    /// Maps the raw on-disk integer to a state; unrecognised values become `Unknown`.
    fn from(value: i32) -> Self {
        use BlockCacheEntryState::{Doomed, Evicted, Normal, Unknown};

        if value == 0 {
            Normal
        } else if value == 1 {
            Evicted
        } else if value == 2 {
            Doomed
        } else {
            Unknown
        }
    }
}
106
107#[derive(Debug, FromZeroes, FromBytes, Clone, Copy)]
108pub struct BlockCacheEntryStateField(i32);
109
110impl BlockCacheEntryStateField {
111    // zerocopy lib doesn't provide a mechanism for decoding enums that don't represent all
112    // states, see: https://github.com/google/zerocopy/issues/1429
113    pub fn kind(&self) -> BlockCacheEntryState {
114        BlockCacheEntryState::from(self.0)
115    }
116}
117
// See: https://chromium.googlesource.com/chromium/src/net/+/ddbc6c5954c4bee29902082eb9052405e83abc02/disk_cache/disk_format.h#101
/// On-disk cache entry record, reinterpreted in place from block file bytes via `zerocopy`.
/// The assertion below pins its size at 256 bytes.
#[derive(Debug, FromZeroes, FromBytes, Clone)]
#[repr(C)]
pub struct BlockFileCacheEntry {
    pub hash: u32,
    /// Address of the next entry in this entry's linked list; followed while it is initialized.
    pub next: CacheAddr,
    /// Address of this entry's rankings node.
    pub rankings_node: CacheAddr,
    pub reuse_count: i32,
    pub refetch_count: i32,
    /// Raw entry state; decode with `BlockCacheEntryStateField::kind`.
    pub state: BlockCacheEntryStateField,
    pub creation_time: WindowsEpochMicroseconds,
    pub key_len: i32,
    // NOTE(review): presumably the address of the key when it does not fit inline — confirm.
    pub long_key: CacheAddr,
    /// Size of each data stream; paired index-by-index with `data_addr`.
    pub data_size: [i32; 4],
    /// Address of each data stream; paired index-by-index with `data_size`.
    pub data_addr: [CacheAddr; 4],
    pub flags: u32,
    pad: [u32; 4],
    pub self_hash: u32,
    /// Key bytes stored inline in the entry.
    pub key: InlineCacheKey,
}

// Compile-time guard: the struct must match the 256-byte on-disk record exactly.
sa::const_assert_eq!(mem::size_of::<BlockFileCacheEntry>(), 256);
140
141struct BlockFileStreamReader {
142    addr: CacheAddr,
143    size: usize,
144    data_files: Rc<RefCell<DataFiles>>,
145    read_offset: usize,
146}
147
148impl BlockFileStreamReader {
149    pub fn new(
150        addr: CacheAddr,
151        size: usize,
152        data_files: Rc<RefCell<DataFiles>>,
153    ) -> BlockFileStreamReader {
154        BlockFileStreamReader {
155            addr,
156            size,
157            data_files,
158            read_offset: 0,
159        }
160    }
161}
162
163impl Read for BlockFileStreamReader {
164    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
165        if self.read_offset >= self.size {
166            return Ok(0);
167        }
168
169        let mut data_files = self.data_files.borrow_mut();
170        let data_file = data_files.get(self.addr.file_number());
171
172        let block_size = self
173            .addr
174            .file_type()
175            .block_size()
176            .or(Err(io::ErrorKind::InvalidData))?;
177        let start_addr =
178            BLOCK_HEADER_SIZE + self.addr.start_block() as usize * block_size + self.read_offset;
179        let to_be_read = min(buf.len(), self.size - self.read_offset);
180        let end_addr = start_addr + to_be_read;
181
182        buf[0..to_be_read].copy_from_slice(&data_file.buffer[start_addr..end_addr]);
183
184        self.read_offset += to_be_read;
185
186        Ok(to_be_read)
187    }
188}
189
190struct ExternalFileReader {
191    addr: CacheAddr,
192    file: Option<BufReader<File>>,
193    cache_path: PathBuf,
194}
195
196impl ExternalFileReader {
197    pub fn new(addr: CacheAddr, cache_path: PathBuf) -> ExternalFileReader {
198        ExternalFileReader {
199            addr,
200            file: None,
201            cache_path,
202        }
203    }
204}
205
206impl Read for ExternalFileReader {
207    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
208        if let Some(file) = &mut self.file {
209            file.read(buf)
210        } else {
211            let file_name = format!("f_{:0>6x}", self.addr.file_number());
212            let reader = File::open(self.cache_path.join(file_name))?;
213            self.file.replace(BufReader::new(reader));
214            self.read(buf)
215        }
216    }
217}
218
219/// An iterator over the logical entries in a map of block files. Data files are lazily loaded and
220/// cached. An entry in the chrome cache is a node in a linked list of entries in the block files.
221/// The index file is a hash table that maps keys to the first entry in the linked list.
222///
223/// The next node in a given linked list is not guaranteed to be in the same block file, so each
224/// entry needs needs a reference to all of the data files.
225///
226/// By storing the reference to the data files, we can lazily evaluate the actual entries without
227/// copying the underlying buffer. The iterator yields a parser with a shared reference to the
228/// underlying data required for transmutation.
229///
230/// `LazyBlockFileCacheEntryIterator`` is to be instantiated with the cache address of the first
231/// entry and yields any subsequent entries in the linked list.
232pub struct LazyBlockFileCacheEntryIterator {
233    current: Option<CacheAddr>,
234    data_files: Rc<RefCell<DataFiles>>,
235    cache_path: PathBuf,
236}
237
238impl LazyBlockFileCacheEntryIterator {
239    pub fn new(
240        data_files: Rc<RefCell<DataFiles>>,
241        start: CacheAddr,
242        cache_path: PathBuf,
243    ) -> LazyBlockFileCacheEntryIterator {
244        LazyBlockFileCacheEntryIterator {
245            current: Some(start),
246            data_files,
247            cache_path,
248        }
249    }
250}
251
252/// A map of data files, lazily loaded and cached. Provides a method to get a cache entry from a
253/// cache address, selecting the approapriate data file by the file number in the cache address.
254pub struct DataFiles {
255    data_files: HashMap<u32, LazyBlockFile>,
256    path: PathBuf,
257}
258
259impl DataFiles {
260    pub fn new(data_files: HashMap<u32, LazyBlockFile>, path: PathBuf) -> DataFiles {
261        DataFiles { data_files, path }
262    }
263
264    fn get(&mut self, file_number: u32) -> &LazyBlockFile {
265        self.data_files.entry(file_number).or_insert_with(|| {
266            let file_path = self.path.join(format!("data_{}", file_number));
267
268            let mut file = fs::File::open(file_path).unwrap();
269            let mut buf: Vec<u8> = Vec::new();
270            file.read_to_end(&mut buf).unwrap();
271            LazyBlockFile::new(Rc::new(buf))
272        })
273    }
274
275    pub fn get_entry(&mut self, addr: &CacheAddr) -> CCPResult<BufferSlice> {
276        let data_file = self.get(addr.file_number());
277        data_file.get_buffer(addr)
278    }
279}
280
281impl Iterator for LazyBlockFileCacheEntryIterator {
282    type Item = LazyBlockFileCacheEntry;
283
284    fn next(&mut self) -> Option<Self::Item> {
285        let current = self.current.take()?;
286
287        let mut data_files = (*self.data_files).borrow_mut();
288
289        let current = data_files.get_entry(&current).ok()?;
290        let current = LazyBlockFileCacheEntry::new(
291            current,
292            Rc::clone(&self.data_files),
293            self.cache_path.clone(),
294        );
295
296        if let Ok(current) = current.get() {
297            let next = current.next;
298            if next.is_initialized() {
299                self.current = Some(next);
300            }
301        }
302
303        Some(current)
304    }
305}
306
/// A rankings node whose bytes are parsed on demand by `get`.
pub struct LazyRankingsNode {
    /// Slice of the block file buffer that holds the node's bytes.
    buffer: BufferSlice,
}
310
/// A slice to a shared buffer. Enables us to pass a reference to the buffer to all of the
/// transmuters.
pub struct BufferSlice {
    /// Shared backing buffer.
    buffer: Rc<Vec<u8>>,
    /// Offset of the first byte of the slice within `buffer`.
    start: usize,
    /// Length of the slice in bytes.
    size: usize,
}

impl BufferSlice {
    /// Describes `size` bytes of `buffer` beginning at offset `start`. The range is not
    /// validated here; see [`BufferSlice::get`].
    pub fn new(buffer: Rc<Vec<u8>>, start: usize, size: usize) -> BufferSlice {
        BufferSlice {
            buffer,
            start,
            size,
        }
    }

    /// Returns the described bytes.
    ///
    /// Offsets are derived from cache addresses read off disk, so the range can lie outside the
    /// buffer for a truncated or corrupt file. In that case (or if `start + size` overflows) an
    /// empty slice is returned instead of panicking; the fixed-size parsers then reject it as
    /// having the wrong length.
    pub fn get(&self) -> &[u8] {
        self.start
            .checked_add(self.size)
            .and_then(|end| self.buffer.get(self.start..end))
            .unwrap_or(&[])
    }
}
332
333impl LazyRankingsNode {
334    pub fn get(&self) -> CCPResult<&RankingsNode> {
335        RankingsNode::ref_from(self.buffer.get()).ok_or(error::CCPError::DataMisalignment(format!(
336            "rankings node at {}",
337            self.buffer.start
338        )))
339    }
340}
341
342pub struct LazyBlockFileCacheEntry {
343    buffer: BufferSlice,
344    data_files: Rc<RefCell<DataFiles>>,
345    cache_path: PathBuf,
346}
347
348impl LazyBlockFileCacheEntry {
349    pub fn new(
350        buffer: BufferSlice,
351        block_files: Rc<RefCell<DataFiles>>,
352        cache_path: PathBuf,
353    ) -> LazyBlockFileCacheEntry {
354        LazyBlockFileCacheEntry {
355            buffer,
356            data_files: block_files,
357            cache_path,
358        }
359    }
360
361    /// Parse the entry from the buffer and return a reference to it.
362    pub fn get(&self) -> CCPResult<&BlockFileCacheEntry> {
363        BlockFileCacheEntry::ref_from(self.buffer.get()).ok_or(error::CCPError::DataMisalignment(
364            format!("block file cache entry at {}", self.buffer.start),
365        ))
366    }
367
368    /// Return readers for the actual cache data. Typically, this is a header stream followed by
369    /// a content stream.
370    pub fn stream_readers(self) -> CCPResult<Vec<CCPResult<Box<dyn Read>>>> {
371        let entry = self.get().or(Err(CCPError::InvalidState(
372            "Unable to read entry".to_string(),
373        )))?;
374
375        Ok(entry
376            .data_addr
377            .iter()
378            .zip(entry.data_size.iter())
379            .map(|(addr, size)| match addr.file_type() {
380                FileType::External => Ok(Box::new(ExternalFileReader::new(
381                    *addr,
382                    self.cache_path.clone(),
383                )) as Box<dyn Read>),
384                FileType::Block1k | FileType::Block256 | FileType::Block4k => Ok(Box::new(
385                    BlockFileStreamReader::new(*addr, *size as usize, self.data_files.clone()),
386                )
387                    as Box<dyn Read>),
388                _ => Err(CCPError::InvalidState(
389                    format!(
390                        "Requested stream reader of nonsense address type {:?}",
391                        addr.file_type()
392                    )
393                    .to_string(),
394                )),
395            })
396            .collect())
397    }
398
399    pub fn get_rankings_node(&mut self) -> CCPResult<LazyRankingsNode> {
400        let cache_entry = self.get()?;
401
402        if !cache_entry.rankings_node.is_initialized() {
403            return Err(error::CCPError::InvalidData(
404                "rankings node not initialized".to_string(),
405            ));
406        }
407
408        let mut data_files = self.data_files.borrow_mut();
409        let ranking_entry = data_files.get_entry(&cache_entry.rankings_node)?;
410
411        Ok(LazyRankingsNode {
412            buffer: ranking_entry,
413        })
414    }
415}
416
417pub struct LazyBlockFile {
418    buffer: Rc<Vec<u8>>,
419}
420
421/// Represents a block file in the chrome cache. It has a header, providing some metadata about the
422/// file, followed by a series of contiguous blocks of a fixed size, defined by a field within the
423/// header.
424impl LazyBlockFile {
425    pub fn new(buffer: Rc<Vec<u8>>) -> LazyBlockFile {
426        LazyBlockFile { buffer }
427    }
428
429    fn header(&self) -> CCPResult<&BlockFileHeader> {
430        let header = BlockFileHeader::ref_from(&self.buffer[0..mem::size_of::<BlockFileHeader>()])
431            .ok_or(error::CCPError::DataMisalignment(
432                "block file header".to_string(),
433            ))?;
434
435        if header.magic != BLOCK_MAGIC {
436            return Err(error::CCPError::InvalidData(format!(
437                "expected block magic {:x}, got {:x}",
438                BLOCK_MAGIC, header.magic
439            )));
440        }
441        Ok(header)
442    }
443
444    pub fn get_buffer(&self, addr: &CacheAddr) -> CCPResult<BufferSlice> {
445        let header = self.header()?;
446        Ok(BufferSlice::new(
447            Rc::clone(&self.buffer),
448            BLOCK_HEADER_SIZE + addr.start_block() as usize * header.entry_size as usize,
449            header.entry_size as usize,
450        ))
451    }
452}