// chrome_cache_parser/block_file.rs

1use std::{
2    cell::RefCell,
3    cmp::min,
4    collections::{hash_map::Entry, HashMap},
5    fmt,
6    fs::{self, File},
7    io::{self, BufReader, Read},
8    mem,
9    path::PathBuf,
10    rc::Rc,
11};
12
13use zerocopy::{FromBytes, FromZeroes};
14
15use crate::{
16    cache_address::{CacheAddr, FileType},
17    error::{self, CCPResult},
18    time::WindowsEpochMicroseconds,
19    CCPError,
20};
21use static_assertions as sa;
22
/// Magic number expected in the `magic` field of every block-file header.
const BLOCK_MAGIC: u32 = 0xC104_CAC3;
/// Size in bytes of the header at the start of a block file; block data starts at this offset.
const BLOCK_HEADER_SIZE: usize = 8 * 1024;
/// Number of bits in the allocation bitmap: the header minus the 80 bytes taken by its fixed
/// fields, at eight bits per byte.
const MAX_BLOCKS: usize = 8 * (BLOCK_HEADER_SIZE - 80);
/// Capacity in bytes of the key buffer embedded in a cache entry.
const INLINE_KEY_SIZE: usize = 160;
27
28#[derive(Debug, FromZeroes, FromBytes)]
29#[repr(C)]
30struct AllocBitmap {
31    data: [u32; MAX_BLOCKS / 32],
32}
33
34#[derive(Debug, FromZeroes, FromBytes, Clone)]
35#[repr(C, packed(4))]
36pub struct RankingsNode {
37    pub last_used: WindowsEpochMicroseconds,
38    pub last_modified: WindowsEpochMicroseconds,
39    pub next: CacheAddr,
40    pub prev: CacheAddr,
41    pub contents: CacheAddr,
42    pub dirty: i32,
43    pub self_hash: u32,
44}
45
46sa::const_assert_eq!(mem::size_of::<RankingsNode>(), 36);
47
48// See: https://chromium.googlesource.com/chromium/src/net/+/ddbc6c5954c4bee29902082eb9052405e83abc02/disk_cache/disk_format_base.h
49#[derive(Debug, FromZeroes, FromBytes)]
50#[repr(C)]
51struct BlockFileHeader {
52    pub magic: u32,
53    pub version: u32,
54    pub this_file: i16,
55    pub next_file: i16,
56    pub entry_size: i32,
57    pub num_entries: i32,
58    pub max_entries: i32,
59    pub empty: [i32; 4],
60    pub hints: [i32; 4],
61    pub updating: i32,
62    pub user: [i32; 5],
63    pub allocation_map: AllocBitmap,
64}
65
66#[derive(FromZeroes, FromBytes, Clone)]
67pub struct InlineCacheKey {
68    key: [u8; INLINE_KEY_SIZE],
69}
70
71impl fmt::Debug for InlineCacheKey {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        write!(f, "{}", std::str::from_utf8(&self.key).unwrap())
74    }
75}
76
77impl fmt::Display for InlineCacheKey {
78    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79        let key = std::str::from_utf8(&self.key)
80            .map_err(|_| fmt::Error)?
81            .trim_end_matches(char::from(0));
82        write!(f, "{}", key)?;
83        Ok(())
84    }
85}
86
/// Decoded state of a block-file cache entry.
///
/// `Unknown` stands in for any stored value other than 0, 1 or 2. The enum is fieldless, so it
/// also derives `Copy` and `Eq` to let callers pass it by value and use it in exact
/// comparisons.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum BlockCacheEntryState {
    Normal = 0,
    Evicted = 1,
    Doomed = 2,
    Unknown,
}
95
96impl From<i32> for BlockCacheEntryState {
97    fn from(value: i32) -> Self {
98        match value {
99            0 => BlockCacheEntryState::Normal,
100            1 => BlockCacheEntryState::Evicted,
101            2 => BlockCacheEntryState::Doomed,
102            _ => BlockCacheEntryState::Unknown,
103        }
104    }
105}
106
107#[derive(Debug, FromZeroes, FromBytes, Clone, Copy)]
108pub struct BlockCacheEntryStateField(i32);
109
110impl BlockCacheEntryStateField {
111    // zerocopy lib doesn't provide a mechanism for decoding enums that don't represent all
112    // states, see: https://github.com/google/zerocopy/issues/1429
113    pub fn kind(&self) -> BlockCacheEntryState {
114        BlockCacheEntryState::from(self.0)
115    }
116}
117
118// See: https://chromium.googlesource.com/chromium/src/net/+/ddbc6c5954c4bee29902082eb9052405e83abc02/disk_cache/disk_format.h#101
119#[derive(Debug, FromZeroes, FromBytes, Clone)]
120#[repr(C)]
121pub struct BlockFileCacheEntry {
122    pub hash: u32,
123    pub next: CacheAddr,
124    pub rankings_node: CacheAddr,
125    pub reuse_count: i32,
126    pub refetch_count: i32,
127    pub state: BlockCacheEntryStateField,
128    pub creation_time: WindowsEpochMicroseconds,
129    pub key_len: i32,
130    pub long_key: CacheAddr,
131    pub data_size: [i32; 4],
132    pub data_addr: [CacheAddr; 4],
133    pub flags: u32,
134    pad: [u32; 4],
135    pub self_hash: u32,
136    pub key: InlineCacheKey,
137}
138
139sa::const_assert_eq!(mem::size_of::<BlockFileCacheEntry>(), 256);
140
141struct BlockFileStreamReader {
142    addr: CacheAddr,
143    size: usize,
144    data_files: Rc<RefCell<DataFiles>>,
145    read_offset: usize,
146}
147
148impl BlockFileStreamReader {
149    pub fn new(
150        addr: CacheAddr,
151        size: usize,
152        data_files: Rc<RefCell<DataFiles>>,
153    ) -> BlockFileStreamReader {
154        BlockFileStreamReader {
155            addr,
156            size,
157            data_files,
158            read_offset: 0,
159        }
160    }
161}
162
163impl Read for BlockFileStreamReader {
164    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
165        if self.read_offset >= self.size {
166            return Ok(0);
167        }
168
169        let mut data_files = self.data_files.borrow_mut();
170        let data_file = match data_files.get(self.addr.file_number()) {
171            Ok(file) => file,
172            Err(CCPError::Io { source }) => return Err(source),
173            Err(err) => return Err(io::Error::new(io::ErrorKind::Other, err)),
174        };
175
176        let block_size = self
177            .addr
178            .file_type()
179            .block_size()
180            .or(Err(io::ErrorKind::InvalidData))?;
181        let start_addr =
182            BLOCK_HEADER_SIZE + self.addr.start_block() as usize * block_size + self.read_offset;
183        let to_be_read = min(buf.len(), self.size - self.read_offset);
184        let end_addr = start_addr + to_be_read;
185
186        buf[0..to_be_read].copy_from_slice(&data_file.buffer[start_addr..end_addr]);
187
188        self.read_offset += to_be_read;
189
190        Ok(to_be_read)
191    }
192}
193
194struct ExternalFileReader {
195    addr: CacheAddr,
196    file: Option<BufReader<File>>,
197    cache_path: PathBuf,
198}
199
200impl ExternalFileReader {
201    pub fn new(addr: CacheAddr, cache_path: PathBuf) -> ExternalFileReader {
202        ExternalFileReader {
203            addr,
204            file: None,
205            cache_path,
206        }
207    }
208}
209
210impl Read for ExternalFileReader {
211    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
212        if let Some(file) = &mut self.file {
213            file.read(buf)
214        } else {
215            let file_name = format!("f_{:0>6x}", self.addr.file_number());
216            let reader = File::open(self.cache_path.join(file_name))?;
217            self.file.replace(BufReader::new(reader));
218            self.read(buf)
219        }
220    }
221}
222
223/// An iterator over the logical entries in a map of block files. Data files are lazily loaded and
224/// cached. An entry in the chrome cache is a node in a linked list of entries in the block files.
225/// The index file is a hash table that maps keys to the first entry in the linked list.
226///
227/// The next node in a given linked list is not guaranteed to be in the same block file, so each
228/// entry needs needs a reference to all of the data files.
229///
230/// By storing the reference to the data files, we can lazily evaluate the actual entries without
231/// copying the underlying buffer. The iterator yields a parser with a shared reference to the
232/// underlying data required for transmutation.
233///
234/// `LazyBlockFileCacheEntryIterator`` is to be instantiated with the cache address of the first
235/// entry and yields any subsequent entries in the linked list.
236pub struct LazyBlockFileCacheEntryIterator {
237    current: Option<CacheAddr>,
238    data_files: Rc<RefCell<DataFiles>>,
239    cache_path: PathBuf,
240}
241
242impl LazyBlockFileCacheEntryIterator {
243    pub fn new(
244        data_files: Rc<RefCell<DataFiles>>,
245        start: CacheAddr,
246        cache_path: PathBuf,
247    ) -> LazyBlockFileCacheEntryIterator {
248        LazyBlockFileCacheEntryIterator {
249            current: Some(start),
250            data_files,
251            cache_path,
252        }
253    }
254}
255
256/// A map of data files, lazily loaded and cached. Provides a method to get a cache entry from a
257/// cache address, selecting the approapriate data file by the file number in the cache address.
258pub struct DataFiles {
259    data_files: HashMap<u32, LazyBlockFile>,
260    path: PathBuf,
261}
262
263impl DataFiles {
264    pub fn new(data_files: HashMap<u32, LazyBlockFile>, path: PathBuf) -> DataFiles {
265        DataFiles { data_files, path }
266    }
267
268    fn get(&mut self, file_number: u32) -> CCPResult<&LazyBlockFile> {
269        Ok(match self.data_files.entry(file_number) {
270            Entry::Occupied(entry) => entry.into_mut(),
271            Entry::Vacant(entry) => {
272                let file_path = self.path.join(format!("data_{}", file_number));
273                let mut file = fs::File::open(&file_path)?;
274                let mut buf: Vec<u8> = Vec::new();
275                file.read_to_end(&mut buf)?;
276                entry.insert(LazyBlockFile::new(Rc::new(buf)))
277            }
278        })
279    }
280
281    pub fn get_entry(&mut self, addr: &CacheAddr) -> CCPResult<BufferSlice> {
282        let data_file = self.get(addr.file_number())?;
283        data_file.get_buffer(addr)
284    }
285}
286
287impl Iterator for LazyBlockFileCacheEntryIterator {
288    type Item = LazyBlockFileCacheEntry;
289
290    fn next(&mut self) -> Option<Self::Item> {
291        let current = self.current.take()?;
292
293        let mut data_files = (*self.data_files).borrow_mut();
294
295        let current = data_files.get_entry(&current).ok()?;
296        let current = LazyBlockFileCacheEntry::new(
297            current,
298            Rc::clone(&self.data_files),
299            self.cache_path.clone(),
300        );
301
302        if let Ok(current) = current.get() {
303            let next = current.next;
304            if next.is_initialized() {
305                self.current = Some(next);
306            }
307        }
308
309        Some(current)
310    }
311}
312
313pub struct LazyRankingsNode {
314    buffer: BufferSlice,
315}
316
/// A slice to a shared buffer. Enables us to pass a reference to the buffer to all of the
/// transmuters.
pub struct BufferSlice {
    /// Shared backing storage.
    buffer: Rc<Vec<u8>>,
    /// Byte offset of the window within `buffer`.
    start: usize,
    /// Length of the window in bytes.
    size: usize,
}

impl BufferSlice {
    /// Describes the window `start..start + size` of `buffer`. The bounds are not checked
    /// here; see [`BufferSlice::get`].
    pub fn new(buffer: Rc<Vec<u8>>, start: usize, size: usize) -> BufferSlice {
        BufferSlice {
            buffer,
            start,
            size,
        }
    }

    /// Returns the bytes of the window.
    ///
    /// Offsets are derived from on-disk data, so the window may not fit inside the buffer
    /// (truncated or corrupt file) or `start + size` may overflow. In both cases an empty
    /// slice is returned rather than panicking, which makes the size check performed by
    /// callers that parse the bytes fail cleanly.
    pub fn get(&self) -> &[u8] {
        self.start
            .checked_add(self.size)
            .and_then(|end| self.buffer.get(self.start..end))
            .unwrap_or(&[])
    }
}
338
339impl LazyRankingsNode {
340    pub fn get(&self) -> CCPResult<&RankingsNode> {
341        RankingsNode::ref_from(self.buffer.get()).ok_or(error::CCPError::DataMisalignment(format!(
342            "rankings node at {}",
343            self.buffer.start
344        )))
345    }
346}
347
348pub struct LazyBlockFileCacheEntry {
349    buffer: BufferSlice,
350    data_files: Rc<RefCell<DataFiles>>,
351    cache_path: PathBuf,
352}
353
354impl LazyBlockFileCacheEntry {
355    pub fn new(
356        buffer: BufferSlice,
357        block_files: Rc<RefCell<DataFiles>>,
358        cache_path: PathBuf,
359    ) -> LazyBlockFileCacheEntry {
360        LazyBlockFileCacheEntry {
361            buffer,
362            data_files: block_files,
363            cache_path,
364        }
365    }
366
367    /// Parse the entry from the buffer and return a reference to it.
368    pub fn get(&self) -> CCPResult<&BlockFileCacheEntry> {
369        BlockFileCacheEntry::ref_from(self.buffer.get()).ok_or(error::CCPError::DataMisalignment(
370            format!("block file cache entry at {}", self.buffer.start),
371        ))
372    }
373
374    /// Return readers for the actual cache data. Typically, this is a header stream followed by
375    /// a content stream.
376    pub fn stream_readers(self) -> CCPResult<Vec<CCPResult<Box<dyn Read>>>> {
377        let entry = self.get().or(Err(CCPError::InvalidState(
378            "Unable to read entry".to_string(),
379        )))?;
380
381        Ok(entry
382            .data_addr
383            .iter()
384            .zip(entry.data_size.iter())
385            .map(|(addr, size)| match addr.file_type() {
386                FileType::External => Ok(Box::new(ExternalFileReader::new(
387                    *addr,
388                    self.cache_path.clone(),
389                )) as Box<dyn Read>),
390                FileType::Block1k | FileType::Block256 | FileType::Block4k => Ok(Box::new(
391                    BlockFileStreamReader::new(*addr, *size as usize, self.data_files.clone()),
392                )
393                    as Box<dyn Read>),
394                _ => Err(CCPError::InvalidState(
395                    format!(
396                        "Requested stream reader of nonsense address type {:?}",
397                        addr.file_type()
398                    )
399                    .to_string(),
400                )),
401            })
402            .collect())
403    }
404
405    pub fn get_rankings_node(&mut self) -> CCPResult<LazyRankingsNode> {
406        let cache_entry = self.get()?;
407
408        if !cache_entry.rankings_node.is_initialized() {
409            return Err(error::CCPError::InvalidData(
410                "rankings node not initialized".to_string(),
411            ));
412        }
413
414        let mut data_files = self.data_files.borrow_mut();
415        let ranking_entry = data_files.get_entry(&cache_entry.rankings_node)?;
416
417        Ok(LazyRankingsNode {
418            buffer: ranking_entry,
419        })
420    }
421}
422
423pub struct LazyBlockFile {
424    buffer: Rc<Vec<u8>>,
425}
426
427/// Represents a block file in the chrome cache. It has a header, providing some metadata about the
428/// file, followed by a series of contiguous blocks of a fixed size, defined by a field within the
429/// header.
430impl LazyBlockFile {
431    pub fn new(buffer: Rc<Vec<u8>>) -> LazyBlockFile {
432        LazyBlockFile { buffer }
433    }
434
435    fn header(&self) -> CCPResult<&BlockFileHeader> {
436        let header = BlockFileHeader::ref_from(&self.buffer[0..mem::size_of::<BlockFileHeader>()])
437            .ok_or(error::CCPError::DataMisalignment(
438                "block file header".to_string(),
439            ))?;
440
441        if header.magic != BLOCK_MAGIC {
442            return Err(error::CCPError::InvalidData(format!(
443                "expected block magic {:x}, got {:x}",
444                BLOCK_MAGIC, header.magic
445            )));
446        }
447        Ok(header)
448    }
449
450    pub fn get_buffer(&self, addr: &CacheAddr) -> CCPResult<BufferSlice> {
451        let header = self.header()?;
452        Ok(BufferSlice::new(
453            Rc::clone(&self.buffer),
454            BLOCK_HEADER_SIZE + addr.start_block() as usize * header.entry_size as usize,
455            header.entry_size as usize,
456        ))
457    }
458}