samply_symbols/
cache.rs

1use std::collections::HashMap;
2use std::ops::Range;
3use std::sync::atomic::AtomicUsize;
4use std::sync::Mutex;
5
6use elsa::sync::FrozenVec;
7
8use crate::chunked_read_buffer_manager::{ChunkedReadBufferManager, RangeLocation, RangeSourcing};
9use crate::{FileAndPathHelperResult, FileContents};
10
/// Chunk size in bytes (32 KiB), used as the const generic argument of
/// `ChunkedReadBufferManager`.
const CHUNK_SIZE: u64 = 1 << 15;
12
13pub trait FileByteSource: Send + Sync {
14    /// Read `size` bytes at offset `offset` and append them to `buffer`.
15    /// If successful, `buffer` must have had its len increased exactly by `size`,
16    /// otherwise the caller may panic.
17    fn read_bytes_into(
18        &self,
19        buffer: &mut Vec<u8>,
20        offset: u64,
21        size: usize,
22    ) -> FileAndPathHelperResult<()>;
23}
24
25pub struct FileContentsWithChunkedCaching<S: FileByteSource> {
26    source: S,
27    file_len: u64,
28    buffer_manager: Mutex<ChunkedReadBufferManager<CHUNK_SIZE>>,
29    string_cache: Mutex<HashMap<(u64, u8), RangeLocation>>,
30    buffers: FrozenVec<Box<[u8]>>,
31    buffer_count: AtomicUsize,
32}
33
34impl<S: FileByteSource> FileContentsWithChunkedCaching<S> {
35    pub fn new(file_len: u64, source: S) -> Self {
36        FileContentsWithChunkedCaching {
37            source,
38            buffers: FrozenVec::new(),
39            file_len,
40            buffer_manager: Mutex::new(ChunkedReadBufferManager::new_with_size(file_len)),
41            string_cache: Mutex::new(HashMap::new()),
42            buffer_count: AtomicUsize::new(0),
43        }
44    }
45
46    #[inline]
47    fn slice_from_location(&self, location: &RangeLocation) -> &[u8] {
48        let buffer = &self.buffers.get(location.buffer_handle).unwrap();
49        &buffer[location.offset_from_start..][..location.size]
50    }
51
52    /// Must be called with a valid, non-empty range which does not exceed file_len.
53    #[inline]
54    fn get_range_location(&self, range: Range<u64>) -> FileAndPathHelperResult<RangeLocation> {
55        let mut buffer_manager = self.buffer_manager.lock().unwrap();
56        let read_range = match buffer_manager.determine_range_sourcing(range.clone()) {
57            RangeSourcing::InExistingBuffer(l) => return Ok(l),
58            RangeSourcing::NeedToReadNewBuffer(read_range) => read_range,
59        };
60        assert!(read_range.start <= read_range.end);
61
62        // Read the bytes from the source.
63        let read_len: usize = (read_range.end - read_range.start).try_into()?;
64        let mut buffer = Vec::new();
65        self.source
66            .read_bytes_into(&mut buffer, read_range.start, read_len)?;
67        assert!(buffer.len() == read_len);
68
69        let buffer_handle = self
70            .buffer_count
71            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
72        self.buffers.push(buffer.into_boxed_slice());
73        buffer_manager.insert_buffer_range(read_range.clone(), buffer_handle);
74
75        Ok(RangeLocation {
76            buffer_handle,
77            offset_from_start: (range.start - read_range.start) as usize,
78            size: (range.end - range.start) as usize,
79        })
80    }
81}
82
83impl<S: FileByteSource> FileContents for FileContentsWithChunkedCaching<S> {
84    #[inline]
85    fn len(&self) -> u64 {
86        self.file_len
87    }
88
89    #[inline]
90    fn read_bytes_at(&self, offset: u64, size: u64) -> FileAndPathHelperResult<&[u8]> {
91        if size == 0 {
92            return Ok(&[]);
93        }
94
95        let start = offset;
96        let end = offset.checked_add(size).ok_or_else(|| {
97            std::io::Error::new(
98                std::io::ErrorKind::InvalidInput,
99                "read_bytes_at with offset + size overflowing u64",
100            )
101        })?;
102        if end > self.file_len {
103            return Err(Box::new(std::io::Error::new(
104                std::io::ErrorKind::InvalidInput,
105                "read_bytes_at range out-of-bounds",
106            )));
107        }
108        let location = self.get_range_location(start..end)?;
109        Ok(self.slice_from_location(&location))
110    }
111
112    #[inline]
113    fn read_bytes_at_until(
114        &self,
115        range: Range<u64>,
116        delimiter: u8,
117    ) -> FileAndPathHelperResult<&[u8]> {
118        const MAX_LENGTH_INCLUDING_DELIMITER: u64 = 4096;
119
120        if range.end < range.start {
121            return Err(Box::new(std::io::Error::new(
122                std::io::ErrorKind::InvalidInput,
123                "read_bytes_at_until called with range.end < range.start",
124            )));
125        }
126        if range.end > self.file_len {
127            return Err(Box::new(std::io::Error::new(
128                std::io::ErrorKind::InvalidInput,
129                "read_bytes_at_until range out-of-bounds",
130            )));
131        }
132
133        let mut string_cache = self.string_cache.lock().unwrap();
134        if let Some(location) = string_cache.get(&(range.start, delimiter)) {
135            return Ok(self.slice_from_location(location));
136        }
137
138        let max_len = (range.end - range.start).min(MAX_LENGTH_INCLUDING_DELIMITER);
139        let mut location = self.get_range_location(range.start..(range.start + max_len))?;
140        let bytes = self.slice_from_location(&location);
141
142        let string_len = match memchr::memchr(delimiter, bytes) {
143            Some(len) => len,
144            None => {
145                return Err(Box::new(std::io::Error::new(
146                    std::io::ErrorKind::InvalidInput,
147                    "Could not find delimiter",
148                )));
149            }
150        };
151
152        location.size = string_len;
153        string_cache.insert((range.start, delimiter), location);
154        Ok(&bytes[..string_len])
155    }
156
157    fn read_bytes_into(
158        &self,
159        buffer: &mut Vec<u8>,
160        offset: u64,
161        size: usize,
162    ) -> FileAndPathHelperResult<()> {
163        self.source.read_bytes_into(buffer, offset, size)
164    }
165}