1use crate::error::{Error, Result};
6use std::collections::HashMap;
7use std::io::{Seek, Write};
8use std::path::Path;
9
10pub struct StringPool {
14 prefixes: Vec<String>,
15 prefix_map: HashMap<String, u16>,
16 path_info: HashMap<String, (u32, u16)>,
17}
18
19impl Default for StringPool {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl StringPool {
26 #[must_use]
28 pub fn new() -> Self {
29 let prefixes = vec![String::new()];
30 let mut prefix_map = HashMap::new();
31 prefix_map.insert(String::new(), 0);
32
33 Self {
34 prefixes,
35 prefix_map,
36 path_info: HashMap::new(),
37 }
38 }
39
40 pub fn add_path(&mut self, path: &Path) {
44 let path_str = path.to_string_lossy().to_string();
45 self.path_info.entry(path_str).or_insert((0, 0));
46 }
47
48 pub fn set_prefixes(&mut self, prefixes: Vec<String>) {
50 self.prefixes = vec![String::new()];
51 self.prefix_map = HashMap::new();
52 self.prefix_map.insert(String::new(), 0);
53
54 for p in prefixes {
55 if p.is_empty() {
56 continue;
57 }
58 let id = u16::try_from(self.prefixes.len()).unwrap_or(0);
59 self.prefix_map.insert(p.clone(), id);
60 self.prefixes.push(p);
61 }
62 }
63
64 #[must_use]
66 pub fn get_info(&self, path: &Path) -> (u32, u16) {
67 let path_str = path.to_string_lossy();
68 *self.path_info.get(path_str.as_ref()).unwrap_or(&(0, 0))
69 }
70
71 #[allow(clippy::as_conversions)] pub fn serialize<W: Write + Seek>(&mut self, mut w: W) -> std::io::Result<()> {
78 let start_pos = w.stream_position()?;
79
80 w.write_all(&u32::try_from(self.prefixes.len()).unwrap_or(0).to_le_bytes())?;
81
82 for (i, p) in self.prefixes.iter().enumerate() {
83 w.write_all(&u16::try_from(i).unwrap_or(0).to_le_bytes())?;
84 w.write_all(&u16::try_from(p.len()).unwrap_or(0).to_le_bytes())?;
85 w.write_all(p.as_bytes())?;
86 }
87
88 let current = w.stream_position()?;
89 let padding = (4 - (current % 4)) % 4;
90 for _ in 0..padding {
91 w.write_all(&[0])?;
92 }
93
94 let paths: Vec<String> = self.path_info.keys().cloned().collect();
95 for path_str in paths {
96 let offset = u32::try_from(w.stream_position()? - start_pos).unwrap_or(0);
97
98 let mut best_prefix_id = 0u16;
99 let mut best_prefix_len = 0;
100
101 for (prefix, &id) in &self.prefix_map {
102 if path_str.starts_with(prefix) && prefix.len() > best_prefix_len {
103 best_prefix_id = id;
104 best_prefix_len = prefix.len();
105 }
106 }
107
108 let suffix = path_str.get(best_prefix_len..).unwrap_or("");
109 w.write_all(&best_prefix_id.to_le_bytes())?;
110 w.write_all(&(suffix.len() as u16).to_le_bytes())?;
111 w.write_all(suffix.as_bytes())?;
112
113 self.path_info
114 .insert(path_str.clone(), (offset, path_str.len() as u16));
115 }
116
117 Ok(())
118 }
119}
120
121pub struct StringPoolReader<'a> {
123 data: &'a [u8],
124 prefixes: Vec<&'a [u8]>,
125}
126
127impl<'a> StringPoolReader<'a> {
128 #[allow(clippy::as_conversions)] #[allow(clippy::indexing_slicing)] pub fn new(data: &'a [u8]) -> Result<Self> {
137 if data.len() < 4 {
138 return Err(Error::StringPoolOutOfBounds);
139 }
140 let prefix_count = data[0..4]
141 .try_into()
142 .ok()
143 .map_or(0, u32::from_le_bytes) as usize;
144 let mut prefixes = Vec::with_capacity(prefix_count);
145 let mut pos = 4;
146
147 for _ in 0..prefix_count {
148 if pos + 4 > data.len() {
149 return Err(Error::StringPoolOutOfBounds);
150 }
151 let _id = data[pos..pos + 2]
152 .try_into()
153 .ok()
154 .map_or(0, u16::from_le_bytes);
155 let len = data[pos + 2..pos + 4]
156 .try_into()
157 .ok()
158 .map_or(0, u16::from_le_bytes) as usize;
159 pos += 4;
160 if pos + len > data.len() {
161 return Err(Error::StringPoolOutOfBounds);
162 }
163 prefixes.push(&data[pos..pos + len]);
164 pos += len;
165 }
166
167 Ok(Self { data, prefixes })
168 }
169
170 #[allow(clippy::as_conversions)] #[allow(clippy::indexing_slicing)] pub fn resolve(&self, offset: u32) -> Result<String> {
179 let pos = offset as usize;
180 if pos + 4 > self.data.len() {
181 return Err(Error::StringPoolOutOfBounds);
182 }
183
184 let prefix_id = self.data[pos..pos + 2]
185 .try_into()
186 .ok()
187 .map_or(0, u16::from_le_bytes) as usize;
188 let suffix_len = self.data[pos + 2..pos + 4]
189 .try_into()
190 .ok()
191 .map_or(0, u16::from_le_bytes) as usize;
192
193 if prefix_id >= self.prefixes.len() {
194 return Err(Error::StringPoolOutOfBounds);
195 }
196
197 let prefix = self.prefixes[prefix_id];
198 let suffix_pos = pos + 4;
199 if suffix_pos + suffix_len > self.data.len() {
200 return Err(Error::StringPoolOutOfBounds);
201 }
202 let suffix = &self.data[suffix_pos..suffix_pos + suffix_len];
203
204 let mut res = String::with_capacity(prefix.len() + suffix.len());
205 res.push_str(std::str::from_utf8(prefix).map_err(|_| Error::InvalidPath)?);
206 res.push_str(std::str::from_utf8(suffix).map_err(|_| Error::InvalidPath)?);
207
208 Ok(res)
209 }
210}
211
212#[cfg(test)]
213#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
214mod tests {
215 use super::*;
216 use std::io::Cursor;
217
218 #[test]
219 fn roundtrip() {
220 let mut pool = StringPool::new();
221 pool.set_prefixes(vec!["/home/user/".to_string(), "/var/log/".to_string()]);
222 pool.add_path(Path::new("/home/user/file.rs"));
223 pool.add_path(Path::new("/var/log/syslog"));
224 pool.add_path(Path::new("/other/path"));
225
226 let mut buf = Cursor::new(Vec::new());
227 pool.serialize(&mut buf).unwrap();
228
229 let data = buf.into_inner();
230 let reader = StringPoolReader::new(&data).unwrap();
231
232 let (off1, _) = pool.get_info(Path::new("/home/user/file.rs"));
233 assert_eq!(reader.resolve(off1).unwrap(), "/home/user/file.rs");
234
235 let (off2, _) = pool.get_info(Path::new("/var/log/syslog"));
236 assert_eq!(reader.resolve(off2).unwrap(), "/var/log/syslog");
237
238 let (off3, _) = pool.get_info(Path::new("/other/path"));
239 assert_eq!(reader.resolve(off3).unwrap(), "/other/path");
240 }
241}