1use crate::error::{Error, Result};
6use std::collections::HashMap;
7use std::io::{Seek, Write};
8use std::path::Path;
9
10pub struct StringPool {
14 prefixes: Vec<String>,
15 prefix_map: HashMap<String, u16>,
16 path_info: HashMap<String, (u32, u16)>,
17}
18
19impl Default for StringPool {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl StringPool {
26 #[must_use]
28 pub fn new() -> Self {
29 let prefixes = vec![String::new()];
30 let mut prefix_map = HashMap::new();
31 prefix_map.insert(String::new(), 0);
32
33 Self {
34 prefixes,
35 prefix_map,
36 path_info: HashMap::new(),
37 }
38 }
39
40 pub fn add_path(&mut self, path: &Path) {
44 let path_str = path.to_string_lossy().to_string();
45 self.path_info.entry(path_str).or_insert((0, 0));
46 }
47
48 pub fn set_prefixes(&mut self, prefixes: Vec<String>) {
50 self.prefixes = vec![String::new()];
51 self.prefix_map = HashMap::new();
52 self.prefix_map.insert(String::new(), 0);
53
54 for p in prefixes {
55 if p.is_empty() {
56 continue;
57 }
58 let id = u16::try_from(self.prefixes.len()).unwrap_or(0);
59 self.prefix_map.insert(p.clone(), id);
60 self.prefixes.push(p);
61 }
62 }
63
64 #[must_use]
66 pub fn get_info(&self, path: &Path) -> (u32, u16) {
67 let path_str = path.to_string_lossy();
68 *self.path_info.get(path_str.as_ref()).unwrap_or(&(0, 0))
69 }
70
71 #[allow(clippy::as_conversions)] pub fn serialize<W: Write + Seek>(&mut self, mut w: W) -> std::io::Result<()> {
78 let start_pos = w.stream_position()?;
79
80 w.write_all(
81 &u32::try_from(self.prefixes.len())
82 .unwrap_or(0)
83 .to_le_bytes(),
84 )?;
85
86 for (i, p) in self.prefixes.iter().enumerate() {
87 w.write_all(&u16::try_from(i).unwrap_or(0).to_le_bytes())?;
88 w.write_all(&u16::try_from(p.len()).unwrap_or(0).to_le_bytes())?;
89 w.write_all(p.as_bytes())?;
90 }
91
92 let current = w.stream_position()?;
93 let padding = (4 - (current % 4)) % 4;
94 for _ in 0..padding {
95 w.write_all(&[0])?;
96 }
97
98 let paths: Vec<String> = self.path_info.keys().cloned().collect();
99 for path_str in paths {
100 let offset = u32::try_from(w.stream_position()? - start_pos).unwrap_or(0);
101
102 let mut best_prefix_id = 0u16;
103 let mut best_prefix_len = 0;
104
105 for (prefix, &id) in &self.prefix_map {
106 if path_str.starts_with(prefix) && prefix.len() > best_prefix_len {
107 best_prefix_id = id;
108 best_prefix_len = prefix.len();
109 }
110 }
111
112 let suffix = path_str.get(best_prefix_len..).unwrap_or("");
113 w.write_all(&best_prefix_id.to_le_bytes())?;
114 w.write_all(&(suffix.len() as u16).to_le_bytes())?;
115 w.write_all(suffix.as_bytes())?;
116
117 self.path_info
118 .insert(path_str.clone(), (offset, path_str.len() as u16));
119 }
120
121 Ok(())
122 }
123}
124
125pub struct StringPoolReader<'a> {
127 data: &'a [u8],
128 prefixes: Vec<&'a [u8]>,
129}
130
131impl<'a> StringPoolReader<'a> {
132 #[allow(clippy::as_conversions)] #[allow(clippy::indexing_slicing)] pub fn new(data: &'a [u8]) -> Result<Self> {
141 if data.len() < 4 {
142 return Err(Error::StringPoolOutOfBounds);
143 }
144 let prefix_count = data[0..4].try_into().ok().map_or(0, u32::from_le_bytes) as usize;
145 let mut prefixes = Vec::with_capacity(prefix_count);
146 let mut pos = 4;
147
148 for _ in 0..prefix_count {
149 if pos + 4 > data.len() {
150 return Err(Error::StringPoolOutOfBounds);
151 }
152 let _id = data[pos..pos + 2]
153 .try_into()
154 .ok()
155 .map_or(0, u16::from_le_bytes);
156 let len = data[pos + 2..pos + 4]
157 .try_into()
158 .ok()
159 .map_or(0, u16::from_le_bytes) as usize;
160 pos += 4;
161 if pos + len > data.len() {
162 return Err(Error::StringPoolOutOfBounds);
163 }
164 prefixes.push(&data[pos..pos + len]);
165 pos += len;
166 }
167
168 Ok(Self { data, prefixes })
169 }
170
171 #[allow(clippy::as_conversions)] #[allow(clippy::indexing_slicing)] pub fn resolve(&self, offset: u32) -> Result<String> {
180 let pos = offset as usize;
181 if pos + 4 > self.data.len() {
182 return Err(Error::StringPoolOutOfBounds);
183 }
184
185 let prefix_id = self.data[pos..pos + 2]
186 .try_into()
187 .ok()
188 .map_or(0, u16::from_le_bytes) as usize;
189 let suffix_len = self.data[pos + 2..pos + 4]
190 .try_into()
191 .ok()
192 .map_or(0, u16::from_le_bytes) as usize;
193
194 if prefix_id >= self.prefixes.len() {
195 return Err(Error::StringPoolOutOfBounds);
196 }
197
198 let prefix = self.prefixes[prefix_id];
199 let suffix_pos = pos + 4;
200 if suffix_pos + suffix_len > self.data.len() {
201 return Err(Error::StringPoolOutOfBounds);
202 }
203 let suffix = &self.data[suffix_pos..suffix_pos + suffix_len];
204
205 let mut res = String::with_capacity(prefix.len() + suffix.len());
206 res.push_str(std::str::from_utf8(prefix).map_err(|_| Error::InvalidPath)?);
207 res.push_str(std::str::from_utf8(suffix).map_err(|_| Error::InvalidPath)?);
208
209 Ok(res)
210 }
211}
212
213#[cfg(test)]
214#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
215mod tests {
216 use super::*;
217 use std::io::Cursor;
218
219 #[test]
220 fn roundtrip() {
221 let mut pool = StringPool::new();
222 pool.set_prefixes(vec!["/home/user/".to_string(), "/var/log/".to_string()]);
223 pool.add_path(Path::new("/home/user/file.rs"));
224 pool.add_path(Path::new("/var/log/syslog"));
225 pool.add_path(Path::new("/other/path"));
226
227 let mut buf = Cursor::new(Vec::new());
228 pool.serialize(&mut buf).unwrap();
229
230 let data = buf.into_inner();
231 let reader = StringPoolReader::new(&data).unwrap();
232
233 let (off1, _) = pool.get_info(Path::new("/home/user/file.rs"));
234 assert_eq!(reader.resolve(off1).unwrap(), "/home/user/file.rs");
235
236 let (off2, _) = pool.get_info(Path::new("/var/log/syslog"));
237 assert_eq!(reader.resolve(off2).unwrap(), "/var/log/syslog");
238
239 let (off3, _) = pool.get_info(Path::new("/other/path"));
240 assert_eq!(reader.resolve(off3).unwrap(), "/other/path");
241 }
242}