1use crate::error::{Error, Result};
6use std::collections::HashMap;
7use std::io::{Seek, Write};
8use std::path::Path;
9
/// Builder for a prefix-compressed table of path strings.
///
/// Paths are registered with [`StringPool::add_path`], shared prefixes are
/// installed with [`StringPool::set_prefixes`], and [`StringPool::serialize`]
/// writes the pool and records where each path was written.
///
/// Serialized layout (all integers little-endian):
///
/// ```text
/// u32                                  prefix count
/// (u16 id, u16 len, bytes)             one record per prefix, in id order
/// 0..=3 zero bytes                     padding to a 4-byte stream position
/// (u16 prefix id, u16 len, bytes)      one record per path (suffix only)
/// ```
pub struct StringPool {
    /// Prefix table; the index of an entry is its id. Entry 0 is always `""`.
    prefixes: Vec<String>,
    /// Reverse lookup from prefix text to its id in `prefixes`.
    prefix_map: HashMap<String, u16>,
    /// Per-path `(offset, length)`; `(0, 0)` until the path has been serialized.
    path_info: HashMap<String, (u32, u16)>,
}

impl Default for StringPool {
    /// Equivalent to [`StringPool::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl StringPool {
    /// Creates a pool with no paths and only the empty prefix (id 0).
    pub fn new() -> Self {
        let mut prefix_map = HashMap::new();
        prefix_map.insert(String::new(), 0);

        Self {
            prefixes: vec![String::new()],
            prefix_map,
            path_info: HashMap::new(),
        }
    }

    /// Registers `path` for serialization.
    ///
    /// The path is stored in its lossy UTF-8 form. Adding a path that is
    /// already present leaves its recorded info untouched.
    pub fn add_path(&mut self, path: &Path) {
        let path_str = path.to_string_lossy().into_owned();
        self.path_info.entry(path_str).or_insert((0, 0));
    }

    /// Replaces the prefix table with `prefixes`.
    ///
    /// Id 0 is always the empty prefix; the given prefixes receive ids
    /// 1, 2, ... in order. Entries that cannot be represented in the
    /// serialized format are dropped rather than corrupting the output:
    ///
    /// * empty strings and duplicates (already present in the table),
    /// * prefixes longer than `u16::MAX` bytes (the length field is a `u16`),
    /// * anything after the table holds `u16::MAX + 1` entries (ids are `u16`).
    ///
    /// A dropped prefix only costs compression: affected paths fall back to a
    /// shorter prefix or to the empty one.
    pub fn set_prefixes(&mut self, prefixes: Vec<String>) {
        self.prefixes.clear();
        self.prefixes.push(String::new());
        self.prefix_map.clear();
        self.prefix_map.insert(String::new(), 0);

        for p in prefixes {
            // `contains_key` also filters the empty string, which is id 0.
            if p.len() > usize::from(u16::MAX) || self.prefix_map.contains_key(&p) {
                continue;
            }
            if self.prefixes.len() > usize::from(u16::MAX) {
                // Every u16 id is taken; a wrapped id would alias another prefix.
                break;
            }
            let id = self.prefixes.len() as u16; // guarded by the check above
            self.prefix_map.insert(p.clone(), id);
            self.prefixes.push(p);
        }
    }

    /// Returns the `(offset, length)` recorded for `path`.
    ///
    /// `offset` is relative to the stream position at which the last
    /// [`StringPool::serialize`] call started, and `length` is the byte length
    /// of the full path string. Unknown or not-yet-serialized paths yield
    /// `(0, 0)`.
    pub fn get_info(&self, path: &Path) -> (u32, u16) {
        let path_str = path.to_string_lossy();
        self.path_info
            .get(&*path_str)
            .copied()
            .unwrap_or((0, 0))
    }

    /// Writes the pool to `w` and records each path's `(offset, length)`.
    ///
    /// Paths are written in sorted order so the output is reproducible. Each
    /// path is stored as the id of the longest matching prefix plus the
    /// remaining suffix. Padding after the prefix table aligns the *absolute*
    /// stream position to 4 bytes, while recorded offsets are relative to the
    /// position at which this call started.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` (before anything is written) if a path or the
    /// prefix table does not fit the `u16` fields of the format, and
    /// `InvalidInput` if an entry offset would not fit in a `u32`. Any I/O
    /// error from `w` is propagated; paths written before the failure keep
    /// their updated info.
    pub fn serialize<W: Write + Seek>(&mut self, mut w: W) -> std::io::Result<()> {
        fn invalid_input(msg: &'static str) -> std::io::Error {
            std::io::Error::new(std::io::ErrorKind::InvalidInput, msg)
        }

        const MAX_LEN: usize = u16::MAX as usize;

        // Validate up front so a bad entry never leaves a half-written pool.
        if self.path_info.keys().any(|path| path.len() > MAX_LEN) {
            return Err(invalid_input(
                "path is longer than 65535 bytes and cannot be stored in the string pool",
            ));
        }
        if self.prefixes.len() > MAX_LEN + 1 || self.prefixes.iter().any(|p| p.len() > MAX_LEN) {
            return Err(invalid_input(
                "prefix table does not fit the string pool format",
            ));
        }

        let start_pos = w.stream_position()?;

        // The casts below cannot truncate: sizes were validated above.
        w.write_all(&(self.prefixes.len() as u32).to_le_bytes())?;
        for (i, p) in self.prefixes.iter().enumerate() {
            w.write_all(&(i as u16).to_le_bytes())?;
            w.write_all(&(p.len() as u16).to_le_bytes())?;
            w.write_all(p.as_bytes())?;
        }

        let current = w.stream_position()?;
        let padding = ((4 - (current % 4)) % 4) as usize;
        w.write_all(&[0u8; 3][..padding])?;

        // Borrow the two maps separately so entries can be updated in place
        // without cloning every key.
        let prefix_map = &self.prefix_map;
        let mut entries: Vec<(&String, &mut (u32, u16))> = self.path_info.iter_mut().collect();
        entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

        for (path_str, info) in entries {
            let relative = w.stream_position()? - start_pos;
            if relative > u64::from(u32::MAX) {
                return Err(invalid_input(
                    "string pool is too large: entry offset does not fit in 32 bits",
                ));
            }
            let offset = relative as u32;

            // Longest matching prefix wins; the empty prefix (id 0) is the fallback.
            let mut best_prefix_id = 0u16;
            let mut best_prefix_len = 0usize;
            for (prefix, &id) in prefix_map {
                if prefix.len() > best_prefix_len && path_str.starts_with(prefix.as_str()) {
                    best_prefix_id = id;
                    best_prefix_len = prefix.len();
                }
            }

            // `starts_with` matched, so the cut is on a character boundary.
            let suffix = &path_str[best_prefix_len..];
            w.write_all(&best_prefix_id.to_le_bytes())?;
            w.write_all(&(suffix.len() as u16).to_le_bytes())?;
            w.write_all(suffix.as_bytes())?;

            *info = (offset, path_str.len() as u16);
        }

        Ok(())
    }
}
112
113pub struct StringPoolReader<'a> {
114 data: &'a [u8],
115 prefixes: Vec<&'a [u8]>,
116}
117
118impl<'a> StringPoolReader<'a> {
119 pub fn new(data: &'a [u8]) -> Result<Self> {
120 if data.len() < 4 {
121 return Err(Error::StringPoolOutOfBounds);
122 }
123 let prefix_count = data[0..4].try_into().ok().map(u32::from_le_bytes).unwrap_or(0) as usize;
124 let mut prefixes = Vec::with_capacity(prefix_count);
125 let mut pos = 4;
126
127 for _ in 0..prefix_count {
128 if pos + 4 > data.len() {
129 return Err(Error::StringPoolOutOfBounds);
130 }
131 let _id = data[pos..pos + 2].try_into().ok().map(u16::from_le_bytes).unwrap_or(0);
132 let len = data[pos + 2..pos + 4].try_into().ok().map(u16::from_le_bytes).unwrap_or(0) as usize;
133 pos += 4;
134 if pos + len > data.len() {
135 return Err(Error::StringPoolOutOfBounds);
136 }
137 prefixes.push(&data[pos..pos + len]);
138 pos += len;
139 }
140
141 Ok(Self { data, prefixes })
142 }
143
144 pub fn resolve(&self, offset: u32) -> Result<String> {
145 let pos = offset as usize;
146 if pos + 4 > self.data.len() {
147 return Err(Error::StringPoolOutOfBounds);
148 }
149
150 let prefix_id = self.data[pos..pos + 2].try_into().ok().map(u16::from_le_bytes).unwrap_or(0) as usize;
151 let suffix_len =
152 self.data[pos + 2..pos + 4].try_into().ok().map(u16::from_le_bytes).unwrap_or(0) as usize;
153
154 if prefix_id >= self.prefixes.len() {
155 return Err(Error::StringPoolOutOfBounds);
156 }
157
158 let prefix = self.prefixes[prefix_id];
159 let suffix_pos = pos + 4;
160 if suffix_pos + suffix_len > self.data.len() {
161 return Err(Error::StringPoolOutOfBounds);
162 }
163 let suffix = &self.data[suffix_pos..suffix_pos + suffix_len];
164
165 let mut res = String::with_capacity(prefix.len() + suffix.len());
166 res.push_str(std::str::from_utf8(prefix).map_err(|_| Error::InvalidPath)?);
167 res.push_str(std::str::from_utf8(suffix).map_err(|_| Error::InvalidPath)?);
168
169 Ok(res)
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Serializes a pool holding two prefixed paths and one unprefixed path,
    /// then checks that each recorded offset resolves back to its full path.
    #[test]
    fn roundtrip() {
        let expected = ["/home/user/file.rs", "/var/log/syslog", "/other/path"];

        let mut pool = StringPool::new();
        pool.set_prefixes(vec!["/home/user/".to_string(), "/var/log/".to_string()]);
        for &path in expected.iter() {
            pool.add_path(Path::new(path));
        }

        let mut cursor = Cursor::new(Vec::new());
        pool.serialize(&mut cursor).unwrap();

        let bytes = cursor.into_inner();
        let reader = StringPoolReader::new(&bytes).unwrap();

        for &path in expected.iter() {
            let (offset, _) = pool.get_info(Path::new(path));
            assert_eq!(reader.resolve(offset).unwrap(), path);
        }
    }
}