1use crate::filesystem::{FileEntry, Filesystem, FilesystemEntry};
2use crate::integrity::FileIntegrity;
3use crate::path_validation::ensure_within;
4use crate::pickle::Pickle;
5use indexmap::IndexMap;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::io::{Read, Seek, Write};
10use std::path::{Path, PathBuf};
11use std::sync::{Arc, LazyLock, RwLock};
12
13static FILESYSTEM_CACHE: LazyLock<RwLock<HashMap<PathBuf, Arc<Filesystem>>>> =
14 LazyLock::new(|| RwLock::new(HashMap::new()));
15
16use thiserror::Error;
17
18#[derive(Error, Debug)]
20pub enum AsarError {
21 #[error("I/O error: {0}")]
22 Io(#[from] std::io::Error),
23 #[error("Invalid archive header: {0}")]
24 HeaderValidation(String),
25 #[error("{0}")]
26 NotFound(String),
27 #[error("Path traversal: {0}")]
28 PathTraversal(String),
29 #[error("Circular symlink detected: {0}")]
30 CircularSymlink(String),
31 #[error("Too many levels of symbolic links")]
32 SymlinkDepth,
33 #[error("{path}: file size can not be larger than 4.2GB")]
34 FileTooLarge { path: String },
35 #[error("{0}")]
36 Other(String),
37}
38
39#[derive(Debug, Serialize, Deserialize)]
41pub struct HeaderIntegrity {
42 pub algorithm: String,
43 pub hash: String,
44 #[serde(rename = "blockSize")]
45 pub block_size: usize,
46 pub blocks: Vec<String>,
47}
48
49#[derive(Debug, Serialize, Deserialize)]
53#[serde(untagged)]
54pub enum HeaderEntry {
55 File {
56 #[serde(default)]
57 offset: Option<String>,
58 size: u64,
59 #[serde(skip_serializing_if = "Option::is_none")]
60 executable: Option<bool>,
61 #[serde(skip_serializing_if = "Option::is_none")]
62 unpacked: Option<bool>,
63 #[serde(skip_serializing_if = "Option::is_none")]
64 integrity: Option<HeaderIntegrity>,
65 },
66 Directory {
67 files: IndexMap<String, HeaderEntry>,
68 #[serde(skip_serializing_if = "Option::is_none")]
69 unpacked: Option<bool>,
70 },
71 Link {
72 link: String,
73 #[serde(skip_serializing_if = "Option::is_none")]
74 unpacked: Option<bool>,
75 },
76}
77
78#[derive(Debug)]
80pub struct ArchiveHeader {
81 pub header: HeaderEntry,
82 pub header_string: String,
83 pub header_size: u32,
84}
85
86pub fn read_archive_header_sync(archive_path: &Path) -> Result<ArchiveHeader, AsarError> {
87 let mut file = fs::File::open(archive_path)?;
88 let archive_size = file.metadata()?.len();
89
90 let mut size_buf = [0u8; 8];
91 file.read_exact(&mut size_buf)?;
92
93 let size_pickle = Pickle::from_buffer(&size_buf);
94 let mut size_iter = size_pickle.iter();
95 let size = size_iter.read_u32();
96
97 if size as u64 > archive_size.saturating_sub(8) {
98 return Err(AsarError::HeaderValidation(format!(
99 "Header size {} exceeds archive size {}. The archive is corrupted.",
100 size, archive_size
101 )));
102 }
103
104 let mut header_buf = vec![0u8; size as usize];
105 file.read_exact(&mut header_buf)?;
106
107 let header_pickle = Pickle::from_buffer(&header_buf);
108 let mut header_iter = header_pickle.iter();
109 let header_string = header_iter.read_string();
110
111 let parsed_header: HeaderEntry = serde_json::from_str(&header_string)
112 .map_err(|e| AsarError::HeaderValidation(e.to_string()))?;
113
114 validate_header(&parsed_header)?;
115
116 Ok(ArchiveHeader {
117 header: parsed_header,
118 header_string,
119 header_size: size,
120 })
121}
122
123fn validate_header(header: &HeaderEntry) -> Result<(), AsarError> {
124 match header {
125 HeaderEntry::Directory { .. } => Ok(()),
126 _ => Err(AsarError::HeaderValidation(
127 "root header must be a directory".to_string(),
128 )),
129 }
130}
131
132impl TryFrom<&HeaderEntry> for FilesystemEntry {
133 type Error = AsarError;
134 fn try_from(entry: &HeaderEntry) -> Result<Self, AsarError> {
135 match entry {
136 HeaderEntry::File {
137 offset,
138 size,
139 executable,
140 unpacked,
141 integrity,
142 } => {
143 let integ = integrity.as_ref().map(|i| FileIntegrity {
144 algorithm: i.algorithm.clone(),
145 hash: i.hash.clone(),
146 block_size: i.block_size,
147 blocks: i.blocks.clone(),
148 });
149 Ok(FilesystemEntry::File(FileEntry {
150 offset: offset.clone().unwrap_or_else(|| "0".to_string()),
151 size: *size,
152 executable: executable.unwrap_or(false),
153 unpacked: unpacked.unwrap_or(false),
154 integrity: integ,
155 }))
156 }
157 HeaderEntry::Directory { files, unpacked } => {
158 let mut map = IndexMap::new();
159 for (name, child) in files {
160 map.insert(name.clone(), FilesystemEntry::try_from(child)?);
161 }
162 Ok(FilesystemEntry::Directory(
163 crate::filesystem::DirectoryEntry {
164 files: map,
165 unpacked: unpacked.unwrap_or(false),
166 },
167 ))
168 }
169 HeaderEntry::Link { link, unpacked } => {
170 Ok(FilesystemEntry::Link(crate::filesystem::LinkEntry {
171 link: link.clone(),
172 unpacked: unpacked.unwrap_or(false),
173 }))
174 }
175 }
176 }
177}
178
179pub fn read_filesystem_sync(archive_path: &Path) -> Result<Arc<Filesystem>, AsarError> {
180 {
181 let cache = FILESYSTEM_CACHE.read().unwrap();
182 if let Some(fs) = cache.get(archive_path) {
183 return Ok(Arc::clone(fs));
184 }
185 }
186
187 let archive_header = read_archive_header_sync(archive_path)?;
188 let root_entry = FilesystemEntry::try_from(&archive_header.header)?;
189
190 let path_buf = archive_path.to_path_buf();
191 let mut fs = Filesystem::new(&path_buf);
192 fs.set_header(root_entry, archive_header.header_size);
193
194 let arc = Arc::new(fs);
195 let mut cache = FILESYSTEM_CACHE.write().unwrap();
196 cache.insert(path_buf, Arc::clone(&arc));
197 Ok(arc)
198}
199
200impl From<&FilesystemEntry> for HeaderEntry {
201 fn from(entry: &FilesystemEntry) -> Self {
202 match entry {
203 FilesystemEntry::File(f) => HeaderEntry::File {
204 offset: if f.unpacked { None } else { Some(f.offset.clone()) },
205 size: f.size,
206 executable: if f.executable { Some(true) } else { None },
207 unpacked: if f.unpacked { Some(true) } else { None },
208 integrity: f.integrity.as_ref().map(|i| HeaderIntegrity {
209 algorithm: i.algorithm.clone(),
210 hash: i.hash.clone(),
211 block_size: i.block_size,
212 blocks: i.blocks.clone(),
213 }),
214 },
215 FilesystemEntry::Directory(d) => {
216 let mut files = IndexMap::new();
217 for (name, child) in &d.files {
218 files.insert(name.clone(), HeaderEntry::from(child));
219 }
220 HeaderEntry::Directory {
221 files,
222 unpacked: if d.unpacked { Some(true) } else { None },
223 }
224 }
225 FilesystemEntry::Link(l) => HeaderEntry::Link {
226 link: l.link.clone(),
227 unpacked: if l.unpacked { Some(true) } else { None },
228 },
229 }
230 }
231}
232
233pub fn write_filesystem(
234 dest: &Path,
235 filesystem: &Filesystem,
236 files: &[(PathBuf, bool)],
237 _metadata: &HashMap<PathBuf, crate::crawlfs::FileMetadata>,
238) -> Result<(), AsarError> {
239 if let Some(parent) = dest.parent() {
240 fs::create_dir_all(parent)?;
241 }
242
243 let header_entry = HeaderEntry::from(filesystem.get_header());
244 let header_json =
245 serde_json::to_string(&header_entry).map_err(|e| AsarError::Other(e.to_string()))?;
246
247 let mut header_pickle = Pickle::new();
248 header_pickle.write_string(&header_json);
249 let header_buf = header_pickle.into_buffer();
250
251 let mut size_pickle = Pickle::new();
252 size_pickle.write_u32(header_buf.len() as u32);
253 let size_buf = size_pickle.into_buffer();
254
255 let mut out = fs::File::create(dest)?;
256 out.write_all(&size_buf)?;
257 out.write_all(&header_buf)?;
258
259 let unpacked_base = format!("{}.unpacked", dest.display());
260 let unpacked_path = Path::new(&unpacked_base);
261
262 for (filepath, unpack) in files {
263 if *unpack {
264 let relative = filepath
265 .strip_prefix(filesystem.root_path())
266 .unwrap_or(filepath);
267 let target = unpacked_path.join(relative);
268 if let Some(parent) = target.parent() {
269 fs::create_dir_all(parent)?;
270 }
271 fs::copy(filepath, &target)?;
272 } else {
273 let mut source = fs::File::open(filepath)?;
274 std::io::copy(&mut source, &mut out)?;
275 }
276 }
277
278 Ok(())
279}
280
281pub fn read_file_sync(
282 filesystem: &Filesystem,
283 filename: &str,
284 info: &FileEntry,
285) -> Result<Vec<u8>, AsarError> {
286 let mut file = fs::File::open(filesystem.root_path())?;
287 read_file_with_fd(&mut file, filesystem, filename, info)
288}
289
290pub fn read_file_with_fd(
291 file: &mut std::fs::File,
292 filesystem: &Filesystem,
293 filename: &str,
294 info: &FileEntry,
295) -> Result<Vec<u8>, AsarError> {
296 if info.size == 0 {
297 return Ok(Vec::new());
298 }
299
300 if info.unpacked {
301 let unpacked_dir = format!("{}.unpacked", filesystem.root_path().display());
302 return Ok(fs::read(
303 ensure_within(Path::new(&unpacked_dir), filename)?,
304 )?);
305 }
306
307 let file_offset: u64 = info
308 .offset
309 .parse()
310 .map_err(|_| AsarError::Other(format!("Invalid offset: {}", info.offset)))?;
311
312 let offset = 8 + filesystem.header_size() as u64 + file_offset;
313
314 let archive_size = file.metadata()?.len();
315
316 if offset
317 .checked_add(info.size)
318 .is_none_or(|end| end > archive_size)
319 {
320 return Err(AsarError::Other(format!(
321 "File entry extends beyond archive boundary (offset={}, size={}, archiveSize={})",
322 offset, info.size, archive_size
323 )));
324 }
325
326 file.seek(std::io::SeekFrom::Start(offset))?;
327 let size = usize::try_from(info.size).map_err(|_| AsarError::Other("size overflow".into()))?;
328 let mut buffer = vec![0u8; size];
329 file.read_exact(&mut buffer)?;
330 Ok(buffer)
331}
332
333pub fn uncache_filesystem(archive_path: &Path) -> bool {
334 let mut cache = FILESYSTEM_CACHE.write().unwrap();
335 cache.remove(archive_path).is_some()
336}
337
338pub fn uncache_all() {
339 let mut cache = FILESYSTEM_CACHE.write().unwrap();
340 cache.clear();
341}