anyback_reader/
archive.rs1use std::{
2 collections::BTreeSet,
3 fs,
4 io::Read,
5 path::{Path, PathBuf},
6 sync::{Arc, Mutex},
7};
8
9use anyhow::{Context, Result, anyhow};
10use serde::Serialize;
11use zip::ZipArchive;
12
13#[derive(Debug, Clone, Serialize)]
14pub struct ArchiveFileEntry {
15 pub path: String,
16 pub bytes: u64,
17}
18
/// The kind of backing storage an archive is read from.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ArchiveSourceKind {
    /// The archive is a directory tree on disk.
    Directory,
    /// The archive is a single zip file.
    Zip,
}

impl ArchiveSourceKind {
    /// Returns the stable lowercase label for this kind
    /// (`"directory"` or `"zip"`).
    pub fn as_str(self) -> &'static str {
        let label: &'static str = match self {
            ArchiveSourceKind::Zip => "zip",
            ArchiveSourceKind::Directory => "directory",
        };
        label
    }
}
33
34#[derive(Clone)]
35pub struct ArchiveReader {
36 root: PathBuf,
37 source: ArchiveSourceKind,
38 zip: Option<ZipReaderState>,
39}
40
41#[derive(Clone)]
42struct ZipReaderState {
43 archive: Arc<Mutex<ZipArchive<fs::File>>>,
44 files: Arc<Vec<ArchiveFileEntry>>,
45}
46
47impl std::fmt::Debug for ArchiveReader {
48 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49 f.debug_struct("ArchiveReader")
50 .field("root", &self.root)
51 .field("source", &self.source)
52 .finish_non_exhaustive()
53 }
54}
55
56impl ArchiveReader {
57 pub fn from_path(path: &Path) -> Result<Self> {
58 if path.is_dir() {
59 return Ok(Self {
60 root: path.to_path_buf(),
61 source: ArchiveSourceKind::Directory,
62 zip: None,
63 });
64 }
65 if path.is_file() {
66 let file = fs::File::open(path)
67 .with_context(|| format!("failed to open archive file {}", path.display()))?;
68 if let Ok(mut zip) = ZipArchive::new(file) {
69 let mut files = Vec::new();
70 for idx in 0..zip.len() {
71 let entry = zip.by_index(idx)?;
72 if entry.is_dir() {
73 continue;
74 }
75 files.push(ArchiveFileEntry {
76 path: entry.name().to_string(),
77 bytes: entry.size(),
78 });
79 }
80 files.sort_by(|a, b| a.path.cmp(&b.path));
81 return Ok(Self {
82 root: path.to_path_buf(),
83 source: ArchiveSourceKind::Zip,
84 zip: Some(ZipReaderState {
85 archive: Arc::new(Mutex::new(zip)),
86 files: Arc::new(files),
87 }),
88 });
89 }
90 }
91 Err(anyhow!(
92 "archive must be a directory or zip file: {}",
93 path.display()
94 ))
95 }
96
97 pub fn source(&self) -> ArchiveSourceKind {
98 self.source
99 }
100
101 pub fn list_files(&self) -> Result<Vec<ArchiveFileEntry>> {
102 match self.source {
103 ArchiveSourceKind::Directory => {
104 let mut entries = Vec::new();
105 let mut stack = vec![self.root.clone()];
106 while let Some(dir) = stack.pop() {
107 for entry in fs::read_dir(&dir)? {
108 let entry = entry?;
109 let path = entry.path();
110 if path.is_dir() {
111 stack.push(path);
112 continue;
113 }
114 let rel = path.strip_prefix(&self.root).with_context(|| {
115 format!("archive file not under root: {}", path.display())
116 })?;
117 let meta = entry.metadata()?;
118 entries.push(ArchiveFileEntry {
119 path: rel.to_string_lossy().into_owned(),
120 bytes: meta.len(),
121 });
122 }
123 }
124 entries.sort_by(|a, b| a.path.cmp(&b.path));
125 Ok(entries)
126 }
127 ArchiveSourceKind::Zip => {
128 let state = self.zip_state()?;
129 Ok(state.files.as_ref().clone())
130 }
131 }
132 }
133
134 pub fn read_bytes(&self, rel_path: &str) -> Result<Vec<u8>> {
135 match self.source {
136 ArchiveSourceKind::Directory => {
137 let path = self.root.join(rel_path);
138 fs::read(&path)
139 .with_context(|| format!("failed to read archive file {}", path.display()))
140 }
141 ArchiveSourceKind::Zip => {
142 let state = self.zip_state()?;
143 let mut zip = state
144 .archive
145 .lock()
146 .map_err(|_| anyhow!("zip archive lock poisoned"))?;
147 let mut entry = zip
148 .by_name(rel_path)
149 .with_context(|| format!("archive entry not found in zip: {rel_path}"))?;
150 let mut out = Vec::new();
151 entry.read_to_end(&mut out)?;
152 drop(entry);
153 drop(zip);
154 Ok(out)
155 }
156 }
157 }
158
159 pub fn read_bytes_if_exists(&self, rel_path: &str) -> Result<Option<Vec<u8>>> {
160 match self.source {
161 ArchiveSourceKind::Directory => {
162 let path = self.root.join(rel_path);
163 if !path.is_file() {
164 return Ok(None);
165 }
166 let bytes = fs::read(&path)
167 .with_context(|| format!("failed to read archive file {}", path.display()))?;
168 Ok(Some(bytes))
169 }
170 ArchiveSourceKind::Zip => {
171 let state = self.zip_state()?;
172 let mut zip = state
173 .archive
174 .lock()
175 .map_err(|_| anyhow!("zip archive lock poisoned"))?;
176 let Ok(mut entry) = zip.by_name(rel_path) else {
177 return Ok(None);
178 };
179 let mut out = Vec::new();
180 entry.read_to_end(&mut out)?;
181 drop(entry);
182 drop(zip);
183 Ok(Some(out))
184 }
185 }
186 }
187
188 fn zip_state(&self) -> Result<&ZipReaderState> {
189 self.zip
190 .as_ref()
191 .ok_or_else(|| anyhow!("zip archive state unavailable"))
192 }
193}
194
/// Heuristic check that `value` has the shape of a content id: 20 to 128
/// bytes long, starting with `bafy`, and made up solely of ASCII lowercase
/// letters and ASCII digits.
fn looks_like_content_id(value: &str) -> bool {
    if !(20..=128).contains(&value.len()) {
        return false;
    }
    if !value.starts_with("bafy") {
        return false;
    }
    // Any non-ASCII byte fails both predicates, same as a per-char check.
    value
        .bytes()
        .all(|byte| byte.is_ascii_digit() || byte.is_ascii_lowercase())
}
203
204pub fn infer_object_id_from_snapshot_path(path: &str) -> Option<String> {
205 let file_name = Path::new(path).file_name().and_then(|s| s.to_str())?;
206 let stem = if let Some(stem) = file_name.strip_suffix(".pb.json") {
207 stem
208 } else if let Some(stem) = file_name.strip_suffix(".pb") {
209 stem
210 } else {
211 return None;
212 };
213 looks_like_content_id(stem).then(|| stem.to_string())
214}
215
216pub fn infer_object_ids_from_files(files: &[ArchiveFileEntry]) -> Vec<String> {
217 let mut ids = BTreeSet::new();
218 for entry in files {
219 let path = Path::new(&entry.path);
220 let under_objects = path
221 .components()
222 .next()
223 .and_then(|c| c.as_os_str().to_str())
224 .is_some_and(|root| root == "objects");
225 if !under_objects {
226 continue;
227 }
228 if let Some(id) = infer_object_id_from_snapshot_path(&entry.path) {
229 ids.insert(id);
230 }
231 }
232 ids.into_iter().collect()
233}
234
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// True when `files` contains an entry whose path is exactly `wanted`.
    fn has_path(files: &[ArchiveFileEntry], wanted: &str) -> bool {
        files.iter().any(|entry| entry.path == wanted)
    }

    /// A directory on disk is detected, listed recursively, and readable.
    #[test]
    fn reader_lists_and_reads_directory_archive() {
        let scratch = tempfile::tempdir().unwrap();
        let root = scratch.path();
        fs::create_dir_all(root.join("objects")).unwrap();
        fs::write(root.join("manifest.json"), b"{}").unwrap();
        fs::write(root.join("objects/obj.pb"), b"payload").unwrap();

        let reader = ArchiveReader::from_path(root).unwrap();
        assert_eq!(reader.source(), ArchiveSourceKind::Directory);

        let listing = reader.list_files().unwrap();
        assert!(has_path(&listing, "manifest.json"));
        assert!(has_path(&listing, "objects/obj.pb"));
        assert_eq!(reader.read_bytes("objects/obj.pb").unwrap(), b"payload");
    }

    /// A zip file is detected, its entries are listed, and they are readable.
    #[test]
    fn reader_lists_and_reads_zip_archive() {
        let scratch = tempfile::tempdir().unwrap();
        let zip_path = scratch.path().join("archive.zip");

        let mut writer = zip::ZipWriter::new(fs::File::create(&zip_path).unwrap());
        writer
            .start_file("manifest.json", zip::write::SimpleFileOptions::default())
            .unwrap();
        writer.write_all(b"{}").unwrap();
        writer
            .start_file("objects/obj.pb", zip::write::SimpleFileOptions::default())
            .unwrap();
        writer.write_all(b"payload").unwrap();
        writer.finish().unwrap();

        let reader = ArchiveReader::from_path(&zip_path).unwrap();
        assert_eq!(reader.source(), ArchiveSourceKind::Zip);

        let listing = reader.list_files().unwrap();
        assert!(has_path(&listing, "manifest.json"));
        assert!(has_path(&listing, "objects/obj.pb"));
        assert_eq!(reader.read_bytes("objects/obj.pb").unwrap(), b"payload");
    }

    /// A `bafy…` stem followed by `.pb` is returned as the object id.
    #[test]
    fn infer_object_id_accepts_bafy_id_stems() {
        let id = "bafyreiaebddr63d7sye3eggmtkyeioqxftoaipobsynceksj6faedvd2xi";
        let snapshot = format!("objects/{id}.pb");
        let inferred = infer_object_id_from_snapshot_path(&snapshot);
        assert_eq!(inferred, Some(id.to_string()));
    }
}
291}