anyback_reader/
archive.rs1use std::{
2 collections::BTreeSet,
3 fs,
4 io::Read,
5 path::{Path, PathBuf},
6};
7
8use anyhow::{Context, Result, anyhow};
9use serde::Serialize;
10use zip::ZipArchive;
11
12#[derive(Debug, Clone, Serialize)]
13pub struct ArchiveFileEntry {
14 pub path: String,
15 pub bytes: u64,
16}
17
/// The kind of on-disk container an [`ArchiveReader`] reads from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArchiveSourceKind {
    /// The archive is a directory tree on the filesystem.
    Directory,
    /// The archive is a single zip file.
    Zip,
}

impl ArchiveSourceKind {
    /// Returns the lowercase label for this source kind
    /// (`"directory"` or `"zip"`).
    pub fn as_str(self) -> &'static str {
        match self {
            ArchiveSourceKind::Directory => "directory",
            ArchiveSourceKind::Zip => "zip",
        }
    }
}
32
33#[derive(Debug, Clone)]
34pub struct ArchiveReader {
35 root: PathBuf,
36 source: ArchiveSourceKind,
37}
38
39impl ArchiveReader {
40 pub fn from_path(path: &Path) -> Result<Self> {
41 if path.is_dir() {
42 return Ok(Self {
43 root: path.to_path_buf(),
44 source: ArchiveSourceKind::Directory,
45 });
46 }
47 if path.is_file() {
48 let file = fs::File::open(path)
49 .with_context(|| format!("failed to open archive file {}", path.display()))?;
50 if ZipArchive::new(file).is_ok() {
51 return Ok(Self {
52 root: path.to_path_buf(),
53 source: ArchiveSourceKind::Zip,
54 });
55 }
56 }
57 Err(anyhow!(
58 "archive must be a directory or zip file: {}",
59 path.display()
60 ))
61 }
62
63 pub fn source(&self) -> ArchiveSourceKind {
64 self.source
65 }
66
67 pub fn list_files(&self) -> Result<Vec<ArchiveFileEntry>> {
68 let mut entries = Vec::new();
69 match self.source {
70 ArchiveSourceKind::Directory => {
71 let mut stack = vec![self.root.clone()];
72 while let Some(dir) = stack.pop() {
73 for entry in fs::read_dir(&dir)? {
74 let entry = entry?;
75 let path = entry.path();
76 if path.is_dir() {
77 stack.push(path);
78 continue;
79 }
80 let rel = path.strip_prefix(&self.root).with_context(|| {
81 format!("archive file not under root: {}", path.display())
82 })?;
83 let meta = entry.metadata()?;
84 entries.push(ArchiveFileEntry {
85 path: rel.to_string_lossy().into_owned(),
86 bytes: meta.len(),
87 });
88 }
89 }
90 }
91 ArchiveSourceKind::Zip => {
92 let file = fs::File::open(&self.root)?;
93 let mut zip = ZipArchive::new(file).with_context(|| {
94 format!("failed to open zip archive {}", self.root.display())
95 })?;
96 for idx in 0..zip.len() {
97 let entry = zip.by_index(idx)?;
98 if entry.is_dir() {
99 continue;
100 }
101 entries.push(ArchiveFileEntry {
102 path: entry.name().to_string(),
103 bytes: entry.size(),
104 });
105 }
106 }
107 }
108 entries.sort_by(|a, b| a.path.cmp(&b.path));
109 Ok(entries)
110 }
111
112 pub fn read_bytes(&self, rel_path: &str) -> Result<Vec<u8>> {
113 match self.source {
114 ArchiveSourceKind::Directory => {
115 let path = self.root.join(rel_path);
116 fs::read(&path)
117 .with_context(|| format!("failed to read archive file {}", path.display()))
118 }
119 ArchiveSourceKind::Zip => {
120 let file = fs::File::open(&self.root)?;
121 let mut zip = ZipArchive::new(file)?;
122 let mut entry = zip
123 .by_name(rel_path)
124 .with_context(|| format!("archive entry not found in zip: {rel_path}"))?;
125 let mut out = Vec::new();
126 entry.read_to_end(&mut out)?;
127 Ok(out)
128 }
129 }
130 }
131
132 pub fn read_bytes_if_exists(&self, rel_path: &str) -> Result<Option<Vec<u8>>> {
133 match self.source {
134 ArchiveSourceKind::Directory => {
135 let path = self.root.join(rel_path);
136 if !path.is_file() {
137 return Ok(None);
138 }
139 let bytes = fs::read(&path)
140 .with_context(|| format!("failed to read archive file {}", path.display()))?;
141 Ok(Some(bytes))
142 }
143 ArchiveSourceKind::Zip => {
144 let file = fs::File::open(&self.root)?;
145 let mut zip = ZipArchive::new(file)?;
146 let Ok(mut entry) = zip.by_name(rel_path) else {
147 return Ok(None);
148 };
149 let mut out = Vec::new();
150 entry.read_to_end(&mut out)?;
151 Ok(Some(out))
152 }
153 }
154 }
155}
156
/// Reports whether `value` has the shape of a content identifier: 20 to 128
/// bytes long, starting with `bafy`, and made up solely of ASCII lowercase
/// letters and digits.
fn looks_like_content_id(value: &str) -> bool {
    if !(20..=128).contains(&value.len()) {
        return false;
    }
    if !value.starts_with("bafy") {
        return false;
    }
    // Any non-ASCII character encodes to bytes >= 0x80, which this rejects.
    value
        .bytes()
        .all(|byte| matches!(byte, b'a'..=b'z' | b'0'..=b'9'))
}
165
166pub fn infer_object_id_from_snapshot_path(path: &str) -> Option<String> {
167 let file_name = Path::new(path).file_name().and_then(|s| s.to_str())?;
168 let stem = if let Some(stem) = file_name.strip_suffix(".pb.json") {
169 stem
170 } else if let Some(stem) = file_name.strip_suffix(".pb") {
171 stem
172 } else {
173 return None;
174 };
175 looks_like_content_id(stem).then(|| stem.to_string())
176}
177
178pub fn infer_object_ids_from_files(files: &[ArchiveFileEntry]) -> Vec<String> {
179 let mut ids = BTreeSet::new();
180 for entry in files {
181 let path = Path::new(&entry.path);
182 let under_objects = path
183 .components()
184 .next()
185 .and_then(|c| c.as_os_str().to_str())
186 .is_some_and(|root| root == "objects");
187 if !under_objects {
188 continue;
189 }
190 if let Some(id) = infer_object_id_from_snapshot_path(&entry.path) {
191 ids.insert(id);
192 }
193 }
194 ids.into_iter().collect()
195}
196
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// A well-formed content id used across the inference tests.
    const SAMPLE_ID: &str = "bafyreiaebddr63d7sye3eggmtkyeioqxftoaipobsynceksj6faedvd2xi";

    /// Writes a zip file at `path` holding `files` as `(entry name, contents)` pairs.
    fn write_zip(path: &Path, files: &[(&str, &str)]) {
        let file = fs::File::create(path).unwrap();
        let mut writer = zip::ZipWriter::new(file);
        for (name, contents) in files {
            writer
                .start_file(*name, zip::write::SimpleFileOptions::default())
                .unwrap();
            writer.write_all(contents.as_bytes()).unwrap();
        }
        writer.finish().unwrap();
    }

    #[test]
    fn reader_lists_and_reads_directory_archive() {
        let temp = tempfile::tempdir().unwrap();
        fs::create_dir_all(temp.path().join("objects")).unwrap();
        fs::write(temp.path().join("manifest.json"), b"{}").unwrap();
        fs::write(temp.path().join("objects/obj.pb"), b"payload").unwrap();

        let reader = ArchiveReader::from_path(temp.path()).unwrap();
        assert_eq!(reader.source(), ArchiveSourceKind::Directory);

        let files = reader.list_files().unwrap();
        assert!(files.iter().any(|entry| entry.path == "manifest.json"));
        assert!(files.iter().any(|entry| entry.path == "objects/obj.pb"));
        assert_eq!(reader.read_bytes("objects/obj.pb").unwrap(), b"payload");
    }

    #[test]
    fn reader_lists_and_reads_zip_archive() {
        let temp = tempfile::tempdir().unwrap();
        let zip_path = temp.path().join("archive.zip");
        write_zip(
            &zip_path,
            &[("manifest.json", "{}"), ("objects/obj.pb", "payload")],
        );

        let reader = ArchiveReader::from_path(&zip_path).unwrap();
        assert_eq!(reader.source(), ArchiveSourceKind::Zip);

        let files = reader.list_files().unwrap();
        assert!(files.iter().any(|entry| entry.path == "manifest.json"));
        assert!(files.iter().any(|entry| entry.path == "objects/obj.pb"));
        assert_eq!(reader.read_bytes("objects/obj.pb").unwrap(), b"payload");
    }

    #[test]
    fn from_path_rejects_missing_and_non_zip_paths() {
        let temp = tempfile::tempdir().unwrap();
        assert!(ArchiveReader::from_path(&temp.path().join("missing")).is_err());

        let plain = temp.path().join("notes.txt");
        fs::write(&plain, b"not a zip").unwrap();
        assert!(ArchiveReader::from_path(&plain).is_err());
    }

    #[test]
    fn read_bytes_if_exists_distinguishes_present_and_missing() {
        let temp = tempfile::tempdir().unwrap();
        let dir_root = temp.path().join("dir");
        fs::create_dir_all(dir_root.join("objects")).unwrap();
        fs::write(dir_root.join("objects/obj.pb"), b"payload").unwrap();
        let zip_path = temp.path().join("archive.zip");
        write_zip(&zip_path, &[("objects/obj.pb", "payload")]);

        // Both source kinds must agree on present and absent entries.
        for root in [dir_root, zip_path] {
            let reader = ArchiveReader::from_path(&root).unwrap();
            assert_eq!(
                reader.read_bytes_if_exists("objects/obj.pb").unwrap(),
                Some(b"payload".to_vec())
            );
            assert_eq!(
                reader.read_bytes_if_exists("objects/missing.pb").unwrap(),
                None
            );
        }
    }

    #[test]
    fn infer_object_id_accepts_bafy_id_stems() {
        for suffix in [".pb", ".pb.json"] {
            let path = format!("objects/{}{}", SAMPLE_ID, suffix);
            assert_eq!(
                infer_object_id_from_snapshot_path(&path),
                Some(SAMPLE_ID.to_string())
            );
        }
    }

    #[test]
    fn infer_object_id_rejects_other_names() {
        assert_eq!(infer_object_id_from_snapshot_path("objects/obj.pb"), None);
        assert_eq!(
            infer_object_id_from_snapshot_path(&format!("objects/{}.txt", SAMPLE_ID)),
            None
        );
        assert_eq!(
            infer_object_id_from_snapshot_path(&format!(
                "objects/{}.pb",
                SAMPLE_ID.to_uppercase()
            )),
            None
        );
    }

    #[test]
    fn infer_object_ids_only_considers_objects_directory() {
        let entry = |path: String| ArchiveFileEntry { path, bytes: 0 };
        let files = vec![
            entry(format!("objects/{}.pb", SAMPLE_ID)),
            entry(format!("objects/{}.pb.json", SAMPLE_ID)),
            entry(format!("files/{}.pb", SAMPLE_ID)),
            entry("objects/readme.txt".to_string()),
        ];
        assert_eq!(
            infer_object_ids_from_files(&files),
            vec![SAMPLE_ID.to_string()]
        );
    }
}