Skip to main content

anyback_reader/
archive.rs

1use std::{
2    collections::BTreeSet,
3    fs,
4    io::Read,
5    path::{Path, PathBuf},
6};
7
8use anyhow::{Context, Result, anyhow};
9use serde::Serialize;
10use zip::ZipArchive;
11
12#[derive(Debug, Clone, Serialize)]
13pub struct ArchiveFileEntry {
14    pub path: String,
15    pub bytes: u64,
16}
17
/// The kind of on-disk container an archive is read from.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ArchiveSourceKind {
    /// The archive is a directory tree on the filesystem.
    Directory,
    /// The archive is a single zip file.
    Zip,
}

impl ArchiveSourceKind {
    /// Returns the lowercase label for this source kind
    /// (`"directory"` or `"zip"`).
    pub fn as_str(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force a label choice.
        match self {
            ArchiveSourceKind::Zip => "zip",
            ArchiveSourceKind::Directory => "directory",
        }
    }
}
32
33#[derive(Debug, Clone)]
34pub struct ArchiveReader {
35    root: PathBuf,
36    source: ArchiveSourceKind,
37}
38
39impl ArchiveReader {
40    pub fn from_path(path: &Path) -> Result<Self> {
41        if path.is_dir() {
42            return Ok(Self {
43                root: path.to_path_buf(),
44                source: ArchiveSourceKind::Directory,
45            });
46        }
47        if path.is_file() {
48            let file = fs::File::open(path)
49                .with_context(|| format!("failed to open archive file {}", path.display()))?;
50            if ZipArchive::new(file).is_ok() {
51                return Ok(Self {
52                    root: path.to_path_buf(),
53                    source: ArchiveSourceKind::Zip,
54                });
55            }
56        }
57        Err(anyhow!(
58            "archive must be a directory or zip file: {}",
59            path.display()
60        ))
61    }
62
63    pub fn source(&self) -> ArchiveSourceKind {
64        self.source
65    }
66
67    pub fn list_files(&self) -> Result<Vec<ArchiveFileEntry>> {
68        let mut entries = Vec::new();
69        match self.source {
70            ArchiveSourceKind::Directory => {
71                let mut stack = vec![self.root.clone()];
72                while let Some(dir) = stack.pop() {
73                    for entry in fs::read_dir(&dir)? {
74                        let entry = entry?;
75                        let path = entry.path();
76                        if path.is_dir() {
77                            stack.push(path);
78                            continue;
79                        }
80                        let rel = path.strip_prefix(&self.root).with_context(|| {
81                            format!("archive file not under root: {}", path.display())
82                        })?;
83                        let meta = entry.metadata()?;
84                        entries.push(ArchiveFileEntry {
85                            path: rel.to_string_lossy().into_owned(),
86                            bytes: meta.len(),
87                        });
88                    }
89                }
90            }
91            ArchiveSourceKind::Zip => {
92                let file = fs::File::open(&self.root)?;
93                let mut zip = ZipArchive::new(file).with_context(|| {
94                    format!("failed to open zip archive {}", self.root.display())
95                })?;
96                for idx in 0..zip.len() {
97                    let entry = zip.by_index(idx)?;
98                    if entry.is_dir() {
99                        continue;
100                    }
101                    entries.push(ArchiveFileEntry {
102                        path: entry.name().to_string(),
103                        bytes: entry.size(),
104                    });
105                }
106            }
107        }
108        entries.sort_by(|a, b| a.path.cmp(&b.path));
109        Ok(entries)
110    }
111
112    pub fn read_bytes(&self, rel_path: &str) -> Result<Vec<u8>> {
113        match self.source {
114            ArchiveSourceKind::Directory => {
115                let path = self.root.join(rel_path);
116                fs::read(&path)
117                    .with_context(|| format!("failed to read archive file {}", path.display()))
118            }
119            ArchiveSourceKind::Zip => {
120                let file = fs::File::open(&self.root)?;
121                let mut zip = ZipArchive::new(file)?;
122                let mut entry = zip
123                    .by_name(rel_path)
124                    .with_context(|| format!("archive entry not found in zip: {rel_path}"))?;
125                let mut out = Vec::new();
126                entry.read_to_end(&mut out)?;
127                Ok(out)
128            }
129        }
130    }
131
132    pub fn read_bytes_if_exists(&self, rel_path: &str) -> Result<Option<Vec<u8>>> {
133        match self.source {
134            ArchiveSourceKind::Directory => {
135                let path = self.root.join(rel_path);
136                if !path.is_file() {
137                    return Ok(None);
138                }
139                let bytes = fs::read(&path)
140                    .with_context(|| format!("failed to read archive file {}", path.display()))?;
141                Ok(Some(bytes))
142            }
143            ArchiveSourceKind::Zip => {
144                let file = fs::File::open(&self.root)?;
145                let mut zip = ZipArchive::new(file)?;
146                let Ok(mut entry) = zip.by_name(rel_path) else {
147                    return Ok(None);
148                };
149                let mut out = Vec::new();
150                entry.read_to_end(&mut out)?;
151                Ok(Some(out))
152            }
153        }
154    }
155}
156
/// Returns `true` when `value` has the shape of a content id: 20 to 128
/// bytes long, starting with `bafy`, and made up only of ASCII lowercase
/// letters and ASCII digits.
fn looks_like_content_id(value: &str) -> bool {
    if !(20..=128).contains(&value.len()) {
        return false;
    }
    if !value.starts_with("bafy") {
        return false;
    }
    // Any non-ASCII character contributes bytes >= 0x80, which fail this
    // check just as the character itself would.
    value
        .bytes()
        .all(|byte| matches!(byte, b'a'..=b'z' | b'0'..=b'9'))
}
165
166pub fn infer_object_id_from_snapshot_path(path: &str) -> Option<String> {
167    let file_name = Path::new(path).file_name().and_then(|s| s.to_str())?;
168    let stem = if let Some(stem) = file_name.strip_suffix(".pb.json") {
169        stem
170    } else if let Some(stem) = file_name.strip_suffix(".pb") {
171        stem
172    } else {
173        return None;
174    };
175    looks_like_content_id(stem).then(|| stem.to_string())
176}
177
178pub fn infer_object_ids_from_files(files: &[ArchiveFileEntry]) -> Vec<String> {
179    let mut ids = BTreeSet::new();
180    for entry in files {
181        let path = Path::new(&entry.path);
182        let under_objects = path
183            .components()
184            .next()
185            .and_then(|c| c.as_os_str().to_str())
186            .is_some_and(|root| root == "objects");
187        if !under_objects {
188            continue;
189        }
190        if let Some(id) = infer_object_id_from_snapshot_path(&entry.path) {
191            ids.insert(id);
192        }
193    }
194    ids.into_iter().collect()
195}
196
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// Asserts the listing and payload that both fixture archives share:
    /// `manifest.json` and `objects/obj.pb` are listed, and the latter reads
    /// back as `payload`.
    fn assert_fixture_contents(reader: &ArchiveReader) {
        let listed = reader.list_files().unwrap();
        for expected in ["manifest.json", "objects/obj.pb"] {
            assert!(listed.iter().any(|entry| entry.path == expected));
        }
        assert_eq!(reader.read_bytes("objects/obj.pb").unwrap(), b"payload");
    }

    #[test]
    fn reader_lists_and_reads_directory_archive() {
        // Fixture: a temp directory holding a manifest and one object file.
        let workdir = tempfile::tempdir().unwrap();
        let root = workdir.path();
        fs::create_dir_all(root.join("objects")).unwrap();
        fs::write(root.join("manifest.json"), b"{}").unwrap();
        fs::write(root.join("objects/obj.pb"), b"payload").unwrap();

        let reader = ArchiveReader::from_path(root).unwrap();
        assert_eq!(reader.source(), ArchiveSourceKind::Directory);
        assert_fixture_contents(&reader);
    }

    #[test]
    fn reader_lists_and_reads_zip_archive() {
        // Fixture: the same two files, written into a zip archive.
        let workdir = tempfile::tempdir().unwrap();
        let zip_path = workdir.path().join("archive.zip");
        let mut writer = zip::ZipWriter::new(fs::File::create(&zip_path).unwrap());
        let members: [(&str, &[u8]); 2] =
            [("manifest.json", b"{}"), ("objects/obj.pb", b"payload")];
        for (name, body) in members {
            writer
                .start_file(name, zip::write::SimpleFileOptions::default())
                .unwrap();
            writer.write_all(body).unwrap();
        }
        writer.finish().unwrap();

        let reader = ArchiveReader::from_path(&zip_path).unwrap();
        assert_eq!(reader.source(), ArchiveSourceKind::Zip);
        assert_fixture_contents(&reader);
    }

    #[test]
    fn infer_object_id_accepts_bafy_id_stems() {
        let id = "bafyreiaebddr63d7sye3eggmtkyeioqxftoaipobsynceksj6faedvd2xi";
        let inferred = infer_object_id_from_snapshot_path(&format!("objects/{id}.pb"));
        assert_eq!(inferred, Some(id.to_string()));
    }
}
253}