Skip to main content

shard_core/store/
flat.rs

1use anyhow::Result;
2use std::fs;
3use std::io::{BufRead, Write};
4use std::path::{Path, PathBuf};
5use std::sync::Mutex;
6
/// Handle to a chunk store kept as plain files under a single root directory.
///
/// Chunks are stored at `<root>/objects/<first two chars of hash>/<hash>`, and
/// `<root>/objects.idx` lists the stored hashes, one per line.
#[derive(Debug)]
pub struct FlatStore {
    /// Directory that contains both `objects.idx` and the `objects/` tree.
    root: PathBuf,
    /// Held by `put_chunk`, `iter_chunks` and `delete_chunk` so that index reads and
    /// rewrites made through this handle do not interleave. It guards no in-memory data.
    lock: Mutex<()>,
}
11
12impl FlatStore {
13    pub fn new(root: &Path) -> Self {
14        Self {
15            root: root.to_path_buf(),
16            lock: Mutex::new(()),
17        }
18    }
19
20    fn index_path(&self) -> PathBuf {
21        self.root.join("objects.idx")
22    }
23
24    fn objects_dir(&self) -> PathBuf {
25        self.root.join("objects")
26    }
27
28    fn append_index(&self, hash_hex: &str) -> Result<()> {
29        let path = self.index_path();
30        let mut file = fs::OpenOptions::new()
31            .create(true)
32            .append(true)
33            .open(&path)?;
34        writeln!(file, "{}", hash_hex)?;
35        Ok(())
36    }
37
38    fn scan_and_index(&self) -> Result<Vec<(String, String)>> {
39        let objects_dir = self.objects_dir();
40        let mut entries = Vec::new();
41        if objects_dir.exists() {
42            for entry in fs::read_dir(&objects_dir)? {
43                let entry = entry?;
44                let prefix_dir = entry.path();
45                let prefix_name = entry.file_name().to_string_lossy().to_string();
46                if prefix_dir.is_dir() {
47                    for file_entry in fs::read_dir(&prefix_dir)? {
48                        let file_entry = file_entry?;
49                        let hash = file_entry.file_name().to_string_lossy().to_string();
50                        let rel_path = format!("{}/{}", prefix_name, hash);
51                        entries.push((hash, rel_path));
52                    }
53                }
54            }
55        }
56        let path = self.index_path();
57        let mut file = fs::OpenOptions::new()
58            .create(true)
59            .truncate(true)
60            .write(true)
61            .open(&path)?;
62        for (hash, _) in &entries {
63            writeln!(file, "{}", hash)?;
64        }
65        Ok(entries)
66    }
67
68    pub fn put_chunk(&self, chunk: &crate::chunker::Chunk) -> Result<()> {
69        let _guard = self.lock.lock().unwrap();
70        let hash_hex = chunk.hash.to_hex().to_string();
71        let prefix = hash_hex.get(..2).unwrap_or("xx");
72        let dir = self.objects_dir().join(prefix);
73        fs::create_dir_all(&dir)?;
74        let path = dir.join(&hash_hex);
75        if !path.exists() {
76            fs::write(path, &chunk.data)?;
77        }
78        self.append_index(&hash_hex)?;
79        Ok(())
80    }
81
82    pub fn get_chunk(&self, hash_hex: &str) -> Result<Vec<u8>> {
83        if hash_hex.len() < 2 {
84            anyhow::bail!("Invalid hash: {}", hash_hex);
85        }
86        let path = self.objects_dir().join(&hash_hex[..2]).join(hash_hex);
87        if !path.exists() {
88            anyhow::bail!("Chunk not found: {}", hash_hex);
89        }
90        Ok(fs::read(path)?)
91    }
92
93    pub fn has_chunk(&self, hash_hex: &str) -> bool {
94        if hash_hex.len() < 2 {
95            return false;
96        }
97        self.objects_dir()
98            .join(&hash_hex[..2])
99            .join(hash_hex)
100            .exists()
101    }
102
103    pub fn iter_chunks(&self) -> Result<Vec<(String, String)>> {
104        let _guard = self.lock.lock().unwrap();
105        let idx_path = self.index_path();
106        if idx_path.exists() {
107            let file = fs::File::open(&idx_path)?;
108            let mut entries = Vec::new();
109            for line in std::io::BufReader::new(file).lines() {
110                let h = line?.trim().to_string();
111                if !h.is_empty() {
112                    let prefix = h.get(..2).unwrap_or("xx");
113                    entries.push((h.clone(), format!("{}/{}", prefix, h)));
114                }
115            }
116            let file_count = count_object_files(&self.objects_dir());
117            if entries.len() >= file_count {
118                return Ok(entries);
119            }
120        }
121        self.scan_and_index()
122    }
123
124    pub fn delete_chunk(&self, hash_hex: &str, full_path: Option<&str>) -> Result<()> {
125        let _guard = self.lock.lock().unwrap();
126        let path = if let Some(fp) = full_path {
127            self.objects_dir().join(fp)
128        } else {
129            if hash_hex.len() < 2 {
130                anyhow::bail!("Invalid hash: {}", hash_hex);
131            }
132            self.objects_dir().join(&hash_hex[..2]).join(hash_hex)
133        };
134        if path.exists() {
135            fs::remove_file(path)?;
136        }
137        let idx_path = self.index_path();
138        if idx_path.exists() {
139            let entries: Vec<String> = {
140                let file = fs::File::open(&idx_path)?;
141                std::io::BufReader::new(file)
142                    .lines()
143                    .map_while(Result::ok)
144                    .map(|l| l.trim().to_string())
145                    .filter(|h| !h.is_empty() && h != hash_hex)
146                    .collect()
147            };
148            let mut file = fs::OpenOptions::new()
149                .create(true)
150                .truncate(true)
151                .write(true)
152                .open(&idx_path)?;
153            for h in &entries {
154                writeln!(file, "{}", h)?;
155            }
156        }
157        Ok(())
158    }
159}
160
/// Counts the entries found inside each directory directly under `objects_dir`.
///
/// Entries that sit directly in `objects_dir` without being directories are ignored.
/// Directories are detected with `Path::is_dir`, which follows symbolic links, the same
/// test `FlatStore::scan_and_index` applies, so both agree on which prefix directories
/// exist. Any directory that cannot be read contributes zero, and an unreadable or
/// missing `objects_dir` yields `0`.
fn count_object_files(objects_dir: &Path) -> usize {
    let top_level = match fs::read_dir(objects_dir) {
        Ok(iter) => iter,
        Err(_) => return 0,
    };
    top_level
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.is_dir())
        .filter_map(|dir| fs::read_dir(dir).ok())
        .map(|children| children.count())
        .sum()
}
174
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chunker::Chunk;
    use tempfile::tempdir;

    /// Wraps `data` in a `Chunk` at offset 0 whose hash is the BLAKE3 digest of `data`.
    fn fake_chunk(data: &[u8]) -> Chunk {
        Chunk {
            hash: blake3::hash(data),
            data: data.to_vec(),
            offset: 0,
        }
    }

    /// A stored chunk is reported as present and reads back unchanged.
    #[test]
    fn test_flat_put_get_roundtrip() {
        let tmp = tempdir().unwrap();
        let store = FlatStore::new(tmp.path());
        let chunk = fake_chunk(b"hello flat store");
        let key = chunk.hash.to_hex().to_string();

        store.put_chunk(&chunk).unwrap();

        assert!(store.has_chunk(&key));
        let bytes = store.get_chunk(&key).unwrap();
        assert_eq!(bytes, b"hello flat store");
    }

    /// Reading a hash that was never stored is an error.
    #[test]
    fn test_flat_get_nonexistent() {
        let tmp = tempdir().unwrap();
        let store = FlatStore::new(tmp.path());

        assert!(store.get_chunk("abcdef").is_err());
    }

    /// Unknown and empty hashes are both reported as absent.
    #[test]
    fn test_flat_has_nonexistent() {
        let tmp = tempdir().unwrap();
        let store = FlatStore::new(tmp.path());

        for missing in ["ab", ""] {
            assert!(!store.has_chunk(missing));
        }
    }

    /// Deleting by hash alone removes the stored object.
    #[test]
    fn test_flat_delete_chunk() {
        let tmp = tempdir().unwrap();
        let store = FlatStore::new(tmp.path());
        let chunk = fake_chunk(b"delete me");
        let key = chunk.hash.to_hex().to_string();

        store.put_chunk(&chunk).unwrap();
        assert!(store.has_chunk(&key));

        store.delete_chunk(&key, None).unwrap();
        assert!(!store.has_chunk(&key));
    }

    /// Every stored chunk appears in the listing, and nothing else does.
    #[test]
    fn test_flat_iter_chunks() {
        let tmp = tempdir().unwrap();
        let store = FlatStore::new(tmp.path());
        let chunks = [
            fake_chunk(b"chunk a"),
            fake_chunk(b"chunk b"),
            fake_chunk(b"chunk c"),
        ];
        chunks.iter().for_each(|c| store.put_chunk(c).unwrap());

        let listed = store.iter_chunks().unwrap();

        assert_eq!(listed.len(), 3);
        for c in &chunks {
            let expected = c.hash.to_hex().to_string();
            assert!(listed.iter().any(|(hash, _)| *hash == expected));
        }
    }

    /// Storing the same chunk twice succeeds and leaves a readable object on disk.
    #[test]
    fn test_flat_put_idempotent() {
        let tmp = tempdir().unwrap();
        let store = FlatStore::new(tmp.path());
        let chunk = fake_chunk(b"idempotent");
        for _ in 0..2 {
            store.put_chunk(&chunk).unwrap();
        }

        // The object lives at objects/<two-char prefix>/<hash>.
        let key = chunk.hash.to_hex().to_string();
        let object_file = tmp.path().join("objects").join(&key[..2]).join(&key);
        assert!(object_file.exists());

        // The store still serves the original bytes.
        assert_eq!(store.get_chunk(&key).unwrap(), b"idempotent");
    }
}