diskscan/
tree.rs

1use std::collections::HashMap;
2
3use byte_unit::Byte;
4
5use crate::arena::{Arena, Id};
6use crate::entry::DirEntry;
7use crate::path::{EntryPath, PathCrc};
8use crate::tree_snapshot::FilesRetrieverFn;
9use crate::{EntrySnapshot, SnapshotConfig, TreeSnapshot};
10
11#[derive(Clone, Debug)]
12pub struct Stats {
13    pub used_size: Byte,
14    pub files: u64,
15    pub dirs: u64,
16}
17
18#[derive(Debug)]
19pub struct FileTree {
20    /// Root of file tree
21    root: Id,
22
23    /// Arena where all entries are actually stored
24    arena: Arena<DirEntry>,
25
26    /// Map of all file entries
27    ///
28    /// Key is CRC of entry path and value is all entries with same crc
29    entries: HashMap<PathCrc, Vec<Id>>,
30
31    files: u64,
32    dirs: u64,
33}
34
35impl FileTree {
36    pub fn find_child(&self, parent_id: Id, name: &str, path_crc: PathCrc) -> Option<Id> {
37        let ids = self.entries.get(&path_crc)?;
38
39        ids.iter()
40            .find(|&&id| {
41                let child = self.arena.get(id);
42                child.get_parent() == Some(parent_id) && child.get_name() == name
43            })
44            .copied()
45    }
46
47    pub fn find_entry(&self, path: &EntryPath) -> Option<Id> {
48        let root = self.arena.get(self.root);
49
50        //todo can store in field and not create each time
51        let root_path = root.get_path(&self.arena);
52
53        if &root_path == path {
54            Some(self.root)
55        } else if &root_path < path {
56            let crc = path.get_crc();
57            let ids = self.entries.get(&crc)?;
58
59            ids.iter()
60                .find(|&&id| self.arena.get(id).compare_path(&self.arena, path))
61                .copied()
62        } else {
63            None
64        }
65    }
66
67    pub fn get_arena(&self) -> &Arena<DirEntry> {
68        &self.arena
69    }
70
71    pub fn get_root(&self) -> &DirEntry {
72        self.arena.get(self.root)
73    }
74
75    pub fn make_snapshot(
76        &self,
77        root: &EntryPath,
78        config: SnapshotConfig,
79        files_getter: &FilesRetrieverFn,
80    ) -> Option<TreeSnapshot<EntrySnapshot>> {
81        self.make_snapshot_wrapped(root, config, &std::convert::identity, files_getter)
82    }
83
84    pub fn make_snapshot_wrapped<W>(
85        &self,
86        root: &EntryPath,
87        config: SnapshotConfig,
88        wrapper: &dyn Fn(EntrySnapshot) -> W,
89        files_getter: &FilesRetrieverFn,
90    ) -> Option<TreeSnapshot<W>>
91    where
92        W: AsRef<EntrySnapshot> + AsMut<EntrySnapshot>,
93    {
94        let root = self.find_entry(root)?;
95        Some(TreeSnapshot::create_wrapped(
96            root,
97            &self.arena,
98            config,
99            wrapper,
100            files_getter,
101        ))
102    }
103
104    /// Creates new [`FileTree`] rooted at specified path
105    pub fn new(path: String) -> Self {
106        let mut arena = Arena::default();
107
108        let root = arena.put(DirEntry::new_dir(path));
109        FileTree {
110            root,
111            arena,
112            entries: HashMap::new(),
113            files: 0,
114            dirs: 0,
115        }
116    }
117
118    /// Sets children for specified path
119    ///
120    /// All existing directories at path, if not present in given vec, are removed (recursively)
121    /// Updates number of files at given path and their total size
122    /// All new directories are returned
123    pub fn set_children(
124        &mut self,
125        path: &EntryPath,
126        directories: Vec<DirEntry>,
127        file_count: u64,
128        files_size: i64,
129    ) -> Option<Vec<String>> {
130        let parent_id = self.find_entry(path)?;
131        //todo probably can increase speed by presorting children
132        // and inserting them in bulk
133        let mut new_dirs = vec![];
134
135        let (mut deleted_dirs, dirs_size) = DirEntry::mark_children(&mut self.arena, parent_id);
136        // updated total file count
137        self.files -= self.arena.get(parent_id).get_files() as u64;
138        self.files += file_count;
139        self.arena.get_mut(parent_id).set_files(file_count as u32);
140        DirEntry::set_size(&mut self.arena, parent_id, dirs_size + files_size);
141
142        let has_children = deleted_dirs > 0;
143        let parent_crc = self.arena.get(parent_id).path_crc();
144        for dir in directories {
145            if has_children {
146                let existing =
147                    self.find_child(parent_id, dir.get_name(), parent_crc ^ dir.path_crc());
148
149                if let Some(existing) = existing {
150                    let child = self.arena.get_mut(existing);
151                    child.unmark();
152                    deleted_dirs -= 1;
153                    // size of dir is sum of children sizes, so nothing to do here
154                    continue;
155                }
156            }
157
158            // entry was not found, add it
159            let child_id = self.arena.put(dir);
160            DirEntry::add_child(&mut self.arena, parent_id, child_id);
161
162            let child = self.arena.get(child_id);
163            self.dirs += 1;
164            new_dirs.push(child.get_name().to_string());
165            // store new entry in path crc map
166            self.entries
167                .entry(child.path_crc())
168                .or_default()
169                .push(child_id);
170        }
171
172        if has_children {
173            let removed = DirEntry::remove_marked(&mut self.arena, parent_id, deleted_dirs);
174            self.cleanup_removed(removed);
175        }
176
177        Some(new_dirs)
178    }
179
180    /// Return size of tree (number of files and dirs)
181    pub fn stats(&self) -> Stats {
182        Stats {
183            files: self.files,
184            dirs: self.dirs,
185            used_size: Byte::from_bytes(self.arena.get(self.root).get_size() as u64),
186        }
187    }
188
189    /// Cleans up removed ids recursively
190    fn cleanup_removed(&mut self, entries: Vec<Id>) {
191        self.dirs -= entries.len() as u64;
192        for id in entries {
193            // remove entry from index
194            let path_crc = self.arena.get(id).path_crc();
195            let bin = self.entries.get_mut(&path_crc).unwrap();
196            if bin.len() == 1 {
197                self.entries.remove(&path_crc);
198            } else {
199                let pos = bin.iter().position(|&i| i == id).unwrap();
200                bin.swap_remove(pos);
201            }
202
203            let children = self.arena.remove(id).unwrap().take_children();
204            self.cleanup_removed(children);
205        }
206    }
207}
208
209#[cfg(test)]
210mod tests {
211    use std::collections::HashMap;
212    use std::fmt::Debug;
213    use std::path::PathBuf;
214
215    use crate::entry::DirEntry;
216    use crate::path::EntryPath;
217    use crate::tree::FileTree;
218    use crate::tree_snapshot::FilesRetrieverFn;
219    use crate::SnapshotConfig;
220
221    fn new_dir<T: Into<String>>(name: T) -> DirEntry {
222        DirEntry::new_dir(name.into())
223    }
224
225    fn path<R: Into<PathBuf>, P: Into<PathBuf>>(root: R, path: P) -> EntryPath {
226        EntryPath::from(root.into(), path.into()).unwrap()
227    }
228
229    fn root_path(tree: &FileTree) -> EntryPath {
230        tree.get_root().get_path(&tree.arena)
231    }
232
233    fn files_getter<
234        S: Into<Vec<(&'static str, i64)>> + Debug,
235        T: Into<HashMap<&'static str, S>>,
236    >(
237        files: T,
238    ) -> Box<FilesRetrieverFn> {
239        let map = files.into();
240        let map: HashMap<_, _> = map
241            .into_iter()
242            .map(|(key, val)| (key, val.into()))
243            .collect();
244        Box::new(move |path| {
245            // replace all backslashes since in tests we use only '/'
246            let path = path.to_str().unwrap().replace('\\', "/");
247            map.get(path.as_str())
248                .cloned()
249                .unwrap_or_default()
250                .into_iter()
251                .map(|(name, size)| (name.to_string(), size))
252                .collect()
253        })
254    }
255
256    fn sample_tree() -> FileTree {
257        let root = "/data/mnt".to_string();
258        let mut tree = FileTree::new(root.clone());
259        tree.set_children(&path(&root, "/data/mnt"), vec![new_dir("dir1")], 2, 25);
260        tree.set_children(&path(&root, "/data/mnt/dir1"), vec![new_dir("dir2")], 1, 25);
261        tree.set_children(&path(&root, "/data/mnt/dir1/dir2"), vec![], 3, 25);
262        tree
263    }
264
265    fn sample_getter() -> Box<FilesRetrieverFn> {
266        files_getter([
267            ("/data/mnt", [("file2", 10), ("file1", 15)].as_ref()),
268            ("/data/mnt/dir1", [("file3", 25)].as_ref()),
269            (
270                "/data/mnt/dir1/dir2",
271                [("file4", 5), ("file5", 10), ("file6", 10)].as_ref(),
272            ),
273        ])
274    }
275
276    #[test]
277    fn tree_building() {
278        let root = "/data/mnt".to_string();
279        let mut tree = FileTree::new(root.clone());
280
281        tree.set_children(&path(&root, "/data/mnt"), vec![new_dir("dir1")], 2, 25);
282        tree.set_children(&path(&root, "/data/mnt/dir1"), vec![new_dir("dir2")], 1, 25);
283
284        tree.arena.get(tree.root).print(&tree.arena, 5);
285
286        assert_eq!(tree.find_entry(&path(&root, "/data/mnt")), Some(tree.root));
287
288        let dir1 = tree.find_entry(&path(&root, "/data/mnt/dir1")).unwrap();
289        let dir2 = tree
290            .find_entry(&path(&root, "/data/mnt/dir1/dir2"))
291            .unwrap();
292
293        assert_eq!(tree.arena.get(dir1).get_name(), "dir1");
294        assert_eq!(tree.arena.get(dir2).get_name(), "dir2");
295
296        assert_eq!(tree.find_entry(&path(&root, "/data/mnt/test")), None);
297        assert_eq!(tree.find_entry(&path("/", "/data2")), None);
298        assert_eq!(tree.find_entry(&path("/", "/dat")), None);
299
300        let stats = tree.stats();
301        assert_eq!(stats.used_size.get_bytes(), 50);
302        assert_eq!(stats.files, 3);
303        assert_eq!(stats.dirs, 2);
304    }
305
306    #[test]
307    fn set_children_from_empty() {
308        let root = "/data/mnt".to_string();
309        let mut tree = FileTree::new(root);
310
311        let new_dirs = tree
312            .set_children(&root_path(&tree), vec![new_dir("dir1")], 2, 20)
313            .unwrap();
314        assert_eq!(new_dirs.len(), 1);
315        assert_eq!(new_dirs[0], "dir1");
316
317        let snapshot = tree
318            .make_snapshot(
319                &root_path(&tree),
320                SnapshotConfig::default(),
321                &files_getter([("/data/mnt", [("file1", 5), ("file2", 15)])]),
322            )
323            .unwrap();
324        let mut it = snapshot.get_root().iter();
325        assert_eq!(it.next().unwrap().get_name(), "file2");
326        assert_eq!(it.next().unwrap().get_name(), "file1");
327        assert_eq!(it.next().unwrap().get_name(), "dir1");
328        assert!(it.next().is_none());
329    }
330
331    #[test]
332    fn set_children_to_empty() {
333        let mut tree = sample_tree();
334        tree.get_root().print(tree.get_arena(), 5);
335
336        tree.set_children(&root_path(&tree), vec![], 0, 0);
337        tree.get_root().print(tree.get_arena(), 5);
338        let snapshot = tree
339            .make_snapshot(&root_path(&tree), SnapshotConfig::default(), &|_| vec![])
340            .unwrap();
341        let root = snapshot.get_root();
342        assert!(root.iter().next().is_none());
343    }
344
345    #[test]
346    fn set_children_update() {
347        let mut tree = sample_tree();
348        tree.get_root().print(tree.get_arena(), 5);
349
350        let new_dirs = tree
351            .set_children(
352                &path("/data/mnt", "/data/mnt/dir1"),
353                vec![new_dir("dir2"), new_dir("dir3"), new_dir("dir4")],
354                1,
355                30,
356            )
357            .unwrap();
358        tree.get_root().print(tree.get_arena(), 5);
359        assert_eq!(new_dirs.len(), 2);
360        assert!(new_dirs.contains(&"dir3".to_string()));
361        assert!(new_dirs.contains(&"dir4".to_string()));
362        assert_eq!(tree.stats().dirs, 4);
363        assert_eq!(tree.stats().files, 6);
364        assert_eq!(tree.stats().used_size.get_bytes(), 80);
365
366        let snapshot = tree
367            .make_snapshot(
368                &path("/data/mnt", "/data/mnt/dir1"),
369                SnapshotConfig::default(),
370                &files_getter([("/data/mnt/dir1", [("file1", 30)])]),
371            )
372            .unwrap();
373        let children: Vec<_> = snapshot
374            .get_root()
375            .iter()
376            .map(|e| (e.get_name().to_string(), e.get_size().get_bytes()))
377            .collect();
378        assert_eq!(
379            children,
380            vec![
381                ("file1".to_string(), 30),
382                ("dir2".to_string(), 25),
383                ("dir3".to_string(), 0),
384                ("dir4".to_string(), 0),
385            ]
386        );
387
388        tree.set_children(&path("/data/mnt", "/data/mnt/dir1/dir2"), vec![], 2, 50);
389        assert_eq!(tree.stats().dirs, 4);
390        assert_eq!(tree.stats().files, 5);
391        assert_eq!(tree.stats().used_size.get_bytes(), 105);
392        tree.get_root().print(tree.get_arena(), 5);
393
394        let children: Vec<_> = tree
395            .make_snapshot(
396                &path("/data/mnt", "/data/mnt/dir1/dir2"),
397                SnapshotConfig::default(),
398                &files_getter([("/data/mnt/dir1/dir2", [("file6", 5), ("file7", 45)])]),
399            )
400            .unwrap()
401            .get_root()
402            .iter()
403            .map(|e| (e.get_name().to_string(), e.get_size().get_bytes()))
404            .collect();
405        assert_eq!(
406            children,
407            vec![("file7".to_string(), 45), ("file6".to_string(), 5)]
408        );
409    }
410
411    #[test]
412    fn snapshot_from_root() {
413        let tree = sample_tree();
414        tree.get_root().print(tree.get_arena(), 5);
415
416        let snapshot = tree
417            .make_snapshot(
418                &root_path(&tree),
419                SnapshotConfig::default(),
420                &sample_getter(),
421            )
422            .unwrap();
423
424        let root = snapshot.get_root();
425        assert_eq!(root.get_name(), "/data/mnt");
426        assert_eq!(root.get_size(), 75u64.into());
427        assert!(root.is_dir());
428        assert_eq!(root.get_parent_id(), None);
429
430        let dir1 = root.iter().next();
431        assert!(dir1.is_some());
432        let dir1 = dir1.unwrap();
433        assert_eq!(dir1.get_name(), "dir1");
434        assert_eq!(dir1.get_size(), 50u64.into());
435        assert!(dir1.is_dir());
436        assert_eq!(dir1.get_parent_id(), Some(root.get_id()));
437
438        let dir2 = dir1.iter().next();
439        assert!(dir2.is_some());
440        let dir2 = dir2.unwrap();
441        assert_eq!(dir2.get_name(), "dir2");
442        assert_eq!(dir2.get_size(), 25u64.into());
443        assert!(dir2.is_dir());
444        assert_eq!(dir2.get_parent_id(), Some(dir1.get_id()));
445
446        let file5 = dir2.iter().next();
447        assert!(file5.is_some());
448        let file5 = file5.unwrap();
449        assert_eq!(file5.get_name(), "file5");
450        assert_eq!(file5.get_size(), 10u64.into());
451        assert!(!file5.is_dir());
452        assert_eq!(file5.get_parent_id(), Some(dir2.get_id()));
453    }
454
455    #[test]
456    fn snapshot_from_child() {
457        let tree = sample_tree();
458        tree.get_root().print(tree.get_arena(), 5);
459
460        let snapshot = tree
461            .make_snapshot(
462                &path("/data/mnt", "/data/mnt/dir1"),
463                SnapshotConfig::default(),
464                &sample_getter(),
465            )
466            .unwrap();
467
468        let root = snapshot.get_root();
469        assert_eq!(root.get_name(), "dir1");
470        assert_eq!(root.get_size(), 50u64.into());
471        assert!(root.is_dir());
472        assert_eq!(root.get_parent_id(), None);
473
474        let dir2 = root.iter().next();
475        assert!(dir2.is_some());
476        let dir2 = dir2.unwrap();
477        assert_eq!(dir2.get_name(), "dir2");
478        assert_eq!(dir2.get_size(), 25u64.into());
479        assert!(dir2.is_dir());
480        assert_eq!(dir2.get_parent_id(), Some(root.get_id()));
481
482        let mut dir2_iter = dir2.iter();
483        let file5 = dir2_iter.next();
484        assert!(file5.is_some());
485        let file5 = file5.unwrap();
486        assert_eq!(file5.get_name(), "file5");
487        assert_eq!(file5.get_size(), 10u64.into());
488        assert!(!file5.is_dir());
489        assert_eq!(file5.get_parent_id(), Some(dir2.get_id()));
490
491        let file6 = dir2_iter.next();
492        assert!(file6.is_some());
493        let file6 = file6.unwrap();
494        assert_eq!(file6.get_name(), "file6");
495        assert_eq!(file6.get_size(), 10u64.into());
496        assert!(!file6.is_dir());
497        assert_eq!(file6.get_parent_id(), Some(dir2.get_id()));
498    }
499
500    #[test]
501    fn snapshot_with_depth_constraint() {
502        let tree = sample_tree();
503        tree.get_root().print(tree.get_arena(), 5);
504
505        let snapshot = tree
506            .make_snapshot(
507                &root_path(&tree),
508                SnapshotConfig {
509                    max_depth: 1,
510                    ..SnapshotConfig::default()
511                },
512                &sample_getter(),
513            )
514            .unwrap();
515
516        let root = snapshot.get_root();
517        assert_eq!(root.get_name(), "/data/mnt");
518        assert_eq!(root.get_size(), 75u64.into());
519        assert!(root.is_dir());
520        assert_eq!(root.get_parent_id(), None);
521
522        let dir1 = root.iter().next();
523        assert!(dir1.is_some());
524        let dir1 = dir1.unwrap();
525        assert_eq!(dir1.get_name(), "dir1");
526        assert_eq!(dir1.get_size(), 50u64.into());
527        assert!(dir1.is_dir());
528        assert_eq!(dir1.get_parent_id(), Some(root.get_id()));
529
530        assert!(dir1.iter().next().is_none());
531    }
532
533    #[test]
534    fn snapshot_with_size_constraint() {
535        let tree = sample_tree();
536        tree.get_root().print(tree.get_arena(), 5);
537
538        let snapshot = tree
539            .make_snapshot(
540                &root_path(&tree),
541                SnapshotConfig {
542                    min_size: 12,
543                    ..SnapshotConfig::default()
544                },
545                &sample_getter(),
546            )
547            .unwrap();
548
549        let root = snapshot.get_root();
550        assert_eq!(root.get_name(), "/data/mnt");
551        assert_eq!(root.get_size(), 75u64.into());
552        assert!(root.is_dir());
553        assert_eq!(root.get_parent_id(), None);
554
555        let mut root_iter = root.iter();
556        let dir1 = root_iter.next();
557        assert!(dir1.is_some());
558        let dir1 = dir1.unwrap();
559        assert_eq!(dir1.get_name(), "dir1");
560        assert_eq!(dir1.get_size(), 50u64.into());
561        assert!(dir1.is_dir());
562        assert_eq!(dir1.get_parent_id(), Some(root.get_id()));
563
564        let dir2 = dir1.iter().next();
565        assert!(dir2.is_some());
566        let dir2 = dir2.unwrap();
567        assert_eq!(dir2.get_name(), "dir2");
568        assert_eq!(dir2.get_size(), 25u64.into());
569        assert!(dir2.is_dir());
570        assert_eq!(dir2.get_parent_id(), Some(dir1.get_id()));
571
572        assert!(dir2.iter().next().is_none());
573
574        let file1 = root_iter.next();
575        assert!(file1.is_some());
576        let file1 = file1.unwrap();
577        assert_eq!(file1.get_name(), "file1");
578        assert_eq!(file1.get_size(), 15u64.into());
579        assert!(!file1.is_dir());
580        assert_eq!(file1.get_parent_id(), Some(root.get_id()));
581
582        assert!(root_iter.next().is_none());
583    }
584}