docker_archive/
file_tree.rs

1use crate::error::DockerArchiveError;
2use crate::Result;
3use itertools::Itertools;
4use path_clean::clean;
5use serde::Serialize;
6use std::cell::RefCell;
7use std::rc::{Rc, Weak};
8use std::{collections::HashMap, io::Read};
9use tar::{Archive, EntryType};
10use uuid::Uuid;
11use xxhash_rust::xxh3::xxh3_64;
12
13// TODO https://github.com/moby/moby/issues/43279
14// https://github.com/wagoodman/dive/issues/391
15const WHITEOUT_PREFIX: &'static str = ".wh.";
16const DOUBLE_WHITEOUT_PREFIX: &'static str = ".wh..wh..";
17
18mod uuid_serde {
19    use std::str::FromStr;
20
21    use serde::{de::Error, Deserialize, Deserializer, Serializer};
22    use uuid::Uuid;
23    pub fn serialize<S>(uuid: &Uuid, serializer: S) -> Result<S::Ok, S::Error>
24    where
25        S: Serializer,
26    {
27        serializer.serialize_str(&uuid.to_string())
28    }
29
30    #[allow(dead_code)]
31    pub fn deserialize<'de, D>(deserializer: D) -> Result<Uuid, D::Error>
32    where
33        D: Deserializer<'de>,
34    {
35        let s = <&str>::deserialize(deserializer)?;
36        Uuid::from_str(s).map_err(Error::custom)
37    }
38}
39
40#[derive(Debug, Default, Serialize)]
41pub struct FileTree {
42    pub root: Rc<RefCell<FileNode>>,
43    pub size: u64,
44    pub file_size: u64,
45    pub name: String,
46    #[serde(with = "uuid_serde")]
47    pub id: Uuid,
48}
49
50struct CompareMark {
51    pub lower_node: Rc<RefCell<FileNode>>,
52    pub upper_node: Rc<RefCell<FileNode>>,
53    pub tentative: Option<DiffType>,
54    pub finalis: Option<DiffType>,
55}
56
57impl FileTree {
58    pub fn search<E>(tree: Rc<RefCell<Self>>, evaluator: &E) -> Vec<Rc<RefCell<FileNode>>>
59    where
60        E: Fn(&FileNode) -> bool,
61    {
62        let mut ret = vec![];
63        let root = Rc::clone(&tree.borrow().root);
64        FileNode::visit_depth_child_first(
65            root,
66            &mut |node| {
67                ret.push(node);
68            },
69            evaluator,
70        );
71        ret
72    }
73
74    pub fn copy(tree: Rc<RefCell<Self>>) -> Rc<RefCell<Self>> {
75        let new_tree = Rc::new(RefCell::new(FileTree {
76            size: tree.borrow().size,
77            file_size: tree.borrow().file_size,
78            ..Default::default()
79        }));
80        let new_tree_root = FileNode::copy(
81            Rc::clone(&tree.borrow().root),
82            Rc::clone(&new_tree.borrow().root),
83        );
84        // update the tree pointers
85        FileNode::visit_depth_child_first(
86            Rc::clone(&new_tree_root),
87            &mut |node| {
88                node.borrow_mut().tree = Rc::downgrade(&new_tree);
89            },
90            &|_| true,
91        );
92        // assign the node as root
93        new_tree.borrow_mut().root = new_tree_root;
94        new_tree
95    }
96
97    // combines an array of trees into a single tree
98    pub fn stack_trees(trees: Vec<Rc<RefCell<FileTree>>>) -> Rc<RefCell<FileTree>> {
99        let tree = FileTree::copy(Rc::clone(&trees[0]));
100        for i in 1..trees.len() {
101            FileTree::stack(Rc::clone(&tree), Rc::clone(&trees[i]))
102        }
103        tree
104    }
105
106    // takes two trees and combines them together. This is done by "stacking" the upper tree on top of the lower tree.
107    pub fn stack(lower: Rc<RefCell<Self>>, upper: Rc<RefCell<Self>>) {
108        let upper_root = Rc::clone(&upper.borrow().root);
109        FileNode::visit_depth_child_first(
110            upper_root,
111            &mut |upper_node| {
112                let path = upper_node.borrow().path.clone();
113                if upper_node.borrow().is_whiteout() {
114                    FileTree::remove_path(Rc::clone(&lower), &path);
115                    return;
116                }
117                FileTree::add_path(
118                    Rc::clone(&lower),
119                    &path,
120                    upper_node.borrow().data.file_info.clone(),
121                );
122            },
123            &|_| true,
124        );
125    }
126
127    // marks the FileNodes in the owning (lower) tree with DiffType annotations when compared to the given (upper) tree
128    pub fn compare_mark(lower: Rc<RefCell<Self>>, upper: Rc<RefCell<Self>>) {
129        let mut modifications = vec![];
130        // we must visit from the leaves upwards to ensure that diff types can be derived from and assigned to children
131        FileNode::visit_depth_child_first(
132            Rc::clone(&upper.borrow().root),
133            &mut |upper_node| {
134                let path = upper_node.borrow().path.clone();
135                if upper_node.borrow().is_whiteout() {
136                    if let Some(lower_node) = FileTree::get_node(Rc::clone(&lower), &path) {
137                        FileNode::assign_diff_type(lower_node, DiffType::Removed);
138                    }
139                    return;
140                }
141                // note: since we are not comparing against the original tree (copying the tree is expensive) we may mark the parent of an added node incorrectly as modified. This will be corrected later.
142                let lower_node = FileTree::get_node(Rc::clone(&lower), &path);
143                if lower_node.is_none() {
144                    let (_, new_nodes) = FileTree::add_path(
145                        Rc::clone(&lower),
146                        &path,
147                        upper_node.borrow().data.file_info.clone(),
148                    );
149                    for new_node in new_nodes.iter().rev() {
150                        modifications.push(CompareMark {
151                            lower_node: Rc::clone(new_node),
152                            upper_node: Rc::clone(&upper_node),
153                            tentative: None,
154                            finalis: Some(DiffType::Added),
155                        });
156                    }
157                    return;
158                }
159                // the file exists in the lower layer
160                let lower_node = lower_node.unwrap();
161                let diff_type =
162                    FileNode::compare(Some(Rc::clone(&lower_node)), Some(Rc::clone(&upper_node)));
163                modifications.push(CompareMark {
164                    lower_node: Rc::clone(&lower_node),
165                    upper_node: Rc::clone(&upper_node),
166                    tentative: Some(diff_type),
167                    finalis: None,
168                })
169            },
170            &|_| true,
171        );
172        // take note of the comparison results on each note in the lower tree
173        for pair in modifications.iter() {
174            if let Some(diff_type) = pair.finalis {
175                FileNode::assign_diff_type(Rc::clone(&pair.lower_node), diff_type);
176            } else {
177                let lower_node_diff_type = pair.lower_node.borrow().data.diff_type;
178                if lower_node_diff_type == DiffType::Unmodified {
179                    FileNode::derive_diff_type(
180                        Rc::clone(&pair.lower_node),
181                        pair.tentative.unwrap(),
182                    );
183                }
184                // persist the upper's payload on the lower tree
185                pair.lower_node.borrow_mut().data.file_info =
186                    pair.upper_node.borrow().data.file_info.clone();
187            }
188        }
189    }
190
191    // fetches a single node when given a slash-delimited string from root('/') to the desired node (e.g. '/a/node/path')
192    pub fn get_node(tree: Rc<RefCell<Self>>, path: &str) -> Option<Rc<RefCell<FileNode>>> {
193        let mut node = Rc::clone(&tree.borrow().root);
194        for name in path.trim_matches('/').split('/') {
195            if name.is_empty() {
196                continue;
197            }
198            let child = node.borrow().children.get(name).map(Rc::clone);
199            if child.is_none() {
200                return None;
201            } else {
202                node = child.unwrap();
203            }
204        }
205        Some(node)
206    }
207
208    // adds a new node to the tree with the given payload
209    pub fn add_path(
210        tree: Rc<RefCell<Self>>,
211        path: &str,
212        data: FileInfo,
213    ) -> (Option<Rc<RefCell<FileNode>>>, Vec<Rc<RefCell<FileNode>>>) {
214        let mut added_nodes = vec![];
215        let path = clean(path);
216        // cannot add relative path
217        if path.eq(".") {
218            return (None, added_nodes);
219        }
220        let mut node = Rc::clone(&tree.borrow().root);
221        for node_name in path.trim_matches('/').split('/') {
222            if node_name.is_empty() {
223                continue;
224            }
225            // find or create node
226            let child = node.borrow().children.get(node_name).map(Rc::clone);
227            if child.is_some() {
228                node = child.unwrap();
229                continue;
230            }
231            // don't add paths that should be deleted
232            if node_name.starts_with(DOUBLE_WHITEOUT_PREFIX) {
233                return (None, added_nodes);
234            }
235            // don't attach the payload. The payload is destined for the Path's end node, not any intermediary node.
236            node = FileNode::add_child(Rc::clone(&node), node_name, Default::default());
237            added_nodes.push(Rc::clone(&node));
238        }
239        // attach payload to the last specified node
240        node.borrow_mut().data.file_info = data;
241        (Some(node), added_nodes)
242    }
243
244    // removes a node from the tree given its path
245    pub fn remove_path(tree: Rc<RefCell<Self>>, path: &str) {
246        if let Some(node) = Self::get_node(tree, path) {
247            FileNode::remove(node);
248        }
249    }
250
251    pub fn build_from_layer_tar<R: Read>(ar: Archive<R>) -> Result<Rc<RefCell<Self>>> {
252        let tree = Rc::new(RefCell::new(FileTree::default()));
253        {
254            let a = tree.borrow();
255            let mut root = a.root.borrow_mut();
256            root.tree = Rc::downgrade(&tree);
257            //root.path = "/".to_string(); // root路径初始值设为空,因为添加子结点都会加/前缀; 可以在最后再设为/
258        }
259        let file_infos = FileInfo::get_file_infos(ar)?;
260        for file_info in file_infos.into_iter() {
261            let path = file_info.path.clone();
262            tree.borrow_mut().file_size += file_info.size;
263            FileTree::add_path(Rc::clone(&tree), &path, file_info);
264        }
265        // root的path设为/
266        tree.borrow().root.borrow_mut().path = "/".to_string();
267        // 把数据移出rc会导致所有的weak无法升级
268        //match Rc::try_unwrap(tree) {
269        //Ok(ret) => Ok(ret.take()),
270        //Err(origial) => Err(DockerArchiveError::InternalLogicError(format!(
271        //"Rc<FileTree> has more than one strong reference: {}",
272        //Rc::strong_count(&origial)
273        //))),
274        //}
275        Ok(tree)
276    }
277}
278
279#[derive(Debug, Default, Serialize)]
280pub struct FileNode {
281    // serde don't support cyclic data structures(https://github.com/serde-rs/serde/issues/1361)
282    #[serde(skip)]
283    pub tree: Weak<RefCell<FileTree>>,
284    #[serde(skip)]
285    pub parent: Weak<RefCell<FileNode>>,
286    pub name: String,
287    pub data: NodeData,
288    pub children: HashMap<String, Rc<RefCell<FileNode>>>,
289    pub path: String,
290}
291
292impl FileNode {
293    // duplicates the existing node relative to a new parent node
294    pub fn copy(node: Rc<RefCell<Self>>, parent: Rc<RefCell<Self>>) -> Rc<RefCell<Self>> {
295        let new_node = Self {
296            tree: Weak::clone(&parent.borrow().tree),
297            parent: Rc::downgrade(&parent),
298            name: node.borrow().name.clone(),
299            data: node.borrow().data.clone(),
300            path: node.borrow().path.clone(),
301            ..Default::default()
302        };
303        let new_node = Rc::new(RefCell::new(new_node));
304        for (name, child) in node.borrow().children.iter() {
305            let new_child = Self::copy(Rc::clone(child), Rc::clone(&new_node));
306            new_node
307                .borrow_mut()
308                .children
309                .insert(name.clone(), new_child);
310        }
311        new_node
312    }
313
314    // determines a DiffType to the current FileNode. Note: the DiffType of a node is always the DiffType of its attributes and its contents. The contents are the bytes of the file of the children of a directory
315    pub fn derive_diff_type(node: Rc<RefCell<Self>>, diff_type: DiffType) {
316        let leaf = node.borrow().is_leaf();
317        if leaf {
318            return Self::assign_diff_type(node, diff_type);
319        }
320        let mut my_diff_type = diff_type;
321        for child in node.borrow().children.values() {
322            my_diff_type = my_diff_type.merge(child.borrow().data.diff_type);
323        }
324        return Self::assign_diff_type(node, my_diff_type);
325    }
326
327    // assign the given DiffType to this node, possibly affecting child nodes
328    pub fn assign_diff_type(node: Rc<RefCell<Self>>, diff_type: DiffType) {
329        node.borrow_mut().data.diff_type = diff_type;
330        // if we've removed this node, then all children have been removed as well
331        if matches!(diff_type, DiffType::Removed) {
332            for child in node.borrow().children.values() {
333                FileNode::assign_diff_type(Rc::clone(child), diff_type);
334            }
335        }
336    }
337
338    pub fn add_child(parent: Rc<RefCell<Self>>, name: &str, data: FileInfo) -> Rc<RefCell<Self>> {
339        // never allow processing of purely whiteout flag files (for now)
340        // doublewhiteout 作用?
341        if let Some(node) = parent.borrow().children.get(name) {
342            // tree node already exists, replace the payload, keep the children
343            node.borrow_mut().data.file_info = data;
344            return Rc::clone(node);
345        }
346        // set node path
347        let mut path = parent.borrow().path.clone();
348        path.push('/');
349        // white out prefixes are fictitious on leaf nodes
350        if let Some(s) = name.strip_prefix(WHITEOUT_PREFIX) {
351            path.push_str(s);
352        } else {
353            path.push_str(name);
354        }
355        let child = Rc::new(RefCell::new(FileNode {
356            tree: Weak::clone(&parent.borrow().tree),
357            parent: Rc::downgrade(&parent),
358            name: name.to_string(),
359            data: NodeData {
360                file_info: data,
361                ..Default::default()
362            },
363            path,
364            ..Default::default()
365        }));
366        parent
367            .borrow_mut()
368            .children
369            .insert(name.to_string(), Rc::clone(&child));
370        if let Some(t) = parent.borrow().tree.upgrade() {
371            t.borrow_mut().size += 1;
372        }
373        child
374    }
375
376    // deletes the current FileNode from it's parent FileNode's relations
377    pub fn remove(node: Rc<RefCell<Self>>) {
378        if let Some(tree) = node.borrow().tree.upgrade() {
379            // cannot remove the tree root
380            if Rc::ptr_eq(&node, &tree.borrow().root) {
381                return;
382            }
383            tree.borrow_mut().size -= 1;
384        }
385        if let Some(parent) = node.borrow().parent.upgrade() {
386            parent.borrow_mut().children.remove(&node.borrow().name);
387        }
388        let childs: Vec<Rc<RefCell<Self>>> = node.borrow().children.values().cloned().collect();
389        for child in childs.into_iter() {
390            FileNode::remove(child);
391        }
392    }
393
394    pub fn is_whiteout(&self) -> bool {
395        self.name.starts_with(WHITEOUT_PREFIX)
396    }
397
398    pub fn is_leaf(&self) -> bool {
399        self.children.is_empty()
400    }
401
402    // compare self(lower node in the tree stack) and other(upper node in tree stack), return DiffType for upper node
403    pub fn compare(lower: Option<Rc<RefCell<Self>>>, upper: Option<Rc<RefCell<Self>>>) -> DiffType {
404        if lower.is_none() && upper.is_none() {
405            return DiffType::Unmodified;
406        }
407        if lower.is_none() && upper.is_some() {
408            return DiffType::Added;
409        }
410        if lower.is_some() && upper.is_none() {
411            return DiffType::Removed;
412        }
413        // lower and upper both not none
414        let (lower, upper) = (lower.unwrap(), upper.unwrap());
415        if upper.borrow().is_whiteout() {
416            return DiffType::Removed;
417        }
418        if lower.borrow().name.ne(&upper.borrow().name) {
419            panic!("comparing mismatched nodes");
420        }
421        let file_info_diff = lower
422            .borrow()
423            .data
424            .file_info
425            .compare(&upper.borrow().data.file_info);
426        file_info_diff
427    }
428
429    // iterates a tree depth-first (starting at this FileNode), evaluating the deepest depths first (visit on bubble up)
430    pub fn visit_depth_child_first<V, E>(node: Rc<RefCell<Self>>, visitor: &mut V, evaluator: &E)
431    where
432        V: FnMut(Rc<RefCell<FileNode>>) -> (),
433        E: Fn(&FileNode) -> bool,
434    {
435        // 根据字符串排序,保证父目录优先被处理(对比tree时父目录标记为删除要同时标记子结点)
436        for (_, v) in node.borrow().children.iter().sorted_by_key(|x| x.0) {
437            let child = Rc::clone(v);
438            FileNode::visit_depth_child_first(child, visitor, evaluator);
439        }
440        // never visit the root node
441        if let Some(tree) = node.borrow().tree.upgrade() {
442            if Rc::ptr_eq(&node, &tree.borrow().root) {
443                return;
444            }
445        }
446        // put the borrow on its own line to make the borrow dropped before a call to visitor
447        let visit = evaluator(&node.borrow());
448        if visit {
449            visitor(Rc::clone(&node))
450        }
451    }
452
453    // iterates a tree depth-first (starting at this FileNode), evaluating the shallowest depths first (visit while sinking down)
454    pub fn visit_depth_parent_first<V, E>(node: Rc<RefCell<Self>>, visitor: &mut V, evaluator: &E)
455    where
456        V: FnMut(Rc<RefCell<FileNode>>) -> (),
457        E: Fn(&FileNode) -> bool,
458    {
459        if !evaluator(&node.borrow()) {
460            return;
461        }
462        // never visit the root node
463        let mut is_root = false;
464        if let Some(tree) = node.borrow().tree.upgrade() {
465            if Rc::ptr_eq(&node, &tree.borrow().root) {
466                is_root = true;
467            }
468        }
469        if !is_root {
470            visitor(Rc::clone(&node));
471        }
472        for (_, v) in node.borrow().children.iter().sorted_by_key(|x| x.0) {
473            let child = Rc::clone(v);
474            FileNode::visit_depth_parent_first(child, visitor, evaluator);
475        }
476    }
477}
478
479// NodeData is the payload for a FileNode
480#[derive(Debug, Default, Serialize, Clone)]
481pub struct NodeData {
482    pub view_info: ViewInfo,
483    pub file_info: FileInfo,
484    pub diff_type: DiffType,
485}
486
487// ViewInfo contains UI specific detail for a specific FileNode
488#[derive(Debug, Default, Serialize, Clone)]
489pub struct ViewInfo {
490    pub collapsed: bool,
491    pub hidden: bool,
492}
493
494// FileInfo contains tar metadata for a specific FileNode
495#[derive(Debug, Default, Serialize, Clone)]
496pub struct FileInfo {
497    pub path: String,
498    pub type_flag: u8,
499    pub linkname: String,
500    pub hash: u64,
501    pub size: u64,
502    pub mode: u32,
503    pub uid: u64,
504    pub gid: u64,
505    pub is_dir: bool,
506}
507
508impl FileInfo {
509    pub fn get_file_infos<R: Read>(mut ar: Archive<R>) -> Result<Vec<Self>> {
510        let mut ret = vec![];
511        for entry in ar.entries()? {
512            let mut e = entry?;
513            if let Some(path) = e.path()?.to_str().map(clean) {
514                // always ensure relative path notations are not parsed as part of the filename
515                if path.eq(".") {
516                    continue;
517                }
518                match e.header().entry_type() {
519                    EntryType::XGlobalHeader | EntryType::XHeader => {
520                        return Err(DockerArchiveError::InvalidArchiveX(format!(
521                            "unexptected tar file: type={:?} name={}",
522                            e.header().entry_type(),
523                            &path,
524                        )));
525                    }
526                    _ => {
527                        let mut hash = 0;
528                        if !e.header().entry_type().is_dir() {
529                            let mut data = vec![];
530                            e.read_to_end(&mut data)?;
531                            hash = xxh3_64(data.as_slice());
532                        }
533                        ret.push(FileInfo {
534                            path,
535                            hash,
536                            ..e.header().into()
537                        });
538                    }
539                }
540            }
541        }
542        Ok(ret)
543    }
544
545    pub fn compare(&self, other: &Self) -> DiffType {
546        if self.type_flag == other.type_flag {
547            if self.hash == other.hash
548                && self.mode == other.mode
549                && self.uid == other.uid
550                && self.gid == other.gid
551            {
552                return DiffType::Unmodified;
553            }
554        }
555        DiffType::Modified
556    }
557}
558
559impl From<&tar::Header> for FileInfo {
560    fn from(header: &tar::Header) -> Self {
561        Self {
562            path: header
563                .path()
564                .map(|p| p.to_str().unwrap_or("").to_string())
565                .unwrap_or(String::new()),
566            type_flag: header.entry_type().as_byte(),
567            linkname: header
568                .link_name()
569                .map(|p| {
570                    p.map(|s| s.to_str().unwrap_or("").to_string())
571                        .unwrap_or(String::new())
572                })
573                .unwrap_or(String::new()),
574            size: header.size().unwrap_or(0),
575            mode: header.mode().unwrap_or(0),
576            uid: header.uid().unwrap_or(0),
577            gid: header.gid().unwrap_or(0),
578            is_dir: header.entry_type().is_dir(),
579            ..Default::default()
580        }
581    }
582}
583
584// DiffType defines the comparison result between two FileNodes
585#[derive(Debug, Serialize, Clone, Copy, PartialEq)]
586pub enum DiffType {
587    Unmodified,
588    Modified,
589    Added,
590    Removed,
591}
592
593impl Default for DiffType {
594    fn default() -> Self {
595        Self::Unmodified
596    }
597}
598
599impl DiffType {
600    // merge two DiffTypes into a single result. Essentially, return the given value unless they two values differ, in which case we can only determine that there is "a change".
601    pub fn merge(self, other: Self) -> DiffType {
602        if self.eq(&other) {
603            return self;
604        }
605        return Self::Modified;
606    }
607}