fastax/
tree.rs

1use std::collections::{HashMap, HashSet};
2use std::fmt;
3
4use ansi_term::Style;
5
6use crate::Node;
7
8/// A taxonomy tree
9pub struct Tree {
10    root: i64,
11    pub nodes: HashMap<i64, Node>,
12    pub children: HashMap<i64, HashSet<i64>>,
13    marked: HashSet<i64>
14}
15
16impl Tree {
17    /// Create a new Tree containing the given nodes.
18    pub fn new(root_id: i64, nodes: &[Node]) -> Tree {
19        let mut tree = Tree{
20            root: root_id,
21            nodes: HashMap::new(),
22            children: HashMap::new(),
23            marked: HashSet::new()
24        };
25        tree.add_nodes(nodes);
26        tree
27    }
28
29    /// Add the given nodes to the Tree.
30    pub fn add_nodes(&mut self, nodes: &[Node]) {
31        for node in nodes.iter() {
32            self.nodes.entry(node.tax_id).or_insert({
33                let mut node = node.clone();
34                if node.format_string.is_none() {
35                    node.format_string = Some(String::from("%rank: %name"));
36                }
37                node});
38
39
40            if node.tax_id != node.parent_tax_id {
41                self.children.entry(node.parent_tax_id)
42                    .and_modify(|children| {children.insert(node.tax_id);})
43                    .or_insert({
44                        let mut set = HashSet::new();
45                        set.insert(node.tax_id);
46                        set
47                    });
48            }
49        }
50    }
51
52    /// Mark the nodes with this IDs.
53    pub fn mark_nodes(&mut self, taxids: &[i64]) {
54        for taxid in taxids.iter() {
55            self.marked.insert(*taxid);
56        }
57    }
58
59    /// Set the format string for all nodes.
60    pub fn set_format_string(&mut self, format_string: String) {
61        for node in self.nodes.values_mut() {
62            node.format_string = Some(format_string.clone());
63        }
64    }
65
66    /// Simplify the tree by removing all nodes that have only one child
67    /// *and* are not marked.
68    pub fn simplify(&mut self) {
69        self.simplify_helper(self.root);
70        self.children.retain(|_, v| !v.is_empty());
71    }
72
73    fn simplify_helper(&mut self, parent: i64) {
74        let new_children = self.remove_single_child(parent);
75        // TODO: remove that clone
76        self.children.insert(parent, new_children.clone());
77        // .unwrap() is safe here because new_children
78        // is at least an empty set.
79        for child in new_children.iter() {
80            self.simplify_helper(*child);
81        }
82    }
83
84
85    /// remove_single_child find the new children of a node by removing all
86    /// unique child.
87    fn remove_single_child(&self, parent: i64) -> HashSet<i64> {
88        // nodes are the children of parent
89        let mut new_children = HashSet::new();
90        if let Some(nodes) = self.children.get(&parent) {
91            for node in nodes.iter() {
92                let mut node = node;
93                while let Some(children) = self.children.get(node) {
94                    if children.len() == 1 && !self.marked.contains(node) {
95                        node = children.iter().next().unwrap();
96                    } else {
97                        break;
98                    }
99                }
100                new_children.insert(*node);
101            }
102        }
103        new_children
104    }
105
106    /// Return a Newick representation of the tree.
107    /// If the root has only one child, we remove the root from the
108    /// resulting tree.
109    pub fn to_newick(&self) -> String {
110        let mut n = String::new();
111
112        if self.children.get(&self.root).unwrap().len() == 1 {
113            let root = self.children.get(&1).unwrap().iter().next().unwrap();
114            self.newick_helper(&mut n, *root);
115        } else {
116            self.newick_helper(&mut n, self.root);
117        }
118        n.push(';');
119        n
120    }
121
122    /// Helper function that actually makes the Newick format representation
123    /// of the tree. The resulting String is in `n` and the current node is
124    /// `taxid`.
125    ///
126    /// This function is recursive, hence it should be called only once with
127    /// the root.
128    fn newick_helper(&self, n: &mut String, taxid: i64) {
129        // unwrap are safe here because of the way we build the tree
130        // and the nodes.
131        let node = self.nodes.get(&taxid).unwrap();
132
133        if let Some(children) = self.children.get(&taxid) {
134            n.push_str(&format!("({}", node)); // Mind the parenthesis
135            n.push_str(",(");
136            for child in children.iter() {
137                self.newick_helper(n, *child);
138                n.push(',');
139            }
140
141            // After iterating through the children, a comma left
142            let _ = n.pop();
143            // two closing parenthesis:
144            // - one for the last child tree,
145            // - another for the parent
146            n.push_str("))");
147        } else {
148            n.push_str(&format!("{}", node)); // Mind the absent parenthesis
149        }
150    }
151
152    /// Helper function that actually makes the String-representation of the
153    /// tree. The resulting representation is in `s`, the current node is
154    /// `taxid`, the `prefix` is used for spacing, and the boolean
155    /// `was_first_child` is used to choose which branching character to use.
156    ///
157    /// This function is recursive, hence it should be called only once with
158    /// the root.
159    fn print_tree_helper(&self, s: &mut String, taxid: i64, prefix: String, was_first_child: bool) {
160        // .unwrap() is safe here because of the way we build the tree.
161        let node = self.nodes.get(&taxid).unwrap();
162
163        if let Some(children) = self.children.get(&taxid) {
164            if self.marked.contains(&taxid) {
165                s.push_str(&format!("{}\u{2500}\u{252C}\u{2500} {}\n",
166                                   prefix,
167                                   Style::new().bold().paint(node.to_string())));
168
169            } else {
170                s.push_str(&format!("{}\u{2500}\u{252C}\u{2500} {}\n",
171                                   prefix, node));
172            }
173            let mut prefix = prefix;
174            prefix.pop();
175            if was_first_child {
176                prefix.push('\u{2502}');
177            } else {
178                prefix.push(' ');
179            }
180
181            // We want to keep the last child
182            let mut children: Vec<i64> = children.iter().copied().collect();
183            children.sort();
184
185            loop {
186                let child = children.pop();
187                let mut new_prefix = prefix.clone();
188                match child {
189                    Some(child) => {
190                        if children.is_empty() {
191                            new_prefix.push_str(" \u{2514}");
192                            self.print_tree_helper(s, child, new_prefix, false);
193                        } else {
194                            new_prefix.push_str(" \u{251C}");
195                            self.print_tree_helper(s, child, new_prefix, true);
196                        }
197                    },
198
199                    None => break
200                };
201            }
202        } else if self.marked.contains(&taxid) {
203            s.push_str(&format!("{}\u{2500}\u{2500} {}\n",
204                                prefix,
205                                Style::new().bold().paint(node.to_string())));
206        } else {
207            s.push_str(&format!("{}\u{2500}\u{2500} {}\n",
208                                prefix, node));
209        }
210    }
211}
212
213impl fmt::Display for Tree {
214    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215        let mut s = String::new();
216        let root = self.nodes.get(&self.root).unwrap();
217        s.push_str(&format!("{}\n", root.to_string()));
218
219        let root_children = self.children.get(&self.root).unwrap();
220        if root_children.len() == 1 {
221            let child = root_children.iter().next().unwrap();
222            self.print_tree_helper(&mut s, *child, String::from("\u{2514}"), false);
223        } else {
224            for (i, child) in root_children.iter().enumerate() {
225                if i == root_children.len() - 1 {
226                    self.print_tree_helper(&mut s, *child, String::from("\u{2514}"), false);
227                } else {
228                    self.print_tree_helper(&mut s, *child, String::from("\u{251C}"), true);
229                }
230            }
231        }
232
233        write!(f, "{}", s)
234    }
235}