1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
//! The `phylotree` crate aims to be useful when dealing with phylogenetic trees.
//! It can be used to build such trees or read then from newick files. this crate
//! can also be used to compare trees.
//!
//! Since phylogenetic trees and phylolgenetic distance matrices are so closely related
//! this crate can also be used to extract such matrices from phylolgenetic trees as
//! well as read and write phylip distance matrix files.
//!
//! # A note on implementation
//! Recursive data structures can be a pain in rust, which is why this crate exists:
//!
//! **so you don't have to implement it...**
//!
//! To avoid this problem here the tree is stored as a vector
//! of nodes, each node has an identifier and accessing and mutating
//! nodes in a tree is done using these identifiers. As such we can have a
//! non-recursive data structure representing the tree but also easily
//! implement recursive algorithms *(like simple tree traversals)* on this tree.
//!
//! # Using `phylotree`
//! Most of the functionality is implemented in [`crate::tree`]. The
//! [`crate::distance`] module is used to dealt with phylolgenetic distance matrices.
//! [`crate::distr`] is a helper module to provide different branch
//! length distributions when generating random phylogenetic trees.
//!
//! ## Building trees
//! The simplest way to build a tree is to create an empty tree, add a root node and
//! then add children to the various added nodes:
//!
//! ```
//! use phylotree::tree::{Tree, Node};
//!
//! let mut tree = Tree::new();
//!
//! // Add the root node
//! let root = tree.add(Node::new());
//!
//! // Add a child to the root
//! let child1 = tree.add_child(Node::new_named("Child_1"), root, None).unwrap();
//! // Add a child to the root with a branch length
//! let child2 = tree.add_child(Node::new_named("Child_2"), root, Some(0.5)).unwrap();
//!
//! // Add more children
//! let child3 = tree.add_child(Node::new_named("Child_3"), child1, None).unwrap();
//!
//! // Get depth of child
//! assert_eq!(tree.get(&child3).unwrap().get_depth(), 2)
//! ```
//!
//! ## Reading and writing trees
//! This library can build trees strings (or files) encoded in the
//! [newick](https://en.wikipedia.org/wiki/Newick_format) format:
//! ```
//! use phylotree::tree::Tree;
//!
//! let newick_str = "((A:0.1,B:0.2)F:0.6,(C:0.3,D:0.4)E:0.5)G;";
//! let tree = Tree::from_newick(newick_str).unwrap();
//!
//! assert_eq!(tree.to_newick().unwrap(), newick_str)
//! ```
//!
//! ## Traversing trees
//! Several traversals are implemented to visit nodes in a particular order. pre-order,
//! post-order and level-order traversals are implemented on all trees. In-order traversls
//! are implemented only for binary trees. A traversals returns a [`Vec`] of [`tree::NodeId`]
//! in the order they are to be visited in.
//! ```
//! use phylotree::tree::{Tree, Node};
//!
//! // |
//! // -----G-----
//! // | |
//! // ---C--- ---F---
//! // | | | |
//! // A B D E
//!
//! let newick_str = "((A,B)C,(D,E)F)G;";
//! let mut tree = Tree::from_newick(newick_str).unwrap();
//! let root = tree.get_root().unwrap();
//!
//! let preorder: Vec<_> = tree.preorder(&root).unwrap()
//! .iter()
//! .map(|node_id| tree.get(node_id).unwrap().name.clone().unwrap())
//! .collect();
//!
//! assert_eq!(preorder, vec!["G", "C", "A", "B", "F", "D", "E"]);
//!
//! // Add a child node to F so the tree is no longer binary
//! let f_idx = tree.get_by_name("F").unwrap().id;
//! tree.add_child(Node::new_named("third_child"), f_idx, None).unwrap();
//!
//! assert!(tree.inorder(&root).is_err())
//! ```
//!
//!
//! ## Comparing trees
//! A number of metrics taking into account topology and branch lenghts are implemented
//! in order to compare trees with each other:
//! ```
//! use phylotree::tree::Tree;
//!
//! // The second tree is just a random rotation of the first,
//! // they represent the same phylogeney
//! let newick_orig = "((A:0.1,B:0.2)F:0.6,(C:0.3,D:0.4)E:0.5)G;";
//! let newick_rota = "((D:0.3,C:0.4)E:0.5,(B:0.2,A:0.1)F:0.6)G;";
//!
//! let tree_orig = Tree::from_newick(newick_orig).unwrap();
//! let tree_rota = Tree::from_newick(newick_rota).unwrap();
//!
//! let rf = tree_orig.robinson_foulds(&tree_rota).unwrap();
//!
//! assert_eq!(rf, 0)
//! ```
//!
//! ## Computing distances between nodes in a tree
//! We can get the distance (either as number of edges or sum of edge lengths) betweem
//! nodes of the tree as well as compute the whole phyhlogenetic distance matrix
//! of a tree.
//! ```
//! use phylotree::tree::Tree;
//!
//! // The following tree is encoded by the newick string:
//! // |
//! // +----+----+
//! // | |
//! // 0.3 |
//! // | |
//! // | 0.6
//! // --+-- |
//! // | | |
//! // 0.2 0.2 |
//! // | | ---+---
//! // T3 T1 | |
//! // | |
//! // 0.4 0.5
//! // | |
//! // | |
//! // T2 |
//! // T0
//!
//! let newick = "((T3:0.2,T1:0.2):0.3,(T2:0.4,T0:0.5):0.6);";
//! let tree = Tree::from_newick(newick).unwrap();
//!
//! let t0 = tree.get_by_name("T0").unwrap();
//! let t3 = tree.get_by_name("T3").unwrap();
//!
//! let (edge_sum, num_edges) = tree.get_distance(&t0.id, &t3.id).unwrap();
//!
//! assert_eq!(num_edges, 4);
//! assert_eq!(edge_sum, Some(0.5 + 0.6 + 0.3 + 0.2));
//!
//! // Compute the whole distance matrix
//! let matrix = tree.distance_matrix_recursive().unwrap();
//! let phylip="\
//! 4
//! T0 0 1.6 0.9 1.6
//! T1 1.6 0 1.5 0.4
//! T2 0.9 1.5 0 1.5
//! T3 1.6 0.4 1.5 0
//! ";
//!
//! assert_eq!(matrix.to_phylip(true).unwrap(), phylip)
//! ```
//!
use VecDeque;
use ValueEnum;
use ;
use *;
use ;
// type Error = Box<dyn std::error::Error>;
// type Result<T> = std::result::Result<T, Error>;
/// Shape of random trees to generate
/// Genereates a random binary tree of a given size.
/// Generate a random binary tree under the Yule model.
/// Generates a caterpillar tree by adding children to the last node addesd to the tree
/// until we reach the desired numebr of leaves.