1use super::{
2 CustomFlatUnixFs, DirBuilder, Entry, Leaf, NamedLeaf, TreeConstructionFailed, TreeOptions,
3};
4use core::fmt;
5use ipld_core::cid::Cid;
6use multihash::Multihash;
7use multihash_codetable::Code;
8use std::collections::HashMap;
9
10pub struct PostOrderIterator {
15 full_path: String,
16 old_depth: usize,
17 block_buffer: Vec<u8>,
18 pending: Vec<Visited>,
20 persisted_cids: HashMap<u64, Vec<Option<NamedLeaf>>>,
23 reused_children: Vec<Visited>,
24 cid: Option<Cid>,
25 total_size: u64,
26 opts: TreeOptions,
28}
29
30type Leaves = Vec<Option<NamedLeaf>>;
35
36#[derive(Debug)]
42enum Visited {
43 DescentRoot(DirBuilder),
45 Descent {
46 node: DirBuilder,
47 name: String,
48 depth: usize,
49 index: usize,
51 },
52 Post {
53 parent_id: u64,
54 depth: usize,
55 name: String,
56 index: usize,
57 leaves: LeafStorage,
60 },
61 PostRoot {
62 leaves: LeafStorage,
63 },
64}
65
66impl PostOrderIterator {
67 pub(super) fn new(root: DirBuilder, opts: TreeOptions, longest_path: usize) -> Self {
68 let root = Visited::DescentRoot(root);
69 PostOrderIterator {
70 full_path: String::with_capacity(longest_path),
71 old_depth: 0,
72 block_buffer: Default::default(),
73 pending: vec![root],
74 persisted_cids: Default::default(),
75 reused_children: Vec::new(),
76 cid: None,
77 total_size: 0,
78 opts,
79 }
80 }
81
82 fn render_directory(
83 links: &[Option<NamedLeaf>],
84 buffer: &mut Vec<u8>,
85 block_size_limit: &Option<u64>,
86 ) -> Result<Leaf, TreeConstructionFailed> {
87 use crate::pb::{UnixFs, UnixFsType};
88 use quick_protobuf::{BytesWriter, MessageWrite, Writer};
89 use sha2::{Digest, Sha256};
90
91 let node = CustomFlatUnixFs {
106 links,
107 data: UnixFs {
108 Type: UnixFsType::Directory,
109 ..Default::default()
110 },
111 };
112
113 let size = node.get_size();
114
115 if let Some(limit) = block_size_limit {
116 let size = size as u64;
117 if *limit < size {
118 return Err(TreeConstructionFailed::TooLargeBlock(size));
120 }
121 }
122
123 let cap = buffer.capacity();
124
125 if let Some(additional) = size.checked_sub(cap) {
126 buffer.reserve(additional);
127 }
128
129 if let Some(mut needed_zeroes) = size.checked_sub(buffer.len()) {
130 let zeroes = [0; 8];
131
132 while needed_zeroes > 8 {
133 buffer.extend_from_slice(&zeroes[..]);
134 needed_zeroes -= zeroes.len();
135 }
136
137 buffer.extend(core::iter::repeat(0).take(needed_zeroes));
138 }
139
140 let mut writer = Writer::new(BytesWriter::new(&mut buffer[..]));
141 node.write_message(&mut writer)
142 .map_err(TreeConstructionFailed::Protobuf)?;
143
144 buffer.truncate(size);
145
146 #[allow(clippy::needless_borrows_for_generic_args)]
147 let mh = Multihash::wrap(Code::Sha2_256.into(), &Sha256::digest(&buffer)).unwrap();
148 let cid = Cid::new_v0(mh).expect("sha2_256 is the correct multihash for cidv0");
149
150 let combined_from_links = links
151 .iter()
152 .map(|opt| {
153 opt.as_ref()
154 .map(|NamedLeaf(_, _, total_size)| total_size)
155 .unwrap()
156 })
157 .sum::<u64>();
158
159 Ok(Leaf {
160 link: cid,
161 total_size: buffer.len() as u64 + combined_from_links,
162 })
163 }
164
165 pub fn next_borrowed(&mut self) -> Option<Result<TreeNode<'_>, TreeConstructionFailed>> {
169 while let Some(visited) = self.pending.pop() {
170 let (name, depth) = match &visited {
171 Visited::DescentRoot(_) => (None, 0),
172 Visited::Descent { name, depth, .. } => (Some(name.as_ref()), *depth),
173 Visited::Post { name, depth, .. } => (Some(name.as_ref()), *depth),
174 Visited::PostRoot { .. } => (None, 0),
175 };
176
177 update_full_path((&mut self.full_path, &mut self.old_depth), name, depth);
178
179 match visited {
180 Visited::DescentRoot(node) => {
181 let children = &mut self.reused_children;
182 let leaves = partition_children_leaves(depth, node.nodes.into_iter(), children);
183 let any_children = !children.is_empty();
184
185 let leaves = if any_children {
186 self.persisted_cids.insert(node.id, leaves);
187 LeafStorage::from(node.id)
188 } else {
189 leaves.into()
190 };
191
192 self.pending.push(Visited::PostRoot { leaves });
193 self.pending.append(children);
194 }
195 Visited::Descent {
196 node,
197 name,
198 depth,
199 index,
200 } => {
201 let children = &mut self.reused_children;
202 let leaves = partition_children_leaves(depth, node.nodes.into_iter(), children);
203 let any_children = !children.is_empty();
204 let parent_id = node.parent_id.expect("only roots parent_id is None");
205
206 let leaves = if any_children {
207 self.persisted_cids.insert(node.id, leaves);
208 node.id.into()
209 } else {
210 leaves.into()
211 };
212
213 self.pending.push(Visited::Post {
214 parent_id,
215 name,
216 depth,
217 leaves,
218 index,
219 });
220
221 self.pending.append(children);
222 }
223 Visited::Post {
224 parent_id,
225 name,
226 leaves,
227 index,
228 ..
229 } => {
230 let leaves = leaves.into_inner(&mut self.persisted_cids);
231 let buffer = &mut self.block_buffer;
232
233 let leaf = match Self::render_directory(
234 &leaves,
235 buffer,
236 &self.opts.block_size_limit,
237 ) {
238 Ok(leaf) => leaf,
239 Err(e) => return Some(Err(e)),
240 };
241
242 self.cid = Some(leaf.link);
243 self.total_size = leaf.total_size;
244
245 {
246 let parent_leaves = self.persisted_cids.get_mut(&parent_id);
249
250 match (parent_id, parent_leaves, index) {
251 (pid, None, index) => {
252 panic!("leaves not found for parent_id = {pid} and index = {index}")
253 }
254 (_, Some(vec), index) => {
255 let cell = &mut vec[index];
256 assert!(cell.is_none());
258 *cell = Some(NamedLeaf(name, leaf.link, leaf.total_size));
259 }
260 }
261 }
262
263 return Some(Ok(TreeNode {
264 path: self.full_path.as_str(),
265 cid: self.cid.as_ref().unwrap(),
266 total_size: self.total_size,
267 block: &self.block_buffer,
268 }));
269 }
270 Visited::PostRoot { leaves } => {
271 let leaves = leaves.into_inner(&mut self.persisted_cids);
272
273 if !self.opts.wrap_with_directory {
274 break;
275 }
276
277 let buffer = &mut self.block_buffer;
278
279 let leaf = match Self::render_directory(
280 &leaves,
281 buffer,
282 &self.opts.block_size_limit,
283 ) {
284 Ok(leaf) => leaf,
285 Err(e) => return Some(Err(e)),
286 };
287
288 self.cid = Some(leaf.link);
289 self.total_size = leaf.total_size;
290
291 return Some(Ok(TreeNode {
292 path: self.full_path.as_str(),
293 cid: self.cid.as_ref().unwrap(),
294 total_size: self.total_size,
295 block: &self.block_buffer,
296 }));
297 }
298 }
299 }
300 None
301 }
302}
303
304impl Iterator for PostOrderIterator {
305 type Item = Result<OwnedTreeNode, TreeConstructionFailed>;
306
307 fn next(&mut self) -> Option<Self::Item> {
308 self.next_borrowed()
309 .map(|res| res.map(TreeNode::into_owned))
310 }
311}
312
313pub struct TreeNode<'a> {
315 pub path: &'a str,
317 pub cid: &'a Cid,
319 pub total_size: u64,
321 pub block: &'a [u8],
323}
324
325impl<'a> fmt::Debug for TreeNode<'a> {
326 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
327 fmt.debug_struct("TreeNode")
328 .field("path", &format_args!("{:?}", self.path))
329 .field("cid", &format_args!("{}", self.cid))
330 .field("total_size", &self.total_size)
331 .field("size", &self.block.len())
332 .finish()
333 }
334}
335
336impl TreeNode<'_> {
337 pub fn into_owned(self) -> OwnedTreeNode {
339 OwnedTreeNode {
340 path: self.path.to_owned(),
341 cid: self.cid.to_owned(),
342 total_size: self.total_size,
343 block: self.block.into(),
344 }
345 }
346}
347
348pub struct OwnedTreeNode {
350 pub path: String,
352 pub cid: Cid,
354 pub total_size: u64,
356 pub block: Box<[u8]>,
358}
359
360fn update_full_path(
361 (full_path, old_depth): (&mut String, &mut usize),
362 name: Option<&str>,
363 depth: usize,
364) {
365 if depth < 2 {
366 full_path.clear();
369 *old_depth = 0;
370 } else {
371 while *old_depth >= depth && *old_depth > 0 {
372 let slash_at = full_path.bytes().rposition(|ch| ch == b'/');
375 if let Some(slash_at) = slash_at {
376 if *old_depth == depth && Some(&full_path[(slash_at + 1)..]) == name {
377 return;
380 }
381 full_path.truncate(slash_at);
382 *old_depth -= 1;
383 } else {
384 todo!(
385 "no last slash_at in {:?} yet {} >= {}",
386 full_path,
387 old_depth,
388 depth
389 );
390 }
391 }
392 }
393
394 debug_assert!(*old_depth <= depth);
395
396 if let Some(name) = name {
397 if !full_path.is_empty() {
398 full_path.push('/');
399 }
400 full_path.push_str(name);
401 *old_depth += 1;
402 }
403
404 assert_eq!(*old_depth, depth);
405}
406
407fn partition_children_leaves(
410 depth: usize,
411 it: impl Iterator<Item = (String, Entry)>,
412 children: &mut Vec<Visited>,
413) -> Leaves {
414 let mut leaves = Vec::new();
415
416 for (i, (k, v)) in it.enumerate() {
417 match v {
418 Entry::Directory(node) => {
419 children.push(Visited::Descent {
420 node,
421 name: k,
423 depth: depth + 1,
424 index: i,
425 });
426
427 leaves.push(None);
429 }
430 Entry::Leaf(leaf) => leaves.push(Some(NamedLeaf(k, leaf.link, leaf.total_size))),
431 }
432 }
433
434 leaves
435}
436
437#[derive(Debug)]
438enum LeafStorage {
439 Direct(Leaves),
440 Stashed(u64),
441}
442
443impl LeafStorage {
444 fn into_inner(self, stash: &mut HashMap<u64, Leaves>) -> Leaves {
445 use LeafStorage::*;
446
447 match self {
448 Direct(leaves) => leaves,
449 Stashed(id) => stash
450 .remove(&id)
451 .ok_or(id)
452 .expect("leaves are either stashed or direct, must able to find with id"),
453 }
454 }
455}
456
457impl From<u64> for LeafStorage {
458 fn from(key: u64) -> LeafStorage {
459 LeafStorage::Stashed(key)
460 }
461}
462
463impl From<Leaves> for LeafStorage {
464 fn from(leaves: Leaves) -> LeafStorage {
465 LeafStorage::Direct(leaves)
466 }
467}