use super::{
CustomFlatUnixFs, DirBuilder, Entry, Leaf, NamedLeaf, TreeConstructionFailed, TreeOptions,
};
use cid::Cid;
use std::collections::HashMap;
use std::fmt;
pub struct PostOrderIterator {
full_path: String,
old_depth: usize,
block_buffer: Vec<u8>,
pending: Vec<Visited>,
persisted_cids: HashMap<u64, Vec<Option<NamedLeaf>>>,
reused_children: Vec<Visited>,
cid: Option<Cid>,
total_size: u64,
opts: TreeOptions,
}
type Leaves = Vec<Option<NamedLeaf>>;
#[derive(Debug)]
enum Visited {
DescentRoot(DirBuilder),
Descent {
node: DirBuilder,
name: String,
depth: usize,
index: usize,
},
Post {
parent_id: u64,
depth: usize,
name: String,
index: usize,
leaves: LeafStorage,
},
PostRoot {
leaves: LeafStorage,
},
}
impl PostOrderIterator {
pub(super) fn new(root: DirBuilder, opts: TreeOptions, longest_path: usize) -> Self {
let root = Visited::DescentRoot(root);
PostOrderIterator {
full_path: String::with_capacity(longest_path),
old_depth: 0,
block_buffer: Default::default(),
pending: vec![root],
persisted_cids: Default::default(),
reused_children: Vec::new(),
cid: None,
total_size: 0,
opts,
}
}
fn render_directory(
links: &[Option<NamedLeaf>],
buffer: &mut Vec<u8>,
block_size_limit: &Option<u64>,
) -> Result<Leaf, TreeConstructionFailed> {
use crate::pb::{UnixFs, UnixFsType};
use quick_protobuf::{BytesWriter, MessageWrite, Writer};
use sha2::{Digest, Sha256};
let node = CustomFlatUnixFs {
links,
data: UnixFs {
Type: UnixFsType::Directory,
..Default::default()
},
};
let size = node.get_size();
if let Some(limit) = block_size_limit {
let size = size as u64;
if *limit < size {
return Err(TreeConstructionFailed::TooLargeBlock(size));
}
}
let cap = buffer.capacity();
if let Some(additional) = size.checked_sub(cap) {
buffer.reserve(additional);
}
if let Some(mut needed_zeroes) = size.checked_sub(buffer.len()) {
let zeroes = [0; 8];
while needed_zeroes > 8 {
buffer.extend_from_slice(&zeroes[..]);
needed_zeroes -= zeroes.len();
}
buffer.extend(std::iter::repeat(0).take(needed_zeroes));
}
let mut writer = Writer::new(BytesWriter::new(&mut buffer[..]));
node.write_message(&mut writer)
.map_err(TreeConstructionFailed::Protobuf)?;
buffer.truncate(size);
let mh = multihash::wrap(multihash::Code::Sha2_256, &Sha256::digest(&buffer));
let cid = Cid::new_v0(mh).expect("sha2_256 is the correct multihash for cidv0");
let combined_from_links = links
.iter()
.map(|opt| {
opt.as_ref()
.map(|NamedLeaf(_, _, total_size)| total_size)
.unwrap()
})
.sum::<u64>();
Ok(Leaf {
link: cid,
total_size: buffer.len() as u64 + combined_from_links,
})
}
pub fn next_borrowed(&mut self) -> Option<Result<TreeNode<'_>, TreeConstructionFailed>> {
while let Some(visited) = self.pending.pop() {
let (name, depth) = match &visited {
Visited::DescentRoot(_) => (None, 0),
Visited::Descent { name, depth, .. } => (Some(name.as_ref()), *depth),
Visited::Post { name, depth, .. } => (Some(name.as_ref()), *depth),
Visited::PostRoot { .. } => (None, 0),
};
update_full_path((&mut self.full_path, &mut self.old_depth), name, depth);
match visited {
Visited::DescentRoot(node) => {
let children = &mut self.reused_children;
let leaves = partition_children_leaves(depth, node.nodes.into_iter(), children);
let any_children = !children.is_empty();
let leaves = if any_children {
self.persisted_cids.insert(node.id, leaves);
LeafStorage::from(node.id)
} else {
leaves.into()
};
self.pending.push(Visited::PostRoot { leaves });
self.pending.extend(children.drain(..));
}
Visited::Descent {
node,
name,
depth,
index,
} => {
let children = &mut self.reused_children;
let leaves = partition_children_leaves(depth, node.nodes.into_iter(), children);
let any_children = !children.is_empty();
let parent_id = node.parent_id.expect("only roots parent_id is None");
let leaves = if any_children {
self.persisted_cids.insert(node.id, leaves);
node.id.into()
} else {
leaves.into()
};
self.pending.push(Visited::Post {
parent_id,
name,
depth,
leaves,
index,
});
self.pending.extend(children.drain(..));
}
Visited::Post {
parent_id,
name,
leaves,
index,
..
} => {
let leaves = leaves.into_inner(&mut self.persisted_cids);
let buffer = &mut self.block_buffer;
let leaf = match Self::render_directory(
&leaves,
buffer,
&self.opts.block_size_limit,
) {
Ok(leaf) => leaf,
Err(e) => return Some(Err(e)),
};
self.cid = Some(leaf.link.clone());
self.total_size = leaf.total_size;
{
let parent_leaves = self.persisted_cids.get_mut(&parent_id);
match (parent_id, parent_leaves, index) {
(pid, None, index) => panic!(
"leaves not found for parent_id = {} and index = {}",
pid, index
),
(_, Some(vec), index) => {
let cell = &mut vec[index];
assert!(cell.is_none());
*cell = Some(NamedLeaf(name, leaf.link, leaf.total_size));
}
}
}
return Some(Ok(TreeNode {
path: self.full_path.as_str(),
cid: self.cid.as_ref().unwrap(),
total_size: self.total_size,
block: &self.block_buffer,
}));
}
Visited::PostRoot { leaves } => {
let leaves = leaves.into_inner(&mut self.persisted_cids);
if !self.opts.wrap_with_directory {
break;
}
let buffer = &mut self.block_buffer;
let leaf = match Self::render_directory(
&leaves,
buffer,
&self.opts.block_size_limit,
) {
Ok(leaf) => leaf,
Err(e) => return Some(Err(e)),
};
self.cid = Some(leaf.link.clone());
self.total_size = leaf.total_size;
return Some(Ok(TreeNode {
path: self.full_path.as_str(),
cid: self.cid.as_ref().unwrap(),
total_size: self.total_size,
block: &self.block_buffer,
}));
}
}
}
None
}
}
impl Iterator for PostOrderIterator {
type Item = Result<OwnedTreeNode, TreeConstructionFailed>;
fn next(&mut self) -> Option<Self::Item> {
self.next_borrowed()
.map(|res| res.map(TreeNode::into_owned))
}
}
pub struct TreeNode<'a> {
pub path: &'a str,
pub cid: &'a Cid,
pub total_size: u64,
pub block: &'a [u8],
}
impl<'a> fmt::Debug for TreeNode<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("TreeNode")
.field("path", &format_args!("{:?}", self.path))
.field("cid", &format_args!("{}", self.cid))
.field("total_size", &self.total_size)
.field("size", &self.block.len())
.finish()
}
}
impl TreeNode<'_> {
pub fn into_owned(self) -> OwnedTreeNode {
OwnedTreeNode {
path: self.path.to_owned(),
cid: self.cid.to_owned(),
total_size: self.total_size,
block: self.block.into(),
}
}
}
pub struct OwnedTreeNode {
pub path: String,
pub cid: Cid,
pub total_size: u64,
pub block: Box<[u8]>,
}
fn update_full_path(
(full_path, old_depth): (&mut String, &mut usize),
name: Option<&str>,
depth: usize,
) {
if depth < 2 {
full_path.clear();
*old_depth = 0;
} else {
while *old_depth >= depth && *old_depth > 0 {
let slash_at = full_path.bytes().rposition(|ch| ch == b'/');
if let Some(slash_at) = slash_at {
if *old_depth == depth && Some(&full_path[(slash_at + 1)..]) == name {
return;
}
full_path.truncate(slash_at);
*old_depth -= 1;
} else {
todo!(
"no last slash_at in {:?} yet {} >= {}",
full_path,
old_depth,
depth
);
}
}
}
debug_assert!(*old_depth <= depth);
if let Some(name) = name {
if !full_path.is_empty() {
full_path.push_str("/");
}
full_path.push_str(name);
*old_depth += 1;
}
assert_eq!(*old_depth, depth);
}
fn partition_children_leaves(
depth: usize,
it: impl Iterator<Item = (String, Entry)>,
children: &mut Vec<Visited>,
) -> Leaves {
let mut leaves = Vec::new();
for (i, (k, v)) in it.enumerate() {
match v {
Entry::Directory(node) => {
children.push(Visited::Descent {
node,
name: k,
depth: depth + 1,
index: i,
});
leaves.push(None);
}
Entry::Leaf(leaf) => leaves.push(Some(NamedLeaf(k, leaf.link, leaf.total_size))),
}
}
leaves
}
#[derive(Debug)]
enum LeafStorage {
Direct(Leaves),
Stashed(u64),
}
impl LeafStorage {
fn into_inner(self, stash: &mut HashMap<u64, Leaves>) -> Leaves {
use LeafStorage::*;
match self {
Direct(leaves) => leaves,
Stashed(id) => stash
.remove(&id)
.ok_or(id)
.expect("leaves are either stashed or direct, must able to find with id"),
}
}
}
impl From<u64> for LeafStorage {
fn from(key: u64) -> LeafStorage {
LeafStorage::Stashed(key)
}
}
impl From<Leaves> for LeafStorage {
fn from(leaves: Leaves) -> LeafStorage {
LeafStorage::Direct(leaves)
}
}