//! `ipfs-unixfs` 0.2.0: UnixFs tree support.

use cid::Cid;

use crate::pb::{FlatUnixFs, PBLink, UnixFs, UnixFsType};
use alloc::borrow::Cow;
use core::fmt;
use quick_protobuf::{MessageWrite, Writer};

use sha2::{Digest, Sha256};

/// File tree builder. Implements [`core::default::Default`], which tracks the recent go-ipfs
/// defaults.
///
/// A custom file tree builder can be created with [`FileAdder::builder()`] by configuring the
/// chunker and collector.
///
/// The current implementation maintains an internal buffer for block creation and uses a
/// non-customizable hash function to produce Cid version 0 links. Inline links are not yet
/// supported.
#[derive(Default)]
pub struct FileAdder {
    chunker: Chunker,
    collector: Collector,
    block_buffer: Vec<u8>,
    // all unflushed links as a flat vec; this is compacted as we grow and need to create a link
    // block for the last N blocks, as decided by the collector.
    // FIXME: this is a likely cause of "accidentally quadratic" behaviour visible when adding a
    // large file with a minimal chunk size. It may be that this needs to be moved into the
    // Collector to help the collector (or layout) decide how it should be persisted.
    unflushed_links: Vec<Link>,
}

impl fmt::Debug for FileAdder {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            fmt,
            "FileAdder {{ chunker: {:?}, block_buffer: {}/{}, unflushed_links: {} }}",
            self.chunker,
            self.block_buffer.len(),
            self.block_buffer.capacity(),
            LinkFormatter(&self.unflushed_links),
        )
    }
}

struct LinkFormatter<'a>(&'a [Link]);

impl fmt::Display for LinkFormatter<'_> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut iter = self.0.iter().peekable();

        write!(fmt, "[")?;

        let mut current = match iter.peek() {
            Some(Link { depth, .. }) => depth,
            None => return write!(fmt, "]"),
        };

        let mut count = 0;

        for Link {
            depth: next_depth, ..
        } in iter
        {
            if current == next_depth {
                count += 1;
            } else {
                write!(fmt, "{}: {}/", current, count)?;

                let steps_between = if current > next_depth {
                    current - next_depth
                } else {
                    next_depth - current
                };

                for _ in 0..steps_between - 1 {
                    write!(fmt, "0/")?;
                }
                count = 1;
                current = next_depth;
            }
        }

        write!(fmt, "{}: {}]", current, count)
    }
}

/// Represents an intermediate structure which will be serialized into link blocks as both PBLink
/// and UnixFs::blocksize. Also holds `depth`, which helps with compaction of the link blocks.
struct Link {
    /// Depth of this link. Zero means a leaf; anything above it is, at least for
    /// [`BalancedCollector`], a compacted link block.
    depth: usize,
    /// The link target
    target: Cid,
    /// Total size is the dag-pb specific part of the link: the aggregated size of the linked
    /// subtree.
    total_size: u64,
    /// File size is the unixfs specific blocksize for this link. In UnixFs link blocks, there is a
    /// UnixFs::blocksizes item for each link.
    file_size: u64,
}

impl fmt::Debug for Link {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt.debug_struct("Link")
            .field("depth", &self.depth)
            .field("target", &format_args!("{}", self.target))
            .field("total_size", &self.total_size)
            .field("file_size", &self.file_size)
            .finish()
    }
}

/// Convenience type to facilitate configuring [`FileAdder`]s.
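///
/// A sketch of configuring a non-default [`FileAdder`]; the values shown are illustrative, not
/// recommendations:
///
/// ```
/// use ipfs_unixfs::file::adder::{BalancedCollector, Chunker, FileAdder};
///
/// let adder = FileAdder::builder()
///     .with_chunker(Chunker::Size(64 * 1024))
///     .with_collector(BalancedCollector::with_branching_factor(174))
///     .build();
/// ```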
#[derive(Default)]
pub struct FileAdderBuilder {
    chunker: Chunker,
    collector: Collector,
}

impl FileAdderBuilder {
    /// Configures the builder to use the given chunker.
    pub fn with_chunker(self, chunker: Chunker) -> Self {
        FileAdderBuilder { chunker, ..self }
    }

    /// Configures the builder to use the given collector or layout.
    pub fn with_collector(self, collector: impl Into<Collector>) -> Self {
        FileAdderBuilder {
            collector: collector.into(),
            ..self
        }
    }

    /// Returns a new FileAdder
    pub fn build(self) -> FileAdder {
        let FileAdderBuilder { chunker, collector } = self;

        FileAdder {
            chunker,
            collector,
            ..Default::default()
        }
    }
}

impl FileAdder {
    /// Returns a [`FileAdderBuilder`] for creating a non-default FileAdder.
    pub fn builder() -> FileAdderBuilder {
        FileAdderBuilder::default()
    }

    /// Returns the likely amount of buffering the file adding works best with.
    ///
    /// When using the size-based chunker and input larger than or equal to the hint is `push()`'ed
    /// to the chunker, the internal buffer will not be used.
    pub fn size_hint(&self) -> usize {
        self.chunker.size_hint()
    }

    /// Called to push new file bytes into the tree builder.
    ///
    /// Returns the newly created blocks (at most 2) and their respective Cids, and the amount of
    /// `input` consumed.
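    ///
    /// A sketch of a push loop which handles partial consumption; the size-based chunker accepts
    /// at most one chunk's worth of input per call, so a single `push` may not consume all of
    /// `input`:
    ///
    /// ```
    /// use ipfs_unixfs::file::adder::FileAdder;
    ///
    /// let mut adder = FileAdder::default();
    /// let mut data: &[u8] = b"some file contents";
    ///
    /// while !data.is_empty() {
    ///     let (blocks, consumed) = adder.push(data);
    ///     // a real application would persist the produced blocks here
    ///     blocks.for_each(drop);
    ///     data = &data[consumed..];
    /// }
    ///
    /// // remember to call finish() to flush the remaining blocks
    /// adder.finish().for_each(drop);
    /// ```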
    pub fn push(&mut self, input: &[u8]) -> (impl Iterator<Item = (Cid, Vec<u8>)>, usize) {
        let (accepted, ready) = self.chunker.accept(input, &self.block_buffer);

        if self.block_buffer.is_empty() && ready {
            // save single copy as the caller is giving us whole chunks.
            //
            // TODO: though, this path does make one question if there is any point in keeping
            // block_buffer and chunker here; perhaps FileAdder should only handle pre-chunked
            // blocks and user takes care of chunking (and buffering)?
            //
            // cat file | my_awesome_chunker | my_brilliant_collector
            let leaf = Self::flush_buffered_leaf(accepted, &mut self.unflushed_links, false);
            assert!(leaf.is_some(), "chunk completed, must produce a new block");
            self.block_buffer.clear();
            let links = self.flush_buffered_links(false);
            (leaf.into_iter().chain(links.into_iter()), accepted.len())
        } else {
            // slower path as we manage the buffer.

            if self.block_buffer.capacity() == 0 {
                // delay the internal buffer creation until this point, as the caller clearly wants
                // to use it.
                self.block_buffer.reserve(self.size_hint());
            }

            self.block_buffer.extend_from_slice(accepted);
            let written = accepted.len();

            let (leaf, links) = if !ready {
                // a new block did not become ready, which means we couldn't have gotten a new cid.
                (None, Vec::new())
            } else {
                // a new leaf must be output, as well as possibly a new link block
                let leaf = Self::flush_buffered_leaf(
                    self.block_buffer.as_slice(),
                    &mut self.unflushed_links,
                    false,
                );
                assert!(leaf.is_some(), "chunk completed, must produce a new block");
                self.block_buffer.clear();
                let links = self.flush_buffered_links(false);

                (leaf, links)
            };
            (leaf.into_iter().chain(links.into_iter()), written)
        }
    }

    /// Called after the last [`FileAdder::push`] to finish the tree construction.
    ///
    /// Returns a list of Cids and their respective blocks.
    ///
    /// Note: the API will hopefully evolve in a direction which will not allocate a new Vec for
    /// every block in the near-ish future.
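    ///
    /// A sketch; per the tests in this module, the root block is yielded last:
    ///
    /// ```
    /// use ipfs_unixfs::file::adder::FileAdder;
    ///
    /// let mut adder = FileAdder::default();
    /// // less than one chunk, so no blocks become ready before finishing
    /// adder.push(b"hello world\n");
    ///
    /// let (root_cid, _root_block) = adder.finish().last().expect("at least one block");
    /// println!("root: {}", root_cid);
    /// ```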
    pub fn finish(mut self) -> impl Iterator<Item = (Cid, Vec<u8>)> {
        let last_leaf = Self::flush_buffered_leaf(
            self.block_buffer.as_slice(),
            &mut self.unflushed_links,
            true,
        );
        let root_links = self.flush_buffered_links(true);
        // should probably error if there is neither?
        last_leaf.into_iter().chain(root_links.into_iter())
    }

    /// Returns `None` when the input is empty and no block needs to be produced; an empty input
    /// produces a block only when finishing an empty file. Otherwise returns the new Cid and the
    /// block.
    fn flush_buffered_leaf(
        input: &[u8],
        unflushed_links: &mut Vec<Link>,
        finishing: bool,
    ) -> Option<(Cid, Vec<u8>)> {
        if input.is_empty() && (!finishing || !unflushed_links.is_empty()) {
            return None;
        }

        // for an empty unixfs file the Data field is missing but filesize is present.

        let data = if !input.is_empty() {
            Some(Cow::Borrowed(input))
        } else {
            None
        };

        let filesize = Some(input.len() as u64);

        let inner = FlatUnixFs {
            links: Vec::new(),
            data: UnixFs {
                Type: UnixFsType::File,
                Data: data,
                filesize,
                // no blocksizes as there are no links
                ..Default::default()
            },
        };

        let (cid, vec) = render_and_hash(&inner);

        let total_size = vec.len();

        let link = Link {
            depth: 0,
            target: cid.clone(),
            total_size: total_size as u64,
            file_size: input.len() as u64,
        };

        unflushed_links.push(link);

        Some((cid, vec))
    }

    fn flush_buffered_links(&mut self, finishing: bool) -> Vec<(Cid, Vec<u8>)> {
        self.collector
            .flush_links(&mut self.unflushed_links, finishing)
    }

    /// Test helper for collecting all of the produced blocks; probably not a good idea outside
    /// smaller test cases. When `amt` is zero, the whole content is processed at the pace of the
    /// chunker, otherwise `all_content` is pushed in `amt`-sized slices with the idea of catching
    /// bugs in chunkers.
    #[cfg(test)]
    fn collect_blocks(mut self, all_content: &[u8], mut amt: usize) -> Vec<(Cid, Vec<u8>)> {
        let mut written = 0;
        let mut blocks_received = Vec::new();

        if amt == 0 {
            amt = all_content.len();
        }

        while written < all_content.len() {
            let end = written + (all_content.len() - written).min(amt);
            let slice = &all_content[written..end];

            let (blocks, pushed) = self.push(slice);
            blocks_received.extend(blocks);
            written += pushed;
        }

        let last_blocks = self.finish();
        blocks_received.extend(last_blocks);

        blocks_received
    }
}

fn render_and_hash(flat: &FlatUnixFs<'_>) -> (Cid, Vec<u8>) {
    // TODO: as shown in later dagger we don't really need to render the FlatUnixFs fully; we could
    // either just render a fixed header and continue with the body OR links, though the links are
    // a bit more complicated.
    let mut out = Vec::with_capacity(flat.get_size());
    let mut writer = Writer::new(&mut out);
    flat.write_message(&mut writer)
        .expect("unsure how this could fail");
    let mh = multihash::wrap(multihash::Code::Sha2_256, &Sha256::digest(&out));
    let cid = Cid::new_v0(mh).expect("sha2_256 is the correct multihash for cidv0");
    (cid, out)
}

/// Chunker strategy
#[derive(Debug, Clone)]
pub enum Chunker {
    /// Size based chunking
    Size(usize),
}

impl Default for Chunker {
    /// Returns a default chunker which matches go-ipfs 0.6
    fn default() -> Self {
        Chunker::Size(256 * 1024)
    }
}

impl Chunker {
    fn accept<'a>(&mut self, input: &'a [u8], buffered: &[u8]) -> (&'a [u8], bool) {
        use Chunker::*;

        match self {
            Size(max) => {
                let l = input.len().min(*max - buffered.len());
                let accepted = &input[..l];
                let ready = buffered.len() + l >= *max;
                (accepted, ready)
            }
        }
    }

    fn size_hint(&self) -> usize {
        use Chunker::*;

        match self {
            Size(max) => *max,
        }
    }
}

/// Collector or layout strategy. For more information, see the [Layout section of the spec].
/// Currently only the default balanced collector/layout is implemented.
///
/// [Layout section of the spec]: https://github.com/ipfs/specs/blob/master/UNIXFS.md#layout
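///
/// A sketch of obtaining a [`Collector`]; the `From` conversion used by `.into()` below is
/// provided for [`BalancedCollector`]:
///
/// ```
/// use ipfs_unixfs::file::adder::{BalancedCollector, Collector};
///
/// let _default = Collector::default();
/// let _custom: Collector = BalancedCollector::with_branching_factor(42).into();
/// ```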
#[derive(Debug, Clone)]
pub enum Collector {
    /// Balanced trees.
    Balanced(BalancedCollector),
}

impl Default for Collector {
    fn default() -> Self {
        Collector::Balanced(Default::default())
    }
}

impl Collector {
    fn flush_links(&mut self, pending: &mut Vec<Link>, finishing: bool) -> Vec<(Cid, Vec<u8>)> {
        use Collector::*;

        match self {
            Balanced(bc) => bc.flush_links(pending, finishing),
        }
    }
}

/// BalancedCollector creates balanced UnixFs trees, optimized for random access to different
/// parts of the file. Currently only the link count threshold, or branching factor, can be
/// configured.
#[derive(Clone)]
pub struct BalancedCollector {
    branching_factor: usize,
    // reused between link block generation
    reused_links: Vec<PBLink<'static>>,
    // reused between link block generation
    reused_blocksizes: Vec<u64>,
}

impl fmt::Debug for BalancedCollector {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            fmt,
            "BalancedCollector {{ branching_factor: {} }}",
            self.branching_factor
        )
    }
}

impl Default for BalancedCollector {
    /// Returns a default collector which matches go-ipfs 0.6
    ///
    /// The origin of 174 is not described in the [specs], but it likely has something to do with
    /// being "good enough" regarding prefetching when reading, while allowing reuse of some of
    /// the link blocks if parts of a longer file change.
    ///
    /// [specs]: https://github.com/ipfs/specs/blob/master/UNIXFS.md
    fn default() -> Self {
        Self::with_branching_factor(174)
    }
}

impl From<BalancedCollector> for Collector {
    fn from(b: BalancedCollector) -> Self {
        Collector::Balanced(b)
    }
}

impl BalancedCollector {
    /// Configures the balanced collector to use the given branching factor, i.e. the maximum
    /// number of links per link block.
    ///
    /// # Panics
    ///
    /// Panics if the given branching factor is zero.
    pub fn with_branching_factor(branching_factor: usize) -> Self {
        assert!(branching_factor > 0);

        Self {
            branching_factor,
            reused_links: Vec::new(),
            reused_blocksizes: Vec::new(),
        }
    }

    /// In-place compression of the `pending` links to a balanced hierarchy. When `finishing`, the
    /// links will be compressed iteratively from the lowest level to produce a single root link
    /// block.
    fn flush_links(&mut self, pending: &mut Vec<Link>, finishing: bool) -> Vec<(Cid, Vec<u8>)> {
        /*

        file    |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
        links-0 |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|E|F|G|
        links-1 |-------|-------|-------|-------|-B-----|-C-----|-D-----|\   /
        links-2 |-A-----------------------------|                         ^^^
              ^                                                     one short
               \--- link.depth

        pending [A, B, C, D, E, F, G]

        #flush_buffered_links(...) first iteration:

        file    |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
        links-0 |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|E|F|G|
        links-1 |-------|-------|-------|-------|-B-----|-C-----|-D-----|=#1==|
        links-2 |-A-----------------------------|

        pending [A, B, C, D, E, F, G] => [A, B, C, D, 1]

        new link block #1 is created for E, F, and G.

        #flush_buffered_links(...) second iteration:

        file    |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
        links-0 |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
        links-1 |-------|-------|-------|-------|-B-----|-C-----|-D-----|-#1--|
        links-2 |-A-----------------------------|=========================#2==|

        pending [A, B, C, D, 1] => [A, 2]

        new link block #2 is created for B, C, D, and #1.

        #flush_buffered_links(...) last iteration:

        file    |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
        links-0 |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
        links-1 |-------|-------|-------|-------|-------|-------|-------|-#1--|
        links-2 |-A-----------------------------|-------------------------#2--|
        links-3 |=========================================================#3==|

        pending [A, 2] => [3]

        new link block #3 is created for A, and #2. (the root block)
        */

        let mut ret = Vec::new();

        let mut reused_links = core::mem::take(&mut self.reused_links);
        let mut reused_blocksizes = core::mem::take(&mut self.reused_blocksizes);

        if let Some(need) = self.branching_factor.checked_sub(reused_links.capacity()) {
            reused_links.reserve(need);
        }

        if let Some(need) = self
            .branching_factor
            .checked_sub(reused_blocksizes.capacity())
        {
            reused_blocksizes.reserve(need);
        }

        'outer: for level in 0.. {
            if (pending.len() == 1 && finishing)
                || (pending.len() <= self.branching_factor && !finishing)
            {
                // when there is just a single linking block left and we are finishing, we are
                // done. It might not be part of the `ret` as will be the case with single chunk
                // files for example.
                //
                // normally when not finishing we do nothing if we don't have enough links.
                break;
            }

            // when finishing, we iterate the level to completion in blocks of
            // self.branching_factor and *insert* values at the offset of the first compressed
            // link. on following iterations this will be the index after the higher level index.
            let mut starting_point = 0;

            // when creating the link blocks, start overwriting the pending links at the first
            // found link for this depth. this index will be incremented for successive link
            // blocks.
            let mut last_overwrite = None;

            while let Some(mut first_at) = pending[starting_point..]
                .iter()
                .position(|Link { depth, .. }| depth == &level)
            {
                // fix first_at as absolute index from being possible relative to the
                // starting_point
                first_at += starting_point;

                if !finishing && pending[first_at..].len() <= self.branching_factor {
                    if let Some(last_overwrite) = last_overwrite {
                        // drain any processed
                        pending.drain((last_overwrite + 1)..first_at);
                    }
                    break 'outer;
                }

                reused_links.clear();
                reused_blocksizes.clear();

                let mut nested_size = 0;
                let mut nested_total_size = 0;

                let last = (first_at + self.branching_factor).min(pending.len());

                for (index, link) in pending[first_at..last].iter().enumerate() {
                    assert_eq!(
                        link.depth,
                        level,
                        "unexpected link depth {} when searching at level {} index {}",
                        link.depth,
                        level,
                        index + first_at
                    );

                    Self::partition_link(
                        link,
                        &mut reused_links,
                        &mut reused_blocksizes,
                        &mut nested_size,
                        &mut nested_total_size,
                    );
                }

                debug_assert_eq!(reused_links.len(), reused_blocksizes.len());

                let inner = FlatUnixFs {
                    links: reused_links,
                    data: UnixFs {
                        Type: UnixFsType::File,
                        filesize: Some(nested_size),
                        blocksizes: reused_blocksizes,
                        ..Default::default()
                    },
                };

                let (cid, vec) = render_and_hash(&inner);

                // start overwriting at the first index of this level, then continue forward on
                // next iterations.
                let index = last_overwrite.map(|i| i + 1).unwrap_or(first_at);
                pending[index] = Link {
                    depth: level + 1,
                    target: cid.clone(),
                    total_size: nested_total_size + vec.len() as u64,
                    file_size: nested_size,
                };

                ret.push((cid, vec));

                reused_links = inner.links;
                reused_blocksizes = inner.data.blocksizes;

                starting_point = last;
                last_overwrite = Some(index);
            }

            if let Some(last_overwrite) = last_overwrite {
                pending.truncate(last_overwrite + 1);
            }

            // this holds regardless of finishing; we would have broken out of 'outer had there
            // been less than a full block's worth of links left.
            debug_assert_eq!(
                pending.iter().position(|l| l.depth == level),
                None,
                "should have no more of depth {}: {}",
                level,
                LinkFormatter(pending.as_slice())
            );
        }

        self.reused_links = reused_links;
        self.reused_blocksizes = reused_blocksizes;

        ret
    }

    /// Each link needs to be partitioned into the four `&mut` arguments received by this
    /// function in order to produce the expected UnixFs output.
    fn partition_link(
        link: &Link,
        links: &mut Vec<PBLink<'static>>,
        blocksizes: &mut Vec<u64>,
        nested_size: &mut u64,
        nested_total_size: &mut u64,
    ) {
        links.push(PBLink {
            Hash: Some(link.target.to_bytes().into()),
            Name: Some("".into()),
            Tsize: Some(link.total_size),
        });
        blocksizes.push(link.file_size);
        *nested_size += link.file_size;
        *nested_total_size += link.total_size;
    }
}

#[cfg(test)]
mod tests {

    use super::{BalancedCollector, Chunker, FileAdder};
    use crate::test_support::FakeBlockstore;
    use cid::Cid;
    use core::convert::TryFrom;
    use hex_literal::hex;

    #[test]
    fn test_size_chunker() {
        assert_eq!(size_chunker_scenario(1, 4, 0), (1, true));
        assert_eq!(size_chunker_scenario(2, 4, 0), (2, true));
        assert_eq!(size_chunker_scenario(2, 1, 0), (1, false));
        assert_eq!(size_chunker_scenario(2, 1, 1), (1, true));
        assert_eq!(size_chunker_scenario(32, 3, 29), (3, true));
        // this took some debugging time:
        assert_eq!(size_chunker_scenario(32, 4, 29), (3, true));
    }

    fn size_chunker_scenario(max: usize, input_len: usize, existing_len: usize) -> (usize, bool) {
        let input = vec![0; input_len];
        let existing = vec![0; existing_len];

        let (accepted, ready) = Chunker::Size(max).accept(&input, &existing);
        (accepted.len(), ready)
    }

    #[test]
    fn favourite_single_block_file() {
        let blocks = FakeBlockstore::with_fixtures();
        // everyone's favourite content
        let content = b"foobar\n";

        let mut adder = FileAdder::default();

        {
            let (mut ready_blocks, bytes) = adder.push(content);
            assert!(ready_blocks.next().is_none());
            assert_eq!(bytes, content.len());
        }

        // real impl would probably hash this ... except maybe hashing is faster when done inline?
        // or maybe not
        let (_, file_block) = adder
            .finish()
            .next()
            .expect("there must have been the root block");

        assert_eq!(
            blocks.get_by_str("QmRgutAxd8t7oGkSm4wmeuByG6M51wcTso6cubDdQtuEfL"),
            file_block.as_slice()
        );
    }

    #[test]
    fn favourite_multi_block_file() {
        // root should be QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6

        let blocks = FakeBlockstore::with_fixtures();
        let content = b"foobar\n";
        let adder = FileAdder::builder().with_chunker(Chunker::Size(2)).build();

        let blocks_received = adder.collect_blocks(content, 0);

        // the order here is "fo", "ob", "ar", "\n", root block
        // while verifying the root Cid would be *enough* this is easier to eyeball, ... not really
        // that much but ...
        let expected = [
            "QmfVyMoStzTvdnUR7Uotzh82gmL427q9z3xW5Y8fUoszi4",
            "QmdPyW4CWE3QBkgjWfjM5f7Tjb3HukxVuBXZtkqAGwsMnm",
            "QmNhDQpphvMWhdCzP74taRzXDaEfPGq8vWfFRzD7mEgePM",
            "Qmc5m94Gu7z62RC8waSKkZUrCCBJPyHbkpmGzEePxy2oXJ",
            "QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6",
        ]
        .iter()
        .map(|key| {
            let cid = Cid::try_from(*key).unwrap();
            let block = blocks.get_by_str(key).to_vec();
            (cid, block)
        })
        .collect::<Vec<_>>();

        assert_eq!(blocks_received, expected);
    }

    #[test]
    fn three_layers() {
        let content = b"Lorem ipsum dolor sit amet, sit enim montes aliquam. Cras non lorem, \
            rhoncus condimentum, irure et ante. Pulvinar suscipit odio ante, et tellus a enim, \
            wisi ipsum, vel rhoncus eget faucibus varius, luctus turpis nibh vel odio nulla pede.";

        assert!(content.len() > 174 && content.len() < 2 * 174);

        // go-ipfs 0.5 result: QmRQ6NZNUs4JrCT2y7tmCC1wUhjqYuTssB8VXbbN3rMffg, 239 blocks and root
        // root has two links:
        //  - QmXUcuLGKc8SCMEqG4wgct6NKsSRZQfvB2FCfjDow1PfpB (174 links)
        //  - QmeEn8dxWTzGAFKvyXoLj4oWbh9putL4vSw4uhLXJrSZhs (63 links)
        //
        // in future, if we ever add inline Cid generation this test would need to be changed not
        // to use those inline cids or raw leaves
        let adder = FileAdder::builder().with_chunker(Chunker::Size(1)).build();

        let blocks_received = adder.collect_blocks(content, 0);

        assert_eq!(blocks_received.len(), 240);

        assert_eq!(
            blocks_received.last().unwrap().0.to_string(),
            "QmRQ6NZNUs4JrCT2y7tmCC1wUhjqYuTssB8VXbbN3rMffg"
        );
    }

    #[test]
    fn three_layers_all_subchunks() {
        let content = b"Lorem ipsum dolor sit amet, sit enim montes aliquam. Cras non lorem, \
            rhoncus condimentum, irure et ante. Pulvinar suscipit odio ante, et tellus a enim, \
            wisi ipsum, vel rhoncus eget faucibus varius, luctus turpis nibh vel odio nulla pede.";

        for amt in 1..32 {
            let adder = FileAdder::builder().with_chunker(Chunker::Size(32)).build();
            let blocks_received = adder.collect_blocks(content, amt);
            assert_eq!(
                blocks_received.last().unwrap().0.to_string(),
                "QmYSLcVQqxKygiq7x9w1XGYxU29EShB8ZemiaQ8GAAw17h",
                "amt: {}",
                amt
            );
        }
    }

    #[test]
    fn empty_file() {
        let blocks = FileAdder::default().collect_blocks(b"", 0);
        assert_eq!(blocks.len(), 1);
        // 0a == field dag-pb body (unixfs)
        // 04 == dag-pb body len, varint, 4 bytes
        // 08 == field type tag, varint, 1 byte
        // 02 == field type (File)
        // 18 == field filesize tag, varint
        // 00 == filesize, varint, 1 byte
        assert_eq!(blocks[0].1.as_slice(), &hex!("0a 04 08 02 18 00"));
        assert_eq!(
            blocks[0].0.to_string(),
            "QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH"
        );
    }

    #[test]
    fn full_link_block_and_a_byte() {
        let buf = vec![0u8; 2];

        // this should produce a root with two links
        //             +----------^---+
        //             |              |
        //  |----------------------| |-| <-- link blocks
        //   ^^^^^^^^^^^^^^^^^^^^^^   ^
        //          174 blocks        \--- 1 block

        let branching_factor = 174;

        let mut adder = FileAdder::builder()
            .with_chunker(Chunker::Size(2))
            .with_collector(BalancedCollector::with_branching_factor(branching_factor))
            .build();
        let mut blocks_count = 0;

        for _ in 0..branching_factor {
            let (blocks, written) = adder.push(buf.as_slice());
            assert_eq!(written, buf.len());

            blocks_count += blocks.count();
        }

        let (blocks, written) = adder.push(&buf[0..1]);
        assert_eq!(written, 1);
        blocks_count += blocks.count();

        let last_blocks = adder.finish().collect::<Vec<_>>();
        blocks_count += last_blocks.len();

        // 174 chunk blocks
        // one link block for the 174 chunks
        // one block for the single extra byte
        // one link block for that single-byte block
        // and one for the root block
        assert_eq!(blocks_count, branching_factor + 1 + 1 + 1 + 1);

        assert_eq!(
            last_blocks.last().unwrap().0.to_string(),
            "QmcHNWF1d56uCDSfJPA7t9fadZRV9we5HGSTGSmwuqmMP9"
        );
    }

    #[test]
    fn full_link_block() {
        let buf = vec![0u8; 1];

        let branching_factor = 174;

        let mut adder = FileAdder::builder()
            .with_chunker(Chunker::Size(1))
            .with_collector(BalancedCollector::with_branching_factor(branching_factor))
            .build();
        let mut blocks_count = 0;

        for _ in 0..branching_factor {
            let (blocks, written) = adder.push(buf.as_slice());
            assert_eq!(written, buf.len());

            blocks_count += blocks.count();
        }

        let mut last_blocks = adder.finish();

        // go-ipfs waits until finish to get a single link block, no additional root block

        let last_block = last_blocks.next().expect("must not have flushed yet");
        blocks_count += 1;

        assert_eq!(last_blocks.next(), None);

        assert_eq!(
            last_block.0.to_string(),
            "QmdgQac8c6Bo3MP5bHAg2yQ25KebFUsmkZFvyByYzf8UCB"
        );

        assert_eq!(blocks_count, 175);
    }
}