bab_rs 0.5.0

An implementation of the Bab family of hash functions, and its WILLIAM3 instantiation.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
//! Abstractions for storing Bab data, most importantly provides the [`StorageBackend`] trait.

use core::cmp::min;

#[cfg(feature = "dev")]
use arbitrary::Arbitrary;

use derive_more::{Display, Error};

use ufotofu::prelude::*;

use crate::generic::{BabDigest, BabInstantiation, Label, storage::units::*};

/// Backing storage for a single bab-addressed string.
///
/// You only need to care about this trait if you want to implement a custom persistent Bab backend. If you are fine using our provided backends, you can safely ignore the internals of this trait.
///
/// An instance of this trait provides access to a (conceptual) array of bytes, storing a [baseline verifiable slice stream](https://worm-blossom.github.io/bab/#baseline_slice) for some given start chunk index and slice length (in chunks). This trait neither enforces validity of the bytes, nor does it even keep track which bytes have already been set to correct data and which bytes do not contain valid data yet. All such tracking and verification happens at a higher level of abstraction — this trait is the simplemost useful storage provider that can make such higher levels of abstractions work.
///
/// In addition to the byte array of Bab data, the trait also provides a simple interface for storing metadata - in the form of a fixed-sized (size specified at creation time) sequence of bytes. The intention for this metadata is that higher-level abstractions building on top of a StorageBackend can track which parts of a slice (or slices) have been stored yet, persist an expected root hash, etc.
///
/// Because this trait is intended for persistent storage, but persisting updates is expensive, the insertion methods are not guaranted to be made fully persistent immediately. To ensure that all prior updates are fully written to disk, call the [`flush`](StorageBackend::flush) method.
///
/// Errors returned by any method are fatal (i.e., the backend should be considered unusable after an error), unless the method documentation explicitly states otherwise.
///
/// The [`create`](StorageBackend::create), [`load`](StorageBackend::load), and [`delete`](StorageBackend::delete) methods allow associating keys to storage backends. This is particularly important with persistent stores, where you need a way of retrieving a particular store from disk. The system is based on two associated types: [`Key`](StorageBackend::Key) is the type of keys by which individual backends can be addressed, and [`KeyState`](StorageBackend::KeyState) can track state across multiple creation, loading, and deletion operations (for example to memoise data stored on disk).
///
/// The default methods on this trait offer access in terms of node contents and chunks. Internally, they perform a conversion to raw byte offsets, with a [`StringInfo`] argument providing the context required to do the conversions. The methods further take const generic parameters `WIDTH` and `CHUNK_SIZE` to convey the Bab [width](https://worm-blossom.github.io/bab/#width) and [chunk size](https://worm-blossom.github.io/bab/#chunk_size) parameters respectively.
///
/// Finally, note that cloning a `StorageBackend` should always be cheap (typically achieved by using reference-counting). Code written to work with arbitrary `StorageBackends` is allowed to assume that cloning is cheap.
pub trait StorageBackend: Sized + Clone {
    /// The type of errors the backends might yield on any operation.
    type InternalError;

    /// The type of values by which individual storage backends can be identified. Used by [`create`](StorageBackend::create), [`load`](StorageBackend::load), and [`delete`](StorageBackend::delete).
    type Key;

    /// The type of the shared, mutable state to keep across calls to [`create`](StorageBackend::create), [`load`](StorageBackend::load), and [`delete`](StorageBackend::delete).
    type KeyState;

    /// Creates a new storage backend, later retrievable via [`StorageBackend::load`] by supplying an equal key to the same `key_state`. If there already existed a StorageBackend for the given key, it is overwritten with a fresh backend.
    ///
    /// The `capacity` specifies how many bytes of Bab data the created backend can store. This value cannot be adjusted later.
    ///
    /// The `metadata_len` specifies the number of bytes of metadata the created storage backend can manage. This value cannot be adjusted later.
    async fn create(
        key_state: &mut Self::KeyState,
        key: Self::Key,
        capacity: ByteCount,
        metadata_len: ByteCount,
    ) -> Result<Self, Self::InternalError>;

    /// Creates a handle to a preexisting storage backend, identified by the given `key`, or `None` if no backend has been [created](StorageBackend::create) (and not already [deleted](StorageBackend::delete)) for this key yet (in the same `key_state`).
    async fn load(
        key_state: &mut Self::KeyState,
        key: &Self::Key,
    ) -> Result<Option<Self>, Self::InternalError>;

    /// Deletes all persistent storage (and possibly even in-memory data) for the storage backend of the given key. All future operations on that backend will yield an [`OperationsError::StorageDeleted`] error. This will also affect clones of the backend, or backends [loaded](StorageBackend::load) with the same `key`.
    ///
    /// If, after deletion, a new storage is created for the same `key`, handles to the old, deleted storage *should* still report errors instead of operating on the new data, but this is not required. Implementations in which handles to deleted storage can become non-erroneous again should explicitly document that this is the case. This behaviour should only be implemented if guaranteeing errors on old handles would be too expensive.
    ///
    /// Returns an `InternalError` instead of an `OperationsError`; if the storage was already deleted in the past, this is reported as a success.
    async fn delete(
        key_state: &mut Self::KeyState,
        key: &Self::Key,
    ) -> Result<(), Self::InternalError>;

    /// Changes the data associated with one key in the given key_state to being associated with a different key.
    ///
    /// Does nothing if there is no storage backend associated with the first key (irrespective of whether there never was one or whether it was deleted).
    async fn rename(
        key_state: &mut Self::KeyState,
        old_key: &Self::Key,
        new_key: Self::Key,
    ) -> Result<(), Self::InternalError>;

    /// Returns the number of bab bytes this can store.
    async fn get_capacity(&mut self) -> Result<ByteCount, OperationsError<Self::InternalError>>;

    /// Fills `buf` with stored bytes, starting from the given offset in the storage.
    ///
    /// Panics if `offset + buf.len() >= self.get_capacity()`.
    async fn get_bytes(
        &mut self,
        offset: ByteIndex,
        buf: &mut [u8],
    ) -> Result<(), OperationsError<Self::InternalError>>;

    /// Fills `buf` with the data of the chunk of index `chunk_index`, assuming the storage is for a string of `chunk_count` many chunks, storing chunks starting at chunk index `start_offset`.
    ///
    /// Careful: the final chunk might be shorter than `CHUNK_SIZE`. Calling this method with the `chunk_index` of the final chunk and a `buf` whose length exceeeds the length of the final chunk may exhibit unspecified behaviour (most likely, it will panic).
    async fn get_chunk<const WIDTH: usize, const CHUNK_SIZE: usize>(
        &mut self,
        chunk_index: ChunkIndex,
        string_info: StringInfo,
        buf: &mut [u8],
    ) -> Result<(), OperationsError<Self::InternalError>> {
        let start_byte_index = chunk_to_byte_index::<WIDTH, CHUNK_SIZE>(
            chunk_index,
            string_info.chunk_count,
            string_info.start_offset,
        );

        return self.get_bytes(start_byte_index, buf).await;
    }

    /// Fills `left` and `right` with the node content at the given `node_number`, assuming the storage is for a string of `chunk_count` many chunks, storing chunks starting at chunk index `start_offset`. Will fill `left` and `right` with unspecified data if the `node_number` does not correspond to an inner node.
    ///
    /// Surprisingly enough, the label of the left child is written into `left`, and the label of the right child is written into `right`.
    async fn get_node_contents<const WIDTH: usize, const CHUNK_SIZE: usize>(
        &mut self,
        node_number: NodeNumber,
        string_info: StringInfo,
        left: &mut Label<WIDTH>,
        right: &mut Label<WIDTH>,
    ) -> Result<(), OperationsError<Self::InternalError>> {
        let start_byte_index = node_number_to_byte_index::<WIDTH, CHUNK_SIZE>(
            node_number,
            string_info.chunk_count,
            string_info.start_offset,
        );

        // Wish this could be stack-allocated, but arrays of size `2 * WIDTH` are not possible on stable rust yet.
        let mut buf = vec![0; 2 * WIDTH];

        self.get_bytes(start_byte_index, &mut buf[..]).await?;

        left.copy_from_slice(&buf[..WIDTH]);
        right.copy_from_slice(&buf[WIDTH..]);

        return Ok(());
    }

    /// Writes the bytes of a (sub-) slice of the stored string into the given bulk consumer of bytes. Returns how many bytes were written.
    ///
    /// This method only writes original string data, but no Merkle tree node labels.
    async fn get_slice<const WIDTH: usize, const CHUNK_SIZE: usize, C>(
        &mut self,
        c: &mut C,
        slice_start: ByteIndex,
        slice_length: ByteCount,
        string_info: StringInfo,
        string_len: ByteCount,
    ) -> Result<ByteCount, WriteToConsumerError<Self::InternalError, C::Error>>
    where
        C: BulkConsumer<Item = u8>,
    {
        let mut gotten = 0;

        while gotten < slice_length {
            // Process one chunk per iteration of the loop.
            let current_position_in_string = slice_start + gotten;

            let chunk_index = current_position_in_string / (CHUNK_SIZE as u64);
            let mut chunk = [0u8; CHUNK_SIZE];
            let chunk_len = if chunk_index == string_info.chunk_count - 1 {
                (string_len as usize) % CHUNK_SIZE
            } else {
                CHUNK_SIZE
            };

            self.get_chunk::<WIDTH, CHUNK_SIZE>(chunk_index, string_info, &mut chunk[..chunk_len])
                .await
                .map_err(|err| WriteToConsumerError::StorageError(err))?;

            let start_in_chunk = current_position_in_string % (CHUNK_SIZE as u64);
            let end_in_chunk =
                start_in_chunk + min(slice_length - gotten, CHUNK_SIZE as u64 - start_in_chunk);

            debug_assert!(start_in_chunk < end_in_chunk);
            debug_assert!(end_in_chunk <= CHUNK_SIZE as u64);

            c.bulk_consume_full_slice(&chunk[start_in_chunk as usize..end_in_chunk as usize])
                .await
                .map_err(|err| WriteToConsumerError::ConsumerError(err.into_reason()))?;

            gotten += end_in_chunk - start_in_chunk;
        }

        Ok(gotten)
    }

    // /// Reserves more capacity at the end of the stored array.
    // ///
    // /// The argument specifies how much capacity to add to the existing capacity (i.e., *not* the new total).
    // ///
    // /// If this returns `Err(None)`, then the error is not fatal: allocating more capacity was not possible, so the capacity remained unchanged, and the storage remains operational.
    // async fn reserve_capacity(
    //     &mut self,
    //     additional_capacity: ByteCount,
    // ) -> Result<(), Option<OperationsError<Self::InternalError>>>;

    /// Sets bytes in the storage backend to the given slice, starting at the given offset.
    ///
    /// Panics if `offset + new_data.len() >= self.get_capacity()`.
    async fn set_bytes(
        &mut self,
        offset: ByteIndex,
        new_data: &[u8],
    ) -> Result<(), OperationsError<Self::InternalError>>;

    /// Writes into the storage backend the node contents of the inner node `node_number`, assuming the backend is for a string of `chunk_count` many chunks, storing chunks starting at chunk index `start_offset`.
    ///
    /// Panics if `node_number` does not correspond to an inner node, or the node number does not fit into the allocated storage backend.
    async fn set_inner_node_contents<const WIDTH: usize, const CHUNK_SIZE: usize>(
        &mut self,
        node_number: NodeNumber,
        string_info: StringInfo,
        left: &Label<WIDTH>,
        right: &Label<WIDTH>,
    ) -> Result<(), OperationsError<Self::InternalError>> {
        let start_byte_index = node_number_to_byte_index::<WIDTH, CHUNK_SIZE>(
            node_number,
            string_info.chunk_count,
            string_info.start_offset,
        );

        self.set_bytes(start_byte_index, left).await?;
        self.set_bytes(start_byte_index + (WIDTH as u64), right)
            .await?;

        Ok(())
    }

    /// Writes into the storage backend the chunk data of a chunk at the given `chunk_index`, assuming the backend is for a string of `chunk_count` many chunks, storing chunks starting at chunk index `start_offset`.
    ///
    /// Panics if the chunk index does not fit into the allocated storage backend.
    async fn set_chunk<const WIDTH: usize, const CHUNK_SIZE: usize>(
        &mut self,
        chunk_index: ChunkIndex,
        string_info: StringInfo,
        chunk_bytes: &[u8],
    ) -> Result<(), OperationsError<Self::InternalError>> {
        debug_assert!(chunk_bytes.len() <= CHUNK_SIZE);

        let start_byte_index = chunk_to_byte_index::<WIDTH, CHUNK_SIZE>(
            chunk_index,
            string_info.chunk_count,
            string_info.start_offset,
        );

        self.set_bytes(start_byte_index, chunk_bytes).await?;

        Ok(())
    }

    /// Writes to a StorageBackend the raw bytes of a string (supplied as a producer of bytes; the total length must be known in advance) and automatically fills in the correct Merkle tree labels as well. I.e., this method takes a raw string, but stores a verifiable stream.
    /// Returns the root label, i.e., the Bab digest, of the string.
    ///
    /// Panics if the producer does not produce at least `string_length` many bytes. The storage associated with `key` is unspecified in this case.
    ///
    /// The `start_offset` parameter has the same meaning as the corresponding field of the [`StringInfo`] struct. Internally, this method constructs a [`StringInfo`] by converting the `string_length_in_bytes` into a chunk count and reusing the supplied `start_offset`.
    async fn initialise_backend<
        const WIDTH: usize,
        const CHUNK_SIZE: usize,
        P,
        HashChunkContext,
        HashInnerContext,
    >(
        &mut self,
        string_length_in_bytes: ByteCount,
        start_offset: ChunkIndex,
        string_bytes: &mut P,
        bab_instantiation: &BabInstantiation<WIDTH, CHUNK_SIZE, HashChunkContext, HashInnerContext>,
    ) -> Result<BabDigest<WIDTH>, OperationsError<Self::InternalError>>
    where
        P: BulkProducer<Item = u8>,
    {
        super::verifiable_streaming::initialise_store::<WIDTH, CHUNK_SIZE, _, _, _, _>(
            self,
            string_length_in_bytes,
            start_offset,
            string_bytes,
            bab_instantiation,
        )
        .await
        .map(From::from)
    }

    /// Returns how many bytes of metadata can be stored in `self`.
    async fn get_len_of_metadata(
        &mut self,
    ) -> Result<ByteCount, OperationsError<Self::InternalError>>;

    /// Overwrites `buf` with metadata bytes, starting from the given offset.
    ///
    /// Panics if `offset + buf.len() >= self.get_metadata_len()`.
    async fn get_metadata(
        &mut self,
        offset: ByteIndex,
        buf: &mut [u8],
    ) -> Result<(), OperationsError<Self::InternalError>>;

    /// Sets a slice of metadata, starting at the given offset.
    ///
    /// Panics if `offset + new_data.len() >= self.get_metadata_len()`.
    async fn set_metadata(
        &mut self,
        offset: ByteIndex,
        new_data: &[u8],
    ) -> Result<(), OperationsError<Self::InternalError>>;

    /// Flushes all prior operations. If the storage backend is persistent, a successful flush guarantees persistence of all mutations up to that point.
    async fn flush(&mut self) -> Result<(), OperationsError<Self::InternalError>>;
}

/// The error type for most operations involving a [`StorageBackend`].
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Debug, Display, Error)]
#[cfg_attr(feature = "dev", derive(Arbitrary))]
pub enum OperationsError<InternalError> {
    /// The storage backend has been [deleted](StorageBackend::delete).
    ///
    /// All further operations on it will error as well.
    #[display("storage was deleted")]
    StorageDeleted,
    /// The storage backend encountered an error internally. The `is_fatal` flag indicates whether the backend is in a consistent state and can be used going forward (`false`) or whether the error was irrecoverable (`true`).
    #[display("internal storage error")]
    Internal {
        #[error(source)]
        /// The cause of the error.
        err: InternalError,
        /// Whether the error is recoverable (`false`) or irrecoverable (`true`).
        is_fatal: bool,
    },
}

/// Everything that can go wrong when writing data from a [`StorageBackend`] into a [`BulkConsumer`]: the backend might encounter an [`OperationsError`], or the consumer might emit an error.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Debug, Display, Error)]
#[cfg_attr(feature = "dev", derive(Arbitrary))]
pub enum WriteToConsumerError<InternalError, ConsumerError> {
    #[display("storage error")]
    /// Failed to retrieve slice data from the [`StorageBackend`].
    StorageError(#[error(source)] OperationsError<InternalError>),
    #[display("consumer error")]
    /// The consumer emitted an error before all slice data could be written into it.
    ConsumerError(#[error(source)] ConsumerError),
}

/// Information about the string stored in a [`StorageBackend`], required by the methods that interact with the backend in terms of Bab concepts.
///
/// Each [`StorageBackend`] is expected to store Bab data in a certain form: as the a [baselin verifiable slice stream](https://worm-blossom.github.io/bab/#baseline_slice) for a string of some known total number of chunks, with the slice starting at some known chunk index. This slice may or may not extend to the very end of the stream, this is determined implicitly through the total size of allocated byte storage in the backend. In other words, a backend is not required to allocate storage for a full string, but only to allocate storage for some subslice of a string.
///
/// The methods for interacting with a backend in terms of Bab concepts (retrieving a chunk, setting a Merkle tree label, etc) need to know the number of chunks of the underlying string, and the start offset of the slice that is actually being stored. This type supplies that information.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StringInfo {
    /// How many chunks does the underlying string consist of in total?
    pub chunk_count: ChunkCount,
    /// Beginning at which chunk index does the backend actually store string data?
    pub start_offset: ChunkIndex,
}

////////////////
// Index Math //
////////////////

fn byte_size_of_node_content<const WIDTH: usize>() -> ByteCount {
    (WIDTH as u64) * 2
}

/// Converts a [`ChunkIndex`] into the [`ByteIndex`] at which a [`StorageBackend`] stores the raw bytes of the chunk.
///
/// The `start_offset` gives the first chunk of the stored slice.
///
/// Panics if `chunk_index >= chunk_count`, or if `chunk_index < start_offset`.
pub(crate) fn chunk_to_byte_index<const WIDTH: usize, const CHUNK_SIZE: usize>(
    chunk_index: ChunkIndex,
    chunk_count: ChunkCount,
    start_offset: ChunkIndex,
) -> ByteIndex {
    assert!(chunk_index < chunk_count);
    assert!(chunk_index >= start_offset,);

    return chunk_to_byte_index_actual::<WIDTH, CHUNK_SIZE>(chunk_index, chunk_count, start_offset);

    // Actual, recursion-friendly implementation of `chunk_to_byte`.
    fn chunk_to_byte_index_actual<const WIDTH: usize, const CHUNK_SIZE: usize>(
        chunk_index: ChunkIndex,
        chunk_count: ChunkCount,
        start_offset: ChunkIndex,
    ) -> ByteIndex {
        if chunk_index < start_offset {
            return 0;
        } else {
            if chunk_count == 1 {
                // trivial tree, trivial solution
                return 0;
            } else {
                let left_subtree_leaf_count = chunk_count.next_power_of_two() / 2;

                if chunk_index < left_subtree_leaf_count {
                    // chunk is in left subtree of the root (which is a perfect subtree)
                    return byte_size_of_node_content::<WIDTH>()
                        + chunk_to_byte_index_actual::<WIDTH, CHUNK_SIZE>(
                            chunk_index,
                            left_subtree_leaf_count,
                            start_offset,
                        );
                } else {
                    // chunk is in right subtree of the root
                    return size_of_perfect_tree::<WIDTH, CHUNK_SIZE>(
                        left_subtree_leaf_count,
                        start_offset,
                    ) + byte_size_of_node_content::<WIDTH>() // root content
                     + chunk_to_byte_index_actual::<WIDTH, CHUNK_SIZE>(
                        chunk_index - left_subtree_leaf_count,
                        chunk_count - left_subtree_leaf_count,
                        start_offset.saturating_sub(left_subtree_leaf_count),
                    );
                }
            }
        }
    }
}

fn size_of_perfect_tree<const WIDTH: usize, const CHUNK_SIZE: usize>(
    leaf_count: ChunkCount,
    start_offset: ChunkIndex,
) -> ByteCount {
    // Only called with perfect trees, i.e. leaf count is a power of two.
    debug_assert!(
        leaf_count.next_power_of_two() == leaf_count,
        "leaf count: {leaf_count}"
    );

    if start_offset >= leaf_count {
        return 0;
    } else if start_offset == 0 {
        leaf_count * (CHUNK_SIZE as u64) + (leaf_count - 1) * byte_size_of_node_content::<WIDTH>()
    } else {
        size_of_perfect_tree::<WIDTH, CHUNK_SIZE>(leaf_count / 2, start_offset)
            + byte_size_of_node_content::<WIDTH>() // root content
            + size_of_perfect_tree::<WIDTH, CHUNK_SIZE>(leaf_count / 2, start_offset.saturating_sub(leaf_count / 2))
    }
}

#[test]
fn test_chunk_to_byte_index() {
    let expected = [
        &[
            // The i-th slice lists the ByteIndex for the chunks of the trees on i + 1 leaves,
            // for a start_offset of zero.
            &[0u64][..],
            &[64, 64 + 1024][..],
            &[64 * 2, 64 * 2 + 1024, 64 * 2 + 1024 * 2][..],
            &[64 * 2, 64 * 2 + 1024, 64 * 3 + 1024 * 2, 64 * 3 + 1024 * 3][..],
            &[
                64 * 3,
                64 * 3 + 1024,
                64 * 4 + 1024 * 2,
                64 * 4 + 1024 * 3,
                64 * 4 + 1024 * 4,
            ][..],
        ][..],
        &[
            // The i-th slice lists the ByteIndex for the chunks of the trees on i + 2 leaves,
            // for a start_offset of one.
            &[64][..],
            &[64 * 2, 64 * 2 + 1024][..],
            &[64 * 2, 64 * 3 + 1024, 64 * 3 + 1024 * 2][..],
            &[64 * 3, 64 * 4 + 1024, 64 * 4 + 1024 * 2, 64 * 4 + 1024 * 3][..],
        ][..],
        &[
            // The i-th slice lists the ByteIndex for the chunks of the trees on i + 3 leaves,
            // for a start_offset of two.
            &[64][..],
            &[64 * 2, 64 * 2 + 1024][..],
            &[64 * 3, 64 * 3 + 1024, 64 * 3 + 1024 * 2][..],
        ][..],
        &[
            // The i-th slice lists the ByteIndex for the chunks of the trees on i + 4 leaves,
            // for a start_offset of three.
            &[64 * 2][..],
            &[64 * 3, 64 * 3 + 1024][..],
        ][..],
        &[
            // The i-th slice lists the ByteIndex for the chunks of the trees on i + 5 leaves,
            // for a start_offset of four.
            &[64][..],
        ][..],
    ];

    for (start_offset, expected) in expected.iter().enumerate() {
        for (i, testdata) in expected.iter().enumerate() {
            let total_chunk_count = (i + 1 + start_offset) as u64;
            for (chunk_index, byte_offset) in testdata.iter().enumerate() {
                let chunk_index = chunk_index + start_offset;

                assert_eq!(
                    chunk_to_byte_index::<32, 1024>(
                        chunk_index as ChunkIndex,
                        total_chunk_count,
                        start_offset as u64,
                    ),
                    *byte_offset,
                    "start_offset: {start_offset}, total_chunk_count: {total_chunk_count}, chunk_index: {chunk_index}",
                );
            }
        }
    }
}

/// Converts a [`NodeNumber`] into the [`ByteIndex`] at which a [`StorageBackend`] stores the raw bytes for the node (a node content for inner nodes, or a chunk for a leaf node).
///
/// The `start_offset` gives the first chunk of the stored slice.
///
/// Unspecified behaviour if the `node_number` refers to a node outside the range of valid nodes defined by the `start_offset` and the `chunk_count`.
pub(crate) fn node_number_to_byte_index<const WIDTH: usize, const CHUNK_SIZE: usize>(
    node_number: NodeNumber,
    chunk_count: ChunkCount,
    start_offset: ChunkIndex,
) -> ByteIndex {
    if start_offset >= chunk_count {
        return 0;
    }

    if node_number == 0 {
        // Trivial case: the root is always stored at offset zero.
        return 0;
    } else {
        let left_subtree_node_count = chunk_count.next_power_of_two() - 1;
        let left_subtree_leaf_count = chunk_count.next_power_of_two() / 2;

        if node_number <= left_subtree_node_count {
            // node_number is in the left subtree.
            return byte_size_of_node_content::<WIDTH>() // root content
                    + node_number_to_byte_index::<WIDTH, CHUNK_SIZE>(
                        node_number - 1,
                        left_subtree_leaf_count,
                        start_offset,
                    );
        } else {
            return size_of_perfect_tree::<WIDTH, CHUNK_SIZE>(
                        left_subtree_leaf_count,
                        start_offset,
                    ) + byte_size_of_node_content::<WIDTH>() // root content
                     + node_number_to_byte_index::<WIDTH, CHUNK_SIZE>(
                        node_number - (left_subtree_node_count + 1),
                        chunk_count - left_subtree_leaf_count,
                        start_offset.saturating_sub(left_subtree_leaf_count),
                    );
        }
    }
}

#[test]
fn test_node_number_to_byte_index() {
    let expected = [
        &[
            // The i-th slice lists the byte sizes for the nodes of the trees on i + 1 leaves,
            // for a start_offset of zero.
            &[1024][..],
            &[64, 1024, 1024][..],
            &[64, 64, 1024, 1024, 1024][..],
            &[64, 64, 1024, 1024, 64, 1024, 1024][..],
            &[64, 64, 64, 1024, 1024, 64, 1024, 1024, 1024][..],
        ][..],
        &[
            // The i-th slice lists the byte sizes for the nodes of the trees on i + 2 leaves,
            // for a start_offset of one.
            &[64, 0, 1024][..],
            &[64, 64, 0, 1024, 1024][..],
            &[64, 64, 0, 1024, 64, 1024, 1024][..],
            &[64, 64, 64, 0, 1024, 64, 1024, 1024, 1024][..],
        ][..],
        &[
            // The i-th slice lists the byte sizes for the nodes of the trees on i + 3 leaves,
            // for a start_offset of two.
            &[64, 0, 0, 0, 1024][..],
            &[64, 0, 0, 0, 64, 1024, 1024][..],
            &[64, 64, 0, 0, 0, 64, 1024, 1024, 1024][..],
        ][..],
        &[
            // The i-th slice lists the byte sizes for the nodes of the trees on i + 4 leaves,
            // for a start_offset of three.
            &[64, 0, 0, 0, 64, 0, 1024][..],
            &[64, 64, 0, 0, 0, 64, 0, 1024, 1024][..],
        ][..],
        &[
            // The i-th slice lists the byte sizes for the nodes of the trees on i + 5 leaves,
            // for a start_offset of four.
            &[64, 0, 0, 0, 0, 0, 0, 0, 1024][..],
        ][..],
    ];

    for (start_offset, expected) in expected.iter().enumerate() {
        for (i, testdata) in expected.iter().enumerate() {
            let total_chunk_count = (i + 1 + start_offset) as u64;

            let mut total_offset = 0;
            for (node_number, node_encoding_size) in testdata.iter().enumerate() {
                // println!(
                //     "start_offset {start_offset} total_chunk_count {total_chunk_count} node_number {node_number}\n"
                // );

                assert_eq!(
                    node_number_to_byte_index::<32, 1024>(
                        node_number as NodeNumber,
                        total_chunk_count,
                        start_offset as u64,
                    ),
                    total_offset,
                    "start_offset: {start_offset}, total_chunk_count: {total_chunk_count}, node_number: {node_number}",
                );

                total_offset += *node_encoding_size;
            }
        }
    }
}

pub(crate) fn chunk_count_to_tree_height(chunk_count: ChunkCount) -> RootedPathLength {
    1 + chunk_count.next_power_of_two().ilog2() as u8
}

#[test]
fn test_tree_height_computation() {
    for (i, height) in [1, 2, 3, 3, 4].into_iter().enumerate() {
        let chunk_count = i + 1;
        assert_eq!(chunk_count_to_tree_height(chunk_count as u64), height);
    }
}

// /// Given a total string length, returns how many bytes long the chunk at the given index is.
// ///
// /// Returns `0` only if `string_length_in_bytes` is zero, otherwise returns a value between one and `CHUNK_SIZE`, both inclusive.
// pub(crate) fn chunk_to_length<const CHUNK_SIZE: usize>(
//     chunk_index: ChunkIndex,
//     string_length_in_bytes: ByteCount,
// ) -> ByteCount {
//     if string_length_in_bytes == 0 {
//         0
//     } else {
//         let total_number_of_chunks = string_length_in_bytes.div_ceil(CHUNK_SIZE as u64);
//         if chunk_index < total_number_of_chunks - 1 {
//             CHUNK_SIZE as u64
//         } else {
//             let tmp = string_length_in_bytes % (CHUNK_SIZE as u64);
//             if tmp == 0 { CHUNK_SIZE as u64 } else { tmp }
//         }
//     }
// }

// #[test]
// fn test_chunk_to_length() {
//     assert_eq!(chunk_to_length::<4>(0, 0), 0);
//     assert_eq!(chunk_to_length::<4>(0, 1), 1);
//     assert_eq!(chunk_to_length::<4>(0, 2), 2);
//     assert_eq!(chunk_to_length::<4>(0, 3), 3);
//     assert_eq!(chunk_to_length::<4>(0, 4), 4);

//     assert_eq!(chunk_to_length::<4>(0, 5), 4);
//     assert_eq!(chunk_to_length::<4>(1, 5), 1);
//     assert_eq!(chunk_to_length::<4>(0, 6), 4);
//     assert_eq!(chunk_to_length::<4>(1, 6), 2);
//     assert_eq!(chunk_to_length::<4>(0, 7), 4);
//     assert_eq!(chunk_to_length::<4>(1, 7), 3);
//     assert_eq!(chunk_to_length::<4>(0, 8), 4);
//     assert_eq!(chunk_to_length::<4>(1, 8), 4);

//     assert_eq!(chunk_to_length::<4>(0, 9), 4);
//     assert_eq!(chunk_to_length::<4>(1, 9), 4);
//     assert_eq!(chunk_to_length::<4>(2, 9), 1);
//     assert_eq!(chunk_to_length::<4>(0, 10), 4);
//     assert_eq!(chunk_to_length::<4>(1, 10), 4);
//     assert_eq!(chunk_to_length::<4>(2, 10), 2);
//     assert_eq!(chunk_to_length::<4>(0, 11), 4);
//     assert_eq!(chunk_to_length::<4>(1, 11), 4);
//     assert_eq!(chunk_to_length::<4>(2, 11), 3);
//     assert_eq!(chunk_to_length::<4>(0, 12), 4);
//     assert_eq!(chunk_to_length::<4>(1, 12), 4);
//     assert_eq!(chunk_to_length::<4>(2, 12), 4);
// }

/// Given a total string length, returns how many bytes the node of the given node number covers.
pub(crate) fn node_number_to_length<const CHUNK_SIZE: usize>(
    node_number: NodeNumber,
    string_length_in_bytes: ByteCount,
) -> ByteCount {
    if node_number == 0 {
        // Trivial case: the root covers the full string.
        return string_length_in_bytes;
    } else if string_length_in_bytes <= CHUNK_SIZE as u64 {
        // Another trivial case: if all bytes fit into a single chunk, the chunk node covers all bytes.
        return string_length_in_bytes;
    } else {
        let chunk_count = string_length_in_bytes.div_ceil(CHUNK_SIZE as u64);

        let left_subtree_node_count = chunk_count.next_power_of_two() - 1;
        let left_subtree_leaf_count = chunk_count.next_power_of_two() / 2;

        if node_number <= left_subtree_node_count {
            // node_number is in the left subtree.
            return node_number_to_length::<CHUNK_SIZE>(
                node_number - 1,
                left_subtree_leaf_count * (CHUNK_SIZE as u64),
            );
        } else {
            return node_number_to_length::<CHUNK_SIZE>(
                node_number - (left_subtree_node_count + 1),
                string_length_in_bytes - (left_subtree_leaf_count * (CHUNK_SIZE as u64)),
            );
        }
    }
}

#[test]
fn test_node_number_to_length() {
    assert_eq!(node_number_to_length::<4>(0, 0), 0);
    assert_eq!(node_number_to_length::<4>(0, 1), 1);
    assert_eq!(node_number_to_length::<4>(0, 2), 2);
    assert_eq!(node_number_to_length::<4>(0, 3), 3);
    assert_eq!(node_number_to_length::<4>(0, 4), 4);

    assert_eq!(node_number_to_length::<4>(0, 11), 11);
    assert_eq!(node_number_to_length::<4>(1, 11), 8);
    assert_eq!(node_number_to_length::<4>(2, 11), 4);
    assert_eq!(node_number_to_length::<4>(3, 11), 4);
    assert_eq!(node_number_to_length::<4>(4, 11), 3);

    assert_eq!(node_number_to_length::<4>(0, 16), 16);
    assert_eq!(node_number_to_length::<4>(1, 16), 8);
    assert_eq!(node_number_to_length::<4>(2, 16), 4);
    assert_eq!(node_number_to_length::<4>(3, 16), 4);
    assert_eq!(node_number_to_length::<4>(4, 16), 8);
    assert_eq!(node_number_to_length::<4>(5, 16), 4);
    assert_eq!(node_number_to_length::<4>(6, 16), 4);
}

/// Returns at least as many bytes as the length of the [baseline verifiable slice stream](https://worm-blossom.github.io/bab/#baseline_slice) for the given slice.
pub(crate) fn approximate_length_of_verifiable_stream<
    const WIDTH: usize,
    const CHUNK_SIZE: usize,
>(
    slice_start: ChunkIndex,
    slice_length: ChunkCount,
    string_length: ByteCount,
) -> ByteCount {
    let chunk_count = string_length_to_chunk_count::<CHUNK_SIZE>(string_length);

    let length_of_the_chunks = if slice_start + slice_length == chunk_count {
        (CHUNK_SIZE as ByteCount) * (slice_length - 1) + (string_length % (CHUNK_SIZE as u64))
    } else {
        (CHUNK_SIZE as ByteCount) * slice_length
    };

    if chunk_count == 1 {
        return length_of_the_chunks;
    } else {
        let num_vertices_in_tree_on_slice_length_many_leaves = slice_length * 2 - 1;

        let height_of_tree_on_slice_length_many_leaves =
            chunk_count_to_tree_height(slice_length) as u64;
        let total_height = chunk_count_to_tree_height(chunk_count) as u64;

        return
            // The chunks themselves.
            length_of_the_chunks
            // The subtree for the slice. (this might actually be multiple disjoint trees, but the counting stays the same).
            + num_vertices_in_tree_on_slice_length_many_leaves * (WIDTH as ByteCount) * 2
            // Paths from the root to the subtree(s) for the slice. If the two paths share the same vertices, this part overcounts.
            + (total_height - height_of_tree_on_slice_length_many_leaves) * (WIDTH as ByteCount) * 2 * 2
            // The root label itself is not part of the verifiable stream.
            - (WIDTH as ByteCount);
    }
}