Skip to main content

tdb_succinct/tfc/
block.rs

1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::hash::{Hash, Hasher};
4use std::io;
5
6use bytes::{Buf, BufMut, Bytes, BytesMut};
7use itertools::Either;
8use thiserror::Error;
9use tokio::io::{AsyncRead, AsyncReadExt};
10
11use crate::{
12    util::{find_common_prefix, find_common_prefix_ord},
13    vbyte::{self, encode_array},
14};
15
16pub const BLOCK_SIZE: usize = 8;
17
18#[derive(Debug, Error)]
19pub enum SizedDictError {
20    #[error("invalid coding")]
21    InvalidCoding,
22    #[error("not enough data")]
23    NotEnoughData,
24}
25
26#[derive(Debug, Error)]
27pub enum SizedDictReaderError {
28    #[error(transparent)]
29    Io(#[from] io::Error),
30    #[error(transparent)]
31    SizedDictError(#[from] SizedDictError),
32}
33
34impl SizedDictReaderError {
35    pub fn is_unexpected_eof(&self) -> bool {
36        match self {
37            Self::Io(e) => e.kind() == io::ErrorKind::UnexpectedEof,
38            Self::SizedDictError(SizedDictError::NotEnoughData) => true,
39            _ => false,
40        }
41    }
42}
43
44#[derive(Clone, Debug, PartialEq)]
45pub struct SizedBlockHeader {
46    head: Bytes,
47    num_entries: u8,
48    buffer_length: usize,
49    sizes: [usize; BLOCK_SIZE - 1],
50    shareds: [usize; BLOCK_SIZE - 1],
51}
52
53impl From<vbyte::DecodeError> for SizedDictError {
54    fn from(e: vbyte::DecodeError) -> Self {
55        match e {
56            vbyte::DecodeError::UnexpectedEndOfBuffer => Self::NotEnoughData,
57            _ => Self::InvalidCoding,
58        }
59    }
60}
61
62impl From<vbyte::DecodeReaderError> for SizedDictReaderError {
63    fn from(value: vbyte::DecodeReaderError) -> Self {
64        match value {
65            vbyte::DecodeReaderError::Io(e) => SizedDictReaderError::Io(e),
66            vbyte::DecodeReaderError::DecodeError(e) => Self::SizedDictError(e.into()),
67        }
68    }
69}
70
71impl SizedBlockHeader {
72    pub fn parse(buf: &mut Bytes) -> Result<Self, SizedDictError> {
73        let cw = buf.get_u8();
74
75        let (record_size, num_entries) = parse_block_control_word(cw);
76        let mut sizes = [0_usize; BLOCK_SIZE - 1];
77        let mut shareds = [0_usize; BLOCK_SIZE - 1];
78        let (first_size, _) = vbyte::decode_buf(buf)?;
79
80        let head = buf.split_to(first_size as usize);
81
82        for i in 0..(num_entries - 1) as usize {
83            let (shared, _) = vbyte::decode_buf(buf)?;
84            let size = if record_size == None {
85                let (size, _) = vbyte::decode_buf(buf)?;
86                size
87            } else {
88                record_size.unwrap() as u64 - shared
89            };
90            sizes[i] = size as usize;
91            shareds[i] = shared as usize;
92        }
93
94        let buffer_length = sizes.iter().sum();
95
96        Ok(Self {
97            head,
98            num_entries,
99            buffer_length,
100            sizes,
101            shareds,
102        })
103    }
104
105    pub async fn parse_from_reader<R: AsyncRead + Unpin>(
106        mut reader: R,
107    ) -> Result<Self, SizedDictReaderError> {
108        let cw = reader.read_u8().await?;
109
110        let (record_size, num_entries) = parse_block_control_word(cw);
111        let mut sizes = [0_usize; BLOCK_SIZE - 1];
112        let mut shareds = [0_usize; BLOCK_SIZE - 1];
113        let (first_size, _) = vbyte::decode_reader(&mut reader).await?;
114
115        let mut head = vec![0; first_size as usize];
116        reader.read_exact(&mut head).await?;
117
118        for i in 0..(num_entries - 1) as usize {
119            let (shared, _) = vbyte::decode_reader(&mut reader).await?;
120            let size = if record_size == None {
121                let (size, _) = vbyte::decode_reader(&mut reader).await?;
122                size
123            } else {
124                record_size.unwrap() as u64 - shared
125            };
126            sizes[i] = size as usize;
127            shareds[i] = shared as usize;
128        }
129
130        let buffer_length = sizes.iter().sum();
131
132        Ok(Self {
133            head: Bytes::from(head),
134            num_entries,
135            buffer_length,
136            sizes,
137            shareds,
138        })
139    }
140}
141
142#[derive(Clone, Debug)]
143pub enum SizedDictEntry {
144    Single(Bytes),
145    Rope(Vec<Bytes>),
146}
147
148impl From<Bytes> for SizedDictEntry {
149    fn from(val: Bytes) -> Self {
150        Self::Single(val)
151    }
152}
153
154impl From<Vec<Bytes>> for SizedDictEntry {
155    fn from(val: Vec<Bytes>) -> Self {
156        Self::Rope(val)
157    }
158}
159
160impl SizedDictEntry {
161    pub fn new(parts: Vec<Bytes>) -> Self {
162        Self::Rope(parts)
163    }
164
165    pub fn new_optimized(parts: Vec<Bytes>) -> Self {
166        let mut entry = Self::new(parts);
167        entry.optimize();
168
169        entry
170    }
171
172    pub fn to_bytes(&self) -> Bytes {
173        match self {
174            Self::Single(b) => b.clone(),
175            Self::Rope(v) => {
176                if v.len() == 1 {
177                    v[0].clone()
178                } else {
179                    let mut buf = BytesMut::with_capacity(self.len());
180                    for slice in v.iter() {
181                        buf.extend_from_slice(&slice[..]);
182                    }
183
184                    buf.freeze()
185                }
186            }
187        }
188    }
189
190    pub fn chunks(&self) -> impl Iterator<Item = &Bytes> {
191        match self {
192            Self::Single(b) => Either::Left(std::iter::once(b)),
193            Self::Rope(v) => Either::Right(v.iter()),
194        }
195    }
196
197    pub fn into_chunks(self) -> impl Iterator<Item = Bytes> {
198        match self {
199            Self::Single(b) => Either::Left(std::iter::once(b)),
200            Self::Rope(v) => Either::Right(v.into_iter()),
201        }
202    }
203
204    pub fn to_vec(&self) -> Vec<u8> {
205        let mut v = Vec::with_capacity(self.len());
206
207        for slice in self.chunks() {
208            v.extend_from_slice(slice);
209        }
210
211        v
212    }
213
214    pub fn as_buf(&self) -> SizedDictEntryBuf {
215        SizedDictEntryBuf {
216            entry: Cow::Borrowed(self),
217            slice_ix: 0,
218            pos_in_slice: 0,
219        }
220    }
221
222    pub fn into_buf(self) -> OwnedSizedDictEntryBuf {
223        SizedDictEntryBuf {
224            entry: Cow::Owned(self),
225            slice_ix: 0,
226            pos_in_slice: 0,
227        }
228    }
229
230    pub fn len(&self) -> usize {
231        self.chunks().map(|s| s.len()).sum()
232    }
233
234    fn rope_len(&self) -> usize {
235        match self {
236            Self::Single(_) => 1,
237            Self::Rope(v) => v.len(),
238        }
239    }
240
241    /// optimize size
242    ///
243    /// For short strings, a list of pointers may be much less
244    /// efficient than a copy of the string.  This will copy the
245    /// underlying string if that is the case.
246    pub fn optimize(&mut self) {
247        let overhead_size = std::mem::size_of::<Bytes>() * self.rope_len();
248
249        if std::mem::size_of::<Bytes>() + self.len() < overhead_size {
250            let mut bytes = BytesMut::with_capacity(self.len());
251            for part in self.chunks() {
252                bytes.extend(part);
253            }
254
255            *self = Self::Single(bytes.freeze());
256        }
257    }
258
259    pub fn buf_eq<B: Buf>(&self, mut b: B) -> bool {
260        if self.len() != b.remaining() {
261            false
262        } else if self.len() == 0 {
263            true
264        } else {
265            let mut it = self.chunks();
266            let mut part = it.next().unwrap();
267            loop {
268                let slice = b.chunk();
269
270                match part.len().cmp(&slice.len()) {
271                    Ordering::Less => {
272                        if part.as_ref() != &slice[..part.len()] {
273                            return false;
274                        }
275                    }
276                    Ordering::Equal => {
277                        if part != slice {
278                            return false;
279                        }
280
281                        assert!(it.next().is_none());
282                        return true;
283                    }
284                    Ordering::Greater => {
285                        panic!("This should never happen because it'd mean our entry is larger than the buffer passed in, but we already checked to make sure that is not the case");
286                    }
287                }
288
289                b.advance(part.len());
290                part = it.next().unwrap();
291            }
292        }
293    }
294}
295
296impl PartialEq for SizedDictEntry {
297    fn eq(&self, other: &Self) -> bool {
298        // unequal length, so can't be equal
299        if self.len() != other.len() {
300            return false;
301        }
302
303        self.cmp(other) == Ordering::Equal
304    }
305}
306
307impl Eq for SizedDictEntry {}
308
309impl Hash for SizedDictEntry {
310    fn hash<H: Hasher>(&self, state: &mut H) {
311        for part in self.chunks() {
312            state.write(part);
313        }
314    }
315}
316
317impl Ord for SizedDictEntry {
318    fn cmp(&self, other: &Self) -> Ordering {
319        // both are empty, so equal
320        if self.len() == 0 && other.len() == 0 {
321            return Ordering::Equal;
322        }
323
324        let mut it1 = self.chunks();
325        let mut it2 = other.chunks();
326        let mut part1 = Cow::Borrowed(it1.next().unwrap());
327        let mut part2 = Cow::Borrowed(it2.next().unwrap());
328
329        loop {
330            match part1.len().cmp(&part2.len()) {
331                Ordering::Equal => {
332                    match part1.cmp(&part2) {
333                        Ordering::Less => return Ordering::Less,
334                        Ordering::Greater => return Ordering::Greater,
335                        Ordering::Equal => {}
336                    }
337
338                    let p1_next = it1.next();
339                    let p2_next = it2.next();
340
341                    if let (Some(p1), Some(p2)) = (p1_next, p2_next) {
342                        part1 = Cow::Borrowed(p1);
343                        part2 = Cow::Borrowed(p2);
344                    } else if p1_next.is_none() && p2_next.is_none() {
345                        // done! everything has been compared equally and nothign remains.
346                        return Ordering::Equal;
347                    } else if p1_next.is_none() {
348                        // the left side is a prefix of the right side
349
350                        return Ordering::Less;
351                    } else {
352                        return Ordering::Greater;
353                    }
354                }
355                Ordering::Less => {
356                    let part2_slice = Cow::Owned(part2.slice(0..part1.len()));
357                    match part1.cmp(&part2_slice) {
358                        Ordering::Less => return Ordering::Less,
359                        Ordering::Greater => return Ordering::Greater,
360                        Ordering::Equal => {}
361                    }
362
363                    part2 = Cow::Owned(part2.slice(part1.len()..));
364                    let part1_option = it1.next();
365                    if part1_option.is_none() {
366                        return Ordering::Less;
367                    }
368                    part1 = Cow::Borrowed(part1_option.unwrap());
369                }
370                Ordering::Greater => {
371                    let part1_slice = part1.slice(0..part2.len());
372                    match part1_slice.cmp(&part2) {
373                        Ordering::Less => return Ordering::Less,
374                        Ordering::Greater => return Ordering::Greater,
375                        Ordering::Equal => {}
376                    }
377
378                    part1 = Cow::Owned(part1.slice(part2.len()..));
379                    let part2_option = it2.next();
380                    if part2_option.is_none() {
381                        return Ordering::Greater;
382                    }
383                    part2 = Cow::Owned(part2_option.unwrap().clone());
384                }
385            }
386        }
387    }
388}
389
390impl PartialOrd for SizedDictEntry {
391    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
392        Some(self.cmp(other))
393    }
394}
395
396#[derive(Clone)]
397pub struct SizedDictEntryBuf<'a> {
398    entry: Cow<'a, SizedDictEntry>,
399    slice_ix: usize,
400    pos_in_slice: usize,
401}
402
403impl<'a> SizedDictEntryBuf<'a> {
404    fn current_slice(&self) -> &Bytes {
405        match self.entry.as_ref() {
406            SizedDictEntry::Single(b) => &b,
407            SizedDictEntry::Rope(v) => &v[self.slice_ix],
408        }
409    }
410}
411
412impl<'a> Buf for SizedDictEntryBuf<'a> {
413    fn remaining(&self) -> usize {
414        {
415            let pos_in_slice = self.pos_in_slice;
416            let total: usize = self
417                .entry
418                .chunks()
419                .skip(self.slice_ix)
420                .map(|s| s.len())
421                .sum();
422            total - pos_in_slice
423        }
424    }
425
426    fn chunk(&self) -> &[u8] {
427        {
428            let pos_in_slice = self.pos_in_slice;
429            if self.slice_ix >= self.entry.rope_len() {
430                &[]
431            } else {
432                let slice = self.current_slice();
433                &slice[pos_in_slice..]
434            }
435        }
436    }
437
438    fn advance(&mut self, cnt: usize) {
439        {
440            let mut cnt = cnt;
441            if self.slice_ix < self.entry.rope_len() {
442                let slice = self.current_slice();
443                let remaining_in_slice = slice.len() - self.pos_in_slice;
444
445                if remaining_in_slice > cnt {
446                    // we remain in the slice we're at.
447                    self.pos_in_slice += cnt;
448                } else {
449                    // we are starting at the next slice
450                    cnt -= remaining_in_slice;
451                    self.slice_ix += 1;
452
453                    loop {
454                        if self.entry.rope_len() >= self.slice_ix {
455                            // past the end
456                            self.pos_in_slice = 0;
457                            break;
458                        }
459
460                        let slice_len = self.current_slice().len();
461
462                        if cnt < slice_len {
463                            // this is our slice
464                            self.pos_in_slice = cnt;
465                            break;
466                        }
467
468                        // not our slice, so advance to next
469                        cnt -= self.entry.rope_len();
470                        self.slice_ix += 1;
471                    }
472                }
473            }
474        }
475    }
476}
477
478pub type OwnedSizedDictEntryBuf = SizedDictEntryBuf<'static>;
479
480#[derive(Debug)]
481pub struct SizedDictBlock {
482    header: SizedBlockHeader,
483    data: Bytes,
484}
485
486impl SizedDictBlock {
487    pub fn parse(bytes: &mut Bytes) -> Result<Self, SizedDictError> {
488        let header = SizedBlockHeader::parse(bytes)?;
489        if bytes.remaining() < header.buffer_length {
490            return Err(SizedDictError::NotEnoughData);
491        }
492
493        let data = bytes.split_to(header.buffer_length);
494
495        Ok(Self { header, data })
496    }
497
498    pub async fn parse_from_reader<R: AsyncRead + Unpin>(
499        mut reader: R,
500    ) -> Result<Self, SizedDictReaderError> {
501        let header = SizedBlockHeader::parse_from_reader(&mut reader).await?;
502        let mut data = vec![0; header.buffer_length];
503        reader.read_exact(&mut data).await?;
504
505        Ok(Self {
506            header,
507            data: Bytes::from(data),
508        })
509    }
510
511    pub fn num_entries(&self) -> u8 {
512        self.header.num_entries
513    }
514
515    pub fn is_incomplete(&self) -> bool {
516        self.header.num_entries != BLOCK_SIZE as u8
517    }
518
519    pub fn entry(&self, index: usize) -> SizedDictEntry {
520        if index == 0 {
521            return SizedDictEntry::new(vec![self.header.head.clone()]);
522        }
523
524        let mut v = Vec::with_capacity(7);
525        let mut last = self.header.shareds[index - 1];
526        if last != 0 {
527            v.push(last);
528        }
529        if last != 0 {
530            for i in (0..index - 1).rev() {
531                let shared = self.header.shareds[i];
532                if shared == 0 {
533                    break;
534                }
535
536                if shared < last {
537                    v.push(shared);
538                    last = shared;
539                } else {
540                    v.push(last);
541                }
542            }
543        }
544
545        let start = index - v.len();
546
547        let mut taken = 0;
548        let mut slices = Vec::with_capacity(v.len() + 1);
549
550        let mut offset: usize;
551        if start == 0 {
552            offset = 0;
553        } else {
554            offset = self.header.sizes.iter().take(start - 1).sum();
555        }
556        for (ix, shared) in v.iter().rev().enumerate() {
557            let have_to_take = shared - taken;
558            let cur_offset = offset;
559
560            if !(ix == 0 && start == 0) {
561                // the head slice does not contribute to the offset
562                offset += self.header.sizes[start + ix - 1];
563            }
564
565            if have_to_take == 0 {
566                continue;
567            }
568
569            let slice;
570            if ix == 0 && start == 0 {
571                // the slice has to come out of the header
572                slice = self.header.head.slice(..have_to_take);
573            } else {
574                slice = self.data.slice(cur_offset..cur_offset + have_to_take);
575            }
576            slices.push(slice);
577            taken += have_to_take;
578        }
579
580        let suffix_size = self.header.sizes[index - 1];
581        slices.push(self.data.slice(offset..offset + suffix_size));
582
583        SizedDictEntry::new_optimized(slices)
584    }
585
586    fn suffixes<'a>(&'a self) -> impl Iterator<Item = Bytes> + 'a {
587        let head = Some(self.header.head.clone());
588        let mut offset = 0;
589        let tail = self.header.sizes.iter().map(move |s| {
590            let slice = self.data.slice(offset..*s + offset);
591            offset += s;
592
593            slice
594        });
595
596        head.into_iter().chain(tail)
597    }
598
599    pub fn id(&self, slice: &[u8]) -> IdLookupResult {
600        let (mut common_prefix, ordering) = find_common_prefix_ord(slice, &self.header.head);
601        match ordering {
602            Ordering::Equal => return IdLookupResult::Found(0),
603            Ordering::Less => return IdLookupResult::NotFound,
604            // We have to traverse the block
605            Ordering::Greater => {}
606        }
607
608        for (ix, (shared, suffix)) in self
609            .header
610            .shareds
611            .iter()
612            .zip(self.suffixes().skip(1))
613            .enumerate()
614        {
615            if *shared < common_prefix {
616                return IdLookupResult::Closest(ix as u64);
617            } else if *shared > common_prefix {
618                continue;
619            }
620
621            let (new_common_prefix, ordering) =
622                find_common_prefix_ord(&slice[common_prefix..], &suffix[..]);
623            match ordering {
624                Ordering::Equal => return IdLookupResult::Found(ix as u64 + 1),
625                Ordering::Less => return IdLookupResult::Closest(ix as u64),
626                Ordering::Greater => {
627                    common_prefix += new_common_prefix;
628                }
629            }
630        }
631
632        IdLookupResult::Closest(self.header.num_entries as u64 - 1)
633    }
634
635    pub fn iter<'a>(&'a self) -> SizedBlockIterator<'a> {
636        SizedBlockIterator {
637            header: Cow::Borrowed(&self.header),
638            data: self.data.clone(),
639            ix: 0,
640            last: None,
641        }
642    }
643
644    pub fn into_iter(self) -> OwnedSizedBlockIterator {
645        SizedBlockIterator {
646            header: Cow::Owned(self.header),
647            data: self.data.clone(),
648            ix: 0,
649            last: None,
650        }
651    }
652}
653
654pub type OwnedSizedBlockIterator = SizedBlockIterator<'static>;
655
656#[derive(Clone)]
657pub struct SizedBlockIterator<'a> {
658    header: Cow<'a, SizedBlockHeader>,
659    data: Bytes,
660    ix: usize,
661    last: Option<Vec<Bytes>>,
662}
663
664impl<'a> Iterator for SizedBlockIterator<'a> {
665    type Item = SizedDictEntry;
666
667    fn next(&mut self) -> Option<SizedDictEntry> {
668        if let Some(last) = self.last.as_mut() {
669            if self.ix >= self.header.num_entries as usize - 1 {
670                return None;
671            }
672            let size = self.header.sizes[self.ix];
673            let mut shared = self.header.shareds[self.ix];
674            for rope_index in 0..last.len() {
675                let x = &mut last[rope_index];
676                if x.len() < shared {
677                    shared -= x.len();
678                    continue;
679                }
680
681                x.truncate(shared);
682                last.truncate(rope_index + 1);
683                break;
684            }
685
686            last.push(self.data.split_to(size));
687            self.ix += 1;
688
689            Some(SizedDictEntry::new_optimized(last.clone()))
690        } else {
691            let mut last = Vec::with_capacity(BLOCK_SIZE);
692            last.push(self.header.head.clone());
693            let result = last.clone();
694            self.last = Some(last);
695            Some(SizedDictEntry::new_optimized(result))
696        }
697    }
698}
699
700#[derive(Clone, Copy, Debug, PartialEq, Eq)]
701pub enum IdLookupResult {
702    Found(u64),
703    Closest(u64),
704    NotFound,
705}
706
707impl IdLookupResult {
708    pub fn offset(self, offset: u64) -> Self {
709        match self {
710            Self::Found(i) => Self::Found(i + offset),
711            Self::Closest(i) => Self::Closest(i + offset),
712            Self::NotFound => Self::NotFound,
713        }
714    }
715
716    pub fn default(self, default: u64) -> Self {
717        match self {
718            Self::NotFound => Self::Closest(default),
719            _ => self,
720        }
721    }
722
723    pub fn map<F: Fn(u64) -> u64>(self, f: F) -> Self {
724        match self {
725            Self::Found(i) => Self::Found(f(i)),
726            Self::Closest(i) => Self::Closest(f(i)),
727            Self::NotFound => Self::NotFound,
728        }
729    }
730
731    pub fn into_option(self) -> Option<u64> {
732        match self {
733            Self::Found(i) => Some(i),
734            _ => None,
735        }
736    }
737}
738
739pub fn parse_block_control_records(cw: u8) -> u8 {
740    parse_block_control_word(cw).1
741}
742
743pub fn parse_block_control_word(cw: u8) -> (Option<u8>, u8) {
744    let records = (cw & ((1 << 3) - 1)) + 1;
745    let record_size = record_size_decoding(cw);
746    (record_size, records)
747}
748
749fn record_size_decoding(enc: u8) -> Option<u8> {
750    match enc >> 3 {
751        0 => None,
752        3 => Some(4),
753        4 => Some(8),
754        _ => panic!("Ok, this is not known"),
755    }
756}
757
758fn record_size_encoding(record_size: Option<u8>) -> u8 {
759    match record_size {
760        None => 0,
761        Some(4) => 3 << 3,
762        Some(8) => 4 << 3,
763        _ => {
764            panic!("This is really bad!")
765        }
766    }
767}
768
769fn create_block_control_word(record_size: Option<u8>, records: u8) -> u8 {
770    records - 1 + record_size_encoding(record_size)
771}
772
773pub(crate) fn build_block_unchecked<B: BufMut>(
774    record_size: Option<u8>,
775    buf: &mut B,
776    slices: &[&[u8]],
777) -> usize {
778    let mut size = 0;
779    let slices_len = slices.len();
780    debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0);
781    let cw = create_block_control_word(record_size, slices_len as u8);
782    buf.put_u8(cw as u8);
783    size += 1;
784
785    let first = slices[0];
786    let (vbyte, vbyte_len) = encode_array(first.len() as u64);
787
788    // write the head first
789    buf.put_slice(&vbyte[..vbyte_len]);
790    buf.put_slice(slices[0]);
791    size += vbyte_len + slices[0].len();
792
793    let mut last = first;
794
795    let mut suffixes: Vec<&[u8]> = Vec::with_capacity(slices.len());
796    for i in 1..slices.len() {
797        let cur = slices[i];
798        let common_prefix = find_common_prefix(last, cur);
799        let (vbyte, vbyte_len) = encode_array(common_prefix as u64);
800        buf.put_slice(&vbyte[..vbyte_len]);
801        size += vbyte_len;
802
803        if record_size == None {
804            let suffix_len = cur.len() - common_prefix;
805            let (vbyte, vbyte_len) = encode_array(suffix_len as u64);
806            buf.put_slice(&vbyte[..vbyte_len]);
807            size += vbyte_len;
808        }
809        suffixes.push(&cur[common_prefix..]);
810        last = cur;
811    }
812
813    // write the rest of the slices
814    for suffix in suffixes.into_iter() {
815        buf.put_slice(suffix);
816        size += suffix.len();
817    }
818
819    size
820}
821
822pub fn block_head(mut block: Bytes) -> Result<Bytes, SizedDictError> {
823    block.advance(1);
824    let (size, _) = vbyte::decode_buf(&mut block)?;
825    Ok(block.split_to(size as usize))
826}
827
828#[cfg(test)]
829mod tests {
830    use super::*;
831    use bytes::Buf;
832
833    fn build_block_bytes(strings: &[&[u8]]) -> Bytes {
834        let mut buf = BytesMut::new();
835        build_block_unchecked(None, &mut buf, &strings);
836
837        buf.freeze()
838    }
839
840    fn build_block(strings: &[&[u8]]) -> SizedDictBlock {
841        let mut bytes = build_block_bytes(strings);
842
843        SizedDictBlock::parse(&mut bytes).unwrap()
844    }
845
846    #[test]
847    fn build_and_parse_block() {
848        let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"];
849
850        let block = build_block(&strings);
851
852        let expected_header = SizedBlockHeader {
853            head: Bytes::copy_from_slice(b"aaaaaa"),
854            num_entries: 5,
855            buffer_length: 11,
856            sizes: [2, 4, 3, 2, 0, 0, 0],
857            shareds: [2, 0, 1, 2, 0, 0, 0],
858        };
859
860        assert_eq!(expected_header, block.header);
861
862        let expected_bytes = b"bbccccdefff";
863        assert_eq!(expected_bytes, &block.data[..]);
864    }
865
866    #[test]
867    fn entry_in_block() {
868        let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"];
869        let block = build_block(&strings);
870
871        for (ix, string) in strings.iter().enumerate() {
872            assert_eq!(*string, &block.entry(ix).to_vec()[..]);
873        }
874    }
875
876    #[test]
877    fn entry_in_complete_block() {
878        let strings: [&[u8]; 8] = [
879            b"aaaaaa",
880            b"aabb",
881            b"cccc",
882            b"cdef",
883            b"cdff",
884            b"cdffasdf",
885            b"cdffeeee",
886            b"ceeeeeeeeeeeeeee",
887        ];
888        let block = build_block(&strings);
889
890        for (ix, string) in strings.iter().enumerate() {
891            assert_eq!(*string, &block.entry(ix).to_vec()[..]);
892        }
893    }
894
895    #[test]
896    fn entry_buf_in_complete_block() {
897        let strings: [&[u8]; 8] = [
898            b"aaaaaa",
899            b"aabb",
900            b"cccc",
901            b"cdef",
902            b"cdff",
903            b"cdffasdf",
904            b"cdffeeee",
905            b"ceeeeeeeeeeeeeee",
906        ];
907        let block = build_block(&strings);
908
909        for (ix, string) in strings.iter().enumerate() {
910            let entry = block.entry(ix);
911            let mut buf = entry.as_buf();
912            let len = buf.remaining();
913            let bytes = buf.copy_to_bytes(len);
914            assert_eq!(*string, &bytes[..]);
915        }
916    }
917
918    #[test]
919    fn entry_owned_buf_in_complete_block() {
920        let strings: [&[u8]; 8] = [
921            b"aaaaaa",
922            b"aabb",
923            b"cccc",
924            b"cdef",
925            b"cdff",
926            b"cdffasdf",
927            b"cdffeeee",
928            b"ceeeeeeeeeeeeeee",
929        ];
930        let block = build_block(&strings);
931
932        for (ix, string) in strings.iter().enumerate() {
933            let mut buf = block.entry(ix).into_buf();
934            let len = buf.remaining();
935            let bytes = buf.copy_to_bytes(len);
936            assert_eq!(*string, &bytes[..]);
937        }
938    }
939
940    #[test]
941    fn head_from_complete_block() {
942        let strings: [&[u8]; 8] = [
943            b"aaaaaa",
944            b"aabb",
945            b"cccc",
946            b"cdef",
947            b"cdff",
948            b"cdffasdf",
949            b"cdffeeee",
950            b"ceeeeeeeeeeeeeee",
951        ];
952        let block = build_block_bytes(&strings);
953        let head = block_head(block).unwrap();
954
955        assert_eq!(b"aaaaaa", &head[..]);
956    }
957
958    #[test]
959    fn slices_iter() {
960        let strings: [&[u8]; 8] = [
961            b"aaaaaa",
962            b"aabb",
963            b"cccc",
964            b"cdef",
965            b"cdff",
966            b"cdffasdf",
967            b"cdffeeee",
968            b"ceeeeeeeeeeeeeee",
969        ];
970        let block = build_block(&strings);
971
972        let expected_slices: Vec<&[u8]> = vec![
973            b"aaaaaa",
974            b"bb",
975            b"cccc",
976            b"def",
977            b"ff",
978            b"asdf",
979            b"eeee",
980            b"eeeeeeeeeeeeeee",
981        ];
982
983        let expected_bytes: Vec<_> = expected_slices
984            .into_iter()
985            .map(|b| Bytes::from(b))
986            .collect();
987
988        let actual: Vec<_> = block.suffixes().collect();
989
990        assert_eq!(expected_bytes, actual);
991    }
992
993    #[test]
994    fn block_id_lookup() {
995        let strings: [&[u8]; 8] = [
996            b"aaaaaa",
997            b"aabb",
998            b"cccc",
999            b"cdef",
1000            b"cdff",
1001            b"cdffasdf",
1002            b"cdffeeee",
1003            b"ceeeeeeeeeeeeeee",
1004        ];
1005        let block = build_block(&strings);
1006
1007        for (ix, string) in strings.iter().enumerate() {
1008            let index = block.id(string);
1009            assert_eq!(IdLookupResult::Found(ix as u64), index);
1010        }
1011    }
1012
1013    #[test]
1014    fn block_id_lookup_nonmatches() {
1015        let strings: [&[u8]; 8] = [
1016            b"aaaaaa",
1017            b"aabb",
1018            b"cccc",
1019            b"cdef",
1020            b"cdff",
1021            b"cdffasdf",
1022            b"cdffeeee",
1023            b"ceeeeeeeeeeeeeee",
1024        ];
1025        let block = build_block(&strings);
1026
1027        assert_eq!(IdLookupResult::NotFound, block.id(b"aa"));
1028
1029        assert_eq!(IdLookupResult::Closest(0), block.id(b"aaab"));
1030
1031        assert_eq!(IdLookupResult::Closest(1), block.id(b"aabba"));
1032
1033        assert_eq!(IdLookupResult::Closest(7), block.id(b"f"));
1034    }
1035
1036    #[test]
1037    fn enumerate_complete_block() {
1038        let strings: [&[u8]; 8] = [
1039            b"aaaaaa",
1040            b"aabb",
1041            b"cccc",
1042            b"cdef",
1043            b"cdff",
1044            b"cdffasdf",
1045            b"cdffeeee",
1046            b"ceeeeeeeeeeeeeee",
1047        ];
1048        let block = build_block(&strings);
1049
1050        let result: Vec<Bytes> = block.iter().map(|e| e.to_bytes()).collect();
1051        assert_eq!(
1052            strings
1053                .iter()
1054                .cloned()
1055                .map(Bytes::from_static)
1056                .collect::<Vec<_>>(),
1057            result
1058        );
1059    }
1060
1061    #[test]
1062    fn enumerate_incomplete_block() {
1063        let strings: [&[u8]; 6] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff", b"cdffasdf"];
1064        let block = build_block(&strings);
1065
1066        let result: Vec<Bytes> = block.iter().map(|e| e.to_bytes()).collect();
1067        assert_eq!(
1068            strings
1069                .iter()
1070                .cloned()
1071                .map(Bytes::from_static)
1072                .collect::<Vec<_>>(),
1073            result
1074        );
1075    }
1076
1077    #[test]
1078    fn control_word_round_trip() {
1079        let cw = create_block_control_word(None, 1);
1080        assert_eq!(parse_block_control_word(cw), (None, 1));
1081
1082        let cw = create_block_control_word(None, 8);
1083        assert_eq!(parse_block_control_word(cw), (None, 8));
1084
1085        let cw = create_block_control_word(None, 3);
1086        assert_eq!(parse_block_control_word(cw), (None, 3));
1087
1088        let cw = create_block_control_word(Some(8), 5);
1089        assert_eq!(parse_block_control_word(cw), (Some(8), 5));
1090
1091        let cw = create_block_control_word(Some(4), 6);
1092        assert_eq!(parse_block_control_word(cw), (Some(4), 6))
1093    }
1094}