1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::hash::{Hash, Hasher};
4use std::io;
5
6use bytes::{Buf, BufMut, Bytes, BytesMut};
7use itertools::Either;
8use thiserror::Error;
9use tokio::io::{AsyncRead, AsyncReadExt};
10
11use crate::{
12 util::{find_common_prefix, find_common_prefix_ord},
13 vbyte::{self, encode_array},
14};
15
16pub const BLOCK_SIZE: usize = 8;
17
18#[derive(Debug, Error)]
19pub enum SizedDictError {
20 #[error("invalid coding")]
21 InvalidCoding,
22 #[error("not enough data")]
23 NotEnoughData,
24}
25
26#[derive(Debug, Error)]
27pub enum SizedDictReaderError {
28 #[error(transparent)]
29 Io(#[from] io::Error),
30 #[error(transparent)]
31 SizedDictError(#[from] SizedDictError),
32}
33
34impl SizedDictReaderError {
35 pub fn is_unexpected_eof(&self) -> bool {
36 match self {
37 Self::Io(e) => e.kind() == io::ErrorKind::UnexpectedEof,
38 Self::SizedDictError(SizedDictError::NotEnoughData) => true,
39 _ => false,
40 }
41 }
42}
43
44#[derive(Clone, Debug, PartialEq)]
45pub struct SizedBlockHeader {
46 head: Bytes,
47 num_entries: u8,
48 buffer_length: usize,
49 sizes: [usize; BLOCK_SIZE - 1],
50 shareds: [usize; BLOCK_SIZE - 1],
51}
52
53impl From<vbyte::DecodeError> for SizedDictError {
54 fn from(e: vbyte::DecodeError) -> Self {
55 match e {
56 vbyte::DecodeError::UnexpectedEndOfBuffer => Self::NotEnoughData,
57 _ => Self::InvalidCoding,
58 }
59 }
60}
61
62impl From<vbyte::DecodeReaderError> for SizedDictReaderError {
63 fn from(value: vbyte::DecodeReaderError) -> Self {
64 match value {
65 vbyte::DecodeReaderError::Io(e) => SizedDictReaderError::Io(e),
66 vbyte::DecodeReaderError::DecodeError(e) => Self::SizedDictError(e.into()),
67 }
68 }
69}
70
71impl SizedBlockHeader {
72 pub fn parse(buf: &mut Bytes) -> Result<Self, SizedDictError> {
73 let cw = buf.get_u8();
74
75 let (record_size, num_entries) = parse_block_control_word(cw);
76 let mut sizes = [0_usize; BLOCK_SIZE - 1];
77 let mut shareds = [0_usize; BLOCK_SIZE - 1];
78 let (first_size, _) = vbyte::decode_buf(buf)?;
79
80 let head = buf.split_to(first_size as usize);
81
82 for i in 0..(num_entries - 1) as usize {
83 let (shared, _) = vbyte::decode_buf(buf)?;
84 let size = if record_size == None {
85 let (size, _) = vbyte::decode_buf(buf)?;
86 size
87 } else {
88 record_size.unwrap() as u64 - shared
89 };
90 sizes[i] = size as usize;
91 shareds[i] = shared as usize;
92 }
93
94 let buffer_length = sizes.iter().sum();
95
96 Ok(Self {
97 head,
98 num_entries,
99 buffer_length,
100 sizes,
101 shareds,
102 })
103 }
104
105 pub async fn parse_from_reader<R: AsyncRead + Unpin>(
106 mut reader: R,
107 ) -> Result<Self, SizedDictReaderError> {
108 let cw = reader.read_u8().await?;
109
110 let (record_size, num_entries) = parse_block_control_word(cw);
111 let mut sizes = [0_usize; BLOCK_SIZE - 1];
112 let mut shareds = [0_usize; BLOCK_SIZE - 1];
113 let (first_size, _) = vbyte::decode_reader(&mut reader).await?;
114
115 let mut head = vec![0; first_size as usize];
116 reader.read_exact(&mut head).await?;
117
118 for i in 0..(num_entries - 1) as usize {
119 let (shared, _) = vbyte::decode_reader(&mut reader).await?;
120 let size = if record_size == None {
121 let (size, _) = vbyte::decode_reader(&mut reader).await?;
122 size
123 } else {
124 record_size.unwrap() as u64 - shared
125 };
126 sizes[i] = size as usize;
127 shareds[i] = shared as usize;
128 }
129
130 let buffer_length = sizes.iter().sum();
131
132 Ok(Self {
133 head: Bytes::from(head),
134 num_entries,
135 buffer_length,
136 sizes,
137 shareds,
138 })
139 }
140}
141
142#[derive(Clone, Debug)]
143pub enum SizedDictEntry {
144 Single(Bytes),
145 Rope(Vec<Bytes>),
146}
147
148impl From<Bytes> for SizedDictEntry {
149 fn from(val: Bytes) -> Self {
150 Self::Single(val)
151 }
152}
153
154impl From<Vec<Bytes>> for SizedDictEntry {
155 fn from(val: Vec<Bytes>) -> Self {
156 Self::Rope(val)
157 }
158}
159
160impl SizedDictEntry {
161 pub fn new(parts: Vec<Bytes>) -> Self {
162 Self::Rope(parts)
163 }
164
165 pub fn new_optimized(parts: Vec<Bytes>) -> Self {
166 let mut entry = Self::new(parts);
167 entry.optimize();
168
169 entry
170 }
171
172 pub fn to_bytes(&self) -> Bytes {
173 match self {
174 Self::Single(b) => b.clone(),
175 Self::Rope(v) => {
176 if v.len() == 1 {
177 v[0].clone()
178 } else {
179 let mut buf = BytesMut::with_capacity(self.len());
180 for slice in v.iter() {
181 buf.extend_from_slice(&slice[..]);
182 }
183
184 buf.freeze()
185 }
186 }
187 }
188 }
189
190 pub fn chunks(&self) -> impl Iterator<Item = &Bytes> {
191 match self {
192 Self::Single(b) => Either::Left(std::iter::once(b)),
193 Self::Rope(v) => Either::Right(v.iter()),
194 }
195 }
196
197 pub fn into_chunks(self) -> impl Iterator<Item = Bytes> {
198 match self {
199 Self::Single(b) => Either::Left(std::iter::once(b)),
200 Self::Rope(v) => Either::Right(v.into_iter()),
201 }
202 }
203
204 pub fn to_vec(&self) -> Vec<u8> {
205 let mut v = Vec::with_capacity(self.len());
206
207 for slice in self.chunks() {
208 v.extend_from_slice(slice);
209 }
210
211 v
212 }
213
214 pub fn as_buf(&self) -> SizedDictEntryBuf {
215 SizedDictEntryBuf {
216 entry: Cow::Borrowed(self),
217 slice_ix: 0,
218 pos_in_slice: 0,
219 }
220 }
221
222 pub fn into_buf(self) -> OwnedSizedDictEntryBuf {
223 SizedDictEntryBuf {
224 entry: Cow::Owned(self),
225 slice_ix: 0,
226 pos_in_slice: 0,
227 }
228 }
229
230 pub fn len(&self) -> usize {
231 self.chunks().map(|s| s.len()).sum()
232 }
233
234 fn rope_len(&self) -> usize {
235 match self {
236 Self::Single(_) => 1,
237 Self::Rope(v) => v.len(),
238 }
239 }
240
241 pub fn optimize(&mut self) {
247 let overhead_size = std::mem::size_of::<Bytes>() * self.rope_len();
248
249 if std::mem::size_of::<Bytes>() + self.len() < overhead_size {
250 let mut bytes = BytesMut::with_capacity(self.len());
251 for part in self.chunks() {
252 bytes.extend(part);
253 }
254
255 *self = Self::Single(bytes.freeze());
256 }
257 }
258
259 pub fn buf_eq<B: Buf>(&self, mut b: B) -> bool {
260 if self.len() != b.remaining() {
261 false
262 } else if self.len() == 0 {
263 true
264 } else {
265 let mut it = self.chunks();
266 let mut part = it.next().unwrap();
267 loop {
268 let slice = b.chunk();
269
270 match part.len().cmp(&slice.len()) {
271 Ordering::Less => {
272 if part.as_ref() != &slice[..part.len()] {
273 return false;
274 }
275 }
276 Ordering::Equal => {
277 if part != slice {
278 return false;
279 }
280
281 assert!(it.next().is_none());
282 return true;
283 }
284 Ordering::Greater => {
285 panic!("This should never happen because it'd mean our entry is larger than the buffer passed in, but we already checked to make sure that is not the case");
286 }
287 }
288
289 b.advance(part.len());
290 part = it.next().unwrap();
291 }
292 }
293 }
294}
295
296impl PartialEq for SizedDictEntry {
297 fn eq(&self, other: &Self) -> bool {
298 if self.len() != other.len() {
300 return false;
301 }
302
303 self.cmp(other) == Ordering::Equal
304 }
305}
306
307impl Eq for SizedDictEntry {}
308
309impl Hash for SizedDictEntry {
310 fn hash<H: Hasher>(&self, state: &mut H) {
311 for part in self.chunks() {
312 state.write(part);
313 }
314 }
315}
316
317impl Ord for SizedDictEntry {
318 fn cmp(&self, other: &Self) -> Ordering {
319 if self.len() == 0 && other.len() == 0 {
321 return Ordering::Equal;
322 }
323
324 let mut it1 = self.chunks();
325 let mut it2 = other.chunks();
326 let mut part1 = Cow::Borrowed(it1.next().unwrap());
327 let mut part2 = Cow::Borrowed(it2.next().unwrap());
328
329 loop {
330 match part1.len().cmp(&part2.len()) {
331 Ordering::Equal => {
332 match part1.cmp(&part2) {
333 Ordering::Less => return Ordering::Less,
334 Ordering::Greater => return Ordering::Greater,
335 Ordering::Equal => {}
336 }
337
338 let p1_next = it1.next();
339 let p2_next = it2.next();
340
341 if let (Some(p1), Some(p2)) = (p1_next, p2_next) {
342 part1 = Cow::Borrowed(p1);
343 part2 = Cow::Borrowed(p2);
344 } else if p1_next.is_none() && p2_next.is_none() {
345 return Ordering::Equal;
347 } else if p1_next.is_none() {
348 return Ordering::Less;
351 } else {
352 return Ordering::Greater;
353 }
354 }
355 Ordering::Less => {
356 let part2_slice = Cow::Owned(part2.slice(0..part1.len()));
357 match part1.cmp(&part2_slice) {
358 Ordering::Less => return Ordering::Less,
359 Ordering::Greater => return Ordering::Greater,
360 Ordering::Equal => {}
361 }
362
363 part2 = Cow::Owned(part2.slice(part1.len()..));
364 let part1_option = it1.next();
365 if part1_option.is_none() {
366 return Ordering::Less;
367 }
368 part1 = Cow::Borrowed(part1_option.unwrap());
369 }
370 Ordering::Greater => {
371 let part1_slice = part1.slice(0..part2.len());
372 match part1_slice.cmp(&part2) {
373 Ordering::Less => return Ordering::Less,
374 Ordering::Greater => return Ordering::Greater,
375 Ordering::Equal => {}
376 }
377
378 part1 = Cow::Owned(part1.slice(part2.len()..));
379 let part2_option = it2.next();
380 if part2_option.is_none() {
381 return Ordering::Greater;
382 }
383 part2 = Cow::Owned(part2_option.unwrap().clone());
384 }
385 }
386 }
387 }
388}
389
390impl PartialOrd for SizedDictEntry {
391 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
392 Some(self.cmp(other))
393 }
394}
395
396#[derive(Clone)]
397pub struct SizedDictEntryBuf<'a> {
398 entry: Cow<'a, SizedDictEntry>,
399 slice_ix: usize,
400 pos_in_slice: usize,
401}
402
403impl<'a> SizedDictEntryBuf<'a> {
404 fn current_slice(&self) -> &Bytes {
405 match self.entry.as_ref() {
406 SizedDictEntry::Single(b) => &b,
407 SizedDictEntry::Rope(v) => &v[self.slice_ix],
408 }
409 }
410}
411
412impl<'a> Buf for SizedDictEntryBuf<'a> {
413 fn remaining(&self) -> usize {
414 {
415 let pos_in_slice = self.pos_in_slice;
416 let total: usize = self
417 .entry
418 .chunks()
419 .skip(self.slice_ix)
420 .map(|s| s.len())
421 .sum();
422 total - pos_in_slice
423 }
424 }
425
426 fn chunk(&self) -> &[u8] {
427 {
428 let pos_in_slice = self.pos_in_slice;
429 if self.slice_ix >= self.entry.rope_len() {
430 &[]
431 } else {
432 let slice = self.current_slice();
433 &slice[pos_in_slice..]
434 }
435 }
436 }
437
438 fn advance(&mut self, cnt: usize) {
439 {
440 let mut cnt = cnt;
441 if self.slice_ix < self.entry.rope_len() {
442 let slice = self.current_slice();
443 let remaining_in_slice = slice.len() - self.pos_in_slice;
444
445 if remaining_in_slice > cnt {
446 self.pos_in_slice += cnt;
448 } else {
449 cnt -= remaining_in_slice;
451 self.slice_ix += 1;
452
453 loop {
454 if self.entry.rope_len() >= self.slice_ix {
455 self.pos_in_slice = 0;
457 break;
458 }
459
460 let slice_len = self.current_slice().len();
461
462 if cnt < slice_len {
463 self.pos_in_slice = cnt;
465 break;
466 }
467
468 cnt -= self.entry.rope_len();
470 self.slice_ix += 1;
471 }
472 }
473 }
474 }
475 }
476}
477
478pub type OwnedSizedDictEntryBuf = SizedDictEntryBuf<'static>;
479
480#[derive(Debug)]
481pub struct SizedDictBlock {
482 header: SizedBlockHeader,
483 data: Bytes,
484}
485
486impl SizedDictBlock {
487 pub fn parse(bytes: &mut Bytes) -> Result<Self, SizedDictError> {
488 let header = SizedBlockHeader::parse(bytes)?;
489 if bytes.remaining() < header.buffer_length {
490 return Err(SizedDictError::NotEnoughData);
491 }
492
493 let data = bytes.split_to(header.buffer_length);
494
495 Ok(Self { header, data })
496 }
497
498 pub async fn parse_from_reader<R: AsyncRead + Unpin>(
499 mut reader: R,
500 ) -> Result<Self, SizedDictReaderError> {
501 let header = SizedBlockHeader::parse_from_reader(&mut reader).await?;
502 let mut data = vec![0; header.buffer_length];
503 reader.read_exact(&mut data).await?;
504
505 Ok(Self {
506 header,
507 data: Bytes::from(data),
508 })
509 }
510
511 pub fn num_entries(&self) -> u8 {
512 self.header.num_entries
513 }
514
515 pub fn is_incomplete(&self) -> bool {
516 self.header.num_entries != BLOCK_SIZE as u8
517 }
518
519 pub fn entry(&self, index: usize) -> SizedDictEntry {
520 if index == 0 {
521 return SizedDictEntry::new(vec![self.header.head.clone()]);
522 }
523
524 let mut v = Vec::with_capacity(7);
525 let mut last = self.header.shareds[index - 1];
526 if last != 0 {
527 v.push(last);
528 }
529 if last != 0 {
530 for i in (0..index - 1).rev() {
531 let shared = self.header.shareds[i];
532 if shared == 0 {
533 break;
534 }
535
536 if shared < last {
537 v.push(shared);
538 last = shared;
539 } else {
540 v.push(last);
541 }
542 }
543 }
544
545 let start = index - v.len();
546
547 let mut taken = 0;
548 let mut slices = Vec::with_capacity(v.len() + 1);
549
550 let mut offset: usize;
551 if start == 0 {
552 offset = 0;
553 } else {
554 offset = self.header.sizes.iter().take(start - 1).sum();
555 }
556 for (ix, shared) in v.iter().rev().enumerate() {
557 let have_to_take = shared - taken;
558 let cur_offset = offset;
559
560 if !(ix == 0 && start == 0) {
561 offset += self.header.sizes[start + ix - 1];
563 }
564
565 if have_to_take == 0 {
566 continue;
567 }
568
569 let slice;
570 if ix == 0 && start == 0 {
571 slice = self.header.head.slice(..have_to_take);
573 } else {
574 slice = self.data.slice(cur_offset..cur_offset + have_to_take);
575 }
576 slices.push(slice);
577 taken += have_to_take;
578 }
579
580 let suffix_size = self.header.sizes[index - 1];
581 slices.push(self.data.slice(offset..offset + suffix_size));
582
583 SizedDictEntry::new_optimized(slices)
584 }
585
586 fn suffixes<'a>(&'a self) -> impl Iterator<Item = Bytes> + 'a {
587 let head = Some(self.header.head.clone());
588 let mut offset = 0;
589 let tail = self.header.sizes.iter().map(move |s| {
590 let slice = self.data.slice(offset..*s + offset);
591 offset += s;
592
593 slice
594 });
595
596 head.into_iter().chain(tail)
597 }
598
599 pub fn id(&self, slice: &[u8]) -> IdLookupResult {
600 let (mut common_prefix, ordering) = find_common_prefix_ord(slice, &self.header.head);
601 match ordering {
602 Ordering::Equal => return IdLookupResult::Found(0),
603 Ordering::Less => return IdLookupResult::NotFound,
604 Ordering::Greater => {}
606 }
607
608 for (ix, (shared, suffix)) in self
609 .header
610 .shareds
611 .iter()
612 .zip(self.suffixes().skip(1))
613 .enumerate()
614 {
615 if *shared < common_prefix {
616 return IdLookupResult::Closest(ix as u64);
617 } else if *shared > common_prefix {
618 continue;
619 }
620
621 let (new_common_prefix, ordering) =
622 find_common_prefix_ord(&slice[common_prefix..], &suffix[..]);
623 match ordering {
624 Ordering::Equal => return IdLookupResult::Found(ix as u64 + 1),
625 Ordering::Less => return IdLookupResult::Closest(ix as u64),
626 Ordering::Greater => {
627 common_prefix += new_common_prefix;
628 }
629 }
630 }
631
632 IdLookupResult::Closest(self.header.num_entries as u64 - 1)
633 }
634
635 pub fn iter<'a>(&'a self) -> SizedBlockIterator<'a> {
636 SizedBlockIterator {
637 header: Cow::Borrowed(&self.header),
638 data: self.data.clone(),
639 ix: 0,
640 last: None,
641 }
642 }
643
644 pub fn into_iter(self) -> OwnedSizedBlockIterator {
645 SizedBlockIterator {
646 header: Cow::Owned(self.header),
647 data: self.data.clone(),
648 ix: 0,
649 last: None,
650 }
651 }
652}
653
654pub type OwnedSizedBlockIterator = SizedBlockIterator<'static>;
655
656#[derive(Clone)]
657pub struct SizedBlockIterator<'a> {
658 header: Cow<'a, SizedBlockHeader>,
659 data: Bytes,
660 ix: usize,
661 last: Option<Vec<Bytes>>,
662}
663
664impl<'a> Iterator for SizedBlockIterator<'a> {
665 type Item = SizedDictEntry;
666
667 fn next(&mut self) -> Option<SizedDictEntry> {
668 if let Some(last) = self.last.as_mut() {
669 if self.ix >= self.header.num_entries as usize - 1 {
670 return None;
671 }
672 let size = self.header.sizes[self.ix];
673 let mut shared = self.header.shareds[self.ix];
674 for rope_index in 0..last.len() {
675 let x = &mut last[rope_index];
676 if x.len() < shared {
677 shared -= x.len();
678 continue;
679 }
680
681 x.truncate(shared);
682 last.truncate(rope_index + 1);
683 break;
684 }
685
686 last.push(self.data.split_to(size));
687 self.ix += 1;
688
689 Some(SizedDictEntry::new_optimized(last.clone()))
690 } else {
691 let mut last = Vec::with_capacity(BLOCK_SIZE);
692 last.push(self.header.head.clone());
693 let result = last.clone();
694 self.last = Some(last);
695 Some(SizedDictEntry::new_optimized(result))
696 }
697 }
698}
699
700#[derive(Clone, Copy, Debug, PartialEq, Eq)]
701pub enum IdLookupResult {
702 Found(u64),
703 Closest(u64),
704 NotFound,
705}
706
707impl IdLookupResult {
708 pub fn offset(self, offset: u64) -> Self {
709 match self {
710 Self::Found(i) => Self::Found(i + offset),
711 Self::Closest(i) => Self::Closest(i + offset),
712 Self::NotFound => Self::NotFound,
713 }
714 }
715
716 pub fn default(self, default: u64) -> Self {
717 match self {
718 Self::NotFound => Self::Closest(default),
719 _ => self,
720 }
721 }
722
723 pub fn map<F: Fn(u64) -> u64>(self, f: F) -> Self {
724 match self {
725 Self::Found(i) => Self::Found(f(i)),
726 Self::Closest(i) => Self::Closest(f(i)),
727 Self::NotFound => Self::NotFound,
728 }
729 }
730
731 pub fn into_option(self) -> Option<u64> {
732 match self {
733 Self::Found(i) => Some(i),
734 _ => None,
735 }
736 }
737}
738
739pub fn parse_block_control_records(cw: u8) -> u8 {
740 parse_block_control_word(cw).1
741}
742
743pub fn parse_block_control_word(cw: u8) -> (Option<u8>, u8) {
744 let records = (cw & ((1 << 3) - 1)) + 1;
745 let record_size = record_size_decoding(cw);
746 (record_size, records)
747}
748
749fn record_size_decoding(enc: u8) -> Option<u8> {
750 match enc >> 3 {
751 0 => None,
752 3 => Some(4),
753 4 => Some(8),
754 _ => panic!("Ok, this is not known"),
755 }
756}
757
758fn record_size_encoding(record_size: Option<u8>) -> u8 {
759 match record_size {
760 None => 0,
761 Some(4) => 3 << 3,
762 Some(8) => 4 << 3,
763 _ => {
764 panic!("This is really bad!")
765 }
766 }
767}
768
769fn create_block_control_word(record_size: Option<u8>, records: u8) -> u8 {
770 records - 1 + record_size_encoding(record_size)
771}
772
773pub(crate) fn build_block_unchecked<B: BufMut>(
774 record_size: Option<u8>,
775 buf: &mut B,
776 slices: &[&[u8]],
777) -> usize {
778 let mut size = 0;
779 let slices_len = slices.len();
780 debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0);
781 let cw = create_block_control_word(record_size, slices_len as u8);
782 buf.put_u8(cw as u8);
783 size += 1;
784
785 let first = slices[0];
786 let (vbyte, vbyte_len) = encode_array(first.len() as u64);
787
788 buf.put_slice(&vbyte[..vbyte_len]);
790 buf.put_slice(slices[0]);
791 size += vbyte_len + slices[0].len();
792
793 let mut last = first;
794
795 let mut suffixes: Vec<&[u8]> = Vec::with_capacity(slices.len());
796 for i in 1..slices.len() {
797 let cur = slices[i];
798 let common_prefix = find_common_prefix(last, cur);
799 let (vbyte, vbyte_len) = encode_array(common_prefix as u64);
800 buf.put_slice(&vbyte[..vbyte_len]);
801 size += vbyte_len;
802
803 if record_size == None {
804 let suffix_len = cur.len() - common_prefix;
805 let (vbyte, vbyte_len) = encode_array(suffix_len as u64);
806 buf.put_slice(&vbyte[..vbyte_len]);
807 size += vbyte_len;
808 }
809 suffixes.push(&cur[common_prefix..]);
810 last = cur;
811 }
812
813 for suffix in suffixes.into_iter() {
815 buf.put_slice(suffix);
816 size += suffix.len();
817 }
818
819 size
820}
821
822pub fn block_head(mut block: Bytes) -> Result<Bytes, SizedDictError> {
823 block.advance(1);
824 let (size, _) = vbyte::decode_buf(&mut block)?;
825 Ok(block.split_to(size as usize))
826}
827
828#[cfg(test)]
829mod tests {
830 use super::*;
831 use bytes::Buf;
832
833 fn build_block_bytes(strings: &[&[u8]]) -> Bytes {
834 let mut buf = BytesMut::new();
835 build_block_unchecked(None, &mut buf, &strings);
836
837 buf.freeze()
838 }
839
840 fn build_block(strings: &[&[u8]]) -> SizedDictBlock {
841 let mut bytes = build_block_bytes(strings);
842
843 SizedDictBlock::parse(&mut bytes).unwrap()
844 }
845
846 #[test]
847 fn build_and_parse_block() {
848 let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"];
849
850 let block = build_block(&strings);
851
852 let expected_header = SizedBlockHeader {
853 head: Bytes::copy_from_slice(b"aaaaaa"),
854 num_entries: 5,
855 buffer_length: 11,
856 sizes: [2, 4, 3, 2, 0, 0, 0],
857 shareds: [2, 0, 1, 2, 0, 0, 0],
858 };
859
860 assert_eq!(expected_header, block.header);
861
862 let expected_bytes = b"bbccccdefff";
863 assert_eq!(expected_bytes, &block.data[..]);
864 }
865
866 #[test]
867 fn entry_in_block() {
868 let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"];
869 let block = build_block(&strings);
870
871 for (ix, string) in strings.iter().enumerate() {
872 assert_eq!(*string, &block.entry(ix).to_vec()[..]);
873 }
874 }
875
876 #[test]
877 fn entry_in_complete_block() {
878 let strings: [&[u8]; 8] = [
879 b"aaaaaa",
880 b"aabb",
881 b"cccc",
882 b"cdef",
883 b"cdff",
884 b"cdffasdf",
885 b"cdffeeee",
886 b"ceeeeeeeeeeeeeee",
887 ];
888 let block = build_block(&strings);
889
890 for (ix, string) in strings.iter().enumerate() {
891 assert_eq!(*string, &block.entry(ix).to_vec()[..]);
892 }
893 }
894
895 #[test]
896 fn entry_buf_in_complete_block() {
897 let strings: [&[u8]; 8] = [
898 b"aaaaaa",
899 b"aabb",
900 b"cccc",
901 b"cdef",
902 b"cdff",
903 b"cdffasdf",
904 b"cdffeeee",
905 b"ceeeeeeeeeeeeeee",
906 ];
907 let block = build_block(&strings);
908
909 for (ix, string) in strings.iter().enumerate() {
910 let entry = block.entry(ix);
911 let mut buf = entry.as_buf();
912 let len = buf.remaining();
913 let bytes = buf.copy_to_bytes(len);
914 assert_eq!(*string, &bytes[..]);
915 }
916 }
917
918 #[test]
919 fn entry_owned_buf_in_complete_block() {
920 let strings: [&[u8]; 8] = [
921 b"aaaaaa",
922 b"aabb",
923 b"cccc",
924 b"cdef",
925 b"cdff",
926 b"cdffasdf",
927 b"cdffeeee",
928 b"ceeeeeeeeeeeeeee",
929 ];
930 let block = build_block(&strings);
931
932 for (ix, string) in strings.iter().enumerate() {
933 let mut buf = block.entry(ix).into_buf();
934 let len = buf.remaining();
935 let bytes = buf.copy_to_bytes(len);
936 assert_eq!(*string, &bytes[..]);
937 }
938 }
939
940 #[test]
941 fn head_from_complete_block() {
942 let strings: [&[u8]; 8] = [
943 b"aaaaaa",
944 b"aabb",
945 b"cccc",
946 b"cdef",
947 b"cdff",
948 b"cdffasdf",
949 b"cdffeeee",
950 b"ceeeeeeeeeeeeeee",
951 ];
952 let block = build_block_bytes(&strings);
953 let head = block_head(block).unwrap();
954
955 assert_eq!(b"aaaaaa", &head[..]);
956 }
957
958 #[test]
959 fn slices_iter() {
960 let strings: [&[u8]; 8] = [
961 b"aaaaaa",
962 b"aabb",
963 b"cccc",
964 b"cdef",
965 b"cdff",
966 b"cdffasdf",
967 b"cdffeeee",
968 b"ceeeeeeeeeeeeeee",
969 ];
970 let block = build_block(&strings);
971
972 let expected_slices: Vec<&[u8]> = vec![
973 b"aaaaaa",
974 b"bb",
975 b"cccc",
976 b"def",
977 b"ff",
978 b"asdf",
979 b"eeee",
980 b"eeeeeeeeeeeeeee",
981 ];
982
983 let expected_bytes: Vec<_> = expected_slices
984 .into_iter()
985 .map(|b| Bytes::from(b))
986 .collect();
987
988 let actual: Vec<_> = block.suffixes().collect();
989
990 assert_eq!(expected_bytes, actual);
991 }
992
993 #[test]
994 fn block_id_lookup() {
995 let strings: [&[u8]; 8] = [
996 b"aaaaaa",
997 b"aabb",
998 b"cccc",
999 b"cdef",
1000 b"cdff",
1001 b"cdffasdf",
1002 b"cdffeeee",
1003 b"ceeeeeeeeeeeeeee",
1004 ];
1005 let block = build_block(&strings);
1006
1007 for (ix, string) in strings.iter().enumerate() {
1008 let index = block.id(string);
1009 assert_eq!(IdLookupResult::Found(ix as u64), index);
1010 }
1011 }
1012
1013 #[test]
1014 fn block_id_lookup_nonmatches() {
1015 let strings: [&[u8]; 8] = [
1016 b"aaaaaa",
1017 b"aabb",
1018 b"cccc",
1019 b"cdef",
1020 b"cdff",
1021 b"cdffasdf",
1022 b"cdffeeee",
1023 b"ceeeeeeeeeeeeeee",
1024 ];
1025 let block = build_block(&strings);
1026
1027 assert_eq!(IdLookupResult::NotFound, block.id(b"aa"));
1028
1029 assert_eq!(IdLookupResult::Closest(0), block.id(b"aaab"));
1030
1031 assert_eq!(IdLookupResult::Closest(1), block.id(b"aabba"));
1032
1033 assert_eq!(IdLookupResult::Closest(7), block.id(b"f"));
1034 }
1035
1036 #[test]
1037 fn enumerate_complete_block() {
1038 let strings: [&[u8]; 8] = [
1039 b"aaaaaa",
1040 b"aabb",
1041 b"cccc",
1042 b"cdef",
1043 b"cdff",
1044 b"cdffasdf",
1045 b"cdffeeee",
1046 b"ceeeeeeeeeeeeeee",
1047 ];
1048 let block = build_block(&strings);
1049
1050 let result: Vec<Bytes> = block.iter().map(|e| e.to_bytes()).collect();
1051 assert_eq!(
1052 strings
1053 .iter()
1054 .cloned()
1055 .map(Bytes::from_static)
1056 .collect::<Vec<_>>(),
1057 result
1058 );
1059 }
1060
1061 #[test]
1062 fn enumerate_incomplete_block() {
1063 let strings: [&[u8]; 6] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff", b"cdffasdf"];
1064 let block = build_block(&strings);
1065
1066 let result: Vec<Bytes> = block.iter().map(|e| e.to_bytes()).collect();
1067 assert_eq!(
1068 strings
1069 .iter()
1070 .cloned()
1071 .map(Bytes::from_static)
1072 .collect::<Vec<_>>(),
1073 result
1074 );
1075 }
1076
1077 #[test]
1078 fn control_word_round_trip() {
1079 let cw = create_block_control_word(None, 1);
1080 assert_eq!(parse_block_control_word(cw), (None, 1));
1081
1082 let cw = create_block_control_word(None, 8);
1083 assert_eq!(parse_block_control_word(cw), (None, 8));
1084
1085 let cw = create_block_control_word(None, 3);
1086 assert_eq!(parse_block_control_word(cw), (None, 3));
1087
1088 let cw = create_block_control_word(Some(8), 5);
1089 assert_eq!(parse_block_control_word(cw), (Some(8), 5));
1090
1091 let cw = create_block_control_word(Some(4), 6);
1092 assert_eq!(parse_block_control_word(cw), (Some(4), 6))
1093 }
1094}