block_array_cow/
lib.rs

1// Apache License, Version 2.0
2// (c) Blender Foundation, 2016
3//     Campbell Barton, 2017
4
//! Array storage to minimize duplication.
//!
//! This is done by splitting arrays into chunks and using copy-on-write (COW)
//! to de-duplicate chunks.
//! From the user's perspective this is an implementation detail.
10//!
11//! # Overview
12//!
13//! ## Data Structure
14//!
15//! This diagram is an overview of the structure of a single array-store.
16//!
//! Note: the only two structures here which are referenced externally are:
//!
//! * `BArrayStore`: The whole array store.
//! * `BArrayState`: Represents a single state (array) of data.
//!   These can be added using a reference state;
//!   while this could be considered the previous or parent state,
//!   no relationship is kept,
//!   so the caller is free to use any state from the same `BArrayStore` as a reference.
25//!
//! ```text
27//! <+> BArrayStore: root data-structure,
28//!  |  can store many 'states', which share memory.
29//!  |
30//!  |  This can store many arrays, however they must share the same 'stride'.
31//!  |  Arrays of different types will need to use a new BArrayStore.
32//!  |
33//!  +- <+> states (Collection of BArrayState's):
//!  |   |  Each represents an array added by the user of this API,
//!  |   |  and references a chunk_list (each state is a chunk_list user).
36//!  |   |  Note that the list order has no significance.
37//!  |   |
38//!  |   +- <+> chunk_list (BChunkList):
39//!  |       |  The chunks that make up this state.
40//!  |       |  Each state is a chunk_list user,
41//!  |       |  avoids duplicating lists when there is no change between states.
42//!  |       |
//!  |       +- chunk_refs (List of BChunkRef): Each chunk_ref links to a BChunk.
44//!  |          Each reference is a chunk user,
45//!  |          avoids duplicating smaller chunks of memory found in multiple states.
46//!  |
47//!  +- info (BArrayInfo):
48//!  |  Sizes and offsets for this array-store.
49//!  |  Also caches some variables for reuse.
50//!  |
51//!  +- <+> memory (BArrayMemory):
52//!      |  Memory pools for storing BArrayStore data.
53//!      |
54//!      +- chunk_list (Pool of BChunkList):
55//!      |  All chunk_lists, (reference counted, used by BArrayState).
56//!      |
57//!      +- chunk_ref (Pool of BChunkRef):
58//!      |  All chunk_refs (link between BChunkList & BChunk).
59//!      |
60//!      +- chunks (Pool of BChunk):
61//!         All chunks, (reference counted, used by BChunkList).
62//!         These have their headers hashed for reuse so we can quickly check for duplicates.
63//! ```
64//!
65//!
66//! ### De-Duplication
67//!
//! When creating a new state, a previous state can be given as a reference;
//! matching chunks from this state are re-used in the new state.
//!
//! First, matches at either end of the array are detected.
//! For identical arrays this is all that's needed.
73//!
74//! De-duplication is performed on any remaining chunks,
75//! by hashing the first few bytes of the chunk
76//! (see: `BCHUNK_HASH_TABLE_ACCUMULATE_STEPS`).
77//!
//! Note: this hash is cached for reuse since the referenced data never changes.
79//!
80//! An array is created to store hash values at every 'stride',
81//! then stepped over to search for matching chunks.
82//!
//! Once a match is found, there is a high chance the next chunks match too,
//! so these are checked to avoid performing as many hash-lookups.
//! Otherwise new chunks are created.
86//!
87//! # Example
88//!
89//! ```
90//! let mut bs = block_array_cow::BArrayStore::new(1, 8);
91//! let data_src_a = b"The quick brown fox jumps over the lazy dog";
92//! let data_src_b = b"The quick brown fox almost jumps over the lazy dog";
93//! let data_src_c = b"The little quick brown fox jumps over the lazy dog!";
94//!
95//! let state_a = bs.state_add(data_src_a, None);
96//! let state_b = bs.state_add(data_src_b, Some(state_a));
97//! let state_c = bs.state_add(data_src_c, Some(state_b));
98//!
99//! // Check the data is stored correctly
100//! let data_dst = block_array_cow::BArrayStore::state_data_get_alloc(state_a);
101//! assert_eq!(&data_src_a[..], &data_dst[..]);
102//!
103//! let data_dst = block_array_cow::BArrayStore::state_data_get_alloc(state_b);
104//! assert_eq!(&data_src_b[..], &data_dst[..]);
105//!
106//! let data_dst = block_array_cow::BArrayStore::state_data_get_alloc(state_c);
107//! assert_eq!(&data_src_c[..], &data_dst[..]);
108//! ```
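//!
//! Because states share chunks, storing several similar arrays costs far less
//! than the sum of their sizes. A minimal sketch of checking this
//! (not part of the original documentation, using the same public API as above):
//!
//! ```
//! let mut bs = block_array_cow::BArrayStore::new(1, 8);
//! let state_a = bs.state_add(b"The quick brown fox jumps over the lazy dog", None);
//! let state_b = bs.state_add(b"The quick brown fox jumps over the lazy cat", Some(state_a));
//!
//! // The expanded size is the sum of both arrays,
//! // the compacted size counts each unique chunk only once.
//! assert!(bs.calc_size_compacted_get() <= bs.calc_size_expanded_get());
//!
//! // States can be removed in any order, unused chunks are freed.
//! bs.state_remove(state_b);
//! bs.state_remove(state_a);
//! ```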
109
110
111// -----------------------------------------------------------------------------
112// Constants
113
/// # Defines
///
/// Some of the logic for merging is quite involved,
/// so disabling parts of it is supported (see the constants below).
118
/// Scan first chunks (the happy path when the beginning of the array matches).
120/// When the array is a perfect match, we can re-use the entire list.
121///
122/// Note that disabling makes some tests fail that check for output-size.
123const USE_FASTPATH_CHUNKS_FIRST: bool = true;
124
/// Scan last chunks (the happy path when the end of the array matches).
126/// When the end of the array matches, we can quickly add these chunks.
127///
128/// Note that we will add contiguous matching chunks
129/// so this isn't as useful as `USE_FASTPATH_CHUNKS_FIRST`,
130/// however it avoids adding matching chunks into the lookup table,
131/// so creating the lookup table won't be as expensive.
132const USE_FASTPATH_CHUNKS_LAST: bool = USE_FASTPATH_CHUNKS_FIRST;
133
134/// For arrays of matching length, test that *enough* of the chunks are aligned,
135/// and simply step over both arrays, using matching chunks.
136/// This avoids overhead of using a lookup table for cases
137/// when we can assume they're mostly aligned.
138const USE_ALIGN_CHUNKS_TEST: bool = true;
139
/// Number of times to propagate hashes back.
/// Effectively a 'triangle-number' (plus one):
/// so 4 -> 7, 5 -> 11, 6 -> 16... etc.
143const BCHUNK_HASH_TABLE_ACCUMULATE_STEPS: usize = 4;
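
// How this constant maps to the read-ahead values computed in `BArrayStore::new`
// below (a sketch added for clarity, not in the original source):
//
//   accum_steps            = BCHUNK_HASH_TABLE_ACCUMULATE_STEPS - 1
//   accum_read_ahead_len   = ((accum_steps * (accum_steps + 1)) / 2) + 1
//   accum_read_ahead_bytes = accum_read_ahead_len * stride
//
// e.g. with the default of 4: accum_steps = 3, accum_read_ahead_len = 7.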
144
/// Calculate the key once and reuse it.
/// `HASH_TABLE_KEY_UNSET` marks a key that hasn't been computed yet,
/// `HASH_TABLE_KEY_FALLBACK` is used when the computed hash collides with the unset value.
146const HASH_TABLE_KEY_UNSET: u64 = ::std::u64::MAX;
147const HASH_TABLE_KEY_FALLBACK: u64 = ::std::u64::MAX - 1;
148
/// How much larger the table is than the total number of chunks.
150const BCHUNK_HASH_TABLE_MUL: usize = 3;
151
152/// Merge too small/large chunks:
153///
154/// Using this means chunks below a threshold will be merged together.
155/// Even though short term this uses more memory,
156/// long term the overhead of maintaining many small chunks is reduced.
157/// This is defined by setting the minimum chunk size
158/// (as a fraction of the regular chunk size).
159///
/// Chunks may also become too large (when incrementally growing an array),
/// so this also enables chunk splitting.
162const USE_MERGE_CHUNKS: bool = true;
163
/// Only used when `USE_MERGE_CHUNKS` is enabled.
///
/// Merge chunks smaller than: `(chunk_size / BCHUNK_SIZE_MIN_DIV)`.
167const BCHUNK_SIZE_MIN_DIV: usize = 8;
168
/// Disallow chunks bigger than the regular chunk size scaled by this value.
///
/// Note: this must be at least 2!
/// However, the split code won't run in the tests unless this is lowered to ~1.1,
/// so lower it only to check that splitting works.
174const BCHUNK_SIZE_MAX_MUL: usize = 2;
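
// A hypothetical example (values chosen for illustration, not from the original source):
// with `stride = 4` and `chunk_count = 64`, `BArrayStore::new` derives:
//
//   chunk_byte_size     = 64 * 4       = 256
//   chunk_byte_size_min = (64 / 8) * 4 = 32   (using BCHUNK_SIZE_MIN_DIV = 8)
//   chunk_byte_size_max = (64 * 2) * 4 = 512  (using BCHUNK_SIZE_MAX_MUL = 2)
//
// so (roughly) tail chunks smaller than 32 bytes get merged into a neighbor,
// and merged chunks larger than 512 bytes are split.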
// end USE_MERGE_CHUNKS
176
177/// slow (keep disabled), but handy for debugging
178const USE_VALIDATE_LIST_SIZE: bool = false;
179
180const USE_VALIDATE_LIST_DATA_PARTIAL: bool = false;
181
182const USE_PARANOID_CHECKS: bool = false;
183
184const MEMPOOL_CHUNK_SIZE: usize = 512;
185
186// -----------------------------------------------------------------------------
187// Modules
188
189mod plain_ptr;
190use plain_ptr::{
191    PtrMut,
192    PtrConst,
193
194    null_mut,
195    null_const,
196};
197
198mod mempool_elem;
199use mempool_elem::{
200    MemPool,
201    MemPoolElemUtils,
202};
203
204mod list_base;
205use list_base::{
206    ListBase,
207    ListBaseElemUtils,
208};
209
210use ::std::cmp::{
211    min,
212    max,
213};
214
215/// NOP for now, keep since this may be supported later.
216macro_rules! unlikely {
217    ($body:expr) => {
218        $body
219    }
220}
221
222// -----------------------------------------------------------------------------
223// Internal Structs
224
225type HashKey = u64;
226
227struct BArrayInfo {
228    chunk_stride: usize,
229    // chunk_count: usize, // UNUSED
230
231    // pre-calculated
232    chunk_byte_size: usize,
233    // min/max limits (inclusive)
234    chunk_byte_size_min: usize,
235    chunk_byte_size_max: usize,
236
237    accum_read_ahead_bytes: usize,
238    accum_steps: usize,
239    accum_read_ahead_len: usize,
240}
241
242struct BArrayMemory {
243    state: MemPool<BArrayState>,
244    chunk_list: MemPool<BChunkList>,
245    chunk_ref: MemPool<BChunkRef>,
    // this needs an explicit drop of its 'data'
247    chunk: MemPool<BChunk>,
248}
249
250///
251/// Main storage for all states
252///
253pub struct BArrayStore {
254    // static data
255    info: BArrayInfo,
256
257    // memory storage
258    memory: BArrayMemory,
259
260    // `BArrayState` may be in any order
261    // (logic should never depend on state order).
262    states: ListBase<BArrayState>,
263}
264
265
///
/// A single instance of an array.
///
/// This is how external APIs hold a reference to an in-memory state,
/// although the struct's fields are private.
///
272pub struct BArrayState {
273    // linked list in `BArrayStore.states`
274    next: PtrMut<BArrayState>,
275    prev: PtrMut<BArrayState>,
276
277    // BChunkList's
278    chunk_list: PtrMut<BChunkList>,
279}
280
281struct BChunkList {
282    // BChunkRef's
283    chunk_refs: ListBase<BChunkRef>,
    // Length of `chunk_refs`, stored for reuse.
285    chunk_refs_len: usize,
286    // size of all chunks
287    total_size: usize,
288
289    // number of `BArrayState` using this.
290    users: isize,
291}
292
293/// A chunk of an array.
294struct BChunk {
295    data: Vec<u8>,
296
297    // number of `BChunkList` using this.
298    users: isize,
299
300    key: HashKey,
301}
302
/// Links to store `BChunk` data in `BChunkList.chunk_refs`.
304struct BChunkRef {
305    next: PtrMut<BChunkRef>,
306    prev: PtrMut<BChunkRef>,
307    link: PtrMut<BChunk>,
308}
309
///
/// Singly linked list used when putting chunks into a temporary table,
/// used for lookups.
///
/// Points to the `BChunkRef`, not the `BChunk`,
/// to allow walking down the chunks in-order until a mismatch is found;
/// this avoids having to do so many table lookups.
///
318struct BTableRef {
319    next: PtrMut<BTableRef>,
320    cref: PtrMut<BChunkRef>,
321}
322
323/// internal structs
324
325
326// -----------------------------------------------------------------------------
327// MemPoolElemUtils impl
328
329macro_rules! mempool_list_elem_impl {
330    ($t:ty) => {
331        impl MemPoolElemUtils for $t {
332            #[inline] fn default_chunk_size() -> usize {
333                MEMPOOL_CHUNK_SIZE
334            }
335            #[inline] fn free_ptr_get(&self) -> *mut Self {
336                return self.next.as_ptr() as usize as *mut Self;
337            }
338            #[inline] fn free_ptr_set(&mut self, ptr: *mut Self) {
339                self.next = ::plain_ptr::PtrMut(ptr as usize as *mut _);
340                self.prev = PtrMut(self);
341            }
342            #[inline] fn free_ptr_test(&self) -> bool {
343                self.prev == PtrConst(self)
344            }
345        }
346    }
347}
348
349mempool_list_elem_impl!(BArrayState);
350mempool_list_elem_impl!(BChunkRef);
351
352impl MemPoolElemUtils for BChunkList {
353    #[inline] fn default_chunk_size() -> usize {
354        MEMPOOL_CHUNK_SIZE
355    }
356    #[inline] fn free_ptr_get(&self) -> *mut Self {
357        return self.chunk_refs.head.as_ptr() as usize as *mut Self;
358    }
359    #[inline] fn free_ptr_set(&mut self, ptr: *mut Self) {
360        self.chunk_refs.head = PtrMut(ptr as usize as *mut _);
361        self.chunk_refs.tail = PtrMut((self as *const _) as usize as *mut _);
362    }
363    #[inline] fn free_ptr_test(&self) -> bool {
364        self.chunk_refs.tail.as_ptr() as usize == (self as *const _ as usize)
365    }
366}
367
368impl MemPoolElemUtils for BChunk {
369    #[inline] fn default_chunk_size() -> usize {
370        MEMPOOL_CHUNK_SIZE
371    }
372    #[inline] fn free_ptr_get(&self) -> *mut Self {
373        return self.users as *mut Self;
374    }
375    #[inline] fn free_ptr_set(&mut self, ptr: *mut Self) {
376        self.users = ptr as isize;
377        self.key = self as *const _ as HashKey;
378    }
379    #[inline] fn free_ptr_test(&self) -> bool {
380        self.key == self as *const _ as HashKey
381    }
382}
383
384
385// -----------------------------------------------------------------------------
386// ListBaseElemUtils impl
387
388macro_rules! list_base_elem_impl {
389    ($t:ty) => {
390        impl ListBaseElemUtils for $t {
391            #[inline] fn next_get(&self) -> PtrMut<Self> { self.next }
392            #[inline] fn prev_get(&self) -> PtrMut<Self> { self.prev }
393            #[inline] fn next_set(&mut self, ptr: PtrMut<Self>) { self.next = ptr; }
394            #[inline] fn prev_set(&mut self, ptr: PtrMut<Self>) { self.prev = ptr; }
395        }
396    }
397}
398
399list_base_elem_impl!(BArrayState);
400list_base_elem_impl!(BChunkRef);
401
402
403// -----------------------------------------------------------------------------
404// Internal API
405
406// put internal API in its own module
407
408/// # Internal BChunk API
409/// []( { )
410
411fn bchunk_new(
412    bs_mem: &mut BArrayMemory, data: Vec<u8>,
413) -> PtrMut<BChunk> {
414    PtrMut(bs_mem.chunk.alloc_elem_from(
415        BChunk {
416            data: data,
417            users: 0,
418            key: HASH_TABLE_KEY_UNSET,
419        }
420    ))
421}
422
423fn bchunk_new_copydata(
424    bs_mem: &mut BArrayMemory, data: &[u8],
425) -> PtrMut<BChunk> {
426    let mut data_copy = Vec::with_capacity(data.len());
427    data_copy.extend_from_slice(data);
428    return bchunk_new(bs_mem, data_copy);
429}
430
431fn bchunk_decref(
432    bs_mem: &mut BArrayMemory, mut chunk: PtrMut<BChunk>,
433) {
434    debug_assert!(chunk.users > 0);
435    if chunk.users == 1 {
436        unsafe { ::std::ptr::drop_in_place(&mut chunk.data) };
437        bs_mem.chunk.free_elem(chunk.as_ptr());
438    } else {
439        chunk.users -= 1;
440    }
441}
442
443fn bchunk_data_compare(
444    chunk: PtrMut<BChunk>,
445    data_base: &[u8],
446    data_base_len: usize,
447    offset: usize,
448) -> bool {
449    if offset + chunk.data.len() <= data_base_len {
450        return &data_base[offset..(offset + chunk.data.len())] == &chunk.data[..];
451    } else {
452        return false;
453    }
454}
455
456/// []( } )
457
458/// # Internal BChunkList API
459/// []( { )
460
461fn bchunk_list_new(
462    bs_mem: &mut BArrayMemory,
463    total_size: usize,
464) -> PtrMut<BChunkList> {
465    PtrMut(bs_mem.chunk_list.alloc_elem_from(
466        BChunkList {
467            chunk_refs: ListBase::new(),
468            chunk_refs_len: 0,
469            total_size: total_size,
470            users: 0,
471        }
472    ))
473}
474
475fn bchunk_list_decref(
476    bs_mem: &mut BArrayMemory, mut chunk_list: PtrMut<BChunkList>,
477) {
478    debug_assert!(chunk_list.users > 0);
479    if chunk_list.users == 1 {
480        let mut cref = chunk_list.chunk_refs.head;
481        while cref != null_mut() {
482            let cref_next = cref.next;
483            bchunk_decref(bs_mem, cref.link);
484            bs_mem.chunk_ref.free_elem(cref.as_ptr());
485            cref = cref_next;
486        }
487
488        bs_mem.chunk_list.free_elem(chunk_list.as_ptr());
489    } else {
490        chunk_list.users -= 1;
491    }
492}
493
494macro_rules! debug_assert_chunklist_size {
495    ($chunk_list:expr, $n:expr) => {
496        {
497            if USE_VALIDATE_LIST_SIZE {
498                debug_assert_eq!(bchunk_list_size($chunk_list), $n)
499            }
500        }
501    }
502}
503
504// USE_VALIDATE_LIST_DATA_PARTIAL
505fn bchunk_list_data_check(
506    chunk_list: PtrMut<BChunkList>, data: &[u8],
507) -> bool {
508    let mut offset = 0;
509    for cref in chunk_list.chunk_refs.iter() {
510        if &data[offset..(offset + cref.link.data.len())] != &cref.link.data[..] {
511            return false;
512        }
513        offset += cref.link.data.len();
514    }
515    return true;
516}
517
518macro_rules! debug_assert_chunklist_data {
519    ($chunk_list:expr, $data:expr) => {
520        {
521            if USE_VALIDATE_LIST_DATA_PARTIAL {
522                debug_assert!(bchunk_list_data_check($chunk_list, $data));
523            }
524        }
525    }
526}
527
528// USE_MERGE_CHUNKS
529fn bchunk_list_ensure_min_size_last(
530    info: &BArrayInfo, bs_mem: &mut BArrayMemory,
531    mut chunk_list: PtrMut<BChunkList>,
532) {
533    let mut cref = chunk_list.chunk_refs.tail;
534    if cref != null_mut() && cref.prev != null_mut() {
535        // both are decref'd after use (end of this block)
536        let chunk_curr: PtrMut<BChunk> = cref.link;
537        let chunk_prev: PtrMut<BChunk> = cref.prev.link;
538
539        if min(chunk_prev.data.len(), chunk_curr.data.len()) < info.chunk_byte_size_min {
540            let data_merge_len = chunk_prev.data.len() + chunk_curr.data.len();
541            // we could pass, but no need
542            if data_merge_len <= info.chunk_byte_size_max {
543                // we have enough space to merge
544
545                // remove last from linklist
546                debug_assert!(chunk_list.chunk_refs.tail != chunk_list.chunk_refs.head);
547                cref.prev.next = null_mut();
548                chunk_list.chunk_refs.tail = cref.prev;
549                chunk_list.chunk_refs_len -= 1;
550
551                let mut data_merge: Vec<u8> = Vec::with_capacity(data_merge_len);
552                data_merge.extend_from_slice(&chunk_prev.data[..]);
553                data_merge.extend_from_slice(&chunk_curr.data[..]);
554
555                cref.prev.link = bchunk_new(bs_mem, data_merge);
556                cref.prev.link.users += 1;
557                bs_mem.chunk_ref.free_elem(cref.as_ptr());
558            } else {
                // If we always merge small slices,
                // we should _almost_ never end up having very large chunks.
                // Gradually expanding or contracting the array can cause this.
                //
                // If we do, the code below works (test by setting 'BCHUNK_SIZE_MAX_MUL = 1.2').
564
565                // keep chunk on the left hand side a regular size
566                let split = info.chunk_byte_size;
567
568                // merge and split
569                let data_prev_len = split;
570                let data_curr_len = data_merge_len - split;
571                let mut data_prev: Vec<u8> = Vec::with_capacity(data_prev_len);
572                let mut data_curr: Vec<u8> = Vec::with_capacity(data_curr_len);
573
574                if data_prev_len <= chunk_prev.data.len() {
575                    // setup 'data_prev'
                    data_prev.extend_from_slice(&chunk_prev.data[0..data_prev_len]);
577
578                    // setup 'data_curr'
579                    data_curr.extend_from_slice(
580                        &chunk_prev.data[data_prev_len..chunk_prev.data.len()]);
581                    data_curr.extend_from_slice(
582                        &chunk_curr.data[..]);
583                } else {
584                    debug_assert!(data_curr_len <= chunk_curr.data.len());
585                    debug_assert!(data_prev_len >= chunk_prev.data.len());
586
587                    let data_prev_grow_len = data_prev_len - chunk_prev.data.len();
588
589                    // setup 'data_prev'
590                    data_prev.extend_from_slice(&chunk_prev.data[..]);
591                    data_prev.extend_from_slice(&chunk_curr.data[0..data_prev_grow_len]);
592
593                    // setup 'data_curr'
594                    data_curr.extend_from_slice(
595                        &chunk_curr.data[data_prev_grow_len..(data_prev_grow_len + data_curr_len)]);
596                }
597
598                debug_assert_eq!(data_prev_len, data_prev.len());
599                debug_assert_eq!(data_curr_len, data_curr.len());
600
601                cref.prev.link = bchunk_new(bs_mem, data_prev);
602                cref.prev.link.users += 1;
603
604                cref.link = bchunk_new(bs_mem, data_curr);
605                cref.link.users += 1;
606            }
607
608            // free zero users
609            bchunk_decref(bs_mem, chunk_curr);
610            bchunk_decref(bs_mem, chunk_prev);
611        }
612    }
613}
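
// A worked example for the function above (hypothetical sizes, not from the original source),
// assuming chunk_byte_size = 16, chunk_byte_size_min = 2, chunk_byte_size_max = 32:
//
// * Tail chunks of 15 and 1 bytes: min(15, 1) < 2 and 15 + 1 <= 32,
//   so the two chunks are merged into a single 16 byte chunk.
// * Tail chunks of 32 and 1 bytes: 32 + 1 > 32, so the merged data is re-split
//   at `chunk_byte_size`, leaving a 16 byte chunk followed by a 17 byte chunk.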
614
615/// Return length split into 2 values: (usize, usize)
616///
617/// * `data_trim_len` Length which is aligned to the `BArrayInfo.chunk_byte_size`.
618/// * `data_last_chunk_len` The remaining bytes.
619///
620/// Note: This function ensures the size of `data_last_chunk_len`
621/// is larger than `BArrayInfo.chunk_byte_size_min`.
622fn bchunk_list_calc_trim_len(
623    info: &BArrayInfo, data_len: usize,
624) -> (usize, usize) {
625    let mut data_last_chunk_len: usize;
626    let mut data_trim_len: usize = data_len;
627
628    if USE_MERGE_CHUNKS {
        // avoid creating too-small chunks,
        // more efficient than merging afterwards
631        if data_len > info.chunk_byte_size {
632            data_last_chunk_len = data_trim_len % info.chunk_byte_size;
633            data_trim_len = data_trim_len - data_last_chunk_len;
634            if data_last_chunk_len != 0 {
635                if data_last_chunk_len < info.chunk_byte_size_min {
                    // may be zero and that's OK
637                    data_trim_len -= info.chunk_byte_size;
638                    data_last_chunk_len += info.chunk_byte_size;
639                }
640            }
641        } else {
642            data_trim_len = 0;
643            data_last_chunk_len = data_len;
644        }
645
646        debug_assert!((data_trim_len == 0) || (data_trim_len >= info.chunk_byte_size));
647    } else {
648        data_last_chunk_len = data_trim_len % info.chunk_byte_size;
649        data_trim_len = data_trim_len - data_last_chunk_len;
650    }
651
652    debug_assert_eq!(data_trim_len + data_last_chunk_len, data_len);
653
654    (data_trim_len, data_last_chunk_len)
655}
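
// A worked example for `bchunk_list_calc_trim_len` (hypothetical values,
// not from the original source), assuming chunk_byte_size = 8 and chunk_byte_size_min = 2:
//
// * data_len = 21: last = 21 % 8 = 5, which is >= 2, so the result is (16, 5).
// * data_len = 17: last = 17 % 8 = 1, which is < 2, so one more chunk is folded
//   into the tail, giving (8, 9).
// * data_len = 6 (not larger than chunk_byte_size): the result is (0, 6).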
656
657/// Append and don't manage merging small chunks.
658fn bchunk_list_append_only(
659    bs_mem: &mut BArrayMemory,
660    mut chunk_list: PtrMut<BChunkList>, mut chunk: PtrMut<BChunk>,
661) {
662    let cref = PtrMut(bs_mem.chunk_ref.alloc_elem_from(
663        BChunkRef {
664            next: null_mut(),
665            prev: null_mut(),
666            link: chunk,
667        })
668    );
669    chunk_list.chunk_refs.push_back(cref);
670    chunk_list.chunk_refs_len += 1;
671    chunk.users += 1
672}
673
674/// note: This is for writing single chunks,
675/// use `bchunk_list_append_data_n` when writing large blocks of memory into many chunks.
676fn bchunk_list_append_data(
677    info: &BArrayInfo, bs_mem: &mut BArrayMemory,
678    chunk_list: PtrMut<BChunkList>,
679    data: &[u8],
680) {
681    debug_assert!(data.len() != 0);
682
683    if USE_MERGE_CHUNKS {
684        debug_assert!(data.len() <= info.chunk_byte_size_max);
685
686        if !chunk_list.chunk_refs.is_empty() {
687            let mut cref: PtrMut<BChunkRef> = chunk_list.chunk_refs.tail;
688            let chunk_prev: PtrMut<BChunk> = cref.link;
689            if min(chunk_prev.data.len(), data.len()) < info.chunk_byte_size_min {
690                let data_merge_len = chunk_prev.data.len() + data.len();
691                // realloc for single user
692                if cref.link.users == 1 {
693                    cref.link.data.extend_from_slice(data);
694                } else {
695                    let mut data_merge: Vec<u8> = Vec::with_capacity(data_merge_len);
696                    data_merge.extend_from_slice(&chunk_prev.data[..]);
697                    data_merge.extend_from_slice(data);
698                    cref.link = bchunk_new(bs_mem, data_merge);
699                    cref.link.users += 1;
700                    bchunk_decref(bs_mem, chunk_prev);
701                }
702                debug_assert_eq!(data_merge_len, cref.link.data.len());
703                return;
704            }
705        }
706    }
707
708    let chunk: PtrMut<BChunk> = bchunk_new_copydata(bs_mem, data);
709    bchunk_list_append_only(bs_mem, chunk_list, chunk);
710
711    // don't run this, instead preemptively avoid creating a chunk only to merge it (above).
712    if false && USE_MERGE_CHUNKS {
713        bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
714    }
715}
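
// A small illustration of the merge behaviour above (hypothetical sizes,
// not from the original source), assuming chunk_byte_size_min = 4:
// appending 3 bytes to an empty list creates a 3 byte chunk;
// appending 2 more bytes then merges into a single 5 byte chunk
// (extended in place when the tail chunk has a single user,
// otherwise a new merged chunk replaces it).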
716
717/// Similar to `bchunk_list_append_data`, but handle multiple chunks.
718/// Use for adding arrays of arbitrary sized memory at once.
719///
720/// Note: this function takes care not to perform redundant chunk-merging checks,
721/// so we can write successive fixed size chunks quickly.
722fn bchunk_list_append_data_n(
723    info: &BArrayInfo, bs_mem: &mut BArrayMemory,
724    chunk_list: PtrMut<BChunkList>,
725    data: &[u8],
726) {
727    let (data_trim_len, data_last_chunk_len) = bchunk_list_calc_trim_len(info, data.len());
728
729    if data_trim_len != 0 {
730        let mut i_prev;
731
732        {
733            let i = info.chunk_byte_size;
734            bchunk_list_append_data(info, bs_mem, chunk_list, &data[0..i]);
735            i_prev = i;
736        }
737
738        while i_prev != data_trim_len {
739            let i = i_prev + info.chunk_byte_size;
740            let chunk = bchunk_new_copydata(bs_mem, &data[i_prev..i]);
741            bchunk_list_append_only(bs_mem, chunk_list, chunk);
742            i_prev = i;
743        }
744
745        if data_last_chunk_len != 0 {
746            let chunk = bchunk_new_copydata(
747                bs_mem, &data[i_prev..(i_prev + data_last_chunk_len)]);
748            bchunk_list_append_only(bs_mem, chunk_list, chunk);
749            // i_prev = data.len();  // UNUSED
750        }
751    } else {
752        // if we didn't write any chunks previously,
753        // we may need to merge with the last.
754        if data_last_chunk_len != 0 {
755            debug_assert_eq!(data.len(), data_last_chunk_len);
756            bchunk_list_append_data(info, bs_mem, chunk_list, data);
757            // i_prev = data.len();  // UNUSED
758        }
759    }
760
761    if USE_MERGE_CHUNKS {
762        if data.len() > info.chunk_byte_size {
763            debug_assert!(chunk_list.chunk_refs.tail.link.data.len() >= info.chunk_byte_size_min);
764        }
765    }
766}
767
768fn bchunk_list_append(
769    info: &BArrayInfo, bs_mem: &mut BArrayMemory,
770    chunk_list: PtrMut<BChunkList>,
771    chunk: PtrMut<BChunk>,
772) {
773    bchunk_list_append_only(bs_mem, chunk_list, chunk);
774
775    if USE_MERGE_CHUNKS {
776        bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
777    }
778}
779
780fn bchunk_list_fill_from_array(
781    info: &BArrayInfo, bs_mem: &mut BArrayMemory,
782    chunk_list: PtrMut<BChunkList>,
783    data: &[u8],
784) {
785    debug_assert!(chunk_list.chunk_refs.is_empty());
786    let (data_trim_len, data_last_chunk_len) = bchunk_list_calc_trim_len(info, data.len());
787
788    let mut i_prev = 0;
789    while i_prev != data_trim_len {
790        let i = i_prev + info.chunk_byte_size;
791        let chunk = bchunk_new_copydata(bs_mem, &data[i_prev..i]);
792        bchunk_list_append_only(bs_mem, chunk_list, chunk);
793        i_prev = i;
794    }
795
796    if data_last_chunk_len != 0 {
797        let chunk = bchunk_new_copydata(bs_mem, &data[i_prev..(i_prev + data_last_chunk_len)]);
798        bchunk_list_append_only(bs_mem, chunk_list, chunk);
799        // i_prev = data.len();
800    }
801
802    if USE_MERGE_CHUNKS {
803        if data.len() > info.chunk_byte_size {
804            debug_assert!(chunk_list.chunk_refs.tail.link.data.len() >= info.chunk_byte_size_min);
805        }
806    }
807
808    // works but better avoid redundant re-alloc
809    if false && USE_MERGE_CHUNKS {
810        bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
811    }
812
813    debug_assert_chunklist_size!(chunk_list, data.len());
814    debug_assert_chunklist_data!(chunk_list, data);
815}
816
817
818// ---------------------------------------------------------------------------
819// Internal Table Lookup Functions
820
821/// # Internal Hashing/De-Duplication API
822///
823/// Only used by `bchunk_list_from_data_merge`.
824
825const HASH_INIT: u32 = 5381;
826
827#[inline]
828fn hash_data_single(p: u8) -> u32 {
829    return ((HASH_INIT << 5) + HASH_INIT).wrapping_add((p as i8) as u32);
830}
831
// Hash a run of bytes (one element of `chunk_stride` bytes).
833fn hash_data(key: &[u8]) -> u32 {
834    let mut h: u32 = HASH_INIT;
835
836    for p in key {
837        // h = (h << 5) + h + ((*p as i8) as u32);
838        h = h.wrapping_shl(5).wrapping_add(h).wrapping_add((*p as i8) as u32);
839    }
840
841    return h;
842}
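
// For reference (a note added here, not in the original source): this is the
// djb2-style string hash, equivalent to starting from 5381 and computing
// `h = h.wrapping_mul(33).wrapping_add((byte as i8) as u32)` for each byte.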
843
844fn hash_array_from_data(
845    info: &BArrayInfo, data_slice: &[u8],
846    hash_array: &mut [HashKey],
847) {
848    if info.chunk_stride != 1 {
849        let mut i_step = 0;
850        let mut i = 0;
851        while i_step != data_slice.len() {
852            let i_next = i_step + info.chunk_stride;
853            hash_array[i] = hash_data(&data_slice[i_step..i_next]) as HashKey;
854            i_step = i_next;
855            i += 1;
856        }
857    } else {
858        // fast-path for bytes
859        for i in 0..data_slice.len() {
860            hash_array[i] = hash_data_single(data_slice[i]) as HashKey;
861        }
862    }
863}
864
/// Similar to `hash_array_from_data`,
/// but able to step into the next chunk if we run out of data.
867fn hash_array_from_cref(
868    info: &BArrayInfo, mut cref: PtrMut<BChunkRef>, data_len: usize,
869    hash_array: &mut [HashKey],
870) {
871    let hash_array_len = data_len / info.chunk_stride;
872    let mut i: usize = 0;
873    loop {
874        let mut i_next: usize = hash_array_len - i;
875        let mut data_trim_len = i_next * info.chunk_stride;
876        if data_trim_len > cref.link.data.len() {
877            data_trim_len = cref.link.data.len();
878            i_next = data_trim_len / info.chunk_stride;
879        }
880        debug_assert!(data_trim_len <= cref.link.data.len());
881        hash_array_from_data(
882            info, &cref.link.data[0..data_trim_len], &mut hash_array[i..(i + i_next)]);
883        i += i_next;
884        cref = cref.next;
885
886        if !((i < hash_array_len) && (cref != null_const())) {
887            break;
888        }
889    }
890
891    // If this isn't equal, the caller didn't properly check
892    // that there was enough data left in all chunks
893    debug_assert!(i == hash_array_len);
894}
895
896fn hash_accum(hash_array: &mut [HashKey], hash_array_len: usize, mut iter_steps: usize) {
897    // _very_ unlikely, can happen if you select a chunk-size of 1 for example.
898    if unlikely!(iter_steps > hash_array_len) {
899        iter_steps = hash_array_len;
900    }
901
902    let hash_array_search_len: usize = hash_array_len - iter_steps;
903    while iter_steps != 0 {
904        let hash_offset: usize = iter_steps;
905        for i in 0..hash_array_search_len {
906            hash_array[i] += (hash_array[i + hash_offset]) * ((hash_array[i] & 0xff) + 1);
907        }
908        iter_steps -= 1;
909    }
910}
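
// Note (added for clarity, not in the original source): after accumulation,
// the value at an index mixes in the original hashes of up to
// `triangle(iter_steps)` following positions, so a single key summarizes up to
// `accum_read_ahead_len` elements (the triangle number plus one, see
// `BCHUNK_HASH_TABLE_ACCUMULATE_STEPS` and `BArrayStore::new`).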
911
/// When we only need a single value, we can use a small optimization:
/// we can avoid accumulating a little more of the array's tail each iteration.
914fn hash_accum_single(hash_array: &mut [HashKey], mut iter_steps: usize) {
915    debug_assert!(iter_steps <= hash_array.len());
916    if unlikely!(!(iter_steps <= hash_array.len())) {
917        // while this shouldn't happen, avoid crashing
918        iter_steps = hash_array.len();
919    }
920    // We can increase this value each step to avoid accumulating quite as much
921    // while getting the same results as hash_accum
922    let mut iter_steps_sub = iter_steps;
923
924    while iter_steps != 0 {
925        let hash_array_search_len: usize = hash_array.len() - iter_steps_sub;
926        let hash_offset: usize = iter_steps;
927        for i in 0..hash_array_search_len {
928            hash_array[i] += (hash_array[i + hash_offset]) * ((hash_array[i] & 0xff) + 1);
929        }
930        iter_steps -= 1;
931        iter_steps_sub += iter_steps;
932    }
933}
934
935fn key_from_chunk_ref(
936    info: &BArrayInfo, cref: PtrMut<BChunkRef>,
937    // avoid reallocating each time
938    hash_store: &mut [HashKey],
939) -> HashKey {
940    // fill in a reusable array
941    let mut chunk: PtrMut<BChunk> = cref.link;
942    debug_assert_ne!(0, (info.accum_read_ahead_bytes * info.chunk_stride));
943
944    if info.accum_read_ahead_bytes <= chunk.data.len() {
945        let mut key: HashKey = chunk.key;
946
947        if key != HASH_TABLE_KEY_UNSET {
948            // Using key cache!
949            // avoids calculating every time
950        } else {
951            hash_array_from_cref(info, cref, info.accum_read_ahead_bytes, hash_store);
952            hash_accum_single(hash_store, info.accum_steps);
953            key = hash_store[0];
954
955            // cache the key
956            if key == HASH_TABLE_KEY_UNSET {
957                key = HASH_TABLE_KEY_FALLBACK;
958            }
959            chunk.key = key;
960        }
961        return key;
962    } else {
963        // corner case - we're too small, calculate the key each time.
964        hash_array_from_cref(info, cref, info.accum_read_ahead_bytes, hash_store);
965        hash_accum_single(hash_store, info.accum_steps);
966        let mut key: HashKey = hash_store[0];
967
968        if unlikely!(key == HASH_TABLE_KEY_UNSET) {
969            key = HASH_TABLE_KEY_FALLBACK;
970        }
971        return key;
972    }
973}
974
975fn table_lookup(
976    info: &BArrayInfo, table: &Vec<PtrMut<BTableRef>>, table_len: usize, i_table_start: usize,
977    data: &[u8], data_len: usize, offset: usize, table_hash_array: &Vec<HashKey>,
978) -> PtrMut<BChunkRef> {
979    let size_left: usize = data_len - offset;
980    let key: HashKey = table_hash_array[((offset - i_table_start) / info.chunk_stride)];
981    let key_index = (key % (table_len as HashKey)) as usize;
982    let mut tref: PtrMut<BTableRef> = table[key_index];
983    while tref != null_const() {
984        let cref: PtrMut<BChunkRef> = tref.cref;
985        if cref.link.key == key {
986            let chunk_test: PtrMut<BChunk> = cref.link;
987            if chunk_test.data.len() <= size_left {
988                if bchunk_data_compare(chunk_test, data, data_len, offset) {
989                    // we could remove the chunk from the table, to avoid multiple hits
990                    return cref;
991                }
992            }
993        }
994        tref = tref.next;
995    }
996    null_mut()
997}
998
999// End Table Lookup
1000// ----------------
1001
1002/// []( } )
1003
1004/// * `data` Data to store in the returned value.
1005/// * `data_len_original` Length of data in bytes.
1006/// * `chunk_list_reference` Reuse this list or chunks within it, don't modify its content.
1007///
/// Note: The caller is responsible for adding the user (incrementing the returned list's `users`).
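///
/// Roughly, the steps below are (a summary added for orientation,
/// matching the commented blocks in the function body):
///
/// 1. Fast path: re-use chunks that match from the start of the reference list.
/// 2. Fast path: re-use chunks that match at the end of the array.
/// 3. If both arrays have the same length and are mostly aligned,
///    step over both in parallel, re-using matching chunks.
/// 4. Otherwise, build a hash table of the remaining reference chunks
///    and de-duplicate against it.
/// 5. Any data still unmatched is written out as new chunks.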
1009fn bchunk_list_from_data_merge(
1010    info: &BArrayInfo, bs_mem: &mut BArrayMemory,
1011    data: &[u8], data_len_original: usize,
1012    chunk_list_reference: PtrMut<BChunkList>,
1013) -> PtrMut<BChunkList> {
1014    debug_assert_chunklist_size!(chunk_list_reference, chunk_list_reference.total_size);
1015
1016    // -----------------------------------------------------------------------
1017    // Fast-Path for exact match
1018    // Check for exact match, if so, return the current list.
1019
1020    let mut cref_match_first: PtrMut<BChunkRef> = null_mut();
1021
1022    let mut chunk_list_reference_skip_len: usize = 0;
1023    let mut chunk_list_reference_skip_bytes: usize = 0;
1024    let mut i_prev = 0;
1025
1026    if USE_FASTPATH_CHUNKS_FIRST {
1027        let mut full_match: bool = true;
1028
1029        let mut cref: PtrMut<BChunkRef> = chunk_list_reference.chunk_refs.head;
1030        while i_prev < data_len_original {
1031            if  cref != null_mut() &&
1032                bchunk_data_compare(cref.link, data, data_len_original, i_prev)
1033            {
1034                cref_match_first = cref;
1035                chunk_list_reference_skip_len += 1;
1036                chunk_list_reference_skip_bytes += cref.link.data.len();
1037                i_prev += cref.link.data.len();
1038                cref = cref.next;
1039            } else {
1040                full_match = false;
1041                break;
1042            }
1043        }
1044
1045        if full_match {
1046            if chunk_list_reference.total_size == data_len_original {
1047                return chunk_list_reference;
1048            }
1049        }
1050    }
1051    // End Fast-Path (first)
1052    // ---------------------
1053
1054    // Copy until we have a mismatch
1055    let chunk_list: PtrMut<BChunkList> = bchunk_list_new(bs_mem, data_len_original);
1056    if cref_match_first != null_const() {
1057        let mut chunk_size_step: usize = 0;
1058        let mut cref: PtrMut<BChunkRef> = chunk_list_reference.chunk_refs.head;
1059        loop {
1060            let chunk: PtrMut<BChunk> = cref.link;
1061            chunk_size_step += chunk.data.len();
1062            bchunk_list_append_only(bs_mem, chunk_list, chunk);
1063            debug_assert_chunklist_size!(chunk_list, chunk_size_step);
1064            debug_assert_chunklist_data!(chunk_list, data);
1065            if cref == cref_match_first {
1066                break;
1067            } else {
1068                cref = cref.next;
1069            }
1070        }
1071        // happens when bytes are removed from the end of the array
1072        if chunk_size_step == data_len_original {
1073            return chunk_list;
1074        }
1075
1076        i_prev = chunk_size_step;
1077    } else {
1078        i_prev = 0;
1079    }
1080
1081    // ------------------------------------------------------------------------
1082    // Fast-Path for end chunks
1083    //
1084    // Check for trailing chunks
1085
1086    // In this case use 'chunk_list_reference_last' to define the last index
1087    // index_match_last = -1
1088
    // Warning: from now on, don't use `data.len()`,
    // since we want to ignore chunks already matched.
1091    let mut data_len: usize = data_len_original;
1092
1093    let mut chunk_list_reference_last: PtrMut<BChunkRef> = null_mut();
1094
1095    if USE_FASTPATH_CHUNKS_LAST {
1096        if !chunk_list_reference.chunk_refs.is_empty() {
1097            let mut cref: PtrMut<BChunkRef> = chunk_list_reference.chunk_refs.tail;
1098            while
1099                (cref.prev != null_mut()) &&
1100                (cref != cref_match_first) &&
1101                (cref.link.data.len() <= data_len - i_prev)
1102            {
1103                let chunk_test: PtrMut<BChunk> = cref.link;
1104                let offset: usize = data_len - chunk_test.data.len();
1105                if bchunk_data_compare(chunk_test, data, data_len, offset) {
1106                    data_len = offset;
1107                    chunk_list_reference_last = cref;
1108                    chunk_list_reference_skip_len += 1;
1109                    chunk_list_reference_skip_bytes += cref.link.data.len();
1110                    cref = cref.prev;
1111                } else {
1112                    break;
1113                }
1114            }
1115        }
1116    }
1117
1118    // End Fast-Path (last)
1119    // --------------------
1120
1121    // -----------------------------------------------------------------------
1122    // Check for aligned chunks
1123    //
1124    // This saves a lot of searching, so use simple heuristics to detect aligned arrays.
1125    // (may need to tweak exact method).
1126
1127    let mut use_aligned: bool = false;
1128
1129    if USE_ALIGN_CHUNKS_TEST {
1130        if chunk_list.total_size == chunk_list_reference.total_size {
1131            // if we're already a quarter aligned
1132            if data_len - i_prev <= chunk_list.total_size / 4 {
1133                use_aligned = true;
1134            } else {
1135                // TODO, walk over chunks and check if some arbitrary amount align
1136            }
1137        }
1138    }
1139
1140    // End Aligned Chunk Case
1141    // ----------------------
1142
1143    if use_aligned {
        // Copy matching chunks, creating them using the same 'layout' as the reference
1145        let mut cref: PtrMut<BChunkRef> = {
1146            if cref_match_first != null_mut() {
1147                cref_match_first.next
1148            } else {
1149                chunk_list_reference.chunk_refs.head
1150            }
1151        };
1152        while i_prev != data_len {
1153            let i: usize = i_prev + cref.link.data.len();
1154            debug_assert!(i != i_prev);
1155
1156            if (cref != chunk_list_reference_last) &&
1157                bchunk_data_compare(cref.link, data, data_len, i_prev)
1158            {
1159                bchunk_list_append(info, bs_mem, chunk_list, cref.link);
1160                debug_assert_chunklist_size!(chunk_list, i);
1161                debug_assert_chunklist_data!(chunk_list, data);
1162            } else {
1163                bchunk_list_append_data(info, bs_mem, chunk_list, &data[i_prev..i]);
1164                debug_assert_chunklist_size!(chunk_list, i);
1165                debug_assert_chunklist_data!(chunk_list, data);
1166            }
1167
1168            cref = cref.next;
1169
1170            i_prev = i;
1171        }
1172    } else if
1173        (data_len - i_prev >= info.chunk_byte_size) &&
1174        (chunk_list_reference.chunk_refs_len >= chunk_list_reference_skip_len) &&
1175        (chunk_list_reference.chunk_refs.head != null_mut())
1176    {
1177
1178        // --------------------------------------------------------------------
1179        // Non-Aligned Chunk De-Duplication
1180
1181        // only create a table if we have at least one chunk to search
1182        // otherwise just make a new one.
1183        //
1184        // Support re-arranged chunks
1185
1186        let i_table_start = i_prev;
1187        let table_hash_array_len: usize = (data_len - i_prev) / info.chunk_stride;
1188        let mut table_hash_array: Vec<HashKey> = Vec::with_capacity(table_hash_array_len);
1189        unsafe { table_hash_array.set_len(table_hash_array_len) };
1190
1191        hash_array_from_data(info, &data[i_prev..data_len], &mut table_hash_array[..]);
1192
1193        hash_accum(&mut table_hash_array[..], table_hash_array_len, info.accum_steps);
1194
1195        let chunk_list_reference_remaining_len: usize =
1196            (chunk_list_reference.chunk_refs_len - chunk_list_reference_skip_len) + 1;
1197        let mut table_ref_stack: Vec<BTableRef> =
1198            Vec::with_capacity(chunk_list_reference_remaining_len);
1199
1200        let table_len = chunk_list_reference_remaining_len * BCHUNK_HASH_TABLE_MUL;
1201        let mut table: Vec<PtrMut<BTableRef>> = vec![null_mut(); table_len];
1202
1203        // table_make - inline
1204        // include one matching chunk, to allow for repeating values
1205        {
1206            // all values are filled
1207            let mut hash_store: Vec<HashKey> = Vec::with_capacity(info.accum_read_ahead_len);
1208            unsafe { hash_store.set_len(info.accum_read_ahead_len) };
1209
1210            let mut chunk_list_reference_bytes_remaining: usize =
1211                chunk_list_reference.total_size - chunk_list_reference_skip_bytes;
1212
1213            let mut cref: PtrMut<BChunkRef> = {
1214                if cref_match_first != null_mut() {
1215                    chunk_list_reference_bytes_remaining += cref_match_first.link.data.len();
1216                    cref_match_first
1217                } else {
1218                    chunk_list_reference.chunk_refs.head
1219                }
1220            };
1221
1222            if USE_PARANOID_CHECKS {
1223                let mut test_bytes_len: usize = 0;
1224                let mut cr: PtrMut<BChunkRef> = cref;
1225                while cr != chunk_list_reference_last {
1226                    test_bytes_len += cr.link.data.len();
1227                    cr = cr.next;
1228                }
1229                debug_assert!(test_bytes_len == chunk_list_reference_bytes_remaining);
1230            }
1231
1232            while
1233                (cref != chunk_list_reference_last) &&
1234                (chunk_list_reference_bytes_remaining >= info.accum_read_ahead_bytes)
1235            {
1236                let key: HashKey = key_from_chunk_ref(info, cref, &mut hash_store[..]);
1237                let key_index: usize = (key % table_len as HashKey) as usize;
1238                let tref_prev: PtrMut<BTableRef> = table[key_index];
1239                debug_assert!(table_ref_stack.len() < chunk_list_reference_remaining_len);
1240                table_ref_stack.push(BTableRef { cref: cref, next: tref_prev });
1241                table[key_index] = PtrMut(table_ref_stack.last_mut().unwrap());
1242
1243                chunk_list_reference_bytes_remaining -= cref.link.data.len();
1244                cref = cref.next;
1245            }
1246
1247            debug_assert!(table_ref_stack.len() <= chunk_list_reference_remaining_len);
1248
1249            drop(hash_store);
1250        }
1251        // done making the table
1252
1253        debug_assert!(i_prev <= data_len);
1254        let mut i = i_prev;
1255        while i < data_len {
            // Assumes the existing chunk isn't a match!
1257            let mut cref_found: PtrMut<BChunkRef> = table_lookup(
1258                info,
1259                &table, table_len, i_table_start,
1260                data, data_len, i, &table_hash_array);
1261
1262            if cref_found != null_const() {
1263                debug_assert!(i < data_len);
1264                if i != i_prev {
1265                    bchunk_list_append_data_n(info, bs_mem, chunk_list, &data[i_prev..i]);
1266                    i_prev = i;
1267                    if false && i_prev != 0 { } // quiet warning!
1268                }
1269
1270                // now add the reference chunk
1271                {
1272                    let chunk_found: PtrMut<BChunk> = cref_found.link;
1273                    i += chunk_found.data.len();
1274                    bchunk_list_append(info, bs_mem, chunk_list, chunk_found);
1275                }
1276                i_prev = i;
1277                debug_assert!(i_prev <= data_len);
1278                debug_assert_chunklist_size!(chunk_list, i_prev);
1279                debug_assert_chunklist_data!(chunk_list, data);
1280
                // it's likely that the next chunk in the list will be a match, so check it!
1282                while
1283                    (cref_found.next != null_mut()) &&
1284                    (cref_found.next != chunk_list_reference_last)
1285                {
1286                    cref_found = cref_found.next;
1287                    let chunk_found: PtrMut<BChunk> = cref_found.link;
1288
1289                    if bchunk_data_compare(chunk_found, data, data_len, i_prev) {
                        // may be useful to remove table data,
                        // assuming we don't have repeating memory
                        // where it would be useful to re-use chunks.
1293                        i += chunk_found.data.len();
1294                        bchunk_list_append(info, bs_mem, chunk_list, chunk_found);
1295                        // chunk_found may be freed!
1296                        i_prev = i;
1297                        debug_assert!(i_prev <= data_len);
1298                        debug_assert_chunklist_size!(chunk_list, i_prev);
1299                        debug_assert_chunklist_data!(chunk_list, data);
1300                    } else {
1301                        break;
1302                    }
1303                }
1304            } else {
1305                i = i + info.chunk_stride;
1306            }
1307        }
1308
1309        drop(table_hash_array);
1310        drop(table);
1311        drop(table_ref_stack);
1312
1313        // End Table Lookup
1314        // ----------------
1315    }
1316
1317    debug_assert_chunklist_size!(chunk_list, i_prev);
1318    debug_assert_chunklist_data!(chunk_list, data);
1319
1320    // -----------------------------------------------------------------------
1321    // No Duplicates to copy, write new chunks
1322    //
    // Trailing chunks, no matches found in the table lookup above.
    // Write all new data.
1325    if i_prev != data_len {
1326        bchunk_list_append_data_n(info, bs_mem, chunk_list, &data[i_prev..data_len]);
1327        i_prev = data_len;
1328    }
1329
1330    debug_assert!(i_prev == data_len);
1331
1332    if USE_FASTPATH_CHUNKS_LAST {
1333        if chunk_list_reference_last != null_mut() {
1334            // write chunk_list_reference_last since it hasn't been written yet
1335            let mut cref: PtrMut<BChunkRef> = chunk_list_reference_last;
1336            while cref != null_mut() {
1337                let chunk: PtrMut<BChunk> = cref.link;
1338                // debug_assert!(bchunk_data_compare(chunk, data, data_len, i_prev));
1339                i_prev += chunk.data.len();
                // use the simple add since we assume the reference's
                // chunks have already been sized correctly.
1342                bchunk_list_append_only(bs_mem, chunk_list, chunk);
1343                debug_assert_chunklist_data!(chunk_list, data);
1344                cref = cref.next;
1345            }
1346        }
1347    }
1348
1349    debug_assert!(i_prev == data_len_original);
1350
1351    // check we're the correct size and that we didn't accidentally modify the reference
1352    debug_assert_chunklist_size!(chunk_list, data_len_original);
1353    debug_assert_chunklist_size!(chunk_list_reference, chunk_list_reference.total_size);
1354
1355    debug_assert_chunklist_data!(chunk_list, data);
1356
1357    return chunk_list;
1358}
1359// end private API
1360
1361/// []( } )
1362
1363/// # Main Array Storage API
1364/// []( { )
1365
1366///
1367/// Create a new array store, which can store any number of arrays
1368/// as long as their stride matches.
1369///
/// * `stride` The `sizeof()` of each element.
///
/// Note that while a stride of `1` will always work,
/// it's less efficient since duplicate chunks of memory will be searched for
/// at positions unaligned with the array data.
1375///
/// * `chunk_count` The number of elements to store in each chunk.
1377///
1378///   * A small value increases the ability to de-duplicate chunks,
1379///     but adds overhead by increasing the number of chunks
1380///     to look-up when searching for duplicates,
1381///     as well as some overhead constructing the original
///     array again, with more calls to `memcpy`.
1383///   * Larger values reduce the *book keeping* overhead,
1384///     but increase the chance a small,
1385///     isolated change will cause a larger amount of data to be duplicated.
1386///
1387/// Return a new array store.
1388///
1389impl BArrayStore {
1390    pub fn new(
1391        stride: usize,
1392        chunk_count: usize,
1393    ) -> BArrayStore {
1394        let accum_steps = BCHUNK_HASH_TABLE_ACCUMULATE_STEPS - 1;
1395        let accum_read_ahead_len = ((((accum_steps * (accum_steps + 1))) / 2) + 1) as usize;
1396        let accum_read_ahead_bytes = accum_read_ahead_len * stride;
1397
1398        BArrayStore {
1399            info: BArrayInfo {
1400                chunk_stride: stride,
1401                // chunk_count: chunk_count, // UNUSED
1402
1403                chunk_byte_size: chunk_count * stride,
1404                chunk_byte_size_min: max(1, chunk_count / BCHUNK_SIZE_MIN_DIV) * stride,
1405                chunk_byte_size_max: (chunk_count * BCHUNK_SIZE_MAX_MUL) * stride,
1406
1407                accum_steps: accum_steps,
                // Triangle number, identifying how much read-ahead we need:
1409                // https://en.wikipedia.org/wiki/Triangular_number (+ 1)
1410                accum_read_ahead_len: accum_read_ahead_len,
1411                accum_read_ahead_bytes: accum_read_ahead_bytes,
1412            },
1413            memory: BArrayMemory {
1414                state: MemPool::new(),
1415                chunk_list: MemPool::new(),
1416                chunk_ref: MemPool::new(),
                // allow iteration to simplify freeing, otherwise it's not needed
                // (we could loop over all states as an alternative).
1419                chunk: MemPool::new(),
1420            },
1421            states: ListBase::new(),
1422        }
1423    }
1424
1425    fn free_data(&mut self) {
1426        // free chunk data
1427        for mut chunk in self.memory.chunk.iter_mut() {
1428            unsafe { ::std::ptr::drop_in_place(&mut chunk.data); }
1429        }
1430    }
1431
1432    /// Clear all contents, allowing reuse of `self`.
1433    pub fn clear(
1434        &mut self,
1435    ) {
1436        self.free_data();
1437
1438        self.states.clear();
1439
1440        self.memory.chunk_list.clear();
1441        self.memory.chunk_ref.clear();
1442        self.memory.chunk.clear();
1443    }
1444
1445    /// # BArrayStore Statistics
1446    /// []( { )
1447
    /// Return the total amount of memory that would be used by expanding the arrays of all states.
1449    pub fn calc_size_expanded_get(
1450        &self,
1451    ) -> usize {
1452        let mut size_accum: usize = 0;
1453        for state in self.states.iter() {
1454            size_accum += state.chunk_list.total_size;
1455        }
1456        size_accum
1457    }
1458
    /// Return the amount of memory used by all `BChunk.data`
    /// (duplicate chunks are only counted once).
1461    pub fn calc_size_compacted_get(
1462        &self,
1463    ) -> usize {
1464        let mut size_total: usize = 0;
1465        for chunk in self.memory.chunk.iter() {
1466            debug_assert!(chunk.users > 0);
1467            size_total += chunk.data.len();
1468        }
1469        size_total
1470    }
1471
1472    /// []( } )
1473
1474    /// # BArrayState Access
1475    /// []( { )
1476    ///
    /// * `data` Data used to create the new state.
    ///
    /// * `state_reference` The state to use as a reference when adding the new state,
    ///   typically this is the previous state,
    ///   however it can be any previously created state from this `BArrayStore`.
    ///
    /// Returns the new state,
    /// which is used by the caller as a handle to get back the contents of `data`.
    /// This may be removed using `BArrayStore::state_remove`,
    /// otherwise it will be freed when the `BArrayStore` is dropped.
1487    ///
1488    pub fn state_add(
1489        &mut self,
1490        data: &[u8],
1491        state_reference: Option<*const BArrayState>,
1492    ) -> *mut BArrayState {
1493        // ensure we're aligned to the stride
1494        debug_assert_eq!(0, data.len() % self.info.chunk_stride);
1495
1496        if USE_PARANOID_CHECKS {
1497            if let Some(state_reference) = state_reference {
1498                assert!(self.states.index_at(PtrConst(state_reference)).is_some());
1499            }
1500        }
1501
1502        let mut chunk_list = {
1503            if let Some(state_reference) = state_reference {
1504                bchunk_list_from_data_merge(
1505                    &self.info, &mut self.memory,
1506                    data, data.len(),
1507                    // re-use reference chunks
1508                    PtrConst(state_reference).chunk_list,
1509                )
1510            } else {
1511                let chunk_list = bchunk_list_new(&mut self.memory, data.len());
1512                bchunk_list_fill_from_array(
1513                    &self.info, &mut self.memory,
1514                    chunk_list,
1515                    data,
1516                );
1517                chunk_list
1518            }
1519        };
1520
1521        chunk_list.users += 1;
1522
1523        let state = PtrMut(self.memory.state.alloc_elem_from(
1524            BArrayState {
1525                next: null_mut(),
1526                prev: null_mut(),
1527                chunk_list: chunk_list,
1528            })
1529        );
1530
1531        self.states.push_back(state);
1532
1533        if USE_PARANOID_CHECKS {
1534            let data_test = BArrayStore::state_data_get_alloc(state.as_ptr());
1535            assert_eq!(data_test.len(), data.len());
            // use `assert!` rather than `assert_eq!` so the data isn't printed on failure
1537            assert!(data_test == data);
1538            // data_test gets freed
1539        }
1540
1541        return state.as_ptr();
1542    }
1543
1544    /// Remove a state and free any unused `BChunk` data.
1545    ///
1546    /// The states can be freed in any order.
1547    pub fn state_remove(
1548        &mut self,
1549        state: *mut BArrayState,
1550    ) {
1551        let state = PtrMut(state);
1552        if USE_PARANOID_CHECKS {
1553            assert!(self.states.index_at(state).is_some());
1554        }
1555
1556        bchunk_list_decref(&mut self.memory, state.chunk_list);
1557        self.states.remove(state);
1558
1559        self.memory.state.free_elem(state.as_ptr());
1560    }
1561
    /// Return the expanded size of the array;
    /// use this to know how much memory to allocate for `BArrayStore::state_data_get`'s argument.
1564    pub fn state_size_get(
1565        state: *const BArrayState,
1566    ) -> usize {
1567        return unsafe { (*state).chunk_list.total_size };
1568    }
1569
1570    /// Fill in existing allocated memory with the contents of `state`.
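    ///
    /// A minimal usage sketch (not from the original documentation):
    ///
    /// ```
    /// let mut bs = block_array_cow::BArrayStore::new(1, 8);
    /// let state = bs.state_add(b"example data", None);
    ///
    /// let mut buf = vec![0_u8; block_array_cow::BArrayStore::state_size_get(state)];
    /// block_array_cow::BArrayStore::state_data_get(state, &mut buf[..]);
    /// assert_eq!(&buf[..], &b"example data"[..]);
    /// ```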
1571    pub fn state_data_get(
1572        state: *const BArrayState,
1573        data: &mut [u8],
1574    ) {
1575        let state = PtrConst(state);
1576        if USE_PARANOID_CHECKS {
1577            let mut data_test_len: usize = 0;
1578            for cref in state.chunk_list.chunk_refs.iter() {
1579                data_test_len += cref.link.data.len();
1580            }
1581            assert_eq!(data_test_len, state.chunk_list.total_size);
1582            assert_eq!(data_test_len, data.len());
1583        }
1584
1585        debug_assert_eq!(state.chunk_list.total_size, data.len());
1586        let mut data_step = 0;
1587        for cref in state.chunk_list.chunk_refs.iter() {
1588            let data_step_next = data_step + cref.link.data.len();
1589            debug_assert!(cref.link.users > 0);
            data[data_step..data_step_next].clone_from_slice(&cref.link.data[..]);
1594            data_step = data_step_next;
1595        }
1596    }
1597
1598    /// Allocate an array for `state` and return it.
1599    pub fn state_data_get_alloc(
1600        state: *const BArrayState,
1601    ) -> Vec<u8> {
1602        let state = PtrConst(state);
1603        let mut data: Vec<u8> = Vec::with_capacity(state.chunk_list.total_size);
1604        unsafe { data.set_len(state.chunk_list.total_size) };
1605        BArrayStore::state_data_get(state.as_ptr(), &mut data[..]);
1606        return data;
1607    }
1608
1609    pub fn is_valid(
1610        &self,
1611    ) -> bool {
1612
1613        // Check Length
1614        // ------------
1615
1616        for state in self.states.iter() {
1617            let chunk_list: PtrMut<BChunkList> = state.chunk_list;
1618            if bchunk_list_size(chunk_list) != chunk_list.total_size {
1619                return false;
1620            }
1621
1622            if chunk_list.chunk_refs.len_calc() != chunk_list.chunk_refs_len {
1623                return false;
1624            }
1625
1626            if USE_MERGE_CHUNKS {
1627                // ensure we merge all chunks that could be merged
1628                if chunk_list.total_size > self.info.chunk_byte_size_min {
1629                    for cref in chunk_list.chunk_refs.iter() {
1630                        if cref.link.data.len() < self.info.chunk_byte_size_min {
1631                            return false;
1632                        }
1633                    }
1634                }
1635            }
1636        }
1637
1638        // Check User Count & Lost References
1639        // ----------------------------------
1640
1641        {
1642            use std::collections::HashMap;
1643            use std::collections::hash_map::Entry::{
1644                Occupied,
1645                Vacant,
1646            };
1647            macro_rules! GHASH_PTR_ADD_USER {
1648                ($gh:expr, $pt:expr) => {
1649                    match $gh.entry($pt.as_ptr()) {
1650                        Occupied(mut val) => {
1651                            *val.get_mut() += 1;
1652                        },
1653                        Vacant(entry) => {
1654                            entry.insert(1);
1655                        }
1656                    }
1657                }
1658            }
1659
1660
1661            // count chunk_list's
1662            let mut chunk_list_map: HashMap<*const BChunkList, isize> = HashMap::new();
1663            let mut chunk_map: HashMap<*const BChunk, isize> = HashMap::new();
1664
1665            let mut totrefs: usize = 0;
1666            for state in self.states.iter() {
1667                GHASH_PTR_ADD_USER!(chunk_list_map, state.chunk_list);
1668            }
1669            for (chunk_list, users) in chunk_list_map.iter() {
1670                if !(unsafe { (**chunk_list).users } == *users) {
1671                    return false;
1672                }
1673            }
1674            if !(self.memory.chunk_list.len() == chunk_list_map.len()) {
1675                return false;
1676            }
1677
1678            // count chunk's
1679            for (chunk_list, _users) in chunk_list_map.iter() {
1680                for cref in unsafe { (**chunk_list) .chunk_refs.iter() } {
1681                    GHASH_PTR_ADD_USER!(chunk_map, cref.link);
1682                    totrefs += 1;
1683                }
1684            }
1685            if self.memory.chunk.len() != chunk_map.len() {
1686                return false;
1687            }
1688            if self.memory.chunk_ref.len() != totrefs {
1689                return false;
1690            }
1691
1692            for (chunk, users) in chunk_map.iter() {
1693                if !(unsafe { (**chunk).users } == *users) {
1694                    return false;
1695                }
1696            }
1697        }
1698        return true;
1699    }
1700
1701}
1702
1703impl Drop for BArrayStore {
1704    fn drop(&mut self) {
1705        self.free_data();
1706    }
1707}
1708
1709/// # Debugging API (for testing).
1710/// []( { )
1711
1712// only for test validation
1713fn bchunk_list_size(chunk_list: PtrMut<BChunkList>) -> usize {
1714    let mut total_size: usize = 0;
1715    for cref in chunk_list.chunk_refs.iter() {
1716        total_size += cref.link.data.len();
1717    }
1718    return total_size;
1719}
1720
1721// []( } )