//! gemachain_runtime/accounts_hash.rs

1use log::*;
2use rayon::prelude::*;
3use gemachain_measure::measure::Measure;
4use gemachain_sdk::{
5    hash::{Hash, Hasher},
6    pubkey::Pubkey,
7};
8use std::{convert::TryInto, sync::Mutex};
9
/// Sentinel stored in `CalculateHashIntermediate::carats` to mark a zero-carat
/// account; entries carrying this value are skipped during de-dup (see
/// `de_dup_accounts_in_parallel`).
pub const ZERO_RAW_CARATS_SENTINEL: u64 = std::u64::MAX;
/// Branching factor of the merkle tree: each parent hash covers up to 16 children.
pub const MERKLE_FANOUT: usize = 16;
12
/// State carried between passes of the multi-pass accounts-hash calculation.
#[derive(Default, Debug)]
pub struct PreviousPass {
    // partially-reduced hash vectors produced by earlier passes
    pub reduced_hashes: Vec<Vec<Hash>>,
    // tail hashes that did not divide evenly into the target fanout last pass;
    // they are prepended (in pubkey order) to the next pass's input
    pub remaining_unhashed: Vec<Hash>,
    // carat (balance) total accumulated so far across passes
    pub carats: u64,
}
19
/// Timing and size metrics collected while calculating the accounts hash;
/// reported as a single datapoint by `HashStats::log`.
#[derive(Debug, Default)]
pub struct HashStats {
    pub scan_time_total_us: u64,       // time spent scanning accounts
    pub zeros_time_total_us: u64,      // time spent eliminating zero-carat entries
    pub hash_time_total_us: u64,       // total hashing time
    pub hash_time_pre_us: u64,         // hashing time spent in pre-passes
    pub sort_time_total_us: u64,       // time spent sorting
    pub hash_total: usize,             // number of hashes produced
    pub unreduced_entries: usize,      // entry count before de-dup/zero elimination
    pub num_snapshot_storage: usize,   // number of snapshot storages involved
    pub collect_snapshots_us: u64,     // time spent collecting snapshots
    pub storage_sort_us: u64,          // time spent sorting storages
    pub min_bin_size: usize,           // smallest per-bin unreduced entry count
    pub max_bin_size: usize,           // largest per-bin unreduced entry count
}
35impl HashStats {
36    fn log(&mut self) {
37        let total_time_us = self.scan_time_total_us
38            + self.zeros_time_total_us
39            + self.hash_time_total_us
40            + self.sort_time_total_us
41            + self.collect_snapshots_us
42            + self.storage_sort_us;
43        datapoint_info!(
44            "calculate_accounts_hash_without_index",
45            ("accounts_scan", self.scan_time_total_us, i64),
46            ("eliminate_zeros", self.zeros_time_total_us, i64),
47            ("hash", self.hash_time_total_us, i64),
48            ("hash_time_pre_us", self.hash_time_pre_us, i64),
49            ("sort", self.sort_time_total_us, i64),
50            ("hash_total", self.hash_total, i64),
51            ("storage_sort_us", self.storage_sort_us, i64),
52            ("unreduced_entries", self.unreduced_entries as i64, i64),
53            (
54                "collect_snapshots_us",
55                self.collect_snapshots_us as i64,
56                i64
57            ),
58            (
59                "num_snapshot_storage",
60                self.num_snapshot_storage as i64,
61                i64
62            ),
63            ("min_bin_size", self.min_bin_size as i64, i64),
64            ("max_bin_size", self.max_bin_size as i64, i64),
65            ("total", total_time_us as i64, i64),
66        );
67    }
68}
69
/// One account's contribution to the hash calculation: its hash, its carat
/// balance (which may be `ZERO_RAW_CARATS_SENTINEL` for zero-carat accounts),
/// and its pubkey.
#[derive(Default, Debug, PartialEq, Clone)]
pub struct CalculateHashIntermediate {
    pub hash: Hash,
    pub carats: u64,
    pub pubkey: Pubkey,
}
76
77impl CalculateHashIntermediate {
78    pub fn new(hash: Hash, carats: u64, pubkey: Pubkey) -> Self {
79        Self {
80            hash,
81            carats,
82            pubkey,
83        }
84    }
85}
86
/// Maps a logical flat offset back into nested vectors: `index` addresses the
/// inner vector (one element per nesting level) and `start_offset` is the
/// logical flat index at which that inner vector begins.
#[derive(Default, Debug, PartialEq)]
pub struct CumulativeOffset {
    pub index: Vec<usize>,
    pub start_offset: usize,
}
92
93impl CumulativeOffset {
94    pub fn new(index: Vec<usize>, start_offset: usize) -> CumulativeOffset {
95        Self {
96            index,
97            start_offset,
98        }
99    }
100}
101
/// Extracts a borrowed slice from nested raw data: implementors return the tail
/// of the inner vector addressed by `offset.index`, beginning at `start`.
pub trait ExtractSliceFromRawData<'b, T: 'b> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
}
105
106impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
107    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
108        &self[offset.index[0]][start..]
109    }
110}
111
112impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
113    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
114        &self[offset.index[0]][offset.index[1]][start..]
115    }
116}
117
// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>)
// This model prevents callers from having to flatten which saves both working memory and time.
#[derive(Default, Debug)]
pub struct CumulativeOffsets {
    // one entry per non-empty inner vector, ordered by ascending start_offset
    cumulative_offsets: Vec<CumulativeOffset>,
    // total number of elements across all inner vectors
    total_count: usize,
}
125
126impl CumulativeOffsets {
127    pub fn from_raw<T>(raw: &[Vec<T>]) -> CumulativeOffsets {
128        let mut total_count: usize = 0;
129        let cumulative_offsets: Vec<_> = raw
130            .iter()
131            .enumerate()
132            .filter_map(|(i, v)| {
133                let len = v.len();
134                if len > 0 {
135                    let result = CumulativeOffset::new(vec![i], total_count);
136                    total_count += len;
137                    Some(result)
138                } else {
139                    None
140                }
141            })
142            .collect();
143
144        Self {
145            cumulative_offsets,
146            total_count,
147        }
148    }
149
150    pub fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> CumulativeOffsets {
151        let mut total_count: usize = 0;
152        let mut cumulative_offsets = Vec::with_capacity(0);
153        for (i, v_outer) in raw.iter().enumerate() {
154            for (j, v) in v_outer.iter().enumerate() {
155                let len = v.len();
156                if len > 0 {
157                    if cumulative_offsets.is_empty() {
158                        // the first inner, non-empty vector we find gives us an approximate rectangular shape
159                        cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
160                    }
161                    cumulative_offsets.push(CumulativeOffset::new(vec![i, j], total_count));
162                    total_count += len;
163                }
164            }
165        }
166
167        Self {
168            cumulative_offsets,
169            total_count,
170        }
171    }
172
173    fn find_index(&self, start: usize) -> usize {
174        assert!(!self.cumulative_offsets.is_empty());
175        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
176            Ok(index) => index,
177            Err(index) => index - 1, // we would insert at index so we are before the item at index
178        }
179    }
180
181    fn find(&self, start: usize) -> (usize, &CumulativeOffset) {
182        let index = self.find_index(start);
183        let index = &self.cumulative_offsets[index];
184        let start = start - index.start_offset;
185        (start, index)
186    }
187
188    // return the biggest slice possible that starts at 'start'
189    pub fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
190    where
191        U: ExtractSliceFromRawData<'b, T> + 'b,
192    {
193        let (start, index) = self.find(start);
194        raw.extract(index, start)
195    }
196}
197
/// Namespace for the accounts-hash computation routines; all functionality
/// lives in associated functions on the `impl`.
#[derive(Debug)]
pub struct AccountsHash {
    // placeholder field; the struct currently carries no meaningful state
    pub dummy: i32,
}
202
203impl AccountsHash {
204    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
205        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);
206
207        let hash_total = cumulative_offsets.total_count;
208        let result = AccountsHash::compute_merkle_root_from_slices(
209            hash_total,
210            MERKLE_FANOUT,
211            None,
212            |start: usize| cumulative_offsets.get_slice(&hashes, start),
213            None,
214        );
215        (result.0, hash_total)
216    }
217
218    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
219        Self::compute_merkle_root_loop(hashes, fanout, |t| t.1)
220    }
221
222    // this function avoids an infinite recursion compiler error
223    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
224        Self::compute_merkle_root_loop(hashes, fanout, |t: &Hash| *t)
225    }
226
227    pub fn div_ceil(x: usize, y: usize) -> usize {
228        let mut result = x / y;
229        if x % y != 0 {
230            result += 1;
231        }
232        result
233    }
234
    // For the first iteration, there could be more items in the tuple than just hash and carats.
    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
    /// Computes the merkle root of `hashes`: groups of `fanout` items are hashed
    /// in parallel into one intermediate hash each, then the intermediates are
    /// reduced recursively until a single root remains. Empty input yields the
    /// default (empty) hash.
    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
    where
        F: Fn(&T) -> Hash + std::marker::Sync,
        T: std::marker::Sync,
    {
        // empty input hashes to the hash of nothing
        if hashes.is_empty() {
            return Hasher::default().result();
        }

        let mut time = Measure::start("time");

        let total_hashes = hashes.len();
        // one chunk per group of 'fanout' input items
        let chunks = Self::div_ceil(total_hashes, fanout);

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                let start_index = i * fanout;
                let end_index = std::cmp::min(start_index + fanout, total_hashes);

                // hash the (up to) 'fanout' items of this chunk into one hash
                let mut hasher = Hasher::default();
                for item in hashes.iter().take(end_index).skip(start_index) {
                    let h = extractor(item);
                    hasher.hash(h.as_ref());
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if result.len() == 1 {
            result[0]
        } else {
            // more than one intermediate left: reduce again
            Self::compute_merkle_root_recurse(result, fanout)
        }
    }
275
276    fn calculate_three_level_chunks(
277        total_hashes: usize,
278        fanout: usize,
279        max_levels_per_pass: Option<usize>,
280        specific_level_count: Option<usize>,
281    ) -> (usize, usize, bool) {
282        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
283        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
284
285        // Only use the 3 level optimization if we have at least 4 levels of data.
286        // Otherwise, we'll be serializing a parallel operation.
287        let threshold = target * fanout;
288        let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
289            && total_hashes >= threshold;
290        if three_level {
291            if let Some(specific_level_count_value) = specific_level_count {
292                three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
293            }
294        }
295        let (num_hashes_per_chunk, levels_hashed) = if three_level {
296            (target, THREE_LEVEL_OPTIMIZATION)
297        } else {
298            (fanout, 1)
299        };
300        (num_hashes_per_chunk, levels_hashed, three_level)
301    }
302
    // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
    // The caller provides a function to return a slice from the source data.
    /// Hashes `total_hashes` input hashes, fetched on demand through
    /// `get_hash_slice_starting_at_index`, into merkle-tree parent hashes.
    ///
    /// Returns `(root, intermediate_hashes)`:
    /// - with `specific_level_count = Some(n)`, exactly `n` levels are hashed
    ///   (recursing as needed) and the resulting intermediate hashes are
    ///   returned alongside `Hash::default()` as the root placeholder;
    /// - with `None`, reduction continues to a single root and the vec is empty.
    ///
    /// `max_levels_per_pass` can forbid the 3-level unrolled path (see
    /// `calculate_three_level_chunks`).
    pub fn compute_merkle_root_from_slices<'a, F>(
        total_hashes: usize,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        get_hash_slice_starting_at_index: F,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>)
    where
        F: Fn(usize) -> &'a [Hash] + std::marker::Sync,
    {
        if total_hashes == 0 {
            return (Hasher::default().result(), vec![]);
        }

        let mut time = Measure::start("time");

        let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
            total_hashes,
            fanout,
            max_levels_per_pass,
            specific_level_count,
        );

        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);

        // initial fetch - could return entire slice
        let data: &[Hash] = get_hash_slice_starting_at_index(0);
        let data_len = data.len();

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                // summary:
                // this closure computes 1 or 3 levels of merkle tree (all chunks will be 1 or all will be 3)
                // for a subset (our chunk) of the input data [start_index..end_index]

                // index into get_hash_slice_starting_at_index where this chunk's range begins
                let start_index = i * num_hashes_per_chunk;
                // index into get_hash_slice_starting_at_index where this chunk's range ends
                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);

                // will compute the final result for this closure
                let mut hasher = Hasher::default();

                // index into 'data' where we are currently pulling data
                // if we exhaust our data, then we will request a new slice, and data_index resets to 0, the beginning of the new slice
                let mut data_index = start_index;
                // source data, which we may refresh when we exhaust
                let mut data = data;
                // len of the source data
                let mut data_len = data_len;

                if !three_level {
                    // 1 group of fanout
                    // The result of this loop is a single hash value from fanout input hashes.
                    for i in start_index..end_index {
                        if data_index >= data_len {
                            // we exhausted our data, fetch next slice starting at i
                            data = get_hash_slice_starting_at_index(i);
                            data_len = data.len();
                            data_index = 0;
                        }
                        hasher.hash(data[data_index].as_ref());
                        data_index += 1;
                    }
                } else {
                    // hash 3 levels of fanout simultaneously.
                    // This codepath produces 1 hash value for between 1..=fanout^3 input hashes.
                    // It is equivalent to running the normal merkle tree calculation 3 iterations on the input.
                    //
                    // big idea:
                    //  merkle trees usually reduce the input vector by a factor of fanout with each iteration
                    //  example with fanout 2:
                    //   start:     [0,1,2,3,4,5,6,7]      in our case: [...16M...] or really, 1B
                    //   iteration0 [.5, 2.5, 4.5, 6.5]                 [... 1M...]
                    //   iteration1 [1.5, 5.5]                          [...65k...]
                    //   iteration2 3.5                                 [...4k... ]
                    //  So iteration 0 consumes N elements, hashes them in groups of 'fanout' and produces a vector of N/fanout elements
                    //   and the process repeats until there is only 1 hash left.
                    //
                    //  With the three_level code path, we make each chunk we iterate of size fanout^3 (4096)
                    //  So, the input could be 16M hashes and the output will be 4k hashes, or N/fanout^3
                    //  The goal is to reduce the amount of data that has to be constructed and held in memory.
                    //  When we know we have enough hashes, then, in 1 pass, we hash 3 levels simultaneously, storing far fewer intermediate hashes.
                    //
                    // Now, some details:
                    // The result of this loop is a single hash value from fanout^3 input hashes.
                    // concepts:
                    //  what we're conceptually hashing: "raw_hashes"[start_index..end_index]
                    //   example: [a,b,c,d,e,f]
                    //   but... hashes[] may really be multiple vectors that are pieced together.
                    //   example: [[a,b],[c],[d,e,f]]
                    //   get_hash_slice_starting_at_index(any_index) abstracts that and returns a slice starting at raw_hashes[any_index..]
                    //   such that the end of get_hash_slice_starting_at_index may be <, >, or = end_index
                    //   example: get_hash_slice_starting_at_index(1) returns [b]
                    //            get_hash_slice_starting_at_index(3) returns [d,e,f]
                    // This code is basically 3 iterations of merkle tree hashing occurring simultaneously.
                    // The first fanout raw hashes are hashed in hasher_k. This is iteration0
                    // Once hasher_k has hashed fanout hashes, hasher_k's result hash is hashed in hasher_j and then discarded
                    // hasher_k then starts over fresh and hashes the next fanout raw hashes. This is iteration0 again for a new set of data.
                    // Once hasher_j has hashed fanout hashes (from k), hasher_j's result hash is hashed in hasher and then discarded
                    // Once hasher has hashed fanout hashes (from j), then the result of hasher is the hash for fanout^3 raw hashes.
                    // If there are < fanout^3 hashes, then this code stops when it runs out of raw hashes and returns whatever it hashed.
                    // This is always how the very last elements work in a merkle tree.
                    let mut i = start_index;
                    while i < end_index {
                        let mut hasher_j = Hasher::default();
                        for _j in 0..fanout {
                            let mut hasher_k = Hasher::default();
                            let end = std::cmp::min(end_index - i, fanout);
                            for _k in 0..end {
                                if data_index >= data_len {
                                    // we exhausted our data, fetch next slice starting at i
                                    data = get_hash_slice_starting_at_index(i);
                                    data_len = data.len();
                                    data_index = 0;
                                }
                                hasher_k.hash(data[data_index].as_ref());
                                data_index += 1;
                                i += 1;
                            }
                            hasher_j.hash(hasher_k.result().as_ref());
                            if i >= end_index {
                                break;
                            }
                        }
                        hasher.hash(hasher_j.result().as_ref());
                    }
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if let Some(mut specific_level_count_value) = specific_level_count {
            specific_level_count_value -= levels_hashed;
            if specific_level_count_value == 0 {
                // exactly the requested number of levels hashed: hand back intermediates
                (Hash::default(), result)
            } else {
                assert!(specific_level_count_value > 0);
                // We did not hash the number of levels required by 'specific_level_count', so repeat
                Self::compute_merkle_root_from_slices_recurse(
                    result,
                    fanout,
                    max_levels_per_pass,
                    Some(specific_level_count_value),
                )
            }
        } else {
            (
                if result.len() == 1 {
                    result[0]
                } else {
                    Self::compute_merkle_root_recurse(result, fanout)
                },
                vec![], // no intermediate results needed by caller
            )
        }
    }
466
467    pub fn compute_merkle_root_from_slices_recurse(
468        hashes: Vec<Hash>,
469        fanout: usize,
470        max_levels_per_pass: Option<usize>,
471        specific_level_count: Option<usize>,
472    ) -> (Hash, Vec<Hash>) {
473        Self::compute_merkle_root_from_slices(
474            hashes.len(),
475            fanout,
476            max_levels_per_pass,
477            |start| &hashes[start..],
478            specific_level_count,
479        )
480    }
481
482    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, Hash)>) -> Hash {
483        Self::sort_hashes_by_pubkey(&mut hashes);
484
485        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| i.1)
486    }
487
488    pub fn sort_hashes_by_pubkey(hashes: &mut Vec<(Pubkey, Hash)>) {
489        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
490    }
491
492    pub fn compare_two_hash_entries(
493        a: &CalculateHashIntermediate,
494        b: &CalculateHashIntermediate,
495    ) -> std::cmp::Ordering {
496        // note partial_cmp only returns None with floating point comparisons
497        a.pubkey.partial_cmp(&b.pubkey).unwrap()
498    }
499
500    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
501        balance
502            .try_into()
503            .expect("overflow is detected while summing capitalization")
504    }
505
    /// De-dups and filters the per-slot-group, per-bin account data, one bin at
    /// a time in parallel, returning the surviving hashes per bin and the total
    /// carat sum. Per-bin size/count metrics are folded into `stats`.
    fn de_dup_and_eliminate_zeros(
        sorted_data_by_pubkey: Vec<Vec<Vec<CalculateHashIntermediate>>>,
        stats: &mut HashStats,
        max_bin: usize,
    ) -> (Vec<Vec<Hash>>, u64) {
        // 1. eliminate zero carat accounts
        // 2. pick the highest slot or (slot = and highest version) of each pubkey
        // 3. produce this output:
        // a. vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
        //      vec: individual hashes in pubkey order, 1 hash per
        // b. carats
        let mut zeros = Measure::start("eliminate zeros");
        // shared accumulator across the parallel workers below:
        // (min bin size, max bin size, carat sum, unreduced entries, hash count)
        let min_max_sum_entries_hashes = Mutex::new((usize::MAX, usize::MIN, 0u64, 0usize, 0usize));
        let hashes: Vec<Vec<Hash>> = (0..max_bin)
            .into_par_iter()
            .map(|bin| {
                let (hashes, carats_bin, unreduced_entries_count) =
                    Self::de_dup_accounts_in_parallel(&sorted_data_by_pubkey, bin);
                {
                    // fold this bin's results into the shared accumulator
                    let mut lock = min_max_sum_entries_hashes.lock().unwrap();
                    let (mut min, mut max, mut carats_sum, mut entries, mut hash_total) = *lock;
                    min = std::cmp::min(min, unreduced_entries_count);
                    max = std::cmp::max(max, unreduced_entries_count);
                    // checked add: panics if total capitalization overflows u64
                    carats_sum = Self::checked_cast_for_capitalization(
                        carats_sum as u128 + carats_bin as u128,
                    );
                    entries += unreduced_entries_count;
                    hash_total += hashes.len();
                    *lock = (min, max, carats_sum, entries, hash_total);
                }
                hashes
            })
            .collect();
        zeros.stop();
        stats.zeros_time_total_us += zeros.as_us();
        let (min, max, carats_sum, entries, hash_total) =
            *min_max_sum_entries_hashes.lock().unwrap();
        stats.min_bin_size = min;
        stats.max_bin_size = max;
        stats.unreduced_entries += entries;
        stats.hash_total += hash_total;
        (hashes, carats_sum)
    }
549
    // returns true if this vector was exhausted
    /// Consumes the current item for `first_items[min_index]`, advancing that
    /// slot group's cursor past any further entries with the same pubkey.
    /// Returns (whether that slot group's bin is now exhausted, a reference to
    /// the last-scanned entry for this pubkey).
    fn get_item<'a, 'b>(
        min_index: usize,
        bin: usize,
        first_items: &'a mut Vec<(Pubkey, usize)>,
        pubkey_division: &'b [Vec<Vec<CalculateHashIntermediate>>],
        indexes: &'a mut Vec<usize>,
    ) -> (bool, &'b CalculateHashIntermediate) {
        let first_item = first_items[min_index];
        let key = &first_item.0;
        let division_index = first_item.1;
        let bin = &pubkey_division[division_index][bin];
        let mut index = indexes[division_index];
        index += 1;
        while index < bin.len() {
            // still more items where we found the previous key, so just increment the index for that slot group, skipping all pubkeys that are equal
            if &bin[index].pubkey == key {
                index += 1;
                continue; // duplicate entries of same pubkey, so keep skipping
            }

            // point to the next pubkey > key
            first_items[min_index] = (bin[index].pubkey, division_index);
            indexes[division_index] = index;
            break;
        }

        (
            if index >= bin.len() {
                first_items.remove(min_index); // stop looking in this vector - we exhausted it
                true
            } else {
                false
            }, // this is the last item with this pubkey
            &bin[index - 1],
        )
    }
587
    // go through: [..][pubkey_bin][..] and return hashes and carat sum
    //   slot groups^                ^accounts found in a slot group, sorted by pubkey, higher slot, write_version
    // 1. eliminate zero carat accounts
    // 2. pick the highest slot or (slot = and highest version) of each pubkey
    // 3. produce this output:
    //   a. vec: individual hashes in pubkey order
    //   b. carat sum
    //   c. unreduced count (ie. including duplicates and zero carat)
    /// K-way merge over one pubkey bin across all slot groups: keeps one entry
    /// per pubkey (an equal pubkey in a later slot group masks the earlier one),
    /// skips entries whose carats equal `ZERO_RAW_CARATS_SENTINEL`, and returns
    /// (hashes in pubkey order, carat sum, unreduced entry count).
    fn de_dup_accounts_in_parallel(
        pubkey_division: &[Vec<Vec<CalculateHashIntermediate>>],
        pubkey_bin: usize,
    ) -> (Vec<Hash>, u64, usize) {
        let len = pubkey_division.len();
        let mut item_len = 0;
        // per-slot-group cursor into its bin
        let mut indexes = vec![0; len];
        // (current front pubkey, slot-group index) for each non-empty slot group
        let mut first_items = Vec::with_capacity(len);

        // seed the merge with each slot group's first item in this bin
        pubkey_division.iter().enumerate().for_each(|(i, bins)| {
            if bins.len() > pubkey_bin {
                let sub = &bins[pubkey_bin];
                if !sub.is_empty() {
                    item_len += bins[pubkey_bin].len();
                    first_items.push((bins[pubkey_bin][0].pubkey, i));
                }
            }
        });
        let mut overall_sum = 0;
        let mut hashes: Vec<Hash> = Vec::with_capacity(item_len);

        while !first_items.is_empty() {
            let mut loop_stop = { first_items.len() - 1 }; // we increment at the beginning of the loop
            let mut min_index = 0;
            let mut min_pubkey = first_items[min_index].0;
            let mut first_item_index = 0; // we will start iterating at item 1. +=1 is first instruction in loop

            while first_item_index < loop_stop {
                first_item_index += 1;
                let (key, _) = first_items[first_item_index];
                let cmp = min_pubkey.cmp(&key);
                match cmp {
                    std::cmp::Ordering::Less => {
                        continue; // we still have the min item
                    }
                    std::cmp::Ordering::Equal => {
                        // we found an item that masks an earlier slot, so skip the earlier item
                        let (exhausted, _) = Self::get_item(
                            min_index,
                            pubkey_bin,
                            &mut first_items,
                            pubkey_division,
                            &mut indexes,
                        );
                        if exhausted {
                            // this whole vector is exhausted, so we have to back our indices up since our search set has been reduced out from under us
                            first_item_index -= 1;
                            loop_stop -= 1;
                        }
                    }
                    std::cmp::Ordering::Greater => (),
                }
                // this is the new min pubkey
                min_index = first_item_index;
                min_pubkey = key;
            }

            // get the min item, add carats, get hash
            let (_, item) = Self::get_item(
                min_index,
                pubkey_bin,
                &mut first_items,
                pubkey_division,
                &mut indexes,
            );
            // zero-carat accounts (sentinel) contribute neither hash nor carats
            if item.carats != ZERO_RAW_CARATS_SENTINEL {
                overall_sum = Self::checked_cast_for_capitalization(
                    item.carats as u128 + overall_sum as u128,
                );
                hashes.push(item.hash);
            }
        }
        (hashes, overall_sum, item_len)
    }
670
671    // input:
672    // vec: group of slot data, ordered by Slot (low to high)
673    //   vec: [0..bins] - where bins are pubkey ranges (these are ordered by Pubkey range)
674    //     vec: [..] - items which fit in the containing bin. Sorted by: Pubkey, higher Slot, higher Write version (if pubkey =)
675    pub fn rest_of_hash_calculation(
676        data_sections_by_pubkey: Vec<Vec<Vec<CalculateHashIntermediate>>>,
677        mut stats: &mut HashStats,
678        is_last_pass: bool,
679        mut previous_state: PreviousPass,
680        max_bin: usize,
681    ) -> (Hash, u64, PreviousPass) {
682        let (mut hashes, mut total_carats) =
683            Self::de_dup_and_eliminate_zeros(data_sections_by_pubkey, &mut stats, max_bin);
684
685        total_carats += previous_state.carats;
686
687        if !previous_state.remaining_unhashed.is_empty() {
688            // These items were not hashed last iteration because they didn't divide evenly.
689            // These are hashes for pubkeys that are < the pubkeys we are looking at now, so their hashes go first in order.
690            hashes.insert(0, previous_state.remaining_unhashed);
691            previous_state.remaining_unhashed = Vec::new();
692        }
693
694        let mut next_pass = PreviousPass::default();
695        let cumulative = CumulativeOffsets::from_raw(&hashes);
696        let mut hash_total = cumulative.total_count;
697        next_pass.reduced_hashes = previous_state.reduced_hashes;
698
699        const TARGET_FANOUT_LEVEL: usize = 3;
700        let target_fanout = MERKLE_FANOUT.pow(TARGET_FANOUT_LEVEL as u32);
701
702        if !is_last_pass {
703            next_pass.carats = total_carats;
704            total_carats = 0;
705
706            // Save hashes that don't evenly hash. They will be combined with hashes from the next pass.
707            let left_over_hashes = hash_total % target_fanout;
708
709            // move tail hashes that don't evenly hash into a 1d vector for next time
710            let mut i = hash_total - left_over_hashes;
711            while i < hash_total {
712                let data = cumulative.get_slice(&hashes, i);
713                next_pass.remaining_unhashed.extend(data);
714                i += data.len();
715            }
716
717            hash_total -= left_over_hashes; // this is enough to cause the hashes at the end of the data set to be ignored
718        }
719
720        // if we have raw hashes to process and
721        //   we are not the last pass (we already modded against target_fanout) OR
722        //   we have previously surpassed target_fanout and hashed some already to the target_fanout level. In that case, we know
723        //     we need to hash whatever is left here to the target_fanout level.
724        if hash_total != 0 && (!is_last_pass || !next_pass.reduced_hashes.is_empty()) {
725            let mut hash_time = Measure::start("hash");
726            let partial_hashes = Self::compute_merkle_root_from_slices(
727                hash_total, // note this does not include the ones that didn't divide evenly, unless we're in the last iteration
728                MERKLE_FANOUT,
729                Some(TARGET_FANOUT_LEVEL),
730                |start| cumulative.get_slice(&hashes, start),
731                Some(TARGET_FANOUT_LEVEL),
732            )
733            .1;
734            hash_time.stop();
735            stats.hash_time_total_us += hash_time.as_us();
736            stats.hash_time_pre_us += hash_time.as_us();
737            next_pass.reduced_hashes.push(partial_hashes);
738        }
739
740        let no_progress = is_last_pass && next_pass.reduced_hashes.is_empty() && !hashes.is_empty();
741        if no_progress {
742            // we never made partial progress, so hash everything now
743            hashes.into_iter().for_each(|v| {
744                if !v.is_empty() {
745                    next_pass.reduced_hashes.push(v);
746                }
747            });
748        }
749
750        let hash = if is_last_pass {
751            let cumulative = CumulativeOffsets::from_raw(&next_pass.reduced_hashes);
752
753            let hash = if cumulative.total_count == 1 && !no_progress {
754                // all the passes resulted in a single hash, that means we're done, so we had <= MERKLE_ROOT total hashes
755                cumulative.get_slice(&next_pass.reduced_hashes, 0)[0]
756            } else {
757                let mut hash_time = Measure::start("hash");
758                // hash all the rest and combine and hash until we have only 1 hash left
759                let (hash, _) = Self::compute_merkle_root_from_slices(
760                    cumulative.total_count,
761                    MERKLE_FANOUT,
762                    None,
763                    |start| cumulative.get_slice(&next_pass.reduced_hashes, start),
764                    None,
765                );
766                hash_time.stop();
767                stats.hash_time_total_us += hash_time.as_us();
768                hash
769            };
770            next_pass.reduced_hashes = Vec::new();
771            hash
772        } else {
773            Hash::default()
774        };
775
776        if is_last_pass {
777            stats.log();
778        }
779        (hash, total_carats, next_pass)
780    }
781}
782
783#[cfg(test)]
784pub mod tests {
785    use super::*;
786    use std::str::FromStr;
787
788    #[test]
789    fn test_accountsdb_div_ceil() {
790        assert_eq!(AccountsHash::div_ceil(10, 3), 4);
791        assert_eq!(AccountsHash::div_ceil(0, 1), 0);
792        assert_eq!(AccountsHash::div_ceil(0, 5), 0);
793        assert_eq!(AccountsHash::div_ceil(9, 3), 3);
794        assert_eq!(AccountsHash::div_ceil(9, 9), 1);
795    }
796
797    #[test]
798    #[should_panic(expected = "attempt to divide by zero")]
799    fn test_accountsdb_div_ceil_fail() {
800        assert_eq!(AccountsHash::div_ceil(10, 0), 0);
801    }
802
803    fn for_rest(
804        original: Vec<CalculateHashIntermediate>,
805    ) -> Vec<Vec<Vec<CalculateHashIntermediate>>> {
806        vec![vec![original]]
807    }
808
    #[test]
    fn test_accountsdb_rest_of_hash_calculation() {
        gemachain_logger::setup();

        let mut account_maps = Vec::new();

        // 1st key - 88 carats; survives zero-carat elimination
        let key = Pubkey::new(&[11u8; 32]);
        let hash = Hash::new(&[1u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 88, key);
        account_maps.push(val);

        // 2nd key - zero carats, so will be removed
        let key = Pubkey::new(&[12u8; 32]);
        let hash = Hash::new(&[2u8; 32]);
        let val = CalculateHashIntermediate::new(hash, ZERO_RAW_CARATS_SENTINEL, key);
        account_maps.push(val);

        // single pass (is_last_pass = true) yields the final root + carat total
        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(account_maps.clone()),
            &mut HashStats::default(),
            true,
            PreviousPass::default(),
            one_range(),
        );
        // golden root for the single surviving (key 11, 88-carat) account
        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 88));

        // 3rd key - with pubkey value before 1st key so it will be sorted first
        let key = Pubkey::new(&[10u8; 32]);
        let hash = Hash::new(&[2u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 20, key);
        account_maps.insert(0, val);

        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(account_maps.clone()),
            &mut HashStats::default(),
            true,
            PreviousPass::default(),
            one_range(),
        );
        // root now covers both surviving accounts; carats = 88 + 20
        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 108));

        // 3rd key - with later slot
        let key = Pubkey::new(&[10u8; 32]);
        let hash = Hash::new(&[99u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 30, key);
        account_maps.insert(1, val);

        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(account_maps),
            &mut HashStats::default(),
            true,
            PreviousPass::default(),
            one_range(),
        );
        // 118 = 88 + 30: the later (30-carat) entry for key 10 supersedes the
        // earlier 20-carat one during de-dup (entries are in slot order)
        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 118));
    }
868
869    fn one_range() -> usize {
870        1
871    }
872
873    fn zero_range() -> usize {
874        0
875    }
876
    #[test]
    fn test_accountsdb_multi_pass_rest_of_hash_calculation() {
        gemachain_logger::setup();

        // Verifies that PreviousPass state is carried correctly across multiple
        // invocations, with empty passes inserted at different points.
        // passes:
        // 0: empty, NON-empty, empty, empty final
        // 1: NON-empty, empty final
        // 2: NON-empty, empty, empty final
        for pass in 0..3 {
            let mut account_maps = Vec::new();

            // 1st key - 88 carats; survives zero-carat elimination
            let key = Pubkey::new(&[11u8; 32]);
            let hash = Hash::new(&[1u8; 32]);
            let val = CalculateHashIntermediate::new(hash, 88, key);
            account_maps.push(val);

            // 2nd key - zero carats, so will be removed
            let key = Pubkey::new(&[12u8; 32]);
            let hash = Hash::new(&[2u8; 32]);
            let val = CalculateHashIntermediate::new(hash, ZERO_RAW_CARATS_SENTINEL, key);
            account_maps.push(val);

            let mut previous_pass = PreviousPass::default();

            if pass == 0 {
                // first pass that is not last and is empty
                let result = AccountsHash::rest_of_hash_calculation(
                    vec![vec![vec![]]],
                    &mut HashStats::default(),
                    false, // not last pass
                    previous_pass,
                    one_range(),
                );
                // an empty non-final pass produces nothing and carries nothing
                assert_eq!(result.0, Hash::default());
                assert_eq!(result.1, 0);
                previous_pass = result.2;
                assert_eq!(previous_pass.remaining_unhashed.len(), 0);
                assert_eq!(previous_pass.reduced_hashes.len(), 0);
                assert_eq!(previous_pass.carats, 0);
            }

            let result = AccountsHash::rest_of_hash_calculation(
                for_rest(account_maps.clone()),
                &mut HashStats::default(),
                false, // not last pass
                previous_pass,
                one_range(),
            );

            // non-final pass: no root or carat total is reported yet ...
            assert_eq!(result.0, Hash::default());
            assert_eq!(result.1, 0);
            let mut previous_pass = result.2;
            // ... but the surviving account's hash and carats are deferred
            assert_eq!(previous_pass.remaining_unhashed, vec![account_maps[0].hash]);
            assert_eq!(previous_pass.reduced_hashes.len(), 0);
            assert_eq!(previous_pass.carats, account_maps[0].carats);

            let expected_hash =
                Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
            if pass == 2 {
                // an extra empty non-final pass must carry the state through unchanged
                let result = AccountsHash::rest_of_hash_calculation(
                    vec![vec![vec![]]],
                    &mut HashStats::default(),
                    false,
                    previous_pass,
                    one_range(),
                );

                previous_pass = result.2;
                assert_eq!(previous_pass.remaining_unhashed, vec![account_maps[0].hash]);
                assert_eq!(previous_pass.reduced_hashes.len(), 0);
                assert_eq!(previous_pass.carats, account_maps[0].carats);
            }

            let result = AccountsHash::rest_of_hash_calculation(
                vec![vec![vec![]]],
                &mut HashStats::default(),
                true, // finally, last pass
                previous_pass,
                one_range(),
            );
            let previous_pass = result.2;

            // the final pass drains all carried state ...
            assert_eq!(previous_pass.remaining_unhashed.len(), 0);
            assert_eq!(previous_pass.reduced_hashes.len(), 0);
            assert_eq!(previous_pass.carats, 0);

            // ... and yields the same root/carats as a single-pass run
            assert_eq!((result.0, result.1), (expected_hash, 88));
        }
    }
966
    #[test]
    fn test_accountsdb_multi_pass_rest_of_hash_calculation_partial() {
        gemachain_logger::setup();

        // Two accounts split across two non-final passes; the final pass must
        // combine the carried tail hashes into the correct merkle root.
        let mut account_maps = Vec::new();

        let key = Pubkey::new(&[11u8; 32]);
        let hash = Hash::new(&[1u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 88, key);
        account_maps.push(val);

        let key = Pubkey::new(&[12u8; 32]);
        let hash = Hash::new(&[2u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 20, key);
        account_maps.push(val);

        // pass 1: only the first account
        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(vec![account_maps[0].clone()]),
            &mut HashStats::default(),
            false, // not last pass
            PreviousPass::default(),
            one_range(),
        );

        assert_eq!(result.0, Hash::default());
        assert_eq!(result.1, 0);
        let previous_pass = result.2;
        // first account's hash/carats are deferred to the next pass
        assert_eq!(previous_pass.remaining_unhashed, vec![account_maps[0].hash]);
        assert_eq!(previous_pass.reduced_hashes.len(), 0);
        assert_eq!(previous_pass.carats, account_maps[0].carats);

        // pass 2: only the second account, fed the carried state
        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(vec![account_maps[1].clone()]),
            &mut HashStats::default(),
            false, // not last pass
            previous_pass,
            one_range(),
        );

        assert_eq!(result.0, Hash::default());
        assert_eq!(result.1, 0);
        let previous_pass = result.2;
        // both hashes now accumulate in the unhashed tail; carats sum
        assert_eq!(
            previous_pass.remaining_unhashed,
            vec![account_maps[0].hash, account_maps[1].hash]
        );
        assert_eq!(previous_pass.reduced_hashes.len(), 0);
        let total_carats_expected = account_maps[0].carats + account_maps[1].carats;
        assert_eq!(previous_pass.carats, total_carats_expected);

        // final (empty) pass hashes the accumulated tail into the root
        let result = AccountsHash::rest_of_hash_calculation(
            vec![vec![vec![]]],
            &mut HashStats::default(),
            true,
            previous_pass,
            one_range(),
        );

        let previous_pass = result.2;
        assert_eq!(previous_pass.remaining_unhashed.len(), 0);
        assert_eq!(previous_pass.reduced_hashes.len(), 0);
        assert_eq!(previous_pass.carats, 0);

        // reference result: merkle root computed directly over both accounts
        let expected_hash = AccountsHash::compute_merkle_root(
            account_maps
                .iter()
                .map(|a| (a.pubkey, a.hash))
                .collect::<Vec<_>>(),
            MERKLE_FANOUT,
        );

        assert_eq!(
            (result.0, result.1),
            (expected_hash, total_carats_expected)
        );
    }
1043
    #[test]
    fn test_accountsdb_multi_pass_rest_of_hash_calculation_partial_hashes() {
        gemachain_logger::setup();

        let mut account_maps = Vec::new();

        // Build 2 * (16^3 + 1) accounts so each non-final pass crosses the
        // target-fanout boundary: it reduces one full fanout group and leaves
        // a remainder hash for the next pass.
        const TARGET_FANOUT_LEVEL: usize = 3;
        let target_fanout = MERKLE_FANOUT.pow(TARGET_FANOUT_LEVEL as u32);
        let mut total_carats_expected = 0;
        let plus1 = target_fanout + 1;
        for i in 0..plus1 * 2 {
            let carats = (i + 1) as u64;
            total_carats_expected += carats;
            let key = Pubkey::new_unique();
            let hash = Hash::new_unique();
            let val = CalculateHashIntermediate::new(hash, carats, key);
            account_maps.push(val);
        }

        // pre-sort so we can predict which entry becomes the leftover
        let mut chunk = account_maps[0..plus1].to_vec();
        chunk.sort_by(AccountsHash::compare_two_hash_entries);
        let sorted = chunk.clone();

        // first 4097 hashes (1 left over)
        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(chunk),
            &mut HashStats::default(),
            false, // not last pass
            PreviousPass::default(),
            one_range(),
        );

        assert_eq!(result.0, Hash::default());
        assert_eq!(result.1, 0);
        let previous_pass = result.2;
        // exactly one hash (the last sorted entry) did not fit the fanout group
        let left_over_1 = sorted[plus1 - 1].hash;
        assert_eq!(previous_pass.remaining_unhashed, vec![left_over_1]);
        assert_eq!(previous_pass.reduced_hashes.len(), 1);
        // the reduced hash must equal the merkle root of the first full group
        let expected_hash = AccountsHash::compute_merkle_root(
            sorted[0..target_fanout]
                .iter()
                .map(|a| (a.pubkey, a.hash))
                .collect::<Vec<_>>(),
            MERKLE_FANOUT,
        );
        assert_eq!(previous_pass.reduced_hashes[0], vec![expected_hash]);
        assert_eq!(
            previous_pass.carats,
            account_maps[0..plus1]
                .iter()
                .map(|i| i.carats)
                .sum::<u64>()
        );

        let mut chunk = account_maps[plus1..plus1 * 2].to_vec();
        chunk.sort_by(AccountsHash::compare_two_hash_entries);
        let sorted2 = chunk.clone();

        // second pass's full group = carried leftover + first 4095 new hashes
        let mut with_left_over = vec![left_over_1];
        with_left_over.extend(sorted2[0..plus1 - 2].to_vec().into_iter().map(|i| i.hash));
        let expected_hash2 = AccountsHash::compute_merkle_root(
            with_left_over[0..target_fanout]
                .iter()
                .map(|a| (Pubkey::default(), *a))
                .collect::<Vec<_>>(),
            MERKLE_FANOUT,
        );

        // second 4097 hashes (2 left over)
        let result = AccountsHash::rest_of_hash_calculation(
            for_rest(chunk),
            &mut HashStats::default(),
            false, // not last pass
            previous_pass,
            one_range(),
        );

        assert_eq!(result.0, Hash::default());
        assert_eq!(result.1, 0);
        let previous_pass = result.2;
        assert_eq!(
            previous_pass.remaining_unhashed,
            vec![sorted2[plus1 - 2].hash, sorted2[plus1 - 1].hash]
        );
        assert_eq!(previous_pass.reduced_hashes.len(), 2);
        assert_eq!(
            previous_pass.reduced_hashes,
            vec![vec![expected_hash], vec![expected_hash2]]
        );
        assert_eq!(
            previous_pass.carats,
            account_maps[0..plus1 * 2]
                .iter()
                .map(|i| i.carats)
                .sum::<u64>()
        );

        // final (empty) pass combines reduced hashes + remaining tail
        let result = AccountsHash::rest_of_hash_calculation(
            vec![vec![vec![]]],
            &mut HashStats::default(),
            true,
            previous_pass,
            one_range(),
        );

        let previous_pass = result.2;
        assert_eq!(previous_pass.remaining_unhashed.len(), 0);
        assert_eq!(previous_pass.reduced_hashes.len(), 0);
        assert_eq!(previous_pass.carats, 0);

        // reference: single merkle root over all accounts in sorted order
        let mut combined = sorted;
        combined.extend(sorted2);
        let expected_hash = AccountsHash::compute_merkle_root(
            combined
                .iter()
                .map(|a| (a.pubkey, a.hash))
                .collect::<Vec<_>>(),
            MERKLE_FANOUT,
        );

        assert_eq!(
            (result.0, result.1),
            (expected_hash, total_carats_expected)
        );
    }
1169
1170    #[test]
1171    fn test_accountsdb_de_dup_accounts_zero_chunks() {
1172        let (hashes, carats, _) = AccountsHash::de_dup_accounts_in_parallel(
1173            &[vec![vec![CalculateHashIntermediate::default()]]],
1174            0,
1175        );
1176        assert_eq!(vec![Hash::default()], hashes);
1177        assert_eq!(carats, 0);
1178    }
1179
1180    #[test]
1181    fn test_accountsdb_de_dup_accounts_empty() {
1182        gemachain_logger::setup();
1183
1184        let (hashes, carats) = AccountsHash::de_dup_and_eliminate_zeros(
1185            vec![vec![], vec![]],
1186            &mut HashStats::default(),
1187            one_range(),
1188        );
1189        assert_eq!(
1190            vec![Hash::default(); 0],
1191            hashes.into_iter().flatten().collect::<Vec<_>>()
1192        );
1193        assert_eq!(carats, 0);
1194
1195        let (hashes, carats) = AccountsHash::de_dup_and_eliminate_zeros(
1196            vec![],
1197            &mut HashStats::default(),
1198            zero_range(),
1199        );
1200        let empty: Vec<Vec<Hash>> = Vec::default();
1201        assert_eq!(empty, hashes);
1202        assert_eq!(carats, 0);
1203
1204        let (hashes, carats, _) = AccountsHash::de_dup_accounts_in_parallel(&[], 1);
1205        assert_eq!(vec![Hash::default(); 0], hashes);
1206        assert_eq!(carats, 0);
1207
1208        let (hashes, carats, _) = AccountsHash::de_dup_accounts_in_parallel(&[], 2);
1209        assert_eq!(vec![Hash::default(); 0], hashes);
1210        assert_eq!(carats, 0);
1211    }
1212
    #[test]
    fn test_accountsdb_de_dup_accounts_from_stores() {
        gemachain_logger::setup();

        // Exhaustively de-dups every contiguous sub-slice of a fixed 6-entry
        // input (keys a,b,b,b,c,c) through both entry points and checks the
        // results against a golden table of (input, is_last_slice, carats, hashes).
        let key_a = Pubkey::new(&[1u8; 32]);
        let key_b = Pubkey::new(&[2u8; 32]);
        let key_c = Pubkey::new(&[3u8; 32]);
        const COUNT: usize = 6;
        let hashes = (0..COUNT).into_iter().map(|i| Hash::new(&[i as u8; 32]));
        // create this vector
        // abbbcc
        let keys = [key_a, key_b, key_b, key_b, key_c, key_c];

        // carats are 1-based so every entry is non-zero
        let accounts: Vec<_> = hashes
            .zip(keys.iter())
            .enumerate()
            .map(|(i, (hash, key))| CalculateHashIntermediate::new(hash, (i + 1) as u64, *key))
            .collect();

        type ExpectedType = (String, bool, u64, String);
        let expected:Vec<ExpectedType> = vec![
            // ("key/carats key2/carats ...",
            // is_last_slice
            // result carats
            // result hashes)
            // "a5" = key_a, 5 carats
            ("a1", false, 1, "[11111111111111111111111111111111]"),
            ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("a1", true, 1, "[11111111111111111111111111111111]"),
            ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ].into_iter().map(|item| {
                let result: ExpectedType = (
                    item.0.to_string(),
                    item.1,
                    item.2,
                    item.3.to_string(),
                );
                result
            }).collect();

        // walk every (start, end) sub-slice, once per is_last_slice value;
        // NOTE(review): the table repeats identically for both values, so
        // is_last_slice appears to be informational only here
        let mut expected_index = 0;
        for last_slice in 0..2 {
            for start in 0..COUNT {
                for end in start + 1..COUNT {
                    let is_last_slice = last_slice == 1;
                    let accounts = accounts.clone();
                    let slice = &accounts[start..end];

                    let slice2 = vec![vec![slice.to_vec()]];
                    let slice = &slice2[..];
                    // the parallel entry point is called twice to check determinism
                    let (hashes2, carats2, _) =
                        AccountsHash::de_dup_accounts_in_parallel(slice, 0);
                    let (hashes3, carats3, _) =
                        AccountsHash::de_dup_accounts_in_parallel(slice, 0);
                    // the higher-level entry point is called three times, likewise
                    let (hashes4, carats4) = AccountsHash::de_dup_and_eliminate_zeros(
                        slice.to_vec(),
                        &mut HashStats::default(),
                        end - start,
                    );
                    let (hashes5, carats5) = AccountsHash::de_dup_and_eliminate_zeros(
                        slice.to_vec(),
                        &mut HashStats::default(),
                        end - start,
                    );
                    let (hashes6, carats6) = AccountsHash::de_dup_and_eliminate_zeros(
                        slice.to_vec(),
                        &mut HashStats::default(),
                        end - start,
                    );

                    // all entry points and repetitions must agree
                    assert_eq!(hashes2, hashes3);
                    let expected2 = hashes2.clone();
                    assert_eq!(
                        expected2,
                        hashes4.into_iter().flatten().collect::<Vec<_>>(),
                        "last_slice: {}, start: {}, end: {}, slice: {:?}",
                        last_slice,
                        start,
                        end,
                        slice
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes5.iter().flatten().copied().collect::<Vec<_>>(),
                        "last_slice: {}, start: {}, end: {}, slice: {:?}",
                        last_slice,
                        start,
                        end,
                        slice
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes6.iter().flatten().copied().collect::<Vec<_>>()
                    );
                    assert_eq!(carats2, carats3);
                    assert_eq!(carats2, carats4);
                    assert_eq!(carats2, carats5);
                    assert_eq!(carats2, carats6);

                    // render the input as e.g. "a1b2b3" to match the golden table
                    let human_readable = slice[0][0]
                        .iter()
                        .map(|v| {
                            let mut s = (if v.pubkey == key_a {
                                "a"
                            } else if v.pubkey == key_b {
                                "b"
                            } else {
                                "c"
                            })
                            .to_string();

                            s.push_str(&v.carats.to_string());
                            s
                        })
                        .collect::<String>();

                    let hash_result_as_string = format!("{:?}", hashes2);

                    let packaged_result: ExpectedType = (
                        human_readable,
                        is_last_slice,
                        carats2 as u64,
                        hash_result_as_string,
                    );
                    assert_eq!(expected[expected_index], packaged_result);

                    // for generating expected results
                    // error!("{:?},", packaged_result);
                    expected_index += 1;
                }
            }
        }
    }
1372
1373    #[test]
1374    fn test_accountsdb_compare_two_hash_entries() {
1375        gemachain_logger::setup();
1376        let key = Pubkey::new_unique();
1377        let hash = Hash::new_unique();
1378        let val = CalculateHashIntermediate::new(hash, 1, key);
1379
1380        // slot same, version <
1381        let hash2 = Hash::new_unique();
1382        let val2 = CalculateHashIntermediate::new(hash2, 4, key);
1383        assert_eq!(
1384            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1385            AccountsHash::compare_two_hash_entries(&val, &val2)
1386        );
1387
1388        // slot same, vers =
1389        let hash3 = Hash::new_unique();
1390        let val3 = CalculateHashIntermediate::new(hash3, 2, key);
1391        assert_eq!(
1392            std::cmp::Ordering::Equal,
1393            AccountsHash::compare_two_hash_entries(&val, &val3)
1394        );
1395
1396        // slot same, vers >
1397        let hash4 = Hash::new_unique();
1398        let val4 = CalculateHashIntermediate::new(hash4, 6, key);
1399        assert_eq!(
1400            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1401            AccountsHash::compare_two_hash_entries(&val, &val4)
1402        );
1403
1404        // slot >, version <
1405        let hash5 = Hash::new_unique();
1406        let val5 = CalculateHashIntermediate::new(hash5, 8, key);
1407        assert_eq!(
1408            std::cmp::Ordering::Equal, // no longer comparing slots or versions
1409            AccountsHash::compare_two_hash_entries(&val, &val5)
1410        );
1411    }
1412
1413    fn test_de_dup_accounts_in_parallel(
1414        account_maps: &[CalculateHashIntermediate],
1415    ) -> (Vec<Hash>, u64, usize) {
1416        AccountsHash::de_dup_accounts_in_parallel(&vec![vec![account_maps.to_vec()]][..], 0)
1417    }
1418
1419    #[test]
1420    fn test_accountsdb_remove_zero_balance_accounts() {
1421        gemachain_logger::setup();
1422
1423        let key = Pubkey::new_unique();
1424        let hash = Hash::new_unique();
1425        let mut account_maps = Vec::new();
1426        let val = CalculateHashIntermediate::new(hash, 1, key);
1427        account_maps.push(val.clone());
1428
1429        let result = test_de_dup_accounts_in_parallel(&account_maps[..]);
1430        assert_eq!(result, (vec![val.hash], val.carats as u64, 1));
1431
1432        // zero original carats, higher version
1433        let val = CalculateHashIntermediate::new(hash, ZERO_RAW_CARATS_SENTINEL, key);
1434        account_maps.push(val); // has to be after previous entry since account_maps are in slot order
1435
1436        let result = test_de_dup_accounts_in_parallel(&account_maps[..]);
1437        assert_eq!(result, (vec![], 0, 2));
1438    }
1439
    #[test]
    fn test_accountsdb_cumulative_offsets1_d() {
        // Offsets over a 1-D ragged structure: empty inner vectors are skipped.
        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        // index[DIMENSION] records which source vector each offset came from
        const DIMENSION: usize = 0;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);

        // start_offset is the position in the flattened stream
        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        // get_slice from any flat start must match the flattened source
        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // same data, but a leading empty vector shifts the recorded indexes
        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        // degenerate case: a single empty vector yields no offsets at all
        let input: Vec<Vec<u32>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
    }
1492
1493    #[should_panic(expected = "is_empty")]
1494    #[test]
1495    fn test_accountsdb_cumulative_find_empty() {
1496        let input = CumulativeOffsets {
1497            cumulative_offsets: vec![],
1498            total_count: 0,
1499        };
1500        input.find(0);
1501    }
1502
1503    #[test]
1504    fn test_accountsdb_cumulative_find() {
1505        let input = CumulativeOffsets {
1506            cumulative_offsets: vec![CumulativeOffset {
1507                index: vec![0],
1508                start_offset: 0,
1509            }],
1510            total_count: 0,
1511        };
1512        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0]));
1513
1514        let input = CumulativeOffsets {
1515            cumulative_offsets: vec![
1516                CumulativeOffset {
1517                    index: vec![0],
1518                    start_offset: 0,
1519                },
1520                CumulativeOffset {
1521                    index: vec![1],
1522                    start_offset: 2,
1523                },
1524            ],
1525            total_count: 0,
1526        };
1527        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0])); // = first start_offset
1528        assert_eq!(input.find(1), (1, &input.cumulative_offsets[0])); // > first start_offset
1529        assert_eq!(input.find(2), (0, &input.cumulative_offsets[1])); // = last start_offset
1530        assert_eq!(input.find(3), (1, &input.cumulative_offsets[1])); // > last start_offset
1531    }
1532
1533    #[test]
1534    fn test_accountsdb_cumulative_offsets2_d() {
1535        let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
1536        let cumulative = CumulativeOffsets::from_raw_2d(&input);
1537
1538        let src: Vec<_> = input
1539            .clone()
1540            .into_iter()
1541            .flatten()
1542            .into_iter()
1543            .flatten()
1544            .collect();
1545        let len = src.len();
1546        assert_eq!(cumulative.total_count, len);
1547        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
1548
1549        const DIMENSION_0: usize = 0;
1550        const DIMENSION_1: usize = 1;
1551        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
1552        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
1553        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
1554        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);
1555
1556        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
1557        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
1558
1559        for start in 0..len {
1560            let slice: &[u64] = cumulative.get_slice(&input, start);
1561            let len = slice.len();
1562            assert!(len > 0);
1563            assert_eq!(&src[start..(start + len)], slice);
1564        }
1565
1566        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
1567        let cumulative = CumulativeOffsets::from_raw_2d(&input);
1568
1569        let src: Vec<_> = input
1570            .clone()
1571            .into_iter()
1572            .flatten()
1573            .into_iter()
1574            .flatten()
1575            .collect();
1576        let len = src.len();
1577        assert_eq!(cumulative.total_count, len);
1578        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
1579
1580        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
1581        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
1582        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
1583        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);
1584
1585        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
1586        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
1587
1588        for start in 0..len {
1589            let slice: &[u64] = cumulative.get_slice(&input, start);
1590            let len = slice.len();
1591            assert!(len > 0);
1592            assert_eq!(&src[start..(start + len)], slice);
1593        }
1594
1595        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
1596        let cumulative = CumulativeOffsets::from_raw_2d(&input);
1597
1598        let len = input.into_iter().flatten().count();
1599        assert_eq!(cumulative.total_count, len);
1600        assert_eq!(cumulative.cumulative_offsets.len(), 0); // 2 non-empty vectors
1601
1602        let input = vec![
1603            vec![vec![0, 1]],
1604            vec![vec![]],
1605            vec![vec![], vec![2, 3, 4], vec![]],
1606        ];
1607        let cumulative = CumulativeOffsets::from_raw_2d(&input);
1608
1609        let src: Vec<_> = input
1610            .clone()
1611            .into_iter()
1612            .flatten()
1613            .into_iter()
1614            .flatten()
1615            .collect();
1616        let len = src.len();
1617        assert_eq!(cumulative.total_count, len);
1618        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
1619
1620        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
1621        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
1622        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
1623        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);
1624
1625        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
1626        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
1627
1628        for start in 0..len {
1629            let slice: &[u64] = cumulative.get_slice(&input, start);
1630            let len = slice.len();
1631            assert!(len > 0);
1632            assert_eq!(&src[start..(start + len)], slice);
1633        }
1634    }
1635
1636    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
1637        let result = AccountsHash::compute_merkle_root(hashes.clone(), fanout);
1638        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
1639        let result2 = test_hashing(reduced, fanout);
1640        assert_eq!(result, result2, "len: {}", hashes.len());
1641        result
1642    }
1643
1644    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
1645        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
1646        let result = AccountsHash::compute_merkle_root(temp, fanout);
1647        let reduced: Vec<_> = hashes.clone();
1648        let result2 = AccountsHash::compute_merkle_root_from_slices(
1649            hashes.len(),
1650            fanout,
1651            None,
1652            |start| &reduced[start..],
1653            None,
1654        );
1655        assert_eq!(result, result2.0, "len: {}", hashes.len());
1656
1657        let result2 = AccountsHash::compute_merkle_root_from_slices(
1658            hashes.len(),
1659            fanout,
1660            Some(1),
1661            |start| &reduced[start..],
1662            None,
1663        );
1664        assert_eq!(result, result2.0, "len: {}", hashes.len());
1665
1666        let max = std::cmp::min(reduced.len(), fanout * 2);
1667        for left in 0..max {
1668            for right in left + 1..max {
1669                let src = vec![
1670                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
1671                    vec![reduced[right..].to_vec()],
1672                ];
1673                let offsets = CumulativeOffsets::from_raw_2d(&src);
1674
1675                let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
1676                let result2 = AccountsHash::compute_merkle_root_from_slices(
1677                    offsets.total_count,
1678                    fanout,
1679                    None,
1680                    get_slice,
1681                    None,
1682                );
1683                assert_eq!(result, result2.0);
1684            }
1685        }
1686        result
1687    }
1688
1689    #[test]
1690    fn test_accountsdb_compute_merkle_root_large() {
1691        gemachain_logger::setup();
1692
1693        // handle fanout^x -1, +0, +1 for a few 'x's
1694        const FANOUT: usize = 3;
1695        let mut hash_counts: Vec<_> = (1..6)
1696            .flat_map(|x| {
1697                let mark = FANOUT.pow(x);
1698                vec![mark - 1, mark, mark + 1]
1699            })
1700            .collect();
1701
1702        // saturate the test space for threshold to threshold + target
1703        // this hits right before we use the 3 deep optimization and all the way through all possible partial last chunks
1704        let target = FANOUT.pow(3);
1705        let threshold = target * FANOUT;
1706        hash_counts.extend(threshold - 1..=threshold + target);
1707
1708        for hash_count in hash_counts {
1709            let hashes: Vec<_> = (0..hash_count)
1710                .into_iter()
1711                .map(|_| Hash::new_unique())
1712                .collect();
1713
1714            test_hashing(hashes, FANOUT);
1715        }
1716    }
1717
    /// Golden test: compute merkle roots for 0..iterations inputs at fanout 2
    /// (pass 0) and at MERKLE_FANOUT (pass 1), comparing each root's base58
    /// string against values captured from a known-good run.
    #[test]
    fn test_accountsdb_compute_merkle_root() {
        gemachain_logger::setup();

        // (pass, count, expected merkle root as base58, expected carats).
        // Carats are no longer computed; the stored value is echoed back in
        // the tuple comparison below to keep these captured rows intact.
        let expected_results = vec![
            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
        ];

        let mut expected_index = 0;
        let start = 0;
        let default_fanout = 2;
        // test 0..3 recursions (at fanout = 2) and 1 item remainder. The internals have 1 special case first loop and subsequent loops are the same types.
        let iterations = default_fanout * default_fanout * default_fanout + 2;
        for pass in 0..2 {
            let fanout = if pass == 0 {
                default_fanout
            } else {
                MERKLE_FANOUT
            };
            for count in start..iterations {
                // note: the key is constant for a given (pass, count) — it
                // doesn't depend on i; only the hash varies per element
                let mut input: Vec<_> = (0..count)
                    .map(|i| {
                        let key = Pubkey::new(&[(pass * iterations + count) as u8; 32]);
                        let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]);
                        (key, hash)
                    })
                    .collect();

                let result = if pass == 0 {
                    test_hashing_larger(input.clone(), fanout)
                } else {
                    // this sorts inside
                    let early_result = AccountsHash::accumulate_account_hashes(
                        input.iter().map(|i| (i.0, i.1)).collect::<Vec<_>>(),
                    );
                    AccountsHash::sort_hashes_by_pubkey(&mut input);
                    let result = AccountsHash::compute_merkle_root(input.clone(), fanout);
                    assert_eq!(early_result, result);
                    result
                };
                // compare against captured, expected results for hash (and carats)
                assert_eq!(
                    (
                        pass,
                        count,
                        &*(result.to_string()),
                        expected_results[expected_index].3
                    ), // we no longer calculate carats
                    expected_results[expected_index]
                );
                expected_index += 1;
            }
        }
    }
1791
1792    #[test]
1793    #[should_panic(expected = "overflow is detected while summing capitalization")]
1794    fn test_accountsdb_carat_overflow() {
1795        gemachain_logger::setup();
1796
1797        let offset = 2;
1798        let input = vec![
1799            CalculateHashIntermediate::new(
1800                Hash::new(&[1u8; 32]),
1801                u64::MAX - offset,
1802                Pubkey::new_unique(),
1803            ),
1804            CalculateHashIntermediate::new(Hash::new(&[2u8; 32]), offset + 1, Pubkey::new_unique()),
1805        ];
1806        AccountsHash::de_dup_accounts_in_parallel(&[vec![input]], 0);
1807    }
1808
1809    #[test]
1810    #[should_panic(expected = "overflow is detected while summing capitalization")]
1811    fn test_accountsdb_carat_overflow2() {
1812        gemachain_logger::setup();
1813
1814        let offset = 2;
1815        let input = vec![
1816            vec![CalculateHashIntermediate::new(
1817                Hash::new(&[1u8; 32]),
1818                u64::MAX - offset,
1819                Pubkey::new_unique(),
1820            )],
1821            vec![CalculateHashIntermediate::new(
1822                Hash::new(&[2u8; 32]),
1823                offset + 1,
1824                Pubkey::new_unique(),
1825            )],
1826        ];
1827        AccountsHash::de_dup_and_eliminate_zeros(
1828            vec![input],
1829            &mut HashStats::default(),
1830            2, // accounts above are in 2 groups
1831        );
1832    }
1833}