use near_primitives_v01::hash::CryptoHash;
use near_primitives_v01::sharding::ChunkHash;
use near_primitives_v01::types::BlockHeight;
use std::cmp::Ordering;
use std::collections::{
    btree_map::{self, BTreeMap},
    hash_map::{self, HashMap},
    BinaryHeap, HashSet,
};
use tracing::warn;

type BlockHash = CryptoHash;

const MAX_BLOCKS_MISSING_CHUNKS: usize = 1024;

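/// Minimal interface the pool needs from a block: a hash to identify it and a height used for
/// ordering and pruning.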
pub trait BlockLike {
    fn hash(&self) -> BlockHash;
    fn height(&self) -> BlockHeight;
}

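/// Newtype that orders blocks by height, so the ready-to-process heap can yield blocks in
/// ascending height order via `BinaryHeap::into_sorted_vec`.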
#[derive(Debug)]
struct HeightOrdered<T>(T);

impl<T: BlockLike> PartialEq for HeightOrdered<T> {
    fn eq(&self, other: &Self) -> bool {
        self.0.height() == other.0.height()
    }
}
impl<T: BlockLike> Eq for HeightOrdered<T> {}
impl<T: BlockLike> PartialOrd for HeightOrdered<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.0.height().cmp(&other.0.height()))
    }
}
impl<T: BlockLike> Ord for HeightOrdered<T> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.height().cmp(&other.0.height())
    }
}

/// Structure for keeping track of missing chunks.
/// The pool is generic over a `Block` type parameter instead of using `block::Block` directly
/// to make testing easier (`block::Block` is a complex structure, and most of it is irrelevant
/// here).
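///
/// A minimal usage sketch (`MyBlock` is a hypothetical `BlockLike` implementation; `block` and
/// `missing_chunk_hash` are placeholder bindings):
/// ```ignore
/// let mut pool: MissingChunksPool<MyBlock> = MissingChunksPool::new();
/// pool.add_block_with_missing_chunks(block.clone(), vec![missing_chunk_hash.clone()]);
/// assert!(pool.contains(&block.hash()));
/// // Once every chunk the block was missing has been accepted, the block becomes ready.
/// pool.accept_chunk(&missing_chunk_hash);
/// assert_eq!(pool.ready_blocks(), vec![block]);
/// ```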
#[derive(Debug, Default)]
pub struct MissingChunksPool<Block: BlockLike> {
    missing_chunks: HashMap<ChunkHash, HashSet<BlockHash>>,
    blocks_missing_chunks: HashMap<BlockHash, HashSet<ChunkHash>>,
    blocks_waiting_for_chunks: HashMap<BlockHash, Block>,
    blocks_ready_to_process: BinaryHeap<HeightOrdered<Block>>,
    height_idx: BTreeMap<BlockHeight, HashSet<BlockHash>>,
}

impl<Block: BlockLike> MissingChunksPool<Block> {
    pub fn new() -> Self {
        Self {
            missing_chunks: Default::default(),
            blocks_missing_chunks: Default::default(),
            blocks_waiting_for_chunks: Default::default(),
            blocks_ready_to_process: BinaryHeap::new(),
            height_idx: Default::default(),
        }
    }

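    /// Returns whether the given block is still waiting for missing chunks.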
    pub fn contains(&self, block_hash: &BlockHash) -> bool {
        self.blocks_waiting_for_chunks.contains_key(block_hash)
    }

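    /// Number of blocks currently waiting for missing chunks.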
    pub fn len(&self) -> usize {
        self.blocks_waiting_for_chunks.len()
    }

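    /// Drains and returns the blocks whose missing chunks have all been received, sorted by
    /// height in ascending order.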
    pub fn ready_blocks(&mut self) -> Vec<Block> {
        if self.blocks_ready_to_process.is_empty() {
            return Vec::new();
        }
        let heap = std::mem::replace(&mut self.blocks_ready_to_process, BinaryHeap::new());
        heap.into_sorted_vec().into_iter().map(|x| x.0).collect()
    }

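    /// Records a block together with the chunk hashes it is missing. If the pool already tracks
    /// `MAX_BLOCKS_MISSING_CHUNKS` blocks, the new block is dropped with a warning.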
    pub fn add_block_with_missing_chunks(&mut self, block: Block, missing_chunks: Vec<ChunkHash>) {
        let block_hash = block.hash();
        // This case can only happen if missing chunks are never received, so blocks are never
        // removed from the HashMap. It means this node has severely stalled out.
        // It is ok to ignore further blocks because either (a) we will start receiving chunks
        // again, work through the backlog of the pool, then naturally sync the later blocks
        // which were not added initially, or (b) someone will restart the node because something
        // has gone horribly wrong, in which case these HashMaps will be lost anyway.
        if self.blocks_missing_chunks.len() >= MAX_BLOCKS_MISSING_CHUNKS {
            warn!(target: "chunks", "Not recording block with hash {} even though it is missing chunks. The missing chunks pool is full.", block_hash);
            return;
        }

        for chunk_hash in missing_chunks.iter().cloned() {
            let blocks_for_chunk =
                self.missing_chunks.entry(chunk_hash).or_insert_with(HashSet::new);
            blocks_for_chunk.insert(block_hash);
        }

        // Convert the missing chunks to a HashSet so `accept_chunk` can remove entries in O(1).
        let missing_chunks = missing_chunks.into_iter().collect();
        match self.blocks_missing_chunks.entry(block_hash) {
            hash_map::Entry::Vacant(entry) => {
                entry.insert(missing_chunks);
            }
            // The Occupied case should never happen since we know
            // all the missing chunks for a block the first time we receive it,
            // and we should not call `add_block_with_missing_chunks` again after
            // we know a block is missing chunks.
            hash_map::Entry::Occupied(mut entry) => {
                let previous_chunks = entry.insert(missing_chunks);
                warn!(target: "chunks", "Block with hash {} was already missing chunks {:?}.", block_hash, previous_chunks);
            }
        }

        let height = block.height();
        let blocks_at_height = self.height_idx.entry(height).or_insert_with(HashSet::new);
        blocks_at_height.insert(block_hash);
        self.blocks_waiting_for_chunks.insert(block_hash, block);
    }

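    /// Marks a chunk as received. Any block that was waiting only on this chunk is moved to the
    /// ready-to-process queue.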
    pub fn accept_chunk(&mut self, chunk_hash: &ChunkHash) {
        let block_hashes = self.missing_chunks.remove(chunk_hash).unwrap_or_else(HashSet::new);
        for block_hash in block_hashes {
            match self.blocks_missing_chunks.entry(block_hash) {
                hash_map::Entry::Occupied(mut missing_chunks_entry) => {
                    let missing_chunks = missing_chunks_entry.get_mut();
                    missing_chunks.remove(chunk_hash);
                    if missing_chunks.is_empty() {
                        // No more missing chunks!
                        missing_chunks_entry.remove_entry();
                        self.mark_block_as_ready(&block_hash);
                    }
                }
                hash_map::Entry::Vacant(_) => {
                    warn!(target: "chunks", "Invalid MissingChunksPool state. Block with hash {} was still a value of the missing_chunks map, but not present in the blocks_missing_chunks map", block_hash);
                    self.mark_block_as_ready(&block_hash);
                }
            }
        }
    }

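    /// Moves a block from the waiting set to the ready-to-process heap and cleans up its entry
    /// in the height index.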
    fn mark_block_as_ready(&mut self, block_hash: &BlockHash) {
        if let Some(block) = self.blocks_waiting_for_chunks.remove(block_hash) {
            let height = block.height();
            if let btree_map::Entry::Occupied(mut entry) = self.height_idx.entry(height) {
                let blocks_at_height = entry.get_mut();
                blocks_at_height.remove(block_hash);
                if blocks_at_height.is_empty() {
                    entry.remove_entry();
                }
            }
            self.blocks_ready_to_process.push(HeightOrdered(block));
        }
    }

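    /// Removes all bookkeeping for blocks with height strictly below `height` that are still
    /// waiting on missing chunks (blocks already in the ready queue are unaffected).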
    pub fn prune_blocks_below_height(&mut self, height: BlockHeight) {
        let heights_to_remove: Vec<BlockHeight> =
            self.height_idx.keys().copied().take_while(|h| *h < height).collect();
        for h in heights_to_remove {
            if let Some(block_hashes) = self.height_idx.remove(&h) {
                for block_hash in block_hashes {
                    self.blocks_waiting_for_chunks.remove(&block_hash);
                    if let Some(chunk_hashes) = self.blocks_missing_chunks.remove(&block_hash) {
                        for chunk_hash in chunk_hashes {
                            if let hash_map::Entry::Occupied(mut entry) =
                                self.missing_chunks.entry(chunk_hash)
                            {
                                let blocks_for_chunk = entry.get_mut();
                                blocks_for_chunk.remove(&block_hash);
                                if blocks_for_chunk.is_empty() {
                                    entry.remove_entry();
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::{BlockHash, BlockLike, MissingChunksPool, MAX_BLOCKS_MISSING_CHUNKS};
    use near_primitives_v01::hash::{hash, CryptoHash};
    use near_primitives_v01::sharding::ChunkHash;
    use near_primitives_v01::types::BlockHeight;

    fn get_hash(idx: u64) -> CryptoHash {
        hash(&idx.to_le_bytes())
    }

    fn get_chunk_hash(idx: u64) -> ChunkHash {
        ChunkHash(get_hash(idx))
    }

    #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
    struct MockBlock {
        hash: BlockHash,
        height: BlockHeight,
    }
    impl MockBlock {
        fn new(height: BlockHeight) -> Self {
            Self { hash: get_hash(height), height }
        }
    }
    impl BlockLike for MockBlock {
        fn hash(&self) -> BlockHash {
            self.hash
        }

        fn height(&self) -> BlockHeight {
            self.height
        }
    }

    #[test]
    fn should_mark_blocks_as_ready_after_all_chunks_accepted() {
        let mut pool: MissingChunksPool<MockBlock> = MissingChunksPool::default();

        let block_height = 0;
        let block = MockBlock::new(block_height);
        let chunk_hashes: Vec<ChunkHash> = (101..105).map(get_chunk_hash).collect();

        pool.add_block_with_missing_chunks(block.clone(), chunk_hashes.clone());
        assert!(pool.contains(&block.hash));

        for chunk_hash in chunk_hashes.iter().skip(1) {
            pool.accept_chunk(chunk_hash);
            assert!(pool.contains(&block.hash));
        }

        // after the last chunk is accepted the block is ready to process
        pool.accept_chunk(&chunk_hashes[0]);
        assert!(!pool.contains(&block.hash));
        assert_eq!(pool.ready_blocks(), vec![block]);
    }

    #[test]
    fn should_not_add_new_blocks_after_size_limit() {
        let mut pool: MissingChunksPool<MockBlock> = MissingChunksPool::default();
        let mut chunk_hash_idx = MAX_BLOCKS_MISSING_CHUNKS as BlockHeight;

        for block_height in 0..MAX_BLOCKS_MISSING_CHUNKS {
            let block_height = block_height as BlockHeight;
            let block = MockBlock::new(block_height);
            chunk_hash_idx += 1;
            let missing_chunk_hash = get_chunk_hash(chunk_hash_idx);
            let block_hash = block.hash;
            pool.add_block_with_missing_chunks(block, vec![missing_chunk_hash]);
            assert!(pool.contains(&block_hash));
        }

        let block_height = MAX_BLOCKS_MISSING_CHUNKS as BlockHeight;
        let block = MockBlock::new(block_height);
        chunk_hash_idx += 1;
        let missing_chunk_hash = get_chunk_hash(chunk_hash_idx);
        let block_hash = block.hash;
        pool.add_block_with_missing_chunks(block, vec![missing_chunk_hash]);
        assert!(!pool.contains(&block_hash));
    }

    #[test]
    fn should_remove_old_blocks_when_prune_called() {
        let mut pool: MissingChunksPool<MockBlock> = MissingChunksPool::default();

        let block = MockBlock::new(0);
        let early_block_hash = block.hash;
        let missing_chunk_hash = get_chunk_hash(100);
        pool.add_block_with_missing_chunks(block, vec![missing_chunk_hash]);

        let block_height = 1;
        let block = MockBlock::new(block_height);
        let missing_chunk_hash = get_chunk_hash(200);
        pool.add_block_with_missing_chunks(block.clone(), vec![missing_chunk_hash.clone()]);

        let later_block = MockBlock::new(block_height + 1);
        let later_block_hash = later_block.hash;
        pool.add_block_with_missing_chunks(later_block, vec![get_chunk_hash(300)]);

        pool.accept_chunk(&missing_chunk_hash);
        pool.prune_blocks_below_height(block_height);
        assert_eq!(pool.ready_blocks(), vec![block]);
        assert!(!pool.contains(&early_block_hash));
        assert!(pool.contains(&later_block_hash));
    }
}