grin_chain/txhashset/
bitmap_accumulator.rs

// Copyright 2021 The Grin Developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::min;
use std::convert::TryFrom;
use std::time::Instant;

use bit_vec::BitVec;
use croaring::Bitmap;

use crate::core::core::hash::{DefaultHashable, Hash};
use crate::core::core::pmmr::segment::{Segment, SegmentIdentifier, SegmentProof};
use crate::core::core::pmmr::{self, Backend, ReadablePMMR, ReadonlyPMMR, VecBackend, PMMR};
use crate::core::ser::{self, PMMRable, Readable, Reader, Writeable, Writer};
use crate::error::Error;
use enum_primitive::FromPrimitive;

/// The "bitmap accumulator" allows us to commit to a specific bitmap by splitting it into
/// fragments and inserting these fragments into an MMR to produce an overall root hash.
/// Leaves in the MMR are fragments of the bitmap consisting of 1024 contiguous bits
/// from the overall bitmap. The first (leftmost) leaf in the MMR represents the first 1024 bits
/// of the bitmap, the next leaf is the next 1024 bits of the bitmap etc.
///
/// Flipping a single bit does not require the full bitmap to be rehashed, only the path from the
/// relevant leaf up to its associated peak.
///
/// Flipping multiple bits *within* a single chunk is no more expensive than flipping a single bit
/// as a leaf node in the MMR represents a sequence of 1024 bits. Flipping multiple bits located
/// close together is a relatively cheap operation with minimal rehashing required to update the
/// relevant peaks and the overall MMR root.
///
/// It is also possible to generate Merkle proofs for these 1024 bit fragments, proving
/// both inclusion and location in the overall "accumulator" MMR. We plan to take advantage of
/// this during fast sync, allowing for validation of partial data.
///
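/// A minimal usage sketch (illustrative only, not a doc-test), based on the methods
/// defined below:
///
/// ```ignore
/// let mut accumulator = BitmapAccumulator::new();
/// // bits 0, 500 and 1024 of a 2048 bit bitmap are set
/// accumulator.init(vec![0, 500, 1024], 2048)?;
/// let root = accumulator.root();
/// ```
///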
#[derive(Clone)]
pub struct BitmapAccumulator {
	backend: VecBackend<BitmapChunk>,
}

impl BitmapAccumulator {
	const NBITS: u64 = BitmapChunk::LEN_BITS as u64;

	/// Create a new empty bitmap accumulator.
	pub fn new() -> BitmapAccumulator {
		BitmapAccumulator {
			backend: VecBackend::new(),
		}
	}

	/// Initialize a bitmap accumulator given the provided idx iterator.
	pub fn init<T: IntoIterator<Item = u64>>(&mut self, idx: T, size: u64) -> Result<(), Error> {
		self.apply_from(idx, 0, size)
	}

	/// Find the start of the first "chunk" of 1024 bits from the provided idx.
	/// Zero the last 10 bits to round down to a multiple of 1024.
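	/// For example, idx 1500 yields chunk start 1024 and idx 1023 yields chunk start 0.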
	pub fn chunk_start_idx(idx: u64) -> u64 {
		idx & !(Self::NBITS - 1)
	}

	/// The first 1024 bits belong to chunk 0, the next 1024 to chunk 1 etc.
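	/// For example, idx 1023 falls in chunk 0 and idx 1024 falls in chunk 1.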
	fn chunk_idx(idx: u64) -> u64 {
		idx / Self::NBITS
	}

	/// Apply the provided idx iterator to our bitmap accumulator.
	/// We start at the chunk containing from_idx and rebuild chunks as necessary
	/// for the bitmap, limiting it to size (in bits).
	/// If from_idx is 1023 and size is 1024 then we rebuild a single chunk.
	fn apply_from<T>(&mut self, idx: T, from_idx: u64, size: u64) -> Result<(), Error>
	where
		T: IntoIterator<Item = u64>,
	{
		let now = Instant::now();

		// Find the (1024 bit chunk) chunk_idx for the (individual bit) from_idx.
		let from_chunk_idx = BitmapAccumulator::chunk_idx(from_idx);
		let mut chunk_idx = from_chunk_idx;

		let mut chunk = BitmapChunk::new();

		let mut idx_iter = idx.into_iter().filter(|&x| x < size).peekable();
		while let Some(x) = idx_iter.peek() {
			if *x < chunk_idx * Self::NBITS {
				// NOTE we never get here if idx starts from from_idx
				// skip until we reach our first chunk
				idx_iter.next();
			} else if *x < (chunk_idx + 1) * Self::NBITS {
				let idx = idx_iter.next().expect("next after peek");
				chunk.set(idx % Self::NBITS, true);
			} else {
				self.append_chunk(chunk)?;
				chunk_idx += 1;
				chunk = BitmapChunk::new();
			}
		}
		if chunk.any() {
			self.append_chunk(chunk)?;
		}
		debug!(
			"applied {} chunks from idx {} to idx {} ({}ms)",
			1 + chunk_idx - from_chunk_idx,
			from_chunk_idx,
			chunk_idx,
			now.elapsed().as_millis(),
		);
		Ok(())
	}

	/// Apply updates to the bitmap accumulator given an iterator of invalidated idx and
	/// an iterator of idx to be set to true.
	/// We determine the existing chunks to be rebuilt given the invalidated idx.
	/// We then rebuild given idx, extending the accumulator with new chunk(s) as necessary.
	/// Resulting bitmap accumulator will contain sufficient bitmap chunks to cover size.
	/// If size is 1 then we will have a single chunk.
	/// If size is 1023 then we will have a single chunk (bits 0 to 1023 inclusive).
	/// If size is 1024 then we will have two chunks.
	/// TODO: first argument is an iterator for no good reason;
	/// might as well pass from_idx as first argument
	pub fn apply<T, U>(&mut self, invalidated_idx: T, idx: U, size: u64) -> Result<(), Error>
	where
		T: IntoIterator<Item = u64>,
		U: IntoIterator<Item = u64>,
	{
		// Determine the earliest chunk by looking at the min invalidated idx (assume sorted).
		// Rewind prior to this and reapply new_idx.
		// Note: We rebuild everything after rewind point but much of the bitmap may be
		// unchanged. This can be further optimized by only rebuilding necessary chunks and
		// rehashing.
		if let Some(from_idx) = invalidated_idx.into_iter().next() {
			self.rewind_prior(from_idx)?;
			self.pad_left(from_idx)?;
			self.apply_from(idx, from_idx, size)?;
		}

		Ok(())
	}

	/// Given the provided (bit) idx rewind the bitmap accumulator to the end of the
	/// previous chunk ready for the updated chunk to be appended.
	fn rewind_prior(&mut self, from_idx: u64) -> Result<(), Error> {
		let chunk_idx = BitmapAccumulator::chunk_idx(from_idx);
		let last_pos = self.backend.size();
		let mut pmmr = PMMR::at(&mut self.backend, last_pos);
		let rewind_pos = pmmr::insertion_to_pmmr_index(chunk_idx);
		pmmr.rewind(rewind_pos, &Bitmap::new())
			.map_err(Error::Other)?;
		Ok(())
	}

	/// Make sure we append empty chunks to fill in any gap before we append the chunk
	/// we actually care about. This effectively pads the bitmap with 1024-bit chunks of 0s
	/// as necessary to put the new chunk at the correct place.
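	/// For example, if the accumulator currently holds 2 chunks and from_idx falls in chunk 4,
	/// two empty chunks are appended before the rebuilt chunk.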
	fn pad_left(&mut self, from_idx: u64) -> Result<(), Error> {
		let chunk_idx = BitmapAccumulator::chunk_idx(from_idx);
		let current_chunk_idx = pmmr::n_leaves(self.backend.size());
		for _ in current_chunk_idx..chunk_idx {
			self.append_chunk(BitmapChunk::new())?;
		}
		Ok(())
	}

	/// Append a new chunk to the BitmapAccumulator.
	/// Append parent hashes (if any) as necessary to build associated peak.
	pub fn append_chunk(&mut self, chunk: BitmapChunk) -> Result<u64, Error> {
		let last_pos = self.backend.size();
		PMMR::at(&mut self.backend, last_pos)
			.push(&chunk)
			.map_err(Error::Other)
	}

	/// The root hash of the bitmap accumulator MMR.
	pub fn root(&self) -> Hash {
		self.readonly_pmmr().root().expect("no root, invalid tree")
	}

	/// Readonly access to our internal data.
	pub fn readonly_pmmr(&self) -> ReadonlyPMMR<BitmapChunk, VecBackend<BitmapChunk>> {
		ReadonlyPMMR::at(&self.backend, self.backend.size())
	}

	/// Return a raw in-memory bitmap of this accumulator
	pub fn as_bitmap(&self) -> Result<Bitmap, Error> {
		let mut bitmap = Bitmap::new();
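		// Each leaf chunk contributes its set bits, offset by 1024 bits per preceding chunk.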
		for (chunk_index, chunk_pos) in self.backend.leaf_pos_iter().enumerate() {
			//TODO: Unwrap
			let chunk = self.backend.get_data(chunk_pos as u64).unwrap();
			let additive = chunk.set_iter(chunk_index * 1024).collect::<Vec<u32>>();
			bitmap.add_many(&additive);
		}
		Ok(bitmap)
	}
}

/// A bitmap "chunk" representing 1024 contiguous bits of the overall bitmap.
/// The first 1024 bits belong in one chunk. The next 1024 bits in the next chunk, etc.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitmapChunk(BitVec);

impl BitmapChunk {
	const LEN_BITS: usize = 1024;
	const LEN_BYTES: usize = Self::LEN_BITS / 8;

	/// Create a new bitmap chunk, defaulting all bits in the chunk to false.
	pub fn new() -> BitmapChunk {
		BitmapChunk(BitVec::from_elem(Self::LEN_BITS, false))
	}

	/// Set a single bit in this chunk.
	/// 0-indexed from start of chunk.
	/// Panics if idx is outside the valid range of bits in a chunk.
	pub fn set(&mut self, idx: u64, value: bool) {
		let idx = usize::try_from(idx).expect("usize from u64");
		assert!(idx < Self::LEN_BITS);
		self.0.set(idx, value)
	}

	/// Does this bitmap chunk have any bits set to 1?
	pub fn any(&self) -> bool {
		self.0.any()
	}

	/// Iterator over the integer set represented by this chunk, applying the given
	/// offset to the values
	pub fn set_iter(&self, idx_offset: usize) -> impl Iterator<Item = u32> + '_ {
		self.0
			.iter()
			.enumerate()
			.filter(|(_, val)| *val)
			.map(move |(idx, _)| (idx as u32 + idx_offset as u32))
	}
}

impl PMMRable for BitmapChunk {
	type E = Self;

	fn as_elmt(&self) -> Self::E {
		self.clone()
	}

	fn elmt_size() -> Option<u16> {
		Some(Self::LEN_BYTES as u16)
	}
}

impl DefaultHashable for BitmapChunk {}

impl Writeable for BitmapChunk {
	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
		self.0.to_bytes().write(writer)
	}
}

impl Readable for BitmapChunk {
	/// Reading is not currently supported, just return an empty one for now.
	/// We store the underlying roaring bitmap externally for the bitmap accumulator
	/// and the "hash only" backend means we never actually read these chunks.
	fn read<R: Reader>(_reader: &mut R) -> Result<BitmapChunk, ser::Error> {
		Ok(BitmapChunk::new())
	}
}

/// A segment of the bitmap accumulator MMR, with the leaf data packed into
/// [`BitmapBlock`]s for compact (de)serialization.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitmapSegment {
	identifier: SegmentIdentifier,
	blocks: Vec<BitmapBlock>,
	proof: SegmentProof,
}

impl Writeable for BitmapSegment {
	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
		Writeable::write(&self.identifier, writer)?;
		writer.write_u16(self.blocks.len() as u16)?;
		for block in &self.blocks {
			Writeable::write(block, writer)?;
		}
		Writeable::write(&self.proof, writer)?;
		Ok(())
	}
}

impl Readable for BitmapSegment {
	fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
		let identifier: SegmentIdentifier = Readable::read(reader)?;

		let n_blocks = reader.read_u16()? as usize;
		let mut blocks = Vec::<BitmapBlock>::with_capacity(n_blocks);
		for _ in 0..n_blocks {
			blocks.push(Readable::read(reader)?);
		}
		let proof = Readable::read(reader)?;

		Ok(Self {
			identifier,
			blocks,
			proof,
		})
	}
}

// TODO: this can be sped up with some `unsafe` code
impl From<Segment<BitmapChunk>> for BitmapSegment {
	fn from(segment: Segment<BitmapChunk>) -> Self {
		let (identifier, _, _, _, leaf_data, proof) = segment.parts();

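		// Allocate enough blocks to hold all leaf chunks; only the final block may be partial.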
		let mut chunks_left = leaf_data.len();
		let mut blocks =
			Vec::with_capacity((chunks_left + BitmapBlock::NCHUNKS - 1) / BitmapBlock::NCHUNKS);
		while chunks_left > 0 {
			let n_chunks = min(BitmapBlock::NCHUNKS, chunks_left);
			chunks_left = chunks_left.saturating_sub(n_chunks);
			blocks.push(BitmapBlock::new(n_chunks));
		}

		for (chunk_idx, chunk) in leaf_data.into_iter().enumerate() {
			assert_eq!(chunk.0.len(), BitmapChunk::LEN_BITS);
			let block = &mut blocks
				.get_mut(chunk_idx / BitmapBlock::NCHUNKS)
				.unwrap()
				.inner;
			let offset = (chunk_idx % BitmapBlock::NCHUNKS) * BitmapChunk::LEN_BITS;
			for (i, _) in chunk.0.iter().enumerate().filter(|&(_, v)| v) {
				block.set(offset + i, true);
			}
		}

		Self {
			identifier,
			blocks,
			proof,
		}
	}
}

// TODO: this can be sped up with some `unsafe` code
impl From<BitmapSegment> for Segment<BitmapChunk> {
	fn from(segment: BitmapSegment) -> Self {
		let BitmapSegment {
			identifier,
			blocks,
			proof,
		} = segment;

		// Count the number of chunks taking into account that the final block might be smaller
		let n_chunks = (blocks.len() - 1) * BitmapBlock::NCHUNKS
			+ blocks.last().map(|b| b.n_chunks()).unwrap_or(0);
		let mut leaf_pos = Vec::with_capacity(n_chunks);
		let mut chunks = Vec::with_capacity(n_chunks);
		let offset = (1 << identifier.height) * identifier.idx;
		for i in 0..(n_chunks as u64) {
			leaf_pos.push(pmmr::insertion_to_pmmr_index(offset + i));
			chunks.push(BitmapChunk::new());
		}

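		// Scatter the set bits of each block back into the corresponding 1024-bit chunks.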
		for (block_idx, block) in blocks.into_iter().enumerate() {
			assert!(block.inner.len() <= BitmapBlock::NBITS as usize);
			let offset = block_idx * BitmapBlock::NCHUNKS;
			for (i, _) in block.inner.iter().enumerate().filter(|&(_, v)| v) {
				chunks
					.get_mut(offset + i / BitmapChunk::LEN_BITS)
					.unwrap()
					.0
					.set(i % BitmapChunk::LEN_BITS, true);
			}
		}

		Segment::from_parts(identifier, Vec::new(), Vec::new(), leaf_pos, chunks, proof)
	}
}

/// A block of 2^16 bits that provides an efficient (de)serialization
/// depending on the bitmap occupancy.
#[derive(Clone, Debug, PartialEq, Eq)]
struct BitmapBlock {
	inner: BitVec,
}

impl BitmapBlock {
	/// Maximum number of bits in a block
	const NBITS: u32 = 1 << 16;
	/// Maximum number of chunks in a block
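	/// (64 for a full block, given NBITS = 2^16 and 1024-bit chunks)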
	const NCHUNKS: usize = Self::NBITS as usize / BitmapChunk::LEN_BITS;

	fn new(n_chunks: usize) -> Self {
		assert!(n_chunks <= BitmapBlock::NCHUNKS);
		Self {
			inner: BitVec::from_elem(n_chunks * BitmapChunk::LEN_BITS, false),
		}
	}

	fn n_chunks(&self) -> usize {
		let length = self.inner.len();
		assert_eq!(length % BitmapChunk::LEN_BITS, 0);
		let n_chunks = length / BitmapChunk::LEN_BITS;
		assert!(n_chunks <= BitmapBlock::NCHUNKS);
		n_chunks
	}
}

impl Writeable for BitmapBlock {
	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
		let length = self.inner.len();
		assert!(length <= Self::NBITS as usize);
		assert_eq!(length % BitmapChunk::LEN_BITS, 0);
		writer.write_u8((length / BitmapChunk::LEN_BITS) as u8)?;

		let count_pos = self.inner.iter().filter(|&v| v).count() as u32;

		// Number of unset bits. Use the actual block length rather than Self::NBITS
		// since the block may not be full.
		let count_neg = length as u32 - count_pos;

		let threshold = Self::NBITS / 16;
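		// With NBITS = 2^16 the threshold is 4096: blocks with fewer than 4096 set bits are
		// written as a list of set-bit indices, blocks with fewer than 4096 unset bits as a
		// list of unset-bit indices, and everything else as raw bytes.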
		if count_pos < threshold {
			// Write positive indices
			Writeable::write(&BitmapBlockSerialization::Positive, writer)?;
			writer.write_u16(count_pos as u16)?;
			for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| v) {
				writer.write_u16(i as u16)?;
			}
		} else if count_neg < threshold {
			// Write negative indices
			Writeable::write(&BitmapBlockSerialization::Negative, writer)?;
			writer.write_u16(count_neg as u16)?;
			for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| !v) {
				writer.write_u16(i as u16)?;
			}
		} else {
			// Write raw bytes
			Writeable::write(&BitmapBlockSerialization::Raw, writer)?;
			let bytes = self.inner.to_bytes();
			assert!(bytes.len() <= Self::NBITS as usize / 8);
			writer.write_fixed_bytes(&bytes)?;
		}

		Ok(())
	}
}

impl Readable for BitmapBlock {
	fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
		let n_chunks = reader.read_u8()?;
		if n_chunks as usize > BitmapBlock::NCHUNKS {
			return Err(ser::Error::TooLargeReadErr);
		}
		let n_bits = n_chunks as usize * BitmapChunk::LEN_BITS;

		let mode = Readable::read(reader)?;
		let inner = match mode {
			BitmapBlockSerialization::Raw => {
				// Raw bytes
				let bytes = reader.read_fixed_bytes(n_bits / 8)?;
				BitVec::from_bytes(&bytes)
			}
			BitmapBlockSerialization::Positive => {
				// Positive indices
				let mut inner = BitVec::from_elem(n_bits, false);
				let n = reader.read_u16()?;
				for _ in 0..n {
					inner.set(reader.read_u16()? as usize, true);
				}
				inner
			}
			BitmapBlockSerialization::Negative => {
				// Negative indices
				let mut inner = BitVec::from_elem(n_bits, true);
				let n = reader.read_u16()?;
				for _ in 0..n {
					inner.set(reader.read_u16()? as usize, false);
				}
				inner
			}
		};

		Ok(BitmapBlock { inner })
	}
}

enum_from_primitive! {
	#[derive(Debug, Clone, Copy, PartialEq)]
	#[repr(u8)]
	enum BitmapBlockSerialization {
		Raw = 0,
		Positive = 1,
		Negative = 2,
	}
}

impl Writeable for BitmapBlockSerialization {
	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
		writer.write_u8(*self as u8)
	}
}

impl Readable for BitmapBlockSerialization {
	fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
		Self::from_u8(reader.read_u8()?).ok_or(ser::Error::CorruptedData)
	}
}

#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::ser::{
		BinReader, BinWriter, DeserializationMode, ProtocolVersion, Readable, Writeable,
	};
	use byteorder::ReadBytesExt;
	use grin_util::secp::rand::Rng;
	use rand::thread_rng;
	use std::io::Cursor;

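	// Round-trip a block through ser/deser, asserting the expected encoding byte
	// (0 = Raw, 1 = Positive, 2 = Negative) and the total serialized length in bytes.
	// `n_blocks` is the number of 1024-bit chunks in the block under test.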
	fn test_roundtrip(entries: usize, inverse: bool, encoding: u8, length: usize, n_blocks: usize) {
		let mut rng = thread_rng();
		let mut block = BitmapBlock::new(n_blocks);
		if inverse {
			block.inner.negate();
		}

		let range_size = n_blocks * BitmapChunk::LEN_BITS as usize;

		// Flip `entries` bits in random spots
		let mut count = 0;
		while count < entries {
			let idx = rng.gen_range(0, range_size);
			if block.inner.get(idx).unwrap() == inverse {
				count += 1;
				block.inner.set(idx, !inverse);
			}
		}

		// Serialize
		let mut cursor = Cursor::new(Vec::<u8>::new());
		let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
		Writeable::write(&block, &mut writer).unwrap();

		// Check encoding type and length
		cursor.set_position(1);
		assert_eq!(cursor.read_u8().unwrap(), encoding);
		let actual_length = cursor.get_ref().len();
		assert_eq!(actual_length, length);
		assert!(actual_length <= 2 + BitmapBlock::NBITS as usize / 8);

		// Deserialize
		cursor.set_position(0);
		let mut reader = BinReader::new(
			&mut cursor,
			ProtocolVersion(1),
			DeserializationMode::default(),
		);
		let block2: BitmapBlock = Readable::read(&mut reader).unwrap();
		assert_eq!(block, block2);
	}

	#[test]
	fn block_ser_roundtrip() {
		let threshold = BitmapBlock::NBITS as usize / 16;
		let entries = thread_rng().gen_range(threshold, 4 * threshold);
		test_roundtrip(entries, false, 0, 2 + BitmapBlock::NBITS as usize / 8, 64);
		test_roundtrip(entries, true, 0, 2 + BitmapBlock::NBITS as usize / 8, 64);
	}

	#[test]
	fn sparse_block_ser_roundtrip() {
		let entries =
			thread_rng().gen_range(BitmapChunk::LEN_BITS, BitmapBlock::NBITS as usize / 16);
		test_roundtrip(entries, false, 1, 4 + 2 * entries, 64);
	}

	#[test]
	fn sparse_unfull_block_ser_roundtrip() {
		let entries =
			thread_rng().gen_range(BitmapChunk::LEN_BITS, BitmapBlock::NBITS as usize / 16);
		test_roundtrip(entries, false, 1, 4 + 2 * entries, 61);
	}

	#[test]
	fn abundant_block_ser_roundtrip() {
		let entries =
			thread_rng().gen_range(BitmapChunk::LEN_BITS, BitmapBlock::NBITS as usize / 16);
		test_roundtrip(entries, true, 2, 4 + 2 * entries, 64);
	}

	#[test]
	fn abundant_unfull_block_ser_roundtrip() {
		let entries =
			thread_rng().gen_range(BitmapChunk::LEN_BITS, BitmapBlock::NBITS as usize / 16);
		test_roundtrip(entries, true, 2, 4 + 2 * entries, 61);
	}
}