// grin_store/pmmr.rs

1// Copyright 2021 The Grin Developers
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6//     http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14//! Implementation of the persistent Backend for the prunable MMR tree.
15
16use std::fs;
17use std::{io, time};
18
19use crate::grin_core::core::hash::{Hash, Hashed};
20use crate::grin_core::core::pmmr::{self, family, Backend};
21use crate::grin_core::core::BlockHeader;
22use crate::grin_core::ser::{PMMRable, ProtocolVersion};
23use crate::leaf_set::LeafSet;
24use crate::prune_list::PruneList;
25use crate::types::{AppendOnlyFile, DataFile, SizeEntry, SizeInfo};
26use croaring::Bitmap;
27use std::convert::TryInto;
28use std::path::{Path, PathBuf};
29
30const PMMR_HASH_FILE: &str = "pmmr_hash.bin";
31const PMMR_DATA_FILE: &str = "pmmr_data.bin";
32const PMMR_LEAF_FILE: &str = "pmmr_leaf.bin";
33const PMMR_PRUN_FILE: &str = "pmmr_prun.bin";
34const PMMR_SIZE_FILE: &str = "pmmr_size.bin";
35const REWIND_FILE_CLEANUP_DURATION_SECONDS: u64 = 60 * 60 * 24; // 24 hours as seconds
36
37/// The list of PMMR_Files for internal purposes
38pub const PMMR_FILES: [&str; 4] = [
39	PMMR_HASH_FILE,
40	PMMR_DATA_FILE,
41	PMMR_LEAF_FILE,
42	PMMR_PRUN_FILE,
43];
44
45/// PMMR persistent backend implementation. Relies on multiple facilities to
46/// handle writing, reading and pruning.
47///
48/// * A main storage file appends Hash instances as they come.
49/// This AppendOnlyFile is also backed by a mmap for reads.
50/// * An in-memory backend buffers the latest batch of writes to ensure the
51/// PMMR can always read recent values even if they haven't been flushed to
52/// disk yet.
/// * A leaf_set tracks unpruned (unremoved) leaf positions in the MMR.
54/// * A prune_list tracks the positions of pruned (and compacted) roots in the
55/// MMR.
56pub struct PMMRBackend<T: PMMRable> {
57	data_dir: PathBuf,
58	prunable: bool,
59	hash_file: DataFile<Hash>,
60	data_file: DataFile<T::E>,
61	leaf_set: LeafSet,
62	prune_list: PruneList,
63}
64
65impl<T: PMMRable> Backend<T> for PMMRBackend<T> {
66	/// Append the provided data and hashes to the backend storage.
67	/// Add the new leaf pos to our leaf_set if this is a prunable MMR.
68	fn append(&mut self, data: &T, hashes: &[Hash]) -> Result<(), String> {
69		let size = self
70			.data_file
71			.append(&data.as_elmt())
72			.map_err(|e| format!("Failed to append data to file. {}", e))?;
73
74		self.hash_file
75			.extend_from_slice(hashes)
76			.map_err(|e| format!("Failed to append hash to file. {}", e))?;
77
78		if self.prunable {
79			// (Re)calculate the latest pos given updated size of data file
80			// and the total leaf_shift, and add to our leaf_set.
81			let pos =
82				pmmr::insertion_to_pmmr_index(size + self.prune_list.get_total_leaf_shift() - 1);
83			self.leaf_set.add(pos);
84		}
85
86		Ok(())
87	}
88
89	// Supports appending a pruned subtree (single root hash) to an existing hash file.
90	// Update the prune_list "shift cache" to reflect the new pruned leaf pos in the subtree.
91	fn append_pruned_subtree(&mut self, hash: Hash, pos0: u64) -> Result<(), String> {
92		if !self.prunable {
93			return Err("Not prunable, cannot append pruned subtree.".into());
94		}
95
96		self.hash_file
97			.append(&hash)
98			.map_err(|e| format!("Failed to append subtree hash to file. {}", e))?;
99
100		self.prune_list.append(pos0);
101
102		Ok(())
103	}
104
105	fn append_hash(&mut self, hash: Hash) -> Result<(), String> {
106		self.hash_file
107			.append(&hash)
108			.map_err(|e| format!("Failed to append hash to file. {}", e))?;
109		Ok(())
110	}
111
112	fn get_from_file(&self, pos0: u64) -> Option<Hash> {
113		if self.is_compacted(pos0) {
114			return None;
115		}
116		let shift = self.prune_list.get_shift(pos0);
117		self.hash_file.read(1 + pos0 - shift)
118	}
119
120	fn get_peak_from_file(&self, pos0: u64) -> Option<Hash> {
121		let shift = self.prune_list.get_shift(pos0);
122		self.hash_file.read(1 + pos0 - shift)
123	}
124
125	fn get_data_from_file(&self, pos0: u64) -> Option<T::E> {
126		if !pmmr::is_leaf(pos0) {
127			return None;
128		}
129		if self.is_compacted(pos0) {
130			return None;
131		}
132		let flatfile_pos = pmmr::n_leaves(pos0 + 1);
133		let shift = self.prune_list.get_leaf_shift(1 + pos0);
134		self.data_file.read(flatfile_pos - shift)
135	}
136
137	/// Get the hash at pos.
138	/// Return None if pos is a leaf and it has been removed (or pruned or
139	/// compacted).
140	fn get_hash(&self, pos0: u64) -> Option<Hash> {
141		if self.prunable && pmmr::is_leaf(pos0) && !self.leaf_set.includes(pos0) {
142			return None;
143		}
144		self.get_from_file(pos0)
145	}
146
147	/// Get the data at pos.
148	/// Return None if it has been removed or if pos is not a leaf node.
149	fn get_data(&self, pos0: u64) -> Option<T::E> {
150		if !pmmr::is_leaf(pos0) {
151			return None;
152		}
153		if self.prunable && !self.leaf_set.includes(pos0) {
154			return None;
155		}
156		self.get_data_from_file(pos0)
157	}
158
159	/// Remove leaf from leaf set
160	fn remove_from_leaf_set(&mut self, pos0: u64) {
161		self.leaf_set.remove(pos0);
162	}
163
164	/// Returns an iterator over all the leaf positions.
165	/// for a prunable PMMR this is an iterator over the leaf_set bitmap.
166	/// For a non-prunable PMMR this is *all* leaves (this is not yet implemented).
167	fn leaf_pos_iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
168		if self.prunable {
169			Box::new(self.leaf_set.iter().map(|x| x - 1))
170		} else {
171			panic!("leaf_pos_iter not implemented for non-prunable PMMR")
172		}
173	}
174
175	fn n_unpruned_leaves(&self) -> u64 {
176		if self.prunable {
177			self.leaf_set.len() as u64
178		} else {
179			pmmr::n_leaves(self.unpruned_size())
180		}
181	}
182
183	fn n_unpruned_leaves_to_index(&self, to_index: u64) -> u64 {
184		if self.prunable {
185			self.leaf_set.n_unpruned_leaves_to_index(to_index)
186		} else {
187			pmmr::n_leaves(pmmr::insertion_to_pmmr_index(to_index))
188		}
189	}
190
191	/// Returns an iterator over all the leaf insertion indices (0-indexed).
192	/// If our pos are [1,2,4,5,8] (first 5 leaf pos) then our insertion indices are [0,1,2,3,4]
193	fn leaf_idx_iter(&self, from_idx: u64) -> Box<dyn Iterator<Item = u64> + '_> {
194		// pass from_idx in as param
195		// convert this to pos
196		// iterate, skipping everything prior to this
197		// pass in from_idx=0 then we want to convert to pos=1
198
199		let from_pos = 1 + pmmr::insertion_to_pmmr_index(from_idx);
200
201		if self.prunable {
202			Box::new(
203				self.leaf_set
204					.iter()
205					.skip_while(move |x| *x < from_pos)
206					.map(|x| pmmr::n_leaves(x).saturating_sub(1)),
207			)
208		} else {
209			panic!("leaf_idx_iter not implemented for non-prunable PMMR")
210		}
211	}
212
213	/// Rewind the PMMR backend to the given position.
214	fn rewind(&mut self, position: u64, rewind_rm_pos: &Bitmap) -> Result<(), String> {
215		// First rewind the leaf_set with the necessary added and removed positions.
216		if self.prunable {
217			self.leaf_set.rewind(position, rewind_rm_pos);
218		}
219
220		// Rewind the hash file accounting for pruned/compacted pos
221		let shift = if position == 0 {
222			0
223		} else {
224			self.prune_list.get_shift(position - 1)
225		};
226		self.hash_file.rewind(position - shift);
227
228		// Rewind the data file accounting for pruned/compacted pos
229		let flatfile_pos = pmmr::n_leaves(position);
230		let leaf_shift = if position == 0 {
231			0
232		} else {
233			self.prune_list.get_leaf_shift(position)
234		};
235		self.data_file.rewind(flatfile_pos - leaf_shift);
236
237		Ok(())
238	}
239
240	fn reset_prune_list(&mut self) {
241		let bitmap = Bitmap::new();
242		self.prune_list = PruneList::new(Some(self.data_dir.join(PMMR_PRUN_FILE)), bitmap);
243		if let Err(e) = self.prune_list.flush() {
244			error!("Flushing reset prune list: {}", e);
245		}
246	}
247
248	/// Remove by insertion position.
249	fn remove(&mut self, pos0: u64) -> Result<(), String> {
250		assert!(self.prunable, "Remove on non-prunable MMR");
251		self.leaf_set.remove(pos0);
252		Ok(())
253	}
254
255	/// Release underlying data files
256	fn release_files(&mut self) {
257		self.data_file.release();
258		self.hash_file.release();
259	}
260
261	fn snapshot(&self, header: &BlockHeader) -> Result<(), String> {
262		self.leaf_set
263			.snapshot(header)
264			.map_err(|_| format!("Failed to save copy of leaf_set for {}", header.hash()))?;
265		Ok(())
266	}
267
268	fn dump_stats(&self) {
269		debug!(
270			"pmmr backend: unpruned: {}, hashes: {}, data: {}, leaf_set: {}, prune_list: {}",
271			self.unpruned_size(),
272			self.hash_size(),
273			self.data_size(),
274			self.leaf_set.len(),
275			self.prune_list.len(),
276		);
277	}
278}
279
280impl<T: PMMRable> PMMRBackend<T> {
281	/// Instantiates a new PMMR backend.
282	/// If optional size is provided then treat as "fixed" size otherwise "variable" size backend.
283	/// Use the provided dir to store its files.
284	pub fn new<P: AsRef<Path>>(
285		data_dir: P,
286		prunable: bool,
287		version: ProtocolVersion,
288		header: Option<&BlockHeader>,
289	) -> io::Result<PMMRBackend<T>> {
290		let data_dir = data_dir.as_ref();
291
292		// Are we dealing with "fixed size" data elements or "variable size" data elements
293		// maintained in an associated size file?
294		let size_info = if let Some(fixed_size) = T::elmt_size() {
295			SizeInfo::FixedSize(fixed_size)
296		} else {
297			SizeInfo::VariableSize(Box::new(AppendOnlyFile::open(
298				data_dir.join(PMMR_SIZE_FILE),
299				SizeInfo::FixedSize(SizeEntry::LEN as u16),
300				version,
301			)?))
302		};
303
304		// Hash file is always "fixed size" and we use 32 bytes per hash.
305		let hash_size_info = SizeInfo::FixedSize(Hash::LEN.try_into().unwrap());
306
307		let hash_file = DataFile::open(&data_dir.join(PMMR_HASH_FILE), hash_size_info, version)?;
308		let data_file = DataFile::open(&data_dir.join(PMMR_DATA_FILE), size_info, version)?;
309
310		let leaf_set_path = data_dir.join(PMMR_LEAF_FILE);
311
312		// If we received a rewound "snapshot" leaf_set file move it into
313		// place so we use it.
314		if let Some(header) = header {
315			let leaf_snapshot_path = format!(
316				"{}.{}",
317				data_dir.join(PMMR_LEAF_FILE).to_str().unwrap(),
318				header.hash()
319			);
320			LeafSet::copy_snapshot(&leaf_set_path, &PathBuf::from(leaf_snapshot_path))?;
321		}
322
323		let leaf_set = LeafSet::open(&leaf_set_path)?;
324		let prune_list = PruneList::open(&data_dir.join(PMMR_PRUN_FILE))?;
325
326		Ok(PMMRBackend {
327			data_dir: data_dir.to_path_buf(),
328			prunable,
329			hash_file,
330			data_file,
331			leaf_set,
332			prune_list,
333		})
334	}
335
336	fn is_pruned(&self, pos0: u64) -> bool {
337		self.prune_list.is_pruned(pos0)
338	}
339
340	fn is_pruned_root(&self, pos0: u64) -> bool {
341		self.prune_list.is_pruned_root(pos0)
342	}
343
344	// Check if pos is pruned but not a pruned root itself.
345	// Checking for pruned root is faster so we do this check first.
346	// We can do a fast initial check as well -
347	// if its in the current leaf_set then we know it is not compacted.
348	fn is_compacted(&self, pos0: u64) -> bool {
349		if self.leaf_set.includes(pos0) {
350			return false;
351		}
352		!self.is_pruned_root(pos0) && self.is_pruned(pos0)
353	}
354
355	/// Number of hashes in the PMMR stored by this backend. Only produces the
356	/// fully sync'd size.
357	pub fn unpruned_size(&self) -> u64 {
358		self.hash_size() + self.prune_list.get_total_shift()
359	}
360
361	/// Number of elements in the underlying stored data. Extremely dependent on
362	/// pruning and compaction.
363	pub fn data_size(&self) -> u64 {
364		self.data_file.size()
365	}
366
367	/// Size of the underlying hashed data. Extremely dependent on pruning
368	/// and compaction.
369	pub fn hash_size(&self) -> u64 {
370		self.hash_file.size()
371	}
372
373	/// Syncs all files to disk. A call to sync is required to ensure all the
374	/// data has been successfully written to disk.
375	pub fn sync(&mut self) -> io::Result<()> {
376		Ok(())
377			.and(self.hash_file.flush())
378			.and(self.data_file.flush())
379			.and(self.sync_leaf_set())
380			.and(self.prune_list.flush())
381			.map_err(|e| {
382				io::Error::new(
383					io::ErrorKind::Interrupted,
384					format!("Could not sync pmmr to disk: {:?}", e),
385				)
386			})
387	}
388
389	// Sync the leaf_set if this is a prunable backend.
390	fn sync_leaf_set(&mut self) -> io::Result<()> {
391		if !self.prunable {
392			return Ok(());
393		}
394		self.leaf_set.flush()
395	}
396
397	/// Discard the current, non synced state of the backend.
398	pub fn discard(&mut self) {
399		self.hash_file.discard();
400		self.data_file.discard();
401		self.leaf_set.discard();
402	}
403
404	/// Takes the leaf_set at a given cutoff_pos and generates an updated
405	/// prune_list. Saves the updated prune_list to disk, compacts the hash
406	/// and data files based on the prune_list and saves both to disk.
407	///
408	/// A cutoff position limits compaction on recent data.
409	/// This will be the last position of a particular block to keep things
410	/// aligned. The block_marker in the db/index for the particular block
411	/// will have a suitable output_pos. This is used to enforce a horizon
412	/// after which the local node should have all the data to allow rewinding.
413	pub fn check_compact(&mut self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> io::Result<bool> {
414		assert!(self.prunable, "Trying to compact a non-prunable PMMR");
415
416		// Calculate the sets of leaf positions and node positions to remove based
417		// on the cutoff_pos provided.
418		let (leaves_removed, pos_to_rm) = self.pos_to_rm(cutoff_pos, rewind_rm_pos);
419
420		// Save compact copy of the hash file, skipping removed data.
421		{
422			let pos_to_rm = map_vec!(pos_to_rm, |pos1| {
423				let shift = self.prune_list.get_shift(pos1 as u64 - 1);
424				pos1 as u64 - shift
425			});
426
427			self.hash_file.write_tmp_pruned(&pos_to_rm)?;
428		}
429
430		// Save compact copy of the data file, skipping removed leaves.
431		{
432			let leaf_pos_to_rm = pos_to_rm
433				.iter()
434				.map(|x| x as u64)
435				.filter(|x| pmmr::is_leaf(x - 1))
436				.collect::<Vec<_>>();
437
438			let pos_to_rm = map_vec!(leaf_pos_to_rm, |&pos| {
439				let flat_pos = pmmr::n_leaves(pos);
440				let shift = self.prune_list.get_leaf_shift(pos);
441				flat_pos - shift
442			});
443
444			self.data_file.write_tmp_pruned(&pos_to_rm)?;
445		}
446
447		// Replace hash and data files with compact copies.
448		// Rebuild and intialize from the new files.
449		{
450			debug!("compact: about to replace hash and data files and rebuild...");
451			self.hash_file.replace_with_tmp()?;
452			self.data_file.replace_with_tmp()?;
453			debug!("compact: ...finished replacing and rebuilding");
454		}
455
456		// Update the prune list and write to disk.
457		{
458			let mut bitmap = self.prune_list.bitmap();
459			bitmap.or_inplace(&leaves_removed);
460			self.prune_list = PruneList::new(Some(self.data_dir.join(PMMR_PRUN_FILE)), bitmap);
461			self.prune_list.flush()?;
462		}
463
464		// Write the leaf_set to disk.
465		// Optimize the bitmap storage in the process.
466		self.leaf_set.flush()?;
467
468		self.clean_rewind_files()?;
469
470		Ok(true)
471	}
472
473	fn clean_rewind_files(&self) -> io::Result<u32> {
474		let data_dir = self.data_dir.clone();
475		let pattern = format!("{}.", PMMR_LEAF_FILE);
476		clean_files_by_prefix(data_dir, &pattern, REWIND_FILE_CLEANUP_DURATION_SECONDS)
477	}
478
479	fn pos_to_rm(&self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> (Bitmap, Bitmap) {
480		let mut expanded = Bitmap::new();
481
482		let leaf_pos_to_rm =
483			self.leaf_set
484				.removed_pre_cutoff(cutoff_pos, rewind_rm_pos, &self.prune_list);
485
486		for x in leaf_pos_to_rm.iter() {
487			expanded.add(x);
488			let mut current = x as u64;
489			loop {
490				let (parent0, sibling0) = family(current - 1);
491				let sibling_pruned = self.is_pruned_root(sibling0);
492
493				// if sibling previously pruned
494				// push it back onto list of pos to remove
495				// so we can remove it and traverse up to parent
496				if sibling_pruned {
497					expanded.add(1 + sibling0 as u32);
498				}
499
500				if sibling_pruned || expanded.contains(1 + sibling0 as u32) {
501					expanded.add(1 + parent0 as u32);
502					current = 1 + parent0;
503				} else {
504					break;
505				}
506			}
507		}
508		(leaf_pos_to_rm, removed_excl_roots(&expanded))
509	}
510}
511
512/// Filter remove list to exclude roots.
513/// We want to keep roots around so we have hashes for Merkle proofs.
514fn removed_excl_roots(removed: &Bitmap) -> Bitmap {
515	removed
516		.iter()
517		.filter(|pos| {
518			let (parent_pos0, _) = family(*pos as u64 - 1);
519			removed.contains(1 + parent_pos0 as u32)
520		})
521		.collect()
522}
523
524/// Quietly clean a directory up based on a given prefix.
525/// If the file was accessed within cleanup_duration_seconds from the beginning of
526/// the function call, it will not be deleted. To delete all files, set cleanup_duration_seconds
527/// to zero.
528///
529/// Precondition is that path points to a directory.
530///
531/// If you have files such as
532/// ```text
533/// foo
534/// foo.1
535/// foo.2
536/// .
537/// .
538/// .
539/// .
540/// .
541/// ```
542///
543/// call this function and you will get
544///
545/// ```text
546/// foo
547/// ```
548///
549/// in the directory
550///
551/// The return value will be the number of files that were deleted.
552///
/// This function will return an error whenever the call to `std::fs::read_dir`
554/// fails on the given path for any reason.
555///
556
557pub fn clean_files_by_prefix<P: AsRef<std::path::Path>>(
558	path: P,
559	prefix_to_delete: &str,
560	cleanup_duration_seconds: u64,
561) -> io::Result<u32> {
562	let now = time::SystemTime::now();
563	let cleanup_duration = time::Duration::from_secs(cleanup_duration_seconds);
564
565	let number_of_files_deleted: u32 = fs::read_dir(&path)?
566		.flat_map(
567			|possible_dir_entry| -> Result<u32, Box<dyn std::error::Error>> {
568				// result implements iterator and so if we were to use map here
569				// we would have a list of Result<u32, Box<std::error::Error>>
570				// but because we use flat_map, the errors get "discarded" and we are
571				// left with a clean iterator over u32s
572
573				// the error cases that come out of this code are numerous and
574				// we don't really mind throwing them away because the main point
575				// here is to clean up some files, if it doesn't work out it's not
576				// the end of the world
577
578				let dir_entry: std::fs::DirEntry = possible_dir_entry?;
579				let metadata = dir_entry.metadata()?;
580				if metadata.is_dir() {
581					return Ok(0); // skip directories unconditionally
582				}
583				let accessed = metadata.accessed()?;
584				let duration_since_accessed = now.duration_since(accessed)?;
585				if duration_since_accessed <= cleanup_duration {
586					return Ok(0); // these files are still too new
587				}
588				let file_name = dir_entry
589					.file_name()
590					.into_string()
591					.ok()
592					.ok_or("could not convert filename into utf-8")?;
593
594				// check to see if we want to delete this file?
595				if file_name.starts_with(prefix_to_delete)
596					&& file_name.len() > prefix_to_delete.len()
597				{
598					// we want to delete it, try to do so
599					if fs::remove_file(dir_entry.path()).is_ok() {
600						// we successfully deleted a file
601						return Ok(1);
602					}
603				}
604
605				// we either did not want to delete this file or could
606				// not for whatever reason. 0 files deleted.
607				Ok(0)
608			},
609		)
610		.sum();
611
612	Ok(number_of_files_deleted)
613}