pruner 0.80.0

A command-line utility and library to prune backups on a schedule.
Documentation
#[cfg(test)] mod proptest;
#[cfg(test)] mod test;

pub trait Item {
	type Period;

	fn matches(&self, new: &Self, within: &Self::Period) -> bool;
}

impl<T: Clone + std::ops::Sub> Item for T
where T::Output: PartialOrd,
{
	type Period = T::Output;

	fn matches(&self, old: &T, within: &T::Output) -> bool {
		self.clone() - old.clone() >= *within
	}
}

pub struct Tagged<Item, T>(pub Item, pub T);

impl<I: crate::Item, T> Item for Tagged<I, T> {
	type Period = I::Period;

	fn matches(&self, old: &Self, within: &I::Period) -> bool {
		self.0.matches(&old.0, within)
	}
}

#[derive(Debug)]
pub struct Bucket<Period> {
	period: Period,
	count: usize,
}

impl<Period> Bucket<Period> {
	pub fn new(period: Period, count: usize) -> Self {
		Bucket {
			period,
			count,
		}
	}
}

#[derive(Debug)]
pub struct Config<Period> {
	buckets: Vec<Bucket<Period>>,
}

impl<Period> Default for Config<Period> {
	fn default() -> Self {
		Self::empty()
	}
}

impl<Period> Config<Period> {
	pub fn empty() -> Self {
		Config {
			buckets: Default::default(),
		}
	}

	/// Insert a new retention bucket.
	///
	/// For optimal performance insert buckets from shortest retention to maximum retention.
	pub fn add_bucket(&mut self, bucket: Bucket<Period>)
	where Period: PartialOrd
	{
		if bucket.count < 1 {
			return
		}

		for i in (0..self.buckets.len()).rev() {
			if bucket.period >= self.buckets[i].period {
				self.buckets.insert(i+1, bucket);
				return
			}
		}

		self.buckets.insert(0, bucket);
	}
}

#[derive(Debug)]
struct BucketState {
	remaining_slots: usize,
	/// The first interesting item, relative to the previous bucket's anchor.
	anchor: usize,
	/// The last owned item, relative to the previous bucket's last.
	last: usize,
}

struct Iter<'a, Item: crate::Item, I: Iterator<Item=Item>> {
	config: &'a Config<Item::Period>,
	candidates: I,
	keep: Vec<Item>,
	buckets: Box<[BucketState]>,

	next_bucket: usize,
	anchor_ref: usize,
	last_ref: usize,
}

impl<'a, Item: crate::Item, I: Iterator<Item=Item>> Iter<'a, Item, I> {
	fn adjust_anchors(&mut self, to_remove: usize, bid: usize) {
		debug_assert!(self.anchor_ref > to_remove);
		let mut i = self.anchor_ref;
		for mut bucket_state in &mut self.buckets[bid..] {
			i -= bucket_state.anchor;
			if i < to_remove {
				bucket_state.anchor -= 1;
				break
			}
		}
		self.anchor_ref -= 1;
	}

	fn disown(&mut self, bid: usize) -> Option<Item> {
		if bid > 0 {
			let last = &mut self.buckets[bid-1].last;
			*last = last.saturating_sub(1);
		}

		let last = self.last_ref;
		if let Some(bucket) = self.config.buckets.get(bid+1) {
			if last > 0 && !self.keep[last].matches(&self.keep[last-1], &bucket.period) {
				self.adjust_anchors(last, bid);
				Some(self.keep.remove(last))
			} else {
				self.buckets[bid].last += 1;
				self.last_ref += 1; // To account for the fact that we just incremented the rel_last for the next bucket.
				self.next()
			}
		} else {
			debug_assert_eq!(last, 0);
			Some(self.keep.remove(last))
		}
	}
}

impl<'a, Item: crate::Item, I: Iterator<Item=Item>> Iterator for Iter<'a, Item, I> {
	type Item = Item;

	fn next(&mut self) -> Option<Self::Item> {
		let bid = self.next_bucket;
		if let Some(bucket_config) = self.config.buckets.get(bid) {
			let (before, after) = self.buckets.split_at_mut(bid);
			let bucket_state = &mut before[bid-1];
			let next_bucket = after.get_mut(0);

			self.next_bucket += 1;
			self.anchor_ref -= bucket_state.anchor;
			self.last_ref -= bucket_state.last;

			let anchor = &self.keep[self.anchor_ref];
			if !self.keep.last().unwrap().matches(anchor, &bucket_config.period) {
				bucket_state.anchor += 1;
				self.next_bucket = self.config.buckets.len();
				return self.next();
			}

			if let Some(next_bucket) = next_bucket {
				self.anchor_ref += bucket_state.anchor;
				next_bucket.anchor += bucket_state.anchor;
			}
			bucket_state.anchor = 0;

			if bucket_state.remaining_slots > 0 {
				bucket_state.remaining_slots -= 1;
				return self.next()
			} else {
				return self.disown(bid)
			}
		}

		let e = self.candidates.next()?;

		if self.config.buckets.is_empty() {
			return Some(e)
		}

		if self.keep.is_empty() {
			self.keep.push(e);
			return self.next()
		}

		let first_bucket = self.config.buckets.first().unwrap();
		if !e.matches(&self.keep.last().unwrap(), &first_bucket.period) {
			return Some(e)
		}

		self.keep.push(e);

		self.next_bucket = 1;
		self.anchor_ref = self.keep.len() - 2;
		self.last_ref = self.keep.len()
			.saturating_sub(first_bucket.count)
			.saturating_sub(1); // The one that we just added.

		if self.keep.len() > first_bucket.count {
			self.disown(0)
		} else {
			self.next()
		}
	}
}

pub fn prune<'a, Item: crate::Item + 'a>(
	config: &'a Config<Item::Period>,
	candidates: impl IntoIterator<Item=Item> + 'a)
-> impl Iterator<Item=Item> + 'a
{
	let buckets = config.buckets.iter()
		.skip(1)
		.map(|c| BucketState{
			remaining_slots: c.count - 1,
			last: 0,
			anchor: 0,
		})
		.collect::<Vec<_>>();

	Iter {
		candidates: candidates.into_iter(),
		buckets: buckets.into_boxed_slice(),
		config,
		keep: Vec::new(),

		next_bucket: config.buckets.len(),
		anchor_ref: 0,
		last_ref: 0,
	}
}