//! A `ReadOnlyCache` wraps an arbitrary number of caches, and
//! attempts to satisfy `get` and `touch` requests by hitting each
//! cache in order. For read-only usage, this should be a simple
//! and easy-to-use interface that erases the difference between plain
//! and sharded caches.
use std::fs::File;
#[allow(unused_imports)] // We refer to this enum in comments.
use std::io::ErrorKind;
use std::io::Result;
use std::path::Path;
use std::sync::Arc;
use derivative::Derivative;
use crate::plain::Cache as PlainCache;
use crate::sharded::Cache as ShardedCache;
use crate::Key;
/// A `ConsistencyChecker` function compares two cached values (open
/// files) for the same key and returns `Err` when the values are
/// incompatible, and `Ok(())` otherwise.
///
/// The function lives behind an [`Arc`], so cloning a value that
/// holds a `ConsistencyChecker` only bumps a reference count. The
/// auto-trait bounds mirror the ones required of [`ReadSide`].
type ConsistencyChecker = Arc<
dyn Fn(&mut File, &mut File) -> Result<()>
+ Sync
+ Send
+ std::panic::RefUnwindSafe
+ std::panic::UnwindSafe,
>;
/// The `ReadSide` trait offers `get` and `touch`, as implemented by
/// both plain and sharded caches.
///
/// It exists so that a heterogeneous list of caches can be stored as
/// boxed trait objects. The supertrait bounds require every
/// implementation to be debug-printable, shareable across threads,
/// and unwind safe.
trait ReadSide:
std::fmt::Debug + Sync + Send + std::panic::RefUnwindSafe + std::panic::UnwindSafe
{
/// Returns a read-only file for `key` in the cache directory if
/// it exists, or None if there is no such file.
///
/// Implicitly "touches" the cached file if it exists.
fn get(&self, key: Key) -> Result<Option<File>>;
/// Marks the cached file `key` as newly used, if it exists.
///
/// Returns whether a file for `key` exists in the cache.
fn touch(&self, key: Key) -> Result<bool>;
}
/// Adapts a plain cache to the [`ReadSide`] interface. Only the
/// key's `name` is forwarded to the plain cache; the rest of the
/// [`Key`] is not used here.
impl ReadSide for PlainCache {
/// Looks up `key.name` in the plain cache.
fn get(&self, key: Key) -> Result<Option<File>> {
// Path-qualified call: forwards to `PlainCache::get` with the
// name only, rather than re-entering this trait method.
PlainCache::get(self, key.name)
}
/// Touches the file for `key.name` in the plain cache.
fn touch(&self, key: Key) -> Result<bool> {
// Same forwarding scheme as `get` above.
PlainCache::touch(self, key.name)
}
}
/// Adapts a sharded cache to the [`ReadSide`] interface. The whole
/// [`Key`] is forwarded unchanged to the sharded cache.
impl ReadSide for ShardedCache {
/// Looks up `key` in the sharded cache.
fn get(&self, key: Key) -> Result<Option<File>> {
// Path-qualified call: forwards to `ShardedCache::get` rather
// than re-entering this trait method.
ShardedCache::get(self, key)
}
/// Touches the file for `key` in the sharded cache.
fn touch(&self, key: Key) -> Result<bool> {
// Same forwarding scheme as `get` above.
ShardedCache::touch(self, key)
}
}
/// Construct a [`ReadOnlyCache`] with this builder. The resulting
/// cache will access each constituent cache directory in the order
/// they were added.
///
/// The default builder is a fresh builder with no constituent cache
/// and no consistency check function.
#[derive(Default, Derivative)]
#[derivative(Debug)]
pub struct ReadOnlyCacheBuilder {
/// Constituent caches, in the order in which they were added (and
/// thus in the order in which they will be searched).
stack: Vec<Box<dyn ReadSide>>,
/// Optional function used to compare multiple hits for the same
/// key. Skipped by the derived `Debug` implementation.
#[derivative(Debug = "ignore")]
consistency_checker: Option<ConsistencyChecker>,
}
/// A [`ReadOnlyCache`] wraps an arbitrary number of
/// [`crate::plain::Cache`] and [`crate::sharded::Cache`], and attempts
/// to satisfy [`ReadOnlyCache::get`] and [`ReadOnlyCache::touch`]
/// requests by hitting each constituent cache in order. This
/// interface hides the difference between plain and sharded cache
/// directories, and should be the first resort for read-only uses.
///
/// The default cache wraps an empty set of constituent caches and
/// performs no consistency check.
///
/// [`ReadOnlyCache`] objects are stateless and cheap to clone; don't
/// put an [`Arc`] on them. Avoid creating multiple
/// [`ReadOnlyCache`]s for the same stack of directories: there is no
/// internal state to maintain, so multiple instances simply waste
/// memory without any benefit.
#[derive(Clone, Derivative)]
#[derivative(Debug)]
pub struct ReadOnlyCache {
/// Constituent caches in search order. The slice is reference
/// counted, so clones of the `ReadOnlyCache` share it.
stack: Arc<[Box<dyn ReadSide>]>,
/// When populated, the `ReadOnlyCache` keeps searching after the
/// first cache hit, and compares subsequent hits with the first one
/// by calling the `consistency_checker` function. That function
/// should return `Ok(())` if the two files are compatible (identical),
/// and `Err` otherwise.
///
/// Skipped by the derived `Debug` implementation.
#[derivative(Debug = "ignore")]
consistency_checker: Option<ConsistencyChecker>,
}
impl ReadOnlyCacheBuilder {
    /// Creates an empty builder: no constituent cache, and no
    /// consistency checker.
    pub fn new() -> Self {
        Default::default()
    }

    /// Installs `checker` as the consistency checker function. With
    /// a checker in place, the `ReadOnlyCache` does not stop at the
    /// first cache hit: it keeps searching, and hands every later
    /// hit to `checker` together with the first one. The `checker`
    /// function should return `Ok(())` when the two files are
    /// compatible (identical), and `Err` otherwise.
    ///
    /// Kismet will propagate the error on mismatch.
    pub fn consistency_checker(
        &mut self,
        checker: impl Fn(&mut File, &mut File) -> Result<()>
            + Sync
            + Send
            + std::panic::RefUnwindSafe
            + std::panic::UnwindSafe
            + Sized
            + 'static,
    ) -> &mut Self {
        // The concrete closure type is erased when it is passed to
        // `arc_consistency_checker`.
        self.arc_consistency_checker(Some(Arc::new(checker)))
    }

    /// Uses [`crate::byte_equality_checker`] as the consistency
    /// checker: all cache hits must have bytewise identical
    /// contents; metadata is not considered.
    pub fn byte_equality_checker(&mut self) -> &mut Self {
        self.consistency_checker(crate::byte_equality_checker)
    }

    /// Uses [`crate::panicking_byte_equality_checker`] as the
    /// consistency checker: all cache hits must have bytewise
    /// identical contents (metadata is not considered), and the call
    /// will panic on mismatch.
    pub fn panicking_byte_equality_checker(&mut self) -> &mut Self {
        self.consistency_checker(crate::panicking_byte_equality_checker)
    }

    /// Drops the consistency checker function, if one was set.
    pub fn clear_consistency_checker(&mut self) -> &mut Self {
        self.consistency_checker = None;
        self
    }

    /// Replaces the consistency checker function with `checker`;
    /// passing `None` clears it. See
    /// [`ReadOnlyCacheBuilder::consistency_checker`].
    #[allow(clippy::type_complexity)] // We want the public type to be transparent
    pub fn arc_consistency_checker(
        &mut self,
        checker: Option<
            Arc<
                dyn Fn(&mut File, &mut File) -> Result<()>
                    + Sync
                    + Send
                    + std::panic::RefUnwindSafe
                    + std::panic::UnwindSafe,
            >,
        >,
    ) -> &mut Self {
        self.consistency_checker = checker;
        self
    }

    /// Appends the cache directory at `path` to the builder's search
    /// list.
    ///
    /// The directory is treated as a plain cache when
    /// `num_shards <= 1`, and as an actual sharded cache otherwise.
    pub fn cache(&mut self, path: impl AsRef<Path>, num_shards: usize) -> &mut Self {
        match num_shards {
            0 | 1 => self.plain(path),
            _ => self.sharded(path, num_shards),
        }
    }

    /// Appends the plain cache directory at `path` to the builder's
    /// search list. A plain cache directory is merely a directory of
    /// files where the files' names match their key's name.
    pub fn plain(&mut self, path: impl AsRef<Path>) -> &mut Self {
        let directory = path.as_ref().to_owned();
        let cache = PlainCache::new(directory, usize::MAX);

        self.stack.push(Box::new(cache));
        self
    }

    /// Appends one plain cache directory per entry in `paths`, in
    /// iteration order, to the builder's search list.
    pub fn plain_caches<P>(&mut self, paths: impl IntoIterator<Item = P>) -> &mut Self
    where
        P: AsRef<Path>,
    {
        paths.into_iter().for_each(|path| {
            self.plain(path);
        });
        self
    }

    /// Appends the sharded cache directory at `path`, with
    /// `num_shards` shards, to the builder's search list.
    pub fn sharded(&mut self, path: impl AsRef<Path>, num_shards: usize) -> &mut Self {
        let directory = path.as_ref().to_owned();
        let cache = ShardedCache::new(directory, num_shards, usize::MAX);

        self.stack.push(Box::new(cache));
        self
    }

    /// Moves the contents of `self` out as a fresh value, leaving
    /// `self` in the default empty builder state. This makes it
    /// possible to declare simple configurations in a single
    /// expression, with `.take().build()`.
    pub fn take(&mut self) -> Self {
        std::mem::replace(self, Self::default())
    }

    /// Consumes the builder and returns a fresh [`ReadOnlyCache`]
    /// for its search list of constituent cache directories.
    pub fn build(self) -> ReadOnlyCache {
        let Self {
            stack,
            consistency_checker,
        } = self;

        ReadOnlyCache::new(stack, consistency_checker)
    }
}
/// The default `ReadOnlyCache` has no constituent cache and no
/// consistency checker.
impl Default for ReadOnlyCache {
    fn default() -> Self {
        let no_caches = Vec::new();

        ReadOnlyCache::new(no_caches, None)
    }
}
impl ReadOnlyCache {
fn new(
stack: Vec<Box<dyn ReadSide>>,
consistency_checker: Option<ConsistencyChecker>,
) -> ReadOnlyCache {
ReadOnlyCache {
stack: stack.into_boxed_slice().into(),
consistency_checker,
}
}
/// Attempts to open a read-only file for `key`. The
/// [`ReadOnlyCache`] will query each constituent cache in order
/// of registration, and return a read-only file for the first
/// hit.
///
/// Fails with [`ErrorKind::InvalidInput`] if `key.name` is
/// invalid (empty, or starts with a dot or a forward or back slash).
///
/// Returns [`None`] if no file for `key` can be found in any of
/// the constituent caches, and bubbles up the first I/O error
/// encountered, if any.
///
/// In the worst case, each call to `get` attempts to open two
/// files for each cache directory in the `ReadOnlyCache` stack.
pub fn get<'a>(&self, key: impl Into<Key<'a>>) -> Result<Option<File>> {
fn doit(
stack: &[Box<dyn ReadSide>],
checker: &Option<ConsistencyChecker>,
key: Key,
) -> Result<Option<File>> {
use std::io::Seek;
use std::io::SeekFrom;
let mut ret = None;
for cache in stack.iter() {
let mut hit = match cache.get(key)? {
Some(hit) => hit,
None => continue,
};
match checker {
None => return Ok(Some(hit)),
Some(checker) => match ret.as_mut() {
None => ret = Some(hit),
Some(prev) => {
checker(prev, &mut hit)?;
prev.seek(SeekFrom::Start(0))?;
}
},
}
}
Ok(ret)
}
if self.stack.is_empty() {
return Ok(None);
}
doit(&self.stack, &self.consistency_checker, key.into())
}
/// Marks a cache entry for `key` as accessed (read). The
/// [`ReadOnlyCache`] will touch the same file that would be
/// returned by `get`.
///
/// Fails with [`ErrorKind::InvalidInput`] if `key.name` is
/// invalid (empty, or starts with a dot or a forward or back slash).
///
/// Returns whether a file for `key` could be found, and bubbles
/// up the first I/O error encountered, if any.
///
/// In the worst case, each call to `touch` attempts to update the
/// access time on two files for each cache directory in the
/// `ReadOnlyCache` stack.
pub fn touch<'a>(&self, key: impl Into<Key<'a>>) -> Result<bool> {
fn doit(stack: &[Box<dyn ReadSide>], key: Key) -> Result<bool> {
for cache in stack.iter() {
if cache.touch(key)? {
return Ok(true);
}
}
Ok(false)
}
if self.stack.is_empty() {
return Ok(false);
}
doit(&self.stack, key.into())
}
}
#[cfg(test)]
mod test {
use std::fs::File;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use crate::plain::Cache as PlainCache;
use crate::sharded::Cache as ShardedCache;
use crate::Key;
use crate::ReadOnlyCache;
use crate::ReadOnlyCacheBuilder;
/// Test-only key type that owns its name as a `String`.
struct TestKey {
// Name of the cache entry this key refers to.
key: String,
}
impl TestKey {
    /// Builds a `TestKey` that owns a copy of `key`.
    fn new(key: &str) -> TestKey {
        let key = String::from(key);

        TestKey { key }
    }
}
/// Lets tests pass `&TestKey` wherever `impl Into<Key>` is expected.
impl<'a> From<&'a TestKey> for Key<'a> {
fn from(x: &'a TestKey) -> Key<'a> {
// The two numeric arguments are fixed at 0 and 1 for every test
// key (presumably the key's hash values; confirm against
// `Key::new`).
Key::new(&x.key, 0, 1)
}
}
fn byte_equality_checker(
counter: Arc<AtomicU64>,
) -> impl 'static + Fn(&mut File, &mut File) -> std::io::Result<()> {
move |x: &mut File, y: &mut File| {
counter.fetch_add(1, Ordering::Relaxed);
crate::byte_equality_checker(x, y)
}
}
/// With no constituent cache at all, `get` and `touch` must both
/// succeed and report a miss.
#[test]
fn empty() {
    let cache = ReadOnlyCache::default();

    let lookup = cache.get(Key::new("foo", 1, 2));
    assert!(matches!(lookup, Ok(None)));

    let touched = cache.touch(Key::new("foo", 1, 2));
    assert!(matches!(touched, Ok(false)));
}
/// Populate two plain caches and set a consistency checker. We
/// should access both.
#[test]
fn consistency_checker_success() {
use std::io::Read;
use test_dir::{DirBuilder, FileType, TestDir};
// Key "0" is present in both directories with the same size; keys
// "1" and "2" each live in only one directory.
let temp = TestDir::temp()
.create("first", FileType::Dir)
.create("second", FileType::Dir)
.create("first/0", FileType::ZeroFile(2))
.create("second/0", FileType::ZeroFile(2))
.create("first/1", FileType::RandomFile(10))
.create("second/2", FileType::RandomFile(10));
// Counts the number of times the checker is invoked.
let counter = Arc::new(AtomicU64::new(0));
let ro = ReadOnlyCacheBuilder::new()
.plain(temp.path("first"))
.plain(temp.path("second"))
.consistency_checker(byte_equality_checker(counter.clone()))
.take()
.build();
let mut hit = ro
.get(&TestKey::new("0"))
.expect("must succeed")
.expect("must exist");
// Two hits for "0": exactly one comparison.
assert_eq!(counter.load(Ordering::Relaxed), 1);
// The returned file must be readable from its start, even though
// the checker ran on it.
let mut contents = Vec::new();
hit.read_to_end(&mut contents).expect("read should succeed");
assert_eq!(contents, "00".as_bytes());
let _ = ro
.get(&TestKey::new("1"))
.expect("must succeed")
.expect("must exist");
// Only found in one subcache, there's nothing to check.
assert_eq!(counter.load(Ordering::Relaxed), 1);
let _ = ro
.get(&TestKey::new("2"))
.expect("must succeed")
.expect("must exist");
// Only found in one subcache, there's nothing to check.
assert_eq!(counter.load(Ordering::Relaxed), 1);
}
/// Populate two plain caches and set a consistency checker. We
/// should error on mismatch.
#[test]
fn consistency_checker_failure() {
use test_dir::{DirBuilder, FileType, TestDir};
// Key "0" is present in both directories, with different sizes.
let temp = TestDir::temp()
.create("first", FileType::Dir)
.create("second", FileType::Dir)
.create("first/0", FileType::ZeroFile(2))
.create("second/0", FileType::ZeroFile(3));
// The call count is not inspected in this test.
let counter = Arc::new(AtomicU64::new(0));
let ro = ReadOnlyCacheBuilder::new()
.plain(temp.path("first"))
.plain(temp.path("second"))
.consistency_checker(byte_equality_checker(counter))
.take()
.build();
// This call should error.
assert!(ro.get(&TestKey::new("0")).is_err());
}
/// Populate two plain caches and unset the consistency checker. We
/// should not error.
#[test]
fn consistency_checker_silent_failure() {
use test_dir::{DirBuilder, FileType, TestDir};
// Key "0" is present in both directories, with different sizes.
let temp = TestDir::temp()
.create("first", FileType::Dir)
.create("second", FileType::Dir)
.create("first/0", FileType::ZeroFile(2))
.create("second/0", FileType::ZeroFile(3));
let counter = Arc::new(AtomicU64::new(0));
// Set a checker, then clear it again before building.
let ro = ReadOnlyCacheBuilder::new()
.plain(temp.path("first"))
.plain(temp.path("second"))
.consistency_checker(byte_equality_checker(counter.clone()))
.clear_consistency_checker()
.take()
.build();
// This call should not error.
let _ = ro
.get(&TestKey::new("0"))
.expect("must succeed")
.expect("must exist");
// There should be no call to the checker function.
assert_eq!(counter.load(Ordering::Relaxed), 0);
}
/// Populate two plain caches. We should read from both.
#[test]
fn two_plain_caches() {
use test_dir::{DirBuilder, FileType, TestDir};
// Key "0" only exists in the first directory, and key "1" only in
// the second one.
let temp = TestDir::temp()
.create("first", FileType::Dir)
.create("second", FileType::Dir)
.create("first/0", FileType::ZeroFile(2))
.create("second/1", FileType::ZeroFile(3));
// Register both directories at once, in order.
let ro = ReadOnlyCacheBuilder::new()
.plain_caches(["first", "second"].iter().map(|p| temp.path(p)))
.take()
.build();
// We should find 0 and 1.
let _ = ro
.get(&TestKey::new("0"))
.expect("must succeed")
.expect("must exist");
let _ = ro
.get(&TestKey::new("1"))
.expect("must succeed")
.expect("must exist");
// But not 2.
assert!(ro.get(&TestKey::new("2")).expect("must succeed").is_none());
}
/// Use a byte equality checker with two different cache files for
/// the same key. We should find an error.
#[test]
fn test_byte_equality_checker() {
use test_dir::{DirBuilder, FileType, TestDir};
// Key "0" is present in both directories, with different sizes.
let temp = TestDir::temp()
.create("first", FileType::Dir)
.create("second", FileType::Dir)
.create("first/0", FileType::ZeroFile(2))
.create("second/0", FileType::ZeroFile(3));
// Use the builder's built-in byte equality checker.
let ro = ReadOnlyCacheBuilder::new()
.plain_caches(["first", "second"].iter().map(|p| temp.path(p)))
.byte_equality_checker()
.take()
.build();
// The mismatch must be reported as an error.
assert!(ro.get(&TestKey::new("0")).is_err());
}
/// Use a panicking byte equality checker with two different cache
/// files for the same key. The lookup should panic with the
/// message "file contents do not match".
#[test]
#[should_panic(expected = "file contents do not match")]
fn test_panicking_byte_equality_checker() {
use test_dir::{DirBuilder, FileType, TestDir};
// Key "0" is present in both directories, with different sizes.
let temp = TestDir::temp()
.create("first", FileType::Dir)
.create("second", FileType::Dir)
.create("first/0", FileType::ZeroFile(2))
.create("second/0", FileType::ZeroFile(3));
// Use the builder's built-in panicking byte equality checker.
let ro = ReadOnlyCacheBuilder::new()
.plain_caches(["first", "second"].iter().map(|p| temp.path(p)))
.panicking_byte_equality_checker()
.take()
.build();
// We should fail before returning Err: the expected panic message
// is the mismatch message, not the message of a failed assertion.
assert!(ro.get(&TestKey::new("0")).is_ok());
}
/// Populate a plain and a sharded cache. We should be able to access
/// both.
///
/// Key "b" is stored in both caches with different contents, so the
/// value read back for "b" shows which cache was searched first.
#[test]
fn smoke_test() {
use std::io::{Read, Write};
use tempfile::NamedTempFile;
use test_dir::{DirBuilder, FileType, TestDir};
let temp = TestDir::temp()
.create("sharded", FileType::Dir)
.create("plain", FileType::Dir);
// Populate the sharded cache with keys "a" and "b".
{
let cache = ShardedCache::new(temp.path("sharded"), 10, 20);
// Each value is written to a temporary file created in the
// cache's own temporary directory, then handed to `put`.
let tmp = NamedTempFile::new_in(cache.temp_dir(None).expect("temp_dir must succeed"))
.expect("new temp file must succeed");
tmp.as_file()
.write_all(b"sharded")
.expect("write must succeed");
cache
.put(Key::new("a", 0, 1), tmp.path())
.expect("put must succeed");
let tmp2 = NamedTempFile::new_in(cache.temp_dir(None).expect("temp_dir must succeed"))
.expect("new temp file must succeed");
tmp2.as_file()
.write_all(b"sharded2")
.expect("write must succeed");
cache
.put(Key::new("b", 0, 1), tmp2.path())
.expect("put must succeed");
}
// Populate the plain cache with keys "b" and "c".
{
let cache = PlainCache::new(temp.path("plain"), 10);
let tmp = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
.expect("new temp file must succeed");
tmp.as_file()
.write_all(b"plain")
.expect("write must succeed");
cache.put("b", tmp.path()).expect("put must succeed");
let tmp2 = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
.expect("new temp file must succeed");
tmp2.as_file()
.write_all(b"plain2")
.expect("write must succeed");
cache.put("c", tmp2.path()).expect("put must succeed");
}
// sharded.a => "sharded"
// sharded.b => "sharded2"
// plain.b => "plain"
// plain.c => "plain2"
// Read from sharded, then plain.
{
let ro = ReadOnlyCacheBuilder::new()
.sharded(temp.path("sharded"), 10)
.plain(temp.path("plain"))
.take()
.build();
// A key that is in neither cache is a miss for both operations.
assert!(matches!(ro.get(&TestKey::new("Missing")), Ok(None)));
assert!(matches!(ro.touch(&TestKey::new("Missing")), Ok(false)));
// We should be able to touch `a`.
assert!(matches!(ro.touch(&TestKey::new("a")), Ok(true)));
// And now check that we get the correct file contents.
{
let mut a_file = ro
.get(&TestKey::new("a"))
.expect("must succeed")
.expect("must exist");
let mut dst = Vec::new();
a_file.read_to_end(&mut dst).expect("read must succeed");
assert_eq!(&dst, b"sharded");
}
// "b" is in both caches: the sharded cache comes first and wins.
{
let mut b_file = ro
.get(&TestKey::new("b"))
.expect("must succeed")
.expect("must exist");
let mut dst = Vec::new();
b_file.read_to_end(&mut dst).expect("read must succeed");
assert_eq!(&dst, b"sharded2");
}
// "c" is only in the plain cache, which is searched second.
{
let mut c_file = ro
.get(&TestKey::new("c"))
.expect("must succeed")
.expect("must exist");
let mut dst = Vec::new();
c_file.read_to_end(&mut dst).expect("read must succeed");
assert_eq!(&dst, b"plain2");
}
}
// Read from plain then sharded.
{
// `cache` adds a plain cache for 1 shard and a sharded cache for
// 10 shards.
let ro = ReadOnlyCacheBuilder::new()
.cache(temp.path("plain"), 1)
.cache(temp.path("sharded"), 10)
.take()
.build();
// "a" is only in the sharded cache, which is searched second.
{
let mut a_file = ro
.get(&TestKey::new("a"))
.expect("must succeed")
.expect("must exist");
let mut dst = Vec::new();
a_file.read_to_end(&mut dst).expect("read must succeed");
assert_eq!(&dst, b"sharded");
}
// "b" is in both caches: the plain cache now comes first and wins.
{
let mut b_file = ro
.get(&TestKey::new("b"))
.expect("must succeed")
.expect("must exist");
let mut dst = Vec::new();
b_file.read_to_end(&mut dst).expect("read must succeed");
assert_eq!(&dst, b"plain");
}
{
let mut c_file = ro
.get(&TestKey::new("c"))
.expect("must succeed")
.expect("must exist");
let mut dst = Vec::new();
c_file.read_to_end(&mut dst).expect("read must succeed");
assert_eq!(&dst, b"plain2");
}
}
}
}