mod prefix_tree;
mod wal;
use crate::error::Result;
use crate::types::LogIndex;
pub use prefix_tree::{LookupProof, PrefixTree, ProofNode};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::io::Write as _;
use std::path::Path;
use std::sync::{Arc, RwLock};
pub use wal::{WalReader, WalWriter};
pub type IndexKey = [u8; 32];
pub trait MapFn: Send + Sync {
fn map(&self, data: &[u8]) -> Vec<IndexKey>;
}
pub struct IdentityMapFn;
impl MapFn for IdentityMapFn {
fn map(&self, data: &[u8]) -> Vec<IndexKey> {
let mut hasher = Sha256::new();
hasher.update(data);
vec![hasher.finalize().into()]
}
}
pub struct JsonKeysMapFn {
pub field: String,
}
impl JsonKeysMapFn {
pub fn new(field: impl Into<String>) -> Self {
Self {
field: field.into(),
}
}
}
impl MapFn for JsonKeysMapFn {
fn map(&self, data: &[u8]) -> Vec<IndexKey> {
let Ok(value) = serde_json::from_slice::<serde_json::Value>(data) else {
return Vec::new();
};
let Some(keys) = value.get(&self.field) else {
return Vec::new();
};
match keys {
serde_json::Value::Array(arr) => arr
.iter()
.filter_map(|v| v.as_str())
.map(|s| {
let mut hasher = Sha256::new();
hasher.update(s.as_bytes());
hasher.finalize().into()
})
.collect(),
serde_json::Value::String(s) => {
let mut hasher = Sha256::new();
hasher.update(s.as_bytes());
vec![hasher.finalize().into()]
}
_ => Vec::new(),
}
}
}
#[derive(Debug, Clone)]
pub struct LookupResult {
pub indices: Vec<LogIndex>,
pub tree_size: u64,
pub found: bool,
pub proof: Vec<ProofNode>,
}
pub struct VerifiableIndex {
index: RwLock<HashMap<IndexKey, Vec<LogIndex>>>,
wal_writer: Option<RwLock<WalWriter>>,
map_fn: Arc<dyn MapFn>,
tree_size: RwLock<u64>,
prefix_tree: RwLock<PrefixTree>,
}
impl VerifiableIndex {
pub fn new(map_fn: Arc<dyn MapFn>) -> Self {
Self {
index: RwLock::new(HashMap::new()),
wal_writer: None,
map_fn,
tree_size: RwLock::new(0),
prefix_tree: RwLock::new(PrefixTree::new()),
}
}
pub fn with_wal(map_fn: Arc<dyn MapFn>, wal_path: impl AsRef<Path>) -> Result<Self> {
let wal_path = wal_path.as_ref();
let mut tree_size = 0u64;
let mut index: HashMap<IndexKey, Vec<LogIndex>> = HashMap::new();
let mut prefix_tree = PrefixTree::new();
if wal_path.exists() {
let mut reader = WalReader::open(wal_path)?;
while let Some((idx, keys)) = reader.next_entry()? {
for key in keys {
index.entry(key).or_default().push(idx);
}
tree_size = tree_size.max(idx.value() + 1);
}
for (key, indices) in &index {
let value_hash = compute_value_hash(indices);
prefix_tree.insert(key, value_hash);
}
}
let wal_writer = WalWriter::open(wal_path)?;
Ok(Self {
index: RwLock::new(index),
wal_writer: Some(RwLock::new(wal_writer)),
map_fn,
tree_size: RwLock::new(tree_size),
prefix_tree: RwLock::new(prefix_tree),
})
}
pub fn index_entry(&self, idx: LogIndex, data: &[u8]) -> Result<()> {
let keys = self.map_fn.map(data);
if keys.is_empty() {
let mut tree_size = self.tree_size.write().unwrap();
*tree_size = (*tree_size).max(idx.value() + 1);
return Ok(());
}
if let Some(wal) = &self.wal_writer {
let mut wal = wal.write().unwrap();
wal.append(idx, &keys)?;
}
{
let mut index = self.index.write().unwrap();
let mut prefix_tree = self.prefix_tree.write().unwrap();
for key in &keys {
let indices = index.entry(*key).or_default();
indices.push(idx);
let value_hash = compute_value_hash(indices);
prefix_tree.insert(key, value_hash);
}
}
{
let mut tree_size = self.tree_size.write().unwrap();
*tree_size = (*tree_size).max(idx.value() + 1);
}
Ok(())
}
pub fn lookup(&self, key: &IndexKey) -> LookupResult {
let index = self.index.read().unwrap();
let prefix_tree = self.prefix_tree.read().unwrap();
let tree_size = *self.tree_size.read().unwrap();
let indices = index.get(key).cloned().unwrap_or_default();
let lookup_proof = prefix_tree.lookup(key);
LookupResult {
indices,
tree_size,
found: lookup_proof.found,
proof: lookup_proof.proof,
}
}
pub fn lookup_string(&self, key: &str) -> LookupResult {
let mut hasher = Sha256::new();
hasher.update(key.as_bytes());
let hash: IndexKey = hasher.finalize().into();
self.lookup(&hash)
}
pub fn tree_size(&self) -> u64 {
*self.tree_size.read().unwrap()
}
pub fn key_count(&self) -> usize {
self.index.read().unwrap().len()
}
pub fn flush(&self) -> Result<()> {
if let Some(wal) = &self.wal_writer {
wal.write().unwrap().flush()?;
}
Ok(())
}
pub fn root_hash(&self) -> IndexKey {
self.prefix_tree.read().unwrap().root_hash()
}
}
fn compute_value_hash(indices: &[LogIndex]) -> IndexKey {
let mut hasher = Sha256::new();
for idx in indices {
hasher.write_all(&idx.value().to_be_bytes()).unwrap();
}
hasher.finalize().into()
}
pub fn hash_key(s: &str) -> IndexKey {
let mut hasher = Sha256::new();
hasher.update(s.as_bytes());
hasher.finalize().into()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_identity_map_fn() {
let map_fn = IdentityMapFn;
let keys = map_fn.map(b"hello world");
assert_eq!(keys.len(), 1);
let keys2 = map_fn.map(b"hello world");
assert_eq!(keys, keys2);
let keys3 = map_fn.map(b"goodbye world");
assert_ne!(keys, keys3);
}
#[test]
fn test_json_keys_map_fn() {
let map_fn = JsonKeysMapFn::new("packages");
let data = br#"{"packages": ["foo", "bar", "baz"]}"#;
let keys = map_fn.map(data);
assert_eq!(keys.len(), 3);
let data = br#"{"packages": "foo"}"#;
let keys = map_fn.map(data);
assert_eq!(keys.len(), 1);
let data = br#"{"other": "value"}"#;
let keys = map_fn.map(data);
assert_eq!(keys.len(), 0);
let data = b"not json";
let keys = map_fn.map(data);
assert_eq!(keys.len(), 0);
}
#[test]
fn test_verifiable_index_in_memory() {
let map_fn = Arc::new(JsonKeysMapFn::new("name"));
let index = VerifiableIndex::new(map_fn);
index
.index_entry(LogIndex::new(0), br#"{"name": "foo"}"#)
.unwrap();
index
.index_entry(LogIndex::new(1), br#"{"name": "bar"}"#)
.unwrap();
index
.index_entry(LogIndex::new(2), br#"{"name": "foo"}"#)
.unwrap();
let result = index.lookup_string("foo");
assert_eq!(result.indices.len(), 2);
assert_eq!(result.indices[0].value(), 0);
assert_eq!(result.indices[1].value(), 2);
assert!(result.found);
assert!(!result.proof.is_empty());
let result = index.lookup_string("bar");
assert_eq!(result.indices.len(), 1);
assert_eq!(result.indices[0].value(), 1);
assert!(result.found);
assert!(!result.proof.is_empty());
let result = index.lookup_string("unknown");
assert_eq!(result.indices.len(), 0);
assert!(!result.found);
assert_eq!(index.tree_size(), 3);
assert_eq!(index.key_count(), 2);
let root_hash = index.root_hash();
assert_ne!(root_hash, [0u8; 32]);
}
#[test]
fn test_verifiable_index_root_changes() {
let map_fn = Arc::new(JsonKeysMapFn::new("name"));
let index = VerifiableIndex::new(map_fn);
assert_eq!(index.root_hash(), [0u8; 32]);
index
.index_entry(LogIndex::new(0), br#"{"name": "foo"}"#)
.unwrap();
let root1 = index.root_hash();
assert_ne!(root1, [0u8; 32]);
index
.index_entry(LogIndex::new(1), br#"{"name": "bar"}"#)
.unwrap();
let root2 = index.root_hash();
assert_ne!(root2, root1);
index
.index_entry(LogIndex::new(2), br#"{"name": "foo"}"#)
.unwrap();
let root3 = index.root_hash();
assert_ne!(root3, root2);
}
#[test]
fn test_hash_key() {
let key1 = hash_key("foo");
let key2 = hash_key("foo");
let key3 = hash_key("bar");
assert_eq!(key1, key2);
assert_ne!(key1, key3);
assert_eq!(key1.len(), 32);
}
}