use chrono::{DateTime, Utc};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::path::Path;
/// Processing state recorded for a single prefix within one n-gram order.
///
/// A prefix that has never been started has no entry at all (see
/// `OrderProgress::get_state`, which returns `None` in that case).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PrefixState {
/// Work on the prefix has started and has not been marked finished.
InProgress,
/// The prefix was processed successfully.
Completed,
/// Processing failed; `ImportCheckpoint::needs_prefix` reports such prefixes as needed again.
Failed,
}
/// Progress bookkeeping for one n-gram order.
///
/// Serialized through `OrderProgressSerde` (three prefix lists) rather than
/// as the raw map.
#[derive(Clone, Debug, Default)]
pub struct OrderProgress {
// State per prefix; a missing key means the prefix was never started.
prefix_states: HashMap<String, PrefixState>,
/// Set once the whole order has been marked complete.
pub is_complete: bool,
/// N-grams counted for this order (incremented by `ImportCheckpoint::add_ngrams`).
pub ngrams_processed: u64,
}
impl OrderProgress {
    /// Yields every prefix whose recorded state equals `wanted`, in map iteration order.
    fn prefixes_in_state(&self, wanted: PrefixState) -> impl Iterator<Item = &String> {
        self.prefix_states
            .iter()
            .filter_map(move |(prefix, state)| if *state == wanted { Some(prefix) } else { None })
    }

    /// Iterates over the prefixes currently marked `Completed`.
    pub fn completed_prefixes(&self) -> impl Iterator<Item = &String> {
        self.prefixes_in_state(PrefixState::Completed)
    }

    /// Iterates over the prefixes currently marked `InProgress`.
    pub fn in_progress_prefixes(&self) -> impl Iterator<Item = &String> {
        self.prefixes_in_state(PrefixState::InProgress)
    }

    /// Iterates over the prefixes currently marked `Failed`.
    pub fn failed_prefixes(&self) -> impl Iterator<Item = &String> {
        self.prefixes_in_state(PrefixState::Failed)
    }

    /// Returns the state recorded for `prefix`, or `None` if it has no entry.
    pub fn get_state(&self, prefix: &str) -> Option<PrefixState> {
        self.prefix_states.get(prefix).copied()
    }

    /// Records `state` for `prefix`, replacing any previous state.
    pub fn set_state(&mut self, prefix: String, state: PrefixState) {
        let _previous = self.prefix_states.insert(prefix, state);
    }

    /// Forgets any state recorded for `prefix`.
    pub fn clear_state(&mut self, prefix: &str) {
        let _removed = self.prefix_states.remove(prefix);
    }

    /// Counts the prefixes whose recorded state equals `state`.
    pub fn count_state(&self, state: PrefixState) -> usize {
        self.prefixes_in_state(state).count()
    }
}
/// Serialized shape of `OrderProgress`: the prefix map is flattened into one
/// list per state.
#[derive(Serialize, Deserialize)]
struct OrderProgressSerde {
// Prefixes in the `Completed` state (required when deserializing).
completed_prefixes: Vec<String>,
// Prefixes in the `InProgress` state; treated as empty when absent.
#[serde(default)]
in_progress_prefixes: Vec<String>,
// Prefixes in the `Failed` state; treated as empty when absent.
#[serde(default)]
failed_prefixes: Vec<String>,
is_complete: bool,
ngrams_processed: u64,
}
impl Serialize for OrderProgress {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let serde_repr = OrderProgressSerde {
completed_prefixes: self.completed_prefixes().cloned().collect(),
in_progress_prefixes: self.in_progress_prefixes().cloned().collect(),
failed_prefixes: self.failed_prefixes().cloned().collect(),
is_complete: self.is_complete,
ngrams_processed: self.ngrams_processed,
};
serde_repr.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for OrderProgress {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let serde_repr = OrderProgressSerde::deserialize(deserializer)?;
let mut prefix_states = HashMap::with_capacity(
serde_repr.completed_prefixes.len()
+ serde_repr.in_progress_prefixes.len()
+ serde_repr.failed_prefixes.len(),
);
for prefix in serde_repr.completed_prefixes {
prefix_states.insert(prefix, PrefixState::Completed);
}
for prefix in serde_repr.in_progress_prefixes {
prefix_states.insert(prefix, PrefixState::InProgress);
}
for prefix in serde_repr.failed_prefixes {
prefix_states.insert(prefix, PrefixState::Failed);
}
Ok(OrderProgress {
prefix_states,
is_complete: serde_repr.is_complete,
ngrams_processed: serde_repr.ngrams_processed,
})
}
}
/// Resumable import state, persisted either as a JSON file (`save`/`load`)
/// or as keys in a trie store (`save_to_trie`/`load_from_trie`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ImportCheckpoint {
/// Format version of this checkpoint.
pub version: u32,
/// Progress per n-gram order, keyed by the order number.
pub order_progress: HashMap<u8, OrderProgress>,
/// Prefix most recently passed to `update_offset`, if any.
pub current_prefix: Option<String>,
/// Offset most recently passed to `update_offset`.
pub byte_offset: u64,
/// Current MKN phase.
pub mkn_phase: MknPhase,
/// Aggregate counters.
pub stats: CheckpointStats,
/// Timestamp of the checkpoint; `save` writes the current time here.
pub timestamp: DateTime<Utc>,
}
/// Version 1 JSON layout, deserialize-only; converted to the current layout
/// by `ImportCheckpoint::migrate_from_v1`.
#[derive(Clone, Debug, Deserialize)]
struct ImportCheckpointV1 {
// Read from the JSON "version" key; not used after deserialization.
#[serde(rename = "version")]
pub _version: u32,
// Orders that were fully finished.
pub completed_orders: Vec<u8>,
// The single order the `completed_prefixes` list belongs to.
pub current_order: u8,
pub completed_prefixes: Vec<String>,
pub current_prefix: Option<String>,
pub byte_offset: u64,
pub mkn_phase: MknPhase,
pub stats: CheckpointStats,
pub timestamp: DateTime<Utc>,
}
/// Phase of the two-pass MKN computation.
///
/// NOTE(review): "MKN" presumably stands for modified Kneser-Ney smoothing — confirm.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum MknPhase {
/// Nothing has been done yet (the default).
#[default]
NotStarted,
/// Pass 1 is running for `current_order`.
Pass1InProgress {
current_order: u8,
},
/// Pass 1 has finished.
Pass1Complete,
/// Pass 2 is running for `current_order`.
Pass2InProgress {
current_order: u8,
},
/// Both passes have finished.
Complete,
}
/// Aggregate counters stored alongside the checkpoint.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct CheckpointStats {
/// Total n-grams processed across all orders.
pub ngrams_processed: u64,
/// Unique n-gram count; defaults to 0 when absent from the serialized form.
#[serde(default)]
pub unique_ngrams: u64,
/// Per-order n-gram counts; slot `i` holds order `i + 1` (orders 1 through 5).
pub ngrams_by_order: [u64; 5],
pub bytes_downloaded: u64,
/// Incremented once per `ImportCheckpoint::complete_prefix` call.
pub files_processed: u32,
pub elapsed_seconds: u64,
}
impl ImportCheckpoint {
/// Newest checkpoint version: written by `new` and `save_to_trie`, and the
/// highest version the loaders accept.
pub const CURRENT_VERSION: u32 = 3;
/// Creates an empty checkpoint at the current version, stamped with the
/// current UTC time.
pub fn new() -> Self {
    let created_at = Utc::now();
    Self {
        timestamp: created_at,
        stats: CheckpointStats::default(),
        mkn_phase: MknPhase::NotStarted,
        byte_offset: 0,
        current_prefix: None,
        order_progress: HashMap::new(),
        version: Self::CURRENT_VERSION,
    }
}
/// Loads a checkpoint from the JSON file at `path`.
///
/// A missing or non-integer `"version"` key is treated as version 1.
/// Version 1 files are migrated to the current layout; any other supported
/// version is deserialized directly.
///
/// # Errors
///
/// * `CheckpointError::Io` if the file cannot be opened.
/// * `CheckpointError::Json` if the content is not valid JSON or does not
///   match the expected layout.
/// * `CheckpointError::UnsupportedVersion` if the file is newer than
///   `CURRENT_VERSION`.
pub fn load(path: &Path) -> Result<Self, CheckpointError> {
    let file = File::open(path).map_err(CheckpointError::Io)?;
    let value: serde_json::Value =
        serde_json::from_reader(BufReader::new(file)).map_err(CheckpointError::Json)?;

    // Compare as u64: narrowing to u32 first would wrap huge versions
    // (e.g. 2^32 + 1) into the supported range.
    let version = value.get("version").and_then(|v| v.as_u64()).unwrap_or(1);
    if version > u64::from(Self::CURRENT_VERSION) {
        return Err(CheckpointError::UnsupportedVersion {
            // Saturate so the reported value still fits the error's u32 field.
            found: version.min(u64::from(u32::MAX)) as u32,
            max: Self::CURRENT_VERSION,
        });
    }

    if version == 1 {
        let v1: ImportCheckpointV1 =
            serde_json::from_value(value).map_err(CheckpointError::Json)?;
        // Migration stamps the result with CURRENT_VERSION, so report that.
        log::info!(
            "Migrating checkpoint from v1 to v{} format (current_order={}, prefixes={})",
            Self::CURRENT_VERSION,
            v1.current_order,
            v1.completed_prefixes.len()
        );
        Ok(Self::migrate_from_v1(v1))
    } else {
        serde_json::from_value(value).map_err(CheckpointError::Json)
    }
}
fn migrate_from_v1(v1: ImportCheckpointV1) -> Self {
let mut order_progress = HashMap::new();
for order in v1.completed_orders {
let progress = OrderProgress {
prefix_states: HashMap::new(),
is_complete: true,
ngrams_processed: 0, };
order_progress.insert(order, progress);
}
if !v1.completed_prefixes.is_empty() || v1.current_prefix.is_some() {
let mut prefix_states = HashMap::with_capacity(v1.completed_prefixes.len());
for prefix in v1.completed_prefixes {
prefix_states.insert(prefix, PrefixState::Completed);
}
let progress = OrderProgress {
prefix_states,
is_complete: false,
ngrams_processed: 0,
};
order_progress.insert(v1.current_order, progress);
}
Self {
version: Self::CURRENT_VERSION,
order_progress,
current_prefix: v1.current_prefix,
byte_offset: v1.byte_offset,
mkn_phase: v1.mkn_phase,
stats: v1.stats,
timestamp: v1.timestamp,
}
}
/// Writes this checkpoint as pretty-printed JSON to `path`.
///
/// The data is written to a sibling temporary file (extension replaced by
/// `checkpoint.tmp`) and then renamed over `path`, so a failed write never
/// replaces an existing checkpoint. The written copy carries the current
/// UTC time as its timestamp; `self` is left untouched.
///
/// # Errors
///
/// * `CheckpointError::Io` if the temporary file cannot be created,
///   flushed, or renamed.
/// * `CheckpointError::Json` if serialization fails.
pub fn save(&self, path: &Path) -> Result<(), CheckpointError> {
    let temp_path = path.with_extension("checkpoint.tmp");
    let file = File::create(&temp_path).map_err(CheckpointError::Io)?;
    let mut writer = BufWriter::new(file);

    let mut checkpoint = self.clone();
    checkpoint.timestamp = Utc::now();
    serde_json::to_writer_pretty(&mut writer, &checkpoint).map_err(CheckpointError::Json)?;

    // Flush explicitly: dropping a BufWriter discards flush errors, which
    // would let a truncated temp file be renamed over a good checkpoint.
    // `into_inner` flushes and hands back the file, which is closed here.
    writer
        .into_inner()
        .map_err(|e| CheckpointError::Io(e.into_error()))?;

    std::fs::rename(&temp_path, path).map_err(CheckpointError::Io)?;
    Ok(())
}
/// Returns `true` if metadata for `path` can be read, i.e. something
/// accessible exists there.
pub fn exists(path: &Path) -> bool {
    std::fs::metadata(path).is_ok()
}
/// Removes the checkpoint file at `path`; a file that is already absent
/// counts as success.
///
/// # Errors
///
/// `CheckpointError::Io` for any removal failure other than "not found".
pub fn delete(path: &Path) -> Result<(), CheckpointError> {
    // Remove directly rather than checking existence first: the file may
    // vanish between the check and the removal.
    match std::fs::remove_file(path) {
        Ok(()) => Ok(()),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(e) => Err(CheckpointError::Io(e)),
    }
}
/// Marks `prefix` of `order` as in progress, creating the order entry if
/// needed and replacing any previous state.
pub fn start_prefix(&mut self, order: u8, prefix: &str) {
    self.order_progress
        .entry(order)
        .or_default()
        .set_state(prefix.to_owned(), PrefixState::InProgress);
}
/// Marks `prefix` of `order` as completed and bumps `stats.files_processed`
/// by one (on every call, even for an already completed prefix).
pub fn complete_prefix(&mut self, order: u8, prefix: &str) {
    self.order_progress
        .entry(order)
        .or_default()
        .set_state(prefix.to_owned(), PrefixState::Completed);
    self.stats.files_processed += 1;
}
/// Marks `prefix` of `order` as failed, creating the order entry if needed.
pub fn fail_prefix(&mut self, order: u8, prefix: &str) {
    self.order_progress
        .entry(order)
        .or_default()
        .set_state(prefix.to_owned(), PrefixState::Failed);
}
/// Drops the state of `prefix` in `order`, but only if it is currently
/// `Failed`; any other state (or an unknown order) is left alone.
pub fn clear_failed(&mut self, order: u8, prefix: &str) {
    match self.order_progress.get_mut(&order) {
        Some(progress) if progress.get_state(prefix) == Some(PrefixState::Failed) => {
            progress.clear_state(prefix);
        }
        _ => {}
    }
}
/// Returns `true` if `prefix` of `order` is currently marked in progress.
pub fn is_in_progress(&self, order: u8, prefix: &str) -> bool {
    let state = self
        .order_progress
        .get(&order)
        .and_then(|progress| progress.get_state(prefix));
    matches!(state, Some(PrefixState::InProgress))
}
/// Returns `true` if `prefix` of `order` is currently marked failed.
pub fn is_failed_prefix(&self, order: u8, prefix: &str) -> bool {
    let state = self
        .order_progress
        .get(&order)
        .and_then(|progress| progress.get_state(prefix));
    matches!(state, Some(PrefixState::Failed))
}
/// Returns owned copies of the in-progress prefixes of `order`
/// (empty if the order is unknown). Order of the result is unspecified.
pub fn in_progress_prefixes(&self, order: u8) -> Vec<String> {
    match self.order_progress.get(&order) {
        Some(progress) => progress.in_progress_prefixes().cloned().collect(),
        None => Vec::new(),
    }
}
/// Returns owned copies of the failed prefixes of `order`
/// (empty if the order is unknown). Order of the result is unspecified.
pub fn failed_prefixes(&self, order: u8) -> Vec<String> {
    match self.order_progress.get(&order) {
        Some(progress) => progress.failed_prefixes().cloned().collect(),
        None => Vec::new(),
    }
}
/// Re-labels every in-progress prefix of `order` as failed.
/// Does nothing if the order is unknown.
pub fn recover_in_progress_as_failed(&mut self, order: u8) {
    if let Some(progress) = self.order_progress.get_mut(&order) {
        for state in progress.prefix_states.values_mut() {
            if *state == PrefixState::InProgress {
                *state = PrefixState::Failed;
            }
        }
    }
}
/// Number of failed prefixes recorded for `order` (0 if the order is unknown).
pub fn failed_prefix_count(&self, order: u8) -> usize {
    match self.order_progress.get(&order) {
        Some(progress) => progress.count_state(PrefixState::Failed),
        None => 0,
    }
}
/// Number of failed prefixes summed over all orders.
pub fn total_failed_prefix_count(&self) -> usize {
    let mut total = 0;
    for progress in self.order_progress.values() {
        total += progress.count_state(PrefixState::Failed);
    }
    total
}
/// Adds `count` to both the global n-gram total and the per-order total of
/// `order`, creating the order entry if needed.
pub fn add_ngrams(&mut self, order: u8, count: u64) {
    self.stats.ngrams_processed += count;
    self.order_progress.entry(order).or_default().ngrams_processed += count;
}
/// Marks `order` as complete, creating its entry if needed.
///
/// # Errors
///
/// `CheckpointError::OrderHasInProgressPrefixes` if any prefix of the
/// order is still in progress; the order is then left unchanged.
pub fn complete_order(&mut self, order: u8) -> Result<(), CheckpointError> {
    let still_running = match self.order_progress.get(&order) {
        Some(progress) => progress.count_state(PrefixState::InProgress),
        None => 0,
    };
    if still_running > 0 {
        return Err(CheckpointError::OrderHasInProgressPrefixes {
            order,
            count: still_running,
        });
    }
    self.order_progress.entry(order).or_default().is_complete = true;
    Ok(())
}
/// Reports whether `prefix` of `order` still has to be processed.
///
/// Returns `true` for an unknown order, and for a prefix that has no state
/// or is `Failed`. Returns `false` if the order is complete or the prefix
/// is `Completed` or `InProgress`.
pub fn needs_prefix(&self, order: u8, prefix: &str) -> bool {
    match self.order_progress.get(&order) {
        None => true,
        Some(progress) if progress.is_complete => false,
        Some(progress) => matches!(
            progress.get_state(prefix),
            None | Some(PrefixState::Failed)
        ),
    }
}
/// Returns `true` if `order` is known and marked complete.
pub fn is_order_complete(&self, order: u8) -> bool {
    match self.order_progress.get(&order) {
        Some(progress) => progress.is_complete,
        None => false,
    }
}
/// Number of completed prefixes recorded for `order` (0 if the order is unknown).
pub fn completed_prefix_count(&self, order: u8) -> usize {
    match self.order_progress.get(&order) {
        Some(progress) => progress.count_state(PrefixState::Completed),
        None => 0,
    }
}
/// Number of completed prefixes summed over all orders.
pub fn total_completed_prefix_count(&self) -> usize {
    let mut total = 0;
    for progress in self.order_progress.values() {
        total += progress.count_state(PrefixState::Completed);
    }
    total
}
/// Orders that have an entry but are not marked complete.
/// The result follows map iteration order, which is unspecified.
pub fn orders_in_progress(&self) -> Vec<u8> {
    self.order_progress
        .iter()
        .filter_map(|(order, progress)| if progress.is_complete { None } else { Some(*order) })
        .collect()
}
/// Orders that are marked complete.
/// The result follows map iteration order, which is unspecified.
pub fn completed_orders(&self) -> Vec<u8> {
    self.order_progress
        .iter()
        .filter_map(|(order, progress)| if progress.is_complete { Some(*order) } else { None })
        .collect()
}
/// Records `prefix` as the current prefix together with its byte `offset`.
pub fn update_offset(&mut self, prefix: &str, offset: u64) {
    self.byte_offset = offset;
    self.current_prefix = Some(prefix.to_owned());
}
/// Builds a one-line human-readable summary: completed orders, orders in
/// progress, completed-prefix counts for each incomplete order
/// (`order:count`), total n-grams and files, and — only when non-zero —
/// the total number of failed prefixes.
pub fn progress_summary(&self) -> String {
    let mut per_order_counts = Vec::new();
    for (order, progress) in &self.order_progress {
        if !progress.is_complete {
            per_order_counts.push(format!(
                "{}:{}",
                order,
                progress.count_state(PrefixState::Completed)
            ));
        }
    }

    let failed_suffix = match self.total_failed_prefix_count() {
        0 => String::new(),
        failed => format!(", Failed: {}", failed),
    };

    format!(
        "Completed: {:?}, In progress: {:?}, Prefixes: [{}], N-grams: {}, Files: {}{}",
        self.completed_orders(),
        self.orders_in_progress(),
        per_order_counts.join(", "),
        self.stats.ngrams_processed,
        self.stats.files_processed,
        failed_suffix,
    )
}
}
impl Default for ImportCheckpoint {
fn default() -> Self {
Self::new()
}
}
/// Errors produced while saving, loading, or updating a checkpoint.
#[derive(Debug, thiserror::Error)]
pub enum CheckpointError {
/// A filesystem operation on the checkpoint file failed.
#[error("I/O error: {0}")]
Io(#[source] std::io::Error),
/// JSON parsing or serialization failed.
#[error("JSON error: {0}")]
Json(#[source] serde_json::Error),
/// The stored checkpoint version is newer than this code supports.
#[error("Unsupported checkpoint version: found {found}, max supported {max}")]
UnsupportedVersion { found: u32, max: u32 },
/// The trie storage backend reported an error (carried as its display text).
#[error("Trie error: {0}")]
Trie(String),
/// `complete_order` was called while prefixes of that order were still in progress.
#[error("Cannot complete order {order}: {count} prefixes still in progress")]
OrderHasInProgressPrefixes { order: u8, count: usize },
}
// Keys used when a checkpoint is stored in a trie. Every key starts with a
// NUL byte followed by `__ckpt__`.
// NOTE(review): the NUL prefix presumably keeps these keys apart from real
// n-gram keys — confirm against the trie implementation.

/// Common prefix of all checkpoint keys; `delete_from_trie` removes everything under it.
pub const CHECKPOINT_KEY_PREFIX: &str = "\x00__ckpt__";
/// Checkpoint format version; its presence marks that a checkpoint exists.
pub const CHECKPOINT_VERSION_KEY: &str = "\x00__ckpt__:version";
/// MKN phase, encoded with `MknPhase::to_ordinal`.
pub const CHECKPOINT_MKN_PHASE_KEY: &str = "\x00__ckpt__:mkn_phase";
pub const CHECKPOINT_BYTE_OFFSET_KEY: &str = "\x00__ckpt__:byte_offset";
/// Checkpoint timestamp in whole seconds.
pub const CHECKPOINT_TIMESTAMP_KEY: &str = "\x00__ckpt__:timestamp";
pub const CHECKPOINT_NGRAMS_PROCESSED_KEY: &str = "\x00__ckpt__:ngrams_processed";
pub const CHECKPOINT_UNIQUE_NGRAMS_KEY: &str = "\x00__ckpt__:unique_ngrams";
pub const CHECKPOINT_FILES_PROCESSED_KEY: &str = "\x00__ckpt__:files_processed";
pub const CHECKPOINT_BYTES_DOWNLOADED_KEY: &str = "\x00__ckpt__:bytes_downloaded";
pub const CHECKPOINT_ELAPSED_KEY: &str = "\x00__ckpt__:elapsed_seconds";
/// Followed by the order number (1 through 5): n-gram count of `stats.ngrams_by_order`.
pub const CHECKPOINT_NGRAMS_BY_ORDER_PREFIX: &str = "\x00__ckpt__:ngrams_by_order:";
/// Followed by `<order>:<prefix>`: per-prefix status code (the key-per-prefix layout).
pub const CHECKPOINT_PREFIX_KEY_PREFIX: &str = "\x00__ckpt__:prefix:";
/// Followed by the order number: stored as 1 when the order is complete.
pub const CHECKPOINT_ORDER_COMPLETE_PREFIX: &str = "\x00__ckpt__:order_complete:";
/// Followed by `<order>:<chunk index>`: one packed bitmap chunk of prefix states.
pub const CHECKPOINT_BITMAP_PREFIX: &str = "\x00__ckpt__:bitmap:";
/// Followed by the order number: `OrderProgress::ngrams_processed` of that order.
pub const CHECKPOINT_ORDER_NGRAMS_PREFIX: &str = "\x00__ckpt__:order_ngrams:";
/// Numeric status code stored per prefix in the key-per-prefix trie layout.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum PrefixStatusCode {
    Completed = 1,
    InProgress = 2,
    Failed = 3,
}

impl PrefixStatusCode {
    /// Decodes a stored value; anything other than 1, 2 or 3 yields `None`.
    pub fn from_u64(value: u64) -> Option<Self> {
        const ALL: [PrefixStatusCode; 3] = [
            PrefixStatusCode::Completed,
            PrefixStatusCode::InProgress,
            PrefixStatusCode::Failed,
        ];
        ALL.iter().copied().find(|code| *code as u64 == value)
    }
}
// Two-bit state codes used in the packed bitmap layout
// (see `pack_states` / `unpack_states`).
const BITMAP_STATE_NOT_STARTED: u8 = 0b00;
const BITMAP_STATE_IN_PROGRESS: u8 = 0b01;
const BITMAP_STATE_COMPLETED: u8 = 0b10;
const BITMAP_STATE_FAILED: u8 = 0b11;
// Each `u64` chunk holds 32 prefixes at two bits apiece.
const PREFIXES_PER_CHUNK: usize = 32;
/// Maps a one- or two-letter prefix made of ASCII `a`–`z` to its bitmap index.
///
/// One letter maps to 0..=25; two letters map to `first * 26 + second`
/// (0..=675). Any other length or any other byte yields `None`.
fn prefix_to_index(prefix: &str) -> Option<u16> {
    // Position of a lowercase ASCII letter in the alphabet, if it is one.
    fn letter_ordinal(byte: u8) -> Option<u16> {
        if byte.is_ascii_lowercase() {
            Some(u16::from(byte - b'a'))
        } else {
            None
        }
    }

    match prefix.as_bytes() {
        [only] => letter_ordinal(*only),
        [first, second] => {
            let high = letter_ordinal(*first)?;
            let low = letter_ordinal(*second)?;
            Some(high * 26 + low)
        }
        _ => None,
    }
}
/// Length of the prefixes used to partition an order.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum PrefixLen {
    One,
    Two,
}

impl PrefixLen {
    /// Order 1 uses one-letter prefixes; every other order uses two letters.
    #[inline]
    fn for_order(order: u8) -> Self {
        match order {
            1 => Self::One,
            _ => Self::Two,
        }
    }

    /// Number of distinct prefixes of this length (exclusive upper index bound).
    #[inline]
    fn max_index(self) -> u16 {
        const LETTERS: u16 = 26;
        match self {
            Self::One => LETTERS,
            Self::Two => LETTERS * LETTERS,
        }
    }
}
/// Inverse of `prefix_to_index`: turns a bitmap index back into its one- or
/// two-letter prefix.
///
/// In debug builds an index outside the range for `prefix_len` triggers a
/// `debug_assert!`.
fn index_to_prefix(index: u16, prefix_len: PrefixLen) -> String {
    let letter = |offset: u16| (b'a' + offset as u8) as char;
    match prefix_len {
        PrefixLen::One => {
            debug_assert!(
                index < 26,
                "Index {} out of range for single-char prefix",
                index
            );
            String::from(letter(index))
        }
        PrefixLen::Two => {
            debug_assert!(
                index < 676,
                "Index {} out of range for two-char prefix",
                index
            );
            let mut prefix = String::with_capacity(2);
            prefix.push(letter(index / 26));
            prefix.push(letter(index % 26));
            prefix
        }
    }
}
/// Free-function form of `PrefixLen::for_order`.
fn prefix_len_for_order(order: u8) -> PrefixLen {
PrefixLen::for_order(order)
}
/// Free-function form of `PrefixLen::max_index`.
fn max_index_for_prefix_len(prefix_len: PrefixLen) -> u16 {
prefix_len.max_index()
}
/// Number of `u64` chunks needed to hold every prefix of the given length,
/// i.e. the prefix count divided by `PREFIXES_PER_CHUNK`, rounded up.
fn num_chunks_for_prefix_len(prefix_len: PrefixLen) -> usize {
    let slots = max_index_for_prefix_len(prefix_len) as usize;
    let full_chunks = slots / PREFIXES_PER_CHUNK;
    if slots % PREFIXES_PER_CHUNK == 0 {
        full_chunks
    } else {
        full_chunks + 1
    }
}
fn pack_states(states: &HashMap<String, PrefixState>, prefix_len: PrefixLen) -> Vec<u64> {
let num_chunks = num_chunks_for_prefix_len(prefix_len);
let mut chunks = vec![0u64; num_chunks];
for (prefix, state) in states {
if let Some(index) = prefix_to_index(prefix) {
let chunk_idx = (index as usize) / PREFIXES_PER_CHUNK;
let bit_pos = ((index as usize) % PREFIXES_PER_CHUNK) * 2;
let state_bits = match state {
PrefixState::InProgress => BITMAP_STATE_IN_PROGRESS as u64,
PrefixState::Completed => BITMAP_STATE_COMPLETED as u64,
PrefixState::Failed => BITMAP_STATE_FAILED as u64,
};
chunks[chunk_idx] |= state_bits << bit_pos;
} else {
log::warn!(
"Skipping non-standard prefix '{}' during bitmap packing",
prefix
);
}
}
chunks
}
/// Expands bitmap chunks back into a prefix-to-state map.
///
/// Slots holding the not-started code produce no entry. Indices whose chunk
/// lies beyond the end of `chunks` are skipped.
fn unpack_states(chunks: &[u64], prefix_len: PrefixLen) -> HashMap<String, PrefixState> {
    let slot_count = max_index_for_prefix_len(prefix_len);
    (0..slot_count)
        .filter_map(|index| {
            let slot = index as usize;
            let word = *chunks.get(slot / PREFIXES_PER_CHUNK)?;
            let shift = (slot % PREFIXES_PER_CHUNK) * 2;
            let state_bits = ((word >> shift) & 0b11) as u8;
            let state = match state_bits {
                BITMAP_STATE_NOT_STARTED => return None,
                BITMAP_STATE_IN_PROGRESS => PrefixState::InProgress,
                BITMAP_STATE_COMPLETED => PrefixState::Completed,
                BITMAP_STATE_FAILED => PrefixState::Failed,
                // The value was masked to two bits, so all cases are covered above.
                _ => unreachable!("Invalid state bits: {}", state_bits),
            };
            Some((index_to_prefix(index, prefix_len), state))
        })
        .collect()
}
impl MknPhase {
    /// Encodes the phase as a single integer for trie storage.
    ///
    /// `NotStarted` is 0, `Pass1Complete` is 100, `Complete` is 200; the
    /// in-progress phases are `1 + order * 10` (pass 1) and
    /// `101 + order * 10` (pass 2). With this arithmetic a pass-1 order of
    /// 10 or more lands at or above 101 and no longer decodes as pass 1.
    pub fn to_ordinal(&self) -> u64 {
        let (base, order) = match *self {
            MknPhase::NotStarted => return 0,
            MknPhase::Pass1Complete => return 100,
            MknPhase::Complete => return 200,
            MknPhase::Pass1InProgress { current_order } => (1u64, current_order),
            MknPhase::Pass2InProgress { current_order } => (101u64, current_order),
        };
        base + u64::from(order) * 10
    }

    /// Decodes an ordinal produced by `to_ordinal`.
    ///
    /// Values above 200 decode as `NotStarted`.
    pub fn from_ordinal(ordinal: u64) -> Self {
        match ordinal {
            0 => MknPhase::NotStarted,
            1..=99 => MknPhase::Pass1InProgress {
                current_order: ((ordinal - 1) / 10) as u8,
            },
            100 => MknPhase::Pass1Complete,
            101..=199 => MknPhase::Pass2InProgress {
                current_order: ((ordinal - 101) / 10) as u8,
            },
            200 => MknPhase::Complete,
            _ => MknPhase::NotStarted,
        }
    }
}
impl ImportCheckpoint {
pub fn save_to_trie<T>(&self, trie: &mut T) -> Result<usize, CheckpointError>
where
T: TrieCheckpointStorage,
{
let mut keys_written = 0;
trie.store_checkpoint_u64(CHECKPOINT_VERSION_KEY, Self::CURRENT_VERSION as u64)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(CHECKPOINT_MKN_PHASE_KEY, self.mkn_phase.to_ordinal())
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(CHECKPOINT_BYTE_OFFSET_KEY, self.byte_offset)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
let timestamp_secs = self.timestamp.timestamp() as u64;
trie.store_checkpoint_u64(CHECKPOINT_TIMESTAMP_KEY, timestamp_secs)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(CHECKPOINT_NGRAMS_PROCESSED_KEY, self.stats.ngrams_processed)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(CHECKPOINT_UNIQUE_NGRAMS_KEY, self.stats.unique_ngrams)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(
CHECKPOINT_FILES_PROCESSED_KEY,
self.stats.files_processed as u64,
)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(CHECKPOINT_BYTES_DOWNLOADED_KEY, self.stats.bytes_downloaded)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
trie.store_checkpoint_u64(CHECKPOINT_ELAPSED_KEY, self.stats.elapsed_seconds)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
for (idx, &count) in self.stats.ngrams_by_order.iter().enumerate() {
let key = format!("{}{}", CHECKPOINT_NGRAMS_BY_ORDER_PREFIX, idx + 1);
trie.store_checkpoint_u64(&key, count)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
}
for (order, progress) in &self.order_progress {
if progress.is_complete {
let key = format!("{}{}", CHECKPOINT_ORDER_COMPLETE_PREFIX, order);
trie.store_checkpoint_u64(&key, 1)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
}
let ngrams_key = format!("{}{}", CHECKPOINT_ORDER_NGRAMS_PREFIX, order);
trie.store_checkpoint_u64(&ngrams_key, progress.ngrams_processed)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
let prefix_len = prefix_len_for_order(*order);
let chunks = pack_states(&progress.prefix_states, prefix_len);
for (chunk_idx, &chunk_value) in chunks.iter().enumerate() {
if chunk_value != 0 {
let key = format!("{}{}:{}", CHECKPOINT_BITMAP_PREFIX, order, chunk_idx);
trie.store_checkpoint_u64(&key, chunk_value)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
keys_written += 1;
}
}
}
Ok(keys_written)
}
/// Loads a checkpoint from `trie`.
///
/// Returns `Ok(None)` when no version key is present. Missing scalar keys
/// default to 0. Stored versions of 3 or later are read from the bitmap
/// layout, earlier ones from the key-per-prefix layout. The returned
/// checkpoint always carries `CURRENT_VERSION` and `current_prefix: None`
/// (the prefix is not stored in the trie).
///
/// # Errors
///
/// * `CheckpointError::Trie` if any storage read fails.
/// * `CheckpointError::UnsupportedVersion` if the stored version is newer
///   than `CURRENT_VERSION`.
pub fn load_from_trie<T>(trie: &T) -> Result<Option<Self>, CheckpointError>
where
    T: TrieCheckpointStorage,
{
    // Reads one value, converting the backend error into `CheckpointError::Trie`.
    fn read<S: TrieCheckpointStorage>(
        trie: &S,
        key: &str,
    ) -> Result<Option<u64>, CheckpointError> {
        trie.load_checkpoint_u64(key)
            .map_err(|e| CheckpointError::Trie(e.to_string()))
    }

    let raw_version = match read(trie, CHECKPOINT_VERSION_KEY)? {
        Some(v) => v,
        None => return Ok(None),
    };
    // Compare as u64: narrowing to u32 first would wrap huge versions into
    // the supported range.
    if raw_version > u64::from(Self::CURRENT_VERSION) {
        return Err(CheckpointError::UnsupportedVersion {
            found: raw_version.min(u64::from(u32::MAX)) as u32,
            max: Self::CURRENT_VERSION,
        });
    }
    // Lossless: bounded by CURRENT_VERSION above.
    let version = raw_version as u32;

    let mkn_phase = MknPhase::from_ordinal(read(trie, CHECKPOINT_MKN_PHASE_KEY)?.unwrap_or(0));
    let byte_offset = read(trie, CHECKPOINT_BYTE_OFFSET_KEY)?.unwrap_or(0);
    let timestamp_secs = read(trie, CHECKPOINT_TIMESTAMP_KEY)?.unwrap_or(0);
    let timestamp = DateTime::from_timestamp(timestamp_secs as i64, 0).unwrap_or_else(Utc::now);

    let ngrams_processed = read(trie, CHECKPOINT_NGRAMS_PROCESSED_KEY)?.unwrap_or(0);
    let unique_ngrams = read(trie, CHECKPOINT_UNIQUE_NGRAMS_KEY)?.unwrap_or(0);
    let files_processed = read(trie, CHECKPOINT_FILES_PROCESSED_KEY)?.unwrap_or(0) as u32;
    let bytes_downloaded = read(trie, CHECKPOINT_BYTES_DOWNLOADED_KEY)?.unwrap_or(0);
    let elapsed_seconds = read(trie, CHECKPOINT_ELAPSED_KEY)?.unwrap_or(0);

    let mut ngrams_by_order = [0u64; 5];
    for (slot, order) in ngrams_by_order.iter_mut().zip(1u8..) {
        let key = format!("{}{}", CHECKPOINT_NGRAMS_BY_ORDER_PREFIX, order);
        // Propagate read failures like every other key; only a genuinely
        // absent key falls back to 0.
        if let Some(count) = read(trie, &key)? {
            *slot = count;
        }
    }

    let stats = CheckpointStats {
        ngrams_processed,
        unique_ngrams,
        ngrams_by_order,
        bytes_downloaded,
        files_processed,
        elapsed_seconds,
    };

    let order_progress = if version >= 3 {
        Self::load_order_progress_v3(trie, &ngrams_by_order)?
    } else {
        log::info!("Loading v2 checkpoint (key-per-prefix format), will migrate to v3 on save");
        Self::load_order_progress_v2(trie, &ngrams_by_order)?
    };

    Ok(Some(Self {
        version: Self::CURRENT_VERSION,
        order_progress,
        current_prefix: None,
        byte_offset,
        mkn_phase,
        stats,
        timestamp,
    }))
}
/// Reads per-order progress for orders 1 through 5 from the bitmap layout.
///
/// For each order this reads the completion flag, the per-order n-gram
/// count (falling back to the matching `ngrams_by_order` slot when the key
/// is absent) and all bitmap chunks. An order is included in the result
/// only if it is complete or has at least one prefix state.
///
/// # Errors
///
/// `CheckpointError::Trie` if any storage read fails, including bitmap
/// chunk reads.
fn load_order_progress_v3<T>(
    trie: &T,
    ngrams_by_order: &[u64; 5],
) -> Result<HashMap<u8, OrderProgress>, CheckpointError>
where
    T: TrieCheckpointStorage,
{
    let mut order_progress = HashMap::new();
    for order in 1..=5u8 {
        let complete_key = format!("{}{}", CHECKPOINT_ORDER_COMPLETE_PREFIX, order);
        let is_complete = trie
            .load_checkpoint_u64(&complete_key)
            .map_err(|e| CheckpointError::Trie(e.to_string()))?
            .map(|v| v == 1)
            .unwrap_or(false);

        let ngrams_key = format!("{}{}", CHECKPOINT_ORDER_NGRAMS_PREFIX, order);
        let order_ngrams = trie
            .load_checkpoint_u64(&ngrams_key)
            .map_err(|e| CheckpointError::Trie(e.to_string()))?
            .unwrap_or(ngrams_by_order[order as usize - 1]);

        let prefix_len = prefix_len_for_order(order);
        let num_chunks = num_chunks_for_prefix_len(prefix_len);
        let mut chunks = vec![0u64; num_chunks];
        for (chunk_idx, chunk) in chunks.iter_mut().enumerate() {
            let key = format!("{}{}:{}", CHECKPOINT_BITMAP_PREFIX, order, chunk_idx);
            // A failed read must not be mistaken for "no progress": that
            // would silently discard recorded prefix states.
            let stored = trie
                .load_checkpoint_u64(&key)
                .map_err(|e| CheckpointError::Trie(e.to_string()))?;
            if let Some(chunk_value) = stored {
                *chunk = chunk_value;
            }
        }

        // All-zero chunks unpack to an empty map.
        let prefix_states = unpack_states(&chunks, prefix_len);

        if is_complete || !prefix_states.is_empty() {
            order_progress.insert(
                order,
                OrderProgress {
                    prefix_states,
                    is_complete,
                    ngrams_processed: order_ngrams,
                },
            );
        }
    }
    Ok(order_progress)
}
fn load_order_progress_v2<T>(
trie: &T,
ngrams_by_order: &[u64; 5],
) -> Result<HashMap<u8, OrderProgress>, CheckpointError>
where
T: TrieCheckpointStorage,
{
let mut order_progress = HashMap::new();
for order in 1..=5u8 {
let complete_key = format!("{}{}", CHECKPOINT_ORDER_COMPLETE_PREFIX, order);
let is_complete = trie
.load_checkpoint_u64(&complete_key)
.map_err(|e| CheckpointError::Trie(e.to_string()))?
.map(|v| v == 1)
.unwrap_or(false);
let prefix_key_prefix = format!("{}{}:", CHECKPOINT_PREFIX_KEY_PREFIX, order);
let prefix_entries = trie
.iter_checkpoint_prefix(&prefix_key_prefix)
.map_err(|e| CheckpointError::Trie(e.to_string()))?;
let mut prefix_states = HashMap::new();
for (key, status_code) in prefix_entries {
if let Some(prefix) = key.strip_prefix(&prefix_key_prefix) {
match PrefixStatusCode::from_u64(status_code) {
Some(PrefixStatusCode::Completed) => {
prefix_states.insert(prefix.to_string(), PrefixState::Completed);
}
Some(PrefixStatusCode::InProgress) => {
prefix_states.insert(prefix.to_string(), PrefixState::InProgress);
}
Some(PrefixStatusCode::Failed) => {
prefix_states.insert(prefix.to_string(), PrefixState::Failed);
}
None => {}
}
}
}
if is_complete || !prefix_states.is_empty() {
order_progress.insert(
order,
OrderProgress {
prefix_states,
is_complete,
ngrams_processed: ngrams_by_order[order as usize - 1],
},
);
}
}
Ok(order_progress)
}
/// Returns `true` if `trie` holds a checkpoint version key.
/// A storage error is reported as `false`.
pub fn exists_in_trie<T>(trie: &T) -> bool
where
    T: TrieCheckpointStorage,
{
    matches!(
        trie.load_checkpoint_u64(CHECKPOINT_VERSION_KEY),
        Ok(Some(_))
    )
}
/// Deletes every key under `CHECKPOINT_KEY_PREFIX` and returns the count
/// reported by the storage backend.
///
/// # Errors
///
/// `CheckpointError::Trie` if the backend reports a failure.
pub fn delete_from_trie<T>(trie: &mut T) -> Result<usize, CheckpointError>
where
    T: TrieCheckpointStorage,
{
    match trie.delete_checkpoint_prefix(CHECKPOINT_KEY_PREFIX) {
        Ok(removed) => Ok(removed),
        Err(e) => Err(CheckpointError::Trie(e.to_string())),
    }
}
/// Stores `status` for a single prefix under the key
/// `<prefix key prefix><order>:<prefix>`.
///
/// NOTE(review): within this file such keys are read back only by
/// `load_order_progress_v2`; confirm callers do not rely on them with
/// bitmap-layout checkpoints.
///
/// # Errors
///
/// `CheckpointError::Trie` if the backend reports a failure.
pub fn save_prefix_status_to_trie<T>(
    &self,
    trie: &mut T,
    order: u8,
    prefix: &str,
    status: PrefixStatusCode,
) -> Result<(), CheckpointError>
where
    T: TrieCheckpointStorage,
{
    let key = format!("{}{}:{}", CHECKPOINT_PREFIX_KEY_PREFIX, order, prefix);
    match trie.store_checkpoint_u64(&key, status as u64) {
        Ok(()) => Ok(()),
        Err(e) => Err(CheckpointError::Trie(e.to_string())),
    }
}
/// Deletes the per-prefix status key `<prefix key prefix><order>:<prefix>`
/// and returns the boolean reported by the storage backend.
///
/// # Errors
///
/// `CheckpointError::Trie` if the backend reports a failure.
pub fn remove_prefix_status_from_trie<T>(
    trie: &mut T,
    order: u8,
    prefix: &str,
) -> Result<bool, CheckpointError>
where
    T: TrieCheckpointStorage,
{
    let key = format!("{}{}:{}", CHECKPOINT_PREFIX_KEY_PREFIX, order, prefix);
    match trie.delete_checkpoint_key(&key) {
        Ok(removed) => Ok(removed),
        Err(e) => Err(CheckpointError::Trie(e.to_string())),
    }
}
}
/// Key/value operations a trie must provide to hold checkpoint data.
/// All values are `u64`; keys are strings.
pub trait TrieCheckpointStorage {
/// Error type of the backend; converted to `CheckpointError::Trie` via its display text.
type Error: std::error::Error;
/// Stores `value` under `key`.
fn store_checkpoint_u64(&mut self, key: &str, value: u64) -> Result<(), Self::Error>;
/// Loads the value under `key`; callers treat `Ok(None)` as "key absent".
fn load_checkpoint_u64(&self, key: &str) -> Result<Option<u64>, Self::Error>;
/// Deletes `key`.
/// NOTE(review): the returned bool presumably tells whether the key existed — confirm with implementors.
fn delete_checkpoint_key(&mut self, key: &str) -> Result<bool, Self::Error>;
/// Deletes every key starting with `prefix`.
/// NOTE(review): the returned count is presumably the number of keys removed — confirm.
fn delete_checkpoint_prefix(&mut self, prefix: &str) -> Result<usize, Self::Error>;
/// Returns the `(key, value)` pairs whose key starts with `prefix`; callers
/// expect full keys and strip `prefix` themselves.
fn iter_checkpoint_prefix(&self, prefix: &str) -> Result<Vec<(String, u64)>, Self::Error>;
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn test_checkpoint_new() {
let cp = ImportCheckpoint::new();
assert_eq!(cp.version, ImportCheckpoint::CURRENT_VERSION);
assert!(cp.order_progress.is_empty());
}
#[test]
fn test_checkpoint_save_load() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.checkpoint.json");
let mut cp = ImportCheckpoint::new();
cp.complete_order(1)
.expect("no in-progress prefixes for order 1"); cp.complete_prefix(2, "aa"); cp.add_ngrams(2, 12345);
cp.save(&path).unwrap();
assert!(path.exists());
let loaded = ImportCheckpoint::load(&path).unwrap();
assert!(loaded.is_order_complete(1));
assert!(!loaded.is_order_complete(2));
assert!(!loaded.needs_prefix(2, "aa")); assert!(loaded.needs_prefix(2, "ab")); assert_eq!(loaded.stats.ngrams_processed, 12345);
}
#[test]
fn test_needs_prefix() {
let mut cp = ImportCheckpoint::new();
cp.complete_prefix(2, "aa");
cp.complete_prefix(2, "ab");
assert!(cp.needs_prefix(1, "a"));
assert!(cp.needs_prefix(3, "aaa"));
assert!(!cp.needs_prefix(2, "aa"));
assert!(!cp.needs_prefix(2, "ab"));
assert!(cp.needs_prefix(2, "ac"));
cp.complete_order(2)
.expect("no in-progress prefixes for order 2");
assert!(!cp.needs_prefix(2, "ac"));
assert!(!cp.needs_prefix(2, "zz"));
}
#[test]
fn test_complete_prefix() {
let mut cp = ImportCheckpoint::new();
cp.complete_prefix(2, "aa");
assert_eq!(cp.completed_prefix_count(2), 1);
assert!(!cp.needs_prefix(2, "aa"));
assert_eq!(cp.stats.files_processed, 1);
}
#[test]
fn test_complete_order() {
let mut cp = ImportCheckpoint::new();
cp.complete_prefix(1, "a");
cp.complete_prefix(1, "b");
cp.complete_order(1)
.expect("no in-progress prefixes for order 1");
assert!(cp.is_order_complete(1));
assert_eq!(cp.completed_prefix_count(1), 2);
}
#[test]
fn test_complete_order_with_in_progress_fails() {
let mut cp = ImportCheckpoint::new();
cp.start_prefix(1, "a");
let result = cp.complete_order(1);
assert!(result.is_err());
match result {
Err(CheckpointError::OrderHasInProgressPrefixes { order, count }) => {
assert_eq!(order, 1);
assert_eq!(count, 1);
}
_ => panic!("Expected OrderHasInProgressPrefixes error"),
}
cp.complete_prefix(1, "a");
cp.complete_order(1).expect("all prefixes completed");
assert!(cp.is_order_complete(1));
}
#[test]
fn test_overlapping_orders() {
let mut cp = ImportCheckpoint::new();
cp.complete_prefix(1, "a");
cp.complete_prefix(1, "b");
cp.complete_prefix(2, "aa");
assert!(!cp.is_order_complete(1));
assert!(!cp.is_order_complete(2));
assert!(!cp.needs_prefix(1, "a"));
assert!(!cp.needs_prefix(2, "aa"));
assert!(cp.needs_prefix(1, "c"));
assert!(cp.needs_prefix(2, "ab"));
cp.complete_order(1)
.expect("no in-progress prefixes for order 1");
assert!(cp.is_order_complete(1));
assert!(!cp.is_order_complete(2));
assert_eq!(cp.orders_in_progress(), vec![2]);
assert_eq!(cp.completed_orders(), vec![1]);
}
#[test]
fn test_v1_migration() {
let v1_json = r#"{
"version": 1,
"completed_orders": [1],
"current_order": 2,
"completed_prefixes": ["aa", "ab"],
"current_prefix": null,
"byte_offset": 0,
"mkn_phase": "NotStarted",
"stats": {
"ngrams_processed": 12345,
"ngrams_by_order": [0, 0, 0, 0, 0],
"bytes_downloaded": 0,
"files_processed": 3,
"elapsed_seconds": 100
},
"timestamp": "2024-01-01T00:00:00Z"
}"#;
let dir = tempdir().unwrap();
let path = dir.path().join("v1.checkpoint.json");
std::fs::write(&path, v1_json).unwrap();
let loaded = ImportCheckpoint::load(&path).unwrap();
assert_eq!(loaded.version, ImportCheckpoint::CURRENT_VERSION);
assert!(loaded.is_order_complete(1));
assert!(!loaded.is_order_complete(2));
assert!(!loaded.needs_prefix(2, "aa"));
assert!(!loaded.needs_prefix(2, "ab"));
assert!(loaded.needs_prefix(2, "ac"));
assert_eq!(loaded.stats.ngrams_processed, 12345);
assert_eq!(loaded.stats.files_processed, 3);
}
#[test]
fn test_prefix_lifecycle() {
let mut cp = ImportCheckpoint::new();
cp.start_prefix(2, "aa");
assert!(cp.is_in_progress(2, "aa"));
assert!(!cp.needs_prefix(2, "aa")); assert_eq!(cp.in_progress_prefixes(2), vec!["aa".to_string()]);
cp.complete_prefix(2, "aa");
assert!(!cp.is_in_progress(2, "aa"));
assert!(!cp.needs_prefix(2, "aa")); assert!(cp.in_progress_prefixes(2).is_empty());
assert_eq!(cp.completed_prefix_count(2), 1);
}
#[test]
fn test_prefix_failure() {
let mut cp = ImportCheckpoint::new();
cp.start_prefix(2, "aa");
assert!(cp.is_in_progress(2, "aa"));
cp.fail_prefix(2, "aa");
assert!(!cp.is_in_progress(2, "aa"));
assert!(cp.is_failed_prefix(2, "aa"));
assert_eq!(cp.failed_prefix_count(2), 1);
assert!(cp.needs_prefix(2, "aa"));
}
#[test]
fn test_recover_in_progress() {
let mut cp = ImportCheckpoint::new();
cp.start_prefix(2, "aa");
cp.start_prefix(2, "ab");
assert_eq!(cp.in_progress_prefixes(2).len(), 2);
cp.recover_in_progress_as_failed(2);
assert!(cp.in_progress_prefixes(2).is_empty());
assert_eq!(cp.failed_prefix_count(2), 2);
assert!(cp.is_failed_prefix(2, "aa"));
assert!(cp.is_failed_prefix(2, "ab"));
}
#[test]
fn test_clear_failed() {
let mut cp = ImportCheckpoint::new();
cp.start_prefix(2, "aa");
cp.fail_prefix(2, "aa");
assert!(cp.is_failed_prefix(2, "aa"));
cp.clear_failed(2, "aa");
assert!(!cp.is_failed_prefix(2, "aa"));
assert!(cp.needs_prefix(2, "aa")); }
/// Restarting an already completed prefix must replace its state: the prefix
/// ends up in progress and no completed entry remains for the order.
#[test]
fn test_start_prefix_clears_other_states() {
    let prefix = "aa";
    let mut checkpoint = ImportCheckpoint::new();

    checkpoint.complete_prefix(2, prefix);
    assert!(!checkpoint.needs_prefix(2, prefix));

    checkpoint.start_prefix(2, prefix);
    assert!(checkpoint.is_in_progress(2, prefix));

    // Inspect the per-order record directly to confirm the old state is gone.
    let order_two = checkpoint.order_progress.get(&2).unwrap();
    assert_eq!(order_two.get_state(prefix), Some(PrefixState::InProgress));
    assert_eq!(order_two.count_state(PrefixState::Completed), 0);
}
/// Saves a checkpoint holding one failed and one in-progress prefix, loads it
/// back, and checks that both states are still reported.
#[test]
fn test_failed_prefix_save_load() {
    // `scratch` is kept bound for the whole test because the file lives under it.
    let scratch = tempdir().unwrap();
    let checkpoint_file = scratch.path().join("test.checkpoint.json");

    let mut original = ImportCheckpoint::new();
    original.start_prefix(2, "aa");
    original.fail_prefix(2, "aa");
    original.start_prefix(2, "ab");
    original.save(&checkpoint_file).unwrap();

    let restored = ImportCheckpoint::load(&checkpoint_file).unwrap();
    assert!(restored.is_failed_prefix(2, "aa"));
    assert!(restored.is_in_progress(2, "ab"));
    assert_eq!(restored.failed_prefix_count(2), 1);
}
/// The progress summary must mention the failure count when one prefix failed.
#[test]
fn test_progress_summary_with_failures() {
    let mut checkpoint = ImportCheckpoint::new();
    checkpoint.complete_prefix(2, "aa");
    checkpoint.fail_prefix(2, "ab");

    let mentions_failure = checkpoint.progress_summary().contains("Failed: 1");
    assert!(mentions_failure);
}
/// Spot-checks `prefix_to_index` for one-letter prefixes, plus inputs it must
/// reject.
#[test]
fn test_prefix_to_index_single_char() {
    let accepted: [(&str, u16); 4] = [("a", 0), ("b", 1), ("m", 12), ("z", 25)];
    for &(prefix, index) in accepted.iter() {
        assert_eq!(prefix_to_index(prefix), Some(index));
    }

    // Uppercase letter.
    assert_eq!(prefix_to_index("A"), None);
    // Digit.
    assert_eq!(prefix_to_index("1"), None);
    // Empty string.
    assert_eq!(prefix_to_index(""), None);
}
/// Spot-checks `prefix_to_index` for two-letter prefixes, plus inputs it must
/// reject.
#[test]
fn test_prefix_to_index_two_char() {
    let accepted: [(&str, u16); 7] = [
        ("aa", 0),
        ("ab", 1),
        ("az", 25),
        ("ba", 26),
        ("bb", 27),
        ("zz", 675),
        ("th", 501),
    ];
    for &(prefix, index) in accepted.iter() {
        assert_eq!(prefix_to_index(prefix), Some(index));
    }

    // Uppercase letters.
    assert_eq!(prefix_to_index("AA"), None);
    // Contains a digit.
    assert_eq!(prefix_to_index("a1"), None);
    // Three characters.
    assert_eq!(prefix_to_index("abc"), None);
}
/// Every letter 'a'..='z' must map to its zero-based position in the alphabet.
#[test]
fn test_prefix_to_index_exhaustive_single_char() {
    // Pairing with a `u16` counter yields the expected index without a cast.
    for (expected, letter) in (0u16..).zip('a'..='z') {
        let prefix = letter.to_string();
        assert_eq!(prefix_to_index(&prefix), Some(expected));
    }
}
/// Every two-letter prefix, enumerated with the first letter varying slowest,
/// must map to consecutive indices starting at 0; 676 prefixes are visited.
#[test]
fn test_prefix_to_index_exhaustive_two_char() {
    let letter_pairs =
        ('a'..='z').flat_map(|first| ('a'..='z').map(move |second| (first, second)));

    let mut visited = 0u16;
    for (expected_index, (first, second)) in (0u16..).zip(letter_pairs) {
        let prefix = format!("{}{}", first, second);
        assert_eq!(
            prefix_to_index(&prefix),
            Some(expected_index),
            "prefix '{}' should map to index {}",
            prefix,
            expected_index
        );
        visited += 1;
    }

    assert_eq!(visited, 676);
}
/// Spot-checks `index_to_prefix` for one-letter prefixes.
#[test]
fn test_index_to_prefix_single_char() {
    let cases: [(u16, &str); 4] = [(0, "a"), (1, "b"), (12, "m"), (25, "z")];
    for &(index, letter) in cases.iter() {
        assert_eq!(index_to_prefix(index, PrefixLen::One), letter);
    }
}
/// Spot-checks `index_to_prefix` for two-letter prefixes, including the first
/// and last index.
#[test]
fn test_index_to_prefix_two_char() {
    let cases: [(u16, &str); 6] = [
        (0, "aa"),
        (1, "ab"),
        (25, "az"),
        (26, "ba"),
        (501, "th"),
        (675, "zz"),
    ];
    for &(index, letters) in cases.iter() {
        assert_eq!(index_to_prefix(index, PrefixLen::Two), letters);
    }
}
/// index -> prefix -> index must be the identity for all 26 one-letter indices.
#[test]
fn test_index_prefix_roundtrip_single_char() {
    (0..26u16).for_each(|index| {
        let decoded = prefix_to_index(&index_to_prefix(index, PrefixLen::One));
        assert_eq!(decoded, Some(index));
    });
}
/// index -> prefix -> index must be the identity for all 676 two-letter indices.
#[test]
fn test_index_prefix_roundtrip_two_char() {
    (0..676u16).for_each(|index| {
        let decoded = prefix_to_index(&index_to_prefix(index, PrefixLen::Two));
        assert_eq!(decoded, Some(index));
    });
}
/// Packing an empty state map yields all-zero chunks: one chunk for
/// one-letter prefixes and 22 chunks for two-letter prefixes.
#[test]
fn test_pack_states_empty() {
    let nothing_recorded = HashMap::<String, PrefixState>::new();

    let one_letter = pack_states(&nothing_recorded, PrefixLen::One);
    assert_eq!(one_letter.len(), 1);
    assert_eq!(one_letter[0], 0);

    let two_letter = pack_states(&nothing_recorded, PrefixLen::Two);
    assert_eq!(two_letter.len(), 22);
    assert!(!two_letter.iter().any(|&chunk| chunk != 0));
}
/// A lone completed "aa" must appear as `0b10` in the lowest bits of the first
/// chunk.
#[test]
fn test_pack_states_single_prefix() {
    let states = HashMap::from([("aa".to_string(), PrefixState::Completed)]);

    let chunks = pack_states(&states, PrefixLen::Two);

    assert_eq!(chunks[0], 0b10);
}
/// Packs one prefix per state and checks the exact bit layout of the first
/// chunk.
#[test]
fn test_pack_states_all_state_types() {
    let states = HashMap::from([
        ("aa".to_string(), PrefixState::InProgress),
        ("ab".to_string(), PrefixState::Completed),
        ("ac".to_string(), PrefixState::Failed),
    ]);

    let chunks = pack_states(&states, PrefixLen::Two);

    // Two bits per prefix, lowest index first:
    // "ac" = 0b11 (Failed), "ab" = 0b10 (Completed), "aa" = 0b01 (InProgress).
    let expected = 0b11_10_01;
    assert_eq!(chunks[0], expected);
}
/// Packs three widely spaced prefixes with different states and checks that
/// unpacking returns exactly those three entries.
#[test]
fn test_pack_unpack_roundtrip_sparse() {
    let entries = [
        ("aa", PrefixState::Completed),
        ("th", PrefixState::InProgress),
        ("zz", PrefixState::Failed),
    ];
    let states: HashMap<String, PrefixState> = entries
        .iter()
        .map(|&(prefix, state)| (prefix.to_string(), state))
        .collect();

    let chunks = pack_states(&states, PrefixLen::Two);
    let unpacked = unpack_states(&chunks, PrefixLen::Two);

    assert_eq!(unpacked.len(), entries.len());
    for (prefix, state) in entries.iter() {
        assert_eq!(unpacked.get(*prefix), Some(state));
    }
}
/// Marks all 676 two-letter prefixes as completed and checks that a
/// pack/unpack round trip returns all of them, still completed.
#[test]
fn test_pack_unpack_roundtrip_full() {
    let states: HashMap<String, PrefixState> = ('a'..='z')
        .flat_map(|first| ('a'..='z').map(move |second| format!("{}{}", first, second)))
        .map(|prefix| (prefix, PrefixState::Completed))
        .collect();

    let chunks = pack_states(&states, PrefixLen::Two);
    let unpacked = unpack_states(&chunks, PrefixLen::Two);

    assert_eq!(unpacked.len(), 676);
    unpacked.iter().for_each(|(prefix, state)| {
        assert_eq!(
            *state,
            PrefixState::Completed,
            "prefix '{}' should be Completed",
            prefix
        );
    });
}
/// Assigns states to all 676 two-letter prefixes in a repeating
/// Completed / InProgress / Failed pattern and checks that a pack/unpack round
/// trip preserves every one.
#[test]
fn test_pack_unpack_roundtrip_mixed_states() {
    /// State assigned to a prefix index; cycles with period three.
    fn state_for(index: u16) -> PrefixState {
        match index % 3 {
            0 => PrefixState::Completed,
            1 => PrefixState::InProgress,
            2 => PrefixState::Failed,
            _ => unreachable!(),
        }
    }

    let states: HashMap<String, PrefixState> = (0..676u16)
        .map(|index| (index_to_prefix(index, PrefixLen::Two), state_for(index)))
        .collect();

    let chunks = pack_states(&states, PrefixLen::Two);
    let unpacked = unpack_states(&chunks, PrefixLen::Two);

    assert_eq!(unpacked.len(), 676);
    for index in 0..676u16 {
        let prefix = index_to_prefix(index, PrefixLen::Two);
        let expected = state_for(index);
        assert_eq!(
            unpacked.get(&prefix),
            Some(&expected),
            "prefix '{}' (index {}) state mismatch",
            prefix,
            index
        );
    }
}
/// Unpacking all-zero chunks must produce no entries at all.
#[test]
fn test_unpack_states_not_started_excluded() {
    let all_zero: Vec<u64> = vec![0; 22];

    let unpacked = unpack_states(&all_zero, PrefixLen::Two);

    assert!(unpacked.is_empty());
}
/// Round-trips prefixes whose indices sit at the first, last, and
/// multiple-of-32 positions.
/// NOTE(review): the chosen indices suggest 32 two-bit slots per 64-bit chunk;
/// confirm against `pack_states`.
#[test]
fn test_bitmap_chunk_boundaries() {
    const BOUNDARY_INDICES: [u16; 10] = [0, 31, 32, 63, 64, 95, 96, 671, 672, 675];

    let states: HashMap<String, PrefixState> = BOUNDARY_INDICES
        .iter()
        .map(|&index| (index_to_prefix(index, PrefixLen::Two), PrefixState::Completed))
        .collect();

    let chunks = pack_states(&states, PrefixLen::Two);
    let unpacked = unpack_states(&chunks, PrefixLen::Two);

    assert_eq!(unpacked.len(), BOUNDARY_INDICES.len());
    for &index in BOUNDARY_INDICES.iter() {
        let prefix = index_to_prefix(index, PrefixLen::Two);
        assert_eq!(
            unpacked.get(&prefix),
            Some(&PrefixState::Completed),
            "boundary prefix '{}' (index {}) should be present",
            prefix,
            index
        );
    }
}
/// Order 1 uses one-letter prefixes; orders 2 through 5 use two-letter
/// prefixes.
#[test]
fn test_prefix_len_for_order() {
    // One comparison for orders 1..=5; a mismatch shows the whole sequence.
    let observed: Vec<_> = (1..=5).map(prefix_len_for_order).collect();

    assert_eq!(
        observed,
        vec![
            PrefixLen::One,
            PrefixLen::Two,
            PrefixLen::Two,
            PrefixLen::Two,
            PrefixLen::Two,
        ]
    );
}
/// One-letter prefixes need a single chunk; two-letter prefixes need 22.
#[test]
fn test_num_chunks_for_prefix_len() {
    let one_letter = num_chunks_for_prefix_len(PrefixLen::One);
    let two_letter = num_chunks_for_prefix_len(PrefixLen::Two);

    assert_eq!(one_letter, 1);
    assert_eq!(two_letter, 22);
}
/// Pins the size of the two-letter bitmap: 22 chunks, 176 bytes in total.
#[test]
fn test_bitmap_storage_size() {
    let chunk_count = num_chunks_for_prefix_len(PrefixLen::Two);
    assert_eq!(chunk_count, 22);

    // 8 bytes per chunk.
    // NOTE(review): presumably each chunk is a `u64`; confirm against `pack_states`.
    assert_eq!(chunk_count * 8, 176);
}
}