use crate::CompressionType;
use crate::fs::{Fs, FsOpenOptions, StdFs};
use crate::table::meta::ParsedMeta;
use crate::table::regions::ParsedRegions;
use std::path::Path;
/// Summary of a table file's metadata, as produced by `read_table_properties`.
///
/// Every field is copied from the parsed metadata block of the file; the
/// per-field notes below only state where the value comes from.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct TableProperties {
/// Table ID stored in the metadata (`meta.id`).
pub id: u64,
/// File size as recorded in the metadata (`meta.file_size`).
pub file_size: u64,
/// Copy of the lower bound of the metadata key range (`key_range.min()`).
pub min_key: Vec<u8>,
/// Copy of the upper bound of the metadata key range (`key_range.max()`).
pub max_key: Vec<u8>,
/// Item count recorded in the metadata.
pub item_count: u64,
/// Tombstone count recorded in the metadata.
pub tombstone_count: u64,
/// Weak tombstone count recorded in the metadata.
pub weak_tombstone_count: u64,
/// Value of the metadata field `weak_tombstone_reclaimable`.
// NOTE(review): exact meaning (items vs. bytes) is not visible here - confirm.
pub weak_tombstone_reclaimable: u64,
/// Data block count recorded in the metadata.
pub data_block_count: u64,
/// Index block count recorded in the metadata.
pub index_block_count: u64,
/// Compression type recorded for data blocks.
pub data_block_compression: CompressionType,
/// Compression type recorded for index blocks.
pub index_block_compression: CompressionType,
/// Creation timestamp taken from `meta.created_at`.
// NOTE(review): presumably nanoseconds since the Unix epoch - confirm.
pub created_at_nanos: u128,
/// Value of the metadata flag `page_ecc`.
pub page_ecc: bool,
/// Value of the metadata flag `ecc_unrecognized`.
pub ecc_unrecognized: bool,
}
/// Reads the metadata of the table file at `path` and returns it as
/// [`TableProperties`].
///
/// The file is opened read-only through [`StdFs`], its table of contents is
/// parsed into regions, and the metadata block is loaded from
/// `regions.metadata`. If that load fails and the file also has a
/// `metadata_mid` region, the load is retried from there.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, the archive reader or the
/// region table cannot be parsed, or the metadata cannot be loaded. When both
/// the primary and the `metadata_mid` load fail, the error of the *primary*
/// load is returned.
#[cfg(feature = "std")]
pub fn read_table_properties(path: &Path) -> crate::Result<TableProperties> {
    let fs = StdFs;
    let mut file = fs.open(path, &FsOpenOptions::new().read(true))?;
    let sfa_reader = crate::sfa::Reader::from_reader(&mut file)?;
    let toc = sfa_reader.toc();
    let regions = ParsedRegions::parse_from_toc(toc)?;

    let meta = match ParsedMeta::load_with_handle(&*file, &regions.metadata, None, None) {
        Ok(primary) => primary,
        Err(tail_err) => {
            // No secondary copy to fall back to: surface the primary error.
            let Some(mid_handle) = regions.metadata_mid else {
                return Err(tail_err);
            };
            match ParsedMeta::load_with_handle(&*file, &mid_handle, None, None) {
                Ok(mid) => mid,
                // Keep reporting the primary failure, not the fallback's.
                Err(_) => return Err(tail_err),
            }
        }
    };

    Ok(TableProperties {
        id: meta.id,
        file_size: meta.file_size,
        min_key: meta.key_range.min().to_vec(),
        max_key: meta.key_range.max().to_vec(),
        item_count: meta.item_count,
        tombstone_count: meta.tombstone_count,
        weak_tombstone_count: meta.weak_tombstone_count,
        weak_tombstone_reclaimable: meta.weak_tombstone_reclaimable,
        data_block_count: meta.data_block_count,
        index_block_count: meta.index_block_count,
        data_block_compression: meta.data_block_compression,
        index_block_compression: meta.index_block_compression,
        created_at_nanos: *meta.created_at,
        page_ecc: meta.page_ecc,
        ecc_unrecognized: meta.ecc_unrecognized,
    })
}
/// One entry of a table's top-level index, as produced by
/// `read_top_level_index_entries`.
///
/// Each field is copied from the corresponding accessor of the keyed block
/// handle yielded by the index block iterator.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct IndexEntry {
/// Copy of the handle's end key (`end_key()`).
pub end_key: Vec<u8>,
/// Sequence number reported by the handle (`seqno()`).
pub seqno: u64,
/// Offset reported by the handle (`offset()`).
// NOTE(review): presumably a byte offset into the table file - confirm.
pub offset: u64,
/// Size reported by the handle (`size()`).
pub size: u32,
}
/// Reads the top-level index block of the table file at `path` and returns
/// its entries in iteration order.
///
/// Steps:
/// 1. Open the file read-only through [`StdFs`] and parse its regions.
/// 2. Load the metadata (retrying from `metadata_mid` if the primary load
///    fails) to learn the table ID, index compression and ECC parameters.
/// 3. Load the top-level index block: if a `tli_tail` region exists it is
///    tried first and `regions.tli` is the fallback; otherwise `regions.tli`
///    is loaded directly.
/// 4. Iterate the block with the default comparator and copy every handle
///    into an [`IndexEntry`].
///
/// # Errors
///
/// Returns an error if opening or parsing the file fails, if the metadata or
/// the index block cannot be loaded, or if the index iterator cannot be
/// built. Whenever a fallback is attempted and also fails, the error of the
/// *first* attempt is the one returned.
#[cfg(feature = "std")]
pub fn read_top_level_index_entries(path: &Path) -> crate::Result<Vec<IndexEntry>> {
    use crate::table::block_index::iter::OwnedIndexBlockIter;
    use crate::table::{IndexBlock, KeyedBlockHandle};

    let fs = StdFs;
    let mut file = fs.open(path, &FsOpenOptions::new().read(true))?;
    let sfa_reader = crate::sfa::Reader::from_reader(&mut file)?;
    let toc = sfa_reader.toc();
    let regions = ParsedRegions::parse_from_toc(toc)?;

    let meta = match ParsedMeta::load_with_handle(&*file, &regions.metadata, None, None) {
        Ok(primary) => primary,
        Err(tail_err) => {
            // No secondary copy to fall back to: surface the primary error.
            let Some(mid_handle) = regions.metadata_mid else {
                return Err(tail_err);
            };
            match ParsedMeta::load_with_handle(&*file, &mid_handle, None, None) {
                Ok(mid) => mid,
                // Keep reporting the primary failure, not the fallback's.
                Err(_) => return Err(tail_err),
            }
        }
    };

    let index_compression = meta.index_block_compression;
    let ecc = meta.ecc_params;
    let table_id = meta.id;

    let tli_block = match regions.tli_tail {
        Some(tail_handle) => {
            match load_index_block(&*file, tail_handle, table_id, index_compression, ecc) {
                Ok(block) => block,
                Err(tail_err) => {
                    match load_index_block(&*file, regions.tli, table_id, index_compression, ecc) {
                        Ok(block) => block,
                        // Both copies failed: report the first failure.
                        Err(_) => return Err(tail_err),
                    }
                }
            }
        }
        None => load_index_block(&*file, regions.tli, table_id, index_compression, ecc)?,
    };

    let block = IndexBlock::new(tli_block);
    let iter = OwnedIndexBlockIter::from_block(block, crate::comparator::default_comparator())?;

    let entries = iter
        .map(|h: KeyedBlockHandle| IndexEntry {
            end_key: h.end_key().to_vec(),
            seqno: h.seqno(),
            offset: *h.offset(),
            size: h.size(),
        })
        .collect();

    Ok(entries)
}
/// Loads the block referenced by `handle` from `file` and checks that it is
/// an index block.
///
/// The block transform is built from `compression` (the remaining
/// `from_parts` arguments are `None`) and is extended with `ecc` when ECC
/// parameters are supplied. The block identity uses `dict_id: 0` and
/// `window_log: 0`.
///
/// # Errors
///
/// Propagates failures from building the transform or reading the block, and
/// returns `Error::InvalidTag(("BlockType", ..))` when the loaded block's
/// header does not carry `BlockType::Index`.
#[cfg(feature = "std")]
fn load_index_block(
    file: &dyn crate::fs::FsFile,
    handle: crate::table::BlockHandle,
    table_id: crate::table::TableId,
    compression: CompressionType,
    ecc: Option<crate::table::block::EccParams>,
) -> crate::Result<crate::table::Block> {
    use crate::table::block::{Block, BlockIdentity, BlockType};

    let identity = BlockIdentity {
        table_id,
        block_type: BlockType::Index,
        dict_id: 0,
        window_log: 0,
    };

    let base_transform = crate::table::block::BlockTransform::from_parts(
        compression,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    let transform = match ecc {
        Some(params) => base_transform.with_ecc(params),
        None => base_transform,
    };

    let loaded = Block::from_file(file, handle, identity, &transform)?;

    if loaded.header.block_type == BlockType::Index {
        Ok(loaded)
    } else {
        Err(crate::Error::InvalidTag((
            "BlockType",
            loaded.header.block_type.into(),
        )))
    }
}
/// One key-value entry yielded by `DataBlockEntryIter`.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct DataEntry {
/// Copy of the entry's user key.
pub key: Vec<u8>,
/// Copy of the entry's value; left empty when the iterator runs in
/// keys-only mode.
pub value: Vec<u8>,
/// Sequence number taken from the entry's internal key.
pub seqno: u64,
/// Value type taken from the entry's internal key.
pub value_type: crate::ValueType,
}
impl DataEntry {
    /// Returns `true` when this entry's `value_type` reports itself as a
    /// tombstone.
    #[must_use]
    pub fn is_tombstone(&self) -> bool {
        let Self { value_type, .. } = self;
        value_type.is_tombstone()
    }
}
/// Iterator over the entries of a table file's data blocks, created by
/// `iter_data_block_entries`.
///
/// Data blocks are loaded one at a time from `file` as iteration proceeds.
#[cfg(feature = "std")]
pub struct DataBlockEntryIter {
// Open table file that data blocks are read from.
file: Box<dyn crate::fs::FsFile>,
// Table ID passed along when loading each data block.
table_id: crate::table::TableId,
// Compression used to build the block transform for data blocks.
data_block_compression: CompressionType,
// Optional ECC parameters applied to the block transform.
ecc: Option<crate::table::block::EccParams>,
// Forwarded to `DataBlock::from_loaded`; set from whether the metadata
// has a KV checksum algorithm.
has_kv_footer: bool,
// Handles of data blocks not loaded yet. `next` pops from the back, so
// the constructor stores them in reverse index order.
remaining_handles: Vec<crate::table::BlockHandle>,
// Iterator over the data block currently being drained, if any.
current: Option<crate::table::iter::OwnedDataBlockIter>,
// When true, yielded entries carry an empty `value`.
keys_only: bool,
}
#[cfg(feature = "std")]
impl DataBlockEntryIter {
/// Switches the iterator to keys-only mode and returns it.
///
/// In this mode every yielded `DataEntry` has an empty `value`; the key,
/// sequence number and value type are still filled in.
#[must_use]
pub const fn keys_only(mut self) -> Self {
self.keys_only = true;
self
}
}
#[cfg(feature = "std")]
impl Iterator for DataBlockEntryIter {
    type Item = crate::Result<DataEntry>;

    /// Yields the next entry, loading the next data block whenever the
    /// current one is exhausted.
    ///
    /// If a data block fails to load, the error is yielded once and all
    /// remaining handles are discarded, so the following call returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain the block we are currently positioned in, if any.
            if let Some(block_iter) = self.current.as_mut() {
                match block_iter.next() {
                    Some(item) => {
                        // Keys-only mode leaves the value empty instead of copying it.
                        let value = (!self.keys_only)
                            .then(|| item.value.to_vec())
                            .unwrap_or_default();
                        return Some(Ok(DataEntry {
                            key: item.key.user_key.to_vec(),
                            value,
                            seqno: item.key.seqno,
                            value_type: item.key.value_type,
                        }));
                    }
                    None => self.current = None,
                }
            }

            // No more handles means the whole table has been walked.
            let next_handle = self.remaining_handles.pop()?;
            let loaded = load_data_block_iter(
                &*self.file,
                &next_handle,
                self.table_id,
                self.data_block_compression,
                self.ecc,
                self.has_kv_footer,
            );
            match loaded {
                Ok(block_iter) => self.current = Some(block_iter),
                Err(e) => {
                    // Stop after reporting the failure.
                    self.remaining_handles.clear();
                    return Some(Err(e));
                }
            }
        }
    }
}
/// Opens the table file at `path` and returns an iterator over the entries
/// of its data blocks.
///
/// The function reads the region table and metadata, loads the top-level
/// index block (trying `tli_tail` first when present, with `regions.tli` as
/// the fallback) and collects the block handle of every index entry. The
/// handles are stored reversed because the iterator pops them from the back.
/// Data blocks themselves are only loaded while iterating.
///
/// # Errors
///
/// * `Error::Io` with `ErrorKind::Unsupported` when the file has a separate
///   `index` region (partitioned index).
/// * Any error from opening or parsing the file, loading the metadata,
///   loading the top-level index block, or building the index iterator.
///   When a fallback load is attempted and also fails, the error of the
///   *first* attempt is returned.
#[cfg(feature = "std")]
pub fn iter_data_block_entries(path: &Path) -> crate::Result<DataBlockEntryIter> {
    use crate::table::IndexBlock;
    use crate::table::block_index::iter::OwnedIndexBlockIter;

    let fs = StdFs;
    let mut file = fs.open(path, &FsOpenOptions::new().read(true))?;
    let sfa_reader = crate::sfa::Reader::from_reader(&mut file)?;
    let toc = sfa_reader.toc();
    let regions = ParsedRegions::parse_from_toc(toc)?;

    if regions.index.is_some() {
        return Err(crate::Error::Io(std::io::Error::new(
            std::io::ErrorKind::Unsupported,
            "partitioned-index SST (separate `index` section present) is not yet supported \
             by iter_data_block_entries; walking sub-index leaves to enumerate data blocks \
             is a follow-up surface",
        )));
    }

    let meta = match ParsedMeta::load_with_handle(&*file, &regions.metadata, None, None) {
        Ok(primary) => primary,
        Err(tail_err) => {
            // No secondary copy to fall back to: surface the primary error.
            let Some(mid_handle) = regions.metadata_mid else {
                return Err(tail_err);
            };
            match ParsedMeta::load_with_handle(&*file, &mid_handle, None, None) {
                Ok(mid) => mid,
                // Keep reporting the primary failure, not the fallback's.
                Err(_) => return Err(tail_err),
            }
        }
    };

    let table_id = meta.id;
    let data_block_compression = meta.data_block_compression;
    let index_compression = meta.index_block_compression;
    let ecc = meta.ecc_params;

    let tli_block = match regions.tli_tail {
        Some(tail_handle) => {
            match load_index_block(&*file, tail_handle, table_id, index_compression, ecc) {
                Ok(block) => block,
                Err(tail_err) => {
                    match load_index_block(&*file, regions.tli, table_id, index_compression, ecc) {
                        Ok(block) => block,
                        // Both copies failed: report the first failure.
                        Err(_) => return Err(tail_err),
                    }
                }
            }
        }
        None => load_index_block(&*file, regions.tli, table_id, index_compression, ecc)?,
    };

    let block = IndexBlock::new(tli_block);
    let iter = OwnedIndexBlockIter::from_block(block, crate::comparator::default_comparator())?;

    let mut handles: Vec<crate::table::BlockHandle> = iter
        .map(crate::table::KeyedBlockHandle::into_inner)
        .collect();
    // The iterator pops from the back, so reverse to visit blocks in index order.
    handles.reverse();

    Ok(DataBlockEntryIter {
        file,
        table_id,
        data_block_compression,
        ecc,
        has_kv_footer: meta.kv_checksum_algo.is_some(),
        remaining_handles: handles,
        current: None,
        keys_only: false,
    })
}
/// Loads the data block referenced by `handle` from `file` and wraps it in an
/// owning iterator.
///
/// The block transform is built from `compression` (the remaining
/// `from_parts` arguments are `None`) and is extended with `ecc` when ECC
/// parameters are supplied. The block identity uses `dict_id: 0` and
/// `window_log: 0`. `has_kv_footer` is forwarded to `DataBlock::from_loaded`,
/// and the iterator is created with the default comparator.
///
/// # Errors
///
/// Propagates failures from building the transform, reading the block,
/// constructing the data block or creating its iterator, and returns
/// `Error::InvalidTag(("BlockType", ..))` when the loaded block's header does
/// not carry `BlockType::Data`.
#[cfg(feature = "std")]
fn load_data_block_iter(
    file: &dyn crate::fs::FsFile,
    handle: &crate::table::BlockHandle,
    table_id: crate::table::TableId,
    compression: CompressionType,
    ecc: Option<crate::table::block::EccParams>,
    has_kv_footer: bool,
) -> crate::Result<crate::table::iter::OwnedDataBlockIter> {
    use crate::table::DataBlock;
    use crate::table::block::{Block, BlockIdentity, BlockType};
    use crate::table::iter::OwnedDataBlockIter;

    let identity = BlockIdentity {
        table_id,
        block_type: BlockType::Data,
        dict_id: 0,
        window_log: 0,
    };

    let base_transform = crate::table::block::BlockTransform::from_parts(
        compression,
        None,
        #[cfg(zstd_any)]
        None,
    )?;
    let transform = match ecc {
        Some(params) => base_transform.with_ecc(params),
        None => base_transform,
    };

    let loaded = Block::from_file(file, *handle, identity, &transform)?;

    if loaded.header.block_type != BlockType::Data {
        return Err(crate::Error::InvalidTag((
            "BlockType",
            loaded.header.block_type.into(),
        )));
    }

    let data_block = DataBlock::from_loaded(loaded, has_kv_footer)?;
    OwnedDataBlockIter::try_new(data_block, |inner| {
        inner.try_iter(crate::comparator::default_comparator())
    })
}
/// Diagnostic statistics about a table file's filter section, as produced by
/// `read_filter_stats`.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct FilterStats {
/// Size of the filter region, taken from the filter block handle.
pub filter_section_bytes: u64,
/// Layer count reported by the filter reader for the filter block.
pub layer_count: u64,
/// Item count recorded in the table metadata.
pub item_count: u64,
/// `filter_section_bytes * 8` divided by `item_count` (with the divisor
/// clamped to at least 1).
pub bits_per_key: f64,
}
/// Computes [`FilterStats`] for the table file at `path`.
///
/// Returns `Ok(None)` when the file has no `filter` region or when the
/// loaded filter block contains no data.
///
/// The filter block is read with the plain block transform, extended with
/// the metadata's ECC parameters when present. `bits_per_key` is the filter
/// region size in bits divided by the metadata item count, with the divisor
/// clamped to at least 1 so an empty table does not divide by zero.
///
/// # Errors
///
/// * `Error::FeatureUnsupported("filter_tli")` when the file has a
///   `filter_tli` region.
/// * `Error::InvalidTag(("BlockType", ..))` when the loaded block's header
///   does not carry `BlockType::Filter`.
/// * Any error from opening or parsing the file, loading the metadata
///   (the primary error is returned if the `metadata_mid` retry also fails),
///   reading the filter block, or constructing the filter reader.
#[cfg(feature = "std")]
pub fn read_filter_stats(path: &Path) -> crate::Result<Option<FilterStats>> {
    use crate::table::block::{Block, BlockIdentity, BlockType};
    use crate::table::filter::ribbon::burr::BurrFilterReader;

    let fs = StdFs;
    let mut file = fs.open(path, &FsOpenOptions::new().read(true))?;
    let sfa_reader = crate::sfa::Reader::from_reader(&mut file)?;
    let toc = sfa_reader.toc();
    let regions = ParsedRegions::parse_from_toc(toc)?;

    if regions.filter_tli.is_some() {
        return Err(crate::Error::FeatureUnsupported("filter_tli"));
    }
    let Some(filter_handle) = regions.filter else {
        return Ok(None);
    };
    let filter_section_bytes = u64::from(filter_handle.size());

    let meta = match ParsedMeta::load_with_handle(&*file, &regions.metadata, None, None) {
        Ok(primary) => primary,
        Err(tail_err) => {
            // No secondary copy to fall back to: surface the primary error.
            let Some(mid_handle) = regions.metadata_mid else {
                return Err(tail_err);
            };
            match ParsedMeta::load_with_handle(&*file, &mid_handle, None, None) {
                Ok(mid) => mid,
                // Keep reporting the primary failure, not the fallback's.
                Err(_) => return Err(tail_err),
            }
        }
    };

    let item_count = meta.item_count;
    let table_id = meta.id;

    let block = Block::from_file(
        &*file,
        filter_handle,
        BlockIdentity {
            table_id,
            block_type: BlockType::Filter,
            dict_id: 0,
            window_log: 0,
        },
        &{
            let t = crate::table::block::BlockTransform::PLAIN;
            if let Some(ecc) = meta.ecc_params {
                t.with_ecc(ecc)
            } else {
                t
            }
        },
    )?;

    if block.header.block_type != BlockType::Filter {
        return Err(crate::Error::InvalidTag((
            "BlockType",
            block.header.block_type.into(),
        )));
    }
    if block.data.is_empty() {
        return Ok(None);
    }

    let layer_count: u64 = BurrFilterReader::new(&block.data)?.layer_count() as u64;

    #[expect(
        clippy::cast_precision_loss,
        reason = "filter stats are diagnostic; precision loss above 2^53 keys is irrelevant"
    )]
    let denom = item_count.max(1) as f64;
    #[expect(
        clippy::cast_precision_loss,
        reason = "filter stats are diagnostic; precision loss above 2^53 bytes is irrelevant"
    )]
    let bits = (filter_section_bytes * 8) as f64;
    let bits_per_key = bits / denom;

    Ok(Some(FilterStats {
        filter_section_bytes,
        layer_count,
        item_count,
        bits_per_key,
    }))
}