use crate::graph::schema::{CowSelection, DirGraph, InternedKey};
use crate::graph::storage::disk::csr::TOMBSTONE_EDGE;
use crate::graph::storage::disk::graph::DiskGraph;
use crate::graph::storage::mapped::mmap_vec::MmapOrVec;
use std::path::Path;
#[derive(Clone, Debug, Default)]
pub struct SubsetSpec {
pub edge_types: Option<Vec<InternedKey>>,
}
#[derive(Clone, Debug, Default)]
pub struct ScanStats {
pub kept_node_count: u64,
pub kept_edge_count: u64,
pub total_edge_count: u64,
pub scan_duration_secs: f64,
}
#[derive(Clone, Debug)]
pub struct Bitset {
blocks: Vec<u64>,
len: usize,
}
impl Bitset {
pub fn with_len(len: usize) -> Self {
let n_blocks = len.div_ceil(64);
Self {
blocks: vec![0u64; n_blocks],
len,
}
}
#[inline]
pub fn set(&mut self, i: usize) {
if i < self.len {
self.blocks[i / 64] |= 1u64 << (i % 64);
}
}
#[inline]
pub fn get(&self, i: usize) -> bool {
if i >= self.len {
return false;
}
(self.blocks[i / 64] >> (i % 64)) & 1 == 1
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
pub fn count_ones(&self) -> u64 {
self.blocks.iter().map(|b| b.count_ones() as u64).sum()
}
#[inline]
pub(crate) fn blocks(&self) -> &[u64] {
&self.blocks
}
}
#[derive(Debug, Clone)]
pub struct RankIndex {
bitset: Bitset,
block_prefix: Vec<u32>,
kept_count: u32,
}
impl RankIndex {
pub fn from_bitset(bitset: Bitset) -> Self {
let blocks = bitset.blocks();
let mut block_prefix: Vec<u32> = Vec::with_capacity(blocks.len() + 1);
let mut acc: u32 = 0;
block_prefix.push(0);
for blk in blocks {
acc = acc.saturating_add(blk.count_ones());
block_prefix.push(acc);
}
Self {
bitset,
block_prefix,
kept_count: acc,
}
}
#[inline]
pub fn kept_count(&self) -> u32 {
self.kept_count
}
#[inline]
pub fn contains(&self, old_id: u32) -> bool {
self.bitset.get(old_id as usize)
}
#[inline]
pub fn old_to_new(&self, old_id: u32) -> Option<u32> {
let len = self.bitset.len();
let i = old_id as usize;
if i >= len {
return None;
}
let block_idx = i / 64;
let bit = i % 64;
let block = self.bitset.blocks()[block_idx];
if (block >> bit) & 1 == 0 {
return None;
}
let mask: u64 = if bit == 0 { 0 } else { (1u64 << bit) - 1 };
let within_block = (block & mask).count_ones();
Some(self.block_prefix[block_idx] + within_block)
}
}
pub struct PassAResult {
#[allow(dead_code)]
pub kept_nodes: Bitset,
pub stats: ScanStats,
}
pub fn pass_a_scan(source: &DiskGraph, spec: &SubsetSpec) -> PassAResult {
let n_nodes = source.node_slots.len();
let mut kept_nodes = Bitset::with_len(n_nodes);
let edge_type_set: Option<std::collections::HashSet<u64>> = spec
.edge_types
.as_ref()
.map(|v| v.iter().map(|k| k.as_u64()).collect());
source.edge_endpoints.advise_sequential();
let scan_start = std::time::Instant::now();
let n_edges = source.next_edge_idx as usize;
let mut kept_edge_count: u64 = 0;
for edge_idx in 0..n_edges {
let ep = source.edge_endpoints.get(edge_idx);
if ep.source == TOMBSTONE_EDGE {
continue;
}
if let Some(ref types) = edge_type_set {
if !types.contains(&ep.connection_type) {
continue;
}
}
kept_nodes.set(ep.source as usize);
kept_nodes.set(ep.target as usize);
kept_edge_count += 1;
}
let kept_node_count = kept_nodes.count_ones();
source.edge_endpoints.advise_dontneed();
PassAResult {
kept_nodes,
stats: ScanStats {
kept_node_count,
kept_edge_count,
total_edge_count: n_edges as u64,
scan_duration_secs: scan_start.elapsed().as_secs_f64(),
},
}
}
pub struct PassAFileResult {
pub kept_nodes: Bitset,
pub stats: ScanStats,
pub kept_edges_path: std::path::PathBuf,
pub kept_edge_records: u64,
}
pub fn pass_a_scan_to_file(
source: &DiskGraph,
spec: &SubsetSpec,
kept_edges_path: &Path,
) -> Result<PassAFileResult, String> {
let n_nodes = source.node_slots.len();
let n_edges = source.next_edge_idx as usize;
let mut kept_nodes = Bitset::with_len(n_nodes);
let edge_type_set: Option<std::collections::HashSet<u64>> = spec
.edge_types
.as_ref()
.map(|v| v.iter().map(|k| k.as_u64()).collect());
if let Some(parent) = kept_edges_path.parent() {
if !parent.as_os_str().is_empty() {
std::fs::create_dir_all(parent).map_err(|e| {
format!(
"save_subset: failed to create temp dir {}: {}",
parent.display(),
e
)
})?;
}
}
let mut kept_edges: MmapOrVec<(u32, u32, u64)> = MmapOrVec::mapped(kept_edges_path, n_edges)
.unwrap_or_else(|_| MmapOrVec::with_capacity(n_edges));
source.edge_endpoints.advise_sequential();
let scan_start = std::time::Instant::now();
let mut kept_edge_count: u64 = 0;
for edge_idx in 0..n_edges {
let ep = source.edge_endpoints.get(edge_idx);
if ep.source == TOMBSTONE_EDGE {
continue;
}
if let Some(ref types) = edge_type_set {
if !types.contains(&ep.connection_type) {
continue;
}
}
kept_nodes.set(ep.source as usize);
kept_nodes.set(ep.target as usize);
kept_edges.push((ep.source, ep.target, ep.connection_type));
kept_edge_count += 1;
}
let kept_node_count = kept_nodes.count_ones();
let scan_duration_secs = scan_start.elapsed().as_secs_f64();
Ok(PassAFileResult {
kept_nodes,
stats: ScanStats {
kept_node_count,
kept_edge_count,
total_edge_count: n_edges as u64,
scan_duration_secs,
},
kept_edges_path: kept_edges_path.to_path_buf(),
kept_edge_records: kept_edge_count,
})
}
pub fn save_subset(
source: &DirGraph,
selection: &CowSelection,
out_path: &Path,
_spec: Option<&SubsetSpec>,
) -> Result<(), String> {
use crate::graph::mutation::subgraph::extract_subgraph;
let mut extracted = extract_subgraph(source, selection)?;
extracted.enable_columnar();
let path_str = out_path.to_str().ok_or_else(|| {
format!(
"save_subset: out_path is not valid UTF-8: {}",
out_path.display()
)
})?;
const SINGLE_FILE_NODE_THRESHOLD: u64 = 1_000_000;
use crate::graph::storage::GraphRead;
let node_count = u64::try_from(extracted.graph.node_count()).unwrap_or(u64::MAX);
if node_count <= SINGLE_FILE_NODE_THRESHOLD {
let mut arc = std::sync::Arc::new(extracted);
crate::graph::io::file::prepare_save(&mut arc);
crate::graph::io::file::write_graph_v3(&arc, path_str)
.map_err(|e| format!("save_subset: write_graph_v3 failed: {}", e))
} else {
extracted.enable_disk_mode()?;
extracted.save_disk(path_str)
}
}
pub fn save_subset_streaming_disk(
source: &DirGraph,
kept_per_type: &std::collections::HashMap<String, Vec<u32>>,
edge_filter: Option<&[u64]>,
out_path: &Path,
) -> Result<(), String> {
use crate::datatypes::values::Value;
use crate::graph::schema::{EdgeData, NodeData, PropertyStorage};
use crate::graph::storage::backend::GraphBackend;
use crate::graph::storage::column_store::ColumnStore;
use crate::graph::storage::disk::graph::DiskGraph;
use crate::graph::storage::interner::InternedKey;
use petgraph::graph::NodeIndex;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
let timing_enabled = std::env::var("KGLITE_STREAMING_TIMING")
.map(|v| !v.is_empty() && v != "0")
.unwrap_or(false);
let log_phase = |label: &str, t0: Instant| {
if timing_enabled {
eprintln!(
"save_subset_streaming_disk: {} = {:.3}s",
label,
t0.elapsed().as_secs_f64()
);
}
};
let phase_start = Instant::now();
let path_str = out_path.to_str().ok_or_else(|| {
format!(
"save_subset_streaming_disk: out_path is not valid UTF-8: {}",
out_path.display()
)
})?;
std::fs::create_dir_all(out_path).map_err(|e| {
format!(
"save_subset_streaming_disk: create_dir_all({}): {}",
out_path.display(),
e
)
})?;
let dest_disk = DiskGraph::new_at_path(out_path)
.map_err(|e| format!("save_subset_streaming_disk: DiskGraph::new_at_path: {}", e))?;
let mut dest = DirGraph::from_graph(GraphBackend::Disk(Box::new(dest_disk)));
dest.interner = source.interner.clone();
dest.type_schemas = source.type_schemas.clone();
dest.node_type_metadata = source.node_type_metadata.clone();
dest.connection_type_metadata = source.connection_type_metadata.clone();
dest.id_field_aliases = source.id_field_aliases.clone();
dest.title_field_aliases = source.title_field_aliases.clone();
dest.parent_types = source.parent_types.clone();
if let GraphBackend::Disk(ref mut dg) = dest.graph {
dg.defer_csr = true;
}
use crate::graph::storage::GraphRead;
let n_source_nodes = source.graph.node_bound();
let mut bitset = Bitset::with_len(n_source_nodes);
for sorted_ids in kept_per_type.values() {
for &id in sorted_ids {
bitset.set(id as usize);
}
}
let rank = RankIndex::from_bitset(bitset);
use crate::graph::mutation::subgraph_streaming_writer::TypeWriter;
let scratch_root = out_path.join(".tmp_streaming");
std::fs::create_dir_all(&scratch_root).map_err(|e| {
format!(
"save_subset_streaming_disk: create_dir_all({}): {}",
scratch_root.display(),
e
)
})?;
let mut writers: HashMap<String, TypeWriter> = HashMap::new();
for type_name in kept_per_type.keys() {
let schema = if let Some(src_store) = source.column_stores.get(type_name) {
Arc::clone(src_store.schema())
} else if let Some(s) = source.type_schemas.get(type_name) {
Arc::clone(s)
} else {
continue; };
let meta = source
.node_type_metadata
.get(type_name)
.cloned()
.unwrap_or_default();
let writer_dir = scratch_root.join(sanitize_type_name(type_name));
let id_type = source
.column_stores
.get(type_name)
.and_then(|s| s.id_type_str())
.unwrap_or("mixed");
let title_type = source
.column_stores
.get(type_name)
.and_then(|s| s.title_type_str())
.unwrap_or("mixed");
let writer = TypeWriter::new(
schema,
meta,
writer_dir,
&source.interner,
id_type,
title_type,
)
.map_err(|e| {
format!(
"save_subset_streaming_disk: TypeWriter::new {}: {}",
type_name, e
)
})?;
writers.insert(type_name.clone(), writer);
}
log_phase("setup (rank + writer creation)", phase_start);
let phase_node_walk = Instant::now();
let source_disk_for_nodes: Option<&DiskGraph> = match &source.graph {
GraphBackend::Disk(dg) => Some(dg.as_ref()),
_ => None,
};
let placeholder_arc: Option<Arc<ColumnStore>> = if writers.is_empty() {
None
} else {
Some(Arc::new(ColumnStore::new(
Arc::new(crate::graph::schema::TypeSchema::new()),
&HashMap::new(),
&source.interner,
)))
};
let mut t_lookups = std::time::Duration::ZERO;
let mut t_read_id_title = std::time::Duration::ZERO;
let mut t_read_props = std::time::Duration::ZERO;
let mut t_push_row = std::time::Duration::ZERO;
let mut t_add_node = std::time::Duration::ZERO;
let mut row_counter: u64 = 0;
const PROGRESS_EVERY: u64 = 1_000_000;
let mut last_progress_row: u64 = 0;
let mut last_progress_at = Instant::now();
let mut last_t_lookups = std::time::Duration::ZERO;
let mut last_t_read_id_title = std::time::Duration::ZERO;
let mut last_t_read_props = std::time::Duration::ZERO;
let mut last_t_push_row = std::time::Duration::ZERO;
let mut last_t_add_node = std::time::Duration::ZERO;
if let (Some(sdg), Some(placeholder_arc)) = (source_disk_for_nodes, placeholder_arc) {
for old_id in 0..n_source_nodes as u32 {
if !rank.contains(old_id) {
continue;
}
let slot = sdg.node_slots.get(old_id as usize);
if !slot.is_alive() {
continue;
}
let t0 = if timing_enabled {
Some(Instant::now())
} else {
None
};
let type_key = InternedKey::from_u64(slot.node_type);
let type_name = source.interner.resolve(type_key);
let src_store = match source.column_stores.get(type_name) {
Some(s) => s.as_ref(),
None => continue,
};
let writer = match writers.get_mut(type_name) {
Some(w) => w,
None => continue,
};
if let Some(t) = t0 {
t_lookups += t.elapsed();
}
let t1 = if timing_enabled {
Some(Instant::now())
} else {
None
};
let id_borrowed = src_store
.id_borrowed(slot.row_id)
.unwrap_or(crate::datatypes::values::BorrowedValue::Null);
let title_borrowed = match src_store.title_borrowed(slot.row_id) {
Some(s) => crate::datatypes::values::BorrowedValue::String(s),
None => crate::datatypes::values::BorrowedValue::Null,
};
if let Some(t) = t1 {
t_read_id_title += t.elapsed();
}
let t2 = if timing_enabled {
Some(Instant::now())
} else {
None
};
let dest_row_id = writer
.push_row_borrowed(id_borrowed, title_borrowed, |row| {
let t1b = if timing_enabled {
Some(Instant::now())
} else {
None
};
let r = src_store.try_for_each_property_borrowed(slot.row_id, |key, bv| {
row.push_property(key, bv)
});
if let Some(t) = t1b {
t_read_props += t.elapsed();
}
r
})
.map_err(|e| format!("save_subset_streaming_disk: push_row: {}", e))?;
if let Some(t) = t2 {
t_push_row += t.elapsed();
}
let t3 = if timing_enabled {
Some(Instant::now())
} else {
None
};
let new_node_data = NodeData {
id: Value::Null,
title: Value::Null,
node_type: type_key,
properties: PropertyStorage::Columnar {
store: Arc::clone(&placeholder_arc),
row_id: dest_row_id,
},
};
crate::graph::storage::GraphWrite::add_node(&mut dest.graph, new_node_data);
if let Some(t) = t3 {
t_add_node += t.elapsed();
}
row_counter += 1;
if timing_enabled && row_counter - last_progress_row >= PROGRESS_EVERY {
let now = Instant::now();
let dt = now.duration_since(last_progress_at).as_secs_f64();
let drows = (row_counter - last_progress_row) as f64;
let d_lookups = (t_lookups - last_t_lookups).as_secs_f64();
let d_idtitle = (t_read_id_title - last_t_read_id_title).as_secs_f64();
let d_props = (t_read_props - last_t_read_props).as_secs_f64();
let d_push = (t_push_row - last_t_push_row).as_secs_f64();
let d_addn = (t_add_node - last_t_add_node).as_secs_f64();
eprintln!(
"save_subset_streaming_disk: progress: rows={} (+{:.0}M) wall={:.2}s \
({:.1}us/row) | dlookups={:.2}s did+t={:.2}s dprops={:.2}s \
dpush={:.2}s daddn={:.2}s",
row_counter,
drows / 1_000_000.0,
dt,
dt * 1e6 / drows,
d_lookups,
d_idtitle,
d_props,
d_push,
d_addn,
);
last_progress_row = row_counter;
last_progress_at = now;
last_t_lookups = t_lookups;
last_t_read_id_title = t_read_id_title;
last_t_read_props = t_read_props;
last_t_push_row = t_push_row;
last_t_add_node = t_add_node;
}
}
} else if !writers.is_empty() {
return Err(
"save_subset_streaming_disk currently requires a disk-backed source".to_string(),
);
}
log_phase("node walk (push rows + add_node)", phase_node_walk);
if timing_enabled {
let t_read_source = t_read_id_title + t_read_props;
eprintln!(
"save_subset_streaming_disk: node walk sub-phases ({} rows): \
lookups={:.3}s, read_source={:.3}s (id+title={:.3}s, props={:.3}s), \
push_row={:.3}s, add_node={:.3}s",
row_counter,
t_lookups.as_secs_f64(),
t_read_source.as_secs_f64(),
t_read_id_title.as_secs_f64(),
t_read_props.as_secs_f64(),
t_push_row.as_secs_f64(),
t_add_node.as_secs_f64(),
);
}
let phase_finalize = Instant::now();
let mut arc_dest_stores: HashMap<String, Arc<ColumnStore>> = HashMap::new();
for (type_name, writer) in writers.into_iter() {
let store = writer
.finalize(&source.interner)
.map_err(|e| format!("save_subset_streaming_disk: finalize {}: {}", type_name, e))?;
arc_dest_stores.insert(type_name, store);
}
dest.column_stores = arc_dest_stores;
dest.sync_disk_column_stores();
log_phase("writer finalize (close + mmap per type)", phase_finalize);
let phase_edge_walk = Instant::now();
let edge_filter_set: Option<std::collections::HashSet<u64>> =
edge_filter.map(|v| v.iter().copied().collect());
let source_disk = match &source.graph {
GraphBackend::Disk(dg) => Some(dg.as_ref()),
_ => None,
};
if let Some(sdg) = source_disk {
sdg.edge_endpoints.advise_sequential();
sdg.node_slots.advise_dontneed();
sdg.node_slots.fadvise_dontneed();
let n_edges = sdg.next_edge_idx as usize;
for edge_idx in 0..n_edges {
let ep = sdg.edge_endpoints.get(edge_idx);
if ep.source == crate::graph::storage::disk::csr::TOMBSTONE_EDGE {
continue;
}
if let Some(ref types) = edge_filter_set {
if !types.contains(&ep.connection_type) {
continue;
}
}
let new_src = match rank.old_to_new(ep.source) {
Some(x) => NodeIndex::new(x as usize),
None => continue,
};
let new_tgt = match rank.old_to_new(ep.target) {
Some(x) => NodeIndex::new(x as usize),
None => continue,
};
let conn_type = InternedKey::from_u64(ep.connection_type);
let props = sdg
.edge_properties_at(edge_idx as u32)
.map(|cow| cow.into_owned())
.unwrap_or_default();
let edge_data = EdgeData::new_interned(conn_type, props);
crate::graph::storage::GraphWrite::add_edge(
&mut dest.graph,
new_src,
new_tgt,
edge_data,
);
}
} else {
use petgraph::visit::IntoEdgeReferences;
let backend = match &source.graph {
GraphBackend::Memory(g) => Some(g.inner()),
GraphBackend::Mapped(_) | GraphBackend::Recording(_) | GraphBackend::Disk(_) => None,
};
if let Some(g) = backend {
for er in g.edge_references() {
use petgraph::visit::EdgeRef;
let w = er.weight();
if let Some(ref types) = edge_filter_set {
if !types.contains(&w.connection_type.as_u64()) {
continue;
}
}
let src = er.source().index() as u32;
let tgt = er.target().index() as u32;
let new_src = match rank.old_to_new(src) {
Some(x) => NodeIndex::new(x as usize),
None => continue,
};
let new_tgt = match rank.old_to_new(tgt) {
Some(x) => NodeIndex::new(x as usize),
None => continue,
};
let edge_data = EdgeData::new_interned(w.connection_type, w.properties.clone());
crate::graph::storage::GraphWrite::add_edge(
&mut dest.graph,
new_src,
new_tgt,
edge_data,
);
}
} else {
return Err(
"save_subset_streaming_disk: mapped + recording sources not yet supported"
.to_string(),
);
}
}
log_phase("edge walk (translate + add_edge)", phase_edge_walk);
let phase_save = Instant::now();
dest.rebuild_type_indices();
let save_result = dest.save_disk(path_str);
log_phase("save_disk (CSR build + sidecars)", phase_save);
log_phase("TOTAL", phase_start);
drop(dest);
let _ = std::fs::remove_dir_all(&scratch_root);
save_result
}
fn sanitize_type_name(name: &str) -> String {
use std::fmt::Write as _;
let mut prefix: String = name
.chars()
.map(|c| {
if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
c
} else {
'_'
}
})
.collect();
let hash = InternedKey::from_str(name).as_u64();
let _ = write!(prefix, "_{:016x}", hash);
prefix
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn bitset_set_count_across_block_boundaries() {
let mut bs = Bitset::with_len(200);
bs.set(0);
bs.set(63);
bs.set(64);
bs.set(199);
bs.set(64);
assert_eq!(bs.count_ones(), 4);
}
#[test]
fn bitset_out_of_range_writes_are_ignored() {
let mut bs = Bitset::with_len(100);
bs.set(99);
bs.set(500); assert_eq!(bs.count_ones(), 1);
}
#[test]
fn sanitize_type_name_is_collision_free() {
let groups = vec![
vec!["établissement public", "Établissement public"],
vec!["ग", "झ", "ज", "च", "त", "छ", "色", "ञ", "ट", "藪"],
vec!["C♯", "C♭"],
vec!["梅林", "連合"],
];
for group in groups {
let mut sanitized: Vec<String> = group.iter().map(|n| sanitize_type_name(n)).collect();
sanitized.sort();
sanitized.dedup();
assert_eq!(
sanitized.len(),
group.len(),
"sanitize_type_name collapsed distinct types {:?}",
group
);
}
}
fn brute_force_rank(bs: &Bitset, old_id: u32) -> Option<u32> {
if !bs.get(old_id as usize) {
return None;
}
let mut rank: u32 = 0;
for i in 0..(old_id as usize) {
if bs.get(i) {
rank += 1;
}
}
Some(rank)
}
#[test]
fn rank_index_dense_pattern() {
let mut bs = Bitset::with_len(200);
for i in (0..200).step_by(3) {
bs.set(i);
}
let idx = RankIndex::from_bitset(bs.clone());
assert_eq!(idx.kept_count(), 67);
assert_eq!(idx.old_to_new(0), Some(0));
assert_eq!(idx.old_to_new(3), Some(1));
assert_eq!(idx.old_to_new(6), Some(2));
assert_eq!(idx.old_to_new(63), Some(21)); assert_eq!(idx.old_to_new(66), Some(22));
assert_eq!(idx.old_to_new(198), Some(66));
assert_eq!(idx.old_to_new(1), None);
assert_eq!(idx.old_to_new(64), None);
for i in 0..200u32 {
assert_eq!(idx.old_to_new(i), brute_force_rank(&bs, i));
}
}
#[test]
fn rank_index_sparse_pattern() {
let mut bs = Bitset::with_len(256);
bs.set(0);
bs.set(64);
bs.set(128);
bs.set(192);
let idx = RankIndex::from_bitset(bs.clone());
assert_eq!(idx.kept_count(), 4);
assert_eq!(idx.old_to_new(0), Some(0));
assert_eq!(idx.old_to_new(64), Some(1));
assert_eq!(idx.old_to_new(128), Some(2));
assert_eq!(idx.old_to_new(192), Some(3));
assert_eq!(idx.old_to_new(63), None);
assert_eq!(idx.old_to_new(65), None);
assert_eq!(idx.old_to_new(255), None);
}
#[test]
fn rank_index_full_pattern() {
let mut bs = Bitset::with_len(130);
for i in 0..130 {
bs.set(i);
}
let idx = RankIndex::from_bitset(bs);
assert_eq!(idx.kept_count(), 130);
for i in 0..130u32 {
assert_eq!(idx.old_to_new(i), Some(i));
}
}
#[test]
fn rank_index_empty_pattern() {
let bs = Bitset::with_len(200);
let idx = RankIndex::from_bitset(bs);
assert_eq!(idx.kept_count(), 0);
for i in 0..200u32 {
assert_eq!(idx.old_to_new(i), None);
}
}
#[test]
fn rank_index_out_of_range_returns_none() {
let mut bs = Bitset::with_len(100);
bs.set(99);
let idx = RankIndex::from_bitset(bs);
assert_eq!(idx.old_to_new(99), Some(0));
assert_eq!(idx.old_to_new(100), None);
assert_eq!(idx.old_to_new(u32::MAX), None);
}
#[test]
fn rank_index_zero_length_bitset() {
let bs = Bitset::with_len(0);
let idx = RankIndex::from_bitset(bs);
assert_eq!(idx.kept_count(), 0);
assert_eq!(idx.old_to_new(0), None);
}
#[test]
fn rank_index_single_bit_each_block_boundary() {
let mut bs = Bitset::with_len(256);
for i in (0..256).step_by(64) {
bs.set(i);
}
let idx = RankIndex::from_bitset(bs);
assert_eq!(idx.old_to_new(0), Some(0));
assert_eq!(idx.old_to_new(64), Some(1));
assert_eq!(idx.old_to_new(128), Some(2));
assert_eq!(idx.old_to_new(192), Some(3));
}
#[test]
fn rank_index_pseudo_random_differential() {
let mut bs = Bitset::with_len(1024);
let mut state: u32 = 0xDEAD_BEEF;
for i in 0..1024 {
state = state.wrapping_mul(1664525).wrapping_add(1013904223);
if state & 0b11 != 0 {
bs.set(i);
}
}
let idx = RankIndex::from_bitset(bs.clone());
for i in 0..1024u32 {
assert_eq!(idx.old_to_new(i), brute_force_rank(&bs, i));
}
}
#[test]
fn rank_index_contains_matches_bitset() {
let mut bs = Bitset::with_len(100);
bs.set(5);
bs.set(50);
bs.set(99);
let idx = RankIndex::from_bitset(bs);
assert!(idx.contains(5));
assert!(idx.contains(50));
assert!(idx.contains(99));
assert!(!idx.contains(0));
assert!(!idx.contains(49));
assert!(!idx.contains(100));
}
}