use arcstr::ArcStr;
use grafeo_common::types::{PropertyKey, Value};
use grafeo_common::utils::hash::{FxHashMap, FxHashSet};
use thiserror::Error;
use super::CompactStore;
use super::column::ColumnCodec;
use super::csr::CsrAdjacency;
use super::id::MAX_TABLE_ID;
use super::node_table::NodeTable;
use super::rel_table::RelTable;
use super::schema::{ColumnDef, ColumnType, EdgeSchema, TableSchema};
use super::zone_map::ZoneMap;
use crate::codec::{BitPackedInts, BitVector, DictionaryBuilder};
use crate::statistics::{EdgeTypeStatistics, LabelStatistics, Statistics};
#[derive(Debug, Clone, Error)]
#[non_exhaustive]
pub enum CompactStoreError {
#[error("node label not found: {0:?}")]
LabelNotFound(String),
#[error("column length mismatch: expected {expected} rows, got {got}")]
ColumnLengthMismatch {
expected: usize,
got: usize,
},
#[error("duplicate node label: {0:?}")]
DuplicateLabel(String),
#[error("duplicate edge type: {0:?}")]
DuplicateEdgeType(String),
#[error("inconsistent edge data: {0}")]
InconsistentEdgeData(String),
#[error("value overflow in column {column:?}: {value} exceeds i64::MAX ({max})")]
ValueOverflow {
column: String,
value: u64,
max: u64,
},
#[error("table count {count} exceeds compact ID limit of {max} ({kind} tables)")]
TableCountOverflow {
kind: &'static str,
count: usize,
max: u16,
},
}
pub struct NodeTableBuilder {
label: ArcStr,
columns: Vec<(PropertyKey, ColumnCodec)>,
zone_maps: Vec<(PropertyKey, ZoneMap)>,
len: Option<usize>,
length_mismatch: Option<(usize, usize)>,
value_overflow: Option<(String, u64)>,
}
impl NodeTableBuilder {
fn new(label: impl Into<ArcStr>) -> Self {
Self {
label: label.into(),
columns: Vec::new(),
zone_maps: Vec::new(),
len: None,
length_mismatch: None,
value_overflow: None,
}
}
pub fn column_bitpacked(&mut self, name: &str, values: &[u64], bits: u8) -> &mut Self {
self.record_len(values.len());
if let Some(&bad) = values.iter().find(|&&v| v > i64::MAX as u64) {
self.value_overflow = Some((name.to_string(), bad));
}
let bp = BitPackedInts::pack_with_bits(values, bits);
let zone_map = compute_zone_map_u64(values);
self.zone_maps.push((PropertyKey::new(name), zone_map));
self.columns
.push((PropertyKey::new(name), ColumnCodec::BitPacked(bp)));
self
}
pub fn column_dict(&mut self, name: &str, values: &[&str]) -> &mut Self {
self.record_len(values.len());
let mut builder = DictionaryBuilder::new();
for &v in values {
builder.add(v);
}
let dict = builder.build();
let zone_map = compute_zone_map_strings(values);
self.zone_maps.push((PropertyKey::new(name), zone_map));
self.columns
.push((PropertyKey::new(name), ColumnCodec::Dict(dict)));
self
}
pub fn column_int8_vector(&mut self, name: &str, data: Vec<i8>, dimensions: u16) -> &mut Self {
let dims = dimensions as usize;
let row_count = if dims == 0 {
0
} else {
assert!(
data.len().is_multiple_of(dims),
"Int8Vector data length {} is not a multiple of dimensions {dimensions}",
data.len(),
);
data.len() / dims
};
self.record_len(row_count);
self.columns.push((
PropertyKey::new(name),
ColumnCodec::Int8Vector { data, dimensions },
));
self
}
pub fn column_bitmap(&mut self, name: &str, values: &[bool]) -> &mut Self {
self.record_len(values.len());
let bv = BitVector::from_bools(values);
let zone_map = compute_zone_map_bool(values);
self.zone_maps.push((PropertyKey::new(name), zone_map));
self.columns
.push((PropertyKey::new(name), ColumnCodec::Bitmap(bv)));
self
}
pub fn column(&mut self, name: &str, codec: ColumnCodec) -> &mut Self {
self.record_len(codec.len());
self.columns.push((PropertyKey::new(name), codec));
self
}
fn record_len(&mut self, col_len: usize) {
match self.len {
None => self.len = Some(col_len),
Some(expected) => {
if expected != col_len {
self.length_mismatch = Some((expected, col_len));
}
}
}
}
}
pub struct RelTableBuilder {
edge_type: ArcStr,
src_label: ArcStr,
dst_label: ArcStr,
edges: Vec<(u32, u32)>,
backward: bool,
properties: Vec<(PropertyKey, ColumnCodec)>,
}
impl RelTableBuilder {
fn new(
edge_type: impl Into<ArcStr>,
src_label: impl Into<ArcStr>,
dst_label: impl Into<ArcStr>,
) -> Self {
Self {
edge_type: edge_type.into(),
src_label: src_label.into(),
dst_label: dst_label.into(),
edges: Vec::new(),
backward: false,
properties: Vec::new(),
}
}
pub fn edges(&mut self, pairs: impl Into<Vec<(u32, u32)>>) -> &mut Self {
self.edges = pairs.into();
self
}
pub fn backward(&mut self, enabled: bool) -> &mut Self {
self.backward = enabled;
self
}
pub fn column_bitpacked(&mut self, name: &str, values: &[u64], bits: u8) -> &mut Self {
let bp = BitPackedInts::pack_with_bits(values, bits);
self.properties
.push((PropertyKey::new(name), ColumnCodec::BitPacked(bp)));
self
}
}
#[derive(Default)]
pub struct CompactStoreBuilder {
node_table_builders: Vec<NodeTableBuilder>,
rel_table_builders: Vec<RelTableBuilder>,
}
impl CompactStoreBuilder {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn node_table(
mut self,
label: &str,
f: impl FnOnce(&mut NodeTableBuilder) -> &mut NodeTableBuilder,
) -> Self {
let mut builder = NodeTableBuilder::new(label);
f(&mut builder);
self.node_table_builders.push(builder);
self
}
pub fn rel_table(
mut self,
edge_type: &str,
src_label: &str,
dst_label: &str,
f: impl FnOnce(&mut RelTableBuilder) -> &mut RelTableBuilder,
) -> Self {
let mut builder = RelTableBuilder::new(edge_type, src_label, dst_label);
f(&mut builder);
self.rel_table_builders.push(builder);
self
}
pub fn build(self) -> Result<CompactStore, CompactStoreError> {
for ntb in &self.node_table_builders {
if let Some((expected, got)) = ntb.length_mismatch {
return Err(CompactStoreError::ColumnLengthMismatch { expected, got });
}
if let Some((ref column, value)) = ntb.value_overflow {
return Err(CompactStoreError::ValueOverflow {
column: column.clone(),
max: i64::MAX as u64,
value,
});
}
}
{
let mut seen_labels = FxHashSet::default();
for ntb in &self.node_table_builders {
if !seen_labels.insert(&ntb.label) {
return Err(CompactStoreError::DuplicateLabel(ntb.label.to_string()));
}
}
}
{
let mut seen_triples = FxHashSet::default();
for rtb in &self.rel_table_builders {
if !seen_triples.insert((&rtb.edge_type, &rtb.src_label, &rtb.dst_label)) {
return Err(CompactStoreError::DuplicateEdgeType(format!(
"{} ({} -> {})",
rtb.edge_type, rtb.src_label, rtb.dst_label
)));
}
}
}
let max_tables = usize::from(MAX_TABLE_ID) + 1; if self.node_table_builders.len() > max_tables {
return Err(CompactStoreError::TableCountOverflow {
kind: "node",
count: self.node_table_builders.len(),
max: MAX_TABLE_ID,
});
}
if self.rel_table_builders.len() > max_tables {
return Err(CompactStoreError::TableCountOverflow {
kind: "relationship",
count: self.rel_table_builders.len(),
max: MAX_TABLE_ID,
});
}
let mut label_to_table_id: FxHashMap<ArcStr, u16> = FxHashMap::default();
let mut table_id_to_label: Vec<ArcStr> = Vec::new();
for (idx, ntb) in self.node_table_builders.iter().enumerate() {
let table_id =
u16::try_from(idx).map_err(|_| CompactStoreError::TableCountOverflow {
kind: "node",
count: idx,
max: MAX_TABLE_ID,
})?;
label_to_table_id.insert(ntb.label.clone(), table_id);
table_id_to_label.push(ntb.label.clone());
}
let mut node_tables_by_id: Vec<NodeTable> =
Vec::with_capacity(self.node_table_builders.len());
for (idx, ntb) in self.node_table_builders.into_iter().enumerate() {
let table_id =
u16::try_from(idx).map_err(|_| CompactStoreError::TableCountOverflow {
kind: "node",
count: idx,
max: MAX_TABLE_ID,
})?;
let row_count = ntb.len.unwrap_or(0);
let col_defs: Vec<ColumnDef> = ntb
.columns
.iter()
.map(|(key, codec)| {
let col_type = infer_column_type(codec);
ColumnDef::new(key.as_str(), col_type)
})
.collect();
let schema = TableSchema::new(ntb.label.as_str(), table_id, col_defs);
let columns: FxHashMap<PropertyKey, ColumnCodec> = ntb.columns.into_iter().collect();
let zone_maps: FxHashMap<PropertyKey, ZoneMap> = ntb.zone_maps.into_iter().collect();
let table = NodeTable::from_columns(schema, columns, zone_maps, row_count);
node_tables_by_id.push(table);
}
let mut rel_tables_by_id: Vec<RelTable> = Vec::with_capacity(self.rel_table_builders.len());
let mut edge_type_to_rel_id: FxHashMap<ArcStr, Vec<u16>> = FxHashMap::default();
let mut rel_table_id_to_type: Vec<ArcStr> = Vec::new();
for (idx, rtb) in self.rel_table_builders.into_iter().enumerate() {
let rel_table_id =
u16::try_from(idx).map_err(|_| CompactStoreError::TableCountOverflow {
kind: "relationship",
count: idx,
max: MAX_TABLE_ID,
})?;
rel_table_id_to_type.push(rtb.edge_type.clone());
let src_table_id = *label_to_table_id
.get(&rtb.src_label)
.ok_or_else(|| CompactStoreError::LabelNotFound(rtb.src_label.to_string()))?;
let dst_table_id = *label_to_table_id
.get(&rtb.dst_label)
.ok_or_else(|| CompactStoreError::LabelNotFound(rtb.dst_label.to_string()))?;
let src_node_count = node_tables_by_id
.get(src_table_id as usize)
.map_or(0, |t| t.len());
let dst_node_count = node_tables_by_id
.get(dst_table_id as usize)
.map_or(0, |t| t.len());
let mut fwd_edges = rtb.edges.clone();
fwd_edges.sort_by_key(|&(src, _dst)| src);
let fwd = CsrAdjacency::from_sorted_edges(src_node_count, &fwd_edges);
let bwd =
if rtb.backward {
let mut bwd_edges: Vec<(u32, u32)> =
rtb.edges.iter().map(|&(src, dst)| (dst, src)).collect();
bwd_edges.sort_by_key(|&(dst, _src)| dst);
let mut bwd_csr = CsrAdjacency::from_sorted_edges(dst_node_count, &bwd_edges);
let mut mapping = Vec::with_capacity(bwd_edges.len());
for &(dst, src) in &bwd_edges {
let fwd_neighbors = fwd.neighbors(src);
let fwd_start = fwd.offset_of(src);
let local_idx = fwd_neighbors.iter().position(|&t| t == dst).ok_or_else(
|| {
CompactStoreError::InconsistentEdgeData(format!(
"backward edge ({dst}->{src}) has no corresponding forward edge"
))
},
)?;
#[allow(clippy::cast_possible_truncation)]
mapping.push(fwd_start + local_idx as u32);
}
bwd_csr.set_edge_data(mapping);
Some(bwd_csr)
} else {
None
};
let property_col_defs: Vec<ColumnDef> = rtb
.properties
.iter()
.map(|(key, codec)| {
let col_type = infer_column_type(codec);
ColumnDef::new(key.as_str(), col_type)
})
.collect();
let schema = EdgeSchema::new(
rtb.edge_type.as_str(),
rel_table_id,
rtb.src_label.as_str(),
rtb.dst_label.as_str(),
property_col_defs,
);
let properties: FxHashMap<PropertyKey, ColumnCodec> =
rtb.properties.into_iter().collect();
let table = RelTable::new(schema, fwd, bwd, properties, src_table_id, dst_table_id);
edge_type_to_rel_id
.entry(rtb.edge_type.clone())
.or_default()
.push(rel_table_id);
rel_tables_by_id.push(table);
}
let mut stats = Statistics::new();
let mut total_nodes: u64 = 0;
let mut total_edges: u64 = 0;
for (idx, nt) in node_tables_by_id.iter().enumerate() {
let count = nt.len() as u64;
total_nodes += count;
let label = &table_id_to_label[idx];
stats.update_label(label.as_str(), LabelStatistics::new(count));
}
let mut edge_type_counts: FxHashMap<&str, u64> = FxHashMap::default();
for (idx, rt) in rel_tables_by_id.iter().enumerate() {
let count = rt.num_edges() as u64;
total_edges += count;
let edge_type = &rel_table_id_to_type[idx];
*edge_type_counts.entry(edge_type.as_str()).or_default() += count;
}
for (edge_type, count) in edge_type_counts {
stats.update_edge_type(edge_type, EdgeTypeStatistics::new(count, 0.0, 0.0));
}
stats.total_nodes = total_nodes;
stats.total_edges = total_edges;
Ok(CompactStore::new(
node_tables_by_id,
label_to_table_id,
rel_tables_by_id,
edge_type_to_rel_id,
table_id_to_label,
rel_table_id_to_type,
stats,
))
}
}
fn infer_column_type(codec: &ColumnCodec) -> ColumnType {
match codec {
ColumnCodec::BitPacked(bp) => ColumnType::UInt {
bits: bp.bits_per_value(),
},
ColumnCodec::Dict(_) => ColumnType::DictString,
ColumnCodec::Bitmap(_) => ColumnType::Bool,
ColumnCodec::Int8Vector { dimensions, .. } => ColumnType::Int8Vector {
dimensions: *dimensions,
},
ColumnCodec::Float64(_) => ColumnType::Float64,
ColumnCodec::Float32Vector { dimensions, .. } => ColumnType::Float32Vector {
dimensions: *dimensions,
},
ColumnCodec::RawI64(_) => ColumnType::Int64,
}
}
fn compute_zone_map_u64(values: &[u64]) -> ZoneMap {
let Some(&min) = values.iter().min() else {
return ZoneMap::new();
};
let max = *values.iter().max().expect("non-empty after min check");
if max > i64::MAX as u64 {
return ZoneMap {
row_count: values.len(),
..ZoneMap::default()
};
}
#[allow(clippy::cast_possible_wrap)]
ZoneMap {
min: Some(Value::Int64(min as i64)),
max: Some(Value::Int64(max as i64)),
null_count: 0,
row_count: values.len(),
}
}
fn compute_zone_map_i64(values: &[i64]) -> ZoneMap {
let Some(&min) = values.iter().min() else {
return ZoneMap::new();
};
let max = *values.iter().max().expect("non-empty after min check");
ZoneMap {
min: Some(Value::Int64(min)),
max: Some(Value::Int64(max)),
null_count: 0,
row_count: values.len(),
}
}
fn compute_zone_map_strings(values: &[&str]) -> ZoneMap {
let Some(&min) = values.iter().min() else {
return ZoneMap::new();
};
let max = *values.iter().max().expect("non-empty after min check");
ZoneMap {
min: Some(Value::from(min)),
max: Some(Value::from(max)),
null_count: 0,
row_count: values.len(),
}
}
fn compute_zone_map_bool(values: &[bool]) -> ZoneMap {
if values.is_empty() {
return ZoneMap::new();
}
let has_false = values.iter().any(|&v| !v);
let has_true = values.iter().any(|&v| v);
let min = !has_false; let max = has_true; ZoneMap {
min: Some(Value::Bool(min)),
max: Some(Value::Bool(max)),
null_count: 0,
row_count: values.len(),
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InferredType {
BitPacked,
RawI64,
Float64,
Bitmap,
Float32Vector { dimensions: u16 },
Dict,
}
pub fn from_graph_store(
store: &dyn crate::graph::traits::GraphStore,
) -> Result<CompactStore, CompactStoreError> {
let labels = store.all_labels();
if labels.is_empty() {
return CompactStoreBuilder::new().build();
}
let mut id_map: FxHashMap<grafeo_common::types::NodeId, (ArcStr, u32)> = FxHashMap::default();
let mut label_data: Vec<(
ArcStr,
Vec<grafeo_common::types::NodeId>,
FxHashMap<PropertyKey, Vec<Value>>,
)> = Vec::new();
let mut seen_node_ids: FxHashSet<grafeo_common::types::NodeId> = FxHashSet::default();
let mut label_key_index: FxHashMap<ArcStr, usize> = FxHashMap::default();
for label in &labels {
let node_ids = store.nodes_by_label(label);
for &nid in &node_ids {
if !seen_node_ids.insert(nid) {
continue; }
let Some(node) = store.get_node(nid) else {
continue;
};
let label_key: ArcStr = if node.labels.len() <= 1 {
ArcStr::from(label.as_str())
} else {
let mut sorted: Vec<&str> = node.labels.iter().map(|l| l.as_str()).collect();
sorted.sort_unstable();
ArcStr::from(sorted.join("|"))
};
let entry_idx = if let Some(&idx) = label_key_index.get(&label_key) {
idx
} else {
let idx = label_data.len();
label_key_index.insert(label_key.clone(), idx);
label_data.push((label_key.clone(), Vec::new(), FxHashMap::default()));
idx
};
let (_, ref mut node_ids_vec, ref mut props_map) = label_data[entry_idx];
#[allow(clippy::cast_possible_truncation)]
let offset = node_ids_vec.len() as u32;
node_ids_vec.push(nid);
id_map.insert(nid, (label_key, offset));
for (key, value) in node.properties.iter() {
let col = props_map
.entry(key.clone())
.or_insert_with(|| vec![Value::Null; offset as usize]);
while col.len() < offset as usize {
col.push(Value::Null);
}
col.push(value.clone());
}
let expected_len = offset as usize + 1;
for col in props_map.values_mut() {
while col.len() < expected_len {
col.push(Value::Null);
}
}
}
}
let mut builder = CompactStoreBuilder::new();
for (label_key, node_ids_for_label, props_map) in &label_data {
let node_count = node_ids_for_label.len();
builder = builder.node_table(label_key.as_str(), |t| {
t.record_len(node_count);
for (key, values) in props_map {
let inferred = infer_type_from_values(values);
match inferred {
InferredType::BitPacked => {
let u64_values: Vec<u64> = values
.iter()
.map(|v| match v {
#[allow(clippy::cast_sign_loss)]
Value::Int64(n) => *n as u64,
_ => 0,
})
.collect();
let bp = BitPackedInts::pack(&u64_values);
let zone_map = compute_zone_map_u64(&u64_values);
t.zone_maps.push((key.clone(), zone_map));
t.columns.push((key.clone(), ColumnCodec::BitPacked(bp)));
t.record_len(u64_values.len());
}
InferredType::RawI64 => {
let i64_values: Vec<i64> = values
.iter()
.map(|v| match v {
Value::Int64(n) => *n,
_ => 0,
})
.collect();
let zone_map = compute_zone_map_i64(&i64_values);
t.zone_maps.push((key.clone(), zone_map));
t.columns
.push((key.clone(), ColumnCodec::RawI64(i64_values)));
t.record_len(values.len());
}
InferredType::Float64 => {
let f64_values: Vec<f64> = values
.iter()
.map(|v| match v {
Value::Float64(f) => *f,
Value::Int64(n) => *n as f64,
_ => 0.0,
})
.collect();
t.columns
.push((key.clone(), ColumnCodec::Float64(f64_values)));
t.record_len(values.len());
}
InferredType::Float32Vector { dimensions } => {
let mut flat: Vec<f32> =
Vec::with_capacity(values.len() * dimensions as usize);
for v in values {
match v {
Value::Vector(vec) => flat.extend_from_slice(vec),
_ => {
flat.extend(std::iter::repeat_n(
0.0f32,
usize::from(dimensions),
));
}
}
}
t.columns.push((
key.clone(),
ColumnCodec::Float32Vector {
data: flat,
dimensions,
},
));
t.record_len(values.len());
}
InferredType::Bitmap => {
let bool_values: Vec<bool> = values
.iter()
.map(|v| matches!(v, Value::Bool(true)))
.collect();
let bv = BitVector::from_bools(&bool_values);
let zone_map = compute_zone_map_bool(&bool_values);
t.zone_maps.push((key.clone(), zone_map));
t.columns.push((key.clone(), ColumnCodec::Bitmap(bv)));
t.record_len(bool_values.len());
}
InferredType::Dict => {
let str_values: Vec<String> = values
.iter()
.map(|v| match v {
Value::Null => String::new(),
Value::String(s) => s.to_string(),
other => format!("{other}"),
})
.collect();
let str_refs: Vec<&str> = str_values.iter().map(String::as_str).collect();
let mut dict_builder = DictionaryBuilder::new();
for s in &str_refs {
dict_builder.add(s);
}
let dict = dict_builder.build();
let zone_map = compute_zone_map_strings(&str_refs);
t.zone_maps.push((key.clone(), zone_map));
t.columns.push((key.clone(), ColumnCodec::Dict(dict)));
t.record_len(str_values.len());
}
}
}
t
});
}
type EdgeGroupKey = (ArcStr, ArcStr, ArcStr);
let mut edge_groups: FxHashMap<EdgeGroupKey, Vec<(u32, u32)>> = FxHashMap::default();
let mut edge_props_groups: FxHashMap<EdgeGroupKey, FxHashMap<PropertyKey, Vec<Value>>> =
FxHashMap::default();
for (_label_key, node_ids, _) in &label_data {
for &nid in node_ids {
let outgoing = store.edges_from(nid, crate::graph::Direction::Outgoing);
for (_target_nid, edge_id) in outgoing {
let Some(edge) = store.get_edge(edge_id) else {
continue;
};
let Some((src_label, src_offset)) = id_map.get(&edge.src) else {
continue;
};
let Some((dst_label, dst_offset)) = id_map.get(&edge.dst) else {
continue;
};
let group_key: EdgeGroupKey =
(edge.edge_type.clone(), src_label.clone(), dst_label.clone());
let edges_vec = edge_groups.entry(group_key.clone()).or_default();
let edge_idx = edges_vec.len();
edges_vec.push((*src_offset, *dst_offset));
if !edge.properties.is_empty() {
let props = edge_props_groups.entry(group_key).or_default();
for (key, value) in edge.properties.iter() {
let col = props
.entry(key.clone())
.or_insert_with(|| vec![Value::Null; edge_idx]);
while col.len() < edge_idx {
col.push(Value::Null);
}
col.push(value.clone());
}
let expected_len = edge_idx + 1;
for col in props.values_mut() {
while col.len() < expected_len {
col.push(Value::Null);
}
}
}
}
}
}
for ((edge_type, src_label, dst_label), edges) in &edge_groups {
let edge_props =
edge_props_groups.get(&(edge_type.clone(), src_label.clone(), dst_label.clone()));
builder = builder.rel_table(
edge_type.as_str(),
src_label.as_str(),
dst_label.as_str(),
|r| {
r.edges(edges.clone()).backward(true);
if let Some(props) = edge_props {
for (key, values) in props {
let inferred = infer_type_from_values(values);
match inferred {
InferredType::BitPacked => {
let u64_values: Vec<u64> = values
.iter()
.map(|v| match v {
#[allow(clippy::cast_sign_loss)]
Value::Int64(n) => *n as u64,
_ => 0,
})
.collect();
let bp = BitPackedInts::pack(&u64_values);
r.properties.push((key.clone(), ColumnCodec::BitPacked(bp)));
}
InferredType::RawI64 => {
let i64_values: Vec<i64> = values
.iter()
.map(|v| match v {
Value::Int64(n) => *n,
_ => 0,
})
.collect();
r.properties
.push((key.clone(), ColumnCodec::RawI64(i64_values)));
}
InferredType::Float64 => {
let f64_values: Vec<f64> = values
.iter()
.map(|v| match v {
Value::Float64(f) => *f,
Value::Int64(n) => *n as f64,
_ => 0.0,
})
.collect();
r.properties
.push((key.clone(), ColumnCodec::Float64(f64_values)));
}
InferredType::Float32Vector { dimensions } => {
let mut flat: Vec<f32> =
Vec::with_capacity(values.len() * dimensions as usize);
for v in values {
match v {
Value::Vector(vec) => flat.extend_from_slice(vec),
_ => flat.extend(std::iter::repeat_n(
0.0f32,
usize::from(dimensions),
)),
}
}
r.properties.push((
key.clone(),
ColumnCodec::Float32Vector {
data: flat,
dimensions,
},
));
}
InferredType::Bitmap => {
let bool_values: Vec<bool> = values
.iter()
.map(|v| matches!(v, Value::Bool(true)))
.collect();
let bv = BitVector::from_bools(&bool_values);
r.properties.push((key.clone(), ColumnCodec::Bitmap(bv)));
}
InferredType::Dict => {
let str_values: Vec<String> = values
.iter()
.map(|v| match v {
Value::Null => String::new(),
Value::String(s) => s.to_string(),
other => format!("{other}"),
})
.collect();
let mut dict_builder = DictionaryBuilder::new();
for s in &str_values {
dict_builder.add(s);
}
let dict = dict_builder.build();
r.properties.push((key.clone(), ColumnCodec::Dict(dict)));
}
}
}
}
r
},
);
}
builder.build()
}
pub fn from_graph_store_preserving_ids(
store: &dyn crate::graph::traits::GraphStore,
) -> Result<CompactStore, CompactStoreError> {
let mut compact = from_graph_store(store)?;
let labels = store.all_labels();
if labels.is_empty() {
compact.set_id_maps(
FxHashMap::default(),
FxHashMap::default(),
Vec::new(),
Vec::new(),
);
return Ok(compact);
}
let mut node_id_map: FxHashMap<grafeo_common::types::NodeId, (u16, u64)> = FxHashMap::default();
let num_tables = compact.node_tables_by_id.len();
let mut node_offset_to_id: Vec<Vec<grafeo_common::types::NodeId>> =
vec![Vec::new(); num_tables];
let mut seen: FxHashSet<grafeo_common::types::NodeId> = FxHashSet::default();
let mut label_key_offsets: FxHashMap<ArcStr, u32> = FxHashMap::default();
for label in &labels {
let node_ids = store.nodes_by_label(label);
for &nid in &node_ids {
if !seen.insert(nid) {
continue;
}
let Some(node) = store.get_node(nid) else {
continue;
};
let label_key: ArcStr = if node.labels.len() <= 1 {
ArcStr::from(label.as_str())
} else {
let mut sorted: Vec<&str> = node.labels.iter().map(|l| l.as_str()).collect();
sorted.sort_unstable();
ArcStr::from(sorted.join("|"))
};
let offset = label_key_offsets.entry(label_key.clone()).or_insert(0);
let current_offset = *offset;
*offset += 1;
if let Some(&table_id) = compact.label_to_table_id.get(&label_key) {
node_id_map.insert(nid, (table_id, u64::from(current_offset)));
if let Some(rev) = node_offset_to_id.get_mut(table_id as usize) {
while rev.len() <= current_offset as usize {
rev.push(grafeo_common::types::NodeId::INVALID);
}
rev[current_offset as usize] = nid;
}
}
}
}
type RelKey = (ArcStr, u16, u16);
let mut rel_key_to_id: FxHashMap<RelKey, u16> = FxHashMap::default();
for (idx, rt) in compact.rel_tables_by_id.iter().enumerate() {
let key = (rt.edge_type().clone(), rt.src_table_id(), rt.dst_table_id());
let Ok(rel_id) = u16::try_from(idx) else {
continue;
};
rel_key_to_id.insert(key, rel_id);
}
type EdgeGroupEntry = (grafeo_common::types::EdgeId, u32, u32); let mut edge_groups: FxHashMap<RelKey, Vec<EdgeGroupEntry>> = FxHashMap::default();
let mut seen_edges: FxHashSet<grafeo_common::types::EdgeId> = FxHashSet::default();
for &nid in node_id_map.keys() {
let outgoing = store.edges_from(nid, crate::graph::Direction::Outgoing);
for (_target_nid, edge_id) in outgoing {
if !seen_edges.insert(edge_id) {
continue;
}
let Some(edge) = store.get_edge(edge_id) else {
continue;
};
let Some(&(src_tid, src_off)) = node_id_map.get(&edge.src) else {
continue;
};
let Some(&(dst_tid, dst_off)) = node_id_map.get(&edge.dst) else {
continue;
};
let key: RelKey = (edge.edge_type.clone(), src_tid, dst_tid);
edge_groups.entry(key).or_default().push((
edge_id,
u32::try_from(src_off).unwrap_or(0),
u32::try_from(dst_off).unwrap_or(0),
));
}
}
let num_rel_tables = compact.rel_tables_by_id.len();
let mut edge_id_map: FxHashMap<grafeo_common::types::EdgeId, (u16, u64)> = FxHashMap::default();
let mut edge_offset_to_id: Vec<Vec<grafeo_common::types::EdgeId>> =
vec![Vec::new(); num_rel_tables];
for (key, mut entries) in edge_groups {
let Some(&rel_table_id) = rel_key_to_id.get(&key) else {
continue;
};
entries.sort_by_key(|&(_, src, dst)| (src, dst));
let rev = &mut edge_offset_to_id[rel_table_id as usize];
for (csr_pos, (original_eid, _src, _dst)) in entries.iter().enumerate() {
edge_id_map.insert(*original_eid, (rel_table_id, csr_pos as u64));
while rev.len() <= csr_pos {
rev.push(grafeo_common::types::EdgeId::INVALID);
}
rev[csr_pos] = *original_eid;
}
}
compact.set_id_maps(
node_id_map,
edge_id_map,
node_offset_to_id,
edge_offset_to_id,
);
Ok(compact)
}
fn infer_type_from_values(values: &[Value]) -> InferredType {
let mut saw_unsigned_int = false; let mut saw_signed_int = false; let mut saw_float = false;
let mut saw_bool = false;
let mut saw_vector = false;
let mut saw_other = false;
let mut vector_dims: Option<u16> = None;
for v in values {
match v {
Value::Null => {} Value::Int64(n) if *n >= 0 => saw_unsigned_int = true,
Value::Int64(_) => saw_signed_int = true,
Value::Float64(_) => saw_float = true,
Value::Bool(_) => saw_bool = true,
Value::Vector(vec) => {
saw_vector = true;
let Ok(dims) = u16::try_from(vec.len()) else {
saw_other = true; continue;
};
if let Some(prev) = vector_dims {
if prev != dims {
saw_other = true; }
} else {
vector_dims = Some(dims);
}
}
_ => saw_other = true,
}
}
let saw_any_int = saw_unsigned_int || saw_signed_int;
if saw_vector
&& !saw_other
&& !saw_any_int
&& !saw_float
&& !saw_bool
&& let Some(dims) = vector_dims
&& dims > 0
{
return InferredType::Float32Vector { dimensions: dims };
}
if saw_other || saw_vector || (saw_any_int && saw_bool) || (saw_float && saw_bool) {
InferredType::Dict
} else if saw_float {
InferredType::Float64
} else if saw_signed_int {
InferredType::RawI64
} else if saw_unsigned_int {
InferredType::BitPacked
} else if saw_bool {
InferredType::Bitmap
} else {
InferredType::Dict
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::traits::GraphStore;
#[test]
fn test_builder_basic() {
let store = CompactStoreBuilder::new()
.node_table("Person", |t| {
t.column_bitpacked("age", &[25, 30, 35, 40, 45], 6)
.column_dict("name", &["Alix", "Gus", "Vincent", "Jules", "Mia"])
})
.build()
.unwrap();
let ids = store.nodes_by_label("Person");
assert_eq!(ids.len(), 5);
}
#[test]
fn test_builder_with_edges() {
let store = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("val", &[1, 2, 3], 4))
.node_table("B", |t| t.column_bitpacked("val", &[10, 20], 8))
.rel_table("LINKS", "A", "B", |r| {
r.edges([(0, 0), (0, 1), (1, 0), (2, 1)]).backward(true)
})
.build()
.unwrap();
let a_ids = store.nodes_by_label("A");
assert_eq!(a_ids.len(), 3);
let b_ids = store.nodes_by_label("B");
assert_eq!(b_ids.len(), 2);
}
#[test]
fn test_builder_label_not_found() {
let result = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("val", &[1], 4))
.rel_table("LINKS", "A", "B", |r| {
r.edges([(0, 0)])
})
.build();
assert!(result.is_err());
}
#[test]
fn test_from_graph_store_round_trip() {
let original = CompactStoreBuilder::new()
.node_table("Person", |t| {
t.column_bitpacked("age", &[25, 30, 35], 6)
.column_dict("name", &["Alix", "Gus", "Vincent"])
.column_bitmap("active", &[true, false, true])
})
.node_table("City", |t| t.column_dict("name", &["Amsterdam", "Berlin"]))
.rel_table("LIVES_IN", "Person", "City", |r| {
r.edges([(0, 0), (1, 1), (2, 0)]).backward(true)
})
.build()
.unwrap();
let converted = from_graph_store(&original).unwrap();
assert_eq!(converted.nodes_by_label("Person").len(), 3);
assert_eq!(converted.nodes_by_label("City").len(), 2);
let person_ids = converted.nodes_by_label("Person");
let mut ages: Vec<i64> = person_ids
.iter()
.filter_map(|&id| {
converted
.get_node_property(id, &PropertyKey::new("age"))
.and_then(|v| v.as_int64())
})
.collect();
ages.sort_unstable();
assert_eq!(ages, vec![25, 30, 35]);
let city_ids = converted.nodes_by_label("City");
let mut total_edges = 0;
for &pid in &person_ids {
let edges = converted.edges_from(pid, crate::graph::Direction::Outgoing);
total_edges += edges.len();
}
assert_eq!(total_edges, 3);
for &cid in &city_ids {
let incoming = converted.edges_from(cid, crate::graph::Direction::Incoming);
assert!(!incoming.is_empty());
}
}
#[test]
fn test_from_graph_store_empty() {
let empty = CompactStoreBuilder::new().build().unwrap();
let converted = from_graph_store(&empty).unwrap();
assert_eq!(converted.nodes_by_label("Anything").len(), 0);
}
#[test]
fn test_from_graph_store_with_lpg_store() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let alix_id = store.create_node(&["Person"]);
store.set_node_property(alix_id, "name", Value::from("Alix"));
store.set_node_property(alix_id, "age", Value::Int64(30));
let gus_id = store.create_node(&["Person"]);
store.set_node_property(gus_id, "name", Value::from("Gus"));
store.set_node_property(gus_id, "age", Value::Int64(25));
let amsterdam_id = store.create_node(&["City"]);
store.set_node_property(amsterdam_id, "name", Value::from("Amsterdam"));
store.create_edge(alix_id, amsterdam_id, "LIVES_IN");
store.create_edge(gus_id, amsterdam_id, "LIVES_IN");
let compact = from_graph_store(&store).unwrap();
assert_eq!(compact.nodes_by_label("Person").len(), 2);
assert_eq!(compact.nodes_by_label("City").len(), 1);
let person_ids = compact.nodes_by_label("Person");
let mut names: Vec<String> = person_ids
.iter()
.filter_map(|&id| {
compact
.get_node_property(id, &PropertyKey::new("name"))
.and_then(|v| v.as_str().map(|s| s.to_string()))
})
.collect();
names.sort();
assert_eq!(names, vec!["Alix", "Gus"]);
let mut total_outgoing = 0;
for &pid in &person_ids {
let edges = compact.edges_from(pid, crate::graph::Direction::Outgoing);
total_outgoing += edges.len();
}
assert_eq!(total_outgoing, 2);
let city_ids = compact.nodes_by_label("City");
assert_eq!(city_ids.len(), 1);
let incoming = compact.edges_from(city_ids[0], crate::graph::Direction::Incoming);
assert_eq!(incoming.len(), 2);
}
#[test]
fn test_from_graph_store_edge_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let alix = store.create_node(&["Person"]);
store.set_node_property(alix, "name", Value::from("Alix"));
let gus = store.create_node(&["Person"]);
store.set_node_property(gus, "name", Value::from("Gus"));
let e1 = store.create_edge(alix, gus, "KNOWS");
store.set_edge_property(e1, "since", Value::Int64(2020));
let e2 = store.create_edge(gus, alix, "KNOWS");
store.set_edge_property(e2, "since", Value::Int64(2021));
let compact = from_graph_store(&store).unwrap();
let person_ids = compact.nodes_by_label("Person");
let mut total_edges = 0;
for &pid in &person_ids {
total_edges += compact
.edges_from(pid, crate::graph::Direction::Outgoing)
.len();
}
assert_eq!(total_edges, 2);
for &pid in &person_ids {
let edges = compact.edges_from(pid, crate::graph::Direction::Outgoing);
for (_target, eid) in &edges {
let edge = compact.get_edge(*eid).unwrap();
let since = edge.properties.get(&PropertyKey::new("since")).unwrap();
match since {
Value::Int64(v) => assert!(*v == 2020 || *v == 2021),
_ => panic!("expected Int64 for 'since', got {since:?}"),
}
}
}
}
#[test]
fn test_from_graph_store_edge_bool_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Node"]);
let b = store.create_node(&["Node"]);
let e = store.create_edge(a, b, "LINK");
store.set_edge_property(e, "active", Value::Bool(true));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Node");
let edges = compact.edges_from(ids[0], crate::graph::Direction::Outgoing);
assert_eq!(edges.len(), 1);
let edge = compact.get_edge(edges[0].1).unwrap();
assert_eq!(
edge.properties.get(&PropertyKey::new("active")),
Some(&Value::Bool(true))
);
}
#[test]
fn test_from_graph_store_edge_string_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Node"]);
let b = store.create_node(&["Node"]);
let e = store.create_edge(a, b, "LINK");
store.set_edge_property(e, "label", Value::from("primary"));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Node");
let edges = compact.edges_from(ids[0], crate::graph::Direction::Outgoing);
let edge = compact.get_edge(edges[0].1).unwrap();
assert_eq!(
edge.properties.get(&PropertyKey::new("label")),
Some(&Value::String(ArcStr::from("primary")))
);
}
#[test]
fn test_from_graph_store_negative_int_preserves_int64_type() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Item"]);
store.set_node_property(a, "temp", Value::Int64(-10));
let b = store.create_node(&["Item"]);
store.set_node_property(b, "temp", Value::Int64(5));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Item");
assert_eq!(ids.len(), 2);
let mut temps: Vec<i64> = ids
.iter()
.filter_map(
|&id| match compact.get_node_property(id, &PropertyKey::new("temp")) {
Some(Value::Int64(n)) => Some(n),
_ => None,
},
)
.collect();
temps.sort_unstable();
assert_eq!(temps, vec![-10, 5]);
}
#[test]
fn test_from_graph_store_float64_column() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Sensor"]);
store.set_node_property(a, "reading", Value::Float64(98.6));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Sensor");
assert_eq!(ids.len(), 1);
let val = compact
.get_node_property(ids[0], &PropertyKey::new("reading"))
.unwrap();
assert_eq!(val, Value::Float64(98.6));
}
#[test]
fn test_from_graph_store_mixed_types_fall_back_to_dict() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Thing"]);
store.set_node_property(a, "value", Value::Int64(42));
let b = store.create_node(&["Thing"]);
store.set_node_property(b, "value", Value::Bool(true));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Thing");
assert_eq!(ids.len(), 2);
for &id in &ids {
let val = compact
.get_node_property(id, &PropertyKey::new("value"))
.unwrap();
assert!(
matches!(val, Value::String(_)),
"expected String (Dict fallback), got {val:?}"
);
}
}
#[test]
fn test_from_graph_store_sparse_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Item"]);
store.set_node_property(a, "name", Value::from("alpha"));
store.set_node_property(a, "score", Value::Int64(10));
let b = store.create_node(&["Item"]);
store.set_node_property(b, "name", Value::from("beta"));
let c = store.create_node(&["Item"]);
store.set_node_property(c, "score", Value::Int64(20));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Item");
assert_eq!(ids.len(), 3);
let mut name_count = 0;
let mut score_count = 0;
for &id in &ids {
if let Some(Value::String(s)) = compact.get_node_property(id, &PropertyKey::new("name"))
&& !s.is_empty()
{
name_count += 1;
}
if let Some(Value::Int64(n)) = compact.get_node_property(id, &PropertyKey::new("score"))
&& n > 0
{
score_count += 1;
}
}
assert_eq!(name_count, 2);
assert_eq!(score_count, 2);
}
#[test]
fn test_from_graph_store_multi_label_nodes() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Person", "Actor"]);
store.set_node_property(a, "name", Value::from("Vincent"));
let b = store.create_node(&["Person"]);
store.set_node_property(b, "name", Value::from("Jules"));
let compact = from_graph_store(&store).unwrap();
let person_ids = compact.nodes_by_label("Person");
assert_eq!(person_ids.len(), 1);
let compound_ids = compact.nodes_by_label("Actor|Person");
assert_eq!(compound_ids.len(), 1);
let val = compact
.get_node_property(compound_ids[0], &PropertyKey::new("name"))
.unwrap();
assert_eq!(val, Value::String(ArcStr::from("Vincent")));
}
#[test]
fn test_from_graph_store_all_null_column() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Item"]);
store.set_node_property(a, "x", Value::Int64(1));
let b = store.create_node(&["Item"]);
store.set_node_property(b, "y", Value::Int64(2));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Item");
assert_eq!(ids.len(), 2);
}
#[test]
fn test_infer_type_all_nulls() {
assert_eq!(
infer_type_from_values(&[Value::Null, Value::Null]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_int_only() {
assert_eq!(
infer_type_from_values(&[Value::Int64(5), Value::Int64(10)]),
InferredType::BitPacked
);
}
#[test]
fn test_infer_type_bool_only() {
assert_eq!(
infer_type_from_values(&[Value::Bool(true), Value::Bool(false)]),
InferredType::Bitmap
);
}
#[test]
fn test_infer_type_mixed_int_bool() {
assert_eq!(
infer_type_from_values(&[Value::Int64(1), Value::Bool(true)]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_negative_int() {
assert_eq!(
infer_type_from_values(&[Value::Int64(-5), Value::Int64(10)]),
InferredType::RawI64
);
assert_eq!(
infer_type_from_values(&[Value::Int64(-5)]),
InferredType::RawI64
);
assert_eq!(
infer_type_from_values(&[Value::Int64(5), Value::Int64(10)]),
InferredType::BitPacked
);
}
#[test]
fn test_infer_type_float() {
assert_eq!(
infer_type_from_values(&[Value::Float64(1.5)]),
InferredType::Float64
);
}
#[test]
fn test_infer_type_mixed_int_float_coalesces_to_float() {
assert_eq!(
infer_type_from_values(&[Value::Int64(1), Value::Float64(2.5)]),
InferredType::Float64
);
}
#[test]
fn test_infer_type_int_with_nulls() {
assert_eq!(
infer_type_from_values(&[Value::Int64(5), Value::Null, Value::Int64(10)]),
InferredType::BitPacked
);
}
#[test]
fn test_from_graph_store_multi_label_edge_type() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let m1 = store.create_node(&["Method"]);
store.set_node_property(m1, "name", Value::from("foo"));
let m2 = store.create_node(&["Method"]);
store.set_node_property(m2, "name", Value::from("bar"));
let c1 = store.create_node(&["Class"]);
store.set_node_property(c1, "name", Value::from("MyClass"));
let i1 = store.create_node(&["Interface"]);
store.set_node_property(i1, "name", Value::from("MyInterface"));
store.create_edge(m1, m2, "CALLS"); store.create_edge(c1, m1, "CALLS"); store.create_edge(m1, c1, "USES_TYPE"); store.create_edge(m1, i1, "USES_TYPE");
let compact = from_graph_store(&store).unwrap();
assert_eq!(compact.nodes_by_label("Method").len(), 2);
assert_eq!(compact.nodes_by_label("Class").len(), 1);
assert_eq!(compact.nodes_by_label("Interface").len(), 1);
let calls_tables = compact.rel_tables_for_type("CALLS");
let uses_tables = compact.rel_tables_for_type("USES_TYPE");
assert_eq!(
calls_tables.len(),
2,
"CALLS should have 2 rel tables (different label pairs)"
);
assert_eq!(
uses_tables.len(),
2,
"USES_TYPE should have 2 rel tables (different label pairs)"
);
let total_calls: usize = calls_tables.iter().map(|rt| rt.num_edges()).sum();
assert_eq!(total_calls, 2, "Should have 2 CALLS edges total");
let total_uses: usize = uses_tables.iter().map(|rt| rt.num_edges()).sum();
assert_eq!(total_uses, 2, "Should have 2 USES_TYPE edges total");
use crate::graph::traits::GraphStore;
let mut edge_types = compact.all_edge_types();
edge_types.sort();
assert_eq!(
edge_types,
vec!["CALLS", "USES_TYPE"],
"all_edge_types should return each type once, not per rel table"
);
let avg_out = compact.estimate_avg_degree("USES_TYPE", true);
assert!(avg_out > 0.0, "USES_TYPE outgoing degree should be > 0");
assert!(
(avg_out - 1.0).abs() < f64::EPSILON,
"USES_TYPE avg outgoing degree should be 1.0 (2 edges / 2 Method nodes), got {avg_out}"
);
let unknown = compact.estimate_avg_degree("NONEXISTENT", true);
assert!(
(unknown - 0.0).abs() < f64::EPSILON,
"Unknown edge type should return 0.0 avg degree"
);
}
#[test]
fn test_zone_map_u64_values_exceeding_i64_max() {
let values = vec![0u64, i64::MAX as u64 + 1, u64::MAX];
let zm = compute_zone_map_u64(&values);
assert!(
zm.min.is_none(),
"min should be None when values overflow i64"
);
assert!(
zm.max.is_none(),
"max should be None when values overflow i64"
);
assert_eq!(zm.row_count, 3);
}
#[test]
fn test_zone_map_u64_within_i64_range() {
let values = vec![10u64, 20, 30];
let zm = compute_zone_map_u64(&values);
assert_eq!(zm.min, Some(Value::Int64(10)));
assert_eq!(zm.max, Some(Value::Int64(30)));
assert_eq!(zm.null_count, 0);
assert_eq!(zm.row_count, 3);
}
#[test]
fn test_zone_map_u64_empty_slice() {
let zm = compute_zone_map_u64(&[]);
assert!(zm.min.is_none());
assert!(zm.max.is_none());
assert_eq!(zm.row_count, 0);
}
#[test]
fn test_zone_map_strings_empty_slice() {
let zm = compute_zone_map_strings(&[]);
assert!(zm.min.is_none());
assert!(zm.max.is_none());
assert_eq!(zm.row_count, 0);
}
#[test]
fn test_zone_map_strings_sorted() {
let values = &["Paris", "Amsterdam", "Berlin"];
let zm = compute_zone_map_strings(values);
assert_eq!(zm.min, Some(Value::from("Amsterdam")));
assert_eq!(zm.max, Some(Value::from("Paris")));
assert_eq!(zm.row_count, 3);
}
#[test]
fn test_zone_map_bool_all_true() {
let values = &[true, true, true];
let zm = compute_zone_map_bool(values);
assert_eq!(zm.min, Some(Value::Bool(true)));
assert_eq!(zm.max, Some(Value::Bool(true)));
assert_eq!(zm.row_count, 3);
}
#[test]
fn test_zone_map_bool_all_false() {
let values = &[false, false];
let zm = compute_zone_map_bool(values);
assert_eq!(zm.min, Some(Value::Bool(false)));
assert_eq!(zm.max, Some(Value::Bool(false)));
assert_eq!(zm.row_count, 2);
}
#[test]
fn test_zone_map_bool_mixed() {
let values = &[false, true, false];
let zm = compute_zone_map_bool(values);
assert_eq!(zm.min, Some(Value::Bool(false)));
assert_eq!(zm.max, Some(Value::Bool(true)));
assert_eq!(zm.row_count, 3);
}
#[test]
fn test_zone_map_bool_empty() {
let zm = compute_zone_map_bool(&[]);
assert!(zm.min.is_none());
assert!(zm.max.is_none());
assert_eq!(zm.row_count, 0);
}
#[test]
fn test_infer_type_string_values() {
assert_eq!(
infer_type_from_values(&[Value::from("Alix"), Value::from("Gus")]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_int_and_null() {
assert_eq!(
infer_type_from_values(&[Value::Int64(0), Value::Null, Value::Int64(5)]),
InferredType::BitPacked
);
}
#[test]
fn test_infer_type_bool_and_null() {
assert_eq!(
infer_type_from_values(&[Value::Bool(true), Value::Null]),
InferredType::Bitmap
);
}
#[test]
fn test_infer_type_empty_values() {
assert_eq!(infer_type_from_values(&[]), InferredType::Dict);
}
#[test]
fn test_from_graph_store_nodes_with_no_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
store.create_node(&["Marker"]);
store.create_node(&["Marker"]);
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Marker");
assert_eq!(ids.len(), 2);
}
#[test]
fn test_from_graph_store_mixed_vector_dims() {
use crate::graph::lpg::LpgStore;
use std::sync::Arc;
let store = LpgStore::new().unwrap();
let alix = store.create_node(&["Doc"]);
let short: Arc<[f32]> = Arc::from([0.1f32, 0.2, 0.3].as_slice());
store.set_node_property(alix, "embedding", Value::Vector(short));
let gus = store.create_node(&["Doc"]);
let long: Arc<[f32]> = Arc::from([0.4f32, 0.5, 0.6, 0.7, 0.8].as_slice());
store.set_node_property(gus, "embedding", Value::Vector(long));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Doc");
assert_eq!(ids.len(), 2);
let mut seen_vec_strings = 0usize;
for &id in &ids {
let val = compact
.get_node_property(id, &PropertyKey::new("embedding"))
.expect("embedding property missing");
match val {
Value::String(s) => {
let lower = s.to_lowercase();
assert!(
lower.contains("vector"),
"dict-encoded vector should include the vector tag: {s}"
);
seen_vec_strings += 1;
}
other => panic!("expected Dict fallback (String), got {other:?}"),
}
}
assert_eq!(seen_vec_strings, 2);
}
#[test]
fn test_from_graph_store_float32_vector() {
use crate::graph::lpg::LpgStore;
use std::sync::Arc;
let store = LpgStore::new().unwrap();
let expected: [f32; 4] = [0.1, 0.2, 0.3, 0.4];
for name in ["Alix", "Gus", "Vincent"] {
let id = store.create_node(&["Doc"]);
store.set_node_property(id, "name", Value::from(name));
let emb: Arc<[f32]> = Arc::from(expected.as_slice());
store.set_node_property(id, "embedding", Value::Vector(emb));
}
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Doc");
assert_eq!(ids.len(), 3);
for &id in &ids {
let v = compact
.get_node_property(id, &PropertyKey::new("embedding"))
.expect("embedding missing");
match v {
Value::Vector(data) => {
assert_eq!(&*data, &expected, "unexpected vector contents");
}
other => panic!("expected Value::Vector, got {other:?}"),
}
}
}
#[test]
fn test_from_graph_store_all_null() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let mut ids = Vec::new();
for i in 0..20 {
let nid = store.create_node(&["Item"]);
if i == 3 || i == 17 {
store.set_node_property(nid, "flag", Value::Int64(i64::from(i)));
}
ids.push(nid);
}
let compact = from_graph_store(&store).unwrap();
let rows = compact.nodes_by_label("Item");
assert_eq!(rows.len(), 20);
let mut nonzero = 0usize;
for &id in &rows {
if let Some(Value::Int64(v)) = compact.get_node_property(id, &PropertyKey::new("flag"))
&& v > 0
{
nonzero += 1;
}
}
assert_eq!(
nonzero, 2,
"only 2 nodes had a real 'flag' value; the rest should be zero-padded"
);
}
#[test]
fn test_compute_zone_map_i64_boundary() {
let at_boundary = vec![0u64, 100, i64::MAX as u64];
let zm = compute_zone_map_u64(&at_boundary);
assert_eq!(zm.min, Some(Value::Int64(0)));
assert_eq!(zm.max, Some(Value::Int64(i64::MAX)));
assert_eq!(zm.row_count, 3);
let above_boundary = vec![0u64, i64::MAX as u64 + 1];
let zm = compute_zone_map_u64(&above_boundary);
assert!(zm.min.is_none());
assert!(zm.max.is_none());
assert_eq!(zm.row_count, 2);
}
#[test]
fn test_compute_zone_map_bool() {
let mostly_false = vec![false; 9]
.into_iter()
.chain(std::iter::once(true))
.collect::<Vec<_>>();
let zm = compute_zone_map_bool(&mostly_false);
assert_eq!(zm.min, Some(Value::Bool(false)));
assert_eq!(zm.max, Some(Value::Bool(true)));
assert_eq!(zm.row_count, 10);
let mostly_true = std::iter::once(false)
.chain(std::iter::repeat_n(true, 9))
.collect::<Vec<_>>();
let zm = compute_zone_map_bool(&mostly_true);
assert_eq!(zm.min, Some(Value::Bool(false)));
assert_eq!(zm.max, Some(Value::Bool(true)));
assert_eq!(zm.row_count, 10);
}
#[test]
fn test_from_graph_store_multi_label_sorted_key() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Zebra", "Alpha"]);
store.set_node_property(a, "name", Value::from("Butch"));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Alpha|Zebra");
assert_eq!(ids.len(), 1);
let val = compact
.get_node_property(ids[0], &PropertyKey::new("name"))
.unwrap();
assert_eq!(val, Value::String(ArcStr::from("Butch")));
}
#[test]
fn test_infer_type_timestamp_falls_back_to_dict() {
use grafeo_common::types::Timestamp;
let ts = Value::Timestamp(Timestamp::from_millis(1_700_000_000_000));
assert_eq!(infer_type_from_values(&[ts]), InferredType::Dict);
}
#[test]
fn test_infer_type_date_falls_back_to_dict() {
use grafeo_common::types::Date;
let d = Value::Date(Date::from_days(19000));
assert_eq!(infer_type_from_values(&[d]), InferredType::Dict);
}
#[test]
fn test_infer_type_vector_consistent_dim_picks_float32_vector() {
let v = Value::Vector(std::sync::Arc::from([0.1f32, 0.2, 0.3].as_slice()));
assert_eq!(
infer_type_from_values(&[v]),
InferredType::Float32Vector { dimensions: 3 }
);
}
#[test]
fn test_infer_type_mixed_dim_vectors_fall_back_to_dict() {
let v3 = Value::Vector(std::sync::Arc::from([0.1f32, 0.2, 0.3].as_slice()));
let v5 = Value::Vector(std::sync::Arc::from(
[0.1f32, 0.2, 0.3, 0.4, 0.5].as_slice(),
));
assert_eq!(infer_type_from_values(&[v3, v5]), InferredType::Dict);
}
#[test]
fn test_infer_type_bytes_falls_back_to_dict() {
let b = Value::Bytes(std::sync::Arc::from(b"payload".as_slice()));
assert_eq!(infer_type_from_values(&[b]), InferredType::Dict);
}
#[test]
fn test_infer_type_null_with_bool_yields_bitmap() {
assert_eq!(
infer_type_from_values(&[Value::Null, Value::Bool(false), Value::Null]),
InferredType::Bitmap
);
}
#[test]
fn test_infer_type_saw_int_and_other_yields_dict() {
assert_eq!(
infer_type_from_values(&[Value::Int64(1), Value::from("hello")]),
InferredType::Dict
);
}
#[test]
fn test_builder_duplicate_label_error() {
let result = CompactStoreBuilder::new()
.node_table("Person", |t| t.column_bitpacked("age", &[30], 6))
.node_table("Person", |t| t.column_bitpacked("age", &[40], 6))
.build();
assert!(matches!(
result,
Err(CompactStoreError::DuplicateLabel(ref s)) if s == "Person"
));
}
#[test]
fn test_builder_duplicate_edge_type_error() {
let result = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("v", &[1], 4))
.node_table("B", |t| t.column_bitpacked("v", &[1], 4))
.rel_table("LINKS", "A", "B", |r| r.edges([(0, 0)]))
.rel_table("LINKS", "A", "B", |r| r.edges([(0, 0)]))
.build();
assert!(matches!(
result,
Err(CompactStoreError::DuplicateEdgeType(_))
));
}
#[test]
fn test_builder_column_length_mismatch_error() {
let result = CompactStoreBuilder::new()
.node_table("Person", |t| {
t.column_bitpacked("age", &[25, 30, 35], 6)
.column_dict("name", &["Alix", "Gus"]) })
.build();
assert!(matches!(
result,
Err(CompactStoreError::ColumnLengthMismatch {
expected: 3,
got: 2
})
));
}
#[test]
fn test_builder_value_overflow_error() {
let bad_value = (i64::MAX as u64) + 10;
let result = CompactStoreBuilder::new()
.node_table("Person", |t| {
t.column_bitpacked("x", &[1u64, bad_value], 64)
})
.build();
assert!(matches!(
result,
Err(CompactStoreError::ValueOverflow { ref column, value, .. })
if column == "x" && value == bad_value
));
}
#[test]
fn test_node_table_builder_prebuilt_column() {
use crate::codec::BitPackedInts;
let bp = BitPackedInts::pack(&[100u64, 200, 300]);
let codec = ColumnCodec::BitPacked(bp);
let store = CompactStoreBuilder::new()
.node_table("Item", |t| t.column("value", codec))
.build()
.unwrap();
let ids = store.nodes_by_label("Item");
assert_eq!(ids.len(), 3);
let mut values: Vec<i64> = ids
.iter()
.filter_map(|&id| {
store
.get_node_property(id, &PropertyKey::new("value"))
.and_then(|v| v.as_int64())
})
.collect();
values.sort_unstable();
assert_eq!(values, vec![100, 200, 300]);
}
#[test]
fn test_node_table_builder_int8_vector_zero_dimensions() {
let store = CompactStoreBuilder::new()
.node_table("Item", |t| t.column_int8_vector("embed", Vec::new(), 0))
.build()
.unwrap();
let ids = store.nodes_by_label("Item");
assert_eq!(ids.len(), 0);
}
#[test]
fn test_node_table_builder_int8_vector_multi_row() {
let store = CompactStoreBuilder::new()
.node_table("Doc", |t| {
t.column_int8_vector("embed", vec![1i8, 2, 3, 4, 5, 6], 3)
})
.build()
.unwrap();
let ids = store.nodes_by_label("Doc");
assert_eq!(ids.len(), 2);
}
#[test]
fn test_from_graph_store_preserving_ids_empty() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let compact = crate::graph::compact::from_graph_store_preserving_ids(&store).unwrap();
assert_eq!(compact.node_count(), 0);
assert_eq!(compact.edge_count(), 0);
}
#[test]
fn test_from_graph_store_single_node_no_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
store.create_node(&["Loner"]);
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Loner");
assert_eq!(ids.len(), 1);
}
#[test]
fn test_from_graph_store_preserving_ids_with_data() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let alix = store.create_node(&["Person"]);
store.set_node_property(alix, "name", Value::from("Alix"));
let gus = store.create_node(&["Person"]);
store.set_node_property(gus, "name", Value::from("Gus"));
let edge_id = store.create_edge(alix, gus, "KNOWS");
store.set_edge_property(edge_id, "since", Value::Int64(2020));
let compact = crate::graph::compact::from_graph_store_preserving_ids(&store).unwrap();
let alix_resolved = compact.get_node(alix);
assert!(
alix_resolved.is_some(),
"original NodeId should remain resolvable after preserve_ids"
);
assert_eq!(
alix_resolved
.unwrap()
.properties
.get(&PropertyKey::new("name")),
Some(&Value::String(ArcStr::from("Alix")))
);
let edge_resolved = compact.get_edge(edge_id);
assert!(
edge_resolved.is_some(),
"original EdgeId should remain resolvable after preserve_ids"
);
}
#[test]
fn test_from_graph_store_skewed_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
for (i, (name, key)) in [
("Alix", "a"),
("Gus", "b"),
("Vincent", "c"),
("Jules", "a"),
("Mia", "b"),
]
.iter()
.enumerate()
{
let nid = store.create_node(&["Person"]);
store.set_node_property(nid, "name", Value::from(*name));
let score = i64::try_from(i).unwrap_or(0);
store.set_node_property(nid, key, Value::Int64(score));
}
let compact = from_graph_store(&store).unwrap();
assert_eq!(compact.nodes_by_label("Person").len(), 5);
}
#[test]
#[should_panic(expected = "is not a multiple of dimensions")]
fn test_node_table_column_int8_vector_not_multiple_panics() {
let _ = CompactStoreBuilder::new().node_table("Bad", |t| {
t.column_int8_vector("vec", vec![1, 2, 3, 4, 5], 2)
});
}
#[test]
fn test_node_table_column_bitmap() {
let store = CompactStoreBuilder::new()
.node_table("Flag", |t| {
t.column_bitmap("active", &[true, false, true, false])
})
.build()
.unwrap();
let ids = store.nodes_by_label("Flag");
assert_eq!(ids.len(), 4);
let v0 = store
.get_node_property(ids[0], &PropertyKey::new("active"))
.unwrap();
assert_eq!(v0, Value::Bool(true));
}
#[test]
fn test_builder_same_edge_type_different_labels_allowed() {
let result = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("v", &[1], 4))
.node_table("B", |t| t.column_bitpacked("v", &[1], 4))
.node_table("C", |t| t.column_bitpacked("v", &[1], 4))
.rel_table("LINKS", "A", "B", |r| r.edges([(0, 0)]))
.rel_table("LINKS", "A", "C", |r| r.edges([(0, 0)]))
.build();
assert!(result.is_ok());
}
#[test]
fn test_builder_error_types_trait_impls() {
let err = CompactStoreError::LabelNotFound("Paris".to_string());
let cloned = err.clone();
assert!(format!("{cloned}").contains("Paris"));
assert!(format!("{cloned:?}").contains("LabelNotFound"));
let mismatch = CompactStoreError::ColumnLengthMismatch {
expected: 10,
got: 5,
};
assert!(format!("{mismatch}").contains("10"));
assert!(format!("{mismatch}").contains('5'));
let dup_label = CompactStoreError::DuplicateLabel("Berlin".to_string());
assert!(format!("{dup_label}").contains("Berlin"));
let dup_edge = CompactStoreError::DuplicateEdgeType("KNOWS".to_string());
assert!(format!("{dup_edge}").contains("KNOWS"));
let inconsistent = CompactStoreError::InconsistentEdgeData("boom".to_string());
assert!(format!("{inconsistent}").contains("boom"));
let overflow = CompactStoreError::ValueOverflow {
column: "age".to_string(),
value: u64::MAX,
max: i64::MAX as u64,
};
assert!(format!("{overflow}").contains("age"));
let table_overflow = CompactStoreError::TableCountOverflow {
kind: "node",
count: 99_999,
max: MAX_TABLE_ID,
};
assert!(format!("{table_overflow}").contains("node"));
assert!(format!("{table_overflow}").contains("99999"));
}
#[test]
fn test_rel_table_column_bitpacked() {
let store = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("v", &[1, 2], 4))
.node_table("B", |t| t.column_bitpacked("v", &[3, 4], 4))
.rel_table("LINKS", "A", "B", |r| {
r.edges([(0, 0), (1, 1)])
.backward(true)
.column_bitpacked("weight", &[100, 200], 8)
})
.build()
.unwrap();
let a_ids = store.nodes_by_label("A");
assert_eq!(a_ids.len(), 2);
let mut total_edges = 0;
for &id in &a_ids {
total_edges += store
.edges_from(id, crate::graph::Direction::Outgoing)
.len();
}
assert_eq!(total_edges, 2);
}
#[test]
fn test_from_graph_store_preserving_ids_multi_label() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let butch = store.create_node(&["Person", "Boxer"]);
store.set_node_property(butch, "name", Value::from("Butch"));
let compact = from_graph_store_preserving_ids(&store).unwrap();
assert!(compact.preserves_ids());
let name = compact
.get_node_property(butch, &PropertyKey::new("name"))
.and_then(|v| v.as_str().map(str::to_string));
assert_eq!(name.as_deref(), Some("Butch"));
}
#[test]
fn test_from_graph_store_preserving_ids_edges_sorted_by_csr_order() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Node"]);
let b = store.create_node(&["Node"]);
let c = store.create_node(&["Node"]);
let e_c_a = store.create_edge(c, a, "LINK");
let e_a_b = store.create_edge(a, b, "LINK");
let e_b_c = store.create_edge(b, c, "LINK");
let compact = from_graph_store_preserving_ids(&store).unwrap();
for eid in [e_c_a, e_a_b, e_b_c] {
let rec = compact.get_edge(eid).unwrap();
assert_eq!(rec.edge_type.as_str(), "LINK");
}
}
}