use arcstr::ArcStr;
use grafeo_common::types::{PropertyKey, Value};
use grafeo_common::utils::hash::{FxHashMap, FxHashSet};
use thiserror::Error;
use super::CompactStore;
use super::column::ColumnCodec;
use super::csr::CsrAdjacency;
use super::node_table::NodeTable;
use super::rel_table::RelTable;
use super::schema::{ColumnDef, ColumnType, EdgeSchema, TableSchema};
use super::zone_map::ZoneMap;
use crate::codec::{BitPackedInts, BitVector, DictionaryBuilder};
use crate::statistics::{EdgeTypeStatistics, LabelStatistics, Statistics};
#[derive(Debug, Clone, Error)]
#[non_exhaustive]
pub enum CompactStoreError {
#[error("node label not found: {0:?}")]
LabelNotFound(String),
#[error("column length mismatch: expected {expected} rows, got {got}")]
ColumnLengthMismatch {
expected: usize,
got: usize,
},
#[error("duplicate node label: {0:?}")]
DuplicateLabel(String),
#[error("duplicate edge type: {0:?}")]
DuplicateEdgeType(String),
#[error("inconsistent edge data: {0}")]
InconsistentEdgeData(String),
#[error("value overflow in column {column:?}: {value} exceeds i64::MAX ({max})")]
ValueOverflow {
column: String,
value: u64,
max: u64,
},
}
pub struct NodeTableBuilder {
label: ArcStr,
columns: Vec<(PropertyKey, ColumnCodec)>,
zone_maps: Vec<(PropertyKey, ZoneMap)>,
len: Option<usize>,
length_mismatch: Option<(usize, usize)>,
value_overflow: Option<(String, u64)>,
}
impl NodeTableBuilder {
fn new(label: impl Into<ArcStr>) -> Self {
Self {
label: label.into(),
columns: Vec::new(),
zone_maps: Vec::new(),
len: None,
length_mismatch: None,
value_overflow: None,
}
}
pub fn column_bitpacked(&mut self, name: &str, values: &[u64], bits: u8) -> &mut Self {
self.record_len(values.len());
if let Some(&bad) = values.iter().find(|&&v| v > i64::MAX as u64) {
self.value_overflow = Some((name.to_string(), bad));
}
let bp = BitPackedInts::pack_with_bits(values, bits);
let zone_map = compute_zone_map_u64(values);
self.zone_maps.push((PropertyKey::new(name), zone_map));
self.columns
.push((PropertyKey::new(name), ColumnCodec::BitPacked(bp)));
self
}
pub fn column_dict(&mut self, name: &str, values: &[&str]) -> &mut Self {
self.record_len(values.len());
let mut builder = DictionaryBuilder::new();
for &v in values {
builder.add(v);
}
let dict = builder.build();
let zone_map = compute_zone_map_strings(values);
self.zone_maps.push((PropertyKey::new(name), zone_map));
self.columns
.push((PropertyKey::new(name), ColumnCodec::Dict(dict)));
self
}
pub fn column_int8_vector(&mut self, name: &str, data: Vec<i8>, dimensions: u16) -> &mut Self {
let dims = dimensions as usize;
let row_count = if dims == 0 {
0
} else {
assert!(
data.len().is_multiple_of(dims),
"Int8Vector data length {} is not a multiple of dimensions {dimensions}",
data.len(),
);
data.len() / dims
};
self.record_len(row_count);
self.columns.push((
PropertyKey::new(name),
ColumnCodec::Int8Vector { data, dimensions },
));
self
}
pub fn column_bitmap(&mut self, name: &str, values: &[bool]) -> &mut Self {
self.record_len(values.len());
let bv = BitVector::from_bools(values);
let zone_map = compute_zone_map_bool(values);
self.zone_maps.push((PropertyKey::new(name), zone_map));
self.columns
.push((PropertyKey::new(name), ColumnCodec::Bitmap(bv)));
self
}
pub fn column(&mut self, name: &str, codec: ColumnCodec) -> &mut Self {
self.record_len(codec.len());
self.columns.push((PropertyKey::new(name), codec));
self
}
fn record_len(&mut self, col_len: usize) {
match self.len {
None => self.len = Some(col_len),
Some(expected) => {
if expected != col_len {
self.length_mismatch = Some((expected, col_len));
}
}
}
}
}
pub struct RelTableBuilder {
edge_type: ArcStr,
src_label: ArcStr,
dst_label: ArcStr,
edges: Vec<(u32, u32)>,
backward: bool,
properties: Vec<(PropertyKey, ColumnCodec)>,
}
impl RelTableBuilder {
fn new(
edge_type: impl Into<ArcStr>,
src_label: impl Into<ArcStr>,
dst_label: impl Into<ArcStr>,
) -> Self {
Self {
edge_type: edge_type.into(),
src_label: src_label.into(),
dst_label: dst_label.into(),
edges: Vec::new(),
backward: false,
properties: Vec::new(),
}
}
pub fn edges(&mut self, pairs: impl Into<Vec<(u32, u32)>>) -> &mut Self {
self.edges = pairs.into();
self
}
pub fn backward(&mut self, enabled: bool) -> &mut Self {
self.backward = enabled;
self
}
pub fn column_bitpacked(&mut self, name: &str, values: &[u64], bits: u8) -> &mut Self {
let bp = BitPackedInts::pack_with_bits(values, bits);
self.properties
.push((PropertyKey::new(name), ColumnCodec::BitPacked(bp)));
self
}
}
#[derive(Default)]
pub struct CompactStoreBuilder {
node_table_builders: Vec<NodeTableBuilder>,
rel_table_builders: Vec<RelTableBuilder>,
}
impl CompactStoreBuilder {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn node_table(
mut self,
label: &str,
f: impl FnOnce(&mut NodeTableBuilder) -> &mut NodeTableBuilder,
) -> Self {
let mut builder = NodeTableBuilder::new(label);
f(&mut builder);
self.node_table_builders.push(builder);
self
}
pub fn rel_table(
mut self,
edge_type: &str,
src_label: &str,
dst_label: &str,
f: impl FnOnce(&mut RelTableBuilder) -> &mut RelTableBuilder,
) -> Self {
let mut builder = RelTableBuilder::new(edge_type, src_label, dst_label);
f(&mut builder);
self.rel_table_builders.push(builder);
self
}
pub fn build(self) -> Result<CompactStore, CompactStoreError> {
for ntb in &self.node_table_builders {
if let Some((expected, got)) = ntb.length_mismatch {
return Err(CompactStoreError::ColumnLengthMismatch { expected, got });
}
if let Some((ref column, value)) = ntb.value_overflow {
return Err(CompactStoreError::ValueOverflow {
column: column.clone(),
max: i64::MAX as u64,
value,
});
}
}
{
let mut seen_labels = FxHashSet::default();
for ntb in &self.node_table_builders {
if !seen_labels.insert(&ntb.label) {
return Err(CompactStoreError::DuplicateLabel(ntb.label.to_string()));
}
}
}
{
let mut seen_triples = FxHashSet::default();
for rtb in &self.rel_table_builders {
if !seen_triples.insert((&rtb.edge_type, &rtb.src_label, &rtb.dst_label)) {
return Err(CompactStoreError::DuplicateEdgeType(format!(
"{} ({} -> {})",
rtb.edge_type, rtb.src_label, rtb.dst_label
)));
}
}
}
let mut label_to_table_id: FxHashMap<ArcStr, u16> = FxHashMap::default();
let mut table_id_to_label: Vec<ArcStr> = Vec::new();
for (idx, ntb) in self.node_table_builders.iter().enumerate() {
let table_id = idx as u16;
label_to_table_id.insert(ntb.label.clone(), table_id);
table_id_to_label.push(ntb.label.clone());
}
let mut node_tables_by_id: Vec<NodeTable> =
Vec::with_capacity(self.node_table_builders.len());
for (idx, ntb) in self.node_table_builders.into_iter().enumerate() {
let table_id = idx as u16;
let row_count = ntb.len.unwrap_or(0);
let col_defs: Vec<ColumnDef> = ntb
.columns
.iter()
.map(|(key, codec)| {
let col_type = infer_column_type(codec);
ColumnDef::new(key.as_str(), col_type)
})
.collect();
let schema = TableSchema::new(ntb.label.as_str(), table_id, col_defs);
let columns: FxHashMap<PropertyKey, ColumnCodec> = ntb.columns.into_iter().collect();
let zone_maps: FxHashMap<PropertyKey, ZoneMap> = ntb.zone_maps.into_iter().collect();
let table = NodeTable::from_columns(schema, columns, zone_maps, row_count);
node_tables_by_id.push(table);
}
let mut rel_tables_by_id: Vec<RelTable> = Vec::with_capacity(self.rel_table_builders.len());
let mut edge_type_to_rel_id: FxHashMap<ArcStr, Vec<u16>> = FxHashMap::default();
let mut rel_table_id_to_type: Vec<ArcStr> = Vec::new();
for (idx, rtb) in self.rel_table_builders.into_iter().enumerate() {
let rel_table_id = idx as u16;
rel_table_id_to_type.push(rtb.edge_type.clone());
let src_table_id = *label_to_table_id
.get(&rtb.src_label)
.ok_or_else(|| CompactStoreError::LabelNotFound(rtb.src_label.to_string()))?;
let dst_table_id = *label_to_table_id
.get(&rtb.dst_label)
.ok_or_else(|| CompactStoreError::LabelNotFound(rtb.dst_label.to_string()))?;
let src_node_count = node_tables_by_id
.get(src_table_id as usize)
.map_or(0, |t| t.len());
let dst_node_count = node_tables_by_id
.get(dst_table_id as usize)
.map_or(0, |t| t.len());
let mut fwd_edges = rtb.edges.clone();
fwd_edges.sort_by_key(|&(src, _dst)| src);
let fwd = CsrAdjacency::from_sorted_edges(src_node_count, &fwd_edges);
let bwd =
if rtb.backward {
let mut bwd_edges: Vec<(u32, u32)> =
rtb.edges.iter().map(|&(src, dst)| (dst, src)).collect();
bwd_edges.sort_by_key(|&(dst, _src)| dst);
let mut bwd_csr = CsrAdjacency::from_sorted_edges(dst_node_count, &bwd_edges);
let mut mapping = Vec::with_capacity(bwd_edges.len());
for &(dst, src) in &bwd_edges {
let fwd_neighbors = fwd.neighbors(src);
let fwd_start = fwd.offset_of(src);
let local_idx = fwd_neighbors.iter().position(|&t| t == dst).ok_or_else(
|| {
CompactStoreError::InconsistentEdgeData(format!(
"backward edge ({dst}->{src}) has no corresponding forward edge"
))
},
)?;
mapping.push(fwd_start + local_idx as u32);
}
bwd_csr.set_edge_data(mapping);
Some(bwd_csr)
} else {
None
};
let property_col_defs: Vec<ColumnDef> = rtb
.properties
.iter()
.map(|(key, codec)| {
let col_type = infer_column_type(codec);
ColumnDef::new(key.as_str(), col_type)
})
.collect();
let schema = EdgeSchema::new(
rtb.edge_type.as_str(),
rel_table_id,
rtb.src_label.as_str(),
rtb.dst_label.as_str(),
property_col_defs,
);
let properties: FxHashMap<PropertyKey, ColumnCodec> =
rtb.properties.into_iter().collect();
let table = RelTable::new(schema, fwd, bwd, properties, src_table_id, dst_table_id);
edge_type_to_rel_id
.entry(rtb.edge_type.clone())
.or_default()
.push(rel_table_id);
rel_tables_by_id.push(table);
}
let mut stats = Statistics::new();
let mut total_nodes: u64 = 0;
let mut total_edges: u64 = 0;
for (idx, nt) in node_tables_by_id.iter().enumerate() {
let count = nt.len() as u64;
total_nodes += count;
let label = &table_id_to_label[idx];
stats.update_label(label.as_str(), LabelStatistics::new(count));
}
let mut edge_type_counts: FxHashMap<&str, u64> = FxHashMap::default();
for (idx, rt) in rel_tables_by_id.iter().enumerate() {
let count = rt.num_edges() as u64;
total_edges += count;
let edge_type = &rel_table_id_to_type[idx];
*edge_type_counts.entry(edge_type.as_str()).or_default() += count;
}
for (edge_type, count) in edge_type_counts {
stats.update_edge_type(edge_type, EdgeTypeStatistics::new(count, 0.0, 0.0));
}
stats.total_nodes = total_nodes;
stats.total_edges = total_edges;
Ok(CompactStore::new(
node_tables_by_id,
label_to_table_id,
rel_tables_by_id,
edge_type_to_rel_id,
table_id_to_label,
rel_table_id_to_type,
stats,
))
}
}
fn infer_column_type(codec: &ColumnCodec) -> ColumnType {
match codec {
ColumnCodec::BitPacked(bp) => ColumnType::UInt {
bits: bp.bits_per_value(),
},
ColumnCodec::Dict(_) => ColumnType::DictString,
ColumnCodec::Bitmap(_) => ColumnType::Bool,
ColumnCodec::Int8Vector { dimensions, .. } => ColumnType::Int8Vector {
dimensions: *dimensions,
},
}
}
fn compute_zone_map_u64(values: &[u64]) -> ZoneMap {
let Some(&min) = values.iter().min() else {
return ZoneMap::new();
};
let max = *values.iter().max().expect("non-empty after min check");
if max > i64::MAX as u64 {
return ZoneMap {
row_count: values.len(),
..ZoneMap::default()
};
}
ZoneMap {
min: Some(Value::Int64(min as i64)),
max: Some(Value::Int64(max as i64)),
null_count: 0,
row_count: values.len(),
}
}
fn compute_zone_map_strings(values: &[&str]) -> ZoneMap {
let Some(&min) = values.iter().min() else {
return ZoneMap::new();
};
let max = *values.iter().max().expect("non-empty after min check");
ZoneMap {
min: Some(Value::from(min)),
max: Some(Value::from(max)),
null_count: 0,
row_count: values.len(),
}
}
fn compute_zone_map_bool(values: &[bool]) -> ZoneMap {
if values.is_empty() {
return ZoneMap::new();
}
let has_false = values.iter().any(|&v| !v);
let has_true = values.iter().any(|&v| v);
let min = !has_false; let max = has_true; ZoneMap {
min: Some(Value::Bool(min)),
max: Some(Value::Bool(max)),
null_count: 0,
row_count: values.len(),
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InferredType {
BitPacked,
Bitmap,
Dict,
}
pub fn from_graph_store(
store: &dyn crate::graph::traits::GraphStore,
) -> Result<CompactStore, CompactStoreError> {
let labels = store.all_labels();
if labels.is_empty() {
return CompactStoreBuilder::new().build();
}
let mut id_map: FxHashMap<grafeo_common::types::NodeId, (ArcStr, u32)> = FxHashMap::default();
let mut label_data: Vec<(
ArcStr,
Vec<grafeo_common::types::NodeId>,
FxHashMap<PropertyKey, Vec<Value>>,
)> = Vec::new();
let mut seen_node_ids: FxHashSet<grafeo_common::types::NodeId> = FxHashSet::default();
let mut label_key_index: FxHashMap<ArcStr, usize> = FxHashMap::default();
for label in &labels {
let node_ids = store.nodes_by_label(label);
for &nid in &node_ids {
if !seen_node_ids.insert(nid) {
continue; }
let Some(node) = store.get_node(nid) else {
continue;
};
let label_key: ArcStr = if node.labels.len() <= 1 {
ArcStr::from(label.as_str())
} else {
let mut sorted: Vec<&str> = node.labels.iter().map(|l| l.as_str()).collect();
sorted.sort_unstable();
ArcStr::from(sorted.join("|"))
};
let entry_idx = if let Some(&idx) = label_key_index.get(&label_key) {
idx
} else {
let idx = label_data.len();
label_key_index.insert(label_key.clone(), idx);
label_data.push((label_key.clone(), Vec::new(), FxHashMap::default()));
idx
};
let (_, ref mut node_ids_vec, ref mut props_map) = label_data[entry_idx];
let offset = node_ids_vec.len() as u32;
node_ids_vec.push(nid);
id_map.insert(nid, (label_key, offset));
for (key, value) in node.properties.iter() {
let col = props_map
.entry(key.clone())
.or_insert_with(|| vec![Value::Null; offset as usize]);
while col.len() < offset as usize {
col.push(Value::Null);
}
col.push(value.clone());
}
let expected_len = offset as usize + 1;
for col in props_map.values_mut() {
while col.len() < expected_len {
col.push(Value::Null);
}
}
}
}
let mut builder = CompactStoreBuilder::new();
for (label_key, node_ids_for_label, props_map) in &label_data {
let node_count = node_ids_for_label.len();
builder = builder.node_table(label_key.as_str(), |t| {
t.record_len(node_count);
for (key, values) in props_map {
let inferred = infer_type_from_values(values);
match inferred {
InferredType::BitPacked => {
let u64_values: Vec<u64> = values
.iter()
.map(|v| match v {
Value::Int64(n) => *n as u64,
_ => 0,
})
.collect();
let bp = BitPackedInts::pack(&u64_values);
let zone_map = compute_zone_map_u64(&u64_values);
t.zone_maps.push((key.clone(), zone_map));
t.columns.push((key.clone(), ColumnCodec::BitPacked(bp)));
t.record_len(u64_values.len());
}
InferredType::Bitmap => {
let bool_values: Vec<bool> = values
.iter()
.map(|v| matches!(v, Value::Bool(true)))
.collect();
let bv = BitVector::from_bools(&bool_values);
let zone_map = compute_zone_map_bool(&bool_values);
t.zone_maps.push((key.clone(), zone_map));
t.columns.push((key.clone(), ColumnCodec::Bitmap(bv)));
t.record_len(bool_values.len());
}
InferredType::Dict => {
let str_values: Vec<String> = values
.iter()
.map(|v| match v {
Value::Null => String::new(),
Value::String(s) => s.to_string(),
other => format!("{other}"),
})
.collect();
let str_refs: Vec<&str> = str_values.iter().map(String::as_str).collect();
let mut dict_builder = DictionaryBuilder::new();
for s in &str_refs {
dict_builder.add(s);
}
let dict = dict_builder.build();
let zone_map = compute_zone_map_strings(&str_refs);
t.zone_maps.push((key.clone(), zone_map));
t.columns.push((key.clone(), ColumnCodec::Dict(dict)));
t.record_len(str_values.len());
}
}
}
t
});
}
type EdgeGroupKey = (ArcStr, ArcStr, ArcStr);
let mut edge_groups: FxHashMap<EdgeGroupKey, Vec<(u32, u32)>> = FxHashMap::default();
let mut edge_props_groups: FxHashMap<EdgeGroupKey, FxHashMap<PropertyKey, Vec<Value>>> =
FxHashMap::default();
for (_label_key, node_ids, _) in &label_data {
for &nid in node_ids {
let outgoing = store.edges_from(nid, crate::graph::Direction::Outgoing);
for (_target_nid, edge_id) in outgoing {
let Some(edge) = store.get_edge(edge_id) else {
continue;
};
let Some((src_label, src_offset)) = id_map.get(&edge.src) else {
continue;
};
let Some((dst_label, dst_offset)) = id_map.get(&edge.dst) else {
continue;
};
let group_key: EdgeGroupKey =
(edge.edge_type.clone(), src_label.clone(), dst_label.clone());
let edges_vec = edge_groups.entry(group_key.clone()).or_default();
let edge_idx = edges_vec.len();
edges_vec.push((*src_offset, *dst_offset));
if !edge.properties.is_empty() {
let props = edge_props_groups.entry(group_key).or_default();
for (key, value) in edge.properties.iter() {
let col = props
.entry(key.clone())
.or_insert_with(|| vec![Value::Null; edge_idx]);
while col.len() < edge_idx {
col.push(Value::Null);
}
col.push(value.clone());
}
let expected_len = edge_idx + 1;
for col in props.values_mut() {
while col.len() < expected_len {
col.push(Value::Null);
}
}
}
}
}
}
for ((edge_type, src_label, dst_label), edges) in &edge_groups {
let edge_props =
edge_props_groups.get(&(edge_type.clone(), src_label.clone(), dst_label.clone()));
builder = builder.rel_table(
edge_type.as_str(),
src_label.as_str(),
dst_label.as_str(),
|r| {
r.edges(edges.clone()).backward(true);
if let Some(props) = edge_props {
for (key, values) in props {
let inferred = infer_type_from_values(values);
match inferred {
InferredType::BitPacked => {
let u64_values: Vec<u64> = values
.iter()
.map(|v| match v {
Value::Int64(n) => *n as u64,
_ => 0,
})
.collect();
let bp = BitPackedInts::pack(&u64_values);
r.properties.push((key.clone(), ColumnCodec::BitPacked(bp)));
}
InferredType::Bitmap => {
let bool_values: Vec<bool> = values
.iter()
.map(|v| matches!(v, Value::Bool(true)))
.collect();
let bv = BitVector::from_bools(&bool_values);
r.properties.push((key.clone(), ColumnCodec::Bitmap(bv)));
}
InferredType::Dict => {
let str_values: Vec<String> = values
.iter()
.map(|v| match v {
Value::Null => String::new(),
Value::String(s) => s.to_string(),
other => format!("{other}"),
})
.collect();
let mut dict_builder = DictionaryBuilder::new();
for s in &str_values {
dict_builder.add(s);
}
let dict = dict_builder.build();
r.properties.push((key.clone(), ColumnCodec::Dict(dict)));
}
}
}
}
r
},
);
}
builder.build()
}
fn infer_type_from_values(values: &[Value]) -> InferredType {
let mut saw_int = false;
let mut saw_bool = false;
let mut saw_other = false;
for v in values {
match v {
Value::Null => {} Value::Int64(n) if *n >= 0 => saw_int = true,
Value::Bool(_) => saw_bool = true,
_ => saw_other = true,
}
}
if saw_other || (saw_int && saw_bool) {
InferredType::Dict
} else if saw_int {
InferredType::BitPacked
} else if saw_bool {
InferredType::Bitmap
} else {
InferredType::Dict
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::traits::GraphStore;
#[test]
fn test_builder_basic() {
let store = CompactStoreBuilder::new()
.node_table("Person", |t| {
t.column_bitpacked("age", &[25, 30, 35, 40, 45], 6)
.column_dict("name", &["Alix", "Gus", "Vincent", "Jules", "Mia"])
})
.build()
.unwrap();
let ids = store.nodes_by_label("Person");
assert_eq!(ids.len(), 5);
}
#[test]
fn test_builder_with_edges() {
let store = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("val", &[1, 2, 3], 4))
.node_table("B", |t| t.column_bitpacked("val", &[10, 20], 8))
.rel_table("LINKS", "A", "B", |r| {
r.edges([(0, 0), (0, 1), (1, 0), (2, 1)]).backward(true)
})
.build()
.unwrap();
let a_ids = store.nodes_by_label("A");
assert_eq!(a_ids.len(), 3);
let b_ids = store.nodes_by_label("B");
assert_eq!(b_ids.len(), 2);
}
#[test]
fn test_builder_label_not_found() {
let result = CompactStoreBuilder::new()
.node_table("A", |t| t.column_bitpacked("val", &[1], 4))
.rel_table("LINKS", "A", "B", |r| {
r.edges([(0, 0)])
})
.build();
assert!(result.is_err());
}
#[test]
fn test_from_graph_store_round_trip() {
let original = CompactStoreBuilder::new()
.node_table("Person", |t| {
t.column_bitpacked("age", &[25, 30, 35], 6)
.column_dict("name", &["Alix", "Gus", "Vincent"])
.column_bitmap("active", &[true, false, true])
})
.node_table("City", |t| t.column_dict("name", &["Amsterdam", "Berlin"]))
.rel_table("LIVES_IN", "Person", "City", |r| {
r.edges([(0, 0), (1, 1), (2, 0)]).backward(true)
})
.build()
.unwrap();
let converted = from_graph_store(&original).unwrap();
assert_eq!(converted.nodes_by_label("Person").len(), 3);
assert_eq!(converted.nodes_by_label("City").len(), 2);
let person_ids = converted.nodes_by_label("Person");
let mut ages: Vec<i64> = person_ids
.iter()
.filter_map(|&id| {
converted
.get_node_property(id, &PropertyKey::new("age"))
.and_then(|v| v.as_int64())
})
.collect();
ages.sort_unstable();
assert_eq!(ages, vec![25, 30, 35]);
let city_ids = converted.nodes_by_label("City");
let mut total_edges = 0;
for &pid in &person_ids {
let edges = converted.edges_from(pid, crate::graph::Direction::Outgoing);
total_edges += edges.len();
}
assert_eq!(total_edges, 3);
for &cid in &city_ids {
let incoming = converted.edges_from(cid, crate::graph::Direction::Incoming);
assert!(!incoming.is_empty());
}
}
#[test]
fn test_from_graph_store_empty() {
let empty = CompactStoreBuilder::new().build().unwrap();
let converted = from_graph_store(&empty).unwrap();
assert_eq!(converted.nodes_by_label("Anything").len(), 0);
}
#[test]
fn test_from_graph_store_with_lpg_store() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let alix_id = store.create_node(&["Person"]);
store.set_node_property(alix_id, "name", Value::from("Alix"));
store.set_node_property(alix_id, "age", Value::Int64(30));
let gus_id = store.create_node(&["Person"]);
store.set_node_property(gus_id, "name", Value::from("Gus"));
store.set_node_property(gus_id, "age", Value::Int64(25));
let amsterdam_id = store.create_node(&["City"]);
store.set_node_property(amsterdam_id, "name", Value::from("Amsterdam"));
store.create_edge(alix_id, amsterdam_id, "LIVES_IN");
store.create_edge(gus_id, amsterdam_id, "LIVES_IN");
let compact = from_graph_store(&store).unwrap();
assert_eq!(compact.nodes_by_label("Person").len(), 2);
assert_eq!(compact.nodes_by_label("City").len(), 1);
let person_ids = compact.nodes_by_label("Person");
let mut names: Vec<String> = person_ids
.iter()
.filter_map(|&id| {
compact
.get_node_property(id, &PropertyKey::new("name"))
.and_then(|v| v.as_str().map(|s| s.to_string()))
})
.collect();
names.sort();
assert_eq!(names, vec!["Alix", "Gus"]);
let mut total_outgoing = 0;
for &pid in &person_ids {
let edges = compact.edges_from(pid, crate::graph::Direction::Outgoing);
total_outgoing += edges.len();
}
assert_eq!(total_outgoing, 2);
let city_ids = compact.nodes_by_label("City");
assert_eq!(city_ids.len(), 1);
let incoming = compact.edges_from(city_ids[0], crate::graph::Direction::Incoming);
assert_eq!(incoming.len(), 2);
}
#[test]
fn test_from_graph_store_edge_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let alix = store.create_node(&["Person"]);
store.set_node_property(alix, "name", Value::from("Alix"));
let gus = store.create_node(&["Person"]);
store.set_node_property(gus, "name", Value::from("Gus"));
let e1 = store.create_edge(alix, gus, "KNOWS");
store.set_edge_property(e1, "since", Value::Int64(2020));
let e2 = store.create_edge(gus, alix, "KNOWS");
store.set_edge_property(e2, "since", Value::Int64(2021));
let compact = from_graph_store(&store).unwrap();
let person_ids = compact.nodes_by_label("Person");
let mut total_edges = 0;
for &pid in &person_ids {
total_edges += compact
.edges_from(pid, crate::graph::Direction::Outgoing)
.len();
}
assert_eq!(total_edges, 2);
for &pid in &person_ids {
let edges = compact.edges_from(pid, crate::graph::Direction::Outgoing);
for (_target, eid) in &edges {
let edge = compact.get_edge(*eid).unwrap();
let since = edge.properties.get(&PropertyKey::new("since")).unwrap();
match since {
Value::Int64(v) => assert!(*v == 2020 || *v == 2021),
_ => panic!("expected Int64 for 'since', got {since:?}"),
}
}
}
}
#[test]
fn test_from_graph_store_edge_bool_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Node"]);
let b = store.create_node(&["Node"]);
let e = store.create_edge(a, b, "LINK");
store.set_edge_property(e, "active", Value::Bool(true));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Node");
let edges = compact.edges_from(ids[0], crate::graph::Direction::Outgoing);
assert_eq!(edges.len(), 1);
let edge = compact.get_edge(edges[0].1).unwrap();
assert_eq!(
edge.properties.get(&PropertyKey::new("active")),
Some(&Value::Bool(true))
);
}
#[test]
fn test_from_graph_store_edge_string_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Node"]);
let b = store.create_node(&["Node"]);
let e = store.create_edge(a, b, "LINK");
store.set_edge_property(e, "label", Value::from("primary"));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Node");
let edges = compact.edges_from(ids[0], crate::graph::Direction::Outgoing);
let edge = compact.get_edge(edges[0].1).unwrap();
assert_eq!(
edge.properties.get(&PropertyKey::new("label")),
Some(&Value::String(ArcStr::from("primary")))
);
}
#[test]
fn test_from_graph_store_negative_int_falls_back_to_dict() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Item"]);
store.set_node_property(a, "temp", Value::Int64(-10));
let b = store.create_node(&["Item"]);
store.set_node_property(b, "temp", Value::Int64(5));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Item");
assert_eq!(ids.len(), 2);
let mut temps: Vec<String> = ids
.iter()
.filter_map(|&id| {
compact
.get_node_property(id, &PropertyKey::new("temp"))
.and_then(|v| v.as_str().map(|s| s.to_string()))
})
.collect();
temps.sort();
assert_eq!(temps, vec!["-10", "5"]);
}
#[test]
fn test_from_graph_store_float_falls_back_to_dict() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Sensor"]);
store.set_node_property(a, "reading", Value::Float64(98.6));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Sensor");
assert_eq!(ids.len(), 1);
let val = compact
.get_node_property(ids[0], &PropertyKey::new("reading"))
.unwrap();
match val {
Value::String(s) => assert!(s.contains("98.6"), "expected '98.6' in '{s}'"),
other => panic!("expected String (Dict fallback), got {other:?}"),
}
}
#[test]
fn test_from_graph_store_mixed_types_fall_back_to_dict() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Thing"]);
store.set_node_property(a, "value", Value::Int64(42));
let b = store.create_node(&["Thing"]);
store.set_node_property(b, "value", Value::Bool(true));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Thing");
assert_eq!(ids.len(), 2);
for &id in &ids {
let val = compact
.get_node_property(id, &PropertyKey::new("value"))
.unwrap();
assert!(
matches!(val, Value::String(_)),
"expected String (Dict fallback), got {val:?}"
);
}
}
#[test]
fn test_from_graph_store_sparse_properties() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Item"]);
store.set_node_property(a, "name", Value::from("alpha"));
store.set_node_property(a, "score", Value::Int64(10));
let b = store.create_node(&["Item"]);
store.set_node_property(b, "name", Value::from("beta"));
let c = store.create_node(&["Item"]);
store.set_node_property(c, "score", Value::Int64(20));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Item");
assert_eq!(ids.len(), 3);
let mut name_count = 0;
let mut score_count = 0;
for &id in &ids {
if let Some(Value::String(s)) = compact.get_node_property(id, &PropertyKey::new("name"))
&& !s.is_empty()
{
name_count += 1;
}
if let Some(Value::Int64(n)) = compact.get_node_property(id, &PropertyKey::new("score"))
&& n > 0
{
score_count += 1;
}
}
assert_eq!(name_count, 2);
assert_eq!(score_count, 2);
}
#[test]
fn test_from_graph_store_multi_label_nodes() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Person", "Actor"]);
store.set_node_property(a, "name", Value::from("Vincent"));
let b = store.create_node(&["Person"]);
store.set_node_property(b, "name", Value::from("Jules"));
let compact = from_graph_store(&store).unwrap();
let person_ids = compact.nodes_by_label("Person");
assert_eq!(person_ids.len(), 1);
let compound_ids = compact.nodes_by_label("Actor|Person");
assert_eq!(compound_ids.len(), 1);
let val = compact
.get_node_property(compound_ids[0], &PropertyKey::new("name"))
.unwrap();
assert_eq!(val, Value::String(ArcStr::from("Vincent")));
}
#[test]
fn test_from_graph_store_all_null_column() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let a = store.create_node(&["Item"]);
store.set_node_property(a, "x", Value::Int64(1));
let b = store.create_node(&["Item"]);
store.set_node_property(b, "y", Value::Int64(2));
let compact = from_graph_store(&store).unwrap();
let ids = compact.nodes_by_label("Item");
assert_eq!(ids.len(), 2);
}
#[test]
fn test_infer_type_all_nulls() {
assert_eq!(
infer_type_from_values(&[Value::Null, Value::Null]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_int_only() {
assert_eq!(
infer_type_from_values(&[Value::Int64(5), Value::Int64(10)]),
InferredType::BitPacked
);
}
#[test]
fn test_infer_type_bool_only() {
assert_eq!(
infer_type_from_values(&[Value::Bool(true), Value::Bool(false)]),
InferredType::Bitmap
);
}
#[test]
fn test_infer_type_mixed_int_bool() {
assert_eq!(
infer_type_from_values(&[Value::Int64(1), Value::Bool(true)]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_negative_int() {
assert_eq!(
infer_type_from_values(&[Value::Int64(-5), Value::Int64(10)]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_float() {
assert_eq!(
infer_type_from_values(&[Value::Float64(1.5)]),
InferredType::Dict
);
}
#[test]
fn test_infer_type_int_with_nulls() {
assert_eq!(
infer_type_from_values(&[Value::Int64(5), Value::Null, Value::Int64(10)]),
InferredType::BitPacked
);
}
#[test]
fn test_from_graph_store_multi_label_edge_type() {
use crate::graph::lpg::LpgStore;
let store = LpgStore::new().unwrap();
let m1 = store.create_node(&["Method"]);
store.set_node_property(m1, "name", Value::from("foo"));
let m2 = store.create_node(&["Method"]);
store.set_node_property(m2, "name", Value::from("bar"));
let c1 = store.create_node(&["Class"]);
store.set_node_property(c1, "name", Value::from("MyClass"));
let i1 = store.create_node(&["Interface"]);
store.set_node_property(i1, "name", Value::from("MyInterface"));
store.create_edge(m1, m2, "CALLS"); store.create_edge(c1, m1, "CALLS"); store.create_edge(m1, c1, "USES_TYPE"); store.create_edge(m1, i1, "USES_TYPE");
let compact = from_graph_store(&store).unwrap();
assert_eq!(compact.nodes_by_label("Method").len(), 2);
assert_eq!(compact.nodes_by_label("Class").len(), 1);
assert_eq!(compact.nodes_by_label("Interface").len(), 1);
let calls_tables = compact.rel_tables_for_type("CALLS");
let uses_tables = compact.rel_tables_for_type("USES_TYPE");
assert_eq!(
calls_tables.len(),
2,
"CALLS should have 2 rel tables (different label pairs)"
);
assert_eq!(
uses_tables.len(),
2,
"USES_TYPE should have 2 rel tables (different label pairs)"
);
let total_calls: usize = calls_tables.iter().map(|rt| rt.num_edges()).sum();
assert_eq!(total_calls, 2, "Should have 2 CALLS edges total");
let total_uses: usize = uses_tables.iter().map(|rt| rt.num_edges()).sum();
assert_eq!(total_uses, 2, "Should have 2 USES_TYPE edges total");
use crate::graph::traits::GraphStore;
let mut edge_types = compact.all_edge_types();
edge_types.sort();
assert_eq!(
edge_types,
vec!["CALLS", "USES_TYPE"],
"all_edge_types should return each type once, not per rel table"
);
let avg_out = compact.estimate_avg_degree("USES_TYPE", true);
assert!(avg_out > 0.0, "USES_TYPE outgoing degree should be > 0");
assert!(
(avg_out - 1.0).abs() < f64::EPSILON,
"USES_TYPE avg outgoing degree should be 1.0 (2 edges / 2 Method nodes), got {avg_out}"
);
let unknown = compact.estimate_avg_degree("NONEXISTENT", true);
assert!(
(unknown - 0.0).abs() < f64::EPSILON,
"Unknown edge type should return 0.0 avg degree"
);
}
}