use crate::datatypes::values::{FilterCondition, Value};
use crate::graph::storage::interner::STRIP_PROPERTIES;
pub use crate::graph::storage::interner::{InternedKey, StringInterner};
pub(crate) use crate::graph::storage::interner::{
SerdeDeserializeGuard, SerdeSerializeGuard, StripPropertiesGuard,
};
use crate::graph::storage::GraphRead;
pub use crate::graph::dir_graph::DirGraph;
pub use crate::graph::storage::backend::GraphBackend;
#[allow(unused_imports)]
pub use crate::graph::storage::{MappedGraph, MemoryGraph};
use petgraph::graph::NodeIndex;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
/// The reserved key string `"_provisional"`.
// NOTE(review): no use of this constant is visible in this part of the file —
// confirm what a provisional entry means before relying on it.
pub const PROVISIONAL_KEY: &str = "_provisional";
/// Ordered set of property keys in which every key owns a stable slot index.
///
/// `slots[i]` is the key stored at slot `i`, and `key_to_slot` is the reverse
/// lookup. Keys are only ever appended, so a slot index handed out once keeps
/// pointing at the same key.
#[derive(Debug, Clone)]
pub struct TypeSchema {
    /// Keys in slot order.
    pub(crate) slots: Vec<InternedKey>,
    /// Reverse lookup from a key to its slot index.
    key_to_slot: FxHashMap<InternedKey, u16>,
}

impl TypeSchema {
    /// Creates a schema with no keys.
    pub fn new() -> Self {
        TypeSchema {
            slots: Vec::new(),
            key_to_slot: FxHashMap::default(),
        }
    }

    /// Builds a schema from `keys`, assigning slots in iteration order.
    ///
    /// Repeated keys keep the slot of their first occurrence.
    ///
    /// # Panics
    ///
    /// Panics if more than 65536 distinct keys are supplied (see [`Self::add_key`]).
    pub fn from_keys(keys: impl IntoIterator<Item = InternedKey>) -> Self {
        let mut schema = TypeSchema::new();
        for key in keys {
            schema.add_key(key);
        }
        schema
    }

    /// Returns the slot index assigned to `key`, if the schema contains it.
    #[inline]
    pub fn slot(&self, key: InternedKey) -> Option<u16> {
        self.key_to_slot.get(&key).copied()
    }

    /// Number of keys (and therefore slots) in the schema.
    #[inline]
    pub fn len(&self) -> usize {
        self.slots.len()
    }

    /// Returns a copy of `self` extended with every key of `other` it lacks.
    ///
    /// Slots already present in `self` keep their indices; new keys are
    /// appended in `other`'s slot order.
    pub fn merge(&self, other: &TypeSchema) -> TypeSchema {
        let mut merged = self.clone();
        for &key in &other.slots {
            merged.add_key(key);
        }
        merged
    }

    /// Returns the slot for `key`, appending a new slot when it is unknown.
    ///
    /// # Panics
    ///
    /// Panics when a new key would need a slot index that does not fit in
    /// `u16`. The previous `as u16` cast wrapped silently in that case and
    /// handed out a slot that aliased an unrelated key.
    pub fn add_key(&mut self, key: InternedKey) -> u16 {
        if let Some(&slot) = self.key_to_slot.get(&key) {
            return slot;
        }
        let slot = u16::try_from(self.slots.len())
            .expect("TypeSchema cannot hold more than 65536 distinct keys");
        self.slots.push(key);
        self.key_to_slot.insert(key, slot);
        slot
    }

    /// Iterates over `(slot, key)` pairs in slot order.
    pub fn iter(&self) -> impl Iterator<Item = (u16, InternedKey)> + '_ {
        // `add_key` never lets the schema grow past the `u16` range, so the
        // cast is lossless for schemas built through this API.
        self.slots.iter().enumerate().map(|(i, &k)| (i as u16, k))
    }
}
/// Backing storage for one set of properties, in one of three layouts.
pub(crate) enum PropertyStorage {
/// Free-form map from interned key to value.
Map(HashMap<InternedKey, Value>),
/// Values laid out by the slot indices of a shared [`TypeSchema`]. The
/// accessors in this file treat a `Value::Null` slot as "property absent".
Compact {
schema: Arc<TypeSchema>,
values: Vec<Value>,
},
/// A single row (`row_id`) of a shared column store; reads and writes are
/// forwarded to the store.
Columnar {
store: Arc<crate::graph::storage::column_store::ColumnStore>,
row_id: u32,
},
}
/// Iterator over the names of the properties held by a [`PropertyStorage`].
///
/// One variant per storage layout; see [`PropertyStorage::keys`].
pub(crate) enum PropertyKeyIter<'a> {
    /// Walks the keys of a map-backed storage and resolves each through the interner.
    Map {
        inner: std::collections::hash_map::Keys<'a, InternedKey, Value>,
        interner: &'a StringInterner,
    },
    /// Walks schema slots, skipping slots whose value is missing or `Value::Null`.
    Compact {
        slots: &'a [InternedKey],
        values: &'a [Value],
        slot_idx: usize,
        interner: &'a StringInterner,
    },
    /// Names that were resolved up front for a columnar row.
    Columnar(std::vec::IntoIter<&'a str>),
}

impl<'a> Iterator for PropertyKeyIter<'a> {
    type Item = &'a str;

    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        match self {
            Self::Map { inner, interner } => {
                let key = inner.next()?;
                Some(interner.resolve(*key))
            }
            Self::Compact {
                slots,
                values,
                slot_idx,
                interner,
            } => {
                while *slot_idx < slots.len() {
                    let current = *slot_idx;
                    *slot_idx += 1;
                    match values.get(current) {
                        // Empty or out-of-range slot: not a present property.
                        Some(Value::Null) | None => {}
                        Some(_) => return Some(interner.resolve(slots[current])),
                    }
                }
                None
            }
            Self::Columnar(names) => names.next(),
        }
    }
}
/// Borrowing iterator over `(name, value)` pairs of a [`PropertyStorage`].
///
/// The `Columnar` variant carries no state and never yields anything; the
/// columnar accessors in this file hand out owned values, and
/// [`PropertyStorage::iter_owned`] is the variant-independent way to list them.
pub(crate) enum PropertyIter<'a> {
    /// Walks the entries of a map-backed storage.
    Map {
        inner: std::collections::hash_map::Iter<'a, InternedKey, Value>,
        interner: &'a StringInterner,
    },
    /// Walks schema slots, skipping slots whose value is missing or `Value::Null`.
    Compact {
        slots: &'a [InternedKey],
        values: &'a [Value],
        slot_idx: usize,
        interner: &'a StringInterner,
    },
    /// Always-empty iterator used for columnar storage.
    Columnar,
}

impl<'a> Iterator for PropertyIter<'a> {
    type Item = (&'a str, &'a Value);

    #[inline]
    fn next(&mut self) -> Option<(&'a str, &'a Value)> {
        match self {
            Self::Map { inner, interner } => {
                let (key, value) = inner.next()?;
                Some((interner.resolve(*key), value))
            }
            Self::Compact {
                slots,
                values,
                slot_idx,
                interner,
            } => {
                while *slot_idx < slots.len() {
                    let current = *slot_idx;
                    *slot_idx += 1;
                    match values.get(current) {
                        // Empty or out-of-range slot: not a present property.
                        Some(Value::Null) | None => {}
                        Some(value) => {
                            return Some((interner.resolve(slots[current]), value));
                        }
                    }
                }
                None
            }
            Self::Columnar => None,
        }
    }
}
impl PropertyStorage {
    /// Finds the non-`Null` value stored for `key` in a compact layout.
    #[inline]
    fn compact_lookup<'v>(
        schema: &TypeSchema,
        values: &'v [Value],
        key: InternedKey,
    ) -> Option<&'v Value> {
        let slot = usize::from(schema.slot(key)?);
        values
            .get(slot)
            .filter(|value| !matches!(value, Value::Null))
    }

    /// Writes `value` into the compact slot for `key`.
    ///
    /// An unknown key is appended to the schema (via `Arc::make_mut`, so a
    /// shared schema is cloned first) and `values` is padded with
    /// `Value::Null` up to the target slot when it is too short.
    fn compact_store(
        schema: &mut Arc<TypeSchema>,
        values: &mut Vec<Value>,
        key: InternedKey,
        value: Value,
    ) {
        let slot = match schema.slot(key) {
            Some(existing) => usize::from(existing),
            None => usize::from(Arc::make_mut(schema).add_key(key)),
        };
        if values.len() <= slot {
            values.resize(slot + 1, Value::Null);
        }
        values[slot] = value;
    }

    /// Returns the value stored for `key`.
    ///
    /// Map and compact layouts lend the value; the columnar layout returns an
    /// owned value fetched from the store. A `Value::Null` compact slot counts
    /// as absent.
    #[inline]
    pub fn get(&self, key: InternedKey) -> Option<Cow<'_, Value>> {
        match self {
            Self::Map(entries) => entries.get(&key).map(Cow::Borrowed),
            Self::Compact { schema, values } => {
                Self::compact_lookup(schema, values, key).map(Cow::Borrowed)
            }
            Self::Columnar { store, row_id } => store.get(*row_id, key).map(Cow::Owned),
        }
    }

    /// Like [`Self::get`], but always returns an owned clone of the value.
    #[inline]
    pub fn get_value(&self, key: InternedKey) -> Option<Value> {
        match self {
            Self::Map(entries) => entries.get(&key).cloned(),
            Self::Compact { schema, values } => Self::compact_lookup(schema, values, key).cloned(),
            Self::Columnar { store, row_id } => store.get(*row_id, key),
        }
    }

    /// Whether [`Self::get`] would return a value for `key`.
    #[inline]
    pub fn contains(&self, key: InternedKey) -> bool {
        self.get(key).is_some()
    }

    /// Compares the property `key` against the string `target`.
    ///
    /// Returns `None` when the property is absent, otherwise `Some(true)` only
    /// if the stored value is a `Value::String` equal to `target`. The columnar
    /// layout delegates the whole check to the store.
    #[inline]
    pub fn str_prop_eq(&self, key: InternedKey, target: &str) -> Option<bool> {
        match self {
            Self::Map(entries) => {
                let found = entries.get(&key)?;
                Some(matches!(found, Value::String(s) if s == target))
            }
            Self::Compact { schema, values } => {
                let found = Self::compact_lookup(schema, values, key)?;
                Some(matches!(found, Value::String(s) if s == target))
            }
            Self::Columnar { store, row_id } => store.str_prop_eq(*row_id, key, target),
        }
    }

    /// Stores `value` under `key`, replacing whatever was there.
    pub fn insert(&mut self, key: InternedKey, value: Value) {
        match self {
            Self::Map(entries) => {
                entries.insert(key, value);
            }
            Self::Compact { schema, values } => Self::compact_store(schema, values, key, value),
            Self::Columnar { store, row_id } => {
                Arc::make_mut(store).set(*row_id, key, &value, None);
            }
        }
    }

    /// Stores `value` under `key` only when no value is present yet.
    ///
    /// For the compact layout a `Value::Null` slot counts as "not present".
    pub fn insert_if_absent(&mut self, key: InternedKey, value: Value) {
        match self {
            Self::Map(entries) => {
                entries.entry(key).or_insert(value);
            }
            Self::Compact { schema, values } => {
                if Self::compact_lookup(schema, values, key).is_none() {
                    Self::compact_store(schema, values, key, value);
                }
            }
            Self::Columnar { store, row_id } => {
                let missing = store.get(*row_id, key).is_none();
                if missing {
                    Arc::make_mut(store).set(*row_id, key, &value, None);
                }
            }
        }
    }

    /// Removes the property `key` and returns its previous value, if any.
    ///
    /// Compact and columnar layouts overwrite the cell with `Value::Null`
    /// instead of shrinking.
    pub fn remove(&mut self, key: InternedKey) -> Option<Value> {
        match self {
            Self::Map(entries) => entries.remove(&key),
            Self::Compact { schema, values } => {
                let slot = usize::from(schema.slot(key)?);
                let cell = values.get_mut(slot)?;
                match std::mem::replace(cell, Value::Null) {
                    Value::Null => None,
                    previous => Some(previous),
                }
            }
            Self::Columnar { store, row_id } => {
                let previous = store.get(*row_id, key);
                if previous.is_some() {
                    Arc::make_mut(store).set(*row_id, key, &Value::Null, None);
                }
                previous
            }
        }
    }

    /// Discards every current property and stores `pairs` instead.
    pub fn replace_all(&mut self, pairs: impl IntoIterator<Item = (InternedKey, Value)>) {
        match self {
            Self::Map(entries) => {
                entries.clear();
                entries.extend(pairs);
            }
            Self::Compact { schema, values } => {
                // Blank every existing slot first; the schema itself is kept.
                values.fill(Value::Null);
                for (key, value) in pairs {
                    Self::compact_store(schema, values, key, value);
                }
            }
            Self::Columnar { store, row_id } => {
                let columns = Arc::make_mut(store);
                let existing: Vec<_> = columns
                    .row_properties(*row_id)
                    .into_iter()
                    .map(|(key, _)| key)
                    .collect();
                for key in existing {
                    columns.set(*row_id, key, &Value::Null, None);
                }
                for (key, value) in pairs {
                    columns.set(*row_id, key, &value, None);
                }
            }
        }
    }

    /// Number of stored properties.
    ///
    /// The compact layout counts only non-`Null` slots; the map layout counts
    /// every entry; the columnar layout counts what the store reports for the row.
    pub fn len(&self) -> usize {
        match self {
            Self::Map(entries) => entries.len(),
            Self::Compact { values, .. } => values
                .iter()
                .filter(|value| !matches!(value, Value::Null))
                .count(),
            Self::Columnar { store, row_id } => store.row_properties(*row_id).len(),
        }
    }

    /// Takes the properties out as `(key, value)` pairs, leaving an empty map behind.
    ///
    /// Compact storage yields only its non-`Null` slots. Columnar storage
    /// yields nothing: the row reference is dropped without reading the store.
    /// The interner argument is unused.
    pub fn drain_to_interned_pairs(
        &mut self,
        _interner: &StringInterner,
    ) -> Vec<(InternedKey, Value)> {
        let taken = std::mem::replace(self, PropertyStorage::Map(HashMap::new()));
        match taken {
            Self::Map(entries) => entries.into_iter().collect(),
            Self::Compact { schema, values } => schema
                .slots
                .iter()
                .copied()
                .zip(values)
                .filter(|(_, value)| !matches!(value, Value::Null))
                .collect(),
            Self::Columnar { .. } => Vec::new(),
        }
    }

    /// Iterates over the names of the stored properties.
    ///
    /// Columnar keys the interner cannot resolve are skipped.
    pub fn keys<'a>(&'a self, interner: &'a StringInterner) -> PropertyKeyIter<'a> {
        match self {
            Self::Map(entries) => PropertyKeyIter::Map {
                inner: entries.keys(),
                interner,
            },
            Self::Compact { schema, values } => PropertyKeyIter::Compact {
                slots: &schema.slots,
                values,
                slot_idx: 0,
                interner,
            },
            Self::Columnar { store, row_id } => {
                let names: Vec<&'a str> = store
                    .row_properties(*row_id)
                    .iter()
                    .filter_map(|(key, _)| interner.try_resolve(*key))
                    .collect();
                PropertyKeyIter::Columnar(names.into_iter())
            }
        }
    }

    /// Iterates over borrowed `(name, value)` pairs.
    ///
    /// Columnar storage produces an empty iterator here; use
    /// [`Self::iter_owned`] to list its properties.
    pub fn iter<'a>(&'a self, interner: &'a StringInterner) -> PropertyIter<'a> {
        match self {
            Self::Map(entries) => PropertyIter::Map {
                inner: entries.iter(),
                interner,
            },
            Self::Compact { schema, values } => PropertyIter::Compact {
                slots: &schema.slots,
                values,
                slot_idx: 0,
                interner,
            },
            Self::Columnar { .. } => PropertyIter::Columnar,
        }
    }

    /// Collects owned `(name, value)` pairs for every layout, columnar included.
    ///
    /// Columnar keys the interner cannot resolve are skipped.
    pub fn iter_owned<'a>(&'a self, interner: &'a StringInterner) -> Vec<(String, Value)> {
        match self {
            Self::Map(entries) => {
                let mut out = Vec::with_capacity(entries.len());
                for (key, value) in entries {
                    out.push((interner.resolve(*key).to_string(), value.clone()));
                }
                out
            }
            Self::Compact { schema, values } => schema
                .slots
                .iter()
                .zip(values.iter())
                .filter(|(_, value)| !matches!(value, Value::Null))
                .map(|(key, value)| (interner.resolve(*key).to_string(), value.clone()))
                .collect(),
            Self::Columnar { store, row_id } => store
                .row_properties(*row_id)
                .into_iter()
                .filter_map(|(key, value)| {
                    let name = interner.try_resolve(key)?;
                    Some((name.to_string(), value))
                })
                .collect(),
        }
    }

    /// Builds compact storage for `schema` from `pairs`.
    ///
    /// Every slot starts as `Value::Null`; pairs whose key is not part of the
    /// schema are silently dropped.
    pub fn from_compact(
        pairs: impl IntoIterator<Item = (InternedKey, Value)>,
        schema: &Arc<TypeSchema>,
    ) -> Self {
        let mut values = vec![Value::Null; schema.len()];
        for (key, value) in pairs {
            match schema.slot(key) {
                Some(slot) => values[slot as usize] = value,
                None => {}
            }
        }
        PropertyStorage::Compact {
            schema: Arc::clone(schema),
            values,
        }
    }
}
impl Clone for PropertyStorage {
fn clone(&self) -> Self {
match self {
PropertyStorage::Map(map) => PropertyStorage::Map(map.clone()),
PropertyStorage::Compact { schema, values } => PropertyStorage::Compact {
schema: Arc::clone(schema),
values: values.clone(),
},
PropertyStorage::Columnar { store, row_id } => PropertyStorage::Columnar {
store: Arc::clone(store),
row_id: *row_id,
},
}
}
}
impl std::fmt::Debug for PropertyStorage {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PropertyStorage::Map(map) => f.debug_tuple("Map").field(map).finish(),
PropertyStorage::Compact { values, .. } => {
f.debug_tuple("Compact").field(values).finish()
}
PropertyStorage::Columnar { row_id, .. } => {
f.debug_struct("Columnar").field("row_id", row_id).finish()
}
}
}
}
impl PartialEq for PropertyStorage {
    /// Two storages are equal when they hold the same `(key, value)` pairs,
    /// whatever layout each side uses. Compact `Value::Null` slots are ignored.
    fn eq(&self, other: &Self) -> bool {
        /// Collects the entries of `storage`, ordered by the key's `as_u64()` value.
        fn sorted_entries(storage: &PropertyStorage) -> Vec<(InternedKey, Value)> {
            let mut entries: Vec<(InternedKey, Value)> = match storage {
                PropertyStorage::Map(map) => {
                    map.iter().map(|(&key, value)| (key, value.clone())).collect()
                }
                PropertyStorage::Compact { schema, values } => schema
                    .slots
                    .iter()
                    .zip(values.iter())
                    .filter(|(_, value)| !matches!(value, Value::Null))
                    .map(|(&key, value)| (key, value.clone()))
                    .collect(),
                PropertyStorage::Columnar { store, row_id } => store.row_properties(*row_id),
            };
            entries.sort_by_key(|(key, _)| key.as_u64());
            entries
        }

        let lhs = sorted_entries(self);
        let rhs = sorted_entries(other);
        lhs == rhs
    }
}
impl Serialize for PropertyStorage {
    /// Serializes every layout as a map from key to value.
    ///
    /// While the `STRIP_PROPERTIES` flag is set, an empty map is written
    /// instead. Compact `Value::Null` slots are left out.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeMap;

        let strip = STRIP_PROPERTIES.with(|flag| flag.get());
        if strip {
            let empty = serializer.serialize_map(Some(0))?;
            return empty.end();
        }

        match self {
            Self::Map(entries) => entries.serialize(serializer),
            Self::Compact { schema, values } => {
                let live = values
                    .iter()
                    .filter(|value| !matches!(value, Value::Null))
                    .count();
                let mut out = serializer.serialize_map(Some(live))?;
                for (key, value) in schema.slots.iter().zip(values.iter()) {
                    if matches!(value, Value::Null) {
                        continue;
                    }
                    out.serialize_entry(key, value)?;
                }
                out.end()
            }
            Self::Columnar { store, row_id } => {
                let row = store.row_properties(*row_id);
                let mut out = serializer.serialize_map(Some(row.len()))?;
                for (key, value) in &row {
                    out.serialize_entry(key, value)?;
                }
                out.end()
            }
        }
    }
}
impl<'de> Deserialize<'de> for PropertyStorage {
    /// Reads a key → value map and always produces the `Map` layout.
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        HashMap::<InternedKey, Value>::deserialize(deserializer).map(PropertyStorage::Map)
    }
}
/// Spatial column configuration, as assembled by
/// `parse_spatial_column_types_from_pairs`. Unset / empty fields are omitted
/// when serializing and default when missing on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct SpatialConfig {
/// `(latitude column, longitude column)` declared via `location.lat` / `location.lon`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub location: Option<(String, String)>,
/// Column declared with the `geometry` type.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub geometry: Option<String>,
/// Named points: point name → `(latitude column, longitude column)`.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub points: HashMap<String, (String, String)>,
/// Named shapes: shape name → column.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub shapes: HashMap<String, String>,
}
/// Result of spatial parsing: the optional spatial config plus the column
/// list with spatial type strings replaced by plain storage types.
pub type SpatialColumnParseResult = (Option<SpatialConfig>, Vec<(String, String)>);
/// Extracts spatial column declarations from `(column name, type string)` pairs.
///
/// Type strings are matched case-insensitively:
///
/// * `location.lat` / `location.lon` — the primary location; stored as `float`.
/// * `geometry` — the geometry column; stored as `str`.
/// * `point.<name>.lat` / `point.<name>.lon` — a named point; stored as `float`.
/// * `shape.<name>` — a named shape; stored as `str`.
///
/// Point and shape names are taken from the lower-cased type string. Every
/// other pair is passed through unchanged.
///
/// Returns the spatial config (`None` when no spatial type was seen) together
/// with the cleaned column list, in input order.
///
/// # Errors
///
/// Returns a message when a `point.` type is malformed, when only one of
/// `location.lat` / `location.lon` is given, or when a named point lacks its
/// latitude or longitude. With several incomplete points, the one whose name
/// sorts first is reported.
pub fn parse_spatial_column_types_from_pairs(
    pairs: Vec<(String, String)>,
) -> Result<SpatialColumnParseResult, String> {
    let mut cleaned: Vec<(String, String)> = Vec::with_capacity(pairs.len());
    let mut config = SpatialConfig::default();
    let mut has_spatial = false;
    let mut location_lat: Option<String> = None;
    let mut location_lon: Option<String> = None;
    let mut point_lats: HashMap<String, String> = HashMap::new();
    let mut point_lons: HashMap<String, String> = HashMap::new();

    for (col_name, type_str) in pairs {
        let type_lower = type_str.to_lowercase();
        match type_lower.as_str() {
            "location.lat" => {
                location_lat = Some(col_name.clone());
                cleaned.push((col_name, "float".to_string()));
                has_spatial = true;
            }
            "location.lon" => {
                location_lon = Some(col_name.clone());
                cleaned.push((col_name, "float".to_string()));
                has_spatial = true;
            }
            "geometry" => {
                config.geometry = Some(col_name.clone());
                cleaned.push((col_name, "str".to_string()));
                has_spatial = true;
            }
            other => {
                if let Some(rest) = other.strip_prefix("point.") {
                    // `rest` must be exactly `<name>.lat` or `<name>.lon`; the
                    // name ends at the first dot.
                    let (name, axis_columns) = match rest.split_once('.') {
                        Some((name, "lat")) => (name, &mut point_lats),
                        Some((name, "lon")) => (name, &mut point_lons),
                        _ => {
                            return Err(format!(
                                "Invalid spatial type '{}' for column '{}'. \
                                 Expected 'point.<name>.lat' or 'point.<name>.lon'.",
                                type_str, col_name
                            ));
                        }
                    };
                    axis_columns.insert(name.to_string(), col_name.clone());
                    cleaned.push((col_name, "float".to_string()));
                    has_spatial = true;
                } else if let Some(name) = other.strip_prefix("shape.") {
                    // Everything after the first dot is the shape name.
                    config.shapes.insert(name.to_string(), col_name.clone());
                    cleaned.push((col_name, "str".to_string()));
                    has_spatial = true;
                } else {
                    cleaned.push((col_name, type_str));
                }
            }
        }
    }

    if !has_spatial {
        return Ok((None, cleaned));
    }

    match (location_lat, location_lon) {
        (Some(lat), Some(lon)) => config.location = Some((lat, lon)),
        (Some(_), None) | (None, Some(_)) => {
            return Err(
                "Incomplete location: both 'location.lat' and 'location.lon' must be specified."
                    .to_string(),
            );
        }
        (None, None) => {}
    }

    // Visit point names in sorted order so the reported error does not depend
    // on hash iteration order.
    let mut point_names: Vec<&String> = point_lats.keys().chain(point_lons.keys()).collect();
    point_names.sort_unstable();
    point_names.dedup();
    for name in point_names {
        match (point_lats.get(name), point_lons.get(name)) {
            (Some(lat), Some(lon)) => {
                config
                    .points
                    .insert(name.clone(), (lat.clone(), lon.clone()));
            }
            _ => {
                return Err(format!(
                    "Incomplete point '{}': both 'point.{}.lat' and 'point.{}.lon' must be specified.",
                    name, name, name
                ));
            }
        }
    }

    Ok((Some(config), cleaned))
}
/// Temporal column configuration, as assembled by
/// `parse_temporal_column_types_from_pairs`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct TemporalConfig {
/// Name of the column declared with the `validFrom` type.
pub valid_from: String,
/// Name of the column declared with the `validTo` type.
pub valid_to: String,
}
/// Result of temporal parsing: the optional temporal config plus the column
/// list with temporal type strings replaced by `datetime`.
pub type TemporalColumnParseResult = (Option<TemporalConfig>, Vec<(String, String)>);
/// Extracts the `validFrom` / `validTo` column declarations from
/// `(column name, type string)` pairs.
///
/// Type strings are matched case-insensitively. Matching columns are rewritten
/// to the `datetime` type; every other pair is passed through unchanged.
///
/// # Errors
///
/// Returns a message when only one of the two temporal types is present.
pub fn parse_temporal_column_types_from_pairs(
    pairs: Vec<(String, String)>,
) -> Result<TemporalColumnParseResult, String> {
    let mut valid_from_col: Option<String> = None;
    let mut valid_to_col: Option<String> = None;
    let mut cleaned: Vec<(String, String)> = Vec::with_capacity(pairs.len());

    for (col_name, type_str) in pairs {
        // Pick which bound (if any) this column declares.
        let bound = match type_str.to_lowercase().as_str() {
            "validfrom" => Some(&mut valid_from_col),
            "validto" => Some(&mut valid_to_col),
            _ => None,
        };
        match bound {
            Some(target) => {
                *target = Some(col_name.clone());
                cleaned.push((col_name, "datetime".to_string()));
            }
            None => cleaned.push((col_name, type_str)),
        }
    }

    let config = match (valid_from_col, valid_to_col) {
        (None, None) => None,
        (Some(valid_from), Some(valid_to)) => Some(TemporalConfig {
            valid_from,
            valid_to,
        }),
        _ => {
            return Err(
                "Incomplete temporal config: both 'validFrom' and 'validTo' column types must be specified."
                    .to_string(),
            );
        }
    };
    Ok((config, cleaned))
}
/// Lookup from a node id to its `NodeIndex`, in one of two representations.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub enum TypeIdIndex {
/// Keys are plain `u32`s; `insert` keeps this form while every id is a `Value::UniqueId`.
Integer(HashMap<u32, NodeIndex>),
/// Keys are arbitrary `Value`s.
General(HashMap<Value, NodeIndex>),
}
/// Parses strings shaped like `<letters><digits>` (for example `"abc123"`)
/// and returns the numeric part.
///
/// Yields `None` unless the text before the first ASCII digit is a non-empty
/// run of ASCII letters and everything from that digit onwards parses as a `u32`.
#[inline]
fn strip_prefix_to_u32(s: &str) -> Option<u32> {
    let first_digit = s.find(|c: char| c.is_ascii_digit())?;
    let (prefix, digits) = s.split_at(first_digit);
    let prefix_ok = !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_alphabetic());
    if prefix_ok {
        digits.parse().ok()
    } else {
        None
    }
}
impl TypeIdIndex {
pub fn get(&self, id: &Value) -> Option<NodeIndex> {
match self {
TypeIdIndex::Integer(map) => match id {
Value::UniqueId(u) => map.get(u).copied(),
Value::Int64(i) => {
if *i >= 0 && *i <= u32::MAX as i64 {
map.get(&(*i as u32)).copied()
} else {
None
}
}
Value::Float64(f) => {
if f.fract() == 0.0 {
let i = *f as i64;
if i >= 0 && i <= u32::MAX as i64 {
map.get(&(i as u32)).copied()
} else {
None
}
} else {
None
}
}
Value::String(s) => strip_prefix_to_u32(s).and_then(|u| map.get(&u).copied()),
_ => None,
},
TypeIdIndex::General(map) => {
if let Some(&idx) = map.get(id) {
return Some(idx);
}
match id {
Value::Int64(i) => {
if *i >= 0 && *i <= u32::MAX as i64 {
map.get(&Value::UniqueId(*i as u32)).copied()
} else {
None
}
}
Value::UniqueId(u) => map.get(&Value::Int64(*u as i64)).copied(),
Value::Float64(f) => {
if f.fract() == 0.0 {
let i = *f as i64;
if let Some(&idx) = map.get(&Value::Int64(i)) {
return Some(idx);
}
if i >= 0 && i <= u32::MAX as i64 {
return map.get(&Value::UniqueId(i as u32)).copied();
}
}
None
}
Value::String(s) => strip_prefix_to_u32(s).and_then(|u| {
map.get(&Value::UniqueId(u))
.or_else(|| map.get(&Value::Int64(u as i64)))
.copied()
}),
_ => None,
}
}
}
}
pub fn insert(&mut self, id: Value, idx: NodeIndex) {
match self {
TypeIdIndex::Integer(map) => {
if let Value::UniqueId(u) = id {
map.insert(u, idx);
} else {
let mut general: HashMap<Value, NodeIndex> =
map.drain().map(|(k, v)| (Value::UniqueId(k), v)).collect();
general.insert(id, idx);
*self = TypeIdIndex::General(general);
}
}
TypeIdIndex::General(map) => {
map.insert(id, idx);
}
}
}
pub fn iter(&self) -> Box<dyn Iterator<Item = (Value, NodeIndex)> + '_> {
match self {
TypeIdIndex::Integer(map) => {
Box::new(map.iter().map(|(&k, &v)| (Value::UniqueId(k), v)))
}
TypeIdIndex::General(map) => Box::new(map.iter().map(|(k, &v)| (k.clone(), v))),
}
}
}
impl Default for TypeIdIndex {
fn default() -> Self {
TypeIdIndex::General(HashMap::new())
}
}
/// Owned, string-keyed snapshot of a node, as produced by `NodeData::to_node_info`.
#[derive(Clone, Debug)]
pub struct NodeInfo {
/// The node's id value.
pub id: Value,
/// The node's title value.
pub title: Value,
/// Resolved node type name.
pub node_type: String,
/// Properties keyed by their resolved names.
pub properties: HashMap<String, Value>,
}
/// One operation recorded on a selection level (see `SelectionLevel::operations`).
///
/// Variants:
/// * `Filter` — a map from string keys to `FilterCondition`s
///   (presumably property name → condition; TODO confirm against the callers).
/// * `Sort` — a list of `(String, bool)` entries
///   (NOTE(review): looks like `(field, direction flag)` — the meaning of the
///   flag is not visible here).
/// * `Traverse` — a connection type with optional direction and node limit.
/// * `Custom` — a free-form string.
#[derive(Clone, Debug)]
pub enum SelectionOperation {
Filter(HashMap<String, FilterCondition>),
Sort(Vec<(String, bool)>), Traverse {
connection_type: String,
direction: Option<String>,
max_nodes: Option<usize>,
},
Custom(String), }
/// One level of a selection: groups of selected nodes plus the operations
/// recorded for the level.
#[derive(Clone, Debug)]
pub struct SelectionLevel {
    /// Selected nodes grouped under an optional parent node.
    pub selections: HashMap<Option<NodeIndex>, Vec<NodeIndex>>,
    /// Operations recorded on this level.
    pub operations: Vec<SelectionOperation>,
}

impl SelectionLevel {
    /// Creates a level with no groups and no operations.
    pub fn new() -> Self {
        Self {
            selections: HashMap::default(),
            operations: Vec::default(),
        }
    }

    /// Stores `children` as the group of `parent`, replacing an existing group.
    pub fn add_selection(&mut self, parent: Option<NodeIndex>, children: Vec<NodeIndex>) {
        self.selections.insert(parent, children);
    }

    /// Collects the nodes of every group into one vector.
    pub fn get_all_nodes(&self) -> Vec<NodeIndex> {
        self.iter_node_indices().collect()
    }

    /// True when the level has no groups at all (a group with an empty node
    /// list still counts as a group).
    pub fn is_empty(&self) -> bool {
        self.selections.is_empty()
    }

    /// Iterates over `(parent, children)` groups.
    pub fn iter_groups(&self) -> impl Iterator<Item = (&Option<NodeIndex>, &Vec<NodeIndex>)> {
        self.selections.iter()
    }

    /// Iterates over the nodes of every group without allocating.
    pub fn iter_node_indices(&self) -> impl Iterator<Item = NodeIndex> + '_ {
        self.selections.values().flatten().copied()
    }

    /// Total number of nodes across all groups.
    pub fn node_count(&self) -> usize {
        self.selections.values().map(Vec::len).sum()
    }
}
/// A single entry of an execution plan.
#[derive(Clone, Debug)]
pub struct PlanStep {
    /// Name of the operation.
    pub operation: String,
    /// Node type the step applies to, when there is one.
    pub node_type: Option<String>,
    /// Row estimate supplied when the step was created.
    pub estimated_rows: usize,
    /// Observed row count; `None` until set through [`PlanStep::with_actual_rows`].
    pub actual_rows: Option<usize>,
}

impl PlanStep {
    /// Creates a step with an estimate and no observed row count.
    pub fn new(operation: &str, node_type: Option<&str>, estimated_rows: usize) -> Self {
        Self {
            operation: operation.to_owned(),
            node_type: node_type.map(str::to_owned),
            estimated_rows,
            actual_rows: None,
        }
    }

    /// Returns the step with its observed row count set to `actual`.
    pub fn with_actual_rows(self, actual: usize) -> Self {
        Self {
            actual_rows: Some(actual),
            ..self
        }
    }
}
/// A stack of selection levels together with the recorded execution plan.
///
/// Note that the derived `Default` yields a selection with no levels, whereas
/// [`CurrentSelection::new`] starts with one empty level.
#[derive(Clone, Default)]
pub struct CurrentSelection {
    levels: Vec<SelectionLevel>,
    current_level: usize,
    execution_plan: Vec<PlanStep>,
}

impl CurrentSelection {
    /// Creates a selection holding a single empty level.
    pub fn new() -> Self {
        let mut fresh = Self::default();
        fresh.add_level();
        fresh
    }

    /// Appends an empty level and makes it the current one.
    pub fn add_level(&mut self) {
        // The new level lands at the index equal to the old length.
        self.current_level = self.levels.len();
        self.levels.push(SelectionLevel::new());
    }

    /// Drops all levels and plan steps, then starts over with one empty level.
    pub fn clear(&mut self) {
        self.execution_plan.clear();
        self.levels.clear();
        self.current_level = 0;
        self.add_level();
    }

    /// Appends `step` to the execution plan.
    pub fn add_plan_step(&mut self, step: PlanStep) {
        self.execution_plan.push(step);
    }

    /// The recorded plan steps, oldest first.
    pub fn get_execution_plan(&self) -> &[PlanStep] {
        self.execution_plan.as_slice()
    }

    /// Removes every recorded plan step.
    pub fn clear_execution_plan(&mut self) {
        self.execution_plan.clear();
    }

    /// Number of levels.
    pub fn get_level_count(&self) -> usize {
        self.levels.len()
    }

    /// The level at `index`, if it exists.
    pub fn get_level(&self, index: usize) -> Option<&SelectionLevel> {
        self.levels.get(index)
    }

    /// Mutable access to the level at `index`, if it exists.
    pub fn get_level_mut(&mut self, index: usize) -> Option<&mut SelectionLevel> {
        self.levels.get_mut(index)
    }

    /// Node count of the last level, or 0 when there are no levels.
    pub fn current_node_count(&self) -> usize {
        self.levels.last().map_or(0, |level| level.node_count())
    }

    /// True when the last level has at least one recorded operation.
    pub fn has_active_selection(&self) -> bool {
        self.levels
            .last()
            .is_some_and(|level| !level.operations.is_empty())
    }

    /// Iterates over the nodes of the last level (nothing when there are no levels).
    pub fn current_node_indices(&self) -> impl Iterator<Item = NodeIndex> + '_ {
        let last_level = self.levels.last();
        last_level
            .into_iter()
            .flat_map(|level| level.iter_node_indices())
    }

    /// Type name of the first node yielded by [`Self::current_node_indices`],
    /// or `None` when there is no such node or it is missing from `graph`.
    pub fn first_node_type(&self, graph: &DirGraph) -> Option<String> {
        let first = self.current_node_indices().next()?;
        let node = graph.graph.node_weight(first)?;
        Some(node.node_type_str(&graph.interner).to_string())
    }
}
/// A [`CurrentSelection`] behind an `Arc`.
///
/// Cloning only bumps the reference count. Mutable access goes through
/// `Arc::make_mut`, which clones the inner selection first when the `Arc` is
/// shared with another handle.
#[derive(Clone, Default)]
pub struct CowSelection {
    inner: Arc<CurrentSelection>,
}

impl CowSelection {
    /// Wraps a fresh [`CurrentSelection::new`].
    pub fn new() -> Self {
        let selection = CurrentSelection::new();
        Self {
            inner: Arc::new(selection),
        }
    }
}

impl std::ops::Deref for CowSelection {
    type Target = CurrentSelection;

    /// Read-only access to the wrapped selection.
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.inner.as_ref()
    }
}

impl std::ops::DerefMut for CowSelection {
    /// Mutable access to the wrapped selection, via `Arc::make_mut`.
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let shared = &mut self.inner;
        Arc::make_mut(shared)
    }
}
/// Key of a single-property index: a pair of strings.
// NOTE(review): presumably (node type, property name) — confirm against the index code.
pub type IndexKey = (String, String);
/// Key of a composite index: a string plus a list of strings.
// NOTE(review): presumably (node type, property names) — confirm against the index code.
pub type CompositeIndexKey = (String, Vec<String>);
/// A tuple of values that can be hashed and compared as one unit.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CompositeValue(pub Vec<Value>);
/// Version information stored alongside saved data.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SaveMetadata {
    /// Version number of the on-disk format.
    pub format_version: u32,
    /// Version of the library that wrote the data.
    pub library_version: String,
}

impl SaveMetadata {
    /// Metadata for this build: format version 2 and the crate version taken
    /// from `CARGO_PKG_VERSION` at compile time.
    pub fn current() -> Self {
        let library_version = String::from(env!("CARGO_PKG_VERSION"));
        Self {
            format_version: 2,
            library_version,
        }
    }
}
/// A `(src, conn, tgt)` triple of names with an associated count.
// NOTE(review): presumably source node type, connection type, target node type
// and the number of such edges — confirm against the code that builds these.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConnectivityTriple {
pub src: String,
pub conn: String,
pub tgt: String,
pub count: usize,
}
/// Type-level description of a connection: the type names seen on each end
/// and a string-to-string map of property types.
#[derive(Debug, Clone, Default)]
pub struct ConnectionTypeInfo {
    pub source_types: HashSet<String>,
    pub target_types: HashSet<String>,
    pub property_types: HashMap<String, String>,
}

impl Serialize for ConnectionTypeInfo {
    /// Serializes the three fields with their contents in sorted order, so the
    /// output does not depend on hash iteration order.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeStruct;

        /// Borrows the members of `set` in ascending order.
        fn ordered(set: &HashSet<String>) -> Vec<&String> {
            let mut names: Vec<&String> = set.iter().collect();
            names.sort_unstable();
            names
        }

        let sources = ordered(&self.source_types);
        let targets = ordered(&self.target_types);
        // A BTreeMap iterates (and therefore serializes) in key order.
        let property_types: std::collections::BTreeMap<&String, &String> =
            self.property_types.iter().collect();

        let mut out = serializer.serialize_struct("ConnectionTypeInfo", 3)?;
        out.serialize_field("source_types", &sources)?;
        out.serialize_field("target_types", &targets)?;
        out.serialize_field("property_types", &property_types)?;
        out.end()
    }
}

impl<'de> Deserialize<'de> for ConnectionTypeInfo {
    /// Accepts both the current layout (`source_types` / `target_types` sets)
    /// and the singular `source_type` / `target_type` fields; a set, when
    /// present, takes precedence over the singular field.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        #[derive(Deserialize)]
        struct Legacy {
            source_type: Option<String>,
            target_type: Option<String>,
            #[serde(default)]
            source_types: Option<HashSet<String>>,
            #[serde(default)]
            target_types: Option<HashSet<String>>,
            #[serde(default)]
            property_types: HashMap<String, String>,
        }

        let Legacy {
            source_type,
            target_type,
            source_types,
            target_types,
            property_types,
        } = Legacy::deserialize(deserializer)?;

        // A lone singular value becomes a one-element set; nothing at all
        // becomes an empty set.
        let source_types = match source_types {
            Some(set) => set,
            None => source_type.into_iter().collect(),
        };
        let target_types = match target_types {
            Some(set) => set,
            None => target_type.into_iter().collect(),
        };

        Ok(ConnectionTypeInfo {
            source_types,
            target_types,
            property_types,
        })
    }
}
/// Fixed-dimension embedding vectors stored back to back in one flat buffer.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EmbeddingStore {
/// Number of `f32` values per embedding.
pub dimension: usize,
/// Flat buffer; the embedding in slot `s` starts at `s * dimension`.
pub data: Vec<f32>,
/// Node index → slot number.
pub node_to_slot: HashMap<usize, usize>,
/// Slot number → node index (reverse of `node_to_slot`).
pub slot_to_node: Vec<usize>,
/// Optional metric name; defaults to `None` when absent from serialized data.
// NOTE(review): presumably the distance metric used for similarity search — confirm.
#[serde(default)]
pub metric: Option<String>,
}
impl EmbeddingStore {
    /// Creates an empty store for embeddings of `dimension` values, with no metric set.
    pub fn new(dimension: usize) -> Self {
        EmbeddingStore {
            dimension,
            data: Vec::new(),
            node_to_slot: HashMap::new(),
            slot_to_node: Vec::new(),
            metric: None,
        }
    }

    /// Creates an empty store for embeddings of `dimension` values and records `metric`.
    pub fn with_metric(dimension: usize, metric: &str) -> Self {
        EmbeddingStore {
            metric: Some(metric.to_string()),
            ..Self::new(dimension)
        }
    }

    /// Stores `embedding` for `node_index` and returns the slot it occupies.
    ///
    /// A node that already has an embedding is overwritten in place; a new
    /// node is appended in the next free slot.
    ///
    /// # Panics
    ///
    /// Panics if `embedding.len()` differs from the store's `dimension`.
    /// Overwriting already panicked on a mismatch, but appending used to
    /// accept any length and silently shifted every later slot.
    pub fn set_embedding(&mut self, node_index: usize, embedding: &[f32]) -> usize {
        assert_eq!(
            embedding.len(),
            self.dimension,
            "embedding has {} values but the store dimension is {}",
            embedding.len(),
            self.dimension
        );
        if let Some(&slot) = self.node_to_slot.get(&node_index) {
            let start = slot * self.dimension;
            self.data[start..start + self.dimension].copy_from_slice(embedding);
            slot
        } else {
            let slot = self.slot_to_node.len();
            self.node_to_slot.insert(node_index, slot);
            self.slot_to_node.push(node_index);
            self.data.extend_from_slice(embedding);
            slot
        }
    }

    /// Returns the embedding stored for `node_index`.
    ///
    /// Yields `None` when the node has no embedding, and also when the slot
    /// table points outside `data` (possible for a store whose public fields
    /// were edited or deserialized inconsistently) instead of panicking.
    #[inline]
    pub fn get_embedding(&self, node_index: usize) -> Option<&[f32]> {
        let slot = *self.node_to_slot.get(&node_index)?;
        let start = slot.checked_mul(self.dimension)?;
        let end = start.checked_add(self.dimension)?;
        self.data.get(start..end)
    }

    /// Number of stored embeddings.
    #[inline]
    pub fn len(&self) -> usize {
        self.slot_to_node.len()
    }
}
/// A graph node: id, title, interned type key and its property storage.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodeData {
/// Node id. When this is `Value::Null` and the properties are columnar,
/// the `id()` accessor reads the id from the column store instead.
pub id: Value,
/// Node title, with the same columnar fallback in `title()`.
pub title: Value,
/// Interned node type name.
pub node_type: InternedKey,
/// The node's properties in one of the `PropertyStorage` layouts.
pub(crate) properties: PropertyStorage,
}
impl NodeData {
pub fn new(
id: Value,
title: Value,
node_type: String,
properties: HashMap<String, Value>,
interner: &mut StringInterner,
) -> Self {
let type_key = interner.get_or_intern(&node_type);
let interned_props = properties
.into_iter()
.map(|(k, v)| {
let key = interner.get_or_intern(&k);
(key, v)
})
.collect();
NodeData {
id,
title,
node_type: type_key,
properties: PropertyStorage::Map(interned_props),
}
}
pub fn new_compact(
id: Value,
title: Value,
node_type: String,
properties: HashMap<String, Value>,
interner: &mut StringInterner,
schema: &Arc<TypeSchema>,
) -> Self {
let type_key = interner.get_or_intern(&node_type);
let pairs = properties.into_iter().map(|(k, v)| {
let key = interner.get_or_intern(&k);
(key, v)
});
NodeData {
id,
title,
node_type: type_key,
properties: PropertyStorage::from_compact(pairs, schema),
}
}
pub fn new_compact_preinterned(
id: Value,
title: Value,
node_type: InternedKey,
properties: Vec<(InternedKey, Value)>,
schema: &Arc<TypeSchema>,
) -> Self {
NodeData {
id,
title,
node_type,
properties: PropertyStorage::from_compact(properties, schema),
}
}
pub fn new_preinterned(
id: Value,
title: Value,
node_type: InternedKey,
properties: Vec<(InternedKey, Value)>,
) -> Self {
let map: HashMap<InternedKey, Value> = properties.into_iter().collect();
NodeData {
id,
title,
node_type,
properties: PropertyStorage::Map(map),
}
}
#[inline]
pub fn id(&self) -> Cow<'_, Value> {
if matches!(self.id, Value::Null) {
if let PropertyStorage::Columnar { store, row_id } = &self.properties {
if let Some(v) = store.get_id(*row_id) {
return Cow::Owned(v);
}
}
}
Cow::Borrowed(&self.id)
}
#[inline]
pub fn title(&self) -> Cow<'_, Value> {
if matches!(self.title, Value::Null) {
if let PropertyStorage::Columnar { store, row_id } = &self.properties {
if let Some(v) = store.get_title(*row_id) {
return Cow::Owned(v);
}
}
}
Cow::Borrowed(&self.title)
}
#[inline]
pub fn node_type_str<'a>(&self, interner: &'a StringInterner) -> &'a str {
interner.resolve(self.node_type)
}
#[inline]
pub fn get_field_ref(&self, field: &str) -> Option<Cow<'_, Value>> {
match field {
"id" => Some(self.id()),
"title" => Some(self.title()),
_ => self.properties.get(InternedKey::from_str(field)),
}
}
pub fn field_contains_ci(&self, field: &str, needle_lower: &str) -> bool {
self.get_field_ref(field)
.and_then(|v| match &*v {
Value::String(s) => Some(s.to_lowercase().contains(needle_lower)),
_ => None,
})
.unwrap_or(false)
}
pub fn field_starts_with_ci(&self, field: &str, prefix_lower: &str) -> bool {
self.get_field_ref(field)
.and_then(|v| match &*v {
Value::String(s) => Some(s.to_lowercase().starts_with(prefix_lower)),
_ => None,
})
.unwrap_or(false)
}
#[inline]
pub fn get_property(&self, key: &str) -> Option<Cow<'_, Value>> {
self.properties.get(InternedKey::from_str(key))
}
#[inline]
pub fn get_property_value(&self, key: &str) -> Option<Value> {
self.properties.get_value(InternedKey::from_str(key))
}
#[inline]
pub fn property_keys<'a>(
&'a self,
interner: &'a StringInterner,
) -> impl Iterator<Item = &'a str> + 'a {
self.properties.keys(interner)
}
#[inline]
pub fn property_iter<'a>(
&'a self,
interner: &'a StringInterner,
) -> impl Iterator<Item = (&'a str, &'a Value)> + 'a {
self.properties.iter(interner)
}
#[inline]
pub fn property_count(&self) -> usize {
self.properties.len()
}
#[inline]
pub fn has_property(&self, key: &str) -> bool {
self.properties.contains(InternedKey::from_str(key))
}
#[inline]
pub fn properties_cloned(&self, interner: &StringInterner) -> HashMap<String, Value> {
match &self.properties {
PropertyStorage::Columnar { .. } => {
self.properties.iter_owned(interner).into_iter().collect()
}
_ => self
.properties
.iter(interner)
.map(|(k, v)| (k.to_string(), v.clone()))
.collect(),
}
}
#[inline]
pub fn get_node_type_ref<'a>(&self, interner: &'a StringInterner) -> &'a str {
interner.resolve(self.node_type)
}
pub fn to_node_info(&self, interner: &StringInterner) -> NodeInfo {
NodeInfo {
id: self.id().into_owned(),
title: self.title().into_owned(),
node_type: self.node_type_str(interner).to_string(),
properties: self.properties_cloned(interner),
}
}
/// Stores `value` under `key`.
///
/// The key is obtained from `interner.get_or_intern`, which is why a
/// mutable interner is required, and the pair is then inserted into the
/// property storage. Any result of the storage's `insert` is discarded.
#[inline]
pub fn set_property(&mut self, key: &str, value: Value, interner: &mut StringInterner) {
    self.properties.insert(interner.get_or_intern(key), value);
}
/// Removes the property stored under `key` and returns whatever the
/// storage's `remove` yields for it.
///
/// The name is turned into a key with `InternedKey::from_str`; no interner
/// is consulted here.
#[inline]
pub fn remove_property(&mut self, key: &str) -> Option<Value> {
    let interned = InternedKey::from_str(key);
    self.properties.remove(interned)
}
/// Resets the property stored under `key` to `Value::Null`.
///
/// Unlike `remove_property`, an entry for the key is left behind: the
/// current value is removed and a `Value::Null` is inserted in its place.
/// Returns the value obtained from the removal step.
#[inline]
pub fn clear_property(&mut self, key: &str) -> Option<Value> {
    let slot_key = InternedKey::from_str(key);
    let previous = self.properties.remove(slot_key);
    // Re-insert under the same key so the property still exists, as Null.
    self.properties.insert(slot_key, Value::Null);
    previous
}
}
/// Data attached to a graph edge: an interned connection type plus a flat
/// list of interned-key/value property pairs.
pub struct EdgeData {
/// Interned key of the connection type; resolved to a string with
/// `connection_type_str`.
pub connection_type: InternedKey,
/// Property pairs kept in a `Vec`; `get_property` scans it linearly.
pub properties: Vec<(InternedKey, Value)>,
}
impl Serialize for EdgeData {
    /// Serializes as a two-field struct named `"EdgeData"`.
    ///
    /// `connection_type` is written as-is. `properties` is written as a map
    /// rather than as the in-memory `Vec` of pairs, so if the same key
    /// appears more than once only the last pair is emitted.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeStruct;

        // Borrowed view of the pairs as a map; later duplicates overwrite
        // earlier ones.
        let mut props_map: HashMap<&InternedKey, &Value> =
            HashMap::with_capacity(self.properties.len());
        for (key, value) in &self.properties {
            props_map.insert(key, value);
        }

        let mut state = serializer.serialize_struct("EdgeData", 2)?;
        state.serialize_field("connection_type", &self.connection_type)?;
        state.serialize_field("properties", &props_map)?;
        state.end()
    }
}
impl<'de> Deserialize<'de> for EdgeData {
    /// Deserializes through a private helper that mirrors the serialized
    /// shape, then flattens the property map into the `Vec` of pairs used
    /// in memory.
    ///
    /// A missing `properties` field is accepted and treated as empty
    /// (`#[serde(default)]`).
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        #[derive(Deserialize)]
        struct EdgeDataHelper {
            connection_type: InternedKey,
            #[serde(default)]
            properties: HashMap<InternedKey, Value>,
        }

        EdgeDataHelper::deserialize(deserializer).map(
            |EdgeDataHelper {
                 connection_type,
                 properties,
             }| EdgeData {
                connection_type,
                properties: properties.into_iter().collect(),
            },
        )
    }
}
impl std::fmt::Debug for EdgeData {
    /// Formats as a struct named `EdgeData` showing `connection_type` and
    /// `properties`, in that order.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("EdgeData");
        out.field("connection_type", &self.connection_type);
        out.field("properties", &self.properties);
        out.finish()
    }
}
impl Clone for EdgeData {
    /// Copies the connection-type key and clones the property pairs.
    fn clone(&self) -> Self {
        let EdgeData {
            connection_type,
            properties,
        } = self;

        Self {
            connection_type: *connection_type,
            properties: properties.clone(),
        }
    }
}
impl EdgeData {
    /// Builds edge data from string names.
    ///
    /// The connection type is interned first, then every property name, all
    /// through `interner.get_or_intern`. Property pairs are stored in the
    /// order the input map yields them.
    pub fn new(
        connection_type: String,
        properties: HashMap<String, Value>,
        interner: &mut StringInterner,
    ) -> Self {
        let type_key = interner.get_or_intern(&connection_type);

        let mut pairs: Vec<(InternedKey, Value)> = Vec::with_capacity(properties.len());
        for (name, value) in properties {
            pairs.push((interner.get_or_intern(&name), value));
        }

        Self {
            connection_type: type_key,
            properties: pairs,
        }
    }

    /// Builds edge data from keys that are already interned; the arguments
    /// are stored as given.
    pub fn new_interned(
        connection_type: InternedKey,
        properties: Vec<(InternedKey, Value)>,
    ) -> Self {
        Self {
            connection_type,
            properties,
        }
    }

    /// Resolves the connection-type key to a string slice owned by
    /// `interner`.
    #[inline]
    pub fn connection_type_str<'a>(&self, interner: &'a StringInterner) -> &'a str {
        let type_key = self.connection_type;
        interner.resolve(type_key)
    }

    /// Returns the value of the first property pair whose key equals
    /// `InternedKey::from_str(key)`, or `None` when no pair matches.
    ///
    /// This is a linear scan over the property list.
    #[inline]
    pub fn get_property(&self, key: &str) -> Option<&Value> {
        let wanted = InternedKey::from_str(key);
        for (candidate, value) in &self.properties {
            if *candidate == wanted {
                return Some(value);
            }
        }
        None
    }

    /// Iterates over property names, resolved through `interner`, in
    /// storage order.
    #[inline]
    pub fn property_keys<'a>(
        &'a self,
        interner: &'a StringInterner,
    ) -> impl Iterator<Item = &'a str> {
        let pairs = self.properties.iter();
        pairs.map(move |(name_key, _)| interner.resolve(*name_key))
    }

    /// Iterates over `(name, value)` pairs, with names resolved through
    /// `interner`, in storage order.
    #[inline]
    pub fn property_iter<'a>(
        &'a self,
        interner: &'a StringInterner,
    ) -> impl Iterator<Item = (&'a str, &'a Value)> {
        let pairs = self.properties.iter();
        pairs.map(move |(name_key, value)| (interner.resolve(*name_key), value))
    }

    /// Returns the number of stored property pairs.
    #[inline]
    pub fn property_count(&self) -> usize {
        let pairs = &self.properties;
        pairs.len()
    }

    /// Copies the properties into an owned `HashMap<String, Value>`.
    ///
    /// Names are resolved through `interner`. If a key occurs more than
    /// once, the later pair wins.
    #[inline]
    pub fn properties_cloned(&self, interner: &StringInterner) -> HashMap<String, Value> {
        let mut owned = HashMap::with_capacity(self.properties.len());
        for (name_key, value) in &self.properties {
            owned.insert(interner.resolve(*name_key).to_string(), value.clone());
        }
        owned
    }
}
/// Schema description for one node type.
///
/// NOTE(review): the code that consumes this type is not in view; the
/// field descriptions below are inferred from the field names and should be
/// confirmed against the validator.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct NodeSchemaDefinition {
/// Names of fields a node of this type is presumably required to have.
pub required_fields: Vec<String>,
/// Names of fields that are presumably allowed but not required.
pub optional_fields: Vec<String>,
/// Presumably maps a field name to the name of its expected type.
pub field_types: HashMap<String, String>,
}
/// Schema description for one connection (edge) type.
///
/// NOTE(review): the code that consumes this type is not in view; the
/// field descriptions below are inferred from the field names and should be
/// confirmed against the validator.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConnectionSchemaDefinition {
/// Presumably the node type expected at the source end.
pub source_type: String,
/// Presumably the node type expected at the target end.
pub target_type: String,
/// Optional cardinality descriptor; its accepted values are not visible
/// here.
pub cardinality: Option<String>,
/// Names of properties a connection is presumably required to have.
pub required_properties: Vec<String>,
/// Presumably maps a property name to the name of its expected type.
pub property_types: HashMap<String, String>,
}
/// A collection of node and connection schemas, each keyed by a `String`
/// (the type name passed to `add_node_schema` / `add_connection_schema`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SchemaDefinition {
/// Node schemas, keyed by node type name.
pub node_schemas: HashMap<String, NodeSchemaDefinition>,
/// Connection schemas, keyed by connection type name.
pub connection_schemas: HashMap<String, ConnectionSchemaDefinition>,
}
impl SchemaDefinition {
pub fn new() -> Self {
SchemaDefinition {
node_schemas: HashMap::new(),
connection_schemas: HashMap::new(),
}
}
pub fn add_node_schema(&mut self, node_type: String, schema: NodeSchemaDefinition) {
self.node_schemas.insert(node_type, schema);
}
pub fn add_connection_schema(
&mut self,
connection_type: String,
schema: ConnectionSchemaDefinition,
) {
self.connection_schemas.insert(connection_type, schema);
}
}
/// One problem reported by schema validation.
///
/// Each variant carries the context needed to render a human-readable
/// message through its `Display` implementation.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// A node lacks a field that is marked as required.
    MissingRequiredField {
        node_type: String,
        node_title: String,
        field: String,
    },
    /// A node field holds a value of a different type than expected.
    TypeMismatch {
        node_type: String,
        node_title: String,
        field: String,
        expected_type: String,
        actual_type: String,
    },
    /// A connection joins node types other than the expected pair.
    InvalidConnectionEndpoint {
        connection_type: String,
        expected_source: String,
        expected_target: String,
        actual_source: String,
        actual_target: String,
    },
    /// A connection lacks a property that is marked as required.
    MissingConnectionProperty {
        connection_type: String,
        source_title: String,
        target_title: String,
        property: String,
    },
    /// The graph contains `count` nodes of a type the schema does not define.
    UndefinedNodeType { node_type: String, count: usize },
    /// The graph contains `count` connections of a type the schema does not
    /// define.
    UndefinedConnectionType {
        connection_type: String,
        count: usize,
    },
}

impl std::fmt::Display for ValidationError {
    /// Renders a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::MissingRequiredField {
                node_type,
                node_title,
                field,
            } => write!(
                f,
                "Missing required field '{}' on {} node '{}'",
                field, node_type, node_title
            ),
            Self::TypeMismatch {
                node_type,
                node_title,
                field,
                expected_type,
                actual_type,
            } => write!(
                f,
                "Type mismatch on {} node '{}': field '{}' expected {}, got {}",
                node_type, node_title, field, expected_type, actual_type
            ),
            Self::InvalidConnectionEndpoint {
                connection_type,
                expected_source,
                expected_target,
                actual_source,
                actual_target,
            } => write!(
                f,
                "Invalid connection '{}': expected {}->{} but found {}->{}",
                connection_type, expected_source, expected_target, actual_source, actual_target
            ),
            Self::MissingConnectionProperty {
                connection_type,
                source_title,
                target_title,
                property,
            } => write!(
                f,
                "Missing required property '{}' on {} connection from '{}' to '{}'",
                property, connection_type, source_title, target_title
            ),
            Self::UndefinedNodeType { node_type, count } => write!(
                f,
                "Node type '{}' ({} nodes) exists in graph but not defined in schema",
                node_type, count
            ),
            Self::UndefinedConnectionType {
                connection_type,
                count,
            } => write!(
                f,
                "Connection type '{}' ({} connections) exists in graph but not defined in schema",
                connection_type, count
            ),
        }
    }
}
#[cfg(test)]
mod strip_prefix_tests {
    use super::strip_prefix_to_u32;

    /// A letter prefix followed by digits yields the numeric part.
    #[test]
    fn strips_standard_prefixes() {
        let cases = [
            ("Q76", 76),
            ("P31", 31),
            ("E5", 5),
            ("L0", 0),
            ("NODE42", 42),
        ];
        for &(input, expected) in &cases {
            assert_eq!(
                strip_prefix_to_u32(input),
                Some(expected),
                "input: {:?}",
                input
            );
        }
    }

    /// Inputs with no letter prefix, or with anything other than a plain
    /// run of digits after the prefix, are rejected.
    #[test]
    fn rejects_inputs_without_a_leading_alpha_prefix() {
        let rejected = ["76", "", "-76", "Q 76", "Q"];
        for &input in &rejected {
            assert_eq!(strip_prefix_to_u32(input), None, "input: {:?}", input);
        }
    }

    /// A numeric part too large for `u32` is rejected.
    #[test]
    fn rejects_overflow() {
        let too_big = "Q99999999999";
        assert_eq!(strip_prefix_to_u32(too_big), None);
    }
}
// Tests for graph maintenance operations on `DirGraph`: `graph_info`,
// `reindex`, `vacuum`, incremental property-index updates, and the
// columnar property-storage mode.
#[cfg(test)]
mod maintenance_tests {
use super::*;
use crate::graph::storage::{GraphRead, GraphWrite};
// Builds a graph with `num_nodes` "Person" nodes. Node `i` has id
// `UniqueId(i)`, title "Person_<i>" and an `age` property of `20 + i`, and
// is registered in `type_indices`. Despite its name, `num_edges` is a flag:
// when true, consecutive nodes are chained with "KNOWS" edges (i -> i + 1).
fn make_test_graph(num_nodes: usize, num_edges: bool) -> DirGraph {
let mut g = DirGraph::new();
for i in 0..num_nodes {
let mut props = HashMap::new();
props.insert("age".to_string(), Value::Int64(20 + i as i64));
let node = NodeData::new(
Value::UniqueId(i as u32),
Value::String(format!("Person_{}", i)),
"Person".to_string(),
props,
&mut g.interner,
);
let idx = g.graph.add_node(node);
g.type_indices
.entry_or_default("Person".to_string())
.push(idx);
}
if num_edges {
// saturating_sub keeps the range empty for a zero-node graph.
for i in 0..(num_nodes.saturating_sub(1)) {
let src = NodeIndex::new(i);
let tgt = NodeIndex::new(i + 1);
g.graph.add_edge(
src,
tgt,
EdgeData::new("KNOWS".to_string(), HashMap::new(), &mut g.interner),
);
}
}
g
}
// A freshly built graph reports no tombstones and zero fragmentation.
#[test]
fn test_graph_info_clean() {
let g = make_test_graph(5, true);
let info = g.graph_info();
assert_eq!(info.node_count, 5);
assert_eq!(info.node_capacity, 5);
assert_eq!(info.node_tombstones, 0);
assert_eq!(info.edge_count, 4);
assert_eq!(info.fragmentation_ratio, 0.0);
assert_eq!(info.type_count, 1);
}
// Removing one of five nodes keeps capacity at 5 and leaves one tombstone,
// i.e. a fragmentation ratio of about 1/5.
#[test]
fn test_graph_info_after_deletion() {
let mut g = make_test_graph(5, false);
g.graph.remove_node(NodeIndex::new(2));
let info = g.graph_info();
assert_eq!(info.node_count, 4);
assert_eq!(info.node_capacity, 5); assert_eq!(info.node_tombstones, 1);
assert!(info.fragmentation_ratio > 0.19 && info.fragmentation_ratio < 0.21);
}
// An empty graph reports zero counts and zero fragmentation.
#[test]
fn test_graph_info_empty() {
let g = DirGraph::new();
let info = g.graph_info();
assert_eq!(info.node_count, 0);
assert_eq!(info.node_capacity, 0);
assert_eq!(info.fragmentation_ratio, 0.0);
}
// `reindex` repopulates `type_indices` after they have been cleared.
#[test]
fn test_reindex_rebuilds_type_indices() {
let mut g = make_test_graph(5, false);
g.type_indices.clear();
assert!(g.type_indices.is_empty());
g.reindex();
assert_eq!(g.type_indices.len(), 1);
assert_eq!(g.type_indices.get("Person").unwrap().len(), 5);
}
// `reindex` refills an existing property index whose contents were wiped.
#[test]
fn test_reindex_rebuilds_property_indices() {
let mut g = make_test_graph(5, false);
g.create_index("Person", "age");
assert!(g.has_index("Person", "age"));
// Empty the index in place while keeping it registered.
g.property_indices
.get_mut(&("Person".to_string(), "age".to_string()))
.unwrap()
.clear();
g.reindex();
let stats = g.get_index_stats("Person", "age").unwrap();
// Ages are 20..=24, so all five values are distinct.
assert_eq!(stats.unique_values, 5); assert_eq!(stats.total_entries, 5);
}
// `reindex` refills an existing composite index whose contents were wiped.
#[test]
fn test_reindex_rebuilds_composite_indices() {
let mut g = make_test_graph(5, false);
g.create_composite_index("Person", &["age"]);
assert!(g.has_composite_index("Person", &["age".to_string()]));
g.composite_indices.values_mut().for_each(|v| v.clear());
g.reindex();
let stats = g
.get_composite_index_stats("Person", &["age".to_string()])
.unwrap();
assert_eq!(stats.unique_values, 5);
}
// Id indices are dropped by `reindex` rather than rebuilt.
#[test]
fn test_reindex_clears_id_indices() {
let mut g = make_test_graph(3, false);
g.build_id_index("Person");
assert!(g.id_indices.contains_key("Person"));
g.reindex();
assert!(g.id_indices.is_empty());
}
// Removing a node directly from `g.graph` leaves `type_indices` stale;
// `reindex` drops the removed node's index from them.
#[test]
fn test_reindex_after_deletion() {
let mut g = make_test_graph(5, false);
g.graph.remove_node(NodeIndex::new(2));
// Still 5: the type index has not been told about the removal.
assert_eq!(g.type_indices.get("Person").unwrap().len(), 5);
g.reindex();
assert_eq!(g.type_indices.get("Person").unwrap().len(), 4);
assert!(!g
.type_indices
.get("Person")
.unwrap()
.contains(&NodeIndex::new(2)));
}
// With no tombstones, `vacuum` returns an empty mapping and changes nothing.
#[test]
fn test_vacuum_noop_when_clean() {
let mut g = make_test_graph(5, true);
let mapping = g.vacuum();
assert!(mapping.is_empty()); assert_eq!(g.graph.node_count(), 5);
assert_eq!(g.graph_info().node_tombstones, 0);
}
// After a deletion, `vacuum` removes the tombstone, shrinks capacity to the
// live node count, and returns one mapping entry per surviving node.
#[test]
fn test_vacuum_compacts_after_deletion() {
let mut g = make_test_graph(5, true);
g.graph.remove_node(NodeIndex::new(2));
assert_eq!(g.graph.node_count(), 4);
assert_eq!(g.graph_info().node_tombstones, 1);
let mapping = g.vacuum();
assert_eq!(g.graph.node_count(), 4);
assert_eq!(g.graph_info().node_tombstones, 0);
assert_eq!(g.graph_info().node_capacity, 4);
assert_eq!(mapping.len(), 4);
}
// Surviving nodes keep their data (checked via titles) across `vacuum`.
#[test]
fn test_vacuum_preserves_node_data() {
let mut g = make_test_graph(3, false);
g.graph.remove_node(NodeIndex::new(1));
let mapping = g.vacuum();
let mut titles: Vec<String> = Vec::new();
for idx in g.graph.node_indices() {
if let Some(node) = g.graph.node_weight(idx) {
if let Value::String(s) = &*node.title() {
titles.push(s.clone());
}
}
}
// Sort so the assertion does not depend on post-vacuum node order.
titles.sort();
assert_eq!(titles, vec!["Person_0", "Person_2"]);
assert_eq!(mapping.len(), 2);
}
// A 4-node chain has 3 edges; after node 0 is removed and the graph is
// vacuumed, the 2 edges between the remaining nodes are still present.
#[test]
fn test_vacuum_preserves_edges() {
let mut g = make_test_graph(4, true);
g.graph.remove_node(NodeIndex::new(0));
let _mapping = g.vacuum();
assert_eq!(g.graph.edge_count(), 2);
assert_eq!(g.graph.node_count(), 3);
}
// After `vacuum`, every index listed in `type_indices` points at a live node.
#[test]
fn test_vacuum_rebuilds_type_indices() {
let mut g = make_test_graph(5, false);
g.graph.remove_node(NodeIndex::new(2));
g.vacuum();
assert_eq!(g.type_indices.get("Person").unwrap().len(), 4);
for idx in g.type_indices.get("Person").unwrap().iter() {
assert!(g.graph.node_weight(idx).is_some());
}
}
// A property index survives `vacuum` and only covers the remaining nodes.
#[test]
fn test_vacuum_rebuilds_property_indices() {
let mut g = make_test_graph(5, false);
g.create_index("Person", "age");
g.graph.remove_node(NodeIndex::new(2));
g.vacuum();
assert!(g.has_index("Person", "age"));
let stats = g.get_index_stats("Person", "age").unwrap();
assert_eq!(stats.total_entries, 4); }
// Removing every other node of 100 gives ~50% fragmentation, which `vacuum`
// brings back to zero.
#[test]
fn test_vacuum_heavy_fragmentation() {
let mut g = make_test_graph(100, false);
for i in (0..100).step_by(2) {
g.graph.remove_node(NodeIndex::new(i));
}
assert_eq!(g.graph.node_count(), 50);
let info = g.graph_info();
assert!(info.fragmentation_ratio > 0.49);
let mapping = g.vacuum();
assert_eq!(mapping.len(), 50);
assert_eq!(g.graph.node_count(), 50);
assert_eq!(g.graph_info().node_tombstones, 0);
assert_eq!(g.graph_info().fragmentation_ratio, 0.0);
}
// A node added after the index was created becomes findable once
// `update_property_indices_for_add` is called for it.
#[test]
fn test_update_property_indices_for_add() {
let mut g = DirGraph::new();
let mut props = HashMap::new();
props.insert("city".to_string(), Value::String("Oslo".to_string()));
let n0 = g.graph.add_node(NodeData::new(
Value::Int64(1),
Value::String("Alice".to_string()),
"Person".to_string(),
props,
&mut g.interner,
));
g.type_indices
.entry_or_default("Person".to_string())
.push(n0);
g.create_index("Person", "city");
// Second node is added after the index already exists.
let mut props2 = HashMap::new();
props2.insert("city".to_string(), Value::String("Bergen".to_string()));
let n1 = g.graph.add_node(NodeData::new(
Value::Int64(2),
Value::String("Bob".to_string()),
"Person".to_string(),
props2,
&mut g.interner,
));
g.type_indices
.entry_or_default("Person".to_string())
.push(n1);
g.update_property_indices_for_add("Person", n1);
let oslo = g.lookup_by_index("Person", "city", &Value::String("Oslo".to_string()));
assert_eq!(oslo.unwrap().len(), 1);
let bergen = g.lookup_by_index("Person", "city", &Value::String("Bergen".to_string()));
let bergen = bergen.unwrap();
assert_eq!(bergen.len(), 1);
assert_eq!(bergen[0], n1);
}
// Changing an indexed property moves the node from the old value's entry
// to the new value's entry.
#[test]
fn test_update_property_indices_for_set() {
let mut g = DirGraph::new();
let mut props = HashMap::new();
props.insert("city".to_string(), Value::String("Oslo".to_string()));
let n0 = g.graph.add_node(NodeData::new(
Value::Int64(1),
Value::String("Alice".to_string()),
"Person".to_string(),
props,
&mut g.interner,
));
g.type_indices
.entry_or_default("Person".to_string())
.push(n0);
g.create_index("Person", "city");
let old_val = Value::String("Oslo".to_string());
let new_val = Value::String("Bergen".to_string());
if let Some(node) = g.graph.node_weight_mut(n0) {
node.set_property("city", new_val.clone(), &mut g.interner);
}
g.update_property_indices_for_set("Person", n0, "city", Some(&old_val), &new_val);
// Either a missing entry or an empty one is accepted for the old value.
let oslo = g.lookup_by_index("Person", "city", &Value::String("Oslo".to_string()));
assert!(oslo.is_none() || oslo.unwrap().is_empty());
let bergen = g.lookup_by_index("Person", "city", &Value::String("Bergen".to_string()));
assert_eq!(bergen.unwrap(), vec![n0]);
}
// Removing an indexed property takes the node out of the index.
#[test]
fn test_update_property_indices_for_remove() {
let mut g = DirGraph::new();
let mut props = HashMap::new();
props.insert("city".to_string(), Value::String("Oslo".to_string()));
let n0 = g.graph.add_node(NodeData::new(
Value::Int64(1),
Value::String("Alice".to_string()),
"Person".to_string(),
props,
&mut g.interner,
));
g.type_indices
.entry_or_default("Person".to_string())
.push(n0);
g.create_index("Person", "city");
let old_val = Value::String("Oslo".to_string());
if let Some(node) = g.graph.node_weight_mut(n0) {
node.remove_property("city");
}
g.update_property_indices_for_remove("Person", n0, "city", &old_val);
let oslo = g.lookup_by_index("Person", "city", &Value::String("Oslo".to_string()));
assert!(oslo.is_none() || oslo.unwrap().is_empty());
}
// A change to one member property of a composite index re-keys the node
// under the new composite value.
#[test]
fn test_update_composite_index_on_property_change() {
let mut g = DirGraph::new();
let mut props = HashMap::new();
props.insert("city".to_string(), Value::String("Oslo".to_string()));
props.insert("age".to_string(), Value::Int64(30));
let n0 = g.graph.add_node(NodeData::new(
Value::Int64(1),
Value::String("Alice".to_string()),
"Person".to_string(),
props,
&mut g.interner,
));
g.type_indices
.entry_or_default("Person".to_string())
.push(n0);
g.create_composite_index("Person", &["city", "age"]);
// Composite indices are keyed by (node type, property list).
let key = (
"Person".to_string(),
vec!["city".to_string(), "age".to_string()],
);
assert!(g.composite_indices.get(&key).unwrap().len() == 1);
let old_val = Value::String("Oslo".to_string());
let new_val = Value::String("Bergen".to_string());
if let Some(node) = g.graph.node_weight_mut(n0) {
node.set_property("city", new_val.clone(), &mut g.interner);
}
g.update_property_indices_for_set("Person", n0, "city", Some(&old_val), &new_val);
let comp_map = g.composite_indices.get(&key).unwrap();
let old_comp = CompositeValue(vec![Value::String("Oslo".to_string()), Value::Int64(30)]);
let new_comp = CompositeValue(vec![Value::String("Bergen".to_string()), Value::Int64(30)]);
assert!(!comp_map.contains_key(&old_comp) || comp_map.get(&old_comp).unwrap().is_empty());
assert_eq!(comp_map.get(&new_comp).unwrap(), &vec![n0]);
}
// The incremental update hooks must not create indices on their own.
#[test]
fn test_no_update_when_no_index_exists() {
let mut g = DirGraph::new();
let mut props = HashMap::new();
props.insert("city".to_string(), Value::String("Oslo".to_string()));
let n0 = g.graph.add_node(NodeData::new(
Value::Int64(1),
Value::String("Alice".to_string()),
"Person".to_string(),
props,
&mut g.interner,
));
g.type_indices
.entry_or_default("Person".to_string())
.push(n0);
g.update_property_indices_for_add("Person", n0);
g.update_property_indices_for_set(
"Person",
n0,
"city",
Some(&Value::String("Oslo".to_string())),
&Value::String("Bergen".to_string()),
);
g.update_property_indices_for_remove(
"Person",
n0,
"city",
&Value::String("Oslo".to_string()),
);
assert!(g.property_indices.is_empty());
}
// Switching to columnar storage must not change id, title, or `age` of any
// node: the same snapshot is taken before and after and compared.
#[test]
fn test_enable_columnar_preserves_properties() {
let mut g = make_test_graph(5, false);
let mut meta = HashMap::new();
meta.insert("age".to_string(), "int64".to_string());
g.node_type_metadata.insert("Person".to_string(), meta);
g.compact_properties();
let before: Vec<(Value, Value, i64)> = g
.type_indices
.get("Person")
.unwrap()
.iter()
.map(|idx| {
let n = g.graph.node_weight(idx).unwrap();
let age = n
.get_property("age")
.map(|c| match c.as_ref() {
Value::Int64(v) => *v,
_ => panic!("expected Int64"),
})
.unwrap();
(n.id().into_owned(), n.title().into_owned(), age)
})
.collect();
g.enable_columnar();
assert!(g.is_columnar());
let after: Vec<(Value, Value, i64)> = g
.type_indices
.get("Person")
.unwrap()
.iter()
.map(|idx| {
let n = g.graph.node_weight(idx).unwrap();
let age = n
.get_property("age")
.map(|c| match c.as_ref() {
Value::Int64(v) => *v,
_ => panic!("expected Int64"),
})
.unwrap();
(n.id().into_owned(), n.title().into_owned(), age)
})
.collect();
assert_eq!(before, after);
}
// Enabling and then disabling columnar mode leaves nodes in `Compact`
// storage with their properties still readable.
#[test]
fn test_columnar_roundtrip_via_disable() {
let mut g = make_test_graph(3, false);
let mut meta = HashMap::new();
meta.insert("age".to_string(), "int64".to_string());
g.node_type_metadata.insert("Person".to_string(), meta);
g.compact_properties();
g.enable_columnar();
assert!(g.is_columnar());
g.disable_columnar();
assert!(!g.is_columnar());
let idx = g.type_indices.get("Person").unwrap().get(0).unwrap();
let node = g.graph.node_weight(idx).unwrap();
assert!(matches!(node.properties, PropertyStorage::Compact { .. }));
assert!(node.get_property("age").is_some());
}
// A value written with `set_property` in columnar mode can be read back.
#[test]
fn test_columnar_set_property() {
let mut g = make_test_graph(2, false);
let mut meta = HashMap::new();
meta.insert("age".to_string(), "int64".to_string());
g.node_type_metadata.insert("Person".to_string(), meta);
g.compact_properties();
g.enable_columnar();
let idx = g.type_indices.get("Person").unwrap().get(0).unwrap();
let node = g.graph.node_weight_mut(idx).unwrap();
node.set_property("age", Value::Int64(99), &mut g.interner);
assert_eq!(
node.get_property("age").map(|c| c.into_owned()),
Some(Value::Int64(99))
);
}
// In columnar mode the node still reports exactly its one property, `age`.
#[test]
fn test_columnar_property_count_and_keys() {
let mut g = make_test_graph(2, false);
let mut meta = HashMap::new();
meta.insert("age".to_string(), "int64".to_string());
g.node_type_metadata.insert("Person".to_string(), meta);
g.compact_properties();
g.enable_columnar();
let idx = g.type_indices.get("Person").unwrap().get(0).unwrap();
let node = g.graph.node_weight(idx).unwrap();
assert_eq!(node.property_count(), 1); let keys: Vec<&str> = node.property_keys(&g.interner).collect();
assert_eq!(keys, vec!["age"]);
}
// A columnar graph backend survives a bincode serialize/deserialize round
// trip with its properties still readable.
#[test]
fn test_columnar_serialize_roundtrip() {
let mut g = make_test_graph(3, false);
let mut meta = HashMap::new();
meta.insert("age".to_string(), "int64".to_string());
g.node_type_metadata.insert("Person".to_string(), meta);
g.compact_properties();
g.enable_columnar();
// Each serde guard is held only for the duration of its block.
let serialized = {
let _guard = SerdeSerializeGuard::new(&g.interner);
bincode::serialize(&g.graph).unwrap()
};
let graph2: GraphBackend = {
let _guard = SerdeDeserializeGuard::new(&mut g.interner);
bincode::deserialize(&serialized).unwrap()
};
let node0 = graph2.node_weight(NodeIndex::new(0)).unwrap();
assert!(node0.get_property("age").is_some());
}
}