use std::{error::Error, fmt, string::String, sync::Arc, vec::Vec};
use arrow_array::{Array, ArrayRef, PrimitiveArray};
use arrow_schema::Field;
use oxgraph_snapshot::SectionViewError;
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
use crate::width::{
PropertyIndex, PropertySnapshotMetaWord, le_word, le_word_to_u32, le_word_to_usize,
};
/// Newtype wrapper around a caller-chosen identifier value for a property layer.
///
/// The wrapped value is public and can be read or built directly.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct LayerId<Id>(pub Id);
/// Name of a property layer.
///
/// Values built through [`LayerName::try_new`] are never empty.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct LayerName {
// Owned name text; the field is private, so code outside this module obtains
// names through `LayerName::try_new`.
value: String,
}
impl LayerName {
    /// Creates a layer name from `value`, copying the text into an owned string.
    ///
    /// # Errors
    ///
    /// Returns [`PropertyError::EmptyLayerName`] when `value` is the empty string.
    pub fn try_new(value: &str) -> Result<Self, PropertyError> {
        match value {
            "" => Err(PropertyError::EmptyLayerName),
            text => Ok(Self {
                value: text.to_owned(),
            }),
        }
    }

    /// Borrows the name as a string slice.
    #[must_use]
    pub const fn as_str(&self) -> &str {
        self.value.as_str()
    }
}
/// Displays the layer name verbatim; width and padding flags are not applied.
impl fmt::Display for LayerName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = &self.value;
        f.write_str(text)
    }
}
/// Family of identifiers that a property layer descriptor or identity-map
/// record is associated with.
///
/// `id_family_tag` persists the variants as `0` (`Element`), `1` (`Relation`)
/// and `2` (`Incidence`).
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum IdFamily {
/// Element identifiers (tag `0`).
Element,
/// Relation identifiers (tag `1`).
Relation,
/// Incidence identifiers (tag `2`).
Incidence,
}
/// Role recorded on a property layer descriptor.
///
/// `layer_role_tag` persists the variants as `0` (`Weight`) and `1` (`Property`).
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum LayerRole {
/// Weight layer (tag `0`).
Weight,
/// General property layer (tag `1`).
Property,
}
/// Missing-value policy attached to sparse storage.
///
/// `validate_default_policy` enforces the pairing with the optional default
/// array: `Null` requires that no default array is supplied, while `Default`
/// requires a default array holding exactly one non-null value.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum MissingPolicy {
/// No default array accompanies the layer.
Null,
/// A single-element, non-null default array accompanies the layer.
Default,
}
/// Storage layout declared by a property layer descriptor.
///
/// `PropertyLayer::try_new_dense` accepts only `Dense` descriptors and
/// `PropertyLayer::try_new_sparse` accepts only `Sparse` ones.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum StorageMode {
/// One values array covers the whole layer.
Dense,
/// Values are stored alongside an index array.
Sparse {
/// Policy that decides whether a default array must accompany the layer.
missing: MissingPolicy,
},
}
/// Metadata describing one property layer: identifier, name, ID family, role,
/// storage mode and Arrow field.
///
/// The `I` parameter is the layer's [`PropertyIndex`] type; it is carried only
/// as a zero-sized marker.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct PropertyLayerDescriptor<Id, I>
where
I: PropertyIndex,
{
/// Identifier of the layer.
pub layer_id: LayerId<Id>,
/// Non-empty layer name.
pub name: LayerName,
/// ID family recorded for the layer.
pub id_family: IdFamily,
/// Role recorded for the layer.
pub role: LayerRole,
/// Declared storage layout.
pub storage: StorageMode,
/// Arrow field whose data type and nullability the layer's value arrays are
/// validated against.
pub arrow_field: Field,
// Zero-sized marker binding the descriptor to index type `I`.
index_width: core::marker::PhantomData<I>,
}
impl<Id, I> PropertyLayerDescriptor<Id, I>
where
    I: PropertyIndex,
{
    /// Assembles a descriptor from its parts, validating the layer name first.
    ///
    /// # Errors
    ///
    /// Returns [`PropertyError::EmptyLayerName`] when `name` is empty.
    #[expect(
        clippy::too_many_arguments,
        reason = "descriptor constructor mirrors the six-field descriptor contract"
    )]
    pub fn try_new(
        layer_id: LayerId<Id>,
        name: &str,
        id_family: IdFamily,
        role: LayerRole,
        storage: StorageMode,
        arrow_field: Field,
    ) -> Result<Self, PropertyError> {
        // Name validation is the only fallible step, so run it before building.
        let name = LayerName::try_new(name)?;
        let index_width = core::marker::PhantomData;
        Ok(Self {
            layer_id,
            name,
            id_family,
            role,
            storage,
            arrow_field,
            index_width,
        })
    }
}
/// Errors produced while building, validating or decoding property layers and
/// their snapshot records.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum PropertyError {
/// A layer name was empty.
EmptyLayerName,
/// A dense layer was requested for a descriptor that is not dense.
ExpectedDenseStorage {
/// Name of the offending layer.
name: LayerName,
},
/// A sparse layer was requested for a descriptor that is not sparse.
ExpectedSparseStorage {
/// Name of the offending layer.
name: LayerName,
},
/// The supplied default array does not satisfy the layer's missing policy.
DefaultPolicyMismatch {
/// Name of the offending layer.
name: LayerName,
},
/// An array's Arrow data type differs from the descriptor's field type.
ArrowTypeMismatch {
/// Name of the offending layer.
name: LayerName,
},
/// An ID family differed from the one that was expected.
IdFamilyMismatch {
/// Family that was expected.
expected: IdFamily,
/// Family that was found.
actual: IdFamily,
},
/// A layer is shorter than required.
LayerTooShort {
/// Required length.
required: usize,
/// Actual length.
actual: usize,
},
/// A null was found where nulls are not allowed.
UnexpectedNull {
/// Position of the first null.
index: usize,
},
/// The sparse index and value arrays have different lengths.
SparseLengthMismatch {
/// Number of indexes.
indices: usize,
/// Number of values.
values: usize,
},
/// Sparse indexes are not strictly increasing.
SparseIndexOrder {
/// Position in the index array where the order is violated.
position: usize,
},
/// A sparse index lies outside the layer's logical length.
SparseIndexOutOfBounds {
/// Offending index value.
index: u64,
/// Logical length of the layer.
len: usize,
},
/// A property name appears more than once within one ID-family namespace.
DuplicateName {
/// Namespace in which the duplicate was found.
id_family: IdFamily,
/// Duplicated name.
name: LayerName,
},
/// A sparse layer with the null missing policy is not total.
SparseNullMissingNotTotal {
/// Name of the offending layer.
name: LayerName,
},
/// A layer ID appears more than once.
DuplicateLayerId {
/// Duplicated layer ID.
layer_id: u64,
},
/// A snapshot lacks a required section.
MissingSnapshotSection {
/// Kind of the missing section.
kind: u32,
},
/// A snapshot section has an unsupported version.
SnapshotSectionVersion {
/// Kind of the section.
kind: u32,
/// Version that was found.
version: u32,
},
/// A snapshot section cannot be borrowed as the expected records.
SnapshotSectionView {
/// Kind of the section.
kind: u32,
/// Underlying section-view failure.
error: SectionViewError,
},
/// A byte range exceeds the bytes available in the snapshot.
SnapshotRangeOutOfBounds {
/// Start offset of the range.
offset: usize,
/// Length of the range.
len: usize,
/// Number of bytes available.
available: usize,
},
/// A snapshot string is not valid UTF-8.
SnapshotInvalidUtf8 {
/// Byte offset of the string.
offset: usize,
},
/// An ID-family tag is not recognised.
UnknownIdFamilyTag {
/// Unrecognised tag value.
tag: u32,
},
/// A layer-role tag is not recognised.
UnknownLayerRoleTag {
/// Unrecognised tag value.
tag: u32,
},
/// A storage tag is not recognised.
UnknownStorageTag {
/// Unrecognised tag value.
tag: u32,
},
/// A missing-policy tag is not recognised.
UnknownMissingPolicyTag {
/// Unrecognised tag value.
tag: u32,
},
/// An Arrow value-family tag is not recognised.
UnknownArrowFamilyTag {
/// Unrecognised tag value.
tag: u32,
},
/// An identity-map mode tag is not recognised.
UnknownIdentityModeTag {
/// Unrecognised tag value.
tag: u32,
},
/// A snapshot descriptor is inconsistent.
SnapshotDescriptorMismatch {
/// Static description of the mismatch.
reason: &'static str,
},
/// Snapshot data has an unexpected length.
SnapshotDataLength {
/// Static description of the mismatch.
reason: &'static str,
},
/// An Arrow operation failed.
Arrow {
/// Text of the Arrow error.
message: String,
},
/// An explicit identity map is missing for an ID family.
MissingIdentityMap {
/// Family whose map is missing.
id_family: IdFamily,
},
/// An identity map has the wrong length.
IdentityMapLength {
/// Family the map belongs to.
id_family: IdFamily,
/// Required length.
required: usize,
/// Actual length.
actual: usize,
},
/// A length does not fit into `u64`.
LengthDoesNotFitU64 {
/// Offending length.
value: usize,
},
}
/// Renders one human-readable message per [`PropertyError`] variant.
impl fmt::Display for PropertyError {
#[expect(
clippy::too_many_lines,
reason = "property validation has one display branch per concrete error variant"
)]
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::EmptyLayerName => formatter.write_str("property layer name is empty"),
Self::ExpectedDenseStorage { name } => {
write!(formatter, "property layer '{name}' is not dense")
}
Self::ExpectedSparseStorage { name } => {
write!(formatter, "property layer '{name}' is not sparse")
}
Self::DefaultPolicyMismatch { name } => {
write!(formatter, "property layer '{name}' default policy mismatch")
}
Self::ArrowTypeMismatch { name } => {
write!(formatter, "property layer '{name}' Arrow type mismatch")
}
Self::IdFamilyMismatch { expected, actual } => write!(
formatter,
"property ID family mismatch: expected {expected:?}, got {actual:?}"
),
Self::LayerTooShort { required, actual } => write!(
formatter,
"property layer too short: required {required}, got {actual}"
),
Self::UnexpectedNull { index } => write!(
formatter,
"property layer has unexpected null at index {index}"
),
Self::SparseLengthMismatch { indices, values } => write!(
formatter,
"sparse property length mismatch: {indices} indexes for {values} values"
),
Self::SparseIndexOrder { position } => write!(
formatter,
"sparse property indexes are not strictly increasing at position {position}"
),
Self::SparseIndexOutOfBounds { index, len } => write!(
formatter,
"sparse property index {index} is outside logical length {len}"
),
Self::DuplicateName { id_family, name } => write!(
formatter,
"duplicate property name '{name}' in {id_family:?} namespace"
),
Self::SparseNullMissingNotTotal { name } => write!(
formatter,
"sparse property layer '{name}' has null missing policy and is not total"
),
Self::DuplicateLayerId { layer_id } => {
write!(formatter, "duplicate property layer ID {layer_id:?}")
}
// Section kinds are printed in hexadecimal (`{kind:#x}`).
Self::MissingSnapshotSection { kind } => {
write!(formatter, "snapshot is missing section kind {kind:#x}")
}
Self::SnapshotSectionVersion { kind, version } => write!(
formatter,
"snapshot section {kind:#x} has unsupported version {version}"
),
Self::SnapshotSectionView { kind, error } => write!(
formatter,
"snapshot section {kind:#x} cannot be borrowed as expected records: {error}"
),
Self::SnapshotRangeOutOfBounds {
offset,
len,
available,
} => write!(
formatter,
"snapshot range {offset}..{} exceeds available {available} bytes",
// Saturating add keeps the displayed end bound from overflowing `usize`.
offset.saturating_add(*len)
),
Self::SnapshotInvalidUtf8 { offset } => {
write!(
formatter,
"snapshot string at byte offset {offset} is not UTF-8"
)
}
Self::UnknownIdFamilyTag { tag } => {
write!(formatter, "unknown property ID-family tag {tag}")
}
Self::UnknownLayerRoleTag { tag } => {
write!(formatter, "unknown property layer-role tag {tag}")
}
Self::UnknownStorageTag { tag } => {
write!(formatter, "unknown property storage tag {tag}")
}
Self::UnknownMissingPolicyTag { tag } => {
write!(formatter, "unknown property missing-policy tag {tag}")
}
Self::UnknownArrowFamilyTag { tag } => {
write!(formatter, "unknown Arrow value-family tag {tag}")
}
Self::UnknownIdentityModeTag { tag } => {
write!(formatter, "unknown identity-map mode tag {tag}")
}
Self::SnapshotDescriptorMismatch { reason } => {
write!(formatter, "property snapshot descriptor mismatch: {reason}")
}
Self::SnapshotDataLength { reason } => {
write!(
formatter,
"property snapshot data length mismatch: {reason}"
)
}
Self::Arrow { message } => write!(formatter, "Arrow property error: {message}"),
Self::MissingIdentityMap { id_family } => {
write!(formatter, "missing explicit identity map for {id_family:?}")
}
Self::IdentityMapLength {
id_family,
required,
actual,
} => write!(
formatter,
"identity map for {id_family:?} has length {actual}, required {required}"
),
Self::LengthDoesNotFitU64 { value } => {
write!(formatter, "length {value} does not fit u64")
}
}
}
}
// Marker impl only: no `source()` override is provided, so nested failures are
// visible solely through the `Display` text.
impl Error for PropertyError {}
/// Arrow-backed payload of a property layer, in dense or sparse form.
#[non_exhaustive]
pub enum PropertyLayerData<I>
where
I: PropertyIndex,
{
/// A single values array.
Dense {
/// Layer values.
values: ArrayRef,
},
/// Values paired with an index array.
///
/// `PropertyLayer::try_new_sparse` checks that `indices` and `values` have
/// equal length and that the indexes are null-free, strictly increasing
/// and below the layer length.
Sparse {
/// Index array, one entry per stored value.
indices: Arc<PrimitiveArray<I::ArrowType>>,
/// Stored values, parallel to `indices`.
values: ArrayRef,
/// Optional default array; its presence is governed by the layer's
/// missing policy.
default: Option<ArrayRef>,
},
}
/// Clones by bumping the reference counts of the shared Arrow arrays; no array
/// data is copied.
impl<I> Clone for PropertyLayerData<I>
where
    I: PropertyIndex,
{
    fn clone(&self) -> Self {
        match self {
            Self::Dense { values } => {
                let values = Arc::clone(values);
                Self::Dense { values }
            }
            Self::Sparse {
                indices,
                values,
                default,
            } => {
                let indices = Arc::clone(indices);
                let values = Arc::clone(values);
                let default = default.as_ref().map(Arc::clone);
                Self::Sparse {
                    indices,
                    values,
                    default,
                }
            }
        }
    }
}
/// Debug output summarises array lengths and default presence instead of
/// dumping array contents.
impl<I> fmt::Debug for PropertyLayerData<I>
where
    I: PropertyIndex,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Dense { values } => {
                let mut out = f.debug_struct("Dense");
                out.field("len", &values.len());
                out.finish()
            }
            Self::Sparse {
                indices,
                values,
                default,
            } => {
                let mut out = f.debug_struct("Sparse");
                out.field("indices", &indices.len());
                out.field("values", &values.len());
                out.field("has_default", &default.is_some());
                out.finish()
            }
        }
    }
}
/// A validated property layer: descriptor, logical length and Arrow payload.
///
/// Instances are created through `try_new_dense` or `try_new_sparse`, which
/// validate the payload against the descriptor.
#[derive(Clone, Debug)]
#[must_use]
pub struct PropertyLayer<Id, I>
where
I: PropertyIndex,
{
// Descriptor the payload was validated against.
descriptor: PropertyLayerDescriptor<Id, I>,
// Logical length: the values length for dense layers, the caller-supplied
// length for sparse layers.
len: usize,
// Dense or sparse Arrow payload.
data: PropertyLayerData<I>,
}
impl<Id, I> PropertyLayer<Id, I>
where
    I: PropertyIndex,
{
    /// Builds a dense layer whose logical length is `values.len()`.
    ///
    /// # Errors
    ///
    /// * [`PropertyError::ExpectedDenseStorage`] when the descriptor is not dense.
    /// * [`PropertyError::ArrowTypeMismatch`] when `values` has a different
    ///   Arrow data type than the descriptor's field.
    /// * [`PropertyError::UnexpectedNull`] when the field is non-nullable and
    ///   `values` contains a null.
    pub fn try_new_dense(
        descriptor: PropertyLayerDescriptor<Id, I>,
        values: ArrayRef,
    ) -> Result<Self, PropertyError> {
        if !matches!(descriptor.storage, StorageMode::Dense) {
            let name = descriptor.name;
            return Err(PropertyError::ExpectedDenseStorage { name });
        }
        ensure_arrow_type(&descriptor, values.as_ref())?;
        let nulls_allowed = descriptor.arrow_field.is_nullable();
        if !nulls_allowed {
            ensure_no_nulls(values.as_ref())?;
        }
        let len = values.len();
        let data = PropertyLayerData::Dense { values };
        Ok(Self {
            descriptor,
            len,
            data,
        })
    }

    /// Builds a sparse layer of logical length `len` from parallel `indices`
    /// and `values` arrays plus an optional `default` array.
    ///
    /// Checks run in this order: storage mode, default policy, value type,
    /// index/value length agreement, nulls in `indices`, nulls in `values`
    /// (only for non-nullable fields), then index bounds and ordering.
    ///
    /// # Errors
    ///
    /// * [`PropertyError::ExpectedSparseStorage`] when the descriptor is not sparse.
    /// * [`PropertyError::DefaultPolicyMismatch`] or
    ///   [`PropertyError::ArrowTypeMismatch`] when `default` does not fit the
    ///   missing policy or field type.
    /// * [`PropertyError::ArrowTypeMismatch`] when `values` has the wrong type.
    /// * [`PropertyError::SparseLengthMismatch`] when `indices` and `values`
    ///   differ in length.
    /// * [`PropertyError::UnexpectedNull`] for a null index, or a null value in
    ///   a non-nullable field.
    /// * [`PropertyError::SparseIndexOutOfBounds`] or
    ///   [`PropertyError::SparseIndexOrder`] when the indexes are invalid.
    pub fn try_new_sparse(
        descriptor: PropertyLayerDescriptor<Id, I>,
        len: usize,
        indices: Arc<PrimitiveArray<I::ArrowType>>,
        values: ArrayRef,
        default: Option<ArrayRef>,
    ) -> Result<Self, PropertyError> {
        let missing = match descriptor.storage {
            StorageMode::Sparse { missing } => missing,
            StorageMode::Dense => {
                let name = descriptor.name;
                return Err(PropertyError::ExpectedSparseStorage { name });
            }
        };
        validate_default_policy(&descriptor, missing, default.as_ref())?;
        ensure_arrow_type(&descriptor, values.as_ref())?;

        let (index_count, value_count) = (indices.len(), values.len());
        if index_count != value_count {
            return Err(PropertyError::SparseLengthMismatch {
                indices: index_count,
                values: value_count,
            });
        }

        // Indexes may never be null; values only when the field allows it.
        ensure_no_nulls(indices.as_ref())?;
        let nulls_allowed = descriptor.arrow_field.is_nullable();
        if !nulls_allowed {
            ensure_no_nulls(values.as_ref())?;
        }
        validate_sparse_indices::<I>(indices.as_ref(), len)?;

        let data = PropertyLayerData::Sparse {
            indices,
            values,
            default,
        };
        Ok(Self {
            descriptor,
            len,
            data,
        })
    }

    /// Returns the descriptor this layer was validated against.
    #[must_use]
    pub const fn descriptor(&self) -> &PropertyLayerDescriptor<Id, I> {
        &self.descriptor
    }

    /// Returns the layer's Arrow payload.
    #[must_use]
    pub const fn data(&self) -> &PropertyLayerData<I> {
        &self.data
    }

    /// Returns the logical length of the layer.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the logical length is zero.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Mode recorded in an identity-map record.
///
/// The variants are persisted as tag `0` (`LocalEqualsCanonical`) and tag `1`
/// (`ExplicitMap`).
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum IdentityMapMode {
/// Tag `0`.
LocalEqualsCanonical,
/// Tag `1`.
ExplicitMap,
}
impl IdentityMapMode {
    /// Returns the numeric tag used to persist this mode.
    const fn tag(self) -> u32 {
        match self {
            Self::ExplicitMap => 1,
            Self::LocalEqualsCanonical => 0,
        }
    }

    /// Decodes a persisted tag, yielding `None` for any value other than 0 or 1.
    const fn from_tag(tag: u32) -> Option<Self> {
        if tag == 0 {
            Some(Self::LocalEqualsCanonical)
        } else if tag == 1 {
            Some(Self::ExplicitMap)
        } else {
            None
        }
    }
}
/// Fixed-layout (`#[repr(C)]`) identity-mode record made of three
/// `W::LittleEndianWord` fields.
///
/// The zerocopy derives (`FromBytes`, `IntoBytes`, `Immutable`, `KnownLayout`)
/// let the record be viewed as, and produced from, raw bytes.
#[derive(Clone, Copy, Debug, Eq, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)]
#[repr(C)]
pub struct IdentityModeRecord<W>
where
W: PropertySnapshotMetaWord,
{
// Encoded ID-family tag.
id_family: W::LittleEndianWord,
// Encoded identity-map mode tag.
mode: W::LittleEndianWord,
// Encoded local length.
local_len: W::LittleEndianWord,
}
impl<W> IdentityModeRecord<W>
where
    W: PropertySnapshotMetaWord,
{
    /// Builds a record whose mode is [`IdentityMapMode::LocalEqualsCanonical`].
    ///
    /// # Errors
    ///
    /// Propagates any failure from [`Self::new`].
    pub fn local_equals_canonical(
        id_family: IdFamily,
        local_len: usize,
    ) -> Result<Self, PropertyError> {
        let mode = IdentityMapMode::LocalEqualsCanonical;
        Self::new(id_family, mode, local_len)
    }

    /// Builds a record whose mode is [`IdentityMapMode::ExplicitMap`].
    ///
    /// # Errors
    ///
    /// Propagates any failure from [`Self::new`].
    pub fn explicit_map(id_family: IdFamily, local_len: usize) -> Result<Self, PropertyError> {
        let mode = IdentityMapMode::ExplicitMap;
        Self::new(id_family, mode, local_len)
    }

    /// Encodes the family tag, mode tag and local length into a record.
    ///
    /// # Errors
    ///
    /// Propagates the error from `le_word` when a value cannot be encoded as
    /// a `W` word.
    pub fn new(
        id_family: IdFamily,
        mode: IdentityMapMode,
        local_len: usize,
    ) -> Result<Self, PropertyError> {
        // Encode in field order so the first failing conversion is the one reported.
        let family_word = le_word::<W>(id_family_tag(id_family) as usize)?;
        let mode_word = le_word::<W>(mode.tag() as usize)?;
        let len_word = le_word::<W>(local_len)?;
        Ok(Self {
            id_family: family_word,
            mode: mode_word,
            local_len: len_word,
        })
    }

    /// Decodes the stored ID family.
    ///
    /// # Errors
    ///
    /// Fails when the stored word cannot be read as `u32` or the tag is not a
    /// known ID-family tag.
    pub fn id_family(&self) -> Result<IdFamily, PropertyError> {
        let tag = le_word_to_u32::<W>(self.id_family)?;
        id_family_from_tag(tag)
    }

    /// Decodes the stored identity-map mode.
    ///
    /// # Errors
    ///
    /// Fails when the stored word cannot be read as `u32`, or with
    /// [`PropertyError::UnknownIdentityModeTag`] for an unrecognised tag.
    pub fn mode(&self) -> Result<IdentityMapMode, PropertyError> {
        let tag = le_word_to_u32::<W>(self.mode)?;
        match IdentityMapMode::from_tag(tag) {
            Some(mode) => Ok(mode),
            None => Err(PropertyError::UnknownIdentityModeTag { tag }),
        }
    }

    /// Returns the stored local length.
    ///
    /// If the stored word cannot be converted to `usize`, the result is
    /// `usize::MAX` rather than an error.
    #[must_use]
    pub fn local_len(&self) -> usize {
        let decoded = le_word_to_usize::<W>(self.local_len);
        decoded.unwrap_or(usize::MAX)
    }
}
/// Collection of decoded identity-mode summaries.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
pub struct IdentitySnapshotSummary {
/// One summary entry per identity-mode record.
pub records: Vec<IdentityModeSummary>,
}
/// Decoded form of an identity-mode record: ID family, mode and local length.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct IdentityModeSummary {
/// ID family the entry refers to.
pub id_family: IdFamily,
/// Identity-map mode.
pub mode: IdentityMapMode,
/// Local length.
pub local_len: usize,
}
pub(crate) const fn id_family_tag(id_family: IdFamily) -> u32 {
match id_family {
IdFamily::Element => 0,
IdFamily::Relation => 1,
IdFamily::Incidence => 2,
}
}
/// Decodes a persisted ID-family tag.
///
/// # Errors
///
/// Returns [`PropertyError::UnknownIdFamilyTag`] for any tag other than 0, 1 or 2.
pub(crate) const fn id_family_from_tag(tag: u32) -> Result<IdFamily, PropertyError> {
    let family = match tag {
        0 => IdFamily::Element,
        1 => IdFamily::Relation,
        2 => IdFamily::Incidence,
        unknown => return Err(PropertyError::UnknownIdFamilyTag { tag: unknown }),
    };
    Ok(family)
}
pub(crate) const fn layer_role_tag(role: LayerRole) -> u32 {
match role {
LayerRole::Weight => 0,
LayerRole::Property => 1,
}
}
/// Decodes a persisted layer-role tag.
///
/// # Errors
///
/// Returns [`PropertyError::UnknownLayerRoleTag`] for any tag other than 0 or 1.
pub(crate) const fn layer_role_from_tag(tag: u32) -> Result<LayerRole, PropertyError> {
    let role = match tag {
        0 => LayerRole::Weight,
        1 => LayerRole::Property,
        unknown => return Err(PropertyError::UnknownLayerRoleTag { tag: unknown }),
    };
    Ok(role)
}
/// Returns the numeric storage tag: `0` for dense, `1` for sparse (regardless
/// of the missing policy).
pub(crate) const fn storage_tag(storage: StorageMode) -> u32 {
    let tag = match storage {
        StorageMode::Sparse { missing: _ } => 1,
        StorageMode::Dense => 0,
    };
    tag
}
/// Returns the numeric missing-policy tag for `storage`: `0` for dense storage,
/// `1` for sparse storage with the null policy, `2` for sparse storage with the
/// default policy.
pub(crate) const fn missing_policy_tag(storage: StorageMode) -> u32 {
    match storage {
        StorageMode::Dense => 0,
        StorageMode::Sparse { missing } => match missing {
            MissingPolicy::Null => 1,
            MissingPolicy::Default => 2,
        },
    }
}
/// Decodes a persisted `(storage, missing)` tag pair into a [`StorageMode`].
///
/// Valid pairs are `(0, 0)` for dense storage, `(1, 1)` for sparse storage with
/// the null missing policy and `(1, 2)` for sparse storage with the default
/// missing policy.
///
/// # Errors
///
/// * [`PropertyError::UnknownMissingPolicyTag`] when `storage` is a known tag
///   (0 or 1) but `missing` is not valid for it.
/// * [`PropertyError::UnknownStorageTag`] when `storage` itself is not a known
///   tag.
pub(crate) const fn storage_from_tags(
    storage: u32,
    missing: u32,
) -> Result<StorageMode, PropertyError> {
    match (storage, missing) {
        (0, 0) => Ok(StorageMode::Dense),
        (1, 1) => Ok(StorageMode::Sparse {
            missing: MissingPolicy::Null,
        }),
        (1, 2) => Ok(StorageMode::Sparse {
            missing: MissingPolicy::Default,
        }),
        // The storage tag is valid here, so the missing-policy tag is the
        // offender. Blaming the storage tag would point at a known-good value.
        (0 | 1, _) => Err(PropertyError::UnknownMissingPolicyTag { tag: missing }),
        (_, _) => Err(PropertyError::UnknownStorageTag { tag: storage }),
    }
}
/// Checks that `values` has exactly the Arrow data type declared by the
/// descriptor's field.
///
/// # Errors
///
/// Returns [`PropertyError::ArrowTypeMismatch`], carrying a clone of the layer
/// name, when the two data types differ.
pub(crate) fn ensure_arrow_type<Id, I>(
    descriptor: &PropertyLayerDescriptor<Id, I>,
    values: &dyn Array,
) -> Result<(), PropertyError>
where
    I: PropertyIndex,
{
    let expected = descriptor.arrow_field.data_type();
    let actual = values.data_type();
    if expected != actual {
        return Err(PropertyError::ArrowTypeMismatch {
            name: descriptor.name.clone(),
        });
    }
    Ok(())
}
/// Checks that the optional `default` array agrees with the `missing` policy.
///
/// * `MissingPolicy::Null` requires `default` to be absent.
/// * `MissingPolicy::Default` requires `default` to be present, to match the
///   descriptor's Arrow type, and to hold exactly one non-null element.
///
/// # Errors
///
/// Returns [`PropertyError::ArrowTypeMismatch`] when a supplied default has the
/// wrong type, and [`PropertyError::DefaultPolicyMismatch`] for every other
/// violation.
fn validate_default_policy<Id, I>(
    descriptor: &PropertyLayerDescriptor<Id, I>,
    missing: MissingPolicy,
    default: Option<&ArrayRef>,
) -> Result<(), PropertyError>
where
    I: PropertyIndex,
{
    let satisfied = match (missing, default) {
        (MissingPolicy::Null, None) => true,
        (MissingPolicy::Default, Some(array)) => {
            // The type check comes first so a wrongly typed default reports a
            // type mismatch rather than a policy mismatch.
            ensure_arrow_type(descriptor, array.as_ref())?;
            array.len() == 1 && !array.is_null(0)
        }
        (MissingPolicy::Null, Some(_)) | (MissingPolicy::Default, None) => false,
    };
    if satisfied {
        Ok(())
    } else {
        Err(PropertyError::DefaultPolicyMismatch {
            name: descriptor.name.clone(),
        })
    }
}
/// Checks that `array` contains no null entries.
///
/// # Errors
///
/// Returns [`PropertyError::UnexpectedNull`] with the position of the first
/// null encountered.
pub(crate) fn ensure_no_nulls(array: &dyn Array) -> Result<(), PropertyError> {
    let first_null = (0..array.len()).find(|&index| array.is_null(index));
    match first_null {
        Some(index) => Err(PropertyError::UnexpectedNull { index }),
        None => Ok(()),
    }
}
/// Checks that every sparse index is below `len` and that the indexes are
/// strictly increasing.
///
/// For each position the bounds check runs before the ordering check.
///
/// # Errors
///
/// * [`PropertyError::SparseIndexOutOfBounds`] when an index does not convert
///   to `usize` or is not below `len`.
/// * [`PropertyError::SparseIndexOrder`] when an index is less than or equal to
///   its predecessor; `position` is the position of the later index.
pub(crate) fn validate_sparse_indices<I>(
    indices: &PrimitiveArray<I::ArrowType>,
    len: usize,
) -> Result<(), PropertyError>
where
    I: PropertyIndex,
{
    let mut previous = None;
    for position in 0..indices.len() {
        let index = indices.value(position);

        let in_bounds = match index.to_usize() {
            Some(as_usize) => as_usize < len,
            None => false,
        };
        if !in_bounds {
            return Err(PropertyError::SparseIndexOutOfBounds {
                index: index.to_u64(),
                len,
            });
        }

        match previous {
            Some(prior) if index <= prior => {
                return Err(PropertyError::SparseIndexOrder { position });
            }
            _ => previous = Some(index),
        }
    }
    Ok(())
}
/// Converts an Arrow error into [`PropertyError::Arrow`], keeping only its
/// display text.
#[expect(
    clippy::needless_pass_by_value,
    reason = "Arrow result adapters hand over owned errors and this helper consumes them into messages"
)]
pub(crate) fn map_arrow_error(error: arrow_schema::ArrowError) -> PropertyError {
    let message = error.to_string();
    PropertyError::Arrow { message }
}