Skip to main content

reddb_server/storage/unified/
entity.rs

1//! Unified Entity Model
2//!
//! Provides a single entity type that can represent table rows, graph nodes,
//! graph edges, vectors, time-series points, or queue messages with seamless
//! interoperability.
5
6use std::collections::HashMap;
7use std::fmt;
8use std::sync::Arc;
9
10use crate::storage::schema::Value;
11
/// Opaque 64-bit handle identifying one entity in the unified storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EntityId(pub u64);

impl EntityId {
    /// Wraps a raw `u64` as an entity identifier.
    pub fn new(id: u64) -> Self {
        EntityId(id)
    }

    /// Returns the wrapped `u64`.
    pub fn raw(&self) -> u64 {
        let EntityId(value) = *self;
        value
    }
}

/// Renders the identifier as the letter `e` followed by the raw value
/// (for example `e42`).
impl fmt::Display for EntityId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "e{}", self.raw())
    }
}

/// Lets a bare `u64` be used wherever an `EntityId` is expected via `.into()`.
impl From<u64> for EntityId {
    fn from(id: u64) -> Self {
        Self::new(id)
    }
}
39
/// Identifies which storage model an entity belongs to, together with the
/// addressing information specific to that model.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EntityKind {
    /// Row of a structured table. Stored inline rather than boxed.
    TableRow { table: Arc<str>, row_id: u64 },
    /// Graph node; the payload is boxed so it does not widen the enum.
    GraphNode(Box<GraphNodeKind>),
    /// Graph edge; the payload is boxed.
    GraphEdge(Box<GraphEdgeKind>),
    /// Vector stored in the named collection.
    Vector { collection: String },
    /// Time-series data point; the payload is boxed.
    TimeSeriesPoint(Box<TimeSeriesPointKind>),
    /// Message held in the named queue at the given position.
    QueueMessage { queue: String, position: u64 },
}

/// Payload of [`EntityKind::GraphNode`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GraphNodeKind {
    /// Node label; also what [`EntityKind::collection`] reports for nodes.
    pub label: String,
    /// Node type name.
    pub node_type: String,
}

/// Payload of [`EntityKind::GraphEdge`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GraphEdgeKind {
    /// Edge label; also what [`EntityKind::collection`] reports for edges.
    pub label: String,
    /// Identifier of the source node.
    pub from_node: String,
    /// Identifier of the destination node.
    pub to_node: String,
    /// Integer edge weight (an integer keeps the type `Eq` + `Hash`).
    pub weight: u32,
}

/// Payload of [`EntityKind::TimeSeriesPoint`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TimeSeriesPointKind {
    /// Series name; also what [`EntityKind::collection`] reports for points.
    pub series: String,
    /// Metric name.
    pub metric: String,
}

impl EntityKind {
    /// Returns a fixed string tag naming the storage model of this kind.
    pub fn storage_type(&self) -> &'static str {
        match *self {
            EntityKind::TableRow { .. } => "table",
            EntityKind::GraphNode(_) => "graph_node",
            EntityKind::GraphEdge(_) => "graph_edge",
            EntityKind::Vector { .. } => "vector",
            EntityKind::TimeSeriesPoint(_) => "timeseries",
            EntityKind::QueueMessage { .. } => "queue",
        }
    }

    /// Returns the name of the container this entity lives in: the table,
    /// the node/edge label, the vector collection, the series, or the queue.
    pub fn collection(&self) -> &str {
        match self {
            EntityKind::TableRow { table, .. } => table.as_ref(),
            EntityKind::GraphNode(node) => node.label.as_str(),
            EntityKind::GraphEdge(edge) => edge.label.as_str(),
            EntityKind::Vector { collection } => collection.as_str(),
            EntityKind::TimeSeriesPoint(point) => point.series.as_str(),
            EntityKind::QueueMessage { queue, .. } => queue.as_str(),
        }
    }
}
102
103/// The actual data content of an entity
104#[derive(Debug, Clone)]
105pub enum EntityData {
106    /// Table row data
107    Row(RowData),
108    /// Graph node data
109    Node(NodeData),
110    /// Graph edge data
111    Edge(EdgeData),
112    /// Vector data
113    Vector(VectorData),
114    /// Time-series data point
115    TimeSeries(TimeSeriesData),
116    /// Queue message data
117    QueueMessage(QueueMessageData),
118}
119
120impl EntityData {
121    /// Check if this is row data
122    pub fn is_row(&self) -> bool {
123        matches!(self, Self::Row(_))
124    }
125
126    /// Check if this is node data
127    pub fn is_node(&self) -> bool {
128        matches!(self, Self::Node(_))
129    }
130
131    /// Check if this is edge data
132    pub fn is_edge(&self) -> bool {
133        matches!(self, Self::Edge(_))
134    }
135
136    /// Check if this is vector data
137    pub fn is_vector(&self) -> bool {
138        matches!(self, Self::Vector(_))
139    }
140
141    /// Get as row data
142    pub fn as_row(&self) -> Option<&RowData> {
143        match self {
144            Self::Row(r) => Some(r),
145            _ => None,
146        }
147    }
148
149    /// Get as node data
150    pub fn as_node(&self) -> Option<&NodeData> {
151        match self {
152            Self::Node(n) => Some(n),
153            _ => None,
154        }
155    }
156
157    /// Get as edge data
158    pub fn as_edge(&self) -> Option<&EdgeData> {
159        match self {
160            Self::Edge(e) => Some(e),
161            _ => None,
162        }
163    }
164
165    /// Get as vector data
166    pub fn as_vector(&self) -> Option<&VectorData> {
167        match self {
168            Self::Vector(v) => Some(v),
169            _ => None,
170        }
171    }
172}
173
174/// Data for a table row
175#[derive(Debug, Clone)]
176pub struct RowData {
177    /// Column values in schema order
178    pub columns: Vec<Value>,
179    /// Named column access (optional, for convenience)
180    pub named: Option<HashMap<String, Value>>,
181    /// Shared column schema: column names in order (maps index → name).
182    /// When set, `columns` holds the values and `named` is None.
183    /// This saves ~60% memory vs per-row HashMap.
184    pub schema: Option<std::sync::Arc<Vec<String>>>,
185}
186
187impl RowData {
188    /// Create new row data from column values
189    pub fn new(columns: Vec<Value>) -> Self {
190        Self {
191            columns,
192            named: None,
193            schema: None,
194        }
195    }
196
197    /// Create row data with named columns
198    pub fn with_names(columns: Vec<Value>, names: Vec<String>) -> Self {
199        let named: HashMap<String, Value> =
200            names.into_iter().zip(columns.iter().cloned()).collect();
201        Self {
202            columns,
203            named: Some(named),
204            schema: None,
205        }
206    }
207
208    /// Get a named field value — checks named HashMap first, then schema+columns.
209    pub fn get_field(&self, name: &str) -> Option<&Value> {
210        // Fast path: named HashMap
211        if let Some(ref named) = self.named {
212            return named.get(name);
213        }
214        // Columnar path: use schema to find index
215        if let Some(ref schema) = self.schema {
216            if let Some(idx) = schema.iter().position(|s| s == name) {
217                return self.columns.get(idx);
218            }
219        }
220        None
221    }
222
223    /// Iterate over all (name, value) pairs — works for both named and columnar.
224    pub fn iter_fields(&self) -> Box<dyn Iterator<Item = (&str, &Value)> + '_> {
225        if let Some(ref named) = self.named {
226            Box::new(named.iter().map(|(k, v)| (k.as_str(), v)))
227        } else if let Some(ref schema) = self.schema {
228            Box::new(
229                schema
230                    .iter()
231                    .zip(self.columns.iter())
232                    .map(|(k, v)| (k.as_str(), v)),
233            )
234        } else {
235            Box::new(std::iter::empty())
236        }
237    }
238
239    /// Get column by index
240    pub fn get(&self, index: usize) -> Option<&Value> {
241        self.columns.get(index)
242    }
243
244    /// Get column by name
245    pub fn get_by_name(&self, name: &str) -> Option<&Value> {
246        self.named.as_ref()?.get(name)
247    }
248
249    /// Number of columns
250    pub fn len(&self) -> usize {
251        self.columns.len()
252    }
253
254    /// Check if empty
255    pub fn is_empty(&self) -> bool {
256        self.columns.is_empty()
257    }
258}
259
260/// Data for a graph node
261#[derive(Debug, Clone)]
262pub struct NodeData {
263    /// Node properties
264    pub properties: HashMap<String, Value>,
265}
266
267impl NodeData {
268    /// Create new node data
269    pub fn new() -> Self {
270        Self {
271            properties: HashMap::new(),
272        }
273    }
274
275    /// Create with properties
276    pub fn with_properties(properties: HashMap<String, Value>) -> Self {
277        Self { properties }
278    }
279
280    /// Set a property
281    pub fn set(&mut self, key: impl Into<String>, value: Value) {
282        self.properties.insert(key.into(), value);
283    }
284
285    /// Get a property
286    pub fn get(&self, key: &str) -> Option<&Value> {
287        self.properties.get(key)
288    }
289
290    /// Check if property exists
291    pub fn has(&self, key: &str) -> bool {
292        self.properties.contains_key(key)
293    }
294}
295
296impl Default for NodeData {
297    fn default() -> Self {
298        Self::new()
299    }
300}
301
302/// Data for a graph edge
303#[derive(Debug, Clone)]
304pub struct EdgeData {
305    /// Edge properties
306    pub properties: HashMap<String, Value>,
307    /// Edge weight (for weighted graphs)
308    pub weight: f32,
309}
310
311impl EdgeData {
312    /// Create new edge data
313    pub fn new(weight: f32) -> Self {
314        Self {
315            properties: HashMap::new(),
316            weight,
317        }
318    }
319
320    /// Create with properties
321    pub fn with_properties(weight: f32, properties: HashMap<String, Value>) -> Self {
322        Self { properties, weight }
323    }
324
325    /// Set a property
326    pub fn set(&mut self, key: impl Into<String>, value: Value) {
327        self.properties.insert(key.into(), value);
328    }
329
330    /// Get a property
331    pub fn get(&self, key: &str) -> Option<&Value> {
332        self.properties.get(key)
333    }
334}
335
336impl Default for EdgeData {
337    fn default() -> Self {
338        Self::new(1.0)
339    }
340}
341
342/// Data for a vector
343#[derive(Debug, Clone)]
344pub struct VectorData {
345    /// Dense vector (primary embedding)
346    pub dense: Vec<f32>,
347    /// Optional sparse vector
348    pub sparse: Option<SparseVector>,
349    /// Original content (if applicable)
350    pub content: Option<String>,
351}
352
353impl VectorData {
354    /// Create new vector data from dense vector
355    pub fn new(dense: Vec<f32>) -> Self {
356        Self {
357            dense,
358            sparse: None,
359            content: None,
360        }
361    }
362
363    /// Create with sparse vector
364    pub fn with_sparse(dense: Vec<f32>, sparse: SparseVector) -> Self {
365        Self {
366            dense,
367            sparse: Some(sparse),
368            content: None,
369        }
370    }
371
372    /// Set content
373    pub fn with_content(mut self, content: impl Into<String>) -> Self {
374        self.content = Some(content.into());
375        self
376    }
377
378    /// Get dimension
379    pub fn dimension(&self) -> usize {
380        self.dense.len()
381    }
382
383    /// Check if has sparse component
384    pub fn is_hybrid(&self) -> bool {
385        self.sparse.is_some()
386    }
387}
388
/// One sample of a time series.
#[derive(Debug, Clone)]
pub struct TimeSeriesData {
    /// Name of the metric, e.g. `"cpu.idle"`.
    pub metric: String,
    /// Sample time, in nanoseconds since the epoch.
    pub timestamp_ns: u64,
    /// Sampled value of the metric.
    pub value: f64,
    /// Dimensional tags attached to the sample, e.g. `{"host": "srv1"}`.
    pub tags: HashMap<String, String>,
}
401
402/// Queue message data
403#[derive(Debug, Clone)]
404pub struct QueueMessageData {
405    /// Message payload
406    pub payload: Value,
407    /// Optional priority (higher = more urgent)
408    pub priority: Option<i32>,
409    /// Enqueue timestamp (nanoseconds)
410    pub enqueued_at_ns: u64,
411    /// Number of delivery attempts
412    pub attempts: u32,
413    /// Maximum delivery attempts before DLQ
414    pub max_attempts: u32,
415    /// Whether the message has been acknowledged
416    pub acked: bool,
417}
418
/// Sparse vector stored as parallel `indices` / `values` lists.
///
/// `indices[k]` is the position of the k-th stored element and `values[k]`
/// is its value. The two lists are expected to have the same length; this is
/// only checked by a debug assertion in [`SparseVector::new`], and the fields
/// are public, so the accessors below are written to tolerate a mismatch.
#[derive(Debug, Clone)]
pub struct SparseVector {
    /// Positions of the stored (non-zero) elements.
    pub indices: Vec<u32>,
    /// Values at those positions, parallel to `indices`.
    pub values: Vec<f32>,
    /// Total dimension of the vector (may exceed `indices.len()`).
    pub dimension: usize,
}

impl SparseVector {
    /// Creates a sparse vector from parallel index/value lists.
    ///
    /// In debug builds this panics if `indices` and `values` differ in
    /// length; release builds accept the input as given.
    pub fn new(indices: Vec<u32>, values: Vec<f32>, dimension: usize) -> Self {
        debug_assert_eq!(indices.len(), values.len());
        Self {
            indices,
            values,
            dimension,
        }
    }

    /// Number of stored elements (the length of `indices`).
    pub fn nnz(&self) -> usize {
        self.indices.len()
    }

    /// Fraction of positions that are not stored: `1 - nnz / dimension`.
    ///
    /// Returns `1.0` for a zero-dimensional vector. No clamping is applied,
    /// so the result is negative if more entries are stored than `dimension`.
    pub fn sparsity(&self) -> f32 {
        if self.dimension == 0 {
            1.0
        } else {
            1.0 - (self.nnz() as f32 / self.dimension as f32)
        }
    }

    /// Returns the value stored at `index`, or `0.0` when it is not stored.
    ///
    /// When `index` appears more than once, the first occurrence wins. The
    /// lookup walks `indices` and `values` in lock-step, so an index whose
    /// position has no matching entry in `values` (mismatched list lengths)
    /// yields `0.0` instead of panicking on an out-of-bounds access.
    pub fn get(&self, index: u32) -> f32 {
        self.indices
            .iter()
            .zip(self.values.iter())
            .find(|(&stored, _)| stored == index)
            .map(|(_, &value)| value)
            .unwrap_or(0.0)
    }
}
464
/// A named embedding attached to an entity, one per embedded aspect.
#[derive(Debug, Clone)]
pub struct EmbeddingSlot {
    /// Name of the slot, e.g. `"content"`, `"summary"`, `"title"`, `"code"`.
    pub name: String,
    /// The embedding itself.
    pub vector: Vec<f32>,
    /// Name of the model that produced the embedding.
    pub model: String,
    /// Dimension of the embedding vector.
    pub dimension: usize,
    /// Timestamp recorded when the embedding was generated.
    pub generated_at: u64,
}
479
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, `0` is returned.
fn current_unix_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
486
487impl EmbeddingSlot {
488    /// Create a new embedding slot
489    pub fn new(name: impl Into<String>, vector: Vec<f32>, model: impl Into<String>) -> Self {
490        let dimension = vector.len();
491        Self {
492            name: name.into(),
493            vector,
494            model: model.into(),
495            dimension,
496            generated_at: current_unix_secs(),
497        }
498    }
499}
500
501/// A unified entity that can represent any storage type
502#[derive(Debug, Clone)]
503pub struct UnifiedEntity {
504    /// Unique entity identifier
505    pub id: EntityId,
506    /// Stable user-visible identity shared by all physical versions.
507    ///
508    /// `None` is the legacy encoding and resolves to `id`.
509    logical_id: Option<EntityId>,
510    /// What kind of entity this is
511    pub kind: EntityKind,
512    /// Creation timestamp
513    pub created_at: u64,
514    /// Last update timestamp
515    pub updated_at: u64,
516    /// The actual data content
517    pub data: EntityData,
518    /// Sequence ID for ordering/versioning
519    pub sequence_id: u64,
520    /// Field-name bloom filter (u64, zero-allocation).
521    ///
522    /// Each bit encodes one possible mid-character value: for field name `n`
523    /// the bit position is `n.as_bytes()[n.len()/2] & 63`. OR of all user
524    /// field names present in this entity. Cleared for schema-based bulk rows
525    /// (all rows share the same schema so bloom is segment-level).
526    ///
527    /// The compiled filter computes `required_bloom` from predicate field names
528    /// at compile time. If `entity.field_bloom & required_bloom != required_bloom`,
529    /// the entity cannot match and is skipped before any HashMap probe.
530    pub field_bloom: u64,
531    /// MVCC creation transaction ID (Phase 2.3 PG parity).
532    ///
533    /// `0` means "pre-MVCC" / auto-commit — visible to every snapshot. When
534    /// a BEGIN-wrapped INSERT runs, it stamps `xmin` with the transaction's
535    /// snapshot id so other concurrent transactions only see the row after
536    /// the writer commits (snapshot isolation semantics).
537    ///
538    /// Visibility rule: `xmin <= snapshot.xid && (xmax == 0 || xmax > snapshot.xid)`.
539    pub xmin: u64,
540    /// MVCC deletion transaction ID (Phase 2.3 PG parity).
541    ///
542    /// `0` means "live". Set to the deleting transaction's snapshot id on
543    /// DELETE/UPDATE (row is kept until VACUUM reclaims it). Snapshots with
544    /// `xid < xmax` still see the row; newer snapshots skip it.
545    pub xmax: u64,
546    /// Optional auxiliary data (embeddings, cross-refs).
547    /// None for most table rows — saves 40 bytes/entity.
548    aux: Option<Box<EntityAux>>,
549}
550
551/// Auxiliary entity data — only allocated when needed.
552#[derive(Debug, Clone, Default)]
553pub struct EntityAux {
554    /// Embedding slots (for multi-vector support)
555    pub embeddings: Vec<EmbeddingSlot>,
556    /// Cross-references to other entities
557    pub cross_refs: Vec<CrossRef>,
558}
559
560impl UnifiedEntity {
561    /// Access embeddings (returns empty slice if no aux data).
562    pub fn embeddings(&self) -> &[EmbeddingSlot] {
563        self.aux
564            .as_ref()
565            .map(|a| a.embeddings.as_slice())
566            .unwrap_or(&[])
567    }
568
569    /// Access cross-references (returns empty slice if no aux data).
570    pub fn cross_refs(&self) -> &[CrossRef] {
571        self.aux
572            .as_ref()
573            .map(|a| a.cross_refs.as_slice())
574            .unwrap_or(&[])
575    }
576
577    /// Get mutable embeddings (allocates aux if needed).
578    pub fn embeddings_mut(&mut self) -> &mut Vec<EmbeddingSlot> {
579        &mut self.aux.get_or_insert_with(Default::default).embeddings
580    }
581
582    /// Get mutable cross-refs (allocates aux if needed).
583    pub fn cross_refs_mut(&mut self) -> &mut Vec<CrossRef> {
584        &mut self.aux.get_or_insert_with(Default::default).cross_refs
585    }
586
587    /// Check if entity has any auxiliary data.
588    pub fn has_aux(&self) -> bool {
589        self.aux.is_some()
590    }
591}
592
/// Maps a field name to a single bit of a 64-bit field-name bloom filter.
///
/// The bit position is the byte at the middle of the name
/// (`name.as_bytes()[len / 2]`) masked to its low six bits, so the result
/// always has exactly one bit set. An empty name maps to `0` (no bit).
///
/// NOTE(review): the previous comment attributed this mid-character trick to
/// MongoDB's `FieldNameBloomFilter.h` and quoted a ~1.5% false-positive rate
/// for ≤5 distinct field names; neither is verified here.
#[inline]
pub fn field_name_bloom(name: &str) -> u64 {
    let bytes = name.as_bytes();
    match bytes.get(bytes.len() / 2) {
        Some(&mid) => 1u64 << (mid & 63),
        None => 0,
    }
}
606
607/// Compute the combined field-name bloom for all user-level fields in `data`.
608/// Returns 0 for schema-based rows (all rows share the same schema, so the
609/// per-entity bloom would be identical — caller uses a segment-level bloom).
610pub fn compute_entity_field_bloom(data: &EntityData) -> u64 {
611    match data {
612        EntityData::Row(row) => {
613            if row.schema.is_some() {
614                // Schema path: bloom is identical for every row in this table.
615                // Don't store per-entity — use segment-level bloom instead.
616                return 0;
617            }
618            if let Some(named) = &row.named {
619                named.keys().fold(0u64, |acc, k| acc | field_name_bloom(k))
620            } else {
621                0
622            }
623        }
624        EntityData::Node(node) => node
625            .properties
626            .keys()
627            .fold(0u64, |acc, k| acc | field_name_bloom(k)),
628        EntityData::Edge(edge) => edge
629            .properties
630            .keys()
631            .fold(0u64, |acc, k| acc | field_name_bloom(k)),
632        // Vectors, time-series, queue: no user-named fields worth blooming.
633        _ => 0,
634    }
635}
636
637impl UnifiedEntity {
638    /// Create a new unified entity
639    pub fn new(id: EntityId, kind: EntityKind, data: EntityData) -> Self {
640        let now = current_unix_secs();
641        let field_bloom = compute_entity_field_bloom(&data);
642
643        Self {
644            id,
645            logical_id: None,
646            kind,
647            created_at: now,
648            updated_at: now,
649            data,
650            sequence_id: 0,
651            field_bloom,
652            // Pre-MVCC default: xmin/xmax = 0 means visible to every snapshot.
653            // Transactional writers stamp real snapshot IDs after allocation.
654            xmin: 0,
655            xmax: 0,
656            aux: None,
657        }
658    }
659
660    /// MVCC visibility check (Phase 2.3 PG parity).
661    ///
662    /// Returns `true` when this tuple is visible under the provided
663    /// snapshot xid. Pre-MVCC rows (`xmin == 0`, `xmax == 0`) are visible
664    /// to every snapshot — preserves full compatibility with existing
665    /// data inserted before the MVCC headers existed.
666    ///
667    /// Snapshot isolation rule:
668    ///   - `xmin == 0 || xmin <= snapshot_xid`  (creator committed before snapshot)
669    ///   - `xmax == 0 || xmax > snapshot_xid`   (deleter committed after snapshot)
670    #[inline]
671    pub fn is_visible(&self, snapshot_xid: u64) -> bool {
672        if self.xmin != 0 && self.xmin > snapshot_xid {
673            return false;
674        }
675        if self.xmax != 0 && self.xmax <= snapshot_xid {
676            return false;
677        }
678        true
679    }
680
681    /// Stamp `xmin` (creation transaction ID). Called by the runtime on
682    /// INSERT inside an active transaction.
683    #[inline]
684    pub fn set_xmin(&mut self, xid: u64) {
685        self.xmin = xid;
686    }
687
688    /// Stamp `xmax` (deletion transaction ID). Called by the runtime on
689    /// DELETE/UPDATE inside an active transaction — the tuple survives
690    /// until VACUUM reclaims it.
691    #[inline]
692    pub fn set_xmax(&mut self, xid: u64) {
693        self.xmax = xid;
694    }
695
696    /// Stable user-visible identity. Legacy rows without an explicit
697    /// logical id map to their physical entity id.
698    #[inline]
699    pub fn logical_id(&self) -> EntityId {
700        self.logical_id.unwrap_or(self.id)
701    }
702
703    /// Returns true when the entity carries an explicit logical id on disk.
704    #[inline]
705    pub fn has_explicit_logical_id(&self) -> bool {
706        self.logical_id.is_some()
707    }
708
709    /// Set the stable user-visible identity for this physical version.
710    #[inline]
711    pub fn set_logical_id(&mut self, logical_id: EntityId) {
712        self.logical_id = Some(logical_id);
713    }
714
715    /// Ensure table rows written by the current engine carry explicit
716    /// logical identity. Other models retain the legacy implicit mapping
717    /// until their MVCC rollout adopts the resolver.
718    #[inline]
719    pub(crate) fn ensure_table_logical_id(&mut self) {
720        if matches!(self.kind, EntityKind::TableRow { .. }) && self.logical_id.is_none() {
721            self.logical_id = Some(self.id);
722        }
723    }
724
725    /// Create a table row entity
726    pub fn table_row(
727        id: EntityId,
728        table: impl Into<Arc<str>>,
729        row_id: u64,
730        columns: Vec<Value>,
731    ) -> Self {
732        Self::new(
733            id,
734            EntityKind::TableRow {
735                table: table.into(),
736                row_id,
737            },
738            EntityData::Row(RowData::new(columns)),
739        )
740    }
741
742    /// Create a graph node entity
743    pub fn graph_node(
744        id: EntityId,
745        label: impl Into<String>,
746        node_type: impl Into<String>,
747        properties: HashMap<String, Value>,
748    ) -> Self {
749        Self::new(
750            id,
751            EntityKind::GraphNode(Box::new(GraphNodeKind {
752                label: label.into(),
753                node_type: node_type.into(),
754            })),
755            EntityData::Node(NodeData::with_properties(properties)),
756        )
757    }
758
759    /// Create a graph edge entity
760    pub fn graph_edge(
761        id: EntityId,
762        label: impl Into<String>,
763        from: impl Into<String>,
764        to: impl Into<String>,
765        weight: f32,
766        properties: HashMap<String, Value>,
767    ) -> Self {
768        Self::new(
769            id,
770            EntityKind::GraphEdge(Box::new(GraphEdgeKind {
771                label: label.into(),
772                from_node: from.into(),
773                to_node: to.into(),
774                weight: (weight * 1000.0) as u32,
775            })),
776            EntityData::Edge(EdgeData::with_properties(weight, properties)),
777        )
778    }
779
780    /// Create a vector entity
781    pub fn vector(id: EntityId, collection: impl Into<String>, vector: Vec<f32>) -> Self {
782        Self::new(
783            id,
784            EntityKind::Vector {
785                collection: collection.into(),
786            },
787            EntityData::Vector(VectorData::new(vector)),
788        )
789    }
790
791    /// Add an embedding to this entity
792    pub fn add_embedding(&mut self, slot: EmbeddingSlot) {
793        self.embeddings_mut().push(slot);
794        self.touch();
795    }
796
797    /// Add a cross-reference
798    pub fn add_cross_ref(&mut self, cross_ref: CrossRef) {
799        self.cross_refs_mut().push(cross_ref);
800        self.touch();
801    }
802
803    /// Get embedding by slot name
804    pub fn get_embedding(&self, name: &str) -> Option<&EmbeddingSlot> {
805        self.embeddings().iter().find(|e| e.name == name)
806    }
807
808    /// Update timestamp
809    fn touch(&mut self) {
810        self.updated_at = current_unix_secs();
811    }
812
813    /// Check if entity is stale (not updated in given seconds)
814    pub fn is_stale(&self, max_age_secs: u64) -> bool {
815        let now = current_unix_secs();
816        now.saturating_sub(self.updated_at) > max_age_secs
817    }
818}
819
820/// A cross-reference between entities
821#[derive(Debug, Clone, PartialEq)]
822pub struct CrossRef {
823    /// Source entity ID (the entity that holds this reference)
824    pub source: EntityId,
825    /// Target entity ID
826    pub target: EntityId,
827    /// Target collection name
828    pub target_collection: String,
829    /// Type of reference
830    pub ref_type: RefType,
831    /// Reference weight/strength (0.0-1.0)
832    pub weight: f32,
833    /// When this reference was created
834    pub created_at: u64,
835}
836
837impl CrossRef {
838    /// Create a new cross-reference
839    pub fn new(
840        source: EntityId,
841        target: EntityId,
842        target_collection: impl Into<String>,
843        ref_type: RefType,
844    ) -> Self {
845        Self {
846            source,
847            target,
848            target_collection: target_collection.into(),
849            ref_type,
850            weight: 1.0,
851            created_at: current_unix_secs(),
852        }
853    }
854
855    /// Create with weight
856    pub fn with_weight(
857        source: EntityId,
858        target: EntityId,
859        target_collection: impl Into<String>,
860        ref_type: RefType,
861        weight: f32,
862    ) -> Self {
863        let mut cr = Self::new(source, target, target_collection, ref_type);
864        cr.weight = weight;
865        cr
866    }
867}
868
/// Relationship expressed by a cross-reference between two entities.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RefType {
    // Table ↔ Graph
    /// The table row represents a graph node.
    RowToNode,
    /// The table row represents a graph edge.
    RowToEdge,
    /// The node links back to its source row.
    NodeToRow,

    // Table ↔ Vector
    /// The table row has embeddings.
    RowToVector,
    /// Vector search result → its source row.
    VectorToRow,

    // Graph ↔ Vector
    /// The node has embeddings.
    NodeToVector,
    /// The edge has embeddings.
    EdgeToVector,
    /// Vector search result → its source node.
    VectorToNode,

    // Semantic links (discovered)
    /// Discovered through vector similarity.
    SimilarTo,
    /// Domain-specific relationship.
    RelatedTo,
    /// Data lineage.
    DerivesFrom,
    /// Text mentions another entity.
    Mentions,
    /// Structural containment.
    Contains,
    /// Dependency relationship.
    DependsOn,
}

impl RefType {
    /// Returns the reference type for the opposite direction, used for
    /// bidirectional tracking. Symmetric types are their own inverse;
    /// one-directional types have none.
    pub fn inverse(&self) -> Option<Self> {
        match *self {
            Self::RowToNode => Some(Self::NodeToRow),
            Self::NodeToRow => Some(Self::RowToNode),
            Self::RowToVector => Some(Self::VectorToRow),
            Self::VectorToRow => Some(Self::RowToVector),
            Self::NodeToVector => Some(Self::VectorToNode),
            Self::VectorToNode => Some(Self::NodeToVector),
            // Symmetric relationships map to themselves.
            Self::SimilarTo | Self::RelatedTo => Some(*self),
            // One-directional references.
            Self::RowToEdge
            | Self::EdgeToVector
            | Self::DerivesFrom
            | Self::Mentions
            | Self::Contains
            | Self::DependsOn => None,
        }
    }

    /// `true` for the symmetric types, `SimilarTo` and `RelatedTo`.
    pub fn is_symmetric(&self) -> bool {
        match *self {
            Self::SimilarTo | Self::RelatedTo => true,
            _ => false,
        }
    }

    /// Encodes the type as the single byte used by binary serialization.
    pub fn to_byte(&self) -> u8 {
        match *self {
            Self::RowToNode => 0,
            Self::RowToEdge => 1,
            Self::NodeToRow => 2,
            Self::RowToVector => 3,
            Self::VectorToRow => 4,
            Self::NodeToVector => 5,
            Self::EdgeToVector => 6,
            Self::VectorToNode => 7,
            Self::SimilarTo => 8,
            Self::RelatedTo => 9,
            Self::DerivesFrom => 10,
            Self::Mentions => 11,
            Self::Contains => 12,
            Self::DependsOn => 13,
        }
    }

    /// Decodes a byte written by [`RefType::to_byte`].
    ///
    /// Bytes outside the known range decode to `RelatedTo` as a fallback.
    pub fn from_byte(byte: u8) -> Self {
        // Entry `i` is the variant whose `to_byte()` is `i`.
        const BY_CODE: [RefType; 14] = [
            RefType::RowToNode,
            RefType::RowToEdge,
            RefType::NodeToRow,
            RefType::RowToVector,
            RefType::VectorToRow,
            RefType::NodeToVector,
            RefType::EdgeToVector,
            RefType::VectorToNode,
            RefType::SimilarTo,
            RefType::RelatedTo,
            RefType::DerivesFrom,
            RefType::Mentions,
            RefType::Contains,
            RefType::DependsOn,
        ];

        BY_CODE
            .get(usize::from(byte))
            .copied()
            .unwrap_or(Self::RelatedTo)
    }
}
957
958/// Convert Vec<Value> to RowData
959impl From<Vec<Value>> for RowData {
960    fn from(columns: Vec<Value>) -> Self {
961        RowData::new(columns)
962    }
963}
964
965/// Convert HashMap to NodeData
966impl From<HashMap<String, Value>> for NodeData {
967    fn from(properties: HashMap<String, Value>) -> Self {
968        NodeData::with_properties(properties)
969    }
970}
971
972/// Convert dense vector to VectorData
973impl From<Vec<f32>> for VectorData {
974    fn from(dense: Vec<f32>) -> Self {
975        VectorData::new(dense)
976    }
977}
978
979/// Convert tuple (dense, sparse) to VectorData
980impl From<(Vec<f32>, SparseVector)> for VectorData {
981    fn from((dense, sparse): (Vec<f32>, SparseVector)) -> Self {
982        VectorData::with_sparse(dense, sparse)
983    }
984}
985
986// Helper trait for uniform entity creation
987impl UnifiedEntity {
988    /// Create a graph node entity from properties map
989    pub fn from_properties(
990        id: EntityId,
991        label: impl Into<String>,
992        node_type: impl Into<String>,
993        properties: impl IntoIterator<Item = (impl Into<String>, Value)>,
994    ) -> Self {
995        let props: HashMap<String, Value> =
996            properties.into_iter().map(|(k, v)| (k.into(), v)).collect();
997        Self::graph_node(id, label, node_type, props)
998    }
999
1000    /// Convert entity to row data if applicable
1001    pub fn into_row(self) -> Option<RowData> {
1002        match self.data {
1003            EntityData::Row(r) => Some(r),
1004            _ => None,
1005        }
1006    }
1007
1008    /// Convert entity to node data if applicable
1009    pub fn into_node(self) -> Option<NodeData> {
1010        match self.data {
1011            EntityData::Node(n) => Some(n),
1012            _ => None,
1013        }
1014    }
1015
1016    /// Convert entity to edge data if applicable
1017    pub fn into_edge(self) -> Option<EdgeData> {
1018        match self.data {
1019            EntityData::Edge(e) => Some(e),
1020            _ => None,
1021        }
1022    }
1023
1024    /// Convert entity to vector data if applicable
1025    pub fn into_vector(self) -> Option<VectorData> {
1026        match self.data {
1027            EntityData::Vector(v) => Some(v),
1028            _ => None,
1029        }
1030    }
1031}
1032
#[cfg(test)]
mod tests {
    use super::*;

    /// A table-row entity reports row data and the table storage type/name.
    #[test]
    fn test_entity_creation() {
        let columns = vec![Value::text("alice".to_string()), Value::Integer(25)];
        let user = UnifiedEntity::table_row(EntityId::new(1), "users", 100, columns);

        assert!(user.data.is_row());
        assert_eq!(user.kind.storage_type(), "table");
        assert_eq!(user.kind.collection(), "users");
    }

    /// A cross-reference keeps its endpoints and exposes the inverse type.
    #[test]
    fn test_cross_refs() {
        let source = EntityId::new(1);
        let target = EntityId::new(2);

        let link = CrossRef::new(source, target, "nodes", RefType::RowToNode);

        assert_eq!(link.source, source);
        assert_eq!(link.target, target);
        assert_eq!(link.ref_type.inverse(), Some(RefType::NodeToRow));
    }

    /// Stored indices return their value, missing ones return zero.
    #[test]
    fn test_sparse_vector() {
        let indices = vec![0, 5, 10];
        let values = vec![1.0, 2.0, 3.0];
        let sv = SparseVector::new(indices, values, 100);

        assert_eq!(sv.nnz(), 3);
        assert_eq!(sv.get(5), 2.0);
        assert_eq!(sv.get(3), 0.0);
        assert!(sv.sparsity() > 0.9);
    }

    /// Embeddings added to an entity can be found again by slot name.
    #[test]
    fn test_embedding_slots() {
        let columns = vec![Value::text("Hello world".to_string())];
        let mut doc = UnifiedEntity::table_row(EntityId::new(1), "documents", 1, columns);

        let slot = EmbeddingSlot::new("content", vec![0.1, 0.2, 0.3], "text-embedding-3-small");
        doc.add_embedding(slot);

        assert_eq!(doc.embeddings().len(), 1);
        assert!(doc.get_embedding("content").is_some());
        assert!(doc.get_embedding("summary").is_none());
    }
}