fcdb_core/
lib.rs

1//! # Enishi Core
2//!
3//! Core data structures and algorithms for the Enishi graph database.
4//!
5//! Merkle DAG: enishi_core -> cid, cap, monoid, path_sig, class_sig, trace_normal_form
6
7use serde::{Deserialize, Serialize};
8use std::fmt;
9use std::collections::HashMap;
10
11/// Content Identifier (CID) - BLAKE3/256 hash
12#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
13pub struct Cid([u8; 32]);
14
15impl Cid {
16    pub fn from_bytes(bytes: [u8; 32]) -> Self {
17        Self(bytes)
18    }
19
20    pub fn as_bytes(&self) -> &[u8; 32] {
21        &self.0
22    }
23
24    pub fn hash(data: &[u8]) -> Self {
25        let mut hasher = blake3::Hasher::new();
26        hasher.update(data);
27        let hash = hasher.finalize();
28        Self(hash.into())
29    }
30
31    pub fn from_json<T: serde::Serialize>(value: &T) -> Result<Self, serde_json::Error> {
32        let canonical_json = serde_json::to_string(value)?;
33        Ok(Self::hash(canonical_json.as_bytes()))
34    }
35}
36
37impl fmt::Debug for Cid {
38    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39        write!(f, "Cid({})", hex::encode(&self.0[..8]))
40    }
41}
42
43impl fmt::Display for Cid {
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        write!(f, "{}", hex::encode(self.0))
46    }
47}
48
49/// Capability (Cap) - Cheri-style capability
50#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
51pub struct Cap {
52    pub base: u64,
53    pub len: u64,
54    pub perms: u32,
55    pub proof: [u8; 16],
56}
57
58impl Cap {
59    pub fn new(base: u64, len: u64, perms: u32) -> Self {
60        let proof = rand::random::<[u8; 16]>();
61        Self { base, len, perms, proof }
62    }
63
64    pub fn contains(&self, addr: u64) -> bool {
65        addr >= self.base && addr < self.base + self.len
66    }
67
68    pub fn has_perm(&self, perm: u32) -> bool {
69        (self.perms & perm) != 0
70    }
71}
72
73/// Query Key for caching and indexing
74#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
75pub struct QKey {
76    pub path_sig: Cid,
77    pub class_sig: Cid,
78    pub as_of: u64,
79    pub cap_region: (u64, u64),
80    pub type_part: u16,
81}
82
83/// Monoid for deterministic composition
84pub trait Monoid {
85    fn empty() -> Self;
86    fn combine(self, other: Self) -> Self;
87}
88
89/// Varint encoding utilities
90pub mod varint {
91    use integer_encoding::{VarIntReader, VarIntWriter};
92    use std::io::Read;
93
94    pub fn encode_u64(value: u64, buf: &mut Vec<u8>) {
95        buf.write_varint(value).unwrap();
96    }
97
98    pub fn decode_u64<R: Read>(reader: &mut R) -> Result<u64, std::io::Error> {
99        reader.read_varint()
100    }
101}
102
103// ===== PHASE B: Path/Class Signatures =====
104
105/// Path signature for query optimization
106/// Computes a compact representation of query paths for caching
107pub fn compute_path_sig(path: &[&str]) -> Cid {
108    let mut data = Vec::new();
109    for segment in path {
110        data.extend_from_slice(segment.as_bytes());
111        data.push(0); // null terminator
112    }
113    Cid::hash(&data)
114}
115
116/// Class signature for type-based optimization
117/// Sorts classes to ensure deterministic signatures
118pub fn compute_class_sig(classes: &[&str]) -> Cid {
119    let mut sorted_classes = classes.to_vec();
120    sorted_classes.sort();
121    let mut data = Vec::new();
122    for class in sorted_classes {
123        data.extend_from_slice(class.as_bytes());
124        data.push(0);
125    }
126    Cid::hash(&data)
127}
128
129// ===== PHASE B: Trace Normal Form =====
130
131/// Trace element representing a single operation
132#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
133pub enum TraceOp {
134    NodeCreate { id: u64, data: Cid },
135    EdgeCreate { from: u64, to: u64, label: u32, props: Cid },
136    PropertyUpdate { node: u64, key: String, value: Cid },
137}
138
139/// Trace sequence for commutative operations
140#[derive(Clone, Debug, Serialize, Deserialize)]
141pub struct Trace {
142    pub ops: Vec<TraceOp>,
143    pub timestamp: u64,
144}
145
146impl Trace {
147    pub fn new(timestamp: u64) -> Self {
148        Self {
149            ops: Vec::new(),
150            timestamp,
151        }
152    }
153
154    pub fn add_op(&mut self, op: TraceOp) {
155        self.ops.push(op);
156    }
157}
158
159impl Monoid for Trace {
160    fn empty() -> Self {
161        Self::new(0)
162    }
163
164    fn combine(mut self, other: Self) -> Self {
165        // Simple concatenation - in full implementation would handle commutativity
166        self.ops.extend(other.ops);
167        self.timestamp = self.timestamp.max(other.timestamp);
168        self
169    }
170}
171
172/// Trace normal form - canonical representation for key reduction
173pub struct TraceNF {
174    pub canonical_form: Cid,
175    pub commutative_groups: Vec<Vec<TraceOp>>,
176}
177
178impl TraceNF {
179    /// Convert trace to normal form for key explosion reduction
180    pub fn from_trace(trace: &Trace) -> Self {
181        // Group commutative operations
182        let mut node_ops = Vec::new();
183        let mut edge_ops = Vec::new();
184        let mut prop_ops = Vec::new();
185
186        for op in &trace.ops {
187            match op {
188                TraceOp::NodeCreate { .. } => node_ops.push(op.clone()),
189                TraceOp::EdgeCreate { .. } => edge_ops.push(op.clone()),
190                TraceOp::PropertyUpdate { .. } => prop_ops.push(op.clone()),
191            }
192        }
193
194        // Sort within each commutative group
195        node_ops.sort_by_key(|op| match op {
196            TraceOp::NodeCreate { id, .. } => *id,
197            _ => 0,
198        });
199
200        edge_ops.sort_by_key(|op| match op {
201            TraceOp::EdgeCreate { from, to, .. } => (*from, *to),
202            _ => (0, 0),
203        });
204
205        prop_ops.sort_by_key(|op| match op {
206            TraceOp::PropertyUpdate { node, key, .. } => (*node, key.clone()),
207            _ => (0, String::new()),
208        });
209
210        let commutative_groups = vec![node_ops, edge_ops, prop_ops];
211
212        // Compute canonical form
213        let mut canonical_data = Vec::new();
214        for group in &commutative_groups {
215            for op in group {
216                let op_json = serde_json::to_string(op).unwrap();
217                canonical_data.extend_from_slice(op_json.as_bytes());
218            }
219        }
220
221        Self {
222            canonical_form: Cid::hash(&canonical_data),
223            commutative_groups,
224        }
225    }
226}
227
228// ===== PHASE B: Manifest Diffing =====
229
230/// Manifest entry for query result caching
231#[derive(Clone, Debug, Serialize, Deserialize)]
232pub struct ManifestEntry {
233    pub qkey: QKey,
234    pub result_cid: Cid,
235    pub last_accessed: u64,
236    pub access_count: u64,
237}
238
239/// Manifest with diff support for efficient updates
240#[derive(Clone, Debug)]
241pub struct Manifest {
242    pub base_version: u64,
243    pub entries: HashMap<QKey, ManifestEntry>,
244    pub diffs: Vec<ManifestDiff>,
245}
246
247#[derive(Clone, Debug, Serialize, Deserialize)]
248pub struct ManifestDiff {
249    pub version: u64,
250    pub timestamp: u64,
251    pub added: Vec<ManifestEntry>,
252    pub removed: Vec<QKey>,
253    pub updated: Vec<(QKey, Cid)>, // qkey -> new_result_cid
254}
255
256impl Manifest {
257    pub fn new() -> Self {
258        Self {
259            base_version: 0,
260            entries: HashMap::new(),
261            diffs: Vec::new(),
262        }
263    }
264
265    /// Apply diff to manifest
266    pub fn apply_diff(&mut self, diff: ManifestDiff) {
267        // Remove entries
268        for qkey in &diff.removed {
269            self.entries.remove(qkey);
270        }
271
272        // Update entries
273        for (qkey, new_cid) in &diff.updated {
274            if let Some(entry) = self.entries.get_mut(qkey) {
275                entry.result_cid = *new_cid;
276            }
277        }
278
279        // Add new entries
280        for entry in &diff.added {
281            self.entries.insert(entry.qkey.clone(), entry.clone());
282        }
283
284        self.diffs.push(diff);
285    }
286
287    /// Get result for query key, checking diffs
288    pub fn get_result(&self, qkey: &QKey) -> Option<Cid> {
289        self.entries.get(qkey).map(|entry| entry.result_cid)
290    }
291
292    /// Create diff from current state to new state
293    pub fn create_diff(&self, new_entries: HashMap<QKey, ManifestEntry>) -> ManifestDiff {
294        let mut added = Vec::new();
295        let mut removed = Vec::new();
296        let mut updated = Vec::new();
297
298        // Find added entries
299        for (qkey, entry) in &new_entries {
300            if !self.entries.contains_key(qkey) {
301                added.push(entry.clone());
302            }
303        }
304
305        // Find removed and updated entries
306        for (qkey, old_entry) in &self.entries {
307            if let Some(new_entry) = new_entries.get(qkey) {
308                if old_entry.result_cid != new_entry.result_cid {
309                    updated.push((qkey.clone(), new_entry.result_cid));
310                }
311            } else {
312                removed.push(qkey.clone());
313            }
314        }
315
316        ManifestDiff {
317            version: self.base_version + self.diffs.len() as u64 + 1,
318            timestamp: std::time::SystemTime::now()
319                .duration_since(std::time::UNIX_EPOCH)
320                .unwrap()
321                .as_secs(),
322            added,
323            removed,
324            updated,
325        }
326    }
327}
328
329// ===== PHASE B: Query Optimization =====
330
331/// Query plan with optimization hints
332#[derive(Clone, Debug)]
333pub struct QueryPlan {
334    pub qkey: QKey,
335    pub estimated_cost: f64,
336    pub use_path_sig: bool,
337    pub use_class_sig: bool,
338    pub trace_optimized: bool,
339    pub manifest_cached: bool,
340}
341
342impl QueryPlan {
343    /// Create optimized plan for query
344    pub fn optimize(path: &[&str], classes: &[&str], as_of: u64) -> Self {
345        let path_sig = compute_path_sig(path);
346        let class_sig = compute_class_sig(classes);
347
348        let qkey = QKey {
349            path_sig,
350            class_sig,
351            as_of,
352            cap_region: (0, u64::MAX), // Full range
353            type_part: 0,
354        };
355
356        // Estimate cost based on path complexity
357        let path_complexity = path.len() as f64;
358        let class_selectivity = classes.len() as f64;
359
360        let estimated_cost = path_complexity * class_selectivity * 10.0; // Simplified
361
362        Self {
363            qkey,
364            estimated_cost,
365            use_path_sig: path.len() > 1,
366            use_class_sig: classes.len() > 1,
367            trace_optimized: true, // Phase B feature
368            manifest_cached: true, // Assume manifest caching
369        }
370    }
371}
372
373#[cfg(test)]
374mod tests {
375    use super::*;
376
377    #[test]
378    fn test_cid_creation() {
379        let data = b"hello world";
380        let cid = Cid::hash(data);
381        assert_eq!(cid.as_bytes().len(), 32);
382    }
383
384    #[test]
385    fn test_path_signature() {
386        let path1 = &["user", "posts"];
387        let path2 = &["user", "posts"];
388        let path3 = &["posts", "user"];
389
390        let sig1 = compute_path_sig(path1);
391        let sig2 = compute_path_sig(path2);
392        let sig3 = compute_path_sig(path3);
393
394        assert_eq!(sig1, sig2); // Same path
395        assert_ne!(sig1, sig3); // Different path
396    }
397
398    #[test]
399    fn test_class_signature() {
400        let classes1 = &["User", "Post"];
401        let classes2 = &["Post", "User"]; // Different order
402        let classes3 = &["User", "Comment"];
403
404        let sig1 = compute_class_sig(classes1);
405        let sig2 = compute_class_sig(classes2);
406        let sig3 = compute_class_sig(classes3);
407
408        assert_eq!(sig1, sig2); // Same classes, different order
409        assert_ne!(sig1, sig3); // Different classes
410    }
411
412    #[test]
413    fn test_trace_normal_form() {
414        let mut trace = Trace::new(1234567890);
415        trace.add_op(TraceOp::NodeCreate { id: 1, data: Cid::hash(b"node1") });
416        trace.add_op(TraceOp::NodeCreate { id: 2, data: Cid::hash(b"node2") });
417
418        let nf = TraceNF::from_trace(&trace);
419        assert!(!nf.commutative_groups.is_empty());
420    }
421
422    #[test]
423    fn test_manifest_diffing() {
424        let mut manifest = Manifest::new();
425
426        let qkey1 = QKey {
427            path_sig: compute_path_sig(&["test"]),
428            class_sig: compute_class_sig(&["Test"]),
429            as_of: 1000,
430            cap_region: (0, 100),
431            type_part: 1,
432        };
433
434        let entry1 = ManifestEntry {
435            qkey: qkey1.clone(),
436            result_cid: Cid::hash(b"result1"),
437            last_accessed: 1000,
438            access_count: 1,
439        };
440
441        let mut new_entries = HashMap::new();
442        new_entries.insert(qkey1.clone(), entry1);
443
444        let diff = manifest.create_diff(new_entries);
445        assert_eq!(diff.added.len(), 1);
446        assert_eq!(diff.removed.len(), 0);
447
448        manifest.apply_diff(diff);
449        assert!(manifest.get_result(&qkey1).is_some());
450    }
451
452    #[test]
453    fn test_query_plan_optimization() {
454        let path = &["user", "posts", "comments"];
455        let classes = &["User", "Post", "Comment"];
456
457        let plan = QueryPlan::optimize(path, classes, 1234567890);
458
459        assert!(plan.use_path_sig);
460        assert!(plan.use_class_sig);
461        assert!(plan.trace_optimized);
462        assert!(plan.manifest_cached);
463        assert!(plan.estimated_cost > 0.0);
464    }
465}