Skip to main content

aurora_db/
types.rs

1use chrono::{DateTime, Utc};
2use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
3use serde::{Deserialize, Serialize};
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::error::Error;
7use std::fmt;
8use std::hash::{Hash, Hasher};
9use std::path::{Path, PathBuf};
10use uuid::Uuid;
11
12#[derive(
13    Debug,
14    Clone,
15    Serialize,
16    Deserialize,
17    PartialEq,
18    Eq,
19    Hash,
20    Archive,
21    RkyvSerialize,
22    RkyvDeserialize,
23)]
24#[archive(check_bytes)]
25pub enum ScalarType {
26    String,
27    Int,
28    Uuid,
29    Bool,
30    Float,
31    Any,
32    Object,
33    Array,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
37pub enum FieldType {
38    Scalar(ScalarType),
39    Array(ScalarType),                             // Array of scalars
40    Object,                                        // Flat object (no schema validation)
41    Nested(Box<HashMap<String, FieldDefinition>>), // Deeply nested object with schema
42    Any,
43}
44
45impl Hash for FieldType {
46    fn hash<H: Hasher>(&self, state: &mut H) {
47        std::mem::discriminant(self).hash(state);
48        match self {
49            FieldType::Scalar(s) => s.hash(state),
50            FieldType::Array(s) => s.hash(state),
51            FieldType::Object => {}
52            FieldType::Nested(map) => {
53                let mut entries: Vec<_> = map.iter().collect();
54                entries.sort_by(|a, b| a.0.cmp(b.0));
55                for (k, v) in entries {
56                    k.hash(state);
57                    v.hash(state);
58                }
59            }
60            FieldType::Any => {}
61        }
62    }
63}
64
65// Helpers for backward compatibility/ease of use
66#[allow(non_upper_case_globals)]
67impl FieldType {
68    pub const String: FieldType = FieldType::Scalar(ScalarType::String);
69    pub const Int: FieldType = FieldType::Scalar(ScalarType::Int);
70    pub const Uuid: FieldType = FieldType::Scalar(ScalarType::Uuid);
71    pub const Bool: FieldType = FieldType::Scalar(ScalarType::Bool);
72    pub const Float: FieldType = FieldType::Scalar(ScalarType::Float);
73    pub const Any: FieldType = FieldType::Scalar(ScalarType::Any);
74
75    // Renamed to avoid shadowing the Object and Array enum variants
76    /// Scalar object type (use `FieldType::Object` variant for flat objects without schema)
77    pub const SCALAR_OBJECT: FieldType = FieldType::Scalar(ScalarType::Object);
78    /// Scalar array type (use `FieldType::Array(T)` variant for typed arrays)
79    pub const SCALAR_ARRAY: FieldType = FieldType::Scalar(ScalarType::Array);
80}
81
82#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
83pub struct FieldDefinition {
84    pub field_type: FieldType,
85    pub unique: bool,
86    pub indexed: bool,
87    pub nullable: bool, // Added #1
88}
89
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct Collection {
92    pub name: String,
93    pub fields: HashMap<String, FieldDefinition>,
94}
95
96#[derive(Clone, Serialize, Deserialize)]
97pub struct Document {
98    pub id: String,
99    pub data: HashMap<String, Value>,
100}
101
102impl Default for Document {
103    fn default() -> Self {
104        Self {
105            id: Uuid::new_v4().to_string(),
106            data: HashMap::new(),
107        }
108    }
109}
110
111impl Document {
112    pub fn new() -> Self {
113        Self::default()
114    }
115}
116
117impl fmt::Display for Document {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        write!(f, "{{ ")?;
120        let mut first = true;
121        for (key, value) in &self.data {
122            if !first {
123                write!(f, ", ")?;
124            }
125            write!(f, "\"{}\": {}", key, value)?;
126            first = false;
127        }
128        write!(f, " }}")
129    }
130}
131
132impl fmt::Debug for Document {
133    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
134        fmt::Display::fmt(self, f)
135    }
136}
137
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub enum Value {
140    Null,
141    String(String),
142    Int(i64),
143    Float(f64),
144    Bool(bool),
145    Array(Vec<Value>),
146    Object(HashMap<String, Value>),
147    Uuid(Uuid),
148}
149
150// Custom implementations for Hash, Eq, and PartialEq
151impl Hash for Value {
152    fn hash<H: Hasher>(&self, state: &mut H) {
153        match self {
154            Value::Null => 0.hash(state),
155            Value::String(s) => s.hash(state),
156            Value::Int(i) => i.hash(state),
157            Value::Float(f) => {
158                // Convert to bits to hash floating point numbers
159                f.to_bits().hash(state)
160            }
161            Value::Bool(b) => b.hash(state),
162            Value::Array(arr) => arr.hash(state),
163            Value::Object(map) => {
164                // Sort keys for consistent hashing
165                let mut keys: Vec<_> = map.keys().collect();
166                keys.sort();
167                for key in keys {
168                    key.hash(state);
169                    map.get(key).unwrap().hash(state);
170                }
171            }
172            Value::Uuid(u) => u.hash(state),
173        }
174    }
175}
176
177impl PartialEq for Value {
178    fn eq(&self, other: &Self) -> bool {
179        match (self, other) {
180            (Value::Null, Value::Null) => true,
181            (Value::String(a), Value::String(b)) => a == b,
182            (Value::Int(a), Value::Int(b)) => a == b,
183            (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
184            (Value::Bool(a), Value::Bool(b)) => a == b,
185            (Value::Array(a), Value::Array(b)) => a == b,
186            (Value::Object(a), Value::Object(b)) => a == b,
187            (Value::Uuid(a), Value::Uuid(b)) => a == b,
188            _ => false,
189        }
190    }
191}
192
193impl Eq for Value {}
194
195// Implement Display for Value
196impl fmt::Display for Value {
197    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
198        match self {
199            Value::String(s) => write!(f, "\"{}\"", s),
200            Value::Int(i) => write!(f, "{}", i),
201            Value::Float(fl) => write!(f, "{}", fl),
202            Value::Bool(b) => write!(f, "{}", b),
203            Value::Array(arr) => {
204                let items: Vec<String> = arr.iter().map(|v| v.to_string()).collect();
205                write!(f, "[{}]", items.join(", "))
206            }
207            Value::Object(obj) => {
208                let items: Vec<String> = obj
209                    .iter()
210                    .map(|(k, v)| format!("\"{}\": {}", k, v))
211                    .collect();
212                write!(f, "{{{}}}", items.join(", "))
213            }
214            Value::Uuid(u) => write!(f, "\"{}\"", u),
215            Value::Null => write!(f, "null"),
216        }
217    }
218}
219
220// Helper for deterministic ordering of different types
221fn type_rank(v: &Value) -> u8 {
222    match v {
223        Value::Null => 0,
224        Value::Bool(_) => 1,
225        Value::Int(_) => 2,
226        Value::Float(_) => 3,
227        Value::String(_) => 4,
228        Value::Uuid(_) => 5,
229        Value::Array(_) => 6,
230        Value::Object(_) => 7,
231    }
232}
233
234impl PartialOrd for Value {
235    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
236        let self_rank = type_rank(self);
237        let other_rank = type_rank(other);
238
239        if self_rank != other_rank {
240            return Some(self_rank.cmp(&other_rank));
241        }
242
243        match (self, other) {
244            (Value::String(a), Value::String(b)) => a.partial_cmp(b),
245            (Value::Int(a), Value::Int(b)) => a.partial_cmp(b),
246            (Value::Float(a), Value::Float(b)) => a.partial_cmp(b),
247            (Value::Bool(a), Value::Bool(b)) => a.partial_cmp(b),
248            (Value::Array(a), Value::Array(b)) => a.partial_cmp(b),
249            (Value::Uuid(a), Value::Uuid(b)) => a.partial_cmp(b),
250            (Value::Object(_), Value::Object(_)) => Some(Ordering::Equal),
251            (Value::Null, Value::Null) => Some(Ordering::Equal),
252            _ => None,
253        }
254    }
255}
256
257impl Ord for Value {
258    fn cmp(&self, other: &Self) -> Ordering {
259        self.partial_cmp(other)
260            .expect("Value::partial_cmp should always return Some for same-type values")
261    }
262}
263
264// Add From implementations for common types
265impl From<i64> for Value {
266    fn from(v: i64) -> Self {
267        Value::Int(v)
268    }
269}
270
271impl From<i32> for Value {
272    fn from(v: i32) -> Self {
273        Value::Int(v as i64)
274    }
275}
276
277impl From<&str> for Value {
278    fn from(v: &str) -> Self {
279        Value::String(v.to_string())
280    }
281}
282
283impl From<String> for Value {
284    fn from(v: String) -> Self {
285        Value::String(v)
286    }
287}
288
289impl From<bool> for Value {
290    fn from(v: bool) -> Self {
291        Value::Bool(v)
292    }
293}
294
295impl From<f64> for Value {
296    fn from(v: f64) -> Self {
297        Value::Float(v)
298    }
299}
300
301impl From<Vec<Value>> for Value {
302    fn from(v: Vec<Value>) -> Self {
303        Value::Array(v)
304    }
305}
306
307impl From<HashMap<String, Value>> for Value {
308    fn from(v: HashMap<String, Value>) -> Self {
309        Value::Object(v)
310    }
311}
312
313impl From<Uuid> for Value {
314    fn from(v: Uuid) -> Self {
315        Value::Uuid(v)
316    }
317}
318
319// Helper methods for Value type conversion and extraction
320impl Value {
321    pub fn as_str(&self) -> Option<&str> {
322        if let Value::String(s) = self {
323            Some(s)
324        } else {
325            None
326        }
327    }
328
329    pub fn as_bool(&self) -> Option<bool> {
330        if let Value::Bool(b) = self {
331            Some(*b)
332        } else {
333            None
334        }
335    }
336
337    pub fn as_i64(&self) -> Option<i64> {
338        if let Value::Int(i) = self {
339            Some(*i)
340        } else {
341            None
342        }
343    }
344
345    pub fn as_f64(&self) -> Option<f64> {
346        if let Value::Float(f) = self {
347            Some(*f)
348        } else {
349            None
350        }
351    }
352
353    pub fn as_array(&self) -> Option<&Vec<Value>> {
354        if let Value::Array(arr) = self {
355            Some(arr)
356        } else {
357            None
358        }
359    }
360
361    pub fn as_object(&self) -> Option<&HashMap<String, Value>> {
362        if let Value::Object(obj) = self {
363            Some(obj)
364        } else {
365            None
366        }
367    }
368
369    pub fn as_uuid(&self) -> Option<Uuid> {
370        match self {
371            Value::Uuid(u) => Some(*u),
372            Value::String(s) => Uuid::parse_str(s).ok(),
373            _ => None,
374        }
375    }
376
377    pub fn as_datetime(&self) -> Option<DateTime<Utc>> {
378        match self {
379            Value::String(s) => DateTime::parse_from_rfc3339(s)
380                .ok()
381                .map(|dt| dt.with_timezone(&Utc)),
382            _ => None,
383        }
384    }
385
386    /// Generate a string from known value types.
387    pub fn to_safe_string(&self) -> Option<String> {
388        match self {
389            Value::String(s) => Some(s.clone()),
390            Value::Int(i) => Some(i.to_string()),
391            Value::Bool(b) => Some(b.to_string()),
392            Value::Float(f) => Some(f.to_string()),
393            Value::Uuid(u) => Some(u.to_string()),
394            _ => None,
395        }
396    }
397
398    /// Try conversion to i32
399    pub fn as_i32(&self) -> Option<i32> {
400        self.as_i64().and_then(|i| i.try_into().ok())
401    }
402}
403
404//
405// General extractor helpers for repeated access patterns
406//
407
408pub fn required_str<'a>(
409    map: &'a HashMap<String, Value>,
410    key: &str,
411) -> Result<&'a str, Box<dyn Error>> {
412    map.get(key)
413        .and_then(|v| v.as_str())
414        .ok_or_else(|| format!("Missing or invalid '{}' (str)", key).into())
415}
416
417pub fn optional_str(map: &HashMap<String, Value>, key: &str) -> Option<String> {
418    map.get(key).and_then(|v| v.as_str()).map(|s| s.to_string())
419}
420
421pub fn required_uuid(map: &HashMap<String, Value>, key: &str) -> Result<Uuid, Box<dyn Error>> {
422    map.get(key)
423        .and_then(|v| v.as_uuid())
424        .ok_or_else(|| format!("Missing or invalid '{}' (uuid)", key).into())
425}
426
427pub fn optional_uuid(map: &HashMap<String, Value>, key: &str) -> Option<Uuid> {
428    map.get(key).and_then(|v| v.as_uuid())
429}
430
431pub fn required_i64(map: &HashMap<String, Value>, key: &str) -> Result<i64, Box<dyn Error>> {
432    map.get(key)
433        .and_then(|v| v.as_i64())
434        .ok_or_else(|| format!("Missing or invalid '{}' (i64)", key).into())
435}
436
437pub fn optional_i64(map: &HashMap<String, Value>, key: &str) -> Option<i64> {
438    map.get(key).and_then(|v| v.as_i64())
439}
440
441pub fn required_bool(map: &HashMap<String, Value>, key: &str) -> Result<bool, Box<dyn Error>> {
442    map.get(key)
443        .and_then(|v| v.as_bool())
444        .ok_or_else(|| format!("Missing or invalid '{}' (bool)", key).into())
445}
446
447pub fn optional_bool(map: &HashMap<String, Value>, key: &str) -> Option<bool> {
448    map.get(key).and_then(|v| v.as_bool())
449}
450
451pub fn required_datetime(
452    map: &HashMap<String, Value>,
453    key: &str,
454) -> Result<DateTime<Utc>, Box<dyn Error>> {
455    map.get(key)
456        .and_then(|v| v.as_datetime())
457        .ok_or_else(|| format!("Missing or invalid '{}' (datetime)", key).into())
458}
459
460pub fn optional_datetime(map: &HashMap<String, Value>, key: &str) -> Option<DateTime<Utc>> {
461    map.get(key).and_then(|v| v.as_datetime())
462}
463
464/// Get a vector of Strings from a Value Array field
465pub fn array_of_strings(map: &HashMap<String, Value>, key: &str) -> Vec<String> {
466    map.get(key)
467        .and_then(|v| v.as_array())
468        .map(|arr| {
469            arr.iter()
470                .filter_map(|v| v.as_str().map(|s| s.to_string()))
471                .collect()
472        })
473        .unwrap_or_default()
474}
475
476/// Configuration for Aurora database
477#[derive(Debug, Clone)]
478pub struct AuroraConfig {
479    // Database location settings
480    pub db_path: PathBuf,
481    pub create_dirs: bool, // Create parent directories if they don't exist
482
483    // Hot store config
484    pub hot_cache_size_mb: usize,
485    pub hot_cache_cleanup_interval_secs: u64,
486    pub eviction_policy: crate::storage::EvictionPolicy,
487
488    // Cold store config
489    pub cold_cache_capacity_mb: usize,
490    pub cold_flush_interval_ms: Option<u64>,
491    pub cold_mode: ColdStoreMode,
492
493    // General config
494    pub auto_compact: bool,
495    pub compact_interval_mins: u64,
496
497    // Index config
498    pub max_index_entries_per_field: usize, // Limit memory for indices
499
500    // Write config
501    pub enable_write_buffering: bool, // Background write buffering
502    pub write_buffer_size: usize,     // Number of operations to buffer
503    pub write_buffer_flush_interval_ms: u64, // Flush interval
504
505    // Durability config
506    pub durability_mode: DurabilityMode, // Trade-off between performance and data safety
507    pub enable_wal: bool,                // Enable write-ahead logging
508    pub checkpoint_interval_ms: u64,     // Background checkpoint interval (flush + WAL truncate)
509
510    /// Optional path to audit log file. If None, audit logging is disabled.
511    pub audit_log_path: Option<PathBuf>,
512}
513
/// Durability mode determines the trade-off between performance and data safety
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DurabilityMode {
    /// No durability guarantees - fastest, but data may be lost on crash.
    ///
    /// Write buffer enabled, no WAL, no explicit flushes.
    None,
    /// Write-ahead log for crash recovery - good balance of performance and safety.
    ///
    /// WAL is flushed on every write, data is recoverable after crash.
    WAL,
    /// Synchronous writes to disk - slowest, but maximum durability.
    ///
    /// Every write is flushed to disk immediately.
    Synchronous,
}
527
/// Operating mode of the cold store.
///
/// NOTE(review): the exact effect of each mode is defined by the storage
/// layer, not in this file; the variant names suggest a throughput-versus-
/// disk-space trade-off — confirm before relying on it.
#[derive(Debug, Clone, Copy)]
pub enum ColdStoreMode {
    /// Used by the default, read-optimized and several other presets.
    HighThroughput,
    /// Used by the low-memory preset.
    LowSpace,
}
533
534impl Default for AuroraConfig {
535    fn default() -> Self {
536        Self {
537            db_path: PathBuf::from("aurora.db"),
538            create_dirs: true,
539
540            hot_cache_size_mb: 64,
541            hot_cache_cleanup_interval_secs: 300,
542            eviction_policy: crate::storage::EvictionPolicy::LRU,
543
544            cold_cache_capacity_mb: 128,
545            cold_flush_interval_ms: Some(100),
546            cold_mode: ColdStoreMode::HighThroughput,
547
548            auto_compact: true,
549            compact_interval_mins: 60,
550
551            max_index_entries_per_field: 100_000,
552
553            enable_write_buffering: true,
554            write_buffer_size: 1000,
555            write_buffer_flush_interval_ms: 100,
556
557            // Use WAL mode by default for good balance of performance and durability
558            durability_mode: DurabilityMode::WAL,
559            enable_wal: true,
560            checkpoint_interval_ms: 5000, // Checkpoint every 100ms
561            audit_log_path: None,         // Disabled by default
562        }
563    }
564}
565
566impl AuroraConfig {
567    /// Create a new configuration with a specific database path
568    pub fn with_path<P: AsRef<Path>>(path: P) -> Self {
569        Self {
570            db_path: path.as_ref().to_path_buf(),
571            ..Default::default()
572        }
573    }
574
575    /// Configuration optimized for read-heavy workloads (news sites, blogs)
576    pub fn read_optimized() -> Self {
577        Self {
578            hot_cache_size_mb: 512,
579            eviction_policy: crate::storage::EvictionPolicy::LFU,
580            cold_cache_capacity_mb: 256,
581            cold_mode: ColdStoreMode::HighThroughput,
582            ..Default::default()
583        }
584    }
585
586    /// Configuration optimized for write-heavy workloads (analytics, logging)
587    pub fn write_optimized() -> Self {
588        Self {
589            hot_cache_size_mb: 128,
590            eviction_policy: crate::storage::EvictionPolicy::LRU,
591            cold_cache_capacity_mb: 512,
592            cold_flush_interval_ms: Some(50),
593            enable_write_buffering: true,
594            write_buffer_size: 10000,
595            ..Default::default()
596        }
597    }
598
599    /// Configuration for memory-constrained environments
600    pub fn low_memory() -> Self {
601        Self {
602            hot_cache_size_mb: 32,
603            eviction_policy: crate::storage::EvictionPolicy::LRU,
604            cold_cache_capacity_mb: 32,
605            cold_mode: ColdStoreMode::LowSpace,
606            max_index_entries_per_field: 10_000,
607            ..Default::default()
608        }
609    }
610
611    /// Configuration for high-traffic real-time applications
612    pub fn realtime() -> Self {
613        Self {
614            hot_cache_size_mb: 1024,
615            eviction_policy: crate::storage::EvictionPolicy::Hybrid,
616            cold_cache_capacity_mb: 512,
617            cold_flush_interval_ms: Some(25),
618            enable_write_buffering: true,
619            write_buffer_size: 5000,
620            auto_compact: false,
621            max_index_entries_per_field: 500_000,
622            ..Default::default()
623        }
624    }
625}