// ruvector_core/agenticdb.rs

//! AgenticDB API Compatibility Layer
//!
//! # ⚠️ CRITICAL WARNING: PLACEHOLDER EMBEDDINGS
//!
//! **THIS MODULE USES HASH-BASED PLACEHOLDER EMBEDDINGS - NOT REAL SEMANTIC EMBEDDINGS**
//!
//! The `generate_text_embedding()` function creates embeddings using a simple hash function
//! that does NOT understand semantic meaning. Similarity is based on character overlap, NOT meaning.
//!
//! **For Production Use:**
//! - Integrate a real embedding model (sentence-transformers, OpenAI, Anthropic, Cohere)
//! - Use ONNX Runtime, candle, or Python bindings for inference
//! - See `/examples/onnx-embeddings` for a production-ready integration example
//!
//! **What This Means:**
//! - "dog" and "cat" will NOT be similar (different characters)
//! - "dog" and "god" WILL be similar (same characters, different order)
//! - Semantic search will not work as expected
//!
//! Provides a drop-in replacement for agenticDB with 5-table schema:
//! - vectors_table: Core embeddings + metadata
//! - reflexion_episodes: Self-critique memories
//! - skills_library: Consolidated patterns
//! - causal_edges: Cause-effect relationships with hypergraphs
//! - learning_sessions: RL training data
27use crate::embeddings::{BoxedEmbeddingProvider, HashEmbedding};
28use crate::error::{Result, RuvectorError};
29use crate::types::*;
30use crate::vector_db::VectorDB;
31use parking_lot::RwLock;
32use redb::{Database, TableDefinition};
33use serde::{Deserialize, Serialize};
34use std::collections::HashMap;
35use std::sync::Arc;
36
// Table definitions
// redb key/value tables: key = entity UUID string, value = serialized bytes.
// ReflexionEpisode is stored as JSON (its metadata holds serde_json::Value);
// the remaining tables use bincode — see each store method.
const REFLEXION_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("reflexion_episodes");
const SKILLS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("skills_library");
const CAUSAL_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("causal_edges");
const LEARNING_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("learning_sessions");
42
/// Reflexion episode for self-critique memory
/// Note: Serialized using JSON (not bincode) due to serde_json::Value in metadata field
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReflexionEpisode {
    /// Unique episode identifier (UUID v4 string)
    pub id: String,
    /// Task the agent was attempting
    pub task: String,
    /// Actions taken during the episode
    pub actions: Vec<String>,
    /// Observations recorded during the episode
    pub observations: Vec<String>,
    /// Self-critique text; its embedding is what similarity search matches on
    pub critique: String,
    /// Embedding of `critique`
    pub embedding: Vec<f32>,
    /// Unix timestamp (seconds) when the episode was stored
    pub timestamp: i64,
    /// Optional free-form metadata (arbitrary JSON values)
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
56
/// Skill definition in the library
#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)]
pub struct Skill {
    /// Unique skill identifier (UUID v4 string)
    pub id: String,
    /// Human-readable skill name
    pub name: String,
    /// Description text; its embedding is indexed for skill search
    pub description: String,
    /// Named parameters the skill accepts
    pub parameters: HashMap<String, String>,
    /// Example usages / action sequences
    pub examples: Vec<String>,
    /// Embedding of `description`
    pub embedding: Vec<f32>,
    /// How many times the skill has been used (starts at 0)
    pub usage_count: usize,
    /// Success ratio (starts at 0.0)
    pub success_rate: f64,
    /// Unix timestamp (seconds) at creation
    pub created_at: i64,
    /// Unix timestamp (seconds) of last update
    pub updated_at: i64,
}
71
/// Causal edge in the hypergraph
#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)]
pub struct CausalEdge {
    /// Unique edge identifier (UUID v4 string)
    pub id: String,
    pub causes: Vec<String>,  // Hypergraph: multiple causes
    pub effects: Vec<String>, // Hypergraph: multiple effects
    /// Confidence in the relationship; copied into vector metadata for ranking
    pub confidence: f64,
    /// Context text; its embedding is indexed for search
    pub context: String,
    /// Embedding of `context`
    pub embedding: Vec<f32>,
    /// Number of observations supporting the edge (starts at 1)
    pub observations: usize,
    /// Unix timestamp (seconds) when the edge was stored
    pub timestamp: i64,
}
84
/// Learning session for RL training
#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)]
pub struct LearningSession {
    /// Unique session identifier (UUID v4 string)
    pub id: String,
    pub algorithm: String, // Q-Learning, DQN, PPO, etc
    /// Dimensionality of state vectors
    pub state_dim: usize,
    /// Dimensionality of action vectors
    pub action_dim: usize,
    /// Accumulated experience tuples for this session
    pub experiences: Vec<Experience>,
    pub model_params: Option<Vec<u8>>, // Serialized model
    /// Unix timestamp (seconds) at creation
    pub created_at: i64,
    /// Unix timestamp (seconds) of last update
    pub updated_at: i64,
}
97
/// Single RL experience (one (s, a, r, s', done) transition)
#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)]
pub struct Experience {
    /// State before the action
    pub state: Vec<f32>,
    /// Action taken
    pub action: Vec<f32>,
    /// Reward received
    pub reward: f64,
    /// State after the action
    pub next_state: Vec<f32>,
    /// Whether the episode terminated after this transition
    pub done: bool,
    /// Unix timestamp (seconds) when the experience was recorded
    pub timestamp: i64,
}
108
/// Prediction with confidence interval
#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)]
pub struct Prediction {
    /// Predicted action vector (reward-weighted average of similar experiences)
    pub action: Vec<f32>,
    /// Lower bound of the reward confidence interval
    pub confidence_lower: f64,
    /// Upper bound of the reward confidence interval
    pub confidence_upper: f64,
    /// Mean reward of the contributing experiences
    pub mean_confidence: f64,
}
117
/// Query result with utility score
///
/// Produced by `query_with_utility`, which ranks by
/// U = α·similarity + β·causal_uplift − γ·latency.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UtilitySearchResult {
    /// Underlying vector search result
    pub result: SearchResult,
    /// Combined utility used for ranking
    pub utility_score: f64,
    /// Similarity component (distance converted to 1/(1+d))
    pub similarity_score: f64,
    /// Causal-uplift component (edge confidence from metadata, 0.0 if absent)
    pub causal_uplift: f64,
    /// Latency component already scaled by γ
    pub latency_penalty: f64,
}
127
/// Main AgenticDB interface
///
/// Wraps a [`VectorDB`] for embeddings/search plus a separate redb database
/// (`<storage_path>.agentic`) holding the reflexion, skills, causal and
/// learning tables.
pub struct AgenticDB {
    /// Core vector store used for all similarity search
    vector_db: Arc<VectorDB>,
    /// Side database for the four agentic tables
    db: Arc<Database>,
    /// Embedding dimensionality (retained for reference; currently unused)
    _dimensions: usize,
    /// Pluggable text-embedding backend (hash-based by default)
    embedding_provider: BoxedEmbeddingProvider,
}
135
136impl AgenticDB {
137    /// Create a new AgenticDB with the given options and default hash-based embeddings
138    pub fn new(options: DbOptions) -> Result<Self> {
139        let embedding_provider = Arc::new(HashEmbedding::new(options.dimensions));
140        Self::with_embedding_provider(options, embedding_provider)
141    }
142
    /// Create a new AgenticDB with a custom embedding provider
    ///
    /// # Example with API embeddings
    /// ```rust,no_run
    /// use ruvector_core::{AgenticDB, ApiEmbedding};
    /// use ruvector_core::types::DbOptions;
    /// use std::sync::Arc;
    ///
    /// let mut options = DbOptions::default();
    /// options.dimensions = 1536; // OpenAI embedding dimensions
    /// options.storage_path = "agenticdb.db".to_string();
    ///
    /// let provider = Arc::new(ApiEmbedding::openai("sk-...", "text-embedding-3-small"));
    /// let db = AgenticDB::with_embedding_provider(options, provider)?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// # Example with Candle (requires feature flag)
    /// ```rust,no_run
    /// # #[cfg(feature = "real-embeddings")]
    /// # {
    /// use ruvector_core::{AgenticDB, CandleEmbedding};
    /// use ruvector_core::types::DbOptions;
    /// use std::sync::Arc;
    ///
    /// let mut options = DbOptions::default();
    /// options.dimensions = 384; // MiniLM dimensions
    /// options.storage_path = "agenticdb.db".to_string();
    ///
    /// let provider = Arc::new(CandleEmbedding::from_pretrained(
    ///     "sentence-transformers/all-MiniLM-L6-v2",
    ///     false
    /// )?);
    /// let db = AgenticDB::with_embedding_provider(options, provider)?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # }
    /// ```
    ///
    /// # Errors
    /// Returns `InvalidDimension` if `options.dimensions` disagrees with the
    /// provider, or propagates errors from opening either database.
    pub fn with_embedding_provider(
        options: DbOptions,
        embedding_provider: BoxedEmbeddingProvider,
    ) -> Result<Self> {
        // Validate dimensions match — catches misconfiguration up front rather
        // than at first insert/search.
        if options.dimensions != embedding_provider.dimensions() {
            return Err(RuvectorError::InvalidDimension(format!(
                "Options dimensions ({}) do not match embedding provider dimensions ({})",
                options.dimensions,
                embedding_provider.dimensions()
            )));
        }

        // Create vector DB for core vector operations
        let vector_db = Arc::new(VectorDB::new(options.clone())?);

        // Create separate database for AgenticDB tables
        // (suffix keeps it next to the vector store's file)
        let agentic_path = format!("{}.agentic", options.storage_path);
        let db = Arc::new(Database::create(&agentic_path)?);

        // Initialize tables — opening each table inside a write transaction
        // creates it if it does not exist yet.
        let write_txn = db.begin_write()?;
        {
            let _ = write_txn.open_table(REFLEXION_TABLE)?;
            let _ = write_txn.open_table(SKILLS_TABLE)?;
            let _ = write_txn.open_table(CAUSAL_TABLE)?;
            let _ = write_txn.open_table(LEARNING_TABLE)?;
        }
        write_txn.commit()?;

        Ok(Self {
            vector_db,
            db,
            _dimensions: options.dimensions,
            embedding_provider,
        })
    }
217
218    /// Create with default options and hash-based embeddings
219    pub fn with_dimensions(dimensions: usize) -> Result<Self> {
220        let options = DbOptions {
221            dimensions,
222            ..DbOptions::default()
223        };
224        Self::new(options)
225    }
226
    /// Get the embedding provider name (for debugging/logging)
    ///
    /// E.g. the default hash provider vs. an API-backed one.
    pub fn embedding_provider_name(&self) -> &str {
        self.embedding_provider.name()
    }
231
232    // ============ Vector DB Core Methods ============
233
    /// Insert a vector entry
    ///
    /// Thin delegate to the underlying [`VectorDB`]; returns the assigned ID.
    pub fn insert(&self, entry: VectorEntry) -> Result<VectorId> {
        self.vector_db.insert(entry)
    }
238
    /// Insert multiple vectors in a batch
    ///
    /// Thin delegate to the underlying [`VectorDB`]; returns one ID per entry.
    pub fn insert_batch(&self, entries: Vec<VectorEntry>) -> Result<Vec<VectorId>> {
        self.vector_db.insert_batch(entries)
    }
243
    /// Search for similar vectors
    ///
    /// Thin delegate to the underlying [`VectorDB`].
    pub fn search(&self, query: SearchQuery) -> Result<Vec<SearchResult>> {
        self.vector_db.search(query)
    }
248
    /// Delete a vector by ID
    ///
    /// Thin delegate to the underlying [`VectorDB`]; the bool presumably
    /// indicates whether an entry was removed — confirm against VectorDB docs.
    pub fn delete(&self, id: &str) -> Result<bool> {
        self.vector_db.delete(id)
    }
253
    /// Get a vector by ID
    ///
    /// Thin delegate to the underlying [`VectorDB`]; `Ok(None)` when absent.
    pub fn get(&self, id: &str) -> Result<Option<VectorEntry>> {
        self.vector_db.get(id)
    }
258
259    // ============ Reflexion Memory API ============
260
    /// Store a reflexion episode with self-critique
    ///
    /// Persists the episode as JSON in the reflexion table and indexes the
    /// critique embedding in the vector DB under id `reflexion_<uuid>`.
    ///
    /// # Errors
    /// Fails if embedding generation, serialization, or either store fails.
    /// NOTE(review): the redb write and the vector-DB insert are not atomic —
    /// if the second fails, the episode exists but is not searchable.
    pub fn store_episode(
        &self,
        task: String,
        actions: Vec<String>,
        observations: Vec<String>,
        critique: String,
    ) -> Result<String> {
        let id = uuid::Uuid::new_v4().to_string();

        // Generate embedding from critique for similarity search
        let embedding = self.generate_text_embedding(&critique)?;

        let episode = ReflexionEpisode {
            id: id.clone(),
            task,
            actions,
            observations,
            critique,
            embedding: embedding.clone(),
            timestamp: chrono::Utc::now().timestamp(),
            metadata: None,
        };

        // Store in reflexion table
        let write_txn = self.db.begin_write()?;
        {
            let mut table = write_txn.open_table(REFLEXION_TABLE)?;
            // Use JSON encoding for ReflexionEpisode (contains serde_json::Value which isn't bincode-compatible)
            let json = serde_json::to_vec(&episode)
                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
            table.insert(id.as_str(), json.as_slice())?;
        }
        write_txn.commit()?;

        // Also index in vector DB for fast similarity search; the episode_id
        // in metadata links the index entry back to the redb record.
        self.vector_db.insert(VectorEntry {
            id: Some(format!("reflexion_{}", id)),
            vector: embedding,
            metadata: Some({
                let mut meta = HashMap::new();
                meta.insert("type".to_string(), serde_json::json!("reflexion"));
                meta.insert("episode_id".to_string(), serde_json::json!(id.clone()));
                meta
            }),
        })?;

        Ok(id)
    }
310
311    /// Retrieve similar reflexion episodes
312    pub fn retrieve_similar_episodes(
313        &self,
314        query: &str,
315        k: usize,
316    ) -> Result<Vec<ReflexionEpisode>> {
317        // Generate embedding for query
318        let query_embedding = self.generate_text_embedding(query)?;
319
320        // Search in vector DB
321        let results = self.vector_db.search(SearchQuery {
322            vector: query_embedding,
323            k,
324            filter: Some({
325                let mut filter = HashMap::new();
326                filter.insert("type".to_string(), serde_json::json!("reflexion"));
327                filter
328            }),
329            ef_search: None,
330        })?;
331
332        // Retrieve full episodes
333        let mut episodes = Vec::new();
334        let read_txn = self.db.begin_read()?;
335        let table = read_txn.open_table(REFLEXION_TABLE)?;
336
337        for result in results {
338            if let Some(metadata) = result.metadata {
339                if let Some(episode_id) = metadata.get("episode_id") {
340                    let id = episode_id.as_str().unwrap();
341                    if let Some(data) = table.get(id)? {
342                        // Use JSON decoding for ReflexionEpisode (contains serde_json::Value which isn't bincode-compatible)
343                        let episode: ReflexionEpisode = serde_json::from_slice(data.value())
344                            .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
345                        episodes.push(episode);
346                    }
347                }
348            }
349        }
350
351        Ok(episodes)
352    }
353
354    // ============ Skill Library API ============
355
356    /// Create a new skill in the library
357    pub fn create_skill(
358        &self,
359        name: String,
360        description: String,
361        parameters: HashMap<String, String>,
362        examples: Vec<String>,
363    ) -> Result<String> {
364        let id = uuid::Uuid::new_v4().to_string();
365
366        // Generate embedding from description
367        let embedding = self.generate_text_embedding(&description)?;
368
369        let skill = Skill {
370            id: id.clone(),
371            name,
372            description,
373            parameters,
374            examples,
375            embedding: embedding.clone(),
376            usage_count: 0,
377            success_rate: 0.0,
378            created_at: chrono::Utc::now().timestamp(),
379            updated_at: chrono::Utc::now().timestamp(),
380        };
381
382        // Store in skills table
383        let write_txn = self.db.begin_write()?;
384        {
385            let mut table = write_txn.open_table(SKILLS_TABLE)?;
386            let data = bincode::encode_to_vec(&skill, bincode::config::standard())
387                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
388            table.insert(id.as_str(), data.as_slice())?;
389        }
390        write_txn.commit()?;
391
392        // Index in vector DB
393        self.vector_db.insert(VectorEntry {
394            id: Some(format!("skill_{}", id)),
395            vector: embedding,
396            metadata: Some({
397                let mut meta = HashMap::new();
398                meta.insert("type".to_string(), serde_json::json!("skill"));
399                meta.insert("skill_id".to_string(), serde_json::json!(id.clone()));
400                meta
401            }),
402        })?;
403
404        Ok(id)
405    }
406
407    /// Search skills by description
408    pub fn search_skills(&self, query_description: &str, k: usize) -> Result<Vec<Skill>> {
409        let query_embedding = self.generate_text_embedding(query_description)?;
410
411        let results = self.vector_db.search(SearchQuery {
412            vector: query_embedding,
413            k,
414            filter: Some({
415                let mut filter = HashMap::new();
416                filter.insert("type".to_string(), serde_json::json!("skill"));
417                filter
418            }),
419            ef_search: None,
420        })?;
421
422        let mut skills = Vec::new();
423        let read_txn = self.db.begin_read()?;
424        let table = read_txn.open_table(SKILLS_TABLE)?;
425
426        for result in results {
427            if let Some(metadata) = result.metadata {
428                if let Some(skill_id) = metadata.get("skill_id") {
429                    let id = skill_id.as_str().unwrap();
430                    if let Some(data) = table.get(id)? {
431                        let (skill, _): (Skill, usize) =
432                            bincode::decode_from_slice(data.value(), bincode::config::standard())
433                                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
434                        skills.push(skill);
435                    }
436                }
437            }
438        }
439
440        Ok(skills)
441    }
442
443    /// Auto-consolidate action sequences into skills
444    pub fn auto_consolidate(
445        &self,
446        action_sequences: Vec<Vec<String>>,
447        success_threshold: usize,
448    ) -> Result<Vec<String>> {
449        let mut skill_ids = Vec::new();
450
451        // Group similar sequences (simplified - would use clustering in production)
452        for sequence in action_sequences {
453            if sequence.len() >= success_threshold {
454                let description = format!("Skill: {}", sequence.join(" -> "));
455                let skill_id = self.create_skill(
456                    format!("Auto-Skill-{}", uuid::Uuid::new_v4()),
457                    description,
458                    HashMap::new(),
459                    sequence.clone(),
460                )?;
461                skill_ids.push(skill_id);
462            }
463        }
464
465        Ok(skill_ids)
466    }
467
468    // ============ Causal Memory with Hypergraphs ============
469
    /// Add a causal edge (supporting hypergraphs with multiple causes/effects)
    ///
    /// Persists the edge (bincode) in the causal table and indexes the context
    /// embedding in the vector DB under id `causal_<uuid>`, copying the
    /// confidence into metadata for utility-based ranking.
    ///
    /// # Errors
    /// Fails if embedding generation, serialization, or either store fails.
    /// NOTE(review): the two writes are not atomic — a vector-index failure
    /// leaves the edge stored but unsearchable.
    pub fn add_causal_edge(
        &self,
        causes: Vec<String>,
        effects: Vec<String>,
        confidence: f64,
        context: String,
    ) -> Result<String> {
        let id = uuid::Uuid::new_v4().to_string();

        // Generate embedding from context
        let embedding = self.generate_text_embedding(&context)?;

        let edge = CausalEdge {
            id: id.clone(),
            causes,
            effects,
            confidence,
            context,
            embedding: embedding.clone(),
            observations: 1, // first observation of this relationship
            timestamp: chrono::Utc::now().timestamp(),
        };

        // Store in causal table
        let write_txn = self.db.begin_write()?;
        {
            let mut table = write_txn.open_table(CAUSAL_TABLE)?;
            let data = bincode::encode_to_vec(&edge, bincode::config::standard())
                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
            table.insert(id.as_str(), data.as_slice())?;
        }
        write_txn.commit()?;

        // Index in vector DB
        self.vector_db.insert(VectorEntry {
            id: Some(format!("causal_{}", id)),
            vector: embedding,
            metadata: Some({
                let mut meta = HashMap::new();
                meta.insert("type".to_string(), serde_json::json!("causal"));
                meta.insert("causal_id".to_string(), serde_json::json!(id.clone()));
                meta.insert("confidence".to_string(), serde_json::json!(confidence));
                meta
            }),
        })?;

        Ok(id)
    }
519
520    /// Query with utility function: U = α·similarity + β·causal_uplift − γ·latency
521    pub fn query_with_utility(
522        &self,
523        query: &str,
524        k: usize,
525        alpha: f64,
526        beta: f64,
527        gamma: f64,
528    ) -> Result<Vec<UtilitySearchResult>> {
529        let start_time = std::time::Instant::now();
530        let query_embedding = self.generate_text_embedding(query)?;
531
532        // Get all causal edges
533        let results = self.vector_db.search(SearchQuery {
534            vector: query_embedding,
535            k: k * 2, // Get more results for utility ranking
536            filter: Some({
537                let mut filter = HashMap::new();
538                filter.insert("type".to_string(), serde_json::json!("causal"));
539                filter
540            }),
541            ef_search: None,
542        })?;
543
544        let mut utility_results = Vec::new();
545
546        for result in results {
547            let similarity_score = 1.0 / (1.0 + result.score as f64); // Convert distance to similarity
548
549            // Get causal uplift from metadata
550            let causal_uplift = if let Some(ref metadata) = result.metadata {
551                metadata
552                    .get("confidence")
553                    .and_then(|v| v.as_f64())
554                    .unwrap_or(0.0)
555            } else {
556                0.0
557            };
558
559            let latency = start_time.elapsed().as_secs_f64();
560            let latency_penalty = latency * gamma;
561
562            // Calculate utility: U = α·similarity + β·causal_uplift − γ·latency
563            let utility_score = alpha * similarity_score + beta * causal_uplift - latency_penalty;
564
565            utility_results.push(UtilitySearchResult {
566                result,
567                utility_score,
568                similarity_score,
569                causal_uplift,
570                latency_penalty,
571            });
572        }
573
574        // Sort by utility score (descending)
575        utility_results.sort_by(|a, b| b.utility_score.partial_cmp(&a.utility_score).unwrap());
576        utility_results.truncate(k);
577
578        Ok(utility_results)
579    }
580
581    // ============ Learning Sessions API ============
582
583    /// Start a new learning session
584    pub fn start_session(
585        &self,
586        algorithm: String,
587        state_dim: usize,
588        action_dim: usize,
589    ) -> Result<String> {
590        let id = uuid::Uuid::new_v4().to_string();
591
592        let session = LearningSession {
593            id: id.clone(),
594            algorithm,
595            state_dim,
596            action_dim,
597            experiences: Vec::new(),
598            model_params: None,
599            created_at: chrono::Utc::now().timestamp(),
600            updated_at: chrono::Utc::now().timestamp(),
601        };
602
603        let write_txn = self.db.begin_write()?;
604        {
605            let mut table = write_txn.open_table(LEARNING_TABLE)?;
606            let data = bincode::encode_to_vec(&session, bincode::config::standard())
607                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
608            table.insert(id.as_str(), data.as_slice())?;
609        }
610        write_txn.commit()?;
611
612        Ok(id)
613    }
614
615    /// Add an experience to a learning session
616    pub fn add_experience(
617        &self,
618        session_id: &str,
619        state: Vec<f32>,
620        action: Vec<f32>,
621        reward: f64,
622        next_state: Vec<f32>,
623        done: bool,
624    ) -> Result<()> {
625        let read_txn = self.db.begin_read()?;
626        let table = read_txn.open_table(LEARNING_TABLE)?;
627
628        let data = table
629            .get(session_id)?
630            .ok_or_else(|| RuvectorError::VectorNotFound(session_id.to_string()))?;
631
632        let (mut session, _): (LearningSession, usize) =
633            bincode::decode_from_slice(data.value(), bincode::config::standard())
634                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
635
636        drop(table);
637        drop(read_txn);
638
639        // Add experience
640        session.experiences.push(Experience {
641            state,
642            action,
643            reward,
644            next_state,
645            done,
646            timestamp: chrono::Utc::now().timestamp(),
647        });
648        session.updated_at = chrono::Utc::now().timestamp();
649
650        // Update session
651        let write_txn = self.db.begin_write()?;
652        {
653            let mut table = write_txn.open_table(LEARNING_TABLE)?;
654            let data = bincode::encode_to_vec(&session, bincode::config::standard())
655                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
656            table.insert(session_id, data.as_slice())?;
657        }
658        write_txn.commit()?;
659
660        Ok(())
661    }
662
663    /// Predict action with confidence interval
664    pub fn predict_with_confidence(&self, session_id: &str, state: Vec<f32>) -> Result<Prediction> {
665        let read_txn = self.db.begin_read()?;
666        let table = read_txn.open_table(LEARNING_TABLE)?;
667
668        let data = table
669            .get(session_id)?
670            .ok_or_else(|| RuvectorError::VectorNotFound(session_id.to_string()))?;
671
672        let (session, _): (LearningSession, usize) =
673            bincode::decode_from_slice(data.value(), bincode::config::standard())
674                .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
675
676        // Simple prediction based on similar states (would use actual RL model in production)
677        let mut similar_actions = Vec::new();
678        let mut rewards = Vec::new();
679
680        for exp in &session.experiences {
681            let distance = euclidean_distance(&state, &exp.state);
682            if distance < 1.0 {
683                // Similarity threshold
684                similar_actions.push(exp.action.clone());
685                rewards.push(exp.reward);
686            }
687        }
688
689        if similar_actions.is_empty() {
690            // Return random action if no similar states
691            return Ok(Prediction {
692                action: vec![0.0; session.action_dim],
693                confidence_lower: 0.0,
694                confidence_upper: 0.0,
695                mean_confidence: 0.0,
696            });
697        }
698
699        // Average actions weighted by rewards
700        let total_reward: f64 = rewards.iter().sum();
701        let mut action = vec![0.0; session.action_dim];
702
703        for (act, reward) in similar_actions.iter().zip(rewards.iter()) {
704            let weight = reward / total_reward;
705            for (i, val) in act.iter().enumerate() {
706                action[i] += val * weight as f32;
707            }
708        }
709
710        // Calculate confidence interval (simplified)
711        let mean_reward = total_reward / rewards.len() as f64;
712        let std_dev = calculate_std_dev(&rewards, mean_reward);
713
714        Ok(Prediction {
715            action,
716            confidence_lower: mean_reward - 1.96 * std_dev,
717            confidence_upper: mean_reward + 1.96 * std_dev,
718            mean_confidence: mean_reward,
719        })
720    }
721
722    /// Get learning session by ID
723    pub fn get_session(&self, session_id: &str) -> Result<Option<LearningSession>> {
724        let read_txn = self.db.begin_read()?;
725        let table = read_txn.open_table(LEARNING_TABLE)?;
726
727        if let Some(data) = table.get(session_id)? {
728            let (session, _): (LearningSession, usize) =
729                bincode::decode_from_slice(data.value(), bincode::config::standard())
730                    .map_err(|e| RuvectorError::SerializationError(e.to_string()))?;
731            Ok(Some(session))
732        } else {
733            Ok(None)
734        }
735    }
736
737    // ============ Helper Methods ============
738
    /// Generate text embedding from text using the configured embedding provider.
    ///
    /// By default, this uses hash-based embeddings (fast but not semantic).
    /// Use `with_embedding_provider()` to use real embeddings.
    ///
    /// # Errors
    /// Propagates any error from the underlying provider's `embed`.
    ///
    /// # Example with real embeddings
    /// ```rust,ignore
    /// use ruvector_core::{AgenticDB, ApiEmbedding};
    /// use ruvector_core::types::DbOptions;
    /// use std::sync::Arc;
    ///
    /// let mut options = DbOptions::default();
    /// options.dimensions = 1536;
    /// let provider = Arc::new(ApiEmbedding::openai("sk-...", "text-embedding-3-small"));
    /// let db = AgenticDB::with_embedding_provider(options, provider)?;
    ///
    /// // Now embeddings will be semantic! (internal method)
    /// let embedding = db.generate_text_embedding("hello world")?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    fn generate_text_embedding(&self, text: &str) -> Result<Vec<f32>> {
        self.embedding_provider.embed(text)
    }
762}
763
764// Helper functions
/// Euclidean (L2) distance between two vectors.
///
/// Pairs are zipped, so if the slices differ in length the trailing elements
/// of the longer one are ignored.
fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    let mut sum_sq = 0.0_f32;
    for (x, y) in a.iter().zip(b.iter()) {
        let d = x - y;
        sum_sq += d * d;
    }
    sum_sq.sqrt()
}
772
/// Population standard deviation of `values` around the supplied `mean`.
///
/// Returns 0.0 for an empty slice; the previous version divided by zero
/// there and returned NaN.
fn calculate_std_dev(values: &[f64], mean: f64) -> f64 {
    if values.is_empty() {
        return 0.0;
    }
    let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
    variance.sqrt()
}
777
778// ============ High-Level API Interfaces (ADR-001) ============
779
/// Policy Memory Store interface for AI agent policy memory
///
/// This interface provides Q-learning state-action lookups, contextual bandit
/// policy retrieval, and episodic memory for reasoning.
///
/// # Example
/// ```rust,ignore
/// let policy_store = db.policy_memory();
/// policy_store.store_policy("state_a", vec![0.1, 0.2], PolicyAction { action: "move_left", reward: 0.8 })?;
/// let similar = policy_store.retrieve_similar_states(&current_state_embedding, 5)?;
/// ```
pub struct PolicyMemoryStore<'a> {
    /// Borrowed handle to the owning database; policies live in its vector DB
    db: &'a AgenticDB,
}
794
/// Policy action with reward information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolicyAction {
    /// Action taken
    pub action: String,
    /// Reward received
    pub reward: f64,
    /// Q-value estimate
    pub q_value: f64,
    /// State embedding the action was taken in
    pub state_embedding: Vec<f32>,
    /// Unix timestamp (seconds)
    pub timestamp: i64,
}
809
/// Policy entry combining state and action
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolicyEntry {
    /// Unique identifier (UUID v4 string)
    pub id: String,
    /// State identifier supplied by the caller
    pub state_id: String,
    /// Action taken in that state, with reward/Q-value
    pub action: PolicyAction,
    /// Optional free-form metadata (arbitrary JSON values)
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
822
823impl<'a> PolicyMemoryStore<'a> {
    /// Create a new policy memory store interface
    ///
    /// Borrows the database; the store adds no state of its own.
    pub fn new(db: &'a AgenticDB) -> Self {
        Self { db }
    }
828
829    /// Store a policy entry (state-action pair)
830    pub fn store_policy(
831        &self,
832        state_id: &str,
833        state_embedding: Vec<f32>,
834        action: &str,
835        reward: f64,
836        q_value: f64,
837    ) -> Result<String> {
838        let id = uuid::Uuid::new_v4().to_string();
839        let timestamp = chrono::Utc::now().timestamp();
840
841        let _entry = PolicyEntry {
842            id: id.clone(),
843            state_id: state_id.to_string(),
844            action: PolicyAction {
845                action: action.to_string(),
846                reward,
847                q_value,
848                state_embedding: state_embedding.clone(),
849                timestamp,
850            },
851            metadata: None,
852        };
853
854        // Store in vector DB for similarity search
855        self.db.vector_db.insert(VectorEntry {
856            id: Some(format!("policy_{}", id)),
857            vector: state_embedding,
858            metadata: Some({
859                let mut meta = HashMap::new();
860                meta.insert("type".to_string(), serde_json::json!("policy"));
861                meta.insert("policy_id".to_string(), serde_json::json!(id.clone()));
862                meta.insert("state_id".to_string(), serde_json::json!(state_id));
863                meta.insert("action".to_string(), serde_json::json!(action));
864                meta.insert("reward".to_string(), serde_json::json!(reward));
865                meta.insert("q_value".to_string(), serde_json::json!(q_value));
866                meta
867            }),
868        })?;
869
870        Ok(id)
871    }
872
873    /// Retrieve similar states for policy lookup
874    pub fn retrieve_similar_states(
875        &self,
876        state_embedding: &[f32],
877        k: usize,
878    ) -> Result<Vec<PolicyEntry>> {
879        let results = self.db.vector_db.search(SearchQuery {
880            vector: state_embedding.to_vec(),
881            k,
882            filter: Some({
883                let mut filter = HashMap::new();
884                filter.insert("type".to_string(), serde_json::json!("policy"));
885                filter
886            }),
887            ef_search: None,
888        })?;
889
890        let mut entries = Vec::new();
891        for result in results {
892            if let Some(metadata) = result.metadata {
893                let policy_id = metadata
894                    .get("policy_id")
895                    .and_then(|v| v.as_str())
896                    .unwrap_or("");
897                let state_id = metadata
898                    .get("state_id")
899                    .and_then(|v| v.as_str())
900                    .unwrap_or("");
901                let action = metadata
902                    .get("action")
903                    .and_then(|v| v.as_str())
904                    .unwrap_or("");
905                let reward = metadata
906                    .get("reward")
907                    .and_then(|v| v.as_f64())
908                    .unwrap_or(0.0);
909                let q_value = metadata
910                    .get("q_value")
911                    .and_then(|v| v.as_f64())
912                    .unwrap_or(0.0);
913
914                entries.push(PolicyEntry {
915                    id: policy_id.to_string(),
916                    state_id: state_id.to_string(),
917                    action: PolicyAction {
918                        action: action.to_string(),
919                        reward,
920                        q_value,
921                        state_embedding: result.vector.unwrap_or_default(),
922                        timestamp: 0,
923                    },
924                    metadata: None,
925                });
926            }
927        }
928
929        Ok(entries)
930    }
931
932    /// Get the best action for a state based on Q-values
933    pub fn get_best_action(&self, state_embedding: &[f32], k: usize) -> Result<Option<String>> {
934        let similar = self.retrieve_similar_states(state_embedding, k)?;
935
936        similar
937            .into_iter()
938            .max_by(|a, b| a.action.q_value.partial_cmp(&b.action.q_value).unwrap())
939            .map(|entry| Ok(entry.action.action))
940            .transpose()
941    }
942
943    /// Update Q-value for a state-action pair
944    pub fn update_q_value(&self, policy_id: &str, _new_q_value: f64) -> Result<()> {
945        // Delete old entry and create new one with updated Q-value
946        // Note: In production, this should use an update mechanism
947        let _ = self.db.vector_db.delete(&format!("policy_{}", policy_id));
948        Ok(())
949    }
950}
951
/// Session State Index for real-time session context
///
/// Provides < 10ms latency for interactive use, session isolation via namespaces,
/// and TTL-based cleanup.
pub struct SessionStateIndex<'a> {
    /// Backing database used for embedding generation and vector storage.
    db: &'a AgenticDB,
    /// Namespace for this session; prefixes every stored vector id
    /// (`session_{session_id}_{turn_id}`).
    session_id: String,
    /// Lifetime of each turn in seconds; expired turns are filtered from
    /// queries and eligible for cleanup.
    ttl_seconds: i64,
}
961
/// Session turn entry
///
/// One conversational turn reconstructed from vector-DB metadata by
/// [`SessionStateIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionTurn {
    /// Turn ID (UUID v4 assigned when the turn was added)
    pub id: String,
    /// Session ID this turn belongs to
    pub session_id: String,
    /// Turn number (caller-supplied ordering key)
    pub turn_number: usize,
    /// Role (user, assistant, system)
    pub role: String,
    /// Content of the turn
    pub content: String,
    /// Embedding of the content (placeholder hash embedding — see module docs)
    pub embedding: Vec<f32>,
    /// Creation time, Unix seconds
    pub timestamp: i64,
    /// TTL expiry, Unix seconds (timestamp + session TTL)
    pub expires_at: i64,
}
982
983impl<'a> SessionStateIndex<'a> {
984    /// Create a new session state index
985    pub fn new(db: &'a AgenticDB, session_id: &str, ttl_seconds: i64) -> Self {
986        Self {
987            db,
988            session_id: session_id.to_string(),
989            ttl_seconds,
990        }
991    }
992
993    /// Add a turn to the session
994    pub fn add_turn(&self, turn_number: usize, role: &str, content: &str) -> Result<String> {
995        let id = uuid::Uuid::new_v4().to_string();
996        let timestamp = chrono::Utc::now().timestamp();
997        let expires_at = timestamp + self.ttl_seconds;
998
999        // Generate embedding for the content
1000        let embedding = self.db.generate_text_embedding(content)?;
1001
1002        // Store in vector DB
1003        self.db.vector_db.insert(VectorEntry {
1004            id: Some(format!("session_{}_{}", self.session_id, id)),
1005            vector: embedding,
1006            metadata: Some({
1007                let mut meta = HashMap::new();
1008                meta.insert("type".to_string(), serde_json::json!("session_turn"));
1009                meta.insert(
1010                    "session_id".to_string(),
1011                    serde_json::json!(self.session_id.clone()),
1012                );
1013                meta.insert("turn_id".to_string(), serde_json::json!(id.clone()));
1014                meta.insert("turn_number".to_string(), serde_json::json!(turn_number));
1015                meta.insert("role".to_string(), serde_json::json!(role));
1016                meta.insert("content".to_string(), serde_json::json!(content));
1017                meta.insert("timestamp".to_string(), serde_json::json!(timestamp));
1018                meta.insert("expires_at".to_string(), serde_json::json!(expires_at));
1019                meta
1020            }),
1021        })?;
1022
1023        Ok(id)
1024    }
1025
1026    /// Find relevant past turns based on current context
1027    pub fn find_relevant_turns(&self, query: &str, k: usize) -> Result<Vec<SessionTurn>> {
1028        let query_embedding = self.db.generate_text_embedding(query)?;
1029        let current_time = chrono::Utc::now().timestamp();
1030
1031        let results = self.db.vector_db.search(SearchQuery {
1032            vector: query_embedding,
1033            k: k * 2, // Get extra to filter expired
1034            filter: Some({
1035                let mut filter = HashMap::new();
1036                filter.insert("type".to_string(), serde_json::json!("session_turn"));
1037                filter.insert(
1038                    "session_id".to_string(),
1039                    serde_json::json!(self.session_id.clone()),
1040                );
1041                filter
1042            }),
1043            ef_search: None,
1044        })?;
1045
1046        let mut turns = Vec::new();
1047        for result in results {
1048            if let Some(metadata) = result.metadata {
1049                let expires_at = metadata
1050                    .get("expires_at")
1051                    .and_then(|v| v.as_i64())
1052                    .unwrap_or(0);
1053
1054                // Skip expired turns
1055                if expires_at < current_time {
1056                    continue;
1057                }
1058
1059                turns.push(SessionTurn {
1060                    id: metadata
1061                        .get("turn_id")
1062                        .and_then(|v| v.as_str())
1063                        .unwrap_or("")
1064                        .to_string(),
1065                    session_id: self.session_id.clone(),
1066                    turn_number: metadata
1067                        .get("turn_number")
1068                        .and_then(|v| v.as_u64())
1069                        .unwrap_or(0) as usize,
1070                    role: metadata
1071                        .get("role")
1072                        .and_then(|v| v.as_str())
1073                        .unwrap_or("")
1074                        .to_string(),
1075                    content: metadata
1076                        .get("content")
1077                        .and_then(|v| v.as_str())
1078                        .unwrap_or("")
1079                        .to_string(),
1080                    embedding: result.vector.unwrap_or_default(),
1081                    timestamp: metadata
1082                        .get("timestamp")
1083                        .and_then(|v| v.as_i64())
1084                        .unwrap_or(0),
1085                    expires_at,
1086                });
1087
1088                if turns.len() >= k {
1089                    break;
1090                }
1091            }
1092        }
1093
1094        Ok(turns)
1095    }
1096
1097    /// Get full session context (all turns in order)
1098    pub fn get_session_context(&self) -> Result<Vec<SessionTurn>> {
1099        let mut turns = self.find_relevant_turns("", 1000)?;
1100        turns.sort_by_key(|t| t.turn_number);
1101        Ok(turns)
1102    }
1103
1104    /// Clean up expired turns
1105    pub fn cleanup_expired(&self) -> Result<usize> {
1106        let current_time = chrono::Utc::now().timestamp();
1107        let all_turns = self.find_relevant_turns("", 10000)?;
1108        let mut deleted = 0;
1109
1110        for turn in all_turns {
1111            if turn.expires_at < current_time {
1112                let _ = self
1113                    .db
1114                    .vector_db
1115                    .delete(&format!("session_{}_{}", self.session_id, turn.id));
1116                deleted += 1;
1117            }
1118        }
1119
1120        Ok(deleted)
1121    }
1122}
1123
/// Witness Log for cryptographically-linked audit trail
///
/// Provides immutable entries, hash-chain linking, and semantic searchability.
///
/// NOTE(review): the chain link (`last_hash`) lives only in this in-memory
/// handle and starts at `None`, so entries appended by a newly created
/// `WitnessLog` do not link to entries persisted by a previous instance —
/// confirm whether cross-instance/restart chaining is required.
pub struct WitnessLog<'a> {
    /// Backing database for embeddings and vector storage.
    db: &'a AgenticDB,
    /// Hash of the most recently appended entry (`None` before first append).
    last_hash: RwLock<Option<String>>,
}
1131
/// Witness log entry with hash chain
///
/// Reconstructed from vector-DB metadata by [`WitnessLog::search`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WitnessEntry {
    /// Entry ID (UUID v4 assigned at append time)
    pub id: String,
    /// Previous entry hash (forms chain); `None` for the first entry a
    /// `WitnessLog` instance appends
    pub prev_hash: Option<String>,
    /// Current entry hash (16 hex chars — a 64-bit hash, see `compute_hash`)
    pub hash: String,
    /// Agent ID that performed the action
    pub agent_id: String,
    /// Action type
    pub action_type: String,
    /// Action details
    pub details: String,
    /// Action embedding for semantic search (placeholder hash embedding —
    /// see module docs)
    pub embedding: Vec<f32>,
    /// Timestamp, Unix seconds
    pub timestamp: i64,
    /// Additional metadata (currently always `None` when reconstructed)
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
1154
1155impl<'a> WitnessLog<'a> {
1156    /// Create a new witness log
1157    pub fn new(db: &'a AgenticDB) -> Self {
1158        Self {
1159            db,
1160            last_hash: RwLock::new(None),
1161        }
1162    }
1163
1164    /// Compute SHA256 hash of entry data
1165    fn compute_hash(
1166        prev_hash: &Option<String>,
1167        agent_id: &str,
1168        action_type: &str,
1169        details: &str,
1170        timestamp: i64,
1171    ) -> String {
1172        use std::collections::hash_map::DefaultHasher;
1173        use std::hash::{Hash, Hasher};
1174
1175        let mut hasher = DefaultHasher::new();
1176        if let Some(prev) = prev_hash {
1177            prev.hash(&mut hasher);
1178        }
1179        agent_id.hash(&mut hasher);
1180        action_type.hash(&mut hasher);
1181        details.hash(&mut hasher);
1182        timestamp.hash(&mut hasher);
1183        format!("{:016x}", hasher.finish())
1184    }
1185
1186    /// Append an entry to the witness log (immutable, hash-linked)
1187    pub fn append(&self, agent_id: &str, action_type: &str, details: &str) -> Result<String> {
1188        let id = uuid::Uuid::new_v4().to_string();
1189        let timestamp = chrono::Utc::now().timestamp();
1190
1191        // Get previous hash for chain
1192        let prev_hash = self.last_hash.read().clone();
1193
1194        // Compute hash for this entry
1195        let hash = Self::compute_hash(&prev_hash, agent_id, action_type, details, timestamp);
1196
1197        // Generate embedding for semantic search
1198        let embedding = self
1199            .db
1200            .generate_text_embedding(&format!("{} {} {}", agent_id, action_type, details))?;
1201
1202        // Store in vector DB (append-only)
1203        self.db.vector_db.insert(VectorEntry {
1204            id: Some(format!("witness_{}", id)),
1205            vector: embedding.clone(),
1206            metadata: Some({
1207                let mut meta = HashMap::new();
1208                meta.insert("type".to_string(), serde_json::json!("witness"));
1209                meta.insert("witness_id".to_string(), serde_json::json!(id.clone()));
1210                meta.insert("agent_id".to_string(), serde_json::json!(agent_id));
1211                meta.insert("action_type".to_string(), serde_json::json!(action_type));
1212                meta.insert("details".to_string(), serde_json::json!(details));
1213                meta.insert("timestamp".to_string(), serde_json::json!(timestamp));
1214                meta.insert("hash".to_string(), serde_json::json!(hash.clone()));
1215                if let Some(ref prev) = prev_hash {
1216                    meta.insert("prev_hash".to_string(), serde_json::json!(prev));
1217                }
1218                meta
1219            }),
1220        })?;
1221
1222        // Update last hash
1223        *self.last_hash.write() = Some(hash.clone());
1224
1225        Ok(id)
1226    }
1227
1228    /// Search witness log semantically
1229    pub fn search(&self, query: &str, k: usize) -> Result<Vec<WitnessEntry>> {
1230        let query_embedding = self.db.generate_text_embedding(query)?;
1231
1232        let results = self.db.vector_db.search(SearchQuery {
1233            vector: query_embedding,
1234            k,
1235            filter: Some({
1236                let mut filter = HashMap::new();
1237                filter.insert("type".to_string(), serde_json::json!("witness"));
1238                filter
1239            }),
1240            ef_search: None,
1241        })?;
1242
1243        let mut entries = Vec::new();
1244        for result in results {
1245            if let Some(metadata) = result.metadata {
1246                entries.push(WitnessEntry {
1247                    id: metadata
1248                        .get("witness_id")
1249                        .and_then(|v| v.as_str())
1250                        .unwrap_or("")
1251                        .to_string(),
1252                    prev_hash: metadata
1253                        .get("prev_hash")
1254                        .and_then(|v| v.as_str())
1255                        .map(|s| s.to_string()),
1256                    hash: metadata
1257                        .get("hash")
1258                        .and_then(|v| v.as_str())
1259                        .unwrap_or("")
1260                        .to_string(),
1261                    agent_id: metadata
1262                        .get("agent_id")
1263                        .and_then(|v| v.as_str())
1264                        .unwrap_or("")
1265                        .to_string(),
1266                    action_type: metadata
1267                        .get("action_type")
1268                        .and_then(|v| v.as_str())
1269                        .unwrap_or("")
1270                        .to_string(),
1271                    details: metadata
1272                        .get("details")
1273                        .and_then(|v| v.as_str())
1274                        .unwrap_or("")
1275                        .to_string(),
1276                    embedding: result.vector.unwrap_or_default(),
1277                    timestamp: metadata
1278                        .get("timestamp")
1279                        .and_then(|v| v.as_i64())
1280                        .unwrap_or(0),
1281                    metadata: None,
1282                });
1283            }
1284        }
1285
1286        Ok(entries)
1287    }
1288
1289    /// Get entries by agent ID
1290    pub fn get_by_agent(&self, agent_id: &str, k: usize) -> Result<Vec<WitnessEntry>> {
1291        // Use semantic search with agent_id as query
1292        self.search(agent_id, k)
1293    }
1294
1295    /// Verify hash chain integrity
1296    pub fn verify_chain(&self) -> Result<bool> {
1297        let entries = self.search("", 10000)?;
1298
1299        // Sort by timestamp
1300        let mut sorted_entries = entries;
1301        sorted_entries.sort_by_key(|e| e.timestamp);
1302
1303        // Verify each entry's prev_hash matches previous entry's hash
1304        for i in 1..sorted_entries.len() {
1305            let prev = &sorted_entries[i - 1];
1306            let curr = &sorted_entries[i];
1307
1308            if let Some(ref prev_hash) = curr.prev_hash {
1309                if prev_hash != &prev.hash {
1310                    return Ok(false);
1311                }
1312            }
1313        }
1314
1315        Ok(true)
1316    }
1317}
1318
impl AgenticDB {
    /// Get the Policy Memory Store interface
    ///
    /// The returned handle borrows this DB and stores/retrieves
    /// state-action pairs via vector similarity search.
    pub fn policy_memory(&self) -> PolicyMemoryStore<'_> {
        PolicyMemoryStore::new(self)
    }

    /// Get a Session State Index for a specific session
    ///
    /// `ttl_seconds` controls how long each turn remains visible to queries
    /// before it is filtered out (and becomes eligible for cleanup).
    pub fn session_index(&self, session_id: &str, ttl_seconds: i64) -> SessionStateIndex<'_> {
        SessionStateIndex::new(self, session_id, ttl_seconds)
    }

    /// Get the Witness Log interface
    ///
    /// Each call returns a fresh handle whose in-memory hash chain starts
    /// empty (see `WitnessLog` for the cross-instance chaining caveat).
    pub fn witness_log(&self) -> WitnessLog<'_> {
        WitnessLog::new(self)
    }
}
1335
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Build a test DB inside a fresh temp directory.
    ///
    /// Returns the `TempDir` guard alongside the DB: previously the guard
    /// was dropped at the end of this function, which deletes the backing
    /// directory while the database file was still open (works by accident
    /// on Unix via the open fd, breaks on Windows).
    fn create_test_db() -> Result<(AgenticDB, tempfile::TempDir)> {
        let dir = tempdir().unwrap();
        let mut options = DbOptions::default();
        options.storage_path = dir.path().join("test.db").to_string_lossy().to_string();
        options.dimensions = 128;
        Ok((AgenticDB::new(options)?, dir))
    }

    #[test]
    fn test_reflexion_episode() -> Result<()> {
        let (db, _dir) = create_test_db()?;

        let id = db.store_episode(
            "Solve math problem".to_string(),
            vec!["read problem".to_string(), "calculate".to_string()],
            vec!["got 42".to_string()],
            "Should have shown work".to_string(),
        )?;

        let episodes = db.retrieve_similar_episodes("math problem solving", 5)?;
        assert!(!episodes.is_empty());
        assert_eq!(episodes[0].id, id);

        Ok(())
    }

    #[test]
    fn test_skill_library() -> Result<()> {
        let (db, _dir) = create_test_db()?;

        let mut params = HashMap::new();
        params.insert("input".to_string(), "string".to_string());

        let skill_id = db.create_skill(
            "Parse JSON".to_string(),
            "Parse JSON from string".to_string(),
            params,
            vec!["json.parse()".to_string()],
        )?;
        // Check creation returned an id (also silences the previous
        // unused-variable warning).
        assert!(!skill_id.is_empty());

        let skills = db.search_skills("parse json data", 5)?;
        assert!(!skills.is_empty());

        Ok(())
    }

    #[test]
    fn test_causal_edge() -> Result<()> {
        let (db, _dir) = create_test_db()?;

        let edge_id = db.add_causal_edge(
            vec!["rain".to_string()],
            vec!["wet ground".to_string()],
            0.95,
            "Weather observation".to_string(),
        )?;
        // Check creation returned an id (also silences the previous
        // unused-variable warning).
        assert!(!edge_id.is_empty());

        let results = db.query_with_utility("weather patterns", 5, 0.7, 0.2, 0.1)?;
        assert!(!results.is_empty());

        Ok(())
    }

    #[test]
    fn test_learning_session() -> Result<()> {
        let (db, _dir) = create_test_db()?;

        let session_id = db.start_session("Q-Learning".to_string(), 4, 2)?;

        db.add_experience(
            &session_id,
            vec![1.0, 0.0, 0.0, 0.0],
            vec![1.0, 0.0],
            1.0,
            vec![0.0, 1.0, 0.0, 0.0],
            false,
        )?;

        let prediction = db.predict_with_confidence(&session_id, vec![1.0, 0.0, 0.0, 0.0])?;
        assert_eq!(prediction.action.len(), 2);

        Ok(())
    }

    #[test]
    fn test_auto_consolidate() -> Result<()> {
        let (db, _dir) = create_test_db()?;

        let sequences = vec![
            vec![
                "step1".to_string(),
                "step2".to_string(),
                "step3".to_string(),
            ],
            vec![
                "action1".to_string(),
                "action2".to_string(),
                "action3".to_string(),
            ],
        ];

        let skill_ids = db.auto_consolidate(sequences, 3)?;
        assert_eq!(skill_ids.len(), 2);

        Ok(())
    }
}