Skip to main content

symbi_runtime/rag/
types.rs

1//! RAG Engine Data Structures and Types
2//!
3//! This module contains all the data structures, enums, and types used by the RAG engine.
4
5use crate::types::{AgentId, PolicyId};
6use serde::{Deserialize, Serialize};
7
8use std::time::{Duration, SystemTime};
9use uuid::Uuid;
10
11/// Errors that can occur during RAG operations
12#[derive(Debug, thiserror::Error)]
13pub enum RAGError {
14    #[error("Query analysis failed: {0}")]
15    QueryAnalysisFailed(String),
16
17    #[error("Document retrieval failed: {0}")]
18    DocumentRetrievalFailed(String),
19
20    #[error("Ranking failed: {0}")]
21    RankingFailed(String),
22
23    #[error("Context augmentation failed: {0}")]
24    ContextAugmentationFailed(String),
25
26    #[error("Response generation failed: {0}")]
27    ResponseGenerationFailed(String),
28
29    #[error("Validation failed: {0}")]
30    ValidationFailed(String),
31
32    #[error("Configuration error: {0}")]
33    ConfigurationError(String),
34
35    #[error("Vector database error: {0}")]
36    VectorDatabaseError(String),
37
38    #[error("Context manager error: {0}")]
39    ContextManagerError(String),
40
41    #[error("Policy violation: {0}")]
42    PolicyViolation(String),
43
44    #[error("Insufficient permissions: {0}")]
45    InsufficientPermissions(String),
46
47    #[error("Timeout error: {0}")]
48    Timeout(String),
49}
50
51/// Unique identifier for documents
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
53pub struct DocumentId(pub Uuid);
54
55impl Default for DocumentId {
56    fn default() -> Self {
57        Self::new()
58    }
59}
60
61impl DocumentId {
62    pub fn new() -> Self {
63        Self(Uuid::new_v4())
64    }
65}
66
67/// RAG request containing query and context
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct RAGRequest {
70    pub agent_id: AgentId,
71    pub query: String,
72    pub preferences: QueryPreferences,
73    pub constraints: QueryConstraints,
74}
75
76/// Query preferences for response generation
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct QueryPreferences {
79    pub response_length: ResponseLength,
80    pub include_citations: bool,
81    pub preferred_sources: Vec<String>,
82    pub response_format: ResponseFormat,
83    pub language: String,
84}
85
86/// Response length preferences
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub enum ResponseLength {
89    Brief,
90    Standard,
91    Detailed,
92    Comprehensive,
93}
94
95/// Response format options
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub enum ResponseFormat {
98    Text,
99    Markdown,
100    Structured,
101    Code,
102}
103
104/// Query constraints and limitations
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct QueryConstraints {
107    pub max_documents: usize,
108    pub time_limit: Duration,
109    pub security_level: AccessLevel,
110    pub allowed_sources: Vec<String>,
111    pub excluded_sources: Vec<String>,
112}
113
114/// Access levels for security
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub enum AccessLevel {
117    Public,
118    Restricted,
119    Confidential,
120    Secret,
121}
122
123/// Analyzed query with expanded terms and metadata
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct AnalyzedQuery {
126    pub original_query: String,
127    pub expanded_terms: Vec<String>,
128    pub intent: QueryIntent,
129    pub entities: Vec<Entity>,
130    pub keywords: Vec<String>,
131    pub embeddings: Vec<f32>,
132    pub context_keywords: Vec<String>,
133}
134
135/// Query intent classification
136#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
137pub enum QueryIntent {
138    Factual,
139    Procedural,
140    Analytical,
141    Creative,
142    Comparative,
143    Troubleshooting,
144}
145
146/// Named entities extracted from query
147#[derive(Debug, Clone, Serialize, Deserialize)]
148pub struct Entity {
149    pub text: String,
150    pub entity_type: EntityType,
151    pub confidence: f32,
152}
153
154/// Types of entities
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub enum EntityType {
157    Person,
158    Organization,
159    Location,
160    Technology,
161    Concept,
162    Date,
163    Number,
164}
165
166/// Document for retrieval and processing
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct Document {
169    pub id: DocumentId,
170    pub title: String,
171    pub content: String,
172    pub metadata: DocumentMetadata,
173    pub embeddings: Vec<f32>,
174    pub chunks: Vec<DocumentChunk>,
175}
176
177/// Document metadata
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct DocumentMetadata {
180    pub document_type: DocumentType,
181    pub author: Option<String>,
182    pub created_at: SystemTime,
183    pub updated_at: SystemTime,
184    pub language: String,
185    pub domain: String,
186    pub access_level: AccessLevel,
187    pub tags: Vec<String>,
188    pub source_url: Option<String>,
189    pub file_path: Option<String>,
190}
191
192/// Types of documents
193#[derive(Debug, Clone, Serialize, Deserialize)]
194pub enum DocumentType {
195    Text,
196    Code,
197    Structured,
198    Manual,
199    API,
200    Research,
201}
202
203/// Document chunk for processing
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct DocumentChunk {
206    pub chunk_id: String,
207    pub content: String,
208    pub start_index: usize,
209    pub end_index: usize,
210    pub embeddings: Vec<f32>,
211}
212
213/// Ranked document with relevance scoring
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct RankedDocument {
216    pub document: Document,
217    pub relevance_score: f32,
218    pub ranking_factors: RankingFactors,
219    pub selected_chunks: Vec<DocumentChunk>,
220}
221
222/// Breakdown of ranking factors
223#[derive(Debug, Clone, Serialize, Deserialize)]
224pub struct RankingFactors {
225    pub semantic_similarity: f32,
226    pub keyword_match: f32,
227    pub recency_score: f32,
228    pub authority_score: f32,
229    pub diversity_score: f32,
230}
231
232/// Augmented context for response generation
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct AugmentedContext {
235    pub original_query: String,
236    pub analyzed_query: AnalyzedQuery,
237    pub retrieved_documents: Vec<RankedDocument>,
238    pub context_summary: String,
239    pub citations: Vec<Citation>,
240}
241
242/// Citation information
243#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct Citation {
245    pub document_id: DocumentId,
246    pub title: String,
247    pub author: Option<String>,
248    pub url: Option<String>,
249    pub relevance_score: f32,
250}
251
252/// Generated response with metadata
253#[derive(Debug, Clone, Serialize, Deserialize)]
254pub struct GeneratedResponse {
255    pub content: String,
256    pub confidence: f32,
257    pub citations: Vec<Citation>,
258    pub metadata: ResponseMetadata,
259    pub validation_status: ValidationStatus,
260}
261
262/// Response generation metadata
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct ResponseMetadata {
265    pub generation_time: Duration,
266    pub tokens_used: usize,
267    pub sources_consulted: usize,
268    pub model_version: String,
269}
270
271/// Validation status for responses
272#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
273pub enum ValidationStatus {
274    Pending,
275    Approved,
276    Rejected(String),
277    RequiresReview,
278}
279
280/// Validation result with details
281#[derive(Debug, Clone, Serialize, Deserialize)]
282pub struct ValidationResult {
283    pub is_valid: bool,
284    pub policy_violations: Vec<PolicyViolation>,
285    pub content_issues: Vec<ContentIssue>,
286    pub confidence_score: f32,
287    pub recommendations: Vec<String>,
288}
289
290/// Policy violation details
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct PolicyViolation {
293    pub policy_id: PolicyId,
294    pub violation_type: ViolationType,
295    pub description: String,
296    pub severity: Severity,
297}
298
299/// Types of policy violations
300#[derive(Debug, Clone, Serialize, Deserialize)]
301pub enum ViolationType {
302    AccessControl,
303    DataClassification,
304    ContentFilter,
305    SecurityLevel,
306}
307
308/// Severity levels
309#[derive(Debug, Clone, Serialize, Deserialize)]
310pub enum Severity {
311    Low,
312    Medium,
313    High,
314    Critical,
315}
316
317/// Content issues in responses
318#[derive(Debug, Clone, Serialize, Deserialize)]
319pub struct ContentIssue {
320    pub issue_type: ContentIssueType,
321    pub description: String,
322    pub confidence: f32,
323}
324
325/// Types of content issues
326#[derive(Debug, Clone, Serialize, Deserialize)]
327pub enum ContentIssueType {
328    Factual,
329    Bias,
330    Toxicity,
331    Misinformation,
332    Inconsistency,
333}
334
335/// Final RAG response
336#[derive(Debug, Clone, Serialize, Deserialize)]
337pub struct RAGResponse {
338    pub response: GeneratedResponse,
339    pub processing_time: Duration,
340    pub sources_used: Vec<Citation>,
341    pub confidence_score: f32,
342    pub follow_up_suggestions: Vec<String>,
343}
344
345/// Document input for ingestion
346#[derive(Debug, Clone, Serialize, Deserialize)]
347pub struct DocumentInput {
348    pub title: String,
349    pub content: String,
350    pub metadata: DocumentMetadata,
351    pub chunking_strategy: ChunkingStrategy,
352}
353
354/// Chunking strategies for documents
355#[derive(Debug, Clone, Serialize, Deserialize)]
356pub enum ChunkingStrategy {
357    FixedSize { size: usize, overlap: usize },
358    Semantic { min_size: usize, max_size: usize },
359    Paragraph,
360    Sentence,
361    Custom(String),
362}
363
364/// RAG engine statistics
365#[derive(Debug, Clone, Serialize, Deserialize)]
366pub struct RAGStats {
367    pub total_documents: usize,
368    pub total_queries: usize,
369    pub avg_response_time: Duration,
370    pub cache_hit_rate: f32,
371    pub validation_pass_rate: f32,
372    pub top_query_types: Vec<(QueryIntent, usize)>,
373}
374
375/// RAG engine configuration
376#[derive(Debug, Clone, Serialize, Deserialize)]
377pub struct RAGConfig {
378    pub embedding_model: EmbeddingModelConfig,
379    pub retrieval_config: RetrievalConfig,
380    pub ranking_config: RankingConfig,
381    pub generation_config: GenerationConfig,
382    pub validation_config: ValidationConfig,
383}
384
385/// Embedding model configuration
386#[derive(Debug, Clone, Serialize, Deserialize)]
387pub struct EmbeddingModelConfig {
388    pub model_name: String,
389    pub model_type: EmbeddingModelType,
390    pub dimension: usize,
391    pub max_tokens: usize,
392    pub batch_size: usize,
393}
394
395/// Types of embedding models
396#[derive(Debug, Clone, Serialize, Deserialize)]
397pub enum EmbeddingModelType {
398    OpenAI,
399    HuggingFace,
400    Local,
401    Custom,
402}
403
404/// Retrieval configuration
405#[derive(Debug, Clone, Serialize, Deserialize)]
406pub struct RetrievalConfig {
407    pub max_documents: usize,
408    pub similarity_threshold: f32,
409    pub context_window: usize,
410    pub enable_hybrid_search: bool,
411    pub reranking_enabled: bool,
412}
413
414/// Ranking configuration
415#[derive(Debug, Clone, Serialize, Deserialize)]
416pub struct RankingConfig {
417    pub ranking_algorithm: RankingAlgorithm,
418    pub relevance_weight: f32,
419    pub recency_weight: f32,
420    pub authority_weight: f32,
421    pub diversity_weight: f32,
422}
423
424/// Ranking algorithms
425#[derive(Debug, Clone, Serialize, Deserialize)]
426pub enum RankingAlgorithm {
427    CosineSimilarity,
428    BM25,
429    Hybrid,
430    LearningToRank,
431}
432
433/// Generation configuration
434#[derive(Debug, Clone, Serialize, Deserialize)]
435pub struct GenerationConfig {
436    pub max_response_length: usize,
437    pub temperature: f32,
438    pub top_p: f32,
439    pub enable_citations: bool,
440    pub response_format: ResponseFormat,
441}
442
443/// Validation configuration
444#[derive(Debug, Clone, Serialize, Deserialize)]
445pub struct ValidationConfig {
446    pub enable_policy_check: bool,
447    pub enable_content_filter: bool,
448    pub enable_fact_check: bool,
449    pub confidence_threshold: f32,
450}