symbi-runtime 0.6.1

Agent Runtime System for the Symbi platform
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
//! RAG Engine Data Structures and Types
//!
//! This module contains all the data structures, enums, and types used by the RAG engine.

use crate::types::{AgentId, PolicyId};
use serde::{Deserialize, Serialize};

use std::time::{Duration, SystemTime};
use uuid::Uuid;

/// Errors that can occur during RAG operations
///
/// Every variant wraps a single `String` detail which `thiserror` interpolates
/// into the `Display` message through the `{0}` placeholder. No variant wraps
/// an underlying source error, so any lower-level error must be converted to
/// text before being stored here.
#[derive(Debug, thiserror::Error)]
pub enum RAGError {
    /// Query analysis failed; the payload is the detail text.
    #[error("Query analysis failed: {0}")]
    QueryAnalysisFailed(String),

    /// Document retrieval failed; the payload is the detail text.
    #[error("Document retrieval failed: {0}")]
    DocumentRetrievalFailed(String),

    /// Document ranking failed; the payload is the detail text.
    #[error("Ranking failed: {0}")]
    RankingFailed(String),

    /// Context augmentation failed; the payload is the detail text.
    #[error("Context augmentation failed: {0}")]
    ContextAugmentationFailed(String),

    /// Response generation failed; the payload is the detail text.
    #[error("Response generation failed: {0}")]
    ResponseGenerationFailed(String),

    /// Validation failed; the payload is the detail text.
    #[error("Validation failed: {0}")]
    ValidationFailed(String),

    /// Configuration problem; the payload is the detail text.
    #[error("Configuration error: {0}")]
    ConfigurationError(String),

    /// Error attributed to the vector database; the payload is the detail text.
    #[error("Vector database error: {0}")]
    VectorDatabaseError(String),

    /// Error attributed to the context manager; the payload is the detail text.
    #[error("Context manager error: {0}")]
    ContextManagerError(String),

    /// A policy was violated; the payload is the detail text.
    ///
    /// This variant only carries a message. It is distinct from the
    /// `PolicyViolation` struct declared in this module, which carries
    /// structured violation details.
    #[error("Policy violation: {0}")]
    PolicyViolation(String),

    /// The caller lacks required permissions; the payload is the detail text.
    #[error("Insufficient permissions: {0}")]
    InsufficientPermissions(String),

    /// An operation timed out; the payload is the detail text.
    #[error("Timeout error: {0}")]
    Timeout(String),
}

/// Unique identifier for documents
///
/// Newtype over [`Uuid`] with a public inner value. The derives make it
/// `Copy`, comparable for equality and hashable, so it can be used directly
/// as a `HashMap`/`HashSet` key, and serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DocumentId(pub Uuid);

impl Default for DocumentId {
    fn default() -> Self {
        Self::new()
    }
}

impl DocumentId {
    pub fn new() -> Self {
        Self(Uuid::new_v4())
    }
}

/// RAG request containing query and context
///
/// Bundles the query text with the agent it belongs to and the preferences
/// and constraints that apply to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RAGRequest {
    /// Agent associated with the request.
    /// NOTE(review): presumably the requesting agent — confirm against callers.
    pub agent_id: AgentId,
    /// Query text.
    pub query: String,
    /// Preferences for the response (length, format, citations, ...).
    pub preferences: QueryPreferences,
    /// Limits applied to the query (document count, time, sources, ...).
    pub constraints: QueryConstraints,
}

/// Query preferences for response generation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryPreferences {
    /// Requested response length category.
    pub response_length: ResponseLength,
    /// Whether citations should be included.
    pub include_citations: bool,
    /// Sources to prefer. The string format (name, URL, identifier) is not
    /// defined in this module.
    pub preferred_sources: Vec<String>,
    /// Requested output format.
    pub response_format: ResponseFormat,
    /// Language for the response.
    /// NOTE(review): expected encoding (language code vs. name) is not shown
    /// here — confirm against callers.
    pub language: String,
}

/// Response length preferences
///
/// Fieldless enum. This module attaches no concrete size (tokens, characters,
/// sentences) to any variant; judging by the names they are listed from
/// shortest to longest.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ResponseLength {
    /// Brief response.
    Brief,
    /// Standard-length response.
    Standard,
    /// Detailed response.
    Detailed,
    /// Comprehensive response.
    Comprehensive,
}

/// Response format options
///
/// Referenced by both `QueryPreferences::response_format` and
/// `GenerationConfig::response_format` in this module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ResponseFormat {
    /// Text output.
    Text,
    /// Markdown output.
    Markdown,
    /// Structured output.
    /// NOTE(review): the concrete structure (e.g. JSON) is not defined here.
    Structured,
    /// Code output.
    Code,
}

/// Query constraints and limitations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryConstraints {
    /// Upper bound on the number of documents for the query.
    pub max_documents: usize,
    /// Time budget for the query.
    /// NOTE(review): which stages the budget covers is not shown here.
    pub time_limit: Duration,
    /// Access level associated with the query.
    /// NOTE(review): presumably the maximum level the query may read —
    /// confirm against the code that enforces it.
    pub security_level: AccessLevel,
    /// Sources the query may draw from.
    /// NOTE(review): whether an empty list means "all" or "none" is not
    /// defined in this module.
    pub allowed_sources: Vec<String>,
    /// Sources the query must not draw from.
    /// NOTE(review): precedence relative to `allowed_sources` is not defined
    /// in this module.
    pub excluded_sources: Vec<String>,
}

/// Access levels for security
///
/// Levels can be compared and hashed. The derived `PartialOrd`/`Ord` follow
/// the declaration order of the variants, i.e.
/// `Public < Restricted < Confidential < Secret`, so a clearance check can be
/// written as `document_level <= allowed_level`.
///
/// Do not reorder the variants: doing so would change the comparison results.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum AccessLevel {
    /// Lowest level in the ordering.
    Public,
    /// Above `Public`, below `Confidential`.
    Restricted,
    /// Above `Restricted`, below `Secret`.
    Confidential,
    /// Highest level in the ordering.
    Secret,
}

/// Analyzed query with expanded terms and metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzedQuery {
    /// The query text as originally supplied.
    pub original_query: String,
    /// Additional terms produced by query expansion.
    pub expanded_terms: Vec<String>,
    /// Classified intent of the query.
    pub intent: QueryIntent,
    /// Named entities extracted from the query.
    pub entities: Vec<Entity>,
    /// Keywords extracted from the query.
    pub keywords: Vec<String>,
    /// Embedding vector for the query.
    /// NOTE(review): the length is not fixed by this type; presumably it
    /// matches `EmbeddingModelConfig::dimension` — confirm.
    pub embeddings: Vec<f32>,
    /// Keywords taken from context.
    /// NOTE(review): which context they come from is not shown here.
    pub context_keywords: Vec<String>,
}

/// Query intent classification
///
/// Derives `Eq` and `Hash` in addition to `PartialEq`, so an intent can be
/// used as a `HashMap`/`HashSet` key — for example when counting queries per
/// intent to fill `RAGStats::top_query_types`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum QueryIntent {
    /// Query asking for facts.
    Factual,
    /// Query asking how to do something.
    Procedural,
    /// Query asking for analysis.
    Analytical,
    /// Query asking for creative output.
    Creative,
    /// Query asking for a comparison.
    Comparative,
    /// Query about diagnosing or fixing a problem.
    Troubleshooting,
}

/// Named entities extracted from query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Entity {
    /// Text of the entity.
    pub text: String,
    /// Category assigned to the entity.
    pub entity_type: EntityType,
    /// Confidence of the extraction.
    /// NOTE(review): the valid range is not enforced by this type;
    /// presumably 0.0–1.0 — confirm.
    pub confidence: f32,
}

/// Types of entities
///
/// Fieldless categories assigned to an `Entity` through its `entity_type`
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EntityType {
    /// A person.
    Person,
    /// An organization.
    Organization,
    /// A location.
    Location,
    /// A technology.
    Technology,
    /// A concept.
    Concept,
    /// A date.
    Date,
    /// A number.
    Number,
}

/// Document for retrieval and processing
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Document {
    /// Unique identifier of the document.
    pub id: DocumentId,
    /// Document title.
    pub title: String,
    /// Document content as text.
    pub content: String,
    /// Descriptive metadata (type, author, timestamps, access level, ...).
    pub metadata: DocumentMetadata,
    /// Embedding vector for the document.
    /// NOTE(review): presumably computed over the whole document, as opposed
    /// to the per-chunk vectors in `chunks` — confirm.
    pub embeddings: Vec<f32>,
    /// Chunks of the document, each with its own content and embeddings.
    pub chunks: Vec<DocumentChunk>,
}

/// Document metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMetadata {
    /// Kind of document.
    pub document_type: DocumentType,
    /// Author, if known.
    pub author: Option<String>,
    /// Creation timestamp.
    pub created_at: SystemTime,
    /// Last-update timestamp.
    pub updated_at: SystemTime,
    /// Language of the document.
    /// NOTE(review): expected encoding (language code vs. name) is not shown
    /// here.
    pub language: String,
    /// Domain the document belongs to.
    /// NOTE(review): presumably a subject area rather than an internet
    /// domain — confirm.
    pub domain: String,
    /// Access level attached to the document.
    pub access_level: AccessLevel,
    /// Free-form tags.
    pub tags: Vec<String>,
    /// URL the document came from, if any.
    pub source_url: Option<String>,
    /// File path of the document, if any.
    pub file_path: Option<String>,
}

/// Types of documents
///
/// Fieldless categories stored in `DocumentMetadata::document_type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DocumentType {
    /// Text document.
    Text,
    /// Source code.
    Code,
    /// Structured document.
    /// NOTE(review): the concrete structure is not defined here.
    Structured,
    /// A manual.
    Manual,
    /// API documentation or description.
    /// NOTE(review): exact meaning is not defined here.
    API,
    /// Research material.
    Research,
}

/// Document chunk for processing
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentChunk {
    /// Identifier of the chunk. The format is not defined in this module.
    pub chunk_id: String,
    /// Text of the chunk.
    pub content: String,
    /// Start position of the chunk.
    /// NOTE(review): presumably an offset into the parent document's
    /// content; whether it counts bytes or characters is not shown here.
    pub start_index: usize,
    /// End position of the chunk.
    /// NOTE(review): whether the bound is inclusive or exclusive is not
    /// shown here — confirm against the chunker.
    pub end_index: usize,
    /// Embedding vector for the chunk.
    pub embeddings: Vec<f32>,
}

/// Ranked document with relevance scoring
///
/// Owns a full copy of the `Document` together with its score and the
/// chunks selected from it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RankedDocument {
    /// The document that was ranked.
    pub document: Document,
    /// Overall relevance score.
    /// NOTE(review): scale and how it is combined from `ranking_factors`
    /// are not shown here.
    pub relevance_score: f32,
    /// Per-factor breakdown of the ranking.
    pub ranking_factors: RankingFactors,
    /// Chunks selected from the document.
    pub selected_chunks: Vec<DocumentChunk>,
}

/// Breakdown of ranking factors
///
/// NOTE(review): the scale of each score is not enforced by this type;
/// presumably 0.0–1.0 — confirm against the ranker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RankingFactors {
    /// Semantic-similarity component.
    pub semantic_similarity: f32,
    /// Keyword-match component.
    pub keyword_match: f32,
    /// Recency component.
    pub recency_score: f32,
    /// Authority component.
    pub authority_score: f32,
    /// Diversity component.
    pub diversity_score: f32,
}

/// Augmented context for response generation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AugmentedContext {
    /// The query text as originally supplied.
    pub original_query: String,
    /// Result of query analysis. It carries its own `original_query` field
    /// in addition to the one above.
    pub analyzed_query: AnalyzedQuery,
    /// Ranked documents retrieved for the query.
    pub retrieved_documents: Vec<RankedDocument>,
    /// Summary of the context as text.
    pub context_summary: String,
    /// Citations for the retrieved material.
    pub citations: Vec<Citation>,
}

/// Citation information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    /// Identifier of the cited document.
    pub document_id: DocumentId,
    /// Title of the cited document.
    pub title: String,
    /// Author of the cited document, if known.
    pub author: Option<String>,
    /// URL of the cited document, if any.
    pub url: Option<String>,
    /// Relevance score of the cited document.
    /// NOTE(review): presumably copied from `RankedDocument::relevance_score`
    /// — confirm.
    pub relevance_score: f32,
}

/// Generated response with metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneratedResponse {
    /// Response text.
    pub content: String,
    /// Confidence in the response.
    /// NOTE(review): the valid range is not enforced by this type;
    /// presumably 0.0–1.0 — confirm.
    pub confidence: f32,
    /// Citations backing the response.
    pub citations: Vec<Citation>,
    /// Generation metadata (timing, token count, model version, ...).
    pub metadata: ResponseMetadata,
    /// Current validation state of the response.
    pub validation_status: ValidationStatus,
}

/// Response generation metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseMetadata {
    /// Time spent generating the response.
    pub generation_time: Duration,
    /// Number of tokens used.
    /// NOTE(review): whether this counts prompt, completion, or both is not
    /// shown here.
    pub tokens_used: usize,
    /// Number of sources consulted.
    pub sources_consulted: usize,
    /// Version of the model that produced the response.
    pub model_version: String,
}

/// Validation status for responses
///
/// Derives `PartialEq`, so statuses can be compared with `==`; for
/// `Rejected` the comparison includes the contained text.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ValidationStatus {
    /// Validation has not completed yet.
    Pending,
    /// The response passed validation.
    Approved,
    /// The response was rejected; the payload is explanatory text
    /// (presumably the reason — confirm against the validator).
    Rejected(String),
    /// The response needs review.
    /// NOTE(review): who performs the review is not defined here.
    RequiresReview,
}

/// Validation result with details
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
    /// Overall verdict.
    /// NOTE(review): this type does not tie the flag to the two lists
    /// below; keeping them consistent is up to the producer.
    pub is_valid: bool,
    /// Policy violations that were found.
    pub policy_violations: Vec<PolicyViolation>,
    /// Content issues that were found.
    pub content_issues: Vec<ContentIssue>,
    /// Confidence score of the validation.
    /// NOTE(review): range is not enforced; presumably 0.0–1.0 — confirm.
    pub confidence_score: f32,
    /// Recommendations as free text.
    pub recommendations: Vec<String>,
}

/// Policy violation details
///
/// Structured record of a violation. Not to be confused with the
/// `RAGError::PolicyViolation` error variant, which only carries a message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolicyViolation {
    /// Identifier of the violated policy.
    pub policy_id: PolicyId,
    /// Category of the violation.
    pub violation_type: ViolationType,
    /// Description of the violation.
    pub description: String,
    /// Severity of the violation.
    pub severity: Severity,
}

/// Types of policy violations
///
/// Fieldless categories stored in `PolicyViolation::violation_type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ViolationType {
    /// Violation related to access control.
    AccessControl,
    /// Violation related to data classification.
    DataClassification,
    /// Violation related to content filtering.
    ContentFilter,
    /// Violation related to security level.
    SecurityLevel,
}

/// Severity levels
///
/// Levels can be compared and hashed. The derived `PartialOrd`/`Ord` follow
/// the declaration order of the variants, i.e.
/// `Low < Medium < High < Critical`, so thresholds can be written as
/// `severity >= Severity::High` and the worst violation found with
/// `Iterator::max`.
///
/// Do not reorder the variants: doing so would change the comparison results.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Severity {
    /// Lowest severity in the ordering.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Highest severity in the ordering.
    Critical,
}

/// Content issues in responses
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentIssue {
    /// Category of the issue.
    pub issue_type: ContentIssueType,
    /// Description of the issue.
    pub description: String,
    /// Confidence that the issue is real.
    /// NOTE(review): range is not enforced; presumably 0.0–1.0 — confirm.
    pub confidence: f32,
}

/// Types of content issues
///
/// Fieldless categories stored in `ContentIssue::issue_type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ContentIssueType {
    /// Factual problem.
    Factual,
    /// Biased content.
    Bias,
    /// Toxic content.
    Toxicity,
    /// Misinformation.
    Misinformation,
    /// Inconsistent content.
    /// NOTE(review): whether this means internal inconsistency or
    /// inconsistency with the sources is not defined here.
    Inconsistency,
}

/// Final RAG response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RAGResponse {
    /// The generated response, including its own citations, confidence and
    /// validation status.
    pub response: GeneratedResponse,
    /// Processing time for the request.
    /// NOTE(review): presumably end-to-end, as opposed to
    /// `ResponseMetadata::generation_time` — confirm.
    pub processing_time: Duration,
    /// Citations for the sources that were used.
    pub sources_used: Vec<Citation>,
    /// Confidence score of the final response.
    /// NOTE(review): relation to `GeneratedResponse::confidence` is not
    /// shown here.
    pub confidence_score: f32,
    /// Suggested follow-up queries as free text.
    pub follow_up_suggestions: Vec<String>,
}

/// Document input for ingestion
///
/// Unlike `Document`, this type has no id, embeddings or chunks.
/// NOTE(review): presumably those are produced during ingestion — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentInput {
    /// Document title.
    pub title: String,
    /// Document content as text.
    pub content: String,
    /// Descriptive metadata for the document.
    pub metadata: DocumentMetadata,
    /// How the content should be split into chunks.
    pub chunking_strategy: ChunkingStrategy,
}

/// Chunking strategies for documents
///
/// NOTE(review): the unit of the `usize` sizes below (bytes, characters or
/// tokens) is not defined in this module — confirm against the chunker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ChunkingStrategy {
    /// Fixed-size chunks of `size`, with `overlap` shared between
    /// consecutive chunks.
    FixedSize { size: usize, overlap: usize },
    /// Semantic chunking with chunk sizes bounded by `min_size` and
    /// `max_size`.
    Semantic { min_size: usize, max_size: usize },
    /// One chunk per paragraph.
    Paragraph,
    /// One chunk per sentence.
    Sentence,
    /// Custom strategy identified by a string; how the string is
    /// interpreted is not defined here.
    Custom(String),
}

/// RAG engine statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RAGStats {
    /// Total number of documents.
    pub total_documents: usize,
    /// Total number of queries.
    pub total_queries: usize,
    /// Average response time.
    pub avg_response_time: Duration,
    /// Cache hit rate.
    /// NOTE(review): fraction (0.0–1.0) vs. percentage is not shown here.
    pub cache_hit_rate: f32,
    /// Share of responses that passed validation.
    /// NOTE(review): fraction (0.0–1.0) vs. percentage is not shown here.
    pub validation_pass_rate: f32,
    /// Pairs of query intent and a count.
    /// NOTE(review): presumably sorted by descending count — confirm.
    pub top_query_types: Vec<(QueryIntent, usize)>,
}

/// RAG engine configuration
///
/// Aggregates one configuration section for each of embedding, retrieval,
/// ranking, generation and validation. No `Default` impl is defined in the
/// visible part of this module, so every section must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RAGConfig {
    /// Embedding model settings.
    pub embedding_model: EmbeddingModelConfig,
    /// Retrieval settings.
    pub retrieval_config: RetrievalConfig,
    /// Ranking settings.
    pub ranking_config: RankingConfig,
    /// Response generation settings.
    pub generation_config: GenerationConfig,
    /// Response validation settings.
    pub validation_config: ValidationConfig,
}

/// Embedding model configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingModelConfig {
    /// Name of the embedding model.
    pub model_name: String,
    /// Kind of embedding model/provider.
    pub model_type: EmbeddingModelType,
    /// Dimension of the embedding vectors.
    /// NOTE(review): presumably the expected length of the `embeddings`
    /// vectors used elsewhere in this module — confirm.
    pub dimension: usize,
    /// Maximum number of tokens.
    /// NOTE(review): presumably per embedded input — confirm.
    pub max_tokens: usize,
    /// Batch size.
    /// NOTE(review): presumably inputs per embedding call — confirm.
    pub batch_size: usize,
}

/// Types of embedding models
///
/// Fieldless categories stored in `EmbeddingModelConfig::model_type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EmbeddingModelType {
    /// OpenAI model.
    OpenAI,
    /// Hugging Face model.
    HuggingFace,
    /// Locally hosted model.
    Local,
    /// Custom model; carries no further detail in this type.
    Custom,
}

/// Retrieval configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalConfig {
    /// Upper bound on the number of documents to retrieve.
    /// NOTE(review): interaction with `QueryConstraints::max_documents` is
    /// not shown here.
    pub max_documents: usize,
    /// Similarity threshold for retrieval.
    /// NOTE(review): presumably a minimum score; scale depends on the
    /// similarity metric — confirm.
    pub similarity_threshold: f32,
    /// Size of the context window.
    /// NOTE(review): unit (tokens, characters, documents) is not shown here.
    pub context_window: usize,
    /// Whether hybrid search is enabled.
    pub enable_hybrid_search: bool,
    /// Whether reranking is enabled.
    pub reranking_enabled: bool,
}

/// Ranking configuration
///
/// NOTE(review): this type does not require the weights to sum to 1.0 or to
/// be non-negative; any such invariant must be enforced by the ranker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RankingConfig {
    /// Algorithm used for ranking.
    pub ranking_algorithm: RankingAlgorithm,
    /// Weight of the relevance component.
    pub relevance_weight: f32,
    /// Weight of the recency component.
    pub recency_weight: f32,
    /// Weight of the authority component.
    pub authority_weight: f32,
    /// Weight of the diversity component.
    pub diversity_weight: f32,
}

/// Ranking algorithms
///
/// Fieldless selector stored in `RankingConfig::ranking_algorithm`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RankingAlgorithm {
    /// Rank by cosine similarity.
    CosineSimilarity,
    /// Rank with BM25.
    BM25,
    /// Hybrid ranking.
    /// NOTE(review): which methods are combined is not defined here.
    Hybrid,
    /// Learning-to-rank.
    LearningToRank,
}

/// Generation configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerationConfig {
    /// Maximum response length.
    /// NOTE(review): unit (tokens vs. characters) is not shown here.
    pub max_response_length: usize,
    /// Temperature setting for generation.
    /// NOTE(review): presumably the usual sampling temperature of the
    /// generating model — confirm.
    pub temperature: f32,
    /// `top_p` setting for generation.
    /// NOTE(review): presumably the nucleus-sampling cutoff — confirm.
    pub top_p: f32,
    /// Whether citations are enabled.
    pub enable_citations: bool,
    /// Output format for generated responses.
    pub response_format: ResponseFormat,
}

/// Validation configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationConfig {
    /// Whether the policy check is enabled.
    pub enable_policy_check: bool,
    /// Whether the content filter is enabled.
    pub enable_content_filter: bool,
    /// Whether fact checking is enabled.
    pub enable_fact_check: bool,
    /// Confidence threshold used by validation.
    /// NOTE(review): presumably the minimum confidence a response needs to
    /// pass — confirm against the validator.
    pub confidence_threshold: f32,
}