oxirs_chat/rag/
types.rs

1//! Core types and data structures for the RAG system
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::Duration;
7
8/// A document in the RAG system
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct RagDocument {
11    /// Unique document identifier
12    pub id: String,
13    /// Document content (text)
14    pub content: String,
15    /// Document metadata
16    pub metadata: HashMap<String, String>,
17    /// Document embedding vector (if available)
18    pub embedding: Option<Vec<f32>>,
19    /// Document timestamp
20    pub timestamp: DateTime<Utc>,
21    /// Source of the document
22    pub source: String,
23}
24
25impl RagDocument {
26    /// Create a new RAG document
27    pub fn new(id: String, content: String, source: String) -> Self {
28        Self {
29            id,
30            content,
31            source,
32            metadata: HashMap::new(),
33            embedding: None,
34            timestamp: Utc::now(),
35        }
36    }
37
38    /// Add metadata to the document
39    pub fn with_metadata(mut self, key: String, value: String) -> Self {
40        self.metadata.insert(key, value);
41        self
42    }
43
44    /// Set the embedding vector
45    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
46        self.embedding = Some(embedding);
47        self
48    }
49
50    /// Get the length of the document content
51    pub fn content_length(&self) -> usize {
52        self.content.len()
53    }
54
55    /// Check if document has embedding
56    pub fn has_embedding(&self) -> bool {
57        self.embedding.is_some()
58    }
59}
60
61/// Search result from the RAG system
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct SearchResult {
64    /// Retrieved document
65    pub document: RagDocument,
66    /// Relevance score
67    pub score: f64,
68    /// Factors contributing to relevance
69    pub relevance_factors: Vec<String>,
70}
71
72impl SearchResult {
73    /// Create a new search result
74    pub fn new(document: RagDocument, score: f64) -> Self {
75        Self {
76            document,
77            score,
78            relevance_factors: Vec::new(),
79        }
80    }
81
82    /// Add a relevance factor
83    pub fn add_relevance_factor(mut self, factor: String) -> Self {
84        self.relevance_factors.push(factor);
85        self
86    }
87}
88
89/// Retrieval result (alias for SearchResult for backward compatibility)
90pub type RetrievalResult = SearchResult;
91
92/// Query context for RAG retrieval
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct QueryContext {
95    /// User ID (for personalization)
96    pub user_id: Option<String>,
97    /// Session ID
98    pub session_id: String,
99    /// Query text
100    pub query: Option<String>,
101    /// Query intent classification
102    pub intent: Option<QueryIntent>,
103    /// Extracted entities
104    pub entities: Option<Vec<String>>,
105    /// Previous messages in conversation
106    pub conversation_history: Vec<ConversationMessage>,
107    /// Domain or topic constraints
108    pub domain_constraints: Vec<String>,
109    /// Preferred response format
110    pub response_format: ResponseFormat,
111    /// Maximum response length
112    pub max_response_length: usize,
113    /// Query intent classification
114    pub query_intent: QueryIntent,
115}
116
117impl QueryContext {
118    /// Create a new query context
119    pub fn new(session_id: String) -> Self {
120        Self {
121            user_id: None,
122            session_id,
123            query: None,
124            intent: None,
125            entities: None,
126            conversation_history: Vec::new(),
127            domain_constraints: Vec::new(),
128            response_format: ResponseFormat::Text,
129            max_response_length: 4000,
130            query_intent: QueryIntent::Information,
131        }
132    }
133
134    /// Add a message to conversation history
135    pub fn add_message(mut self, message: ConversationMessage) -> Self {
136        self.conversation_history.push(message);
137        self
138    }
139
140    /// Set domain constraints
141    pub fn with_domain_constraints(mut self, constraints: Vec<String>) -> Self {
142        self.domain_constraints = constraints;
143        self
144    }
145
146    /// Set query intent
147    pub fn with_intent(mut self, intent: QueryIntent) -> Self {
148        self.query_intent = intent;
149        self
150    }
151}
152
153/// Conversation message
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct ConversationMessage {
156    /// Message role (user, assistant, system)
157    pub role: MessageRole,
158    /// Message content
159    pub content: String,
160    /// Message timestamp
161    pub timestamp: DateTime<Utc>,
162}
163
164/// Message role enumeration
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub enum MessageRole {
167    User,
168    Assistant,
169    System,
170}
171
172/// Response format preferences
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub enum ResponseFormat {
175    Text,
176    Structured,
177    Code,
178    Table,
179    List,
180}
181
182/// Query intent classification
183#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
184pub enum QueryIntent {
185    Information,
186    Navigation,
187    Transaction,
188    Comparison,
189    Explanation,
190    Discovery,
191    Relationship,
192}
193
194/// Assembled context from RAG retrieval
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct AssembledContext {
197    /// Retrieved documents
198    pub documents: Vec<SearchResult>,
199    /// Synthesized context text
200    pub context_text: String,
201    /// Context metadata
202    pub metadata: ContextMetadata,
203    /// Assembly statistics
204    pub stats: AssemblyStats,
205}
206
207impl AssembledContext {
208    /// Create a new assembled context
209    pub fn new(documents: Vec<SearchResult>, context_text: String) -> Self {
210        Self {
211            documents,
212            context_text,
213            metadata: ContextMetadata::default(),
214            stats: AssemblyStats::default(),
215        }
216    }
217
218    /// Get the total number of documents
219    pub fn document_count(&self) -> usize {
220        self.documents.len()
221    }
222
223    /// Get the context length in characters
224    pub fn context_length(&self) -> usize {
225        self.context_text.len()
226    }
227
228    /// Get the average relevance score
229    pub fn average_relevance_score(&self) -> f64 {
230        if self.documents.is_empty() {
231            0.0
232        } else {
233            self.documents.iter().map(|d| d.score).sum::<f64>() / self.documents.len() as f64
234        }
235    }
236}
237
238/// Context metadata
239#[derive(Debug, Clone, Serialize, Deserialize)]
240pub struct ContextMetadata {
241    /// Assembly timestamp
242    pub assembled_at: DateTime<Utc>,
243    /// Source diversity (number of different sources)
244    pub source_diversity: usize,
245    /// Topic coverage
246    pub topic_coverage: Vec<String>,
247    /// Confidence score
248    pub confidence_score: f64,
249}
250
251impl Default for ContextMetadata {
252    fn default() -> Self {
253        Self {
254            assembled_at: Utc::now(),
255            source_diversity: 0,
256            topic_coverage: Vec::new(),
257            confidence_score: 0.0,
258        }
259    }
260}
261
262/// Assembly statistics
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct AssemblyStats {
265    /// Time taken to assemble context
266    pub assembly_time: Duration,
267    /// Number of documents processed
268    pub documents_processed: usize,
269    /// Number of documents selected
270    pub documents_selected: usize,
271    /// Total tokens in context
272    pub total_tokens: usize,
273    /// Retrieval method used
274    pub retrieval_method: String,
275}
276
277impl Default for AssemblyStats {
278    fn default() -> Self {
279        Self {
280            assembly_time: Duration::from_millis(0),
281            documents_processed: 0,
282            documents_selected: 0,
283            total_tokens: 0,
284            retrieval_method: "default".to_string(),
285        }
286    }
287}
288
289/// Retrieval configuration
290#[derive(Debug, Clone, Serialize, Deserialize)]
291pub struct RetrievalConfig {
292    /// Maximum number of documents to retrieve
293    pub max_documents: usize,
294    /// Similarity threshold
295    pub similarity_threshold: f64,
296    /// Enable re-ranking
297    pub enable_reranking: bool,
298    /// Re-ranking model
299    pub reranking_model: Option<String>,
300    /// Enable temporal filtering
301    pub enable_temporal_filtering: bool,
302    /// Temporal window (for filtering by recency)
303    pub temporal_window: Option<Duration>,
304}
305
306impl Default for RetrievalConfig {
307    fn default() -> Self {
308        Self {
309            max_documents: 20,
310            similarity_threshold: 0.7,
311            enable_reranking: true,
312            reranking_model: None,
313            enable_temporal_filtering: false,
314            temporal_window: None,
315        }
316    }
317}
318
319/// Context assembly configuration
320#[derive(Debug, Clone, Serialize, Deserialize)]
321pub struct AssemblyConfig {
322    /// Maximum context length in tokens
323    pub max_context_tokens: usize,
324    /// Context overlap for chunking
325    pub context_overlap: usize,
326    /// Prioritize recent documents
327    pub prioritize_recent: bool,
328    /// Enable diversity optimization
329    pub enable_diversity: bool,
330    /// Diversity threshold
331    pub diversity_threshold: f64,
332}
333
334impl Default for AssemblyConfig {
335    fn default() -> Self {
336        Self {
337            max_context_tokens: 4000,
338            context_overlap: 200,
339            prioritize_recent: true,
340            enable_diversity: true,
341            diversity_threshold: 0.8,
342        }
343    }
344}