1use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::Duration;
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct RagDocument {
11 pub id: String,
13 pub content: String,
15 pub metadata: HashMap<String, String>,
17 pub embedding: Option<Vec<f32>>,
19 pub timestamp: DateTime<Utc>,
21 pub source: String,
23}
24
25impl RagDocument {
26 pub fn new(id: String, content: String, source: String) -> Self {
28 Self {
29 id,
30 content,
31 source,
32 metadata: HashMap::new(),
33 embedding: None,
34 timestamp: Utc::now(),
35 }
36 }
37
38 pub fn with_metadata(mut self, key: String, value: String) -> Self {
40 self.metadata.insert(key, value);
41 self
42 }
43
44 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
46 self.embedding = Some(embedding);
47 self
48 }
49
50 pub fn content_length(&self) -> usize {
52 self.content.len()
53 }
54
55 pub fn has_embedding(&self) -> bool {
57 self.embedding.is_some()
58 }
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct SearchResult {
64 pub document: RagDocument,
66 pub score: f64,
68 pub relevance_factors: Vec<String>,
70}
71
72impl SearchResult {
73 pub fn new(document: RagDocument, score: f64) -> Self {
75 Self {
76 document,
77 score,
78 relevance_factors: Vec::new(),
79 }
80 }
81
82 pub fn add_relevance_factor(mut self, factor: String) -> Self {
84 self.relevance_factors.push(factor);
85 self
86 }
87}
88
89pub type RetrievalResult = SearchResult;
91
92#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct QueryContext {
95 pub user_id: Option<String>,
97 pub session_id: String,
99 pub query: Option<String>,
101 pub intent: Option<QueryIntent>,
103 pub entities: Option<Vec<String>>,
105 pub conversation_history: Vec<ConversationMessage>,
107 pub domain_constraints: Vec<String>,
109 pub response_format: ResponseFormat,
111 pub max_response_length: usize,
113 pub query_intent: QueryIntent,
115}
116
117impl QueryContext {
118 pub fn new(session_id: String) -> Self {
120 Self {
121 user_id: None,
122 session_id,
123 query: None,
124 intent: None,
125 entities: None,
126 conversation_history: Vec::new(),
127 domain_constraints: Vec::new(),
128 response_format: ResponseFormat::Text,
129 max_response_length: 4000,
130 query_intent: QueryIntent::Information,
131 }
132 }
133
134 pub fn add_message(mut self, message: ConversationMessage) -> Self {
136 self.conversation_history.push(message);
137 self
138 }
139
140 pub fn with_domain_constraints(mut self, constraints: Vec<String>) -> Self {
142 self.domain_constraints = constraints;
143 self
144 }
145
146 pub fn with_intent(mut self, intent: QueryIntent) -> Self {
148 self.query_intent = intent;
149 self
150 }
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct ConversationMessage {
156 pub role: MessageRole,
158 pub content: String,
160 pub timestamp: DateTime<Utc>,
162}
163
164#[derive(Debug, Clone, Serialize, Deserialize)]
166pub enum MessageRole {
167 User,
168 Assistant,
169 System,
170}
171
172#[derive(Debug, Clone, Serialize, Deserialize)]
174pub enum ResponseFormat {
175 Text,
176 Structured,
177 Code,
178 Table,
179 List,
180}
181
182#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
184pub enum QueryIntent {
185 Information,
186 Navigation,
187 Transaction,
188 Comparison,
189 Explanation,
190 Discovery,
191 Relationship,
192}
193
194#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct AssembledContext {
197 pub documents: Vec<SearchResult>,
199 pub context_text: String,
201 pub metadata: ContextMetadata,
203 pub stats: AssemblyStats,
205}
206
207impl AssembledContext {
208 pub fn new(documents: Vec<SearchResult>, context_text: String) -> Self {
210 Self {
211 documents,
212 context_text,
213 metadata: ContextMetadata::default(),
214 stats: AssemblyStats::default(),
215 }
216 }
217
218 pub fn document_count(&self) -> usize {
220 self.documents.len()
221 }
222
223 pub fn context_length(&self) -> usize {
225 self.context_text.len()
226 }
227
228 pub fn average_relevance_score(&self) -> f64 {
230 if self.documents.is_empty() {
231 0.0
232 } else {
233 self.documents.iter().map(|d| d.score).sum::<f64>() / self.documents.len() as f64
234 }
235 }
236}
237
238#[derive(Debug, Clone, Serialize, Deserialize)]
240pub struct ContextMetadata {
241 pub assembled_at: DateTime<Utc>,
243 pub source_diversity: usize,
245 pub topic_coverage: Vec<String>,
247 pub confidence_score: f64,
249}
250
251impl Default for ContextMetadata {
252 fn default() -> Self {
253 Self {
254 assembled_at: Utc::now(),
255 source_diversity: 0,
256 topic_coverage: Vec::new(),
257 confidence_score: 0.0,
258 }
259 }
260}
261
262#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct AssemblyStats {
265 pub assembly_time: Duration,
267 pub documents_processed: usize,
269 pub documents_selected: usize,
271 pub total_tokens: usize,
273 pub retrieval_method: String,
275}
276
277impl Default for AssemblyStats {
278 fn default() -> Self {
279 Self {
280 assembly_time: Duration::from_millis(0),
281 documents_processed: 0,
282 documents_selected: 0,
283 total_tokens: 0,
284 retrieval_method: "default".to_string(),
285 }
286 }
287}
288
289#[derive(Debug, Clone, Serialize, Deserialize)]
291pub struct RetrievalConfig {
292 pub max_documents: usize,
294 pub similarity_threshold: f64,
296 pub enable_reranking: bool,
298 pub reranking_model: Option<String>,
300 pub enable_temporal_filtering: bool,
302 pub temporal_window: Option<Duration>,
304}
305
306impl Default for RetrievalConfig {
307 fn default() -> Self {
308 Self {
309 max_documents: 20,
310 similarity_threshold: 0.7,
311 enable_reranking: true,
312 reranking_model: None,
313 enable_temporal_filtering: false,
314 temporal_window: None,
315 }
316 }
317}
318
319#[derive(Debug, Clone, Serialize, Deserialize)]
321pub struct AssemblyConfig {
322 pub max_context_tokens: usize,
324 pub context_overlap: usize,
326 pub prioritize_recent: bool,
328 pub enable_diversity: bool,
330 pub diversity_threshold: f64,
332}
333
334impl Default for AssemblyConfig {
335 fn default() -> Self {
336 Self {
337 max_context_tokens: 4000,
338 context_overlap: 200,
339 prioritize_recent: true,
340 enable_diversity: true,
341 diversity_threshold: 0.8,
342 }
343 }
344}