use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
/// A document record: text content plus free-form string metadata and an
/// optional embedding vector.
///
/// Instances are normally built with `RagDocument::new` and then extended
/// through the chaining helpers `with_metadata` / `with_embedding`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RagDocument {
/// Identifier passed to `new`. Uniqueness is not enforced by this type.
pub id: String,
/// Document text. `content_length` reports its size in bytes.
pub content: String,
/// String key/value pairs. Empty after `new`; entries are added with
/// `with_metadata`.
pub metadata: HashMap<String, String>,
/// Embedding vector, if one has been attached. `None` after `new`.
pub embedding: Option<Vec<f32>>,
/// Set to `Utc::now()` by `new`, i.e. the moment the value was constructed.
pub timestamp: DateTime<Utc>,
/// Source label passed to `new` — presumably where the document came from;
/// its format is not defined in this file.
pub source: String,
}
impl RagDocument {
    /// Builds a document from its identifier, text and source label.
    ///
    /// The metadata map starts empty, no embedding is attached, and
    /// `timestamp` is read from `Utc::now()` at the time of the call.
    pub fn new(id: String, content: String, source: String) -> Self {
        let metadata = HashMap::new();
        let timestamp = Utc::now();
        Self {
            id,
            content,
            metadata,
            embedding: None,
            timestamp,
            source,
        }
    }

    /// Stores one metadata entry and returns the document for chaining.
    ///
    /// A value already stored under `key` is overwritten.
    pub fn with_metadata(self, key: String, value: String) -> Self {
        let mut doc = self;
        // The displaced value, if any, is deliberately discarded.
        let _displaced = doc.metadata.insert(key, value);
        doc
    }

    /// Attaches `embedding`, replacing any vector that was already present,
    /// and returns the document for chaining.
    pub fn with_embedding(self, embedding: Vec<f32>) -> Self {
        Self {
            embedding: Some(embedding),
            ..self
        }
    }

    /// Size of `content` in bytes (`String::len`), not in characters.
    pub fn content_length(&self) -> usize {
        let text: &str = &self.content;
        text.len()
    }

    /// `true` when an embedding vector has been attached.
    pub fn has_embedding(&self) -> bool {
        let attached = &self.embedding;
        attached.is_some()
    }
}
/// A document paired with a score and a list of textual relevance factors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
/// The document this result refers to.
pub document: RagDocument,
/// Score attached to the document. Its range and direction are not
/// established in this file — NOTE(review): confirm with the producer.
pub score: f64,
/// Free-form strings appended through `add_relevance_factor`; empty after
/// `new`.
pub relevance_factors: Vec<String>,
}
impl SearchResult {
    /// Pairs `document` with `score`; the relevance-factor list starts empty.
    pub fn new(document: RagDocument, score: f64) -> Self {
        let relevance_factors = Vec::new();
        Self {
            document,
            score,
            relevance_factors,
        }
    }

    /// Appends `factor` to the end of `relevance_factors` and returns the
    /// result for chaining.
    pub fn add_relevance_factor(self, factor: String) -> Self {
        let mut result = self;
        result.relevance_factors.push(factor);
        result
    }
}
/// Alias for `SearchResult`; both names refer to the same type.
pub type RetrievalResult = SearchResult;
/// Per-query context: session identity, optional query details, conversation
/// history and response preferences.
///
/// `QueryContext::new` fills in the defaults noted on each field below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryContext {
/// Optional user identifier. `None` after `new`.
pub user_id: Option<String>,
/// Session identifier; the only value `new` requires.
pub session_id: String,
/// Optional query string (presumably the raw query text). `None` after `new`.
pub query: Option<String>,
/// Optional intent. `None` after `new`.
/// NOTE(review): `with_intent` writes `query_intent`, not this field —
/// confirm how the two intent fields are meant to relate.
pub intent: Option<QueryIntent>,
/// Optional list of entity strings. `None` after `new`.
pub entities: Option<Vec<String>>,
/// Messages in the order they were appended via `add_message`.
pub conversation_history: Vec<ConversationMessage>,
/// Constraint strings; replaced wholesale by `with_domain_constraints`.
pub domain_constraints: Vec<String>,
/// Requested response format. `ResponseFormat::Text` after `new`.
pub response_format: ResponseFormat,
/// Response length limit, 4000 after `new`. The unit (bytes, characters or
/// tokens) is not specified in this file.
pub max_response_length: usize,
/// Intent set by `with_intent`. `QueryIntent::Information` after `new`.
pub query_intent: QueryIntent,
}
impl QueryContext {
    /// Creates a context for `session_id` with every optional field unset,
    /// empty history and constraints, text responses, a maximum response
    /// length of 4000 and `QueryIntent::Information` as the query intent.
    pub fn new(session_id: String) -> Self {
        Self {
            session_id,
            // Optional details start out unset.
            user_id: None,
            query: None,
            intent: None,
            entities: None,
            // Collections start out empty.
            conversation_history: vec![],
            domain_constraints: vec![],
            // Response preferences.
            response_format: ResponseFormat::Text,
            max_response_length: 4_000,
            query_intent: QueryIntent::Information,
        }
    }

    /// Appends `message` to the end of the conversation history and returns
    /// the context for chaining.
    pub fn add_message(self, message: ConversationMessage) -> Self {
        let mut ctx = self;
        ctx.conversation_history.push(message);
        ctx
    }

    /// Replaces the whole constraint list with `constraints` (it does not
    /// append) and returns the context for chaining.
    pub fn with_domain_constraints(self, constraints: Vec<String>) -> Self {
        Self {
            domain_constraints: constraints,
            ..self
        }
    }

    /// Sets `query_intent` and returns the context for chaining.
    ///
    /// The optional `intent` field is left untouched.
    pub fn with_intent(self, intent: QueryIntent) -> Self {
        Self {
            query_intent: intent,
            ..self
        }
    }
}
/// One entry in a conversation history: who spoke, what was said, and a
/// timestamp.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversationMessage {
/// Which participant the message is attributed to.
pub role: MessageRole,
/// Message text.
pub content: String,
/// UTC timestamp supplied by whoever constructs the message; this file does
/// not set it.
pub timestamp: DateTime<Utc>,
}
/// Participant a `ConversationMessage` is attributed to. Variants carry no
/// data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MessageRole {
User,
Assistant,
System,
}
/// Response format selector stored in `QueryContext::response_format`.
/// Variants carry no data; `QueryContext::new` uses `Text`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ResponseFormat {
Text,
Structured,
Code,
Table,
List,
}
/// Intent category for a query. Variants carry no data and can be compared
/// with `==` (the enum derives `PartialEq`).
///
/// `QueryContext::new` uses `Information` as the initial `query_intent`.
/// How a query is classified into a variant is not defined in this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum QueryIntent {
Information,
Navigation,
Transaction,
Comparison,
Explanation,
Discovery,
Relationship,
}
/// A set of search results together with the context text built from them,
/// plus metadata and statistics about the assembly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledContext {
/// The search results included in this context.
pub documents: Vec<SearchResult>,
/// The assembled text. `context_length` reports its size in bytes.
pub context_text: String,
/// Descriptive metadata; `new` initialises it with `ContextMetadata::default()`.
pub metadata: ContextMetadata,
/// Assembly statistics; `new` initialises them with `AssemblyStats::default()`.
pub stats: AssemblyStats,
}
impl AssembledContext {
    /// Wraps `documents` and `context_text`; metadata and statistics are set
    /// to their `Default` values and are not derived from the inputs.
    pub fn new(documents: Vec<SearchResult>, context_text: String) -> Self {
        let metadata = ContextMetadata::default();
        let stats = AssemblyStats::default();
        Self {
            documents,
            context_text,
            metadata,
            stats,
        }
    }

    /// Number of search results held.
    pub fn document_count(&self) -> usize {
        let results = &self.documents;
        results.len()
    }

    /// Size of `context_text` in bytes (`String::len`), not in characters.
    pub fn context_length(&self) -> usize {
        let text: &str = &self.context_text;
        text.len()
    }

    /// Arithmetic mean of the `score` of every held result, or `0.0` when
    /// there are no results.
    pub fn average_relevance_score(&self) -> f64 {
        match self.documents.len() {
            // Avoid dividing by zero for an empty result set.
            0 => 0.0,
            count => {
                let total = self.documents.iter().map(|result| result.score).sum::<f64>();
                total / count as f64
            }
        }
    }
}
/// Descriptive metadata attached to an `AssembledContext`.
///
/// Nothing in this file computes these values; the `Default` impl only
/// supplies the initial values noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextMetadata {
/// Set to `Utc::now()` by `Default::default()`.
pub assembled_at: DateTime<Utc>,
/// Defaults to 0 — presumably a count of distinct sources; confirm with the
/// code that populates it.
pub source_diversity: usize,
/// Topic strings; empty by default.
pub topic_coverage: Vec<String>,
/// Defaults to 0.0. Range is not established in this file.
pub confidence_score: f64,
}
impl Default for ContextMetadata {
    /// Metadata stamped with the current UTC time, with zero source
    /// diversity, no topics and a confidence score of `0.0`.
    ///
    /// Because `assembled_at` is read from the clock, two default values
    /// created at different moments are not equal in that field.
    fn default() -> Self {
        let assembled_at = Utc::now();
        let topic_coverage = Vec::new();
        Self {
            assembled_at,
            source_diversity: 0,
            topic_coverage,
            confidence_score: 0.0,
        }
    }
}
/// Counters and timing attached to an `AssembledContext`.
///
/// Nothing in this file updates these values; the `Default` impl zeroes the
/// numeric fields and sets `retrieval_method` to `"default"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssemblyStats {
/// Time spent on assembly; zero by default.
pub assembly_time: Duration,
/// Defaults to 0 — presumably the number of candidate documents examined.
pub documents_processed: usize,
/// Defaults to 0 — presumably the number of documents kept in the context.
pub documents_selected: usize,
/// Defaults to 0. The tokenisation this count refers to is not defined here.
pub total_tokens: usize,
/// Free-form label; `"default"` unless overwritten.
pub retrieval_method: String,
}
impl Default for AssemblyStats {
    /// Statistics with a zero duration, all counters at zero and the
    /// retrieval method labelled `"default"`.
    fn default() -> Self {
        let retrieval_method = String::from("default");
        Self {
            assembly_time: Duration::ZERO,
            documents_processed: 0,
            documents_selected: 0,
            total_tokens: 0,
            retrieval_method,
        }
    }
}
/// Settings for the retrieval step. Default values (from the `Default` impl)
/// are noted on each field; how each setting is applied is decided by code
/// outside this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalConfig {
/// Document limit; defaults to 20.
pub max_documents: usize,
/// Similarity cut-off; defaults to 0.7. Scale and comparison direction are
/// not established here.
pub similarity_threshold: f64,
/// Reranking switch; defaults to `true`.
pub enable_reranking: bool,
/// Optional reranking model name; defaults to `None`.
/// NOTE(review): the default enables reranking without naming a model —
/// confirm what the consumer does in that case.
pub reranking_model: Option<String>,
/// Temporal filtering switch; defaults to `false`.
pub enable_temporal_filtering: bool,
/// Optional time window; defaults to `None`.
pub temporal_window: Option<Duration>,
}
impl Default for RetrievalConfig {
    /// Defaults: at most 20 documents, similarity threshold 0.7, reranking
    /// enabled with no model named, temporal filtering disabled with no
    /// window.
    fn default() -> Self {
        const MAX_DOCUMENTS: usize = 20;
        const SIMILARITY_THRESHOLD: f64 = 0.7;

        Self {
            max_documents: MAX_DOCUMENTS,
            similarity_threshold: SIMILARITY_THRESHOLD,
            // Feature switches.
            enable_reranking: true,
            enable_temporal_filtering: false,
            // Optional settings start unset.
            reranking_model: None,
            temporal_window: None,
        }
    }
}
/// Settings for the context-assembly step. Default values (from the `Default`
/// impl) are noted on each field; how each setting is applied is decided by
/// code outside this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssemblyConfig {
/// Token budget for the assembled context; defaults to 4000.
pub max_context_tokens: usize,
/// Overlap amount; defaults to 200. The unit is not specified here —
/// presumably tokens, to match `max_context_tokens`; confirm.
pub context_overlap: usize,
/// Recency preference switch; defaults to `true`.
pub prioritize_recent: bool,
/// Diversity switch; defaults to `true`.
pub enable_diversity: bool,
/// Diversity cut-off; defaults to 0.8. Scale is not established here.
pub diversity_threshold: f64,
}
impl Default for AssemblyConfig {
    /// Defaults: 4000 context tokens, overlap of 200, recency prioritised,
    /// diversity enabled with a threshold of 0.8.
    fn default() -> Self {
        const MAX_CONTEXT_TOKENS: usize = 4_000;
        const CONTEXT_OVERLAP: usize = 200;
        const DIVERSITY_THRESHOLD: f64 = 0.8;

        Self {
            // Size limits.
            max_context_tokens: MAX_CONTEXT_TOKENS,
            context_overlap: CONTEXT_OVERLAP,
            // Selection behaviour.
            prioritize_recent: true,
            enable_diversity: true,
            diversity_threshold: DIVERSITY_THRESHOLD,
        }
    }
}