llm_toolkit/
retrieval.rs

1//! Data structures for Retrieval-Augmented Generation (RAG).
2//!
3//! This module provides core data structures for RAG integration.
4//! Retrieval and ingestion logic should be implemented as `Agent`s,
5//! not as separate traits, allowing for better composability and
6//! integration with the agent ecosystem.
7//!
8//! # Design Philosophy
9//!
10//! Instead of defining `Retriever` and `Ingestor` traits, implement
11//! retrieval and ingestion as regular agents:
12//!
13//! ## Retrieval Pattern
14//!
15//! Retrievers return `Vec<Document>` and can be composed with `RetrievalAwareAgent`:
16//!
17//! ```ignore
18//! // Retriever as Agent
19//! impl Agent for MyVectorStore {
20//!     type Output = Vec<Document>;
21//!     async fn execute(&self, payload: Payload) -> Result<Vec<Document>, AgentError> {
22//!         let query = payload.to_text();
23//!         // Perform semantic search...
24//!         Ok(documents)
25//!     }
26//! }
27//!
28//! // Compose with RetrievalAwareAgent
29//! let retriever = MyVectorStore::new();
30//! let base_agent = MyLLMAgent::new();
31//! let rag_agent = RetrievalAwareAgent::new(retriever, base_agent);
32//! ```
33//!
34//! ## Ingestion Pattern
35//!
36//! Ingest agents accept `Attachment`s from the payload and handle all implementation details
37//! (upload, store creation, metadata management) internally:
38//!
39//! ```ignore
40//! use llm_toolkit::attachment::Attachment;
41//!
42//! // Gemini Files API style
43//! struct GeminiIngestAgent {
44//!     client: GeminiClient,
45//!     store_name: String,  // Internal state
46//! }
47//!
48//! impl Agent for GeminiIngestAgent {
49//!     type Output = IngestResult;  // Can be any type
50//!
51//!     async fn execute(&self, payload: Payload) -> Result<IngestResult, AgentError> {
52//!         let attachments = payload.attachments();
53//!         let mut file_names = Vec::new();
54//!
55//!         for attachment in attachments {
56//!             // 1. Upload file
57//!             let file = self.client.files.upload(attachment).await?;
58//!
59//!             // 2. Import into store (internal detail)
60//!             self.client.stores.import_file(&self.store_name, &file.name).await?;
61//!
62//!             file_names.push(file.name);
63//!         }
64//!
65//!         Ok(IngestResult { file_names })
66//!     }
67//! }
68//!
69//! // Usage - just pass files
70//! let ingest_agent = GeminiIngestAgent::new(client, "my-store");
71//! let payload = Payload::attachment(Attachment::local("document.pdf"));
72//! let result = ingest_agent.execute(payload).await?;
73//! ```
74use serde::{Deserialize, Serialize};
75
76/// Represents a piece of retrieved content from a knowledge source.
77///
78/// This is typically returned by retriever agents (agents with `Output = Vec<Document>`).
79/// Documents can be attached to payloads and will be formatted by `PersonaAgent`
80/// into a "Retrieved Context" section in the prompt.
81///
82/// # Examples
83///
84/// ```rust
85/// use llm_toolkit::retrieval::Document;
86///
87/// let doc = Document {
88///     content: "Rust is a systems programming language.".to_string(),
89///     source: Some("rust_intro.md".to_string()),
90///     score: Some(0.92),
91/// };
92/// ```
93#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
94pub struct Document {
95    /// The textual content of the document
96    pub content: String,
97
98    /// Optional source identifier (e.g., file path, URL, document ID)
99    pub source: Option<String>,
100
101    /// Optional relevance or similarity score (higher = more relevant)
102    #[serde(skip_serializing_if = "Option::is_none")]
103    pub score: Option<f32>,
104}
105
106impl Document {
107    /// Creates a new document with the given content.
108    pub fn new(content: impl Into<String>) -> Self {
109        Self {
110            content: content.into(),
111            source: None,
112            score: None,
113        }
114    }
115
116    /// Sets the source identifier for this document.
117    pub fn with_source(mut self, source: impl Into<String>) -> Self {
118        self.source = Some(source.into());
119        self
120    }
121
122    /// Sets the relevance score for this document.
123    pub fn with_score(mut self, score: f32) -> Self {
124        self.score = Some(score);
125        self
126    }
127}