llm_toolkit/retrieval.rs
1//! Data structures for Retrieval-Augmented Generation (RAG).
2//!
3//! This module provides core data structures for RAG integration.
4//! Retrieval and ingestion logic should be implemented as `Agent`s,
5//! not as separate traits, allowing for better composability and
6//! integration with the agent ecosystem.
7//!
8//! # Design Philosophy
9//!
10//! Instead of defining `Retriever` and `Ingestor` traits, implement
11//! retrieval and ingestion as regular agents:
12//!
13//! ## Retrieval Pattern
14//!
15//! Retrievers return `Vec<Document>` and can be composed with `RetrievalAwareAgent`:
16//!
17//! ```ignore
18//! // Retriever as Agent
19//! impl Agent for MyVectorStore {
20//! type Output = Vec<Document>;
21//! async fn execute(&self, payload: Payload) -> Result<Vec<Document>, AgentError> {
22//! let query = payload.to_text();
23//! // Perform semantic search...
24//! Ok(documents)
25//! }
26//! }
27//!
28//! // Compose with RetrievalAwareAgent
29//! let retriever = MyVectorStore::new();
30//! let base_agent = MyLLMAgent::new();
31//! let rag_agent = RetrievalAwareAgent::new(retriever, base_agent);
32//! ```
33//!
34//! ## Ingestion Pattern
35//!
//! Ingest agents accept `Attachment`s from the payload and handle all implementation details
//! (upload, store creation, metadata management) internally:
38//!
39//! ```ignore
40//! use llm_toolkit::attachment::Attachment;
41//!
42//! // Gemini Files API style
43//! struct GeminiIngestAgent {
44//! client: GeminiClient,
45//! store_name: String, // Internal state
46//! }
47//!
48//! impl Agent for GeminiIngestAgent {
49//! type Output = IngestResult; // Can be any type
50//!
51//! async fn execute(&self, payload: Payload) -> Result<IngestResult, AgentError> {
52//! let attachments = payload.attachments();
53//! let mut file_names = Vec::new();
54//!
55//! for attachment in attachments {
56//! // 1. Upload file
57//! let file = self.client.files.upload(attachment).await?;
58//!
59//! // 2. Import into store (internal detail)
60//! self.client.stores.import_file(&self.store_name, &file.name).await?;
61//!
62//! file_names.push(file.name);
63//! }
64//!
65//! Ok(IngestResult { file_names })
66//! }
67//! }
68//!
69//! // Usage - just pass files
//! let ingest_agent = GeminiIngestAgent::new(client, "my-store");
//! let payload = Payload::attachment(Attachment::local("document.pdf"));
//! let result = ingest_agent.execute(payload).await?;
73//! ```
74use serde::{Deserialize, Serialize};
75
76/// Represents a piece of retrieved content from a knowledge source.
77///
78/// This is typically returned by retriever agents (agents with `Output = Vec<Document>`).
79/// Documents can be attached to payloads and will be formatted by `PersonaAgent`
80/// into a "Retrieved Context" section in the prompt.
81///
82/// # Examples
83///
84/// ```rust
85/// use llm_toolkit::retrieval::Document;
86///
87/// let doc = Document {
88/// content: "Rust is a systems programming language.".to_string(),
89/// source: Some("rust_intro.md".to_string()),
90/// score: Some(0.92),
91/// };
92/// ```
93#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
94pub struct Document {
95 /// The textual content of the document
96 pub content: String,
97
98 /// Optional source identifier (e.g., file path, URL, document ID)
99 pub source: Option<String>,
100
101 /// Optional relevance or similarity score (higher = more relevant)
102 #[serde(skip_serializing_if = "Option::is_none")]
103 pub score: Option<f32>,
104}
105
106impl Document {
107 /// Creates a new document with the given content.
108 pub fn new(content: impl Into<String>) -> Self {
109 Self {
110 content: content.into(),
111 source: None,
112 score: None,
113 }
114 }
115
116 /// Sets the source identifier for this document.
117 pub fn with_source(mut self, source: impl Into<String>) -> Self {
118 self.source = Some(source.into());
119 self
120 }
121
122 /// Sets the relevance score for this document.
123 pub fn with_score(mut self, score: f32) -> Self {
124 self.score = Some(score);
125 self
126 }
127}