adk_rag/document.rs
//! Data types for documents, chunks, and search results.
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7/// A source document containing text content and metadata.
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9pub struct Document {
10 /// Unique identifier for the document.
11 pub id: String,
12 /// The text content of the document.
13 pub text: String,
14 /// Key-value metadata associated with the document.
15 pub metadata: HashMap<String, String>,
16 /// Optional URI pointing to the original source.
17 #[serde(skip_serializing_if = "Option::is_none")]
18 pub source_uri: Option<String>,
19}
20
21/// A segment of a [`Document`] with its vector embedding.
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
23pub struct Chunk {
24 /// Unique identifier for the chunk.
25 pub id: String,
26 /// The text content of the chunk.
27 pub text: String,
28 /// The vector embedding for this chunk's text.
29 pub embedding: Vec<f32>,
30 /// Key-value metadata inherited from the parent document plus chunk-specific fields.
31 pub metadata: HashMap<String, String>,
32 /// The ID of the parent [`Document`].
33 pub document_id: String,
34}
35
36/// A retrieved [`Chunk`] paired with a relevance score.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SearchResult {
39 /// The retrieved chunk.
40 pub chunk: Chunk,
41 /// The similarity score (higher is more relevant).
42 pub score: f32,
43}