Skip to main content

adk_rag/
document.rs

1//! Data types for documents, chunks, and search results.
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7/// A source document containing text content and metadata.
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9pub struct Document {
10    /// Unique identifier for the document.
11    pub id: String,
12    /// The text content of the document.
13    pub text: String,
14    /// Key-value metadata associated with the document.
15    pub metadata: HashMap<String, String>,
16    /// Optional URI pointing to the original source.
17    #[serde(skip_serializing_if = "Option::is_none")]
18    pub source_uri: Option<String>,
19}
20
21/// A segment of a [`Document`] with its vector embedding.
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
23pub struct Chunk {
24    /// Unique identifier for the chunk.
25    pub id: String,
26    /// The text content of the chunk.
27    pub text: String,
28    /// The vector embedding for this chunk's text.
29    pub embedding: Vec<f32>,
30    /// Key-value metadata inherited from the parent document plus chunk-specific fields.
31    pub metadata: HashMap<String, String>,
32    /// The ID of the parent [`Document`].
33    pub document_id: String,
34}
35
36/// A retrieved [`Chunk`] paired with a relevance score.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SearchResult {
39    /// The retrieved chunk.
40    pub chunk: Chunk,
41    /// The similarity score (higher is more relevant).
42    pub score: f32,
43}