Skip to main content

cognis_rag/
document.rs

1//! `Document` — the unit of RAG: a piece of text plus typed metadata.
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7/// A piece of text that flows through loaders → splitters → embeddings →
8/// vector store → retrievers.
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
10pub struct Document {
11    /// Optional ID — set by stores once persisted, by loaders that have a
12    /// natural key (e.g. file path), or left `None` for ephemeral chunks.
13    #[serde(default, skip_serializing_if = "Option::is_none")]
14    pub id: Option<String>,
15
16    /// The text content. Loaders fill this from the source; splitters carve
17    /// it into chunks.
18    pub content: String,
19
20    /// Free-form metadata (e.g. `{ "source": "file.txt", "page": 3 }`).
21    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
22    pub metadata: HashMap<String, serde_json::Value>,
23}
24
25impl Document {
26    /// Construct from text only.
27    pub fn new(content: impl Into<String>) -> Self {
28        Self {
29            id: None,
30            content: content.into(),
31            metadata: HashMap::new(),
32        }
33    }
34
35    /// Set the ID (builder-style).
36    pub fn with_id(mut self, id: impl Into<String>) -> Self {
37        self.id = Some(id.into());
38        self
39    }
40
41    /// Insert one metadata key (builder-style).
42    pub fn with_metadata(
43        mut self,
44        key: impl Into<String>,
45        value: impl Into<serde_json::Value>,
46    ) -> Self {
47        self.metadata.insert(key.into(), value.into());
48        self
49    }
50
51    /// Replace metadata wholesale (builder-style).
52    pub fn with_metadata_map(mut self, m: HashMap<String, serde_json::Value>) -> Self {
53        self.metadata = m;
54        self
55    }
56}
57
58impl From<String> for Document {
59    fn from(s: String) -> Self {
60        Self::new(s)
61    }
62}
63
64impl From<&str> for Document {
65    fn from(s: &str) -> Self {
66        Self::new(s.to_string())
67    }
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Chained builder calls each set their own field and leave the rest alone.
    #[test]
    fn builder_chains() {
        let doc = Document::new("hello")
            .with_id("doc-1")
            .with_metadata("source", "file.txt");

        assert_eq!(doc.content, "hello");
        assert_eq!(doc.id, Some(String::from("doc-1")));
        assert_eq!(
            doc.metadata.get("source"),
            Some(&serde_json::Value::from("file.txt"))
        );
    }

    /// Converting from `&str` fills `content` and leaves metadata empty.
    #[test]
    fn from_str_works() {
        let doc = Document::from("hello");

        assert!(doc.metadata.is_empty());
        assert_eq!(doc.content, "hello");
    }

    /// A document with no ID and no metadata serializes without those keys.
    #[test]
    fn serde_skips_optional_when_empty() {
        let json = serde_json::to_string(&Document::new("x")).unwrap();

        assert!(!json.contains("\"id\""));
        assert!(!json.contains("\"metadata\""));
    }
}