swiftide_core/
document.rs

1//! Documents are the main data structure that is retrieved via the query pipeline
2//!
3//! Retrievers are expected to eagerly set any configured metadata on the document, with the same
4//! field name used during indexing if applicable.
5use std::fmt;
6
7use derive_builder::Builder;
8use serde::{Deserialize, Serialize};
9
10use crate::{metadata::Metadata, util::debug_long_utf8};
11
12/// A document represents a single unit of retrieved text
13#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Builder)]
14#[builder(setter(into))]
15pub struct Document {
16    #[builder(default)]
17    metadata: Metadata,
18    content: String,
19}
20
21impl From<Document> for serde_json::Value {
22    fn from(document: Document) -> Self {
23        serde_json::json!({
24            "metadata": document.metadata,
25            "content": document.content,
26        })
27    }
28}
29
30impl From<&Document> for serde_json::Value {
31    fn from(document: &Document) -> Self {
32        serde_json::json!({
33            "metadata": document.metadata,
34            "content": document.content,
35        })
36    }
37}
38
39impl PartialOrd for Document {
40    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
41        Some(self.cmp(other))
42    }
43}
44
45impl Ord for Document {
46    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
47        self.content.cmp(&other.content)
48    }
49}
50
51impl fmt::Debug for Document {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        f.debug_struct("Document")
54            .field("metadata", &self.metadata)
55            .field("content", &debug_long_utf8(&self.content, 100))
56            .finish()
57    }
58}
59
60impl<T: AsRef<str>> From<T> for Document {
61    fn from(value: T) -> Self {
62        Document::new(value.as_ref(), None)
63    }
64}
65
66impl Document {
67    pub fn new(content: impl Into<String>, metadata: Option<Metadata>) -> Self {
68        Self {
69            metadata: metadata.unwrap_or_default(),
70            content: content.into(),
71        }
72    }
73
74    pub fn builder() -> DocumentBuilder {
75        DocumentBuilder::default()
76    }
77
78    pub fn content(&self) -> &str {
79        &self.content
80    }
81
82    pub fn metadata(&self) -> &Metadata {
83        &self.metadata
84    }
85
86    pub fn bytes(&self) -> &[u8] {
87        self.content.as_bytes()
88    }
89}
90
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metadata::Metadata;

    /// Explicit content and metadata are both stored as given.
    #[test]
    fn test_document_creation() {
        let text = "Test content";
        let meta = Metadata::from([("some", "metadata")]);
        let doc = Document::new(text, Some(meta.clone()));

        assert_eq!(doc.content(), text);
        assert_eq!(doc.metadata(), &meta);
    }

    /// Omitting metadata yields the default metadata.
    #[test]
    fn test_document_default_metadata() {
        let text = "Test content";
        let doc = Document::new(text, None);

        assert_eq!(doc.content(), text);
        assert_eq!(doc.metadata(), &Metadata::default());
    }

    /// A `&str` converts into a document with default metadata.
    #[test]
    fn test_document_from_str() {
        let text = "Test content";
        let doc = Document::from(text);

        assert_eq!(doc.content(), text);
        assert_eq!(doc.metadata(), &Metadata::default());
    }

    /// The comparison operators follow content order.
    #[test]
    fn test_document_partial_ord() {
        let first = Document::new("A", None);
        let second = Document::new("B", None);

        assert!(first < second);
    }

    /// `Ord::cmp` follows content order.
    #[test]
    fn test_document_ord() {
        let first = Document::new("A", None);
        let second = Document::new("B", None);

        assert!(first.cmp(&second) == std::cmp::Ordering::Less);
    }

    /// Debug output names the struct and both of its fields.
    #[test]
    fn test_document_debug() {
        let doc = Document::new("Test content", None);
        let rendered = format!("{doc:?}");

        for needle in ["Document", "metadata", "content"] {
            assert!(rendered.contains(needle));
        }
    }

    /// An owned document converts to a JSON object with both fields.
    #[test]
    fn test_document_to_json() {
        let text = "Test content";
        let meta = Metadata::from([("some", "metadata")]);
        let doc = Document::new(text, Some(meta.clone()));
        let json = serde_json::Value::from(doc);

        assert_eq!(json["content"], text);
        assert_eq!(json["metadata"], serde_json::json!(meta));
    }

    /// A borrowed document converts to a JSON object with both fields.
    #[test]
    fn test_document_ref_to_json() {
        let text = "Test content";
        let meta = Metadata::from([("some", "metadata")]);
        let doc = Document::new(text, Some(meta.clone()));
        let json = serde_json::Value::from(&doc);

        assert_eq!(json["content"], text);
        assert_eq!(json["metadata"], serde_json::json!(meta));
    }
}