swiftide_core/
document.rs

//! Documents are the main data structure retrieved via the query pipeline.
//!
//! Retrievers are expected to eagerly set any configured metadata on the document, with the same
//! field name used during indexing if applicable.
5use std::fmt;
6
7use derive_builder::Builder;
8use serde::{Deserialize, Serialize};
9
10use crate::{metadata::Metadata, util::debug_long_utf8};
11
12/// A document represents a single unit of retrieved text
13#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Builder)]
14#[builder(setter(into))]
15pub struct Document {
16    #[builder(default)]
17    metadata: Metadata,
18    content: String,
19}
20
21impl From<Document> for serde_json::Value {
22    fn from(document: Document) -> Self {
23        serde_json::json!({
24            "metadata": document.metadata,
25            "content": document.content,
26        })
27    }
28}
29
30impl From<&Document> for serde_json::Value {
31    fn from(document: &Document) -> Self {
32        serde_json::json!({
33            "metadata": document.metadata,
34            "content": document.content,
35        })
36    }
37}
38
39impl PartialOrd for Document {
40    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
41        Some(self.content.cmp(&other.content))
42    }
43}
44
45impl Ord for Document {
46    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
47        self.content.cmp(&other.content)
48    }
49}
50
51impl fmt::Debug for Document {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        f.debug_struct("Document")
54            .field("metadata", &self.metadata)
55            .field("content", &debug_long_utf8(&self.content, 100))
56            .finish()
57    }
58}
59
60impl<T: AsRef<str>> From<T> for Document {
61    fn from(value: T) -> Self {
62        Document::new(value.as_ref(), None)
63    }
64}
65
66impl Document {
67    pub fn new(content: impl Into<String>, metadata: Option<Metadata>) -> Self {
68        Self {
69            metadata: metadata.unwrap_or_default(),
70            content: content.into(),
71        }
72    }
73
74    pub fn builder() -> DocumentBuilder {
75        DocumentBuilder::default()
76    }
77
78    pub fn content(&self) -> &str {
79        &self.content
80    }
81
82    pub fn metadata(&self) -> &Metadata {
83        &self.metadata
84    }
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metadata::Metadata;

    // Construction with explicit metadata keeps both the content and the metadata.
    #[test]
    fn test_document_creation() {
        let text = "Test content";
        let expected_metadata = Metadata::from([("some", "metadata")]);

        let doc = Document::new(text, Some(expected_metadata.clone()));

        assert_eq!(doc.content(), text);
        assert_eq!(doc.metadata(), &expected_metadata);
    }

    // Passing `None` for the metadata falls back to the default metadata.
    #[test]
    fn test_document_default_metadata() {
        let text = "Test content";

        let doc = Document::new(text, None);

        assert_eq!(doc.content(), text);
        assert_eq!(doc.metadata(), &Metadata::default());
    }

    // Converting from a string slice yields the same content and default metadata.
    #[test]
    fn test_document_from_str() {
        let text = "Test content";

        let doc = Document::from(text);

        assert_eq!(doc.content(), text);
        assert_eq!(doc.metadata(), &Metadata::default());
    }

    // The comparison operators follow the content.
    #[test]
    fn test_document_partial_ord() {
        let first = Document::new("A", None);
        let second = Document::new("B", None);

        assert!(first < second);
    }

    // `Ord::cmp` follows the content.
    #[test]
    fn test_document_ord() {
        let first = Document::new("A", None);
        let second = Document::new("B", None);

        assert_eq!(first.cmp(&second), std::cmp::Ordering::Less);
    }

    // The debug representation names the type and both fields.
    #[test]
    fn test_document_debug() {
        let doc = Document::new("Test content", None);

        let rendered = format!("{doc:?}");

        for expected_fragment in ["Document", "metadata", "content"] {
            assert!(rendered.contains(expected_fragment));
        }
    }

    // An owned document converts into a JSON object exposing both fields.
    #[test]
    fn test_document_to_json() {
        let text = "Test content";
        let expected_metadata = Metadata::from([("some", "metadata")]);
        let doc = Document::new(text, Some(expected_metadata.clone()));

        let json = serde_json::Value::from(doc);

        assert_eq!(json["content"], text);
        assert_eq!(json["metadata"], serde_json::json!(expected_metadata));
    }

    // A borrowed document converts into the same JSON object.
    #[test]
    fn test_document_ref_to_json() {
        let text = "Test content";
        let expected_metadata = Metadata::from([("some", "metadata")]);
        let doc = Document::new(text, Some(expected_metadata.clone()));

        let json = serde_json::Value::from(&doc);

        assert_eq!(json["content"], text);
        assert_eq!(json["metadata"], serde_json::json!(expected_metadata));
    }
}