1#![deny(missing_docs)]
68#![deny(clippy::all)]
69#![warn(clippy::pedantic)]
70#![allow(clippy::missing_errors_doc)]
71#![allow(clippy::missing_panics_doc)]
72#![allow(clippy::module_name_repetitions)]
73#![allow(clippy::cast_precision_loss)]
74#![allow(clippy::cast_possible_truncation)]
75#![allow(clippy::doc_markdown)]
76#![allow(clippy::map_unwrap_or)]
77#![allow(clippy::redundant_closure_for_method_calls)]
78#![allow(clippy::unnecessary_literal_bound)]
79#![allow(clippy::cloned_instead_of_copied)]
80#![allow(clippy::must_use_candidate)]
81#![allow(clippy::assigning_clones)]
82#![allow(clippy::manual_div_ceil)]
83#![allow(clippy::unnecessary_map_or)]
84#![allow(clippy::derivable_impls)]
85
86pub mod chunk;
87#[cfg(feature = "compression")]
88pub mod compressed;
89pub mod embed;
90pub mod error;
91pub mod fusion;
92pub mod index;
93pub mod metrics;
94pub mod pipeline;
95pub mod rerank;
96pub mod retrieve;
97
98pub use chunk::{
99 Chunk, ChunkId, ChunkMetadata, Chunker, ChunkingStrategy, FixedSizeChunker, ParagraphChunker,
100 RecursiveChunker, SemanticChunker, SentenceChunker, StructuralChunker,
101};
102#[cfg(feature = "compression")]
103pub use compressed::Compression;
104pub use embed::{Embedder, EmbeddingConfig, PoolingStrategy};
105#[cfg(feature = "embeddings")]
106pub use embed::{EmbeddingModelType, FastEmbedder};
107pub use error::{Error, Result};
108pub use fusion::FusionStrategy;
109pub use index::{BM25Index, SparseIndex, VectorStore};
110pub use metrics::{AggregatedMetrics, RetrievalMetrics};
111pub use pipeline::{ContextAssembler, RagPipeline};
112pub use rerank::Reranker;
113pub use retrieve::{HybridRetriever, RetrievalResult};
114
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
117pub struct DocumentId(pub uuid::Uuid);
118
119impl DocumentId {
120 #[must_use]
122 pub fn new() -> Self {
123 Self(uuid::Uuid::new_v4())
124 }
125}
126
127impl Default for DocumentId {
128 fn default() -> Self {
129 Self::new()
130 }
131}
132
133impl std::fmt::Display for DocumentId {
134 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135 write!(f, "{}", self.0)
136 }
137}
138
139#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
141pub struct Document {
142 pub id: DocumentId,
144 pub content: String,
146 pub title: Option<String>,
148 pub source: Option<String>,
150 pub metadata: std::collections::HashMap<String, serde_json::Value>,
152}
153
154impl Document {
155 #[must_use]
157 pub fn new(content: impl Into<String>) -> Self {
158 Self {
159 id: DocumentId::new(),
160 content: content.into(),
161 title: None,
162 source: None,
163 metadata: std::collections::HashMap::new(),
164 }
165 }
166
167 #[must_use]
169 pub fn with_title(mut self, title: impl Into<String>) -> Self {
170 self.title = Some(title.into());
171 self
172 }
173
174 #[must_use]
176 pub fn with_source(mut self, source: impl Into<String>) -> Self {
177 self.source = Some(source.into());
178 self
179 }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Two separately generated identifiers must not compare equal.
    #[test]
    fn test_document_id_unique() {
        let first = DocumentId::new();
        let second = DocumentId::new();
        assert_ne!(first, second);
    }

    /// A new document keeps its content and starts without title or source.
    #[test]
    fn test_document_creation() {
        let document = Document::new("Hello, world!");
        assert_eq!(document.content, "Hello, world!");
        assert_eq!(document.title, None);
        assert_eq!(document.source, None);
    }

    /// The `with_*` methods populate the optional fields and leave the
    /// content untouched.
    #[test]
    fn test_document_builder() {
        let document = Document::new("Content")
            .with_title("Test Title")
            .with_source("https://example.com");

        assert_eq!(document.content, "Content");
        assert_eq!(document.title.as_deref(), Some("Test Title"));
        assert_eq!(document.source.as_deref(), Some("https://example.com"));
    }

    /// The displayed identifier is non-empty and contains a hyphen.
    #[test]
    fn test_document_id_display() {
        let rendered = DocumentId::new().to_string();
        assert!(!rendered.is_empty());
        assert!(rendered.contains('-'));
    }

    /// An identifier survives a JSON round trip unchanged.
    #[test]
    fn test_document_id_serialization() {
        let original = DocumentId::new();
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: DocumentId = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}