context_core/document/
document.rs1use serde::{Deserialize, Serialize};
2use thiserror::Error;
3
4use crate::types::identifiers::{DocumentId, DocumentVersion};
5use super::metadata::Metadata;
6
7#[derive(Debug, Error)]
8pub enum DocumentError {
9 #[error("Content must be valid UTF-8")]
10 InvalidUtf8(#[from] std::string::FromUtf8Error),
11}
12
13#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
15pub struct Document {
16 pub id: DocumentId,
17 pub version: DocumentVersion,
18 pub source: String,
19 pub content: String,
20 pub metadata: Metadata,
21}
22
23impl Document {
24 pub fn ingest(
29 id: DocumentId,
30 source: String,
31 raw_content: Vec<u8>,
32 metadata: Metadata,
33 ) -> Result<Self, DocumentError> {
34 let content = String::from_utf8(raw_content)?;
35
36 let version = DocumentVersion::from_content(content.as_bytes());
38
39 Ok(Document {
40 id,
41 version,
42 source,
43 content,
44 metadata,
45 })
46 }
47}