// context_core/document/document.rs

1use serde::{Deserialize, Serialize};
2use thiserror::Error;
3
4use crate::types::identifiers::{DocumentId, DocumentVersion};
5use super::metadata::Metadata;
6
7#[derive(Debug, Error)]
8pub enum DocumentError {
9    #[error("Content must be valid UTF-8")]
10    InvalidUtf8(#[from] std::string::FromUtf8Error),
11}
12
13/// The atomic unit of content.
14#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
15pub struct Document {
16    pub id: DocumentId,
17    pub version: DocumentVersion,
18    pub source: String,
19    pub content: String,
20    pub metadata: Metadata,
21}
22
23impl Document {
24    /// Ingest raw bytes into a Document.
25    ///
26    /// This is the ONLY way to construct a Document.
27    /// It enforces all invariants: validation, versioning, and immutability.
28    pub fn ingest(
29        id: DocumentId,
30        source: String,
31        raw_content: Vec<u8>,
32        metadata: Metadata,
33    ) -> Result<Self, DocumentError> {
34        let content = String::from_utf8(raw_content)?;
35
36        // Spec: Version computed on verified content
37        let version = DocumentVersion::from_content(content.as_bytes());
38
39        Ok(Document {
40            id,
41            version,
42            source,
43            content,
44            metadata,
45        })
46    }
47}