// SPDX-License-Identifier: Apache-2.0
//! Document source types -- MemoryType enum, RawDocument, SourceType, SyncStatus.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

7/// Closed taxonomy of memory facets -- validated at API boundary.
8/// Stored as lowercase TEXT in SQLite.
9///
10/// Reduced from 8 to 5 types: Correction, Custom, Recap removed.
11/// - Correction -> Fact (corrections are just facts that update prior knowledge)
12/// - Custom -> Fact (catch-all absorbed into the most general type)
13/// - Recap -> is_recap flag on chunks table (orthogonal to memory type)
14#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
15#[serde(rename_all = "lowercase")]
16pub enum MemoryType {
17    Identity,
18    Preference,
19    Decision,
20    Fact,
21    Goal,
22}
23
24impl MemoryType {
25    /// All valid lowercase string values (5 canonical types).
26    pub fn all_values() -> &'static [&'static str] {
27        &["identity", "preference", "decision", "fact", "goal"]
28    }
29
30    /// Check if input is the "profile" high-level alias (case-insensitive).
31    /// Used by the store flow to detect when async LLM sub-classification is needed.
32    pub fn is_profile_alias(s: &str) -> bool {
33        s.eq_ignore_ascii_case("profile")
34    }
35}
36
37impl std::fmt::Display for MemoryType {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        let s = match self {
40            Self::Identity => "identity",
41            Self::Preference => "preference",
42            Self::Decision => "decision",
43            Self::Fact => "fact",
44            Self::Goal => "goal",
45        };
46        f.write_str(s)
47    }
48}
49
50impl std::str::FromStr for MemoryType {
51    type Err = String;
52    fn from_str(s: &str) -> Result<Self, Self::Err> {
53        match s.to_lowercase().as_str() {
54            "identity" => Ok(Self::Identity),
55            "preference" => Ok(Self::Preference),
56            "decision" => Ok(Self::Decision),
57            "fact" => Ok(Self::Fact),
58            "goal" => Ok(Self::Goal),
59            // High-level alias: "knowledge" maps directly to Fact
60            "knowledge" => Ok(Self::Fact),
61            // High-level alias: "profile" needs async LLM sub-classification
62            "profile" => Err(
63                "profile requires sub-classification into identity, preference, or goal -- use classify_memory_type".to_string()
64            ),
65            // Backward compat: removed types map to Fact
66            "correction" | "custom" | "recap" => Ok(Self::Fact),
67            _ => Err(format!(
68                "invalid memory_type '{}', valid values: {}",
69                s,
70                Self::all_values().join(", ")
71            )),
72        }
73    }
74}
75
/// Stability tiers determine supersede behavior, confidence defaults, and retrieval decay.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StabilityTier {
    /// identity, preference -- supersede requires human confirmation
    Protected,
    /// fact, decision -- supersede auto-applies unconfirmed
    Standard,
    /// goal -- supersede auto-applies silently
    Ephemeral,
}

87/// Map a memory type string to its stability tier. NULL -> Ephemeral.
88pub fn stability_tier(memory_type: Option<&str>) -> StabilityTier {
89    match memory_type {
90        Some("identity") | Some("preference") => StabilityTier::Protected,
91        Some("fact") | Some("decision") => StabilityTier::Standard,
92        _ => StabilityTier::Ephemeral,
93    }
94}
95
96/// A raw document fetched from any source, ready for chunking and embedding.
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct RawDocument {
99    /// Source identifier ("gmail", "notion", "local_files", etc.)
100    pub source: String,
101    /// Unique ID within the source (message ID, page ID, file path)
102    pub source_id: String,
103    /// Document title (filename, subject line, page title)
104    pub title: String,
105    /// LLM-generated summary (stored separately from chunk content)
106    pub summary: Option<String>,
107    /// Plain text content
108    pub content: String,
109    /// Deep link back to the source (URL, file path)
110    pub url: Option<String>,
111    /// Unix timestamp of last modification
112    pub last_modified: i64,
113    /// Additional metadata
114    pub metadata: HashMap<String, String>,
115
116    // --- Memory layer fields (all optional for backward compat) ---
117    /// Memory category: "preference", "decision", "fact", "goal", "relationship"
118    #[serde(default, skip_serializing_if = "Option::is_none")]
119    pub memory_type: Option<String>,
120    /// Domain context: "work", "personal", "health", or "project:<name>"
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub domain: Option<String>,
123    /// Which AI agent stored this memory (e.g. "claude-code", "chatgpt")
124    #[serde(default, skip_serializing_if = "Option::is_none")]
125    pub source_agent: Option<String>,
126    /// Confidence score (0.0-1.0) assigned by the storing agent
127    #[serde(default, skip_serializing_if = "Option::is_none")]
128    pub confidence: Option<f32>,
129    /// Whether a human has confirmed this memory
130    #[serde(default, skip_serializing_if = "Option::is_none")]
131    pub confirmed: Option<bool>,
132    /// Stability tier: "new", "learned", or "confirmed"
133    #[serde(default, skip_serializing_if = "Option::is_none")]
134    pub stability: Option<String>,
135    /// source_id of the memory this entry supersedes (version chain)
136    #[serde(default, skip_serializing_if = "Option::is_none")]
137    pub supersedes: Option<String>,
138    /// Whether this is a pending revision awaiting human approval (Protected tier supersede)
139    #[serde(default)]
140    pub pending_revision: bool,
141    /// Link to a knowledge graph entity (nullable, cascade handled manually)
142    #[serde(default, skip_serializing_if = "Option::is_none")]
143    pub entity_id: Option<String>,
144    /// Quality assessment: "low", "medium", "high" (NULL = unassessed)
145    #[serde(default, skip_serializing_if = "Option::is_none")]
146    pub quality: Option<String>,
147    /// Whether this memory is a recap/summary of other memories
148    #[serde(default)]
149    pub is_recap: bool,
150    /// Deprecated: enrichment status is now derived from the `enrichment_steps` table.
151    /// This field is ignored on INSERT. Kept for API compatibility with downstream consumers.
152    #[serde(default = "default_enrichment_status")]
153    pub enrichment_status: String,
154    /// How superseded content is handled: "hide" (default) or "archive" (visible but muted)
155    #[serde(default = "default_supersede_mode")]
156    pub supersede_mode: String,
157    /// JSON object with type-specific structured fields (e.g. {"claim": "...", "context": "..."})
158    #[serde(default, skip_serializing_if = "Option::is_none")]
159    pub structured_fields: Option<String>,
160    /// LLM-generated question this memory answers -- embedded for vector search
161    #[serde(default, skip_serializing_if = "Option::is_none")]
162    pub retrieval_cue: Option<String>,
163    /// Original prose content, preserved when structured_fields are promoted to primary content
164    #[serde(default, skip_serializing_if = "Option::is_none")]
165    pub source_text: Option<String>,
166}
167
/// Serde default for `RawDocument::enrichment_status`: `"raw"`.
fn default_enrichment_status() -> String {
    "raw".to_string()
}

/// Serde default for `RawDocument::supersede_mode`: `"hide"`.
fn default_supersede_mode() -> String {
    "hide".to_string()
}

176impl Default for RawDocument {
177    fn default() -> Self {
178        Self {
179            source: String::new(),
180            source_id: String::new(),
181            title: String::new(),
182            summary: None,
183            content: String::new(),
184            url: None,
185            last_modified: 0,
186            metadata: HashMap::new(),
187            memory_type: None,
188            domain: None,
189            source_agent: None,
190            confidence: None,
191            confirmed: None,
192            stability: None,
193            supersedes: None,
194            pending_revision: false,
195            entity_id: None,
196            quality: None,
197            is_recap: false,
198            enrichment_status: "raw".to_string(),
199            supersede_mode: "hide".to_string(),
200            structured_fields: None,
201            retrieval_cue: None,
202            source_text: None,
203        }
204    }
205}
206
207/// Persisted source type.
208#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
209#[serde(rename_all = "lowercase")]
210pub enum SourceType {
211    Obsidian,
212    Directory,
213}
214
215impl SourceType {
216    pub fn as_str(&self) -> &'static str {
217        match self {
218            Self::Obsidian => "obsidian",
219            Self::Directory => "directory",
220        }
221    }
222}
223
224/// Sync status for a connected source.
225#[derive(Debug, Clone, Serialize, Deserialize)]
226pub enum SyncStatus {
227    Active,
228    Paused,
229    Error(String),
230}