Skip to main content

shodh_memory/mif/
export.rs

1//! Streaming export from shodh internals to MifDocument.
2//!
3//! Converts internal Memory, Todo, Project, Reminder, and Graph data
4//! into the vendor-neutral MIF v2 schema. Entity types are preserved
5//! by looking up EntityNode labels from the knowledge graph.
6
7use std::collections::HashMap;
8
9use anyhow::Result;
10use chrono::{DateTime, Utc};
11use sha2::{Digest, Sha256};
12use uuid::Uuid;
13
14use crate::graph_memory::{EntityLabel, GraphMemory, RelationshipEdge};
15use crate::memory::types::{
16    ExperienceType, Memory, ProspectiveTask, ProspectiveTrigger, SourceType, Todo, TodoPriority,
17    TodoStatus,
18};
19use crate::memory::Project;
20
21use super::pii::PiiPatterns;
22use super::schema::*;
23
/// Options controlling what gets exported.
///
/// Passed by reference to [`build_document`], which reads every field.
#[derive(Debug, Clone)]
pub struct ExportOptions {
    /// Identifier of the user the export belongs to; copied verbatim into the
    /// document's export metadata.
    pub user_id: String,
    /// When `true`, each memory's embedding vector (if it has one) is written
    /// into the document; when `false`, embeddings are omitted.
    pub include_embeddings: bool,
    /// When `true` and a graph is supplied, the knowledge graph section is
    /// included in the document.
    pub include_graph: bool,
    /// When `true`, memory content is passed through the PII redactor before
    /// being exported and the findings are recorded in the privacy block.
    pub redact_pii: bool,
    /// Optional lower bound: memories whose `created_at` is earlier than this
    /// instant are skipped. `None` exports all supplied memories.
    pub since: Option<DateTime<Utc>>,
}
33
34/// Build a complete MIF v2 document from shodh internals.
35///
36/// This function takes pre-fetched data (memories, graph, todos, etc.) so the
37/// caller can control locking and streaming. Entity types are resolved from the
38/// graph when available; otherwise fall back to "unknown".
39pub fn build_document(
40    memories: &[std::sync::Arc<Memory>],
41    graph: Option<&GraphMemory>,
42    todos: &[Todo],
43    projects: &[Project],
44    reminders: &[ProspectiveTask],
45    options: &ExportOptions,
46) -> Result<MifDocument> {
47    let pii = if options.redact_pii {
48        Some(PiiPatterns::new())
49    } else {
50        None
51    };
52
53    let mut privacy = MifPrivacy {
54        pii_detected: false,
55        secrets_detected: false,
56        redacted_fields: Vec::new(),
57    };
58
59    // Build entity lookup maps from graph for type resolution.
60    // Two maps: UUID-based (primary) and name-based (fallback), because
61    // entity_refs on memories may store UUIDs that don't match graph entity UUIDs
62    // when the NER pipeline created refs before the graph entity was consolidated.
63    let (entity_map_by_id, entity_map_by_name): (
64        HashMap<Uuid, Vec<String>>,
65        HashMap<String, Vec<String>>,
66    ) = if let Some(g) = graph {
67        let entities = g.get_all_entities().unwrap_or_default();
68        let by_id: HashMap<Uuid, Vec<String>> = entities
69            .iter()
70            .map(|e| {
71                let types: Vec<String> = e.labels.iter().map(label_to_string).collect();
72                (e.uuid, types)
73            })
74            .collect();
75        let by_name: HashMap<String, Vec<String>> = entities
76            .iter()
77            .map(|e| {
78                let types: Vec<String> = e.labels.iter().map(label_to_string).collect();
79                (e.name.to_lowercase(), types)
80            })
81            .collect();
82        (by_id, by_name)
83    } else {
84        (HashMap::new(), HashMap::new())
85    };
86
87    // Convert memories
88    let mut mif_memories = Vec::with_capacity(memories.len());
89    let mut vendor_memory_meta: HashMap<String, serde_json::Value> = HashMap::new();
90
91    for m in memories {
92        if let Some(ref since) = options.since {
93            if m.created_at < *since {
94                continue;
95            }
96        }
97
98        let (content, _redactions) = if let Some(ref patterns) = pii {
99            let (redacted, records, found) = patterns.redact(&m.experience.content);
100            if found {
101                privacy.pii_detected = true;
102                if patterns.has_secrets(&m.experience.content) {
103                    privacy.secrets_detected = true;
104                }
105                for r in &records {
106                    if !privacy.redacted_fields.contains(&r.redaction_type) {
107                        privacy.redacted_fields.push(r.redaction_type.clone());
108                    }
109                }
110            }
111            (
112                redacted,
113                if records.is_empty() {
114                    None
115                } else {
116                    Some(records)
117                },
118            )
119        } else {
120            (m.experience.content.clone(), None)
121        };
122
123        // Resolve entity types from graph (UUID lookup, then name fallback)
124        let entities: Vec<MifEntityRef> = m
125            .entity_refs
126            .iter()
127            .map(|eref| {
128                let entity_type = entity_map_by_id
129                    .get(&eref.entity_id)
130                    .and_then(|types| types.first().cloned())
131                    .or_else(|| {
132                        entity_map_by_name
133                            .get(&eref.name.to_lowercase())
134                            .and_then(|types| types.first().cloned())
135                    })
136                    .unwrap_or_else(|| "unknown".to_string());
137                MifEntityRef {
138                    name: eref.name.clone(),
139                    entity_type,
140                    confidence: 1.0,
141                }
142            })
143            .collect();
144
145        // Also include experience.entities that didn't make it to entity_refs,
146        // resolving types from the graph name map when possible.
147        let ref_names: std::collections::HashSet<&str> =
148            m.entity_refs.iter().map(|r| r.name.as_str()).collect();
149        let mut extra_entities: Vec<MifEntityRef> = m
150            .experience
151            .entities
152            .iter()
153            .filter(|e| !ref_names.contains(e.as_str()))
154            .map(|e| {
155                let entity_type = entity_map_by_name
156                    .get(&e.to_lowercase())
157                    .and_then(|types| types.first().cloned())
158                    .unwrap_or_else(|| "unknown".to_string());
159                MifEntityRef {
160                    name: e.clone(),
161                    entity_type,
162                    confidence: 0.8,
163                }
164            })
165            .collect();
166
167        let mut all_entities = entities;
168        all_entities.append(&mut extra_entities);
169
170        let embeddings = if options.include_embeddings {
171            m.experience.embeddings.as_ref().map(|v| MifEmbedding {
172                model: "minilm-l6-v2".to_string(),
173                dimensions: v.len(),
174                vector: v.clone(),
175                normalized: true,
176            })
177        } else {
178            None
179        };
180
181        let (source_type, session_id) = m
182            .experience
183            .context
184            .as_ref()
185            .map(|ctx| {
186                let src = source_type_to_string(&ctx.source.source_type);
187                let sess = ctx.episode.episode_id.clone();
188                (src, sess)
189            })
190            .unwrap_or_else(|| ("unknown".to_string(), None));
191
192        let agent_name = m
193            .experience
194            .context
195            .as_ref()
196            .and_then(|ctx| ctx.source.source_id.clone());
197
198        let tags: Vec<String> = m.experience.tags.clone();
199
200        let memory_type = experience_type_to_string(&m.experience.experience_type);
201
202        let related_memory_ids: Vec<Uuid> = m
203            .experience
204            .related_memories
205            .iter()
206            .map(|id| id.0)
207            .collect();
208        let related_todo_ids: Vec<Uuid> = m.related_todo_ids.iter().map(|id| id.0).collect();
209
210        mif_memories.push(MifMemory {
211            id: m.id.0,
212            content,
213            memory_type,
214            created_at: m.created_at,
215            tags,
216            entities: all_entities,
217            metadata: m.experience.metadata.clone(),
218            embeddings,
219            source: Some(MifSource {
220                source_type,
221                session_id,
222                agent: agent_name,
223            }),
224            parent_id: m.parent_id.as_ref().map(|p| p.0),
225            related_memory_ids,
226            related_todo_ids,
227            agent_id: m.agent_id.clone(),
228            external_id: m.external_id.clone(),
229            version: m.version,
230        });
231
232        // Vendor extension: shodh-specific metadata per memory
233        vendor_memory_meta.insert(
234            m.id.0.to_string(),
235            serde_json::json!({
236                "importance": m.importance(),
237                "access_count": m.access_count(),
238                "tier": format!("{:?}", m.tier).to_lowercase(),
239                "activation": m.importance(), // activation approximated by importance
240                "last_accessed": m.last_accessed().to_rfc3339(),
241            }),
242        );
243    }
244
245    // Convert knowledge graph
246    let knowledge_graph = if options.include_graph {
247        if let Some(g) = graph {
248            Some(build_knowledge_graph(g)?)
249        } else {
250            None
251        }
252    } else {
253        None
254    };
255
256    // Convert todos
257    let mif_todos: Vec<MifTodo> = todos.iter().map(convert_todo).collect();
258
259    // Convert projects
260    let mif_projects: Vec<MifProject> = projects.iter().map(convert_project).collect();
261
262    // Convert reminders
263    let mif_reminders: Vec<MifReminder> = reminders.iter().map(convert_reminder).collect();
264
265    // Build vendor extensions
266    let mut vendor_extensions: HashMap<String, serde_json::Value> = HashMap::new();
267    let mut edge_metadata: HashMap<String, serde_json::Value> = HashMap::new();
268
269    if let Some(g) = graph {
270        for edge in g.get_all_relationships().unwrap_or_default() {
271            edge_metadata.insert(
272                edge.uuid.to_string(),
273                serde_json::json!({
274                    "strength": edge.strength,
275                    "ltp_status": format!("{:?}", edge.ltp_status),
276                    "tier": format!("{:?}", edge.tier),
277                    "activation_count": edge.activation_count,
278                    "last_activated": edge.last_activated.to_rfc3339(),
279                }),
280            );
281        }
282    }
283
284    vendor_extensions.insert(
285        "shodh-memory".to_string(),
286        serde_json::json!({
287            "version": env!("CARGO_PKG_VERSION"),
288            "memory_metadata": vendor_memory_meta,
289            "edge_metadata": edge_metadata,
290        }),
291    );
292
293    // Build checksum
294    let mut hasher = Sha256::new();
295    hasher.update(format!(
296        "{}:{}:{}:{}",
297        mif_memories.len(),
298        mif_todos.len(),
299        mif_projects.len(),
300        mif_reminders.len()
301    ));
302    let checksum = format!("sha256:{}", hex::encode(hasher.finalize()));
303
304    let now = Utc::now();
305    let export_id = Uuid::new_v4().to_string();
306
307    Ok(MifDocument {
308        mif_version: "2.0".to_string(),
309        generator: MifGenerator {
310            name: "shodh-memory".to_string(),
311            version: env!("CARGO_PKG_VERSION").to_string(),
312        },
313        export_meta: MifExportMeta {
314            id: export_id,
315            created_at: now,
316            user_id: options.user_id.clone(),
317            checksum,
318            privacy: Some(privacy),
319        },
320        memories: mif_memories,
321        knowledge_graph,
322        todos: mif_todos,
323        projects: mif_projects,
324        reminders: mif_reminders,
325        vendor_extensions,
326    })
327}
328
329fn build_knowledge_graph(graph: &GraphMemory) -> Result<MifKnowledgeGraph> {
330    let entities = graph
331        .get_all_entities()
332        .unwrap_or_default()
333        .into_iter()
334        .map(|e| MifGraphEntity {
335            id: e.uuid,
336            name: e.name,
337            types: e.labels.iter().map(label_to_string).collect(),
338            attributes: e.attributes,
339            summary: e.summary,
340            created_at: e.created_at,
341            last_seen_at: e.last_seen_at,
342        })
343        .collect();
344
345    let relationships = graph
346        .get_all_relationships()
347        .unwrap_or_default()
348        .into_iter()
349        .map(|e| convert_relationship(&e))
350        .collect();
351
352    Ok(MifKnowledgeGraph {
353        entities,
354        relationships,
355        episodes: Vec::new(), // Episodes exported via entity_ids references
356    })
357}
358
359fn convert_relationship(edge: &RelationshipEdge) -> MifGraphRelationship {
360    let confidence = Some(edge.strength.clamp(0.0, 1.0));
361    MifGraphRelationship {
362        id: edge.uuid,
363        source_entity_id: edge.from_entity,
364        target_entity_id: edge.to_entity,
365        relation_type: relation_type_to_string(&edge.relation_type),
366        context: edge.context.clone(),
367        confidence,
368        created_at: edge.created_at,
369        valid_at: edge.valid_at,
370        invalidated_at: edge.invalidated_at,
371    }
372}
373
374fn convert_todo(t: &Todo) -> MifTodo {
375    let comments: Vec<MifTodoComment> = t
376        .comments
377        .iter()
378        .map(|c| MifTodoComment {
379            id: c.id.0,
380            content: c.content.clone(),
381            comment_type: format!("{:?}", c.comment_type).to_lowercase(),
382            created_at: c.created_at,
383            author: Some(c.author.clone()),
384        })
385        .collect();
386
387    MifTodo {
388        id: t.id.0,
389        content: t.content.clone(),
390        status: todo_status_to_string(&t.status),
391        priority: todo_priority_to_string(&t.priority),
392        created_at: t.created_at,
393        updated_at: t.updated_at,
394        due_date: t.due_date,
395        completed_at: t.completed_at,
396        project_id: t.project_id.as_ref().map(|p| p.0),
397        parent_id: t.parent_id.as_ref().map(|p| p.0),
398        tags: t.tags.clone(),
399        contexts: t.contexts.clone(),
400        notes: t.notes.clone(),
401        blocked_on: t.blocked_on.clone(),
402        recurrence: t
403            .recurrence
404            .as_ref()
405            .map(|r| format!("{r:?}").to_lowercase()),
406        comments,
407        related_memory_ids: t.related_memory_ids.iter().map(|id| id.0).collect(),
408        external_id: t.external_id.clone(),
409    }
410}
411
412fn convert_project(p: &Project) -> MifProject {
413    MifProject {
414        id: p.id.0,
415        name: p.name.clone(),
416        prefix: p.prefix.clone().unwrap_or_default(),
417        description: p.description.clone(),
418        status: format!("{:?}", p.status).to_lowercase(),
419        created_at: p.created_at,
420        color: p.color.clone(),
421        icon: None,
422    }
423}
424
425fn convert_reminder(r: &ProspectiveTask) -> MifReminder {
426    let trigger = match &r.trigger {
427        ProspectiveTrigger::AtTime { at } => MifTrigger::Time { at: *at },
428        ProspectiveTrigger::AfterDuration { seconds, from } => MifTrigger::Duration {
429            seconds: *seconds,
430            from: *from,
431        },
432        ProspectiveTrigger::OnContext {
433            keywords,
434            threshold,
435        } => MifTrigger::Context {
436            keywords: keywords.clone(),
437            threshold: *threshold,
438        },
439    };
440
441    MifReminder {
442        id: r.id.0,
443        content: r.content.clone(),
444        trigger,
445        status: format!("{:?}", r.status).to_lowercase(),
446        priority: r.priority,
447        tags: r.tags.clone(),
448        created_at: r.created_at,
449        triggered_at: r.triggered_at,
450        dismissed_at: r.dismissed_at,
451    }
452}
453
454// =============================================================================
455// STRING CONVERSION HELPERS
456// =============================================================================
457
458fn label_to_string(label: &EntityLabel) -> String {
459    match label {
460        EntityLabel::Person => "person".to_string(),
461        EntityLabel::Organization => "organization".to_string(),
462        EntityLabel::Location => "location".to_string(),
463        EntityLabel::Technology => "technology".to_string(),
464        EntityLabel::Concept => "concept".to_string(),
465        EntityLabel::Event => "event".to_string(),
466        EntityLabel::Date => "date".to_string(),
467        EntityLabel::Product => "product".to_string(),
468        EntityLabel::Skill => "skill".to_string(),
469        EntityLabel::Keyword => "keyword".to_string(),
470        EntityLabel::Other(s) => s.to_lowercase(),
471    }
472}
473
474pub(crate) fn experience_type_to_string(t: &ExperienceType) -> String {
475    match t {
476        ExperienceType::Observation => "observation",
477        ExperienceType::Decision => "decision",
478        ExperienceType::Learning => "learning",
479        ExperienceType::Error => "error",
480        ExperienceType::Discovery => "discovery",
481        ExperienceType::Pattern => "pattern",
482        ExperienceType::Context => "context",
483        ExperienceType::Task => "task",
484        ExperienceType::CodeEdit => "code_edit",
485        ExperienceType::FileAccess => "file_access",
486        ExperienceType::Search => "search",
487        ExperienceType::Command => "command",
488        ExperienceType::Conversation => "conversation",
489        ExperienceType::Intention => "intention",
490    }
491    .to_string()
492}
493
494fn source_type_to_string(s: &SourceType) -> String {
495    match s {
496        SourceType::User => "user",
497        SourceType::System => "system",
498        SourceType::ExternalApi => "api",
499        SourceType::File => "file",
500        SourceType::Web => "web",
501        SourceType::AiGenerated => "ai_generated",
502        SourceType::Inferred => "inferred",
503        SourceType::Unknown => "unknown",
504    }
505    .to_string()
506}
507
508fn relation_type_to_string(r: &crate::graph_memory::RelationType) -> String {
509    use crate::graph_memory::RelationType;
510    match r {
511        RelationType::WorksWith => "works_with",
512        RelationType::WorksAt => "works_at",
513        RelationType::EmployedBy => "employed_by",
514        RelationType::PartOf => "part_of",
515        RelationType::Contains => "contains",
516        RelationType::OwnedBy => "owned_by",
517        RelationType::LocatedIn => "located_in",
518        RelationType::LocatedAt => "located_at",
519        RelationType::Uses => "uses",
520        RelationType::CreatedBy => "created_by",
521        RelationType::DevelopedBy => "developed_by",
522        RelationType::Causes => "causes",
523        RelationType::ResultsIn => "results_in",
524        RelationType::Learned => "learned",
525        RelationType::Knows => "knows",
526        RelationType::Teaches => "teaches",
527        RelationType::RelatedTo => "related_to",
528        RelationType::AssociatedWith => "associated_with",
529        RelationType::CoRetrieved => "co_retrieved",
530        RelationType::CoOccurs => "co_occurs",
531        RelationType::Custom(s) => return s.to_lowercase(),
532    }
533    .to_string()
534}
535
536fn todo_status_to_string(s: &TodoStatus) -> String {
537    match s {
538        TodoStatus::Backlog => "backlog",
539        TodoStatus::Todo => "todo",
540        TodoStatus::InProgress => "in_progress",
541        TodoStatus::Blocked => "blocked",
542        TodoStatus::Done => "done",
543        TodoStatus::Cancelled => "cancelled",
544    }
545    .to_string()
546}
547
548fn todo_priority_to_string(p: &TodoPriority) -> String {
549    match p {
550        TodoPriority::Urgent => "urgent",
551        TodoPriority::High => "high",
552        TodoPriority::Medium => "medium",
553        TodoPriority::Low => "low",
554        TodoPriority::None => "none",
555    }
556    .to_string()
557}