1use std::collections::HashMap;
8
9use anyhow::Result;
10use chrono::{DateTime, Utc};
11use sha2::{Digest, Sha256};
12use uuid::Uuid;
13
14use crate::graph_memory::{EntityLabel, GraphMemory, RelationshipEdge};
15use crate::memory::types::{
16 ExperienceType, Memory, ProspectiveTask, ProspectiveTrigger, SourceType, Todo, TodoPriority,
17 TodoStatus,
18};
19use crate::memory::Project;
20
21use super::pii::PiiPatterns;
22use super::schema::*;
23
24#[derive(Debug, Clone)]
26pub struct ExportOptions {
27 pub user_id: String,
28 pub include_embeddings: bool,
29 pub include_graph: bool,
30 pub redact_pii: bool,
31 pub since: Option<DateTime<Utc>>,
32}
33
34pub fn build_document(
40 memories: &[std::sync::Arc<Memory>],
41 graph: Option<&GraphMemory>,
42 todos: &[Todo],
43 projects: &[Project],
44 reminders: &[ProspectiveTask],
45 options: &ExportOptions,
46) -> Result<MifDocument> {
47 let pii = if options.redact_pii {
48 Some(PiiPatterns::new())
49 } else {
50 None
51 };
52
53 let mut privacy = MifPrivacy {
54 pii_detected: false,
55 secrets_detected: false,
56 redacted_fields: Vec::new(),
57 };
58
59 let (entity_map_by_id, entity_map_by_name): (
64 HashMap<Uuid, Vec<String>>,
65 HashMap<String, Vec<String>>,
66 ) = if let Some(g) = graph {
67 let entities = g.get_all_entities().unwrap_or_default();
68 let by_id: HashMap<Uuid, Vec<String>> = entities
69 .iter()
70 .map(|e| {
71 let types: Vec<String> = e.labels.iter().map(label_to_string).collect();
72 (e.uuid, types)
73 })
74 .collect();
75 let by_name: HashMap<String, Vec<String>> = entities
76 .iter()
77 .map(|e| {
78 let types: Vec<String> = e.labels.iter().map(label_to_string).collect();
79 (e.name.to_lowercase(), types)
80 })
81 .collect();
82 (by_id, by_name)
83 } else {
84 (HashMap::new(), HashMap::new())
85 };
86
87 let mut mif_memories = Vec::with_capacity(memories.len());
89 let mut vendor_memory_meta: HashMap<String, serde_json::Value> = HashMap::new();
90
91 for m in memories {
92 if let Some(ref since) = options.since {
93 if m.created_at < *since {
94 continue;
95 }
96 }
97
98 let (content, _redactions) = if let Some(ref patterns) = pii {
99 let (redacted, records, found) = patterns.redact(&m.experience.content);
100 if found {
101 privacy.pii_detected = true;
102 if patterns.has_secrets(&m.experience.content) {
103 privacy.secrets_detected = true;
104 }
105 for r in &records {
106 if !privacy.redacted_fields.contains(&r.redaction_type) {
107 privacy.redacted_fields.push(r.redaction_type.clone());
108 }
109 }
110 }
111 (
112 redacted,
113 if records.is_empty() {
114 None
115 } else {
116 Some(records)
117 },
118 )
119 } else {
120 (m.experience.content.clone(), None)
121 };
122
123 let entities: Vec<MifEntityRef> = m
125 .entity_refs
126 .iter()
127 .map(|eref| {
128 let entity_type = entity_map_by_id
129 .get(&eref.entity_id)
130 .and_then(|types| types.first().cloned())
131 .or_else(|| {
132 entity_map_by_name
133 .get(&eref.name.to_lowercase())
134 .and_then(|types| types.first().cloned())
135 })
136 .unwrap_or_else(|| "unknown".to_string());
137 MifEntityRef {
138 name: eref.name.clone(),
139 entity_type,
140 confidence: 1.0,
141 }
142 })
143 .collect();
144
145 let ref_names: std::collections::HashSet<&str> =
148 m.entity_refs.iter().map(|r| r.name.as_str()).collect();
149 let mut extra_entities: Vec<MifEntityRef> = m
150 .experience
151 .entities
152 .iter()
153 .filter(|e| !ref_names.contains(e.as_str()))
154 .map(|e| {
155 let entity_type = entity_map_by_name
156 .get(&e.to_lowercase())
157 .and_then(|types| types.first().cloned())
158 .unwrap_or_else(|| "unknown".to_string());
159 MifEntityRef {
160 name: e.clone(),
161 entity_type,
162 confidence: 0.8,
163 }
164 })
165 .collect();
166
167 let mut all_entities = entities;
168 all_entities.append(&mut extra_entities);
169
170 let embeddings = if options.include_embeddings {
171 m.experience.embeddings.as_ref().map(|v| MifEmbedding {
172 model: "minilm-l6-v2".to_string(),
173 dimensions: v.len(),
174 vector: v.clone(),
175 normalized: true,
176 })
177 } else {
178 None
179 };
180
181 let (source_type, session_id) = m
182 .experience
183 .context
184 .as_ref()
185 .map(|ctx| {
186 let src = source_type_to_string(&ctx.source.source_type);
187 let sess = ctx.episode.episode_id.clone();
188 (src, sess)
189 })
190 .unwrap_or_else(|| ("unknown".to_string(), None));
191
192 let agent_name = m
193 .experience
194 .context
195 .as_ref()
196 .and_then(|ctx| ctx.source.source_id.clone());
197
198 let tags: Vec<String> = m.experience.tags.clone();
199
200 let memory_type = experience_type_to_string(&m.experience.experience_type);
201
202 let related_memory_ids: Vec<Uuid> = m
203 .experience
204 .related_memories
205 .iter()
206 .map(|id| id.0)
207 .collect();
208 let related_todo_ids: Vec<Uuid> = m.related_todo_ids.iter().map(|id| id.0).collect();
209
210 mif_memories.push(MifMemory {
211 id: m.id.0,
212 content,
213 memory_type,
214 created_at: m.created_at,
215 tags,
216 entities: all_entities,
217 metadata: m.experience.metadata.clone(),
218 embeddings,
219 source: Some(MifSource {
220 source_type,
221 session_id,
222 agent: agent_name,
223 }),
224 parent_id: m.parent_id.as_ref().map(|p| p.0),
225 related_memory_ids,
226 related_todo_ids,
227 agent_id: m.agent_id.clone(),
228 external_id: m.external_id.clone(),
229 version: m.version,
230 });
231
232 vendor_memory_meta.insert(
234 m.id.0.to_string(),
235 serde_json::json!({
236 "importance": m.importance(),
237 "access_count": m.access_count(),
238 "tier": format!("{:?}", m.tier).to_lowercase(),
239 "activation": m.importance(), "last_accessed": m.last_accessed().to_rfc3339(),
241 }),
242 );
243 }
244
245 let knowledge_graph = if options.include_graph {
247 if let Some(g) = graph {
248 Some(build_knowledge_graph(g)?)
249 } else {
250 None
251 }
252 } else {
253 None
254 };
255
256 let mif_todos: Vec<MifTodo> = todos.iter().map(convert_todo).collect();
258
259 let mif_projects: Vec<MifProject> = projects.iter().map(convert_project).collect();
261
262 let mif_reminders: Vec<MifReminder> = reminders.iter().map(convert_reminder).collect();
264
265 let mut vendor_extensions: HashMap<String, serde_json::Value> = HashMap::new();
267 let mut edge_metadata: HashMap<String, serde_json::Value> = HashMap::new();
268
269 if let Some(g) = graph {
270 for edge in g.get_all_relationships().unwrap_or_default() {
271 edge_metadata.insert(
272 edge.uuid.to_string(),
273 serde_json::json!({
274 "strength": edge.strength,
275 "ltp_status": format!("{:?}", edge.ltp_status),
276 "tier": format!("{:?}", edge.tier),
277 "activation_count": edge.activation_count,
278 "last_activated": edge.last_activated.to_rfc3339(),
279 }),
280 );
281 }
282 }
283
284 vendor_extensions.insert(
285 "shodh-memory".to_string(),
286 serde_json::json!({
287 "version": env!("CARGO_PKG_VERSION"),
288 "memory_metadata": vendor_memory_meta,
289 "edge_metadata": edge_metadata,
290 }),
291 );
292
293 let mut hasher = Sha256::new();
295 hasher.update(format!(
296 "{}:{}:{}:{}",
297 mif_memories.len(),
298 mif_todos.len(),
299 mif_projects.len(),
300 mif_reminders.len()
301 ));
302 let checksum = format!("sha256:{}", hex::encode(hasher.finalize()));
303
304 let now = Utc::now();
305 let export_id = Uuid::new_v4().to_string();
306
307 Ok(MifDocument {
308 mif_version: "2.0".to_string(),
309 generator: MifGenerator {
310 name: "shodh-memory".to_string(),
311 version: env!("CARGO_PKG_VERSION").to_string(),
312 },
313 export_meta: MifExportMeta {
314 id: export_id,
315 created_at: now,
316 user_id: options.user_id.clone(),
317 checksum,
318 privacy: Some(privacy),
319 },
320 memories: mif_memories,
321 knowledge_graph,
322 todos: mif_todos,
323 projects: mif_projects,
324 reminders: mif_reminders,
325 vendor_extensions,
326 })
327}
328
329fn build_knowledge_graph(graph: &GraphMemory) -> Result<MifKnowledgeGraph> {
330 let entities = graph
331 .get_all_entities()
332 .unwrap_or_default()
333 .into_iter()
334 .map(|e| MifGraphEntity {
335 id: e.uuid,
336 name: e.name,
337 types: e.labels.iter().map(label_to_string).collect(),
338 attributes: e.attributes,
339 summary: e.summary,
340 created_at: e.created_at,
341 last_seen_at: e.last_seen_at,
342 })
343 .collect();
344
345 let relationships = graph
346 .get_all_relationships()
347 .unwrap_or_default()
348 .into_iter()
349 .map(|e| convert_relationship(&e))
350 .collect();
351
352 Ok(MifKnowledgeGraph {
353 entities,
354 relationships,
355 episodes: Vec::new(), })
357}
358
359fn convert_relationship(edge: &RelationshipEdge) -> MifGraphRelationship {
360 let confidence = Some(edge.strength.clamp(0.0, 1.0));
361 MifGraphRelationship {
362 id: edge.uuid,
363 source_entity_id: edge.from_entity,
364 target_entity_id: edge.to_entity,
365 relation_type: relation_type_to_string(&edge.relation_type),
366 context: edge.context.clone(),
367 confidence,
368 created_at: edge.created_at,
369 valid_at: edge.valid_at,
370 invalidated_at: edge.invalidated_at,
371 }
372}
373
374fn convert_todo(t: &Todo) -> MifTodo {
375 let comments: Vec<MifTodoComment> = t
376 .comments
377 .iter()
378 .map(|c| MifTodoComment {
379 id: c.id.0,
380 content: c.content.clone(),
381 comment_type: format!("{:?}", c.comment_type).to_lowercase(),
382 created_at: c.created_at,
383 author: Some(c.author.clone()),
384 })
385 .collect();
386
387 MifTodo {
388 id: t.id.0,
389 content: t.content.clone(),
390 status: todo_status_to_string(&t.status),
391 priority: todo_priority_to_string(&t.priority),
392 created_at: t.created_at,
393 updated_at: t.updated_at,
394 due_date: t.due_date,
395 completed_at: t.completed_at,
396 project_id: t.project_id.as_ref().map(|p| p.0),
397 parent_id: t.parent_id.as_ref().map(|p| p.0),
398 tags: t.tags.clone(),
399 contexts: t.contexts.clone(),
400 notes: t.notes.clone(),
401 blocked_on: t.blocked_on.clone(),
402 recurrence: t
403 .recurrence
404 .as_ref()
405 .map(|r| format!("{r:?}").to_lowercase()),
406 comments,
407 related_memory_ids: t.related_memory_ids.iter().map(|id| id.0).collect(),
408 external_id: t.external_id.clone(),
409 }
410}
411
412fn convert_project(p: &Project) -> MifProject {
413 MifProject {
414 id: p.id.0,
415 name: p.name.clone(),
416 prefix: p.prefix.clone().unwrap_or_default(),
417 description: p.description.clone(),
418 status: format!("{:?}", p.status).to_lowercase(),
419 created_at: p.created_at,
420 color: p.color.clone(),
421 icon: None,
422 }
423}
424
425fn convert_reminder(r: &ProspectiveTask) -> MifReminder {
426 let trigger = match &r.trigger {
427 ProspectiveTrigger::AtTime { at } => MifTrigger::Time { at: *at },
428 ProspectiveTrigger::AfterDuration { seconds, from } => MifTrigger::Duration {
429 seconds: *seconds,
430 from: *from,
431 },
432 ProspectiveTrigger::OnContext {
433 keywords,
434 threshold,
435 } => MifTrigger::Context {
436 keywords: keywords.clone(),
437 threshold: *threshold,
438 },
439 };
440
441 MifReminder {
442 id: r.id.0,
443 content: r.content.clone(),
444 trigger,
445 status: format!("{:?}", r.status).to_lowercase(),
446 priority: r.priority,
447 tags: r.tags.clone(),
448 created_at: r.created_at,
449 triggered_at: r.triggered_at,
450 dismissed_at: r.dismissed_at,
451 }
452}
453
454fn label_to_string(label: &EntityLabel) -> String {
459 match label {
460 EntityLabel::Person => "person".to_string(),
461 EntityLabel::Organization => "organization".to_string(),
462 EntityLabel::Location => "location".to_string(),
463 EntityLabel::Technology => "technology".to_string(),
464 EntityLabel::Concept => "concept".to_string(),
465 EntityLabel::Event => "event".to_string(),
466 EntityLabel::Date => "date".to_string(),
467 EntityLabel::Product => "product".to_string(),
468 EntityLabel::Skill => "skill".to_string(),
469 EntityLabel::Keyword => "keyword".to_string(),
470 EntityLabel::Other(s) => s.to_lowercase(),
471 }
472}
473
474pub(crate) fn experience_type_to_string(t: &ExperienceType) -> String {
475 match t {
476 ExperienceType::Observation => "observation",
477 ExperienceType::Decision => "decision",
478 ExperienceType::Learning => "learning",
479 ExperienceType::Error => "error",
480 ExperienceType::Discovery => "discovery",
481 ExperienceType::Pattern => "pattern",
482 ExperienceType::Context => "context",
483 ExperienceType::Task => "task",
484 ExperienceType::CodeEdit => "code_edit",
485 ExperienceType::FileAccess => "file_access",
486 ExperienceType::Search => "search",
487 ExperienceType::Command => "command",
488 ExperienceType::Conversation => "conversation",
489 ExperienceType::Intention => "intention",
490 }
491 .to_string()
492}
493
494fn source_type_to_string(s: &SourceType) -> String {
495 match s {
496 SourceType::User => "user",
497 SourceType::System => "system",
498 SourceType::ExternalApi => "api",
499 SourceType::File => "file",
500 SourceType::Web => "web",
501 SourceType::AiGenerated => "ai_generated",
502 SourceType::Inferred => "inferred",
503 SourceType::Unknown => "unknown",
504 }
505 .to_string()
506}
507
508fn relation_type_to_string(r: &crate::graph_memory::RelationType) -> String {
509 use crate::graph_memory::RelationType;
510 match r {
511 RelationType::WorksWith => "works_with",
512 RelationType::WorksAt => "works_at",
513 RelationType::EmployedBy => "employed_by",
514 RelationType::PartOf => "part_of",
515 RelationType::Contains => "contains",
516 RelationType::OwnedBy => "owned_by",
517 RelationType::LocatedIn => "located_in",
518 RelationType::LocatedAt => "located_at",
519 RelationType::Uses => "uses",
520 RelationType::CreatedBy => "created_by",
521 RelationType::DevelopedBy => "developed_by",
522 RelationType::Causes => "causes",
523 RelationType::ResultsIn => "results_in",
524 RelationType::Learned => "learned",
525 RelationType::Knows => "knows",
526 RelationType::Teaches => "teaches",
527 RelationType::RelatedTo => "related_to",
528 RelationType::AssociatedWith => "associated_with",
529 RelationType::CoRetrieved => "co_retrieved",
530 RelationType::CoOccurs => "co_occurs",
531 RelationType::Custom(s) => return s.to_lowercase(),
532 }
533 .to_string()
534}
535
536fn todo_status_to_string(s: &TodoStatus) -> String {
537 match s {
538 TodoStatus::Backlog => "backlog",
539 TodoStatus::Todo => "todo",
540 TodoStatus::InProgress => "in_progress",
541 TodoStatus::Blocked => "blocked",
542 TodoStatus::Done => "done",
543 TodoStatus::Cancelled => "cancelled",
544 }
545 .to_string()
546}
547
548fn todo_priority_to_string(p: &TodoPriority) -> String {
549 match p {
550 TodoPriority::Urgent => "urgent",
551 TodoPriority::High => "high",
552 TodoPriority::Medium => "medium",
553 TodoPriority::Low => "low",
554 TodoPriority::None => "none",
555 }
556 .to_string()
557}