Skip to main content

shodh_memory/mif/
import.rs

1//! Reference-preserving import from MifDocument to shodh internals.
2//!
3//! Key improvements over v1:
4//! - UUID preservation: memories keep their original IDs via `remember_with_id()`
5//! - Content-hash dedup: O(1) duplicate check via SHA256 HashSet (replaces O(n*k) recall)
6//! - Reference mapping: remaps parent_id, related_ids when collisions occur
7//! - Graph reconstruction: entities and edges restored with proper types
8
9use std::collections::{HashMap, HashSet};
10
11use crate::graph_memory::{
12    EdgeTier, EntityLabel, EntityNode, GraphMemory, LtpStatus, RelationType, RelationshipEdge,
13};
14use crate::memory::types::{
15    Experience, ExperienceType, MemoryId, ProspectiveTask, ProspectiveTaskId,
16    ProspectiveTaskStatus, ProspectiveTrigger, Todo, TodoId, TodoPriority, TodoStatus,
17};
18use crate::memory::{Project, ProjectId, ProjectStatus};
19use anyhow::{bail, Result};
20use chrono::{DateTime, Utc};
21use sha2::{Digest, Sha256};
22
23use super::schema::*;
24
/// Caller-supplied settings for one MIF import run.
#[derive(Debug, Clone)]
pub struct ImportOptions {
    /// Identifier of the user the import is performed for.
    ///
    /// NOTE(review): not read by `prepare_memories`; presumably consumed by the
    /// caller that stores the prepared items — confirm.
    pub user_id: String,
    /// When `true`, [`prepare_memories`] drops every memory whose content hash is
    /// already present in the dedup set it is given.
    pub skip_duplicates: bool,
}
31
32/// Result of an import operation.
33#[derive(Debug, Default, serde::Serialize)]
34pub struct ImportResult {
35    pub memories_imported: usize,
36    pub todos_imported: usize,
37    pub projects_imported: usize,
38    pub reminders_imported: usize,
39    pub edges_imported: usize,
40    pub entities_imported: usize,
41    pub duplicates_skipped: usize,
42    pub errors: Vec<String>,
43}
44
45/// Build a content-hash set from existing memories for O(1) dedup.
46pub fn build_dedup_set(existing_contents: &[String]) -> HashSet<[u8; 32]> {
47    existing_contents.iter().map(|c| content_hash(c)).collect()
48}
49
50fn content_hash(content: &str) -> [u8; 32] {
51    let mut hasher = Sha256::new();
52    hasher.update(content.as_bytes());
53    hasher.finalize().into()
54}
55
56/// Convert MIF memories into Experience structs ready for `remember_with_id()`.
57///
58/// A prepared memory ready for import: (id, experience, optional creation timestamp).
59pub type PreparedMemory = (MemoryId, Experience, Option<DateTime<Utc>>);
60
61/// Returns a vec of prepared memories plus the count of skipped duplicates.
62/// The caller is responsible for actually storing them.
63pub fn prepare_memories(
64    doc: &MifDocument,
65    dedup_set: &HashSet<[u8; 32]>,
66    options: &ImportOptions,
67) -> (Vec<PreparedMemory>, usize) {
68    let mut prepared = Vec::new();
69    let mut skipped = 0;
70
71    for mem in &doc.memories {
72        // Dedup check
73        if options.skip_duplicates && dedup_set.contains(&content_hash(&mem.content)) {
74            skipped += 1;
75            continue;
76        }
77
78        let exp_type = parse_experience_type(&mem.memory_type);
79
80        let mut metadata = mem.metadata.clone();
81        if !mem.tags.is_empty() && !metadata.contains_key("tags") {
82            metadata.insert("tags".to_string(), mem.tags.join(","));
83        }
84
85        let entities: Vec<String> = mem.entities.iter().map(|e| e.name.clone()).collect();
86
87        let embeddings = mem.embeddings.as_ref().map(|e| e.vector.clone());
88
89        let experience = Experience {
90            experience_type: exp_type,
91            content: mem.content.clone(),
92            entities,
93            metadata,
94            embeddings,
95            tags: mem.tags.clone(),
96            ..Default::default()
97        };
98
99        let memory_id = MemoryId(mem.id);
100        let created_at = Some(mem.created_at);
101
102        prepared.push((memory_id, experience, created_at));
103    }
104
105    (prepared, skipped)
106}
107
108/// Convert MIF todos into internal Todo structs.
109///
110/// Returns todos ready for `store_todo()`. Project IDs are preserved if present.
111pub fn prepare_todos(doc: &MifDocument, user_id: &str) -> Vec<Todo> {
112    doc.todos
113        .iter()
114        .map(|t| {
115            let status = parse_todo_status(&t.status);
116            let priority = parse_todo_priority(&t.priority);
117
118            let comments = t
119                .comments
120                .iter()
121                .map(|c| crate::memory::types::TodoComment {
122                    id: crate::memory::types::TodoCommentId(c.id),
123                    todo_id: TodoId(t.id),
124                    author: c.author.clone().unwrap_or_else(|| "import".to_string()),
125                    content: c.content.clone(),
126                    comment_type: parse_comment_type(&c.comment_type),
127                    created_at: c.created_at,
128                    updated_at: None,
129                })
130                .collect();
131
132            let related_memory_ids: Vec<MemoryId> = t
133                .related_memory_ids
134                .iter()
135                .map(|id| MemoryId(*id))
136                .collect();
137
138            Todo {
139                id: TodoId(t.id),
140                seq_num: 0,
141                project_prefix: None,
142                project: None,
143                user_id: user_id.to_string(),
144                content: t.content.clone(),
145                status,
146                priority,
147                project_id: t.project_id.map(ProjectId),
148                parent_id: t.parent_id.map(TodoId),
149                contexts: t.contexts.clone(),
150                tags: t.tags.clone(),
151                notes: t.notes.clone(),
152                blocked_on: t.blocked_on.clone(),
153                recurrence: None,
154                created_at: t.created_at,
155                updated_at: t.updated_at,
156                due_date: t.due_date,
157                completed_at: t.completed_at,
158                sort_order: 0,
159                comments,
160                embedding: None,
161                related_memory_ids,
162                external_id: t.external_id.clone(),
163            }
164        })
165        .collect()
166}
167
168/// Convert MIF projects into internal Project structs.
169pub fn prepare_projects(doc: &MifDocument, user_id: &str) -> Vec<Project> {
170    doc.projects
171        .iter()
172        .map(|p| Project {
173            id: ProjectId(p.id),
174            user_id: user_id.to_string(),
175            name: p.name.clone(),
176            prefix: if p.prefix.is_empty() {
177                None
178            } else {
179                Some(p.prefix.clone())
180            },
181            description: p.description.clone(),
182            status: parse_project_status(&p.status),
183            color: p.color.clone(),
184            parent_id: None,
185            created_at: p.created_at,
186            completed_at: None,
187            codebase_path: None,
188            codebase_indexed: false,
189            codebase_indexed_at: None,
190            codebase_file_count: 0,
191            embedding: None,
192            related_memory_ids: Vec::new(),
193            todo_counts: Default::default(),
194        })
195        .collect()
196}
197
198/// Convert MIF reminders into internal ProspectiveTask structs.
199pub fn prepare_reminders(doc: &MifDocument, user_id: &str) -> Vec<ProspectiveTask> {
200    doc.reminders
201        .iter()
202        .map(|r| {
203            let trigger = match &r.trigger {
204                MifTrigger::Time { at } => ProspectiveTrigger::AtTime { at: *at },
205                MifTrigger::Duration { seconds, from } => ProspectiveTrigger::AfterDuration {
206                    seconds: *seconds,
207                    from: *from,
208                },
209                MifTrigger::Context {
210                    keywords,
211                    threshold,
212                } => ProspectiveTrigger::OnContext {
213                    keywords: keywords.clone(),
214                    threshold: *threshold,
215                },
216            };
217
218            let status = parse_reminder_status(&r.status);
219
220            ProspectiveTask {
221                id: ProspectiveTaskId(r.id),
222                user_id: user_id.to_string(),
223                content: r.content.clone(),
224                trigger,
225                status,
226                created_at: r.created_at,
227                triggered_at: r.triggered_at,
228                dismissed_at: r.dismissed_at,
229                tags: r.tags.clone(),
230                priority: r.priority,
231                embedding: None,
232                related_memory_ids: Vec::new(),
233            }
234        })
235        .collect()
236}
237
238/// Import graph entities from MIF document.
239///
240/// Returns the count of entities imported and any errors.
241pub fn import_graph_entities(kg: &MifKnowledgeGraph, graph: &GraphMemory) -> (usize, Vec<String>) {
242    let mut imported = 0;
243    let mut errors = Vec::new();
244
245    for entity in &kg.entities {
246        let labels: Vec<EntityLabel> = entity.types.iter().map(|t| parse_entity_label(t)).collect();
247
248        let node = EntityNode {
249            uuid: entity.id,
250            name: entity.name.clone(),
251            labels: if labels.is_empty() {
252                vec![EntityLabel::Concept]
253            } else {
254                labels
255            },
256            created_at: entity.created_at,
257            last_seen_at: entity.last_seen_at,
258            mention_count: 1,
259            summary: entity.summary.clone(),
260            attributes: entity.attributes.clone(),
261            name_embedding: None,
262            salience: 0.5,
263            is_proper_noun: true,
264        };
265
266        match graph.add_entity(node) {
267            Ok(_) => imported += 1,
268            Err(e) => errors.push(format!("Entity '{}': {}", entity.name, e)),
269        }
270    }
271
272    (imported, errors)
273}
274
275/// Import graph relationships from MIF document.
276///
277/// Returns the count of edges imported and any errors.
278pub fn import_graph_relationships(
279    kg: &MifKnowledgeGraph,
280    graph: &GraphMemory,
281    vendor_extensions: &HashMap<String, serde_json::Value>,
282) -> (usize, Vec<String>) {
283    let mut imported = 0;
284    let mut errors = Vec::new();
285
286    // Extract shodh edge metadata if available for restoring strength/LTP
287    let edge_meta = vendor_extensions
288        .get("shodh-memory")
289        .and_then(|v| v.get("edge_metadata"))
290        .and_then(|v| v.as_object());
291
292    for rel in &kg.relationships {
293        let relation_type = parse_relation_type(&rel.relation_type);
294        let strength = rel.confidence.unwrap_or(0.5);
295
296        // Restore shodh-specific metadata from vendor extensions
297        let (ltp_status, tier, activation_count) = if let Some(meta) = edge_meta {
298            if let Some(em) = meta.get(&rel.id.to_string()) {
299                let ltp = em
300                    .get("ltp_status")
301                    .and_then(|v| v.as_str())
302                    .map(parse_ltp_status)
303                    .unwrap_or_default();
304                let tier = em
305                    .get("tier")
306                    .and_then(|v| v.as_str())
307                    .map(parse_edge_tier)
308                    .unwrap_or_default();
309                let count = em
310                    .get("activation_count")
311                    .and_then(|v| v.as_u64())
312                    .unwrap_or(1) as u32;
313                (ltp, tier, count)
314            } else {
315                (LtpStatus::None, EdgeTier::L1Working, 1)
316            }
317        } else {
318            (LtpStatus::None, EdgeTier::L1Working, 1)
319        };
320
321        let edge = RelationshipEdge {
322            uuid: rel.id,
323            from_entity: rel.source_entity_id,
324            to_entity: rel.target_entity_id,
325            relation_type,
326            strength,
327            created_at: rel.created_at,
328            valid_at: rel.valid_at,
329            invalidated_at: rel.invalidated_at,
330            source_episode_id: None,
331            context: rel.context.clone(),
332            last_activated: rel.created_at,
333            activation_count,
334            ltp_status,
335            tier,
336            activation_timestamps: None,
337            entity_confidence: rel.confidence,
338        };
339
340        match graph.add_relationship(edge) {
341            Ok(_) => imported += 1,
342            Err(e) => errors.push(format!("Edge {}: {}", rel.id, e)),
343        }
344    }
345
346    (imported, errors)
347}
348
349/// Validate MIF document version.
350pub fn validate_version(doc: &MifDocument) -> Result<()> {
351    if !doc.mif_version.starts_with("2.") && !doc.mif_version.starts_with("1.") {
352        bail!(
353            "Unsupported MIF version: {}. Supported: 1.x, 2.x",
354            doc.mif_version
355        );
356    }
357    Ok(())
358}
359
360// =============================================================================
361// STRING → ENUM PARSERS
362// =============================================================================
363
364pub(crate) fn parse_experience_type(s: &str) -> ExperienceType {
365    match s.to_lowercase().as_str() {
366        "observation" => ExperienceType::Observation,
367        "decision" => ExperienceType::Decision,
368        "learning" => ExperienceType::Learning,
369        "error" => ExperienceType::Error,
370        "discovery" => ExperienceType::Discovery,
371        "pattern" => ExperienceType::Pattern,
372        "context" => ExperienceType::Context,
373        "task" => ExperienceType::Task,
374        "code_edit" | "codeedit" => ExperienceType::CodeEdit,
375        "file_access" | "fileaccess" => ExperienceType::FileAccess,
376        "search" => ExperienceType::Search,
377        "command" => ExperienceType::Command,
378        "conversation" => ExperienceType::Conversation,
379        "intention" => ExperienceType::Intention,
380        _ => ExperienceType::Observation,
381    }
382}
383
384fn parse_todo_status(s: &str) -> TodoStatus {
385    match s {
386        "backlog" => TodoStatus::Backlog,
387        "todo" => TodoStatus::Todo,
388        "in_progress" => TodoStatus::InProgress,
389        "blocked" => TodoStatus::Blocked,
390        "done" => TodoStatus::Done,
391        "cancelled" => TodoStatus::Cancelled,
392        _ => TodoStatus::Todo,
393    }
394}
395
396fn parse_todo_priority(s: &str) -> TodoPriority {
397    match s {
398        "urgent" | "!!!" => TodoPriority::Urgent,
399        "high" | "!!" => TodoPriority::High,
400        "medium" | "!" => TodoPriority::Medium,
401        "low" => TodoPriority::Low,
402        "none" | "" => TodoPriority::None,
403        _ => TodoPriority::Medium,
404    }
405}
406
407fn parse_comment_type(s: &str) -> crate::memory::types::TodoCommentType {
408    use crate::memory::types::TodoCommentType;
409    match s {
410        "comment" => TodoCommentType::Comment,
411        "progress" => TodoCommentType::Progress,
412        "resolution" => TodoCommentType::Resolution,
413        "activity" => TodoCommentType::Activity,
414        _ => TodoCommentType::Comment,
415    }
416}
417
418fn parse_project_status(s: &str) -> ProjectStatus {
419    match s {
420        "active" => ProjectStatus::Active,
421        "onhold" | "on_hold" => ProjectStatus::OnHold,
422        "completed" => ProjectStatus::Completed,
423        "archived" => ProjectStatus::Archived,
424        _ => ProjectStatus::Active,
425    }
426}
427
428fn parse_reminder_status(s: &str) -> ProspectiveTaskStatus {
429    match s {
430        "pending" => ProspectiveTaskStatus::Pending,
431        "triggered" => ProspectiveTaskStatus::Triggered,
432        "dismissed" => ProspectiveTaskStatus::Dismissed,
433        "expired" => ProspectiveTaskStatus::Expired,
434        _ => ProspectiveTaskStatus::Pending,
435    }
436}
437
438fn parse_entity_label(s: &str) -> EntityLabel {
439    match s.to_lowercase().as_str() {
440        "person" => EntityLabel::Person,
441        "organization" => EntityLabel::Organization,
442        "location" => EntityLabel::Location,
443        "technology" => EntityLabel::Technology,
444        "concept" => EntityLabel::Concept,
445        "event" => EntityLabel::Event,
446        "date" => EntityLabel::Date,
447        "product" => EntityLabel::Product,
448        "skill" => EntityLabel::Skill,
449        "keyword" => EntityLabel::Keyword,
450        other => EntityLabel::Other(other.to_string()),
451    }
452}
453
454pub(crate) fn parse_relation_type(s: &str) -> RelationType {
455    match s {
456        "works_with" | "workswith" => RelationType::WorksWith,
457        "works_at" | "worksat" => RelationType::WorksAt,
458        "employed_by" | "employedby" => RelationType::EmployedBy,
459        "part_of" | "partof" => RelationType::PartOf,
460        "contains" => RelationType::Contains,
461        "owned_by" | "ownedby" => RelationType::OwnedBy,
462        "located_in" | "locatedin" => RelationType::LocatedIn,
463        "located_at" | "locatedat" => RelationType::LocatedAt,
464        "uses" => RelationType::Uses,
465        "created_by" | "createdby" => RelationType::CreatedBy,
466        "developed_by" | "developedby" => RelationType::DevelopedBy,
467        "causes" => RelationType::Causes,
468        "results_in" | "resultsin" => RelationType::ResultsIn,
469        "learned" => RelationType::Learned,
470        "knows" => RelationType::Knows,
471        "teaches" => RelationType::Teaches,
472        "related_to" | "relatedto" => RelationType::RelatedTo,
473        "associated_with" | "associatedwith" => RelationType::AssociatedWith,
474        "co_retrieved" | "coretrieved" => RelationType::CoRetrieved,
475        "co_occurs" | "cooccurs" => RelationType::CoOccurs,
476        other => RelationType::Custom(other.to_string()),
477    }
478}
479
480fn parse_ltp_status(s: &str) -> LtpStatus {
481    match s {
482        "None" => LtpStatus::None,
483        "Weekly" => LtpStatus::Weekly,
484        "Full" => LtpStatus::Full,
485        s if s.starts_with("Burst") => LtpStatus::Burst {
486            detected_at: Utc::now(),
487        },
488        _ => LtpStatus::None,
489    }
490}
491
492fn parse_edge_tier(s: &str) -> EdgeTier {
493    match s {
494        "L1Working" => EdgeTier::L1Working,
495        "L2Episodic" => EdgeTier::L2Episodic,
496        "L3Semantic" => EdgeTier::L3Semantic,
497        _ => EdgeTier::L1Working,
498    }
499}