1use std::collections::{HashMap, HashSet};
10
11use crate::graph_memory::{
12 EdgeTier, EntityLabel, EntityNode, GraphMemory, LtpStatus, RelationType, RelationshipEdge,
13};
14use crate::memory::types::{
15 Experience, ExperienceType, MemoryId, ProspectiveTask, ProspectiveTaskId,
16 ProspectiveTaskStatus, ProspectiveTrigger, Todo, TodoId, TodoPriority, TodoStatus,
17};
18use crate::memory::{Project, ProjectId, ProjectStatus};
19use anyhow::{bail, Result};
20use chrono::{DateTime, Utc};
21use sha2::{Digest, Sha256};
22
23use super::schema::*;
24
/// Caller-supplied settings for an import run.
#[derive(Debug, Clone)]
pub struct ImportOptions {
    /// Identifier of the user the import is performed for.
    /// NOTE(review): no function visible in this section reads this field —
    /// confirm how callers use it.
    pub user_id: String,
    /// When `true`, `prepare_memories` drops memories whose content hash is
    /// already present in the dedup set it is given.
    pub skip_duplicates: bool,
}
31
/// Summary of an import run: per-category counters plus collected error messages.
///
/// `Default` yields all-zero counters and an empty error list.
/// NOTE(review): nothing visible in this section fills this struct in —
/// presumably the code orchestrating the import does; confirm at the call site.
#[derive(Debug, Default, serde::Serialize)]
pub struct ImportResult {
    /// Count of imported memories.
    pub memories_imported: usize,
    /// Count of imported todos.
    pub todos_imported: usize,
    /// Count of imported projects.
    pub projects_imported: usize,
    /// Count of imported reminders.
    pub reminders_imported: usize,
    /// Count of imported graph edges.
    pub edges_imported: usize,
    /// Count of imported graph entities.
    pub entities_imported: usize,
    /// Count of records skipped as duplicates.
    pub duplicates_skipped: usize,
    /// Human-readable messages for failures encountered during the import.
    pub errors: Vec<String>,
}
44
45pub fn build_dedup_set(existing_contents: &[String]) -> HashSet<[u8; 32]> {
47 existing_contents.iter().map(|c| content_hash(c)).collect()
48}
49
50fn content_hash(content: &str) -> [u8; 32] {
51 let mut hasher = Sha256::new();
52 hasher.update(content.as_bytes());
53 hasher.finalize().into()
54}
55
/// A memory converted from a MIF document and ready to be stored: its ID, the
/// `Experience` built from the record, and an optional creation timestamp
/// (`prepare_memories` always fills it from the source record).
pub type PreparedMemory = (MemoryId, Experience, Option<DateTime<Utc>>);
60
61pub fn prepare_memories(
64 doc: &MifDocument,
65 dedup_set: &HashSet<[u8; 32]>,
66 options: &ImportOptions,
67) -> (Vec<PreparedMemory>, usize) {
68 let mut prepared = Vec::new();
69 let mut skipped = 0;
70
71 for mem in &doc.memories {
72 if options.skip_duplicates && dedup_set.contains(&content_hash(&mem.content)) {
74 skipped += 1;
75 continue;
76 }
77
78 let exp_type = parse_experience_type(&mem.memory_type);
79
80 let mut metadata = mem.metadata.clone();
81 if !mem.tags.is_empty() && !metadata.contains_key("tags") {
82 metadata.insert("tags".to_string(), mem.tags.join(","));
83 }
84
85 let entities: Vec<String> = mem.entities.iter().map(|e| e.name.clone()).collect();
86
87 let embeddings = mem.embeddings.as_ref().map(|e| e.vector.clone());
88
89 let experience = Experience {
90 experience_type: exp_type,
91 content: mem.content.clone(),
92 entities,
93 metadata,
94 embeddings,
95 tags: mem.tags.clone(),
96 ..Default::default()
97 };
98
99 let memory_id = MemoryId(mem.id);
100 let created_at = Some(mem.created_at);
101
102 prepared.push((memory_id, experience, created_at));
103 }
104
105 (prepared, skipped)
106}
107
108pub fn prepare_todos(doc: &MifDocument, user_id: &str) -> Vec<Todo> {
112 doc.todos
113 .iter()
114 .map(|t| {
115 let status = parse_todo_status(&t.status);
116 let priority = parse_todo_priority(&t.priority);
117
118 let comments = t
119 .comments
120 .iter()
121 .map(|c| crate::memory::types::TodoComment {
122 id: crate::memory::types::TodoCommentId(c.id),
123 todo_id: TodoId(t.id),
124 author: c.author.clone().unwrap_or_else(|| "import".to_string()),
125 content: c.content.clone(),
126 comment_type: parse_comment_type(&c.comment_type),
127 created_at: c.created_at,
128 updated_at: None,
129 })
130 .collect();
131
132 let related_memory_ids: Vec<MemoryId> = t
133 .related_memory_ids
134 .iter()
135 .map(|id| MemoryId(*id))
136 .collect();
137
138 Todo {
139 id: TodoId(t.id),
140 seq_num: 0,
141 project_prefix: None,
142 project: None,
143 user_id: user_id.to_string(),
144 content: t.content.clone(),
145 status,
146 priority,
147 project_id: t.project_id.map(ProjectId),
148 parent_id: t.parent_id.map(TodoId),
149 contexts: t.contexts.clone(),
150 tags: t.tags.clone(),
151 notes: t.notes.clone(),
152 blocked_on: t.blocked_on.clone(),
153 recurrence: None,
154 created_at: t.created_at,
155 updated_at: t.updated_at,
156 due_date: t.due_date,
157 completed_at: t.completed_at,
158 sort_order: 0,
159 comments,
160 embedding: None,
161 related_memory_ids,
162 external_id: t.external_id.clone(),
163 }
164 })
165 .collect()
166}
167
168pub fn prepare_projects(doc: &MifDocument, user_id: &str) -> Vec<Project> {
170 doc.projects
171 .iter()
172 .map(|p| Project {
173 id: ProjectId(p.id),
174 user_id: user_id.to_string(),
175 name: p.name.clone(),
176 prefix: if p.prefix.is_empty() {
177 None
178 } else {
179 Some(p.prefix.clone())
180 },
181 description: p.description.clone(),
182 status: parse_project_status(&p.status),
183 color: p.color.clone(),
184 parent_id: None,
185 created_at: p.created_at,
186 completed_at: None,
187 codebase_path: None,
188 codebase_indexed: false,
189 codebase_indexed_at: None,
190 codebase_file_count: 0,
191 embedding: None,
192 related_memory_ids: Vec::new(),
193 todo_counts: Default::default(),
194 })
195 .collect()
196}
197
198pub fn prepare_reminders(doc: &MifDocument, user_id: &str) -> Vec<ProspectiveTask> {
200 doc.reminders
201 .iter()
202 .map(|r| {
203 let trigger = match &r.trigger {
204 MifTrigger::Time { at } => ProspectiveTrigger::AtTime { at: *at },
205 MifTrigger::Duration { seconds, from } => ProspectiveTrigger::AfterDuration {
206 seconds: *seconds,
207 from: *from,
208 },
209 MifTrigger::Context {
210 keywords,
211 threshold,
212 } => ProspectiveTrigger::OnContext {
213 keywords: keywords.clone(),
214 threshold: *threshold,
215 },
216 };
217
218 let status = parse_reminder_status(&r.status);
219
220 ProspectiveTask {
221 id: ProspectiveTaskId(r.id),
222 user_id: user_id.to_string(),
223 content: r.content.clone(),
224 trigger,
225 status,
226 created_at: r.created_at,
227 triggered_at: r.triggered_at,
228 dismissed_at: r.dismissed_at,
229 tags: r.tags.clone(),
230 priority: r.priority,
231 embedding: None,
232 related_memory_ids: Vec::new(),
233 }
234 })
235 .collect()
236}
237
238pub fn import_graph_entities(kg: &MifKnowledgeGraph, graph: &GraphMemory) -> (usize, Vec<String>) {
242 let mut imported = 0;
243 let mut errors = Vec::new();
244
245 for entity in &kg.entities {
246 let labels: Vec<EntityLabel> = entity.types.iter().map(|t| parse_entity_label(t)).collect();
247
248 let node = EntityNode {
249 uuid: entity.id,
250 name: entity.name.clone(),
251 labels: if labels.is_empty() {
252 vec![EntityLabel::Concept]
253 } else {
254 labels
255 },
256 created_at: entity.created_at,
257 last_seen_at: entity.last_seen_at,
258 mention_count: 1,
259 summary: entity.summary.clone(),
260 attributes: entity.attributes.clone(),
261 name_embedding: None,
262 salience: 0.5,
263 is_proper_noun: true,
264 };
265
266 match graph.add_entity(node) {
267 Ok(_) => imported += 1,
268 Err(e) => errors.push(format!("Entity '{}': {}", entity.name, e)),
269 }
270 }
271
272 (imported, errors)
273}
274
275pub fn import_graph_relationships(
279 kg: &MifKnowledgeGraph,
280 graph: &GraphMemory,
281 vendor_extensions: &HashMap<String, serde_json::Value>,
282) -> (usize, Vec<String>) {
283 let mut imported = 0;
284 let mut errors = Vec::new();
285
286 let edge_meta = vendor_extensions
288 .get("shodh-memory")
289 .and_then(|v| v.get("edge_metadata"))
290 .and_then(|v| v.as_object());
291
292 for rel in &kg.relationships {
293 let relation_type = parse_relation_type(&rel.relation_type);
294 let strength = rel.confidence.unwrap_or(0.5);
295
296 let (ltp_status, tier, activation_count) = if let Some(meta) = edge_meta {
298 if let Some(em) = meta.get(&rel.id.to_string()) {
299 let ltp = em
300 .get("ltp_status")
301 .and_then(|v| v.as_str())
302 .map(parse_ltp_status)
303 .unwrap_or_default();
304 let tier = em
305 .get("tier")
306 .and_then(|v| v.as_str())
307 .map(parse_edge_tier)
308 .unwrap_or_default();
309 let count = em
310 .get("activation_count")
311 .and_then(|v| v.as_u64())
312 .unwrap_or(1) as u32;
313 (ltp, tier, count)
314 } else {
315 (LtpStatus::None, EdgeTier::L1Working, 1)
316 }
317 } else {
318 (LtpStatus::None, EdgeTier::L1Working, 1)
319 };
320
321 let edge = RelationshipEdge {
322 uuid: rel.id,
323 from_entity: rel.source_entity_id,
324 to_entity: rel.target_entity_id,
325 relation_type,
326 strength,
327 created_at: rel.created_at,
328 valid_at: rel.valid_at,
329 invalidated_at: rel.invalidated_at,
330 source_episode_id: None,
331 context: rel.context.clone(),
332 last_activated: rel.created_at,
333 activation_count,
334 ltp_status,
335 tier,
336 activation_timestamps: None,
337 entity_confidence: rel.confidence,
338 };
339
340 match graph.add_relationship(edge) {
341 Ok(_) => imported += 1,
342 Err(e) => errors.push(format!("Edge {}: {}", rel.id, e)),
343 }
344 }
345
346 (imported, errors)
347}
348
349pub fn validate_version(doc: &MifDocument) -> Result<()> {
351 if !doc.mif_version.starts_with("2.") && !doc.mif_version.starts_with("1.") {
352 bail!(
353 "Unsupported MIF version: {}. Supported: 1.x, 2.x",
354 doc.mif_version
355 );
356 }
357 Ok(())
358}
359
360pub(crate) fn parse_experience_type(s: &str) -> ExperienceType {
365 match s.to_lowercase().as_str() {
366 "observation" => ExperienceType::Observation,
367 "decision" => ExperienceType::Decision,
368 "learning" => ExperienceType::Learning,
369 "error" => ExperienceType::Error,
370 "discovery" => ExperienceType::Discovery,
371 "pattern" => ExperienceType::Pattern,
372 "context" => ExperienceType::Context,
373 "task" => ExperienceType::Task,
374 "code_edit" | "codeedit" => ExperienceType::CodeEdit,
375 "file_access" | "fileaccess" => ExperienceType::FileAccess,
376 "search" => ExperienceType::Search,
377 "command" => ExperienceType::Command,
378 "conversation" => ExperienceType::Conversation,
379 "intention" => ExperienceType::Intention,
380 _ => ExperienceType::Observation,
381 }
382}
383
384fn parse_todo_status(s: &str) -> TodoStatus {
385 match s {
386 "backlog" => TodoStatus::Backlog,
387 "todo" => TodoStatus::Todo,
388 "in_progress" => TodoStatus::InProgress,
389 "blocked" => TodoStatus::Blocked,
390 "done" => TodoStatus::Done,
391 "cancelled" => TodoStatus::Cancelled,
392 _ => TodoStatus::Todo,
393 }
394}
395
396fn parse_todo_priority(s: &str) -> TodoPriority {
397 match s {
398 "urgent" | "!!!" => TodoPriority::Urgent,
399 "high" | "!!" => TodoPriority::High,
400 "medium" | "!" => TodoPriority::Medium,
401 "low" => TodoPriority::Low,
402 "none" | "" => TodoPriority::None,
403 _ => TodoPriority::Medium,
404 }
405}
406
407fn parse_comment_type(s: &str) -> crate::memory::types::TodoCommentType {
408 use crate::memory::types::TodoCommentType;
409 match s {
410 "comment" => TodoCommentType::Comment,
411 "progress" => TodoCommentType::Progress,
412 "resolution" => TodoCommentType::Resolution,
413 "activity" => TodoCommentType::Activity,
414 _ => TodoCommentType::Comment,
415 }
416}
417
418fn parse_project_status(s: &str) -> ProjectStatus {
419 match s {
420 "active" => ProjectStatus::Active,
421 "onhold" | "on_hold" => ProjectStatus::OnHold,
422 "completed" => ProjectStatus::Completed,
423 "archived" => ProjectStatus::Archived,
424 _ => ProjectStatus::Active,
425 }
426}
427
428fn parse_reminder_status(s: &str) -> ProspectiveTaskStatus {
429 match s {
430 "pending" => ProspectiveTaskStatus::Pending,
431 "triggered" => ProspectiveTaskStatus::Triggered,
432 "dismissed" => ProspectiveTaskStatus::Dismissed,
433 "expired" => ProspectiveTaskStatus::Expired,
434 _ => ProspectiveTaskStatus::Pending,
435 }
436}
437
438fn parse_entity_label(s: &str) -> EntityLabel {
439 match s.to_lowercase().as_str() {
440 "person" => EntityLabel::Person,
441 "organization" => EntityLabel::Organization,
442 "location" => EntityLabel::Location,
443 "technology" => EntityLabel::Technology,
444 "concept" => EntityLabel::Concept,
445 "event" => EntityLabel::Event,
446 "date" => EntityLabel::Date,
447 "product" => EntityLabel::Product,
448 "skill" => EntityLabel::Skill,
449 "keyword" => EntityLabel::Keyword,
450 other => EntityLabel::Other(other.to_string()),
451 }
452}
453
454pub(crate) fn parse_relation_type(s: &str) -> RelationType {
455 match s {
456 "works_with" | "workswith" => RelationType::WorksWith,
457 "works_at" | "worksat" => RelationType::WorksAt,
458 "employed_by" | "employedby" => RelationType::EmployedBy,
459 "part_of" | "partof" => RelationType::PartOf,
460 "contains" => RelationType::Contains,
461 "owned_by" | "ownedby" => RelationType::OwnedBy,
462 "located_in" | "locatedin" => RelationType::LocatedIn,
463 "located_at" | "locatedat" => RelationType::LocatedAt,
464 "uses" => RelationType::Uses,
465 "created_by" | "createdby" => RelationType::CreatedBy,
466 "developed_by" | "developedby" => RelationType::DevelopedBy,
467 "causes" => RelationType::Causes,
468 "results_in" | "resultsin" => RelationType::ResultsIn,
469 "learned" => RelationType::Learned,
470 "knows" => RelationType::Knows,
471 "teaches" => RelationType::Teaches,
472 "related_to" | "relatedto" => RelationType::RelatedTo,
473 "associated_with" | "associatedwith" => RelationType::AssociatedWith,
474 "co_retrieved" | "coretrieved" => RelationType::CoRetrieved,
475 "co_occurs" | "cooccurs" => RelationType::CoOccurs,
476 other => RelationType::Custom(other.to_string()),
477 }
478}
479
480fn parse_ltp_status(s: &str) -> LtpStatus {
481 match s {
482 "None" => LtpStatus::None,
483 "Weekly" => LtpStatus::Weekly,
484 "Full" => LtpStatus::Full,
485 s if s.starts_with("Burst") => LtpStatus::Burst {
486 detected_at: Utc::now(),
487 },
488 _ => LtpStatus::None,
489 }
490}
491
492fn parse_edge_tier(s: &str) -> EdgeTier {
493 match s {
494 "L1Working" => EdgeTier::L1Working,
495 "L2Episodic" => EdgeTier::L2Episodic,
496 "L3Semantic" => EdgeTier::L3Semantic,
497 _ => EdgeTier::L1Working,
498 }
499}