1use rusqlite::Connection;
4use serde_json::Value;
5use std::collections::HashMap;
6use std::fs;
7use std::path::Path;
8
9use crate::error::{Error, Result};
10use crate::graph::Entity;
11use crate::KnowledgeGraph;
12
13pub fn migrate_papers(source_db: &str, kg: &KnowledgeGraph) -> Result<i64> {
20 let source_conn = Connection::open(source_db)?;
21
22 let tx = kg.transaction()?;
23 let mut count = 0;
24
25 let mut stmt = source_conn.prepare(
27 r#"
28 SELECT arxiv_id, title, file_path, keywords, utility,
29 skill_created, last_accessed, created_at, notes
30 FROM papers
31 "#,
32 )?;
33
34 let rows = stmt.query_map([], |row| {
35 Ok((
36 row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, Option<String>>(2)?, row.get::<_, Option<String>>(3)?, row.get::<_, Option<f64>>(4)?, row.get::<_, Option<String>>(5)?, row.get::<_, Option<String>>(6)?, row.get::<_, Option<String>>(7)?, row.get::<_, Option<String>>(8)?, ))
46 })?;
47
48 for row in rows {
49 let (
50 arxiv_id,
51 title,
52 file_path,
53 keywords,
54 utility,
55 skill_created,
56 last_accessed,
57 created_at,
58 notes,
59 ) = row?;
60
61 let mut properties = HashMap::new();
62 properties.insert("arxiv_id".to_string(), Value::String(arxiv_id.clone()));
63
64 if let Some(fp) = file_path {
65 properties.insert("file_path".to_string(), Value::String(fp));
66 }
67
68 if let Some(kw) = keywords {
69 properties.insert("keywords".to_string(), Value::String(kw));
70 }
71
72 if let Some(util) = utility {
73 properties.insert(
74 "utility".to_string(),
75 Value::Number(
76 serde_json::Number::from_f64(util).unwrap_or(serde_json::Number::from(0)),
77 ),
78 );
79 }
80
81 if let Some(skill) = skill_created {
82 properties.insert("skill_created".to_string(), Value::String(skill));
83 }
84
85 if let Some(la) = last_accessed {
86 properties.insert("last_accessed".to_string(), Value::String(la));
87 }
88
89 if let Some(ca) = created_at {
90 properties.insert("created_at".to_string(), Value::String(ca));
91 }
92
93 if let Some(n) = notes {
94 properties.insert("notes".to_string(), Value::String(n));
95 }
96
97 let entity = Entity::with_properties("paper", title, properties);
98
99 let entity_id = crate::graph::insert_entity(&tx, &entity)?;
100 count += 1;
101
102 let placeholder_vector = vec![0.0_f32; 384]; crate::vector::VectorStore::new().insert_vector(&tx, entity_id, placeholder_vector)?;
105 }
106
107 tx.commit()?;
108 Ok(count)
109}
110
111pub fn migrate_skills(skills_dir: &str, kg: &KnowledgeGraph) -> Result<i64> {
118 let skills_path = Path::new(skills_dir);
119
120 if !skills_path.exists() {
121 return Err(Error::Other(format!(
122 "Skills directory not found: {}",
123 skills_dir
124 )));
125 }
126
127 let tx = kg.transaction()?;
128 let mut count = 0;
129
130 for entry in fs::read_dir(skills_path)? {
131 let entry = entry?;
132 let skill_dir = entry.path();
133
134 if skill_dir.is_dir() {
135 let skill_name = skill_dir
136 .file_name()
137 .and_then(|n| n.to_str())
138 .ok_or_else(|| Error::Other("Invalid skill directory name".to_string()))?;
139
140 let mut properties = HashMap::new();
141 properties.insert(
142 "skill_name".to_string(),
143 Value::String(skill_name.to_string()),
144 );
145
146 let skill_md_path = skill_dir.join("SKILL.md");
148 if skill_md_path.exists() {
149 let content = fs::read_to_string(&skill_md_path).unwrap_or_default();
150
151 if let Some(description) = extract_description(&content) {
153 properties.insert("description".to_string(), Value::String(description));
154 }
155
156 properties.insert("content".to_string(), Value::String(content));
157 }
158
159 let entity = Entity::with_properties("skill", skill_name, properties);
160 let entity_id = crate::graph::insert_entity(&tx, &entity)?;
161 count += 1;
162
163 let placeholder_vector = vec![0.0_f32; 384];
165 crate::vector::VectorStore::new().insert_vector(&tx, entity_id, placeholder_vector)?;
166 }
167 }
168
169 tx.commit()?;
170 Ok(count)
171}
172
173pub fn build_relationships(kg: &KnowledgeGraph) -> Result<i64> {
180 let tx = kg.transaction()?;
181 let mut count = 0;
182
183 let papers = kg.list_entities(Some("paper"), None)?;
185 let skills = kg.list_entities(Some("skill"), None)?;
186
187 let mut paper_map: HashMap<String, i64> = HashMap::new();
189 for paper in &papers {
190 if let Some(arxiv_id) = paper.get_property("arxiv_id").and_then(|v| v.as_str()) {
191 if let Some(id) = paper.id {
192 paper_map.insert(arxiv_id.to_string(), id);
193 }
194 }
195 }
196
197 let mut skill_map: HashMap<String, i64> = HashMap::new();
199 for skill in &skills {
200 if let Some(skill_name) = skill.get_property("skill_name").and_then(|v| v.as_str()) {
201 if let Some(id) = skill.id {
202 skill_map.insert(skill_name.to_string(), id);
203 }
204 }
205 }
206
207 for paper in &papers {
209 if let Some(skill_created) = paper.get_property("skill_created").and_then(|v| v.as_str()) {
210 if !skill_created.is_empty() {
211 if let Some(paper_id) = paper.id {
212 if let Some(skill_id) = skill_map.get(skill_created) {
213 let relation =
214 crate::graph::Relation::new(paper_id, *skill_id, "derived_from", 1.0)?;
215 crate::graph::insert_relation(&tx, &relation)?;
216 count += 1;
217 }
218 }
219 }
220 }
221 }
222
223 for i in 0..papers.len() {
225 for j in (i + 1)..papers.len() {
226 let paper_a = &papers[i];
227 let paper_b = &papers[j];
228
229 if let (Some(id_a), Some(id_b)) = (paper_a.id, paper_b.id) {
230 if let Some(similarity) = compute_keyword_similarity(paper_a, paper_b) {
231 if similarity > 0.3 {
232 let relation = crate::graph::Relation::new(
233 id_a,
234 id_b,
235 "related_by_keywords",
236 similarity,
237 )?;
238 crate::graph::insert_relation(&tx, &relation)?;
239 count += 1;
240 }
241 }
242 }
243 }
244 }
245
246 for i in 0..skills.len() {
248 for j in (i + 1)..skills.len() {
249 let skill_a = &skills[i];
250 let skill_b = &skills[j];
251
252 if let (Some(id_a), Some(id_b)) = (skill_a.id, skill_b.id) {
253 if let Some(similarity) = compute_skill_similarity(skill_a, skill_b) {
254 if similarity > 0.3 {
255 let relation =
256 crate::graph::Relation::new(id_a, id_b, "similar_to", similarity)?;
257 crate::graph::insert_relation(&tx, &relation)?;
258 count += 1;
259 }
260 }
261 }
262 }
263 }
264
265 tx.commit()?;
266 Ok(count)
267}
268
/// Picks a one-line description out of the text of a skill's `SKILL.md`.
///
/// The lookup runs in two stages:
///
/// 1. If the first non-blank line is a `---` fence and a closing `---` fence
///    follows, the lines in between are treated as front matter. A top-level
///    `description: value` entry with an inline value is returned, with
///    surrounding quotes removed. Empty values and block-scalar markers
///    (`>` / `|`) are not used.
/// 2. Otherwise the first line that, once trimmed, is neither empty nor a
///    Markdown heading (starting with `#`) is returned. When front matter was
///    found in stage 1, this scan starts after its closing fence.
///
/// An opening fence with no closing fence is not treated as front matter, so
/// the scan in stage 2 covers the whole text.
///
/// Returns `None` when no such line exists.
fn extract_description(content: &str) -> Option<String> {
    /// Reads an inline `description: value` entry from one front-matter line.
    fn inline_description(line: &str) -> Option<String> {
        let value = line
            .strip_prefix("description:")?
            .trim()
            .trim_matches(|c: char| c == '"' || c == '\'')
            .trim();
        // `>` and `|` introduce a multi-line value whose text sits on later lines.
        let is_block_marker = value.starts_with(|c: char| c == '>' || c == '|');
        if value.is_empty() || is_block_marker {
            None
        } else {
            Some(value.to_string())
        }
    }

    /// True for a front-matter delimiter line.
    fn is_fence(line: &str) -> bool {
        line.trim_end() == "---"
    }

    let lines: Vec<&str> = content.lines().collect();
    let mut body = &lines[..];

    let first_text = lines.iter().position(|line| !line.trim().is_empty());
    if let Some(open) = first_text {
        if is_fence(lines[open]) {
            let after_open = &lines[open + 1..];
            if let Some(close) = after_open.iter().position(|line| is_fence(line)) {
                let declared = after_open[..close]
                    .iter()
                    .find_map(|line| inline_description(line));
                if declared.is_some() {
                    return declared;
                }
                // No usable entry: keep the metadata block out of the body scan.
                body = &after_open[close + 1..];
            }
        }
    }

    body.iter()
        .map(|line| line.trim())
        .find(|line| !line.is_empty() && !line.starts_with('#'))
        .map(str::to_string)
}
283
284fn compute_keyword_similarity(paper_a: &Entity, paper_b: &Entity) -> Option<f64> {
286 let keywords_a: Vec<String> = paper_a
287 .get_property("keywords")
288 .and_then(|v| v.as_str())
289 .and_then(|s| serde_json::from_str::<Vec<String>>(s).ok())
290 .unwrap_or_default();
291
292 let keywords_b: Vec<String> = paper_b
293 .get_property("keywords")
294 .and_then(|v| v.as_str())
295 .and_then(|s| serde_json::from_str::<Vec<String>>(s).ok())
296 .unwrap_or_default();
297
298 if keywords_a.is_empty() || keywords_b.is_empty() {
299 return None;
300 }
301
302 let set_a: std::collections::HashSet<&String> = keywords_a.iter().collect();
303 let set_b: std::collections::HashSet<&String> = keywords_b.iter().collect();
304
305 let intersection = set_a.intersection(&set_b).count();
306 let union = set_a.union(&set_b).count();
307
308 if union == 0 {
309 return Some(0.0);
310 }
311
312 Some(intersection as f64 / union as f64)
313}
314
315fn compute_skill_similarity(skill_a: &Entity, skill_b: &Entity) -> Option<f64> {
317 let desc_a = skill_a
318 .get_property("description")
319 .and_then(|v| v.as_str())
320 .unwrap_or("");
321 let desc_b = skill_b
322 .get_property("description")
323 .and_then(|v| v.as_str())
324 .unwrap_or("");
325
326 if desc_a.is_empty() || desc_b.is_empty() {
327 return None;
328 }
329
330 let words_a: std::collections::HashSet<&str> = desc_a.split_whitespace().collect();
332 let words_b: std::collections::HashSet<&str> = desc_b.split_whitespace().collect();
333
334 let intersection = words_a.intersection(&words_b).count();
335 let union = words_a.union(&words_b).count();
336
337 if union == 0 {
338 return Some(0.0);
339 }
340
341 Some(intersection as f64 / union as f64)
342}
343
344pub fn migrate_all(
346 source_db: &str,
347 skills_dir: &str,
348 kg: &KnowledgeGraph,
349) -> Result<MigrationStats> {
350 let papers_count = migrate_papers(source_db, kg)?;
351 let skills_count = migrate_skills(skills_dir, kg)?;
352 let relations_count = build_relationships(kg)?;
353
354 Ok(MigrationStats {
355 papers_count,
356 skills_count,
357 relations_count,
358 })
359}
360
361#[derive(Debug, Clone, serde::Serialize)]
363pub struct MigrationStats {
364 pub papers_count: i64,
365 pub skills_count: i64,
366 pub relations_count: i64,
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Largest acceptable distance between a computed and an expected score.
    const TOLERANCE: f64 = 0.01;

    #[test]
    fn test_extract_description() {
        // The heading and blank lines are skipped; the first paragraph line wins.
        let content = "# Test Skill\n\nThis is a test description.\n\nMore content here.";
        assert_eq!(
            extract_description(content),
            Some("This is a test description.".to_string())
        );
    }

    #[test]
    fn test_keyword_similarity() {
        let mut paper_a = Entity::new("paper", "Paper A");
        paper_a.set_property("keywords", serde_json::json!(r#"["machine", "learning"]"#));

        let mut paper_b = Entity::new("paper", "Paper B");
        paper_b.set_property("keywords", serde_json::json!(r#"["machine", "vision"]"#));

        // One shared keyword ("machine") out of three distinct ones.
        let similarity = compute_keyword_similarity(&paper_a, &paper_b)
            .expect("both papers carry keywords");
        assert!(
            (similarity - 0.333).abs() < TOLERANCE,
            "unexpected keyword similarity: {}",
            similarity
        );
    }

    #[test]
    fn test_skill_similarity() {
        let mut skill_a = Entity::new("skill", "Skill A");
        skill_a.set_property(
            "description",
            serde_json::Value::String("neural network learning".to_string()),
        );

        let mut skill_b = Entity::new("skill", "Skill B");
        skill_b.set_property(
            "description",
            serde_json::Value::String("neural network vision".to_string()),
        );

        // Two shared words ("neural", "network") out of four distinct ones.
        let similarity = compute_skill_similarity(&skill_a, &skill_b)
            .expect("both skills carry descriptions");
        assert!(
            (similarity - 0.5).abs() < TOLERANCE,
            "unexpected skill similarity: {}",
            similarity
        );
    }
}