1pub mod algorithms;
21pub mod embed;
22pub mod error;
23pub mod export;
24pub mod extension;
25pub mod functions;
26pub mod graph;
27pub mod migrate;
28pub mod schema;
29pub mod vector;
30
31pub use algorithms::{
32 analyze_graph, connected_components, louvain_communities, pagerank, CommunityResult,
33 PageRankConfig,
34};
35pub use embed::{
36 check_dependencies, get_entities_needing_embedding, EmbeddingConfig, EmbeddingGenerator,
37 EmbeddingStats,
38};
39pub use error::{Error, Result};
40pub use export::{D3ExportGraph, D3ExportMetadata, D3Link, D3Node, DotConfig};
41pub use extension::sqlite3_sqlite_knowledge_graph_init;
42pub use functions::register_functions;
43pub use graph::{Direction, GraphStats, PathStep, TraversalNode, TraversalPath, TraversalQuery};
44pub use graph::{Entity, Neighbor, Relation};
45pub use graph::{HigherOrderNeighbor, HigherOrderPath, HigherOrderPathStep, Hyperedge};
46pub use migrate::{
47 build_relationships, migrate_all, migrate_papers, migrate_skills, MigrationStats,
48};
49pub use schema::{create_schema, schema_exists};
50pub use vector::{cosine_similarity, SearchResult, VectorStore};
51pub use vector::{TurboQuantConfig, TurboQuantIndex, TurboQuantStats};
52
53use rusqlite::Connection;
54use serde::{Deserialize, Serialize};
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct SearchResultWithEntity {
59 pub entity: Entity,
60 pub similarity: f32,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct GraphContext {
66 pub root_entity: Entity,
67 pub neighbors: Vec<Neighbor>,
68}
69
70#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct HybridSearchResult {
73 pub entity: Entity,
74 pub similarity: f32,
75 pub context: Option<GraphContext>,
76}
77
78#[derive(Debug)]
80pub struct KnowledgeGraph {
81 conn: Connection,
82}
83
84impl KnowledgeGraph {
85 pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
87 let conn = Connection::open(path)?;
88
89 conn.execute("PRAGMA foreign_keys = ON", [])?;
91
92 if !schema_exists(&conn)? {
94 create_schema(&conn)?;
95 }
96
97 register_functions(&conn)?;
99
100 Ok(Self { conn })
101 }
102
103 pub fn open_in_memory() -> Result<Self> {
105 let conn = Connection::open_in_memory()?;
106
107 conn.execute("PRAGMA foreign_keys = ON", [])?;
109
110 create_schema(&conn)?;
112
113 register_functions(&conn)?;
115
116 Ok(Self { conn })
117 }
118
119 pub fn connection(&self) -> &Connection {
121 &self.conn
122 }
123
124 pub fn transaction(&self) -> Result<rusqlite::Transaction<'_>> {
126 Ok(self.conn.unchecked_transaction()?)
127 }
128
129 pub fn insert_entity(&self, entity: &Entity) -> Result<i64> {
131 graph::insert_entity(&self.conn, entity)
132 }
133
134 pub fn get_entity(&self, id: i64) -> Result<Entity> {
136 graph::get_entity(&self.conn, id)
137 }
138
139 pub fn list_entities(
141 &self,
142 entity_type: Option<&str>,
143 limit: Option<i64>,
144 ) -> Result<Vec<Entity>> {
145 graph::list_entities(&self.conn, entity_type, limit)
146 }
147
148 pub fn update_entity(&self, entity: &Entity) -> Result<()> {
150 graph::update_entity(&self.conn, entity)
151 }
152
153 pub fn delete_entity(&self, id: i64) -> Result<()> {
155 graph::delete_entity(&self.conn, id)
156 }
157
158 pub fn insert_relation(&self, relation: &Relation) -> Result<i64> {
160 graph::insert_relation(&self.conn, relation)
161 }
162
163 pub fn get_neighbors(&self, entity_id: i64, depth: u32) -> Result<Vec<Neighbor>> {
165 graph::get_neighbors(&self.conn, entity_id, depth)
166 }
167
168 pub fn insert_vector(&self, entity_id: i64, vector: Vec<f32>) -> Result<()> {
170 let store = VectorStore::new();
171 store.insert_vector(&self.conn, entity_id, vector)
172 }
173
174 pub fn search_vectors(&self, query: Vec<f32>, k: usize) -> Result<Vec<SearchResult>> {
176 let store = VectorStore::new();
177 store.search_vectors(&self.conn, query, k)
178 }
179
180 pub fn create_turboquant_index(
210 &self,
211 config: Option<TurboQuantConfig>,
212 ) -> Result<TurboQuantIndex> {
213 let config = config.unwrap_or_default();
214
215 TurboQuantIndex::new(config)
216 }
217
218 pub fn build_turboquant_index(
221 &self,
222 config: Option<TurboQuantConfig>,
223 ) -> Result<TurboQuantIndex> {
224 let dimension = self.get_vector_dimension()?.unwrap_or(384);
226
227 let config = config.unwrap_or(TurboQuantConfig {
228 dimension,
229 bit_width: 3,
230 seed: 42,
231 });
232
233 let mut index = TurboQuantIndex::new(config)?;
234
235 let vectors = self.load_all_vectors()?;
237
238 for (entity_id, vector) in vectors {
239 index.add_vector(entity_id, &vector)?;
240 }
241
242 Ok(index)
243 }
244
245 fn get_vector_dimension(&self) -> Result<Option<usize>> {
247 let result = self
248 .conn
249 .query_row("SELECT dimension FROM kg_vectors LIMIT 1", [], |row| {
250 row.get::<_, i64>(0)
251 });
252
253 match result {
254 Ok(dim) => Ok(Some(dim as usize)),
255 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
256 Err(e) => Err(e.into()),
257 }
258 }
259
260 fn load_all_vectors(&self) -> Result<Vec<(i64, Vec<f32>)>> {
262 let mut stmt = self
263 .conn
264 .prepare("SELECT entity_id, vector, dimension FROM kg_vectors")?;
265
266 let rows = stmt.query_map([], |row| {
267 let entity_id: i64 = row.get(0)?;
268 let vector_blob: Vec<u8> = row.get(1)?;
269 let dimension: i64 = row.get(2)?;
270
271 let mut vector = Vec::with_capacity(dimension as usize);
272 for chunk in vector_blob.chunks_exact(4) {
273 let bytes: [u8; 4] = chunk.try_into().unwrap();
274 vector.push(f32::from_le_bytes(bytes));
275 }
276
277 Ok((entity_id, vector))
278 })?;
279
280 let mut vectors = Vec::new();
281 for row in rows {
282 vectors.push(row?);
283 }
284
285 Ok(vectors)
286 }
287
288 pub fn insert_hyperedge(&self, hyperedge: &Hyperedge) -> Result<i64> {
292 graph::insert_hyperedge(&self.conn, hyperedge)
293 }
294
295 pub fn get_hyperedge(&self, id: i64) -> Result<Hyperedge> {
297 graph::get_hyperedge(&self.conn, id)
298 }
299
300 pub fn list_hyperedges(
302 &self,
303 hyperedge_type: Option<&str>,
304 min_arity: Option<usize>,
305 max_arity: Option<usize>,
306 limit: Option<i64>,
307 ) -> Result<Vec<Hyperedge>> {
308 graph::list_hyperedges(&self.conn, hyperedge_type, min_arity, max_arity, limit)
309 }
310
311 pub fn update_hyperedge(&self, hyperedge: &Hyperedge) -> Result<()> {
313 graph::update_hyperedge(&self.conn, hyperedge)
314 }
315
316 pub fn delete_hyperedge(&self, id: i64) -> Result<()> {
318 graph::delete_hyperedge(&self.conn, id)
319 }
320
321 pub fn get_higher_order_neighbors(
323 &self,
324 entity_id: i64,
325 min_arity: Option<usize>,
326 max_arity: Option<usize>,
327 ) -> Result<Vec<HigherOrderNeighbor>> {
328 graph::get_higher_order_neighbors(&self.conn, entity_id, min_arity, max_arity)
329 }
330
331 pub fn get_entity_hyperedges(&self, entity_id: i64) -> Result<Vec<Hyperedge>> {
333 graph::get_entity_hyperedges(&self.conn, entity_id)
334 }
335
336 pub fn kg_higher_order_bfs(
338 &self,
339 start_id: i64,
340 max_depth: u32,
341 min_arity: Option<usize>,
342 ) -> Result<Vec<TraversalNode>> {
343 graph::higher_order_bfs(&self.conn, start_id, max_depth, min_arity)
344 }
345
346 pub fn kg_higher_order_shortest_path(
348 &self,
349 from_id: i64,
350 to_id: i64,
351 max_depth: u32,
352 ) -> Result<Option<HigherOrderPath>> {
353 graph::higher_order_shortest_path(&self.conn, from_id, to_id, max_depth)
354 }
355
356 pub fn kg_hyperedge_degree(&self, entity_id: i64) -> Result<f64> {
358 graph::hyperedge_degree(&self.conn, entity_id)
359 }
360
361 pub fn kg_hypergraph_entity_pagerank(
363 &self,
364 damping: Option<f64>,
365 max_iter: Option<usize>,
366 tolerance: Option<f64>,
367 ) -> Result<std::collections::HashMap<i64, f64>> {
368 graph::hypergraph_entity_pagerank(
369 &self.conn,
370 damping.unwrap_or(0.85),
371 max_iter.unwrap_or(100),
372 tolerance.unwrap_or(1e-6),
373 )
374 }
375
376 pub fn kg_semantic_search(
381 &self,
382 query_embedding: Vec<f32>,
383 k: usize,
384 ) -> Result<Vec<SearchResultWithEntity>> {
385 let results = self.search_vectors(query_embedding, k)?;
386
387 let mut entities_with_results = Vec::new();
388 for result in results {
389 let entity = self.get_entity(result.entity_id)?;
390 entities_with_results.push(SearchResultWithEntity {
391 entity,
392 similarity: result.similarity,
393 });
394 }
395
396 Ok(entities_with_results)
397 }
398
399 pub fn kg_get_context(&self, entity_id: i64, depth: u32) -> Result<GraphContext> {
402 let root_entity = self.get_entity(entity_id)?;
403 let neighbors = self.get_neighbors(entity_id, depth)?;
404
405 Ok(GraphContext {
406 root_entity,
407 neighbors,
408 })
409 }
410
411 pub fn kg_hybrid_search(
414 &self,
415 _query_text: &str,
416 query_embedding: Vec<f32>,
417 k: usize,
418 ) -> Result<Vec<HybridSearchResult>> {
419 let semantic_results = self.kg_semantic_search(query_embedding, k)?;
420
421 let mut hybrid_results = Vec::new();
422 for result in semantic_results.iter() {
423 let entity_id = result.entity.id.ok_or(Error::EntityNotFound(0))?;
424 let context = self.kg_get_context(entity_id, 1)?; hybrid_results.push(HybridSearchResult {
427 entity: result.entity.clone(),
428 similarity: result.similarity,
429 context: Some(context),
430 });
431 }
432
433 Ok(hybrid_results)
434 }
435
436 pub fn kg_bfs_traversal(
441 &self,
442 start_id: i64,
443 direction: Direction,
444 max_depth: u32,
445 ) -> Result<Vec<TraversalNode>> {
446 let query = TraversalQuery {
447 direction,
448 max_depth,
449 ..Default::default()
450 };
451 graph::bfs_traversal(&self.conn, start_id, query)
452 }
453
454 pub fn kg_dfs_traversal(
457 &self,
458 start_id: i64,
459 direction: Direction,
460 max_depth: u32,
461 ) -> Result<Vec<TraversalNode>> {
462 let query = TraversalQuery {
463 direction,
464 max_depth,
465 ..Default::default()
466 };
467 graph::dfs_traversal(&self.conn, start_id, query)
468 }
469
470 pub fn kg_shortest_path(
473 &self,
474 from_id: i64,
475 to_id: i64,
476 max_depth: u32,
477 ) -> Result<Option<TraversalPath>> {
478 graph::find_shortest_path(&self.conn, from_id, to_id, max_depth)
479 }
480
481 pub fn kg_graph_stats(&self) -> Result<GraphStats> {
483 graph::compute_graph_stats(&self.conn)
484 }
485
486 pub fn kg_pagerank(&self, config: Option<PageRankConfig>) -> Result<Vec<(i64, f64)>> {
491 algorithms::pagerank(&self.conn, config.unwrap_or_default())
492 }
493
494 pub fn kg_louvain(&self) -> Result<CommunityResult> {
497 algorithms::louvain_communities(&self.conn)
498 }
499
500 pub fn kg_connected_components(&self) -> Result<Vec<Vec<i64>>> {
503 algorithms::connected_components(&self.conn)
504 }
505
506 pub fn kg_analyze(&self) -> Result<algorithms::GraphAnalysis> {
508 algorithms::analyze_graph(&self.conn)
509 }
510
511 pub fn export_json(&self) -> Result<D3ExportGraph> {
525 export::export_d3_json(&self.conn)
526 }
527
528 pub fn export_dot(&self, config: &DotConfig) -> Result<String> {
540 export::export_dot(&self.conn, config)
541 }
542}
543
544#[cfg(test)]
545mod tests {
546 use super::*;
547
548 #[test]
549 fn test_open_in_memory() {
550 let kg = KnowledgeGraph::open_in_memory().unwrap();
551 assert!(schema_exists(kg.connection()).unwrap());
552 }
553
554 #[test]
555 fn test_crud_operations() {
556 let kg = KnowledgeGraph::open_in_memory().unwrap();
557
558 let mut entity = Entity::new("paper", "Test Paper");
560 entity.set_property("author", serde_json::json!("John Doe"));
561 let id = kg.insert_entity(&entity).unwrap();
562
563 let retrieved = kg.get_entity(id).unwrap();
565 assert_eq!(retrieved.name, "Test Paper");
566
567 let entities = kg.list_entities(Some("paper"), None).unwrap();
569 assert_eq!(entities.len(), 1);
570
571 let mut updated = retrieved.clone();
573 updated.set_property("year", serde_json::json!(2024));
574 kg.update_entity(&updated).unwrap();
575
576 kg.delete_entity(id).unwrap();
578 let entities = kg.list_entities(None, None).unwrap();
579 assert_eq!(entities.len(), 0);
580 }
581
582 #[test]
583 fn test_graph_traversal() {
584 let kg = KnowledgeGraph::open_in_memory().unwrap();
585
586 let id1 = kg.insert_entity(&Entity::new("paper", "Paper 1")).unwrap();
588 let id2 = kg.insert_entity(&Entity::new("paper", "Paper 2")).unwrap();
589 let id3 = kg.insert_entity(&Entity::new("paper", "Paper 3")).unwrap();
590
591 kg.insert_relation(&Relation::new(id1, id2, "cites", 0.8).unwrap())
593 .unwrap();
594 kg.insert_relation(&Relation::new(id2, id3, "cites", 0.9).unwrap())
595 .unwrap();
596
597 let neighbors = kg.get_neighbors(id1, 1).unwrap();
599 assert_eq!(neighbors.len(), 1);
600
601 let neighbors = kg.get_neighbors(id1, 2).unwrap();
603 assert_eq!(neighbors.len(), 2);
604 }
605
606 #[test]
607 fn test_vector_search() {
608 let kg = KnowledgeGraph::open_in_memory().unwrap();
609
610 let id1 = kg.insert_entity(&Entity::new("paper", "Paper 1")).unwrap();
612 let id2 = kg.insert_entity(&Entity::new("paper", "Paper 2")).unwrap();
613
614 kg.insert_vector(id1, vec![1.0, 0.0, 0.0]).unwrap();
616 kg.insert_vector(id2, vec![0.0, 1.0, 0.0]).unwrap();
617
618 let results = kg.search_vectors(vec![1.0, 0.0, 0.0], 2).unwrap();
620 assert_eq!(results.len(), 2);
621 assert_eq!(results[0].entity_id, id1);
622 }
623}