1pub mod algorithms;
21pub mod embed;
22pub mod error;
23pub mod export;
24pub mod extension;
25pub mod functions;
26pub mod graph;
27pub mod migrate;
28pub mod rag;
29pub mod schema;
30pub mod vector;
31
32pub use algorithms::{
33 analyze_graph, connected_components, louvain_communities, pagerank, CommunityResult,
34 PageRankConfig,
35};
36pub use embed::{
37 check_dependencies, get_entities_needing_embedding, EmbeddingConfig, EmbeddingGenerator,
38 EmbeddingStats,
39};
40pub use error::{Error, Result};
41pub use export::{D3ExportGraph, D3ExportMetadata, D3Link, D3Node, DotConfig};
42pub use extension::sqlite3_sqlite_knowledge_graph_init;
43pub use functions::register_functions;
44pub use graph::{Direction, GraphStats, PathStep, TraversalNode, TraversalPath, TraversalQuery};
45pub use graph::{Entity, Neighbor, Relation};
46pub use graph::{HigherOrderNeighbor, HigherOrderPath, HigherOrderPathStep, Hyperedge};
47pub use migrate::{
48 build_relationships, migrate_all, migrate_papers, migrate_skills, MigrationStats,
49};
50pub use rag::{embedder::Embedder, embedder::FixedEmbedder, RagConfig, RagEngine, RagResult};
51pub use schema::{create_schema, schema_exists};
52pub use vector::{cosine_similarity, SearchResult, VectorStore};
53pub use vector::{TurboQuantConfig, TurboQuantIndex, TurboQuantStats};
54
55use rusqlite::Connection;
56use serde::{Deserialize, Serialize};
57
58#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct SearchResultWithEntity {
61 pub entity: Entity,
62 pub similarity: f32,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct GraphContext {
68 pub root_entity: Entity,
69 pub neighbors: Vec<Neighbor>,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct HybridSearchResult {
75 pub entity: Entity,
76 pub similarity: f32,
77 pub context: Option<GraphContext>,
78}
79
80#[derive(Debug)]
82pub struct KnowledgeGraph {
83 conn: Connection,
84}
85
86impl KnowledgeGraph {
87 pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
89 let conn = Connection::open(path)?;
90
91 conn.execute("PRAGMA foreign_keys = ON", [])?;
93
94 if !schema_exists(&conn)? {
96 create_schema(&conn)?;
97 }
98
99 register_functions(&conn)?;
101
102 Ok(Self { conn })
103 }
104
105 pub fn open_in_memory() -> Result<Self> {
107 let conn = Connection::open_in_memory()?;
108
109 conn.execute("PRAGMA foreign_keys = ON", [])?;
111
112 create_schema(&conn)?;
114
115 register_functions(&conn)?;
117
118 Ok(Self { conn })
119 }
120
121 pub fn connection(&self) -> &Connection {
123 &self.conn
124 }
125
126 pub fn transaction(&self) -> Result<rusqlite::Transaction<'_>> {
128 Ok(self.conn.unchecked_transaction()?)
129 }
130
131 pub fn insert_entity(&self, entity: &Entity) -> Result<i64> {
133 graph::insert_entity(&self.conn, entity)
134 }
135
136 pub fn get_entity(&self, id: i64) -> Result<Entity> {
138 graph::get_entity(&self.conn, id)
139 }
140
141 pub fn list_entities(
143 &self,
144 entity_type: Option<&str>,
145 limit: Option<i64>,
146 ) -> Result<Vec<Entity>> {
147 graph::list_entities(&self.conn, entity_type, limit)
148 }
149
150 pub fn update_entity(&self, entity: &Entity) -> Result<()> {
152 graph::update_entity(&self.conn, entity)
153 }
154
155 pub fn delete_entity(&self, id: i64) -> Result<()> {
157 graph::delete_entity(&self.conn, id)
158 }
159
160 pub fn insert_relation(&self, relation: &Relation) -> Result<i64> {
162 graph::insert_relation(&self.conn, relation)
163 }
164
165 pub fn get_neighbors(&self, entity_id: i64, depth: u32) -> Result<Vec<Neighbor>> {
167 graph::get_neighbors(&self.conn, entity_id, depth)
168 }
169
170 pub fn insert_vector(&self, entity_id: i64, vector: Vec<f32>) -> Result<()> {
172 let store = VectorStore::new();
173 store.insert_vector(&self.conn, entity_id, vector)
174 }
175
176 pub fn search_vectors(&self, query: Vec<f32>, k: usize) -> Result<Vec<SearchResult>> {
178 let store = VectorStore::new();
179 store.search_vectors(&self.conn, query, k)
180 }
181
182 pub fn create_turboquant_index(
212 &self,
213 config: Option<TurboQuantConfig>,
214 ) -> Result<TurboQuantIndex> {
215 let config = config.unwrap_or_default();
216
217 TurboQuantIndex::new(config)
218 }
219
220 pub fn build_turboquant_index(
223 &self,
224 config: Option<TurboQuantConfig>,
225 ) -> Result<TurboQuantIndex> {
226 let dimension = self.get_vector_dimension()?.unwrap_or(384);
228
229 let config = config.unwrap_or(TurboQuantConfig {
230 dimension,
231 bit_width: 3,
232 seed: 42,
233 });
234
235 let mut index = TurboQuantIndex::new(config)?;
236
237 let vectors = self.load_all_vectors()?;
239
240 for (entity_id, vector) in vectors {
241 index.add_vector(entity_id, &vector)?;
242 }
243
244 Ok(index)
245 }
246
247 fn get_vector_dimension(&self) -> Result<Option<usize>> {
249 let result = self
250 .conn
251 .query_row("SELECT dimension FROM kg_vectors LIMIT 1", [], |row| {
252 row.get::<_, i64>(0)
253 });
254
255 match result {
256 Ok(dim) => Ok(Some(dim as usize)),
257 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
258 Err(e) => Err(e.into()),
259 }
260 }
261
262 fn load_all_vectors(&self) -> Result<Vec<(i64, Vec<f32>)>> {
264 let mut stmt = self
265 .conn
266 .prepare("SELECT entity_id, vector, dimension FROM kg_vectors")?;
267
268 let rows = stmt.query_map([], |row| {
269 let entity_id: i64 = row.get(0)?;
270 let vector_blob: Vec<u8> = row.get(1)?;
271 let dimension: i64 = row.get(2)?;
272
273 let mut vector = Vec::with_capacity(dimension as usize);
274 for chunk in vector_blob.chunks_exact(4) {
275 let bytes: [u8; 4] = chunk.try_into().unwrap();
276 vector.push(f32::from_le_bytes(bytes));
277 }
278
279 Ok((entity_id, vector))
280 })?;
281
282 let mut vectors = Vec::new();
283 for row in rows {
284 vectors.push(row?);
285 }
286
287 Ok(vectors)
288 }
289
290 pub fn insert_hyperedge(&self, hyperedge: &Hyperedge) -> Result<i64> {
294 graph::insert_hyperedge(&self.conn, hyperedge)
295 }
296
297 pub fn get_hyperedge(&self, id: i64) -> Result<Hyperedge> {
299 graph::get_hyperedge(&self.conn, id)
300 }
301
302 pub fn list_hyperedges(
304 &self,
305 hyperedge_type: Option<&str>,
306 min_arity: Option<usize>,
307 max_arity: Option<usize>,
308 limit: Option<i64>,
309 ) -> Result<Vec<Hyperedge>> {
310 graph::list_hyperedges(&self.conn, hyperedge_type, min_arity, max_arity, limit)
311 }
312
313 pub fn update_hyperedge(&self, hyperedge: &Hyperedge) -> Result<()> {
315 graph::update_hyperedge(&self.conn, hyperedge)
316 }
317
318 pub fn delete_hyperedge(&self, id: i64) -> Result<()> {
320 graph::delete_hyperedge(&self.conn, id)
321 }
322
323 pub fn get_higher_order_neighbors(
325 &self,
326 entity_id: i64,
327 min_arity: Option<usize>,
328 max_arity: Option<usize>,
329 ) -> Result<Vec<HigherOrderNeighbor>> {
330 graph::get_higher_order_neighbors(&self.conn, entity_id, min_arity, max_arity)
331 }
332
333 pub fn get_entity_hyperedges(&self, entity_id: i64) -> Result<Vec<Hyperedge>> {
335 graph::get_entity_hyperedges(&self.conn, entity_id)
336 }
337
338 pub fn kg_higher_order_bfs(
340 &self,
341 start_id: i64,
342 max_depth: u32,
343 min_arity: Option<usize>,
344 ) -> Result<Vec<TraversalNode>> {
345 graph::higher_order_bfs(&self.conn, start_id, max_depth, min_arity)
346 }
347
348 pub fn kg_higher_order_shortest_path(
350 &self,
351 from_id: i64,
352 to_id: i64,
353 max_depth: u32,
354 ) -> Result<Option<HigherOrderPath>> {
355 graph::higher_order_shortest_path(&self.conn, from_id, to_id, max_depth)
356 }
357
358 pub fn kg_hyperedge_degree(&self, entity_id: i64) -> Result<f64> {
360 graph::hyperedge_degree(&self.conn, entity_id)
361 }
362
363 pub fn kg_hypergraph_entity_pagerank(
365 &self,
366 damping: Option<f64>,
367 max_iter: Option<usize>,
368 tolerance: Option<f64>,
369 ) -> Result<std::collections::HashMap<i64, f64>> {
370 graph::hypergraph_entity_pagerank(
371 &self.conn,
372 damping.unwrap_or(0.85),
373 max_iter.unwrap_or(100),
374 tolerance.unwrap_or(1e-6),
375 )
376 }
377
378 pub fn kg_semantic_search(
383 &self,
384 query_embedding: Vec<f32>,
385 k: usize,
386 ) -> Result<Vec<SearchResultWithEntity>> {
387 let results = self.search_vectors(query_embedding, k)?;
388
389 let mut entities_with_results = Vec::new();
390 for result in results {
391 let entity = self.get_entity(result.entity_id)?;
392 entities_with_results.push(SearchResultWithEntity {
393 entity,
394 similarity: result.similarity,
395 });
396 }
397
398 Ok(entities_with_results)
399 }
400
401 pub fn kg_get_context(&self, entity_id: i64, depth: u32) -> Result<GraphContext> {
404 let root_entity = self.get_entity(entity_id)?;
405 let neighbors = self.get_neighbors(entity_id, depth)?;
406
407 Ok(GraphContext {
408 root_entity,
409 neighbors,
410 })
411 }
412
413 pub fn kg_hybrid_search(
416 &self,
417 _query_text: &str,
418 query_embedding: Vec<f32>,
419 k: usize,
420 ) -> Result<Vec<HybridSearchResult>> {
421 let semantic_results = self.kg_semantic_search(query_embedding, k)?;
422
423 let mut hybrid_results = Vec::new();
424 for result in semantic_results.iter() {
425 let entity_id = result.entity.id.ok_or(Error::EntityNotFound(0))?;
426 let context = self.kg_get_context(entity_id, 1)?; hybrid_results.push(HybridSearchResult {
429 entity: result.entity.clone(),
430 similarity: result.similarity,
431 context: Some(context),
432 });
433 }
434
435 Ok(hybrid_results)
436 }
437
438 pub fn kg_bfs_traversal(
443 &self,
444 start_id: i64,
445 direction: Direction,
446 max_depth: u32,
447 ) -> Result<Vec<TraversalNode>> {
448 let query = TraversalQuery {
449 direction,
450 max_depth,
451 ..Default::default()
452 };
453 graph::bfs_traversal(&self.conn, start_id, query)
454 }
455
456 pub fn kg_dfs_traversal(
459 &self,
460 start_id: i64,
461 direction: Direction,
462 max_depth: u32,
463 ) -> Result<Vec<TraversalNode>> {
464 let query = TraversalQuery {
465 direction,
466 max_depth,
467 ..Default::default()
468 };
469 graph::dfs_traversal(&self.conn, start_id, query)
470 }
471
472 pub fn kg_shortest_path(
475 &self,
476 from_id: i64,
477 to_id: i64,
478 max_depth: u32,
479 ) -> Result<Option<TraversalPath>> {
480 graph::find_shortest_path(&self.conn, from_id, to_id, max_depth)
481 }
482
483 pub fn kg_graph_stats(&self) -> Result<GraphStats> {
485 graph::compute_graph_stats(&self.conn)
486 }
487
488 pub fn kg_pagerank(&self, config: Option<PageRankConfig>) -> Result<Vec<(i64, f64)>> {
493 algorithms::pagerank(&self.conn, config.unwrap_or_default())
494 }
495
496 pub fn kg_louvain(&self) -> Result<CommunityResult> {
499 algorithms::louvain_communities(&self.conn)
500 }
501
502 pub fn kg_connected_components(&self) -> Result<Vec<Vec<i64>>> {
505 algorithms::connected_components(&self.conn)
506 }
507
508 pub fn kg_analyze(&self) -> Result<algorithms::GraphAnalysis> {
510 algorithms::analyze_graph(&self.conn)
511 }
512
513 pub fn export_json(&self) -> Result<D3ExportGraph> {
527 export::export_d3_json(&self.conn)
528 }
529
530 pub fn export_dot(&self, config: &DotConfig) -> Result<String> {
542 export::export_dot(&self.conn, config)
543 }
544}
545
546#[cfg(test)]
547mod tests {
548 use super::*;
549
550 #[test]
551 fn test_open_in_memory() {
552 let kg = KnowledgeGraph::open_in_memory().unwrap();
553 assert!(schema_exists(kg.connection()).unwrap());
554 }
555
556 #[test]
557 fn test_crud_operations() {
558 let kg = KnowledgeGraph::open_in_memory().unwrap();
559
560 let mut entity = Entity::new("paper", "Test Paper");
562 entity.set_property("author", serde_json::json!("John Doe"));
563 let id = kg.insert_entity(&entity).unwrap();
564
565 let retrieved = kg.get_entity(id).unwrap();
567 assert_eq!(retrieved.name, "Test Paper");
568
569 let entities = kg.list_entities(Some("paper"), None).unwrap();
571 assert_eq!(entities.len(), 1);
572
573 let mut updated = retrieved.clone();
575 updated.set_property("year", serde_json::json!(2024));
576 kg.update_entity(&updated).unwrap();
577
578 kg.delete_entity(id).unwrap();
580 let entities = kg.list_entities(None, None).unwrap();
581 assert_eq!(entities.len(), 0);
582 }
583
584 #[test]
585 fn test_graph_traversal() {
586 let kg = KnowledgeGraph::open_in_memory().unwrap();
587
588 let id1 = kg.insert_entity(&Entity::new("paper", "Paper 1")).unwrap();
590 let id2 = kg.insert_entity(&Entity::new("paper", "Paper 2")).unwrap();
591 let id3 = kg.insert_entity(&Entity::new("paper", "Paper 3")).unwrap();
592
593 kg.insert_relation(&Relation::new(id1, id2, "cites", 0.8).unwrap())
595 .unwrap();
596 kg.insert_relation(&Relation::new(id2, id3, "cites", 0.9).unwrap())
597 .unwrap();
598
599 let neighbors = kg.get_neighbors(id1, 1).unwrap();
601 assert_eq!(neighbors.len(), 1);
602
603 let neighbors = kg.get_neighbors(id1, 2).unwrap();
605 assert_eq!(neighbors.len(), 2);
606 }
607
608 #[test]
609 fn test_vector_search() {
610 let kg = KnowledgeGraph::open_in_memory().unwrap();
611
612 let id1 = kg.insert_entity(&Entity::new("paper", "Paper 1")).unwrap();
614 let id2 = kg.insert_entity(&Entity::new("paper", "Paper 2")).unwrap();
615
616 kg.insert_vector(id1, vec![1.0, 0.0, 0.0]).unwrap();
618 kg.insert_vector(id2, vec![0.0, 1.0, 0.0]).unwrap();
619
620 let results = kg.search_vectors(vec![1.0, 0.0, 0.0], 2).unwrap();
622 assert_eq!(results.len(), 2);
623 assert_eq!(results[0].entity_id, id1);
624 }
625}