1use crate::schema::col;
21use crate::store::{ArrowGraphStore, QuerySpec, StoreError, Triple};
22use crate::triple_store::{StoredTriple, batches_to_stored_triples};
23
24use arrow::array::StringArray;
25use std::collections::HashMap;
26
/// Builds the prefix table every new store starts with.
///
/// Maps the short names `rdf`, `rdfs`, `owl`, `xsd`, `foaf` and `prov` to
/// their namespace URIs.
fn default_prefixes() -> HashMap<String, String> {
    // (prefix, namespace URI) pairs; kept as a static table so adding a
    // vocabulary is a one-line change.
    const BUILTIN: [(&str, &str); 6] = [
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema#"),
        ("foaf", "http://xmlns.com/foaf/0.1/"),
        ("prov", "http://www.w3.org/ns/prov#"),
    ];

    BUILTIN
        .iter()
        .map(|(prefix, namespace)| (prefix.to_string(), namespace.to_string()))
        .collect()
}
44
/// A question the knowledge graph could not answer well, recorded so it can
/// be revisited later.
///
/// Derives `PartialEq` so gaps can be compared directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct KnowledgeGap {
    /// The question text as it was recorded.
    pub question: String,
    /// Keywords associated with the question.
    pub keywords: Vec<String>,
    /// Confidence value supplied by the caller when the gap was recorded.
    // NOTE(review): the expected range (presumably 0.0..=1.0) is not enforced here.
    pub confidence: f64,
    /// Concepts the caller identified as missing from the graph.
    pub missing_concepts: Vec<String>,
    /// `true` once the gap has been marked as resolved.
    pub resolved: bool,
}
54
/// Summary counters describing the contents of a knowledge-graph store.
///
/// Derives `PartialEq`/`Eq` for direct comparison and `Default` for an
/// all-zero value.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct KgStats {
    /// Total number of triples counted.
    pub total_triples: usize,
    /// Number of distinct subject values.
    pub unique_subjects: usize,
    /// Number of distinct predicate values.
    pub unique_predicates: usize,
    /// Number of distinct object values.
    pub unique_objects: usize,
    /// Number of namespace prefix bindings known to the store.
    pub namespace_count: usize,
}
64
65pub struct KgStore {
74 inner: ArrowGraphStore,
75 prefixes: HashMap<String, String>,
76 gaps: Vec<KnowledgeGap>,
77 default_namespace: String,
78 default_layer: Option<u8>,
79}
80
81impl KgStore {
82 pub fn new() -> Self {
84 Self {
85 inner: ArrowGraphStore::new(&["default"]),
86 prefixes: default_prefixes(),
87 gaps: Vec::new(),
88 default_namespace: "default".to_string(),
89 default_layer: Some(0),
90 }
91 }
92
93 pub fn with_config(namespaces: &[&str], default_namespace: &str, layer: Option<u8>) -> Self {
95 Self {
96 inner: ArrowGraphStore::new(namespaces),
97 prefixes: default_prefixes(),
98 gaps: Vec::new(),
99 default_namespace: default_namespace.to_string(),
100 default_layer: layer,
101 }
102 }
103
104 pub fn bind_prefix(&mut self, prefix: &str, uri: &str) {
108 self.prefixes.insert(prefix.to_string(), uri.to_string());
109 }
110
111 pub fn expand_uri(&self, value: &str) -> String {
114 if let Some(idx) = value.find(':') {
115 let prefix = &value[..idx];
116 let local = &value[idx + 1..];
117 if let Some(ns_uri) = self.prefixes.get(prefix) {
118 return format!("{ns_uri}{local}");
119 }
120 }
121 value.to_string()
122 }
123
124 pub fn compact_uri(&self, uri: &str) -> String {
127 let mut sorted: Vec<_> = self.prefixes.iter().collect();
128 sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
129
130 for (prefix, ns_uri) in sorted {
131 if let Some(local) = uri.strip_prefix(ns_uri.as_str()) {
132 return format!("{prefix}:{local}");
133 }
134 }
135 uri.to_string()
136 }
137
138 pub fn prefixes(&self) -> &HashMap<String, String> {
140 &self.prefixes
141 }
142
143 pub fn add_triple(
147 &mut self,
148 subject: &str,
149 predicate: &str,
150 object: &str,
151 source: Option<&str>,
152 confidence: f64,
153 ) -> Result<String, StoreError> {
154 let triple = Triple {
155 subject: self.expand_uri(subject),
156 predicate: self.expand_uri(predicate),
157 object: self.expand_uri(object),
158 graph: None,
159 confidence: Some(confidence),
160 source_document: source.map(|s| s.to_string()),
161 source_chunk_id: None,
162 extracted_by: source.map(|s| s.to_string()),
163 caused_by: None,
164 derived_from: None,
165 consolidated_at: None,
166 };
167 self.inner
168 .add_triple(&triple, &self.default_namespace, self.default_layer)
169 }
170
171 pub fn add_triples(
173 &mut self,
174 triples: &[(&str, &str, &str, f64)],
175 source: Option<&str>,
176 ) -> Result<Vec<String>, StoreError> {
177 let ts: Vec<Triple> = triples
178 .iter()
179 .map(|(s, p, o, conf)| Triple {
180 subject: self.expand_uri(s),
181 predicate: self.expand_uri(p),
182 object: self.expand_uri(o),
183 graph: None,
184 confidence: Some(*conf),
185 source_document: source.map(|s| s.to_string()),
186 source_chunk_id: None,
187 extracted_by: source.map(|s| s.to_string()),
188 caused_by: None,
189 derived_from: None,
190 consolidated_at: None,
191 })
192 .collect();
193 self.inner
194 .add_batch(&ts, &self.default_namespace, self.default_layer)
195 }
196
197 pub fn query(
199 &self,
200 subject: Option<&str>,
201 predicate: Option<&str>,
202 object: Option<&str>,
203 ) -> Result<Vec<StoredTriple>, StoreError> {
204 let spec = QuerySpec {
205 subject: subject.map(|s| self.expand_uri(s)),
206 predicate: predicate.map(|s| self.expand_uri(s)),
207 object: object.map(|s| self.expand_uri(s)),
208 ..Default::default()
209 };
210 let batches = self.inner.query(&spec)?;
211 Ok(batches_to_stored_triples(&batches))
212 }
213
214 pub fn search_by_keywords(&self, keywords: &[&str]) -> Vec<(StoredTriple, String)> {
216 let spec = QuerySpec::default();
217 let batches = self.inner.query(&spec).unwrap_or_default();
218 let mut results = Vec::new();
219
220 for batch in &batches {
221 let subjects = batch
222 .column(col::SUBJECT)
223 .as_any()
224 .downcast_ref::<StringArray>()
225 .expect("subject column");
226 let predicates = batch
227 .column(col::PREDICATE)
228 .as_any()
229 .downcast_ref::<StringArray>()
230 .expect("predicate column");
231 let objects = batch
232 .column(col::OBJECT)
233 .as_any()
234 .downcast_ref::<StringArray>()
235 .expect("object column");
236
237 for i in 0..batch.num_rows() {
238 let s = subjects.value(i).to_lowercase();
239 let p = predicates.value(i).to_lowercase();
240 let o = objects.value(i).to_lowercase();
241
242 for kw in keywords {
243 let kw_lower = kw.to_lowercase();
244 if s.contains(&kw_lower) || p.contains(&kw_lower) || o.contains(&kw_lower) {
245 results.push((
246 crate::triple_store::extract_stored_triple(batch, i),
247 kw.to_string(),
248 ));
249 break;
250 }
251 }
252 }
253 }
254 results
255 }
256
257 pub fn clear(&mut self) {
259 self.inner.clear();
260 }
261
262 pub fn record_knowledge_gap(
266 &mut self,
267 question: &str,
268 keywords: &[&str],
269 confidence: f64,
270 missing_concepts: &[&str],
271 ) -> usize {
272 let gap = KnowledgeGap {
273 question: question.to_string(),
274 keywords: keywords.iter().map(|s| s.to_string()).collect(),
275 confidence,
276 missing_concepts: missing_concepts.iter().map(|s| s.to_string()).collect(),
277 resolved: false,
278 };
279 self.gaps.push(gap);
280 self.gaps.len() - 1
281 }
282
283 pub fn unresolved_gaps(&self) -> Vec<&KnowledgeGap> {
285 self.gaps.iter().filter(|g| !g.resolved).collect()
286 }
287
288 pub fn resolve_gap(&mut self, index: usize) -> bool {
290 if let Some(gap) = self.gaps.get_mut(index) {
291 gap.resolved = true;
292 true
293 } else {
294 false
295 }
296 }
297
298 pub fn statistics(&self) -> KgStats {
302 let spec = QuerySpec::default();
303 let batches = self.inner.query(&spec).unwrap_or_default();
304 let triples = batches_to_stored_triples(&batches);
305
306 let mut subjects = std::collections::HashSet::new();
307 let mut predicates = std::collections::HashSet::new();
308 let mut objects = std::collections::HashSet::new();
309
310 for t in &triples {
311 subjects.insert(t.subject.clone());
312 predicates.insert(t.predicate.clone());
313 objects.insert(t.object.clone());
314 }
315
316 KgStats {
317 total_triples: triples.len(),
318 unique_subjects: subjects.len(),
319 unique_predicates: predicates.len(),
320 unique_objects: objects.len(),
321 namespace_count: self.prefixes.len(),
322 }
323 }
324
325 pub fn len(&self) -> usize {
327 self.inner.len()
328 }
329
330 pub fn is_empty(&self) -> bool {
332 self.inner.is_empty()
333 }
334
335 pub fn inner(&self) -> &ArrowGraphStore {
337 &self.inner
338 }
339
340 pub fn inner_mut(&mut self) -> &mut ArrowGraphStore {
342 &mut self.inner
343 }
344}
345
346impl Default for KgStore {
347 fn default() -> Self {
348 Self::new()
349 }
350}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// Known prefixes expand; values without a bound prefix pass through.
    #[test]
    fn test_prefix_expand() {
        let store = KgStore::new();
        assert_eq!(
            store.expand_uri("rdf:type"),
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        );
        assert_eq!(store.expand_uri("no_prefix"), "no_prefix");
        // A colon with an unbound prefix must not be rewritten.
        assert_eq!(store.expand_uri("unknown:thing"), "unknown:thing");
    }

    /// Known namespaces compact; unknown URIs pass through.
    #[test]
    fn test_prefix_compact() {
        let store = KgStore::new();
        assert_eq!(
            store.compact_uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            "rdf:type"
        );
        assert_eq!(
            store.compact_uri("http://unknown/foo"),
            "http://unknown/foo"
        );
    }

    /// When namespaces nest, the longest matching one is used.
    #[test]
    fn test_compact_prefers_longest_namespace() {
        let mut store = KgStore::new();
        store.bind_prefix("ex", "http://example.org/");
        store.bind_prefix("exv", "http://example.org/vocab/");

        assert_eq!(
            store.compact_uri("http://example.org/vocab/Person"),
            "exv:Person"
        );
        assert_eq!(store.compact_uri("http://example.org/Alice"), "ex:Alice");
    }

    /// Terms are stored in expanded form.
    #[test]
    fn test_add_with_prefix_expansion() {
        let mut store = KgStore::new();
        store.bind_prefix("ex", "http://example.org/");
        store
            .add_triple("ex:Alice", "rdf:type", "ex:Person", None, 1.0)
            .unwrap();

        let results = store
            .query(Some("http://example.org/Alice"), None, None)
            .unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].predicate,
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        );
    }

    /// Query terms are expanded the same way as inserted terms.
    #[test]
    fn test_query_with_prefix() {
        let mut store = KgStore::new();
        store.bind_prefix("ex", "http://example.org/");
        store
            .add_triple("ex:Alice", "rdf:type", "ex:Person", None, 1.0)
            .unwrap();

        let results = store.query(Some("ex:Alice"), None, None).unwrap();
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_keyword_search() {
        let mut store = KgStore::new();
        store
            .add_triple("Alice", "knows", "Bob", None, 1.0)
            .unwrap();
        store
            .add_triple("Carol", "likes", "Dave", None, 1.0)
            .unwrap();

        let results = store.search_by_keywords(&["Alice"]);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].1, "Alice");
    }

    #[test]
    fn test_keyword_search_case_insensitive() {
        let mut store = KgStore::new();
        store
            .add_triple("Alice", "knows", "Bob", None, 1.0)
            .unwrap();

        assert_eq!(store.search_by_keywords(&["alice"]).len(), 1);
        assert_eq!(store.search_by_keywords(&["ALICE"]).len(), 1);
    }

    /// A row is reported once, tagged with the first keyword that matches,
    /// spelled exactly as the caller passed it.
    #[test]
    fn test_keyword_search_reports_first_matching_keyword() {
        let mut store = KgStore::new();
        store
            .add_triple("Alice", "knows", "Bob", None, 1.0)
            .unwrap();

        let results = store.search_by_keywords(&["zzz", "BOB", "alice"]);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].1, "BOB");

        assert!(store.search_by_keywords(&[]).is_empty());
    }

    #[test]
    fn test_knowledge_gaps() {
        let mut store = KgStore::new();
        let idx = store.record_knowledge_gap(
            "What is photosynthesis?",
            &["photosynthesis", "plants"],
            0.3,
            &["chloroplast"],
        );

        assert_eq!(store.unresolved_gaps().len(), 1);
        // Out-of-range indices are rejected and leave the gap unresolved.
        assert!(!store.resolve_gap(idx + 1));
        assert_eq!(store.unresolved_gaps().len(), 1);

        assert!(store.resolve_gap(idx));
        assert_eq!(store.unresolved_gaps().len(), 0);
    }

    #[test]
    fn test_bulk_add() {
        let mut store = KgStore::new();
        store.bind_prefix("ex", "http://example.org/");
        let ids = store
            .add_triples(
                &[
                    ("ex:A", "rdf:type", "ex:Person", 1.0),
                    ("ex:B", "rdf:type", "ex:Person", 1.0),
                    ("ex:C", "rdf:type", "ex:Person", 1.0),
                ],
                Some("bulk_import"),
            )
            .unwrap();
        assert_eq!(ids.len(), 3);
        assert_eq!(store.len(), 3);
    }

    #[test]
    fn test_statistics() {
        let mut store = KgStore::new();
        store.add_triple("s1", "p1", "o1", None, 1.0).unwrap();
        store.add_triple("s2", "p1", "o2", None, 1.0).unwrap();

        let stats = store.statistics();
        assert_eq!(stats.total_triples, 2);
        assert_eq!(stats.unique_subjects, 2);
        assert_eq!(stats.unique_predicates, 1);
        assert_eq!(stats.unique_objects, 2);
        // At least the six built-in prefixes are bound.
        assert!(stats.namespace_count >= 6);
    }

    #[test]
    fn test_with_config() {
        let store = KgStore::with_config(&["world", "code"], "world", Some(1));
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
    }
}