1use ahash::AHashMap;
2use itertools::Itertools;
3use memoize::memoize;
4use regex::Regex;
5use std::collections::hash_map::Entry;
6use std::sync::Arc;
7use terraphim_types::{
8 Document, Edge, IndexedDocument, Node, NormalizedTermValue, RoleName, Thesaurus,
9};
10use tokio::sync::{Mutex, MutexGuard};
11pub mod input;
12
13#[cfg(feature = "medical")]
14pub mod medical;
15#[cfg(feature = "medical")]
16pub mod medical_loaders;
17#[cfg(feature = "medical")]
18pub mod symbolic_embeddings;
19
20use aho_corasick::{AhoCorasick, MatchKind};
21use unicode_segmentation::UnicodeSegmentation;
22
/// Errors produced while building, querying, or (de)serializing a role graph.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// A node id lookup failed.
    #[error("The given node ID was not found")]
    NodeIdNotFound,
    /// An edge id lookup failed.
    #[error("The given Edge ID was not found")]
    EdgeIdNotFound,
    /// JSON (de)serialization of an `IndexedDocument`/graph failed.
    #[error("Cannot convert IndexedDocument to JSON: {0}")]
    JsonConversionError(#[from] serde_json::Error),
    /// Error bubbled up from the terraphim automata crate.
    #[error("Error while driving terraphim automata: {0}")]
    TerraphimAutomataError(#[from] terraphim_automata::TerraphimAutomataError),
    /// Aho-Corasick automaton construction failed.
    #[error("Indexing error: {0}")]
    AhoCorasickError(#[from] aho_corasick::BuildError),
}

/// Crate-local result alias over [`Error`].
type Result<T> = std::result::Result<T, Error>;
38
/// Summary counters describing the current state of a [`RoleGraph`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphStats {
    /// Number of concept nodes.
    pub node_count: usize,
    /// Number of edges between concept nodes.
    pub edge_count: usize,
    /// Number of indexed documents.
    pub document_count: usize,
    /// Number of entries in the thesaurus the automaton was built from.
    pub thesaurus_size: usize,
    /// True when nodes, edges, and documents are all non-empty.
    pub is_populated: bool,
}
48
/// Serde-friendly snapshot of a [`RoleGraph`].
///
/// The Aho-Corasick automaton itself is not serializable, so it is omitted
/// here and rebuilt from `thesaurus` when converting back (see
/// [`RoleGraph::from_serializable`]).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SerializableRoleGraph {
    /// Role this graph belongs to.
    pub role: RoleName,
    /// Concept nodes keyed by term id.
    pub nodes: AHashMap<u64, Node>,
    /// Edges keyed by their pairing id.
    pub edges: AHashMap<u64, Edge>,
    /// Indexed documents keyed by document id.
    pub documents: AHashMap<String, IndexedDocument>,
    /// Thesaurus used to (re)build the matcher.
    pub thesaurus: Thesaurus,
    /// Pattern-index -> term-id table, aligned with automaton pattern order.
    pub aho_corasick_values: Vec<u64>,
    /// Term id -> normalized term value reverse lookup.
    pub ac_reverse_nterm: AHashMap<u64, NormalizedTermValue>,
}
70
impl SerializableRoleGraph {
    /// Serializes this snapshot to compact JSON.
    pub fn to_json(&self) -> std::result::Result<String, serde_json::Error> {
        serde_json::to_string(self)
    }

    /// Serializes this snapshot to pretty-printed JSON.
    pub fn to_json_pretty(&self) -> std::result::Result<String, serde_json::Error> {
        serde_json::to_string_pretty(self)
    }

    /// Deserializes a snapshot from a JSON string.
    pub fn from_json(json: &str) -> std::result::Result<Self, serde_json::Error> {
        serde_json::from_str(json)
    }
}
87
/// In-memory knowledge graph for a single role: concept nodes, edges keyed by
/// the pairing of their endpoint ids (see [`magic_pair`]), indexed documents,
/// and an Aho-Corasick automaton used for term matching.
///
/// Not directly serializable (the automaton has no serde support); use
/// [`RoleGraph::to_serializable`] / [`RoleGraph::from_serializable`].
#[derive(Debug, Clone)]
pub struct RoleGraph {
    /// Role this graph belongs to.
    pub role: RoleName,
    /// Concept nodes keyed by term id.
    nodes: AHashMap<u64, Node>,
    /// Edges keyed by the pairing id of their two endpoint term ids.
    edges: AHashMap<u64, Edge>,
    /// Indexed documents keyed by document id.
    documents: AHashMap<String, IndexedDocument>,
    /// Thesaurus the automaton was built from.
    pub thesaurus: Thesaurus,
    /// Pattern index -> term id, aligned with the automaton's pattern order.
    aho_corasick_values: Vec<u64>,
    /// Case-insensitive, leftmost-longest matcher over all thesaurus keys.
    pub ac: AhoCorasick,
    /// Term id -> normalized term value (reverse lookup for matches).
    pub ac_reverse_nterm: AHashMap<u64, NormalizedTermValue>,
}
112
113impl RoleGraph {
    /// Creates a new `RoleGraph` for `role` backed by `thesaurus`.
    ///
    /// Async wrapper around [`Self::new_sync`]; there are no await points
    /// today, but the async signature is kept for caller compatibility.
    pub async fn new(role: RoleName, thesaurus: Thesaurus) -> Result<Self> {
        Self::new_sync(role, thesaurus)
    }

    /// Synchronous constructor: builds the Aho-Corasick automaton from the
    /// thesaurus and starts with empty node/edge/document maps.
    ///
    /// # Errors
    /// Returns [`Error::AhoCorasickError`] if the automaton cannot be built.
    pub fn new_sync(role: RoleName, thesaurus: Thesaurus) -> Result<Self> {
        let (ac, aho_corasick_values, ac_reverse_nterm) = Self::build_aho_corasick(&thesaurus)?;

        Ok(Self {
            role,
            nodes: AHashMap::new(),
            edges: AHashMap::new(),
            documents: AHashMap::new(),
            thesaurus,
            aho_corasick_values,
            ac,
            ac_reverse_nterm,
        })
    }
138
139 fn build_aho_corasick(
141 thesaurus: &Thesaurus,
142 ) -> Result<(AhoCorasick, Vec<u64>, AHashMap<u64, NormalizedTermValue>)> {
143 let mut keys = Vec::new();
144 let mut values = Vec::new();
145 let mut ac_reverse_nterm = AHashMap::new();
146
147 for (key, normalized_term) in thesaurus {
148 keys.push(key.as_str());
149 values.push(normalized_term.id);
150 ac_reverse_nterm.insert(normalized_term.id, normalized_term.value.clone());
151 }
152
153 let ac = AhoCorasick::builder()
154 .match_kind(MatchKind::LeftmostLongest)
155 .ascii_case_insensitive(true)
156 .build(keys)?;
157
158 Ok((ac, values, ac_reverse_nterm))
159 }
160
    /// Rebuilds the matcher state (`ac`, `aho_corasick_values`,
    /// `ac_reverse_nterm`) from the current `thesaurus`.
    ///
    /// Call after mutating the thesaurus, or after deserialization, to bring
    /// the automaton back in sync.
    pub fn rebuild_automata(&mut self) -> Result<()> {
        let (ac, values, ac_reverse_nterm) = Self::build_aho_corasick(&self.thesaurus)?;
        self.ac = ac;
        self.aho_corasick_values = values;
        self.ac_reverse_nterm = ac_reverse_nterm;
        Ok(())
    }
169
    /// Produces a serde-friendly snapshot of this graph.
    ///
    /// The automaton itself is omitted (it is rebuilt on load); everything
    /// else is cloned.
    pub fn to_serializable(&self) -> SerializableRoleGraph {
        SerializableRoleGraph {
            role: self.role.clone(),
            nodes: self.nodes.clone(),
            edges: self.edges.clone(),
            documents: self.documents.clone(),
            thesaurus: self.thesaurus.clone(),
            aho_corasick_values: self.aho_corasick_values.clone(),
            ac_reverse_nterm: self.ac_reverse_nterm.clone(),
        }
    }
182
183 pub async fn from_serializable(serializable: SerializableRoleGraph) -> Result<Self> {
185 let mut role_graph = RoleGraph {
186 role: serializable.role,
187 nodes: serializable.nodes,
188 edges: serializable.edges,
189 documents: serializable.documents,
190 thesaurus: serializable.thesaurus,
191 aho_corasick_values: serializable.aho_corasick_values,
192 ac: AhoCorasick::new([""])?, ac_reverse_nterm: serializable.ac_reverse_nterm,
194 };
195
196 role_graph.rebuild_automata()?;
198
199 Ok(role_graph)
200 }
201
    /// Returns the term id for every thesaurus match in `text`, in match
    /// order (leftmost-longest, case-insensitive).
    ///
    /// The result may contain duplicates when a term occurs more than once.
    pub fn find_matching_node_ids(&self, text: &str) -> Vec<u64> {
        log::trace!("Finding matching node IDs for text: '{text}'");
        self.ac
            .find_iter(text)
            // Pattern index maps 1:1 onto the term id recorded at build time.
            .map(|mat| self.aho_corasick_values[mat.pattern()])
            .collect()
    }
212
213 pub fn is_all_terms_connected_by_path(&self, text: &str) -> bool {
222 let mut targets = self.find_matching_node_ids(text);
223 targets.sort_unstable();
224 targets.dedup();
225 let k = targets.len();
226 if k <= 1 {
227 return true;
228 }
229
230 let mut adj: AHashMap<u64, ahash::AHashSet<u64>> = AHashMap::new();
232 for (node_id, node) in &self.nodes {
233 let entry = adj.entry(*node_id).or_default();
234 for edge_id in &node.connected_with {
235 if let Some(edge) = self.edges.get(edge_id) {
236 let (a, b) = magic_unpair(edge.id);
237 entry.insert(if a == *node_id { b } else { a });
238 }
239 }
240 }
241
242 if targets
244 .iter()
245 .any(|t| adj.get(t).map(|s| s.is_empty()).unwrap_or(true))
246 {
247 return false;
248 }
249
250 fn dfs(
252 current: u64,
253 remaining: &mut ahash::AHashSet<u64>,
254 adj: &AHashMap<u64, ahash::AHashSet<u64>>,
255 visited_edges: &mut ahash::AHashSet<(u64, u64)>,
256 ) -> bool {
257 if remaining.is_empty() {
258 return true;
259 }
260 if let Some(neighbors) = adj.get(¤t) {
261 for &n in neighbors {
262 let edge = if current < n {
263 (current, n)
264 } else {
265 (n, current)
266 };
267 if visited_edges.contains(&edge) {
268 continue;
269 }
270 let removed = remaining.remove(&n);
271 visited_edges.insert(edge);
272 if dfs(n, remaining, adj, visited_edges) {
273 return true;
274 }
275 visited_edges.remove(&edge);
276 if removed {
277 remaining.insert(n);
278 }
279 }
280 }
281 false
282 }
283
284 for &start in &targets {
286 let mut remaining: ahash::AHashSet<u64> = targets.iter().cloned().collect();
287 remaining.remove(&start);
288 let mut visited_edges: ahash::AHashSet<(u64, u64)> = ahash::AHashSet::new();
289 if dfs(start, &mut remaining, &adj, &mut visited_edges) {
290 return true;
291 }
292 }
293 false
294 }
295
    /// Queries indexed documents matching `query_string`.
    ///
    /// Each thesaurus term found in the query contributes, per connected edge
    /// and per document on that edge, a score of
    /// `node.rank + edge.rank + document_rank`; scores accumulate per
    /// document. Results are sorted by descending rank and paginated with
    /// `offset` (default 0) and `limit` (default unlimited).
    ///
    /// # Errors
    /// Never fails today; the `Result` is kept for interface stability.
    pub fn query_graph(
        &self,
        query_string: &str,
        offset: Option<usize>,
        limit: Option<usize>,
    ) -> Result<Vec<(String, IndexedDocument)>> {
        log::debug!("Performing graph query with string: '{query_string}'");
        let node_ids = self.find_matching_node_ids(query_string);

        if node_ids.is_empty() {
            log::debug!("No matching terms found in thesaurus for query: '{query_string}'");
            return Ok(vec![]);
        }

        if self.nodes.is_empty() {
            log::debug!("Graph has no nodes yet - no documents have been indexed");
            return Ok(vec![]);
        }

        let mut results = AHashMap::new();
        for node_id in node_ids {
            // A term may exist in the thesaurus without any document using it.
            let Some(node) = self.nodes.get(&node_id) else {
                log::trace!(
                    "Node ID {} from thesaurus not found in graph - no documents contain this term yet",
                    node_id
                );
                continue;
            };

            let Some(normalized_term) = self.ac_reverse_nterm.get(&node_id) else {
                log::warn!(
                    "Node ID {} found in graph but missing from thesaurus reverse lookup",
                    node_id
                );
                continue;
            };
            log::debug!("Processing node ID: {:?} with rank: {}", node_id, node.rank);

            for edge_id in &node.connected_with {
                let Some(edge) = self.edges.get(edge_id) else {
                    log::warn!(
                        "Edge ID {} referenced by node {} not found in edges map",
                        edge_id,
                        node_id
                    );
                    continue;
                };
                log::trace!("Processing edge ID: {:?} with rank: {}", edge_id, edge.rank);

                for (document_id, document_rank) in &edge.doc_hash {
                    let total_rank = node.rank + edge.rank + document_rank;
                    match results.entry(document_id.clone()) {
                        Entry::Vacant(e) => {
                            e.insert(IndexedDocument {
                                id: document_id.clone(),
                                matched_edges: vec![edge.clone()],
                                rank: total_rank,
                                tags: vec![normalized_term.to_string()],
                                nodes: vec![node_id],
                            });
                        }
                        Entry::Occupied(mut e) => {
                            let doc = e.get_mut();
                            doc.rank += total_rank;
                            doc.matched_edges.push(edge.clone());
                            // NOTE(review): dedup_by_key drops only consecutive
                            // duplicates, so the same edge reached via both of
                            // its endpoint nodes can survive; also, unlike
                            // query_graph_or, tags/nodes are not extended here.
                            // Confirm both are intended.
                            doc.matched_edges.dedup_by_key(|e| e.id);
                        }
                    }
                }
            }
        }

        let mut ranked_documents = results.into_iter().collect::<Vec<_>>();
        // Highest rank first.
        ranked_documents.sort_by_key(|(_, doc)| std::cmp::Reverse(doc.rank));

        let documents: Vec<_> = ranked_documents
            .into_iter()
            .skip(offset.unwrap_or(0))
            .take(limit.unwrap_or(usize::MAX))
            .collect();

        log::debug!("Query resulted in {} documents", documents.len());
        Ok(documents)
    }
430
431 pub fn query_graph_with_operators(
433 &self,
434 search_terms: &[&str],
435 operator: &terraphim_types::LogicalOperator,
436 offset: Option<usize>,
437 limit: Option<usize>,
438 ) -> Result<Vec<(String, IndexedDocument)>> {
439 use terraphim_types::LogicalOperator;
440
441 log::debug!(
442 "Performing multi-term graph query with {} terms using {:?} operator",
443 search_terms.len(),
444 operator
445 );
446
447 if search_terms.is_empty() {
448 return Ok(vec![]);
449 }
450
451 if search_terms.len() == 1 {
453 return self.query_graph(search_terms[0], offset, limit);
454 }
455
456 if self.nodes.is_empty() {
458 log::debug!("Graph has no nodes yet - no documents have been indexed");
459 return Ok(vec![]);
460 }
461
462 match operator {
463 LogicalOperator::Or => self.query_graph_or(search_terms, offset, limit),
464 LogicalOperator::And => self.query_graph_and(search_terms, offset, limit),
465 }
466 }
467
    /// OR semantics: a document ranks if it matches *any* term; ranks from
    /// all matched terms accumulate, and tags/nodes are unioned.
    fn query_graph_or(
        &self,
        search_terms: &[&str],
        offset: Option<usize>,
        limit: Option<usize>,
    ) -> Result<Vec<(String, IndexedDocument)>> {
        let mut results = AHashMap::new();

        for term in search_terms {
            let node_ids = self.find_matching_node_ids(term);

            for node_id in node_ids {
                // Term known to the thesaurus but absent from the graph: skip.
                let Some(node) = self.nodes.get(&node_id) else {
                    continue;
                };

                let Some(normalized_term) = self.ac_reverse_nterm.get(&node_id) else {
                    continue;
                };

                for edge_id in &node.connected_with {
                    let Some(edge) = self.edges.get(edge_id) else {
                        continue;
                    };

                    for (document_id, document_rank) in &edge.doc_hash {
                        // Same scoring as query_graph: node + edge + per-doc rank.
                        let total_rank = node.rank + edge.rank + document_rank;
                        match results.entry(document_id.clone()) {
                            Entry::Vacant(e) => {
                                e.insert(IndexedDocument {
                                    id: document_id.clone(),
                                    matched_edges: vec![edge.clone()],
                                    rank: total_rank,
                                    tags: vec![normalized_term.to_string()],
                                    nodes: vec![node_id],
                                });
                            }
                            Entry::Occupied(mut e) => {
                                let doc = e.get_mut();
                                doc.rank += total_rank;
                                doc.matched_edges.push(edge.clone());
                                // Only consecutive duplicates are removed here.
                                doc.matched_edges.dedup_by_key(|e| e.id);
                                if !doc.tags.contains(&normalized_term.to_string()) {
                                    doc.tags.push(normalized_term.to_string());
                                }
                                if !doc.nodes.contains(&node_id) {
                                    doc.nodes.push(node_id);
                                }
                            }
                        }
                    }
                }
            }
        }

        let mut ranked_documents = results.into_iter().collect::<Vec<_>>();
        // Highest rank first, then paginate.
        ranked_documents.sort_by_key(|(_, doc)| std::cmp::Reverse(doc.rank));

        let documents: Vec<_> = ranked_documents
            .into_iter()
            .skip(offset.unwrap_or(0))
            .take(limit.unwrap_or(usize::MAX))
            .collect();

        log::debug!("OR query resulted in {} documents", documents.len());
        Ok(documents)
    }
537
    /// AND semantics: a document ranks only if it matches *every* term.
    ///
    /// Per-term candidate sets are built first (multi-word terms fall back to
    /// their individual words when the exact phrase has no match), then
    /// intersected; surviving documents get their ranks summed and their
    /// edges/tags/nodes merged.
    fn query_graph_and(
        &self,
        search_terms: &[&str],
        offset: Option<usize>,
        limit: Option<usize>,
    ) -> Result<Vec<(String, IndexedDocument)>> {
        // One map per search term: doc id -> (accumulated doc, terms that hit it).
        let mut term_document_sets: Vec<AHashMap<String, (IndexedDocument, Vec<String>)>> =
            Vec::new();

        for term in search_terms {
            let node_ids = if term.contains(' ') {
                log::debug!("Multi-word term detected: '{}'", term);
                // Prefer an exact phrase match; fall back to per-word matches.
                let phrase_matches = self.find_matching_node_ids(term);
                if phrase_matches.is_empty() {
                    log::debug!(
                        "No exact phrase match for '{}', trying individual words",
                        term
                    );
                    term.split_whitespace()
                        .flat_map(|word| {
                            log::debug!("Searching for word: '{}'", word);
                            self.find_matching_node_ids(word)
                        })
                        .collect()
                } else {
                    log::debug!(
                        "Found {} phrase matches for '{}'",
                        phrase_matches.len(),
                        term
                    );
                    phrase_matches
                }
            } else {
                self.find_matching_node_ids(term)
            };

            log::debug!("Term '{}' matched {} node IDs", term, node_ids.len());
            let mut term_docs = AHashMap::new();

            for node_id in node_ids {
                let Some(node) = self.nodes.get(&node_id) else {
                    continue;
                };

                let Some(normalized_term) = self.ac_reverse_nterm.get(&node_id) else {
                    continue;
                };

                for edge_id in &node.connected_with {
                    let Some(edge) = self.edges.get(edge_id) else {
                        continue;
                    };

                    for (document_id, document_rank) in &edge.doc_hash {
                        // Same scoring as query_graph: node + edge + per-doc rank.
                        let total_rank = node.rank + edge.rank + document_rank;
                        match term_docs.entry(document_id.clone()) {
                            Entry::Vacant(e) => {
                                e.insert((
                                    IndexedDocument {
                                        id: document_id.clone(),
                                        matched_edges: vec![edge.clone()],
                                        rank: total_rank,
                                        tags: vec![normalized_term.to_string()],
                                        nodes: vec![node_id],
                                    },
                                    vec![term.to_string()],
                                ));
                            }
                            Entry::Occupied(mut e) => {
                                let (doc, terms) = e.get_mut();
                                doc.rank += total_rank;
                                doc.matched_edges.push(edge.clone());
                                // Only consecutive duplicates are removed here.
                                doc.matched_edges.dedup_by_key(|e| e.id);
                                if !doc.tags.contains(&normalized_term.to_string()) {
                                    doc.tags.push(normalized_term.to_string());
                                }
                                if !doc.nodes.contains(&node_id) {
                                    doc.nodes.push(node_id);
                                }
                                if !terms.contains(&term.to_string()) {
                                    terms.push(term.to_string());
                                }
                            }
                        }
                    }
                }
            }
            term_document_sets.push(term_docs);
        }

        if term_document_sets.is_empty() {
            return Ok(vec![]);
        }

        let mut final_results = AHashMap::new();
        let first_set = &term_document_sets[0];

        // Intersect: a document survives only if every per-term set contains it.
        for (doc_id, (first_doc, first_terms)) in first_set {
            let mut appears_in_all = true;
            let mut combined_doc = first_doc.clone();
            let mut all_matched_terms = first_terms.clone();

            for term_set in &term_document_sets[1..] {
                if let Some((term_doc, term_matched)) = term_set.get(doc_id) {
                    // Merge ranks, edges, tags, and nodes from this term's hit.
                    combined_doc.rank += term_doc.rank;
                    combined_doc
                        .matched_edges
                        .extend(term_doc.matched_edges.clone());
                    combined_doc.matched_edges.dedup_by_key(|e| e.id);

                    for tag in &term_doc.tags {
                        if !combined_doc.tags.contains(tag) {
                            combined_doc.tags.push(tag.clone());
                        }
                    }

                    for node in &term_doc.nodes {
                        if !combined_doc.nodes.contains(node) {
                            combined_doc.nodes.push(*node);
                        }
                    }

                    all_matched_terms.extend(term_matched.clone());
                } else {
                    appears_in_all = false;
                    break;
                }
            }

            // Each per-term set records only its own term, so a full match
            // accumulates exactly search_terms.len() matched terms.
            if appears_in_all && all_matched_terms.len() == search_terms.len() {
                final_results.insert(doc_id.clone(), combined_doc);
            }
        }

        let mut ranked_documents = final_results.into_iter().collect::<Vec<_>>();
        ranked_documents.sort_by_key(|(_, doc)| std::cmp::Reverse(doc.rank));

        let documents: Vec<_> = ranked_documents
            .into_iter()
            .skip(offset.unwrap_or(0))
            .take(limit.unwrap_or(usize::MAX))
            .collect();

        log::debug!(
            "AND query resulted in {} documents (from {} search terms)",
            documents.len(),
            search_terms.len()
        );
        Ok(documents)
    }
696
697 pub fn insert_document(&mut self, document_id: &str, document: Document) {
708 self.documents.insert(
709 document_id.to_string(),
710 IndexedDocument::from_document(document.clone()),
711 );
712 let matches = self.find_matching_node_ids(&document.to_string());
713 for (a, b) in matches.into_iter().tuple_windows() {
714 self.add_or_update_document(document_id, a, b);
715 }
716 }
717
    /// Returns `true` if a document with `document_id` has been inserted.
    pub fn has_document(&self, document_id: &str) -> bool {
        self.documents.contains_key(document_id)
    }

    /// Records that terms `x` and `y` co-occur in `document_id`: creates or
    /// updates the pairing edge between them, then both endpoint nodes.
    pub fn add_or_update_document(&mut self, document_id: &str, x: u64, y: u64) {
        // Edge id is the order-sensitive pairing of the two term ids.
        let edge = magic_pair(x, y);
        let edge = self.init_or_update_edge(edge, document_id);
        self.init_or_update_node(x, &edge);
        self.init_or_update_node(y, &edge);
    }
729
730 fn init_or_update_node(&mut self, node_id: u64, edge: &Edge) {
731 match self.nodes.entry(node_id) {
732 Entry::Vacant(_) => {
733 let node = Node::new(node_id, edge.clone());
734 self.nodes.insert(node.id, node);
735 }
736 Entry::Occupied(entry) => {
737 let node = entry.into_mut();
738 node.rank += 1;
739 node.connected_with.insert(edge.id);
740 }
741 };
742 }
743
    /// Number of concept nodes currently in the graph.
    pub fn get_node_count(&self) -> usize {
        self.nodes.len()
    }

    /// Number of edges currently in the graph.
    pub fn get_edge_count(&self) -> usize {
        self.edges.len()
    }

    /// Number of indexed documents.
    pub fn get_document_count(&self) -> usize {
        self.documents.len()
    }

    /// True when the graph has at least one node, one edge, and one document.
    pub fn is_graph_populated(&self) -> bool {
        !self.nodes.is_empty() && !self.edges.is_empty() && !self.documents.is_empty()
    }

    /// Snapshot of the graph's size counters (see [`GraphStats`]).
    pub fn get_graph_stats(&self) -> GraphStats {
        GraphStats {
            node_count: self.nodes.len(),
            edge_count: self.edges.len(),
            document_count: self.documents.len(),
            thesaurus_size: self.thesaurus.len(),
            is_populated: self.is_graph_populated(),
        }
    }
774
775 pub fn validate_documents(&self) -> Vec<String> {
777 let mut warnings = Vec::new();
778
779 for (doc_id, _indexed_doc) in &self.documents {
780 let has_nodes = self.nodes.values().any(|node| {
782 node.connected_with.iter().any(|edge_id| {
783 self.edges
784 .get(edge_id)
785 .is_some_and(|edge| edge.doc_hash.contains_key(doc_id))
786 })
787 });
788
789 if !has_nodes {
790 warnings.push(format!("Document '{}' did not create any nodes (may have empty body or no thesaurus matches)", doc_id));
791 }
792 }
793
794 warnings
795 }
796
797 pub fn find_document_ids_for_term(&self, term: &str) -> Vec<String> {
799 let node_ids = self.find_matching_node_ids(term);
800 let mut document_ids = std::collections::HashSet::new();
801
802 for node_id in node_ids {
803 if let Some(node) = self.nodes.get(&node_id) {
804 for edge_id in &node.connected_with {
805 if let Some(edge) = self.edges.get(edge_id) {
806 for doc_id in edge.doc_hash.keys() {
807 document_ids.insert(doc_id.clone());
808 }
809 }
810 }
811 }
812 }
813
814 document_ids.into_iter().collect()
815 }
816
817 fn init_or_update_edge(&mut self, edge_key: u64, document_id: &str) -> Edge {
818 match self.edges.entry(edge_key) {
819 Entry::Vacant(_) => {
820 let edge = Edge::new(edge_key, document_id.to_string());
821 self.edges.insert(edge.id, edge.clone());
822 edge
823 }
824 Entry::Occupied(entry) => {
825 let edge = entry.into_mut();
826 *edge.doc_hash.entry(document_id.to_string()).or_insert(1) += 1;
827 edge.clone()
828 }
829 }
830 }
831
    /// Looks up the indexed document with `document_id`, if present.
    pub fn get_document(&self, document_id: &str) -> Option<&IndexedDocument> {
        self.documents.get(document_id)
    }

    /// Iterates over all (id, indexed document) pairs; order is unspecified.
    pub fn get_all_documents(&self) -> impl Iterator<Item = (&String, &IndexedDocument)> {
        self.documents.iter()
    }

    /// Number of indexed documents (same as [`Self::get_document_count`]).
    pub fn document_count(&self) -> usize {
        self.documents.len()
    }

    /// Read-only view of the node map.
    pub fn nodes_map(&self) -> &ahash::AHashMap<u64, Node> {
        &self.nodes
    }

    /// Read-only view of the edge map.
    pub fn edges_map(&self) -> &ahash::AHashMap<u64, Edge> {
        &self.edges
    }
856}
857
/// Cheaply clonable, async-lockable handle to a shared [`RoleGraph`].
#[derive(Debug, Clone)]
pub struct RoleGraphSync {
    // tokio Mutex so the guard can (if ever needed) be held across awaits.
    inner: Arc<Mutex<RoleGraph>>,
}
863
impl RoleGraphSync {
    /// Acquires the async mutex and returns the guard for direct access.
    pub async fn lock(&self) -> MutexGuard<'_, RoleGraph> {
        self.inner.lock().await
    }

    /// Serializes the wrapped graph to compact JSON (automaton excluded; it
    /// is rebuilt on load).
    pub async fn to_json(&self) -> Result<String> {
        let rolegraph = self.inner.lock().await;
        let serializable = rolegraph.to_serializable();
        serializable.to_json().map_err(Error::JsonConversionError)
    }

    /// Serializes the wrapped graph to pretty-printed JSON.
    pub async fn to_json_pretty(&self) -> Result<String> {
        let rolegraph = self.inner.lock().await;
        let serializable = rolegraph.to_serializable();
        serializable
            .to_json_pretty()
            .map_err(Error::JsonConversionError)
    }

    /// Deserializes a graph from JSON and wraps it in a new handle; the
    /// matcher automaton is rebuilt from the persisted thesaurus.
    pub async fn from_json(json: &str) -> Result<Self> {
        let serializable =
            SerializableRoleGraph::from_json(json).map_err(Error::JsonConversionError)?;
        let rolegraph = RoleGraph::from_serializable(serializable).await?;
        Ok(Self {
            inner: Arc::new(Mutex::new(rolegraph)),
        })
    }

    /// Produces a serde-friendly snapshot of the wrapped graph.
    pub async fn to_serializable(&self) -> Result<SerializableRoleGraph> {
        let rolegraph = self.inner.lock().await;
        Ok(rolegraph.to_serializable())
    }
}
905
/// Wraps an owned [`RoleGraph`] in a shareable [`RoleGraphSync`] handle.
impl From<RoleGraph> for RoleGraphSync {
    fn from(rolegraph: RoleGraph) -> Self {
        Self {
            inner: Arc::new(Mutex::new(rolegraph)),
        }
    }
}
913
#[macro_use]
extern crate lazy_static;
lazy_static! {
    // Splits on '?', '!', or '|' followed by whitespace — supplements the
    // Unicode sentence segmenter in split_paragraphs.
    // NOTE(review): #[macro_use] extern crate exports the macro crate-wide, so
    // submodules may also rely on lazy_static!; migrating this to
    // std::sync::LazyLock needs a crate-wide audit first.
    static ref RE: Regex = Regex::new(r"[?!|]\s+").unwrap();
}
919
920pub fn split_paragraphs(paragraphs: &str) -> Vec<&str> {
921 let sentences = UnicodeSegmentation::split_sentence_bounds(paragraphs);
922 let parts =
923 sentences.flat_map(|sentence| RE.split(sentence.trim_end_matches(char::is_whitespace)));
924 parts
925 .map(|part| part.trim())
926 .filter(|part| !part.is_empty())
927 .collect()
928}
929
/// Szudzik's pairing function: maps the ordered pair `(x, y)` to a single
/// `u64`, giving every pair of term ids a stable, reversible edge id.
/// Inverse of [`magic_unpair`].
///
/// The `#[memoize]` wrapper was removed: caching two multiplications behind a
/// global locked map costs far more than recomputing them, and the cache grew
/// without bound as new id pairs arrived.
pub fn magic_pair(x: u64, y: u64) -> u64 {
    if x >= y { x * x + x + y } else { y * y + x }
}
937
/// Inverse of [`magic_pair`]: recovers the ordered pair `(x, y)` from a
/// pairing value `z`.
///
/// Two fixes over the previous version:
/// - `#[memoize]` removed (the global locked cache cost more than the
///   arithmetic it saved and grew without bound);
/// - `f64::sqrt` is only exact up to ~2^52, so the floating-point estimate of
///   the integer square root is corrected with exact integer comparisons —
///   large pairing values no longer risk an off-by-one unpairing.
pub fn magic_unpair(z: u64) -> (u64, u64) {
    // Initial estimate; may be off by one (either direction) for large z.
    let mut q = (z as f64).sqrt() as u64;
    // Correct downward while q^2 overshoots z (checked_mul treats overflow as
    // overshoot, which is safe: an overflowing square is certainly > z).
    while q.checked_mul(q).map_or(true, |sq| sq > z) {
        q -= 1;
    }
    // Correct upward while (q+1)^2 still fits under z.
    while (q + 1).checked_mul(q + 1).is_some_and(|sq| sq <= z) {
        q += 1;
    }
    let l = z - q * q;
    if l < q { (l, q) } else { (q, l - q) }
}
953
954#[cfg(test)]
1019mod tests {
1020 use super::*;
1021
1022 use terraphim_automata::{AutomataPath, load_thesaurus};
1023 use tokio::test;
1024 use ulid::Ulid;
1025
1026 async fn load_sample_thesaurus() -> Thesaurus {
1027 load_thesaurus(&AutomataPath::local_example_full())
1028 .await
1029 .unwrap()
1030 }
1031
1032 #[test]
1033 async fn test_split_paragraphs() {
1034 let paragraph = "This is the first sentence.\n\n This is the second sentence. This is the second sentence? This is the second sentence| This is the second sentence!\n\nThis is the third sentence. Mr. John Johnson Jr. was born in the U.S.A but earned his Ph.D. in Israel before joining Nike Inc. as an engineer. He also worked at craigslist.org as a business analyst.";
1035 let sentences = split_paragraphs(paragraph);
1036 assert_eq!(sentences.len(), 9);
1037 assert_eq!(sentences[0], "This is the first sentence.");
1038 assert_eq!(sentences[1], "This is the second sentence.");
1039 assert_eq!(sentences[2], "This is the second sentence?");
1040 assert_eq!(sentences[3], "This is the second sentence");
1041 assert_eq!(sentences[4], "This is the second sentence!");
1042 assert_eq!(sentences[5], "This is the third sentence.");
1043 assert_eq!(sentences[6], "Mr.");
1044 assert_eq!(
1045 sentences[7],
1046 "John Johnson Jr. was born in the U.S.A but earned his Ph.D. in Israel before joining Nike Inc. as an engineer."
1047 );
1048 assert_eq!(
1049 sentences[8],
1050 "He also worked at craigslist.org as a business analyst."
1051 );
1052 }
1053
1054 #[test]
1055 async fn test_find_matching_node_idss() {
1056 let query = "I am a text with the word Life cycle concepts and bar and Trained operators and maintainers, project direction, some bingo words Paradigm Map and project planning, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";
1057 let role = "system operator".to_string();
1058 let rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1059 .await
1060 .unwrap();
1061 let matches = rolegraph.find_matching_node_ids(query);
1062 assert_eq!(matches.len(), 7);
1064 }
1065
1066 #[test]
1067 async fn test_find_matching_node_idss_ac_values() {
1068 let query = "life cycle framework I am a text with the word Life cycle concepts and bar and Trained operators and maintainers, project direction, some bingo words Paradigm Map and project planning, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";
1069 let role = "system operator".to_string();
1070 let rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1071 .await
1072 .unwrap();
1073 println!("rolegraph: {:?}", rolegraph);
1074 let matches = rolegraph.find_matching_node_ids(query);
1075 println!("matches: {:?}", matches);
1076 for each_match in matches.iter() {
1077 let ac_reverse_nterm = rolegraph.ac_reverse_nterm.get(each_match).unwrap();
1078 println!("{each_match} ac_reverse_nterm: {:?}", ac_reverse_nterm);
1079 }
1080 assert_eq!(
1081 rolegraph.ac_reverse_nterm.get(&matches[0]).unwrap(),
1082 &NormalizedTermValue::new("life cycle models".to_string())
1083 );
1084 }
1085
1086 #[test]
1087 async fn test_terraphim_engineer() {
1088 let role_name = "Terraphim Engineer".to_string();
1089 const DEFAULT_HAYSTACK_PATH: &str = "docs/src/";
1090 let mut docs_path = std::env::current_dir().unwrap();
1091 docs_path.pop();
1092 docs_path.pop();
1093 docs_path = docs_path.join(DEFAULT_HAYSTACK_PATH);
1094 println!("Docs path: {:?}", docs_path);
1095 let engineer_thesaurus_path = docs_path.join("Terraphim Engineer_thesaurus.json");
1096 if !engineer_thesaurus_path.exists() {
1097 eprintln!(
1098 "Engineer thesaurus not found at {:?}; skipping test_terraphim_engineer",
1099 engineer_thesaurus_path
1100 );
1101 return;
1102 }
1103 let automata_path = AutomataPath::from_local(engineer_thesaurus_path);
1104 let thesaurus = load_thesaurus(&automata_path).await.unwrap();
1105 let mut rolegraph = RoleGraph::new(role_name.into(), thesaurus.clone())
1106 .await
1107 .unwrap();
1108 let document_id = Ulid::new().to_string();
1109 let test_document = r#"
1110 This folder is an example of personal knowledge graph used for testing and fixtures
1111 terraphim-graph
1112 "#;
1113 println!("thesaurus: {:?}", thesaurus);
1114 assert_eq!(thesaurus.len(), 10);
1115 let matches = rolegraph.find_matching_node_ids(test_document);
1116 println!("Matches {:?}", matches);
1117 for (a, b) in matches.into_iter().tuple_windows() {
1118 rolegraph.add_or_update_document(&document_id, a, b);
1119 }
1120 let document = Document {
1121 stub: None,
1122 url: "/path/to/document".to_string(),
1123 tags: None,
1124 rank: None,
1125 source_haystack: None,
1126 id: document_id.clone(),
1127 title: "README".to_string(),
1128 body: test_document.to_string(),
1129 description: None,
1130 summarization: None,
1131 doc_type: terraphim_types::DocumentType::KgEntry,
1132 synonyms: None,
1133 route: None,
1134 priority: None,
1135 };
1136 rolegraph.insert_document(&document_id, document);
1137 println!("query with terraphim-graph and service");
1138 let results: Vec<(String, IndexedDocument)> =
1139 match rolegraph.query_graph("terraphim-graph and service", Some(0), Some(10)) {
1140 Ok(results) => results,
1141 Err(Error::NodeIdNotFound) => {
1142 println!("NodeIdNotFound");
1143 Vec::new()
1144 }
1145 Err(e) => {
1146 println!("Error: {:?}", e);
1147 Vec::new()
1148 }
1149 };
1150 println!("results shall be zero: {:#?}", results);
1151
1152 let document_id2 = "document2".to_string();
1153 let test_document2 = r#"
1154 # Terraphim-Graph scorer
1155 Terraphim-Graph (scorer) is using unique graph embeddings, where the rank of the term is defined by number of synonyms connected to the concept.
1156
1157 synonyms:: graph embeddings, graph, knowledge graph based embeddings
1158
1159 Now we will have a concept "Terrpahim Graph Scorer" with synonyms "graph embeddings" and "terraphim-graph". This provides service
1160 "#;
1161 let document2 = Document {
1162 stub: None,
1163 url: "/path/to/document2".to_string(),
1164 tags: None,
1165 rank: None,
1166 source_haystack: None,
1167 id: document_id2.clone(),
1168 title: "terraphim-graph".to_string(),
1169 body: test_document2.to_string(),
1170 description: None,
1171 summarization: None,
1172 doc_type: terraphim_types::DocumentType::KgEntry,
1173 synonyms: None,
1174 route: None,
1175 priority: None,
1176 };
1177 rolegraph.insert_document(&document_id2, document2);
1178 log::debug!("Query graph");
1179 let results: Vec<(String, IndexedDocument)> = rolegraph
1180 .query_graph("terraphim-graph and service", Some(0), Some(10))
1181 .unwrap();
1182 println!("results: {:#?}", results);
1183 let top_result = results.first().unwrap();
1184 println!("Top result {:?} Rank {:?}", top_result.0, top_result.1.rank);
1185 println!("Top result {:#?}", top_result.1);
1186 println!("Nodes {:#?} ", rolegraph.nodes);
1187 println!("Nodes count {:?}", rolegraph.nodes.len());
1188 println!("Edges count {:?}", rolegraph.edges.len());
1189 }
1190
1191 #[test]
1192 async fn test_rolegraph() {
1193 let role = "system operator".to_string();
1194 let mut rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1195 .await
1196 .unwrap();
1197 let document_id = Ulid::new().to_string();
1198 let query = "I am a text with the word Life cycle concepts and bar and Trained operators and maintainers, project direction, some bingo words Paradigm Map and project planning, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";
1199 let matches = rolegraph.find_matching_node_ids(query);
1200 for (a, b) in matches.into_iter().tuple_windows() {
1201 rolegraph.add_or_update_document(&document_id, a, b);
1202 }
1203 let document_id2 = Ulid::new().to_string();
1204 let query2 = "I am a text with the word Life cycle concepts and bar and maintainers, some bingo words Paradigm Map and project planning, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";
1205 let matches2 = rolegraph.find_matching_node_ids(query2);
1206 for (a, b) in matches2.into_iter().tuple_windows() {
1207 rolegraph.add_or_update_document(&document_id2, a, b);
1208 }
1209 let document_id3 = Ulid::new().to_string();
1210 let query3 = "I am a text with the word Life cycle concepts and bar and maintainers, some bingo words Paradigm Map and project planning, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";
1211 let matches3 = rolegraph.find_matching_node_ids(query3);
1212 for (a, b) in matches3.into_iter().tuple_windows() {
1213 rolegraph.add_or_update_document(&document_id3, a, b);
1214 }
1215 let document_id4 = "DocumentID4".to_string();
1216 let query4 = "I am a text with the word Life cycle concepts and bar and maintainers, some bingo words, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";
1217 let document = Document {
1218 stub: None,
1219 url: "/path/to/document".to_string(),
1220 tags: None,
1221 rank: None,
1222 source_haystack: None,
1223 id: document_id4.clone(),
1224 title: "Life cycle concepts and project direction".to_string(),
1225 body: query4.to_string(),
1226 description: None,
1227 summarization: None,
1228 doc_type: terraphim_types::DocumentType::KgEntry,
1229 synonyms: None,
1230 route: None,
1231 priority: None,
1232 };
1233 rolegraph.insert_document(&document_id4, document);
1234 log::debug!("Query graph");
1235 let results: Vec<(String, IndexedDocument)> = rolegraph
1236 .query_graph(
1237 "Life cycle concepts and project direction",
1238 Some(0),
1239 Some(10),
1240 )
1241 .unwrap();
1242 println!("results: {:#?}", results);
1243 let top_result = results.first().unwrap();
1244 println!("Top result {:?} Rank {:?}", top_result.0, top_result.1.rank);
1245 println!("Top result {:#?}", top_result.1);
1246 assert_eq!(results.len(), 4);
1247 }
1248
1249 #[test]
1250 #[ignore]
1251 async fn test_is_all_terms_connected_by_path_true() {
1252 let role = "system operator".to_string();
1253 let rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1254 .await
1255 .unwrap();
1256 let text = "Life cycle concepts ... Paradigm Map ... project planning";
1257 assert!(rolegraph.is_all_terms_connected_by_path(text));
1258 }
1259
1260 #[test]
1261 async fn test_is_all_terms_connected_by_path_false() {
1262 let role = "system operator".to_string();
1263 let rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1264 .await
1265 .unwrap();
1266 let text = "Trained operators ... bar";
1268 let _ = rolegraph.is_all_terms_connected_by_path(text);
1270 }
1272
    /// Regression test: rolegraph queries must never fail (e.g. with
    /// `NodeIdNotFound`) regardless of graph state — empty graph, unknown
    /// query terms, or a populated graph — and indexing a document must
    /// actually create nodes and edges.
    #[tokio::test]
    async fn test_rolegraph_with_thesaurus_no_node_not_found_errors() {
        use terraphim_types::Document;

        // Build a rolegraph from the sample thesaurus.
        let role_name = "Test Role".to_string();
        let thesaurus = load_sample_thesaurus().await;
        let mut rolegraph = RoleGraph::new(role_name.into(), thesaurus.clone())
            .await
            .expect("Failed to create rolegraph");

        // Sanity checks: the thesaurus and its reverse term lookup must load.
        assert!(
            !rolegraph.thesaurus.is_empty(),
            "Thesaurus should not be empty"
        );
        assert!(
            !rolegraph.ac_reverse_nterm.is_empty(),
            "Reverse term lookup should be populated"
        );
        log::info!(
            "✅ Loaded thesaurus with {} terms",
            rolegraph.thesaurus.len()
        );

        // Case 1: querying before any document is indexed must succeed and
        // return no results (not error out).
        log::info!("🔍 Testing query on empty graph");
        let empty_results = rolegraph
            .query_graph("Life cycle concepts", None, Some(10))
            .expect("Query on empty graph should not fail");
        assert!(
            empty_results.is_empty(),
            "Empty graph should return no results"
        );
        log::info!("✅ Empty graph query handled gracefully");

        // Case 2: querying a term absent from the thesaurus must also be a
        // graceful no-result outcome rather than an error.
        let nonexistent_results = rolegraph
            .query_graph("nonexistentterms", None, Some(10))
            .expect("Query with non-existent terms should not fail");
        assert!(
            nonexistent_results.is_empty(),
            "Non-existent terms should return no results"
        );
        log::info!("✅ Non-existent terms query handled gracefully");

        // Case 3: index one document whose body repeats several thesaurus
        // terms, then confirm the graph structures were populated.
        let document_text = "I am a text with the word Life cycle concepts and bar and Trained operators and maintainers, project direction, some bingo words Paradigm Map and project planning, then again: some bingo words Paradigm Map and project planning, then repeats: Trained operators and maintainers, project direction";

        let test_document = Document {
            id: "test_doc".to_string(),
            title: "System Engineering Document".to_string(),
            body: document_text.to_string(),
            url: "/test/document".to_string(),
            tags: Some(vec!["engineering".to_string()]),
            rank: Some(1),
            stub: None,
            description: Some("Test document with thesaurus terms".to_string()),
            summarization: None,
            source_haystack: None,
            doc_type: terraphim_types::DocumentType::KgEntry,
            synonyms: None,
            route: None,
            priority: None,
        };

        rolegraph.insert_document(&test_document.id, test_document.clone());

        log::info!("✅ Inserted 1 document into rolegraph");
        log::info!("   - Graph now has {} nodes", rolegraph.nodes.len());
        log::info!("   - Graph now has {} edges", rolegraph.edges.len());
        log::info!("   - Graph now has {} documents", rolegraph.documents.len());

        // Indexing must have materialized graph structure, not just stored
        // the document.
        assert!(
            !rolegraph.nodes.is_empty(),
            "Nodes should be created from document indexing"
        );
        assert!(
            !rolegraph.edges.is_empty(),
            "Edges should be created from document indexing"
        );
        assert_eq!(rolegraph.documents.len(), 1, "1 document should be stored");

        // Case 4: a batch of known thesaurus terms — each query must succeed;
        // empty result sets are tolerated, errors are not.
        let test_queries = vec![
            "Life cycle concepts",
            "Trained operators",
            "Paradigm Map",
            "project planning",
        ];

        for query in test_queries {
            log::info!("🔍 Testing query: '{}'", query);
            let results = rolegraph
                .query_graph(query, None, Some(10))
                .unwrap_or_else(|_| panic!("Query '{}' should not fail", query));

            log::info!("   - Found {} results", results.len());

            if query == "Life cycle concepts"
                || query == "Trained operators"
                || query == "Paradigm Map"
            {
                if !results.is_empty() {
                    log::info!("   ✅ Found expected results for query '{}'", query);
                } else {
                    log::info!(
                        "   ⚠️  No results for '{}' but no error - this is acceptable",
                        query
                    );
                }
            }
        }

        // Direct term-to-document lookup; an empty result is acceptable here.
        let document_ids = rolegraph.find_document_ids_for_term("Life cycle concepts");
        if !document_ids.is_empty() {
            log::info!("✅ Found {} documents for term lookup", document_ids.len());
        } else {
            log::info!(
                "⚠️  No documents found for term lookup - acceptable if term not in indexed docs"
            );
        }

        // Case 5: the exact query that historically triggered NodeIdNotFound
        // must now succeed.
        let original_failing_query = rolegraph
            .query_graph("terraphim-graph", None, Some(10))
            .expect("Query that previously caused NodeIdNotFound should now work");
        log::info!(
            "✅ Previously failing query now works - found {} results",
            original_failing_query.len()
        );

        log::info!("🎉 All rolegraph and thesaurus tests completed successfully!");
        log::info!("✅ Thesaurus loading: Working");
        log::info!("✅ Document indexing: Working");
        log::info!("✅ Graph querying: Working (no NodeIdNotFound errors)");
        log::info!("✅ Defensive error handling: Working");
    }
1416
    /// Round-trips a populated `RoleGraph` through its serializable snapshot
    /// (`to_serializable` → JSON → `from_json` → `from_serializable`) and
    /// checks that every counted collection survives intact.
    #[tokio::test]
    async fn test_rolegraph_serialization() {
        // Build a graph and index one document containing thesaurus terms.
        let role = "test role".to_string();
        let mut rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
            .await
            .unwrap();

        let document_id = Ulid::new().to_string();
        let test_document = Document {
            id: document_id.clone(),
            title: "Test Document".to_string(),
            body: "This is a test document with Life cycle concepts and project planning content and operators".to_string(),
            url: "/test/document".to_string(),
            description: Some("Test document description".to_string()),
            tags: Some(vec!["test".to_string(), "serialization".to_string()]),
            rank: Some(1),
            stub: None,
            summarization: None,
            source_haystack: None,
            doc_type: terraphim_types::DocumentType::KgEntry,
            synonyms: None,
            route: None,
            priority: None,
        };

        rolegraph.insert_document(&document_id, test_document);

        // Snapshot must mirror the live graph's contents exactly.
        let serializable = rolegraph.to_serializable();
        assert_eq!(serializable.role.original, "test role");
        assert_eq!(serializable.nodes.len(), rolegraph.nodes.len());
        assert_eq!(serializable.edges.len(), rolegraph.edges.len());
        assert_eq!(serializable.documents.len(), rolegraph.documents.len());
        assert_eq!(serializable.thesaurus.len(), rolegraph.thesaurus.len());
        assert!(!serializable.aho_corasick_values.is_empty());
        assert!(!serializable.ac_reverse_nterm.is_empty());

        // Snapshot → JSON.
        let json_str = serializable.to_json().unwrap();
        assert!(!json_str.is_empty());

        // JSON → snapshot; all fields must round-trip.
        let deserialized = SerializableRoleGraph::from_json(&json_str).unwrap();
        assert_eq!(deserialized.role.original, serializable.role.original);
        assert_eq!(deserialized.nodes.len(), serializable.nodes.len());
        assert_eq!(deserialized.edges.len(), serializable.edges.len());
        assert_eq!(deserialized.documents.len(), serializable.documents.len());
        assert_eq!(deserialized.thesaurus.len(), serializable.thesaurus.len());
        assert_eq!(
            deserialized.aho_corasick_values,
            serializable.aho_corasick_values
        );
        assert_eq!(deserialized.ac_reverse_nterm, serializable.ac_reverse_nterm);

        // Snapshot → live graph; collection sizes must match the original.
        let recreated_rolegraph = RoleGraph::from_serializable(deserialized).await.unwrap();
        assert_eq!(recreated_rolegraph.role.original, rolegraph.role.original);
        assert_eq!(recreated_rolegraph.nodes.len(), rolegraph.nodes.len());
        assert_eq!(recreated_rolegraph.edges.len(), rolegraph.edges.len());
        assert_eq!(
            recreated_rolegraph.documents.len(),
            rolegraph.documents.len()
        );
        assert_eq!(
            recreated_rolegraph.thesaurus.len(),
            rolegraph.thesaurus.len()
        );

        // The restored graph must remain queryable and matchable.
        let search_results = recreated_rolegraph
            .query_graph("Life cycle", None, Some(10))
            .unwrap();
        println!("Search results count: {}", search_results.len());

        let matches = recreated_rolegraph.find_matching_node_ids("Life cycle concepts");
        println!("Aho-Corasick matches count: {}", matches.len());

        assert_eq!(recreated_rolegraph.role.original, rolegraph.role.original);
    }
1502
1503 #[tokio::test]
1504 async fn test_rolegraph_sync_serialization() {
1505 let role = "sync test role".to_string();
1507 let mut rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1508 .await
1509 .unwrap();
1510
1511 let document_id = Ulid::new().to_string();
1513 let test_document = Document {
1514 id: document_id.clone(),
1515 title: "Sync Test Document".to_string(),
1516 body:
1517 "Document content for testing RoleGraphSync serialization with Paradigm Map terms"
1518 .to_string(),
1519 url: "/test/sync_document".to_string(),
1520 description: None,
1521 tags: None,
1522 rank: None,
1523 stub: None,
1524 summarization: None,
1525 source_haystack: None,
1526 doc_type: terraphim_types::DocumentType::KgEntry,
1527 synonyms: None,
1528 route: None,
1529 priority: None,
1530 };
1531
1532 rolegraph.insert_document(&document_id, test_document);
1533 let rolegraph_sync = RoleGraphSync::from(rolegraph);
1534
1535 let json_str = rolegraph_sync.to_json().await.unwrap();
1537 assert!(!json_str.is_empty());
1538
1539 let json_pretty = rolegraph_sync.to_json_pretty().await.unwrap();
1541 assert!(json_pretty.len() > json_str.len()); let restored_sync = RoleGraphSync::from_json(&json_str).await.unwrap();
1545
1546 let rolegraph_guard = restored_sync.lock().await;
1548 assert_eq!(rolegraph_guard.role.original, "sync test role");
1549 assert_eq!(rolegraph_guard.documents.len(), 1);
1550
1551 let search_results = rolegraph_guard
1553 .query_graph("Paradigm Map", None, Some(10))
1554 .unwrap();
1555 println!(
1556 "RoleGraphSync search results count: {}",
1557 search_results.len()
1558 );
1559
1560 assert_eq!(rolegraph_guard.role.original, "sync test role");
1562 }
1563
1564 #[tokio::test]
1565 async fn test_graph_stats_serialization() {
1566 let role = "stats test role".to_string();
1568 let mut rolegraph = RoleGraph::new(role.into(), load_sample_thesaurus().await)
1569 .await
1570 .unwrap();
1571
1572 let document_id = Ulid::new().to_string();
1574 let test_document = Document {
1575 id: document_id.clone(),
1576 title: "Stats Test Document".to_string(),
1577 body: "Test content with Life cycle concepts and operators and maintainers".to_string(),
1578 url: "/test/stats_document".to_string(),
1579 description: None,
1580 tags: None,
1581 rank: None,
1582 stub: None,
1583 summarization: None,
1584 source_haystack: None,
1585 doc_type: terraphim_types::DocumentType::KgEntry,
1586 synonyms: None,
1587 route: None,
1588 priority: None,
1589 };
1590
1591 rolegraph.insert_document(&document_id, test_document);
1592
1593 let stats = rolegraph.get_graph_stats();
1595 assert!(stats.thesaurus_size > 0); println!(
1600 "Stats - nodes: {}, edges: {}, documents: {}, thesaurus: {}, populated: {}",
1601 stats.node_count,
1602 stats.edge_count,
1603 stats.document_count,
1604 stats.thesaurus_size,
1605 stats.is_populated
1606 );
1607
1608 let json_str = serde_json::to_string(&stats).unwrap();
1610 let deserialized_stats: GraphStats = serde_json::from_str(&json_str).unwrap();
1611
1612 assert_eq!(stats.node_count, deserialized_stats.node_count);
1613 assert_eq!(stats.edge_count, deserialized_stats.edge_count);
1614 assert_eq!(stats.document_count, deserialized_stats.document_count);
1615 assert_eq!(stats.thesaurus_size, deserialized_stats.thesaurus_size);
1616 assert_eq!(stats.is_populated, deserialized_stats.is_populated);
1617 }
1618
1619 #[tokio::test]
1620 async fn test_serialization_edge_cases() {
1621 let role = "empty test".to_string();
1623 let empty_thesaurus = Thesaurus::new("empty".to_string());
1624 let empty_rolegraph = RoleGraph::new(role.into(), empty_thesaurus).await.unwrap();
1625
1626 let serializable = empty_rolegraph.to_serializable();
1627 let json = serializable.to_json().unwrap();
1628 let deserialized = SerializableRoleGraph::from_json(&json).unwrap();
1629 let restored = RoleGraph::from_serializable(deserialized).await.unwrap();
1630
1631 assert_eq!(restored.nodes.len(), 0);
1632 assert_eq!(restored.edges.len(), 0);
1633 assert_eq!(restored.documents.len(), 0);
1634 assert_eq!(restored.thesaurus.len(), 0);
1635
1636 let role = "single node test".to_string();
1638 let thesaurus = load_sample_thesaurus().await;
1639 let mut single_rolegraph = RoleGraph::new(role.into(), thesaurus).await.unwrap();
1640
1641 let document_id = Ulid::new().to_string();
1642 let simple_document = Document {
1643 id: document_id.clone(),
1644 title: "Simple".to_string(),
1645 body: "Life cycle concepts and operators".to_string(), url: "/test/simple".to_string(),
1647 description: None,
1648 tags: None,
1649 rank: None,
1650 stub: None,
1651 summarization: None,
1652 source_haystack: None,
1653 doc_type: terraphim_types::DocumentType::KgEntry,
1654 synonyms: None,
1655 route: None,
1656 priority: None,
1657 };
1658
1659 single_rolegraph.insert_document(&document_id, simple_document);
1660
1661 let serializable = single_rolegraph.to_serializable();
1663 let json = serializable.to_json().unwrap();
1664 let deserialized = SerializableRoleGraph::from_json(&json).unwrap();
1665 let restored = RoleGraph::from_serializable(deserialized).await.unwrap();
1666
1667 assert_eq!(restored.documents.len(), 1);
1668 assert_eq!(restored.role.original, "single node test");
1669
1670 println!(
1673 "Single node test - nodes: {}, edges: {}",
1674 restored.nodes.len(),
1675 restored.edges.len()
1676 );
1677 }
1678}