#[cfg(feature = "medical")]
pub mod medical_types;
#[cfg(feature = "medical")]
pub use medical_types::*;
#[cfg(feature = "hgnc")]
pub mod hgnc;
pub mod capability;
pub use capability::*;
pub mod mcp_tool;
pub use mcp_tool::*;
pub mod procedure;
pub use procedure::*;
pub mod persona;
pub use persona::{CharacteristicDef, PersonaDefinition, PersonaLoadError, SfiaSkillDef};
use ahash::AHashMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashSet;
use std::collections::hash_map::Iter;
use std::fmt::{self, Display, Formatter};
use std::iter::IntoIterator;
use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicU64, Ordering};
/// Process-local monotonic counter backing [`get_int_id`]; starts at 1
/// (0 is never handed out — presumably reserved as an "unset" value; confirm).
static INT_SEQ: AtomicU64 = AtomicU64::new(1);
/// Returns the next process-local unique id. Thread-safe and strictly
/// increasing within one process; NOT stable across restarts.
fn get_int_id() -> u64 {
INT_SEQ.fetch_add(1, Ordering::SeqCst)
}
use schemars::JsonSchema;
use std::str::FromStr;
#[cfg(feature = "typescript")]
use tsify::Tsify;
/// A role identifier that remembers the caller's original spelling while
/// caching a lowercase form for case-insensitive comparison/lookup.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, JsonSchema)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct RoleName {
    /// The name exactly as supplied.
    pub original: String,
    /// Lowercased copy of `original`, kept in sync by the constructors.
    pub lowercase: String,
}
impl RoleName {
pub fn new(name: &str) -> Self {
RoleName {
original: name.to_string(),
lowercase: name.to_lowercase(),
}
}
pub fn as_lowercase(&self) -> &str {
&self.lowercase
}
pub fn as_str(&self) -> &str {
&self.original
}
}
impl fmt::Display for RoleName {
    /// Displays the original (non-lowercased) spelling.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.original)
    }
}
impl FromStr for RoleName {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(RoleName::new(s))
}
}
impl From<&str> for RoleName {
fn from(s: &str) -> Self {
RoleName::new(s)
}
}
impl From<String> for RoleName {
fn from(s: String) -> Self {
RoleName::new(&s)
}
}
impl Serialize for RoleName {
    /// Serializes as a bare string (the original spelling); the lowercase
    /// cache is derived data and is rebuilt on deserialization.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}
impl<'de> Deserialize<'de> for RoleName {
    /// Deserializes from a bare string, rebuilding the lowercase cache.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        String::deserialize(deserializer).map(|s| Self::new(&s))
    }
}
/// A search term normalized for matching: trimmed of surrounding whitespace
/// and lowercased. Construct via `new`/`From`, which enforce the invariant.
#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct NormalizedTermValue(String);
impl NormalizedTermValue {
    /// Normalizes by trimming surrounding whitespace and lowercasing.
    pub fn new(term: String) -> Self {
        Self(term.trim().to_lowercase())
    }
    /// Borrows the normalized text.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
impl From<String> for NormalizedTermValue {
    /// Delegates to [`NormalizedTermValue::new`], preserving normalization.
    fn from(term: String) -> Self {
        Self::new(term)
    }
}
impl From<&str> for NormalizedTermValue {
    /// Delegates to [`NormalizedTermValue::new`], preserving normalization.
    fn from(term: &str) -> Self {
        Self::new(term.to_owned())
    }
}
impl Display for NormalizedTermValue {
    /// Displays the normalized (trimmed, lowercased) text.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
impl AsRef<[u8]> for NormalizedTermValue {
    /// Byte view of the normalized text (e.g. for hashing or key encoding).
    fn as_ref(&self) -> &[u8] {
        self.0.as_bytes()
    }
}
/// A thesaurus entry: a normalized term plus optional human-facing metadata.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct NormalizedTerm {
    /// Stable id of the term within its thesaurus.
    pub id: u64,
    /// The normalized term text (serialized as "nterm").
    #[serde(rename = "nterm")]
    pub value: NormalizedTermValue,
    /// Optional pretty/original spelling for display; omitted when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub display_value: Option<String>,
    /// Optional link with more information about the term.
    pub url: Option<String>,
}
impl NormalizedTerm {
pub fn new(id: u64, value: NormalizedTermValue) -> Self {
Self {
id,
value,
display_value: None,
url: None,
}
}
pub fn with_auto_id(value: NormalizedTermValue) -> Self {
Self {
id: get_int_id(),
value,
display_value: None,
url: None,
}
}
pub fn with_display_value(mut self, display_value: String) -> Self {
self.display_value = Some(display_value);
self
}
pub fn with_url(mut self, url: String) -> Self {
self.url = Some(url);
self
}
pub fn display(&self) -> &str {
self.display_value
.as_deref()
.unwrap_or_else(|| self.value.as_str())
}
}
/// A knowledge-graph concept: a normalized term with a numeric id.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Concept {
    /// Process-local or caller-assigned concept id.
    pub id: u64,
    /// The concept's normalized term text.
    pub value: NormalizedTermValue,
}
impl Concept {
    /// Creates a concept with a freshly generated process-local id.
    pub fn new(value: NormalizedTermValue) -> Self {
        let id = get_int_id();
        Self { id, value }
    }
    /// Creates a concept with an explicit, caller-managed id.
    pub fn with_id(id: u64, value: NormalizedTermValue) -> Self {
        Self { id, value }
    }
}
impl From<String> for Concept {
    /// Normalizes the raw string and assigns a fresh process-local id.
    fn from(concept: String) -> Self {
        Self::new(NormalizedTermValue::new(concept))
    }
}
impl Display for Concept {
    /// Displays the normalized term text (the id is not shown).
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(self.value.as_str())
    }
}
/// Classifies where a document came from / how it should be treated.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DocumentType {
    /// Knowledge-graph entry (the default).
    #[default]
    KgEntry,
    /// Plain indexed document.
    Document,
    /// Configuration document.
    ConfigDocument,
}
/// An LLM routing directive parsed from document front matter:
/// which provider/model (and optional action) to route to.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RouteDirective {
    pub provider: String,
    pub model: String,
    #[serde(default)]
    pub action: Option<String>,
}
/// Directives extracted from a markdown document's front matter.
/// All fields default so documents without front matter parse cleanly.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct MarkdownDirectives {
    #[serde(default)]
    pub doc_type: DocumentType,
    /// Alternative spellings to index alongside the main title/terms.
    #[serde(default)]
    pub synonyms: Vec<String>,
    /// Single routing directive (legacy single-route form).
    #[serde(default)]
    pub route: Option<RouteDirective>,
    /// Multiple routing directives.
    #[serde(default)]
    pub routes: Vec<RouteDirective>,
    #[serde(default)]
    pub priority: Option<u8>,
    #[serde(default)]
    pub trigger: Option<String>,
    /// Pinned documents — semantics defined by consumers (see `SearchQuery::include_pinned`).
    #[serde(default)]
    pub pinned: bool,
    #[serde(default)]
    pub heading: Option<String>,
}
/// A searchable document with optional enrichment (tags, rank, routing).
#[derive(Deserialize, Serialize, Debug, Clone, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct Document {
    /// Unique document id (also the key in `Index`).
    pub id: String,
    pub url: String,
    pub title: String,
    /// Full document text.
    pub body: String,
    pub description: Option<String>,
    pub summarization: Option<String>,
    pub stub: Option<String>,
    pub tags: Option<Vec<String>>,
    /// Search rank; filled in when the document is returned as a hit.
    pub rank: Option<u64>,
    /// Which haystack (source collection) this document was loaded from.
    pub source_haystack: Option<String>,
    #[serde(default)]
    pub doc_type: DocumentType,
    #[serde(default)]
    pub synonyms: Option<Vec<String>>,
    #[serde(default)]
    pub route: Option<RouteDirective>,
    #[serde(default)]
    pub priority: Option<u8>,
}
impl fmt::Display for Document {
    /// Concatenates title, body, and the optional description/summarization
    /// into one space-separated text blob. The summarization is skipped when
    /// it is equal to the description, to avoid emitting the same text twice.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{} {}", self.title, self.body)?;
        if let Some(description) = self.description.as_deref() {
            write!(f, " {}", description)?;
        }
        if let Some(summarization) = self.summarization.as_deref() {
            let duplicates_description = self.summarization == self.description;
            if !duplicates_description {
                write!(f, " {}", summarization)?;
            }
        }
        Ok(())
    }
}
impl Document {
    /// Builder-style: records which haystack this document came from.
    pub fn with_source_haystack(mut self, haystack_location: String) -> Self {
        self.source_haystack = Some(haystack_location);
        self
    }
    /// The haystack location this document was loaded from, if recorded.
    pub fn get_source_haystack(&self) -> Option<&String> {
        self.source_haystack.as_ref()
    }
}
/// A knowledge-graph edge: tracks how often it was seen, per document.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Edge {
    pub id: u64,
    /// Total occurrence count across documents.
    pub rank: u64,
    /// Per-document occurrence counts, keyed by document id.
    pub doc_hash: AHashMap<String, u64>,
    #[cfg(feature = "medical")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub edge_type: Option<medical_types::MedicalEdgeType>,
}
impl Edge {
pub fn new(id: u64, document_id: String) -> Self {
let mut doc_hash = AHashMap::new();
doc_hash.insert(document_id, 1);
Self {
id,
rank: 1,
doc_hash,
#[cfg(feature = "medical")]
edge_type: None,
}
}
}
/// A knowledge-graph node: connected edges plus an occurrence rank.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Node {
    pub id: u64,
    /// Occurrence count.
    pub rank: u64,
    /// Ids of edges incident to this node.
    pub connected_with: HashSet<u64>,
    #[cfg(feature = "medical")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub node_type: Option<medical_types::MedicalNodeType>,
    #[cfg(feature = "medical")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub term: Option<String>,
    #[cfg(feature = "medical")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub snomed_id: Option<u64>,
}
impl Node {
    /// Creates a node of rank 1 connected to the given edge.
    pub fn new(id: u64, edge: Edge) -> Self {
        let connected_with: HashSet<u64> = std::iter::once(edge.id).collect();
        Self {
            id,
            rank: 1,
            connected_with,
            #[cfg(feature = "medical")]
            node_type: None,
            #[cfg(feature = "medical")]
            term: None,
            #[cfg(feature = "medical")]
            snomed_id: None,
        }
    }
}
/// A named mapping from normalized term text to its canonical term entry.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct Thesaurus {
    // Human-readable name of this thesaurus.
    name: String,
    // Lookup table: normalized key -> canonical term.
    data: AHashMap<NormalizedTermValue, NormalizedTerm>,
}
impl Thesaurus {
    /// Creates an empty, named thesaurus.
    pub fn new(name: String) -> Self {
        Self {
            name,
            data: AHashMap::new(),
        }
    }
    /// The thesaurus' human-readable name.
    pub fn name(&self) -> &str {
        &self.name
    }
    /// Inserts or replaces the entry mapped to `key`.
    pub fn insert(&mut self, key: NormalizedTermValue, value: NormalizedTerm) {
        self.data.insert(key, value);
    }
    /// Number of entries.
    pub fn len(&self) -> usize {
        self.data.len()
    }
    /// True when there are no entries.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
    /// Looks up the canonical term mapped to `key`.
    pub fn get(&self, key: &NormalizedTermValue) -> Option<&NormalizedTerm> {
        self.data.get(key)
    }
    /// Iterates over all normalized keys (arbitrary hash-map order).
    pub fn keys(
        &self,
    ) -> std::collections::hash_map::Keys<'_, NormalizedTermValue, NormalizedTerm> {
        self.data.keys()
    }
}
/// Allows `for (key, term) in &thesaurus` iteration over all entries.
impl<'a> IntoIterator for &'a Thesaurus {
    type Item = (&'a NormalizedTermValue, &'a NormalizedTerm);
    type IntoIter = Iter<'a, NormalizedTermValue, NormalizedTerm>;
    fn into_iter(self) -> Self::IntoIter {
        self.data.iter()
    }
}
/// An in-memory document cache keyed by document id.
/// Derefs to the underlying map for read/write convenience.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Index {
    inner: AHashMap<String, Document>,
}
impl Default for Index {
    fn default() -> Self {
        Self::new()
    }
}
impl Index {
    /// Creates an empty document index.
    pub fn new() -> Self {
        Self {
            inner: AHashMap::new(),
        }
    }
    /// Resolves a batch of search hits to full documents, carrying over each
    /// hit's tags and rank. Hits whose id is missing from the index are
    /// skipped with a warning.
    pub fn get_documents(&self, docs: Vec<IndexedDocument>) -> Vec<Document> {
        let mut documents = Vec::with_capacity(docs.len());
        for doc in docs {
            log::trace!("doc: {:#?}", doc);
            // `get_document` already overlays the hit's tags and rank, so the
            // previous re-assignment (and extra clone) here was redundant.
            if let Some(document) = self.get_document(&doc) {
                documents.push(document);
            } else {
                log::warn!("Document not found in cache. Cannot convert.");
            }
        }
        documents
    }
    /// Returns a clone of every cached document (no tag/rank overlay).
    pub fn get_all_documents(&self) -> Vec<Document> {
        self.values().cloned().collect()
    }
    /// Looks up one hit's document, overlaying the hit's tags and rank on the
    /// returned clone. `None` when the id is not cached.
    pub fn get_document(&self, doc: &IndexedDocument) -> Option<Document> {
        self.inner.get(&doc.id).cloned().map(|mut document| {
            document.tags = Some(doc.tags.clone());
            document.rank = Some(doc.rank);
            document
        })
    }
}
/// Transparent read access to the underlying id -> document map.
impl Deref for Index {
    type Target = AHashMap<String, Document>;
    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
/// Transparent mutable access to the underlying id -> document map.
impl DerefMut for Index {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.inner
    }
}
/// Consumes the index, yielding `(id, document)` pairs.
impl IntoIterator for Index {
    type Item = (String, Document);
    type IntoIter = std::collections::hash_map::IntoIter<String, Document>;
    fn into_iter(self) -> Self::IntoIter {
        self.inner.into_iter()
    }
}
/// Per-dimension quality scores; each dimension is optional.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct QualityScore {
    pub knowledge: Option<f64>,
    pub learning: Option<f64>,
    pub synthesis: Option<f64>,
}
impl QualityScore {
    /// Mean of the dimensions that are present; 0.0 when none are set.
    pub fn composite(&self) -> f64 {
        let present: Vec<f64> = [self.knowledge, self.learning, self.synthesis]
            .into_iter()
            .flatten()
            .collect();
        if present.is_empty() {
            0.0
        } else {
            present.iter().sum::<f64>() / present.len() as f64
        }
    }
}
/// A search hit: a document id plus the graph evidence and rank behind it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct IndexedDocument {
    pub id: String,
    /// Edges that matched the query.
    pub matched_edges: Vec<Edge>,
    pub rank: u64,
    pub tags: Vec<String>,
    /// Ids of graph nodes that contributed to the match.
    pub nodes: Vec<u64>,
    #[serde(default)]
    pub quality_score: Option<QualityScore>,
}
impl IndexedDocument {
    /// JSON-encodes this hit.
    pub fn to_json_string(&self) -> Result<String, serde_json::Error> {
        serde_json::to_string(self)
    }
    /// Wraps a document as an unranked hit with no matched edges or nodes.
    pub fn from_document(document: Document) -> Self {
        Self {
            id: document.id,
            matched_edges: Vec::new(),
            rank: 0,
            tags: document.tags.unwrap_or_default(),
            nodes: Vec::new(),
            quality_score: None,
        }
    }
}
/// How multiple search terms are combined.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, JsonSchema)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum LogicalOperator {
    #[serde(rename = "and")]
    And,
    #[serde(rename = "or")]
    Or,
}
/// Result detail level; serialized as the digits "1"–"3".
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, JsonSchema)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum Layer {
    /// Metadata only (the default).
    #[serde(rename = "1")]
    #[default]
    One,
    /// Adds content (see `includes_content`).
    #[serde(rename = "2")]
    Two,
    /// Adds full content (see `includes_full_content`).
    #[serde(rename = "3")]
    Three,
}
impl Layer {
pub fn from_u8(value: u8) -> Option<Self> {
match value {
1 => Some(Layer::One),
2 => Some(Layer::Two),
3 => Some(Layer::Three),
_ => None,
}
}
pub fn includes_content(&self) -> bool {
matches!(self, Layer::Two | Layer::Three)
}
pub fn includes_full_content(&self) -> bool {
matches!(self, Layer::Three)
}
}
impl std::fmt::Display for Layer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Layer::One => write!(f, "1"),
Layer::Two => write!(f, "2"),
Layer::Three => write!(f, "3"),
}
}
}
/// Returns the first non-empty line of `body`, skipping a leading YAML-style
/// `---` front-matter block when one is present.
///
/// Fixes two defects in the previous version: the front-matter check used
/// `trim_start()` but then byte-indexed the *untrimmed* string (`&body[3..]`),
/// so any leading whitespace misaligned the search for the closing delimiter —
/// and slicing at fixed byte offsets could panic on multi-byte UTF-8
/// whitespace. `strip_prefix` on the trimmed text is both correct and
/// panic-free ("---" is ASCII, so `close + 3` is always a char boundary).
pub fn extract_first_paragraph(body: &str) -> String {
    let trimmed = body.trim_start();
    let content = match trimmed.strip_prefix("---") {
        // Front matter present: skip past the closing "---" when found;
        // an unterminated block falls back to the whole body unchanged.
        Some(after_open) => match after_open.find("---") {
            Some(close) => &after_open[close + 3..],
            None => body,
        },
        None => body,
    };
    content
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .map(str::to_string)
        .unwrap_or_default()
}
/// A search request: one primary term, optional additional terms, paging,
/// role scoping, and result detail level.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct SearchQuery {
    /// Primary term; also accepted under the JSON key "query".
    #[serde(alias = "query")]
    pub search_term: NormalizedTermValue,
    /// Additional terms combined with `operator`.
    pub search_terms: Option<Vec<NormalizedTermValue>>,
    /// Combining operator; consumers default to OR (see `get_operator`).
    pub operator: Option<LogicalOperator>,
    pub skip: Option<usize>,
    pub limit: Option<usize>,
    /// Restrict the search to one role's view, when set.
    pub role: Option<RoleName>,
    #[serde(default)]
    pub layer: Layer,
    #[serde(default)]
    pub include_pinned: bool,
}
impl SearchQuery {
    /// All terms to search: the primary term followed by any additional
    /// terms that differ from it (exact string comparison).
    pub fn get_all_terms(&self) -> Vec<&NormalizedTermValue> {
        match self.search_terms {
            Some(ref multiple_terms) => {
                let mut all_terms = Vec::with_capacity(1 + multiple_terms.len());
                all_terms.push(&self.search_term);
                all_terms.extend(
                    multiple_terms
                        .iter()
                        .filter(|term| term.as_str() != self.search_term.as_str()),
                );
                all_terms
            }
            None => vec![&self.search_term],
        }
    }
    /// True when a non-empty list of additional terms was supplied.
    pub fn is_multi_term_query(&self) -> bool {
        // Combinator form replaces the `is_some()` + `unwrap()` anti-pattern.
        self.search_terms
            .as_ref()
            .map_or(false, |terms| !terms.is_empty())
    }
    /// The combining operator, defaulting to OR when unspecified.
    pub fn get_operator(&self) -> LogicalOperator {
        self.operator.clone().unwrap_or(LogicalOperator::Or)
    }
    /// Builds a multi-term query with the given operator; paging unset,
    /// default layer, pinned results excluded.
    pub fn with_terms_and_operator(
        primary_term: NormalizedTermValue,
        additional_terms: Vec<NormalizedTermValue>,
        operator: LogicalOperator,
        role: Option<RoleName>,
    ) -> Self {
        Self {
            search_term: primary_term,
            search_terms: Some(additional_terms),
            operator: Some(operator),
            skip: None,
            limit: None,
            role,
            layer: Layer::default(),
            include_pinned: false,
        }
    }
}
/// Which scoring algorithm ranks search results.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, JsonSchema, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum RelevanceFunction {
    /// Graph-based ranking over the knowledge graph.
    #[serde(rename = "terraphim-graph")]
    TerraphimGraph,
    /// Title-based scoring (the default).
    #[default]
    #[serde(rename = "title-scorer")]
    TitleScorer,
    #[serde(rename = "bm25")]
    BM25,
    #[serde(rename = "bm25f")]
    BM25F,
    #[serde(rename = "bm25plus")]
    BM25Plus,
}
/// Input format a knowledge graph is built from.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, JsonSchema)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum KnowledgeGraphInputType {
    #[serde(rename = "markdown")]
    Markdown,
    #[serde(rename = "json")]
    Json,
}
/// Opaque conversation identifier (a UUID v4 string when generated here).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ConversationId(pub String);
impl ConversationId {
    /// Generates a fresh random (UUID v4) id.
    pub fn new() -> Self {
        Self(uuid::Uuid::new_v4().to_string())
    }
    /// Wraps an existing id string without validation.
    pub fn from_string(id: String) -> Self {
        Self(id)
    }
    /// Borrows the id text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl Default for ConversationId {
    fn default() -> Self {
        Self::new()
    }
}
impl Display for ConversationId {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
/// Where a context item originated.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum ContextType {
    System,
    UserInput,
    Document,
    SearchResult,
    External,
    /// A single knowledge-graph term definition.
    KGTermDefinition,
    /// A whole knowledge-graph index summary.
    KGIndex,
}
/// Opaque chat-message identifier (a UUID v4 string when generated here).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct MessageId(pub String);
impl MessageId {
    /// Generates a fresh random (UUID v4) id.
    pub fn new() -> Self {
        Self(uuid::Uuid::new_v4().to_string())
    }
    /// Wraps an existing id string without validation.
    pub fn from_string(id: String) -> Self {
        Self(id)
    }
    /// Borrows the id text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl Default for MessageId {
    fn default() -> Self {
        Self::new()
    }
}
impl Display for MessageId {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
/// A piece of context attached to a chat message or conversation:
/// a typed, titled blob of text with free-form string metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextItem {
    /// Random UUID assigned at construction.
    pub id: String,
    pub context_type: ContextType,
    pub title: String,
    pub summary: Option<String>,
    /// Pre-rendered text handed to the model.
    pub content: String,
    /// Free-form provenance metadata (source_type, ids, counts, ...).
    pub metadata: AHashMap<String, String>,
    pub created_at: chrono::DateTime<chrono::Utc>,
    pub relevance_score: Option<f64>,
}
impl ContextItem {
    /// Wraps a document as a context item. Content is a pre-rendered
    /// "Title/description/body" blob; the document's rank (if any) becomes
    /// the relevance score, and provenance goes into `metadata`.
    pub fn from_document(document: &Document) -> Self {
        let mut metadata = AHashMap::new();
        metadata.insert("source_type".to_string(), "document".to_string());
        metadata.insert("document_id".to_string(), document.id.clone());
        if !document.url.is_empty() {
            metadata.insert("url".to_string(), document.url.clone());
        }
        if let Some(tags) = &document.tags {
            metadata.insert("tags".to_string(), tags.join(", "));
        }
        if let Some(rank) = document.rank {
            metadata.insert("rank".to_string(), rank.to_string());
        }
        Self {
            id: uuid::Uuid::new_v4().to_string(),
            context_type: ContextType::Document,
            // Fall back to the document id when the title is empty.
            title: if document.title.is_empty() {
                document.id.clone()
            } else {
                document.title.clone()
            },
            summary: document.description.clone(),
            content: format!(
                "Title: {}\n\n{}\n\n{}",
                document.title,
                document.description.as_deref().unwrap_or(""),
                document.body
            ),
            metadata,
            created_at: chrono::Utc::now(),
            relevance_score: document.rank.map(|r| r as f64),
        }
    }
    /// Summarizes a search (query + up to 5 top results) as one context item.
    /// Relevance is taken from the first result's rank, if present.
    pub fn from_search_result(query: &str, documents: &[Document]) -> Self {
        let mut metadata = AHashMap::new();
        metadata.insert("source_type".to_string(), "search_result".to_string());
        metadata.insert("query".to_string(), query.to_string());
        metadata.insert("result_count".to_string(), documents.len().to_string());
        let content = if documents.is_empty() {
            format!("Search query: '{}'\nNo results found.", query)
        } else {
            let mut content = format!("Search query: '{}'\nResults:\n\n", query);
            // Only the first 5 results are rendered in full.
            for (i, doc) in documents.iter().take(5).enumerate() {
                content.push_str(&format!(
                    "{}. {}\n   {}\n   Rank: {}\n\n",
                    i + 1,
                    doc.title,
                    doc.description.as_deref().unwrap_or("No description"),
                    doc.rank.unwrap_or(0)
                ));
            }
            if documents.len() > 5 {
                content.push_str(&format!("... and {} more results\n", documents.len() - 5));
            }
            content
        };
        Self {
            id: uuid::Uuid::new_v4().to_string(),
            // NOTE(review): tagged as Document although ContextType::SearchResult
            // exists — confirm whether this is intentional before changing it.
            context_type: ContextType::Document, title: format!("Search: {}", query),
            summary: Some(format!(
                "Search results for '{}' - {} documents found",
                query,
                documents.len()
            )),
            content,
            metadata,
            created_at: chrono::Utc::now(),
            relevance_score: documents.first().and_then(|d| d.rank.map(|r| r as f64)),
        }
    }
    /// Renders a knowledge-graph term definition (definition, synonyms,
    /// related terms, usage examples) as a markdown-ish context item.
    /// KG metadata keys are copied over with a "kg_" prefix.
    pub fn from_kg_term_definition(kg_term: &KGTermDefinition) -> Self {
        let mut metadata = AHashMap::new();
        metadata.insert("source_type".to_string(), "kg_term".to_string());
        metadata.insert("term_id".to_string(), kg_term.id.to_string());
        metadata.insert(
            "normalized_term".to_string(),
            kg_term.normalized_term.to_string(),
        );
        metadata.insert(
            "synonyms_count".to_string(),
            kg_term.synonyms.len().to_string(),
        );
        metadata.insert(
            "related_terms_count".to_string(),
            kg_term.related_terms.len().to_string(),
        );
        metadata.insert(
            "usage_examples_count".to_string(),
            kg_term.usage_examples.len().to_string(),
        );
        if let Some(ref url) = kg_term.url {
            metadata.insert("url".to_string(), url.clone());
        }
        // Copy the term's own metadata, namespaced under "kg_".
        for (key, value) in &kg_term.metadata {
            metadata.insert(format!("kg_{}", key), value.clone());
        }
        let mut content = format!("**Term:** {}\n", kg_term.term);
        if let Some(ref definition) = kg_term.definition {
            content.push_str(&format!("**Definition:** {}\n", definition));
        }
        if !kg_term.synonyms.is_empty() {
            content.push_str(&format!("**Synonyms:** {}\n", kg_term.synonyms.join(", ")));
        }
        if !kg_term.related_terms.is_empty() {
            content.push_str(&format!(
                "**Related Terms:** {}\n",
                kg_term.related_terms.join(", ")
            ));
        }
        if !kg_term.usage_examples.is_empty() {
            content.push_str("**Usage Examples:**\n");
            for (i, example) in kg_term.usage_examples.iter().enumerate() {
                content.push_str(&format!("{}. {}\n", i + 1, example));
            }
        }
        Self {
            id: uuid::Uuid::new_v4().to_string(),
            context_type: ContextType::KGTermDefinition,
            title: format!("KG Term: {}", kg_term.term),
            summary: Some(format!(
                "Knowledge Graph term '{}' with {} synonyms and {} related terms",
                kg_term.term,
                kg_term.synonyms.len(),
                kg_term.related_terms.len()
            )),
            content,
            metadata,
            created_at: chrono::Utc::now(),
            relevance_score: kg_term.relevance_score,
        }
    }
    /// Renders a knowledge-graph index summary (counts, source, timestamps)
    /// as a context item with a fixed relevance of 1.0.
    pub fn from_kg_index(kg_index: &KGIndexInfo) -> Self {
        let mut metadata = AHashMap::new();
        metadata.insert("source_type".to_string(), "kg_index".to_string());
        metadata.insert("kg_name".to_string(), kg_index.name.clone());
        metadata.insert("total_terms".to_string(), kg_index.total_terms.to_string());
        metadata.insert("total_nodes".to_string(), kg_index.total_nodes.to_string());
        metadata.insert("total_edges".to_string(), kg_index.total_edges.to_string());
        metadata.insert("source".to_string(), kg_index.source.clone());
        metadata.insert(
            "last_updated".to_string(),
            kg_index.last_updated.to_rfc3339(),
        );
        if let Some(ref version) = kg_index.version {
            metadata.insert("version".to_string(), version.clone());
        }
        let content = format!(
            "**Knowledge Graph Index: {}**\n\n\
            **Statistics:**\n\
            - Total Terms: {}\n\
            - Total Nodes: {}\n\
            - Total Edges: {}\n\
            - Source: {}\n\
            - Last Updated: {}\n\
            - Version: {}\n\n\
            This context includes the complete knowledge graph index with all terms, \
            relationships, and metadata available for reference.",
            kg_index.name,
            kg_index.total_terms,
            kg_index.total_nodes,
            kg_index.total_edges,
            kg_index.source,
            kg_index.last_updated.format("%Y-%m-%d %H:%M:%S UTC"),
            kg_index.version.as_deref().unwrap_or("N/A")
        );
        Self {
            id: uuid::Uuid::new_v4().to_string(),
            context_type: ContextType::KGIndex,
            title: format!("KG Index: {}", kg_index.name),
            summary: Some(format!(
                "Complete knowledge graph index with {} terms, {} nodes, and {} edges",
                kg_index.total_terms, kg_index.total_nodes, kg_index.total_edges
            )),
            content,
            metadata,
            created_at: chrono::Utc::now(),
            relevance_score: Some(1.0), }
    }
}
/// A fully-resolved knowledge-graph term with its definition and relations.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct KGTermDefinition {
    /// Original term spelling.
    pub term: String,
    pub normalized_term: NormalizedTermValue,
    pub id: u64,
    pub definition: Option<String>,
    pub synonyms: Vec<String>,
    pub related_terms: Vec<String>,
    pub usage_examples: Vec<String>,
    pub url: Option<String>,
    /// Free-form metadata; copied into context items with a "kg_" prefix.
    pub metadata: AHashMap<String, String>,
    pub relevance_score: Option<f64>,
}
/// Summary statistics about a knowledge-graph index.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct KGIndexInfo {
    pub name: String,
    pub total_terms: usize,
    pub total_nodes: usize,
    pub total_edges: usize,
    pub last_updated: chrono::DateTime<chrono::Utc>,
    pub source: String,
    pub version: Option<String>,
}
/// One message in a conversation, with any context attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ChatMessage {
    pub id: MessageId,
    // Role is a free string; the constructors use "user"/"assistant"/"system".
    pub role: String, pub content: String,
    pub context_items: Vec<ContextItem>,
    pub created_at: chrono::DateTime<chrono::Utc>,
    pub token_count: Option<u32>,
    /// Model that generated this message (assistant messages only).
    pub model: Option<String>,
}
impl ChatMessage {
    /// Shared constructor: a message with the given role, content, and
    /// optional model, no attached context, timestamped now. Private helper
    /// that removes the triplicated bodies of `user`/`assistant`/`system`.
    fn with_role(role: &str, content: String, model: Option<String>) -> Self {
        Self {
            id: MessageId::new(),
            role: role.to_string(),
            content,
            context_items: Vec::new(),
            created_at: chrono::Utc::now(),
            token_count: None,
            model,
        }
    }
    /// A user-authored message.
    pub fn user(content: String) -> Self {
        Self::with_role("user", content, None)
    }
    /// An assistant reply, optionally tagged with the generating model.
    pub fn assistant(content: String, model: Option<String>) -> Self {
        Self::with_role("assistant", content, model)
    }
    /// A system prompt message.
    pub fn system(content: String) -> Self {
        Self::with_role("system", content, None)
    }
    /// Attaches one context item to this message.
    pub fn add_context(&mut self, context: ContextItem) {
        self.context_items.push(context);
    }
    /// Attaches several context items at once.
    pub fn add_contexts(&mut self, contexts: Vec<ContextItem>) {
        self.context_items.extend(contexts);
    }
}
/// A chat conversation: ordered messages plus conversation-wide context.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct Conversation {
    pub id: ConversationId,
    pub title: String,
    pub messages: Vec<ChatMessage>,
    /// Context that applies to the whole conversation (not one message).
    pub global_context: Vec<ContextItem>,
    /// Role this conversation is scoped to.
    pub role: RoleName,
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Bumped whenever a message or global context item is added.
    pub updated_at: chrono::DateTime<chrono::Utc>,
    pub metadata: AHashMap<String, String>,
}
impl Conversation {
    /// Creates an empty conversation with a fresh id; `created_at` and
    /// `updated_at` both start at the current time.
    pub fn new(title: String, role: RoleName) -> Self {
        let now = chrono::Utc::now();
        Self {
            id: ConversationId::new(),
            title,
            messages: Vec::new(),
            global_context: Vec::new(),
            role,
            created_at: now,
            updated_at: now,
            metadata: AHashMap::new(),
        }
    }
    /// Appends a message and bumps `updated_at`.
    pub fn add_message(&mut self, message: ChatMessage) {
        self.messages.push(message);
        self.updated_at = chrono::Utc::now();
    }
    /// Appends a conversation-wide context item and bumps `updated_at`.
    pub fn add_global_context(&mut self, context: ContextItem) {
        self.global_context.push(context);
        self.updated_at = chrono::Utc::now();
    }
    /// Rough context size in bytes: every message body plus every attached
    /// and global context item's content length.
    pub fn estimated_context_length(&self) -> usize {
        let context_len =
            |items: &[ContextItem]| items.iter().map(|c| c.content.len()).sum::<usize>();
        let message_length: usize = self
            .messages
            .iter()
            .map(|m| m.content.len() + context_len(&m.context_items))
            .sum();
        message_length + context_len(&self.global_context)
    }
}
/// Lightweight listing view of a conversation (no message bodies).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ConversationSummary {
    pub id: ConversationId,
    pub title: String,
    pub role: RoleName,
    pub message_count: usize,
    /// Global context items plus per-message context items.
    pub context_count: usize,
    pub created_at: chrono::DateTime<chrono::Utc>,
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// Truncated first user message, when one exists.
    pub preview: Option<String>,
}
impl From<&Conversation> for ConversationSummary {
    /// Builds a listing summary: counts all context items (global plus
    /// per-message) and previews the first user message.
    fn from(conversation: &Conversation) -> Self {
        let context_count = conversation.global_context.len()
            + conversation
                .messages
                .iter()
                .map(|m| m.context_items.len())
                .sum::<usize>();
        // Truncate the preview to 100 *characters* on a char boundary.
        // The previous `&m.content[..100]` byte-sliced, which panics when
        // byte 100 falls inside a multi-byte UTF-8 sequence.
        let preview = conversation
            .messages
            .iter()
            .find(|m| m.role == "user")
            .map(|m| match m.content.char_indices().nth(100) {
                // `nth(100)` is the 101st char, so `byte_idx` is the byte
                // boundary right after the first 100 chars.
                Some((byte_idx, _)) => format!("{}...", &m.content[..byte_idx]),
                None => m.content.clone(),
            });
        Self {
            id: conversation.id.clone(),
            title: conversation.title.clone(),
            role: conversation.role.clone(),
            message_count: conversation.messages.len(),
            context_count,
            created_at: conversation.created_at,
            updated_at: conversation.updated_at,
            preview,
        }
    }
}
/// Bounded usage history of context items across conversations.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextHistory {
    pub used_contexts: Vec<ContextHistoryEntry>,
    /// Hard cap; least-recently-used entries are evicted past this.
    pub max_entries: usize,
}
impl ContextHistory {
    /// Creates an empty history retaining at most `max_entries` entries.
    pub fn new(max_entries: usize) -> Self {
        Self {
            used_contexts: Vec::new(),
            max_entries,
        }
    }
    /// Records one use of `context_id` within `conversation_id`. A repeat use
    /// bumps the matching entry's count and timestamp; otherwise a new entry
    /// is added. The history is then trimmed to `max_entries`, evicting the
    /// least recently used entries first.
    pub fn record_usage(
        &mut self,
        context_id: &str,
        conversation_id: &ConversationId,
        usage_type: ContextUsageType,
    ) {
        let now = chrono::Utc::now();
        if let Some(existing) = self
            .used_contexts
            .iter_mut()
            .find(|e| e.context_id == context_id && e.conversation_id == *conversation_id)
        {
            existing.usage_count += 1;
            existing.used_at = now;
        } else {
            // Only allocate the entry when no matching one exists; the
            // previous version built (and then dropped) it unconditionally.
            self.used_contexts.push(ContextHistoryEntry {
                context_id: context_id.to_string(),
                conversation_id: conversation_id.clone(),
                usage_type,
                used_at: now,
                usage_count: 1,
            });
        }
        if self.used_contexts.len() > self.max_entries {
            // Oldest first, then drop the excess from the front (LRU eviction).
            self.used_contexts.sort_by_key(|e| e.used_at);
            let excess = self.used_contexts.len() - self.max_entries;
            self.used_contexts.drain(0..excess);
        }
    }
    /// The `limit` most frequently used entries, highest usage count first.
    pub fn get_frequent_contexts(&self, limit: usize) -> Vec<&ContextHistoryEntry> {
        let mut entries: Vec<&ContextHistoryEntry> = self.used_contexts.iter().collect();
        entries.sort_by_key(|e| std::cmp::Reverse(e.usage_count));
        entries.into_iter().take(limit).collect()
    }
}
/// One record in [`ContextHistory`]: a context used within a conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextHistoryEntry {
    pub context_id: String,
    pub conversation_id: ConversationId,
    pub usage_type: ContextUsageType,
    /// Time of the most recent use.
    pub used_at: chrono::DateTime<chrono::Utc>,
    pub usage_count: usize,
}
/// How a context item came to be used.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum ContextUsageType {
    Manual,
    Automatic,
    SearchResult,
    DocumentReference,
}
/// Routing priority on a 0–100 scale (constructors clamp to that range).
#[derive(
    Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, JsonSchema, Default,
)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct Priority(pub u8);
impl Priority {
    /// Clamps to the valid 0–100 range (u8 is already non-negative, so only
    /// the upper bound matters).
    pub fn new(value: u8) -> Self {
        Self(value.min(100))
    }
    /// The raw 0–100 value.
    pub fn value(&self) -> u8 {
        self.0
    }
    /// 80–100.
    pub fn is_high(&self) -> bool {
        self.0 >= 80
    }
    /// 40–79.
    pub fn is_medium(&self) -> bool {
        (40..80).contains(&self.0)
    }
    /// 0–39.
    pub fn is_low(&self) -> bool {
        self.0 < 40
    }
    pub const MAX: Self = Self(100);
    pub const HIGH: Self = Self(80);
    pub const MEDIUM: Self = Self(50);
    pub const LOW: Self = Self(20);
    pub const MIN: Self = Self(0);
}
impl fmt::Display for Priority {
    /// Displays the raw numeric value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
impl From<u8> for Priority {
    /// Clamps into the 0–100 range via [`Priority::new`].
    fn from(value: u8) -> Self {
        Self::new(value)
    }
}
impl From<i32> for Priority {
    /// Saturating conversion: the value is clamped to 0–100 *before* the
    /// narrowing cast. The previous `value as u8` truncated modulo 256
    /// (300 -> 44, -1 -> 255 -> clamped to 100), silently mangling
    /// out-of-range priorities instead of saturating them.
    fn from(value: i32) -> Self {
        Self::new(value.clamp(0, 100) as u8)
    }
}
/// A configurable LLM routing rule: a pattern mapped to a provider/model
/// with a priority and enable flag.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct RoutingRule {
    pub id: String,
    pub name: String,
    /// Pattern matched against requests (format defined by the matcher).
    pub pattern: String,
    pub priority: Priority,
    pub provider: String,
    pub model: String,
    pub description: Option<String>,
    pub tags: Vec<String>,
    pub enabled: bool,
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Bumped via `touch()` on modification.
    pub updated_at: chrono::DateTime<chrono::Utc>,
}
impl RoutingRule {
    /// Creates an enabled rule with no description or tags; both timestamps
    /// start at the current time.
    pub fn new(
        id: String,
        name: String,
        pattern: String,
        priority: Priority,
        provider: String,
        model: String,
    ) -> Self {
        let created = chrono::Utc::now();
        Self {
            id,
            name,
            pattern,
            priority,
            provider,
            model,
            description: None,
            tags: Vec::new(),
            enabled: true,
            created_at: created,
            updated_at: created,
        }
    }
    /// Like [`RoutingRule::new`] with priority defaulted to `MEDIUM`.
    pub fn with_defaults(
        id: String,
        name: String,
        pattern: String,
        provider: String,
        model: String,
    ) -> Self {
        Self::new(id, name, pattern, Priority::MEDIUM, provider, model)
    }
    /// Builder-style: sets the description.
    pub fn with_description(mut self, description: String) -> Self {
        self.description = Some(description);
        self
    }
    /// Builder-style: appends one tag.
    pub fn with_tag(mut self, tag: String) -> Self {
        self.tags.push(tag);
        self
    }
    /// Builder-style: enables or disables the rule.
    pub fn with_enabled(mut self, enabled: bool) -> Self {
        self.enabled = enabled;
        self
    }
    /// Bumps `updated_at` to the current time.
    pub fn touch(&mut self) {
        self.updated_at = chrono::Utc::now();
    }
}
/// The result of matching a routing rule against a request.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct PatternMatch {
    pub concept: String,
    pub provider: String,
    pub model: String,
    /// Raw match score.
    pub score: f64,
    pub priority: Priority,
    /// `score` scaled by priority/100 (computed in the constructor).
    pub weighted_score: f64,
    pub rule_id: String,
}
impl PatternMatch {
    /// Builds a match, deriving `weighted_score` as `score * priority / 100`.
    pub fn new(
        concept: String,
        provider: String,
        model: String,
        score: f64,
        priority: Priority,
        rule_id: String,
    ) -> Self {
        // Priority expressed as a 0.0–1.0 weighting factor.
        let weighted_score = score * (f64::from(priority.value()) / 100.0);
        Self {
            concept,
            provider,
            model,
            score,
            priority,
            weighted_score,
            rule_id,
        }
    }
    /// Convenience constructor: medium priority and the "default" rule id.
    pub fn simple(concept: String, provider: String, model: String, score: f64) -> Self {
        Self::new(
            concept,
            provider,
            model,
            score,
            Priority::MEDIUM,
            String::from("default"),
        )
    }
}
/// Final routing outcome: which provider/model a request is sent to, and why.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct RoutingDecision {
/// Chosen provider.
pub provider: String,
/// Chosen model.
pub model: String,
/// Scenario that drove the decision.
pub scenario: RoutingScenario,
/// Priority associated with the decision.
pub priority: Priority,
/// Confidence in the decision (the constructors in this file use values
/// in `0.0..=1.0`).
pub confidence: f64,
/// Identifier of the rule that produced the decision, if any.
pub rule_id: Option<String>,
/// Human-readable explanation of the decision.
pub reason: String,
}
impl RoutingDecision {
    /// Creates a decision not attributed to any specific rule
    /// (`rule_id` is `None`).
    pub fn new(
        provider: String,
        model: String,
        scenario: RoutingScenario,
        priority: Priority,
        confidence: f64,
        reason: String,
    ) -> Self {
        Self {
            rule_id: None,
            provider,
            model,
            scenario,
            priority,
            confidence,
            reason,
        }
    }
    /// Creates a decision attributed to the rule identified by `rule_id`.
    pub fn with_rule(
        provider: String,
        model: String,
        scenario: RoutingScenario,
        priority: Priority,
        confidence: f64,
        rule_id: String,
        reason: String,
    ) -> Self {
        let mut decision = Self::new(provider, model, scenario, priority, confidence, reason);
        decision.rule_id = Some(rule_id);
        decision
    }
    /// Fallback decision: default scenario, low priority, confidence 0.5.
    pub fn default(provider: String, model: String) -> Self {
        Self::new(
            provider,
            model,
            RoutingScenario::Default,
            Priority::LOW,
            0.5,
            String::from("Default routing"),
        )
    }
}
/// Routing scenario tags, serialized in snake_case (e.g. `"long_context"`).
///
/// `Pattern` and `Custom` carry a payload; their `Display` form appends it
/// after a colon (`pattern:<concept>`, `custom:<name>`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum RoutingScenario {
/// Fallback scenario; also the `Default` value of this enum.
#[serde(rename = "default")]
#[default]
Default,
#[serde(rename = "background")]
Background,
#[serde(rename = "think")]
Think,
#[serde(rename = "long_context")]
LongContext,
#[serde(rename = "web_search")]
WebSearch,
#[serde(rename = "image")]
Image,
/// Pattern-based routing; the payload is the matched concept.
#[serde(rename = "pattern")]
Pattern(String),
#[serde(rename = "priority")]
Priority,
/// Caller-defined scenario; the payload is its name.
#[serde(rename = "custom")]
Custom(String),
}
impl fmt::Display for RoutingScenario {
    /// Renders the scenario as its wire-format tag; the parameterised
    /// variants append their payload after a colon
    /// (`pattern:<concept>`, `custom:<name>`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Pattern(concept) => return write!(f, "pattern:{}", concept),
            Self::Custom(name) => return write!(f, "custom:{}", name),
            Self::Default => "default",
            Self::Background => "background",
            Self::Think => "think",
            Self::LongContext => "long_context",
            Self::WebSearch => "web_search",
            Self::Image => "image",
            Self::Priority => "priority",
        };
        f.write_str(label)
    }
}
/// Shared state for a multi-agent session: the participating agents, their
/// individual and session-wide context items, and the messages exchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct MultiAgentContext {
/// Session identifier (a UUID generated by [`MultiAgentContext::new`]).
pub session_id: String,
/// Agents registered in this session.
pub agents: Vec<AgentInfo>,
/// Session-wide context items (populated by callers; the methods in this
/// file never write to it).
pub shared_context: Vec<ContextItem>,
/// Per-agent context items, keyed by agent id.
pub agent_contexts: AHashMap<String, Vec<ContextItem>>,
/// Chronological log of inter-agent messages.
pub agent_communications: Vec<AgentCommunication>,
/// Creation timestamp (UTC).
pub created_at: chrono::DateTime<chrono::Utc>,
/// Timestamp of the last mutation performed through this type's methods.
pub updated_at: chrono::DateTime<chrono::Utc>,
}
impl MultiAgentContext {
    /// Creates an empty session with a freshly generated UUID and both
    /// timestamps set to the current UTC time.
    pub fn new() -> Self {
        let now = chrono::Utc::now();
        Self {
            session_id: uuid::Uuid::new_v4().to_string(),
            agents: Vec::new(),
            shared_context: Vec::new(),
            agent_contexts: AHashMap::new(),
            agent_communications: Vec::new(),
            created_at: now,
            updated_at: now,
        }
    }
    /// Registers an agent and allocates its (empty) per-agent context list.
    ///
    /// NOTE(review): re-registering an agent with an existing `id` resets
    /// that agent's context list (the map entry is overwritten) — confirm
    /// this is intended by callers.
    pub fn add_agent(&mut self, agent: AgentInfo) {
        // Clone only the id key instead of the entire AgentInfo (which would
        // also copy its capabilities vector and strings); the agent itself
        // is moved into `agents`.
        self.agent_contexts.insert(agent.id.clone(), Vec::new());
        self.agents.push(agent);
        self.updated_at = chrono::Utc::now();
    }
    /// Appends a context item for a known agent. Unknown agent ids are
    /// silently ignored, and `updated_at` is left untouched in that case.
    pub fn add_agent_context(&mut self, agent_id: &str, context: ContextItem) {
        if let Some(contexts) = self.agent_contexts.get_mut(agent_id) {
            contexts.push(context);
            self.updated_at = chrono::Utc::now();
        }
    }
    /// Records a message from `from_agent`; `to_agent` is `None` when the
    /// message is not addressed to a specific agent.
    pub fn record_communication(
        &mut self,
        from_agent: &str,
        to_agent: Option<&str>,
        message: String,
    ) {
        let communication = AgentCommunication {
            from_agent: from_agent.to_string(),
            to_agent: to_agent.map(|s| s.to_string()),
            message,
            timestamp: chrono::Utc::now(),
        };
        self.agent_communications.push(communication);
        self.updated_at = chrono::Utc::now();
    }
}
impl Default for MultiAgentContext {
fn default() -> Self {
Self::new()
}
}
/// Descriptor of a single agent participating in a multi-agent session.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct AgentInfo {
/// Unique agent identifier; used as the key into
/// [`MultiAgentContext::agent_contexts`].
pub id: String,
/// Display name.
pub name: String,
/// Role label for the agent.
pub role: String,
/// Capability names advertised by the agent.
pub capabilities: Vec<String>,
/// Model backing the agent, if pinned to one.
pub model: Option<String>,
}
/// A single message exchanged between agents in a session.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct AgentCommunication {
/// Sender agent id.
pub from_agent: String,
/// Recipient agent id; `None` when the message is not addressed to a
/// specific agent.
pub to_agent: Option<String>,
/// Message payload.
pub message: String,
/// When the message was recorded (UTC).
pub timestamp: chrono::DateTime<chrono::Utc>,
}
/// How a term was normalized; serialized in snake_case
/// (`"exact"`, `"fuzzy"`, `"graph_rank"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum NormalizationMethod {
/// Exact match (the default).
#[default]
Exact,
/// Fuzzy/approximate match.
Fuzzy,
/// Graph-rank based normalization — exact semantics are defined by the
/// consumer of this type.
GraphRank,
}
/// Normalization/grounding details attached to an extracted entity.
///
/// All fields default to `None`; [`GroundingMetadata::new`] populates every
/// field at once.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GroundingMetadata {
/// URI of the normalized concept.
pub normalized_uri: Option<String>,
/// Canonical label of the normalized concept.
pub normalized_label: Option<String>,
/// Provenance of the normalization.
pub normalized_prov: Option<String>,
/// Normalization confidence score.
pub normalized_score: Option<f32>,
/// Method used to normalize the term.
pub normalized_method: Option<NormalizationMethod>,
}
impl GroundingMetadata {
pub fn new(
uri: String,
label: String,
prov: String,
score: f32,
method: NormalizationMethod,
) -> Self {
Self {
normalized_uri: Some(uri),
normalized_label: Some(label),
normalized_prov: Some(prov),
normalized_score: Some(score),
normalized_method: Some(method),
}
}
}
/// Summary of how many schema categories were matched; produced by
/// [`CoverageSignal::compute`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageSignal {
/// Number of categories in the schema.
pub total_categories: usize,
/// Number of categories that were matched.
pub matched_categories: usize,
/// `matched_categories / total_categories`; `0.0` when there are no
/// categories.
pub coverage_ratio: f32,
/// Review threshold the ratio is compared against.
pub threshold: f32,
/// `true` when `coverage_ratio < threshold`.
pub needs_review: bool,
}
impl CoverageSignal {
    /// Derives a coverage signal from the category list.
    ///
    /// `coverage_ratio` is `matched / categories.len()` (or `0.0` for an
    /// empty list); `needs_review` is set when the ratio falls below
    /// `threshold`.
    pub fn compute(categories: &[String], matched: usize, threshold: f32) -> Self {
        let total_categories = categories.len();
        let coverage_ratio = match total_categories {
            0 => 0.0,
            n => matched as f32 / n as f32,
        };
        Self {
            total_categories,
            matched_categories: matched,
            coverage_ratio,
            threshold,
            needs_review: coverage_ratio < threshold,
        }
    }
}
/// Entity categories for the medical extraction schema; serialized in
/// snake_case (e.g. `"cancer_diagnosis"`). Only available with the
/// `medical` feature.
#[cfg(feature = "medical")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
CancerDiagnosis,
Tumor,
GenomicVariant,
Biomarker,
Drug,
Treatment,
SideEffect,
}
/// Relationship categories for the medical extraction schema; serialized in
/// SCREAMING_SNAKE_CASE (e.g. `"HAS_TUMOR"`). Only available with the
/// `medical` feature.
#[cfg(feature = "medical")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum RelationshipType {
HasTumor,
HasVariant,
HasBiomarker,
TreatedWith,
Causes,
HasDiagnosis,
}
/// An entity extracted from text, with optional normalization results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedEntity {
/// Entity category as a free-form string (presumably an entity-type id
/// from the active schema — verify against callers).
pub entity_type: String,
/// The surface form as found in the text.
pub raw_value: String,
/// Canonical value after normalization, when available.
pub normalized_value: Option<String>,
/// Grounding details, when the entity was grounded.
pub grounding: Option<GroundingMetadata>,
}
/// A relationship extracted between two entities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedRelationship {
/// Relationship category as a free-form string.
pub relationship_type: String,
/// Source entity reference.
pub source: String,
/// Target entity reference.
pub target: String,
/// Extraction confidence.
pub confidence: f32,
}
/// Aggregate extraction output: the entities and relationships found, plus
/// an overall confidence value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaSignal {
/// Entities extracted from the input.
pub entities: Vec<ExtractedEntity>,
/// Relationships extracted from the input.
pub relationships: Vec<ExtractedRelationship>,
/// Overall confidence of the extraction.
pub confidence: f32,
}
/// An entity type declared by an [`OntologySchema`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyEntityType {
/// Stable identifier (e.g. `"chapter"`).
pub id: String,
/// Display label (e.g. `"Chapter"`).
pub label: String,
/// Canonical URI for the type; when absent, a `kg://<id>` URL is
/// synthesized (see [`OntologySchema::to_thesaurus_entries`]).
#[serde(default)]
pub uri_prefix: Option<String>,
/// Alternative names for the type.
#[serde(default)]
pub aliases: Vec<String>,
/// Optional grouping category.
#[serde(default)]
pub category: Option<String>,
}
/// A relationship type declared by an [`OntologySchema`], linking one
/// entity type to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyRelationshipType {
/// Stable identifier of the relationship type.
pub id: String,
/// Display label.
pub label: String,
/// Entity-type id of the relationship's source.
pub source_type: String,
/// Entity-type id of the relationship's target.
pub target_type: String,
}
/// A modelling anti-pattern declared by an [`OntologySchema`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyAntiPattern {
/// Stable identifier of the anti-pattern.
pub id: String,
/// Human-readable description.
pub description: String,
/// Indicator strings that suggest the anti-pattern is present.
pub indicators: Vec<String>,
}
/// A named, versioned domain ontology: entity types, relationship types and
/// anti-patterns. Loadable from JSON via [`OntologySchema::load_from_file`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologySchema {
/// Schema display name.
pub name: String,
/// Schema version string.
pub version: String,
/// Entity types; required in the JSON representation.
pub entity_types: Vec<OntologyEntityType>,
/// Relationship types (optional in the JSON; defaults to empty).
#[serde(default)]
pub relationship_types: Vec<OntologyRelationshipType>,
/// Anti-patterns (optional in the JSON; defaults to empty).
#[serde(default)]
pub anti_patterns: Vec<OntologyAntiPattern>,
}
impl OntologySchema {
    /// Loads a schema from a JSON file on disk.
    ///
    /// Accepts anything path-like (`&str`, `String`, `&Path`, `PathBuf`);
    /// this is backward-compatible with the previous `&str`-only signature.
    ///
    /// # Errors
    /// Returns the underlying error when the file cannot be read or its
    /// contents do not parse as an `OntologySchema`.
    pub fn load_from_file(
        path: impl AsRef<std::path::Path>,
    ) -> Result<Self, Box<dyn std::error::Error>> {
        let content = std::fs::read_to_string(path)?;
        let schema: Self = serde_json::from_str(&content)?;
        Ok(schema)
    }
    /// Flattens the schema into `(category_id, term, url)` thesaurus
    /// entries: one entry per entity-type label plus one per alias, all
    /// sharing the type's URL (`uri_prefix`, or a `kg://<id>` fallback).
    pub fn to_thesaurus_entries(&self) -> Vec<(String, String, Option<String>)> {
        // Each entity type yields one label entry plus one entry per alias,
        // so the exact output size is known up front.
        let mut entries =
            Vec::with_capacity(self.entity_types.iter().map(|e| 1 + e.aliases.len()).sum());
        for entity_type in &self.entity_types {
            let url = entity_type
                .uri_prefix
                .clone()
                .unwrap_or_else(|| format!("kg://{}", entity_type.id));
            entries.push((
                entity_type.id.clone(),
                entity_type.label.clone(),
                Some(url.clone()),
            ));
            for alias in &entity_type.aliases {
                entries.push((entity_type.id.clone(), alias.clone(), Some(url.clone())));
            }
        }
        entries
    }
    /// Returns all entity-type ids (the category vocabulary of this schema).
    pub fn category_ids(&self) -> Vec<String> {
        self.entity_types.iter().map(|e| e.id.clone()).collect()
    }
    /// Looks up the `uri_prefix` of the entity type with the given id;
    /// `None` when the id is unknown or the type has no `uri_prefix`.
    pub fn uri_for(&self, entity_type_id: &str) -> Option<String> {
        self.entity_types
            .iter()
            .find(|e| e.id == entity_type_id)
            .and_then(|e| e.uri_prefix.clone())
    }
}
#[cfg(test)]
mod tests {
use super::*;
// --- SearchQuery / LogicalOperator ---
// Single-term queries report one term and default to OR; multi-term queries
// expose the operator they were built with.
#[test]
fn test_search_query_logical_operators() {
let single_query = SearchQuery {
search_term: NormalizedTermValue::new("rust".to_string()),
search_terms: None,
operator: None,
skip: None,
limit: Some(10),
role: Some(RoleName::new("test")),
layer: Layer::default(),
include_pinned: false,
};
assert!(!single_query.is_multi_term_query());
assert_eq!(single_query.get_all_terms().len(), 1);
assert_eq!(single_query.get_operator(), LogicalOperator::Or);
let and_query = SearchQuery::with_terms_and_operator(
NormalizedTermValue::new("machine".to_string()),
vec![NormalizedTermValue::new("learning".to_string())],
LogicalOperator::And,
Some(RoleName::new("test")),
);
assert!(and_query.is_multi_term_query());
assert_eq!(and_query.get_all_terms().len(), 2);
assert_eq!(and_query.get_operator(), LogicalOperator::And);
let or_query = SearchQuery::with_terms_and_operator(
NormalizedTermValue::new("neural".to_string()),
vec![NormalizedTermValue::new("networks".to_string())],
LogicalOperator::Or,
Some(RoleName::new("test")),
);
assert!(or_query.is_multi_term_query());
assert_eq!(or_query.get_all_terms().len(), 2);
assert_eq!(or_query.get_operator(), LogicalOperator::Or);
}
// Operators serialize as lowercase JSON strings and round-trip.
#[test]
fn test_logical_operator_serialization() {
let and_op = LogicalOperator::And;
let or_op = LogicalOperator::Or;
let and_json = serde_json::to_string(&and_op).unwrap();
let or_json = serde_json::to_string(&or_op).unwrap();
assert_eq!(and_json, "\"and\"");
assert_eq!(or_json, "\"or\"");
let and_deser: LogicalOperator = serde_json::from_str("\"and\"").unwrap();
let or_deser: LogicalOperator = serde_json::from_str("\"or\"").unwrap();
assert_eq!(and_deser, LogicalOperator::And);
assert_eq!(or_deser, LogicalOperator::Or);
}
// A fully populated SearchQuery survives a JSON round-trip field by field.
#[test]
fn test_search_query_serialization() {
let query = SearchQuery {
search_term: NormalizedTermValue::new("test".to_string()),
search_terms: Some(vec![
NormalizedTermValue::new("additional".to_string()),
NormalizedTermValue::new("terms".to_string()),
]),
operator: Some(LogicalOperator::And),
skip: Some(0),
limit: Some(10),
role: Some(RoleName::new("test_role")),
layer: Layer::default(),
include_pinned: false,
};
let json = serde_json::to_string(&query).unwrap();
let deserialized: SearchQuery = serde_json::from_str(&json).unwrap();
assert_eq!(query.search_term, deserialized.search_term);
assert_eq!(query.search_terms, deserialized.search_terms);
assert_eq!(query.operator, deserialized.operator);
assert_eq!(query.skip, deserialized.skip);
assert_eq!(query.limit, deserialized.limit);
assert_eq!(query.role, deserialized.role);
}
// --- Priority ---
// Named constants (HIGH=80, MEDIUM=50, LOW=20), ordering, and the 0..=100
// clamp applied by Priority::new.
#[test]
fn test_priority_creation_and_comparison() {
let high = Priority::HIGH;
let medium = Priority::MEDIUM;
let low = Priority::LOW;
let custom = Priority::new(75);
assert_eq!(high.value(), 80);
assert_eq!(medium.value(), 50);
assert_eq!(low.value(), 20);
assert_eq!(custom.value(), 75);
assert!(high.is_high());
assert!(!medium.is_high());
assert!(medium.is_medium());
assert!(low.is_low());
assert!(high > medium);
assert!(medium > low);
assert!(custom > medium);
assert!(custom < high);
// Values above 100 clamp to 100; 0 is preserved.
let max = Priority::new(150);
assert_eq!(max.value(), 100);
let min = Priority::new(0);
assert_eq!(min.value(), 0);
}
// --- RoutingRule ---
// Builder methods accumulate description and tags; rules start enabled.
#[test]
fn test_routing_rule_creation() {
let rule = RoutingRule::new(
"test-rule".to_string(),
"Test Rule".to_string(),
"test.*pattern".to_string(),
Priority::HIGH,
"openai".to_string(),
"gpt-4".to_string(),
)
.with_description("A test rule for unit testing".to_string())
.with_tag("test".to_string())
.with_tag("example".to_string());
assert_eq!(rule.id, "test-rule");
assert_eq!(rule.name, "Test Rule");
assert_eq!(rule.pattern, "test.*pattern");
assert_eq!(rule.priority, Priority::HIGH);
assert_eq!(rule.provider, "openai");
assert_eq!(rule.model, "gpt-4");
assert_eq!(
rule.description,
Some("A test rule for unit testing".to_string())
);
assert_eq!(rule.tags, vec!["test", "example"]);
assert!(rule.enabled);
}
// with_defaults yields MEDIUM priority, enabled, no tags/description.
#[test]
fn test_routing_rule_defaults() {
let rule = RoutingRule::with_defaults(
"default-rule".to_string(),
"Default Rule".to_string(),
"default".to_string(),
"anthropic".to_string(),
"claude-3-sonnet".to_string(),
);
assert_eq!(rule.priority, Priority::MEDIUM);
assert!(rule.enabled);
assert!(rule.tags.is_empty());
assert!(rule.description.is_none());
}
// --- PatternMatch ---
// weighted_score = score * priority/100 (HIGH=80 -> factor 0.8).
#[test]
fn test_pattern_match() {
let pattern_match = PatternMatch::new(
"machine-learning".to_string(),
"openai".to_string(),
"gpt-4".to_string(),
0.95,
Priority::HIGH,
"ml-rule".to_string(),
);
assert_eq!(pattern_match.concept, "machine-learning");
assert_eq!(pattern_match.provider, "openai");
assert_eq!(pattern_match.model, "gpt-4");
assert_eq!(pattern_match.score, 0.95);
assert_eq!(pattern_match.priority, Priority::HIGH);
assert_eq!(pattern_match.rule_id, "ml-rule");
assert_eq!(pattern_match.weighted_score, 0.95 * 0.8);
}
// simple() defaults to MEDIUM priority (factor 0.5) and rule id "default".
#[test]
fn test_pattern_match_simple() {
let simple = PatternMatch::simple(
"test".to_string(),
"anthropic".to_string(),
"claude-3-haiku".to_string(),
0.8,
);
assert_eq!(simple.priority, Priority::MEDIUM);
assert_eq!(simple.rule_id, "default");
assert_eq!(simple.weighted_score, 0.8 * 0.5);
}
// --- RoutingDecision ---
// new() leaves rule_id unset.
#[test]
fn test_routing_decision() {
let decision = RoutingDecision::new(
"openai".to_string(),
"gpt-4".to_string(),
RoutingScenario::Think,
Priority::HIGH,
0.9,
"High priority thinking task".to_string(),
);
assert_eq!(decision.provider, "openai");
assert_eq!(decision.model, "gpt-4");
assert_eq!(decision.scenario, RoutingScenario::Think);
assert_eq!(decision.priority, Priority::HIGH);
assert_eq!(decision.confidence, 0.9);
assert_eq!(decision.reason, "High priority thinking task");
assert!(decision.rule_id.is_none());
}
// with_rule() records the originating rule id.
#[test]
fn test_routing_decision_with_rule() {
let decision = RoutingDecision::with_rule(
"anthropic".to_string(),
"claude-3-sonnet".to_string(),
RoutingScenario::Pattern("web-search".to_string()),
Priority::MEDIUM,
0.85,
"web-rule".to_string(),
"Web search pattern matched".to_string(),
);
assert_eq!(decision.rule_id, Some("web-rule".to_string()));
assert_eq!(
decision.scenario,
RoutingScenario::Pattern("web-search".to_string())
);
}
// default() is the LOW-priority, 0.5-confidence fallback decision.
#[test]
fn test_routing_decision_default() {
let default = RoutingDecision::default("openai".to_string(), "gpt-3.5-turbo".to_string());
assert_eq!(default.provider, "openai");
assert_eq!(default.model, "gpt-3.5-turbo");
assert_eq!(default.scenario, RoutingScenario::Default);
assert_eq!(default.priority, Priority::LOW);
assert_eq!(default.confidence, 0.5);
assert_eq!(default.reason, "Default routing");
}
// --- RoutingScenario ---
// Every variant, including the payload-carrying ones, JSON round-trips.
#[test]
fn test_routing_scenario_serialization() {
let scenarios = vec![
RoutingScenario::Default,
RoutingScenario::Background,
RoutingScenario::Think,
RoutingScenario::LongContext,
RoutingScenario::WebSearch,
RoutingScenario::Image,
RoutingScenario::Pattern("test".to_string()),
RoutingScenario::Priority,
RoutingScenario::Custom("special".to_string()),
];
for scenario in scenarios {
let json = serde_json::to_string(&scenario).unwrap();
let deserialized: RoutingScenario = serde_json::from_str(&json).unwrap();
assert_eq!(scenario, deserialized);
}
}
// Display uses snake_case tags; payload variants append ":<payload>".
#[test]
fn test_routing_scenario_display() {
assert_eq!(format!("{}", RoutingScenario::Default), "default");
assert_eq!(format!("{}", RoutingScenario::Think), "think");
assert_eq!(
format!("{}", RoutingScenario::Pattern("ml".to_string())),
"pattern:ml"
);
assert_eq!(
format!("{}", RoutingScenario::Custom("test".to_string())),
"custom:test"
);
}
// Priority values survive a JSON round-trip unchanged.
#[test]
fn test_priority_serialization() {
let priority = Priority::new(75);
let json = serde_json::to_string(&priority).unwrap();
let deserialized: Priority = serde_json::from_str(&json).unwrap();
assert_eq!(priority, deserialized);
assert_eq!(deserialized.value(), 75);
}
// RoutingRule survives a JSON round-trip field by field.
#[test]
fn test_routing_rule_serialization() {
let rule = RoutingRule::new(
"serialize-test".to_string(),
"Serialize Test".to_string(),
"test-pattern".to_string(),
Priority::MEDIUM,
"provider".to_string(),
"model".to_string(),
);
let json = serde_json::to_string(&rule).unwrap();
let deserialized: RoutingRule = serde_json::from_str(&json).unwrap();
assert_eq!(rule.id, deserialized.id);
assert_eq!(rule.name, deserialized.name);
assert_eq!(rule.priority, deserialized.priority);
assert_eq!(rule.provider, deserialized.provider);
assert_eq!(rule.model, deserialized.model);
}
// --- Document ---
// DocumentType variants JSON round-trip.
#[test]
fn test_document_type_serialization() {
let types = vec![
DocumentType::KgEntry,
DocumentType::Document,
DocumentType::ConfigDocument,
];
for doc_type in types {
let json = serde_json::to_string(&doc_type).unwrap();
let deserialized: DocumentType = serde_json::from_str(&json).unwrap();
assert_eq!(doc_type, deserialized);
}
}
// Minimal Document JSON deserializes with defaults for the newer fields.
#[test]
fn test_document_defaults_for_new_fields() {
let json = r#"{
"id":"doc-1",
"url":"file:///tmp/doc.md",
"title":"Doc",
"body":"Body"
}"#;
let doc: Document = serde_json::from_str(json).unwrap();
assert_eq!(doc.doc_type, DocumentType::KgEntry);
assert!(doc.synonyms.is_none());
assert!(doc.route.is_none());
assert!(doc.priority.is_none());
}
// --- OntologySchema (against the sample fixture) ---
#[test]
fn test_ontology_schema_deserialize() {
let json = include_str!("../test-fixtures/sample_ontology_schema.json");
let schema: OntologySchema = serde_json::from_str(json).unwrap();
assert_eq!(schema.name, "Publishing Domain Model");
assert_eq!(schema.version, "1.0.0");
assert_eq!(schema.entity_types.len(), 3);
assert_eq!(schema.relationship_types.len(), 1);
assert_eq!(schema.anti_patterns.len(), 1);
}
// Thesaurus entries cover labels and aliases, each with a URL.
#[test]
fn test_ontology_schema_to_thesaurus_entries() {
let json = include_str!("../test-fixtures/sample_ontology_schema.json");
let schema: OntologySchema = serde_json::from_str(json).unwrap();
let entries = schema.to_thesaurus_entries();
assert_eq!(entries.len(), 10);
assert!(entries.iter().any(|(_, term, _)| term == "Chapter"));
assert!(entries.iter().any(|(_, term, _)| term == "Concept"));
assert!(entries.iter().any(|(_, term, _)| term == "Knowledge Graph"));
assert!(entries.iter().any(|(_, term, _)| term == "section"));
assert!(entries.iter().any(|(_, term, _)| term == "KG"));
assert!(entries.iter().all(|(_, _, url)| url.is_some()));
}
// category_ids() returns one id per entity type.
#[test]
fn test_ontology_schema_category_ids() {
let json = include_str!("../test-fixtures/sample_ontology_schema.json");
let schema: OntologySchema = serde_json::from_str(json).unwrap();
let ids = schema.category_ids();
assert_eq!(ids.len(), 3);
assert!(ids.contains(&"chapter".to_string()));
assert!(ids.contains(&"concept".to_string()));
assert!(ids.contains(&"knowledge_graph".to_string()));
}
// uri_for() returns the declared uri_prefix, or None for unknown ids.
#[test]
fn test_ontology_schema_uri_for() {
let json = include_str!("../test-fixtures/sample_ontology_schema.json");
let schema: OntologySchema = serde_json::from_str(json).unwrap();
assert_eq!(
schema.uri_for("chapter"),
Some("https://schema.org/Chapter".to_string())
);
assert_eq!(
schema.uri_for("concept"),
Some("https://schema.org/DefinedTerm".to_string())
);
assert_eq!(schema.uri_for("nonexistent"), None);
}
// Optional schema sections default to empty/None when absent from the JSON.
#[test]
fn test_ontology_schema_minimal() {
let json = r#"{
"name": "Minimal",
"version": "0.1.0",
"entity_types": [
{"id": "item", "label": "Item"}
]
}"#;
let schema: OntologySchema = serde_json::from_str(json).unwrap();
assert_eq!(schema.name, "Minimal");
assert_eq!(schema.entity_types.len(), 1);
assert!(schema.relationship_types.is_empty());
assert!(schema.anti_patterns.is_empty());
assert!(schema.entity_types[0].aliases.is_empty());
assert!(schema.entity_types[0].uri_prefix.is_none());
}
// --- Layer ---
// Default is One; from_u8 accepts only 1..=3; Display prints the digit;
// higher layers include progressively more content.
#[test]
fn test_layer_enum() {
let default: Layer = Default::default();
assert_eq!(default, Layer::One);
assert_eq!(Layer::from_u8(1), Some(Layer::One));
assert_eq!(Layer::from_u8(2), Some(Layer::Two));
assert_eq!(Layer::from_u8(3), Some(Layer::Three));
assert_eq!(Layer::from_u8(0), None);
assert_eq!(Layer::from_u8(4), None);
assert_eq!(format!("{}", Layer::One), "1");
assert_eq!(format!("{}", Layer::Two), "2");
assert_eq!(format!("{}", Layer::Three), "3");
assert!(!Layer::One.includes_content());
assert!(Layer::Two.includes_content());
assert!(Layer::Three.includes_content());
assert!(!Layer::One.includes_full_content());
assert!(!Layer::Two.includes_full_content());
assert!(Layer::Three.includes_full_content());
}
// --- extract_first_paragraph ---
#[test]
fn test_extract_first_paragraph_simple() {
let body = "First paragraph here.\n\nSecond paragraph here.";
assert_eq!(extract_first_paragraph(body), "First paragraph here.");
}
// YAML frontmatter between --- fences is skipped.
#[test]
fn test_extract_first_paragraph_with_yaml_frontmatter() {
let body = "---\ntitle: My Document\ntags: [rust, programming]\n---\n\nThis is the actual first paragraph.\nMore content here.";
assert_eq!(
extract_first_paragraph(body),
"This is the actual first paragraph."
);
}
// Leading blank lines are skipped.
#[test]
fn test_extract_first_paragraph_empty_lines() {
let body = "\n\n\nFirst paragraph after empty lines.";
assert_eq!(
extract_first_paragraph(body),
"First paragraph after empty lines."
);
}
#[test]
fn test_extract_first_paragraph_single_line() {
let body = "Just one line";
assert_eq!(extract_first_paragraph(body), "Just one line");
}
// The layer field is serialized and round-trips in SearchQuery.
#[test]
fn test_layer_serialization() {
let query = SearchQuery {
search_term: NormalizedTermValue::new("test".to_string()),
search_terms: None,
operator: None,
skip: None,
limit: None,
role: None,
layer: Layer::Two,
include_pinned: false,
};
let json = serde_json::to_string(&query).unwrap();
assert!(json.contains("\"layer\""));
let deserialized: SearchQuery = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.layer, Layer::Two);
}
// --- QualityScore ---
// composite() averages only the populated components; empty score is 0.0.
#[test]
fn test_quality_score_composite() {
let full_score = QualityScore {
knowledge: Some(0.8),
learning: Some(0.6),
synthesis: Some(0.7),
};
assert!((full_score.composite() - 0.7).abs() < f64::EPSILON);
let partial_score = QualityScore {
knowledge: Some(0.9),
learning: None,
synthesis: Some(0.5),
};
assert!((partial_score.composite() - 0.7).abs() < f64::EPSILON);
let single_score = QualityScore {
knowledge: Some(0.8),
learning: None,
synthesis: None,
};
assert!((single_score.composite() - 0.8).abs() < f64::EPSILON);
let empty_score = QualityScore::default();
assert_eq!(empty_score.composite(), 0.0);
}
#[test]
fn test_quality_score_serialization() {
let score = QualityScore {
knowledge: Some(0.8),
learning: Some(0.6),
synthesis: Some(0.7),
};
let json = serde_json::to_string(&score).unwrap();
assert!(json.contains("0.8"));
assert!(json.contains("0.6"));
assert!(json.contains("0.7"));
let deserialized: QualityScore = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.knowledge, Some(0.8));
assert_eq!(deserialized.learning, Some(0.6));
assert_eq!(deserialized.synthesis, Some(0.7));
}
// An all-None QualityScore round-trips to all-None.
#[test]
fn test_quality_score_default_serialization() {
let score = QualityScore::default();
let json = serde_json::to_string(&score).unwrap();
let deserialized: QualityScore = serde_json::from_str(&json).unwrap();
assert!(deserialized.knowledge.is_none());
assert!(deserialized.learning.is_none());
assert!(deserialized.synthesis.is_none());
}
// --- IndexedDocument ---
#[test]
fn test_indexed_document_with_quality_score() {
let doc = IndexedDocument {
id: "test-doc-1".to_string(),
matched_edges: vec![],
rank: 10,
tags: vec!["rust".to_string()],
nodes: vec![1, 2],
quality_score: Some(QualityScore {
knowledge: Some(0.8),
learning: Some(0.6),
synthesis: Some(0.7),
}),
};
assert_eq!(doc.id, "test-doc-1");
assert!((doc.quality_score.as_ref().unwrap().composite() - 0.7).abs() < f64::EPSILON);
}
// Converting a Document leaves quality_score unset.
#[test]
fn test_indexed_document_from_document_quality_score_none() {
let doc = Document {
id: "doc-1".to_string(),
url: "https://example.com".to_string(),
title: "Test".to_string(),
body: "Body".to_string(),
description: None,
summarization: None,
stub: None,
tags: None,
rank: None,
source_haystack: None,
doc_type: DocumentType::Document,
synonyms: None,
route: None,
priority: None,
};
let indexed = IndexedDocument::from_document(doc);
assert!(indexed.quality_score.is_none());
}
// Older JSON without quality_score still deserializes (field defaults to None).
#[test]
fn test_indexed_document_serialization_backward_compat() {
let json = r#"{
"id": "doc-1",
"matched_edges": [],
"rank": 5,
"tags": ["test"],
"nodes": [1]
}"#;
let doc: IndexedDocument = serde_json::from_str(json).unwrap();
assert_eq!(doc.id, "doc-1");
assert!(doc.quality_score.is_none());
}
}