Skip to main content

terraphim_types/
lib.rs

1//! Core type definitions for the Terraphim AI system.
2//!
3//! This crate provides the fundamental data structures used throughout the Terraphim ecosystem:
4//!
5//! - **Knowledge Graph Types**: [`Concept`], [`Node`], [`Edge`], [`Thesaurus`]
6//! - **Document Management**: [`Document`], [`Index`], [`IndexedDocument`]
7//! - **Search Operations**: [`SearchQuery`], [`LogicalOperator`], [`RelevanceFunction`]
8//! - **Conversation Context**: [`Conversation`], [`ChatMessage`], [`ContextItem`]
9//! - **LLM Routing**: [`RoutingRule`], [`RoutingDecision`], [`Priority`]
10//! - **Multi-Agent Coordination**: [`MultiAgentContext`], [`AgentInfo`]
11//! - **Dynamic Ontology**: [`SchemaSignal`], [`ExtractedEntity`], [`CoverageSignal`], [`GroundingMetadata`]
12//! - **HGNC Gene Normalization**: [`HgncGene`], [`HgncNormalizer`]
13//!
14//! # Features
15//!
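//! - `typescript`: Enable TypeScript type generation via tsify for WASM compatibility
//! - `medical`: Enable the `medical_types` module (re-exported at the crate root) and the medical fields on [`Node`] and [`Edge`]
//! - `hgnc`: Enable the `hgnc` gene-normalization module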
17//!
18//! # Examples
19//!
20//! ## Creating a Search Query
21//!
22//! ```
23//! use terraphim_types::{SearchQuery, NormalizedTermValue, Layer, LogicalOperator, RoleName};
24//!
25//! // Simple single-term query
26//! let query = SearchQuery {
27//!     search_term: NormalizedTermValue::from("rust"),
28//!     search_terms: None,
29//!     operator: None,
30//!     skip: None,
31//!     limit: Some(10),
32//!     role: Some(RoleName::new("engineer")),
33//!     layer: Layer::default(),
34//! };
35//!
36//! // Multi-term AND query
37//! let multi_query = SearchQuery::with_terms_and_operator(
38//!     NormalizedTermValue::from("async"),
39//!     vec![NormalizedTermValue::from("programming")],
40//!     LogicalOperator::And,
41//!     Some(RoleName::new("engineer")),
42//! );
43//! ```
44//!
45//! ## Working with Documents
46//!
47//! ```
48//! use terraphim_types::{Document, DocumentType};
49//!
50//! let document = Document {
51//!     id: "doc-1".to_string(),
52//!     url: "https://example.com/article".to_string(),
53//!     title: "Introduction to Rust".to_string(),
54//!     body: "Rust is a systems programming language...".to_string(),
55//!     description: Some("A guide to Rust".to_string()),
56//!     summarization: None,
57//!     stub: None,
58//!     tags: Some(vec!["rust".to_string(), "programming".to_string()]),
59//!     rank: None,
60//!     source_haystack: None,
61//!     doc_type: DocumentType::KgEntry,
62//!     synonyms: None,
63//!     route: None,
64//!     priority: None,
65//! };
66//! ```
67//!
68//! ## Building a Knowledge Graph
69//!
70//! ```
71//! use terraphim_types::{Thesaurus, NormalizedTermValue, NormalizedTerm};
72//!
73//! let mut thesaurus = Thesaurus::new("programming".to_string());
74//! thesaurus.insert(
75//!     NormalizedTermValue::from("rust"),
76//!     NormalizedTerm::with_auto_id(NormalizedTermValue::from("rust programming language"))
77//!         .with_url("https://rust-lang.org".to_string())
78//! );
79//! ```
80
81// Medical types module (feature-gated)
82#[cfg(feature = "medical")]
83pub mod medical_types;
84#[cfg(feature = "medical")]
85pub use medical_types::*;
86
87// HGNC Gene Normalization module (feature-gated)
88#[cfg(feature = "hgnc")]
89pub mod hgnc;
90
91// Capability-based routing types
92pub mod capability;
93pub use capability::*;
94
95// MCP Tool types for self-learning system
96pub mod mcp_tool;
97pub use mcp_tool::*;
98
99// Procedure capture types for self-learning system
100pub mod procedure;
101pub use procedure::*;
102
103// Persona definition types for agent personas
104pub mod persona;
105pub use persona::{CharacteristicDef, PersonaDefinition, PersonaLoadError, SfiaSkillDef};
106
107use ahash::AHashMap;
108use serde::{Deserialize, Deserializer, Serialize, Serializer};
109use std::collections::HashSet;
110use std::collections::hash_map::Iter;
111use std::fmt::{self, Display, Formatter};
112use std::iter::IntoIterator;
113use std::ops::{Deref, DerefMut};
114use std::sync::atomic::{AtomicU64, Ordering};
/// Process-wide counter backing auto-generated identifiers; the first id issued is `1`.
static INT_SEQ: AtomicU64 = AtomicU64::new(1);

/// Hands out the next identifier from [`INT_SEQ`].
///
/// `fetch_add` returns the value stored *before* the increment, so consecutive
/// calls yield consecutive integers.
fn get_int_id() -> u64 {
    const STEP: u64 = 1;
    INT_SEQ.fetch_add(STEP, Ordering::SeqCst)
}
119
120use schemars::JsonSchema;
121use std::str::FromStr;
122#[cfg(feature = "typescript")]
123use tsify::Tsify;
124
125/// A role name with case-insensitive lookup support.
126///
127/// Stores both the original casing and a lowercase version for efficient
128/// case-insensitive operations. Roles represent different user profiles or
129/// personas in the Terraphim system, each with specific knowledge domains
130/// and search preferences.
131///
132/// Note: Equality is based on both fields, so two instances with different
133/// original casing are not equal. Use `as_lowercase()` for case-insensitive comparisons.
134///
135/// # Examples
136///
137/// ```
138/// use terraphim_types::RoleName;
139///
140/// let role = RoleName::new("DataScientist");
141/// assert_eq!(role.as_str(), "DataScientist");
142/// assert_eq!(role.as_lowercase(), "datascientist");
143///
144/// // Compare using lowercase for case-insensitive matching
145/// let role2 = RoleName::new("datascientist");
146/// assert_eq!(role.as_lowercase(), role2.as_lowercase());
147/// ```
148#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, JsonSchema)]
149#[cfg_attr(feature = "typescript", derive(Tsify))]
150#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
151pub struct RoleName {
152    /// The original role name preserving the original casing
153    pub original: String,
154    /// Lowercase version for case-insensitive comparisons
155    pub lowercase: String,
156}
157
158impl RoleName {
159    /// Creates a new role name from a string.
160    ///
161    /// # Arguments
162    ///
163    /// * `name` - The role name with any casing
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// use terraphim_types::RoleName;
169    ///
170    /// let role = RoleName::new("SoftwareEngineer");
171    /// ```
172    pub fn new(name: &str) -> Self {
173        RoleName {
174            original: name.to_string(),
175            lowercase: name.to_lowercase(),
176        }
177    }
178
179    /// Returns the lowercase version of the role name.
180    ///
181    /// Use this for case-insensitive comparisons.
182    pub fn as_lowercase(&self) -> &str {
183        &self.lowercase
184    }
185
186    /// Returns the original role name with preserved casing.
187    pub fn as_str(&self) -> &str {
188        &self.original
189    }
190}
191
192impl fmt::Display for RoleName {
193    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
194        write!(f, "{}", self.original)
195    }
196}
197
198impl FromStr for RoleName {
199    type Err = ();
200
201    fn from_str(s: &str) -> Result<Self, Self::Err> {
202        Ok(RoleName::new(s))
203    }
204}
205
206impl From<&str> for RoleName {
207    fn from(s: &str) -> Self {
208        RoleName::new(s)
209    }
210}
211
212impl From<String> for RoleName {
213    fn from(s: String) -> Self {
214        RoleName::new(&s)
215    }
216}
217
218impl Serialize for RoleName {
219    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
220    where
221        S: Serializer,
222    {
223        serializer.serialize_str(&self.original)
224    }
225}
226
227impl<'de> Deserialize<'de> for RoleName {
228    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
229    where
230        D: Deserializer<'de>,
231    {
232        let s = String::deserialize(deserializer)?;
233        Ok(RoleName::new(&s))
234    }
235}
236/// The value of a normalized term
237///
238/// This is a string that has been normalized to lowercase and trimmed.
239#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
240#[cfg_attr(feature = "typescript", derive(Tsify))]
241#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
242pub struct NormalizedTermValue(String);
243
244impl NormalizedTermValue {
245    pub fn new(term: String) -> Self {
246        let value = term.trim().to_lowercase();
247        Self(value)
248    }
249    // convert to &str
250    pub fn as_str(&self) -> &str {
251        &self.0
252    }
253}
254
255impl From<String> for NormalizedTermValue {
256    fn from(term: String) -> Self {
257        Self::new(term)
258    }
259}
260
261impl From<&str> for NormalizedTermValue {
262    fn from(term: &str) -> Self {
263        Self::new(term.to_string())
264    }
265}
266
267impl Display for NormalizedTermValue {
268    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
269        write!(f, "{}", self.0)
270    }
271}
272
273impl AsRef<[u8]> for NormalizedTermValue {
274    fn as_ref(&self) -> &[u8] {
275        self.0.as_bytes()
276    }
277}
278
279/// A normalized term is a higher-level term that has been normalized
280///
281/// It holds a unique identifier to an underlying and the normalized value.
282/// The `display_value` field stores the original case for output purposes.
283#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, PartialOrd, Ord)]
284pub struct NormalizedTerm {
285    /// Unique identifier for the normalized term (u64)
286    pub id: u64,
287    /// The normalized value (lowercase, used for case-insensitive matching)
288    // This field is currently called `nterm` in the JSON
289    #[serde(rename = "nterm")]
290    pub value: NormalizedTermValue,
291    /// The display value with original case preserved (used for replacement output)
292    /// Falls back to `value` if None for backward compatibility
293    #[serde(default, skip_serializing_if = "Option::is_none")]
294    pub display_value: Option<String>,
295    /// The URL of the normalized term
296    pub url: Option<String>,
297}
298
299impl NormalizedTerm {
300    /// Create a new normalized term with the given id and value.
301    /// The display_value will be None (falls back to value for output).
302    pub fn new(id: u64, value: NormalizedTermValue) -> Self {
303        Self {
304            id,
305            value,
306            display_value: None,
307            url: None,
308        }
309    }
310
311    /// Create a new normalized term with auto-generated ID.
312    /// The display_value will be None (falls back to value for output).
313    pub fn with_auto_id(value: NormalizedTermValue) -> Self {
314        Self {
315            id: get_int_id(),
316            value,
317            display_value: None,
318            url: None,
319        }
320    }
321
322    /// Set the display value (original case for output).
323    /// Use this to preserve the original case from markdown headings.
324    pub fn with_display_value(mut self, display_value: String) -> Self {
325        self.display_value = Some(display_value);
326        self
327    }
328
329    /// Set the URL for this term.
330    pub fn with_url(mut self, url: String) -> Self {
331        self.url = Some(url);
332        self
333    }
334
335    /// Get the display value, falling back to the normalized value if not set.
336    /// This is the value that should be used for replacement output.
337    pub fn display(&self) -> &str {
338        self.display_value
339            .as_deref()
340            .unwrap_or_else(|| self.value.as_str())
341    }
342}
343
344/// A concept is a higher-level, normalized term.
345///
346/// It describes a unique, abstract idea in a machine-readable format.
347///
348/// An example of a concept is "machine learning" which is normalized from
349/// "Machine Learning"
350#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
351pub struct Concept {
352    /// A unique identifier for the concept (u64)
353    pub id: u64,
354    /// The normalized concept
355    pub value: NormalizedTermValue,
356}
357
358impl Concept {
359    /// Create a new concept with auto-generated ID.
360    pub fn new(value: NormalizedTermValue) -> Self {
361        Self {
362            id: get_int_id(),
363            value,
364        }
365    }
366
367    /// Create a new concept with a specific ID.
368    pub fn with_id(id: u64, value: NormalizedTermValue) -> Self {
369        Self { id, value }
370    }
371}
372
373impl From<String> for Concept {
374    fn from(concept: String) -> Self {
375        let concept = NormalizedTermValue::new(concept);
376        Self::new(concept)
377    }
378}
379
380impl Display for Concept {
381    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
382        write!(f, "{}", self.value)
383    }
384}
385
386#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
387#[serde(rename_all = "snake_case")]
388pub enum DocumentType {
389    #[default]
390    KgEntry,
391    Document,
392    ConfigDocument,
393}
394
395#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
396pub struct RouteDirective {
397    pub provider: String,
398    pub model: String,
399}
400
401#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
402pub struct MarkdownDirectives {
403    #[serde(default)]
404    pub doc_type: DocumentType,
405    #[serde(default)]
406    pub synonyms: Vec<String>,
407    #[serde(default)]
408    pub route: Option<RouteDirective>,
409    #[serde(default)]
410    pub priority: Option<u8>,
411    #[serde(default)]
412    pub trigger: Option<String>,
413    #[serde(default)]
414    pub pinned: bool,
415    /// First `# Heading` from the markdown file, preserving original case.
416    #[serde(default)]
417    pub heading: Option<String>,
418}
419
420/// The central document type representing indexed and searchable content.
421///
422/// Documents are the primary unit of content in Terraphim. They can come from
423/// various sources (local files, web pages, API responses) and are indexed for
424/// semantic search using knowledge graphs.
425///
426/// # Fields
427///
428/// * `id` - Unique identifier (typically a UUID or URL-based ID)
429/// * `url` - Source URL or file path
430/// * `title` - Document title (used for display and basic search)
431/// * `body` - Full text content
432/// * `description` - Optional short description (extracted or provided)
433/// * `summarization` - Optional AI-generated summary
434/// * `stub` - Optional brief excerpt
435/// * `tags` - Optional categorization tags (often from knowledge graph)
436/// * `rank` - Optional relevance score from search results
437/// * `source_haystack` - Optional identifier of the data source that provided this document
438///
439/// # Examples
440///
441/// ```
442/// use terraphim_types::{Document, DocumentType};
443///
444/// let doc = Document {
445///     id: "rust-book-ch1".to_string(),
446///     url: "https://doc.rust-lang.org/book/ch01-00-getting-started.html".to_string(),
447///     title: "Getting Started".to_string(),
448///     body: "Let's start your Rust journey...".to_string(),
449///     description: Some("Introduction to Rust programming".to_string()),
450///     summarization: None,
451///     stub: None,
452///     tags: Some(vec!["rust".to_string(), "tutorial".to_string()]),
453///     rank: Some(95),
454///     source_haystack: Some("rust-docs".to_string()),
455///     doc_type: DocumentType::KgEntry,
456///     synonyms: None,
457///     route: None,
458///     priority: None,
459///};
460/// ```
461#[derive(Deserialize, Serialize, Debug, Clone, Default)]
462#[cfg_attr(feature = "typescript", derive(Tsify))]
463#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
464pub struct Document {
465    /// Unique identifier for the document
466    pub id: String,
467    /// URL to the document
468    pub url: String,
469    /// Title of the document
470    pub title: String,
471    /// The document body
472    pub body: String,
473
474    /// A short description of the document (extracted from content)
475    pub description: Option<String>,
476    /// AI-generated summarization of the document content
477    pub summarization: Option<String>,
478    /// A short excerpt of the document
479    pub stub: Option<String>,
480    /// Tags for the document
481    pub tags: Option<Vec<String>>,
482    /// Rank of the document in the search results
483    pub rank: Option<u64>,
484    /// Source haystack location that this document came from
485    pub source_haystack: Option<String>,
486    /// Document classification derived from directives
487    #[serde(default)]
488    pub doc_type: DocumentType,
489    /// Synonyms extracted from directives (optional)
490    #[serde(default)]
491    pub synonyms: Option<Vec<String>>,
492    /// Optional route directive (provider/model)
493    #[serde(default)]
494    pub route: Option<RouteDirective>,
495    /// Optional priority directive (0-100)
496    #[serde(default)]
497    pub priority: Option<u8>,
498}
499
500impl fmt::Display for Document {
501    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
502        // Start with title and body
503        write!(f, "{} {}", self.title, self.body)?;
504
505        // Append description if it exists
506        if let Some(ref description) = self.description {
507            write!(f, " {}", description)?;
508        }
509
510        // Append summarization if it exists and is different from description
511        if let Some(ref summarization) = self.summarization {
512            if Some(summarization) != self.description.as_ref() {
513                write!(f, " {}", summarization)?;
514            }
515        }
516
517        Ok(())
518    }
519}
520
521impl Document {
522    /// Set the source haystack for this document
523    pub fn with_source_haystack(mut self, haystack_location: String) -> Self {
524        self.source_haystack = Some(haystack_location);
525        self
526    }
527
528    /// Get the source haystack location
529    pub fn get_source_haystack(&self) -> Option<&String> {
530        self.source_haystack.as_ref()
531    }
532}
533
534#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
535pub struct Edge {
536    /// ID of the edge (u64)
537    pub id: u64,
538    /// Rank of the edge
539    pub rank: u64,
540    /// A hashmap of `document_id` to `rank`
541    pub doc_hash: AHashMap<String, u64>,
542    /// Medical edge type (only available with the `medical` feature)
543    #[cfg(feature = "medical")]
544    #[serde(default, skip_serializing_if = "Option::is_none")]
545    pub edge_type: Option<medical_types::MedicalEdgeType>,
546}
547
548impl Edge {
549    pub fn new(id: u64, document_id: String) -> Self {
550        let mut doc_hash = AHashMap::new();
551        doc_hash.insert(document_id, 1);
552        Self {
553            id,
554            rank: 1,
555            doc_hash,
556            #[cfg(feature = "medical")]
557            edge_type: None,
558        }
559    }
560}
561
562/// A `Node` represents single concept and its connections to other concepts.
563///
564/// Each node can have multiple edges to other nodes
565#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
566pub struct Node {
567    /// Unique identifier of the node (u64)
568    pub id: u64,
569    /// Number of co-occurrences
570    pub rank: u64,
571    /// List of connected edges
572    pub connected_with: HashSet<u64>,
573    /// Medical node type (only available with the `medical` feature)
574    #[cfg(feature = "medical")]
575    #[serde(default, skip_serializing_if = "Option::is_none")]
576    pub node_type: Option<medical_types::MedicalNodeType>,
577    /// Human-readable term for this node (only available with the `medical` feature)
578    #[cfg(feature = "medical")]
579    #[serde(default, skip_serializing_if = "Option::is_none")]
580    pub term: Option<String>,
581    /// SNOMED CT concept identifier (only available with the `medical` feature)
582    #[cfg(feature = "medical")]
583    #[serde(default, skip_serializing_if = "Option::is_none")]
584    pub snomed_id: Option<u64>,
585}
586
587impl Node {
588    /// Create a new node with a given id and edge
589    pub fn new(id: u64, edge: Edge) -> Self {
590        let mut connected_with = HashSet::new();
591        connected_with.insert(edge.id);
592        Self {
593            id,
594            rank: 1,
595            connected_with,
596            #[cfg(feature = "medical")]
597            node_type: None,
598            #[cfg(feature = "medical")]
599            term: None,
600            #[cfg(feature = "medical")]
601            snomed_id: None,
602        }
603    }
604
605    // pub fn sort_edges_by_value(&self) {
606    //     // let count_b: BTreeMap<&u64, &Edge> =
607    //     // self.connected_with.iter().map(|(k, v)| (v, k)).collect();
608    //     // for (k, v) in self.connected_with.iter().map(|(k, v)| (v.rank, k)) {
609    //     // log::warn!("k {:?} v {:?}", k, v);
610    //     // }
611    //     log::warn!("Connected with {:?}", self.connected_with);
612    // }
613}
614
615/// A thesaurus is a dictionary with synonyms which map to upper-level concepts.
616#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
617pub struct Thesaurus {
618    /// Name of the thesaurus
619    name: String,
620    /// The inner hashmap of normalized terms
621    data: AHashMap<NormalizedTermValue, NormalizedTerm>,
622}
623
624impl Thesaurus {
625    /// Create a new, empty thesaurus
626    pub fn new(name: String) -> Self {
627        Self {
628            name,
629            data: AHashMap::new(),
630        }
631    }
632
633    /// Get the name of the thesaurus
634    pub fn name(&self) -> &str {
635        &self.name
636    }
637
638    /// Inserts a key-value pair into the thesaurus.
639    pub fn insert(&mut self, key: NormalizedTermValue, value: NormalizedTerm) {
640        self.data.insert(key, value);
641    }
642
643    /// Get the length of the thesaurus
644    pub fn len(&self) -> usize {
645        self.data.len()
646    }
647
648    /// Check if the thesaurus is empty
649    pub fn is_empty(&self) -> bool {
650        self.data.is_empty()
651    }
652
653    /// Custom `get` method for the thesaurus, which accepts a
654    /// `NormalizedTermValue` and returns a reference to the
655    /// `NormalizedTerm`.
656    pub fn get(&self, key: &NormalizedTermValue) -> Option<&NormalizedTerm> {
657        self.data.get(key)
658    }
659
660    pub fn keys(
661        &self,
662    ) -> std::collections::hash_map::Keys<'_, NormalizedTermValue, NormalizedTerm> {
663        self.data.keys()
664    }
665}
666
667// Implement `IntoIterator` for a reference to `Thesaurus`
668impl<'a> IntoIterator for &'a Thesaurus {
669    type Item = (&'a NormalizedTermValue, &'a NormalizedTerm);
670    type IntoIter = Iter<'a, NormalizedTermValue, NormalizedTerm>;
671
672    fn into_iter(self) -> Self::IntoIter {
673        self.data.iter()
674    }
675}
676
677/// An index is a hashmap of documents
678///
679/// It holds the documents that have been indexed
680/// and can be searched through using the `RoleGraph`.
681#[derive(Debug, Clone, Serialize, Deserialize)]
682pub struct Index {
683    inner: AHashMap<String, Document>,
684}
685
686impl Default for Index {
687    fn default() -> Self {
688        Self::new()
689    }
690}
691
692impl Index {
693    /// Create a new, empty index
694    pub fn new() -> Self {
695        Self {
696            inner: AHashMap::new(),
697        }
698    }
699
700    /// Converts all given indexed documents to documents
701    ///
702    /// Returns the all converted documents
703    pub fn get_documents(&self, docs: Vec<IndexedDocument>) -> Vec<Document> {
704        let mut documents: Vec<Document> = Vec::new();
705        for doc in docs {
706            log::trace!("doc: {:#?}", doc);
707            if let Some(document) = self.get_document(&doc) {
708                // Document found in cache
709                let mut document = document;
710                document.tags = Some(doc.tags.clone());
711                // rank only available for terraphim graph
712                // use scorer to populate the rank for all cases
713                document.rank = Some(doc.rank);
714                documents.push(document.clone());
715            } else {
716                log::warn!("Document not found in cache. Cannot convert.");
717            }
718        }
719        documents
720    }
721    /// Returns all documents from the index for scorer without graph embeddings
722    pub fn get_all_documents(&self) -> Vec<Document> {
723        let documents: Vec<Document> = self.values().cloned().collect::<Vec<Document>>();
724        documents
725    }
726
727    /// Get a document from the index (if it exists in the index)
728    pub fn get_document(&self, doc: &IndexedDocument) -> Option<Document> {
729        if let Some(document) = self.inner.get(&doc.id).cloned() {
730            // Document found in cache
731            let mut document = document;
732            document.tags = Some(doc.tags.clone());
733            // Rank only available for terraphim graph
734            // use scorer to populate the rank for all cases
735            document.rank = Some(doc.rank);
736            Some(document)
737        } else {
738            None
739        }
740    }
741}
742
743impl Deref for Index {
744    type Target = AHashMap<String, Document>;
745
746    fn deref(&self) -> &Self::Target {
747        &self.inner
748    }
749}
750
751impl DerefMut for Index {
752    fn deref_mut(&mut self) -> &mut Self::Target {
753        &mut self.inner
754    }
755}
756
757impl IntoIterator for Index {
758    type Item = (String, Document);
759    type IntoIter = std::collections::hash_map::IntoIter<String, Document>;
760
761    fn into_iter(self) -> Self::IntoIter {
762        self.inner.into_iter()
763    }
764}
765
766/// Quality scores for Knowledge/Learning/Synthesis (K/L/S) dimensions.
767///
768/// These scores represent the quality of a document across three dimensions:
769/// - Knowledge: Depth and accuracy of domain knowledge
770/// - Learning: Educational value and clarity
771/// - Synthesis: Integration of concepts and insight
772///
773/// All scores are optional and range from 0.0 to 1.0 when present.
774#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
775pub struct QualityScore {
776    /// Knowledge quality score (0.0-1.0)
777    pub knowledge: Option<f64>,
778    /// Learning quality score (0.0-1.0)
779    pub learning: Option<f64>,
780    /// Synthesis quality score (0.0-1.0)
781    pub synthesis: Option<f64>,
782}
783
784impl QualityScore {
785    /// Calculate the composite score by averaging all available scores.
786    ///
787    /// Returns 0.0 if no scores are available.
788    ///
789    /// # Examples
790    ///
791    /// ```
792    /// use terraphim_types::QualityScore;
793    ///
794    /// let score = QualityScore {
795    ///     knowledge: Some(0.8),
796    ///     learning: Some(0.6),
797    ///     synthesis: None,
798    /// };
799    /// assert_eq!(score.composite(), 0.7); // (0.8 + 0.6) / 2
800    ///
801    /// let empty = QualityScore::default();
802    /// assert_eq!(empty.composite(), 0.0);
803    /// ```
804    pub fn composite(&self) -> f64 {
805        let mut sum = 0.0;
806        let mut count = 0;
807
808        if let Some(k) = self.knowledge {
809            sum += k;
810            count += 1;
811        }
812        if let Some(l) = self.learning {
813            sum += l;
814            count += 1;
815        }
816        if let Some(s) = self.synthesis {
817            sum += s;
818            count += 1;
819        }
820
821        if count == 0 { 0.0 } else { sum / count as f64 }
822    }
823}
824
825/// Reference to external storage of documents
826#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
827pub struct IndexedDocument {
828    /// UUID of the indexed document, matching external storage id
829    pub id: String,
830    /// Matched to edges
831    pub matched_edges: Vec<Edge>,
832    /// Graph rank (the sum of node rank, edge rank)
833    /// Number of nodes and edges connected to the document
834    pub rank: u64,
835    /// Tags, which are nodes turned into concepts for human readability
836    pub tags: Vec<String>,
837    /// List of node IDs for validation of matching
838    pub nodes: Vec<u64>,
839    /// Quality scores for K/L/S dimensions
840    #[serde(default)]
841    pub quality_score: Option<QualityScore>,
842}
843
844impl IndexedDocument {
845    pub fn to_json_string(&self) -> Result<String, serde_json::Error> {
846        serde_json::to_string(&self)
847    }
848    pub fn from_document(document: Document) -> Self {
849        IndexedDocument {
850            id: document.id,
851            matched_edges: Vec::new(),
852            rank: 0,
853            tags: document.tags.unwrap_or_default(),
854            nodes: Vec::new(),
855            quality_score: None,
856        }
857    }
858}
859
860/// Logical operators for combining multiple search terms
861#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, JsonSchema)]
862#[cfg_attr(feature = "typescript", derive(Tsify))]
863#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
864pub enum LogicalOperator {
865    /// AND operator - documents must contain all terms
866    #[serde(rename = "and")]
867    And,
868    /// OR operator - documents may contain any of the terms
869    #[serde(rename = "or")]
870    Or,
871}
872
873/// Layered output levels for search results.
874///
875/// Controls how much content is returned per search result to optimize token usage:
876/// - Layer 1: Title + tags only (~50 tokens/result)
877/// - Layer 2: + first paragraph summary (~150 tokens/result)
878/// - Layer 3: Full content (current default behaviour)
879#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, JsonSchema)]
880#[cfg_attr(feature = "typescript", derive(Tsify))]
881#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
882pub enum Layer {
883    /// Title + tags only (~50 tokens/result)
884    #[serde(rename = "1")]
885    #[default]
886    One,
887    /// + first paragraph summary (~150 tokens/result)
888    #[serde(rename = "2")]
889    Two,
890    /// Full content (default)
891    #[serde(rename = "3")]
892    Three,
893}
894
895impl Layer {
896    /// Parse a layer from an integer value (1, 2, or 3)
897    pub fn from_u8(value: u8) -> Option<Self> {
898        match value {
899            1 => Some(Layer::One),
900            2 => Some(Layer::Two),
901            3 => Some(Layer::Three),
902            _ => None,
903        }
904    }
905
906    /// Returns true if this layer includes content (layer 2 or 3)
907    pub fn includes_content(&self) -> bool {
908        matches!(self, Layer::Two | Layer::Three)
909    }
910
911    /// Returns true if this layer includes full content (layer 3)
912    pub fn includes_full_content(&self) -> bool {
913        matches!(self, Layer::Three)
914    }
915}
916
917impl std::fmt::Display for Layer {
918    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
919        match self {
920            Layer::One => write!(f, "1"),
921            Layer::Two => write!(f, "2"),
922            Layer::Three => write!(f, "3"),
923        }
924    }
925}
926
/// Extract the first non-empty line from document body text.
///
/// If the body (ignoring leading whitespace) starts with a `---` front-matter
/// marker and a second `---` follows, everything up to and including that
/// second marker is skipped. The first remaining line that is not blank is
/// returned with surrounding whitespace trimmed.
///
/// The front-matter search is performed on the same whitespace-trimmed text
/// that the opening-marker check uses, so leading whitespace (including
/// multi-byte whitespace) neither shifts the slice offsets nor causes a
/// slicing panic.
///
/// Note: the closing marker is simply the next occurrence of `---` after the
/// opening one; it is not required to sit on a line of its own.
///
/// # Returns
///
/// The trimmed first non-empty line, or an empty string when there is none.
/// When the opening marker has no closing counterpart, the body is scanned
/// unchanged (so the `---` line itself is returned).
pub fn extract_first_paragraph(body: &str) -> String {
    // Skip YAML front matter if present; otherwise scan the body as-is.
    let content = body
        .trim_start()
        .strip_prefix("---")
        .and_then(|after_open| {
            after_open
                .find("---")
                // `---` is ASCII, so `close + 3` is always a char boundary.
                .map(|close| &after_open[close + 3..])
        })
        .unwrap_or(body);

    content
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .map(str::to_string)
        .unwrap_or_default()
}
955
/// A search query for finding documents in the knowledge graph.
///
/// Supports both single-term and multi-term queries with logical operators (AND/OR).
/// Results can be paginated using `skip` and `limit`, and scoped to specific roles.
///
/// When deserializing, the primary term is accepted under either the
/// `search_term` key or its alias `query`, and `layer` falls back to
/// [`Layer::default()`] when it is omitted.
///
/// # Examples
///
/// ## Single-term query
///
/// ```
/// use terraphim_types::{SearchQuery, NormalizedTermValue, Layer, RoleName};
///
/// let query = SearchQuery {
///     search_term: NormalizedTermValue::from("machine learning"),
///     search_terms: None,
///     operator: None,
///     skip: None,
///     limit: Some(10),
///     role: Some(RoleName::new("data_scientist")),
///     layer: Layer::default(),
/// };
/// ```
///
/// ## Multi-term AND query
///
/// ```
/// use terraphim_types::{SearchQuery, NormalizedTermValue, LogicalOperator, RoleName};
///
/// let query = SearchQuery::with_terms_and_operator(
///     NormalizedTermValue::from("rust"),
///     vec![NormalizedTermValue::from("async"), NormalizedTermValue::from("tokio")],
///     LogicalOperator::And,
///     Some(RoleName::new("engineer")),
/// );
/// assert!(query.is_multi_term_query());
/// assert_eq!(query.get_all_terms().len(), 3);
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct SearchQuery {
    /// Primary search term, kept for backward compatibility.
    ///
    /// Also accepted under the key `query` when deserializing.
    #[serde(alias = "query")]
    pub search_term: NormalizedTermValue,
    /// Additional search terms for logical operations; `None` for a
    /// single-term query
    pub search_terms: Option<Vec<NormalizedTermValue>>,
    /// Logical operator for combining multiple terms (defaults to OR if not specified)
    pub operator: Option<LogicalOperator>,
    /// Number of results to skip (for pagination)
    pub skip: Option<usize>,
    /// Maximum number of results to return
    pub limit: Option<usize>,
    /// Role context for this search
    pub role: Option<RoleName>,
    /// Output layer for controlling result detail (1=minimal, 2=summary, 3=full).
    ///
    /// Uses the default layer when the field is missing from the input.
    #[serde(default)]
    pub layer: Layer,
}
1014
1015impl SearchQuery {
1016    /// Get all search terms (both single and multiple)
1017    pub fn get_all_terms(&self) -> Vec<&NormalizedTermValue> {
1018        if let Some(ref multiple_terms) = self.search_terms {
1019            // For multi-term queries, include primary term + additional terms,
1020            // but avoid duplicates when the primary term is also present in `search_terms`.
1021            let mut all_terms: Vec<&NormalizedTermValue> =
1022                Vec::with_capacity(1 + multiple_terms.len());
1023            all_terms.push(&self.search_term);
1024
1025            for term in multiple_terms.iter() {
1026                if term.as_str() != self.search_term.as_str() {
1027                    all_terms.push(term);
1028                }
1029            }
1030
1031            all_terms
1032        } else {
1033            // For single-term queries, use search_term
1034            vec![&self.search_term]
1035        }
1036    }
1037
1038    /// Check if this is a multi-term query with logical operators
1039    pub fn is_multi_term_query(&self) -> bool {
1040        self.search_terms.is_some() && !self.search_terms.as_ref().unwrap().is_empty()
1041    }
1042
1043    /// Get the effective logical operator (defaults to Or for multi-term queries)
1044    pub fn get_operator(&self) -> LogicalOperator {
1045        self.operator
1046            .as_ref()
1047            .unwrap_or(&LogicalOperator::Or)
1048            .clone()
1049    }
1050
1051    /// Create a new SearchQuery with multiple terms and an operator
1052    pub fn with_terms_and_operator(
1053        primary_term: NormalizedTermValue,
1054        additional_terms: Vec<NormalizedTermValue>,
1055        operator: LogicalOperator,
1056        role: Option<RoleName>,
1057    ) -> Self {
1058        Self {
1059            search_term: primary_term,
1060            search_terms: Some(additional_terms),
1061            operator: Some(operator),
1062            skip: None,
1063            limit: None,
1064            role,
1065            layer: Layer::default(),
1066        }
1067    }
1068}
1069
/// Defines the relevance function (scorer) to be used for ranking search
/// results for the `Role`.
///
/// The default is [`RelevanceFunction::TitleScorer`]. Each variant is
/// (de)serialized under the kebab/lowercase name given in its `serde(rename)`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, JsonSchema, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum RelevanceFunction {
    /// Scorer for ranking search results based on the Terraphim graph
    ///
    /// This is based on filtered result outputs according to the ranking of the
    /// knowledge graph. The most connected node produces the highest ranking.
    #[serde(rename = "terraphim-graph")]
    TerraphimGraph,
    /// Scorer for ranking search results based on the title of a document
    /// (this is the default variant)
    #[default]
    #[serde(rename = "title-scorer")]
    TitleScorer,
    /// BM25 (Okapi BM25) relevance function for probabilistic ranking
    #[serde(rename = "bm25")]
    BM25,
    /// BM25F relevance function with field-specific weights (title, body, description, tags)
    #[serde(rename = "bm25f")]
    BM25F,
    /// BM25Plus relevance function with enhanced parameters for fine-tuning
    #[serde(rename = "bm25plus")]
    BM25Plus,
}
1097
/// Defines all supported inputs for the knowledge graph.
///
/// Every knowledge graph is built from a specific input, such as Markdown files
/// or JSON files. Variants are (de)serialized as `"markdown"` and `"json"`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, JsonSchema)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum KnowledgeGraphInputType {
    /// A set of Markdown files
    #[serde(rename = "markdown")]
    Markdown,
    /// JSON files
    #[serde(rename = "json")]
    Json,
}
1113
1114// Context Management Types for LLM Conversations
1115
/// Unique identifier for conversations
///
/// A newtype around the identifier string; the inner value is public.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ConversationId(pub String);
1121
1122impl ConversationId {
1123    pub fn new() -> Self {
1124        Self(uuid::Uuid::new_v4().to_string())
1125    }
1126
1127    pub fn from_string(id: String) -> Self {
1128        Self(id)
1129    }
1130
1131    pub fn as_str(&self) -> &str {
1132        &self.0
1133    }
1134}
1135
1136impl Default for ConversationId {
1137    fn default() -> Self {
1138        Self::new()
1139    }
1140}
1141
1142impl Display for ConversationId {
1143    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1144        write!(f, "{}", self.0)
1145    }
1146}
1147
/// Types of context that can be added to conversations
///
/// Stored on every [`ContextItem`] as its `context_type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum ContextType {
    /// System-level context
    System,
    /// User-provided context
    UserInput,
    /// Document-based context
    Document,
    /// Search result context.
    ///
    /// Note: `ContextItem::from_search_result` currently tags the items it
    /// builds as [`ContextType::Document`] rather than this variant.
    SearchResult,
    /// External data or API context
    External,
    /// Context from KG term definition with synonyms and metadata
    KGTermDefinition,
    /// Context from complete knowledge graph index
    KGIndex,
}
1168
/// Unique identifier for messages within conversations
///
/// A newtype around the identifier string; the inner value is public.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct MessageId(pub String);
1174
1175impl MessageId {
1176    pub fn new() -> Self {
1177        Self(uuid::Uuid::new_v4().to_string())
1178    }
1179
1180    pub fn from_string(id: String) -> Self {
1181        Self(id)
1182    }
1183
1184    pub fn as_str(&self) -> &str {
1185        &self.0
1186    }
1187}
1188
1189impl Default for MessageId {
1190    fn default() -> Self {
1191        Self::new()
1192    }
1193}
1194
1195impl Display for MessageId {
1196    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1197        write!(f, "{}", self.0)
1198    }
1199}
1200
/// Context item that can be added to LLM conversations
///
/// Items are attached either to a single [`ChatMessage`] or to a
/// [`Conversation`] as global context.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextItem {
    /// Unique identifier for the context item
    pub id: String,
    /// Type of context (document, search_result, user_input, etc.)
    pub context_type: ContextType,
    /// Title or summary of the context item
    pub title: String,
    /// Brief summary of the content (separate from full content)
    pub summary: Option<String>,
    /// The actual content to be included in the LLM context
    pub content: String,
    /// Metadata about the context (source, relevance score, etc.) as
    /// string key/value pairs
    pub metadata: AHashMap<String, String>,
    /// Timestamp (UTC) when this context was added
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Relevance score for ordering context items, if one is known
    pub relevance_score: Option<f64>,
}
1223
1224impl ContextItem {
1225    /// Create a new context item from a document
1226    pub fn from_document(document: &Document) -> Self {
1227        let mut metadata = AHashMap::new();
1228        metadata.insert("source_type".to_string(), "document".to_string());
1229        metadata.insert("document_id".to_string(), document.id.clone());
1230        if !document.url.is_empty() {
1231            metadata.insert("url".to_string(), document.url.clone());
1232        }
1233        if let Some(tags) = &document.tags {
1234            metadata.insert("tags".to_string(), tags.join(", "));
1235        }
1236        if let Some(rank) = document.rank {
1237            metadata.insert("rank".to_string(), rank.to_string());
1238        }
1239
1240        Self {
1241            id: uuid::Uuid::new_v4().to_string(),
1242            context_type: ContextType::Document,
1243            title: if document.title.is_empty() {
1244                document.id.clone()
1245            } else {
1246                document.title.clone()
1247            },
1248            summary: document.description.clone(),
1249            content: format!(
1250                "Title: {}\n\n{}\n\n{}",
1251                document.title,
1252                document.description.as_deref().unwrap_or(""),
1253                document.body
1254            ),
1255            metadata,
1256            created_at: chrono::Utc::now(),
1257            relevance_score: document.rank.map(|r| r as f64),
1258        }
1259    }
1260
1261    /// Create a new context item from search results
1262    pub fn from_search_result(query: &str, documents: &[Document]) -> Self {
1263        let mut metadata = AHashMap::new();
1264        metadata.insert("source_type".to_string(), "search_result".to_string());
1265        metadata.insert("query".to_string(), query.to_string());
1266        metadata.insert("result_count".to_string(), documents.len().to_string());
1267
1268        let content = if documents.is_empty() {
1269            format!("Search query: '{}'\nNo results found.", query)
1270        } else {
1271            let mut content = format!("Search query: '{}'\nResults:\n\n", query);
1272            for (i, doc) in documents.iter().take(5).enumerate() {
1273                content.push_str(&format!(
1274                    "{}. {}\n   {}\n   Rank: {}\n\n",
1275                    i + 1,
1276                    doc.title,
1277                    doc.description.as_deref().unwrap_or("No description"),
1278                    doc.rank.unwrap_or(0)
1279                ));
1280            }
1281            if documents.len() > 5 {
1282                content.push_str(&format!("... and {} more results\n", documents.len() - 5));
1283            }
1284            content
1285        };
1286
1287        Self {
1288            id: uuid::Uuid::new_v4().to_string(),
1289            context_type: ContextType::Document, // Changed from SearchResult to Document
1290            title: format!("Search: {}", query),
1291            summary: Some(format!(
1292                "Search results for '{}' - {} documents found",
1293                query,
1294                documents.len()
1295            )),
1296            content,
1297            metadata,
1298            created_at: chrono::Utc::now(),
1299            relevance_score: documents.first().and_then(|d| d.rank.map(|r| r as f64)),
1300        }
1301    }
1302
1303    /// Create a new context item from a KG term definition
1304    pub fn from_kg_term_definition(kg_term: &KGTermDefinition) -> Self {
1305        let mut metadata = AHashMap::new();
1306        metadata.insert("source_type".to_string(), "kg_term".to_string());
1307        metadata.insert("term_id".to_string(), kg_term.id.to_string());
1308        metadata.insert(
1309            "normalized_term".to_string(),
1310            kg_term.normalized_term.to_string(),
1311        );
1312        metadata.insert(
1313            "synonyms_count".to_string(),
1314            kg_term.synonyms.len().to_string(),
1315        );
1316        metadata.insert(
1317            "related_terms_count".to_string(),
1318            kg_term.related_terms.len().to_string(),
1319        );
1320        metadata.insert(
1321            "usage_examples_count".to_string(),
1322            kg_term.usage_examples.len().to_string(),
1323        );
1324
1325        if let Some(ref url) = kg_term.url {
1326            metadata.insert("url".to_string(), url.clone());
1327        }
1328
1329        // Add KG-specific metadata
1330        for (key, value) in &kg_term.metadata {
1331            metadata.insert(format!("kg_{}", key), value.clone());
1332        }
1333
1334        let mut content = format!("**Term:** {}\n", kg_term.term);
1335
1336        if let Some(ref definition) = kg_term.definition {
1337            content.push_str(&format!("**Definition:** {}\n", definition));
1338        }
1339
1340        if !kg_term.synonyms.is_empty() {
1341            content.push_str(&format!("**Synonyms:** {}\n", kg_term.synonyms.join(", ")));
1342        }
1343
1344        if !kg_term.related_terms.is_empty() {
1345            content.push_str(&format!(
1346                "**Related Terms:** {}\n",
1347                kg_term.related_terms.join(", ")
1348            ));
1349        }
1350
1351        if !kg_term.usage_examples.is_empty() {
1352            content.push_str("**Usage Examples:**\n");
1353            for (i, example) in kg_term.usage_examples.iter().enumerate() {
1354                content.push_str(&format!("{}. {}\n", i + 1, example));
1355            }
1356        }
1357
1358        Self {
1359            id: uuid::Uuid::new_v4().to_string(),
1360            context_type: ContextType::KGTermDefinition,
1361            title: format!("KG Term: {}", kg_term.term),
1362            summary: Some(format!(
1363                "Knowledge Graph term '{}' with {} synonyms and {} related terms",
1364                kg_term.term,
1365                kg_term.synonyms.len(),
1366                kg_term.related_terms.len()
1367            )),
1368            content,
1369            metadata,
1370            created_at: chrono::Utc::now(),
1371            relevance_score: kg_term.relevance_score,
1372        }
1373    }
1374
1375    /// Create a new context item from a complete KG index
1376    pub fn from_kg_index(kg_index: &KGIndexInfo) -> Self {
1377        let mut metadata = AHashMap::new();
1378        metadata.insert("source_type".to_string(), "kg_index".to_string());
1379        metadata.insert("kg_name".to_string(), kg_index.name.clone());
1380        metadata.insert("total_terms".to_string(), kg_index.total_terms.to_string());
1381        metadata.insert("total_nodes".to_string(), kg_index.total_nodes.to_string());
1382        metadata.insert("total_edges".to_string(), kg_index.total_edges.to_string());
1383        metadata.insert("source".to_string(), kg_index.source.clone());
1384        metadata.insert(
1385            "last_updated".to_string(),
1386            kg_index.last_updated.to_rfc3339(),
1387        );
1388
1389        if let Some(ref version) = kg_index.version {
1390            metadata.insert("version".to_string(), version.clone());
1391        }
1392
1393        let content = format!(
1394            "**Knowledge Graph Index: {}**\n\n\
1395            **Statistics:**\n\
1396            - Total Terms: {}\n\
1397            - Total Nodes: {}\n\
1398            - Total Edges: {}\n\
1399            - Source: {}\n\
1400            - Last Updated: {}\n\
1401            - Version: {}\n\n\
1402            This context includes the complete knowledge graph index with all terms, \
1403            relationships, and metadata available for reference.",
1404            kg_index.name,
1405            kg_index.total_terms,
1406            kg_index.total_nodes,
1407            kg_index.total_edges,
1408            kg_index.source,
1409            kg_index.last_updated.format("%Y-%m-%d %H:%M:%S UTC"),
1410            kg_index.version.as_deref().unwrap_or("N/A")
1411        );
1412
1413        Self {
1414            id: uuid::Uuid::new_v4().to_string(),
1415            context_type: ContextType::KGIndex,
1416            title: format!("KG Index: {}", kg_index.name),
1417            summary: Some(format!(
1418                "Complete knowledge graph index with {} terms, {} nodes, and {} edges",
1419                kg_index.total_terms, kg_index.total_nodes, kg_index.total_edges
1420            )),
1421            content,
1422            metadata,
1423            created_at: chrono::Utc::now(),
1424            relevance_score: Some(1.0), // High relevance for complete index
1425        }
1426    }
1427}
1428
/// Knowledge Graph term definition with comprehensive metadata
///
/// Can be turned into a [`ContextItem`] via
/// `ContextItem::from_kg_term_definition`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct KGTermDefinition {
    /// The primary term
    pub term: String,
    /// Normalized term value
    pub normalized_term: NormalizedTermValue,
    /// Unique identifier for the term
    pub id: u64,
    /// Definition of the term, if one is available
    pub definition: Option<String>,
    /// Synonyms for the term
    pub synonyms: Vec<String>,
    /// Related terms
    pub related_terms: Vec<String>,
    /// Usage examples
    pub usage_examples: Vec<String>,
    /// URL reference if available
    pub url: Option<String>,
    /// Additional metadata as string key/value pairs
    pub metadata: AHashMap<String, String>,
    /// Relevance score for ranking, if one is known
    pub relevance_score: Option<f64>,
}
1455
/// Knowledge Graph index information
///
/// Summary statistics about an index; can be turned into a [`ContextItem`]
/// via `ContextItem::from_kg_index`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct KGIndexInfo {
    /// Name of the knowledge graph
    pub name: String,
    /// Total number of terms in the index
    pub total_terms: usize,
    /// Number of nodes in the graph
    pub total_nodes: usize,
    /// Number of edges in the graph
    pub total_edges: usize,
    /// Last updated timestamp (UTC)
    pub last_updated: chrono::DateTime<chrono::Utc>,
    /// Source of the knowledge graph
    pub source: String,
    /// Version of the knowledge graph, if known
    pub version: Option<String>,
}
1476
/// A single message in a [`Conversation`], together with any context items
/// attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ChatMessage {
    /// Unique identifier for this message
    pub id: MessageId,
    /// Role of the message sender, stored as a plain string
    pub role: String, // "system" | "user" | "assistant"
    /// The message content
    pub content: String,
    /// Context items associated with this message
    pub context_items: Vec<ContextItem>,
    /// Timestamp (UTC) when the message was created
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Token count for this message (if available)
    pub token_count: Option<u32>,
    /// Model used to generate this message (for assistant messages)
    pub model: Option<String>,
}
1496
1497impl ChatMessage {
1498    /// Create a new user message
1499    pub fn user(content: String) -> Self {
1500        Self {
1501            id: MessageId::new(),
1502            role: "user".to_string(),
1503            content,
1504            context_items: Vec::new(),
1505            created_at: chrono::Utc::now(),
1506            token_count: None,
1507            model: None,
1508        }
1509    }
1510
1511    /// Create a new assistant message
1512    pub fn assistant(content: String, model: Option<String>) -> Self {
1513        Self {
1514            id: MessageId::new(),
1515            role: "assistant".to_string(),
1516            content,
1517            context_items: Vec::new(),
1518            created_at: chrono::Utc::now(),
1519            token_count: None,
1520            model,
1521        }
1522    }
1523
1524    /// Create a new system message
1525    pub fn system(content: String) -> Self {
1526        Self {
1527            id: MessageId::new(),
1528            role: "system".to_string(),
1529            content,
1530            context_items: Vec::new(),
1531            created_at: chrono::Utc::now(),
1532            token_count: None,
1533            model: None,
1534        }
1535    }
1536
1537    /// Add context item to this message
1538    pub fn add_context(&mut self, context: ContextItem) {
1539        self.context_items.push(context);
1540    }
1541
1542    /// Add multiple context items to this message
1543    pub fn add_contexts(&mut self, contexts: Vec<ContextItem>) {
1544        self.context_items.extend(contexts);
1545    }
1546}
1547
/// A conversation containing multiple messages and context
///
/// Context can be attached per message (see [`ChatMessage`]) or globally
/// through `global_context`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct Conversation {
    /// Unique identifier for this conversation
    pub id: ConversationId,
    /// Human-readable title for the conversation
    pub title: String,
    /// Messages in this conversation
    pub messages: Vec<ChatMessage>,
    /// Global context items for the entire conversation
    pub global_context: Vec<ContextItem>,
    /// Role used for this conversation
    pub role: RoleName,
    /// When this conversation was created (UTC)
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// When this conversation was last updated (UTC)
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// Metadata about the conversation as string key/value pairs
    pub metadata: AHashMap<String, String>,
}
1570
1571impl Conversation {
1572    /// Create a new conversation
1573    pub fn new(title: String, role: RoleName) -> Self {
1574        let now = chrono::Utc::now();
1575        Self {
1576            id: ConversationId::new(),
1577            title,
1578            messages: Vec::new(),
1579            global_context: Vec::new(),
1580            role,
1581            created_at: now,
1582            updated_at: now,
1583            metadata: AHashMap::new(),
1584        }
1585    }
1586
1587    /// Add a message to the conversation
1588    pub fn add_message(&mut self, message: ChatMessage) {
1589        self.messages.push(message);
1590        self.updated_at = chrono::Utc::now();
1591    }
1592
1593    /// Add global context to the conversation
1594    pub fn add_global_context(&mut self, context: ContextItem) {
1595        self.global_context.push(context);
1596        self.updated_at = chrono::Utc::now();
1597    }
1598
1599    /// Get the total context length (approximation)
1600    pub fn estimated_context_length(&self) -> usize {
1601        let message_length: usize = self
1602            .messages
1603            .iter()
1604            .map(|m| {
1605                m.content.len()
1606                    + m.context_items
1607                        .iter()
1608                        .map(|c| c.content.len())
1609                        .sum::<usize>()
1610            })
1611            .sum();
1612        let global_context_length: usize =
1613            self.global_context.iter().map(|c| c.content.len()).sum();
1614        message_length + global_context_length
1615    }
1616}
1617
/// Summary of a conversation for listing purposes
///
/// Built from a [`Conversation`] via `From<&Conversation>`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ConversationSummary {
    /// Unique identifier for this conversation
    pub id: ConversationId,
    /// Human-readable title for the conversation
    pub title: String,
    /// Role used for this conversation
    pub role: RoleName,
    /// Number of messages in the conversation
    pub message_count: usize,
    /// Number of context items in the conversation (global context plus the
    /// context items attached to individual messages)
    pub context_count: usize,
    /// When this conversation was created (UTC)
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// When this conversation was last updated (UTC)
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// Preview of the first user message (if any)
    pub preview: Option<String>,
}
1640
1641// Note: Persistable implementation for Conversation will be added in the service layer
1642// to avoid circular dependencies
1643
1644impl From<&Conversation> for ConversationSummary {
1645    fn from(conversation: &Conversation) -> Self {
1646        let context_count = conversation.global_context.len()
1647            + conversation
1648                .messages
1649                .iter()
1650                .map(|m| m.context_items.len())
1651                .sum::<usize>();
1652
1653        let preview = conversation
1654            .messages
1655            .iter()
1656            .find(|m| m.role == "user")
1657            .map(|m| {
1658                if m.content.len() > 100 {
1659                    format!("{}...", &m.content[..100])
1660                } else {
1661                    m.content.clone()
1662                }
1663            });
1664
1665        Self {
1666            id: conversation.id.clone(),
1667            title: conversation.title.clone(),
1668            role: conversation.role.clone(),
1669            message_count: conversation.messages.len(),
1670            context_count,
1671            created_at: conversation.created_at,
1672            updated_at: conversation.updated_at,
1673            preview,
1674        }
1675    }
1676}
1677
/// Context history that tracks what context has been used across conversations
///
/// There is one entry per (context item, conversation) pair; repeated use
/// increments that entry's counter instead of adding a new entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextHistory {
    /// Items that have been used in conversations
    pub used_contexts: Vec<ContextHistoryEntry>,
    /// Maximum number of history entries to keep; the least recently used
    /// entries are dropped once this is exceeded
    pub max_entries: usize,
}
1688
1689impl ContextHistory {
1690    pub fn new(max_entries: usize) -> Self {
1691        Self {
1692            used_contexts: Vec::new(),
1693            max_entries,
1694        }
1695    }
1696
1697    /// Record that a context item was used
1698    pub fn record_usage(
1699        &mut self,
1700        context_id: &str,
1701        conversation_id: &ConversationId,
1702        usage_type: ContextUsageType,
1703    ) {
1704        let entry = ContextHistoryEntry {
1705            context_id: context_id.to_string(),
1706            conversation_id: conversation_id.clone(),
1707            usage_type,
1708            used_at: chrono::Utc::now(),
1709            usage_count: 1,
1710        };
1711
1712        // Check if we already have this context for this conversation
1713        if let Some(existing) = self
1714            .used_contexts
1715            .iter_mut()
1716            .find(|e| e.context_id == context_id && e.conversation_id == *conversation_id)
1717        {
1718            existing.usage_count += 1;
1719            existing.used_at = chrono::Utc::now();
1720        } else {
1721            self.used_contexts.push(entry);
1722        }
1723
1724        // Trim to max entries if needed
1725        if self.used_contexts.len() > self.max_entries {
1726            self.used_contexts.sort_by_key(|e| e.used_at);
1727            self.used_contexts
1728                .drain(0..self.used_contexts.len() - self.max_entries);
1729        }
1730    }
1731
1732    /// Get frequently used contexts
1733    pub fn get_frequent_contexts(&self, limit: usize) -> Vec<&ContextHistoryEntry> {
1734        let mut entries = self.used_contexts.iter().collect::<Vec<_>>();
1735        entries.sort_by_key(|e| std::cmp::Reverse(e.usage_count));
1736        entries.into_iter().take(limit).collect()
1737    }
1738}
1739
/// Entry in context usage history
///
/// Describes the use of one context item within one conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextHistoryEntry {
    /// ID of the context item that was used
    pub context_id: String,
    /// Conversation where it was used
    pub conversation_id: ConversationId,
    /// How the context was used
    pub usage_type: ContextUsageType,
    /// When it was most recently used (UTC)
    pub used_at: chrono::DateTime<chrono::Utc>,
    /// How many times it's been used in this conversation
    pub usage_count: usize,
}
1756
/// How a context item was used
///
/// Recorded on each [`ContextHistoryEntry`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum ContextUsageType {
    /// Added manually by user
    Manual,
    /// Added automatically by system
    Automatic,
    /// Added from search results
    SearchResult,
    /// Added from document reference
    DocumentReference,
}
1771
1772// Routing and Priority Types
1773
/// Priority level for routing rules and decisions
/// Higher numeric values indicate higher priority
///
/// `Priority::new` limits the value to at most 100. The inner field is
/// public, so a value constructed directly as `Priority(n)` is not clamped.
#[derive(
    Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, JsonSchema, Default,
)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct Priority(pub u8);
1782
1783impl Priority {
1784    /// Create a new priority with the given value
1785    pub fn new(value: u8) -> Self {
1786        Self(value.clamp(0, 100))
1787    }
1788
1789    /// Get the priority value
1790    pub fn value(&self) -> u8 {
1791        self.0
1792    }
1793
1794    /// Check if this is high priority (>= 80)
1795    pub fn is_high(&self) -> bool {
1796        self.0 >= 80
1797    }
1798
1799    /// Check if this is medium priority (>= 40 && < 80)
1800    pub fn is_medium(&self) -> bool {
1801        self.0 >= 40 && self.0 < 80
1802    }
1803
1804    /// Check if this is low priority (< 40)
1805    pub fn is_low(&self) -> bool {
1806        self.0 < 40
1807    }
1808
1809    /// Maximum priority value
1810    pub const MAX: Self = Self(100);
1811
1812    /// High priority (default for fast/expensive rules)
1813    pub const HIGH: Self = Self(80);
1814
1815    /// Medium priority (default for standard rules)
1816    pub const MEDIUM: Self = Self(50);
1817
1818    /// Low priority (default for fallback rules)
1819    pub const LOW: Self = Self(20);
1820
1821    /// Minimum priority value
1822    pub const MIN: Self = Self(0);
1823}
1824
1825impl fmt::Display for Priority {
1826    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1827        write!(f, "{}", self.0)
1828    }
1829}
1830
1831impl From<u8> for Priority {
1832    fn from(value: u8) -> Self {
1833        Self::new(value)
1834    }
1835}
1836
1837impl From<i32> for Priority {
1838    fn from(value: i32) -> Self {
1839        Self::new(value as u8)
1840    }
1841}
1842
1843/// A routing rule with pattern matching and priority
1844#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1845#[cfg_attr(feature = "typescript", derive(Tsify))]
1846#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1847pub struct RoutingRule {
1848    /// Unique identifier for this rule
1849    pub id: String,
1850
1851    /// Name of the rule (human-readable)
1852    pub name: String,
1853
1854    /// Pattern to match (can be regex, exact string, or concept name)
1855    pub pattern: String,
1856
1857    /// Priority of this rule (higher = more important)
1858    pub priority: Priority,
1859
1860    /// Provider to route to when this rule matches
1861    pub provider: String,
1862
1863    /// Model to use when this rule matches
1864    pub model: String,
1865
1866    /// Optional description of when this rule applies
1867    pub description: Option<String>,
1868
1869    /// Tags for categorizing rules
1870    pub tags: Vec<String>,
1871
1872    /// Whether this rule is enabled
1873    pub enabled: bool,
1874
1875    /// When this rule was created
1876    pub created_at: chrono::DateTime<chrono::Utc>,
1877
1878    /// When this rule was last updated
1879    pub updated_at: chrono::DateTime<chrono::Utc>,
1880}
1881
1882impl RoutingRule {
1883    /// Create a new routing rule
1884    pub fn new(
1885        id: String,
1886        name: String,
1887        pattern: String,
1888        priority: Priority,
1889        provider: String,
1890        model: String,
1891    ) -> Self {
1892        let now = chrono::Utc::now();
1893        Self {
1894            id,
1895            name,
1896            pattern,
1897            priority,
1898            provider,
1899            model,
1900            description: None,
1901            tags: Vec::new(),
1902            enabled: true,
1903            created_at: now,
1904            updated_at: now,
1905        }
1906    }
1907
1908    /// Create a rule with default medium priority
1909    pub fn with_defaults(
1910        id: String,
1911        name: String,
1912        pattern: String,
1913        provider: String,
1914        model: String,
1915    ) -> Self {
1916        Self::new(id, name, pattern, Priority::MEDIUM, provider, model)
1917    }
1918
1919    /// Set the description
1920    pub fn with_description(mut self, description: String) -> Self {
1921        self.description = Some(description);
1922        self
1923    }
1924
1925    /// Add a tag
1926    pub fn with_tag(mut self, tag: String) -> Self {
1927        self.tags.push(tag);
1928        self
1929    }
1930
1931    /// Set enabled status
1932    pub fn with_enabled(mut self, enabled: bool) -> Self {
1933        self.enabled = enabled;
1934        self
1935    }
1936
1937    /// Update the rule's timestamp
1938    pub fn touch(&mut self) {
1939        self.updated_at = chrono::Utc::now();
1940    }
1941}
1942
1943/// Result of pattern matching with priority scoring
1944#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
1945#[cfg_attr(feature = "typescript", derive(Tsify))]
1946#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1947pub struct PatternMatch {
1948    /// The concept that was matched
1949    pub concept: String,
1950
1951    /// Provider to route to
1952    pub provider: String,
1953
1954    /// Model to use
1955    pub model: String,
1956
1957    /// Match score (0.0 to 1.0)
1958    pub score: f64,
1959
1960    /// Priority of the matched rule
1961    pub priority: Priority,
1962
1963    /// Combined weighted score (score * priority_factor)
1964    pub weighted_score: f64,
1965
1966    /// The rule that was matched
1967    pub rule_id: String,
1968}
1969
1970impl PatternMatch {
1971    /// Create a new pattern match
1972    pub fn new(
1973        concept: String,
1974        provider: String,
1975        model: String,
1976        score: f64,
1977        priority: Priority,
1978        rule_id: String,
1979    ) -> Self {
1980        let priority_factor = priority.value() as f64 / 100.0;
1981        let weighted_score = score * priority_factor;
1982
1983        Self {
1984            concept,
1985            provider,
1986            model,
1987            score,
1988            priority,
1989            weighted_score,
1990            rule_id,
1991        }
1992    }
1993
1994    /// Create a simple pattern match with default priority
1995    pub fn simple(concept: String, provider: String, model: String, score: f64) -> Self {
1996        Self::new(
1997            concept,
1998            provider,
1999            model,
2000            score,
2001            Priority::MEDIUM,
2002            "default".to_string(),
2003        )
2004    }
2005}
2006
2007/// Routing decision with priority information
2008#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
2009#[cfg_attr(feature = "typescript", derive(Tsify))]
2010#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
2011pub struct RoutingDecision {
2012    /// Provider to route to
2013    pub provider: String,
2014
2015    /// Model to use
2016    pub model: String,
2017
2018    /// The scenario that was matched
2019    pub scenario: RoutingScenario,
2020
2021    /// Priority of this decision
2022    pub priority: Priority,
2023
2024    /// Confidence score (0.0 to 1.0)
2025    pub confidence: f64,
2026
2027    /// The rule that led to this decision (if any)
2028    pub rule_id: Option<String>,
2029
2030    /// Reason for this decision
2031    pub reason: String,
2032}
2033
2034impl RoutingDecision {
2035    /// Create a new routing decision
2036    pub fn new(
2037        provider: String,
2038        model: String,
2039        scenario: RoutingScenario,
2040        priority: Priority,
2041        confidence: f64,
2042        reason: String,
2043    ) -> Self {
2044        Self {
2045            provider,
2046            model,
2047            scenario,
2048            priority,
2049            confidence,
2050            rule_id: None,
2051            reason,
2052        }
2053    }
2054
2055    /// Create a decision with a specific rule
2056    pub fn with_rule(
2057        provider: String,
2058        model: String,
2059        scenario: RoutingScenario,
2060        priority: Priority,
2061        confidence: f64,
2062        rule_id: String,
2063        reason: String,
2064    ) -> Self {
2065        Self {
2066            provider,
2067            model,
2068            scenario,
2069            priority,
2070            confidence,
2071            rule_id: Some(rule_id),
2072            reason,
2073        }
2074    }
2075
2076    /// Create a simple default decision
2077    pub fn default(provider: String, model: String) -> Self {
2078        Self::new(
2079            provider,
2080            model,
2081            RoutingScenario::Default,
2082            Priority::LOW,
2083            0.5,
2084            "Default routing".to_string(),
2085        )
2086    }
2087}
2088
2089/// Routing scenario types
2090#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, Default)]
2091#[cfg_attr(feature = "typescript", derive(Tsify))]
2092#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
2093pub enum RoutingScenario {
2094    /// Default routing scenario
2095    #[serde(rename = "default")]
2096    #[default]
2097    Default,
2098
2099    /// Background processing (low priority, cost-optimized)
2100    #[serde(rename = "background")]
2101    Background,
2102
2103    /// Thinking/reasoning tasks (high quality)
2104    #[serde(rename = "think")]
2105    Think,
2106
2107    /// Long context tasks
2108    #[serde(rename = "long_context")]
2109    LongContext,
2110
2111    /// Web search required
2112    #[serde(rename = "web_search")]
2113    WebSearch,
2114
2115    /// Image processing required
2116    #[serde(rename = "image")]
2117    Image,
2118
2119    /// Pattern-based routing with concept name
2120    #[serde(rename = "pattern")]
2121    Pattern(String),
2122
2123    /// Priority-based routing
2124    #[serde(rename = "priority")]
2125    Priority,
2126
2127    /// Custom scenario
2128    #[serde(rename = "custom")]
2129    Custom(String),
2130}
2131
2132impl fmt::Display for RoutingScenario {
2133    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2134        match self {
2135            Self::Default => write!(f, "default"),
2136            Self::Background => write!(f, "background"),
2137            Self::Think => write!(f, "think"),
2138            Self::LongContext => write!(f, "long_context"),
2139            Self::WebSearch => write!(f, "web_search"),
2140            Self::Image => write!(f, "image"),
2141            Self::Pattern(concept) => write!(f, "pattern:{}", concept),
2142            Self::Priority => write!(f, "priority"),
2143            Self::Custom(name) => write!(f, "custom:{}", name),
2144        }
2145    }
2146}
2147
2148/// Multi-agent context for coordinating between different AI agents
2149#[derive(Debug, Clone, Serialize, Deserialize)]
2150#[cfg_attr(feature = "typescript", derive(Tsify))]
2151#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
2152pub struct MultiAgentContext {
2153    /// Unique identifier for the multi-agent session
2154    pub session_id: String,
2155    /// Agents participating in this context
2156    pub agents: Vec<AgentInfo>,
2157    /// Shared context items available to all agents
2158    pub shared_context: Vec<ContextItem>,
2159    /// Agent-specific context
2160    pub agent_contexts: AHashMap<String, Vec<ContextItem>>,
2161    /// Communication log between agents
2162    pub agent_communications: Vec<AgentCommunication>,
2163    /// When this session was created
2164    pub created_at: chrono::DateTime<chrono::Utc>,
2165    /// When this session was last updated
2166    pub updated_at: chrono::DateTime<chrono::Utc>,
2167}
2168
2169impl MultiAgentContext {
2170    pub fn new() -> Self {
2171        let now = chrono::Utc::now();
2172        Self {
2173            session_id: uuid::Uuid::new_v4().to_string(),
2174            agents: Vec::new(),
2175            shared_context: Vec::new(),
2176            agent_contexts: AHashMap::new(),
2177            agent_communications: Vec::new(),
2178            created_at: now,
2179            updated_at: now,
2180        }
2181    }
2182
2183    /// Add an agent to the session
2184    pub fn add_agent(&mut self, agent: AgentInfo) {
2185        self.agents.push(agent.clone());
2186        self.agent_contexts.insert(agent.id, Vec::new());
2187        self.updated_at = chrono::Utc::now();
2188    }
2189
2190    /// Add context for a specific agent
2191    pub fn add_agent_context(&mut self, agent_id: &str, context: ContextItem) {
2192        if let Some(contexts) = self.agent_contexts.get_mut(agent_id) {
2193            contexts.push(context);
2194            self.updated_at = chrono::Utc::now();
2195        }
2196    }
2197
2198    /// Record communication between agents
2199    pub fn record_communication(
2200        &mut self,
2201        from_agent: &str,
2202        to_agent: Option<&str>,
2203        message: String,
2204    ) {
2205        let communication = AgentCommunication {
2206            from_agent: from_agent.to_string(),
2207            to_agent: to_agent.map(|s| s.to_string()),
2208            message,
2209            timestamp: chrono::Utc::now(),
2210        };
2211        self.agent_communications.push(communication);
2212        self.updated_at = chrono::Utc::now();
2213    }
2214}
2215
2216impl Default for MultiAgentContext {
2217    fn default() -> Self {
2218        Self::new()
2219    }
2220}
2221
2222/// Information about an AI agent in a multi-agent context
2223#[derive(Debug, Clone, Serialize, Deserialize)]
2224#[cfg_attr(feature = "typescript", derive(Tsify))]
2225#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
2226pub struct AgentInfo {
2227    /// Unique identifier for the agent
2228    pub id: String,
2229    /// Human-readable name of the agent
2230    pub name: String,
2231    /// Role/specialty of the agent
2232    pub role: String,
2233    /// Capabilities or description of what this agent does
2234    pub capabilities: Vec<String>,
2235    /// Model or provider powering this agent
2236    pub model: Option<String>,
2237}
2238
2239/// Communication between agents in a multi-agent context
2240#[derive(Debug, Clone, Serialize, Deserialize)]
2241#[cfg_attr(feature = "typescript", derive(Tsify))]
2242#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
2243pub struct AgentCommunication {
2244    /// ID of the agent sending the message
2245    pub from_agent: String,
2246    /// ID of the agent receiving the message (None for broadcast)
2247    pub to_agent: Option<String>,
2248    /// The communication message
2249    pub message: String,
2250    /// When this communication occurred
2251    pub timestamp: chrono::DateTime<chrono::Utc>,
2252}
2253
2254// ============================================================================
2255// Dynamic Ontology Types - Schema-First Knowledge Graph with Grounding
2256// ============================================================================
2257
2258/// Normalization method used for grounding
2259#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2260#[serde(rename_all = "snake_case")]
2261pub enum NormalizationMethod {
2262    /// Exact match via Aho-Corasick
2263    #[default]
2264    Exact,
2265    /// Fuzzy match via Levenshtein or Jaro-Winkler
2266    Fuzzy,
2267    /// Graph rank-based prioritization
2268    GraphRank,
2269}
2270
2271/// Grounding metadata for normalized terms (Dynamic Ontology)
2272#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2273pub struct GroundingMetadata {
2274    /// Canonical URI from ontology (NCIt, HGNC, etc.)
2275    pub normalized_uri: Option<String>,
2276    /// Human-friendly label for display
2277    pub normalized_label: Option<String>,
2278    /// Source ontology (NCIt, HGNC, custom)
2279    pub normalized_prov: Option<String>,
2280    /// Similarity/confidence score (0.0 - 1.0)
2281    pub normalized_score: Option<f32>,
2282    /// Method used for normalization
2283    pub normalized_method: Option<NormalizationMethod>,
2284}
2285
2286impl GroundingMetadata {
2287    /// Create new grounding metadata with URI and score
2288    pub fn new(
2289        uri: String,
2290        label: String,
2291        prov: String,
2292        score: f32,
2293        method: NormalizationMethod,
2294    ) -> Self {
2295        Self {
2296            normalized_uri: Some(uri),
2297            normalized_label: Some(label),
2298            normalized_prov: Some(prov),
2299            normalized_score: Some(score),
2300            normalized_method: Some(method),
2301        }
2302    }
2303}
2304
2305/// Coverage governance signal
2306#[derive(Debug, Clone, Serialize, Deserialize)]
2307pub struct CoverageSignal {
2308    /// Total categories in extracted schema
2309    pub total_categories: usize,
2310    /// Categories matched in ontology catalog
2311    pub matched_categories: usize,
2312    /// Coverage ratio = matched/total
2313    pub coverage_ratio: f32,
2314    /// Threshold for needing review
2315    pub threshold: f32,
2316    /// Whether this needs human review
2317    pub needs_review: bool,
2318}
2319
2320impl CoverageSignal {
2321    /// Compute coverage signal from categories and matched count
2322    pub fn compute(categories: &[String], matched: usize, threshold: f32) -> Self {
2323        let total = categories.len();
2324        let ratio = if total > 0 {
2325            matched as f32 / total as f32
2326        } else {
2327            0.0
2328        };
2329        Self {
2330            total_categories: total,
2331            matched_categories: matched,
2332            coverage_ratio: ratio,
2333            threshold,
2334            needs_review: ratio < threshold,
2335        }
2336    }
2337}
2338
/// Entity types of the oncology schema.
///
/// Only compiled with the `medical` feature. Variants are serialized in
/// `snake_case`.
#[cfg(feature = "medical")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
    /// Serialized as `"cancer_diagnosis"`.
    CancerDiagnosis,
    /// Serialized as `"tumor"`.
    Tumor,
    /// Serialized as `"genomic_variant"`.
    GenomicVariant,
    /// Serialized as `"biomarker"`.
    Biomarker,
    /// Serialized as `"drug"`.
    Drug,
    /// Serialized as `"treatment"`.
    Treatment,
    /// Serialized as `"side_effect"`.
    SideEffect,
}
2352
/// Relationship types of the oncology schema.
///
/// Only compiled with the `medical` feature. Variants are serialized in
/// `SCREAMING_SNAKE_CASE`.
#[cfg(feature = "medical")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum RelationshipType {
    /// Serialized as `"HAS_TUMOR"`.
    HasTumor,
    /// Serialized as `"HAS_VARIANT"`.
    HasVariant,
    /// Serialized as `"HAS_BIOMARKER"`.
    HasBiomarker,
    /// Serialized as `"TREATED_WITH"`.
    TreatedWith,
    /// Serialized as `"CAUSES"`.
    Causes,
    /// Serialized as `"HAS_DIAGNOSIS"`.
    HasDiagnosis,
}
2365
2366/// Extracted entity from text
2367#[derive(Debug, Clone, Serialize, Deserialize)]
2368pub struct ExtractedEntity {
2369    /// Type of entity (string for generic cross-domain use)
2370    pub entity_type: String,
2371    /// Raw value from text
2372    pub raw_value: String,
2373    /// Normalized value if available
2374    pub normalized_value: Option<String>,
2375    /// Grounding metadata
2376    pub grounding: Option<GroundingMetadata>,
2377}
2378
2379/// Extracted relationship from text
2380#[derive(Debug, Clone, Serialize, Deserialize)]
2381pub struct ExtractedRelationship {
2382    /// Type of relationship (string for generic cross-domain use)
2383    pub relationship_type: String,
2384    /// Source entity
2385    pub source: String,
2386    /// Target entity
2387    pub target: String,
2388    /// Confidence score
2389    pub confidence: f32,
2390}
2391
2392/// Schema signal extracted from text
2393#[derive(Debug, Clone, Serialize, Deserialize)]
2394pub struct SchemaSignal {
2395    /// Extracted entities
2396    pub entities: Vec<ExtractedEntity>,
2397    /// Extracted relationships
2398    pub relationships: Vec<ExtractedRelationship>,
2399    /// Overall confidence score
2400    pub confidence: f32,
2401}
2402
2403// ============================================================================
2404// Ontology Schema Types - Schema-First Knowledge Graph Definition (#547)
2405// ============================================================================
2406
2407/// Entity type definition in an ontology schema
2408#[derive(Debug, Clone, Serialize, Deserialize)]
2409pub struct OntologyEntityType {
2410    /// Unique identifier within the schema (e.g., "chapter", "concept", "author")
2411    pub id: String,
2412    /// Human-readable label
2413    pub label: String,
2414    /// Canonical URI prefix for grounding (e.g., "https://schema.org/Chapter")
2415    #[serde(default)]
2416    pub uri_prefix: Option<String>,
2417    /// Alternative names / synonyms for matching
2418    #[serde(default)]
2419    pub aliases: Vec<String>,
2420    /// Category for coverage grouping (e.g., "core", "supporting", "optional")
2421    #[serde(default)]
2422    pub category: Option<String>,
2423}
2424
2425/// Relationship type definition in an ontology schema
2426#[derive(Debug, Clone, Serialize, Deserialize)]
2427pub struct OntologyRelationshipType {
2428    /// Relationship identifier (e.g., "references", "defines")
2429    pub id: String,
2430    /// Human-readable label
2431    pub label: String,
2432    /// Source entity type ID
2433    pub source_type: String,
2434    /// Target entity type ID
2435    pub target_type: String,
2436}
2437
2438/// Anti-pattern definition for detection
2439#[derive(Debug, Clone, Serialize, Deserialize)]
2440pub struct OntologyAntiPattern {
2441    /// Anti-pattern identifier
2442    pub id: String,
2443    /// Description of what this anti-pattern represents
2444    pub description: String,
2445    /// Terms that indicate this anti-pattern
2446    pub indicators: Vec<String>,
2447}
2448
2449/// Schema-first ontology definition
2450///
2451/// Loaded from JSON file, used to build thesaurus for extraction.
2452#[derive(Debug, Clone, Serialize, Deserialize)]
2453pub struct OntologySchema {
2454    /// Schema name
2455    pub name: String,
2456    /// Schema version
2457    pub version: String,
2458    /// Entity type definitions
2459    pub entity_types: Vec<OntologyEntityType>,
2460    /// Relationship type definitions
2461    #[serde(default)]
2462    pub relationship_types: Vec<OntologyRelationshipType>,
2463    /// Anti-patterns to detect
2464    #[serde(default)]
2465    pub anti_patterns: Vec<OntologyAntiPattern>,
2466}
2467
2468impl OntologySchema {
2469    /// Load schema from JSON file
2470    pub fn load_from_file(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
2471        let content = std::fs::read_to_string(path)?;
2472        let schema: Self = serde_json::from_str(&content)?;
2473        Ok(schema)
2474    }
2475
2476    /// Build thesaurus entries from schema entity types + aliases
2477    ///
2478    /// Each entity type label and its aliases become thesaurus entries
2479    /// with the URI prefix as the URL for grounding.
2480    /// Returns tuples of (id, term, url).
2481    pub fn to_thesaurus_entries(&self) -> Vec<(String, String, Option<String>)> {
2482        let mut entries = Vec::new();
2483        for entity_type in &self.entity_types {
2484            let url = entity_type
2485                .uri_prefix
2486                .clone()
2487                .unwrap_or_else(|| format!("kg://{}", entity_type.id));
2488            // Primary label
2489            entries.push((
2490                entity_type.id.clone(),
2491                entity_type.label.clone(),
2492                Some(url.clone()),
2493            ));
2494            // Aliases
2495            for alias in &entity_type.aliases {
2496                entries.push((entity_type.id.clone(), alias.clone(), Some(url.clone())));
2497            }
2498        }
2499        entries
2500    }
2501
2502    /// Get all entity type IDs for coverage calculation
2503    pub fn category_ids(&self) -> Vec<String> {
2504        self.entity_types.iter().map(|e| e.id.clone()).collect()
2505    }
2506
2507    /// Get URI for a matched entity type ID
2508    pub fn uri_for(&self, entity_type_id: &str) -> Option<String> {
2509        self.entity_types
2510            .iter()
2511            .find(|e| e.id == entity_type_id)
2512            .and_then(|e| e.uri_prefix.clone())
2513    }
2514}
2515
2516#[cfg(test)]
2517mod tests {
2518    use super::*;
2519
2520    #[test]
2521    fn test_search_query_logical_operators() {
2522        // Test single term query (backward compatibility)
2523        let single_query = SearchQuery {
2524            search_term: NormalizedTermValue::new("rust".to_string()),
2525            search_terms: None,
2526            operator: None,
2527            skip: None,
2528            limit: Some(10),
2529            role: Some(RoleName::new("test")),
2530            layer: Layer::default(),
2531        };
2532
2533        assert!(!single_query.is_multi_term_query());
2534        assert_eq!(single_query.get_all_terms().len(), 1);
2535        assert_eq!(single_query.get_operator(), LogicalOperator::Or); // Default
2536
2537        // Test multi-term query with AND operator
2538        let and_query = SearchQuery::with_terms_and_operator(
2539            NormalizedTermValue::new("machine".to_string()),
2540            vec![NormalizedTermValue::new("learning".to_string())],
2541            LogicalOperator::And,
2542            Some(RoleName::new("test")),
2543        );
2544
2545        assert!(and_query.is_multi_term_query());
2546        assert_eq!(and_query.get_all_terms().len(), 2);
2547        assert_eq!(and_query.get_operator(), LogicalOperator::And);
2548
2549        // Test multi-term query with OR operator
2550        let or_query = SearchQuery::with_terms_and_operator(
2551            NormalizedTermValue::new("neural".to_string()),
2552            vec![NormalizedTermValue::new("networks".to_string())],
2553            LogicalOperator::Or,
2554            Some(RoleName::new("test")),
2555        );
2556
2557        assert!(or_query.is_multi_term_query());
2558        assert_eq!(or_query.get_all_terms().len(), 2);
2559        assert_eq!(or_query.get_operator(), LogicalOperator::Or);
2560    }
2561
2562    #[test]
2563    fn test_logical_operator_serialization() {
2564        // Test LogicalOperator serialization
2565        let and_op = LogicalOperator::And;
2566        let or_op = LogicalOperator::Or;
2567
2568        let and_json = serde_json::to_string(&and_op).unwrap();
2569        let or_json = serde_json::to_string(&or_op).unwrap();
2570
2571        assert_eq!(and_json, "\"and\"");
2572        assert_eq!(or_json, "\"or\"");
2573
2574        // Test deserialization
2575        let and_deser: LogicalOperator = serde_json::from_str("\"and\"").unwrap();
2576        let or_deser: LogicalOperator = serde_json::from_str("\"or\"").unwrap();
2577
2578        assert_eq!(and_deser, LogicalOperator::And);
2579        assert_eq!(or_deser, LogicalOperator::Or);
2580    }
2581
2582    #[test]
2583    fn test_search_query_serialization() {
2584        let query = SearchQuery {
2585            search_term: NormalizedTermValue::new("test".to_string()),
2586            search_terms: Some(vec![
2587                NormalizedTermValue::new("additional".to_string()),
2588                NormalizedTermValue::new("terms".to_string()),
2589            ]),
2590            operator: Some(LogicalOperator::And),
2591            skip: Some(0),
2592            limit: Some(10),
2593            role: Some(RoleName::new("test_role")),
2594            layer: Layer::default(),
2595        };
2596
2597        let json = serde_json::to_string(&query).unwrap();
2598        let deserialized: SearchQuery = serde_json::from_str(&json).unwrap();
2599
2600        assert_eq!(query.search_term, deserialized.search_term);
2601        assert_eq!(query.search_terms, deserialized.search_terms);
2602        assert_eq!(query.operator, deserialized.operator);
2603        assert_eq!(query.skip, deserialized.skip);
2604        assert_eq!(query.limit, deserialized.limit);
2605        assert_eq!(query.role, deserialized.role);
2606    }
2607
2608    #[test]
2609    fn test_priority_creation_and_comparison() {
2610        let high = Priority::HIGH;
2611        let medium = Priority::MEDIUM;
2612        let low = Priority::LOW;
2613        let custom = Priority::new(75);
2614
2615        assert_eq!(high.value(), 80);
2616        assert_eq!(medium.value(), 50);
2617        assert_eq!(low.value(), 20);
2618        assert_eq!(custom.value(), 75);
2619
2620        assert!(high.is_high());
2621        assert!(!medium.is_high());
2622        assert!(medium.is_medium());
2623        assert!(low.is_low());
2624
2625        // Test ordering
2626        assert!(high > medium);
2627        assert!(medium > low);
2628        assert!(custom > medium);
2629        assert!(custom < high);
2630
2631        // Test bounds
2632        let max = Priority::new(150);
2633        assert_eq!(max.value(), 100);
2634        let min = Priority::new(0);
2635        assert_eq!(min.value(), 0);
2636    }
2637
2638    #[test]
2639    fn test_routing_rule_creation() {
2640        let rule = RoutingRule::new(
2641            "test-rule".to_string(),
2642            "Test Rule".to_string(),
2643            "test.*pattern".to_string(),
2644            Priority::HIGH,
2645            "openai".to_string(),
2646            "gpt-4".to_string(),
2647        )
2648        .with_description("A test rule for unit testing".to_string())
2649        .with_tag("test".to_string())
2650        .with_tag("example".to_string());
2651
2652        assert_eq!(rule.id, "test-rule");
2653        assert_eq!(rule.name, "Test Rule");
2654        assert_eq!(rule.pattern, "test.*pattern");
2655        assert_eq!(rule.priority, Priority::HIGH);
2656        assert_eq!(rule.provider, "openai");
2657        assert_eq!(rule.model, "gpt-4");
2658        assert_eq!(
2659            rule.description,
2660            Some("A test rule for unit testing".to_string())
2661        );
2662        assert_eq!(rule.tags, vec!["test", "example"]);
2663        assert!(rule.enabled);
2664    }
2665
2666    #[test]
2667    fn test_routing_rule_defaults() {
2668        let rule = RoutingRule::with_defaults(
2669            "default-rule".to_string(),
2670            "Default Rule".to_string(),
2671            "default".to_string(),
2672            "anthropic".to_string(),
2673            "claude-3-sonnet".to_string(),
2674        );
2675
2676        assert_eq!(rule.priority, Priority::MEDIUM);
2677        assert!(rule.enabled);
2678        assert!(rule.tags.is_empty());
2679        assert!(rule.description.is_none());
2680    }
2681
2682    #[test]
2683    fn test_pattern_match() {
2684        let pattern_match = PatternMatch::new(
2685            "machine-learning".to_string(),
2686            "openai".to_string(),
2687            "gpt-4".to_string(),
2688            0.95,
2689            Priority::HIGH,
2690            "ml-rule".to_string(),
2691        );
2692
2693        assert_eq!(pattern_match.concept, "machine-learning");
2694        assert_eq!(pattern_match.provider, "openai");
2695        assert_eq!(pattern_match.model, "gpt-4");
2696        assert_eq!(pattern_match.score, 0.95);
2697        assert_eq!(pattern_match.priority, Priority::HIGH);
2698        assert_eq!(pattern_match.rule_id, "ml-rule");
2699
2700        // Weighted score should be score * priority_factor
2701        assert_eq!(pattern_match.weighted_score, 0.95 * 0.8);
2702    }
2703
2704    #[test]
2705    fn test_pattern_match_simple() {
2706        let simple = PatternMatch::simple(
2707            "test".to_string(),
2708            "anthropic".to_string(),
2709            "claude-3-haiku".to_string(),
2710            0.8,
2711        );
2712
2713        assert_eq!(simple.priority, Priority::MEDIUM);
2714        assert_eq!(simple.rule_id, "default");
2715        assert_eq!(simple.weighted_score, 0.8 * 0.5);
2716    }
2717
2718    #[test]
2719    fn test_routing_decision() {
2720        let decision = RoutingDecision::new(
2721            "openai".to_string(),
2722            "gpt-4".to_string(),
2723            RoutingScenario::Think,
2724            Priority::HIGH,
2725            0.9,
2726            "High priority thinking task".to_string(),
2727        );
2728
2729        assert_eq!(decision.provider, "openai");
2730        assert_eq!(decision.model, "gpt-4");
2731        assert_eq!(decision.scenario, RoutingScenario::Think);
2732        assert_eq!(decision.priority, Priority::HIGH);
2733        assert_eq!(decision.confidence, 0.9);
2734        assert_eq!(decision.reason, "High priority thinking task");
2735        assert!(decision.rule_id.is_none());
2736    }
2737
2738    #[test]
2739    fn test_routing_decision_with_rule() {
2740        let decision = RoutingDecision::with_rule(
2741            "anthropic".to_string(),
2742            "claude-3-sonnet".to_string(),
2743            RoutingScenario::Pattern("web-search".to_string()),
2744            Priority::MEDIUM,
2745            0.85,
2746            "web-rule".to_string(),
2747            "Web search pattern matched".to_string(),
2748        );
2749
2750        assert_eq!(decision.rule_id, Some("web-rule".to_string()));
2751        assert_eq!(
2752            decision.scenario,
2753            RoutingScenario::Pattern("web-search".to_string())
2754        );
2755    }
2756
2757    #[test]
2758    fn test_routing_decision_default() {
2759        let default = RoutingDecision::default("openai".to_string(), "gpt-3.5-turbo".to_string());
2760
2761        assert_eq!(default.provider, "openai");
2762        assert_eq!(default.model, "gpt-3.5-turbo");
2763        assert_eq!(default.scenario, RoutingScenario::Default);
2764        assert_eq!(default.priority, Priority::LOW);
2765        assert_eq!(default.confidence, 0.5);
2766        assert_eq!(default.reason, "Default routing");
2767    }
2768
2769    #[test]
2770    fn test_routing_scenario_serialization() {
2771        let scenarios = vec![
2772            RoutingScenario::Default,
2773            RoutingScenario::Background,
2774            RoutingScenario::Think,
2775            RoutingScenario::LongContext,
2776            RoutingScenario::WebSearch,
2777            RoutingScenario::Image,
2778            RoutingScenario::Pattern("test".to_string()),
2779            RoutingScenario::Priority,
2780            RoutingScenario::Custom("special".to_string()),
2781        ];
2782
2783        for scenario in scenarios {
2784            let json = serde_json::to_string(&scenario).unwrap();
2785            let deserialized: RoutingScenario = serde_json::from_str(&json).unwrap();
2786            assert_eq!(scenario, deserialized);
2787        }
2788    }
2789
2790    #[test]
2791    fn test_routing_scenario_display() {
2792        assert_eq!(format!("{}", RoutingScenario::Default), "default");
2793        assert_eq!(format!("{}", RoutingScenario::Think), "think");
2794        assert_eq!(
2795            format!("{}", RoutingScenario::Pattern("ml".to_string())),
2796            "pattern:ml"
2797        );
2798        assert_eq!(
2799            format!("{}", RoutingScenario::Custom("test".to_string())),
2800            "custom:test"
2801        );
2802    }
2803
2804    #[test]
2805    fn test_priority_serialization() {
2806        let priority = Priority::new(75);
2807        let json = serde_json::to_string(&priority).unwrap();
2808        let deserialized: Priority = serde_json::from_str(&json).unwrap();
2809        assert_eq!(priority, deserialized);
2810        assert_eq!(deserialized.value(), 75);
2811    }
2812
2813    #[test]
2814    fn test_routing_rule_serialization() {
2815        let rule = RoutingRule::new(
2816            "serialize-test".to_string(),
2817            "Serialize Test".to_string(),
2818            "test-pattern".to_string(),
2819            Priority::MEDIUM,
2820            "provider".to_string(),
2821            "model".to_string(),
2822        );
2823
2824        let json = serde_json::to_string(&rule).unwrap();
2825        let deserialized: RoutingRule = serde_json::from_str(&json).unwrap();
2826        assert_eq!(rule.id, deserialized.id);
2827        assert_eq!(rule.name, deserialized.name);
2828        assert_eq!(rule.priority, deserialized.priority);
2829        assert_eq!(rule.provider, deserialized.provider);
2830        assert_eq!(rule.model, deserialized.model);
2831    }
2832
2833    #[test]
2834    fn test_document_type_serialization() {
2835        let types = vec![
2836            DocumentType::KgEntry,
2837            DocumentType::Document,
2838            DocumentType::ConfigDocument,
2839        ];
2840
2841        for doc_type in types {
2842            let json = serde_json::to_string(&doc_type).unwrap();
2843            let deserialized: DocumentType = serde_json::from_str(&json).unwrap();
2844            assert_eq!(doc_type, deserialized);
2845        }
2846    }
2847
2848    #[test]
2849    fn test_document_defaults_for_new_fields() {
2850        let json = r#"{
2851            "id":"doc-1",
2852            "url":"file:///tmp/doc.md",
2853            "title":"Doc",
2854            "body":"Body"
2855        }"#;
2856
2857        let doc: Document = serde_json::from_str(json).unwrap();
2858        assert_eq!(doc.doc_type, DocumentType::KgEntry);
2859        assert!(doc.synonyms.is_none());
2860        assert!(doc.route.is_none());
2861        assert!(doc.priority.is_none());
2862    }
2863
2864    #[test]
2865    fn test_ontology_schema_deserialize() {
2866        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
2867        let schema: OntologySchema = serde_json::from_str(json).unwrap();
2868        assert_eq!(schema.name, "Publishing Domain Model");
2869        assert_eq!(schema.version, "1.0.0");
2870        assert_eq!(schema.entity_types.len(), 3);
2871        assert_eq!(schema.relationship_types.len(), 1);
2872        assert_eq!(schema.anti_patterns.len(), 1);
2873    }
2874
2875    #[test]
2876    fn test_ontology_schema_to_thesaurus_entries() {
2877        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
2878        let schema: OntologySchema = serde_json::from_str(json).unwrap();
2879        let entries = schema.to_thesaurus_entries();
2880        // 3 primary labels + 2 + 2 + 3 aliases = 10 entries
2881        assert_eq!(entries.len(), 10);
2882        // Check that primary labels are present
2883        assert!(entries.iter().any(|(_, term, _)| term == "Chapter"));
2884        assert!(entries.iter().any(|(_, term, _)| term == "Concept"));
2885        assert!(entries.iter().any(|(_, term, _)| term == "Knowledge Graph"));
2886        // Check that aliases are present
2887        assert!(entries.iter().any(|(_, term, _)| term == "section"));
2888        assert!(entries.iter().any(|(_, term, _)| term == "KG"));
2889        // Check URIs are populated
2890        assert!(entries.iter().all(|(_, _, url)| url.is_some()));
2891    }
2892
2893    #[test]
2894    fn test_ontology_schema_category_ids() {
2895        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
2896        let schema: OntologySchema = serde_json::from_str(json).unwrap();
2897        let ids = schema.category_ids();
2898        assert_eq!(ids.len(), 3);
2899        assert!(ids.contains(&"chapter".to_string()));
2900        assert!(ids.contains(&"concept".to_string()));
2901        assert!(ids.contains(&"knowledge_graph".to_string()));
2902    }
2903
2904    #[test]
2905    fn test_ontology_schema_uri_for() {
2906        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
2907        let schema: OntologySchema = serde_json::from_str(json).unwrap();
2908        assert_eq!(
2909            schema.uri_for("chapter"),
2910            Some("https://schema.org/Chapter".to_string())
2911        );
2912        assert_eq!(
2913            schema.uri_for("concept"),
2914            Some("https://schema.org/DefinedTerm".to_string())
2915        );
2916        assert_eq!(schema.uri_for("nonexistent"), None);
2917    }
2918
2919    #[test]
2920    fn test_ontology_schema_minimal() {
2921        // Minimal schema with only required fields
2922        let json = r#"{
2923            "name": "Minimal",
2924            "version": "0.1.0",
2925            "entity_types": [
2926                {"id": "item", "label": "Item"}
2927            ]
2928        }"#;
2929        let schema: OntologySchema = serde_json::from_str(json).unwrap();
2930        assert_eq!(schema.name, "Minimal");
2931        assert_eq!(schema.entity_types.len(), 1);
2932        assert!(schema.relationship_types.is_empty());
2933        assert!(schema.anti_patterns.is_empty());
2934        assert!(schema.entity_types[0].aliases.is_empty());
2935        assert!(schema.entity_types[0].uri_prefix.is_none());
2936    }
2937
2938    #[test]
2939    fn test_layer_enum() {
2940        // Test default is Layer::One
2941        let default: Layer = Default::default();
2942        assert_eq!(default, Layer::One);
2943
2944        // Test from_u8
2945        assert_eq!(Layer::from_u8(1), Some(Layer::One));
2946        assert_eq!(Layer::from_u8(2), Some(Layer::Two));
2947        assert_eq!(Layer::from_u8(3), Some(Layer::Three));
2948        assert_eq!(Layer::from_u8(0), None);
2949        assert_eq!(Layer::from_u8(4), None);
2950
2951        // Test Display
2952        assert_eq!(format!("{}", Layer::One), "1");
2953        assert_eq!(format!("{}", Layer::Two), "2");
2954        assert_eq!(format!("{}", Layer::Three), "3");
2955
2956        // Test includes_content
2957        assert!(!Layer::One.includes_content());
2958        assert!(Layer::Two.includes_content());
2959        assert!(Layer::Three.includes_content());
2960
2961        // Test includes_full_content
2962        assert!(!Layer::One.includes_full_content());
2963        assert!(!Layer::Two.includes_full_content());
2964        assert!(Layer::Three.includes_full_content());
2965    }
2966
2967    #[test]
2968    fn test_extract_first_paragraph_simple() {
2969        let body = "First paragraph here.\n\nSecond paragraph here.";
2970        assert_eq!(extract_first_paragraph(body), "First paragraph here.");
2971    }
2972
2973    #[test]
2974    fn test_extract_first_paragraph_with_yaml_frontmatter() {
2975        let body = "---\ntitle: My Document\ntags: [rust, programming]\n---\n\nThis is the actual first paragraph.\nMore content here.";
2976        assert_eq!(
2977            extract_first_paragraph(body),
2978            "This is the actual first paragraph."
2979        );
2980    }
2981
2982    #[test]
2983    fn test_extract_first_paragraph_empty_lines() {
2984        let body = "\n\n\nFirst paragraph after empty lines.";
2985        assert_eq!(
2986            extract_first_paragraph(body),
2987            "First paragraph after empty lines."
2988        );
2989    }
2990
2991    #[test]
2992    fn test_extract_first_paragraph_single_line() {
2993        let body = "Just one line";
2994        assert_eq!(extract_first_paragraph(body), "Just one line");
2995    }
2996
2997    #[test]
2998    fn test_layer_serialization() {
2999        // Test that Layer serializes correctly
3000        let query = SearchQuery {
3001            search_term: NormalizedTermValue::new("test".to_string()),
3002            search_terms: None,
3003            operator: None,
3004            skip: None,
3005            limit: None,
3006            role: None,
3007            layer: Layer::Two,
3008        };
3009
3010        let json = serde_json::to_string(&query).unwrap();
3011        assert!(json.contains("\"layer\""));
3012
3013        // Deserialize and check layer is preserved
3014        let deserialized: SearchQuery = serde_json::from_str(&json).unwrap();
3015        assert_eq!(deserialized.layer, Layer::Two);
3016    }
3017
3018    #[test]
3019    fn test_quality_score_composite() {
3020        // Test with all three scores
3021        let full_score = QualityScore {
3022            knowledge: Some(0.8),
3023            learning: Some(0.6),
3024            synthesis: Some(0.7),
3025        };
3026        assert!((full_score.composite() - 0.7).abs() < f64::EPSILON); // (0.8 + 0.6 + 0.7) / 3
3027
3028        // Test with two scores
3029        let partial_score = QualityScore {
3030            knowledge: Some(0.9),
3031            learning: None,
3032            synthesis: Some(0.5),
3033        };
3034        assert!((partial_score.composite() - 0.7).abs() < f64::EPSILON); // (0.9 + 0.5) / 2
3035
3036        // Test with one score
3037        let single_score = QualityScore {
3038            knowledge: Some(0.8),
3039            learning: None,
3040            synthesis: None,
3041        };
3042        assert!((single_score.composite() - 0.8).abs() < f64::EPSILON);
3043
3044        // Test with no scores (default)
3045        let empty_score = QualityScore::default();
3046        assert_eq!(empty_score.composite(), 0.0);
3047    }
3048
3049    #[test]
3050    fn test_quality_score_serialization() {
3051        let score = QualityScore {
3052            knowledge: Some(0.8),
3053            learning: Some(0.6),
3054            synthesis: Some(0.7),
3055        };
3056
3057        let json = serde_json::to_string(&score).unwrap();
3058        assert!(json.contains("0.8"));
3059        assert!(json.contains("0.6"));
3060        assert!(json.contains("0.7"));
3061
3062        let deserialized: QualityScore = serde_json::from_str(&json).unwrap();
3063        assert_eq!(deserialized.knowledge, Some(0.8));
3064        assert_eq!(deserialized.learning, Some(0.6));
3065        assert_eq!(deserialized.synthesis, Some(0.7));
3066    }
3067
3068    #[test]
3069    fn test_quality_score_default_serialization() {
3070        // Test that default QualityScore serializes/deserializes correctly
3071        let score = QualityScore::default();
3072        let json = serde_json::to_string(&score).unwrap();
3073        let deserialized: QualityScore = serde_json::from_str(&json).unwrap();
3074        assert!(deserialized.knowledge.is_none());
3075        assert!(deserialized.learning.is_none());
3076        assert!(deserialized.synthesis.is_none());
3077    }
3078
3079    #[test]
3080    fn test_indexed_document_with_quality_score() {
3081        let doc = IndexedDocument {
3082            id: "test-doc-1".to_string(),
3083            matched_edges: vec![],
3084            rank: 10,
3085            tags: vec!["rust".to_string()],
3086            nodes: vec![1, 2],
3087            quality_score: Some(QualityScore {
3088                knowledge: Some(0.8),
3089                learning: Some(0.6),
3090                synthesis: Some(0.7),
3091            }),
3092        };
3093
3094        assert_eq!(doc.id, "test-doc-1");
3095        assert!((doc.quality_score.as_ref().unwrap().composite() - 0.7).abs() < f64::EPSILON);
3096    }
3097
3098    #[test]
3099    fn test_indexed_document_from_document_quality_score_none() {
3100        let doc = Document {
3101            id: "doc-1".to_string(),
3102            url: "https://example.com".to_string(),
3103            title: "Test".to_string(),
3104            body: "Body".to_string(),
3105            description: None,
3106            summarization: None,
3107            stub: None,
3108            tags: None,
3109            rank: None,
3110            source_haystack: None,
3111            doc_type: DocumentType::Document,
3112            synonyms: None,
3113            route: None,
3114            priority: None,
3115        };
3116
3117        let indexed = IndexedDocument::from_document(doc);
3118        assert!(indexed.quality_score.is_none());
3119    }
3120
3121    #[test]
3122    fn test_indexed_document_serialization_backward_compat() {
3123        // Test that IndexedDocument without quality_score deserializes correctly
3124        // This simulates old data that doesn't have the quality_score field
3125        // NOTE: node IDs are u64 integers
3126        let json = r#"{
3127            "id": "doc-1",
3128            "matched_edges": [],
3129            "rank": 5,
3130            "tags": ["test"],
3131            "nodes": [1]
3132        }"#;
3133
3134        let doc: IndexedDocument = serde_json::from_str(json).unwrap();
3135        assert_eq!(doc.id, "doc-1");
3136        assert!(doc.quality_score.is_none());
3137    }
3138}