Skip to main content

terraphim_types/
lib.rs

1//! Core type definitions for the Terraphim AI system.
2//!
3//! This crate provides the fundamental data structures used throughout the Terraphim ecosystem:
4//!
5//! - **Knowledge Graph Types**: [`Concept`], [`Node`], [`Edge`], [`Thesaurus`]
6//! - **Document Management**: [`Document`], [`Index`], [`IndexedDocument`]
7//! - **Search Operations**: [`SearchQuery`], [`LogicalOperator`], [`RelevanceFunction`]
8//! - **Conversation Context**: [`Conversation`], [`ChatMessage`], [`ContextItem`]
9//! - **LLM Routing**: [`RoutingRule`], [`RoutingDecision`], [`Priority`]
10//! - **Multi-Agent Coordination**: [`MultiAgentContext`], [`AgentInfo`]
11//! - **Dynamic Ontology**: [`SchemaSignal`], [`ExtractedEntity`], [`CoverageSignal`], [`GroundingMetadata`]
12//! - **HGNC Gene Normalization**: `HgncGene`, `HgncNormalizer` (requires `hgnc` feature)
13//!
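//! # Features
//!
//! - `typescript`: Enable TypeScript type generation via tsify for WASM compatibility
//! - `medical`: Compile the `medical_types` module and the medical fields on `Node` and `Edge`
//! - `hgnc`: Compile the `hgnc` gene normalization module
//! - `kg-integration`: Compile the `shared_learning` module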
17//!
18//! # Examples
19//!
20//! ## Creating a Search Query
21//!
22//! ```
23//! use terraphim_types::{SearchQuery, NormalizedTermValue, Layer, LogicalOperator, RoleName};
24//!
25//! // Simple single-term query
26//! let query = SearchQuery {
27//!     search_term: NormalizedTermValue::from("rust"),
28//!     search_terms: None,
29//!     operator: None,
30//!     skip: None,
31//!     limit: Some(10),
32//!     role: Some(RoleName::new("engineer")),
33//!     layer: Layer::default(),
34//!     include_pinned: false,
35//!     min_quality: None,
36//! };
37//!
38//! // Multi-term AND query
39//! let multi_query = SearchQuery::with_terms_and_operator(
40//!     NormalizedTermValue::from("async"),
41//!     vec![NormalizedTermValue::from("programming")],
42//!     LogicalOperator::And,
43//!     Some(RoleName::new("engineer")),
44//! );
45//! ```
46//!
47//! ## Working with Documents
48//!
49//! ```
50//! use terraphim_types::{Document, DocumentType};
51//!
52//! let document = Document {
53//!     id: "doc-1".to_string(),
54//!     url: "https://example.com/article".to_string(),
55//!     title: "Introduction to Rust".to_string(),
56//!     body: "Rust is a systems programming language...".to_string(),
57//!     description: Some("A guide to Rust".to_string()),
58//!     summarization: None,
59//!     stub: None,
60//!     tags: Some(vec!["rust".to_string(), "programming".to_string()]),
61//!     rank: None,
62//!     source_haystack: None,
63//!     doc_type: DocumentType::KgEntry,
64//!     synonyms: None,
65//!     route: None,
66//!     priority: None,
67//!     quality_score: None,
68//! };
69//! ```
70//!
71//! ## Building a Knowledge Graph
72//!
73//! ```
74//! use terraphim_types::{Thesaurus, NormalizedTermValue, NormalizedTerm};
75//!
76//! let mut thesaurus = Thesaurus::new("programming".to_string());
77//! thesaurus.insert(
78//!     NormalizedTermValue::from("rust"),
79//!     NormalizedTerm::with_auto_id(NormalizedTermValue::from("rust programming language"))
80//!         .with_url("https://rust-lang.org".to_string())
81//! );
82//! ```
83
84// Medical types module (feature-gated)
85#[cfg(feature = "medical")]
86pub mod medical_types;
87#[cfg(feature = "medical")]
88pub use medical_types::*;
89
90// HGNC Gene Normalization module (feature-gated)
91#[cfg(feature = "hgnc")]
92pub mod hgnc;
93
94// Shared learning types for knowledge graph integration
95#[cfg(feature = "kg-integration")]
96pub mod shared_learning;
97
98// Capability-based routing types
99pub mod capability;
100pub use capability::*;
101
102pub mod score;
103
104// MCP Tool types for self-learning system
105pub mod mcp_tool;
106pub use mcp_tool::*;
107
108// Procedure capture types for self-learning system
109pub mod procedure;
110pub use procedure::*;
111
112// Persona definition types for agent personas
113pub mod persona;
114pub use persona::{CharacteristicDef, PersonaDefinition, PersonaLoadError, SfiaSkillDef};
115
116// LLM usage tracking types for cost monitoring
117pub mod llm_usage;
118pub use llm_usage::{LlmResult, LlmUsage, ModelPricing};
119
120// Review finding types for multi-agent code review
121pub mod review;
122pub use review::{
123    FindingCategory, FindingSeverity, ReviewAgentOutput, ReviewFinding, deduplicate_findings,
124};
125
126use ahash::AHashMap;
127use serde::{Deserialize, Deserializer, Serialize, Serializer};
128use std::collections::HashSet;
129use std::collections::hash_map::Iter;
130use std::fmt::{self, Display, Formatter};
131use std::iter::IntoIterator;
132use std::ops::{Deref, DerefMut};
133use std::sync::atomic::{AtomicU64, Ordering};
/// Process-wide counter behind auto-generated identifiers; the first id handed out is 1.
static INT_SEQ: AtomicU64 = AtomicU64::new(1);

/// Returns the counter's current value and advances it by one.
///
/// `fetch_add` performs the read and the increment as one atomic step, so
/// two callers never observe the same value.
fn get_int_id() -> u64 {
    INT_SEQ.fetch_add(1, Ordering::SeqCst)
}
138
139use schemars::JsonSchema;
140use std::str::FromStr;
141#[cfg(feature = "typescript")]
142use tsify::Tsify;
143
144/// A role name with case-insensitive lookup support.
145///
146/// Stores both the original casing and a lowercase version for efficient
147/// case-insensitive operations. Roles represent different user profiles or
148/// personas in the Terraphim system, each with specific knowledge domains
149/// and search preferences.
150///
151/// Note: Equality is based on both fields, so two instances with different
152/// original casing are not equal. Use `as_lowercase()` for case-insensitive comparisons.
153///
154/// # Examples
155///
156/// ```
157/// use terraphim_types::RoleName;
158///
159/// let role = RoleName::new("DataScientist");
160/// assert_eq!(role.as_str(), "DataScientist");
161/// assert_eq!(role.as_lowercase(), "datascientist");
162///
163/// // Compare using lowercase for case-insensitive matching
164/// let role2 = RoleName::new("datascientist");
165/// assert_eq!(role.as_lowercase(), role2.as_lowercase());
166/// ```
167#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, JsonSchema)]
168#[cfg_attr(feature = "typescript", derive(Tsify))]
169#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
170pub struct RoleName {
171    /// The original role name preserving the original casing
172    pub original: String,
173    /// Lowercase version for case-insensitive comparisons
174    pub lowercase: String,
175}
176
177impl RoleName {
178    /// Creates a new role name from a string.
179    ///
180    /// # Arguments
181    ///
182    /// * `name` - The role name with any casing
183    ///
184    /// # Examples
185    ///
186    /// ```
187    /// use terraphim_types::RoleName;
188    ///
189    /// let role = RoleName::new("SoftwareEngineer");
190    /// ```
191    pub fn new(name: &str) -> Self {
192        RoleName {
193            original: name.to_string(),
194            lowercase: name.to_lowercase(),
195        }
196    }
197
198    /// Returns the lowercase version of the role name.
199    ///
200    /// Use this for case-insensitive comparisons.
201    pub fn as_lowercase(&self) -> &str {
202        &self.lowercase
203    }
204
205    /// Returns the original role name with preserved casing.
206    pub fn as_str(&self) -> &str {
207        &self.original
208    }
209}
210
211impl fmt::Display for RoleName {
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        write!(f, "{}", self.original)
214    }
215}
216
217impl FromStr for RoleName {
218    type Err = ();
219
220    fn from_str(s: &str) -> Result<Self, Self::Err> {
221        Ok(RoleName::new(s))
222    }
223}
224
225impl From<&str> for RoleName {
226    fn from(s: &str) -> Self {
227        RoleName::new(s)
228    }
229}
230
231impl From<String> for RoleName {
232    fn from(s: String) -> Self {
233        RoleName::new(&s)
234    }
235}
236
237impl Serialize for RoleName {
238    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
239    where
240        S: Serializer,
241    {
242        serializer.serialize_str(&self.original)
243    }
244}
245
246impl<'de> Deserialize<'de> for RoleName {
247    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
248    where
249        D: Deserializer<'de>,
250    {
251        let s = String::deserialize(deserializer)?;
252        Ok(RoleName::new(&s))
253    }
254}
255/// The value of a normalized term
256///
257/// This is a string that has been normalized to lowercase and trimmed.
258#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
259#[cfg_attr(feature = "typescript", derive(Tsify))]
260#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
261pub struct NormalizedTermValue(String);
262
263impl NormalizedTermValue {
264    pub fn new(term: String) -> Self {
265        let value = term.trim().to_lowercase();
266        Self(value)
267    }
268    // convert to &str
269    pub fn as_str(&self) -> &str {
270        &self.0
271    }
272}
273
274impl From<String> for NormalizedTermValue {
275    fn from(term: String) -> Self {
276        Self::new(term)
277    }
278}
279
280impl From<&str> for NormalizedTermValue {
281    fn from(term: &str) -> Self {
282        Self::new(term.to_string())
283    }
284}
285
286impl Display for NormalizedTermValue {
287    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
288        write!(f, "{}", self.0)
289    }
290}
291
292impl AsRef<[u8]> for NormalizedTermValue {
293    fn as_ref(&self) -> &[u8] {
294        self.0.as_bytes()
295    }
296}
297
298/// A normalized term is a higher-level term that has been normalized
299///
300/// It holds a unique identifier to an underlying and the normalized value.
301/// The `display_value` field stores the original case for output purposes.
302#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, PartialOrd, Ord)]
303pub struct NormalizedTerm {
304    /// Unique identifier for the normalized term (u64)
305    pub id: u64,
306    /// The normalized value (lowercase, used for case-insensitive matching)
307    // This field is currently called `nterm` in the JSON
308    #[serde(rename = "nterm")]
309    pub value: NormalizedTermValue,
310    /// The display value with original case preserved (used for replacement output)
311    /// Falls back to `value` if None for backward compatibility
312    #[serde(default, skip_serializing_if = "Option::is_none")]
313    pub display_value: Option<String>,
314    /// The URL of the normalized term
315    #[serde(default, skip_serializing_if = "Option::is_none")]
316    pub url: Option<String>,
317    /// CLI action template with `{{ model }}` and `{{ prompt }}` placeholders.
318    #[serde(default, skip_serializing_if = "Option::is_none")]
319    pub action: Option<String>,
320    /// Routing tiebreaking priority (higher = preferred).
321    #[serde(default, skip_serializing_if = "Option::is_none")]
322    pub priority: Option<u8>,
323    /// Pattern or alias that activates this term.
324    #[serde(default, skip_serializing_if = "Option::is_none")]
325    pub trigger: Option<String>,
326    /// Whether the term is pinned.
327    #[serde(default)]
328    pub pinned: bool,
329}
330
331impl NormalizedTerm {
332    /// Create a new normalized term with the given id and value.
333    /// The display_value will be None (falls back to value for output).
334    pub fn new(id: u64, value: NormalizedTermValue) -> Self {
335        Self {
336            id,
337            value,
338            display_value: None,
339            url: None,
340            action: None,
341            priority: None,
342            trigger: None,
343            pinned: false,
344        }
345    }
346
347    /// Create a new normalized term with auto-generated ID.
348    /// The display_value will be None (falls back to value for output).
349    pub fn with_auto_id(value: NormalizedTermValue) -> Self {
350        Self {
351            id: get_int_id(),
352            value,
353            display_value: None,
354            url: None,
355            action: None,
356            priority: None,
357            trigger: None,
358            pinned: false,
359        }
360    }
361
362    /// Set the display value (original case for output).
363    /// Use this to preserve the original case from markdown headings.
364    pub fn with_display_value(mut self, display_value: String) -> Self {
365        self.display_value = Some(display_value);
366        self
367    }
368
369    /// Set the URL for this term.
370    pub fn with_url(mut self, url: String) -> Self {
371        self.url = Some(url);
372        self
373    }
374
375    /// Set the action template for this term.
376    pub fn with_action(mut self, action: String) -> Self {
377        self.action = Some(action);
378        self
379    }
380
381    /// Set the priority for this term.
382    pub fn with_priority(mut self, priority: u8) -> Self {
383        self.priority = Some(priority);
384        self
385    }
386
387    /// Set the trigger for this term.
388    pub fn with_trigger(mut self, trigger: String) -> Self {
389        self.trigger = Some(trigger);
390        self
391    }
392
393    /// Set the pinned flag for this term.
394    pub fn with_pinned(mut self, pinned: bool) -> Self {
395        self.pinned = pinned;
396        self
397    }
398
399    /// Get the display value, falling back to the normalized value if not set.
400    /// This is the value that should be used for replacement output.
401    pub fn display(&self) -> &str {
402        self.display_value
403            .as_deref()
404            .unwrap_or_else(|| self.value.as_str())
405    }
406
407    /// Get the action template.
408    pub fn action(&self) -> Option<&String> {
409        self.action.as_ref()
410    }
411
412    /// Get the priority.
413    pub fn priority(&self) -> Option<&u8> {
414        self.priority.as_ref()
415    }
416
417    /// Get the trigger.
418    pub fn trigger(&self) -> Option<&String> {
419        self.trigger.as_ref()
420    }
421
422    /// Get the pinned flag.
423    pub fn pinned(&self) -> bool {
424        self.pinned
425    }
426}
427
428/// A concept is a higher-level, normalized term.
429///
430/// It describes a unique, abstract idea in a machine-readable format.
431///
432/// An example of a concept is "machine learning" which is normalized from
433/// "Machine Learning"
434#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
435pub struct Concept {
436    /// A unique identifier for the concept (u64)
437    pub id: u64,
438    /// The normalized concept
439    pub value: NormalizedTermValue,
440}
441
442impl Concept {
443    /// Create a new concept with auto-generated ID.
444    pub fn new(value: NormalizedTermValue) -> Self {
445        Self {
446            id: get_int_id(),
447            value,
448        }
449    }
450
451    /// Create a new concept with a specific ID.
452    pub fn with_id(id: u64, value: NormalizedTermValue) -> Self {
453        Self { id, value }
454    }
455}
456
457impl From<String> for Concept {
458    fn from(concept: String) -> Self {
459        let concept = NormalizedTermValue::new(concept);
460        Self::new(concept)
461    }
462}
463
464impl Display for Concept {
465    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
466        write!(f, "{}", self.value)
467    }
468}
469
470/// Classifies a document by its role in the knowledge graph.
471#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
472#[serde(rename_all = "snake_case")]
473pub enum DocumentType {
474    #[default]
475    KgEntry,
476    Document,
477    ConfigDocument,
478}
479
480/// Routing directive specifying which LLM provider and model to use.
481#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
482pub struct RouteDirective {
483    pub provider: String,
484    pub model: String,
485    /// CLI action template with `{{ model }}` and `{{ prompt }}` placeholders.
486    #[serde(default)]
487    pub action: Option<String>,
488    /// Whether this model is free (zero cost).
489    #[serde(default)]
490    pub is_free: bool,
491}
492
493impl RouteDirective {
494    /// Extract the CLI basename from the first whitespace-delimited token of
495    /// the action template, e.g. `"opencode"` from
496    /// `"/home/alex/.bun/bin/opencode run -m {{ model }} ..."`.
497    ///
498    /// Returns `None` when the action template is missing or empty.
499    pub fn cli_basename(&self) -> Option<&str> {
500        let first = self.action.as_deref()?.split_whitespace().next()?;
501        std::path::Path::new(first).file_name()?.to_str()
502    }
503
504    /// Build a probe/health cache key from the route's `(cli, provider, model)`
505    /// triple. Used so that the same `(provider, model)` reached via two
506    /// different CLIs (e.g. opencode vs. pi-rust) has independent health.
507    ///
508    /// Returns an empty CLI segment when the action template is missing.
509    pub fn route_key(&self) -> String {
510        format!(
511            "{}:{}:{}",
512            self.cli_basename().unwrap_or(""),
513            self.provider,
514            self.model,
515        )
516    }
517}
518
#[cfg(test)]
mod route_directive_tests {
    use super::*;

    /// Shorthand constructor so each test spells out only what it cares about.
    fn directive(
        provider: &str,
        model: &str,
        action: Option<&str>,
        is_free: bool,
    ) -> RouteDirective {
        RouteDirective {
            provider: provider.into(),
            model: model.into(),
            action: action.map(Into::into),
            is_free,
        }
    }

    #[test]
    fn cli_basename_extracts_opencode() {
        let route = directive(
            "kimi",
            "kimi-for-coding/k2p6",
            Some("/home/alex/.bun/bin/opencode run -m {{ model }} --format json \"{{ prompt }}\""),
            false,
        );
        assert_eq!(route.cli_basename(), Some("opencode"));
    }

    #[test]
    fn cli_basename_extracts_pi_rust() {
        let route = directive(
            "zai-coding-plan",
            "glm-5.1",
            Some(
                "/home/alex/.local/bin/pi-rust --provider zai-coding-plan --model {{ model }} -p \"{{ prompt }}\"",
            ),
            true,
        );
        assert_eq!(route.cli_basename(), Some("pi-rust"));
    }

    #[test]
    fn cli_basename_extracts_claude() {
        let route = directive(
            "anthropic",
            "opus",
            Some(
                "/home/alex/.local/bin/claude --model {{ model }} -p \"{{ prompt }}\" --max-turns 50",
            ),
            false,
        );
        assert_eq!(route.cli_basename(), Some("claude"));
    }

    #[test]
    fn cli_basename_none_when_action_missing() {
        let route = directive("x", "y", None, false);
        assert_eq!(route.cli_basename(), None);
    }

    #[test]
    fn route_key_distinguishes_cli() {
        // Same provider and model, reached through two different CLIs.
        let via_opencode = directive(
            "zai-coding-plan",
            "glm-5.1",
            Some("/home/alex/.bun/bin/opencode run -m {{ model }}"),
            true,
        );
        let via_pi_rust = directive(
            "zai-coding-plan",
            "glm-5.1",
            Some("/home/alex/.local/bin/pi-rust --provider zai-coding-plan"),
            true,
        );

        let opencode_key = via_opencode.route_key();
        let pi_rust_key = via_pi_rust.route_key();
        assert_eq!(opencode_key, "opencode:zai-coding-plan:glm-5.1");
        assert_eq!(pi_rust_key, "pi-rust:zai-coding-plan:glm-5.1");
        assert_ne!(opencode_key, pi_rust_key);
    }
}
595
596/// Parsed directives extracted from the YAML front matter of a markdown KG entry.
597#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
598pub struct MarkdownDirectives {
599    #[serde(default)]
600    pub doc_type: DocumentType,
601    #[serde(default)]
602    pub synonyms: Vec<String>,
603    /// Primary route (first in the list). Kept for backward compatibility.
604    #[serde(default)]
605    pub route: Option<RouteDirective>,
606    /// All routes in priority order (primary first, fallbacks after).
607    /// Each route may have an `action::` template for CLI invocation.
608    #[serde(default)]
609    pub routes: Vec<RouteDirective>,
610    #[serde(default)]
611    pub priority: Option<u8>,
612    #[serde(default)]
613    pub trigger: Option<String>,
614    #[serde(default)]
615    pub pinned: bool,
616    /// First `# Heading` from the markdown file, preserving original case.
617    #[serde(default)]
618    pub heading: Option<String>,
619}
620
621/// The central document type representing indexed and searchable content.
622///
623/// Documents are the primary unit of content in Terraphim. They can come from
624/// various sources (local files, web pages, API responses) and are indexed for
625/// semantic search using knowledge graphs.
626///
627/// # Fields
628///
629/// * `id` - Unique identifier (typically a UUID or URL-based ID)
630/// * `url` - Source URL or file path
631/// * `title` - Document title (used for display and basic search)
632/// * `body` - Full text content
633/// * `description` - Optional short description (extracted or provided)
634/// * `summarization` - Optional AI-generated summary
635/// * `stub` - Optional brief excerpt
636/// * `tags` - Optional categorization tags (often from knowledge graph)
637/// * `rank` - Optional relevance score from search results
638/// * `source_haystack` - Optional identifier of the data source that provided this document
639///
640/// # Examples
641///
642/// ```
643/// use terraphim_types::{Document, DocumentType};
644///
645/// let doc = Document {
646///     id: "rust-book-ch1".to_string(),
647///     url: "https://doc.rust-lang.org/book/ch01-00-getting-started.html".to_string(),
648///     title: "Getting Started".to_string(),
649///     body: "Let's start your Rust journey...".to_string(),
650///     description: Some("Introduction to Rust programming".to_string()),
651///     summarization: None,
652///     stub: None,
653///     tags: Some(vec!["rust".to_string(), "tutorial".to_string()]),
654///     rank: Some(95),
655///     source_haystack: Some("rust-docs".to_string()),
656///     doc_type: DocumentType::KgEntry,
657///     synonyms: None,
658///     route: None,
659///     priority: None,
660///     quality_score: None,
661/// };
662/// ```
663#[derive(Deserialize, Serialize, Debug, Clone, Default)]
664#[cfg_attr(feature = "typescript", derive(Tsify))]
665#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
666pub struct Document {
667    /// Unique identifier for the document
668    pub id: String,
669    /// URL to the document
670    pub url: String,
671    /// Title of the document
672    pub title: String,
673    /// The document body
674    pub body: String,
675
676    /// A short description of the document (extracted from content)
677    pub description: Option<String>,
678    /// AI-generated summarization of the document content
679    pub summarization: Option<String>,
680    /// A short excerpt of the document
681    pub stub: Option<String>,
682    /// Tags for the document
683    pub tags: Option<Vec<String>>,
684    /// Rank of the document in the search results
685    pub rank: Option<u64>,
686    /// Source haystack location that this document came from
687    pub source_haystack: Option<String>,
688    /// Document classification derived from directives
689    #[serde(default)]
690    pub doc_type: DocumentType,
691    /// Synonyms extracted from directives (optional)
692    #[serde(default)]
693    pub synonyms: Option<Vec<String>>,
694    /// Optional route directive (provider/model)
695    #[serde(default)]
696    pub route: Option<RouteDirective>,
697    /// Optional priority directive (0-100)
698    #[serde(default)]
699    pub priority: Option<u8>,
700    /// Quality scores for K/L/S dimensions, populated by judge system or manual review
701    #[serde(default, skip_serializing_if = "Option::is_none")]
702    pub quality_score: Option<QualityScore>,
703}
704
705impl fmt::Display for Document {
706    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
707        // Start with title and body
708        write!(f, "{} {}", self.title, self.body)?;
709
710        // Append description if it exists
711        if let Some(ref description) = self.description {
712            write!(f, " {}", description)?;
713        }
714
715        // Append summarization if it exists and is different from description
716        if let Some(ref summarization) = self.summarization {
717            if Some(summarization) != self.description.as_ref() {
718                write!(f, " {}", summarization)?;
719            }
720        }
721
722        Ok(())
723    }
724}
725
726impl Document {
727    /// Set the source haystack for this document
728    pub fn with_source_haystack(mut self, haystack_location: String) -> Self {
729        self.source_haystack = Some(haystack_location);
730        self
731    }
732
733    /// Get the source haystack location
734    pub fn get_source_haystack(&self) -> Option<&String> {
735        self.source_haystack.as_ref()
736    }
737}
738
739/// A directed relationship between two nodes in the knowledge graph.
740#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
741pub struct Edge {
742    /// ID of the edge (u64)
743    pub id: u64,
744    /// Rank of the edge
745    pub rank: u64,
746    /// A hashmap of `document_id` to `rank`
747    pub doc_hash: AHashMap<String, u64>,
748    /// Medical edge type (only available with the `medical` feature)
749    #[cfg(feature = "medical")]
750    #[serde(default, skip_serializing_if = "Option::is_none")]
751    pub edge_type: Option<medical_types::MedicalEdgeType>,
752}
753
754impl Edge {
755    pub fn new(id: u64, document_id: String) -> Self {
756        let mut doc_hash = AHashMap::new();
757        doc_hash.insert(document_id, 1);
758        Self {
759            id,
760            rank: 1,
761            doc_hash,
762            #[cfg(feature = "medical")]
763            edge_type: None,
764        }
765    }
766}
767
768/// A `Node` represents single concept and its connections to other concepts.
769///
770/// Each node can have multiple edges to other nodes
771#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
772pub struct Node {
773    /// Unique identifier of the node (u64)
774    pub id: u64,
775    /// Number of co-occurrences
776    pub rank: u64,
777    /// List of connected edges
778    pub connected_with: HashSet<u64>,
779    /// Medical node type (only available with the `medical` feature)
780    #[cfg(feature = "medical")]
781    #[serde(default, skip_serializing_if = "Option::is_none")]
782    pub node_type: Option<medical_types::MedicalNodeType>,
783    /// Human-readable term for this node (only available with the `medical` feature)
784    #[cfg(feature = "medical")]
785    #[serde(default, skip_serializing_if = "Option::is_none")]
786    pub term: Option<String>,
787    /// SNOMED CT concept identifier (only available with the `medical` feature)
788    #[cfg(feature = "medical")]
789    #[serde(default, skip_serializing_if = "Option::is_none")]
790    pub snomed_id: Option<u64>,
791}
792
793impl Node {
794    /// Create a new node with a given id and edge
795    pub fn new(id: u64, edge: Edge) -> Self {
796        let mut connected_with = HashSet::new();
797        connected_with.insert(edge.id);
798        Self {
799            id,
800            rank: 1,
801            connected_with,
802            #[cfg(feature = "medical")]
803            node_type: None,
804            #[cfg(feature = "medical")]
805            term: None,
806            #[cfg(feature = "medical")]
807            snomed_id: None,
808        }
809    }
810
811    // pub fn sort_edges_by_value(&self) {
812    //     // let count_b: BTreeMap<&u64, &Edge> =
813    //     // self.connected_with.iter().map(|(k, v)| (v, k)).collect();
814    //     // for (k, v) in self.connected_with.iter().map(|(k, v)| (v.rank, k)) {
815    //     // log::warn!("k {:?} v {:?}", k, v);
816    //     // }
817    //     log::warn!("Connected with {:?}", self.connected_with);
818    // }
819}
820
821/// A thesaurus is a dictionary with synonyms which map to upper-level concepts.
822#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
823pub struct Thesaurus {
824    /// Name of the thesaurus
825    name: String,
826    /// The inner hashmap of normalized terms
827    data: AHashMap<NormalizedTermValue, NormalizedTerm>,
828    /// SHA-256 hash of the source markdown files used to build this thesaurus.
829    /// Used for cache invalidation: when the hash changes, the thesaurus is rebuilt.
830    #[serde(default, skip_serializing_if = "Option::is_none")]
831    pub source_hash: Option<String>,
832}
833
834impl Thesaurus {
835    /// Create a new, empty thesaurus
836    pub fn new(name: String) -> Self {
837        Self {
838            name,
839            data: AHashMap::new(),
840            source_hash: None,
841        }
842    }
843
844    /// Set the source hash for cache invalidation tracking.
845    pub fn with_source_hash(mut self, hash: String) -> Self {
846        self.source_hash = Some(hash);
847        self
848    }
849
850    /// Get the name of the thesaurus
851    pub fn name(&self) -> &str {
852        &self.name
853    }
854
855    /// Inserts a key-value pair into the thesaurus.
856    pub fn insert(&mut self, key: NormalizedTermValue, value: NormalizedTerm) {
857        self.data.insert(key, value);
858    }
859
860    /// Get the length of the thesaurus
861    pub fn len(&self) -> usize {
862        self.data.len()
863    }
864
865    /// Check if the thesaurus is empty
866    pub fn is_empty(&self) -> bool {
867        self.data.is_empty()
868    }
869
870    /// Custom `get` method for the thesaurus, which accepts a
871    /// `NormalizedTermValue` and returns a reference to the
872    /// `NormalizedTerm`.
873    pub fn get(&self, key: &NormalizedTermValue) -> Option<&NormalizedTerm> {
874        self.data.get(key)
875    }
876
877    pub fn keys(
878        &self,
879    ) -> std::collections::hash_map::Keys<'_, NormalizedTermValue, NormalizedTerm> {
880        self.data.keys()
881    }
882}
883
884// Implement `IntoIterator` for a reference to `Thesaurus`
885impl<'a> IntoIterator for &'a Thesaurus {
886    type Item = (&'a NormalizedTermValue, &'a NormalizedTerm);
887    type IntoIter = Iter<'a, NormalizedTermValue, NormalizedTerm>;
888
889    fn into_iter(self) -> Self::IntoIter {
890        self.data.iter()
891    }
892}
893
894/// An index is a hashmap of documents
895///
896/// It holds the documents that have been indexed
897/// and can be searched through using the `RoleGraph`.
898#[derive(Debug, Clone, Serialize, Deserialize)]
899pub struct Index {
900    inner: AHashMap<String, Document>,
901}
902
903impl Default for Index {
904    fn default() -> Self {
905        Self::new()
906    }
907}
908
909impl Index {
910    /// Create a new, empty index
911    pub fn new() -> Self {
912        Self {
913            inner: AHashMap::new(),
914        }
915    }
916
917    /// Converts all given indexed documents to documents
918    ///
919    /// Returns the all converted documents
920    pub fn get_documents(&self, docs: Vec<IndexedDocument>) -> Vec<Document> {
921        let mut documents: Vec<Document> = Vec::new();
922        for doc in docs {
923            log::trace!("doc: {:#?}", doc);
924            if let Some(document) = self.get_document(&doc) {
925                // Document found in cache
926                let mut document = document;
927                document.tags = Some(doc.tags.clone());
928                // rank only available for terraphim graph
929                // use scorer to populate the rank for all cases
930                document.rank = Some(doc.rank);
931                document.quality_score = doc.quality_score.clone();
932                documents.push(document.clone());
933            } else {
934                log::warn!("Document not found in cache. Cannot convert.");
935            }
936        }
937        documents
938    }
939    /// Returns all documents from the index for scorer without graph embeddings
940    pub fn get_all_documents(&self) -> Vec<Document> {
941        let documents: Vec<Document> = self.values().cloned().collect::<Vec<Document>>();
942        documents
943    }
944
945    /// Get a document from the index (if it exists in the index)
946    pub fn get_document(&self, doc: &IndexedDocument) -> Option<Document> {
947        if let Some(document) = self.inner.get(&doc.id).cloned() {
948            // Document found in cache
949            let mut document = document;
950            document.tags = Some(doc.tags.clone());
951            // Rank only available for terraphim graph
952            // use scorer to populate the rank for all cases
953            document.rank = Some(doc.rank);
954            document.quality_score = doc.quality_score.clone();
955            Some(document)
956        } else {
957            None
958        }
959    }
960}
961
962impl Deref for Index {
963    type Target = AHashMap<String, Document>;
964
965    fn deref(&self) -> &Self::Target {
966        &self.inner
967    }
968}
969
970impl DerefMut for Index {
971    fn deref_mut(&mut self) -> &mut Self::Target {
972        &mut self.inner
973    }
974}
975
976impl IntoIterator for Index {
977    type Item = (String, Document);
978    type IntoIter = std::collections::hash_map::IntoIter<String, Document>;
979
980    fn into_iter(self) -> Self::IntoIter {
981        self.inner.into_iter()
982    }
983}
984
985/// Quality scores for Knowledge/Learning/Synthesis (K/L/S) dimensions.
986///
987/// These scores represent the quality of a document across three dimensions:
988/// - Knowledge: Depth and accuracy of domain knowledge
989/// - Logic: Reasoning quality and clarity
990/// - Structure: Organisation of concepts and insight
991///
992/// All scores are optional and range from 0.0 to 1.0 when present.
993#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
994pub struct QualityScore {
995    /// Knowledge quality score (0.0-1.0)
996    pub knowledge: Option<f64>,
997    /// Logic quality score (0.0-1.0)
998    pub logic: Option<f64>,
999    /// Structure quality score (0.0-1.0)
1000    pub structure: Option<f64>,
1001    /// Timestamp when the quality was last evaluated
1002    pub last_evaluated: Option<chrono::DateTime<chrono::Utc>>,
1003}
1004
1005impl QualityScore {
1006    /// Calculate the composite score by averaging all available scores.
1007    ///
1008    /// Returns 0.0 if no scores are available.
1009    ///
1010    /// # Examples
1011    ///
1012    /// ```
1013    /// use terraphim_types::QualityScore;
1014    ///
1015    /// let score = QualityScore {
1016    ///     knowledge: Some(0.8),
1017    ///     logic: Some(0.6),
1018    ///     structure: None,
1019    ///     last_evaluated: None,
1020    /// };
1021    /// assert_eq!(score.composite(), 0.7); // (0.8 + 0.6) / 2
1022    ///
1023    /// let empty = QualityScore::default();
1024    /// assert_eq!(empty.composite(), 0.0);
1025    /// ```
1026    pub fn composite(&self) -> f64 {
1027        let mut sum = 0.0;
1028        let mut count = 0;
1029
1030        if let Some(k) = self.knowledge {
1031            sum += k;
1032            count += 1;
1033        }
1034        if let Some(l) = self.logic {
1035            sum += l;
1036            count += 1;
1037        }
1038        if let Some(s) = self.structure {
1039            sum += s;
1040            count += 1;
1041        }
1042
1043        if count == 0 { 0.0 } else { sum / count as f64 }
1044    }
1045}
1046
1047/// Reference to external storage of documents
1048#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
1049pub struct IndexedDocument {
1050    /// UUID of the indexed document, matching external storage id
1051    pub id: String,
1052    /// Matched to edges
1053    pub matched_edges: Vec<Edge>,
1054    /// Graph rank (the sum of node rank, edge rank)
1055    /// Number of nodes and edges connected to the document
1056    pub rank: u64,
1057    /// Tags, which are nodes turned into concepts for human readability
1058    pub tags: Vec<String>,
1059    /// List of node IDs for validation of matching
1060    pub nodes: Vec<u64>,
1061    /// Quality scores for K/L/S dimensions
1062    #[serde(default)]
1063    pub quality_score: Option<QualityScore>,
1064}
1065
1066impl IndexedDocument {
1067    pub fn to_json_string(&self) -> Result<String, serde_json::Error> {
1068        serde_json::to_string(&self)
1069    }
1070    pub fn from_document(document: Document) -> Self {
1071        IndexedDocument {
1072            id: document.id,
1073            matched_edges: Vec::new(),
1074            rank: 0,
1075            tags: document.tags.unwrap_or_default(),
1076            nodes: Vec::new(),
1077            quality_score: None,
1078        }
1079    }
1080}
1081
1082/// Logical operators for combining multiple search terms
1083#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, JsonSchema)]
1084#[cfg_attr(feature = "typescript", derive(Tsify))]
1085#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1086pub enum LogicalOperator {
1087    /// AND operator - documents must contain all terms
1088    #[serde(rename = "and")]
1089    And,
1090    /// OR operator - documents may contain any of the terms
1091    #[serde(rename = "or")]
1092    Or,
1093}
1094
1095/// Layered output levels for search results.
1096///
1097/// Controls how much content is returned per search result to optimize token usage:
1098/// - Layer 1: Title + tags only (~50 tokens/result)
1099/// - Layer 2: + first paragraph summary (~150 tokens/result)
1100/// - Layer 3: Full content (current default behaviour)
1101#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, JsonSchema)]
1102#[cfg_attr(feature = "typescript", derive(Tsify))]
1103#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1104pub enum Layer {
1105    /// Title + tags only (~50 tokens/result)
1106    #[serde(rename = "1")]
1107    #[default]
1108    One,
1109    /// + first paragraph summary (~150 tokens/result)
1110    #[serde(rename = "2")]
1111    Two,
1112    /// Full content (default)
1113    #[serde(rename = "3")]
1114    Three,
1115}
1116
1117impl Layer {
1118    /// Parse a layer from an integer value (1, 2, or 3)
1119    pub fn from_u8(value: u8) -> Option<Self> {
1120        match value {
1121            1 => Some(Layer::One),
1122            2 => Some(Layer::Two),
1123            3 => Some(Layer::Three),
1124            _ => None,
1125        }
1126    }
1127
1128    /// Returns true if this layer includes content (layer 2 or 3)
1129    pub fn includes_content(&self) -> bool {
1130        matches!(self, Layer::Two | Layer::Three)
1131    }
1132
1133    /// Returns true if this layer includes full content (layer 3)
1134    pub fn includes_full_content(&self) -> bool {
1135        matches!(self, Layer::Three)
1136    }
1137}
1138
1139impl std::fmt::Display for Layer {
1140    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1141        match self {
1142            Layer::One => write!(f, "1"),
1143            Layer::Two => write!(f, "2"),
1144            Layer::Three => write!(f, "3"),
1145        }
1146    }
1147}
1148
/// Extract the first line of real content from document body text.
///
/// If the body, ignoring leading whitespace, opens with a `---` marker, the
/// text up to and including the next `---` is treated as YAML frontmatter and
/// skipped. The first non-empty line of what remains is returned with its
/// surrounding whitespace trimmed. Despite the name, only that single line is
/// returned, not a whole multi-line paragraph.
///
/// When the opening marker has no closing marker the whole body is scanned
/// instead, so the marker line itself is returned. An empty or
/// whitespace-only body yields an empty string.
pub fn extract_first_paragraph(body: &str) -> String {
    const FENCE: &str = "---";

    // Slice the trimmed text, not the raw body: offsets are then relative to
    // the opening fence itself, so leading whitespace can neither shift the
    // cut into the frontmatter nor land it inside a multi-byte character.
    let content = match body.trim_start().strip_prefix(FENCE) {
        Some(after_open) => match after_open.find(FENCE) {
            // Continue right after the closing fence.
            Some(close_at) => &after_open[close_at + FENCE.len()..],
            // Unterminated frontmatter: fall back to the whole body.
            None => body,
        },
        None => body,
    };

    content
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .map(str::to_string)
        .unwrap_or_default()
}
1177
1178/// A search query for finding documents in the knowledge graph.
1179///
1180/// Supports both single-term and multi-term queries with logical operators (AND/OR).
1181/// Results can be paginated using `skip` and `limit`, and scoped to specific roles.
1182///
1183/// # Examples
1184///
1185/// ## Single-term query
1186///
1187/// ```
1188/// use terraphim_types::{SearchQuery, NormalizedTermValue, Layer, RoleName};
1189///
1190/// let query = SearchQuery {
1191///     search_term: NormalizedTermValue::from("machine learning"),
1192///     search_terms: None,
1193///     operator: None,
1194///     skip: None,
1195///     limit: Some(10),
1196///     role: Some(RoleName::new("data_scientist")),
1197///     layer: Layer::default(),
1198///     include_pinned: false,
1199///     min_quality: None,
1200/// };
1201/// ```
1202///
1203/// ## Multi-term AND query
1204///
1205/// ```
1206/// use terraphim_types::{SearchQuery, NormalizedTermValue, LogicalOperator, RoleName};
1207///
1208/// let query = SearchQuery::with_terms_and_operator(
1209///     NormalizedTermValue::from("rust"),
1210///     vec![NormalizedTermValue::from("async"), NormalizedTermValue::from("tokio")],
1211///     LogicalOperator::And,
1212///     Some(RoleName::new("engineer")),
1213/// );
1214/// assert!(query.is_multi_term_query());
1215/// assert_eq!(query.get_all_terms().len(), 3);
1216/// ```
1217#[derive(Debug, Serialize, Deserialize, Clone, Default)]
1218#[cfg_attr(feature = "typescript", derive(Tsify))]
1219#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1220pub struct SearchQuery {
1221    /// Primary search term for backward compatibility
1222    #[serde(alias = "query")]
1223    pub search_term: NormalizedTermValue,
1224    /// Multiple search terms for logical operations
1225    pub search_terms: Option<Vec<NormalizedTermValue>>,
1226    /// Logical operator for combining multiple terms (defaults to OR if not specified)
1227    pub operator: Option<LogicalOperator>,
1228    /// Number of results to skip (for pagination)
1229    pub skip: Option<usize>,
1230    /// Maximum number of results to return
1231    pub limit: Option<usize>,
1232    /// Role context for this search
1233    pub role: Option<RoleName>,
1234    /// Output layer for controlling result detail (1=minimal, 2=summary, 3=full)
1235    #[serde(default)]
1236    pub layer: Layer,
1237    /// Include pinned KG entries in results even if they don't match the query
1238    #[serde(default)]
1239    pub include_pinned: bool,
1240    /// Minimum composite quality score threshold (0.0–1.0). Documents with a composite
1241    /// score below this value are excluded from results.
1242    #[serde(default, skip_serializing_if = "Option::is_none")]
1243    pub min_quality: Option<f64>,
1244}
1245
1246impl SearchQuery {
1247    /// Get all search terms (both single and multiple)
1248    pub fn get_all_terms(&self) -> Vec<&NormalizedTermValue> {
1249        if let Some(ref multiple_terms) = self.search_terms {
1250            // For multi-term queries, include primary term + additional terms,
1251            // but avoid duplicates when the primary term is also present in `search_terms`.
1252            let mut all_terms: Vec<&NormalizedTermValue> =
1253                Vec::with_capacity(1 + multiple_terms.len());
1254            all_terms.push(&self.search_term);
1255
1256            for term in multiple_terms.iter() {
1257                if term.as_str() != self.search_term.as_str() {
1258                    all_terms.push(term);
1259                }
1260            }
1261
1262            all_terms
1263        } else {
1264            // For single-term queries, use search_term
1265            vec![&self.search_term]
1266        }
1267    }
1268
1269    /// Check if this is a multi-term query with logical operators
1270    pub fn is_multi_term_query(&self) -> bool {
1271        self.search_terms.is_some() && !self.search_terms.as_ref().unwrap().is_empty()
1272    }
1273
1274    /// Get the effective logical operator (defaults to Or for multi-term queries)
1275    pub fn get_operator(&self) -> LogicalOperator {
1276        self.operator
1277            .as_ref()
1278            .unwrap_or(&LogicalOperator::Or)
1279            .clone()
1280    }
1281
1282    /// Create a new SearchQuery with multiple terms and an operator
1283    pub fn with_terms_and_operator(
1284        primary_term: NormalizedTermValue,
1285        additional_terms: Vec<NormalizedTermValue>,
1286        operator: LogicalOperator,
1287        role: Option<RoleName>,
1288    ) -> Self {
1289        Self {
1290            search_term: primary_term,
1291            search_terms: Some(additional_terms),
1292            operator: Some(operator),
1293            skip: None,
1294            limit: None,
1295            role,
1296            layer: Layer::default(),
1297            include_pinned: false,
1298            min_quality: None,
1299        }
1300    }
1301}
1302
1303/// Defines the relevance function (scorer) to be used for ranking search
1304/// results for the `Role`.
1305#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, JsonSchema, Default)]
1306#[cfg_attr(feature = "typescript", derive(Tsify))]
1307#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1308pub enum RelevanceFunction {
1309    /// Scorer for ranking search results based on the Terraphim graph
1310    ///
1311    /// This is based on filtered result outputs according to the ranking of the
1312    /// knowledge graph. The node, which is most connected will produce the
1313    /// highest ranking
1314    #[serde(rename = "terraphim-graph")]
1315    TerraphimGraph,
1316    /// Scorer for ranking search results based on the title of a document
1317    #[default]
1318    #[serde(rename = "title-scorer")]
1319    TitleScorer,
1320    /// BM25 (Okapi BM25) relevance function for probabilistic ranking
1321    #[serde(rename = "bm25")]
1322    BM25,
1323    /// BM25F relevance function with field-specific weights (title, body, description, tags)
1324    #[serde(rename = "bm25f")]
1325    BM25F,
1326    /// BM25Plus relevance function with enhanced parameters for fine-tuning
1327    #[serde(rename = "bm25plus")]
1328    BM25Plus,
1329}
1330
1331/// Defines all supported inputs for the knowledge graph.
1332///
1333/// Every knowledge graph is built from a specific input, such as Markdown files
1334/// or JSON files.
1335#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, JsonSchema)]
1336#[cfg_attr(feature = "typescript", derive(Tsify))]
1337#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1338pub enum KnowledgeGraphInputType {
1339    /// A set of Markdown files
1340    #[serde(rename = "markdown")]
1341    Markdown,
1342    /// A JSON files
1343    #[serde(rename = "json")]
1344    Json,
1345}
1346
1347// Context Management Types for LLM Conversations
1348
1349/// Unique identifier for conversations
1350#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1351#[cfg_attr(feature = "typescript", derive(Tsify))]
1352#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1353pub struct ConversationId(pub String);
1354
1355impl ConversationId {
1356    pub fn new() -> Self {
1357        Self(uuid::Uuid::new_v4().to_string())
1358    }
1359
1360    pub fn from_string(id: String) -> Self {
1361        Self(id)
1362    }
1363
1364    pub fn as_str(&self) -> &str {
1365        &self.0
1366    }
1367}
1368
1369impl Default for ConversationId {
1370    fn default() -> Self {
1371        Self::new()
1372    }
1373}
1374
1375impl Display for ConversationId {
1376    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1377        write!(f, "{}", self.0)
1378    }
1379}
1380
1381/// Types of context that can be added to conversations
1382#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1383#[cfg_attr(feature = "typescript", derive(Tsify))]
1384#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1385pub enum ContextType {
1386    /// System-level context
1387    System,
1388    /// User-provided context
1389    UserInput,
1390    /// Document-based context
1391    Document,
1392    /// Search result context
1393    SearchResult,
1394    /// External data or API context
1395    External,
1396    /// Context from KG term definition with synonyms and metadata
1397    KGTermDefinition,
1398    /// Context from complete knowledge graph index
1399    KGIndex,
1400}
1401
1402/// Unique identifier for messages within conversations
1403#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1404#[cfg_attr(feature = "typescript", derive(Tsify))]
1405#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1406pub struct MessageId(pub String);
1407
1408impl MessageId {
1409    pub fn new() -> Self {
1410        Self(uuid::Uuid::new_v4().to_string())
1411    }
1412
1413    pub fn from_string(id: String) -> Self {
1414        Self(id)
1415    }
1416
1417    pub fn as_str(&self) -> &str {
1418        &self.0
1419    }
1420}
1421
1422impl Default for MessageId {
1423    fn default() -> Self {
1424        Self::new()
1425    }
1426}
1427
1428impl Display for MessageId {
1429    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1430        write!(f, "{}", self.0)
1431    }
1432}
1433
1434/// Context item that can be added to LLM conversations
1435#[derive(Debug, Clone, Serialize, Deserialize)]
1436#[cfg_attr(feature = "typescript", derive(Tsify))]
1437#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1438pub struct ContextItem {
1439    /// Unique identifier for the context item
1440    pub id: String,
1441    /// Type of context (document, search_result, user_input, etc.)
1442    pub context_type: ContextType,
1443    /// Title or summary of the context item
1444    pub title: String,
1445    /// Brief summary of the content (separate from full content)
1446    pub summary: Option<String>,
1447    /// The actual content to be included in the LLM context
1448    pub content: String,
1449    /// Metadata about the context (source, relevance score, etc.)
1450    pub metadata: AHashMap<String, String>,
1451    /// Timestamp when this context was added
1452    pub created_at: chrono::DateTime<chrono::Utc>,
1453    /// Relevance score for ordering context items
1454    pub relevance_score: Option<f64>,
1455}
1456
1457impl ContextItem {
1458    /// Create a new context item from a document
1459    pub fn from_document(document: &Document) -> Self {
1460        let mut metadata = AHashMap::new();
1461        metadata.insert("source_type".to_string(), "document".to_string());
1462        metadata.insert("document_id".to_string(), document.id.clone());
1463        if !document.url.is_empty() {
1464            metadata.insert("url".to_string(), document.url.clone());
1465        }
1466        if let Some(tags) = &document.tags {
1467            metadata.insert("tags".to_string(), tags.join(", "));
1468        }
1469        if let Some(rank) = document.rank {
1470            metadata.insert("rank".to_string(), rank.to_string());
1471        }
1472
1473        Self {
1474            id: uuid::Uuid::new_v4().to_string(),
1475            context_type: ContextType::Document,
1476            title: if document.title.is_empty() {
1477                document.id.clone()
1478            } else {
1479                document.title.clone()
1480            },
1481            summary: document.description.clone(),
1482            content: format!(
1483                "Title: {}\n\n{}\n\n{}",
1484                document.title,
1485                document.description.as_deref().unwrap_or(""),
1486                document.body
1487            ),
1488            metadata,
1489            created_at: chrono::Utc::now(),
1490            relevance_score: document.rank.map(|r| r as f64),
1491        }
1492    }
1493
1494    /// Create a new context item from search results
1495    pub fn from_search_result(query: &str, documents: &[Document]) -> Self {
1496        let mut metadata = AHashMap::new();
1497        metadata.insert("source_type".to_string(), "search_result".to_string());
1498        metadata.insert("query".to_string(), query.to_string());
1499        metadata.insert("result_count".to_string(), documents.len().to_string());
1500
1501        let content = if documents.is_empty() {
1502            format!("Search query: '{}'\nNo results found.", query)
1503        } else {
1504            let mut content = format!("Search query: '{}'\nResults:\n\n", query);
1505            for (i, doc) in documents.iter().take(5).enumerate() {
1506                content.push_str(&format!(
1507                    "{}. {}\n   {}\n   Rank: {}\n\n",
1508                    i + 1,
1509                    doc.title,
1510                    doc.description.as_deref().unwrap_or("No description"),
1511                    doc.rank.unwrap_or(0)
1512                ));
1513            }
1514            if documents.len() > 5 {
1515                content.push_str(&format!("... and {} more results\n", documents.len() - 5));
1516            }
1517            content
1518        };
1519
1520        Self {
1521            id: uuid::Uuid::new_v4().to_string(),
1522            context_type: ContextType::Document, // Changed from SearchResult to Document
1523            title: format!("Search: {}", query),
1524            summary: Some(format!(
1525                "Search results for '{}' - {} documents found",
1526                query,
1527                documents.len()
1528            )),
1529            content,
1530            metadata,
1531            created_at: chrono::Utc::now(),
1532            relevance_score: documents.first().and_then(|d| d.rank.map(|r| r as f64)),
1533        }
1534    }
1535
1536    /// Create a new context item from a KG term definition
1537    pub fn from_kg_term_definition(kg_term: &KGTermDefinition) -> Self {
1538        let mut metadata = AHashMap::new();
1539        metadata.insert("source_type".to_string(), "kg_term".to_string());
1540        metadata.insert("term_id".to_string(), kg_term.id.to_string());
1541        metadata.insert(
1542            "normalized_term".to_string(),
1543            kg_term.normalized_term.to_string(),
1544        );
1545        metadata.insert(
1546            "synonyms_count".to_string(),
1547            kg_term.synonyms.len().to_string(),
1548        );
1549        metadata.insert(
1550            "related_terms_count".to_string(),
1551            kg_term.related_terms.len().to_string(),
1552        );
1553        metadata.insert(
1554            "usage_examples_count".to_string(),
1555            kg_term.usage_examples.len().to_string(),
1556        );
1557
1558        if let Some(ref url) = kg_term.url {
1559            metadata.insert("url".to_string(), url.clone());
1560        }
1561
1562        // Add KG-specific metadata
1563        for (key, value) in &kg_term.metadata {
1564            metadata.insert(format!("kg_{}", key), value.clone());
1565        }
1566
1567        let mut content = format!("**Term:** {}\n", kg_term.term);
1568
1569        if let Some(ref definition) = kg_term.definition {
1570            content.push_str(&format!("**Definition:** {}\n", definition));
1571        }
1572
1573        if !kg_term.synonyms.is_empty() {
1574            content.push_str(&format!("**Synonyms:** {}\n", kg_term.synonyms.join(", ")));
1575        }
1576
1577        if !kg_term.related_terms.is_empty() {
1578            content.push_str(&format!(
1579                "**Related Terms:** {}\n",
1580                kg_term.related_terms.join(", ")
1581            ));
1582        }
1583
1584        if !kg_term.usage_examples.is_empty() {
1585            content.push_str("**Usage Examples:**\n");
1586            for (i, example) in kg_term.usage_examples.iter().enumerate() {
1587                content.push_str(&format!("{}. {}\n", i + 1, example));
1588            }
1589        }
1590
1591        Self {
1592            id: uuid::Uuid::new_v4().to_string(),
1593            context_type: ContextType::KGTermDefinition,
1594            title: format!("KG Term: {}", kg_term.term),
1595            summary: Some(format!(
1596                "Knowledge Graph term '{}' with {} synonyms and {} related terms",
1597                kg_term.term,
1598                kg_term.synonyms.len(),
1599                kg_term.related_terms.len()
1600            )),
1601            content,
1602            metadata,
1603            created_at: chrono::Utc::now(),
1604            relevance_score: kg_term.relevance_score,
1605        }
1606    }
1607
1608    /// Create a new context item from a complete KG index
1609    pub fn from_kg_index(kg_index: &KGIndexInfo) -> Self {
1610        let mut metadata = AHashMap::new();
1611        metadata.insert("source_type".to_string(), "kg_index".to_string());
1612        metadata.insert("kg_name".to_string(), kg_index.name.clone());
1613        metadata.insert("total_terms".to_string(), kg_index.total_terms.to_string());
1614        metadata.insert("total_nodes".to_string(), kg_index.total_nodes.to_string());
1615        metadata.insert("total_edges".to_string(), kg_index.total_edges.to_string());
1616        metadata.insert("source".to_string(), kg_index.source.clone());
1617        metadata.insert(
1618            "last_updated".to_string(),
1619            kg_index.last_updated.to_rfc3339(),
1620        );
1621
1622        if let Some(ref version) = kg_index.version {
1623            metadata.insert("version".to_string(), version.clone());
1624        }
1625
1626        let content = format!(
1627            "**Knowledge Graph Index: {}**\n\n\
1628            **Statistics:**\n\
1629            - Total Terms: {}\n\
1630            - Total Nodes: {}\n\
1631            - Total Edges: {}\n\
1632            - Source: {}\n\
1633            - Last Updated: {}\n\
1634            - Version: {}\n\n\
1635            This context includes the complete knowledge graph index with all terms, \
1636            relationships, and metadata available for reference.",
1637            kg_index.name,
1638            kg_index.total_terms,
1639            kg_index.total_nodes,
1640            kg_index.total_edges,
1641            kg_index.source,
1642            kg_index.last_updated.format("%Y-%m-%d %H:%M:%S UTC"),
1643            kg_index.version.as_deref().unwrap_or("N/A")
1644        );
1645
1646        Self {
1647            id: uuid::Uuid::new_v4().to_string(),
1648            context_type: ContextType::KGIndex,
1649            title: format!("KG Index: {}", kg_index.name),
1650            summary: Some(format!(
1651                "Complete knowledge graph index with {} terms, {} nodes, and {} edges",
1652                kg_index.total_terms, kg_index.total_nodes, kg_index.total_edges
1653            )),
1654            content,
1655            metadata,
1656            created_at: chrono::Utc::now(),
1657            relevance_score: Some(1.0), // High relevance for complete index
1658        }
1659    }
1660}
1661
1662/// Knowledge Graph term definition with comprehensive metadata
1663#[derive(Debug, Clone, Serialize, Deserialize)]
1664#[cfg_attr(feature = "typescript", derive(Tsify))]
1665#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1666pub struct KGTermDefinition {
1667    /// The primary term
1668    pub term: String,
1669    /// Normalized term value
1670    pub normalized_term: NormalizedTermValue,
1671    /// Unique identifier for the term
1672    pub id: u64,
1673    /// Definition of the term
1674    pub definition: Option<String>,
1675    /// Synonyms for the term
1676    pub synonyms: Vec<String>,
1677    /// Related terms
1678    pub related_terms: Vec<String>,
1679    /// Usage examples
1680    pub usage_examples: Vec<String>,
1681    /// URL reference if available
1682    pub url: Option<String>,
1683    /// Additional metadata
1684    pub metadata: AHashMap<String, String>,
1685    /// Relevance score for ranking
1686    pub relevance_score: Option<f64>,
1687}
1688
1689/// Knowledge Graph index information
1690#[derive(Debug, Clone, Serialize, Deserialize)]
1691#[cfg_attr(feature = "typescript", derive(Tsify))]
1692#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1693pub struct KGIndexInfo {
1694    /// Name of the knowledge graph
1695    pub name: String,
1696    /// Total number of terms in the index
1697    pub total_terms: usize,
1698    /// Number of nodes in the graph
1699    pub total_nodes: usize,
1700    /// Number of edges in the graph
1701    pub total_edges: usize,
1702    /// Last updated timestamp
1703    pub last_updated: chrono::DateTime<chrono::Utc>,
1704    /// Source of the knowledge graph
1705    pub source: String,
1706    /// Version of the knowledge graph
1707    pub version: Option<String>,
1708}
1709
1710/// A single message in a conversation, including metadata for cost tracking.
1711#[derive(Debug, Clone, Serialize, Deserialize)]
1712#[cfg_attr(feature = "typescript", derive(Tsify))]
1713#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
1714pub struct ChatMessage {
1715    /// Unique identifier for this message
1716    pub id: MessageId,
1717    /// Role of the message sender
1718    pub role: String, // "system" | "user" | "assistant"
1719    /// The message content
1720    pub content: String,
1721    /// Context items associated with this message
1722    pub context_items: Vec<ContextItem>,
1723    /// Timestamp when the message was created
1724    pub created_at: chrono::DateTime<chrono::Utc>,
1725    /// Token count for this message (if available)
1726    pub token_count: Option<u32>,
1727    /// Model used to generate this message (for assistant messages)
1728    pub model: Option<String>,
1729}
1730
1731impl ChatMessage {
1732    /// Create a new user message
1733    pub fn user(content: String) -> Self {
1734        Self {
1735            id: MessageId::new(),
1736            role: "user".to_string(),
1737            content,
1738            context_items: Vec::new(),
1739            created_at: chrono::Utc::now(),
1740            token_count: None,
1741            model: None,
1742        }
1743    }
1744
1745    /// Create a new assistant message
1746    pub fn assistant(content: String, model: Option<String>) -> Self {
1747        Self {
1748            id: MessageId::new(),
1749            role: "assistant".to_string(),
1750            content,
1751            context_items: Vec::new(),
1752            created_at: chrono::Utc::now(),
1753            token_count: None,
1754            model,
1755        }
1756    }
1757
1758    /// Create a new system message
1759    pub fn system(content: String) -> Self {
1760        Self {
1761            id: MessageId::new(),
1762            role: "system".to_string(),
1763            content,
1764            context_items: Vec::new(),
1765            created_at: chrono::Utc::now(),
1766            token_count: None,
1767            model: None,
1768        }
1769    }
1770
1771    /// Add context item to this message
1772    pub fn add_context(&mut self, context: ContextItem) {
1773        self.context_items.push(context);
1774    }
1775
1776    /// Add multiple context items to this message
1777    pub fn add_contexts(&mut self, contexts: Vec<ContextItem>) {
1778        self.context_items.extend(contexts);
1779    }
1780}
1781
/// A conversation containing multiple messages and context
///
/// Context can be attached at two levels: per message (each
/// [`ChatMessage`]'s `context_items`) and conversation-wide
/// (`global_context`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct Conversation {
    /// Unique identifier for this conversation
    pub id: ConversationId,
    /// Human-readable title for the conversation
    pub title: String,
    /// Messages in this conversation (`add_message` appends to the end)
    pub messages: Vec<ChatMessage>,
    /// Global context items for the entire conversation
    pub global_context: Vec<ContextItem>,
    /// Role used for this conversation
    pub role: RoleName,
    /// When this conversation was created
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// When this conversation was last updated; refreshed by
    /// `add_message` and `add_global_context`
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// Metadata about the conversation, as string key/value pairs
    pub metadata: AHashMap<String, String>,
}
1804
1805impl Conversation {
1806    /// Create a new conversation
1807    pub fn new(title: String, role: RoleName) -> Self {
1808        let now = chrono::Utc::now();
1809        Self {
1810            id: ConversationId::new(),
1811            title,
1812            messages: Vec::new(),
1813            global_context: Vec::new(),
1814            role,
1815            created_at: now,
1816            updated_at: now,
1817            metadata: AHashMap::new(),
1818        }
1819    }
1820
1821    /// Add a message to the conversation
1822    pub fn add_message(&mut self, message: ChatMessage) {
1823        self.messages.push(message);
1824        self.updated_at = chrono::Utc::now();
1825    }
1826
1827    /// Add global context to the conversation
1828    pub fn add_global_context(&mut self, context: ContextItem) {
1829        self.global_context.push(context);
1830        self.updated_at = chrono::Utc::now();
1831    }
1832
1833    /// Get the total context length (approximation)
1834    pub fn estimated_context_length(&self) -> usize {
1835        let message_length: usize = self
1836            .messages
1837            .iter()
1838            .map(|m| {
1839                m.content.len()
1840                    + m.context_items
1841                        .iter()
1842                        .map(|c| c.content.len())
1843                        .sum::<usize>()
1844            })
1845            .sum();
1846        let global_context_length: usize =
1847            self.global_context.iter().map(|c| c.content.len()).sum();
1848        message_length + global_context_length
1849    }
1850}
1851
/// Summary of a conversation for listing purposes
///
/// Can be built from a borrowed [`Conversation`] through the
/// `From<&Conversation>` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ConversationSummary {
    /// Unique identifier for this conversation
    pub id: ConversationId,
    /// Human-readable title for the conversation
    pub title: String,
    /// Role used for this conversation
    pub role: RoleName,
    /// Number of messages in the conversation
    pub message_count: usize,
    /// Number of context items in the conversation
    /// (global context items plus the items attached to each message)
    pub context_count: usize,
    /// When this conversation was created
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// When this conversation was last updated
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// Preview of the first user message (if any); long messages are cut to
    /// at most 100 bytes followed by `...`
    pub preview: Option<String>,
}
1874
// Note: the Persistable implementation for Conversation will be added in the
// service layer to avoid circular dependencies.
1877
1878impl From<&Conversation> for ConversationSummary {
1879    fn from(conversation: &Conversation) -> Self {
1880        let context_count = conversation.global_context.len()
1881            + conversation
1882                .messages
1883                .iter()
1884                .map(|m| m.context_items.len())
1885                .sum::<usize>();
1886
1887        let preview = conversation
1888            .messages
1889            .iter()
1890            .find(|m| m.role == "user")
1891            .map(|m| {
1892                if m.content.len() > 100 {
1893                    format!("{}...", &m.content[..100])
1894                } else {
1895                    m.content.clone()
1896                }
1897            });
1898
1899        Self {
1900            id: conversation.id.clone(),
1901            title: conversation.title.clone(),
1902            role: conversation.role.clone(),
1903            message_count: conversation.messages.len(),
1904            context_count,
1905            created_at: conversation.created_at,
1906            updated_at: conversation.updated_at,
1907            preview,
1908        }
1909    }
1910}
1911
/// Context history that tracks what context has been used across conversations
///
/// `record_usage` keeps one entry per (context id, conversation id) pair and
/// drops the least recently used entries once `max_entries` is exceeded.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextHistory {
    /// Items that have been used in conversations
    pub used_contexts: Vec<ContextHistoryEntry>,
    /// Maximum number of history entries to keep
    pub max_entries: usize,
}
1922
1923impl ContextHistory {
1924    pub fn new(max_entries: usize) -> Self {
1925        Self {
1926            used_contexts: Vec::new(),
1927            max_entries,
1928        }
1929    }
1930
1931    /// Record that a context item was used
1932    pub fn record_usage(
1933        &mut self,
1934        context_id: &str,
1935        conversation_id: &ConversationId,
1936        usage_type: ContextUsageType,
1937    ) {
1938        let entry = ContextHistoryEntry {
1939            context_id: context_id.to_string(),
1940            conversation_id: conversation_id.clone(),
1941            usage_type,
1942            used_at: chrono::Utc::now(),
1943            usage_count: 1,
1944        };
1945
1946        // Check if we already have this context for this conversation
1947        if let Some(existing) = self
1948            .used_contexts
1949            .iter_mut()
1950            .find(|e| e.context_id == context_id && e.conversation_id == *conversation_id)
1951        {
1952            existing.usage_count += 1;
1953            existing.used_at = chrono::Utc::now();
1954        } else {
1955            self.used_contexts.push(entry);
1956        }
1957
1958        // Trim to max entries if needed
1959        if self.used_contexts.len() > self.max_entries {
1960            self.used_contexts.sort_by_key(|e| e.used_at);
1961            self.used_contexts
1962                .drain(0..self.used_contexts.len() - self.max_entries);
1963        }
1964    }
1965
1966    /// Get frequently used contexts
1967    pub fn get_frequent_contexts(&self, limit: usize) -> Vec<&ContextHistoryEntry> {
1968        let mut entries = self.used_contexts.iter().collect::<Vec<_>>();
1969        entries.sort_by_key(|e| std::cmp::Reverse(e.usage_count));
1970        entries.into_iter().take(limit).collect()
1971    }
1972}
1973
/// Entry in context usage history
///
/// One entry describes the use of one context item within one conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct ContextHistoryEntry {
    /// ID of the context item that was used
    pub context_id: String,
    /// Conversation where it was used
    pub conversation_id: ConversationId,
    /// How the context was used (as given when the entry was first recorded;
    /// `ContextHistory::record_usage` does not change it on later uses)
    pub usage_type: ContextUsageType,
    /// When it was used (most recent use)
    pub used_at: chrono::DateTime<chrono::Utc>,
    /// How many times it's been used in this conversation
    pub usage_count: usize,
}
1990
/// How a context item was used
///
/// No serde renaming is applied to this enum, so the variant names
/// themselves are what the derived `Serialize`/`Deserialize` impls use.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum ContextUsageType {
    /// Added manually by user
    Manual,
    /// Added automatically by system
    Automatic,
    /// Added from search results
    SearchResult,
    /// Added from document reference
    DocumentReference,
}
2005
// Routing and priority types
2007
// NOTE(review): the doc comments on this type are deliberately left verbatim.
// It derives `JsonSchema`, and schema generators commonly copy doc comments
// into the generated schema description - confirm before rewording them.
//
// The inner `u8` is public, so a `Priority` can be built directly as
// `Priority(n)` without going through `Priority::new`, which is the only
// place the 0-100 clamp is applied. The derived `Default` yields
// `Priority(0)`.
/// Priority level for routing rules and decisions
/// Higher numeric values indicate higher priority
#[derive(
    Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, JsonSchema, Default,
)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
/// A clamped priority value in the range 0–100 (higher = more urgent).
pub struct Priority(pub u8);
2017
2018impl Priority {
2019    /// Create a new priority with the given value
2020    pub fn new(value: u8) -> Self {
2021        Self(value.clamp(0, 100))
2022    }
2023
2024    /// Get the priority value
2025    pub fn value(&self) -> u8 {
2026        self.0
2027    }
2028
2029    /// Check if this is high priority (>= 80)
2030    pub fn is_high(&self) -> bool {
2031        self.0 >= 80
2032    }
2033
2034    /// Check if this is medium priority (>= 40 && < 80)
2035    pub fn is_medium(&self) -> bool {
2036        self.0 >= 40 && self.0 < 80
2037    }
2038
2039    /// Check if this is low priority (< 40)
2040    pub fn is_low(&self) -> bool {
2041        self.0 < 40
2042    }
2043
2044    /// Maximum priority value
2045    pub const MAX: Self = Self(100);
2046
2047    /// High priority (default for fast/expensive rules)
2048    pub const HIGH: Self = Self(80);
2049
2050    /// Medium priority (default for standard rules)
2051    pub const MEDIUM: Self = Self(50);
2052
2053    /// Low priority (default for fallback rules)
2054    pub const LOW: Self = Self(20);
2055
2056    /// Minimum priority value
2057    pub const MIN: Self = Self(0);
2058}
2059
2060impl fmt::Display for Priority {
2061    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2062        write!(f, "{}", self.0)
2063    }
2064}
2065
2066impl From<u8> for Priority {
2067    fn from(value: u8) -> Self {
2068        Self::new(value)
2069    }
2070}
2071
2072impl From<i32> for Priority {
2073    fn from(value: i32) -> Self {
2074        Self::new(value as u8)
2075    }
2076}
2077
/// A routing rule with pattern matching and priority
///
/// `RoutingRule::new` creates an enabled rule with no description, no tags
/// and both timestamps set to the creation instant.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct RoutingRule {
    /// Unique identifier for this rule
    pub id: String,

    /// Name of the rule (human-readable)
    pub name: String,

    /// Pattern to match (can be regex, exact string, or concept name)
    pub pattern: String,

    /// Priority of this rule (higher = more important)
    pub priority: Priority,

    /// Provider to route to when this rule matches
    pub provider: String,

    /// Model to use when this rule matches
    pub model: String,

    /// Optional description of when this rule applies
    pub description: Option<String>,

    /// Tags for categorizing rules
    pub tags: Vec<String>,

    /// Whether this rule is enabled
    pub enabled: bool,

    /// When this rule was created
    pub created_at: chrono::DateTime<chrono::Utc>,

    /// When this rule was last updated (`touch` sets it to the current time)
    pub updated_at: chrono::DateTime<chrono::Utc>,
}
2116
2117impl RoutingRule {
2118    /// Create a new routing rule
2119    pub fn new(
2120        id: String,
2121        name: String,
2122        pattern: String,
2123        priority: Priority,
2124        provider: String,
2125        model: String,
2126    ) -> Self {
2127        let now = chrono::Utc::now();
2128        Self {
2129            id,
2130            name,
2131            pattern,
2132            priority,
2133            provider,
2134            model,
2135            description: None,
2136            tags: Vec::new(),
2137            enabled: true,
2138            created_at: now,
2139            updated_at: now,
2140        }
2141    }
2142
2143    /// Create a rule with default medium priority
2144    pub fn with_defaults(
2145        id: String,
2146        name: String,
2147        pattern: String,
2148        provider: String,
2149        model: String,
2150    ) -> Self {
2151        Self::new(id, name, pattern, Priority::MEDIUM, provider, model)
2152    }
2153
2154    /// Set the description
2155    pub fn with_description(mut self, description: String) -> Self {
2156        self.description = Some(description);
2157        self
2158    }
2159
2160    /// Add a tag
2161    pub fn with_tag(mut self, tag: String) -> Self {
2162        self.tags.push(tag);
2163        self
2164    }
2165
2166    /// Set enabled status
2167    pub fn with_enabled(mut self, enabled: bool) -> Self {
2168        self.enabled = enabled;
2169        self
2170    }
2171
2172    /// Update the rule's timestamp
2173    pub fn touch(&mut self) {
2174        self.updated_at = chrono::Utc::now();
2175    }
2176}
2177
/// Result of pattern matching with priority scoring
///
/// `PatternMatch::new` derives `weighted_score` from `score` and `priority`
/// as `score * (priority value / 100)`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct PatternMatch {
    /// The concept that was matched
    pub concept: String,

    /// Provider to route to
    pub provider: String,

    /// Model to use
    pub model: String,

    /// Match score (0.0 to 1.0)
    pub score: f64,

    /// Priority of the matched rule
    pub priority: Priority,

    /// Combined weighted score (score * priority_factor)
    pub weighted_score: f64,

    /// The rule that was matched (`PatternMatch::simple` uses `"default"`)
    pub rule_id: String,
}
2204
2205impl PatternMatch {
2206    /// Create a new pattern match
2207    pub fn new(
2208        concept: String,
2209        provider: String,
2210        model: String,
2211        score: f64,
2212        priority: Priority,
2213        rule_id: String,
2214    ) -> Self {
2215        let priority_factor = priority.value() as f64 / 100.0;
2216        let weighted_score = score * priority_factor;
2217
2218        Self {
2219            concept,
2220            provider,
2221            model,
2222            score,
2223            priority,
2224            weighted_score,
2225            rule_id,
2226        }
2227    }
2228
2229    /// Create a simple pattern match with default priority
2230    pub fn simple(concept: String, provider: String, model: String, score: f64) -> Self {
2231        Self::new(
2232            concept,
2233            provider,
2234            model,
2235            score,
2236            Priority::MEDIUM,
2237            "default".to_string(),
2238        )
2239    }
2240}
2241
/// Routing decision with priority information
///
/// Note that `RoutingDecision::default(provider, model)` is an inherent
/// two-argument constructor; this type does not derive `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct RoutingDecision {
    /// Provider to route to
    pub provider: String,

    /// Model to use
    pub model: String,

    /// The scenario that was matched
    pub scenario: RoutingScenario,

    /// Priority of this decision
    pub priority: Priority,

    /// Confidence score (0.0 to 1.0)
    pub confidence: f64,

    /// The rule that led to this decision (if any)
    pub rule_id: Option<String>,

    /// Reason for this decision
    pub reason: String,
}
2268
2269impl RoutingDecision {
2270    /// Create a new routing decision
2271    pub fn new(
2272        provider: String,
2273        model: String,
2274        scenario: RoutingScenario,
2275        priority: Priority,
2276        confidence: f64,
2277        reason: String,
2278    ) -> Self {
2279        Self {
2280            provider,
2281            model,
2282            scenario,
2283            priority,
2284            confidence,
2285            rule_id: None,
2286            reason,
2287        }
2288    }
2289
2290    /// Create a decision with a specific rule
2291    pub fn with_rule(
2292        provider: String,
2293        model: String,
2294        scenario: RoutingScenario,
2295        priority: Priority,
2296        confidence: f64,
2297        rule_id: String,
2298        reason: String,
2299    ) -> Self {
2300        Self {
2301            provider,
2302            model,
2303            scenario,
2304            priority,
2305            confidence,
2306            rule_id: Some(rule_id),
2307            reason,
2308        }
2309    }
2310
2311    /// Create a simple default decision
2312    pub fn default(provider: String, model: String) -> Self {
2313        Self::new(
2314            provider,
2315            model,
2316            RoutingScenario::Default,
2317            Priority::LOW,
2318            0.5,
2319            "Default routing".to_string(),
2320        )
2321    }
2322}
2323
// NOTE(review): doc comments below are left verbatim because this enum
// derives `JsonSchema`, which may surface them in the generated schema -
// confirm before rewording.
//
// Every variant carries an explicit `#[serde(rename = "...")]`, so the wire
// names are the lower-case strings in those attributes rather than the Rust
// variant names. `Default` is the `#[default]` variant.
/// Routing scenario types
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, Default)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum RoutingScenario {
    /// Default routing scenario
    #[serde(rename = "default")]
    #[default]
    Default,

    /// Background processing (low priority, cost-optimized)
    #[serde(rename = "background")]
    Background,

    /// Thinking/reasoning tasks (high quality)
    #[serde(rename = "think")]
    Think,

    /// Long context tasks
    #[serde(rename = "long_context")]
    LongContext,

    /// Web search required
    #[serde(rename = "web_search")]
    WebSearch,

    /// Image processing required
    #[serde(rename = "image")]
    Image,

    /// Pattern-based routing with concept name
    #[serde(rename = "pattern")]
    Pattern(String),

    /// Priority-based routing
    #[serde(rename = "priority")]
    Priority,

    /// Custom scenario
    #[serde(rename = "custom")]
    Custom(String),
}
2366
2367impl fmt::Display for RoutingScenario {
2368    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2369        match self {
2370            Self::Default => write!(f, "default"),
2371            Self::Background => write!(f, "background"),
2372            Self::Think => write!(f, "think"),
2373            Self::LongContext => write!(f, "long_context"),
2374            Self::WebSearch => write!(f, "web_search"),
2375            Self::Image => write!(f, "image"),
2376            Self::Pattern(concept) => write!(f, "pattern:{}", concept),
2377            Self::Priority => write!(f, "priority"),
2378            Self::Custom(name) => write!(f, "custom:{}", name),
2379        }
2380    }
2381}
2382
/// Multi-agent context for coordinating between different AI agents
///
/// `add_agent` registers an agent and creates its (initially empty) entry in
/// `agent_contexts`; `add_agent_context` only adds to entries that already
/// exist there.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct MultiAgentContext {
    /// Unique identifier for the multi-agent session
    pub session_id: String,
    /// Agents participating in this context
    pub agents: Vec<AgentInfo>,
    /// Shared context items available to all agents
    pub shared_context: Vec<ContextItem>,
    /// Agent-specific context, keyed by agent id
    pub agent_contexts: AHashMap<String, Vec<ContextItem>>,
    /// Communication log between agents
    pub agent_communications: Vec<AgentCommunication>,
    /// When this session was created
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// When this session was last updated
    pub updated_at: chrono::DateTime<chrono::Utc>,
}
2403
2404impl MultiAgentContext {
2405    pub fn new() -> Self {
2406        let now = chrono::Utc::now();
2407        Self {
2408            session_id: uuid::Uuid::new_v4().to_string(),
2409            agents: Vec::new(),
2410            shared_context: Vec::new(),
2411            agent_contexts: AHashMap::new(),
2412            agent_communications: Vec::new(),
2413            created_at: now,
2414            updated_at: now,
2415        }
2416    }
2417
2418    /// Add an agent to the session
2419    pub fn add_agent(&mut self, agent: AgentInfo) {
2420        self.agents.push(agent.clone());
2421        self.agent_contexts.insert(agent.id, Vec::new());
2422        self.updated_at = chrono::Utc::now();
2423    }
2424
2425    /// Add context for a specific agent
2426    pub fn add_agent_context(&mut self, agent_id: &str, context: ContextItem) {
2427        if let Some(contexts) = self.agent_contexts.get_mut(agent_id) {
2428            contexts.push(context);
2429            self.updated_at = chrono::Utc::now();
2430        }
2431    }
2432
2433    /// Record communication between agents
2434    pub fn record_communication(
2435        &mut self,
2436        from_agent: &str,
2437        to_agent: Option<&str>,
2438        message: String,
2439    ) {
2440        let communication = AgentCommunication {
2441            from_agent: from_agent.to_string(),
2442            to_agent: to_agent.map(|s| s.to_string()),
2443            message,
2444            timestamp: chrono::Utc::now(),
2445        };
2446        self.agent_communications.push(communication);
2447        self.updated_at = chrono::Utc::now();
2448    }
2449}
2450
2451impl Default for MultiAgentContext {
2452    fn default() -> Self {
2453        Self::new()
2454    }
2455}
2456
/// Information about an AI agent in a multi-agent context
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct AgentInfo {
    /// Unique identifier for the agent
    /// (used by `MultiAgentContext` as the key into `agent_contexts`)
    pub id: String,
    /// Human-readable name of the agent
    pub name: String,
    /// Role/specialty of the agent
    pub role: String,
    /// Capabilities or description of what this agent does
    pub capabilities: Vec<String>,
    /// Model or provider powering this agent
    pub model: Option<String>,
}
2473
/// Communication between agents in a multi-agent context
///
/// Entries are appended to `MultiAgentContext::agent_communications` by
/// `MultiAgentContext::record_communication`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub struct AgentCommunication {
    /// ID of the agent sending the message
    pub from_agent: String,
    /// ID of the agent receiving the message (None for broadcast)
    pub to_agent: Option<String>,
    /// The communication message
    pub message: String,
    /// When this communication occurred
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
2488
// ============================================================================
// Dynamic Ontology Types - Schema-First Knowledge Graph with Grounding
// ============================================================================
2492
/// Normalization method used for grounding
///
/// Serialized in `snake_case` (`"exact"`, `"fuzzy"`, `"graph_rank"`).
/// The default is [`NormalizationMethod::Exact`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum NormalizationMethod {
    /// Exact match via Aho-Corasick
    #[default]
    Exact,
    /// Fuzzy match via Levenshtein or Jaro-Winkler
    Fuzzy,
    /// Graph rank-based prioritization
    GraphRank,
}
2505
/// Grounding metadata for normalized terms (Dynamic Ontology)
///
/// Every field is optional; the derived `Default` leaves them all `None`,
/// while `GroundingMetadata::new` fills in all five.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GroundingMetadata {
    /// Canonical URI from ontology (NCIt, HGNC, etc.)
    pub normalized_uri: Option<String>,
    /// Human-friendly label for display
    pub normalized_label: Option<String>,
    /// Source ontology (NCIt, HGNC, custom)
    pub normalized_prov: Option<String>,
    /// Similarity/confidence score (0.0 - 1.0)
    pub normalized_score: Option<f32>,
    /// Method used for normalization
    pub normalized_method: Option<NormalizationMethod>,
}
2520
2521impl GroundingMetadata {
2522    /// Create new grounding metadata with URI and score
2523    pub fn new(
2524        uri: String,
2525        label: String,
2526        prov: String,
2527        score: f32,
2528        method: NormalizationMethod,
2529    ) -> Self {
2530        Self {
2531            normalized_uri: Some(uri),
2532            normalized_label: Some(label),
2533            normalized_prov: Some(prov),
2534            normalized_score: Some(score),
2535            normalized_method: Some(method),
2536        }
2537    }
2538}
2539
/// Coverage governance signal
///
/// Produced by `CoverageSignal::compute`, which derives `coverage_ratio` and
/// `needs_review` from the other values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageSignal {
    /// Total categories in extracted schema
    pub total_categories: usize,
    /// Categories matched in ontology catalog
    pub matched_categories: usize,
    /// Coverage ratio = matched/total (`compute` uses 0.0 when total is 0)
    pub coverage_ratio: f32,
    /// Threshold for needing review
    pub threshold: f32,
    /// Whether this needs human review
    /// (`compute` sets it when `coverage_ratio < threshold`)
    pub needs_review: bool,
}
2554
2555impl CoverageSignal {
2556    /// Compute coverage signal from categories and matched count
2557    pub fn compute(categories: &[String], matched: usize, threshold: f32) -> Self {
2558        let total = categories.len();
2559        let ratio = if total > 0 {
2560            matched as f32 / total as f32
2561        } else {
2562            0.0
2563        };
2564        Self {
2565            total_categories: total,
2566            matched_categories: matched,
2567            coverage_ratio: ratio,
2568            threshold,
2569            needs_review: ratio < threshold,
2570        }
2571    }
2572}
2573
/// Entity types for oncology schema (feature-gated)
///
/// Only compiled with the `medical` feature. Variants are serialized in
/// `snake_case`, as noted on each variant.
#[cfg(feature = "medical")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
    /// Serialized as `"cancer_diagnosis"`
    CancerDiagnosis,
    /// Serialized as `"tumor"`
    Tumor,
    /// Serialized as `"genomic_variant"`
    GenomicVariant,
    /// Serialized as `"biomarker"`
    Biomarker,
    /// Serialized as `"drug"`
    Drug,
    /// Serialized as `"treatment"`
    Treatment,
    /// Serialized as `"side_effect"`
    SideEffect,
}
2587
/// Relationship types for oncology schema (feature-gated)
///
/// Only compiled with the `medical` feature. Variants are serialized in
/// `SCREAMING_SNAKE_CASE`, as noted on each variant.
#[cfg(feature = "medical")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum RelationshipType {
    /// Serialized as `"HAS_TUMOR"`
    HasTumor,
    /// Serialized as `"HAS_VARIANT"`
    HasVariant,
    /// Serialized as `"HAS_BIOMARKER"`
    HasBiomarker,
    /// Serialized as `"TREATED_WITH"`
    TreatedWith,
    /// Serialized as `"CAUSES"`
    Causes,
    /// Serialized as `"HAS_DIAGNOSIS"`
    HasDiagnosis,
}
2600
/// Extracted entity from text
///
/// The entity type is a plain string rather than an enum, so this struct is
/// available regardless of which domain-specific features are enabled.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedEntity {
    /// Type of entity (string for generic cross-domain use)
    pub entity_type: String,
    /// Raw value from text
    pub raw_value: String,
    /// Normalized value if available
    pub normalized_value: Option<String>,
    /// Grounding metadata
    pub grounding: Option<GroundingMetadata>,
}
2613
/// Extracted relationship from text
///
/// Source and target are carried as strings.
// NOTE(review): it is not visible here whether `source` / `target` hold raw
// values, normalized values or ids of `ExtractedEntity` - confirm and document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedRelationship {
    /// Type of relationship (string for generic cross-domain use)
    pub relationship_type: String,
    /// Source entity
    pub source: String,
    /// Target entity
    pub target: String,
    /// Confidence score
    pub confidence: f32,
}
2626
/// Schema signal extracted from text
///
/// Bundles the entities and relationships found in one extraction together
/// with an overall confidence value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaSignal {
    /// Extracted entities
    pub entities: Vec<ExtractedEntity>,
    /// Extracted relationships
    pub relationships: Vec<ExtractedRelationship>,
    /// Overall confidence score
    pub confidence: f32,
}
2637
// ============================================================================
// Ontology Schema Types - Schema-First Knowledge Graph Definition (#547)
// ============================================================================
2641
/// Entity type definition in an ontology schema
///
/// Fields marked `#[serde(default)]` may be omitted from the serialized
/// schema; they then deserialize to `None` / an empty list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyEntityType {
    /// Unique identifier within the schema (e.g., "chapter", "concept", "author")
    pub id: String,
    /// Human-readable label
    pub label: String,
    /// Canonical URI prefix for grounding (e.g., `https://schema.org/Chapter`)
    #[serde(default)]
    pub uri_prefix: Option<String>,
    /// Alternative names / synonyms for matching
    #[serde(default)]
    pub aliases: Vec<String>,
    /// Category for coverage grouping (e.g., "core", "supporting", "optional")
    #[serde(default)]
    pub category: Option<String>,
}
2659
/// Relationship type definition in an ontology schema
///
/// Describes a directed relationship from one entity type to another; both
/// ends are referenced by entity type ID.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyRelationshipType {
    /// Relationship identifier (e.g., "references", "defines")
    pub id: String,
    /// Human-readable label
    pub label: String,
    /// Source entity type ID
    pub source_type: String,
    /// Target entity type ID
    pub target_type: String,
}
2672
/// Anti-pattern definition for detection
///
/// An anti-pattern is described by an id, a free-text description and the
/// list of indicator terms associated with it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyAntiPattern {
    /// Anti-pattern identifier
    pub id: String,
    /// Description of what this anti-pattern represents
    pub description: String,
    /// Terms that indicate this anti-pattern
    pub indicators: Vec<String>,
}
2683
/// Schema-first ontology definition
///
/// Loaded from JSON file, used to build thesaurus for extraction.
///
/// `relationship_types` and `anti_patterns` are `#[serde(default)]`, so a
/// schema file may omit them; `name`, `version` and `entity_types` carry no
/// such default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologySchema {
    /// Schema name
    pub name: String,
    /// Schema version
    pub version: String,
    /// Entity type definitions
    pub entity_types: Vec<OntologyEntityType>,
    /// Relationship type definitions
    #[serde(default)]
    pub relationship_types: Vec<OntologyRelationshipType>,
    /// Anti-patterns to detect
    #[serde(default)]
    pub anti_patterns: Vec<OntologyAntiPattern>,
}
2702
2703impl OntologySchema {
2704    /// Load schema from JSON file
2705    pub fn load_from_file(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
2706        let content = std::fs::read_to_string(path)?;
2707        let schema: Self = serde_json::from_str(&content)?;
2708        Ok(schema)
2709    }
2710
2711    /// Build thesaurus entries from schema entity types + aliases
2712    ///
2713    /// Each entity type label and its aliases become thesaurus entries
2714    /// with the URI prefix as the URL for grounding.
2715    /// Returns tuples of (id, term, url).
2716    pub fn to_thesaurus_entries(&self) -> Vec<(String, String, Option<String>)> {
2717        let mut entries = Vec::new();
2718        for entity_type in &self.entity_types {
2719            let url = entity_type
2720                .uri_prefix
2721                .clone()
2722                .unwrap_or_else(|| format!("kg://{}", entity_type.id));
2723            // Primary label
2724            entries.push((
2725                entity_type.id.clone(),
2726                entity_type.label.clone(),
2727                Some(url.clone()),
2728            ));
2729            // Aliases
2730            for alias in &entity_type.aliases {
2731                entries.push((entity_type.id.clone(), alias.clone(), Some(url.clone())));
2732            }
2733        }
2734        entries
2735    }
2736
2737    /// Get all entity type IDs for coverage calculation
2738    pub fn category_ids(&self) -> Vec<String> {
2739        self.entity_types.iter().map(|e| e.id.clone()).collect()
2740    }
2741
2742    /// Get URI for a matched entity type ID
2743    pub fn uri_for(&self, entity_type_id: &str) -> Option<String> {
2744        self.entity_types
2745            .iter()
2746            .find(|e| e.id == entity_type_id)
2747            .and_then(|e| e.uri_prefix.clone())
2748    }
2749}
2750
2751#[cfg(test)]
2752mod tests {
2753    use super::*;
2754
2755    #[test]
2756    fn test_search_query_logical_operators() {
2757        // Test single term query (backward compatibility)
2758        let single_query = SearchQuery {
2759            search_term: NormalizedTermValue::new("rust".to_string()),
2760            search_terms: None,
2761            operator: None,
2762            skip: None,
2763            limit: Some(10),
2764            role: Some(RoleName::new("test")),
2765            layer: Layer::default(),
2766            include_pinned: false,
2767            min_quality: None,
2768        };
2769
2770        assert!(!single_query.is_multi_term_query());
2771        assert_eq!(single_query.get_all_terms().len(), 1);
2772        assert_eq!(single_query.get_operator(), LogicalOperator::Or); // Default
2773
2774        // Test multi-term query with AND operator
2775        let and_query = SearchQuery::with_terms_and_operator(
2776            NormalizedTermValue::new("machine".to_string()),
2777            vec![NormalizedTermValue::new("learning".to_string())],
2778            LogicalOperator::And,
2779            Some(RoleName::new("test")),
2780        );
2781
2782        assert!(and_query.is_multi_term_query());
2783        assert_eq!(and_query.get_all_terms().len(), 2);
2784        assert_eq!(and_query.get_operator(), LogicalOperator::And);
2785
2786        // Test multi-term query with OR operator
2787        let or_query = SearchQuery::with_terms_and_operator(
2788            NormalizedTermValue::new("neural".to_string()),
2789            vec![NormalizedTermValue::new("networks".to_string())],
2790            LogicalOperator::Or,
2791            Some(RoleName::new("test")),
2792        );
2793
2794        assert!(or_query.is_multi_term_query());
2795        assert_eq!(or_query.get_all_terms().len(), 2);
2796        assert_eq!(or_query.get_operator(), LogicalOperator::Or);
2797    }
2798
2799    #[test]
2800    fn test_logical_operator_serialization() {
2801        // Test LogicalOperator serialization
2802        let and_op = LogicalOperator::And;
2803        let or_op = LogicalOperator::Or;
2804
2805        let and_json = serde_json::to_string(&and_op).unwrap();
2806        let or_json = serde_json::to_string(&or_op).unwrap();
2807
2808        assert_eq!(and_json, "\"and\"");
2809        assert_eq!(or_json, "\"or\"");
2810
2811        // Test deserialization
2812        let and_deser: LogicalOperator = serde_json::from_str("\"and\"").unwrap();
2813        let or_deser: LogicalOperator = serde_json::from_str("\"or\"").unwrap();
2814
2815        assert_eq!(and_deser, LogicalOperator::And);
2816        assert_eq!(or_deser, LogicalOperator::Or);
2817    }
2818
2819    #[test]
2820    fn test_search_query_serialization() {
2821        let query = SearchQuery {
2822            search_term: NormalizedTermValue::new("test".to_string()),
2823            search_terms: Some(vec![
2824                NormalizedTermValue::new("additional".to_string()),
2825                NormalizedTermValue::new("terms".to_string()),
2826            ]),
2827            operator: Some(LogicalOperator::And),
2828            skip: Some(0),
2829            limit: Some(10),
2830            role: Some(RoleName::new("test_role")),
2831            layer: Layer::default(),
2832            include_pinned: false,
2833            min_quality: None,
2834        };
2835
2836        let json = serde_json::to_string(&query).unwrap();
2837        let deserialized: SearchQuery = serde_json::from_str(&json).unwrap();
2838
2839        assert_eq!(query.search_term, deserialized.search_term);
2840        assert_eq!(query.search_terms, deserialized.search_terms);
2841        assert_eq!(query.operator, deserialized.operator);
2842        assert_eq!(query.skip, deserialized.skip);
2843        assert_eq!(query.limit, deserialized.limit);
2844        assert_eq!(query.role, deserialized.role);
2845    }
2846
2847    #[test]
2848    fn test_priority_creation_and_comparison() {
2849        let high = Priority::HIGH;
2850        let medium = Priority::MEDIUM;
2851        let low = Priority::LOW;
2852        let custom = Priority::new(75);
2853
2854        assert_eq!(high.value(), 80);
2855        assert_eq!(medium.value(), 50);
2856        assert_eq!(low.value(), 20);
2857        assert_eq!(custom.value(), 75);
2858
2859        assert!(high.is_high());
2860        assert!(!medium.is_high());
2861        assert!(medium.is_medium());
2862        assert!(low.is_low());
2863
2864        // Test ordering
2865        assert!(high > medium);
2866        assert!(medium > low);
2867        assert!(custom > medium);
2868        assert!(custom < high);
2869
2870        // Test bounds
2871        let max = Priority::new(150);
2872        assert_eq!(max.value(), 100);
2873        let min = Priority::new(0);
2874        assert_eq!(min.value(), 0);
2875    }
2876
2877    #[test]
2878    fn test_routing_rule_creation() {
2879        let rule = RoutingRule::new(
2880            "test-rule".to_string(),
2881            "Test Rule".to_string(),
2882            "test.*pattern".to_string(),
2883            Priority::HIGH,
2884            "openai".to_string(),
2885            "gpt-4".to_string(),
2886        )
2887        .with_description("A test rule for unit testing".to_string())
2888        .with_tag("test".to_string())
2889        .with_tag("example".to_string());
2890
2891        assert_eq!(rule.id, "test-rule");
2892        assert_eq!(rule.name, "Test Rule");
2893        assert_eq!(rule.pattern, "test.*pattern");
2894        assert_eq!(rule.priority, Priority::HIGH);
2895        assert_eq!(rule.provider, "openai");
2896        assert_eq!(rule.model, "gpt-4");
2897        assert_eq!(
2898            rule.description,
2899            Some("A test rule for unit testing".to_string())
2900        );
2901        assert_eq!(rule.tags, vec!["test", "example"]);
2902        assert!(rule.enabled);
2903    }
2904
2905    #[test]
2906    fn test_routing_rule_defaults() {
2907        let rule = RoutingRule::with_defaults(
2908            "default-rule".to_string(),
2909            "Default Rule".to_string(),
2910            "default".to_string(),
2911            "anthropic".to_string(),
2912            "claude-3-sonnet".to_string(),
2913        );
2914
2915        assert_eq!(rule.priority, Priority::MEDIUM);
2916        assert!(rule.enabled);
2917        assert!(rule.tags.is_empty());
2918        assert!(rule.description.is_none());
2919    }
2920
2921    #[test]
2922    fn test_pattern_match() {
2923        let pattern_match = PatternMatch::new(
2924            "machine-learning".to_string(),
2925            "openai".to_string(),
2926            "gpt-4".to_string(),
2927            0.95,
2928            Priority::HIGH,
2929            "ml-rule".to_string(),
2930        );
2931
2932        assert_eq!(pattern_match.concept, "machine-learning");
2933        assert_eq!(pattern_match.provider, "openai");
2934        assert_eq!(pattern_match.model, "gpt-4");
2935        assert_eq!(pattern_match.score, 0.95);
2936        assert_eq!(pattern_match.priority, Priority::HIGH);
2937        assert_eq!(pattern_match.rule_id, "ml-rule");
2938
2939        // Weighted score should be score * priority_factor
2940        assert_eq!(pattern_match.weighted_score, 0.95 * 0.8);
2941    }
2942
2943    #[test]
2944    fn test_pattern_match_simple() {
2945        let simple = PatternMatch::simple(
2946            "test".to_string(),
2947            "anthropic".to_string(),
2948            "claude-3-haiku".to_string(),
2949            0.8,
2950        );
2951
2952        assert_eq!(simple.priority, Priority::MEDIUM);
2953        assert_eq!(simple.rule_id, "default");
2954        assert_eq!(simple.weighted_score, 0.8 * 0.5);
2955    }
2956
2957    #[test]
2958    fn test_routing_decision() {
2959        let decision = RoutingDecision::new(
2960            "openai".to_string(),
2961            "gpt-4".to_string(),
2962            RoutingScenario::Think,
2963            Priority::HIGH,
2964            0.9,
2965            "High priority thinking task".to_string(),
2966        );
2967
2968        assert_eq!(decision.provider, "openai");
2969        assert_eq!(decision.model, "gpt-4");
2970        assert_eq!(decision.scenario, RoutingScenario::Think);
2971        assert_eq!(decision.priority, Priority::HIGH);
2972        assert_eq!(decision.confidence, 0.9);
2973        assert_eq!(decision.reason, "High priority thinking task");
2974        assert!(decision.rule_id.is_none());
2975    }
2976
2977    #[test]
2978    fn test_routing_decision_with_rule() {
2979        let decision = RoutingDecision::with_rule(
2980            "anthropic".to_string(),
2981            "claude-3-sonnet".to_string(),
2982            RoutingScenario::Pattern("web-search".to_string()),
2983            Priority::MEDIUM,
2984            0.85,
2985            "web-rule".to_string(),
2986            "Web search pattern matched".to_string(),
2987        );
2988
2989        assert_eq!(decision.rule_id, Some("web-rule".to_string()));
2990        assert_eq!(
2991            decision.scenario,
2992            RoutingScenario::Pattern("web-search".to_string())
2993        );
2994    }
2995
2996    #[test]
2997    fn test_routing_decision_default() {
2998        let default = RoutingDecision::default("openai".to_string(), "gpt-3.5-turbo".to_string());
2999
3000        assert_eq!(default.provider, "openai");
3001        assert_eq!(default.model, "gpt-3.5-turbo");
3002        assert_eq!(default.scenario, RoutingScenario::Default);
3003        assert_eq!(default.priority, Priority::LOW);
3004        assert_eq!(default.confidence, 0.5);
3005        assert_eq!(default.reason, "Default routing");
3006    }
3007
3008    #[test]
3009    fn test_routing_scenario_serialization() {
3010        let scenarios = vec![
3011            RoutingScenario::Default,
3012            RoutingScenario::Background,
3013            RoutingScenario::Think,
3014            RoutingScenario::LongContext,
3015            RoutingScenario::WebSearch,
3016            RoutingScenario::Image,
3017            RoutingScenario::Pattern("test".to_string()),
3018            RoutingScenario::Priority,
3019            RoutingScenario::Custom("special".to_string()),
3020        ];
3021
3022        for scenario in scenarios {
3023            let json = serde_json::to_string(&scenario).unwrap();
3024            let deserialized: RoutingScenario = serde_json::from_str(&json).unwrap();
3025            assert_eq!(scenario, deserialized);
3026        }
3027    }
3028
3029    #[test]
3030    fn test_routing_scenario_display() {
3031        assert_eq!(format!("{}", RoutingScenario::Default), "default");
3032        assert_eq!(format!("{}", RoutingScenario::Think), "think");
3033        assert_eq!(
3034            format!("{}", RoutingScenario::Pattern("ml".to_string())),
3035            "pattern:ml"
3036        );
3037        assert_eq!(
3038            format!("{}", RoutingScenario::Custom("test".to_string())),
3039            "custom:test"
3040        );
3041    }
3042
3043    #[test]
3044    fn test_priority_serialization() {
3045        let priority = Priority::new(75);
3046        let json = serde_json::to_string(&priority).unwrap();
3047        let deserialized: Priority = serde_json::from_str(&json).unwrap();
3048        assert_eq!(priority, deserialized);
3049        assert_eq!(deserialized.value(), 75);
3050    }
3051
3052    #[test]
3053    fn test_routing_rule_serialization() {
3054        let rule = RoutingRule::new(
3055            "serialize-test".to_string(),
3056            "Serialize Test".to_string(),
3057            "test-pattern".to_string(),
3058            Priority::MEDIUM,
3059            "provider".to_string(),
3060            "model".to_string(),
3061        );
3062
3063        let json = serde_json::to_string(&rule).unwrap();
3064        let deserialized: RoutingRule = serde_json::from_str(&json).unwrap();
3065        assert_eq!(rule.id, deserialized.id);
3066        assert_eq!(rule.name, deserialized.name);
3067        assert_eq!(rule.priority, deserialized.priority);
3068        assert_eq!(rule.provider, deserialized.provider);
3069        assert_eq!(rule.model, deserialized.model);
3070    }
3071
3072    #[test]
3073    fn test_document_type_serialization() {
3074        let types = vec![
3075            DocumentType::KgEntry,
3076            DocumentType::Document,
3077            DocumentType::ConfigDocument,
3078        ];
3079
3080        for doc_type in types {
3081            let json = serde_json::to_string(&doc_type).unwrap();
3082            let deserialized: DocumentType = serde_json::from_str(&json).unwrap();
3083            assert_eq!(doc_type, deserialized);
3084        }
3085    }
3086
3087    #[test]
3088    fn test_document_defaults_for_new_fields() {
3089        let json = r#"{
3090            "id":"doc-1",
3091            "url":"file:///tmp/doc.md",
3092            "title":"Doc",
3093            "body":"Body"
3094        }"#;
3095
3096        let doc: Document = serde_json::from_str(json).unwrap();
3097        assert_eq!(doc.doc_type, DocumentType::KgEntry);
3098        assert!(doc.synonyms.is_none());
3099        assert!(doc.route.is_none());
3100        assert!(doc.priority.is_none());
3101    }
3102
3103    #[test]
3104    fn test_ontology_schema_deserialize() {
3105        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
3106        let schema: OntologySchema = serde_json::from_str(json).unwrap();
3107        assert_eq!(schema.name, "Publishing Domain Model");
3108        assert_eq!(schema.version, "1.0.0");
3109        assert_eq!(schema.entity_types.len(), 3);
3110        assert_eq!(schema.relationship_types.len(), 1);
3111        assert_eq!(schema.anti_patterns.len(), 1);
3112    }
3113
3114    #[test]
3115    fn test_ontology_schema_to_thesaurus_entries() {
3116        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
3117        let schema: OntologySchema = serde_json::from_str(json).unwrap();
3118        let entries = schema.to_thesaurus_entries();
3119        // 3 primary labels + 2 + 2 + 3 aliases = 10 entries
3120        assert_eq!(entries.len(), 10);
3121        // Check that primary labels are present
3122        assert!(entries.iter().any(|(_, term, _)| term == "Chapter"));
3123        assert!(entries.iter().any(|(_, term, _)| term == "Concept"));
3124        assert!(entries.iter().any(|(_, term, _)| term == "Knowledge Graph"));
3125        // Check that aliases are present
3126        assert!(entries.iter().any(|(_, term, _)| term == "section"));
3127        assert!(entries.iter().any(|(_, term, _)| term == "KG"));
3128        // Check URIs are populated
3129        assert!(entries.iter().all(|(_, _, url)| url.is_some()));
3130    }
3131
3132    #[test]
3133    fn test_ontology_schema_category_ids() {
3134        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
3135        let schema: OntologySchema = serde_json::from_str(json).unwrap();
3136        let ids = schema.category_ids();
3137        assert_eq!(ids.len(), 3);
3138        assert!(ids.contains(&"chapter".to_string()));
3139        assert!(ids.contains(&"concept".to_string()));
3140        assert!(ids.contains(&"knowledge_graph".to_string()));
3141    }
3142
3143    #[test]
3144    fn test_ontology_schema_uri_for() {
3145        let json = include_str!("../test-fixtures/sample_ontology_schema.json");
3146        let schema: OntologySchema = serde_json::from_str(json).unwrap();
3147        assert_eq!(
3148            schema.uri_for("chapter"),
3149            Some("https://schema.org/Chapter".to_string())
3150        );
3151        assert_eq!(
3152            schema.uri_for("concept"),
3153            Some("https://schema.org/DefinedTerm".to_string())
3154        );
3155        assert_eq!(schema.uri_for("nonexistent"), None);
3156    }
3157
3158    #[test]
3159    fn test_ontology_schema_minimal() {
3160        // Minimal schema with only required fields
3161        let json = r#"{
3162            "name": "Minimal",
3163            "version": "0.1.0",
3164            "entity_types": [
3165                {"id": "item", "label": "Item"}
3166            ]
3167        }"#;
3168        let schema: OntologySchema = serde_json::from_str(json).unwrap();
3169        assert_eq!(schema.name, "Minimal");
3170        assert_eq!(schema.entity_types.len(), 1);
3171        assert!(schema.relationship_types.is_empty());
3172        assert!(schema.anti_patterns.is_empty());
3173        assert!(schema.entity_types[0].aliases.is_empty());
3174        assert!(schema.entity_types[0].uri_prefix.is_none());
3175    }
3176
3177    #[test]
3178    fn test_layer_enum() {
3179        // Test default is Layer::One
3180        let default: Layer = Default::default();
3181        assert_eq!(default, Layer::One);
3182
3183        // Test from_u8
3184        assert_eq!(Layer::from_u8(1), Some(Layer::One));
3185        assert_eq!(Layer::from_u8(2), Some(Layer::Two));
3186        assert_eq!(Layer::from_u8(3), Some(Layer::Three));
3187        assert_eq!(Layer::from_u8(0), None);
3188        assert_eq!(Layer::from_u8(4), None);
3189
3190        // Test Display
3191        assert_eq!(format!("{}", Layer::One), "1");
3192        assert_eq!(format!("{}", Layer::Two), "2");
3193        assert_eq!(format!("{}", Layer::Three), "3");
3194
3195        // Test includes_content
3196        assert!(!Layer::One.includes_content());
3197        assert!(Layer::Two.includes_content());
3198        assert!(Layer::Three.includes_content());
3199
3200        // Test includes_full_content
3201        assert!(!Layer::One.includes_full_content());
3202        assert!(!Layer::Two.includes_full_content());
3203        assert!(Layer::Three.includes_full_content());
3204    }
3205
3206    #[test]
3207    fn test_extract_first_paragraph_simple() {
3208        let body = "First paragraph here.\n\nSecond paragraph here.";
3209        assert_eq!(extract_first_paragraph(body), "First paragraph here.");
3210    }
3211
3212    #[test]
3213    fn test_extract_first_paragraph_with_yaml_frontmatter() {
3214        let body = "---\ntitle: My Document\ntags: [rust, programming]\n---\n\nThis is the actual first paragraph.\nMore content here.";
3215        assert_eq!(
3216            extract_first_paragraph(body),
3217            "This is the actual first paragraph."
3218        );
3219    }
3220
3221    #[test]
3222    fn test_extract_first_paragraph_empty_lines() {
3223        let body = "\n\n\nFirst paragraph after empty lines.";
3224        assert_eq!(
3225            extract_first_paragraph(body),
3226            "First paragraph after empty lines."
3227        );
3228    }
3229
3230    #[test]
3231    fn test_extract_first_paragraph_single_line() {
3232        let body = "Just one line";
3233        assert_eq!(extract_first_paragraph(body), "Just one line");
3234    }
3235
3236    #[test]
3237    fn test_layer_serialization() {
3238        // Test that Layer serializes correctly
3239        let query = SearchQuery {
3240            search_term: NormalizedTermValue::new("test".to_string()),
3241            search_terms: None,
3242            operator: None,
3243            skip: None,
3244            limit: None,
3245            role: None,
3246            layer: Layer::Two,
3247            include_pinned: false,
3248            min_quality: None,
3249        };
3250
3251        let json = serde_json::to_string(&query).unwrap();
3252        assert!(json.contains("\"layer\""));
3253
3254        // Deserialize and check layer is preserved
3255        let deserialized: SearchQuery = serde_json::from_str(&json).unwrap();
3256        assert_eq!(deserialized.layer, Layer::Two);
3257    }
3258
3259    #[test]
3260    fn test_quality_score_composite() {
3261        // Test with all three scores
3262        let full_score = QualityScore {
3263            knowledge: Some(0.8),
3264            logic: Some(0.6),
3265            structure: Some(0.7),
3266            last_evaluated: None,
3267        };
3268        assert!((full_score.composite() - 0.7).abs() < f64::EPSILON); // (0.8 + 0.6 + 0.7) / 3
3269
3270        // Test with two scores
3271        let partial_score = QualityScore {
3272            knowledge: Some(0.9),
3273            logic: None,
3274            structure: Some(0.5),
3275            last_evaluated: None,
3276        };
3277        assert!((partial_score.composite() - 0.7).abs() < f64::EPSILON); // (0.9 + 0.5) / 2
3278
3279        // Test with one score
3280        let single_score = QualityScore {
3281            knowledge: Some(0.8),
3282            logic: None,
3283            structure: None,
3284            last_evaluated: None,
3285        };
3286        assert!((single_score.composite() - 0.8).abs() < f64::EPSILON);
3287
3288        // Test with no scores (default)
3289        let empty_score = QualityScore::default();
3290        assert_eq!(empty_score.composite(), 0.0);
3291    }
3292
3293    #[test]
3294    fn test_quality_score_serialization() {
3295        let score = QualityScore {
3296            knowledge: Some(0.8),
3297            logic: Some(0.6),
3298            structure: Some(0.7),
3299            last_evaluated: None,
3300        };
3301
3302        let json = serde_json::to_string(&score).unwrap();
3303        assert!(json.contains("0.8"));
3304        assert!(json.contains("0.6"));
3305        assert!(json.contains("0.7"));
3306
3307        let deserialized: QualityScore = serde_json::from_str(&json).unwrap();
3308        assert_eq!(deserialized.knowledge, Some(0.8));
3309        assert_eq!(deserialized.logic, Some(0.6));
3310        assert_eq!(deserialized.structure, Some(0.7));
3311    }
3312
3313    #[test]
3314    fn test_quality_score_default_serialization() {
3315        // Test that default QualityScore serializes/deserializes correctly
3316        let score = QualityScore::default();
3317        let json = serde_json::to_string(&score).unwrap();
3318        let deserialized: QualityScore = serde_json::from_str(&json).unwrap();
3319        assert!(deserialized.knowledge.is_none());
3320        assert!(deserialized.logic.is_none());
3321        assert!(deserialized.structure.is_none());
3322        assert!(deserialized.last_evaluated.is_none());
3323    }
3324
3325    #[test]
3326    fn test_indexed_document_with_quality_score() {
3327        let doc = IndexedDocument {
3328            id: "test-doc-1".to_string(),
3329            matched_edges: vec![],
3330            rank: 10,
3331            tags: vec!["rust".to_string()],
3332            nodes: vec![1, 2],
3333            quality_score: Some(QualityScore {
3334                knowledge: Some(0.8),
3335                logic: Some(0.6),
3336                structure: Some(0.7),
3337                last_evaluated: None,
3338            }),
3339        };
3340
3341        assert_eq!(doc.id, "test-doc-1");
3342        assert!((doc.quality_score.as_ref().unwrap().composite() - 0.7).abs() < f64::EPSILON);
3343    }
3344
3345    #[test]
3346    fn test_indexed_document_from_document_quality_score_none() {
3347        let doc = Document {
3348            id: "doc-1".to_string(),
3349            url: "https://example.com".to_string(),
3350            title: "Test".to_string(),
3351            body: "Body".to_string(),
3352            description: None,
3353            summarization: None,
3354            stub: None,
3355            tags: None,
3356            rank: None,
3357            source_haystack: None,
3358            doc_type: DocumentType::Document,
3359            synonyms: None,
3360            route: None,
3361            priority: None,
3362            quality_score: None,
3363        };
3364
3365        let indexed = IndexedDocument::from_document(doc);
3366        assert!(indexed.quality_score.is_none());
3367    }
3368
3369    #[test]
3370    fn test_indexed_document_serialization_backward_compat() {
3371        // Test that IndexedDocument without quality_score deserializes correctly
3372        // This simulates old data that doesn't have the quality_score field
3373        // NOTE: node IDs are u64 integers
3374        let json = r#"{
3375            "id": "doc-1",
3376            "matched_edges": [],
3377            "rank": 5,
3378            "tags": ["test"],
3379            "nodes": [1]
3380        }"#;
3381
3382        let doc: IndexedDocument = serde_json::from_str(json).unwrap();
3383        assert_eq!(doc.id, "doc-1");
3384        assert!(doc.quality_score.is_none());
3385    }
3386
3387    #[test]
3388    fn test_thesaurus_source_hash_roundtrip() {
3389        let mut thesaurus = Thesaurus::new("test".to_string());
3390        thesaurus.source_hash = Some("abc123".to_string());
3391
3392        let json = serde_json::to_string(&thesaurus).unwrap();
3393        let deserialized: Thesaurus = serde_json::from_str(&json).unwrap();
3394
3395        assert_eq!(thesaurus.source_hash, deserialized.source_hash);
3396    }
3397
3398    #[test]
3399    fn test_thesaurus_source_hash_backward_compat() {
3400        // Old thesaurus JSON without source_hash should deserialize with None
3401        let json = r#"{"name":"test","data":{}}"#;
3402        let thesaurus: Thesaurus = serde_json::from_str(json).unwrap();
3403        assert!(thesaurus.source_hash.is_none());
3404    }
3405
3406    #[test]
3407    fn test_thesaurus_with_source_hash() {
3408        let thesaurus = Thesaurus::new("test".to_string()).with_source_hash("hash123".to_string());
3409        assert_eq!(thesaurus.source_hash, Some("hash123".to_string()));
3410    }
3411}