mpl_core/
ontology.rs

1//! Ontology Adherence Checking
2//!
3//! Verifies that responses conform to domain-specific ontology constraints
4//! beyond what JSON Schema can express.
5
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8use tracing::debug;
9
10/// Ontology definition for a domain
11#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12pub struct Ontology {
13    /// Name of this ontology
14    #[serde(default)]
15    pub name: String,
16
17    /// Description
18    #[serde(default)]
19    pub description: String,
20
21    /// Allowed values for specific fields (enum constraints)
22    #[serde(default)]
23    pub allowed_values: HashMap<String, Vec<serde_json::Value>>,
24
25    /// Required field relationships
26    #[serde(default)]
27    pub relationships: Vec<Relationship>,
28
29    /// Field type constraints (more specific than JSON Schema)
30    #[serde(default)]
31    pub type_constraints: HashMap<String, TypeConstraint>,
32
33    /// Cardinality constraints
34    #[serde(default)]
35    pub cardinality: HashMap<String, CardinalityConstraint>,
36
37    /// Custom validation rules (CEL expressions)
38    #[serde(default)]
39    pub custom_rules: Vec<CustomRule>,
40}
41
42/// Relationship between fields
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct Relationship {
45    /// Unique identifier
46    pub id: String,
47
48    /// Source field path
49    pub from: String,
50
51    /// Target field path
52    pub to: String,
53
54    /// Type of relationship
55    pub relation_type: RelationType,
56
57    /// Optional condition (when this relationship applies)
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub condition: Option<String>,
60
61    /// Error message on violation
62    #[serde(default)]
63    pub message: String,
64}
65
66/// Types of relationships
67#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
68#[serde(rename_all = "snake_case")]
69pub enum RelationType {
70    /// from implies to must exist
71    Implies,
72    /// from and to are mutually exclusive
73    Excludes,
74    /// from must be less than to
75    LessThan,
76    /// from must be less than or equal to to
77    LessThanOrEqual,
78    /// from must be greater than to
79    GreaterThan,
80    /// from must be greater than or equal to to
81    GreaterThanOrEqual,
82    /// from must equal to
83    Equals,
84    /// from must not equal to
85    NotEquals,
86    /// from must be a subset of to (for arrays)
87    SubsetOf,
88    /// from must contain to
89    Contains,
90    /// from must start with to (for strings)
91    StartsWith,
92    /// from must end with to (for strings)
93    EndsWith,
94}
95
96/// Type constraint for a field
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct TypeConstraint {
99    /// Expected semantic type (e.g., "email", "url", "phone", "uuid")
100    pub semantic_type: String,
101
102    /// Optional regex pattern
103    #[serde(skip_serializing_if = "Option::is_none")]
104    pub pattern: Option<String>,
105
106    /// Optional min value (for numbers)
107    #[serde(skip_serializing_if = "Option::is_none")]
108    pub min: Option<f64>,
109
110    /// Optional max value (for numbers)
111    #[serde(skip_serializing_if = "Option::is_none")]
112    pub max: Option<f64>,
113
114    /// Optional min length (for strings/arrays)
115    #[serde(skip_serializing_if = "Option::is_none")]
116    pub min_length: Option<usize>,
117
118    /// Optional max length (for strings/arrays)
119    #[serde(skip_serializing_if = "Option::is_none")]
120    pub max_length: Option<usize>,
121}
122
123/// Cardinality constraint for arrays
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct CardinalityConstraint {
126    /// Minimum items
127    #[serde(default)]
128    pub min: usize,
129
130    /// Maximum items (None = unlimited)
131    #[serde(skip_serializing_if = "Option::is_none")]
132    pub max: Option<usize>,
133
134    /// Whether items must be unique
135    #[serde(default)]
136    pub unique: bool,
137}
138
139/// Custom validation rule
140#[derive(Debug, Clone, Serialize, Deserialize)]
141pub struct CustomRule {
142    /// Unique identifier
143    pub id: String,
144
145    /// CEL expression that must evaluate to true
146    pub expression: String,
147
148    /// Error message on violation
149    pub message: String,
150
151    /// Severity level
152    #[serde(default)]
153    pub severity: ViolationSeverity,
154}
155
156/// Severity of ontology violations
157#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
158#[serde(rename_all = "snake_case")]
159pub enum ViolationSeverity {
160    /// Critical violation - must be fixed
161    #[default]
162    Error,
163    /// Non-critical but should be addressed
164    Warning,
165    /// Informational only
166    Info,
167}
168
169/// A single ontology violation
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct OntologyViolation {
172    /// Rule or constraint ID that was violated
173    pub rule_id: String,
174
175    /// Type of violation
176    pub violation_type: ViolationType,
177
178    /// Field path involved
179    pub path: String,
180
181    /// Error message
182    pub message: String,
183
184    /// Severity
185    pub severity: ViolationSeverity,
186
187    /// Expected value (if applicable)
188    #[serde(skip_serializing_if = "Option::is_none")]
189    pub expected: Option<serde_json::Value>,
190
191    /// Actual value (if applicable)
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub actual: Option<serde_json::Value>,
194}
195
196/// Types of violations
197#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
198#[serde(rename_all = "snake_case")]
199pub enum ViolationType {
200    /// Value not in allowed set
201    InvalidValue,
202    /// Relationship constraint violated
203    RelationshipViolation,
204    /// Type constraint violated
205    TypeViolation,
206    /// Cardinality constraint violated
207    CardinalityViolation,
208    /// Custom rule violated
209    CustomRuleViolation,
210}
211
212/// Result of ontology adherence check
213#[derive(Debug, Clone, Serialize, Deserialize)]
214pub struct OntologyResult {
215    /// Overall adherence score (0.0 - 1.0)
216    pub score: f64,
217
218    /// Whether the payload adheres to the ontology
219    pub adheres: bool,
220
221    /// Total constraints checked
222    pub constraints_checked: usize,
223
224    /// Number of violations
225    pub violation_count: usize,
226
227    /// Error-level violations
228    pub error_count: usize,
229
230    /// Warning-level violations
231    pub warning_count: usize,
232
233    /// All violations found
234    pub violations: Vec<OntologyViolation>,
235}
236
237/// Ontology adherence checker
238pub struct OntologyChecker {
239    ontology: Ontology,
240}
241
242impl OntologyChecker {
243    /// Create a new checker with the given ontology
244    pub fn new(ontology: Ontology) -> Self {
245        Self { ontology }
246    }
247
248    /// Check a payload against the ontology
249    pub fn check(&self, payload: &serde_json::Value) -> OntologyResult {
250        let mut violations = Vec::new();
251        let mut constraints_checked = 0;
252
253        // Check allowed values
254        for (path, allowed) in &self.ontology.allowed_values {
255            constraints_checked += 1;
256            if let Some(value) = get_json_path(payload, path) {
257                if !allowed.contains(value) {
258                    violations.push(OntologyViolation {
259                        rule_id: format!("allowed_values:{}", path),
260                        violation_type: ViolationType::InvalidValue,
261                        path: path.clone(),
262                        message: format!(
263                            "Value '{}' not in allowed set",
264                            value
265                        ),
266                        severity: ViolationSeverity::Error,
267                        expected: Some(serde_json::Value::Array(allowed.clone())),
268                        actual: Some(value.clone()),
269                    });
270                }
271            }
272        }
273
274        // Check relationships
275        for rel in &self.ontology.relationships {
276            constraints_checked += 1;
277            if let Some(violation) = self.check_relationship(payload, rel) {
278                violations.push(violation);
279            }
280        }
281
282        // Check type constraints
283        for (path, constraint) in &self.ontology.type_constraints {
284            constraints_checked += 1;
285            if let Some(value) = get_json_path(payload, path) {
286                if let Some(violation) = self.check_type_constraint(path, value, constraint) {
287                    violations.push(violation);
288                }
289            }
290        }
291
292        // Check cardinality
293        for (path, constraint) in &self.ontology.cardinality {
294            constraints_checked += 1;
295            if let Some(value) = get_json_path(payload, path) {
296                if let Some(violation) = self.check_cardinality(path, value, constraint) {
297                    violations.push(violation);
298                }
299            }
300        }
301
302        // Count by severity
303        let error_count = violations
304            .iter()
305            .filter(|v| v.severity == ViolationSeverity::Error)
306            .count();
307        let warning_count = violations
308            .iter()
309            .filter(|v| v.severity == ViolationSeverity::Warning)
310            .count();
311
312        let score = if constraints_checked > 0 {
313            1.0 - (violations.len() as f64 / constraints_checked as f64)
314        } else {
315            1.0
316        };
317
318        let adheres = error_count == 0;
319
320        debug!(
321            "Ontology check: score={:.2}, violations={}, adheres={}",
322            score,
323            violations.len(),
324            adheres
325        );
326
327        OntologyResult {
328            score,
329            adheres,
330            constraints_checked,
331            violation_count: violations.len(),
332            error_count,
333            warning_count,
334            violations,
335        }
336    }
337
338    /// Check a relationship constraint
339    fn check_relationship(
340        &self,
341        payload: &serde_json::Value,
342        rel: &Relationship,
343    ) -> Option<OntologyViolation> {
344        let from_value = get_json_path(payload, &rel.from);
345        let to_value = get_json_path(payload, &rel.to);
346
347        let violated = match rel.relation_type {
348            RelationType::Implies => {
349                // If from exists and is truthy, to must exist
350                from_value.map(is_truthy).unwrap_or(false) && to_value.is_none()
351            }
352            RelationType::Excludes => {
353                // from and to cannot both exist
354                from_value.is_some() && to_value.is_some()
355            }
356            RelationType::LessThan => {
357                match (from_value.and_then(|v| v.as_f64()), to_value.and_then(|v| v.as_f64())) {
358                    (Some(f), Some(t)) => f >= t,
359                    _ => false,
360                }
361            }
362            RelationType::LessThanOrEqual => {
363                match (from_value.and_then(|v| v.as_f64()), to_value.and_then(|v| v.as_f64())) {
364                    (Some(f), Some(t)) => f > t,
365                    _ => false,
366                }
367            }
368            RelationType::GreaterThan => {
369                match (from_value.and_then(|v| v.as_f64()), to_value.and_then(|v| v.as_f64())) {
370                    (Some(f), Some(t)) => f <= t,
371                    _ => false,
372                }
373            }
374            RelationType::GreaterThanOrEqual => {
375                match (from_value.and_then(|v| v.as_f64()), to_value.and_then(|v| v.as_f64())) {
376                    (Some(f), Some(t)) => f < t,
377                    _ => false,
378                }
379            }
380            RelationType::Equals => from_value != to_value,
381            RelationType::NotEquals => from_value == to_value && from_value.is_some(),
382            RelationType::SubsetOf => {
383                match (from_value, to_value) {
384                    (Some(serde_json::Value::Array(from)), Some(serde_json::Value::Array(to))) => {
385                        let to_set: HashSet<_> = to.iter().collect();
386                        !from.iter().all(|v| to_set.contains(v))
387                    }
388                    _ => false,
389                }
390            }
391            RelationType::Contains => {
392                match (from_value, to_value) {
393                    (Some(serde_json::Value::String(s)), Some(serde_json::Value::String(sub))) => {
394                        !s.contains(sub.as_str())
395                    }
396                    (Some(serde_json::Value::Array(arr)), Some(item)) => !arr.contains(item),
397                    _ => false,
398                }
399            }
400            RelationType::StartsWith => {
401                match (from_value, to_value) {
402                    (Some(serde_json::Value::String(s)), Some(serde_json::Value::String(prefix))) => {
403                        !s.starts_with(prefix.as_str())
404                    }
405                    _ => false,
406                }
407            }
408            RelationType::EndsWith => {
409                match (from_value, to_value) {
410                    (Some(serde_json::Value::String(s)), Some(serde_json::Value::String(suffix))) => {
411                        !s.ends_with(suffix.as_str())
412                    }
413                    _ => false,
414                }
415            }
416        };
417
418        if violated {
419            Some(OntologyViolation {
420                rule_id: rel.id.clone(),
421                violation_type: ViolationType::RelationshipViolation,
422                path: rel.from.clone(),
423                message: if rel.message.is_empty() {
424                    format!(
425                        "Relationship {:?} between '{}' and '{}' violated",
426                        rel.relation_type, rel.from, rel.to
427                    )
428                } else {
429                    rel.message.clone()
430                },
431                severity: ViolationSeverity::Error,
432                expected: None,
433                actual: from_value.cloned(),
434            })
435        } else {
436            None
437        }
438    }
439
440    /// Check a type constraint
441    fn check_type_constraint(
442        &self,
443        path: &str,
444        value: &serde_json::Value,
445        constraint: &TypeConstraint,
446    ) -> Option<OntologyViolation> {
447        // Check semantic type patterns
448        let valid = match constraint.semantic_type.as_str() {
449            "email" => {
450                value.as_str().map(|s| s.contains('@') && s.contains('.')).unwrap_or(false)
451            }
452            "url" => {
453                value.as_str()
454                    .map(|s| s.starts_with("http://") || s.starts_with("https://"))
455                    .unwrap_or(false)
456            }
457            "uuid" => {
458                value.as_str()
459                    .map(|s| s.len() == 36 && s.chars().filter(|c| *c == '-').count() == 4)
460                    .unwrap_or(false)
461            }
462            "phone" => {
463                value.as_str()
464                    .map(|s| s.chars().filter(|c| c.is_ascii_digit()).count() >= 10)
465                    .unwrap_or(false)
466            }
467            "date" => {
468                value.as_str()
469                    .map(|s| s.len() == 10 && s.chars().filter(|c| *c == '-').count() == 2)
470                    .unwrap_or(false)
471            }
472            "datetime" => {
473                value.as_str()
474                    .map(|s| s.contains('T') || s.contains(' '))
475                    .unwrap_or(false)
476            }
477            _ => true, // Unknown type, skip
478        };
479
480        if !valid {
481            return Some(OntologyViolation {
482                rule_id: format!("type:{}", path),
483                violation_type: ViolationType::TypeViolation,
484                path: path.to_string(),
485                message: format!(
486                    "Value does not match semantic type '{}'",
487                    constraint.semantic_type
488                ),
489                severity: ViolationSeverity::Error,
490                expected: Some(serde_json::Value::String(constraint.semantic_type.clone())),
491                actual: Some(value.clone()),
492            });
493        }
494
495        // Check numeric range
496        if let Some(num) = value.as_f64() {
497            if let Some(min) = constraint.min {
498                if num < min {
499                    return Some(OntologyViolation {
500                        rule_id: format!("type:{}", path),
501                        violation_type: ViolationType::TypeViolation,
502                        path: path.to_string(),
503                        message: format!("Value {} is less than minimum {}", num, min),
504                        severity: ViolationSeverity::Error,
505                        expected: Some(serde_json::json!({"min": min})),
506                        actual: Some(value.clone()),
507                    });
508                }
509            }
510            if let Some(max) = constraint.max {
511                if num > max {
512                    return Some(OntologyViolation {
513                        rule_id: format!("type:{}", path),
514                        violation_type: ViolationType::TypeViolation,
515                        path: path.to_string(),
516                        message: format!("Value {} is greater than maximum {}", num, max),
517                        severity: ViolationSeverity::Error,
518                        expected: Some(serde_json::json!({"max": max})),
519                        actual: Some(value.clone()),
520                    });
521                }
522            }
523        }
524
525        // Check string length
526        if let Some(s) = value.as_str() {
527            if let Some(min_len) = constraint.min_length {
528                if s.len() < min_len {
529                    return Some(OntologyViolation {
530                        rule_id: format!("type:{}", path),
531                        violation_type: ViolationType::TypeViolation,
532                        path: path.to_string(),
533                        message: format!("String length {} is less than minimum {}", s.len(), min_len),
534                        severity: ViolationSeverity::Error,
535                        expected: Some(serde_json::json!({"min_length": min_len})),
536                        actual: Some(value.clone()),
537                    });
538                }
539            }
540            if let Some(max_len) = constraint.max_length {
541                if s.len() > max_len {
542                    return Some(OntologyViolation {
543                        rule_id: format!("type:{}", path),
544                        violation_type: ViolationType::TypeViolation,
545                        path: path.to_string(),
546                        message: format!("String length {} is greater than maximum {}", s.len(), max_len),
547                        severity: ViolationSeverity::Error,
548                        expected: Some(serde_json::json!({"max_length": max_len})),
549                        actual: Some(value.clone()),
550                    });
551                }
552            }
553        }
554
555        None
556    }
557
558    /// Check a cardinality constraint
559    fn check_cardinality(
560        &self,
561        path: &str,
562        value: &serde_json::Value,
563        constraint: &CardinalityConstraint,
564    ) -> Option<OntologyViolation> {
565        let arr = match value.as_array() {
566            Some(a) => a,
567            None => return None,
568        };
569
570        if arr.len() < constraint.min {
571            return Some(OntologyViolation {
572                rule_id: format!("cardinality:{}", path),
573                violation_type: ViolationType::CardinalityViolation,
574                path: path.to_string(),
575                message: format!(
576                    "Array has {} items, minimum is {}",
577                    arr.len(),
578                    constraint.min
579                ),
580                severity: ViolationSeverity::Error,
581                expected: Some(serde_json::json!({"min": constraint.min})),
582                actual: Some(serde_json::json!(arr.len())),
583            });
584        }
585
586        if let Some(max) = constraint.max {
587            if arr.len() > max {
588                return Some(OntologyViolation {
589                    rule_id: format!("cardinality:{}", path),
590                    violation_type: ViolationType::CardinalityViolation,
591                    path: path.to_string(),
592                    message: format!("Array has {} items, maximum is {}", arr.len(), max),
593                    severity: ViolationSeverity::Error,
594                    expected: Some(serde_json::json!({"max": max})),
595                    actual: Some(serde_json::json!(arr.len())),
596                });
597            }
598        }
599
600        if constraint.unique {
601            let unique_count: HashSet<_> = arr.iter().map(|v| v.to_string()).collect();
602            if unique_count.len() != arr.len() {
603                return Some(OntologyViolation {
604                    rule_id: format!("cardinality:{}", path),
605                    violation_type: ViolationType::CardinalityViolation,
606                    path: path.to_string(),
607                    message: "Array contains duplicate values".to_string(),
608                    severity: ViolationSeverity::Error,
609                    expected: Some(serde_json::json!({"unique": true})),
610                    actual: Some(value.clone()),
611                });
612            }
613        }
614
615        None
616    }
617}
618
619/// Get a value from JSON by dot-separated path
620fn get_json_path<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
621    let parts: Vec<&str> = path.split('.').collect();
622    let mut current = value;
623
624    for part in parts {
625        match current {
626            serde_json::Value::Object(obj) => {
627                current = obj.get(part)?;
628            }
629            serde_json::Value::Array(arr) => {
630                let index: usize = part.parse().ok()?;
631                current = arr.get(index)?;
632            }
633            _ => return None,
634        }
635    }
636
637    Some(current)
638}
639
640/// Check if a JSON value is truthy
641fn is_truthy(value: &serde_json::Value) -> bool {
642    match value {
643        serde_json::Value::Null => false,
644        serde_json::Value::Bool(b) => *b,
645        serde_json::Value::Number(n) => n.as_f64().map(|f| f != 0.0).unwrap_or(false),
646        serde_json::Value::String(s) => !s.is_empty(),
647        serde_json::Value::Array(a) => !a.is_empty(),
648        serde_json::Value::Object(o) => !o.is_empty(),
649    }
650}
651
652#[cfg(test)]
653mod tests {
654    use super::*;
655    use serde_json::json;
656
657    #[test]
658    fn test_allowed_values() {
659        let mut ontology = Ontology::default();
660        ontology.allowed_values.insert(
661            "status".to_string(),
662            vec![json!("active"), json!("inactive"), json!("pending")],
663        );
664
665        let checker = OntologyChecker::new(ontology);
666
667        // Valid
668        let result = checker.check(&json!({"status": "active"}));
669        assert!(result.adheres);
670
671        // Invalid
672        let result = checker.check(&json!({"status": "unknown"}));
673        assert!(!result.adheres);
674        assert_eq!(result.error_count, 1);
675    }
676
677    #[test]
678    fn test_relationship_implies() {
679        let mut ontology = Ontology::default();
680        ontology.relationships.push(Relationship {
681            id: "premium_features".to_string(),
682            from: "is_premium".to_string(),
683            to: "premium_expires_at".to_string(),
684            relation_type: RelationType::Implies,
685            condition: None,
686            message: "Premium users must have an expiration date".to_string(),
687        });
688
689        let checker = OntologyChecker::new(ontology);
690
691        // Valid: premium with expiration
692        let result = checker.check(&json!({
693            "is_premium": true,
694            "premium_expires_at": "2025-01-01"
695        }));
696        assert!(result.adheres);
697
698        // Invalid: premium without expiration
699        let result = checker.check(&json!({"is_premium": true}));
700        assert!(!result.adheres);
701    }
702
703    #[test]
704    fn test_relationship_excludes() {
705        let mut ontology = Ontology::default();
706        ontology.relationships.push(Relationship {
707            id: "draft_published".to_string(),
708            from: "is_draft".to_string(),
709            to: "published_at".to_string(),
710            relation_type: RelationType::Excludes,
711            condition: None,
712            message: "Draft cannot have published_at".to_string(),
713        });
714
715        let checker = OntologyChecker::new(ontology);
716
717        // Valid: draft without published_at
718        let result = checker.check(&json!({"is_draft": true}));
719        assert!(result.adheres);
720
721        // Invalid: both present
722        let result = checker.check(&json!({
723            "is_draft": true,
724            "published_at": "2025-01-01"
725        }));
726        assert!(!result.adheres);
727    }
728
729    #[test]
730    fn test_type_constraint_email() {
731        let mut ontology = Ontology::default();
732        ontology.type_constraints.insert(
733            "email".to_string(),
734            TypeConstraint {
735                semantic_type: "email".to_string(),
736                pattern: None,
737                min: None,
738                max: None,
739                min_length: None,
740                max_length: None,
741            },
742        );
743
744        let checker = OntologyChecker::new(ontology);
745
746        // Valid
747        let result = checker.check(&json!({"email": "test@example.com"}));
748        assert!(result.adheres);
749
750        // Invalid
751        let result = checker.check(&json!({"email": "not-an-email"}));
752        assert!(!result.adheres);
753    }
754
755    #[test]
756    fn test_cardinality_constraint() {
757        let mut ontology = Ontology::default();
758        ontology.cardinality.insert(
759            "tags".to_string(),
760            CardinalityConstraint {
761                min: 1,
762                max: Some(5),
763                unique: true,
764            },
765        );
766
767        let checker = OntologyChecker::new(ontology);
768
769        // Valid
770        let result = checker.check(&json!({"tags": ["a", "b", "c"]}));
771        assert!(result.adheres);
772
773        // Invalid: empty
774        let result = checker.check(&json!({"tags": []}));
775        assert!(!result.adheres);
776
777        // Invalid: duplicates
778        let result = checker.check(&json!({"tags": ["a", "a", "b"]}));
779        assert!(!result.adheres);
780    }
781}