mpl_core/
qom.rs

1//! Quality of Meaning (QoM)
2//!
3//! Framework for measuring and enforcing semantic quality through
4//! observable metrics, negotiated profiles, and actionable breach detection.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// QoM Profile - configuration defining metric thresholds and policies
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct QomProfile {
12    /// Profile name (e.g., "qom-strict-argcheck", "qom-basic")
13    pub name: String,
14
15    /// Metric thresholds
16    pub metrics: QomMetricThresholds,
17
18    /// Retry policy on failure
19    #[serde(skip_serializing_if = "Option::is_none")]
20    pub retry_policy: Option<RetryPolicy>,
21
22    /// Description
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub description: Option<String>,
25}
26
27impl QomProfile {
28    /// Create a basic profile with Schema Fidelity only
29    pub fn basic() -> Self {
30        Self {
31            name: "qom-basic".to_string(),
32            metrics: QomMetricThresholds {
33                schema_fidelity: Some(MetricThreshold::required(1.0)),
34                instruction_compliance: None,
35                groundedness: None,
36                determinism_jitter: None,
37                ontology_adherence: None,
38                tool_outcome_correctness: None,
39            },
40            retry_policy: None,
41            description: Some("Basic validation: Schema Fidelity only".to_string()),
42        }
43    }
44
45    /// Create a strict profile with SF and IC
46    pub fn strict_argcheck() -> Self {
47        Self {
48            name: "qom-strict-argcheck".to_string(),
49            metrics: QomMetricThresholds {
50                schema_fidelity: Some(MetricThreshold::required(1.0)),
51                instruction_compliance: Some(MetricThreshold::required(0.97)),
52                groundedness: None,
53                determinism_jitter: None,
54                ontology_adherence: None,
55                tool_outcome_correctness: None,
56            },
57            retry_policy: Some(RetryPolicy {
58                max_retries: 1,
59                degrade_to: Some("qom-basic".to_string()),
60                on_failure: FailureAction::Escalate,
61            }),
62            description: Some("Strict validation: SF=1.0, IC>=0.97".to_string()),
63        }
64    }
65
66    /// Create an outcome-focused profile with SF and TOC
67    pub fn outcome() -> Self {
68        Self {
69            name: "qom-outcome".to_string(),
70            metrics: QomMetricThresholds {
71                schema_fidelity: Some(MetricThreshold::required(1.0)),
72                instruction_compliance: None,
73                groundedness: None,
74                determinism_jitter: None,
75                ontology_adherence: None,
76                tool_outcome_correctness: Some(MetricThreshold::required(0.9)),
77            },
78            retry_policy: Some(RetryPolicy {
79                max_retries: 2,
80                degrade_to: Some("qom-basic".to_string()),
81                on_failure: FailureAction::Error,
82            }),
83            description: Some("Outcome validation: SF=1.0, TOC>=0.9".to_string()),
84        }
85    }
86
87    /// Create a comprehensive profile with all major metrics
88    pub fn comprehensive() -> Self {
89        Self {
90            name: "qom-comprehensive".to_string(),
91            metrics: QomMetricThresholds {
92                schema_fidelity: Some(MetricThreshold::required(1.0)),
93                instruction_compliance: Some(MetricThreshold::required(0.95)),
94                groundedness: Some(MetricThreshold::sampled(0.8, 0.5)),
95                determinism_jitter: Some(MetricThreshold::sampled(0.9, 0.3)),
96                ontology_adherence: Some(MetricThreshold::required(0.95)),
97                tool_outcome_correctness: Some(MetricThreshold::required(0.9)),
98            },
99            retry_policy: Some(RetryPolicy {
100                max_retries: 2,
101                degrade_to: Some("qom-strict-argcheck".to_string()),
102                on_failure: FailureAction::Escalate,
103            }),
104            description: Some("Comprehensive validation: all metrics enforced".to_string()),
105        }
106    }
107
108    /// Evaluate metrics against this profile
109    pub fn evaluate(&self, metrics: &QomMetrics) -> QomEvaluation {
110        let mut passed = true;
111        let mut failures = Vec::new();
112
113        // Schema Fidelity (mandatory)
114        if let Some(threshold) = &self.metrics.schema_fidelity {
115            if metrics.schema_fidelity < threshold.min {
116                passed = false;
117                failures.push(MetricFailure {
118                    metric: "schema_fidelity".to_string(),
119                    actual: metrics.schema_fidelity,
120                    threshold: threshold.min,
121                });
122            }
123        }
124
125        // Instruction Compliance
126        if let Some(threshold) = &self.metrics.instruction_compliance {
127            if let Some(ic) = metrics.instruction_compliance {
128                if ic < threshold.min {
129                    passed = false;
130                    failures.push(MetricFailure {
131                        metric: "instruction_compliance".to_string(),
132                        actual: ic,
133                        threshold: threshold.min,
134                    });
135                }
136            } else if threshold.min > 0.0 {
137                // IC required but not provided
138                passed = false;
139                failures.push(MetricFailure {
140                    metric: "instruction_compliance".to_string(),
141                    actual: 0.0,
142                    threshold: threshold.min,
143                });
144            }
145        }
146
147        // Groundedness (Phase 2+)
148        if let Some(threshold) = &self.metrics.groundedness {
149            if let Some(g) = metrics.groundedness {
150                if g < threshold.min {
151                    passed = false;
152                    failures.push(MetricFailure {
153                        metric: "groundedness".to_string(),
154                        actual: g,
155                        threshold: threshold.min,
156                    });
157                }
158            }
159        }
160
161        // Determinism Jitter (Phase 2+)
162        if let Some(threshold) = &self.metrics.determinism_jitter {
163            if let Some(dj) = metrics.determinism_jitter {
164                if dj < threshold.min {
165                    passed = false;
166                    failures.push(MetricFailure {
167                        metric: "determinism_jitter".to_string(),
168                        actual: dj,
169                        threshold: threshold.min,
170                    });
171                }
172            }
173        }
174
175        // Ontology Adherence
176        if let Some(threshold) = &self.metrics.ontology_adherence {
177            if let Some(oa) = metrics.ontology_adherence {
178                if oa < threshold.min {
179                    passed = false;
180                    failures.push(MetricFailure {
181                        metric: "ontology_adherence".to_string(),
182                        actual: oa,
183                        threshold: threshold.min,
184                    });
185                }
186            }
187        }
188
189        // Tool Outcome Correctness
190        if let Some(threshold) = &self.metrics.tool_outcome_correctness {
191            if let Some(toc) = metrics.tool_outcome_correctness {
192                if toc < threshold.min {
193                    passed = false;
194                    failures.push(MetricFailure {
195                        metric: "tool_outcome_correctness".to_string(),
196                        actual: toc,
197                        threshold: threshold.min,
198                    });
199                }
200            } else if threshold.min > 0.0 {
201                // TOC required but not provided
202                passed = false;
203                failures.push(MetricFailure {
204                    metric: "tool_outcome_correctness".to_string(),
205                    actual: 0.0,
206                    threshold: threshold.min,
207                });
208            }
209        }
210
211        QomEvaluation {
212            meets_profile: passed,
213            profile: self.name.clone(),
214            failures,
215        }
216    }
217}
218
219/// Metric thresholds for a QoM profile
220#[derive(Debug, Clone, Serialize, Deserialize, Default)]
221pub struct QomMetricThresholds {
222    /// Schema Fidelity: payload conforms to SType schema (mandatory, target: 1.0)
223    #[serde(skip_serializing_if = "Option::is_none")]
224    pub schema_fidelity: Option<MetricThreshold>,
225
226    /// Instruction Compliance: adherence to assertions/constraints
227    #[serde(skip_serializing_if = "Option::is_none")]
228    pub instruction_compliance: Option<MetricThreshold>,
229
230    /// Groundedness: claims supported by citations (Phase 2+)
231    #[serde(skip_serializing_if = "Option::is_none")]
232    pub groundedness: Option<MetricThreshold>,
233
234    /// Determinism under Jitter: output stability (Phase 2+)
235    #[serde(skip_serializing_if = "Option::is_none")]
236    pub determinism_jitter: Option<MetricThreshold>,
237
238    /// Ontology Adherence: domain constraint conformance
239    #[serde(skip_serializing_if = "Option::is_none")]
240    pub ontology_adherence: Option<MetricThreshold>,
241
242    /// Tool Outcome Correctness: side effects match expectations
243    #[serde(skip_serializing_if = "Option::is_none")]
244    pub tool_outcome_correctness: Option<MetricThreshold>,
245}
246
247/// Threshold configuration for a single metric
248#[derive(Debug, Clone, Serialize, Deserialize)]
249pub struct MetricThreshold {
250    /// Minimum acceptable value
251    pub min: f64,
252
253    /// Sampling rate (0.0-1.0) for expensive metrics
254    #[serde(default = "default_sample_rate")]
255    pub sample_rate: f64,
256}
257
/// Serde default for `MetricThreshold::sample_rate`: sample every payload.
fn default_sample_rate() -> f64 {
    const FULL_SAMPLING: f64 = 1.0;
    FULL_SAMPLING
}
261
262impl MetricThreshold {
263    /// Create a required threshold (100% sampling)
264    pub fn required(min: f64) -> Self {
265        Self {
266            min,
267            sample_rate: 1.0,
268        }
269    }
270
271    /// Create a sampled threshold
272    pub fn sampled(min: f64, sample_rate: f64) -> Self {
273        Self { min, sample_rate }
274    }
275}
276
277/// Computed QoM metrics for a payload
278#[derive(Debug, Clone, Serialize, Deserialize, Default)]
279pub struct QomMetrics {
280    /// Schema Fidelity score (0.0-1.0)
281    pub schema_fidelity: f64,
282
283    /// Instruction Compliance score (0.0-1.0)
284    #[serde(skip_serializing_if = "Option::is_none")]
285    pub instruction_compliance: Option<f64>,
286
287    /// Groundedness score (0.0-1.0)
288    #[serde(skip_serializing_if = "Option::is_none")]
289    pub groundedness: Option<f64>,
290
291    /// Determinism Jitter score (0.0-1.0)
292    #[serde(skip_serializing_if = "Option::is_none")]
293    pub determinism_jitter: Option<f64>,
294
295    /// Ontology Adherence score (0.0-1.0)
296    #[serde(skip_serializing_if = "Option::is_none")]
297    pub ontology_adherence: Option<f64>,
298
299    /// Tool Outcome Correctness (pass/fail as 1.0/0.0)
300    #[serde(skip_serializing_if = "Option::is_none")]
301    pub tool_outcome_correctness: Option<f64>,
302}
303
304impl QomMetrics {
305    /// Create metrics with perfect schema fidelity
306    pub fn schema_valid() -> Self {
307        Self {
308            schema_fidelity: 1.0,
309            ..Default::default()
310        }
311    }
312
313    /// Create metrics indicating schema failure
314    pub fn schema_invalid() -> Self {
315        Self {
316            schema_fidelity: 0.0,
317            ..Default::default()
318        }
319    }
320
321    /// Set instruction compliance score
322    pub fn with_instruction_compliance(mut self, score: f64) -> Self {
323        self.instruction_compliance = Some(score);
324        self
325    }
326
327    /// Set tool outcome correctness score
328    pub fn with_tool_outcome_correctness(mut self, score: f64) -> Self {
329        self.tool_outcome_correctness = Some(score);
330        self
331    }
332
333    /// Set groundedness score
334    pub fn with_groundedness(mut self, score: f64) -> Self {
335        self.groundedness = Some(score);
336        self
337    }
338
339    /// Set ontology adherence score
340    pub fn with_ontology_adherence(mut self, score: f64) -> Self {
341        self.ontology_adherence = Some(score);
342        self
343    }
344
345    /// Set determinism jitter score
346    pub fn with_determinism_jitter(mut self, score: f64) -> Self {
347        self.determinism_jitter = Some(score);
348        self
349    }
350
351    /// Convert to a HashMap for reporting
352    pub fn to_map(&self) -> HashMap<String, f64> {
353        let mut map = HashMap::new();
354        map.insert("schema_fidelity".to_string(), self.schema_fidelity);
355        if let Some(ic) = self.instruction_compliance {
356            map.insert("instruction_compliance".to_string(), ic);
357        }
358        if let Some(g) = self.groundedness {
359            map.insert("groundedness".to_string(), g);
360        }
361        if let Some(dj) = self.determinism_jitter {
362            map.insert("determinism_jitter".to_string(), dj);
363        }
364        if let Some(oa) = self.ontology_adherence {
365            map.insert("ontology_adherence".to_string(), oa);
366        }
367        if let Some(toc) = self.tool_outcome_correctness {
368            map.insert("tool_outcome_correctness".to_string(), toc);
369        }
370        map
371    }
372}
373
374/// QoM evaluation result
375#[derive(Debug, Clone, Serialize, Deserialize)]
376pub struct QomEvaluation {
377    /// Whether the payload meets the profile
378    pub meets_profile: bool,
379    /// Profile name evaluated against
380    pub profile: String,
381    /// Failed metrics
382    pub failures: Vec<MetricFailure>,
383}
384
385/// Individual metric failure
386#[derive(Debug, Clone, Serialize, Deserialize)]
387pub struct MetricFailure {
388    pub metric: String,
389    pub actual: f64,
390    pub threshold: f64,
391}
392
393/// QoM Report attached to responses
394#[derive(Debug, Clone, Serialize, Deserialize)]
395pub struct QomReport {
396    /// Whether payload meets negotiated profile
397    pub meets_profile: bool,
398
399    /// Profile evaluated against
400    pub profile: String,
401
402    /// Computed metric scores
403    pub metrics: QomMetrics,
404
405    /// Evaluation details
406    #[serde(skip_serializing_if = "Option::is_none")]
407    pub evaluation: Option<QomEvaluation>,
408
409    /// References to detailed artifacts
410    #[serde(default, skip_serializing_if = "Vec::is_empty")]
411    pub artifact_refs: Vec<String>,
412
413    /// Hints for remediation if failed
414    #[serde(default, skip_serializing_if = "Vec::is_empty")]
415    pub hints: Vec<String>,
416}
417
418impl QomReport {
419    /// Create a passing report
420    pub fn pass(profile: impl Into<String>, metrics: QomMetrics) -> Self {
421        Self {
422            meets_profile: true,
423            profile: profile.into(),
424            metrics,
425            evaluation: None,
426            artifact_refs: Vec::new(),
427            hints: Vec::new(),
428        }
429    }
430
431    /// Create a failing report
432    pub fn fail(profile: impl Into<String>, metrics: QomMetrics, evaluation: QomEvaluation) -> Self {
433        let hints = evaluation
434            .failures
435            .iter()
436            .map(|f| {
437                format!(
438                    "{}: got {:.2}, expected >= {:.2}",
439                    f.metric, f.actual, f.threshold
440                )
441            })
442            .collect();
443
444        Self {
445            meets_profile: false,
446            profile: profile.into(),
447            metrics,
448            evaluation: Some(evaluation),
449            artifact_refs: Vec::new(),
450            hints,
451        }
452    }
453}
454
455/// Retry policy for QoM failures
456#[derive(Debug, Clone, Serialize, Deserialize)]
457pub struct RetryPolicy {
458    /// Maximum retry attempts
459    pub max_retries: u32,
460
461    /// Profile to degrade to on failure
462    #[serde(skip_serializing_if = "Option::is_none")]
463    pub degrade_to: Option<String>,
464
465    /// Action on final failure
466    pub on_failure: FailureAction,
467}
468
469/// Action to take on final QoM failure
470#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
471#[serde(rename_all = "snake_case")]
472pub enum FailureAction {
473    /// Escalate to human/supervisor
474    Escalate,
475    /// Return error to caller
476    Error,
477    /// Log and continue (best effort)
478    Warn,
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_profile() {
        let profile = QomProfile::basic();
        let metrics = QomMetrics::schema_valid();
        let eval = profile.evaluate(&metrics);
        assert!(eval.meets_profile);
        assert!(eval.failures.is_empty());
        assert_eq!(eval.profile, "qom-basic");
    }

    #[test]
    fn test_strict_profile_pass() {
        let profile = QomProfile::strict_argcheck();
        let metrics = QomMetrics::schema_valid().with_instruction_compliance(0.98);
        let eval = profile.evaluate(&metrics);
        assert!(eval.meets_profile);
    }

    #[test]
    fn test_strict_profile_fail() {
        let profile = QomProfile::strict_argcheck();
        let metrics = QomMetrics::schema_valid().with_instruction_compliance(0.90);
        let eval = profile.evaluate(&metrics);
        assert!(!eval.meets_profile);
        assert_eq!(eval.failures.len(), 1);
        assert_eq!(eval.failures[0].metric, "instruction_compliance");
    }

    // A configured instruction-compliance threshold fails when the score was
    // never reported; the failure records 0.0 as the actual value.
    #[test]
    fn test_strict_profile_missing_instruction_compliance() {
        let profile = QomProfile::strict_argcheck();
        let metrics = QomMetrics::schema_valid();
        let eval = profile.evaluate(&metrics);
        assert!(!eval.meets_profile);
        assert_eq!(eval.failures.len(), 1);
        assert_eq!(eval.failures[0].metric, "instruction_compliance");
        assert_eq!(eval.failures[0].actual, 0.0);
        assert_eq!(eval.failures[0].threshold, 0.97);
    }

    #[test]
    fn test_schema_failure() {
        let profile = QomProfile::basic();
        let metrics = QomMetrics::schema_invalid();
        let eval = profile.evaluate(&metrics);
        assert!(!eval.meets_profile);
        assert_eq!(eval.failures.len(), 1);
        assert_eq!(eval.failures[0].metric, "schema_fidelity");
        assert_eq!(eval.failures[0].actual, 0.0);
        assert_eq!(eval.failures[0].threshold, 1.0);
    }

    #[test]
    fn test_outcome_profile_pass() {
        let profile = QomProfile::outcome();
        let metrics = QomMetrics::schema_valid().with_tool_outcome_correctness(0.95);
        let eval = profile.evaluate(&metrics);
        assert!(eval.meets_profile);
    }

    #[test]
    fn test_outcome_profile_fail() {
        let profile = QomProfile::outcome();
        let metrics = QomMetrics::schema_valid().with_tool_outcome_correctness(0.8);
        let eval = profile.evaluate(&metrics);
        assert!(!eval.meets_profile);
        assert_eq!(eval.failures.len(), 1);
        assert_eq!(eval.failures[0].metric, "tool_outcome_correctness");
    }

    // A configured tool-outcome threshold fails when the score was never reported.
    #[test]
    fn test_outcome_profile_missing_tool_outcome() {
        let profile = QomProfile::outcome();
        let metrics = QomMetrics::schema_valid();
        let eval = profile.evaluate(&metrics);
        assert!(!eval.meets_profile);
        assert_eq!(eval.failures.len(), 1);
        assert_eq!(eval.failures[0].metric, "tool_outcome_correctness");
        assert_eq!(eval.failures[0].actual, 0.0);
    }

    // Determinism jitter is deliberately left unset here: an unreported score
    // for that metric does not fail the profile.
    #[test]
    fn test_comprehensive_profile() {
        let profile = QomProfile::comprehensive();
        let metrics = QomMetrics::schema_valid()
            .with_instruction_compliance(0.96)
            .with_groundedness(0.85)
            .with_ontology_adherence(0.98)
            .with_tool_outcome_correctness(0.92);
        let eval = profile.evaluate(&metrics);
        assert!(eval.meets_profile);
    }

    #[test]
    fn test_comprehensive_profile_reports_every_failure_in_order() {
        let profile = QomProfile::comprehensive();
        let metrics = QomMetrics::schema_invalid()
            .with_instruction_compliance(0.1)
            .with_groundedness(0.1)
            .with_determinism_jitter(0.1)
            .with_ontology_adherence(0.1)
            .with_tool_outcome_correctness(0.1);
        let eval = profile.evaluate(&metrics);
        assert!(!eval.meets_profile);
        let names: Vec<&str> = eval.failures.iter().map(|f| f.metric.as_str()).collect();
        assert_eq!(
            names,
            [
                "schema_fidelity",
                "instruction_compliance",
                "groundedness",
                "determinism_jitter",
                "ontology_adherence",
                "tool_outcome_correctness",
            ]
        );
    }

    #[test]
    fn test_to_map_only_includes_present_metrics() {
        let map = QomMetrics::schema_valid().with_groundedness(0.5).to_map();
        assert_eq!(map.len(), 2);
        assert_eq!(map.get("schema_fidelity"), Some(&1.0));
        assert_eq!(map.get("groundedness"), Some(&0.5));
        assert!(!map.contains_key("instruction_compliance"));
    }

    #[test]
    fn test_report_pass_has_no_details() {
        let report = QomReport::pass("qom-basic", QomMetrics::schema_valid());
        assert!(report.meets_profile);
        assert_eq!(report.profile, "qom-basic");
        assert!(report.evaluation.is_none());
        assert!(report.artifact_refs.is_empty());
        assert!(report.hints.is_empty());
    }

    #[test]
    fn test_report_fail_builds_hints() {
        let profile = QomProfile::strict_argcheck();
        let metrics = QomMetrics::schema_valid().with_instruction_compliance(0.90);
        let eval = profile.evaluate(&metrics);
        let report = QomReport::fail(profile.name.clone(), metrics, eval);
        assert!(!report.meets_profile);
        assert_eq!(report.profile, "qom-strict-argcheck");
        assert!(report.evaluation.is_some());
        assert_eq!(
            report.hints,
            vec!["instruction_compliance: got 0.90, expected >= 0.97".to_string()]
        );
    }
}