1use crate::assertions::{AssertionSet, AssertionSetResult, EvaluationContext};
12use crate::qom::QomMetrics;
13use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15use thiserror::Error;
16
/// Errors that metric computations in this module can report.
///
/// Each variant wraps a free-form message that is interpolated into the
/// `Display` text declared by the variant's `#[error]` attribute.
#[derive(Debug, Error)]
pub enum MetricError {
    /// Displayed as `Schema validation error: <message>`.
    #[error("Schema validation error: {0}")]
    SchemaError(String),

    /// Displayed as `Assertion evaluation error: <message>`; produced when
    /// evaluating an assertion set fails.
    #[error("Assertion evaluation error: {0}")]
    AssertionError(String),

    /// Displayed as `TOC verification error: <message>`.
    #[error("TOC verification error: {0}")]
    TocError(String),

    /// Displayed as `Groundedness computation error: <message>`.
    #[error("Groundedness computation error: {0}")]
    GroundednessError(String),

    /// Displayed as `Metric not supported: <message>`.
    #[error("Metric not supported: {0}")]
    NotSupported(String),
}
35
/// Everything a [`QomComputer`] needs to compute metrics for one message.
///
/// Only `stype` and `payload` are required; every other field is optional and
/// is omitted from the serialized form when it is `None` or empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MetricContext {
    /// Type identifier of the message; forwarded to the assertion evaluation
    /// context.
    pub stype: String,

    /// JSON payload that assertions and ontology constraints are checked
    /// against.
    pub payload: serde_json::Value,

    /// Response value; used by the groundedness and determinism metrics and
    /// forwarded to the assertion evaluation context.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response: Option<serde_json::Value>,

    /// Tool name forwarded to the assertion evaluation context.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_name: Option<String>,

    /// Tool arguments forwarded to the assertion evaluation context.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<serde_json::Value>,

    /// Assertions evaluated to obtain the instruction-compliance score.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub assertions: Option<AssertionSet>,

    /// Externally obtained tool-outcome verification, converted into the
    /// tool-outcome-correctness score.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub toc_result: Option<TocResult>,

    /// Sources the response is compared against for groundedness. Defaults to
    /// an empty list when absent from the input.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub sources: Vec<Source>,

    /// Earlier response that the current `response` is diffed against for the
    /// determinism metric.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub previous_response: Option<serde_json::Value>,

    /// Constraints checked against `payload` for the ontology metric.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ontology_constraints: Option<OntologyConstraints>,

    /// Arbitrary key/value metadata forwarded to the assertion evaluation
    /// context. Defaults to an empty map when absent from the input.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, serde_json::Value>,
}
83
84impl MetricContext {
85 pub fn new(stype: impl Into<String>, payload: serde_json::Value) -> Self {
87 Self {
88 stype: stype.into(),
89 payload,
90 ..Default::default()
91 }
92 }
93
94 pub fn with_response(mut self, response: serde_json::Value) -> Self {
96 self.response = Some(response);
97 self
98 }
99
100 pub fn with_assertions(mut self, assertions: AssertionSet) -> Self {
102 self.assertions = Some(assertions);
103 self
104 }
105
106 pub fn with_toc_result(mut self, result: TocResult) -> Self {
108 self.toc_result = Some(result);
109 self
110 }
111
112 pub fn with_sources(mut self, sources: Vec<Source>) -> Self {
114 self.sources = sources;
115 self
116 }
117
118 pub fn with_previous_response(mut self, previous: serde_json::Value) -> Self {
120 self.previous_response = Some(previous);
121 self
122 }
123
124 pub fn with_ontology(mut self, constraints: OntologyConstraints) -> Self {
126 self.ontology_constraints = Some(constraints);
127 self
128 }
129
130 pub fn to_evaluation_context(&self) -> EvaluationContext {
132 EvaluationContext {
133 stype: Some(self.stype.clone()),
134 tool_name: self.tool_name.clone(),
135 arguments: self.arguments.clone(),
136 response: self.response.clone(),
137 metadata: self.metadata.clone(),
138 }
139 }
140}
141
/// Outcome of verifying that a tool call had its intended effect.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocResult {
    /// Whether verification succeeded; maps to a score of 1.0 or 0.0.
    pub verified: bool,

    /// How the verification was performed.
    pub method: TocMethod,

    /// Free-form explanation; populated by [`TocResult::failed`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<String>,

    /// Not set by the constructors in this file.
    /// NOTE(review): presumably the expected outcome - confirm with callers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expected: Option<String>,

    /// Not set by the constructors in this file.
    /// NOTE(review): presumably the observed outcome - confirm with callers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub actual: Option<String>,
}
163
164impl TocResult {
165 pub fn verified(method: TocMethod) -> Self {
167 Self {
168 verified: true,
169 method,
170 details: None,
171 expected: None,
172 actual: None,
173 }
174 }
175
176 pub fn failed(method: TocMethod, details: impl Into<String>) -> Self {
178 Self {
179 verified: false,
180 method,
181 details: Some(details.into()),
182 expected: None,
183 actual: None,
184 }
185 }
186
187 pub fn to_score(&self) -> f64 {
189 if self.verified { 1.0 } else { 0.0 }
190 }
191}
192
/// How a tool outcome was verified.
///
/// Serialized in snake case (`"header"`, `"callback"`, `"poll"`, `"none"`).
/// NOTE(review): the exact meaning of each mechanism is not visible in this
/// file - confirm against the code that produces `TocResult` values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TocMethod {
    /// Serialized as `"header"`.
    Header,
    /// Serialized as `"callback"`.
    Callback,
    /// Serialized as `"poll"`.
    Poll,
    /// Serialized as `"none"`.
    None,
}
206
/// A reference source that a response can be grounded in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Source {
    /// Identifier of the source; copied into the claims derived from it.
    pub id: String,

    /// Text of the source. Sources without content are skipped by the
    /// groundedness computation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,

    /// Confidence attached to the source; defaults to 1.0 when missing from
    /// the deserialized input.
    #[serde(default = "default_confidence")]
    pub confidence: f64,
}
221
/// Serde default for `Source::confidence`: a source with no explicit
/// confidence is treated as fully trusted.
fn default_confidence() -> f64 {
    1.0_f64
}
225
/// Constraints a payload is expected to satisfy.
///
/// All three collections default to empty when absent from the input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OntologyConstraints {
    /// Maps a dotted payload path to the list of values permitted at that
    /// path. Paths that do not resolve in the payload are not reported.
    #[serde(default)]
    pub allowed_values: HashMap<String, Vec<serde_json::Value>>,

    /// Relations that must hold between pairs of payload paths.
    #[serde(default)]
    pub relationships: Vec<OntologyRelation>,

    /// NOTE(review): not consulted by the ontology computation in this file;
    /// presumably maps a path to an expected type name - confirm.
    #[serde(default)]
    pub type_constraints: HashMap<String, String>,
}
241
/// A relation that must hold between two payload locations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyRelation {
    /// Dotted path of the left-hand side, resolved against the payload.
    pub from: String,
    /// Dotted path of the right-hand side, resolved against the payload.
    pub to: String,
    /// The relation required between the two resolved values.
    pub relation: RelationType,
}
252
/// Kind of relation between the `from` and `to` paths of an
/// [`OntologyRelation`]. Serialized in snake case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationType {
    /// If `from` resolves in the payload, `to` must resolve as well.
    Implies,
    /// `from` and `to` must not both resolve in the payload.
    Excludes,
    /// `from` must be numerically smaller than `to`; only checked when both
    /// resolve to numbers.
    LessThan,
    /// The values resolved for `from` and `to` must be equal (two unresolved
    /// paths compare as equal).
    Equals,
}
266
/// Output of [`QomComputer::compute`]: the metric values plus the per-metric
/// details that produced them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricComputeResult {
    /// The computed metric values.
    pub metrics: QomMetrics,

    /// Per-assertion results behind the instruction-compliance score.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub assertion_results: Option<AssertionSetResult>,

    /// The tool-outcome verification the correctness score was derived from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub toc_details: Option<TocResult>,

    /// Claims and method behind the groundedness score.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub groundedness_details: Option<GroundednessResult>,

    /// Field-level differences behind the determinism score.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub determinism_details: Option<DeterminismResult>,

    /// Violations behind the ontology score.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ontology_details: Option<OntologyResult>,

    /// Messages for metric computations that failed; a failure of one metric
    /// does not prevent the others from being computed.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub errors: Vec<String>,
}
297
/// Detailed outcome of the groundedness computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GroundednessResult {
    /// Fraction of claims that are grounded.
    pub score: f64,

    /// One entry per source that carried content.
    pub claims: Vec<Claim>,

    /// Technique that produced the result.
    pub method: GroundednessMethod,
}
310
/// A single groundedness check, derived from one source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Claim {
    /// Text of the claim; the local computation stores the first 100
    /// characters of the source content here.
    pub text: String,

    /// Whether the claim was found to be grounded.
    pub grounded: bool,

    /// Identifier of the source the claim was derived from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,

    /// Confidence value; the local computation copies the source's
    /// confidence.
    pub confidence: f64,
}
327
/// Technique used to decide groundedness. Serialized in snake case.
///
/// NOTE(review): only `Local` is produced in this file; the other variants
/// are presumably set by code elsewhere - confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GroundednessMethod {
    /// Text comparison performed in-process by `QomComputer`.
    Local,
    /// Serialized as `"llm"`.
    Llm,
    /// Serialized as `"hybrid"`.
    Hybrid,
    /// Serialized as `"none"`.
    None,
}
341
/// Detailed outcome of comparing a response with a previous response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismResult {
    /// Similarity between the two responses; 1.0 means they are identical.
    pub similarity: f64,

    /// Field-level differences found; defaults to empty when absent from the
    /// deserialized input.
    #[serde(default)]
    pub differences: Vec<FieldDiff>,

    /// Set by the computation to `similarity >= 0.9`.
    pub acceptable: bool,
}
355
/// One difference between two JSON values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldDiff {
    /// Location of the difference: object keys joined with `.`, array
    /// elements written as `[index]`; empty for the root value.
    pub path: String,
    /// Value on the first (current) side; `null` when the key only exists on
    /// the other side.
    pub value1: serde_json::Value,
    /// Value on the second (previous) side; `null` when the key only exists
    /// on the other side.
    pub value2: serde_json::Value,
}
366
/// Detailed outcome of the ontology check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyResult {
    /// `1 - violations / constraints`, where constraints counts the
    /// allowed-value entries plus the relationships; 1.0 when there are none.
    pub score: f64,

    /// Constraints the payload failed; defaults to empty when absent from the
    /// deserialized input.
    #[serde(default)]
    pub violations: Vec<OntologyViolation>,
}
377
/// A single failed ontology constraint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OntologyViolation {
    /// Constraint category: `"allowed_values"`, `"implies"`, `"excludes"`,
    /// `"less_than"` or `"equals"`.
    pub kind: String,
    /// Human-readable description of the violation.
    pub message: String,
    /// Payload path the violation is attributed to.
    pub path: String,
}
388
/// Computes quality metrics for a [`MetricContext`].
///
/// Each flag switches one metric on or off. The type is a plain bundle of
/// booleans, so it derives `Debug` and `Clone` like every other public type
/// in this module.
#[derive(Debug, Clone)]
pub struct QomComputer {
    /// Evaluate assertions to obtain the instruction-compliance score.
    compute_ic: bool,
    /// Convert a supplied tool-outcome verification into a score.
    compute_toc: bool,
    /// Compare the response against the supplied sources.
    compute_groundedness: bool,
    /// Diff the response against a previous response.
    compute_determinism: bool,
    /// Check the payload against ontology constraints.
    compute_ontology: bool,
}
402
403impl Default for QomComputer {
404 fn default() -> Self {
405 Self::new()
406 }
407}
408
409impl QomComputer {
410 pub fn new() -> Self {
412 Self {
413 compute_ic: true,
414 compute_toc: true,
415 compute_groundedness: false, compute_determinism: false, compute_ontology: false, }
419 }
420
421 pub fn with_ic(mut self, enabled: bool) -> Self {
423 self.compute_ic = enabled;
424 self
425 }
426
427 pub fn with_toc(mut self, enabled: bool) -> Self {
429 self.compute_toc = enabled;
430 self
431 }
432
433 pub fn with_groundedness(mut self, enabled: bool) -> Self {
435 self.compute_groundedness = enabled;
436 self
437 }
438
439 pub fn with_determinism(mut self, enabled: bool) -> Self {
441 self.compute_determinism = enabled;
442 self
443 }
444
445 pub fn with_ontology(mut self, enabled: bool) -> Self {
447 self.compute_ontology = enabled;
448 self
449 }
450
451 pub fn compute(&self, ctx: &MetricContext) -> MetricComputeResult {
453 let mut metrics = QomMetrics::default();
454 let mut errors = Vec::new();
455 let mut assertion_results = None;
456 let mut toc_details = None;
457 let mut groundedness_details = None;
458 let mut determinism_details = None;
459 let mut ontology_details = None;
460
461 metrics.schema_fidelity = 1.0;
464
465 if self.compute_ic {
467 if let Some(assertions) = &ctx.assertions {
468 match self.compute_ic_metric(ctx, assertions) {
469 Ok((score, results)) => {
470 metrics.instruction_compliance = Some(score);
471 assertion_results = Some(results);
472 }
473 Err(e) => {
474 errors.push(format!("IC computation failed: {}", e));
475 }
476 }
477 }
478 }
479
480 if self.compute_toc {
482 if let Some(toc) = &ctx.toc_result {
483 metrics.tool_outcome_correctness = Some(toc.to_score());
484 toc_details = Some(toc.clone());
485 }
486 }
487
488 if self.compute_groundedness && !ctx.sources.is_empty() {
490 match self.compute_groundedness_metric(ctx) {
491 Ok(result) => {
492 metrics.groundedness = Some(result.score);
493 groundedness_details = Some(result);
494 }
495 Err(e) => {
496 errors.push(format!("Groundedness computation failed: {}", e));
497 }
498 }
499 }
500
501 if self.compute_determinism {
503 if let Some(previous) = &ctx.previous_response {
504 if let Some(current) = &ctx.response {
505 match self.compute_determinism_metric(current, previous) {
506 Ok(result) => {
507 metrics.determinism_jitter = Some(result.similarity);
508 determinism_details = Some(result);
509 }
510 Err(e) => {
511 errors.push(format!("Determinism computation failed: {}", e));
512 }
513 }
514 }
515 }
516 }
517
518 if self.compute_ontology {
520 if let Some(constraints) = &ctx.ontology_constraints {
521 match self.compute_ontology_metric(ctx, constraints) {
522 Ok(result) => {
523 metrics.ontology_adherence = Some(result.score);
524 ontology_details = Some(result);
525 }
526 Err(e) => {
527 errors.push(format!("Ontology computation failed: {}", e));
528 }
529 }
530 }
531 }
532
533 MetricComputeResult {
534 metrics,
535 assertion_results,
536 toc_details,
537 groundedness_details,
538 determinism_details,
539 ontology_details,
540 errors,
541 }
542 }
543
544 fn compute_ic_metric(
546 &self,
547 ctx: &MetricContext,
548 assertions: &AssertionSet,
549 ) -> Result<(f64, AssertionSetResult), MetricError> {
550 let eval_ctx = ctx.to_evaluation_context();
551 let result = assertions
552 .evaluate_with_context(&ctx.payload, &eval_ctx)
553 .map_err(|e| MetricError::AssertionError(e.to_string()))?;
554
555 Ok((result.ic_score, result))
556 }
557
558 fn compute_groundedness_metric(
560 &self,
561 ctx: &MetricContext,
562 ) -> Result<GroundednessResult, MetricError> {
563 let response_text = ctx
567 .response
568 .as_ref()
569 .map(|r| r.to_string())
570 .unwrap_or_default();
571
572 let mut claims = Vec::new();
573 let mut grounded_count = 0;
574
575 for source in &ctx.sources {
577 if let Some(content) = &source.content {
578 let claim = Claim {
579 text: content.chars().take(100).collect(),
580 grounded: response_text.contains(content) || content.contains(&response_text),
581 source: Some(source.id.clone()),
582 confidence: source.confidence,
583 };
584 if claim.grounded {
585 grounded_count += 1;
586 }
587 claims.push(claim);
588 }
589 }
590
591 let score = if claims.is_empty() {
592 1.0 } else {
594 grounded_count as f64 / claims.len() as f64
595 };
596
597 Ok(GroundednessResult {
598 score,
599 claims,
600 method: GroundednessMethod::Local,
601 })
602 }
603
604 fn compute_determinism_metric(
606 &self,
607 current: &serde_json::Value,
608 previous: &serde_json::Value,
609 ) -> Result<DeterminismResult, MetricError> {
610 let mut differences = Vec::new();
611
612 fn compare_values(
614 v1: &serde_json::Value,
615 v2: &serde_json::Value,
616 path: &str,
617 diffs: &mut Vec<FieldDiff>,
618 ) -> bool {
619 match (v1, v2) {
620 (serde_json::Value::Object(o1), serde_json::Value::Object(o2)) => {
621 let mut all_match = true;
622 for (k, val1) in o1 {
623 let new_path = if path.is_empty() {
624 k.clone()
625 } else {
626 format!("{}.{}", path, k)
627 };
628 if let Some(val2) = o2.get(k) {
629 if !compare_values(val1, val2, &new_path, diffs) {
630 all_match = false;
631 }
632 } else {
633 diffs.push(FieldDiff {
634 path: new_path,
635 value1: val1.clone(),
636 value2: serde_json::Value::Null,
637 });
638 all_match = false;
639 }
640 }
641 for k in o2.keys() {
643 if !o1.contains_key(k) {
644 let new_path = if path.is_empty() {
645 k.clone()
646 } else {
647 format!("{}.{}", path, k)
648 };
649 diffs.push(FieldDiff {
650 path: new_path,
651 value1: serde_json::Value::Null,
652 value2: o2.get(k).cloned().unwrap_or(serde_json::Value::Null),
653 });
654 all_match = false;
655 }
656 }
657 all_match
658 }
659 (serde_json::Value::Array(a1), serde_json::Value::Array(a2)) => {
660 if a1.len() != a2.len() {
661 diffs.push(FieldDiff {
662 path: path.to_string(),
663 value1: v1.clone(),
664 value2: v2.clone(),
665 });
666 return false;
667 }
668 let mut all_match = true;
669 for (i, (item1, item2)) in a1.iter().zip(a2.iter()).enumerate() {
670 let new_path = format!("{}[{}]", path, i);
671 if !compare_values(item1, item2, &new_path, diffs) {
672 all_match = false;
673 }
674 }
675 all_match
676 }
677 _ => {
678 if v1 != v2 {
679 diffs.push(FieldDiff {
680 path: path.to_string(),
681 value1: v1.clone(),
682 value2: v2.clone(),
683 });
684 false
685 } else {
686 true
687 }
688 }
689 }
690 }
691
692 let matches = compare_values(current, previous, "", &mut differences);
693
694 let similarity = if matches {
696 1.0
697 } else {
698 let total_fields = count_fields(current) + count_fields(previous);
700 if total_fields == 0 {
701 1.0
702 } else {
703 1.0 - (differences.len() as f64 * 2.0 / total_fields as f64).min(1.0)
704 }
705 };
706
707 Ok(DeterminismResult {
708 similarity,
709 differences,
710 acceptable: similarity >= 0.9, })
712 }
713
714 fn compute_ontology_metric(
716 &self,
717 ctx: &MetricContext,
718 constraints: &OntologyConstraints,
719 ) -> Result<OntologyResult, MetricError> {
720 let mut violations = Vec::new();
721
722 for (path, allowed) in &constraints.allowed_values {
724 if let Some(value) = get_json_path(&ctx.payload, path) {
725 if !allowed.contains(value) {
726 violations.push(OntologyViolation {
727 kind: "allowed_values".to_string(),
728 message: format!(
729 "Value at '{}' is not in allowed set: {:?}",
730 path, allowed
731 ),
732 path: path.clone(),
733 });
734 }
735 }
736 }
737
738 for relation in &constraints.relationships {
740 let from_value = get_json_path(&ctx.payload, &relation.from);
741 let to_value = get_json_path(&ctx.payload, &relation.to);
742
743 match relation.relation {
744 RelationType::Implies => {
745 if from_value.is_some() && to_value.is_none() {
746 violations.push(OntologyViolation {
747 kind: "implies".to_string(),
748 message: format!(
749 "'{}' implies '{}' must exist",
750 relation.from, relation.to
751 ),
752 path: relation.to.clone(),
753 });
754 }
755 }
756 RelationType::Excludes => {
757 if from_value.is_some() && to_value.is_some() {
758 violations.push(OntologyViolation {
759 kind: "excludes".to_string(),
760 message: format!(
761 "'{}' and '{}' are mutually exclusive",
762 relation.from, relation.to
763 ),
764 path: relation.from.clone(),
765 });
766 }
767 }
768 RelationType::LessThan => {
769 if let (Some(v1), Some(v2)) = (from_value, to_value) {
770 if let (Some(n1), Some(n2)) = (v1.as_f64(), v2.as_f64()) {
771 if n1 >= n2 {
772 violations.push(OntologyViolation {
773 kind: "less_than".to_string(),
774 message: format!(
775 "'{}' must be less than '{}'",
776 relation.from, relation.to
777 ),
778 path: relation.from.clone(),
779 });
780 }
781 }
782 }
783 }
784 RelationType::Equals => {
785 if from_value != to_value {
786 violations.push(OntologyViolation {
787 kind: "equals".to_string(),
788 message: format!("'{}' must equal '{}'", relation.from, relation.to),
789 path: relation.from.clone(),
790 });
791 }
792 }
793 }
794 }
795
796 let total_constraints =
798 constraints.allowed_values.len() + constraints.relationships.len();
799 let score = if total_constraints == 0 {
800 1.0
801 } else {
802 1.0 - (violations.len() as f64 / total_constraints as f64)
803 };
804
805 Ok(OntologyResult { score, violations })
806 }
807}
808
809fn count_fields(value: &serde_json::Value) -> usize {
811 match value {
812 serde_json::Value::Object(obj) => {
813 obj.len() + obj.values().map(count_fields).sum::<usize>()
814 }
815 serde_json::Value::Array(arr) => arr.iter().map(count_fields).sum(),
816 _ => 1,
817 }
818}
819
820fn get_json_path<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
822 let parts: Vec<&str> = path.split('.').collect();
823 let mut current = value;
824
825 for part in parts {
826 match current {
827 serde_json::Value::Object(obj) => {
828 current = obj.get(part)?;
829 }
830 serde_json::Value::Array(arr) => {
831 let index: usize = part.parse().ok()?;
832 current = arr.get(index)?;
833 }
834 _ => return None,
835 }
836 }
837
838 Some(current)
839}
840
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assertions::Assertion;
    use serde_json::json;

    /// Context for the shared test message type with the given payload.
    fn context_for(payload: serde_json::Value) -> MetricContext {
        MetricContext::new("test.Type.v1", payload)
    }

    /// The `status` allowed-values constraint shared by the ontology tests.
    fn status_constraints() -> OntologyConstraints {
        let mut constraints = OntologyConstraints::default();
        constraints.allowed_values.insert(
            "status".to_string(),
            vec![json!("active"), json!("inactive")],
        );
        constraints
    }

    /// With no optional inputs only schema fidelity is reported, error-free.
    #[test]
    fn test_basic_compute() {
        let ctx = context_for(json!({"value": 42}));

        let outcome = QomComputer::new().compute(&ctx);

        assert_eq!(outcome.metrics.schema_fidelity, 1.0);
        assert!(outcome.errors.is_empty());
    }

    /// All assertions passing yields a perfect IC score and detailed results.
    #[test]
    fn test_ic_computation() {
        let checks = AssertionSet::new(vec![
            Assertion::new("check1", "payload.value > 0", "Value must be positive"),
            Assertion::new("check2", "payload.value < 100", "Value must be less than 100"),
        ]);
        let ctx = context_for(json!({"value": 42})).with_assertions(checks);

        let outcome = QomComputer::new().with_ic(true).compute(&ctx);

        assert_eq!(outcome.metrics.instruction_compliance, Some(1.0));
        assert!(outcome.assertion_results.is_some());
    }

    /// One of two assertions failing halves the IC score.
    #[test]
    fn test_ic_partial_failure() {
        let checks = AssertionSet::new(vec![
            Assertion::new("check1", "payload.value > 0", "Value must be positive"),
            Assertion::new("check2", "payload.value > 100", "Value must be greater than 100"),
        ]);
        let ctx = context_for(json!({"value": 42})).with_assertions(checks);

        let outcome = QomComputer::new().with_ic(true).compute(&ctx);

        assert_eq!(outcome.metrics.instruction_compliance, Some(0.5));
    }

    /// A verified TOC result scores 1.0.
    #[test]
    fn test_toc_verified() {
        let ctx = context_for(json!({})).with_toc_result(TocResult::verified(TocMethod::Header));

        let outcome = QomComputer::new().with_toc(true).compute(&ctx);

        assert_eq!(outcome.metrics.tool_outcome_correctness, Some(1.0));
    }

    /// A failed TOC result scores 0.0.
    #[test]
    fn test_toc_failed() {
        let failure = TocResult::failed(TocMethod::Callback, "Side effect not observed");
        let ctx = context_for(json!({})).with_toc_result(failure);

        let outcome = QomComputer::new().with_toc(true).compute(&ctx);

        assert_eq!(outcome.metrics.tool_outcome_correctness, Some(0.0));
    }

    /// Identical responses are reported with a similarity of 1.0.
    #[test]
    fn test_determinism_identical() {
        let ctx = context_for(json!({}))
            .with_response(json!({"result": "hello", "count": 5}))
            .with_previous_response(json!({"result": "hello", "count": 5}));

        let outcome = QomComputer::new().with_determinism(true).compute(&ctx);

        assert_eq!(outcome.metrics.determinism_jitter, Some(1.0));
    }

    /// Differing responses lower the similarity and list the differences.
    #[test]
    fn test_determinism_different() {
        let ctx = context_for(json!({}))
            .with_response(json!({"result": "hello", "count": 5}))
            .with_previous_response(json!({"result": "world", "count": 10}));

        let outcome = QomComputer::new().with_determinism(true).compute(&ctx);

        let similarity = outcome
            .metrics
            .determinism_jitter
            .expect("determinism metric should be set");
        assert!(similarity < 1.0);

        let details = outcome
            .determinism_details
            .expect("determinism details should be set");
        assert!(!details.differences.is_empty());
    }

    /// A payload value inside the allowed set yields full adherence.
    #[test]
    fn test_ontology_allowed_values() {
        let ctx = context_for(json!({"status": "active"})).with_ontology(status_constraints());

        let outcome = QomComputer::new().with_ontology(true).compute(&ctx);

        assert_eq!(outcome.metrics.ontology_adherence, Some(1.0));
    }

    /// A payload value outside the allowed set is reported as a violation.
    #[test]
    fn test_ontology_violation() {
        let ctx = context_for(json!({"status": "unknown"})).with_ontology(status_constraints());

        let outcome = QomComputer::new().with_ontology(true).compute(&ctx);

        assert_eq!(outcome.metrics.ontology_adherence, Some(0.0));
        let details = outcome
            .ontology_details
            .expect("ontology details should be set");
        assert!(!details.violations.is_empty());
    }
}
980}