// ainl_semantic_tagger/tag.rs
use std::hash::{Hash, Hasher};
4
5#[derive(Debug, Clone)]
6#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
7pub struct SemanticTag {
8 pub namespace: TagNamespace,
9 pub value: String,
10 pub confidence: f32,
11}
12
/// Maps a confidence score onto an integer percentage in `0..=100`.
///
/// Finite inputs are clamped to `[0.0, 1.0]`, scaled by 100 and rounded to the
/// nearest integer. Non-finite inputs (NaN and both infinities) yield `0`.
#[inline]
pub fn quantize_confidence(c: f32) -> u8 {
    if c.is_finite() {
        // After clamping and scaling the value lies in 0.0..=100.0, so it fits in `u8`.
        let percent = (c.clamp(0.0, 1.0) * 100.0).round();
        percent as u8
    } else {
        0
    }
}
22
23impl PartialEq for SemanticTag {
24 fn eq(&self, other: &Self) -> bool {
25 self.namespace == other.namespace
26 && self.value == other.value
27 && quantize_confidence(self.confidence) == quantize_confidence(other.confidence)
28 }
29}
30
31impl Eq for SemanticTag {}
32
33impl Hash for SemanticTag {
34 fn hash<H: Hasher>(&self, state: &mut H) {
35 self.namespace.hash(state);
36 self.value.hash(state);
37 quantize_confidence(self.confidence).hash(state);
38 }
39}
40
41impl SemanticTag {
42 pub fn to_canonical_string(&self) -> String {
43 format!("{}:{}", self.namespace.prefix(), self.value)
44 }
45}
46
/// Namespace a [`SemanticTag`] belongs to.
///
/// Each variant maps to a lowercase prefix string via [`TagNamespace::prefix`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TagNamespace {
    /// Rendered with the `topic` prefix.
    Topic,
    /// Rendered with the `preference` prefix.
    Preference,
    /// Rendered with the `correction` prefix.
    Correction,
    /// Rendered with the `behavior` prefix.
    Behavior,
    /// Rendered with the `task` prefix.
    Task,
    /// Rendered with the `tone` prefix.
    Tone,
    /// Rendered with the `tool` prefix.
    Tool,
    /// Rendered with the `domain` prefix.
    Domain,
}
59
60impl TagNamespace {
61 pub fn prefix(&self) -> &'static str {
62 match self {
63 Self::Topic => "topic",
64 Self::Preference => "preference",
65 Self::Correction => "correction",
66 Self::Behavior => "behavior",
67 Self::Task => "task",
68 Self::Tone => "tone",
69 Self::Tool => "tool",
70 Self::Domain => "domain",
71 }
72 }
73}
74
// Canonical tag strings in `<namespace prefix>:<value>` form, the same shape that
// `SemanticTag::to_canonical_string` produces.

/// Canonical string for the `brevity` tag under the `preference` prefix.
pub const PREFERENCE_BREVITY: &str = "preference:brevity";
/// Canonical string for the `detail` tag under the `preference` prefix.
pub const PREFERENCE_DETAIL: &str = "preference:detail";
/// Canonical string for the `examples` tag under the `preference` prefix.
pub const PREFERENCE_EXAMPLES: &str = "preference:examples";
/// Canonical string for the `directness` tag under the `preference` prefix.
pub const PREFERENCE_DIRECTNESS: &str = "preference:directness";
/// Canonical string for the `formal` tag under the `tone` prefix.
pub const TONE_FORMAL: &str = "tone:formal";
/// Canonical string for the `informal` tag under the `tone` prefix.
pub const TONE_INFORMAL: &str = "tone:informal";
/// Canonical string for the `avoid_bullets` tag under the `correction` prefix.
pub const CORRECTION_AVOID_BULLETS: &str = "correction:avoid_bullets";
/// Canonical string for the `avoid_emojis` tag under the `correction` prefix.
pub const CORRECTION_AVOID_EMOJIS: &str = "correction:avoid_emojis";
/// Canonical string for the `adding_caveats` tag under the `behavior` prefix.
pub const BEHAVIOR_ADDING_CAVEATS: &str = "behavior:adding_caveats";
/// Canonical string for the `overexplaining` tag under the `behavior` prefix.
pub const BEHAVIOR_OVEREXPLAINING: &str = "behavior:overexplaining";
85
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    /// Builds a tag from its three parts.
    fn make_tag(namespace: TagNamespace, value: &str, confidence: f32) -> SemanticTag {
        SemanticTag {
            namespace,
            value: value.into(),
            confidence,
        }
    }

    /// Feeds `tag` through a fresh `DefaultHasher` and returns the digest.
    fn hash_one(tag: &SemanticTag) -> u64 {
        let mut hasher = DefaultHasher::new();
        tag.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn eq_and_hash_quantize_confidence() {
        // 0.849 and 0.851 both round to 85.
        let a = make_tag(TagNamespace::Topic, "rust", 0.849);
        let b = make_tag(TagNamespace::Topic, "rust", 0.851);
        assert_eq!(a, b);
        assert_eq!(hash_one(&a), hash_one(&b));
    }

    #[test]
    fn nan_confidence_hashes_like_zero_quantized() {
        // NaN quantizes to 0, so it is indistinguishable from a 0.0 confidence.
        let a = make_tag(TagNamespace::Tool, "x", f32::NAN);
        let b = make_tag(TagNamespace::Tool, "x", 0.0);
        assert_eq!(a, b);
        assert_eq!(hash_one(&a), hash_one(&b));
    }

    #[test]
    fn non_finite_confidence_quantizes_to_zero() {
        assert_eq!(quantize_confidence(f32::NAN), 0);
        assert_eq!(quantize_confidence(f32::INFINITY), 0);
        assert_eq!(quantize_confidence(f32::NEG_INFINITY), 0);
    }

    #[test]
    fn distinct_quantized_confidence_not_equal() {
        // 0.004 rounds to 0 and 0.006 rounds to 1, so the tags must differ.
        let low = make_tag(TagNamespace::Topic, "t", 0.004);
        let high = make_tag(TagNamespace::Topic, "t", 0.006);
        assert_eq!(quantize_confidence(0.004), 0);
        assert_eq!(quantize_confidence(0.006), 1);
        assert_ne!(low, high);
        assert_ne!(hash_one(&low), hash_one(&high));
    }
}