// ainl_semantic_tagger/tag.rs

//! Core semantic tag types and canonical string constants.

use std::hash::{Hash, Hasher};

5#[derive(Debug, Clone)]
6#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
7pub struct SemanticTag {
8    pub namespace: TagNamespace,
9    pub value: String,
10    pub confidence: f32,
11}
12
/// Quantize `confidence` to an integer in \[0, 100\] for equality and hashing
/// (~two decimal places of resolution).
///
/// Finite inputs are clamped to `[0.0, 1.0]` before scaling, so negative values
/// yield `0` and values above one yield `100`. Non-finite values (`NaN`, ±∞)
/// map to **0** so they cannot poison [`Hash`] implementations.
#[inline]
#[must_use]
pub fn quantize_confidence(c: f32) -> u8 {
    if !c.is_finite() {
        return 0;
    }
    // After the clamp the scaled value lies in [0.0, 100.0], so the cast to
    // `u8` can never truncate or wrap.
    (c.clamp(0.0, 1.0) * 100.0).round() as u8
}

23impl PartialEq for SemanticTag {
24    fn eq(&self, other: &Self) -> bool {
25        self.namespace == other.namespace
26            && self.value == other.value
27            && quantize_confidence(self.confidence) == quantize_confidence(other.confidence)
28    }
29}
30
31impl Eq for SemanticTag {}
32
33impl Hash for SemanticTag {
34    fn hash<H: Hasher>(&self, state: &mut H) {
35        self.namespace.hash(state);
36        self.value.hash(state);
37        quantize_confidence(self.confidence).hash(state);
38    }
39}
40
41impl SemanticTag {
42    pub fn to_canonical_string(&self) -> String {
43        format!("{}:{}", self.namespace.prefix(), self.value)
44    }
45}
46
/// Category of a [`SemanticTag`].
///
/// Each variant has a lowercase string form, returned by `TagNamespace::prefix`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TagNamespace {
    Topic,
    Preference,
    Correction,
    Behavior,
    Task,
    Tone,
    Tool,
    Domain,
}

60impl TagNamespace {
61    pub fn prefix(&self) -> &'static str {
62        match self {
63            Self::Topic => "topic",
64            Self::Preference => "preference",
65            Self::Correction => "correction",
66            Self::Behavior => "behavior",
67            Self::Task => "task",
68            Self::Tone => "tone",
69            Self::Tool => "tool",
70            Self::Domain => "domain",
71        }
72    }
73}
74
// Canonical strings for well-known tags, written in the same `prefix:value`
// shape that `SemanticTag::to_canonical_string` produces.
pub const PREFERENCE_BREVITY: &str = "preference:brevity";
pub const PREFERENCE_DETAIL: &str = "preference:detail";
pub const PREFERENCE_EXAMPLES: &str = "preference:examples";
pub const PREFERENCE_DIRECTNESS: &str = "preference:directness";
pub const TONE_FORMAL: &str = "tone:formal";
pub const TONE_INFORMAL: &str = "tone:informal";
pub const CORRECTION_AVOID_BULLETS: &str = "correction:avoid_bullets";
pub const CORRECTION_AVOID_EMOJIS: &str = "correction:avoid_emojis";
pub const BEHAVIOR_ADDING_CAVEATS: &str = "behavior:adding_caveats";
pub const BEHAVIOR_OVEREXPLAINING: &str = "behavior:overexplaining";

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    /// Hash a tag with a fresh `DefaultHasher` so two tags can be compared by hash.
    fn hash_one(tag: &SemanticTag) -> u64 {
        let mut h = DefaultHasher::new();
        tag.hash(&mut h);
        h.finish()
    }

    // 0.849 and 0.851 both land in bucket 85, so the tags must be equal and
    // hash identically.
    #[test]
    fn eq_and_hash_quantize_confidence() {
        let a = SemanticTag {
            namespace: TagNamespace::Topic,
            value: "rust".into(),
            confidence: 0.849,
        };
        let b = SemanticTag {
            namespace: TagNamespace::Topic,
            value: "rust".into(),
            confidence: 0.851,
        };
        assert_eq!(a, b);
        assert_eq!(hash_one(&a), hash_one(&b));
    }

    // NaN is quantized to 0, so a NaN-confidence tag behaves like a 0.0 one.
    #[test]
    fn nan_confidence_hashes_like_zero_quantized() {
        let a = SemanticTag {
            namespace: TagNamespace::Tool,
            value: "x".into(),
            confidence: f32::NAN,
        };
        let b = SemanticTag {
            namespace: TagNamespace::Tool,
            value: "x".into(),
            confidence: 0.0,
        };
        assert_eq!(a, b);
        assert_eq!(hash_one(&a), hash_one(&b));
    }

    #[test]
    fn non_finite_confidence_quantizes_to_zero() {
        assert_eq!(quantize_confidence(f32::NAN), 0);
        assert_eq!(quantize_confidence(f32::INFINITY), 0);
        assert_eq!(quantize_confidence(f32::NEG_INFINITY), 0);
    }

    // 0.004 rounds down to bucket 0 and 0.006 rounds up to bucket 1, so the
    // tags differ in both equality and hash.
    #[test]
    fn distinct_quantized_confidence_not_equal() {
        let low = SemanticTag {
            namespace: TagNamespace::Topic,
            value: "t".into(),
            confidence: 0.004,
        };
        let high = SemanticTag {
            namespace: TagNamespace::Topic,
            value: "t".into(),
            confidence: 0.006,
        };
        assert_eq!(quantize_confidence(0.004), 0);
        assert_eq!(quantize_confidence(0.006), 1);
        assert_ne!(low, high);
        assert_ne!(hash_one(&low), hash_one(&high));
    }
}
154}