// elara_visual/face.rs
1//! Face State - Facial expression and gaze as state
2//!
3//! This is NOT facial recognition or tracking data.
4//! This is the STATE of facial expression for reality synchronization.
5
6use elara_core::StateTime;
7
/// Face landmark indices (simplified 68-point model concept).
/// We don't store raw coordinates - we store SEMANTIC state:
/// each variant names a region of the face, not a point cloud.
/// NOTE(review): no code in this file consumes these variants yet —
/// presumably used by callers elsewhere in the crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FacialRegion {
    LeftEye,
    RightEye,
    LeftEyebrow,
    RightEyebrow,
    Nose,
    UpperLip,
    LowerLip,
    LeftCheek,
    RightCheek,
    Jaw,
    Forehead,
}
24
25/// Emotion vector - continuous blend of basic emotions
26#[derive(Debug, Clone, Copy, Default)]
27pub struct EmotionVector {
28    /// Joy/happiness [0.0 - 1.0]
29    pub joy: f32,
30    /// Sadness [0.0 - 1.0]
31    pub sadness: f32,
32    /// Anger [0.0 - 1.0]
33    pub anger: f32,
34    /// Fear [0.0 - 1.0]
35    pub fear: f32,
36    /// Surprise [0.0 - 1.0]
37    pub surprise: f32,
38    /// Disgust [0.0 - 1.0]
39    pub disgust: f32,
40    /// Contempt [0.0 - 1.0]
41    pub contempt: f32,
42}
43
44impl EmotionVector {
45    /// Neutral expression
46    pub fn neutral() -> Self {
47        Self::default()
48    }
49
50    /// Dominant emotion
51    pub fn dominant(&self) -> (&'static str, f32) {
52        let emotions = [
53            ("joy", self.joy),
54            ("sadness", self.sadness),
55            ("anger", self.anger),
56            ("fear", self.fear),
57            ("surprise", self.surprise),
58            ("disgust", self.disgust),
59            ("contempt", self.contempt),
60        ];
61
62        emotions
63            .iter()
64            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
65            .map(|(name, val)| (*name, *val))
66            .unwrap_or(("neutral", 0.0))
67    }
68
69    /// Blend with another emotion vector
70    pub fn blend(&self, other: &EmotionVector, factor: f32) -> EmotionVector {
71        let f = factor.clamp(0.0, 1.0);
72        let inv = 1.0 - f;
73
74        EmotionVector {
75            joy: self.joy * inv + other.joy * f,
76            sadness: self.sadness * inv + other.sadness * f,
77            anger: self.anger * inv + other.anger * f,
78            fear: self.fear * inv + other.fear * f,
79            surprise: self.surprise * inv + other.surprise * f,
80            disgust: self.disgust * inv + other.disgust * f,
81            contempt: self.contempt * inv + other.contempt * f,
82        }
83    }
84
85    /// Normalize so all values sum to 1.0
86    pub fn normalize(&self) -> EmotionVector {
87        let sum = self.joy
88            + self.sadness
89            + self.anger
90            + self.fear
91            + self.surprise
92            + self.disgust
93            + self.contempt;
94
95        if sum < 0.001 {
96            return EmotionVector::neutral();
97        }
98
99        EmotionVector {
100            joy: self.joy / sum,
101            sadness: self.sadness / sum,
102            anger: self.anger / sum,
103            fear: self.fear / sum,
104            surprise: self.surprise / sum,
105            disgust: self.disgust / sum,
106            contempt: self.contempt / sum,
107        }
108    }
109}
110
/// Gaze direction state
#[derive(Debug, Clone, Copy, Default)]
pub struct GazeState {
    /// Horizontal angle in radians (-π to π, 0 = forward)
    pub yaw: f32,
    /// Vertical angle in radians (-π/2 to π/2, 0 = forward)
    pub pitch: f32,
    /// Is the person looking at the camera/screen?
    pub looking_at_camera: bool,
    /// Blink state (0.0 = open, 1.0 = closed)
    pub blink: f32,
}

impl GazeState {
    /// Gaze aimed straight ahead at the camera, eyes open.
    pub fn forward() -> Self {
        Self {
            looking_at_camera: true,
            ..Self::default()
        }
    }

    /// Interpolate between two gaze states.
    ///
    /// Angles and blink blend linearly; the boolean flag snaps to the
    /// nearer endpoint (the far endpoint wins at exactly 0.5).
    /// `t` is clamped to `[0.0, 1.0]`.
    pub fn lerp(&self, other: &GazeState, t: f32) -> GazeState {
        let weight = t.clamp(0.0, 1.0);
        let mix = |from: f32, to: f32| from + (to - from) * weight;

        GazeState {
            yaw: mix(self.yaw, other.yaw),
            pitch: mix(self.pitch, other.pitch),
            looking_at_camera: if weight < 0.5 {
                self.looking_at_camera
            } else {
                other.looking_at_camera
            },
            blink: mix(self.blink, other.blink),
        }
    }
}
150
/// Mouth state for speech visualization.
/// `Default` yields a closed, neutral mouth with `Viseme::Neutral`.
#[derive(Debug, Clone, Copy, Default)]
pub struct MouthState {
    /// Mouth openness (0.0 = closed, 1.0 = fully open)
    pub openness: f32,
    /// Smile amount (-1.0 = frown, 0.0 = neutral, 1.0 = smile)
    pub smile: f32,
    /// Current viseme (mouth shape for speech)
    pub viseme: Viseme,
}
161
/// Viseme - mouth shapes for speech
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Viseme {
    #[default]
    Neutral, // Closed/neutral
    AA, // "ah" as in "father"
    AO, // "aw" as in "bought"
    EH, // "eh" as in "bed"
    IY, // "ee" as in "see"
    UW, // "oo" as in "boot"
    OW, // "oh" as in "boat"
    AE, // "a" as in "cat"
    AW, // "ow" as in "cow"
    EY, // "ay" as in "say"
    ER, // "er" as in "bird"
    PP, // "p", "b", "m" (lips together)
    FF, // "f", "v" (teeth on lip)
    TH, // "th" (tongue between teeth)
    DD, // "d", "t", "n" (tongue on ridge)
    KK, // "k", "g" (back of tongue)
    CH, // "ch", "j", "sh" (lips rounded)
    SS, // "s", "z" (teeth together)
    RR, // "r" (lips slightly rounded)
    NN, // "n", "ng" (nasal)
}

impl Viseme {
    /// Get viseme from phoneme hint (case-insensitive).
    ///
    /// Unrecognized phonemes map to [`Viseme::Neutral`].
    ///
    /// Fix: the original match never produced `EY`, `ER`, or `NN`, making
    /// those variants unreachable via this constructor; `"ey"/"ay"`,
    /// `"er"`, and `"ng"` are now mapped. Existing mappings are unchanged
    /// for backward compatibility — in particular `"n"` still maps to `DD`
    /// (tongue-on-ridge), and `"aw"` still maps to `AO` even though an `AW`
    /// variant exists (NOTE(review): confirm which shape "aw" should use).
    pub fn from_phoneme(phoneme: &str) -> Self {
        match phoneme.to_lowercase().as_str() {
            "aa" | "ah" => Viseme::AA,
            "ao" | "aw" => Viseme::AO,
            "eh" | "e" => Viseme::EH,
            "iy" | "ee" | "i" => Viseme::IY,
            "uw" | "oo" | "u" => Viseme::UW,
            "ow" | "oh" | "o" => Viseme::OW,
            "ae" | "a" => Viseme::AE,
            "ey" | "ay" => Viseme::EY,
            "er" => Viseme::ER,
            "p" | "b" | "m" => Viseme::PP,
            "f" | "v" => Viseme::FF,
            "th" => Viseme::TH,
            "d" | "t" | "n" => Viseme::DD,
            "k" | "g" => Viseme::KK,
            "ch" | "j" | "sh" => Viseme::CH,
            "s" | "z" => Viseme::SS,
            "r" => Viseme::RR,
            "ng" => Viseme::NN,
            _ => Viseme::Neutral,
        }
    }
}
211
/// Complete face state.
///
/// Aggregates every facial channel at a single point in time; this is the
/// unit that gets interpolated, degraded (L4/L5), and synchronized.
#[derive(Debug, Clone)]
pub struct FaceState {
    /// Timestamp of this face state
    pub timestamp: StateTime,

    /// Is a face detected/present?
    pub present: bool,

    /// Head rotation (yaw, pitch, roll in radians)
    /// NOTE(review): tuple order assumed (yaw, pitch, roll) per this doc —
    /// confirm producers agree.
    pub head_rotation: (f32, f32, f32),

    /// Emotion vector
    pub emotion: EmotionVector,

    /// Gaze state
    pub gaze: GazeState,

    /// Mouth state
    pub mouth: MouthState,

    /// Is the person speaking?
    pub speaking: bool,

    /// Confidence of face detection [0.0 - 1.0]
    pub confidence: f32,
}
239
240impl FaceState {
241    /// Create a new face state with defaults
242    pub fn new(timestamp: StateTime) -> Self {
243        Self {
244            timestamp,
245            present: true,
246            head_rotation: (0.0, 0.0, 0.0),
247            emotion: EmotionVector::neutral(),
248            gaze: GazeState::forward(),
249            mouth: MouthState::default(),
250            speaking: false,
251            confidence: 1.0,
252        }
253    }
254
255    /// No face present
256    pub fn absent(timestamp: StateTime) -> Self {
257        Self {
258            timestamp,
259            present: false,
260            head_rotation: (0.0, 0.0, 0.0),
261            emotion: EmotionVector::neutral(),
262            gaze: GazeState::forward(),
263            mouth: MouthState::default(),
264            speaking: false,
265            confidence: 0.0,
266        }
267    }
268
269    /// Reduce to minimal state (for L4 degradation)
270    pub fn reduce_to_minimal(&mut self) {
271        self.emotion = EmotionVector::neutral();
272        self.head_rotation = (0.0, 0.0, 0.0);
273        // Keep only: present, speaking, basic gaze
274    }
275
276    /// Convert to latent state (for L5 degradation)
277    pub fn to_latent(self) -> FaceState {
278        FaceState {
279            timestamp: self.timestamp,
280            present: self.present,
281            head_rotation: (0.0, 0.0, 0.0),
282            emotion: EmotionVector::neutral(),
283            gaze: GazeState::forward(),
284            mouth: MouthState::default(),
285            speaking: false,
286            confidence: 0.1,
287        }
288    }
289
290    /// Interpolate between two face states
291    pub fn lerp(&self, other: &FaceState, t: f32) -> FaceState {
292        let t = t.clamp(0.0, 1.0);
293
294        FaceState {
295            timestamp: other.timestamp,
296            present: if t < 0.5 { self.present } else { other.present },
297            head_rotation: (
298                self.head_rotation.0 + (other.head_rotation.0 - self.head_rotation.0) * t,
299                self.head_rotation.1 + (other.head_rotation.1 - self.head_rotation.1) * t,
300                self.head_rotation.2 + (other.head_rotation.2 - self.head_rotation.2) * t,
301            ),
302            emotion: self.emotion.blend(&other.emotion, t),
303            gaze: self.gaze.lerp(&other.gaze, t),
304            mouth: MouthState {
305                openness: self.mouth.openness + (other.mouth.openness - self.mouth.openness) * t,
306                smile: self.mouth.smile + (other.mouth.smile - self.mouth.smile) * t,
307                viseme: if t < 0.5 {
308                    self.mouth.viseme
309                } else {
310                    other.mouth.viseme
311                },
312            },
313            speaking: if t < 0.5 {
314                self.speaking
315            } else {
316                other.speaking
317            },
318            confidence: self.confidence + (other.confidence - self.confidence) * t,
319        }
320    }
321}
322
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_emotion_vector() {
        // joy is strongest, so it must dominate.
        let emotion = EmotionVector {
            joy: 0.8,
            surprise: 0.2,
            ..EmotionVector::neutral()
        };

        assert_eq!(emotion.dominant(), ("joy", 0.8));
    }

    #[test]
    fn test_emotion_blend() {
        let happy = EmotionVector {
            joy: 1.0,
            ..Default::default()
        };
        let sad = EmotionVector {
            sadness: 1.0,
            ..Default::default()
        };

        // A 50/50 blend splits both components evenly.
        let blended = happy.blend(&sad, 0.5);
        for (got, want) in [(blended.joy, 0.5), (blended.sadness, 0.5)] {
            assert!((got - want).abs() < 0.01);
        }
    }

    #[test]
    fn test_face_state_lerp() {
        let mut closed = FaceState::new(StateTime::from_millis(0));
        closed.mouth.openness = 0.0;

        let mut open = FaceState::new(StateTime::from_millis(100));
        open.mouth.openness = 1.0;

        let halfway = closed.lerp(&open, 0.5);
        assert!((halfway.mouth.openness - 0.5).abs() < 0.01);
    }

    #[test]
    fn test_viseme_from_phoneme() {
        let cases = [
            ("aa", Viseme::AA),
            ("p", Viseme::PP),
            ("s", Viseme::SS),
        ];
        for (phoneme, expected) in cases {
            assert_eq!(Viseme::from_phoneme(phoneme), expected);
        }
    }
}
375}