//! Visual State Encoding - Wire format for visual state
//!
//! This is NOT H.264/VP8/AV1. This is ELARA-native state encoding.
//! We encode semantic visual state, not pixel data.

use std::fmt;

use crate::{
    BackgroundComplexity, Color, EmotionVector, EnvironmentType, FaceState, GazeState, JointState,
    LightingCondition, MouthState, PoseState, Position3D, Rotation3D, SceneState, Viseme,
    VisualState, VisualStateId,
};
use elara_core::{DegradationLevel, NodeId, StateTime};

/// Errors that can occur while encoding or decoding a visual state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EncodingError {
    /// The input buffer is shorter than the data it claims to contain.
    BufferTooSmall,
    /// The bytes do not form a valid visual-state record.
    InvalidData,
    /// The version byte does not match a supported wire-format version.
    UnsupportedVersion,
}

impl fmt::Display for EncodingError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let msg = match self {
            EncodingError::BufferTooSmall => "buffer too small for encoded visual state",
            EncodingError::InvalidData => "invalid visual state data",
            EncodingError::UnsupportedVersion => "unsupported visual state encoding version",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for EncodingError {}

/// Stateless encoder/decoder for the ELARA-native visual-state wire format.
pub struct VisualEncoder;

24impl VisualEncoder {
25    /// Encode a visual state to bytes
26    pub fn encode(state: &VisualState) -> Vec<u8> {
27        let mut buf = Vec::with_capacity(512);
28
29        // Header
30        buf.push(0x01); // Version
31        buf.push(if state.is_keyframe { 0x01 } else { 0x00 });
32        buf.push(state.degradation.level());
33        buf.push(0x00); // Reserved
34
35        // State ID (8 bytes)
36        buf.extend_from_slice(&state.id.0.to_le_bytes());
37
38        // Source node ID (8 bytes)
39        buf.extend_from_slice(&state.source.0.to_le_bytes());
40
41        // Timestamp (8 bytes)
42        buf.extend_from_slice(&state.timestamp.as_millis().to_le_bytes());
43
44        // Sequence (8 bytes)
45        buf.extend_from_slice(&state.sequence.to_le_bytes());
46
47        // Keyframe reference (8 bytes, 0 if none)
48        let keyframe_ref = state.keyframe_ref.map(|k| k.0).unwrap_or(0);
49        buf.extend_from_slice(&keyframe_ref.to_le_bytes());
50
51        // Flags for what's present
52        let mut flags: u8 = 0;
53        if state.face.is_some() {
54            flags |= 0x01;
55        }
56        if state.pose.is_some() {
57            flags |= 0x02;
58        }
59        if state.scene.is_some() {
60            flags |= 0x04;
61        }
62        buf.push(flags);
63
64        // Encode face if present
65        if let Some(ref face) = state.face {
66            Self::encode_face(face, &mut buf);
67        }
68
69        // Encode pose if present
70        if let Some(ref pose) = state.pose {
71            Self::encode_pose(pose, &mut buf);
72        }
73
74        // Encode scene if present
75        if let Some(ref scene) = state.scene {
76            Self::encode_scene(scene, &mut buf);
77        }
78
79        buf
80    }
81
82    fn encode_face(face: &FaceState, buf: &mut Vec<u8>) {
83        // Face flags
84        let mut flags: u8 = 0;
85        if face.present {
86            flags |= 0x01;
87        }
88        if face.speaking {
89            flags |= 0x02;
90        }
91        buf.push(flags);
92
93        // Confidence
94        buf.extend_from_slice(&face.confidence.to_le_bytes());
95
96        // Head rotation (3 x f32 = 12 bytes)
97        buf.extend_from_slice(&face.head_rotation.0.to_le_bytes());
98        buf.extend_from_slice(&face.head_rotation.1.to_le_bytes());
99        buf.extend_from_slice(&face.head_rotation.2.to_le_bytes());
100
101        // Emotion vector (7 x f32 = 28 bytes)
102        buf.extend_from_slice(&face.emotion.joy.to_le_bytes());
103        buf.extend_from_slice(&face.emotion.sadness.to_le_bytes());
104        buf.extend_from_slice(&face.emotion.anger.to_le_bytes());
105        buf.extend_from_slice(&face.emotion.fear.to_le_bytes());
106        buf.extend_from_slice(&face.emotion.surprise.to_le_bytes());
107        buf.extend_from_slice(&face.emotion.disgust.to_le_bytes());
108        buf.extend_from_slice(&face.emotion.contempt.to_le_bytes());
109
110        // Gaze (4 x f32 + 1 bool = 17 bytes)
111        buf.extend_from_slice(&face.gaze.yaw.to_le_bytes());
112        buf.extend_from_slice(&face.gaze.pitch.to_le_bytes());
113        buf.push(if face.gaze.looking_at_camera { 1 } else { 0 });
114        buf.extend_from_slice(&face.gaze.blink.to_le_bytes());
115
116        // Mouth (2 x f32 + 1 viseme = 9 bytes)
117        buf.extend_from_slice(&face.mouth.openness.to_le_bytes());
118        buf.extend_from_slice(&face.mouth.smile.to_le_bytes());
119        buf.push(face.mouth.viseme as u8);
120    }
121
122    fn encode_pose(pose: &PoseState, buf: &mut Vec<u8>) {
123        // Pose flags
124        let mut flags: u8 = 0;
125        if pose.present {
126            flags |= 0x01;
127        }
128        buf.push(flags);
129
130        // Confidence
131        buf.extend_from_slice(&pose.confidence.to_le_bytes());
132
133        // Gesture and activity
134        buf.push(pose.gesture as u8);
135        buf.push(pose.activity as u8);
136
137        // Velocity
138        buf.extend_from_slice(&pose.velocity.x.to_le_bytes());
139        buf.extend_from_slice(&pose.velocity.y.to_le_bytes());
140        buf.extend_from_slice(&pose.velocity.z.to_le_bytes());
141
142        // Number of joints
143        buf.push(pose.joints.len() as u8);
144
145        // Encode each joint (position + rotation + confidence = 32 bytes each)
146        for joint in &pose.joints {
147            buf.extend_from_slice(&joint.position.x.to_le_bytes());
148            buf.extend_from_slice(&joint.position.y.to_le_bytes());
149            buf.extend_from_slice(&joint.position.z.to_le_bytes());
150            buf.extend_from_slice(&joint.rotation.w.to_le_bytes());
151            buf.extend_from_slice(&joint.rotation.x.to_le_bytes());
152            buf.extend_from_slice(&joint.rotation.y.to_le_bytes());
153            buf.extend_from_slice(&joint.rotation.z.to_le_bytes());
154            buf.extend_from_slice(&joint.confidence.to_le_bytes());
155        }
156    }
157
158    fn encode_scene(scene: &SceneState, buf: &mut Vec<u8>) {
159        // Background color (3 x f32 = 12 bytes)
160        buf.extend_from_slice(&scene.background_color.r.to_le_bytes());
161        buf.extend_from_slice(&scene.background_color.g.to_le_bytes());
162        buf.extend_from_slice(&scene.background_color.b.to_le_bytes());
163
164        // Lighting, environment, complexity
165        buf.push(scene.lighting as u8);
166        buf.push(scene.environment as u8);
167        buf.push(scene.complexity as u8);
168
169        // Flags
170        let mut flags: u8 = 0;
171        if scene.background_motion {
172            flags |= 0x01;
173        }
174        buf.push(flags);
175
176        // Blur, noise, detail
177        buf.extend_from_slice(&scene.blur.to_le_bytes());
178        buf.extend_from_slice(&scene.noise.to_le_bytes());
179        buf.extend_from_slice(&scene.detail_level.to_le_bytes());
180
181        // Objects count (simplified - just count for now)
182        buf.push(scene.objects.len().min(255) as u8);
183    }
184
185    /// Decode a visual state from bytes
186    pub fn decode(data: &[u8]) -> Result<VisualState, EncodingError> {
187        if data.len() < 41 {
188            return Err(EncodingError::BufferTooSmall);
189        }
190
191        let mut pos = 0;
192
193        // Header
194        let version = data[pos];
195        pos += 1;
196        if version != 0x01 {
197            return Err(EncodingError::UnsupportedVersion);
198        }
199
200        let is_keyframe = data[pos] == 0x01;
201        pos += 1;
202        let degradation_level = data[pos];
203        pos += 1;
204        pos += 1; // Reserved
205
206        // State ID
207        let id = u64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
208        pos += 8;
209
210        // Source node ID
211        let source = u64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
212        pos += 8;
213
214        // Timestamp
215        let timestamp = i64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
216        pos += 8;
217
218        // Sequence
219        let sequence = u64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
220        pos += 8;
221
222        // Keyframe reference
223        let keyframe_ref_val = u64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
224        pos += 8;
225        let keyframe_ref = if keyframe_ref_val == 0 {
226            None
227        } else {
228            Some(VisualStateId(keyframe_ref_val))
229        };
230
231        // Flags
232        let flags = data[pos];
233        pos += 1;
234        let has_face = flags & 0x01 != 0;
235        let has_pose = flags & 0x02 != 0;
236        let has_scene = flags & 0x04 != 0;
237
238        // Decode face
239        let face = if has_face {
240            Some(Self::decode_face(&data[pos..])?)
241        } else {
242            None
243        };
244        if has_face {
245            pos += 75;
246        } // Face size
247
248        // Decode pose (variable size based on joints)
249        let pose = if has_pose {
250            let (p, size) = Self::decode_pose(&data[pos..])?;
251            pos += size;
252            Some(p)
253        } else {
254            None
255        };
256
257        // Decode scene
258        let scene = if has_scene {
259            Some(Self::decode_scene(&data[pos..])?)
260        } else {
261            None
262        };
263
264        let degradation = match degradation_level {
265            0 => DegradationLevel::L0_FullPerception,
266            1 => DegradationLevel::L1_DistortedPerception,
267            2 => DegradationLevel::L2_FragmentedPerception,
268            3 => DegradationLevel::L3_SymbolicPresence,
269            4 => DegradationLevel::L4_MinimalPresence,
270            _ => DegradationLevel::L5_LatentPresence,
271        };
272
273        Ok(VisualState {
274            id: VisualStateId(id),
275            source: NodeId::new(source),
276            timestamp: StateTime::from_millis(timestamp),
277            face,
278            pose,
279            scene,
280            degradation,
281            is_keyframe,
282            keyframe_ref,
283            sequence,
284        })
285    }
286
287    fn decode_face(data: &[u8]) -> Result<FaceState, EncodingError> {
288        if data.len() < 75 {
289            return Err(EncodingError::BufferTooSmall);
290        }
291
292        let mut pos = 0;
293
294        let flags = data[pos];
295        pos += 1;
296        let present = flags & 0x01 != 0;
297        let speaking = flags & 0x02 != 0;
298
299        let confidence = f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
300        pos += 4;
301
302        let head_rotation = (
303            f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
304            f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
305            f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap()),
306        );
307        pos += 12;
308
309        let emotion = EmotionVector {
310            joy: f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
311            sadness: f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
312            anger: f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap()),
313            fear: f32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap()),
314            surprise: f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap()),
315            disgust: f32::from_le_bytes(data[pos + 20..pos + 24].try_into().unwrap()),
316            contempt: f32::from_le_bytes(data[pos + 24..pos + 28].try_into().unwrap()),
317        };
318        pos += 28;
319
320        let gaze = GazeState {
321            yaw: f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
322            pitch: f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
323            looking_at_camera: data[pos + 8] != 0,
324            blink: f32::from_le_bytes(data[pos + 9..pos + 13].try_into().unwrap()),
325        };
326        pos += 13;
327
328        let mouth = MouthState {
329            openness: f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
330            smile: f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
331            viseme: Viseme::Neutral, // Simplified
332        };
333
334        Ok(FaceState {
335            timestamp: StateTime::from_millis(0), // Will be set from parent
336            present,
337            head_rotation,
338            emotion,
339            gaze,
340            mouth,
341            speaking,
342            confidence,
343        })
344    }
345
346    fn decode_pose(data: &[u8]) -> Result<(PoseState, usize), EncodingError> {
347        if data.len() < 20 {
348            return Err(EncodingError::BufferTooSmall);
349        }
350
351        let mut pos = 0;
352
353        let flags = data[pos];
354        pos += 1;
355        let present = flags & 0x01 != 0;
356
357        let confidence = f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
358        pos += 4;
359
360        let gesture = crate::Gesture::None; // Simplified
361        let activity = crate::ActivityState::Unknown; // Simplified
362        pos += 2;
363
364        let velocity = Position3D {
365            x: f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
366            y: f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
367            z: f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap()),
368        };
369        pos += 12;
370
371        let num_joints = data[pos] as usize;
372        pos += 1;
373
374        let mut joints = Vec::with_capacity(num_joints);
375        for _ in 0..num_joints {
376            if pos + 32 > data.len() {
377                return Err(EncodingError::BufferTooSmall);
378            }
379
380            let joint = JointState {
381                position: Position3D {
382                    x: f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
383                    y: f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
384                    z: f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap()),
385                },
386                rotation: Rotation3D {
387                    w: f32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap()),
388                    x: f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap()),
389                    y: f32::from_le_bytes(data[pos + 20..pos + 24].try_into().unwrap()),
390                    z: f32::from_le_bytes(data[pos + 24..pos + 28].try_into().unwrap()),
391                },
392                confidence: f32::from_le_bytes(data[pos + 28..pos + 32].try_into().unwrap()),
393            };
394            joints.push(joint);
395            pos += 32;
396        }
397
398        Ok((
399            PoseState {
400                timestamp: StateTime::from_millis(0),
401                present,
402                joints,
403                gesture,
404                activity,
405                confidence,
406                velocity,
407            },
408            pos,
409        ))
410    }
411
412    fn decode_scene(data: &[u8]) -> Result<SceneState, EncodingError> {
413        if data.len() < 25 {
414            return Err(EncodingError::BufferTooSmall);
415        }
416
417        let mut pos = 0;
418
419        let background_color = Color {
420            r: f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()),
421            g: f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()),
422            b: f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap()),
423        };
424        pos += 12;
425
426        let lighting = LightingCondition::Normal; // Simplified
427        let environment = EnvironmentType::Unknown; // Simplified
428        let complexity = BackgroundComplexity::Simple; // Simplified
429        pos += 3;
430
431        let flags = data[pos];
432        pos += 1;
433        let background_motion = flags & 0x01 != 0;
434
435        let blur = f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
436        pos += 4;
437        let noise = f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
438        pos += 4;
439        let detail_level = f32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
440
441        Ok(SceneState {
442            timestamp: StateTime::from_millis(0),
443            background_color,
444            lighting,
445            environment,
446            complexity,
447            objects: Vec::new(),
448            background_motion,
449            blur,
450            noise,
451            detail_level,
452        })
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips a bare keyframe (no face/pose/scene) and checks that
    /// every header field survives.
    #[test]
    fn test_encode_decode_roundtrip() {
        let node = NodeId::new(12345);
        let time = StateTime::from_millis(1000);
        let state = VisualState::keyframe(node, time, 1);

        let encoded = VisualEncoder::encode(&state);
        let decoded = VisualEncoder::decode(&encoded).unwrap();

        assert_eq!(decoded.id.0, state.id.0);
        assert_eq!(decoded.source.0, state.source.0);
        assert_eq!(decoded.timestamp.as_millis(), state.timestamp.as_millis());
        assert_eq!(decoded.sequence, state.sequence);
        assert_eq!(decoded.is_keyframe, state.is_keyframe);
        // No optional sections were attached, so none should decode.
        assert!(decoded.face.is_none());
        assert!(decoded.pose.is_none());
        assert!(decoded.scene.is_none());
    }

    /// The face section has a fixed wire size, so the total length is exact:
    /// 45-byte header + 67-byte face section.
    #[test]
    fn test_encode_with_face() {
        let node = NodeId::new(1);
        let time = StateTime::from_millis(0);
        let face = FaceState::new(time);
        let state = VisualState::keyframe(node, time, 1).with_face(face);

        let encoded = VisualEncoder::encode(&state);
        assert_eq!(encoded.len(), 45 + 67);
    }
}