// voirs_spatial/gestures.rs
//! Gesture Control System for Spatial Audio
//!
//! This module provides hand and body gesture-based audio interaction capabilities
//! for spatial audio processing, supporting both VR/AR controller input and
//! computer vision-based hand tracking.

use std::collections::HashMap;
use std::time::{Duration, Instant};

use serde::{Deserialize, Serialize};

use crate::{Error, Position3D, Result};

/// Supported gesture recognition methods.
///
/// Selected via [`GestureConfig::recognition_method`]; only carried as
/// configuration here — the recognition backends themselves live elsewhere.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum GestureRecognitionMethod {
    /// VR/AR controller-based gesture recognition
    Controller,
    /// Computer vision-based hand tracking
    HandTracking,
    /// IMU-based body gesture recognition
    BodyTracking,
    /// Hybrid approach combining multiple methods
    Hybrid,
}

/// Types of gestures that can be recognized.
///
/// `Hash`/`Eq` are derived so this can key the controller's
/// gesture-to-action map and per-type smoothing buffers.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum GestureType {
    // Hand Gestures
    /// Point gesture for audio source selection
    Point,
    /// Grab gesture for audio source manipulation
    Grab,
    /// Pinch gesture for precise control
    Pinch,
    /// Open palm for area selection
    Palm,
    /// Swipe gesture for navigation
    Swipe,
    /// Rotation gesture for 3D manipulation
    Rotate,
    /// Scale gesture for distance/volume control
    Scale,

    // Body Gestures
    /// Head tilt for spatial orientation
    HeadTilt,
    /// Shoulder shrug for attention
    ShoulderShrug,
    /// Lean forward/backward for engagement
    Lean,
    /// Turn body for spatial navigation
    BodyTurn,

    // Combined Gestures
    /// Two-handed manipulation
    TwoHanded,
    /// Full body spatial positioning
    FullBody,
}

/// Direction information for directional gestures (e.g. swipes and rotations).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum GestureDirection {
    /// Leftward direction
    Left,
    /// Rightward direction
    Right,
    /// Upward direction
    Up,
    /// Downward direction
    Down,
    /// Forward direction
    Forward,
    /// Backward direction
    Backward,
    /// Clockwise rotation
    Clockwise,
    /// Counterclockwise rotation
    Counterclockwise,
}

/// Which hand performed a hand-based gesture.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Hand {
    /// Left hand
    Left,
    /// Right hand
    Right,
    /// Both hands
    Both,
}

/// Confidence level for gesture recognition.
///
/// All scores are expected in the range 0.0–1.0; this is a convention,
/// not enforced by construction.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct GestureConfidence {
    /// Overall confidence score (0.0 to 1.0); compared against
    /// `GestureConfig::min_confidence` to filter samples.
    pub score: f32,
    /// Confidence in position accuracy
    pub position_confidence: f32,
    /// Confidence in gesture type recognition
    pub type_confidence: f32,
    /// Temporal stability confidence
    pub temporal_confidence: f32,
}

/// A single gesture sample: position plus recognition metadata.
///
/// Not `Serialize`/`Deserialize` because `std::time::Instant` has no
/// serde implementation.
#[derive(Debug, Clone, PartialEq)]
pub struct GestureData {
    /// Type of gesture
    pub gesture_type: GestureType,
    /// Timestamp when gesture was detected
    pub timestamp: Instant,
    /// 3D position of the gesture
    pub position: Position3D,
    /// Direction (if applicable)
    pub direction: Option<GestureDirection>,
    /// Hand information (if applicable)
    pub hand: Option<Hand>,
    /// Gesture velocity (for dynamic gestures)
    pub velocity: Option<Position3D>,
    /// Confidence in gesture recognition
    pub confidence: GestureConfidence,
    /// Additional gesture-specific parameters (free-form name -> value)
    pub parameters: HashMap<String, f32>,
}

/// Gesture event representing a complete gesture action.
///
/// Produced by `GestureController` as samples arrive (Start/Update) or
/// time out (End).
#[derive(Debug, Clone, PartialEq)]
pub struct GestureEvent {
    /// Gesture data for this sample
    pub data: GestureData,
    /// Duration of the gesture (zero for Start events; time since the
    /// previous sample for Update/End events)
    pub duration: Duration,
    /// Whether this is a start, update, or end event
    pub event_type: GestureEventType,
}

/// Phase of a gesture's lifecycle that an event reports.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum GestureEventType {
    /// Gesture has started
    Start,
    /// Gesture is continuing (update)
    Update,
    /// Gesture has ended
    End,
    /// Single-shot gesture
    Trigger,
}

/// Audio action to be performed in response to a recognized gesture.
///
/// Mapped from gesture types via `GestureController::add_action_mapping`
/// and retrieved with `GestureController::get_actions_for_gesture`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AudioAction {
    /// Select an audio source
    SelectSource {
        /// ID of the audio source to select
        source_id: String,
    },
    /// Move an audio source to a position
    MoveSource {
        /// ID of the audio source to move
        source_id: String,
        /// New position for the audio source
        position: Position3D,
    },
    /// Adjust volume
    AdjustVolume {
        /// Optional source ID (None for global volume)
        source_id: Option<String>,
        /// New volume level (0.0-1.0)
        volume: f32,
    },
    /// Adjust spatial parameters
    AdjustSpatial {
        /// ID of the audio source
        source_id: String,
        /// Spatial parameters to adjust (parameter name -> value)
        parameters: HashMap<String, f32>,
    },
    /// Create spatial zone
    CreateZone {
        /// Center position of the zone
        position: Position3D,
        /// Radius of the zone
        radius: f32,
    },
    /// Navigate in 3D space
    Navigate {
        /// Direction of navigation
        direction: GestureDirection,
        /// Speed of navigation
        speed: f32,
    },
    /// Toggle audio effects
    ToggleEffect {
        /// Type of effect to toggle
        effect_type: String,
        /// Whether to enable or disable the effect
        enabled: bool,
    },
}

/// Configuration for gesture recognition.
///
/// See [`GestureConfig::default`] for the default values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GestureConfig {
    /// Recognition method to use
    pub recognition_method: GestureRecognitionMethod,
    /// Minimum confidence threshold for gesture recognition; samples with a
    /// lower `GestureConfidence::score` are dropped
    pub min_confidence: f32,
    /// Enabled gesture types; samples of other types are dropped
    pub enabled_gestures: Vec<GestureType>,
    /// Gesture sensitivity (0.1 = low, 1.0 = high)
    pub sensitivity: f32,
    /// Smoothing factor for gesture tracking (0.0 = no smoothing, 1.0 = maximum)
    pub smoothing: f32,
    /// Maximum distance for gesture recognition (in meters)
    pub max_distance: f32,
    /// Minimum gesture duration to be considered valid
    pub min_duration: Duration,
    /// Maximum time between gesture updates before considering it ended
    pub max_update_interval: Duration,
}

224impl Default for GestureConfig {
225    fn default() -> Self {
226        Self {
227            recognition_method: GestureRecognitionMethod::Controller,
228            min_confidence: 0.7,
229            enabled_gestures: vec![
230                GestureType::Point,
231                GestureType::Grab,
232                GestureType::Pinch,
233                GestureType::Swipe,
234                GestureType::Rotate,
235                GestureType::Scale,
236            ],
237            sensitivity: 0.8,
238            smoothing: 0.3,
239            max_distance: 5.0,
240            min_duration: Duration::from_millis(100),
241            max_update_interval: Duration::from_millis(50),
242        }
243    }
244}
245
/// Gesture recognition and processing system.
///
/// Tracks in-flight gestures, smooths their positions, records an event
/// history, and maps recognized gestures to [`AudioAction`]s.
pub struct GestureController {
    /// Configuration
    config: GestureConfig,
    /// Active gestures being tracked, keyed by internal gesture ID
    active_gestures: HashMap<u32, GestureData>,
    /// Gesture history for temporal analysis (capped at 1000 events)
    gesture_history: Vec<GestureEvent>,
    /// Next gesture ID to hand out (starts at 1)
    next_id: u32,
    /// Gesture-to-action mappings
    action_mappings: HashMap<GestureType, Vec<AudioAction>>,
    /// Smoothing buffers for gesture positions, keyed by gesture *type*
    /// (see `gesture_type_to_id`), not by gesture instance
    position_buffers: HashMap<u32, Vec<Position3D>>,
}

262impl GestureController {
263    /// Create a new gesture controller
264    pub fn new(config: GestureConfig) -> Self {
265        Self {
266            config,
267            active_gestures: HashMap::new(),
268            gesture_history: Vec::new(),
269            next_id: 1,
270            action_mappings: HashMap::new(),
271            position_buffers: HashMap::new(),
272        }
273    }
274
275    /// Create a gesture controller with default configuration
276    pub fn with_default_config() -> Self {
277        Self::new(GestureConfig::default())
278    }
279
280    /// Add a gesture-to-action mapping
281    pub fn add_action_mapping(&mut self, gesture_type: GestureType, action: AudioAction) {
282        self.action_mappings
283            .entry(gesture_type)
284            .or_default()
285            .push(action);
286    }
287
288    /// Process incoming gesture data
289    pub fn process_gesture_data(&mut self, raw_data: GestureData) -> Result<Vec<GestureEvent>> {
290        // Check if gesture meets minimum confidence threshold
291        if raw_data.confidence.score < self.config.min_confidence {
292            return Ok(Vec::new());
293        }
294
295        // Check if gesture type is enabled
296        if !self
297            .config
298            .enabled_gestures
299            .contains(&raw_data.gesture_type)
300        {
301            return Ok(Vec::new());
302        }
303
304        // Apply smoothing to position data
305        let smoothed_data = self.apply_smoothing(raw_data)?;
306
307        // Determine if this is a new gesture or update to existing one
308        let gesture_id = self.find_or_create_gesture_id(&smoothed_data);
309
310        // Generate appropriate events
311        let events = self.generate_gesture_events(gesture_id, smoothed_data)?;
312
313        // Store in history
314        for event in &events {
315            self.gesture_history.push(event.clone());
316
317            // Limit history size
318            if self.gesture_history.len() > 1000 {
319                self.gesture_history.remove(0);
320            }
321        }
322
323        Ok(events)
324    }
325
326    /// Apply smoothing to gesture data
327    fn apply_smoothing(&mut self, mut data: GestureData) -> Result<GestureData> {
328        if self.config.smoothing > 0.0 {
329            let gesture_key = self.gesture_type_to_id(&data.gesture_type);
330            let buffer = self.position_buffers.entry(gesture_key).or_default();
331
332            buffer.push(data.position);
333
334            // Limit buffer size
335            if buffer.len() > 10 {
336                buffer.remove(0);
337            }
338
339            // Apply exponential moving average
340            if buffer.len() > 1 {
341                let alpha = 1.0 - self.config.smoothing;
342                let prev_pos = &buffer[buffer.len() - 2];
343
344                data.position.x = alpha * data.position.x + (1.0 - alpha) * prev_pos.x;
345                data.position.y = alpha * data.position.y + (1.0 - alpha) * prev_pos.y;
346                data.position.z = alpha * data.position.z + (1.0 - alpha) * prev_pos.z;
347            }
348        }
349
350        Ok(data)
351    }
352
353    /// Find existing gesture ID or create new one
354    fn find_or_create_gesture_id(&mut self, data: &GestureData) -> u32 {
355        // Simple approach: look for active gesture of same type within reasonable distance/time
356        for (&id, existing) in &self.active_gestures {
357            if existing.gesture_type == data.gesture_type {
358                let distance = existing.position.distance_to(&data.position);
359                let time_diff = data.timestamp.duration_since(existing.timestamp);
360
361                if distance < 0.2 && time_diff < self.config.max_update_interval {
362                    return id;
363                }
364            }
365        }
366
367        // Create new gesture ID
368        let id = self.next_id;
369        self.next_id += 1;
370        id
371    }
372
373    /// Generate gesture events for a gesture data point
374    fn generate_gesture_events(
375        &mut self,
376        gesture_id: u32,
377        data: GestureData,
378    ) -> Result<Vec<GestureEvent>> {
379        let mut events = Vec::new();
380
381        if let Some(existing) = self.active_gestures.get(&gesture_id) {
382            // This is an update to existing gesture
383            let duration = data.timestamp.duration_since(existing.timestamp);
384
385            events.push(GestureEvent {
386                data: data.clone(),
387                duration,
388                event_type: GestureEventType::Update,
389            });
390        } else {
391            // This is a new gesture
392            events.push(GestureEvent {
393                data: data.clone(),
394                duration: Duration::from_millis(0),
395                event_type: GestureEventType::Start,
396            });
397        }
398
399        // Update active gestures
400        self.active_gestures.insert(gesture_id, data);
401
402        Ok(events)
403    }
404
405    /// Process gesture timeout and generate end events
406    pub fn process_timeouts(&mut self) -> Vec<GestureEvent> {
407        let now = Instant::now();
408        let mut ended_gestures = Vec::new();
409        let mut events = Vec::new();
410
411        for (&id, data) in &self.active_gestures {
412            let time_since_update = now.duration_since(data.timestamp);
413            if time_since_update > self.config.max_update_interval {
414                ended_gestures.push(id);
415
416                events.push(GestureEvent {
417                    data: data.clone(),
418                    duration: time_since_update,
419                    event_type: GestureEventType::End,
420                });
421            }
422        }
423
424        // Remove ended gestures
425        for id in ended_gestures {
426            self.active_gestures.remove(&id);
427        }
428
429        events
430    }
431
432    /// Get actions for a gesture event
433    pub fn get_actions_for_gesture(&self, event: &GestureEvent) -> Vec<AudioAction> {
434        self.action_mappings
435            .get(&event.data.gesture_type)
436            .cloned()
437            .unwrap_or_default()
438    }
439
440    /// Get active gestures
441    pub fn get_active_gestures(&self) -> &HashMap<u32, GestureData> {
442        &self.active_gestures
443    }
444
445    /// Get gesture history
446    pub fn get_gesture_history(&self) -> &[GestureEvent] {
447        &self.gesture_history
448    }
449
450    /// Clear gesture history
451    pub fn clear_history(&mut self) {
452        self.gesture_history.clear();
453    }
454
455    /// Update configuration
456    pub fn update_config(&mut self, config: GestureConfig) {
457        self.config = config;
458    }
459
460    /// Get current configuration
461    pub fn get_config(&self) -> &GestureConfig {
462        &self.config
463    }
464
465    /// Helper function to convert gesture type to numeric ID for buffering
466    fn gesture_type_to_id(&self, gesture_type: &GestureType) -> u32 {
467        match gesture_type {
468            GestureType::Point => 1,
469            GestureType::Grab => 2,
470            GestureType::Pinch => 3,
471            GestureType::Palm => 4,
472            GestureType::Swipe => 5,
473            GestureType::Rotate => 6,
474            GestureType::Scale => 7,
475            GestureType::HeadTilt => 8,
476            GestureType::ShoulderShrug => 9,
477            GestureType::Lean => 10,
478            GestureType::BodyTurn => 11,
479            GestureType::TwoHanded => 12,
480            GestureType::FullBody => 13,
481        }
482    }
483}
484
/// Fluent builder for constructing [`GestureData`] values.
///
/// Required fields (type and position) are taken by `new`; everything else
/// is optional and set via chained methods before `build`.
pub struct GestureBuilder {
    // Required gesture type
    gesture_type: GestureType,
    // Required 3D position
    position: Position3D,
    // Optional direction for directional gestures
    direction: Option<GestureDirection>,
    // Optional hand attribution
    hand: Option<Hand>,
    // Optional velocity for dynamic gestures
    velocity: Option<Position3D>,
    // Optional confidence; `build` substitutes a 0.8-across-the-board default
    confidence: Option<GestureConfidence>,
    // Free-form named parameters
    parameters: HashMap<String, f32>,
}

496impl GestureBuilder {
497    /// Create a new gesture builder
498    pub fn new(gesture_type: GestureType, position: Position3D) -> Self {
499        Self {
500            gesture_type,
501            position,
502            direction: None,
503            hand: None,
504            velocity: None,
505            confidence: None,
506            parameters: HashMap::new(),
507        }
508    }
509
510    /// Set gesture direction
511    pub fn direction(mut self, direction: GestureDirection) -> Self {
512        self.direction = Some(direction);
513        self
514    }
515
516    /// Set hand information
517    pub fn hand(mut self, hand: Hand) -> Self {
518        self.hand = Some(hand);
519        self
520    }
521
522    /// Set gesture velocity
523    pub fn velocity(mut self, velocity: Position3D) -> Self {
524        self.velocity = Some(velocity);
525        self
526    }
527
528    /// Set gesture confidence
529    pub fn confidence(mut self, confidence: GestureConfidence) -> Self {
530        self.confidence = Some(confidence);
531        self
532    }
533
534    /// Add a parameter
535    pub fn parameter(mut self, key: String, value: f32) -> Self {
536        self.parameters.insert(key, value);
537        self
538    }
539
540    /// Build the gesture data
541    pub fn build(self) -> GestureData {
542        GestureData {
543            gesture_type: self.gesture_type,
544            timestamp: Instant::now(),
545            position: self.position,
546            direction: self.direction,
547            hand: self.hand,
548            velocity: self.velocity,
549            confidence: self.confidence.unwrap_or(GestureConfidence {
550                score: 0.8,
551                position_confidence: 0.8,
552                type_confidence: 0.8,
553                temporal_confidence: 0.8,
554            }),
555            parameters: self.parameters,
556        }
557    }
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// Default config exposes the documented 0.7 threshold and enables Point.
    #[test]
    fn test_gesture_controller_creation() {
        let controller = GestureController::with_default_config();
        assert_eq!(controller.config.min_confidence, 0.7);
        assert!(controller
            .config
            .enabled_gestures
            .contains(&GestureType::Point));
    }

    /// Builder round-trips every field it was given.
    #[test]
    fn test_gesture_builder() {
        let gesture = GestureBuilder::new(GestureType::Point, Position3D::new(1.0, 2.0, 3.0))
            .direction(GestureDirection::Forward)
            .hand(Hand::Right)
            .parameter("intensity".to_string(), 0.8)
            .build();

        assert_eq!(gesture.gesture_type, GestureType::Point);
        assert_eq!(gesture.position, Position3D::new(1.0, 2.0, 3.0));
        assert_eq!(gesture.direction, Some(GestureDirection::Forward));
        assert_eq!(gesture.hand, Some(Hand::Right));
        assert_eq!(gesture.parameters.get("intensity"), Some(&0.8));
    }

    /// Registering an action makes it retrievable under its gesture type.
    #[test]
    fn test_action_mapping() {
        let mut controller = GestureController::with_default_config();
        let action = AudioAction::SelectSource {
            source_id: "test_source".to_string(),
        };

        controller.add_action_mapping(GestureType::Point, action.clone());

        let mappings = controller
            .action_mappings
            .get(&GestureType::Point)
            .expect("Should have Point gesture action mappings");
        assert_eq!(mappings.len(), 1);
    }

    /// Confidence struct is plain data; fields are independently settable.
    #[test]
    fn test_gesture_confidence() {
        let confidence = GestureConfidence {
            score: 0.9,
            position_confidence: 0.85,
            type_confidence: 0.95,
            temporal_confidence: 0.8,
        };

        assert!(confidence.score > 0.8);
        assert!(confidence.type_confidence > confidence.position_confidence);
    }

    /// A fresh sample above threshold yields exactly one Start event and
    /// becomes an active gesture.
    #[test]
    fn test_gesture_processing() {
        let mut controller = GestureController::with_default_config();

        let gesture_data =
            GestureBuilder::new(GestureType::Point, Position3D::new(0.0, 0.0, 1.0)).build();

        let events = controller
            .process_gesture_data(gesture_data)
            .expect("Should successfully process gesture data");
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, GestureEventType::Start);
        assert_eq!(controller.active_gestures.len(), 1);
    }

    /// A gesture that is not updated within max_update_interval is ended
    /// by process_timeouts and removed from the active set.
    #[test]
    fn test_gesture_timeout() {
        let mut controller = GestureController::with_default_config();
        controller.config.max_update_interval = Duration::from_millis(1);

        let gesture_data =
            GestureBuilder::new(GestureType::Grab, Position3D::new(1.0, 1.0, 1.0)).build();

        // Process initial gesture
        controller
            .process_gesture_data(gesture_data)
            .expect("Should successfully process initial gesture");
        assert_eq!(controller.active_gestures.len(), 1);

        // Wait past the interval, then sweep for timeouts
        std::thread::sleep(Duration::from_millis(2));
        let timeout_events = controller.process_timeouts();

        assert_eq!(timeout_events.len(), 1);
        assert_eq!(timeout_events[0].event_type, GestureEventType::End);
        assert_eq!(controller.active_gestures.len(), 0);
    }

    /// Samples below min_confidence are dropped without producing events.
    #[test]
    fn test_low_confidence_filtering() {
        let mut controller = GestureController::with_default_config();
        controller.config.min_confidence = 0.8;

        let low_confidence_gesture = GestureData {
            gesture_type: GestureType::Point,
            timestamp: Instant::now(),
            position: Position3D::new(0.0, 0.0, 0.0),
            direction: None,
            hand: None,
            velocity: None,
            confidence: GestureConfidence {
                score: 0.5, // Below threshold
                position_confidence: 0.5,
                type_confidence: 0.5,
                temporal_confidence: 0.5,
            },
            parameters: HashMap::new(),
        };

        let events = controller
            .process_gesture_data(low_confidence_gesture)
            .expect("Should successfully process low confidence gesture (even if filtered)");
        assert_eq!(events.len(), 0); // Should be filtered out
    }
}