1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
//! Pure-Rust dominant speaker identification for WebRTC applications.
//!
//! This crate implements the three-time-scale subband comparison algorithm
//! described in Volfin & Cohen, "Dominant Speaker Identification for
//! Multipoint Videoconferencing", IEEE 2012. The implementation follows
//! mediasoup's C++ `ActiveSpeakerObserver` for constants and Jitsi's Java
//! `DominantSpeakerIdentification` for the overall structure.
//!
//! Feed it RFC 6464 audio-level observations and it tells you who is talking.
//! No FFI, no WebRTC stack dependency, no unsafe code.
//!
//! # Quick start
//!
//! ```rust
//! use std::time::{Duration, Instant};
//! use dominant_speaker::{ActiveSpeakerDetector, TICK_INTERVAL};
//!
//! let mut detector = ActiveSpeakerDetector::new();
//! let t0 = Instant::now();
//!
//! // Register two participants.
//! detector.add_peer(1, t0);
//! detector.add_peer(2, t0);
//!
//! // Feed audio levels (0 = loud, 127 = silent, per RFC 6464).
//! // Simulate peer 1 speaking for 2 seconds.
//! let mut t = t0;
//! while t < t0 + Duration::from_millis(2000) {
//!     detector.record_level(1, 5, t);   // peer 1: active (low level value = loud)
//!     detector.record_level(2, 127, t); // peer 2: silent
//!     t += Duration::from_millis(20);
//! }
//!
//! // Call tick() on a timer — returns Some(peer_id) only on speaker change.
//! if let Some(dominant) = detector.tick(t0 + TICK_INTERVAL) {
//!     println!("Dominant speaker: peer {dominant}");
//! }
//! ```
//!
//! See the [README](https://github.com/anatolykoptev/rust-dominant-speaker)
//! for algorithm details, constants reference, and prior art.
pub use ActiveSpeakerDetector;
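// Algorithm constants — ported verbatim from mediasoup's ActiveSpeakerObserver.
// NOTE(review): the stated intent is `pub(crate)` (shared between sibling modules
// without exposing them to users), but the declarations below are `pub` — confirm
// which visibility is wanted.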
/// Log-ratio threshold applied at the immediate time scale (mediasoup: C1).
pub const C1: f64 = 3.0;
/// Log-ratio threshold applied at the medium time scale (mediasoup: C2).
pub const C2: f64 = 2.0;
/// Log-ratio threshold applied at the long time scale (mediasoup: C3).
///
/// Zero means the long window is disabled.
pub const C3: f64 = 0.0;
/// Number of subbands at the immediate time scale (mediasoup: N1).
pub const N1: u32 = 13;
/// Number of subbands at the medium time scale (mediasoup: N2).
pub const N2: u32 = 5;
/// Number of subbands at the long time scale (mediasoup: N3).
pub const N3: u32 = 10;
/// Age in milliseconds after which a stale level entry is replaced with silence
/// (mediasoup: LevelIdleTimeout).
pub const LEVEL_IDLE_TIMEOUT_MS: u64 = 40;
/// Idle time in milliseconds after which a non-dominant speaker is paused
/// (mediasoup: SpeakerIdleTimeout). The value is one hour.
pub const SPEAKER_IDLE_TIMEOUT_MS: u64 = 3_600_000;
/// Threshold applied when deriving `longs` from `mediums`
/// (mediasoup: LongThreashold — spelling as in mediasoup).
pub const LONG_THRESHOLD: u8 = 4;
/// Largest RFC 6464 audio-level value (mediasoup: MaxLevel).
pub const MAX_LEVEL: u8 = 127;
/// Smallest RFC 6464 audio-level value (mediasoup: MinLevel).
pub const MIN_LEVEL: u8 = 0;
/// Window length used for adaptive minimum-level estimation
/// (mediasoup: MinLevelWindowLen = 15*1000/20, i.e. 750).
pub const MIN_LEVEL_WINDOW_LEN: u32 = 15 * 1000 / 20;
/// Threshold applied when downsampling immediates into mediums
/// (mediasoup: MediumThreshold).
pub const MEDIUM_THRESHOLD: u8 = 7;
/// Subunit size for the immediate buffer
/// (mediasoup: SubunitLengthN1 = (127-0+13-1)/13, i.e. 10 with integer division).
// `N1` is 13, so narrowing it to `u8` is lossless; the sum peaks at 140 and fits in `u8`.
pub const SUBUNIT_LENGTH_N1: u8 = (MAX_LEVEL - MIN_LEVEL + (N1 as u8) - 1) / (N1 as u8);
/// Immediate-buffer length (mediasoup: ImmediateBuffLen).
///
/// Evaluates to 50 entries (`MEDIUMS_BUFF_LEN * N2`), which is one second of
/// observations when levels arrive every 20 ms.
pub const IMMEDIATE_BUFF_LEN: usize = MEDIUMS_BUFF_LEN * (N2 as usize);
/// Medium-buffer length (mediasoup: MediumsBuffLen); evaluates to 10 (`LONGS_BUFF_LEN * N3`).
pub const MEDIUMS_BUFF_LEN: usize = LONGS_BUFF_LEN * (N3 as usize);
/// Long-buffer length (mediasoup: LongsBuffLen).
pub const LONGS_BUFF_LEN: usize = 1;
/// Length of the levels ring buffer (mediasoup: LevelsBuffLen); evaluates to 50.
pub const LEVELS_BUFF_LEN: usize = MEDIUMS_BUFF_LEN * (N2 as usize);
/// Lower bound for activity scores, so the ratio computation never takes log(0)
/// (mediasoup: MinActivityScore).
pub const MIN_ACTIVITY_SCORE: f64 = 1e-10;
/// Recommended tick interval matching mediasoup's production tuning.
///
/// The value is 300 ms. Call [`ActiveSpeakerDetector::tick`] at this cadence
/// for best results.
// NOTE(review): the initializer was a bare `from_millis` with no argument, which
// does not compile. 300 ms is mediasoup's default ActiveSpeakerObserver interval
// and is assumed to be the intended value — confirm.
pub const TICK_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);