1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
//! Pure-Rust dominant speaker identification for WebRTC applications.
//!
//! This crate implements the three-time-scale subband comparison algorithm
//! described in Volfin & Cohen, "Dominant Speaker Identification for
//! Multipoint Videoconferencing", IEEE 2012. The implementation follows
//! mediasoup's C++ `ActiveSpeakerObserver` for constants and Jitsi's Java
//! `DominantSpeakerIdentification` for the overall structure.
//!
//! Feed it RFC 6464 audio-level observations and it tells you who is talking.
//! No FFI, no WebRTC stack dependency, no unsafe code.
//!
//! # Quick start
//!
//! ```rust
//! use std::time::{Duration, Instant};
//! use dominant_speaker::{ActiveSpeakerDetector, TICK_INTERVAL};
//!
//! let mut detector = ActiveSpeakerDetector::new();
//! let t0 = Instant::now();
//!
//! // Register two participants.
//! detector.add_peer(1, t0);
//! detector.add_peer(2, t0);
//!
//! // Feed audio levels (0 = loud, 127 = silent, per RFC 6464).
//! // Simulate peer 1 speaking for 2 seconds.
//! let mut t = t0;
//! while t < t0 + Duration::from_millis(2000) {
//!     detector.record_level(1, 5, t);   // peer 1: active (low level value = loud)
//!     detector.record_level(2, 127, t); // peer 2: silent
//!     t += Duration::from_millis(20);
//! }
//!
//! // Call tick() on a timer — returns Some(SpeakerChange) only on speaker change.
//! if let Some(change) = detector.tick(t0 + TICK_INTERVAL) {
//!     println!("Dominant speaker: peer {}", change.peer_id);
//! }
//! ```
//!
//! See the [README](https://github.com/anatolykoptev/rust-dominant-speaker)
//! for algorithm details, constants reference, and prior art.
pub use ActiveSpeakerDetector;
/// Emitted by [`ActiveSpeakerDetector::tick`] when the dominant speaker changes.
/// Convenience alias using `u64` peer IDs — backward-compatible with v0.1.x.
pub type DefaultDetector = ;
/// Tunable constants for the dominant-speaker election.
///
/// Defaults match mediasoup's production constants exactly.
///
/// # Example
///
/// ```rust
/// use dominant_speaker::{ActiveSpeakerDetector, DetectorConfig};
/// use std::time::Duration;
///
/// // Use defaults (mediasoup-identical behaviour).
/// let default_detector: ActiveSpeakerDetector<u64> = ActiveSpeakerDetector::new();
///
/// // Raise C1/C2 for a low-bitrate / mobile deployment: fewer speaker switches.
/// let config = DetectorConfig {
///     c1: 5.0,
///     c2: 4.0,
///     tick_interval: Duration::from_millis(500),
///     ..DetectorConfig::default()
/// };
/// let tuned_detector: ActiveSpeakerDetector<u64> = ActiveSpeakerDetector::with_config(config);
/// ```
///
/// # Serde
///
/// Enable the `serde` feature to serialize/deserialize this struct.
/// `tick_interval` is serialized as milliseconds (`u64`).
///
/// ```rust,ignore
/// // Requires `dominant_speaker` with `serde` feature and `serde_json` dev-dep.
/// use dominant_speaker::DetectorConfig;
/// let config = DetectorConfig::default();
/// let json = serde_json::to_string(&config).unwrap();
/// let back: DetectorConfig = serde_json::from_str(&json).unwrap();
/// assert!((back.c1 - config.c1).abs() < f64::EPSILON);
/// ```
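// Algorithm constants — ported verbatim from mediasoup's ActiveSpeakerObserver.
// Intended as `pub(crate)` so sibling modules can share them without exposing to users.
// NOTE(review): the declarations below are plain `pub` — confirm the intended visibility.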
/// Log-ratio threshold applied on the immediate time scale (mediasoup: C1).
pub const C1: f64 = 3.0;

/// Log-ratio threshold applied on the medium time scale (mediasoup: C2).
pub const C2: f64 = 2.0;

/// Log-ratio threshold applied on the long time scale (mediasoup: C3).
///
/// Zero means the long window is disabled.
pub const C3: f64 = 0.0;

/// Number of subbands on the immediate time scale (mediasoup: N1).
pub const N1: u32 = 13;

/// Number of subbands on the medium time scale (mediasoup: N2).
pub const N2: u32 = 5;

/// Number of subbands on the long time scale (mediasoup: N3).
pub const N3: u32 = 10;

/// Age, in milliseconds, after which a stale level entry is replaced with
/// silence (mediasoup: LevelIdleTimeout).
pub const LEVEL_IDLE_TIMEOUT_MS: u64 = 40;

/// Idle time, in milliseconds, after which a non-dominant speaker is paused
/// (mediasoup: SpeakerIdleTimeout). Works out to one hour.
pub const SPEAKER_IDLE_TIMEOUT_MS: u64 = 3_600_000;

/// Threshold applied when `longs` are computed from `mediums`
/// (mediasoup: LongThreashold).
pub const LONG_THRESHOLD: u8 = 4;

/// Largest RFC 6464 audio-level value (mediasoup: MaxLevel).
pub const MAX_LEVEL: u8 = 127;

/// Smallest RFC 6464 audio-level value (mediasoup: MinLevel).
pub const MIN_LEVEL: u8 = 0;

/// Window length used for adaptive minimum-level estimation
/// (mediasoup: MinLevelWindowLen).
///
/// Kept in mediasoup's `15*1000/20` form; it evaluates to 750.
pub const MIN_LEVEL_WINDOW_LEN: u32 = 15 * 1000 / 20;

/// Threshold used when downsampling the immediate window into the medium
/// window (mediasoup: MediumThreshold).
pub const MEDIUM_THRESHOLD: u8 = 7;

/// Number of entries in the immediate buffer (mediasoup: ImmediateBuffLen).
///
/// Sized to cover 1 second at a 20 ms cadence × 5 subbands.
pub const IMMEDIATE_BUFF_LEN: usize = 50;

/// Number of entries in the medium buffer (mediasoup: MediumsBuffLen).
pub const MEDIUMS_BUFF_LEN: usize = 10;

/// Number of entries in the long buffer (mediasoup: LongsBuffLen).
pub const LONGS_BUFF_LEN: usize = 1;

/// Number of entries in the levels ring buffer (mediasoup: LevelsBuffLen).
pub const LEVELS_BUFF_LEN: usize = 50;

/// Lower bound for an activity score, so the ratio computation never takes
/// log(0) (mediasoup: MinActivityScore).
pub const MIN_ACTIVITY_SCORE: f64 = 1e-10;
/// Recommended tick interval matching mediasoup's production tuning.
///
/// Call [`ActiveSpeakerDetector::tick`] at this cadence for best results.
// NOTE(review): the initializer was the bare path `from_millis`, with no
// `Duration::` receiver and no argument, which does not compile. 300 ms is
// mediasoup's documented default observer interval — confirm it against the
// crate's released value.
pub const TICK_INTERVAL: Duration = Duration::from_millis(300);
/// Compute the subband width for a given N1 value.
///
/// Formula: `ceil(128 / n1)`. Mediasoup hard-codes 10 for N1=13 —
/// `ceil(128/13) = 10`. This function generalises it for custom configs.
pub