zng_view_api/audio.rs
1//! Audio device types.
2
3use std::{num::NonZeroU16, time::Duration};
4
5use bitflags::bitflags;
6use serde::{Deserialize, Serialize};
7use zng_task::channel::IpcBytesCast;
8use zng_txt::Txt;
9use zng_unit::Factor;
10
11use crate::api_extension::{ApiExtensionId, ApiExtensionPayload};
12
13crate::declare_id! {
14 /// Audio device ID.
15 ///
16 /// In the View Process this is mapped to a system id.
17 ///
18 /// In the App Process this is mapped to a unique id, but it does not survive View crashes.
19 ///
20 /// The View Process defines the ID.
21 pub struct AudioDeviceId(_);
22
23 /// Id of a decoded or on-demand decoding audio track in the cache.
24 ///
25 /// The View Process defines the ID.
26 pub struct AudioId(_);
27
28 /// Audio playback stream ID.
29 ///
30 /// In the View Process this is mapped to a system id.
31 ///
32 /// In the App Process this is a unique id that survives View crashes.
33 ///
34 /// The App Process defines the ID.
35 pub struct AudioOutputId(_);
36
37 /// Audio playback request ID.
38 ///
39 /// The View Process defines the ID.
40 pub struct AudioPlayId(_);
41
42 /// Id of an audio encode task.
43 ///
44 /// The View Process defines the ID.
45 pub struct AudioEncodeId(_);
46}
47
48/// Info about an audio input or output device: display name, capabilities and supported stream modes.
49#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
50#[non_exhaustive]
51pub struct AudioDeviceInfo {
52 /// Device display name.
53 pub name: Txt,
54 /// Device input/output capabilities.
55 pub capabilities: AudioDeviceCapability,
56 /// Input stream modes this device can produce.
57 pub input_modes: Vec<AudioStreamMode>,
58 /// Output stream modes this device can consume.
59 pub output_modes: Vec<AudioStreamMode>,
60}
61
62bitflags! {
63 /// Represents audio device input/output capabilities.
64 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65 pub struct AudioDeviceCapability: u8 {
66 /// Device can generate audio streams.
67 const INPUT = 0b01;
68 /// Device can consume audio streams.
69 const OUTPUT = 0b11;
70 }
71}
72
73/// Represents a stream capability of an audio device.
74#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
75#[non_exhaustive]
76pub struct AudioStreamMode {
77 /// Number of audio channels.
78 pub channels: NonZeroU16,
79 /// Minimum and maximum sample rate.
80 pub sample_rate: SampleRate,
81 /// Minimum and maximum supported buffer size.
82 pub buffer_size: BufferSize,
83}
84
85/// Represents the minimum and maximum sample rate per audio channel.
86///
87/// Values are in samples processed per second.
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
89pub struct SampleRate {
90 /// Minimum sample rate, inclusive.
91 pub min: u32,
92 /// Maximum sample rate, inclusive.
93 pub max: u32,
94}
95
96/// Represents the minimum and maximum supported buffer size for the device.
97#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
98#[non_exhaustive]
99pub enum BufferSize {
100 /// Supported range. NOTE(review): unit was documented as "frames per second", presumably it is a frame count, confirm.
101 Range {
102 /// Minimum buffer size, inclusive.
103 min: u32,
104 /// Maximum buffer size, inclusive.
105 max: u32,
106 },
107 /// Platform cannot describe buffer size for this device.
108 Unknown,
109}
110
111/// Represents an audio load/decode request.
112#[derive(Debug, Clone, Serialize, Deserialize)]
113#[non_exhaustive]
114pub struct AudioRequest<D> {
115 /// Audio data format.
116 pub format: AudioDataFormat,
117
118 /// Audio data.
119 ///
120 /// Bytes layout depends on the `format`, data structure is [`IpcReadHandle`] or [`IpcReceiver<IpcBytes>`] in the view API.
121 ///
122 /// [`IpcReadHandle`]: zng_task::channel::IpcReadHandle
123 /// [`IpcReceiver<IpcBytes>`]: zng_task::channel::IpcReceiver
124 pub data: D,
125
126 /// Maximum allowed decoded size in bytes.
127 ///
128 /// View-process will avoid decoding and return an error if the track would exceed this limit.
129 pub max_decoded_len: u64,
130
131 /// Defines what tracks are decoded from multi track containers.
132 pub tracks: AudioTracksMode,
133
134 /// Audio is a track (or subtree) of this other audio.
135 ///
136 /// This value is not used by the view-process, it is just returned with the metadata. This is useful when
137 /// an already decoded audio is requested after a respawn to maintain the original container structure.
138 pub parent: Option<AudioTrackMetadata>,
139}
140impl<D> AudioRequest<D> {
141 /// New.
142 pub fn new(format: AudioDataFormat, data: D, max_decoded_len: u64) -> Self {
143 Self {
144 format,
145 data,
146 max_decoded_len,
147 tracks: AudioTracksMode::PRIMARY,
148 parent: None,
149 }
150 }
151}
152
153/// Format of the audio bytes.
154#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
155#[non_exhaustive]
156pub enum AudioDataFormat {
157 /// Data is an already decoded stream of interleaved `f32` samples.
158 InterleavedF32 {
159 /// Number of channels interleaved in the track.
160 channel_count: u16,
161 /// Samples per second.
162 ///
163 /// A sample is a single sequence of `channel_count`.
164 sample_rate: u32,
165 /// Total duration of the track, if it is known.
166 total_duration: Option<Duration>,
167 },
168
169 /// The audio is encoded.
170 ///
171 /// This file extension maybe identifies the format. Fallback to `Unknown` handling if the file extension
172 /// is unknown or the file header does not match.
173 FileExtension(Txt),
174
175 /// The audio is encoded.
176 ///
177 /// This MIME type maybe identifies the format. Fallback to `Unknown` handling if the MIME type
178 /// is unknown or the file header does not match.
179 MimeType(Txt),
180
181 /// The audio is encoded.
182 ///
183 /// A decoder will be selected using the "magic number" at the start of the bytes buffer.
184 Unknown,
185}
186impl From<Txt> for AudioDataFormat {
187 fn from(ext_or_mime: Txt) -> Self {
188 if ext_or_mime.contains('/') {
189 AudioDataFormat::MimeType(ext_or_mime)
190 } else {
191 AudioDataFormat::FileExtension(ext_or_mime)
192 }
193 }
194}
195impl From<&str> for AudioDataFormat {
196 fn from(ext_or_mime: &str) -> Self {
197 Txt::from_str(ext_or_mime).into()
198 }
199}
200
201/// Represents an audio codec capability.
202///
203/// This type will be used in the next breaking release of the view API.
204#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
205#[non_exhaustive]
206pub struct AudioFormat {
207 /// Display name of the format.
208 pub display_name: Txt,
209
210 /// Media types (MIME) associated with the format.
211 ///
212 /// Lowercase, without the `"audio/"` prefix, comma separated if there is more than one.
213 pub media_type_suffixes: Txt,
214
215 /// Common file extensions associated with the format.
216 ///
217 /// Lowercase, without the dot, comma separated if there is more than one.
218 pub file_extensions: Txt,
219
220 /// Capabilities of this format.
221 pub capabilities: AudioFormatCapability,
222}
223impl AudioFormat {
224 /// From static str.
225 ///
226 /// # Panics
227 ///
228 /// Panics if `media_type_suffixes` not ASCII.
229 pub const fn from_static(
230 display_name: &'static str,
231 media_type_suffixes: &'static str,
232 file_extensions: &'static str,
233 capabilities: AudioFormatCapability,
234 ) -> Self {
235 assert!(media_type_suffixes.is_ascii());
236 Self {
237 display_name: Txt::from_static(display_name),
238 media_type_suffixes: Txt::from_static(media_type_suffixes),
239 file_extensions: Txt::from_static(file_extensions),
240 capabilities,
241 }
242 }
243
244 /// Iterate over media type suffixes.
245 pub fn media_type_suffixes_iter(&self) -> impl Iterator<Item = &str> {
246 self.media_type_suffixes.split(',').map(|e| e.trim())
247 }
248
249 /// Iterate over full media types, with `"image/"` prefix.
250 pub fn media_types(&self) -> impl Iterator<Item = Txt> {
251 self.media_type_suffixes_iter().map(Txt::from_str)
252 }
253
254 /// Iterate over extensions.
255 pub fn file_extensions_iter(&self) -> impl Iterator<Item = &str> {
256 self.file_extensions.split(',').map(|e| e.trim())
257 }
258
259 /// Checks if `f` matches any of the mime types or any of the file extensions.
260 ///
261 /// File extensions comparison ignores dot and ASCII case.
262 pub fn matches(&self, f: &str) -> bool {
263 let f = f.strip_prefix('.').unwrap_or(f);
264 let f = f.strip_prefix("audio/").unwrap_or(f);
265 self.media_type_suffixes_iter().any(|e| e.eq_ignore_ascii_case(f)) || self.file_extensions_iter().any(|e| e.eq_ignore_ascii_case(f))
266 }
267}
268
269bitflags! {
270 /// Capabilities of an [`AudioFormat`] implementation.
271 ///
272 /// Note that a `DECODE` capability is omitted because the view-process can always decode formats.
273 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
274 pub struct AudioFormatCapability: u8 {
275 /// View-process can encode audio in this format.
276 const ENCODE = 0b_0000_0001;
277 }
278}
279
280/// Represents an audio encode request.
281#[derive(Debug, Clone, Serialize, Deserialize)]
282#[non_exhaustive]
283pub struct AudioEncodeRequest {
284 /// Audio to encode.
285 pub id: AudioId,
286
287 /// Format query, view-process uses [`AudioFormat::matches`] to find the format.
288 pub format: Txt,
289
290 /// The audio to encode.
291 pub mix: AudioMix,
292}
293impl AudioEncodeRequest {
294 /// New.
295 pub fn new(id: AudioId, format: Txt, mix: AudioMix) -> Self {
296 Self { id, format, mix }
297 }
298}
299
300/// Represents decoded header metadata about an audio track.
301#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
302#[non_exhaustive]
303pub struct AudioMetadata {
304 /// Audio ID.
305 pub id: AudioId,
306
307 /// Number of channels interleaved in the track.
308 pub channel_count: u16,
309 /// Samples per second.
310 ///
311 /// A sample is a single sequence of `channel_count` values.
312 pub sample_rate: u32,
313 /// Total duration of the track, if it is known.
314 pub total_duration: Option<Duration>,
315
316 /// Track is an entry (or subtree) of this other track.
317 pub parent: Option<AudioTrackMetadata>,
318
319 /// Custom metadata.
320 pub extensions: Vec<(ApiExtensionId, ApiExtensionPayload)>,
321}
322impl AudioMetadata {
323 /// New.
324 pub fn new(id: AudioId, channel_count: u16, sample_rate: u32) -> Self {
325 Self {
326 id,
327 channel_count,
328 sample_rate,
329 total_duration: None,
330 parent: None,
331 extensions: vec![],
332 }
333 }
334}
335/// Invalid initial value.
336impl Default for AudioMetadata {
337 fn default() -> Self {
338 Self {
339 id: AudioId::INVALID,
340 channel_count: Default::default(),
341 sample_rate: Default::default(),
342 total_duration: Default::default(),
343 parent: Default::default(),
344 extensions: vec![],
345 }
346 }
347}
348
349/// Represents decoded header metadata about a track position in the container represented by another audio.
350#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
351#[non_exhaustive]
352pub struct AudioTrackMetadata {
353 /// Audio this one belongs to.
354 ///
355 /// The view-process always sends the parent audio metadata first, so this id should be known by the app-process.
356 pub parent: AudioId,
357 /// Sort index of the track in the list of tracks.
358 pub index: usize,
359}
360impl AudioTrackMetadata {
361 /// New.
362 pub fn new(parent: AudioId, index: usize) -> Self {
363 Self { parent, index }
364 }
365}
366
367/// Represents a partial or fully decoded audio.
368///
369/// See [`Event::AudioDecoded`] for more details.
370///
371/// [`Event::AudioDecoded`]: crate::Event::AudioDecoded
372#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
373#[non_exhaustive]
374pub struct AudioDecoded {
375 /// The audio track ID.
376 ///
377 /// An [`AudioMetadata`] for this ID was already notified before this event.
378 pub id: AudioId,
379
380 /// Offset of the `chunk` on the track.
381 ///
382 /// This is a count of samples before the first in this chunk, a sample is a sequence of [`channel_count`] values.
383 ///
384 /// To convert offset to bytes `offset * channel_count * size_of::<f32>()`.
385 ///
386 /// [`channel_count`]: AudioMetadata::channel_count
387 pub offset: usize,
388
389 /// Interleaved `f32` samples.
390 pub chunk: IpcBytesCast<f32>,
391
392 /// If the `chunk` is actually the full decoded audio.
393 ///
394 /// When this is `true` no more decode events for the `id` are sent, (re)playing the audio
395 /// will read directly from the cache.
396 ///
397 /// When this is `false` the `chunk` represents the last chunk decoded on demand because the audio is playing.
398 /// Depending on the request the audio may never be fully cached, always decoding again on replay.
399 pub is_full: bool,
400}
401impl AudioDecoded {
402 /// New.
403 pub fn new(id: AudioId, chunk: IpcBytesCast<f32>) -> Self {
404 Self {
405 id,
406 offset: 0,
407 chunk,
408 is_full: false,
409 }
410 }
411}
412/// Invalid initial value.
413impl Default for AudioDecoded {
414 fn default() -> Self {
415 Self {
416 id: AudioId::INVALID,
417 offset: Default::default(),
418 chunk: Default::default(),
419 is_full: Default::default(),
420 }
421 }
422}
423
424/// Represents a connection request to an audio output device.
425#[derive(Debug, Clone, Serialize, Deserialize)]
426#[non_exhaustive]
427pub struct AudioOutputRequest {
428 /// ID that will identify the new output stream.
429 pub id: AudioOutputId,
430
431 /// Initial playback config.
432 pub config: AudioOutputConfig,
433}
434impl AudioOutputRequest {
435 /// New.
436 pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
437 Self { id, config }
438 }
439}
440
441/// Represents an audio playback update request.
442#[derive(Debug, Clone, Serialize, Deserialize)]
443#[non_exhaustive]
444pub struct AudioOutputUpdateRequest {
445 /// The output stream to update.
446 pub id: AudioOutputId,
447 /// New config for the output stream.
448 pub config: AudioOutputConfig,
449}
450impl AudioOutputUpdateRequest {
451 /// New.
452 pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
453 Self { id, config }
454 }
455}
456
457/// Represents the capabilities of an audio output stream.
458///
459/// Any audio played on this output is automatically converted to the channel count and sample rate.
460#[derive(Debug, Clone, Serialize, Deserialize)]
461#[non_exhaustive]
462pub struct AudioOutputOpenData {
463 /// Number of interleaved channels supported by this output.
464 pub channel_count: u16,
465 /// Samples per second.
466 ///
467 /// A sample is a single sequence of `channel_count` values.
468 pub sample_rate: u32,
469}
470impl AudioOutputOpenData {
471 /// New.
472 pub fn new(channel_count: u16, sample_rate: u32) -> Self {
473 Self {
474 channel_count,
475 sample_rate,
476 }
477 }
478}
479
480/// Audio playback config.
481#[derive(Debug, Clone, Serialize, Deserialize)]
482#[non_exhaustive]
483pub struct AudioOutputConfig {
484 /// Playback state.
485 pub state: AudioOutputState,
486
487 /// Volume of the sound.
488 ///
489 /// The value multiplies the samples, `1.fct()` is the *natural* volume from the source.
490 pub volume: Factor,
491
492 /// Speed of the sound.
493 ///
494 /// This is a multiplier of both the playback speed and the pitch.
495 ///
496 /// * `0.5.fct()` doubles the total duration and halves (lowers) the pitch.
497 /// * `2.fct()` halves the total duration and doubles (raises) the pitch.
498 pub speed: Factor,
499}
500impl AudioOutputConfig {
501 /// New.
502 pub fn new(state: AudioOutputState, volume: Factor, speed: Factor) -> Self {
503 Self { state, volume, speed }
504 }
505}
506
507/// Represents the playback state of an audio output stream.
508#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
509#[non_exhaustive]
510pub enum AudioOutputState {
511 /// Audio is sent to the device for playback in the sequence they are cued.
512 Playing,
513 /// Audio playback is paused, cue requests are buffered.
514 Paused,
515 /// Audio playback is paused, all current cue requests are dropped.
516 Stopped,
517}
518impl AudioOutputState {
519 /// If is [`Playing`].
520 ///
521 /// [`Playing`]: Self::Playing
522 pub fn is_playing(&self) -> bool {
523 matches!(self, Self::Playing)
524 }
525
526 /// If is [`Paused`].
527 ///
528 /// [`Paused`]: Self::Paused
529 pub fn is_paused(&self) -> bool {
530 matches!(self, Self::Paused)
531 }
532
533 /// If is [`Stopped`].
534 ///
535 /// [`Stopped`]: Self::Stopped
536 pub fn is_stopped(&self) -> bool {
537 matches!(self, Self::Stopped)
538 }
539}
540
541/// Represents an audio playback request.
542#[derive(Debug, Clone, Serialize, Deserialize)]
543#[non_exhaustive]
544pub struct AudioPlayRequest {
545 /// The audio output stream.
546 ///
547 /// If another audio is already playing, this request is appended to the end.
548 pub output: AudioOutputId,
549
550 /// The audio to play.
551 pub mix: AudioMix,
552}
553impl AudioPlayRequest {
554 /// New.
555 pub fn new(output: AudioOutputId, mix: AudioMix) -> Self {
556 Self { output, mix }
557 }
558}
559
560/// Represents an audio source.
561#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
562#[non_exhaustive]
563pub struct AudioMix {
564 /// Silent start padding.
565 pub delay: Duration,
566 /// Total duration.
567 ///
568 /// If not set, audio plays until the end of the last layer. If set, audio plays for the duration: if layers end
569 /// before the duration it plays silence, if layers exceed the duration the end is clipped.
570 pub total_duration: Option<Duration>,
571
572 /// Components of this mix.
573 ///
574 /// Each layer applies to the previous.
575 pub layers: Vec<AudioMixLayer>,
576}
577impl AudioMix {
578 /// New empty.
579 pub fn new() -> Self {
580 Self {
581 delay: Duration::ZERO,
582 total_duration: None,
583 layers: vec![],
584 }
585 }
586}
587impl Default for AudioMix {
588 fn default() -> Self {
589 Self::new()
590 }
591}
592
593/// Represents an audio source component.
594#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
595#[non_exhaustive]
596pub enum AudioMixLayer {
597 /// Play the cached audio.
598 ///
599 /// The audio samples are adapted to the output format and added to the under layers result.
600 Audio {
601 /// The audio.
602 audio: AudioId,
603 /// Clip the start of the audio.
604 ///
605 /// Set to [`Duration::ZERO`] to play from the start.
606 skip: Duration,
607 /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length it stops early.
608 ///
609 /// Set to [`Duration::MAX`] to play to the end.
610 take: Duration,
611 },
612 /// Play the mix.
613 ///
614 /// This mix is sampled as an audio (computed), its effect layers do not affect the parent mix.
615 AudioMix {
616 /// The inner mix.
617 mix: AudioMix,
618 /// Clip the start of the audio.
619 ///
620 /// Set to [`Duration::ZERO`] to play from the start.
621 skip: Duration,
622 /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length it stops early.
623 ///
624 /// Set to [`Duration::MAX`] to play to the end.
625 take: Duration,
626 },
627
628 /// Linear volume transition.
629 ///
630 /// When the playback is in range the volume is multiplied by the linear interpolation between `start_volume` and `end_volume`. The volume snaps
631 /// back to the output stream volume after the end, unless another volume control layer is in effect.
632 VolumeLinear {
633 /// Start time.
634 start: Duration,
635 /// Transition duration.
636 ///
637 /// The effect ends at `start + duration` time.
638 duration: Duration,
639
640 /// Volume at the start.
641 start_volume: Factor,
642 /// Volume at the end.
643 end_volume: Factor,
644 },
645
646 /// Generate a sine wave sound.
647 SineWave {
648 /// Sine frequency.
649 frequency: f32,
650 /// Duration of the sample.
651 duration: Duration,
652 },
653}
654
655bitflags! {
656 /// Defines what tracks are decoded from multi track containers.
657 #[derive(Copy, Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)]
658 pub struct AudioTracksMode: u8 {
659 /// Decodes all tracks.
660 const TRACKS = 0b0001;
661 /// Decodes only the first track, or the track explicitly marked as primary/default by the container format.
662 ///
663 /// Note that this is `0`, the empty flags value.
664 const PRIMARY = 0;
665 }
666}