Skip to main content

zng_view_api/
audio.rs

1//! Audio device types.
2
3use std::{num::NonZeroU16, time::Duration};
4
5use bitflags::bitflags;
6use serde::{Deserialize, Serialize};
7use zng_task::channel::IpcBytesCast;
8use zng_txt::Txt;
9use zng_unit::Factor;
10
11use crate::api_extension::{ApiExtensionId, ApiExtensionPayload};
12
13crate::declare_id! {
14    /// Audio device ID.
15    ///
16    /// In the View Process this is mapped to a system id.
17    ///
18    /// In the App Process this is mapped to an unique id, but does not survived View crashes.
19    ///
20    /// The View Process defines the ID.
21    pub struct AudioDeviceId(_);
22
23    /// Id of a decoded or on demand decoding audio track in the cache.
24    ///
25    /// The View Process defines the ID.
26    pub struct AudioId(_);
27
28    /// Audio playback stream ID.
29    ///
30    /// In the View Process this is mapped to a system id.
31    ///
32    /// In the App Process this is an unique id that survives View crashes.
33    ///
34    /// The App Process defines the ID.
35    pub struct AudioOutputId(_);
36
37    /// Audio playback request ID.
38    ///
39    /// The View Process defines the ID.
40    pub struct AudioPlayId(_);
41
42    /// Id of an audio encode task.
43    ///
44    /// The View Process defines the ID.
45    pub struct AudioEncodeId(_);
46}
47
48/// Info about an input or output device.
49#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
50#[non_exhaustive]
51pub struct AudioDeviceInfo {
52    /// Device display name.
53    pub name: Txt,
54    /// Device input/output capabilities.
55    pub capabilities: AudioDeviceCapability,
56    /// Input stream modes this device can produce.
57    pub input_modes: Vec<AudioStreamMode>,
58    /// Output stream modes this device can consume.
59    pub output_modes: Vec<AudioStreamMode>,
60}
61
62bitflags! {
63    /// Represents audio device input/output capabilities.
64    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65    pub struct AudioDeviceCapability: u8 {
66        /// Device can generate audio streams.
67        const INPUT = 0b01;
68        /// Device can consume audio streams.
69        const OUTPUT = 0b11;
70    }
71}
72
73/// Represents steam capability of an audio device.
74#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
75#[non_exhaustive]
76pub struct AudioStreamMode {
77    /// Number of audio channels.
78    pub channels: NonZeroU16,
79    /// Minimum and maximum sample rate.
80    pub sample_rate: SampleRate,
81    /// Minimum and maximum supported buffer size.
82    pub buffer_size: BufferSize,
83}
84
85/// Represents the minimum and maximum sample rate per audio channel.
86///
87/// Values are in samples processed per second.
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
89pub struct SampleRate {
90    /// Minimum, inclusive.
91    pub min: u32,
92    /// Maximum, inclusive.
93    pub max: u32,
94}
95
96/// Represents the minimum and maximum supported buffer size for the device.
97#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
98#[non_exhaustive]
99pub enum BufferSize {
100    /// Range in frames per second.
101    Range {
102        /// Minimum, inclusive.
103        min: u32,
104        /// Maximum, inclusive.
105        max: u32,
106    },
107    /// Platform cannot describe buffer size for this device.
108    Unknown,
109}
110
111/// Represent an audio load/decode request.
112#[derive(Debug, Clone, Serialize, Deserialize)]
113#[non_exhaustive]
114pub struct AudioRequest<D> {
115    /// Audio data format.
116    pub format: AudioDataFormat,
117
118    /// Audio data.
119    ///
120    /// Bytes layout depends on the `format`, data structure is [`IpcReadHandle`] or [`IpcReceiver<IpcBytes>`] in the view API.
121    ///
122    /// [`IpcReadHandle`]: zng_task::channel::IpcReadHandle
123    /// [`IpcReceiver<IpcBytes>`]: zng_task::channel::IpcReceiver
124    pub data: D,
125
126    /// Maximum allowed decoded size in bytes.
127    ///
128    /// View-process will avoid decoding and return an error if the track would exceed this limit.
129    pub max_decoded_len: u64,
130
131    /// Defines what tracks are decoded from multi image containers.
132    pub tracks: AudioTracksMode,
133
134    /// Audio is a track (or subtree) of this other audio.
135    ///
136    /// This value is now used by the view-process, it is just returned with the metadata. This is useful when
137    /// an already decoded image is requested after a respawn to maintain the original container structure.
138    pub parent: Option<AudioTrackMetadata>,
139}
140impl<D> AudioRequest<D> {
141    /// New.
142    pub fn new(format: AudioDataFormat, data: D, max_decoded_len: u64) -> Self {
143        Self {
144            format,
145            data,
146            max_decoded_len,
147            tracks: AudioTracksMode::PRIMARY,
148            parent: None,
149        }
150    }
151}
152
153/// Format of the audio bytes.
154#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
155#[non_exhaustive]
156pub enum AudioDataFormat {
157    /// Data is already decoded stream of interleaved `f32` samples.
158    InterleavedF32 {
159        /// Number of channels interleaved in the track.
160        channel_count: u16,
161        /// Samples per second.
162        ///
163        /// A sample is a single sequence of `channel_count`.
164        sample_rate: u32,
165        /// Total duration of the track, if it is known.
166        total_duration: Option<Duration>,
167    },
168
169    /// The audio is encoded.
170    ///
171    /// This file extension maybe identifies the format. Fallback to `Unknown` handling if the file extension
172    /// is unknown or the file header does not match.
173    FileExtension(Txt),
174
175    /// The audio is encoded.
176    ///
177    /// This MIME type maybe identifies the format. Fallback to `Unknown` handling if the file extension
178    /// is unknown or the file header does not match.
179    MimeType(Txt),
180
181    /// The image is encoded.
182    ///
183    /// A decoder will be selected using the "magic number" at the start of the bytes buffer.
184    Unknown,
185}
186impl From<Txt> for AudioDataFormat {
187    fn from(ext_or_mime: Txt) -> Self {
188        if ext_or_mime.contains('/') {
189            AudioDataFormat::MimeType(ext_or_mime)
190        } else {
191            AudioDataFormat::FileExtension(ext_or_mime)
192        }
193    }
194}
195impl From<&str> for AudioDataFormat {
196    fn from(ext_or_mime: &str) -> Self {
197        Txt::from_str(ext_or_mime).into()
198    }
199}
200
201/// Represents an audio codec capability.
202///
203/// This type will be used in the next breaking release of the view API.
204#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
205#[non_exhaustive]
206pub struct AudioFormat {
207    /// Display name of the format.
208    pub display_name: Txt,
209
210    /// Media types (MIME) associated with the format.
211    ///
212    /// Lowercase, without `"audio/"` prefix, comma separated if there is more than one.
213    pub media_type_suffixes: Txt,
214
215    /// Common file extensions associated with the format.
216    ///
217    /// Lowercase, without dot, comma separated if there is more than one.
218    pub file_extensions: Txt,
219
220    /// Capabilities of this format.
221    pub capabilities: AudioFormatCapability,
222}
223impl AudioFormat {
224    /// From static str.
225    ///
226    /// # Panics
227    ///
228    /// Panics if `media_type_suffixes` not ASCII.
229    pub const fn from_static(
230        display_name: &'static str,
231        media_type_suffixes: &'static str,
232        file_extensions: &'static str,
233        capabilities: AudioFormatCapability,
234    ) -> Self {
235        assert!(media_type_suffixes.is_ascii());
236        Self {
237            display_name: Txt::from_static(display_name),
238            media_type_suffixes: Txt::from_static(media_type_suffixes),
239            file_extensions: Txt::from_static(file_extensions),
240            capabilities,
241        }
242    }
243
244    /// Iterate over media type suffixes.
245    pub fn media_type_suffixes_iter(&self) -> impl Iterator<Item = &str> {
246        self.media_type_suffixes.split(',').map(|e| e.trim())
247    }
248
249    /// Iterate over full media types, with `"image/"` prefix.
250    pub fn media_types(&self) -> impl Iterator<Item = Txt> {
251        self.media_type_suffixes_iter().map(Txt::from_str)
252    }
253
254    /// Iterate over extensions.
255    pub fn file_extensions_iter(&self) -> impl Iterator<Item = &str> {
256        self.file_extensions.split(',').map(|e| e.trim())
257    }
258
259    /// Checks if `f` matches any of the mime types or any of the file extensions.
260    ///
261    /// File extensions comparison ignores dot and ASCII case.
262    pub fn matches(&self, f: &str) -> bool {
263        let f = f.strip_prefix('.').unwrap_or(f);
264        let f = f.strip_prefix("audio/").unwrap_or(f);
265        self.media_type_suffixes_iter().any(|e| e.eq_ignore_ascii_case(f)) || self.file_extensions_iter().any(|e| e.eq_ignore_ascii_case(f))
266    }
267}
268
269bitflags! {
270    /// Capabilities of an [`AudioFormat`] implementation.
271    ///
272    /// Note that `DECODE` capability is omitted because the view-process can always decode formats.
273    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
274    pub struct AudioFormatCapability: u8 {
275        /// View-process can encode audio in this format.
276        const ENCODE = 0b_0000_0001;
277    }
278}
279
280/// Represent a image encode request.
281#[derive(Debug, Clone, Serialize, Deserialize)]
282#[non_exhaustive]
283pub struct AudioEncodeRequest {
284    /// Audio to encode.
285    pub id: AudioId,
286
287    /// Format query, view-process uses [`AudioFormat::matches`] to find the format.
288    pub format: Txt,
289
290    /// The audio to encode.
291    pub mix: AudioMix,
292}
293impl AudioEncodeRequest {
294    /// New.
295    pub fn new(id: AudioId, format: Txt, mix: AudioMix) -> Self {
296        Self { id, format, mix }
297    }
298}
299
300/// Represents decoded header metadata about an audio track.
301#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
302#[non_exhaustive]
303pub struct AudioMetadata {
304    /// Audio ID.
305    pub id: AudioId,
306
307    /// Number of channels interleaved in the track.
308    pub channel_count: u16,
309    /// Samples per second.
310    ///
311    /// A sample is a single sequence of `channel_count`.
312    pub sample_rate: u32,
313    /// Total duration of the track, if it is known.
314    pub total_duration: Option<Duration>,
315
316    /// Track is an entry (or subtree) of this other track.
317    pub parent: Option<AudioTrackMetadata>,
318
319    /// Custom metadata.
320    pub extensions: Vec<(ApiExtensionId, ApiExtensionPayload)>,
321}
322impl AudioMetadata {
323    /// New.
324    pub fn new(id: AudioId, channel_count: u16, sample_rate: u32) -> Self {
325        Self {
326            id,
327            channel_count,
328            sample_rate,
329            total_duration: None,
330            parent: None,
331            extensions: vec![],
332        }
333    }
334}
335/// Invalid initial value.
336impl Default for AudioMetadata {
337    fn default() -> Self {
338        Self {
339            id: AudioId::INVALID,
340            channel_count: Default::default(),
341            sample_rate: Default::default(),
342            total_duration: Default::default(),
343            parent: Default::default(),
344            extensions: vec![],
345        }
346    }
347}
348
349/// Represents decoded header metadata about a track position in the container represented by another audio.
350#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
351#[non_exhaustive]
352pub struct AudioTrackMetadata {
353    /// Image this one belongs too.
354    ///
355    /// The view-process always sends the parent image metadata first, so this id should be known by the app-process.
356    pub parent: AudioId,
357    /// Sort index of the track in the list of tracks.
358    pub index: usize,
359}
360impl AudioTrackMetadata {
361    /// New.
362    pub fn new(parent: AudioId, index: usize) -> Self {
363        Self { parent, index }
364    }
365}
366
367/// Represents a partial or fully decoded audio.
368///
369/// See [`Event::AudioDecoded`] for more details.
370///
371/// [`Event::AudioDecoded`]: crate::Event::AudioDecoded
372#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
373#[non_exhaustive]
374pub struct AudioDecoded {
375    /// The audio track ID.
376    ///
377    /// An [`AudioMetadata`] for this ID was already notified before this event.
378    pub id: AudioId,
379
380    /// Offset of the `chunk` on the track.
381    ///
382    /// This is a count in samples before the first in this chunk, a sample is a sequence of [`channel_count`].
383    ///
384    /// To convert offset to bytes `offset * channel_count * size_of::<f32>()`.
385    ///
386    /// [`channel_count`]: AudioMetadata::channel_count
387    pub offset: usize,
388
389    /// Interleaved `f32` samples.
390    pub chunk: IpcBytesCast<f32>,
391
392    /// If the `chunk` is actually the full decoded audio.
393    ///
394    /// When this is `true` no more decode events for the `id` are send, (re)playing the audio
395    /// will read directly from the cache.
396    ///
397    /// When this is `false` the `chunk` represent the last decoded chunk on demand because the audio is playing.
398    /// Depending on the request the audio may never be fully cached, always decoding again on replay.
399    pub is_full: bool,
400}
401impl AudioDecoded {
402    /// New.
403    pub fn new(id: AudioId, chunk: IpcBytesCast<f32>) -> Self {
404        Self {
405            id,
406            offset: 0,
407            chunk,
408            is_full: false,
409        }
410    }
411}
412/// Invalid initial value.
413impl Default for AudioDecoded {
414    fn default() -> Self {
415        Self {
416            id: AudioId::INVALID,
417            offset: Default::default(),
418            chunk: Default::default(),
419            is_full: Default::default(),
420        }
421    }
422}
423
424/// Represents a connection request to an audio output device.
425#[derive(Debug, Clone, Serialize, Deserialize)]
426#[non_exhaustive]
427pub struct AudioOutputRequest {
428    /// ID that will identify the new output.
429    pub id: AudioOutputId,
430
431    /// Initial config.
432    pub config: AudioOutputConfig,
433}
434impl AudioOutputRequest {
435    /// New.
436    pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
437        Self { id, config }
438    }
439}
440
441/// Represents an audio playback update request.
442#[derive(Debug, Clone, Serialize, Deserialize)]
443#[non_exhaustive]
444pub struct AudioOutputUpdateRequest {
445    /// The output stream.
446    pub id: AudioOutputId,
447    /// New config.
448    pub config: AudioOutputConfig,
449}
450impl AudioOutputUpdateRequest {
451    /// New.
452    pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
453        Self { id, config }
454    }
455}
456
457/// Represents an audio output stream capabilities.
458///
459/// Any audio played on this output is automatically converted to the channel count and sample rate.
460#[derive(Debug, Clone, Serialize, Deserialize)]
461#[non_exhaustive]
462pub struct AudioOutputOpenData {
463    /// Number of channels interleaved supported by this output.
464    pub channel_count: u16,
465    /// Samples per second.
466    ///
467    /// A sample is a single sequence of `channel_count`.
468    pub sample_rate: u32,
469}
470impl AudioOutputOpenData {
471    /// New.
472    pub fn new(channel_count: u16, sample_rate: u32) -> Self {
473        Self {
474            channel_count,
475            sample_rate,
476        }
477    }
478}
479
480/// Audio playback config.
481#[derive(Debug, Clone, Serialize, Deserialize)]
482#[non_exhaustive]
483pub struct AudioOutputConfig {
484    /// Playback state.
485    pub state: AudioOutputState,
486
487    /// Volume of the sound.
488    ///
489    /// The value multiplies the samples, `1.fct()` is the *natural* volume from the source.
490    pub volume: Factor,
491
492    /// Speed of the sound.
493    ///
494    /// This is a multiplier of the playback speed and pitch.
495    ///
496    /// * `0.5.fct()` doubles the total duration and halves (lowers) the pitch.
497    /// * `2.fct()` halves the total duration and doubles (raises) the pitch.
498    pub speed: Factor,
499}
500impl AudioOutputConfig {
501    /// New.
502    pub fn new(state: AudioOutputState, volume: Factor, speed: Factor) -> Self {
503        Self { state, volume, speed }
504    }
505}
506
507/// Represents the playback state if an audio output stream.
508#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
509#[non_exhaustive]
510pub enum AudioOutputState {
511    /// Audio is sent to the device for playback as in the sequence they are cued.
512    Playing,
513    /// Audio playback is paused, cue requests are buffered.
514    Paused,
515    /// Audio playback is paused, all current cue requests are dropped.
516    Stopped,
517}
518impl AudioOutputState {
519    /// If is [`Playing`].
520    ///
521    /// [`Playing`]: Self::Playing
522    pub fn is_playing(&self) -> bool {
523        matches!(self, Self::Playing)
524    }
525
526    /// If is [`Paused`].
527    ///
528    /// [`Paused`]: Self::Paused
529    pub fn is_paused(&self) -> bool {
530        matches!(self, Self::Paused)
531    }
532
533    /// If is [`Stopped`].
534    ///
535    /// [`Stopped`]: Self::Stopped
536    pub fn is_stopped(&self) -> bool {
537        matches!(self, Self::Stopped)
538    }
539}
540
541/// Represents an audio playback request.
542#[derive(Debug, Clone, Serialize, Deserialize)]
543#[non_exhaustive]
544pub struct AudioPlayRequest {
545    /// The audio output stream.
546    ///
547    /// If another audio is already playing this request is appended to the end.
548    pub output: AudioOutputId,
549
550    /// The audio.
551    pub mix: AudioMix,
552}
553impl AudioPlayRequest {
554    /// New.
555    pub fn new(output: AudioOutputId, mix: AudioMix) -> Self {
556        Self { output, mix }
557    }
558}
559
560/// Represents an audio source.
561#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
562#[non_exhaustive]
563pub struct AudioMix {
564    /// Silent start padding.
565    pub delay: Duration,
566    /// Total duration.
567    ///
568    /// If not set audio plays until the last layer. If set audio plays for the duration, if layers end before the duration
569    /// plays silent, if layers exceed the duration the end is clipped.
570    pub total_duration: Option<Duration>,
571
572    /// Components of this mix.
573    ///
574    /// Each layer applies to the previous.
575    pub layers: Vec<AudioMixLayer>,
576}
577impl AudioMix {
578    /// New empty.
579    pub fn new() -> Self {
580        Self {
581            delay: Duration::ZERO,
582            total_duration: None,
583            layers: vec![],
584        }
585    }
586}
587impl Default for AudioMix {
588    fn default() -> Self {
589        Self::new()
590    }
591}
592
593/// Represents an audio source component.
594#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
595#[non_exhaustive]
596pub enum AudioMixLayer {
597    /// Play the cached audio.
598    ///
599    /// The audio samples are adapted to the output format and added to the under layers result.
600    Audio {
601        /// The audio.
602        audio: AudioId,
603        /// Clip the start of the audio.
604        ///
605        /// Set to [`Duration::ZERO`] to play from the start.
606        skip: Duration,
607        /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
608        ///
609        /// Set to [`Duration::MAX`] to play to the end.
610        take: Duration,
611    },
612    /// Play the mix.
613    ///
614    /// This mix is sampled as an audio (computed), its effect layers do not affect the parent mix.
615    AudioMix {
616        /// The inner mix.
617        mix: AudioMix,
618        /// Clip the start of the audio.
619        ///
620        /// Set to [`Duration::ZERO`] to play from the start.
621        skip: Duration,
622        /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
623        ///
624        /// Set to [`Duration::MAX`] to play to the end.
625        take: Duration,
626    },
627
628    /// Linear volume transition.
629    ///
630    /// When the playback is in range the volume is multiplied by the linear interpolation between `start_volume` and `end_volume`. The volume snaps
631    /// back to the output stream volume after the end, unless another volume control layer is in effect.
632    VolumeLinear {
633        /// Start time.
634        start: Duration,
635        /// Transition duration.
636        ///
637        /// The effect ends at `start + duration` time.
638        duration: Duration,
639
640        /// Volume at the start.
641        start_volume: Factor,
642        /// Volume at the end,
643        end_volume: Factor,
644    },
645
646    /// Generate a sine wave sound.
647    SineWave {
648        /// Sine frequency.
649        frequency: f32,
650        /// Duration of the sample.
651        duration: Duration,
652    },
653}
654
655bitflags! {
656    /// Defines what tracks are decoded from multi track containers.
657    #[derive(Copy, Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)]
658    pub struct AudioTracksMode: u8 {
659        /// Decodes all tracks.
660        const TRACKS = 0b0001;
661        /// Decodes only the first track, or the track explicitly marked as primary/default by the container format.
662        ///
663        /// Note that this is 0, empty.
664        const PRIMARY = 0;
665    }
666}