truce-core 0.48.11

Core types for the truce audio plugin framework
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
use truce_params::sample::Sample;

/// Non-interleaved audio buffer. Borrows host memory through the
/// format wrapper.
///
/// Generic over the sample type `S` (the plugin's chosen precision,
/// `f32` or `f64`). The format wrapper bridges between host-buffer
/// precision and `S` at the block boundary - see
/// [`RawBufferScratch::build`]. Plugin code under
/// `use truce::prelude::*;` (f32) or `use truce::prelude64::*;` (f64)
/// sees `AudioBuffer<S>` with `S` already picked.
///
/// **In-place I/O.** Some hosts (Reaper, pluginval) pass the same
/// buffer for both input and output of a given channel. By default
/// the wrapper copies the aliased inputs into per-channel scratch so
/// `input(ch)` and `output(ch)` are disjoint `&[S]` / `&mut [S]` -
/// no plugin code change required. Plugins that opt into
/// `Plugin::supports_in_place() = true` skip the copy and must use
/// [`Self::in_out_mut`] for channels where [`Self::is_in_place`]
/// returns `true`.
pub struct AudioBuffer<'a, S: Sample = f32> {
    /// Read-only channel slices, one per input channel. Accessors
    /// expose the `offset..offset + num_samples` window of each.
    inputs: &'a [&'a [S]],
    /// Writable channel slices, one per output channel. Accessors
    /// expose the `offset..offset + num_samples` window of each.
    outputs: &'a mut [&'a mut [S]],
    /// Bit `ch` is set when `inputs[ch]` and `outputs[ch]` point to
    /// the same host memory. Channels ≥ 64 are always reported as
    /// non-aliased - formats with that many channels are exotic
    /// enough to be a follow-up.
    in_place_mask: u64,
    /// Index of the first sample of this view inside every channel
    /// slice. Zero for a freshly constructed buffer; non-zero for
    /// sub-block views produced by `slice`.
    offset: usize,
    /// Number of samples per channel visible through this view.
    num_samples: usize,
}

impl<'a, S: Sample> AudioBuffer<'a, S> {
    /// Safe wrapper around [`Self::from_slices`] for callers that hold their
    /// own owned `Vec<Vec<S>>` (e.g. `truce-driver`'s test harness).
    ///
    /// The borrow checker already proves the lifetime and aliasing
    /// requirements of the unsafe constructor (a `&[S]` and a
    /// `&mut [S]` handed in together cannot overlap). The remaining
    /// precondition - `num_samples` fitting in every channel slice -
    /// is verified here in **every** build profile, so this function
    /// never forwards a violated contract to the `unsafe fn`.
    ///
    /// # Panics
    ///
    /// Panics if `num_samples` exceeds the length of any input or
    /// output slice.
    pub fn from_slices_checked(
        inputs: &'a [&'a [S]],
        outputs: &'a mut [&'a mut [S]],
        num_samples: usize,
    ) -> Self {
        // `from_slices` only checks this bound under
        // `debug_assertions`; a safe entry point has to enforce it
        // unconditionally.
        for (i, inp) in inputs.iter().enumerate() {
            assert!(
                num_samples <= inp.len(),
                "AudioBuffer: num_samples ({num_samples}) exceeds input channel {i} length ({})",
                inp.len(),
            );
        }
        for (o, out) in outputs.iter().enumerate() {
            assert!(
                num_samples <= out.len(),
                "AudioBuffer: num_samples ({num_samples}) exceeds output channel {o} length ({})",
                out.len(),
            );
        }
        // SAFETY: the references are valid for `'a` by construction
        // (safe references), shared and exclusive borrows cannot
        // alias, and the loops above established the `num_samples`
        // bound for every channel.
        unsafe { Self::from_slices(inputs, outputs, num_samples) }
    }

    /// Create a buffer from pre-split channel slices.
    /// Used by format wrappers after converting from host-specific buffer types.
    ///
    /// # Safety
    /// The caller must ensure the slices are valid for the lifetime `'a`
    /// and that `num_samples` does not exceed any slice's length.
    ///
    /// # Panics
    ///
    /// In debug builds only, panics if any input channel aliases an
    /// output channel or `num_samples` exceeds the length of any
    /// input/output slice. Release builds skip these checks (they're
    /// safety preconditions, not runtime invariants).
    pub unsafe fn from_slices(
        inputs: &'a [&'a [S]],
        outputs: &'a mut [&'a mut [S]],
        num_samples: usize,
    ) -> Self {
        #[cfg(debug_assertions)]
        {
            // Verify no input channel aliases any output channel.
            for (i, inp) in inputs.iter().enumerate() {
                let i_start = inp.as_ptr() as usize;
                let i_end = i_start + std::mem::size_of_val(*inp);
                for (o, out) in outputs.iter().enumerate() {
                    let o_start = out.as_ptr() as usize;
                    let o_end = o_start + std::mem::size_of_val(*out);
                    assert!(
                        i_end <= o_start || o_end <= i_start,
                        "AudioBuffer: input channel {i} and output channel {o} alias \
                         - pass disjoint slices or use RawBufferScratch::build which \
                         handles aliasing automatically",
                    );
                }
            }
            // Verify num_samples doesn't exceed any slice length.
            for (i, inp) in inputs.iter().enumerate() {
                assert!(
                    num_samples <= inp.len(),
                    "AudioBuffer: num_samples ({num_samples}) exceeds input channel {i} length ({})",
                    inp.len(),
                );
            }
            for (o, out) in outputs.iter().enumerate() {
                assert!(
                    num_samples <= out.len(),
                    "AudioBuffer: num_samples ({num_samples}) exceeds output channel {o} length ({})",
                    out.len(),
                );
            }
        }
        AudioBuffer {
            inputs,
            outputs,
            in_place_mask: 0,
            offset: 0,
            num_samples,
        }
    }

    /// Set the in-place mask. Called by format wrappers (or
    /// `RawBufferScratch::build`) after construction once they've
    /// determined which channels alias on the host side.
    ///
    /// Bit `ch` of `mask` marks channel `ch` as in-place; the value
    /// replaces any previously stored mask. Only channels `0..64`
    /// can be represented.
    #[inline]
    pub fn set_in_place_mask(&mut self, mask: u64) {
        self.in_place_mask = mask;
    }

    /// `true` when the host passes a single buffer for both input and
    /// output of `ch` (in-place I/O). Use [`Self::in_out_mut`] to read
    /// and write that buffer directly when this returns `true`.
    #[must_use]
    pub fn is_in_place(&self, ch: usize) -> bool {
        // The range test must come first: it keeps the shift below
        // the 64-bit width of the mask.
        ch < 64 && self.in_place_mask & (1u64 << ch) != 0
    }

    /// Read+write slice for an in-place channel - the same memory the
    /// host gave us for both input and output. Each sample reads as
    /// the input value before the plugin overwrites it.
    ///
    /// Only meaningful when [`Self::is_in_place`] returns `true`. On a
    /// non-in-place channel this returns the output slice with no
    /// input data in it; reading is allowed but produces uninitialized
    /// host-buffer contents.
    pub fn in_out_mut(&mut self, ch: usize) -> &mut [S] {
        // Window of this view inside the underlying channel slice.
        let first = self.offset;
        let window = first..first + self.num_samples;
        &mut self.outputs[ch][window]
    }

    /// Number of samples per channel visible through this buffer
    /// (the length of every slice returned by the accessors).
    #[must_use]
    pub fn num_samples(&self) -> usize {
        self.num_samples
    }

    /// Number of input channel slices held by this buffer.
    #[must_use]
    pub fn num_input_channels(&self) -> usize {
        self.inputs.len()
    }

    /// Number of output channel slices held by this buffer.
    #[must_use]
    pub fn num_output_channels(&self) -> usize {
        self.outputs.len()
    }

    /// Read-only samples of input channel `channel`, restricted to
    /// this buffer's `num_samples()`-long window.
    ///
    /// # Panics
    ///
    /// Panics if `channel` is not a valid input channel index or the
    /// underlying channel slice is shorter than the window.
    #[must_use]
    pub fn input(&self, channel: usize) -> &[S] {
        let first = self.offset;
        let window = first..first + self.num_samples;
        &self.inputs[channel][window]
    }

    /// Writable samples of output channel `channel`, restricted to
    /// this buffer's `num_samples()`-long window.
    ///
    /// # Panics
    ///
    /// Panics if `channel` is not a valid output channel index or
    /// the underlying channel slice is shorter than the window.
    pub fn output(&mut self, channel: usize) -> &mut [S] {
        let first = self.offset;
        let window = first..first + self.num_samples;
        &mut self.outputs[channel][window]
    }

    /// Number of channels that have both an input and an output
    /// slice, i.e. the smaller of the two channel counts.
    #[must_use]
    pub fn channels(&self) -> usize {
        let ins = self.inputs.len();
        let outs = self.outputs.len();
        std::cmp::min(ins, outs)
    }

    /// Borrow input channel `in_ch` (read-only) and output channel
    /// `out_ch` (writable) at the same time, both restricted to this
    /// buffer's sample window. Useful for in-place processing.
    ///
    /// # Panics
    ///
    /// Panics if either channel index is out of range or the
    /// underlying slice is shorter than the window.
    pub fn io_pair(&mut self, in_ch: usize, out_ch: usize) -> (&[S], &mut [S]) {
        let first = self.offset;
        let window = first..first + self.num_samples;
        // `inputs` and `outputs` are separate fields, so the shared
        // and the exclusive borrow can coexist.
        let src = &self.inputs[in_ch][window.clone()];
        let dst = &mut self.outputs[out_ch][window];
        (src, dst)
    }

    /// Get an input/output pair for the same channel index. Shorthand for `io_pair(ch, ch)`.
    ///
    /// # Panics
    ///
    /// Panics if `ch` is not a valid index into both the input and
    /// the output channel lists.
    pub fn io(&mut self, ch: usize) -> (&[S], &mut [S]) {
        self.io_pair(ch, ch)
    }

    /// Iterate per-channel, in fixed-size `N`-sample chunks. The
    /// last chunk of each channel may be shorter than `N`; it's
    /// yielded as a [`ChunkItem::Tail`] with the actual remaining
    /// length, and the caller falls back to scalar for it. Full
    /// `N`-sample chunks arrive as [`ChunkItem::Full`] carrying
    /// `&[S; N]` / `&mut [S; N]` stack arrays - exactly the shape
    /// the per-op SIMD primitives in `truce-simd` expect.
    ///
    /// Iteration order is channel-major: all chunks of channel 0,
    /// then all chunks of channel 1, etc. Matches the natural
    /// orientation for per-channel state (biquad coefficients,
    /// per-channel meters) and lets the caller read its smoothed
    /// params once per chunk instead of once per sample.
    ///
    /// The returned object is a "lending iterator" - it doesn't
    /// implement [`Iterator`] because each yielded item borrows
    /// from the iterator itself. Use `while let Some(chunk) = …
    /// .next()`:
    ///
    /// ```ignore
    /// let mut chunks = buffer.chunks_mut::<32>();
    /// while let Some(chunk) = chunks.next() {
    ///     match chunk {
    ///         ChunkItem::Full { ch, sample, inp, out } => {
    ///             // SIMD-friendly path: inp is &[f32; 32], out is
    ///             // &mut [f32; 32]; `sample` is the chunk's start
    ///             // offset within the block.
    ///         }
    ///         ChunkItem::Tail { ch, sample, inp, out } => {
    ///             // scalar fallback for the trailing samples
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// Const-generic `N` is the chunk size; pick it to match the
    /// SIMD width × unroll factor for your inner op (32 / 64 are
    /// good defaults for current Apple Silicon + `x86_64`).
    pub fn chunks_mut<const N: usize>(&mut self) -> ChunksMut<'_, 'a, S, N> {
        // Start at the first sample of the first channel.
        ChunksMut {
            buffer: self,
            ch: 0,
            pos: 0,
        }
    }

    /// Iterate per-frame and hand a fixed-size `(input, output)`
    /// stack-array pair to `tick`. Sized at the type level by const
    /// generic `N`, which must equal [`Self::channels`].
    ///
    /// `io()` / `io_pair()` give a per-channel slice view, which is
    /// the right shape for "process channel `ch` in isolation"
    /// loops. But libraries that expect a per-frame `(in: &[S],
    /// out: &mut [S])` callback - `fundsp::AudioUnit::tick`,
    /// `nih_plug`'s frame iterators, custom per-sample DSP nodes -
    /// can't take that shape directly without either copying inputs
    /// into a scratch first (heap allocation on the audio thread)
    /// or fighting the borrow checker over two simultaneous `&mut`
    /// borrows of the buffer.
    ///
    /// This helper does the per-frame transpose in-place against a
    /// stack-allocated `[S; N]` pair, calls `tick` `num_samples()`
    /// times, and writes back. No heap, no borrow gymnastics at the
    /// call site:
    ///
    /// ```ignore
    /// // Stereo plugin delegating per-frame DSP to fundsp:
    /// buffer.for_each_frame::<2, _>(|frame_in, frame_out| {
    ///     self.graph.tick(frame_in, frame_out);
    /// });
    /// ```
    ///
    /// `&[S; N]` deref-coerces to `&[S]` at the call site, so
    /// callers can pass the arrays straight to slice-taking APIs
    /// like fundsp's `tick`.
    ///
    /// # Panics
    ///
    /// Debug builds panic if `N != self.channels()`. Release builds
    /// rely on the same precondition without checking; reading past
    /// the actual channel count would index out of bounds anyway.
    pub fn for_each_frame<const N: usize, F>(&mut self, mut tick: F)
    where
        F: FnMut(&[S; N], &mut [S; N]),
    {
        debug_assert_eq!(
            N,
            self.channels(),
            "for_each_frame::<{N}> requires the buffer to have exactly {N} channels"
        );
        let first = self.offset;
        let frames = first..first + self.num_samples;
        // The output frame is reused across iterations; `tick` is
        // expected to overwrite it each time.
        let mut frame_out = [S::default(); N];
        for i in frames {
            // Gather sample `i` of every channel into one frame.
            let frame_in: [S; N] = std::array::from_fn(|ch| self.inputs[ch][i]);
            tick(&frame_in, &mut frame_out);
            // Scatter the processed frame back to the outputs.
            for (ch, sample) in frame_out.iter().copied().enumerate() {
                self.outputs[ch][i] = sample;
            }
        }
    }

    /// Peak absolute value across an output channel, returned as `f32`
    /// because meters / UI display always work in `f32` regardless of
    /// the plugin's internal precision.
    ///
    /// Short-circuits and returns `f32::NAN` on the **first** NaN
    /// sample seen, so meters can flag runaway plugins instead of
    /// silently reporting "peaks within range" while NaN poison
    /// spreads downstream.
    #[must_use]
    pub fn output_peak(&self, ch: usize) -> f32 {
        let first = self.offset;
        let window = &self.outputs[ch][first..first + self.num_samples];
        // `try_fold` stops at the first NaN (the `None` branch);
        // otherwise it carries the running maximum of |sample|.
        window
            .iter()
            .map(|&raw| raw.to_f32())
            .try_fold(0.0f32, |peak, v| {
                if v.is_nan() {
                    None
                } else {
                    Some(peak.max(v.abs()))
                }
            })
            .unwrap_or(f32::NAN)
    }

    /// Return a sub-block view covering samples `start..start+len`.
    ///
    /// The returned buffer borrows `self` exclusively - you cannot use
    /// the original buffer while the slice is alive. The in-place
    /// mask is carried over unchanged.
    ///
    /// # Panics
    /// Panics if `start + len > self.num_samples()`, including the
    /// case where `start + len` overflows `usize`.
    pub fn slice(&mut self, start: usize, len: usize) -> AudioBuffer<'_, S> {
        // `checked_add` keeps a wrapped sum from slipping past the
        // bounds check in release builds.
        let in_bounds = start
            .checked_add(len)
            .is_some_and(|end| end <= self.num_samples);
        assert!(
            in_bounds,
            "slice({start}, {len}) out of bounds for buffer of {} samples",
            self.num_samples,
        );
        let new_offset = self.offset + start;
        // SAFETY: We construct an AudioBuffer<'a, S> and transmute to AudioBuffer<'_, S>.
        // These have identical memory layout (lifetimes are erased at runtime).
        // This is sound because:
        // 1. &mut self prevents the caller from using self while the slice exists
        // 2. The underlying channel memory lives for 'a which outlives '_
        // 3. Bounds are checked by the assert above
        let self_ptr: *mut Self = self;
        unsafe {
            let s = &mut *self_ptr;
            std::mem::transmute::<AudioBuffer<'a, S>, AudioBuffer<'_, S>>(AudioBuffer {
                inputs: s.inputs,
                outputs: &mut *s.outputs,
                in_place_mask: s.in_place_mask,
                offset: new_offset,
                num_samples: len,
            })
        }
    }
}

/// One yielded chunk from [`AudioBuffer::chunks_mut`].
///
/// `Full` is the SIMD-friendly path: `inp` and `out` are stack
/// arrays of exactly `N` elements, ready to feed `truce-simd`'s
/// block ops. `Tail` is the trailing fragment when `num_samples()`
/// isn't a multiple of `N`; fall back to a scalar loop.
pub enum ChunkItem<'b, S: Sample, const N: usize> {
    /// Full N-sample chunk. The `&[S; N]` / `&mut [S; N]` are the
    /// shape `truce-simd` ops are written against - no slice
    /// length check at the call site.
    Full {
        /// Channel index this chunk belongs to.
        ch: usize,
        /// Sample offset within the audio block this chunk starts
        /// at, counted from the start of the buffer view. Use this
        /// when indexing into a precomputed envelope
        /// array - `chunks_mut` iterates channel-major, so the
        /// envelope (typically read once per audio block via
        /// `read_block::<num_samples>()`) is shared across all
        /// channel passes.
        sample: usize,
        /// Read-only N-sample input slice.
        inp: &'b [S; N],
        /// Mutable N-sample output slice.
        out: &'b mut [S; N],
    },
    /// Trailing chunk when `num_samples()` isn't a multiple of `N`.
    /// Length is in `(0, N)`. Fall back to scalar processing.
    Tail {
        /// Channel index this chunk belongs to.
        ch: usize,
        /// Sample offset within the audio block this chunk starts at,
        /// counted from the start of the buffer view.
        sample: usize,
        /// Read-only tail input slice; length < N and equal to the
        /// length of `out`.
        inp: &'b [S],
        /// Mutable tail output slice; length < N and equal to the
        /// length of `inp`.
        out: &'b mut [S],
    },
}

/// Lending iterator returned by [`AudioBuffer::chunks_mut`].
///
/// Does not implement [`Iterator`] because each yielded
/// [`ChunkItem`] borrows from the iterator itself - the standard
/// "GATs would help here" pattern. Drive it with `while let
/// Some(chunk) = chunks.next()` instead. See
/// [`AudioBuffer::chunks_mut`] for a worked example.
pub struct ChunksMut<'b, 'a, S: Sample, const N: usize> {
    /// Buffer being walked; held exclusively for `'b` so yielded
    /// chunks can hand out `&mut` access to its output channels.
    buffer: &'b mut AudioBuffer<'a, S>,
    /// Current channel being walked.
    ch: usize,
    /// Position within the current channel, relative to
    /// `buffer.offset`. Advances by N each Full chunk, then jumps
    /// to `num_samples` for the Tail (or directly past it when
    /// `num_samples` is a multiple of N).
    pos: usize,
}

impl<S: Sample, const N: usize> ChunksMut<'_, '_, S, N> {
    /// Yield the next chunk, or `None` when every channel has been
    /// fully walked.
    ///
    /// Only channels that have **both** an input and an output slice
    /// are visited (the smaller of the two channel counts), because
    /// every yielded [`ChunkItem`] carries an input/output pair.
    /// A buffer with fewer inputs than outputs therefore stops after
    /// the last paired channel instead of indexing past the inputs.
    ///
    /// A chunk size of `N == 0` can never make progress, so it yields
    /// nothing at all rather than an endless stream of empty chunks.
    ///
    /// Method-on-self rather than `Iterator::next` because each
    /// yielded [`ChunkItem`] borrows from `self`; GATs would be
    /// needed to express that through the `Iterator` trait.
    // `should_implement_trait`: the lending signature cannot be
    // expressed through `Iterator`. `missing_panics_doc`: the
    // `expect`s below are unreachable by construction.
    #[allow(clippy::should_implement_trait, clippy::missing_panics_doc)]
    pub fn next(&mut self) -> Option<ChunkItem<'_, S, N>> {
        if N == 0 {
            return None;
        }
        let paired_channels = self.buffer.inputs.len().min(self.buffer.outputs.len());
        let ns = self.buffer.num_samples;

        // Advance past exhausted channels (all of them when the
        // block is empty) until one with samples left is found.
        loop {
            if self.ch >= paired_channels {
                return None;
            }
            if self.pos < ns {
                break;
            }
            self.ch += 1;
            self.pos = 0;
        }

        let ch = self.ch;
        let sample = self.pos;
        let take = (ns - sample).min(N);
        // Translate the view-relative position into an index into
        // the underlying channel slices.
        let abs_start = self.buffer.offset + sample;
        let abs_end = abs_start + take;

        let inp_slice = &self.buffer.inputs[ch][abs_start..abs_end];
        let out_slice: &mut [S] = &mut self.buffer.outputs[ch][abs_start..abs_end];

        self.pos += take;

        // Full vs Tail by length: full chunks convert to `&[S;
        // N]` / `&mut [S; N]` for the SIMD-friendly path; tails
        // fall back to slice form.
        Some(if take == N {
            ChunkItem::Full {
                ch,
                sample,
                // Length-checked above; `try_into` here is a
                // free reinterpret.
                inp: inp_slice.try_into().expect("len == N by construction"),
                out: out_slice.try_into().expect("len == N by construction"),
            }
        } else {
            ChunkItem::Tail {
                ch,
                sample,
                inp: inp_slice,
                out: out_slice,
            }
        })
    }
}

/// Scratch space for [`RawBufferScratch::build`].
///
/// Callers allocate this on the stack and pass it to `build`. The
/// buffer borrows the slices stored here, so this struct must outlive
/// the returned `AudioBuffer`.
///
/// Generic over the plugin's sample type `S`. When the host buffer
/// matches `S`, slices point into host memory (zero-copy). When the
/// host buffer is a different precision, the input is widened/narrowed
/// into per-channel scratch; the output is rendered into scratch and
/// the wrapper copies + casts it back to the host buffer at the end
/// of the block via [`Self::finish_widening_f32`].
pub struct RawBufferScratch<S: Sample = f32> {
    /// Per-channel input slices the built `AudioBuffer` borrows.
    /// Cleared and refilled on every build. The `'static` lifetime
    /// is a placeholder: the builder transmutes the resulting
    /// buffer down to the lifetime of the scratch borrow, so these
    /// slices must not be used after the host block they were built
    /// for has ended.
    pub input_slices: Vec<&'static [S]>,
    /// Per-channel output slices the built `AudioBuffer` borrows.
    /// Same refill behaviour and placeholder-lifetime caveat as
    /// `input_slices`.
    pub output_slices: Vec<&'static mut [S]>,
    /// Per-channel input copies. Used (a) when the host passes the
    /// same buffer for input and output (in-place processing - VST3
    /// spec allows this and several real DAWs use it for effects),
    /// or (b) when the host buffer precision differs from `S` and
    /// we widen/narrow on the way in. In either case the slice the
    /// plugin sees points into the matching slot here.
    input_copies: Vec<Vec<S>>,
    /// Per-channel output scratch. Only populated by [`Self::build`]
    /// when the host buffer precision differs from `S`; the wrapper
    /// copies + casts these back to the host buffer at the end of the
    /// block via [`Self::finish_widening_f32`].
    output_buffers: Vec<Vec<S>>,
}

impl<S: Sample> RawBufferScratch<S> {
    /// Build an `AudioBuffer<S>` from raw `f32` host pointers - the
    /// common case (CLAP, LV2, AAX always; VST3/VST2/AU 32-bit mode).
    ///
    /// When `S = f32`, slices point directly into host memory (modulo
    /// in-place input copying). When `S = f64`, every channel is
    /// widened into per-channel scratch and the wrapper must call
    /// [`Self::finish_widening_f32`] at the end of the block to copy
    /// the rendered samples back to the host's `f32` output pointers.
    ///
    /// `num_in` / `num_out` are the number of channel pointers behind
    /// `inputs` / `outputs`, and `num_frames` is the number of samples
    /// per channel. `supports_in_place` is the plugin's opt-in to
    /// in-place I/O: it only takes effect when `S = f32`, where an
    /// aliased input channel is then handed out as an empty slice
    /// instead of being copied into scratch.
    ///
    /// # Safety
    /// - `inputs` must point to `num_in` valid `*const f32` pointers
    ///   (each non-null pointer must address at least `num_frames`
    ///   readable samples; null is allowed and yields an empty slice).
    /// - `outputs` must point to `num_out` valid `*mut f32` pointers
    ///   (each non-null pointer must address at least `num_frames`
    ///   writable samples; null is allowed and yields an empty slice).
    /// - The pointed-to memory must remain valid for the lifetime of
    ///   the returned `AudioBuffer`.
    pub unsafe fn build(
        &mut self,
        inputs: *const *const f32,
        outputs: *mut *mut f32,
        num_in: u32,
        num_out: u32,
        num_frames: u32,
        supports_in_place: bool,
    ) -> AudioBuffer<'_, S> {
        // Thin public entry point; all work happens in the private
        // builder, which names the borrow lifetime explicitly.
        // SAFETY: forwarded - caller's contract is the same.
        unsafe {
            self.build_inner(
                inputs,
                outputs,
                num_in,
                num_out,
                num_frames,
                supports_in_place,
            )
        }
    }

    /// Copy + narrow the rendered `S` output back to the host's
    /// `f32` output pointers. No-op when `S = f32` (the slices the
    /// plugin wrote already point directly at host memory).
    ///
    /// # Safety
    /// `outputs` and `num_out` / `num_frames` must match the values
    /// passed to the prior [`Self::build`] call on this scratch.
    pub unsafe fn finish_widening_f32(
        &self,
        outputs: *mut *mut f32,
        num_out: u32,
        num_frames: u32,
    ) {
        use std::any::TypeId;

        // An `f32` plugin rendered straight into host memory, so
        // there is nothing to copy back.
        let rendered_into_host = TypeId::of::<S>() == TypeId::of::<f32>();
        if rendered_into_host {
            return;
        }

        let frames = num_frames as usize;
        let channel_count = num_out as usize;
        for ch in 0..channel_count {
            // SAFETY: the caller guarantees `outputs` addresses
            // `num_out` channel pointers.
            let dst = unsafe { *outputs.add(ch) };
            if dst.is_null() {
                continue;
            }
            // SAFETY: a non-null channel pointer addresses at least
            // `num_frames` writable samples per the caller's contract.
            let host = unsafe { std::slice::from_raw_parts_mut(dst, frames) };
            let rendered = &self.output_buffers[ch];
            // `zip` stops at the shorter side, so a scratch buffer
            // shorter than `frames` never causes an overrun.
            host.iter_mut()
                .zip(rendered)
                .for_each(|(h, &p)| *h = p.to_f32());
        }
    }

    unsafe fn build_inner<'a>(
        &'a mut self,
        inputs: *const *const f32,
        outputs: *mut *mut f32,
        num_in: u32,
        num_out: u32,
        num_frames: u32,
        supports_in_place: bool,
    ) -> AudioBuffer<'a, S> {
        const MAX_CHANNELS_TRACKED: usize = 64;
        // Whether the plugin's chosen precision matches the host's.
        // When matched, we zero-copy host pointers into the slice
        // arrays; when not, we widen/narrow through input_copies and
        // output_buffers.
        let same_precision = std::any::TypeId::of::<S>() == std::any::TypeId::of::<f32>();

        unsafe {
            let nf = num_frames as usize;
            let num_out_u = num_out as usize;
            let num_in_u = num_in as usize;
            debug_assert!(
                num_out_u <= MAX_CHANNELS_TRACKED,
                "RawBufferScratch::build: alias detection only covers up to {MAX_CHANNELS_TRACKED} \
                 output channels; got {num_out_u}. Channels beyond the cap won't be \
                 detected as aliased.",
            );
            let out_ptrs: [Option<*mut f32>; MAX_CHANNELS_TRACKED] = std::array::from_fn(|ch| {
                if ch < num_out_u {
                    let p = *outputs.add(ch);
                    if p.is_null() { None } else { Some(p) }
                } else {
                    None
                }
            });
            let aliases_any_output = |in_ptr: *const f32| -> bool {
                let in_start = in_ptr as usize;
                let in_end = in_start + nf * std::mem::size_of::<f32>();
                out_ptrs
                    .iter()
                    .take(num_out_u.min(MAX_CHANNELS_TRACKED))
                    .any(|o| {
                        o.is_some_and(|op| {
                            let o_start = op as usize;
                            let o_end = o_start + nf * std::mem::size_of::<f32>();
                            !(in_end <= o_start || o_end <= in_start)
                        })
                    })
            };

            // Grow per-channel scratch slots if the bus widened or
            // we're widening precision and need every channel copied.
            while self.input_copies.len() < num_in_u {
                self.input_copies.push(Vec::new());
            }
            if !same_precision {
                while self.output_buffers.len() < num_out_u {
                    self.output_buffers.push(Vec::new());
                }
            }

            self.input_slices.clear();
            self.input_slices.reserve(num_in_u);
            let mut in_place_mask: u64 = 0;
            for ch in 0..num_in_u {
                let ptr = *inputs.add(ch);
                let slice: &[S] = if ptr.is_null() {
                    &[]
                } else if aliases_any_output(ptr) {
                    if ch < 64 {
                        in_place_mask |= 1 << ch;
                    }
                    if supports_in_place && same_precision {
                        // Plugin opted in: hand it nothing through
                        // input(ch); it must read+write via in_out_mut.
                        // Only supported in the same-precision case;
                        // the cross-precision path always copies.
                        &[]
                    } else {
                        // Snapshot the input (and widen if needed)
                        // before the plugin overwrites the shared
                        // buffer.
                        let host = std::slice::from_raw_parts(ptr, nf);
                        let copy = &mut self.input_copies[ch];
                        copy.clear();
                        copy.reserve(nf);
                        for &h in host {
                            copy.push(S::from_f32(h));
                        }
                        let p = copy.as_ptr();
                        let l = copy.len();
                        // SAFETY: `copy` lives as long as `self`, which
                        // outlives the returned `AudioBuffer<'a>`.
                        std::slice::from_raw_parts(p, l)
                    }
                } else if same_precision {
                    // SAFETY: the in-precision case is `&[f32]`. We
                    // transmute via raw parts because the function
                    // signature is generic over S but the runtime
                    // branch knows S == f32.
                    let raw = ptr.cast::<S>();
                    std::slice::from_raw_parts(raw, nf)
                } else {
                    // Different precision, no aliasing: widen into scratch.
                    let host = std::slice::from_raw_parts(ptr, nf);
                    let copy = &mut self.input_copies[ch];
                    copy.clear();
                    copy.reserve(nf);
                    for &h in host {
                        copy.push(S::from_f32(h));
                    }
                    let p = copy.as_ptr();
                    let l = copy.len();
                    std::slice::from_raw_parts(p, l)
                };
                self.input_slices.push(slice);
            }

            self.output_slices.clear();
            self.output_slices.reserve(num_out_u);
            for ch in 0..num_out_u {
                let ptr = *outputs.add(ch);
                let slice: &mut [S] = if ptr.is_null() {
                    &mut []
                } else if same_precision {
                    // SAFETY: same-precision branch - host pointer is
                    // already `*mut S` modulo runtime type identity.
                    let raw = ptr.cast::<S>();
                    std::slice::from_raw_parts_mut(raw, nf)
                } else {
                    // Different precision: render into per-channel
                    // scratch; finish_widening_f32 copies+narrows back.
                    let buf = &mut self.output_buffers[ch];
                    buf.clear();
                    buf.resize(nf, S::default());
                    let p = buf.as_mut_ptr();
                    let l = buf.len();
                    std::slice::from_raw_parts_mut(p, l)
                };
                self.output_slices.push(slice);
            }

            // SAFETY: Same transmute pattern as AudioBuffer::slice().
            // RawBufferScratch stores 'static slices but we return AudioBuffer<'a>.
            let self_ptr: *mut Self = self;
            let s = &mut *self_ptr;
            let mut buf = std::mem::transmute::<AudioBuffer<'static, S>, AudioBuffer<'a, S>>(
                AudioBuffer::from_slices(&s.input_slices, &mut s.output_slices, nf),
            );
            buf.set_in_place_mask(in_place_mask);
            buf
        }
    }

    /// Pre-allocate the per-channel scratch vectors so `build` runs
    /// allocation-free for buses up to `num_in` × `num_out` channels
    /// and blocks up to `max_frames`. Idempotent and growth-only.
    ///
    /// After this call the slice tables have capacity for at least
    /// `num_in` / `num_out` entries, and there are at least `num_in`
    /// input-copy buffers and `num_out` output buffers, each with
    /// capacity for at least `max_frames` samples. Existing buffers
    /// are never shrunk or cleared.
    pub fn ensure_capacity(&mut self, num_in: usize, num_out: usize, max_frames: usize) {
        /// Grow `v` so that `v.capacity() >= wanted`.
        ///
        /// `Vec::reserve_exact` takes the number of *additional*
        /// elements beyond `len()`, not beyond `capacity()`, so the
        /// request must be computed from the length.
        fn grow_to<T>(v: &mut Vec<T>, wanted: usize) {
            v.reserve_exact(wanted.saturating_sub(v.len()));
        }

        grow_to(&mut self.input_slices, num_in);
        grow_to(&mut self.output_slices, num_out);

        // Add missing per-channel buffers (never remove any), then
        // make sure every buffer - old or new - can hold a full block.
        if self.input_copies.len() < num_in {
            self.input_copies
                .resize_with(num_in, || Vec::with_capacity(max_frames));
        }
        for buf in &mut self.input_copies {
            grow_to(buf, max_frames);
        }

        if self.output_buffers.len() < num_out {
            self.output_buffers
                .resize_with(num_out, || Vec::with_capacity(max_frames));
        }
        for buf in &mut self.output_buffers {
            grow_to(buf, max_frames);
        }
    }
}

impl<S: Sample> Default for RawBufferScratch<S> {
    /// Empty scratch with room for a stereo bus in each table, so
    /// the common two-channel case needs no outer-table growth.
    fn default() -> Self {
        // Channel slots reserved up front in every table.
        const INITIAL_CHANNELS: usize = 2;

        let input_slices = Vec::with_capacity(INITIAL_CHANNELS);
        let output_slices = Vec::with_capacity(INITIAL_CHANNELS);
        let input_copies = Vec::with_capacity(INITIAL_CHANNELS);
        let output_buffers = Vec::with_capacity(INITIAL_CHANNELS);
        Self {
            input_slices,
            output_slices,
            input_copies,
            output_buffers,
        }
    }
}