1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
//! SSE4.1 fast path for the entropy-constrained, matrix-weighted VQ used by
//! the SILK encoder when searching the 5-tap LTP codebook.
//!
//! The C implementation in `silk/x86/VQ_WMat_EC_sse4_1.c` relies on SSE4.1
//! intrinsics to accelerate the same math performed by the scalar
//! [`vq_wmat_ec`] helper. Runtime CPU dispatch is currently disabled via
//! `OPUS_ARCHMASK`, so this Rust version delegates to the safe scalar helper
//! while keeping the dedicated entry point that the x86 dispatch table expects.
use crate::silk::vq_wmat_ec::{LTP_ORDER, VqWMatEcResult, vq_wmat_ec};
/// Mirrors `silk_VQ_WMat_EC_sse4_1`.
///
/// Entry point kept for the x86 dispatch table; until runtime CPU detection
/// is wired up it is a thin shim over the scalar reference implementation,
/// which is bit-exact with the SSE4.1 fast path it stands in for.
#[inline]
#[allow(clippy::too_many_arguments)]
pub fn vq_wmat_ec_sse4_1(
    xx_q17: &[i32; LTP_ORDER * LTP_ORDER],
    x_x_q17: &[i32; LTP_ORDER],
    cb_q7: &[[i8; LTP_ORDER]],
    cb_gain_q7: &[u8],
    cl_q5: &[u8],
    subfr_len: i32,
    max_gain_q7: i32,
) -> VqWMatEcResult {
    // Forward every argument unchanged to the safe scalar helper.
    vq_wmat_ec(xx_q17, x_x_q17, cb_q7, cb_gain_q7, cl_q5, subfr_len, max_gain_q7)
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// The SSE4.1 entry point must produce bit-identical results to the
    /// scalar reference for a representative, well-conditioned input.
    #[test]
    fn matches_scalar_vq() {
        // Diagonally dominant 5x5 correlation matrix in Q17.
        let xx_q17 = [
            19_000, 900, 1_100, 1_200, 1_400, 900, 19_000, 1_000, 1_100, 1_200, 1_100, 1_000,
            19_000, 1_050, 1_100, 1_200, 1_100, 1_050, 19_000, 1_000, 1_400, 1_200, 1_100, 1_000,
            19_000,
        ];
        let x_x_q17 = [250, 260, 270, 280, 290];
        // Two codebook rows with distinct gains and code lengths so the
        // search has a real decision to make.
        let cb_q7 = [[3, 2, 1, 0, -1], [-2, -2, -2, -2, -2]];
        let cb_gain_q7 = [18u8, 10u8];
        let cl_q5 = [10u8, 6u8];
        let subfr_len = 80;
        let max_gain_q7 = 25;
        let scalar = vq_wmat_ec(
            &xx_q17,
            &x_x_q17,
            &cb_q7,
            &cb_gain_q7,
            &cl_q5,
            subfr_len,
            max_gain_q7,
        );
        let simd = vq_wmat_ec_sse4_1(
            &xx_q17,
            &x_x_q17,
            &cb_q7,
            &cb_gain_q7,
            &cl_q5,
            subfr_len,
            max_gain_q7,
        );
        assert_eq!(scalar, simd);
    }

    /// The wrapper must accept a heap-allocated slice of codebook rows
    /// (arbitrary row count), not only fixed-size array references.
    #[test]
    fn accepts_arbitrary_codebook_layout() {
        let mut cb = vec![[0i8; LTP_ORDER]; 4];
        for (row_idx, row) in cb.iter_mut().enumerate() {
            for (col_idx, value) in row.iter_mut().enumerate() {
                // Deterministic non-symmetric pattern covering positive and
                // negative taps. Indices are tiny (<= 4), so the i8 casts
                // cannot truncate; the expression is already i8, so no
                // further cast is needed.
                *value = row_idx as i8 - 2 * col_idx as i8;
            }
        }
        // Identity-like weight matrix with zero correlation vector: every
        // candidate scores the same, exercising the tie-breaking path.
        let xx_q17 = [
            25_000, 0, 0, 0, 0, 0, 25_000, 0, 0, 0, 0, 0, 25_000, 0, 0, 0, 0, 0, 25_000, 0, 0, 0,
            0, 0, 25_000,
        ];
        let x_x_q17 = [0, 0, 0, 0, 0];
        let cb_gain_q7 = [0u8; 4];
        let cl_q5 = [0u8; 4];
        let subfr_len = 20;
        let max_gain_q7 = 100;
        let simd = vq_wmat_ec_sse4_1(
            &xx_q17,
            &x_x_q17,
            &cb,
            &cb_gain_q7,
            &cl_q5,
            subfr_len,
            max_gain_q7,
        );
        let scalar = vq_wmat_ec(
            &xx_q17,
            &x_x_q17,
            &cb,
            &cb_gain_q7,
            &cl_q5,
            subfr_len,
            max_gain_q7,
        );
        assert_eq!(simd, scalar);
    }
}