ruvector-cnn 2.0.6

CNN feature extraction for image embeddings with SIMD acceleration
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
//! INT8 Quantization with π-Based Calibration
//!
//! Implements efficient INT8 quantization for CNN inference using π-derived
//! constants to avoid quantization boundary resonance artifacts.
//!
//! # Why π?
//!
//! In low-precision quantization, values tend to collapse into repeating buckets
//! when scale factors align with powers of two. Using π-derived constants
//! breaks this symmetry:
//!
//! - π is irrational (non-repeating, infinite structure)
//! - Avoids power-of-2 boundary alignment
//! - Provides deterministic anti-resonance offsets
//!
//! # Quantization Schemes
//!
//! - **Symmetric**: For weights (zero-centered distributions)
//! - **Asymmetric**: For activations (ReLU outputs are non-negative)
//! - **Per-channel**: Different scale per output channel (higher accuracy)
//! - **Per-tensor**: Single scale for entire tensor (faster)
//!
//! # Performance
//!
//! INT8 inference provides:
//! - 4x memory reduction vs FP32
//! - 2-3x speedup on AVX2/AVX-512 (VNNI)
//! - 2-4x speedup on ARM NEON (SDOT)

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// π-based scale factors to avoid power-of-2 resonance
pub mod pi_constants {
    use std::f32::consts::PI;

    /// Anti-resonance offset derived from π fractional part
    pub const PI_FRAC: f32 = PI - 3.0; // 0.14159...

    /// Scale factor that avoids 2^n boundaries
    pub const PI_SCALE: f32 = PI / 4.0; // ~0.785

    /// Golden ratio approximation from π
    pub const PHI_APPROX: f32 = 2.0 / (PI - 1.0); // ~0.934

    /// First 16 digits of π for deterministic seeding
    pub const PI_DIGITS: [u8; 16] = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3];

    /// Compute anti-resonance offset for n-bit quantization
    #[inline]
    pub fn anti_resonance(bits: u8) -> f32 {
        PI_FRAC / (1u32 << bits) as f32
    }

    /// π-based jitter for tie-breaking in rounding
    #[inline]
    pub fn jitter(index: usize) -> f32 {
        let digit = PI_DIGITS[index % 16];
        (digit as f32) * 0.001 * PI_FRAC
    }
}

/// Quantization parameters for a tensor or channel
#[derive(Debug, Clone, Copy)]
pub struct QuantParams {
    /// Scale factor (float = quant * scale + zero_point)
    pub scale: f32,
    /// Zero point offset (for asymmetric quantization)
    pub zero_point: i8,
    /// Anti-resonance offset from π
    pub anti_resonance: f32,
    /// Quantization bits (7 for signed int8)
    pub bits: u8,
}

impl QuantParams {
    /// Create symmetric quantization params (for weights)
    ///
    /// Uses π-based anti-resonance to avoid boundary collapse.
    pub fn symmetric(min_val: f32, max_val: f32) -> Self {
        let abs_max = min_val.abs().max(max_val.abs());

        // 7 bits for signed int8 (-127 to 127)
        let bits = 7u8;
        let qmax = 127.0f32;

        // π-based scale with anti-resonance
        let anti_resonance = pi_constants::anti_resonance(bits);
        let scale = (abs_max + anti_resonance) / qmax;

        Self {
            scale: scale.max(1e-10), // Avoid division by zero
            zero_point: 0,
            anti_resonance,
            bits,
        }
    }

    /// Create asymmetric quantization params (for activations)
    ///
    /// Maps [min_val, max_val] to [-128, 127] with π-based calibration.
    pub fn asymmetric(min_val: f32, max_val: f32) -> Self {
        let bits = 8u8;
        let qmin = -128.0f32;
        let qmax = 127.0f32;

        let anti_resonance = pi_constants::anti_resonance(bits);
        let range = (max_val - min_val).max(1e-10) + anti_resonance;
        let scale = range / (qmax - qmin);

        // Compute zero point with π-jitter for tie-breaking
        let zero_point_float = qmin - min_val / scale + pi_constants::jitter(0);
        let zero_point = zero_point_float.round().clamp(-128.0, 127.0) as i8;

        Self {
            scale: scale.max(1e-10),
            zero_point,
            anti_resonance,
            bits,
        }
    }

    /// Quantize a single f32 value to i8
    #[inline]
    pub fn quantize(&self, value: f32) -> i8 {
        let scaled = value / self.scale + self.zero_point as f32;
        // Add small π-based offset for better rounding distribution
        let rounded = (scaled + self.anti_resonance * 0.5).round();
        rounded.clamp(-128.0, 127.0) as i8
    }

    /// Dequantize a single i8 value to f32
    #[inline]
    pub fn dequantize(&self, quantized: i8) -> f32 {
        (quantized as f32 - self.zero_point as f32) * self.scale
    }
}

impl Default for QuantParams {
    fn default() -> Self {
        Self::symmetric(-1.0, 1.0)
    }
}

/// Per-channel quantization parameters
#[derive(Debug, Clone)]
pub struct PerChannelQuantParams {
    /// Per-channel scales
    pub scales: Vec<f32>,
    /// Per-channel zero points
    pub zero_points: Vec<i8>,
    /// Number of channels
    pub num_channels: usize,
}

impl PerChannelQuantParams {
    /// Compute per-channel symmetric quantization params
    pub fn symmetric_per_channel(weights: &[f32], out_channels: usize, in_channels: usize) -> Self {
        let kernel_size = weights.len() / (out_channels * in_channels);
        let mut scales = Vec::with_capacity(out_channels);
        let zero_points = vec![0i8; out_channels];

        for oc in 0..out_channels {
            let start = oc * in_channels * kernel_size;
            let end = start + in_channels * kernel_size;
            let channel_weights = &weights[start..end];

            let abs_max = channel_weights
                .iter()
                .map(|x| x.abs())
                .fold(0.0f32, |a, b| a.max(b));

            let anti_res = pi_constants::anti_resonance(7);
            let scale = (abs_max + anti_res) / 127.0;
            scales.push(scale.max(1e-10));
        }

        Self {
            scales,
            zero_points,
            num_channels: out_channels,
        }
    }

    /// Get params for a specific channel
    #[inline]
    pub fn channel_params(&self, channel: usize) -> QuantParams {
        QuantParams {
            scale: self.scales[channel],
            zero_point: self.zero_points[channel],
            anti_resonance: pi_constants::anti_resonance(7),
            bits: 7,
        }
    }
}

/// Quantized INT8 tensor storage
#[derive(Debug, Clone)]
pub struct QuantizedTensor {
    /// INT8 data
    pub data: Vec<i8>,
    /// Shape
    pub shape: Vec<usize>,
    /// Per-tensor or per-channel quantization
    pub params: QuantizationType,
}

/// Quantization type
#[derive(Debug, Clone)]
pub enum QuantizationType {
    /// Single scale for entire tensor
    PerTensor(QuantParams),
    /// Different scale per output channel
    PerChannel(PerChannelQuantParams),
}

impl QuantizedTensor {
    /// Quantize a float tensor with per-tensor symmetric quantization
    pub fn from_float_symmetric(data: &[f32], shape: &[usize]) -> Self {
        let min_val = data.iter().fold(f32::MAX, |a, &b| a.min(b));
        let max_val = data.iter().fold(f32::MIN, |a, &b| a.max(b));
        let params = QuantParams::symmetric(min_val, max_val);

        let quantized: Vec<i8> = data.iter().map(|&v| params.quantize(v)).collect();

        Self {
            data: quantized,
            shape: shape.to_vec(),
            params: QuantizationType::PerTensor(params),
        }
    }

    /// Quantize weights with per-channel quantization
    pub fn from_weights_per_channel(
        weights: &[f32],
        out_channels: usize,
        in_channels: usize,
        kernel_h: usize,
        kernel_w: usize,
    ) -> Self {
        let per_channel = PerChannelQuantParams::symmetric_per_channel(weights, out_channels, in_channels);
        let kernel_size = kernel_h * kernel_w;

        let mut quantized = Vec::with_capacity(weights.len());

        for oc in 0..out_channels {
            let params = per_channel.channel_params(oc);
            let start = oc * in_channels * kernel_size;
            let end = start + in_channels * kernel_size;

            for &w in &weights[start..end] {
                quantized.push(params.quantize(w));
            }
        }

        Self {
            data: quantized,
            shape: vec![out_channels, in_channels, kernel_h, kernel_w],
            params: QuantizationType::PerChannel(per_channel),
        }
    }

    /// Dequantize back to float32
    pub fn dequantize(&self) -> Vec<f32> {
        match &self.params {
            QuantizationType::PerTensor(params) => {
                self.data.iter().map(|&q| params.dequantize(q)).collect()
            }
            QuantizationType::PerChannel(per_channel) => {
                let out_channels = self.shape[0];
                let channel_size = self.data.len() / out_channels;
                let mut output = Vec::with_capacity(self.data.len());

                for oc in 0..out_channels {
                    let params = per_channel.channel_params(oc);
                    let start = oc * channel_size;
                    let end = start + channel_size;

                    for &q in &self.data[start..end] {
                        output.push(params.dequantize(q));
                    }
                }
                output
            }
        }
    }

    /// Get the number of elements
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}

/// Batch quantize f32 to i8 using π-calibration
///
/// Faster than per-element quantization using SIMD.
pub fn quantize_batch(input: &[f32], output: &mut [i8], params: &QuantParams) {
    debug_assert_eq!(input.len(), output.len());

    let inv_scale = 1.0 / params.scale;
    let zp = params.zero_point as f32;
    let anti_res = params.anti_resonance * 0.5;

    for (i, &val) in input.iter().enumerate() {
        let scaled = val * inv_scale + zp + anti_res;
        output[i] = scaled.round().clamp(-128.0, 127.0) as i8;
    }
}

/// Batch dequantize i8 to f32
pub fn dequantize_batch(input: &[i8], output: &mut [f32], params: &QuantParams) {
    debug_assert_eq!(input.len(), output.len());

    let zp = params.zero_point as f32;

    for (i, &q) in input.iter().enumerate() {
        output[i] = (q as f32 - zp) * params.scale;
    }
}

/// AVX2 batch quantization (8 values at a time)
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub unsafe fn quantize_batch_avx2(input: &[f32], output: &mut [i8], params: &QuantParams) {
    let len = input.len();
    let chunks = len / 8;

    let inv_scale = _mm256_set1_ps(1.0 / params.scale);
    let zp = _mm256_set1_ps(params.zero_point as f32);
    let anti_res = _mm256_set1_ps(params.anti_resonance * 0.5);
    let half = _mm256_set1_ps(0.5);
    let min_val = _mm256_set1_ps(-128.0);
    let max_val = _mm256_set1_ps(127.0);

    for i in 0..chunks {
        let offset = i * 8;

        // Load 8 floats
        let v = _mm256_loadu_ps(input.as_ptr().add(offset));

        // Scale and offset: v * inv_scale + zp + anti_res
        let scaled = _mm256_add_ps(_mm256_mul_ps(v, inv_scale), zp);
        let adjusted = _mm256_add_ps(scaled, anti_res);

        // Round (add 0.5 and floor for positive, subtract 0.5 for negative)
        let rounded = _mm256_round_ps(adjusted, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

        // Clamp to [-128, 127]
        let clamped = _mm256_min_ps(_mm256_max_ps(rounded, min_val), max_val);

        // Convert to i32 then pack to i8
        let i32_vals = _mm256_cvtps_epi32(clamped);

        // Extract and pack to i8 (need to do this manually for AVX2)
        let i32_array: [i32; 8] = std::mem::transmute(i32_vals);
        for j in 0..8 {
            output[offset + j] = i32_array[j] as i8;
        }
    }

    // Handle remainder
    let remainder_start = chunks * 8;
    for i in remainder_start..len {
        let scaled = input[i] / params.scale + params.zero_point as f32 + params.anti_resonance * 0.5;
        output[i] = scaled.round().clamp(-128.0, 127.0) as i8;
    }
}

/// AVX2 batch dequantization
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub unsafe fn dequantize_batch_avx2(input: &[i8], output: &mut [f32], params: &QuantParams) {
    let len = input.len();
    let chunks = len / 8;

    let scale = _mm256_set1_ps(params.scale);
    let zp = _mm256_set1_ps(params.zero_point as f32);

    for i in 0..chunks {
        let offset = i * 8;

        // Load 8 i8 values and convert to f32
        let mut i32_array = [0i32; 8];
        for j in 0..8 {
            i32_array[j] = input[offset + j] as i32;
        }
        let i32_vals: __m256i = std::mem::transmute(i32_array);
        let f32_vals = _mm256_cvtepi32_ps(i32_vals);

        // Dequantize: (val - zp) * scale
        let shifted = _mm256_sub_ps(f32_vals, zp);
        let result = _mm256_mul_ps(shifted, scale);

        _mm256_storeu_ps(output.as_mut_ptr().add(offset), result);
    }

    // Handle remainder
    let remainder_start = chunks * 8;
    for i in remainder_start..len {
        output[i] = (input[i] as f32 - params.zero_point as f32) * params.scale;
    }
}

// Non-x86_64 stubs
#[cfg(not(target_arch = "x86_64"))]
pub unsafe fn quantize_batch_avx2(_input: &[f32], _output: &mut [i8], _params: &QuantParams) {}

#[cfg(not(target_arch = "x86_64"))]
pub unsafe fn dequantize_batch_avx2(_input: &[i8], _output: &mut [f32], _params: &QuantParams) {}

/// SIMD-dispatched quantization
#[inline(always)]
pub fn quantize_simd(input: &[f32], output: &mut [i8], params: &QuantParams) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            unsafe {
                quantize_batch_avx2(input, output, params);
            }
            return;
        }
    }
    quantize_batch(input, output, params);
}

/// SIMD-dispatched dequantization
#[inline(always)]
pub fn dequantize_simd(input: &[i8], output: &mut [f32], params: &QuantParams) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            unsafe {
                dequantize_batch_avx2(input, output, params);
            }
            return;
        }
    }
    dequantize_batch(input, output, params);
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_symmetric_quantization() {
        let params = QuantParams::symmetric(-1.0, 1.0);

        let q = params.quantize(0.5);
        let dq = params.dequantize(q);

        // Should round-trip with small error
        assert!((0.5 - dq).abs() < 0.02);
    }

    #[test]
    fn test_asymmetric_quantization() {
        let params = QuantParams::asymmetric(0.0, 1.0);

        let q = params.quantize(0.5);
        let dq = params.dequantize(q);

        assert!((0.5 - dq).abs() < 0.02);
    }

    #[test]
    fn test_pi_anti_resonance() {
        use std::f32::consts::PI;
        let anti_res = pi_constants::anti_resonance(8);
        assert!(anti_res > 0.0);
        assert!(anti_res < 0.001);

        // Check it's π-derived
        let expected = (PI - 3.0) / 256.0;
        assert!((anti_res - expected).abs() < 1e-10);
    }

    #[test]
    fn test_quantized_tensor_roundtrip() {
        let data = vec![0.1, 0.2, 0.3, 0.4, -0.1, -0.2, -0.3, -0.4];
        let shape = vec![2, 4];

        let quantized = QuantizedTensor::from_float_symmetric(&data, &shape);
        let dequantized = quantized.dequantize();

        // Check all values round-trip within tolerance
        for (original, recovered) in data.iter().zip(dequantized.iter()) {
            assert!((original - recovered).abs() < 0.02);
        }
    }

    #[test]
    fn test_per_channel_quantization() {
        // 2 output channels, 2 input channels, 3x3 kernel
        let weights: Vec<f32> = (0..36).map(|i| (i as f32 - 18.0) * 0.1).collect();

        let quantized = QuantizedTensor::from_weights_per_channel(&weights, 2, 2, 3, 3);
        let dequantized = quantized.dequantize();

        // Per-channel should have better accuracy than per-tensor for diverse channels
        let max_error: f32 = weights
            .iter()
            .zip(dequantized.iter())
            .map(|(a, b)| (a - b).abs())
            .fold(0.0f32, |a, b| a.max(b));

        assert!(max_error < 0.05);
    }

    #[test]
    fn test_batch_quantize() {
        let input = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8];
        let mut output = vec![0i8; 8];
        let params = QuantParams::symmetric(-1.0, 1.0);

        quantize_batch(&input, &mut output, &params);

        // All values should be non-zero and in valid range
        for &q in &output {
            assert!(q >= -128 && q <= 127);
        }
    }

    #[test]
    fn test_batch_dequantize() {
        let input = vec![10i8, 20, 30, 40, -10, -20, -30, -40];
        let mut output = vec![0.0f32; 8];
        let params = QuantParams::symmetric(-1.0, 1.0);

        dequantize_batch(&input, &mut output, &params);

        // Positive quantized values should give positive floats
        assert!(output[0] > 0.0);
        assert!(output[4] < 0.0);
    }

    #[test]
    fn test_simd_dispatch() {
        let input = vec![0.1f32; 16];
        let mut output = vec![0i8; 16];
        let params = QuantParams::symmetric(-1.0, 1.0);

        quantize_simd(&input, &mut output, &params);

        // All should be same value
        let first = output[0];
        for &q in &output {
            assert_eq!(q, first);
        }
    }
}