csv_lib/decoders/
utf8.rs

1use std::borrow::Cow;
2#[cfg(target_arch = "x86_64")]
3#[allow(unused)]
4use std::arch::x86_64::*;
5#[cfg(target_arch = "aarch64")]
6#[allow(unused)]
7use std::arch::aarch64::*;
8#[cfg(target_arch = "aarch64")]
9use std::arch::is_aarch64_feature_detected;
10
/// Scalar UTF-8 decoding.
///
/// Valid input is returned as a borrowed `&str` with no copy. Invalid input
/// never panics: it is decoded lossily, so every well-formed sequence is kept
/// and each malformed sequence is replaced by U+FFFD (`�`), which requires an
/// owned `String`.
#[allow(dead_code)]
pub(crate) fn decode_utf8_scalar(input: &[u8]) -> Cow<'_, str> {
    // `from_utf8_lossy` yields `Cow::Borrowed` for valid input and allocates
    // only when at least one replacement character has to be inserted.
    String::from_utf8_lossy(input)
}
20
/// UTF-8 decoding entry point selected on CPUs that report AVX2.
///
/// No AVX2 intrinsics are used here yet; the bytes are checked by the standard
/// library. Valid input is returned borrowed. Malformed sequences are replaced
/// by U+FFFD (`�`) in an owned `String`, so this function never panics.
#[allow(dead_code)]
pub(crate) fn decode_utf8_avx2(input: &[u8]) -> Cow<'_, str> {
    // This is a safe fn that can be handed arbitrary bytes, so validation must
    // not be skipped: building a `str` from malformed bytes without checking
    // (`from_utf8_unchecked`) is undefined behaviour.
    String::from_utf8_lossy(input)
}
27
/// UTF-8 decoding entry point selected on CPUs that report NEON.
///
/// No NEON intrinsics are used here yet; the bytes are checked by the standard
/// library. Valid input is returned borrowed. Malformed sequences are replaced
/// by U+FFFD (`�`) in an owned `String`, so this function never panics.
#[allow(dead_code)]
pub(crate) fn decode_utf8_neon(input: &[u8]) -> Cow<'_, str> {
    // This is a safe fn that can be handed arbitrary bytes, so validation must
    // not be skipped: building a `str` from malformed bytes without checking
    // (`from_utf8_unchecked`) is undefined behaviour.
    String::from_utf8_lossy(input)
}
34
35/// Public UTF-8 decoder.
36/// Picks best implementation based on CPU.
37#[allow(dead_code)]
38pub fn decode_utf8(input: &[u8]) -> Cow<'_, str> {
39    #[cfg(target_arch = "x86_64")]
40    {
41        if std::is_x86_feature_detected!("avx2") {
42            return decode_utf8_avx2(input);
43        }
44    }
45    #[cfg(target_arch = "aarch64")]
46    {
47        if is_aarch64_feature_detected!("neon") {
48            return decode_utf8_neon(input);
49        }
50    }
51    decode_utf8_scalar(input)
52}
53
54
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain ASCII, which is valid UTF-8.
    const VALID_UTF8: &[u8] = b"Hello, world!";
    /// 0xF8 is never a legal lead byte, and the following bytes are stray
    /// continuation bytes, so no byte of this input is part of a valid sequence.
    const INVALID_UTF8: &[u8] = &[0xf8, 0x88, 0x80, 0x80];

    #[test]
    fn test_decode_utf8_scalar_valid() {
        let decoded = decode_utf8_scalar(VALID_UTF8);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_utf8_scalar_invalid() {
        let decoded = decode_utf8_scalar(INVALID_UTF8);
        assert_eq!(decoded.as_ref(), "����"); // 4 replacement characters
    }

    #[test]
    fn test_decode_utf8_avx2_or_neon_valid() {
        // Exactly one of the three cfg-gated blocks survives on any target, so
        // `decoded` always has a value. Without the last arm the block would be
        // `()` on targets that are neither x86_64 nor aarch64 and the test
        // would not compile there.
        let decoded = {
            #[cfg(target_arch = "x86_64")]
            {
                if std::is_x86_feature_detected!("avx2") {
                    decode_utf8_avx2(VALID_UTF8)
                } else {
                    decode_utf8_scalar(VALID_UTF8)
                }
            }
            #[cfg(target_arch = "aarch64")]
            {
                if is_aarch64_feature_detected!("neon") {
                    decode_utf8_neon(VALID_UTF8)
                } else {
                    decode_utf8_scalar(VALID_UTF8)
                }
            }
            #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
            {
                decode_utf8_scalar(VALID_UTF8)
            }
        };
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_utf8_public() {
        let decoded = decode_utf8(VALID_UTF8);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_utf8_public_multibyte() {
        // Valid input containing 2- and 3-byte sequences must round-trip unchanged.
        let text = "héllo, wörld ✓";
        let decoded = decode_utf8(text.as_bytes());
        assert_eq!(decoded.as_ref(), text);
    }
}
103