1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#![allow(clippy::cast_ptr_alignment)]
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// Hex-encodes `src` and returns the lowercase digits as an owned `String`.
///
/// # Errors
/// Propagates the required-length error from [`hex_to`]; since the buffer
/// here is sized to exactly `src.len() * 2`, this cannot actually occur.
pub fn hex_string(src: &[u8]) -> Result<String, usize> {
    let mut out = vec![0u8; src.len() * 2];
    hex_to(src, &mut out)?;
    // SAFETY: the encoder writes only ASCII hex digits, which are valid UTF-8.
    Ok(unsafe { String::from_utf8_unchecked(out) })
}
/// Hex-encodes `src` into the front of `dst` (lowercase), dispatching to the
/// fastest implementation the running CPU supports (AVX2, then SSE4.1, then
/// a scalar fallback).
///
/// # Errors
/// Returns `Err(required_len)` when `dst` is shorter than `src.len() * 2`
/// bytes; nothing is written to `dst` in that case.
pub fn hex_to(src: &[u8], dst: &mut [u8]) -> Result<(), usize> {
    let required = src.len().checked_mul(2).unwrap();
    if dst.len() < required {
        return Err(required);
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 availability was verified at runtime just above,
            // and dst was checked to hold at least src.len() * 2 bytes.
            unsafe { hex_encode_avx2(src, dst) };
            return Ok(());
        }
        if is_x86_feature_detected!("sse4.1") {
            // SAFETY: SSE4.1 availability was verified at runtime just above,
            // and dst was checked to hold at least src.len() * 2 bytes.
            unsafe { hex_encode_sse41(src, dst) };
            return Ok(());
        }
    }
    hex_encode_fallback(src, dst);
    Ok(())
}
/// Hex-encodes `src` into `dst` 32 input bytes at a time using AVX2,
/// then delegates any tail shorter than 32 bytes to the SSE4.1 path.
///
/// # Safety
/// The caller must ensure the CPU supports AVX2 (and SSE4.1 for the tail)
/// and that `dst` has room for at least `src.len() * 2` bytes.
#[target_feature(enable = "avx2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_avx2(mut src: &[u8], dst: &mut [u8]) {
// ASCII bases: digits start at '0'; for nibble values 10..=15 we instead add
// 'a' - 10 (written as b'a' - 9 - 1) so 10 maps to 'a', 15 to 'f'.
let ascii_zero = _mm256_set1_epi8(b'0' as i8);
let nines = _mm256_set1_epi8(9);
let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
let and4bits = _mm256_set1_epi8(0xf);
// i counts consumed input bytes; each produces two output bytes.
let mut i = 0_isize;
while src.len() >= 32 {
let invec = _mm256_loadu_si256(src.as_ptr() as *const _);
// Low nibble of every byte.
let masked1 = _mm256_and_si256(invec, and4bits);
// High nibble of every byte; the 64-bit shift smears bits across byte
// boundaries, but the subsequent 4-bit mask discards them.
let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);
// Per-byte flag: nibble > 9 needs a letter base instead of a digit base.
let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);
// Pick the ASCII base per byte, then add the nibble value onto it.
let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
// Interleave high/low digits so each input byte emits "high,low" order.
let res1 = _mm256_unpacklo_epi8(masked2, masked1);
let res2 = _mm256_unpackhi_epi8(masked2, masked1);
// AVX2 unpack works within 128-bit lanes, so the four 16-byte halves land
// out of order; the split stores below (offsets 0/16/32/48) put res1's
// halves at 0 and 32 and res2's at 16 and 48 to restore linear order.
let base = dst.as_mut_ptr().offset(i * 2);
let base1 = base.offset(0) as *mut _;
let base2 = base.offset(16) as *mut _;
let base3 = base.offset(32) as *mut _;
let base4 = base.offset(48) as *mut _;
_mm256_storeu2_m128i(base3, base1, res1);
_mm256_storeu2_m128i(base4, base2, res2);
src = &src[32..];
i += 32;
}
let i = i as usize;
// Remaining < 32 input bytes: finish with the 16-byte SSE4.1 loop.
hex_encode_sse41(src, &mut dst[i * 2..]);
}
/// Hex-encodes `src` into `dst` 16 input bytes at a time using SSE4.1,
/// then delegates any tail shorter than 16 bytes to the scalar fallback.
///
/// # Safety
/// The caller must ensure the CPU supports SSE4.1 and that `dst` has room
/// for at least `src.len() * 2` bytes.
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) {
// Same nibble-to-ASCII scheme as the AVX2 path: '0' base for 0..=9,
// b'a' - 10 base for 10..=15.
let ascii_zero = _mm_set1_epi8(b'0' as i8);
let nines = _mm_set1_epi8(9);
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
let and4bits = _mm_set1_epi8(0xf);
// i counts consumed input bytes; each produces two output bytes.
let mut i = 0_isize;
while src.len() >= 16 {
let invec = _mm_loadu_si128(src.as_ptr() as *const _);
// Low nibble of every byte.
let masked1 = _mm_and_si128(invec, and4bits);
// High nibble; cross-byte bits from the 64-bit shift are masked away.
let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
// Per-byte flag: nibble > 9 needs the letter base.
let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
// Pick the ASCII base per byte, then add the nibble value.
let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
// Interleave high/low digits; 128-bit unpack needs no lane correction,
// so the two results are stored back-to-back.
let res1 = _mm_unpacklo_epi8(masked2, masked1);
let res2 = _mm_unpackhi_epi8(masked2, masked1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
src = &src[16..];
i += 16;
}
let i = i as usize;
// Remaining < 16 input bytes: finish byte-by-byte.
hex_encode_fallback(src, &mut dst[i * 2..]);
}
/// Scalar hex encoder: one table lookup per nibble, two output bytes per
/// input byte. Writes exactly `src.len() * 2` bytes into the front of `dst`.
fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) {
    // Lowercase hex alphabet indexed by nibble value.
    static TABLE: &[u8] = b"0123456789abcdef";
    for (&byte, pair) in src.iter().zip(dst.chunks_mut(2)) {
        pair[0] = TABLE[usize::from(byte >> 4)];
        pair[1] = TABLE[usize::from(byte & 0x0f)];
    }
}