clickhouse_driver_cthrs/
lib.rs

1#![no_std]
2#![allow(arithmetic_overflow)]
3#![allow(clippy::many_single_char_names)]
4#[cfg(test)]
5extern crate clickhouse_driver_cth;
6#[cfg(test)]
7extern crate std;
8
9use core::mem::size_of;
10use core::ptr::read_unaligned;
11
12#[derive(Debug, PartialEq)]
13pub struct Pair(pub u64, pub u64);
14
15/// Compare Hash with first 16 byte of Clickhouse Packet Header
16/// @note it's work only with little endian system only
17impl PartialEq<&[u8]> for Pair {
18    fn eq(&self, other: &&[u8]) -> bool {
19        other.len() == 16 && (self.0 == fetch64(*other)) && (self.1 == fetch64(&other[8..]))
20    }
21}
22
23const K0: u64 = 0xc3a5c85c97cb3127;
24const K1: u64 = 0xb492b66fbe98f273;
25const K2: u64 = 0x9ae16a3b2f90404f;
26const K3: u64 = 0xc949d7c7509e6557;
27
28// #[inline(always)]
29// fn rotate(val: u64, shift: u64) -> u64 {
30//     if shift == 0 {
31//         val
32//     } else {
33//         (val >> shift) | (val << (64 - shift))
34//     }
35// }
36
37/// The same as rotate but `shift` must not be eq 0
38#[inline(always)]
39fn rotate_least(val: u64, shift: u64) -> u64 {
40    (val >> shift) | (val << (64 - shift))
41}
42
43#[inline(always)]
44fn shift_mix(val: u64) -> u64 {
45    val ^ (val >> 47)
46}
47
48#[cfg(target_endian = "little")]
49#[allow(clippy::cast_ptr_alignment)]
50#[inline]
51pub fn fetch64(src: &[u8]) -> u64 {
52    debug_assert!(src.len() >= size_of::<u64>());
53    let ptr = src.as_ptr() as *const u64;
54    unsafe { read_unaligned(ptr) }
55}
56
57#[cfg(target_endian = "little")]
58#[allow(clippy::cast_ptr_alignment)]
59#[inline]
60fn fetch32(src: &[u8]) -> u32 {
61    debug_assert!(src.len() >= size_of::<u32>());
62    let ptr = src.as_ptr() as *const u32;
63    unsafe { read_unaligned(ptr) }
64}
65
66#[cfg(not(target_endian = "little"))]
67#[allow(clippy::cast_ptr_alignment)]
68#[inline]
69fn fetch64(src: &[u8]) -> u64 {
70    debug_assert!(src.len() >= mem::size_of::<u64>());
71    let ptr = src.as_ptr() as *const u64;
72    let src = unsafe { read_unaligned(ptr) };
73    src.swap_bytes()
74}
75
76#[cfg(not(target_endian = "little"))]
77#[allow(clippy::cast_ptr_alignment)]
78#[inline]
79fn fetch32(src: &[u8]) -> u32 {
80    debug_assert!(src.len() >= size_of::<u32>());
81    let ptr = src.as_ptr() as *const u32;
82    let src = unsafe { read_unaligned(ptr) };
83    src.swap_bytes()
84}
85
86fn hash_len0to16(src: &[u8]) -> u64 {
87    if src.len() > 8 {
88        let a: u64 = fetch64(src);
89        let b: u64 = fetch64(&src[src.len() - 8..]);
90        b ^ hash_len16(
91            a,
92            rotate_least(b.wrapping_add(src.len() as u64), src.len() as u64),
93        )
94    } else if src.len() >= 4 {
95        let a = fetch32(src) as u64;
96        hash_len16(
97            (a << 3).wrapping_add(src.len() as u64),
98            fetch32(&src[src.len() - 4..]) as u64,
99        )
100    } else if !src.is_empty() {
101        let a: u8 = src[0];
102        let b: u8 = src[src.len() >> 1];
103        let c: u8 = src[src.len() - 1];
104        let y: u64 = (a as u64).wrapping_add((b as u64) << 8);
105        let z: u64 = (src.len() as u64).wrapping_add((c as u64) << 2);
106        shift_mix(y.wrapping_mul(K2) ^ z.wrapping_mul(K3)).wrapping_mul(K2)
107    } else {
108        K2
109    }
110}
111
112fn citymurmur(mut src: &[u8], seed: Pair) -> Pair {
113    let mut a: u64 = seed.0;
114    let mut b: u64 = seed.1;
115    let mut c: u64;
116    let mut d: u64;
117
118    if src.len() <= 16 {
119        a = shift_mix(a.wrapping_mul(K1)).wrapping_mul(K1);
120        c = b.wrapping_mul(K1).wrapping_add(hash_len0to16(src));
121        d = if src.len() >= 8 { fetch64(src) } else { c };
122        d = shift_mix(a.wrapping_add(d));
123    } else {
124        c = hash_len16(fetch64(&src[src.len() - 8..]).wrapping_add(K1), a);
125        d = hash_len16(
126            b.wrapping_add(src.len() as u64),
127            c.wrapping_add(fetch64(&src[src.len() - 16..])),
128        );
129        a = a.wrapping_add(d);
130        loop {
131            a ^= shift_mix(fetch64(src).wrapping_mul(K1)).wrapping_mul(K1);
132            a = a.wrapping_mul(K1);
133            b ^= a;
134            c ^= shift_mix(fetch64(&src[8..]).wrapping_mul(K1)).wrapping_mul(K1);
135            c = c.wrapping_mul(K1);
136            d ^= c;
137            src = &src[16..];
138            if src.len() <= 16 {
139                break;
140            }
141        }
142    }
143
144    a = hash_len16(a, c);
145    b = hash_len16(d, b);
146    Pair(a ^ b, hash_len16(b, a))
147}
148
149const KMUL: u64 = 0x9ddfea08eb382d69;
150
151#[inline(always)]
152fn hash_128to64(x: Pair) -> u64 {
153    let mut a: u64 = (x.0 ^ x.1).wrapping_mul(KMUL);
154    a = shift_mix(a);
155    let mut b: u64 = (x.1 ^ a).wrapping_mul(KMUL);
156    b = shift_mix(b);
157    b.wrapping_mul(KMUL)
158}
159
160#[inline]
161fn hash_len16(u: u64, v: u64) -> u64 {
162    hash_128to64(Pair(u, v))
163}
164
165#[inline(always)]
166#[allow(non_snake_case)]
167fn _weakHashLen32WithSeeds(w: u64, x: u64, y: u64, z: u64, mut a: u64, mut b: u64) -> Pair {
168    a = a.wrapping_add(w);
169    b = rotate_least(b.wrapping_add(a).wrapping_add(z), 21);
170    let c = a;
171    a = a.wrapping_add(x).wrapping_add(y);
172    b = b.wrapping_add(rotate_least(a, 44));
173    Pair(a.wrapping_add(z), b.wrapping_add(c))
174}
175
176fn weak_hash_len32with_seeds(src: &[u8], a: u64, b: u64) -> Pair {
177    _weakHashLen32WithSeeds(
178        fetch64(src),
179        fetch64(&src[8..]),
180        fetch64(&src[16..]),
181        fetch64(&src[24..]),
182        a,
183        b,
184    )
185}
186
187fn cityhash128withseed(mut src: &[u8], seed: Pair) -> Pair {
188    // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
189    // v, w, x, y, and z.
190    let mut x: u64 = seed.0;
191    let mut y: u64 = seed.1;
192
193    let mut z: u64 = K1.wrapping_mul(src.len() as u64);
194    let t: u64 = K1
195        .wrapping_mul(rotate_least(y ^ K1, 49))
196        .wrapping_add(fetch64(src));
197
198    let mut v = Pair(
199        t,
200        K1.wrapping_mul(rotate_least(t, 42))
201            .wrapping_add(fetch64(&src[8..])),
202    );
203    let mut w = Pair(
204        K1.wrapping_mul(rotate_least(y.wrapping_add(z), 35))
205            .wrapping_add(x),
206        K1.wrapping_mul(rotate_least(x.wrapping_add(fetch64(&src[88..])), 53)),
207    );
208
209    // This is the same inner loop as CityHash64(), manually unrolled.
210    loop {
211        x = K1.wrapping_mul(rotate_least(
212            x.wrapping_add(y)
213                .wrapping_add(v.0)
214                .wrapping_add(fetch64(&src[16..])),
215            37,
216        ));
217        y = K1.wrapping_mul(rotate_least(
218            y.wrapping_add(v.1).wrapping_add(fetch64(&src[48..])),
219            42,
220        ));
221        x ^= w.1;
222        y ^= v.0;
223        z = rotate_least(z ^ w.0, 33);
224        v = weak_hash_len32with_seeds(src, K1.wrapping_mul(v.1), x.wrapping_add(w.0));
225        w = weak_hash_len32with_seeds(&src[32..], z.wrapping_add(w.1), y);
226        core::mem::swap(&mut z, &mut x);
227
228        //next 64 byte block
229        src = &src[64..];
230
231        x = K1.wrapping_mul(rotate_least(
232            x.wrapping_add(y)
233                .wrapping_add(v.0)
234                .wrapping_add(fetch64(&src[16..])),
235            37,
236        ));
237        y = K1.wrapping_mul(rotate_least(
238            y.wrapping_add(v.1).wrapping_add(fetch64(&src[48..])),
239            42,
240        ));
241        x ^= w.1;
242        y ^= v.0;
243        z = rotate_least(z ^ w.0, 33);
244        v = weak_hash_len32with_seeds(src, K1.wrapping_mul(v.1), x.wrapping_add(w.0));
245        w = weak_hash_len32with_seeds(&src[32..], z.wrapping_add(w.1), y);
246        core::mem::swap(&mut z, &mut x);
247        // next 64 bytes
248        if src.len() < (128 + 64) {
249            break;
250        }
251        src = &src[64..];
252    }
253    y = y.wrapping_add(K0.wrapping_mul(rotate_least(w.0, 37)).wrapping_add(z));
254    x = x.wrapping_add(K0.wrapping_mul(rotate_least(v.0.wrapping_add(z), 49)));
255    // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
256    while src.len() > 64 {
257        y = K0
258            .wrapping_mul(rotate_least(y.wrapping_sub(x), 42))
259            .wrapping_add(v.1);
260        w.0 = w.0.wrapping_add(fetch64(&src[src.len() - 16..]));
261        x = K0.wrapping_mul(rotate_least(x, 49)).wrapping_add(w.0);
262        w.0 = w.0.wrapping_add(v.0);
263        v = weak_hash_len32with_seeds(&src[src.len() - 32..], v.0, v.1);
264        src = &src[0..src.len() - 32];
265    }
266    // At this point our 48 bytes of state should contain more than
267    // enough information for a strong 128-bit hash.  We use two
268    // different 48-byte-to-8-byte hashes to get a 16-byte final result.
269    x = hash_len16(x, v.0);
270    y = hash_len16(y, w.0);
271
272    Pair(
273        hash_len16(x.wrapping_add(v.1), w.1).wrapping_add(y),
274        hash_len16(x.wrapping_add(w.1), y.wrapping_add(v.1)),
275    )
276}
277
278#[inline]
279pub fn city_hash_128(src: &[u8]) -> Pair {
280    if src.len() >= 144 {
281        cityhash128withseed(&src[16..], Pair(fetch64(&src[..]) ^ K3, fetch64(&src[8..])))
282    } else if src.len() >= 16 {
283        citymurmur(&src[16..], Pair(fetch64(&src[..]) ^ K3, fetch64(&src[8..])))
284    } else if src.len() >= 8 {
285        citymurmur(
286            &[],
287            Pair(
288                fetch64(&src[..]) ^ (K0.wrapping_mul(src.len() as u64)),
289                fetch64(&src[src.len() - 8..]) ^ K1,
290            ),
291        )
292    } else {
293        citymurmur(src, Pair(K0, K1))
294    }
295}
296
297#[cfg(test)]
298mod tests {
299    use super::*;
300    use clickhouse_driver_cth::{_CityHash128, _CityMurmur, c_char, Hash128};
301    use std::vec::Vec;
302
303    fn city_hash_ref(source: &[u8]) -> Pair {
304        let h = unsafe { _CityHash128(source.as_ptr() as *const c_char, source.len()) };
305        Pair(h.0, h.1)
306    }
307
308    fn city_murmur_ref(source: &[u8], seed: Pair) -> Pair {
309        let h = unsafe {
310            _CityMurmur(
311                source.as_ptr() as *const c_char,
312                source.len(),
313                Hash128(seed.0, seed.1),
314            )
315        };
316        Pair(h.0, h.1)
317    }
318
319    // fn hash_len_0to16_ref(source: &[u8]) -> u64 {
320    //     unsafe { _HashLen0to16(source.as_ptr() as *const c_char, source.len()) }
321    // }
322
323    // #[test]
324    // fn test_hash_len_0to16() {
325    //     let src = b"";
326    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
327    //
328    //     let src = b"4444";
329    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
330    //
331    //     let src = b"999999999";
332    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
333    //
334    //     let src = b"1234567890123456";
335    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
336    //
337    //     let src = b"12";
338    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
339    //
340    //     let src = b"abcdef";
341    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
342    //
343    //     let src = b"0123456789abcdef";
344    //     assert_eq!(hash_len_0to16_ref(src), hash_len0to16(src));
345    // }
346
347    #[test]
348    fn test_cityhash_unaligned_load() {
349        let src = [1u8, 0, 0, 1, 0, 0, 0, 0xFF, 0x1F, 0x1F, 0, 0];
350        assert_eq!(fetch64(&src[..]), 0xFF00000001000001_u64);
351    }
352
353    // #[test]
354    // fn test_cityhash_hash16() {
355    //     let a: u64 = unsafe { testHashLen16(0xFF, 0xFF) };
356    //     let b: u64 = hash_len16(0xFF, 0xFF);
357    //     assert_eq!(a, b);
358    //
359    //     let a: u64 = unsafe { testHashLen16(0x1F, 0xFF) };
360    //     let b: u64 = hash_len16(0x1F, 0xFF);
361    //     assert_eq!(a, b);
362    //
363    //     let a: u64 = unsafe { testHashLen16(0x00, 0x7A) };
364    //     let b: u64 = hash_len16(0x00, 0x7A);
365    //     assert_eq!(a, b);
366    //
367    //     let a: u64 = unsafe { testHashLen16(0x00FF12A6, 0x7AF8375E) };
368    //     let b: u64 = hash_len16(0x00FF12A6, 0x7AF8375E);
369    //     assert_eq!(a, b);
370    // }
371
372    #[test]
373    fn test_citymurmur() {
374        assert_eq!(
375            city_murmur_ref(b"", Pair(K0, K1)),
376            citymurmur(b"", Pair(K0, K1))
377        );
378        assert_eq!(
379            city_murmur_ref(b"0123456789", Pair(K0, K1)),
380            citymurmur(b"0123456789", Pair(K0, K1))
381        );
382        assert_eq!(
383            city_murmur_ref(b"0123456789abcdef", Pair(K0, K1)),
384            citymurmur(b"0123456789abcdef", Pair(K0, K1))
385        );
386        let src = b"0123456789012345678901234567890123456789012345678901234567891234";
387        assert_eq!(
388            city_murmur_ref(src, Pair(K0, K1)),
389            citymurmur(src, Pair(K0, K1))
390        );
391    }
392
393    #[test]
394    fn test_hash_128() {
395        const MAX_SIZE: u32 = 1024 * 10;
396        const ITER_COUNT: u8 = 5;
397        use rand::Rng;
398        for s in 8..MAX_SIZE {
399            let mut b: Vec<u8> = Vec::with_capacity(s as usize);
400            unsafe {
401                b.set_len(s as usize);
402            }
403            for _ in 0..ITER_COUNT {
404                rand::thread_rng().fill(&mut b[..]);
405                assert_eq!(city_hash_ref(b.as_ref()), city_hash_128(b.as_ref()));
406            }
407        }
408    }
409}