fory_core/
util.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::types::TypeId;
19use chrono::NaiveDate;
20use std::cell::UnsafeCell;
21use std::ops::{Deref, DerefMut};
22use std::sync::atomic::{AtomicBool, Ordering};
23use std::{ptr, thread};
24
25pub const EPOCH: NaiveDate = match NaiveDate::from_ymd_opt(1970, 1, 1) {
26    None => {
27        panic!("Unreachable code")
28    }
29    Some(epoch) => epoch,
30};
31
/// Swaps the high 8 bits and the low 8 bits of a 16-bit value.
fn swap_endian(value: u16) -> u16 {
    value.swap_bytes()
}

/// Converts a sequence of UTF-16 code units into UTF-8 bytes.
///
/// When `is_little_endian` is `true`, every code unit is byte-swapped before it
/// is decoded; when it is `false`, the units are decoded exactly as given.
///
/// # Errors
///
/// Returns an error message when the input is not well-formed UTF-16:
/// * a high surrogate is the last unit of the input,
/// * a high surrogate is followed by a unit that is not a low surrogate,
/// * a low surrogate appears without a preceding high surrogate (encoding it
///   would yield bytes that are not valid UTF-8).
pub fn to_utf8(utf16: &[u16], is_little_endian: bool) -> Result<Vec<u8>, String> {
    // Pre-allocate the worst case so the buffer is never resized:
    // one unit yields at most 3 bytes, and a surrogate pair (2 units) yields 4.
    let mut utf8_bytes: Vec<u8> = Vec::with_capacity(utf16.len() * 3);
    // Bytes are written straight into the spare capacity; the length is set
    // once, after the loop.
    let dst = utf8_bytes.as_mut_ptr();
    let mut offset = 0;
    let mut iter = utf16.iter();
    while let Some(&raw) = iter.next() {
        let wc = if is_little_endian {
            swap_endian(raw)
        } else {
            raw
        };
        match wc {
            0x0000..=0x007f => {
                // 1-byte UTF-8
                // [0000|0000|0ccc|cccc] => [0ccc|cccc]
                // SAFETY: at most 3 bytes are written per consumed unit, so
                // `offset + 1 <= utf16.len() * 3`, which is within capacity.
                unsafe {
                    dst.add(offset).write(wc as u8);
                }
                offset += 1;
            }
            0x0080..=0x07ff => {
                // 2-byte UTF-8
                // [0000|0bbb|bbcc|cccc] => [110|bbbbb], [10|cccccc]
                let bytes = [
                    ((wc >> 6) & 0b1_1111) as u8 | 0b1100_0000,
                    (wc & 0b11_1111) as u8 | 0b1000_0000,
                ];
                // SAFETY: same capacity bound as above; `bytes` is a local
                // array and cannot overlap the heap buffer.
                unsafe {
                    ptr::copy_nonoverlapping(bytes.as_ptr(), dst.add(offset), 2);
                }
                offset += 2;
            }
            0xd800..=0xdbff => {
                // High surrogate: a low surrogate must follow (2 u16 -> 4 u8).
                let wc2 = match iter.next() {
                    Some(&next) if is_little_endian => swap_endian(next),
                    Some(&next) => next,
                    None => {
                        return Err("Invalid UTF-16 string: missing surrogate pair".to_string());
                    }
                };
                if !(0xdc00..=0xdfff).contains(&wc2) {
                    return Err("Invalid UTF-16 string: wrong surrogate pair".to_string());
                }
                // Combine both halves into the Unicode scalar value.
                let code_point =
                    ((((wc as u32) - 0xd800) << 10) | ((wc2 as u32) - 0xdc00)) + 0x10000;
                // 11110??? 10?????? 10?????? 10??????
                let bytes = [
                    ((code_point >> 18) & 0b111) as u8 | 0b1111_0000,
                    ((code_point >> 12) & 0b11_1111) as u8 | 0b1000_0000,
                    ((code_point >> 6) & 0b11_1111) as u8 | 0b1000_0000,
                    (code_point & 0b11_1111) as u8 | 0b1000_0000,
                ];
                // SAFETY: two units were consumed for these 4 bytes, so the
                // 3-bytes-per-unit capacity bound still holds.
                unsafe {
                    ptr::copy_nonoverlapping(bytes.as_ptr(), dst.add(offset), 4);
                }
                offset += 4;
            }
            0xdc00..=0xdfff => {
                // A low surrogate with no preceding high surrogate is not a
                // Unicode scalar value and has no valid UTF-8 encoding.
                return Err("Invalid UTF-16 string: unpaired low surrogate".to_string());
            }
            _ => {
                // 3-byte UTF-8, 1 u16 -> 3 u8
                // [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10|bbbbbb], [10|cccccc]
                let bytes = [
                    ((wc >> 12) | 0b1110_0000) as u8,
                    ((wc >> 6) & 0b11_1111) as u8 | 0b1000_0000,
                    (wc & 0b11_1111) as u8 | 0b1000_0000,
                ];
                // SAFETY: same capacity bound as above.
                unsafe {
                    ptr::copy_nonoverlapping(bytes.as_ptr(), dst.add(offset), 3);
                }
                offset += 3;
            }
        }
    }
    // SAFETY: exactly `offset` bytes were initialised by the writes above and
    // `offset` never exceeds the reserved capacity.
    unsafe {
        utf8_bytes.set_len(offset);
    }
    Ok(utf8_bytes)
}
126
127pub fn get_ext_actual_type_id(type_id: u32, register_by_name: bool) -> u32 {
128    (type_id << 8)
129        + if register_by_name {
130            TypeId::NAMED_EXT as u32
131        } else {
132            TypeId::EXT as u32
133        }
134}
135
/// A mutual-exclusion lock that busy-waits (and periodically yields) instead
/// of parking the thread.
///
/// The protected value is reached through the [`SpinlockGuard`] returned by
/// [`Spinlock::lock`]; the lock is released when that guard is dropped.
pub struct Spinlock<T> {
    data: UnsafeCell<T>,
    /// `true` while some guard is alive.
    flag: AtomicBool,
}

// SAFETY: moving the lock to another thread moves the owned `T` with it, which
// is fine exactly when `T: Send`.
unsafe impl<T: Send> Send for Spinlock<T> {}
// SAFETY: a shared `&Spinlock<T>` lets any thread obtain exclusive `&mut T`
// access (one thread at a time, enforced by `flag`). That is equivalent to
// handing the value from thread to thread, so the requirement is `T: Send`,
// the same bound `std::sync::Mutex` uses. `T: Sync` is neither needed nor
// sufficient here.
unsafe impl<T: Send> Sync for Spinlock<T> {}

impl<T> Spinlock<T> {
    /// Creates an unlocked spinlock holding `data`.
    pub fn new(data: T) -> Self {
        Spinlock {
            data: UnsafeCell::new(data),
            flag: AtomicBool::new(false),
        }
    }

    /// Acquires the lock, blocking the current thread until it is available.
    ///
    /// After 10 consecutive failed attempts (each followed by a
    /// `spin_loop` hint) the thread yields to the scheduler once and the
    /// counter starts over. The returned guard releases the lock on drop.
    pub fn lock(&self) -> SpinlockGuard<'_, T> {
        let mut spins = 0;
        // `Acquire` on success pairs with the `Release` store in `unlock`, so
        // writes made by the previous holder are visible to this one.
        while self
            .flag
            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_err()
        {
            if spins < 10 {
                // Spin for a few iterations
                std::hint::spin_loop();
                spins += 1;
            } else {
                // Then yield to the scheduler and restart the spin budget
                thread::yield_now();
                spins = 0;
            }
        }
        SpinlockGuard { lock: self }
    }

    /// Releases the lock; only called from the guard's `Drop` impl.
    fn unlock(&self) {
        self.flag.store(false, Ordering::Release);
    }
}

/// RAII guard proving that the owning [`Spinlock`] is held.
///
/// Dereferences to the protected value and unlocks the spinlock when dropped.
pub struct SpinlockGuard<'a, T> {
    lock: &'a Spinlock<T>,
}

// SAFETY: sharing a guard between threads only exposes `&T` (through `Deref`),
// which is sound exactly when `T: Sync`. Writing this impl explicitly replaces
// the auto-derived one, which would otherwise follow `Spinlock<T>: Sync`
// (i.e. `T: Send`) and wrongly allow shared access to a non-`Sync` value.
unsafe impl<T: Sync> Sync for SpinlockGuard<'_, T> {}

impl<T> Drop for SpinlockGuard<'_, T> {
    fn drop(&mut self) {
        self.lock.unlock();
    }
}

impl<T> Deref for SpinlockGuard<'_, T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        // SAFETY: the guard exists only while the lock is held, so no other
        // guard can hand out a `&mut T` to the same value.
        unsafe { &*self.lock.data.get() }
    }
}

impl<T> DerefMut for SpinlockGuard<'_, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: the lock is held and `&mut self` guarantees this is the only
        // reference derived from this guard.
        unsafe { &mut *self.lock.data.get() }
    }
}
201
/// Compile-time switch for Fory debug output.
///
/// `true` when the `ENABLE_FORY_DEBUG_OUTPUT` environment variable was defined
/// while this crate was being compiled (for example
/// `ENABLE_FORY_DEBUG_OUTPUT=1`), `false` otherwise. Only the presence of the
/// variable is checked, not its value.
pub const ENABLE_FORY_DEBUG_OUTPUT: bool = {
    let compile_time_value: Option<&'static str> = option_env!("ENABLE_FORY_DEBUG_OUTPUT");
    compile_time_value.is_some()
};