taos_query/common/raw/views/
n_char_view.rs

1use std::{
2    cell::{RefCell, UnsafeCell},
3    ffi::c_void,
4    fmt::Debug,
5    rc::Rc,
6};
7
8use super::{IsColumnView, Offsets, Version};
9
10use crate::{
11    common::{layout::Layout, BorrowedValue, Ty},
12    prelude::InlinableWrite,
13    util::{InlineNChar, InlineStr},
14};
15
16use bytes::Bytes;
17use itertools::Itertools;
18
19#[derive(Debug)]
20pub struct NCharView {
21    // version: Version,
22    pub(crate) offsets: Offsets,
23    pub(crate) data: Bytes,
24    /// TDengine v3 raw block use [char] for NChar data type, it's [str] in v2 websocket block.
25    pub is_chars: UnsafeCell<bool>,
26    pub(crate) version: Version,
27    /// Layout should set as NCHAR_DECODED when raw data decoded.
28    pub(crate) layout: Rc<RefCell<Layout>>,
29}
30impl Clone for NCharView {
31    fn clone(&self) -> Self {
32        unsafe {
33            self.nchar_to_utf8();
34        }
35        Self {
36            offsets: self.offsets.clone(),
37            data: self.data.clone(),
38            is_chars: UnsafeCell::new(false),
39            version: self.version,
40            layout: self.layout.clone(),
41        }
42    }
43}
44impl IsColumnView for NCharView {
45    fn ty(&self) -> Ty {
46        Ty::NChar
47    }
48    fn from_borrowed_value_iter<'b>(iter: impl Iterator<Item = BorrowedValue<'b>>) -> Self {
49        Self::from_iter::<String, _, _, _>(
50            iter.map(|v| v.to_str().map(|v| v.into_owned()))
51                .collect_vec(),
52        )
53    }
54}
55
56impl NCharView {
57    pub fn len(&self) -> usize {
58        self.offsets.len()
59    }
60
61    /// Check if the value at `row` index is NULL or not.
62    pub fn is_null(&self, row: usize) -> bool {
63        if row < self.len() {
64            unsafe { self.is_null_unchecked(row) }
65        } else {
66            false
67        }
68    }
69
70    /// Unsafe version for [methods.is_null]
71    pub unsafe fn is_null_unchecked(&self, row: usize) -> bool {
72        self.offsets.get_unchecked(row) < 0
73    }
74
75    #[inline]
76    pub unsafe fn nchar_to_utf8(&self) {
77        if self.version == Version::V3 && *self.is_chars.get() {
78            let mut ptr: *const u8 = std::ptr::null();
79            for offset in self.offsets.iter() {
80                if offset >= 0 {
81                    if ptr.is_null() {
82                        ptr = self.data.as_ptr().offset(offset as isize);
83                        InlineNChar::<u16>::from_ptr(self.data.as_ptr().offset(offset as isize))
84                            .into_inline_str();
85                    } else {
86                        let next = self.data.as_ptr().offset(offset as isize);
87                        if ptr != next {
88                            ptr = next;
89                            InlineNChar::<u16>::from_ptr(
90                                self.data.as_ptr().offset(offset as isize),
91                            )
92                            .into_inline_str();
93                        }
94                    }
95                }
96            }
97            *self.is_chars.get() = false;
98            self.layout.borrow_mut().with_nchar_decoded();
99        }
100    }
101
102    /// Get UTF-8 string at `row`.
103    ///
104    /// In this method, InlineNChar will directly converted to InlineStr, which means v3 raw block
105    /// will be changed in-place.
106    #[inline]
107    pub unsafe fn get_inline_str_unchecked(&self, row: usize) -> Option<&InlineStr> {
108        let offset = self.offsets.get_unchecked(row);
109        if offset >= 0 {
110            self.nchar_to_utf8();
111            //     // let me: &mut Self = unsafe { std::mem::transmute(&self) };
112            //     let is_chars = &mut *self.is_chars.get();
113            //     *is_chars = false;
114            //     Some(
115            //         InlineNChar::<u16>::from_ptr(self.data.as_ptr().offset(*offset as isize))
116            //             .into_inline_str(),
117            //     )
118            // } else {
119            Some(InlineStr::<u16>::from_ptr(
120                self.data.as_ptr().offset(offset as isize),
121            ))
122            // }
123        } else {
124            None
125        }
126    }
127
128    #[inline]
129    pub unsafe fn get_length_unchecked(&self, row: usize) -> Option<usize> {
130        let offset = self.offsets.get_unchecked(row);
131        if offset >= 0 {
132            self.nchar_to_utf8();
133            Some(InlineStr::<u16>::from_ptr(self.data.as_ptr().offset(offset as isize)).len())
134        } else {
135            None
136        }
137    }
138
139    #[inline]
140    pub fn lengths(&self) -> Vec<Option<usize>> {
141        (0..self.len())
142            .map(|i| unsafe { self.get_length_unchecked(i) })
143            .collect_vec()
144    }
145
146    #[inline]
147    pub fn max_length(&self) -> usize {
148        (0..self.len())
149            .filter_map(|i| unsafe { self.get_length_unchecked(i) })
150            .min()
151            .unwrap_or(0)
152    }
153
154    /// Get UTF-8 string at `row`.
155    #[inline]
156    pub unsafe fn get_unchecked(&self, row: usize) -> Option<&str> {
157        self.get_inline_str_unchecked(row).map(|s| s.as_str())
158    }
159
160    pub unsafe fn get_value_unchecked(&self, row: usize) -> BorrowedValue {
161        self.get_unchecked(row)
162            .map(|s| BorrowedValue::NChar(s.into()))
163            .unwrap_or(BorrowedValue::Null(Ty::NChar))
164    }
165
166    pub unsafe fn get_raw_value_unchecked(&self, row: usize) -> (Ty, u32, *const c_void) {
167        self.nchar_to_utf8();
168        match self.get_unchecked(row) {
169            Some(s) => (Ty::NChar, s.len() as _, s.as_ptr() as _),
170            None => (Ty::NChar, 0, std::ptr::null()),
171        }
172    }
173
174    pub fn slice(&self, mut range: std::ops::Range<usize>) -> Option<Self> {
175        if range.start >= self.len() {
176            return None;
177        }
178        if range.end > self.len() {
179            range.end = self.len();
180        }
181        if range.is_empty() {
182            return None;
183        }
184        let (offsets, range) = unsafe { self.offsets.slice_unchecked(range.clone()) };
185        let range = if let Some(range) = range {
186            range.0 as usize..range.1.map(|v| v as usize).unwrap_or(self.data.len())
187        } else {
188            0..0
189        };
190        let data = self.data.slice(range);
191        Some(Self {
192            offsets,
193            data,
194            is_chars: UnsafeCell::new(false),
195            version: self.version,
196            layout: self.layout.clone(),
197        })
198    }
199
200    /// Iterator for NCharView.
201    #[inline]
202    pub fn iter(&self) -> NCharViewIter {
203        NCharViewIter { view: self, row: 0 }
204    }
205
206    /// Collection to `str`s.
207    pub fn to_vec(&self) -> Vec<Option<&str>> {
208        self.iter().collect()
209    }
210
211    /// Write column data as raw bytes.
212    pub(crate) fn write_raw_into<W: std::io::Write>(&self, mut wtr: W) -> std::io::Result<usize> {
213        // if self.layout.borrow().nchar_is_decoded() {
214        let mut offsets = Vec::new();
215        let mut bytes: Vec<u8> = Vec::new();
216        for v in self.iter() {
217            if let Some(v) = v {
218                // dbg!(v);
219                let chars = v.chars().collect_vec();
220                offsets.push(bytes.len() as i32);
221                let chars = unsafe {
222                    std::slice::from_raw_parts(
223                        chars.as_ptr() as *const u8,
224                        chars.len() * std::mem::size_of::<char>(),
225                    )
226                };
227                // dbg!(chars);
228                bytes.write_inlined_bytes::<2>(chars).unwrap();
229            } else {
230                offsets.push(-1);
231            }
232        }
233        unsafe {
234            // dbg!(&offsets);
235            let offsets_bytes = std::slice::from_raw_parts(
236                offsets.as_ptr() as *const u8,
237                offsets.len() * std::mem::size_of::<i32>(),
238            );
239            wtr.write_all(offsets_bytes)?;
240            wtr.write_all(&bytes)?;
241            Ok(offsets_bytes.len() + bytes.len())
242        }
243        // }
244        // let offsets = self.offsets.as_bytes();
245        // wtr.write_all(offsets)?;
246        // wtr.write_all(&self.data)?;
247        // Ok(offsets.len() + self.data.len())
248    }
249
250    pub fn from_iter<
251        S: AsRef<str>,
252        T: Into<Option<S>>,
253        I: ExactSizeIterator<Item = T>,
254        V: IntoIterator<Item = T, IntoIter = I>,
255    >(
256        iter: V,
257    ) -> Self {
258        let mut offsets = Vec::new();
259        let mut data = Vec::new();
260
261        for i in iter.into_iter().map(|v| v.into()) {
262            if let Some(s) = i {
263                let s: &str = s.as_ref();
264                offsets.push(data.len() as i32);
265                data.write_inlined_str::<2>(s).unwrap();
266            } else {
267                offsets.push(-1);
268            }
269        }
270        let offsets_bytes = unsafe {
271            Vec::from_raw_parts(
272                offsets.as_mut_ptr() as *mut u8,
273                offsets.len() * 4,
274                offsets.capacity() * 4,
275            )
276        };
277        std::mem::forget(offsets);
278        NCharView {
279            offsets: Offsets(offsets_bytes.into()),
280            data: data.into(),
281            is_chars: UnsafeCell::new(false),
282            version: Version::V2,
283            layout: Rc::new(RefCell::new({
284                let mut layout = Layout::default();
285                layout.with_nchar_decoded();
286                layout
287            })),
288        }
289    }
290
291    pub fn concat(&self, rhs: &Self) -> Self {
292        Self::from_iter::<&str, _, _, _>(self.iter().chain(rhs.iter()).collect_vec())
293    }
294}
295
296pub struct NCharViewIter<'a> {
297    view: &'a NCharView,
298    row: usize,
299}
300
301impl<'a> Iterator for NCharViewIter<'a> {
302    type Item = Option<&'a str>;
303
304    fn next(&mut self) -> Option<Self::Item> {
305        if self.row < self.view.len() {
306            let row = self.row;
307            self.row += 1;
308            Some(unsafe { self.view.get_unchecked(row) })
309        } else {
310            None
311        }
312    }
313
314    #[inline]
315    fn size_hint(&self) -> (usize, Option<usize>) {
316        if self.row < self.view.len() {
317            let len = self.view.len() - self.row;
318            (len, Some(len))
319        } else {
320            (0, Some(0))
321        }
322    }
323}
324
325impl<'a> ExactSizeIterator for NCharViewIter<'a> {
326    fn len(&self) -> usize {
327        self.view.len() - self.row
328    }
329}
330
/// Slicing must reject empty and out-of-range requests, and every accepted
/// sub-range must yield the same rows as the source data.
#[test]
fn test_slice() {
    let data = [
        None,
        Some(""),
        Some("abc"),
        Some("中文"),
        None,
        None,
        Some("a loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooog string"),
    ];
    let view = NCharView::from_iter::<&str, _, _, _>(data);

    // Empty range.
    assert!(view.slice(0..0).is_none());
    // Range entirely past the last row.
    assert!(view.slice(100..1000).is_none());

    for start in 0..data.len() {
        for end in (start + 1)..data.len() {
            let slice = view.slice(start..end).unwrap();
            assert_eq!(slice.to_vec().as_slice(), &data[start..end]);
        }
    }
}