// hdbconnect_impl/types_impl/lob/lob_writer_util.rs

1use crate::{HdbResult, impl_err};
2
/// Selects the mode of a LOB write operation.
///
/// NOTE(review): the exact semantics of the variants are defined by the code that
/// consumes this enum, which is not part of this file. Presumably `Append` adds
/// data while more is still to come and `Last` marks the final piece of data --
/// confirm against the LOB writer implementation.
pub(crate) enum LobWriteMode {
    // An offset-based mode is sketched here but is currently disabled:
    //Offset(i64),
    /// Append mode.
    Append,
    /// Last mode.
    Last,
}
8
9pub(crate) fn get_utf8_tail_len(bytes: &[u8]) -> HdbResult<usize> {
10    match bytes.last() {
11        None | Some(0..=127) => Ok(0),
12        Some(0xC0..=0xDF) => Ok(1),
13        Some(_) => {
14            let len = bytes.len();
15            for i in 0..len - 1 {
16                let index = len - 2 - i;
17                let utf8_char_start = get_utf8_char_start(&bytes[index..]);
18                if let Some(char_len) = match utf8_char_start {
19                    Utf8CharType::One => Some(1),
20                    Utf8CharType::Two => Some(2),
21                    Utf8CharType::Three => Some(3),
22                    Utf8CharType::Four => Some(4),
23                    Utf8CharType::NotAStart | Utf8CharType::Illegal | Utf8CharType::Empty => None,
24                } {
25                    return Ok(match (index + char_len).cmp(&len) {
26                        std::cmp::Ordering::Greater => len - index,
27                        std::cmp::Ordering::Equal => 0,
28                        std::cmp::Ordering::Less => len - index - char_len,
29                    });
30                }
31            }
32            Err(impl_err!("no valid utf8 cutoff point found!"))
33        }
34    }
35}
36
/// Classification of the first byte of a byte slice, as produced by
/// `get_utf8_char_start`.
enum Utf8CharType {
    /// The inspected slice contains no bytes.
    Empty,
    /// The byte is in `0xF8..=0xFF` and cannot start a character.
    Illegal,
    /// The byte is a continuation byte (`0x80..=0xBF`), i.e. not a character start.
    NotAStart,
    /// Start of a one-byte character (plain ASCII, `0x00..=0x7F`).
    One,
    /// Start of a two-byte character (`0xC0..=0xDF`).
    Two,
    /// Start of a three-byte character (`0xE0..=0xEF`).
    Three,
    /// Start of a four-byte character (`0xF0..=0xF7`).
    Four,
}
46
47//   1: 0000_0000 to 0111_1111 (00 to 7F)
48//cont: 1000_0000 to 1011_1111 (80 to BF)
49//   2: 1100_0000 to 1101_1111 (C0 to DF)
50//   3: 1110_0000 to 1110_1111 (E0 to EF)
51//   4: 1111_0000 to 1111_0111 (F0 to F7)
52// ill: 1111_1000 to 1111_1111 (F8 to FF)
53fn get_utf8_char_start(bytes: &[u8]) -> Utf8CharType {
54    match bytes.len() {
55        0 => Utf8CharType::Empty,
56        _ => match bytes[0] {
57            0x00..=0x7F => Utf8CharType::One,
58            0x80..=0xBF => Utf8CharType::NotAStart,
59            0xC0..=0xDF => Utf8CharType::Two,
60            0xE0..=0xEF => Utf8CharType::Three,
61            0xF0..=0xF7 => Utf8CharType::Four,
62            _ => Utf8CharType::Illegal,
63        },
64    }
65}