re_types_core/
arrow_string.rs

1use arrow::buffer::Buffer;
2
3/// Convenience-wrapper around an arrow [`Buffer`] that is known to contain a
4/// UTF-8 encoded string.
5///
6/// The arrow [`Buffer`] object is internally reference-counted and can be
7/// easily converted back to a `&str` referencing the underlying storage.
8/// This avoids some of the lifetime complexities that would otherwise
9/// arise from returning a `&str` directly, but is significantly more
10/// performant than doing the full allocation necessary to return a `String`.
11#[derive(Clone, Debug)]
12pub struct ArrowString(Buffer);
13
14impl Default for ArrowString {
15    #[inline]
16    fn default() -> Self {
17        Self(Buffer::from_vec::<u8>(vec![]))
18    }
19}
20
21impl re_byte_size::SizeBytes for ArrowString {
22    #[inline]
23    fn heap_size_bytes(&self) -> u64 {
24        let Self(buf) = self;
25        std::mem::size_of_val(buf.as_slice()) as _
26    }
27}
28
29impl PartialEq for ArrowString {
30    #[inline]
31    fn eq(&self, other: &Self) -> bool {
32        self.as_str() == other.as_str()
33    }
34}
35
// Equality is defined by comparing the `as_str()` views, and `str` equality is
// a full equivalence relation, so the `Eq` marker is sound.
impl Eq for ArrowString {}
37
38impl PartialOrd for ArrowString {
39    #[inline]
40    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
41        Some(self.as_str().cmp(other.as_str()))
42    }
43}
44
45impl Ord for ArrowString {
46    #[inline]
47    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
48        self.as_str().cmp(other.as_str())
49    }
50}
51
52impl std::hash::Hash for ArrowString {
53    #[inline]
54    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
55        self.as_str().hash(state);
56    }
57}
58
59impl ArrowString {
60    #[inline]
61    pub fn as_str(&self) -> &str {
62        std::str::from_utf8(self.0.as_ref()).unwrap_or("INVALID UTF-8")
63    }
64
65    /// The raw UTF8 bytes.
66    #[inline]
67    pub fn as_bytes(&self) -> &[u8] {
68        self.0.as_slice()
69    }
70
71    #[inline]
72    pub fn into_arrow_buffer(self) -> arrow::buffer::Buffer {
73        self.0
74    }
75}
76
77impl From<arrow::buffer::Buffer> for ArrowString {
78    #[inline]
79    fn from(buf: arrow::buffer::Buffer) -> Self {
80        Self(buf)
81    }
82}
83
84impl From<String> for ArrowString {
85    #[inline]
86    fn from(value: String) -> Self {
87        Self(value.as_bytes().to_vec().into())
88    }
89}
90
91impl From<&str> for ArrowString {
92    #[inline]
93    fn from(value: &str) -> Self {
94        Self(value.as_bytes().to_vec().into())
95    }
96}
97
98impl std::fmt::Display for ArrowString {
99    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100        self.as_str().fmt(f)
101    }
102}
103
104impl AsRef<str> for ArrowString {
105    #[inline]
106    fn as_ref(&self) -> &str {
107        self.as_str()
108    }
109}
110
111impl std::borrow::Borrow<str> for ArrowString {
112    #[inline]
113    fn borrow(&self) -> &str {
114        self.as_str()
115    }
116}
117
/// Hashing an `ArrowString` must produce the same value as hashing the `str`
/// obtained from it through `Borrow<str>`.
#[test]
fn borrow_hash_is_self_hash() {
    /// Feeds `value` to a fresh `DefaultHasher` and returns the finished hash.
    fn hash_of<T: std::hash::Hash + ?Sized>(value: &T) -> u64 {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        value.hash(&mut hasher);
        std::hash::Hasher::finish(&hasher)
    }

    let s = ArrowString::from("hello world");
    let borrowed = <ArrowString as std::borrow::Borrow<str>>::borrow(&s);

    let self_hash = hash_of(&s);
    let borrowed_hash = hash_of(borrowed);

    assert_eq!(self_hash, borrowed_hash);
}
140
141impl std::ops::Deref for ArrowString {
142    type Target = str;
143
144    #[inline]
145    fn deref(&self) -> &str {
146        self.as_str()
147    }
148}