typed_arrow/bridge/
strings.rs

1//! `Utf8` and `LargeUtf8` string bindings.
2
3#[cfg(feature = "views")]
4use arrow_array::Array;
5use arrow_array::{
6    LargeStringArray, StringArray,
7    builder::{LargeStringBuilder, StringBuilder},
8};
9use arrow_schema::DataType;
10
11use super::ArrowBinding;
12#[cfg(feature = "views")]
13use super::ArrowBindingView;
14
/// Default estimated bytes per string value for buffer pre-allocation.
///
/// Returned by `estimated_bytes_per_value` for both the `String` and
/// `LargeUtf8` bindings in this module; `new_builder` multiplies it by the
/// requested capacity to choose the initial size of the builder's data buffer.
const DEFAULT_STRING_BYTES: usize = 16;
17
18// Utf8/String
19impl ArrowBinding for String {
20    type Builder = StringBuilder;
21    type Array = StringArray;
22
23    #[inline]
24    fn data_type() -> DataType {
25        DataType::Utf8
26    }
27
28    #[inline]
29    fn new_builder(capacity: usize) -> Self::Builder {
30        StringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
31    }
32
33    #[inline]
34    fn estimated_bytes_per_value() -> usize {
35        DEFAULT_STRING_BYTES
36    }
37
38    #[inline]
39    fn append_value(b: &mut Self::Builder, v: &Self) {
40        b.append_value(v.as_str());
41    }
42
43    #[inline]
44    fn append_null(b: &mut Self::Builder) {
45        b.append_null();
46    }
47
48    #[inline]
49    fn finish(mut b: Self::Builder) -> Self::Array {
50        b.finish()
51    }
52}
53
#[cfg(feature = "views")]
impl ArrowBindingView for String {
    type Array = StringArray;
    type View<'a> = &'a str;

    /// Borrow the string stored at `index` in `array`.
    ///
    /// # Errors
    ///
    /// Returns `ViewAccessError::OutOfBounds` when `index` is not less than
    /// the array length, and `ViewAccessError::UnexpectedNull` when the slot
    /// at `index` is null. The bounds check runs before the null check.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}
79
/// Newtype over `String` that binds to Arrow's `LargeUtf8` data type instead
/// of `Utf8`.
///
/// Use it when individual strings can be extremely large or when 64-bit
/// offsets are preferred. Construct one with `LargeUtf8::new` or the `From`
/// conversions, and read it back with `as_str` / `into_string`.
///
/// The common standard traits are derived so the wrapper can be debug-printed,
/// cloned, compared, hashed and default-constructed like the `String` it wraps.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct LargeUtf8(String);
83
84impl LargeUtf8 {
85    /// Construct a new `LargeUtf8` from a `String`.
86    #[inline]
87    #[must_use]
88    pub fn new(value: String) -> Self {
89        Self(value)
90    }
91    /// Return the underlying string slice.
92    #[inline]
93    #[must_use]
94    pub fn as_str(&self) -> &str {
95        self.0.as_str()
96    }
97    /// Consume and return the underlying `String`.
98    #[inline]
99    #[must_use]
100    pub fn into_string(self) -> String {
101        self.0
102    }
103}
104
105impl From<String> for LargeUtf8 {
106    /// Convert a `String` into a `LargeUtf8`.
107    #[inline]
108    fn from(value: String) -> Self {
109        Self::new(value)
110    }
111}
112impl From<&str> for LargeUtf8 {
113    /// Convert a `&str` into a `LargeUtf8` by allocating a `String`.
114    #[inline]
115    fn from(s: &str) -> Self {
116        Self::new(s.to_string())
117    }
118}
119
120impl ArrowBinding for LargeUtf8 {
121    type Builder = LargeStringBuilder;
122    type Array = LargeStringArray;
123
124    #[inline]
125    fn data_type() -> DataType {
126        DataType::LargeUtf8
127    }
128
129    #[inline]
130    fn new_builder(capacity: usize) -> Self::Builder {
131        LargeStringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
132    }
133
134    #[inline]
135    fn estimated_bytes_per_value() -> usize {
136        DEFAULT_STRING_BYTES
137    }
138
139    #[inline]
140    fn append_value(b: &mut Self::Builder, v: &Self) {
141        b.append_value(v.0.as_str());
142    }
143
144    #[inline]
145    fn append_null(b: &mut Self::Builder) {
146        b.append_null();
147    }
148
149    #[inline]
150    fn finish(mut b: Self::Builder) -> Self::Array {
151        b.finish()
152    }
153}
154
#[cfg(feature = "views")]
impl ArrowBindingView for LargeUtf8 {
    type Array = LargeStringArray;
    type View<'a> = &'a str;

    /// Borrow the string stored at `index` in `array`.
    ///
    /// # Errors
    ///
    /// Returns `ViewAccessError::OutOfBounds` when `index` is not less than
    /// the array length, and `ViewAccessError::UnexpectedNull` when the slot
    /// at `index` is null. The bounds check runs before the null check.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}