typed_arrow/bridge/
strings.rs

//! Arrow `Utf8` and `LargeUtf8` string bindings: `ArrowBinding` implementations for
//! `String` and for the `LargeUtf8` newtype.
2
3use arrow_array::{
4    Array, LargeStringArray, StringArray,
5    builder::{LargeStringBuilder, StringBuilder},
6};
7use arrow_schema::DataType;
8
9use super::ArrowBinding;
10#[cfg(feature = "views")]
11use super::ArrowBindingView;
12
/// Default estimated number of bytes per string value, used for buffer pre-allocation.
///
/// Returned by `estimated_bytes_per_value` in the `String` and `LargeUtf8` bindings;
/// their `new_builder` multiplies it by the requested item capacity to obtain the
/// byte capacity handed to the Arrow string builder.
const DEFAULT_STRING_BYTES: usize = 16;
15
16// Utf8/String
17impl ArrowBinding for String {
18    type Builder = StringBuilder;
19    type Array = StringArray;
20
21    #[inline]
22    fn data_type() -> DataType {
23        DataType::Utf8
24    }
25
26    #[inline]
27    fn new_builder(capacity: usize) -> Self::Builder {
28        StringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
29    }
30
31    #[inline]
32    fn estimated_bytes_per_value() -> usize {
33        DEFAULT_STRING_BYTES
34    }
35
36    #[inline]
37    fn append_value(b: &mut Self::Builder, v: &Self) {
38        b.append_value(v.as_str());
39    }
40
41    #[inline]
42    fn append_null(b: &mut Self::Builder) {
43        b.append_null();
44    }
45
46    #[inline]
47    fn finish(mut b: Self::Builder) -> Self::Array {
48        b.finish()
49    }
50}
51
#[cfg(feature = "views")]
impl ArrowBindingView for String {
    type Array = StringArray;
    type View<'a> = &'a str;

    /// Borrow the string stored at `index` in `array`.
    ///
    /// # Errors
    ///
    /// - `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// - `ViewAccessError::UnexpectedNull` when the slot at `index` is null.
    ///
    /// Both errors are reported with `field_name: None`.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            // Bounds are checked first so the null check never sees an invalid index.
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}
77
/// Wrapper denoting Arrow `LargeUtf8` values. Use when individual strings can be
/// extremely large or when 64-bit offsets are preferred.
///
/// The wrapper owns a single `String`. It derives the common value-type traits
/// (`Debug`, `Clone`, `PartialEq`, `Eq`, `Hash`), all of which delegate to the
/// inner string, so values can be logged, duplicated, compared and used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LargeUtf8(String);
81
82impl LargeUtf8 {
83    /// Construct a new `LargeUtf8` from a `String`.
84    #[inline]
85    #[must_use]
86    pub fn new(value: String) -> Self {
87        Self(value)
88    }
89    /// Return the underlying string slice.
90    #[inline]
91    #[must_use]
92    pub fn as_str(&self) -> &str {
93        self.0.as_str()
94    }
95    /// Consume and return the underlying `String`.
96    #[inline]
97    #[must_use]
98    pub fn into_string(self) -> String {
99        self.0
100    }
101}
102
103impl From<String> for LargeUtf8 {
104    /// Convert a `String` into a `LargeUtf8`.
105    #[inline]
106    fn from(value: String) -> Self {
107        Self::new(value)
108    }
109}
110impl From<&str> for LargeUtf8 {
111    /// Convert a `&str` into a `LargeUtf8` by allocating a `String`.
112    #[inline]
113    fn from(s: &str) -> Self {
114        Self::new(s.to_string())
115    }
116}
117
118impl ArrowBinding for LargeUtf8 {
119    type Builder = LargeStringBuilder;
120    type Array = LargeStringArray;
121
122    #[inline]
123    fn data_type() -> DataType {
124        DataType::LargeUtf8
125    }
126
127    #[inline]
128    fn new_builder(capacity: usize) -> Self::Builder {
129        LargeStringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
130    }
131
132    #[inline]
133    fn estimated_bytes_per_value() -> usize {
134        DEFAULT_STRING_BYTES
135    }
136
137    #[inline]
138    fn append_value(b: &mut Self::Builder, v: &Self) {
139        b.append_value(v.0.as_str());
140    }
141
142    #[inline]
143    fn append_null(b: &mut Self::Builder) {
144        b.append_null();
145    }
146
147    #[inline]
148    fn finish(mut b: Self::Builder) -> Self::Array {
149        b.finish()
150    }
151}
152
#[cfg(feature = "views")]
impl ArrowBindingView for LargeUtf8 {
    type Array = LargeStringArray;
    type View<'a> = &'a str;

    /// Borrow the string stored at `index` in `array`.
    ///
    /// # Errors
    ///
    /// - `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// - `ViewAccessError::UnexpectedNull` when the slot at `index` is null.
    ///
    /// Both errors are reported with `field_name: None`.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            // Bounds are checked first so the null check never sees an invalid index.
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}