Skip to main content

typed_arrow/bridge/
strings.rs

1//! `Utf8` and `LargeUtf8` string bindings.
2
3#[cfg(feature = "views")]
4use arrow_array::Array;
5use arrow_array::{
6    LargeStringArray, StringArray,
7    builder::{LargeStringBuilder, StringBuilder},
8};
9use arrow_schema::DataType;
10
11use super::ArrowBinding;
12#[cfg(feature = "views")]
13use super::ArrowBindingView;
14
/// Assumed average size, in bytes, of a single string value.
///
/// The string bindings in this module multiply this estimate by the requested
/// item capacity to pre-size a builder's data buffer.
const DEFAULT_STRING_BYTES: usize = 16;
17
18// Utf8/String
19impl ArrowBinding for String {
20    type Builder = StringBuilder;
21    type Array = StringArray;
22
23    #[inline]
24    fn data_type() -> DataType {
25        DataType::Utf8
26    }
27
28    #[inline]
29    fn new_builder(capacity: usize) -> Self::Builder {
30        StringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
31    }
32
33    #[inline]
34    fn estimated_bytes_per_value() -> usize {
35        DEFAULT_STRING_BYTES
36    }
37
38    #[inline]
39    fn append_value(b: &mut Self::Builder, v: &Self) {
40        b.append_value(v.as_str());
41    }
42
43    #[inline]
44    fn append_null(b: &mut Self::Builder) {
45        b.append_null();
46    }
47
48    #[inline]
49    fn finish(mut b: Self::Builder) -> Self::Array {
50        b.finish()
51    }
52}
53
/// Borrowed access to the values of a [`StringArray`] as `&str`.
#[cfg(feature = "views")]
impl ArrowBindingView for String {
    type Array = StringArray;
    type View<'a> = &'a str;

    /// Return the string stored at `index`.
    ///
    /// # Errors
    ///
    /// * `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// * `ViewAccessError::UnexpectedNull` when the slot at `index` is null.
    ///
    /// Both errors are reported with `field_name: None`.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            // The bounds check runs first, so the null check below never sees
            // an out-of-range index.
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}
79
/// Wrapper denoting Arrow `LargeUtf8` values. Use when individual strings can be
/// extremely large or when 64-bit offsets are preferred.
///
/// This is a plain newtype over [`String`]. Equality, ordering, hashing and the
/// default value are derived, so they all defer to the wrapped string; the
/// default is the empty string.
#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LargeUtf8(String);

impl LargeUtf8 {
    /// Construct a new `LargeUtf8` from a `String`.
    ///
    /// The string is moved into the wrapper; no copy is made.
    #[inline]
    #[must_use]
    pub fn new(value: String) -> Self {
        Self(value)
    }

    /// Return the underlying string slice.
    #[inline]
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Consume and return the underlying `String`.
    #[inline]
    #[must_use]
    pub fn into_string(self) -> String {
        self.0
    }
}
105
106impl From<String> for LargeUtf8 {
107    /// Convert a `String` into a `LargeUtf8`.
108    #[inline]
109    fn from(value: String) -> Self {
110        Self::new(value)
111    }
112}
113impl From<&str> for LargeUtf8 {
114    /// Convert a `&str` into a `LargeUtf8` by allocating a `String`.
115    #[inline]
116    fn from(s: &str) -> Self {
117        Self::new(s.to_string())
118    }
119}
120
121impl ArrowBinding for LargeUtf8 {
122    type Builder = LargeStringBuilder;
123    type Array = LargeStringArray;
124
125    #[inline]
126    fn data_type() -> DataType {
127        DataType::LargeUtf8
128    }
129
130    #[inline]
131    fn new_builder(capacity: usize) -> Self::Builder {
132        LargeStringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
133    }
134
135    #[inline]
136    fn estimated_bytes_per_value() -> usize {
137        DEFAULT_STRING_BYTES
138    }
139
140    #[inline]
141    fn append_value(b: &mut Self::Builder, v: &Self) {
142        b.append_value(v.0.as_str());
143    }
144
145    #[inline]
146    fn append_null(b: &mut Self::Builder) {
147        b.append_null();
148    }
149
150    #[inline]
151    fn finish(mut b: Self::Builder) -> Self::Array {
152        b.finish()
153    }
154}
155
/// Borrowed access to the values of a [`LargeStringArray`] as `&str`.
#[cfg(feature = "views")]
impl ArrowBindingView for LargeUtf8 {
    type Array = LargeStringArray;
    type View<'a> = &'a str;

    /// Return the string stored at `index`.
    ///
    /// # Errors
    ///
    /// * `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// * `ViewAccessError::UnexpectedNull` when the slot at `index` is null.
    ///
    /// Both errors are reported with `field_name: None`.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            // The bounds check runs first, so the null check below never sees
            // an out-of-range index.
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}