Skip to main content

typed_arrow/bridge/
strings.rs

1//! `Utf8` and `LargeUtf8` string bindings.
2
3#[cfg(feature = "views")]
4use arrow_array::Array;
5use arrow_array::{
6    LargeStringArray, StringArray,
7    builder::{LargeStringBuilder, StringBuilder},
8};
9use arrow_schema::DataType;
10
11use super::ArrowBinding;
12#[cfg(feature = "views")]
13use super::ArrowBindingView;
14
/// Default estimated bytes per string value for buffer pre-allocation.
///
/// The string bindings in this file return this value from
/// `estimated_bytes_per_value()`, and their `new_builder` multiplies it by the
/// requested item capacity to obtain the second argument passed to the
/// builder's `with_capacity`.
const DEFAULT_STRING_BYTES: usize = 16;
17
18// Utf8/String
19impl ArrowBinding for String {
20    type Builder = StringBuilder;
21    type Array = StringArray;
22
23    #[inline]
24    fn data_type() -> DataType {
25        DataType::Utf8
26    }
27
28    #[inline]
29    fn new_builder(capacity: usize) -> Self::Builder {
30        StringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
31    }
32
33    #[inline]
34    fn estimated_bytes_per_value() -> usize {
35        DEFAULT_STRING_BYTES
36    }
37
38    #[inline]
39    fn append_value(b: &mut Self::Builder, v: &Self) {
40        b.append_value(v.as_str());
41    }
42
43    #[inline]
44    fn append_null(b: &mut Self::Builder) {
45        b.append_null();
46    }
47
48    #[inline]
49    fn finish(mut b: Self::Builder) -> Self::Array {
50        b.finish()
51    }
52}
53
/// Read-only view access for `Utf8` columns: each element is exposed as a
/// `&str` borrowed from the `StringArray`.
#[cfg(feature = "views")]
impl ArrowBindingView for String {
    type Array = StringArray;
    type View<'a> = &'a str;

    /// Return the string at `index`.
    ///
    /// # Errors
    ///
    /// * `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// * `ViewAccessError::UnexpectedNull` when the slot at `index` is null.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}
79
/// Wrapper denoting Arrow `LargeUtf8` values. Use when individual strings can be
/// extremely large or when 64-bit offsets are preferred.
///
/// Equality, ordering, and hashing are derived and therefore follow the
/// wrapped `String`, so the wrapper can be used as a `HashMap`/`BTreeMap` key.
/// `Default` yields a wrapper around the empty string.
#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LargeUtf8(String);
84
85impl LargeUtf8 {
86    /// Construct a new `LargeUtf8` from a `String`.
87    #[inline]
88    #[must_use]
89    pub fn new(value: String) -> Self {
90        Self(value)
91    }
92    /// Return the underlying string slice.
93    #[inline]
94    #[must_use]
95    pub fn as_str(&self) -> &str {
96        self.0.as_str()
97    }
98    /// Consume and return the underlying `String`.
99    #[inline]
100    #[must_use]
101    pub fn into_string(self) -> String {
102        self.0
103    }
104}
105
106impl From<String> for LargeUtf8 {
107    /// Convert a `String` into a `LargeUtf8`.
108    #[inline]
109    fn from(value: String) -> Self {
110        Self::new(value)
111    }
112}
113impl From<&str> for LargeUtf8 {
114    /// Convert a `&str` into a `LargeUtf8` by allocating a `String`.
115    #[inline]
116    fn from(s: &str) -> Self {
117        Self::new(s.to_string())
118    }
119}
120
121// Serialize/Deserialize implementation forwards to that for String.
#[cfg(feature = "serde")]
impl<'de> serde::de::Deserialize<'de> for LargeUtf8 {
    /// Deserialize a `LargeUtf8` by deserializing a `String` and wrapping it.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while deserializing the `String`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        // Fully-qualified trait call: resolves without requiring
        // `serde::de::Deserialize` to be imported into this module's scope.
        <String as serde::de::Deserialize<'de>>::deserialize(deserializer).map(Self::new)
    }
}
131
#[cfg(feature = "serde")]
impl serde::Serialize for LargeUtf8 {
    /// Serialize exactly as the wrapped `String` would be serialized.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the serializer.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        // Call through the trait path so method resolution does not depend on
        // `serde::Serialize` being imported into this module's scope.
        serde::Serialize::serialize(&self.0, serializer)
    }
}
138
139impl ArrowBinding for LargeUtf8 {
140    type Builder = LargeStringBuilder;
141    type Array = LargeStringArray;
142
143    #[inline]
144    fn data_type() -> DataType {
145        DataType::LargeUtf8
146    }
147
148    #[inline]
149    fn new_builder(capacity: usize) -> Self::Builder {
150        LargeStringBuilder::with_capacity(capacity, capacity * Self::estimated_bytes_per_value())
151    }
152
153    #[inline]
154    fn estimated_bytes_per_value() -> usize {
155        DEFAULT_STRING_BYTES
156    }
157
158    #[inline]
159    fn append_value(b: &mut Self::Builder, v: &Self) {
160        b.append_value(v.0.as_str());
161    }
162
163    #[inline]
164    fn append_null(b: &mut Self::Builder) {
165        b.append_null();
166    }
167
168    #[inline]
169    fn finish(mut b: Self::Builder) -> Self::Array {
170        b.finish()
171    }
172}
173
/// Read-only view access for `LargeUtf8` columns: each element is exposed as a
/// `&str` borrowed from the `LargeStringArray`.
#[cfg(feature = "views")]
impl ArrowBindingView for LargeUtf8 {
    type Array = LargeStringArray;
    type View<'a> = &'a str;

    /// Return the string at `index`.
    ///
    /// # Errors
    ///
    /// * `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// * `ViewAccessError::UnexpectedNull` when the slot at `index` is null.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;

        let len = array.len();
        if index >= len {
            Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}