typed_arrow/bridge/
strings.rs

1//! `Utf8` and `LargeUtf8` string bindings.
2
3use arrow_array::{
4    builder::{LargeStringBuilder, StringBuilder},
5    Array, LargeStringArray, StringArray,
6};
7use arrow_schema::DataType;
8
9use super::ArrowBinding;
10#[cfg(feature = "views")]
11use super::ArrowBindingView;
12
13// Utf8/String
14impl ArrowBinding for String {
15    type Builder = StringBuilder;
16    type Array = StringArray;
17    fn data_type() -> DataType {
18        DataType::Utf8
19    }
20    fn new_builder(capacity: usize) -> Self::Builder {
21        StringBuilder::with_capacity(capacity, 0)
22    }
23    fn append_value(b: &mut Self::Builder, v: &Self) {
24        b.append_value(v.as_str());
25    }
26    fn append_null(b: &mut Self::Builder) {
27        b.append_null();
28    }
29    fn finish(mut b: Self::Builder) -> Self::Array {
30        b.finish()
31    }
32}
33
#[cfg(feature = "views")]
impl ArrowBindingView for String {
    type Array = StringArray;
    type View<'a> = &'a str;

    /// Borrow the string stored at `index` in `array`.
    ///
    /// # Errors
    ///
    /// Returns `ViewAccessError::OutOfBounds` when `index` is not less than the
    /// array length, and `ViewAccessError::UnexpectedNull` when the slot is null.
    /// In both cases `field_name` is left as `None`.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        let len = array.len();
        // The bounds check must come first: the null check and `value` both
        // index into the array.
        if index >= len {
            Err(crate::schema::ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            })
        } else if array.is_null(index) {
            Err(crate::schema::ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            })
        } else {
            Ok(array.value(index))
        }
    }
}
59
/// Wrapper denoting Arrow `LargeUtf8` values. Use when individual strings can be
/// extremely large or when 64-bit offsets are preferred.
///
/// The wrapper owns a plain `String`. It derives the common standard traits so it
/// can be debug-printed, cloned, compared, ordered, hashed and defaulted like the
/// string it wraps.
#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LargeUtf8(String);

impl LargeUtf8 {
    /// Construct a new `LargeUtf8` from a `String`.
    ///
    /// The string is moved into the wrapper; no bytes are copied.
    #[inline]
    #[must_use]
    pub fn new(value: String) -> Self {
        Self(value)
    }

    /// Return the underlying string slice.
    #[inline]
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Consume the wrapper and return the underlying `String`.
    #[inline]
    #[must_use]
    pub fn into_string(self) -> String {
        self.0
    }
}
84
85impl From<String> for LargeUtf8 {
86    /// Convert a `String` into a `LargeUtf8`.
87    #[inline]
88    fn from(value: String) -> Self {
89        Self::new(value)
90    }
91}
92impl From<&str> for LargeUtf8 {
93    /// Convert a `&str` into a `LargeUtf8` by allocating a `String`.
94    #[inline]
95    fn from(s: &str) -> Self {
96        Self::new(s.to_string())
97    }
98}
99
100impl ArrowBinding for LargeUtf8 {
101    type Builder = LargeStringBuilder;
102    type Array = LargeStringArray;
103    fn data_type() -> DataType {
104        DataType::LargeUtf8
105    }
106    fn new_builder(capacity: usize) -> Self::Builder {
107        LargeStringBuilder::with_capacity(capacity, 0)
108    }
109    fn append_value(b: &mut Self::Builder, v: &Self) {
110        b.append_value(v.0.as_str());
111    }
112    fn append_null(b: &mut Self::Builder) {
113        b.append_null();
114    }
115    fn finish(mut b: Self::Builder) -> Self::Array {
116        b.finish()
117    }
118}
119
#[cfg(feature = "views")]
impl ArrowBindingView for LargeUtf8 {
    type Array = LargeStringArray;
    type View<'a> = &'a str;

    /// Borrow the string stored at `index` in `array`.
    ///
    /// # Errors
    ///
    /// Returns `ViewAccessError::OutOfBounds` when `index` is not less than the
    /// array length, and `ViewAccessError::UnexpectedNull` when the slot is null.
    /// In both cases `field_name` is left as `None`.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        let len = array.len();
        // Arms are tried top to bottom, so the bounds check always runs before
        // the null check and the value access.
        match index {
            i if i >= len => Err(crate::schema::ViewAccessError::OutOfBounds {
                index: i,
                len,
                field_name: None,
            }),
            i if array.is_null(i) => Err(crate::schema::ViewAccessError::UnexpectedNull {
                index: i,
                field_name: None,
            }),
            i => Ok(array.value(i)),
        }
    }
}