arrow_odbc/reader/
text.rs1use std::{char::decode_utf16, cmp::min, num::NonZeroUsize, sync::Arc};
2
3use arrow::array::{ArrayRef, StringBuilder};
4use odbc_api::{
5 DataType as OdbcDataType,
6 buffers::{AnySlice, BufferDesc},
7};
8
9use super::{ColumnFailure, MappingError, ReadStrategy};
10
11pub fn choose_text_strategy(
16 sql_type: OdbcDataType,
17 lazy_display_size: impl FnOnce() -> Result<Option<NonZeroUsize>, odbc_api::Error>,
18 max_text_size: Option<usize>,
19 trim_fixed_sized_character_strings: bool,
20 text_encoding: TextEncoding,
21) -> Result<Box<dyn ReadStrategy + Send>, ColumnFailure> {
22 let apply_buffer_limit = |len| match (len, max_text_size) {
23 (None, None) => Err(ColumnFailure::ZeroSizedColumn { sql_type }),
24 (None, Some(limit)) => Ok(limit),
25 (Some(len), None) => Ok(len),
26 (Some(len), Some(limit)) => Ok(min(len, limit)),
27 };
28 let is_fixed_sized_char = matches!(
29 sql_type,
30 OdbcDataType::Char { .. } | OdbcDataType::WChar { .. }
31 );
32 let trim = trim_fixed_sized_character_strings && is_fixed_sized_char;
33 let strategy: Box<dyn ReadStrategy + Send> = if text_encoding.use_utf16() {
34 let hex_len = sql_type
35 .utf16_len()
36 .map(Ok)
37 .or_else(|| lazy_display_size().transpose())
38 .transpose()
39 .map_err(|source| ColumnFailure::UnknownStringLength { sql_type, source })?;
40 let hex_len = apply_buffer_limit(hex_len.map(NonZeroUsize::get))?;
41 wide_text_strategy(hex_len, trim)
42 } else {
43 let octet_len = sql_type
44 .utf8_len()
45 .map(Ok)
46 .or_else(|| lazy_display_size().transpose())
47 .transpose()
48 .map_err(|source| ColumnFailure::UnknownStringLength { sql_type, source })?;
49 let octet_len = apply_buffer_limit(octet_len.map(NonZeroUsize::get))?;
50 narrow_text_strategy(octet_len, trim)
54 };
55
56 Ok(strategy)
57}
58
/// Encoding used for the buffers which transfer text from the data source.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TextEncoding {
    /// Decide based on the compilation target: UTF-16 if built for Windows, UTF-8 otherwise.
    #[default]
    Auto,
    /// Never use UTF-16, independent of the compilation target.
    Utf8,
    /// Always use UTF-16, independent of the compilation target.
    Utf16,
}

impl TextEncoding {
    /// `true` if text should be fetched using UTF-16 encoded buffers, `false` otherwise.
    ///
    /// For [`TextEncoding::Auto`] the answer is fixed at compile time and is `true` only for
    /// targets with `target_os = "windows"`.
    pub fn use_utf16(&self) -> bool {
        let built_for_windows = cfg!(target_os = "windows");
        match *self {
            Self::Utf16 => true,
            Self::Utf8 => false,
            Self::Auto => built_for_windows,
        }
    }
}
93
94fn wide_text_strategy(u16_len: usize, trim: bool) -> Box<dyn ReadStrategy + Send> {
95 Box::new(WideText::new(u16_len, trim))
96}
97
98fn narrow_text_strategy(octet_len: usize, trim: bool) -> Box<dyn ReadStrategy + Send> {
99 Box::new(NarrowText::new(octet_len, trim))
100}
101
/// Read strategy which fetches text through a wide (`WText`) buffer.
pub struct WideText {
    /// Maximum string length requested for the `WText` buffer. Also used to estimate the initial
    /// capacity of the resulting string array.
    max_str_len: usize,
    /// If `true`, values are trimmed before they are appended to the array.
    trim: bool,
}

impl WideText {
    /// Creates the strategy from the maximum string length of the buffer and the trim setting.
    pub fn new(max_str_len: usize, trim: bool) -> Self {
        WideText { max_str_len, trim }
    }
}
117
118impl ReadStrategy for WideText {
119 fn buffer_desc(&self) -> BufferDesc {
120 BufferDesc::WText {
121 max_str_len: self.max_str_len,
122 }
123 }
124
125 fn fill_arrow_array(&self, column_view: AnySlice) -> Result<ArrayRef, MappingError> {
126 let view = column_view.as_w_text_view().unwrap();
127 let item_capacity = view.len();
128 let data_capacity = self.max_str_len * item_capacity;
132 let mut builder = StringBuilder::with_capacity(item_capacity, data_capacity);
133 let mut buf_utf8 = String::new();
135 for value in view.iter() {
136 buf_utf8.clear();
137 let opt = if let Some(utf16) = value {
138 for c in decode_utf16(utf16.as_slice().iter().cloned()) {
139 buf_utf8.push(c.unwrap());
140 }
141 let slice = if self.trim {
142 buf_utf8.trim()
143 } else {
144 buf_utf8.as_str()
145 };
146 Some(slice)
147 } else {
148 None
149 };
150 builder.append_option(opt);
151 }
152 Ok(Arc::new(builder.finish()))
153 }
154}
155
/// Read strategy which fetches text through a narrow (`Text`) buffer.
pub struct NarrowText {
    /// Maximum string length requested for the `Text` buffer. Also used to estimate the initial
    /// capacity of the resulting string array.
    max_str_len: usize,
    /// If `true`, values are trimmed before they are appended to the array.
    trim: bool,
}

impl NarrowText {
    /// Creates the strategy from the maximum string length of the buffer and the trim setting.
    pub fn new(max_str_len: usize, trim: bool) -> Self {
        NarrowText { max_str_len, trim }
    }
}
168
169impl ReadStrategy for NarrowText {
170 fn buffer_desc(&self) -> BufferDesc {
171 BufferDesc::Text {
172 max_str_len: self.max_str_len,
173 }
174 }
175
176 fn fill_arrow_array(&self, column_view: AnySlice) -> Result<ArrayRef, MappingError> {
177 let view = column_view.as_text_view().unwrap();
178 let mut builder = StringBuilder::with_capacity(view.len(), self.max_str_len * view.len());
179 for value in view.iter() {
180 builder.append_option(
181 value
182 .map(|bytes| {
183 let untrimmed =
184 std::str::from_utf8(bytes).map_err(|_| MappingError::InvalidUtf8 {
185 lossy_value: String::from_utf8_lossy(bytes).into_owned(),
186 })?;
187 Ok(if self.trim {
188 untrimmed.trim()
189 } else {
190 untrimmed
191 })
192 })
193 .transpose()?,
194 );
195 }
196 Ok(Arc::new(builder.finish()))
197 }
198}
199
#[cfg(test)]
mod tests {
    use odbc_api::buffers::{AnySlice, ColumnBuffer, TextColumn};

    use crate::reader::{MappingError, ReadStrategy as _};

    use super::NarrowText;

    /// A narrow text value which is not valid UTF-8 must be reported as an error carrying a lossy
    /// representation of the value.
    #[test]
    fn must_return_error_for_invalid_utf8() {
        // Given a single row whose value ends in a truncated multi byte sequence (`0xc3`)
        let mut buffer = TextColumn::new(1, 10);
        let invalid_utf8 = [b'H', b'e', b'l', b'l', b'o', 0xc3];
        buffer.set_value(0, Some(&invalid_utf8));
        let view = AnySlice::Text(buffer.view(1));

        // When converting it into an arrow array
        let outcome = NarrowText::new(5, false).fill_arrow_array(view);

        // Then the conversion fails and the invalid byte shows up as replacement character
        let error = outcome.unwrap_err();
        if let MappingError::InvalidUtf8 { lossy_value } = error {
            assert_eq!(lossy_value, "Hello�");
        } else {
            panic!("Not an InvalidUtf8 error")
        }
    }
}