odbc_api/buffers/bin_column.rs
1use crate::{
2 DataType, Error,
3 buffers::{Indicator, columnar::Resize},
4 columnar_bulk_inserter::BoundInputSlice,
5 error::TooLargeBufferSize,
6 handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
7};
8
9use log::trace;
10use odbc_sys::{CDataType, NULL_DATA};
11use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
12
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of bytes up to a maximum length. Since elements of this type have variable
/// length an additional indicator buffer is also maintained, whether the column is nullable or not.
/// Therefore this buffer type is used for variable-sized binary data, whether it is nullable or
/// not.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum element length in bytes.
    max_len: usize,
    /// Consecutive bytes for all the elements in the buffer. We can find the first byte of the
    /// n-th element at `n * max_len`.
    values: Vec<u8>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length of the element in
    /// `values` with the same index. Please note that this value may be larger than `max_len` if
    /// the value has been truncated.
    indicators: Vec<isize>,
}
30
31impl BinColumn {
32 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
33 /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
34 /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
35 /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
36 /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
37 /// new.
38 pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
39 let len = element_size * batch_size;
40 let mut values = Vec::new();
41 values
42 .try_reserve_exact(len)
43 .map_err(|_| TooLargeBufferSize {
44 num_elements: batch_size,
45 element_size,
46 })?;
47 values.resize(len, 0);
48 Ok(BinColumn {
49 max_len: element_size,
50 values,
51 indicators: vec![0; batch_size],
52 })
53 }
54
55 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
56 /// have a maximum length of `max_len`.
57 pub fn new(batch_size: usize, element_size: usize) -> Self {
58 let len = element_size * batch_size;
59 let mut values = Vec::new();
60 values.reserve_exact(len);
61 values.resize(len, 0);
62 BinColumn {
63 max_len: element_size,
64 values,
65 indicators: vec![0; batch_size],
66 }
67 }
68
69 /// Return the value for the given row index.
70 ///
71 /// The column buffer does not know how many elements were in the last row group, and therefore
72 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
73 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
74 /// equal to the maximum number of elements in the buffer.
75 pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
76 self.content_length_at(row_index).map(|length| {
77 let offset = row_index * self.max_len;
78 &self.values[offset..offset + length]
79 })
80 }
81
82 /// Indicator value at the specified position. Useful to detect truncation of data.
83 ///
84 /// The column buffer does not know how many elements were in the last row group, and therefore
85 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
86 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
87 /// equal to the maximum number of elements in the buffer.
88 pub fn indicator_at(&self, row_index: usize) -> Indicator {
89 Indicator::from_isize(self.indicators[row_index])
90 }
91
92 /// Length of value at the specified position. This is different from an indicator as it refers
93 /// to the length of the value in the buffer, not to the length of the value in the datasource.
94 /// The two things are different for truncated values.
95 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
96 match self.indicator_at(row_index) {
97 Indicator::Null => None,
98 // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
99 Indicator::NoTotal => Some(self.max_len),
100 Indicator::Length(length) => {
101 let length = min(self.max_len, length);
102 Some(length)
103 }
104 }
105 }
106
107 /// `Some` if any value is truncated in the range [0, num_rows).
108 ///
109 /// After fetching data we may want to know if any value has been truncated due to the buffer
110 /// not being able to hold elements of that size. This method checks the indicator buffer
111 /// element wise and reports one indicator which indicates a size large than the maximum element
112 /// size, if it exits.
113 pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
114 self.indicators
115 .iter()
116 .copied()
117 .take(num_rows)
118 .find_map(|indicator| {
119 let indicator = Indicator::from_isize(indicator);
120 indicator.is_truncated(self.max_len).then_some(indicator)
121 })
122 }
123
124 /// Changes the maximum element length the buffer can hold. This operation is useful if you find
125 /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
126 ///
127 /// # Parameters
128 ///
129 /// * `new_max_len`: New maximum string length without terminating zero.
130 pub fn set_max_len(&mut self, new_max_len: usize) {
131 let batch_size = self.indicators.len();
132 // Allocate a new buffer large enough to hold a batch of strings with maximum length.
133 let new_values = vec![0u8; new_max_len * batch_size];
134 // Set all indicators to NULL
135 self.fill_null(0, batch_size);
136 self.values = new_values;
137 self.max_len = new_max_len;
138 }
139
140 /// Maximum length of elements in bytes.
141 pub fn max_len(&self) -> usize {
142 self.max_len
143 }
144
145 /// View of the first `num_rows` values of a binary column.
146 ///
147 /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
148 /// column buffer does not know how many elements were in the last row group, and therefore can
149 /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
150 /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
151 /// to the maximum number of elements in the buffer.
152 pub fn view(&self, num_rows: usize) -> BinColumnView<'_> {
153 BinColumnView {
154 num_rows,
155 col: self,
156 }
157 }
158
159 /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
160 /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
161 /// element length.
162 pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
163 if let Some(input) = input {
164 self.indicators[index] = input.len().try_into().unwrap();
165 if input.len() > self.max_len {
166 panic!(
167 "Tried to insert a value into a binary buffer which is larger than the maximum \
168 allowed element length for the buffer."
169 );
170 }
171 let start = self.max_len * index;
172 let end = start + input.len();
173 let buf = &mut self.values[start..end];
174 buf.copy_from_slice(input);
175 } else {
176 self.indicators[index] = NULL_DATA;
177 }
178 }
179
180 /// Fills the column with NULL, between From and To
181 pub fn fill_null(&mut self, from: usize, to: usize) {
182 for index in from..to {
183 self.indicators[index] = NULL_DATA;
184 }
185 }
186
187 /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
188 /// you find an unexpected large input during insertion.
189 ///
190 /// This is however costly, as not only does the new buffer have to be allocated, but all values
191 /// have to copied from the old to the new buffer.
192 ///
193 /// This method could also be used to reduce the maximum length, which would truncate values in
194 /// the process.
195 ///
196 /// This method does not adjust indicator buffers as these might hold values larger than the
197 /// maximum length.
198 ///
199 /// # Parameters
200 ///
201 /// * `new_max_len`: New maximum element length in bytes.
202 /// * `num_rows`: Number of valid rows currently stored in this buffer.
203 pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
204 #[cfg(not(feature = "structured_logging"))]
205 trace!(
206 "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
207 num_rows, self.max_len, new_max_len
208 );
209 #[cfg(feature = "structured_logging")]
210 trace!(
211 target: "odbc_api",
212 num_rows = num_rows,
213 old_max_len = self.max_len,
214 new_max_len = new_max_len;
215 "Binary column buffer resized"
216 );
217
218 let batch_size = self.indicators.len();
219 // Allocate a new buffer large enough to hold a batch of elements with maximum length.
220 let mut new_values = vec![0; new_max_len * batch_size];
221 // Copy values from old to new buffer.
222 let max_copy_length = min(self.max_len, new_max_len);
223 for ((&indicator, old_value), new_value) in self
224 .indicators
225 .iter()
226 .zip(self.values.chunks_exact_mut(self.max_len))
227 .zip(new_values.chunks_exact_mut(new_max_len))
228 .take(num_rows)
229 {
230 match Indicator::from_isize(indicator) {
231 Indicator::Null => (),
232 Indicator::NoTotal => {
233 // There is no good choice here in case we are expanding the buffer. Since
234 // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
235 // be padded with 0. I currently cannot think of any use case there it would
236 // matter.
237 new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
238 }
239 Indicator::Length(num_bytes_len) => {
240 let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
241 new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
242 }
243 }
244 }
245 self.values = new_values;
246 self.max_len = new_max_len;
247 }
248
249 /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
250 /// length should the input be too large.
251 ///
252 /// # Parameters
253 ///
254 /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
255 /// currently in the buffer.
256 /// * `bytes`: Value to store.
257 pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
258 if let Some(bytes) = bytes {
259 if bytes.len() > self.max_len {
260 let new_max_len = (bytes.len() as f64 * 1.2) as usize;
261 self.resize_max_element_length(new_max_len, index)
262 }
263
264 let offset = index * self.max_len;
265 self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
266 // And of course set the indicator correctly.
267 self.indicators[index] = bytes.len().try_into().unwrap();
268 } else {
269 self.indicators[index] = NULL_DATA;
270 }
271 }
272
273 /// Maximum number of elements this buffer can hold.
274 pub fn capacity(&self) -> usize {
275 self.indicators.len()
276 }
277}
278
279unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
280 type SliceMut = BinColumnSliceMut<'a>;
281
282 unsafe fn as_view_mut(
283 &'a mut self,
284 parameter_index: u16,
285 stmt: StatementRef<'a>,
286 ) -> Self::SliceMut {
287 BinColumnSliceMut {
288 column: self,
289 stmt,
290 parameter_index,
291 }
292 }
293}
294
/// A view to a mutable array parameter binary buffer, which allows for filling the buffer with
/// values.
pub struct BinColumnSliceMut<'a> {
    /// Column buffer holding the values and indicators which are filled through this view.
    column: &'a mut BinColumn,
    // Needed to rebind the column in case of reallocation
    stmt: StatementRef<'a>,
    // Also needed to rebind the column in case of reallocation
    parameter_index: u16,
}
304
305impl BinColumnSliceMut<'_> {
306 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
307 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
308 /// allowed element length. `element` must be specified without the terminating zero.
309 pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
310 self.column.set_value(row_index, element)
311 }
312
313 /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
314 /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
315 /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
316 /// one. This makes this an extremly expensive operation.
317 pub fn ensure_max_element_length(
318 &mut self,
319 element_length: usize,
320 num_rows_to_copy: usize,
321 ) -> Result<(), Error> {
322 // Column buffer is not large enough to hold the element. We must allocate a larger buffer
323 // in order to hold it. This invalidates the pointers previously bound to the statement. So
324 // we rebind them.
325 if element_length > self.column.max_len() {
326 self.column
327 .resize_max_element_length(element_length, num_rows_to_copy);
328 unsafe {
329 self.stmt
330 .bind_input_parameter(self.parameter_index, self.column)
331 .into_result(&self.stmt)?
332 }
333 }
334 Ok(())
335 }
336}
337
/// Read-only view of the first `num_rows` elements of a [`BinColumn`].
#[derive(Debug, Clone, Copy)]
pub struct BinColumnView<'c> {
    /// Number of rows considered valid by this view.
    num_rows: usize,
    /// Column buffer the view reads its elements from.
    col: &'c BinColumn,
}
343
344impl<'c> BinColumnView<'c> {
345 /// The number of valid elements in the text column.
346 pub fn len(&self) -> usize {
347 self.num_rows
348 }
349
350 /// True if, and only if there are no valid rows in the column buffer.
351 pub fn is_empty(&self) -> bool {
352 self.num_rows == 0
353 }
354
355 /// Slice of text at the specified row index without terminating zero.
356 pub fn get(&self, index: usize) -> Option<&'c [u8]> {
357 self.col.value_at(index)
358 }
359
360 /// Iterator over the valid elements of the text buffer
361 pub fn iter(&self) -> BinColumnIt<'c> {
362 BinColumnIt {
363 pos: 0,
364 num_rows: self.num_rows,
365 col: self.col,
366 }
367 }
368
369 /// Finds an indicator larger than max element in the range [0, num_rows).
370 ///
371 /// After fetching data we may want to know if any value has been truncated due to the buffer
372 /// not being able to hold elements of that size. This method checks the indicator buffer
373 /// element wise.
374 pub fn has_truncated_values(&self) -> Option<Indicator> {
375 self.col.has_truncated_values(self.num_rows)
376 }
377}
378
/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
#[derive(Debug)]
pub struct BinColumnIt<'c> {
    /// Index of the row yielded by the next call to `next`.
    pos: usize,
    /// Number of rows to iterate over. Iteration stops once `pos` reaches this value.
    num_rows: usize,
    /// Column buffer the elements are read from.
    col: &'c BinColumn,
}
386
387impl<'c> Iterator for BinColumnIt<'c> {
388 type Item = Option<&'c [u8]>;
389
390 fn next(&mut self) -> Option<Self::Item> {
391 if self.pos == self.num_rows {
392 None
393 } else {
394 let ret = Some(self.col.value_at(self.pos));
395 self.pos += 1;
396 ret
397 }
398 }
399
400 fn size_hint(&self) -> (usize, Option<usize>) {
401 let len = self.num_rows - self.pos;
402 (len, Some(len))
403 }
404}
405
// `size_hint` of `BinColumnIt` reports the same value as lower and upper bound, so the default
// `len` implementation of `ExactSizeIterator` can be used as is.
impl ExactSizeIterator for BinColumnIt<'_> {}
407
408unsafe impl CData for BinColumn {
409 fn cdata_type(&self) -> CDataType {
410 CDataType::Binary
411 }
412
413 fn indicator_ptr(&self) -> *const isize {
414 self.indicators.as_ptr()
415 }
416
417 fn value_ptr(&self) -> *const c_void {
418 self.values.as_ptr() as *const c_void
419 }
420
421 fn buffer_length(&self) -> isize {
422 self.max_len.try_into().unwrap()
423 }
424}
425
426impl HasDataType for BinColumn {
427 fn data_type(&self) -> DataType {
428 DataType::Varbinary {
429 length: NonZeroUsize::new(self.max_len),
430 }
431 }
432}
433
434unsafe impl CDataMut for BinColumn {
435 fn mut_indicator_ptr(&mut self) -> *mut isize {
436 self.indicators.as_mut_ptr()
437 }
438
439 fn mut_value_ptr(&mut self) -> *mut c_void {
440 self.values.as_mut_ptr() as *mut c_void
441 }
442}
443
444impl Resize for BinColumn {
445 fn resize(&mut self, new_capacity: usize) {
446 self.values.resize(new_capacity * self.max_len, 0);
447 self.indicators.resize(new_capacity, NULL_DATA);
448 }
449}
450
#[cfg(test)]
mod test {
    use crate::{buffers::columnar::Resize, error::TooLargeBufferSize};

    use super::BinColumn;

    /// Requesting more memory than can be allocated is reported as an error carrying the
    /// requested dimensions.
    #[test]
    #[ignore = "On windows this tests does cause containerized linux and WSL to allocate all \
                memory instead of triggering a failed allocation."]
    fn allocating_too_big_a_binary_column() {
        const BATCH_SIZE: usize = 10_000;
        const TWO_GIB: usize = 2_147_483_648;

        let error = BinColumn::try_new(BATCH_SIZE, TWO_GIB).unwrap_err();

        assert!(matches!(
            error,
            TooLargeBufferSize {
                num_elements: BATCH_SIZE,
                element_size: TWO_GIB
            }
        ))
    }

    /// Changing the capacity keeps the existing elements and adds `NULL` elements.
    #[test]
    fn resize_binary_column_buffer() {
        // Given a binary column with 2 elements
        let mut column = BinColumn::new(2, 10);
        let initial_values: [&[u8]; 2] = [b"Hello", b"World"];
        for (index, value) in initial_values.iter().enumerate() {
            column.set_value(index, Some(value));
        }

        // When resizing the column to 3 elements
        column.resize(3);

        // Then
        // the max element size is unchanged
        assert_eq!(column.max_len(), 10);
        // the values are still there
        assert_eq!(column.value_at(0), Some(&b"Hello"[..]));
        assert_eq!(column.value_at(1), Some(&b"World"[..]));
        // the third element is None
        assert_eq!(column.value_at(2), None);
    }
}
492}