odbc_api/buffers/bin_column.rs
1use super::{ColumnBuffer, Indicator, Resize, Slice};
2
3use crate::{
4 DataType, Error,
5 columnar_bulk_inserter::BoundInputSlice,
6 error::TooLargeBufferSize,
7 handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
8};
9
10use log::trace;
11use odbc_sys::{CDataType, NULL_DATA};
12use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
13
/// Column buffer for variable-sized binary data, intended to be bound to a column of a cursor.
///
/// Every element may hold a variable amount of bytes up to a fixed maximum length. Because the
/// length differs from element to element, an indicator buffer is always maintained alongside the
/// values, whether the column is nullable or not.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum length of a single element in bytes.
    max_len: usize,
    /// Consecutive bytes of all elements in the buffer. The first byte of the n-th element is
    /// found at `n * max_len`.
    values: Vec<u8>,
    /// One entry per element. Each entry is either `NULL_DATA` or the length of the element in
    /// `values` at the same index. Note that an entry may be larger than `max_len` if the value
    /// has been truncated.
    indicators: Vec<isize>,
}
31
32impl BinColumn {
33 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
34 /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
35 /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
36 /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
37 /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
38 /// new.
39 pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
40 let len = element_size * batch_size;
41 let mut values = Vec::new();
42 values
43 .try_reserve_exact(len)
44 .map_err(|_| TooLargeBufferSize {
45 num_elements: batch_size,
46 element_size,
47 })?;
48 values.resize(len, 0);
49 Ok(BinColumn {
50 max_len: element_size,
51 values,
52 indicators: vec![0; batch_size],
53 })
54 }
55
56 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
57 /// have a maximum length of `max_len`.
58 pub fn new(batch_size: usize, element_size: usize) -> Self {
59 let len = element_size * batch_size;
60 let mut values = Vec::new();
61 values.reserve_exact(len);
62 values.resize(len, 0);
63 BinColumn {
64 max_len: element_size,
65 values,
66 indicators: vec![0; batch_size],
67 }
68 }
69
70 /// Return the value for the given row index.
71 ///
72 /// The column buffer does not know how many elements were in the last row group, and therefore
73 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
74 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
75 /// equal to the maximum number of elements in the buffer.
76 pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
77 self.content_length_at(row_index).map(|length| {
78 let offset = row_index * self.max_len;
79 &self.values[offset..offset + length]
80 })
81 }
82
83 /// Indicator value at the specified position. Useful to detect truncation of data.
84 ///
85 /// The column buffer does not know how many elements were in the last row group, and therefore
86 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
87 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
88 /// equal to the maximum number of elements in the buffer.
89 pub fn indicator_at(&self, row_index: usize) -> Indicator {
90 Indicator::from_isize(self.indicators[row_index])
91 }
92
93 /// Length of value at the specified position. This is different from an indicator as it refers
94 /// to the length of the value in the buffer, not to the length of the value in the datasource.
95 /// The two things are different for truncated values.
96 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
97 match self.indicator_at(row_index) {
98 Indicator::Null => None,
99 // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
100 Indicator::NoTotal => Some(self.max_len),
101 Indicator::Length(length) => {
102 let length = min(self.max_len, length);
103 Some(length)
104 }
105 }
106 }
107
108 /// Changes the maximum element length the buffer can hold. This operation is useful if you find
109 /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
110 ///
111 /// # Parameters
112 ///
113 /// * `new_max_len`: New maximum string length without terminating zero.
114 pub fn set_max_len(&mut self, new_max_len: usize) {
115 let batch_size = self.indicators.len();
116 // Allocate a new buffer large enough to hold a batch of strings with maximum length.
117 let new_values = vec![0u8; new_max_len * batch_size];
118 // Set all indicators to NULL
119 self.fill_null(0, batch_size);
120 self.values = new_values;
121 self.max_len = new_max_len;
122 }
123
124 /// Maximum length of elements in bytes.
125 pub fn max_len(&self) -> usize {
126 self.max_len
127 }
128
129 /// View of the first `num_rows` values of a binary column.
130 ///
131 /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
132 /// column buffer does not know how many elements were in the last row group, and therefore can
133 /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
134 /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
135 /// to the maximum number of elements in the buffer.
136 pub fn view(&self, num_rows: usize) -> BinColumnSlice<'_> {
137 BinColumnSlice {
138 num_rows,
139 col: self,
140 }
141 }
142
143 /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
144 /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
145 /// element length.
146 pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
147 if let Some(input) = input {
148 self.indicators[index] = input.len().try_into().unwrap();
149 if input.len() > self.max_len {
150 panic!(
151 "Tried to insert a value into a binary buffer which is larger than the maximum \
152 allowed element length for the buffer."
153 );
154 }
155 let start = self.max_len * index;
156 let end = start + input.len();
157 let buf = &mut self.values[start..end];
158 buf.copy_from_slice(input);
159 } else {
160 self.indicators[index] = NULL_DATA;
161 }
162 }
163
164 /// Fills the column with NULL, between From and To
165 pub fn fill_null(&mut self, from: usize, to: usize) {
166 for index in from..to {
167 self.indicators[index] = NULL_DATA;
168 }
169 }
170
171 /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
172 /// you find an unexpected large input during insertion.
173 ///
174 /// This is however costly, as not only does the new buffer have to be allocated, but all values
175 /// have to copied from the old to the new buffer.
176 ///
177 /// This method could also be used to reduce the maximum length, which would truncate values in
178 /// the process.
179 ///
180 /// This method does not adjust indicator buffers as these might hold values larger than the
181 /// maximum length.
182 ///
183 /// # Parameters
184 ///
185 /// * `new_max_len`: New maximum element length in bytes.
186 /// * `num_rows`: Number of valid rows currently stored in this buffer.
187 pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
188 #[cfg(not(feature = "structured_logging"))]
189 trace!(
190 "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
191 num_rows, self.max_len, new_max_len
192 );
193 #[cfg(feature = "structured_logging")]
194 trace!(
195 target: "odbc_api",
196 num_rows = num_rows,
197 old_max_len = self.max_len,
198 new_max_len = new_max_len;
199 "Binary column buffer resized"
200 );
201
202 let batch_size = self.indicators.len();
203 // Allocate a new buffer large enough to hold a batch of elements with maximum length.
204 let mut new_values = vec![0; new_max_len * batch_size];
205 // Copy values from old to new buffer.
206 let max_copy_length = min(self.max_len, new_max_len);
207 for ((&indicator, old_value), new_value) in self
208 .indicators
209 .iter()
210 .zip(self.values.chunks_exact_mut(self.max_len))
211 .zip(new_values.chunks_exact_mut(new_max_len))
212 .take(num_rows)
213 {
214 match Indicator::from_isize(indicator) {
215 Indicator::Null => (),
216 Indicator::NoTotal => {
217 // There is no good choice here in case we are expanding the buffer. Since
218 // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
219 // be padded with 0. I currently cannot think of any use case there it would
220 // matter.
221 new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
222 }
223 Indicator::Length(num_bytes_len) => {
224 let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
225 new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
226 }
227 }
228 }
229 self.values = new_values;
230 self.max_len = new_max_len;
231 }
232
233 /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
234 /// length should the input be too large.
235 ///
236 /// # Parameters
237 ///
238 /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
239 /// currently in the buffer.
240 /// * `bytes`: Value to store.
241 pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
242 if let Some(bytes) = bytes {
243 if bytes.len() > self.max_len {
244 let new_max_len = (bytes.len() as f64 * 1.2) as usize;
245 self.resize_max_element_length(new_max_len, index)
246 }
247
248 let offset = index * self.max_len;
249 self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
250 // And of course set the indicator correctly.
251 self.indicators[index] = bytes.len().try_into().unwrap();
252 } else {
253 self.indicators[index] = NULL_DATA;
254 }
255 }
256}
257
258unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
259 type SliceMut = BinColumnSliceMut<'a>;
260
261 unsafe fn as_view_mut(
262 &'a mut self,
263 parameter_index: u16,
264 stmt: StatementRef<'a>,
265 ) -> Self::SliceMut {
266 BinColumnSliceMut {
267 column: self,
268 stmt,
269 parameter_index,
270 }
271 }
272}
273
274/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
275/// values.
276pub struct BinColumnSliceMut<'a> {
277 column: &'a mut BinColumn,
278 // Needed to rebind the column in case of reallocation
279 stmt: StatementRef<'a>,
280 // Also needed to rebind the column in case of reallocation
281 parameter_index: u16,
282}
283
284impl BinColumnSliceMut<'_> {
285 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
286 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
287 /// allowed element length. `element` must be specified without the terminating zero.
288 pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
289 self.column.set_value(row_index, element)
290 }
291
292 /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
293 /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
294 /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
295 /// one. This makes this an extremly expensive operation.
296 pub fn ensure_max_element_length(
297 &mut self,
298 element_length: usize,
299 num_rows_to_copy: usize,
300 ) -> Result<(), Error> {
301 // Column buffer is not large enough to hold the element. We must allocate a larger buffer
302 // in order to hold it. This invalidates the pointers previously bound to the statement. So
303 // we rebind them.
304 if element_length > self.column.max_len() {
305 self.column
306 .resize_max_element_length(element_length, num_rows_to_copy);
307 unsafe {
308 self.stmt
309 .bind_input_parameter(self.parameter_index, self.column)
310 .into_result(&self.stmt)?
311 }
312 }
313 Ok(())
314 }
315}
316
317#[derive(Debug, Clone, Copy)]
318pub struct BinColumnSlice<'c> {
319 num_rows: usize,
320 col: &'c BinColumn,
321}
322
323impl<'c> BinColumnSlice<'c> {
324 /// The number of valid elements in the text column.
325 pub fn len(&self) -> usize {
326 self.num_rows
327 }
328
329 /// True if, and only if there are no valid rows in the column buffer.
330 pub fn is_empty(&self) -> bool {
331 self.num_rows == 0
332 }
333
334 /// Slice of text at the specified row index without terminating zero.
335 pub fn get(&self, index: usize) -> Option<&'c [u8]> {
336 self.col.value_at(index)
337 }
338
339 /// Iterator over the valid elements of the text buffer
340 pub fn iter(&self) -> BinColumnIt<'c> {
341 BinColumnIt {
342 pos: 0,
343 num_rows: self.num_rows,
344 col: self.col,
345 }
346 }
347
348 /// Finds an indicator larger than max element in the range [0, num_rows).
349 ///
350 /// After fetching data we may want to know if any value has been truncated due to the buffer
351 /// not being able to hold elements of that size. This method checks the indicator buffer
352 /// element wise.
353 pub fn has_truncated_values(&self) -> Option<Indicator> {
354 self.col.has_truncated_values(self.num_rows)
355 }
356}
357
358unsafe impl Slice for BinColumn {
359 type Slice<'a> = BinColumnSlice<'a>;
360
361 fn slice(&self, valid_rows: usize) -> Self::Slice<'_> {
362 BinColumnSlice {
363 num_rows: valid_rows,
364 col: self,
365 }
366 }
367}
368
369/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
370#[derive(Debug)]
371pub struct BinColumnIt<'c> {
372 pos: usize,
373 num_rows: usize,
374 col: &'c BinColumn,
375}
376
377impl<'c> Iterator for BinColumnIt<'c> {
378 type Item = Option<&'c [u8]>;
379
380 fn next(&mut self) -> Option<Self::Item> {
381 if self.pos == self.num_rows {
382 None
383 } else {
384 let ret = Some(self.col.value_at(self.pos));
385 self.pos += 1;
386 ret
387 }
388 }
389
390 fn size_hint(&self) -> (usize, Option<usize>) {
391 let len = self.num_rows - self.pos;
392 (len, Some(len))
393 }
394}
395
396impl ExactSizeIterator for BinColumnIt<'_> {}
397
398unsafe impl CData for BinColumn {
399 fn cdata_type(&self) -> CDataType {
400 CDataType::Binary
401 }
402
403 fn indicator_ptr(&self) -> *const isize {
404 self.indicators.as_ptr()
405 }
406
407 fn value_ptr(&self) -> *const c_void {
408 self.values.as_ptr() as *const c_void
409 }
410
411 fn buffer_length(&self) -> isize {
412 self.max_len.try_into().unwrap()
413 }
414}
415
416impl HasDataType for BinColumn {
417 fn data_type(&self) -> DataType {
418 DataType::Varbinary {
419 length: NonZeroUsize::new(self.max_len),
420 }
421 }
422}
423
424unsafe impl CDataMut for BinColumn {
425 fn mut_indicator_ptr(&mut self) -> *mut isize {
426 self.indicators.as_mut_ptr()
427 }
428
429 fn mut_value_ptr(&mut self) -> *mut c_void {
430 self.values.as_mut_ptr() as *mut c_void
431 }
432}
433
434impl Resize for BinColumn {
435 fn resize(&mut self, new_capacity: usize) {
436 self.values.resize(new_capacity * self.max_len, 0);
437 self.indicators.resize(new_capacity, NULL_DATA);
438 }
439}
440
441unsafe impl ColumnBuffer for BinColumn {
442 fn capacity(&self) -> usize {
443 self.indicators.len()
444 }
445
446 fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
447 self.indicators
448 .iter()
449 .copied()
450 .take(num_rows)
451 .find_map(|indicator| {
452 let indicator = Indicator::from_isize(indicator);
453 indicator.is_truncated(self.max_len).then_some(indicator)
454 })
455 }
456}
457
#[cfg(test)]
mod test {
    use super::{BinColumn, Resize};
    use crate::error::TooLargeBufferSize;

    /// Requesting far more memory than available must be reported as an error carrying the
    /// requested dimensions.
    #[test]
    #[ignore = "On windows this tests does cause containerized linux and WSL to allocate all \
                memory instead of triggering a failed allocation."]
    fn allocating_too_big_a_binary_column() {
        const TWO_GIB: usize = 2_147_483_648;

        let error = BinColumn::try_new(10_000, TWO_GIB).unwrap_err();

        assert!(matches!(
            error,
            TooLargeBufferSize {
                num_elements: 10_000,
                element_size: TWO_GIB
            }
        ))
    }

    /// Growing the number of rows keeps the existing values and adds NULL elements.
    #[test]
    fn resize_binary_column_buffer() {
        // Given a binary column with 2 elements
        let mut buffer = BinColumn::new(2, 10);
        for (row, bytes) in [b"Hello", b"World"].into_iter().enumerate() {
            buffer.set_value(row, Some(bytes.as_slice()));
        }

        // When resizing the column to 3 elements
        buffer.resize(3);

        // Then
        // the max element size is unchanged
        assert_eq!(buffer.max_len(), 10);
        // the values are still there
        assert_eq!(buffer.value_at(0), Some(&b"Hello"[..]));
        assert_eq!(buffer.value_at(1), Some(&b"World"[..]));
        // the third element is None
        assert_eq!(buffer.value_at(2), None);
    }
}
499}