odbc_api/buffers/
bin_column.rs

1use crate::{
2    DataType, Error,
3    buffers::{Indicator, columnar::Resize},
4    columnar_bulk_inserter::BoundInputSlice,
5    error::TooLargeBufferSize,
6    handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
7};
8
9use log::debug;
10use odbc_sys::{CDataType, NULL_DATA};
11use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
12
/// Column buffer for variable sized binary data, intended to be bound to a column of a cursor.
///
/// Every element may hold a variable number of bytes, up to a maximum length shared by all
/// elements. Because the length varies per element, an indicator buffer is always maintained in
/// addition to the values, regardless of whether the column is nullable. This makes the type
/// usable for both nullable and non-nullable binary columns.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum length of a single element in bytes.
    max_len: usize,
    /// Consecutive bytes of all elements in the buffer. The first byte of the n-th element is
    /// located at `n * max_len`.
    values: Vec<u8>,
    /// One entry per element. Each entry is either `NULL_DATA` or the length of the value with the
    /// same index. The length may exceed `max_len` if the value has been truncated.
    indicators: Vec<isize>,
}
29
30impl BinColumn {
31    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
32    /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
33    /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
34    /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
35    /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
36    /// new.
37    pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
38        let len = element_size * batch_size;
39        let mut values = Vec::new();
40        values
41            .try_reserve_exact(len)
42            .map_err(|_| TooLargeBufferSize {
43                num_elements: batch_size,
44                element_size,
45            })?;
46        values.resize(len, 0);
47        Ok(BinColumn {
48            max_len: element_size,
49            values,
50            indicators: vec![0; batch_size],
51        })
52    }
53
54    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
55    /// have a maximum length of `max_len`.
56    pub fn new(batch_size: usize, element_size: usize) -> Self {
57        let len = element_size * batch_size;
58        let mut values = Vec::new();
59        values.reserve_exact(len);
60        values.resize(len, 0);
61        BinColumn {
62            max_len: element_size,
63            values,
64            indicators: vec![0; batch_size],
65        }
66    }
67
68    /// Return the value for the given row index.
69    ///
70    /// The column buffer does not know how many elements were in the last row group, and therefore
71    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
72    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
73    /// equal to the maximum number of elements in the buffer.
74    pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
75        self.content_length_at(row_index).map(|length| {
76            let offset = row_index * self.max_len;
77            &self.values[offset..offset + length]
78        })
79    }
80
81    /// Indicator value at the specified position. Useful to detect truncation of data.
82    ///
83    /// The column buffer does not know how many elements were in the last row group, and therefore
84    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
85    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
86    /// equal to the maximum number of elements in the buffer.
87    pub fn indicator_at(&self, row_index: usize) -> Indicator {
88        Indicator::from_isize(self.indicators[row_index])
89    }
90
91    /// Length of value at the specified position. This is different from an indicator as it refers
92    /// to the length of the value in the buffer, not to the length of the value in the datasource.
93    /// The two things are different for truncated values.
94    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
95        match self.indicator_at(row_index) {
96            Indicator::Null => None,
97            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
98            Indicator::NoTotal => Some(self.max_len),
99            Indicator::Length(length) => {
100                let length = min(self.max_len, length);
101                Some(length)
102            }
103        }
104    }
105
106    /// `Some` if any value is truncated in the range [0, num_rows).
107    ///
108    /// After fetching data we may want to know if any value has been truncated due to the buffer
109    /// not being able to hold elements of that size. This method checks the indicator buffer
110    /// element wise and reports one indicator which indicates a size large than the maximum element
111    /// size, if it exits.
112    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
113        self.indicators
114            .iter()
115            .copied()
116            .take(num_rows)
117            .find_map(|indicator| {
118                let indicator = Indicator::from_isize(indicator);
119                indicator.is_truncated(self.max_len).then_some(indicator)
120            })
121    }
122
123    /// Changes the maximum element length the buffer can hold. This operation is useful if you find
124    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
125    ///
126    /// # Parameters
127    ///
128    /// * `new_max_len`: New maximum string length without terminating zero.
129    pub fn set_max_len(&mut self, new_max_len: usize) {
130        let batch_size = self.indicators.len();
131        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
132        let new_values = vec![0u8; new_max_len * batch_size];
133        // Set all indicators to NULL
134        self.fill_null(0, batch_size);
135        self.values = new_values;
136        self.max_len = new_max_len;
137    }
138
139    /// Maximum length of elements in bytes.
140    pub fn max_len(&self) -> usize {
141        self.max_len
142    }
143
144    /// View of the first `num_rows` values of a binary column.
145    ///
146    /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
147    /// column buffer does not know how many elements were in the last row group, and therefore can
148    /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
149    /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
150    /// to the maximum number of elements in the buffer.
151    pub fn view(&self, num_rows: usize) -> BinColumnView<'_> {
152        BinColumnView {
153            num_rows,
154            col: self,
155        }
156    }
157
158    /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
159    /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
160    /// element length.
161    pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
162        if let Some(input) = input {
163            self.indicators[index] = input.len().try_into().unwrap();
164            if input.len() > self.max_len {
165                panic!(
166                    "Tried to insert a value into a binary buffer which is larger than the maximum \
167                    allowed element length for the buffer."
168                );
169            }
170            let start = self.max_len * index;
171            let end = start + input.len();
172            let buf = &mut self.values[start..end];
173            buf.copy_from_slice(input);
174        } else {
175            self.indicators[index] = NULL_DATA;
176        }
177    }
178
179    /// Fills the column with NULL, between From and To
180    pub fn fill_null(&mut self, from: usize, to: usize) {
181        for index in from..to {
182            self.indicators[index] = NULL_DATA;
183        }
184    }
185
186    /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
187    /// you find an unexpected large input during insertion.
188    ///
189    /// This is however costly, as not only does the new buffer have to be allocated, but all values
190    /// have to copied from the old to the new buffer.
191    ///
192    /// This method could also be used to reduce the maximum length, which would truncate values in
193    /// the process.
194    ///
195    /// This method does not adjust indicator buffers as these might hold values larger than the
196    /// maximum length.
197    ///
198    /// # Parameters
199    ///
200    /// * `new_max_len`: New maximum element length in bytes.
201    /// * `num_rows`: Number of valid rows currently stored in this buffer.
202    pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
203        debug!(
204            "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
205            num_rows, self.max_len, new_max_len
206        );
207
208        let batch_size = self.indicators.len();
209        // Allocate a new buffer large enough to hold a batch of elements with maximum length.
210        let mut new_values = vec![0; new_max_len * batch_size];
211        // Copy values from old to new buffer.
212        let max_copy_length = min(self.max_len, new_max_len);
213        for ((&indicator, old_value), new_value) in self
214            .indicators
215            .iter()
216            .zip(self.values.chunks_exact_mut(self.max_len))
217            .zip(new_values.chunks_exact_mut(new_max_len))
218            .take(num_rows)
219        {
220            match Indicator::from_isize(indicator) {
221                Indicator::Null => (),
222                Indicator::NoTotal => {
223                    // There is no good choice here in case we are expanding the buffer. Since
224                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
225                    // be padded with 0. I currently cannot think of any use case there it would
226                    // matter.
227                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
228                }
229                Indicator::Length(num_bytes_len) => {
230                    let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
231                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
232                }
233            }
234        }
235        self.values = new_values;
236        self.max_len = new_max_len;
237    }
238
239    /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
240    /// length should the input be too large.
241    ///
242    /// # Parameters
243    ///
244    /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
245    ///   currently in the buffer.
246    /// * `bytes`: Value to store.
247    pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
248        if let Some(bytes) = bytes {
249            if bytes.len() > self.max_len {
250                let new_max_len = (bytes.len() as f64 * 1.2) as usize;
251                self.resize_max_element_length(new_max_len, index)
252            }
253
254            let offset = index * self.max_len;
255            self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
256            // And of course set the indicator correctly.
257            self.indicators[index] = bytes.len().try_into().unwrap();
258        } else {
259            self.indicators[index] = NULL_DATA;
260        }
261    }
262
263    /// Maximum number of elements this buffer can hold.
264    pub fn capacity(&self) -> usize {
265        self.indicators.len()
266    }
267}
268
269unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
270    type SliceMut = BinColumnSliceMut<'a>;
271
272    unsafe fn as_view_mut(
273        &'a mut self,
274        parameter_index: u16,
275        stmt: StatementRef<'a>,
276    ) -> Self::SliceMut {
277        BinColumnSliceMut {
278            column: self,
279            stmt,
280            parameter_index,
281        }
282    }
283}
284
285/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
286/// values.
287pub struct BinColumnSliceMut<'a> {
288    column: &'a mut BinColumn,
289    // Needed to rebind the column in case of reallocation
290    stmt: StatementRef<'a>,
291    // Also needed to rebind the column in case of reallocation
292    parameter_index: u16,
293}
294
295impl BinColumnSliceMut<'_> {
296    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
297    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
298    /// allowed element length. `element` must be specified without the terminating zero.
299    pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
300        self.column.set_value(row_index, element)
301    }
302
303    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
304    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
305    /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
306    /// one. This makes this an extremly expensive operation.
307    pub fn ensure_max_element_length(
308        &mut self,
309        element_length: usize,
310        num_rows_to_copy: usize,
311    ) -> Result<(), Error> {
312        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
313        // in order to hold it. This invalidates the pointers previously bound to the statement. So
314        // we rebind them.
315        if element_length > self.column.max_len() {
316            self.column
317                .resize_max_element_length(element_length, num_rows_to_copy);
318            unsafe {
319                self.stmt
320                    .bind_input_parameter(self.parameter_index, self.column)
321                    .into_result(&self.stmt)?
322            }
323        }
324        Ok(())
325    }
326}
327
328#[derive(Debug, Clone, Copy)]
329pub struct BinColumnView<'c> {
330    num_rows: usize,
331    col: &'c BinColumn,
332}
333
334impl<'c> BinColumnView<'c> {
335    /// The number of valid elements in the text column.
336    pub fn len(&self) -> usize {
337        self.num_rows
338    }
339
340    /// True if, and only if there are no valid rows in the column buffer.
341    pub fn is_empty(&self) -> bool {
342        self.num_rows == 0
343    }
344
345    /// Slice of text at the specified row index without terminating zero.
346    pub fn get(&self, index: usize) -> Option<&'c [u8]> {
347        self.col.value_at(index)
348    }
349
350    /// Iterator over the valid elements of the text buffer
351    pub fn iter(&self) -> BinColumnIt<'c> {
352        BinColumnIt {
353            pos: 0,
354            num_rows: self.num_rows,
355            col: self.col,
356        }
357    }
358
359    /// Finds an indicator larger than max element in the range [0, num_rows).
360    ///
361    /// After fetching data we may want to know if any value has been truncated due to the buffer
362    /// not being able to hold elements of that size. This method checks the indicator buffer
363    /// element wise.
364    pub fn has_truncated_values(&self) -> Option<Indicator> {
365        self.col.has_truncated_values(self.num_rows)
366    }
367}
368
369/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
370#[derive(Debug)]
371pub struct BinColumnIt<'c> {
372    pos: usize,
373    num_rows: usize,
374    col: &'c BinColumn,
375}
376
377impl<'c> Iterator for BinColumnIt<'c> {
378    type Item = Option<&'c [u8]>;
379
380    fn next(&mut self) -> Option<Self::Item> {
381        if self.pos == self.num_rows {
382            None
383        } else {
384            let ret = Some(self.col.value_at(self.pos));
385            self.pos += 1;
386            ret
387        }
388    }
389
390    fn size_hint(&self) -> (usize, Option<usize>) {
391        let len = self.num_rows - self.pos;
392        (len, Some(len))
393    }
394}
395
396impl ExactSizeIterator for BinColumnIt<'_> {}
397
398unsafe impl CData for BinColumn {
399    fn cdata_type(&self) -> CDataType {
400        CDataType::Binary
401    }
402
403    fn indicator_ptr(&self) -> *const isize {
404        self.indicators.as_ptr()
405    }
406
407    fn value_ptr(&self) -> *const c_void {
408        self.values.as_ptr() as *const c_void
409    }
410
411    fn buffer_length(&self) -> isize {
412        self.max_len.try_into().unwrap()
413    }
414}
415
416impl HasDataType for BinColumn {
417    fn data_type(&self) -> DataType {
418        DataType::Varbinary {
419            length: NonZeroUsize::new(self.max_len),
420        }
421    }
422}
423
424unsafe impl CDataMut for BinColumn {
425    fn mut_indicator_ptr(&mut self) -> *mut isize {
426        self.indicators.as_mut_ptr()
427    }
428
429    fn mut_value_ptr(&mut self) -> *mut c_void {
430        self.values.as_mut_ptr() as *mut c_void
431    }
432}
433
434impl Resize for BinColumn {
435    fn resize(&mut self, new_capacity: usize) {
436        self.values.resize(new_capacity * self.max_len, 0);
437        self.indicators.resize(new_capacity, NULL_DATA);
438    }
439}
440
#[cfg(test)]
mod test {
    use super::BinColumn;
    use crate::{buffers::columnar::Resize, error::TooLargeBufferSize};

    /// Requesting a buffer far too large to allocate is reported as an error, which echoes the
    /// requested dimensions.
    #[test]
    fn allocating_too_big_a_binary_column() {
        let two_gib = 2_147_483_648;
        let TooLargeBufferSize {
            num_elements,
            element_size,
        } = BinColumn::try_new(10_000, two_gib).unwrap_err();
        assert_eq!(num_elements, 10_000);
        assert_eq!(element_size, 2_147_483_648);
    }

    /// Growing the capacity keeps existing values and adds NULL elements.
    #[test]
    fn resize_binary_column_buffer() {
        // Given a binary column with 2 elements
        let mut column = BinColumn::new(2, 10);
        column.set_value(0, Some(b"Hello"));
        column.set_value(1, Some(b"World"));

        // When resizing the column to 3 elements
        column.resize(3);

        // Then the max element size is unchanged
        assert_eq!(10, column.max_len());
        // and the values are still there
        assert_eq!(Some(&b"Hello"[..]), column.value_at(0));
        assert_eq!(Some(&b"World"[..]), column.value_at(1));
        // and the third element is NULL
        assert!(column.value_at(2).is_none());
    }
}