odbc_api/buffers/
bin_column.rs

1use crate::{
2    DataType, Error,
3    buffers::{Indicator, columnar::Resize},
4    columnar_bulk_inserter::BoundInputSlice,
5    error::TooLargeBufferSize,
6    handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
7};
8
9use log::debug;
10use odbc_sys::{CDataType, NULL_DATA};
11use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
12
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of bytes up to a maximum length. Since elements of this type have variable
/// length an additional indicator buffer is also maintained, whether the column is nullable or not.
/// Therefore this buffer type is used for variable-sized binary data, whether it is nullable or
/// not.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum element length in bytes.
    max_len: usize,
    /// Consecutive bytes for all the elements in the buffer. The first byte of the n-th element
    /// is located at `n * max_len`.
    values: Vec<u8>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length of the element in
    /// `values` with the same index. Please note that this value may be larger than `max_len` if
    /// the value has been truncated.
    indicators: Vec<isize>,
}
30
31impl BinColumn {
32    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
33    /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
34    /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
35    /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
36    /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
37    /// new.
38    pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
39        let len = element_size * batch_size;
40        let mut values = Vec::new();
41        values
42            .try_reserve_exact(len)
43            .map_err(|_| TooLargeBufferSize {
44                num_elements: batch_size,
45                element_size,
46            })?;
47        values.resize(len, 0);
48        Ok(BinColumn {
49            max_len: element_size,
50            values,
51            indicators: vec![0; batch_size],
52        })
53    }
54
55    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
56    /// have a maximum length of `max_len`.
57    pub fn new(batch_size: usize, element_size: usize) -> Self {
58        let len = element_size * batch_size;
59        let mut values = Vec::new();
60        values.reserve_exact(len);
61        values.resize(len, 0);
62        BinColumn {
63            max_len: element_size,
64            values,
65            indicators: vec![0; batch_size],
66        }
67    }
68
69    /// Return the value for the given row index.
70    ///
71    /// The column buffer does not know how many elements were in the last row group, and therefore
72    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
73    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
74    /// equal to the maximum number of elements in the buffer.
75    pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
76        self.content_length_at(row_index).map(|length| {
77            let offset = row_index * self.max_len;
78            &self.values[offset..offset + length]
79        })
80    }
81
82    /// Indicator value at the specified position. Useful to detect truncation of data.
83    ///
84    /// The column buffer does not know how many elements were in the last row group, and therefore
85    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
86    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
87    /// equal to the maximum number of elements in the buffer.
88    pub fn indicator_at(&self, row_index: usize) -> Indicator {
89        Indicator::from_isize(self.indicators[row_index])
90    }
91
92    /// Length of value at the specified position. This is different from an indicator as it refers
93    /// to the length of the value in the buffer, not to the length of the value in the datasource.
94    /// The two things are different for truncated values.
95    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
96        match self.indicator_at(row_index) {
97            Indicator::Null => None,
98            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
99            Indicator::NoTotal => Some(self.max_len),
100            Indicator::Length(length) => {
101                let length = min(self.max_len, length);
102                Some(length)
103            }
104        }
105    }
106
107    /// `Some` if any value is truncated in the range [0, num_rows).
108    ///
109    /// After fetching data we may want to know if any value has been truncated due to the buffer
110    /// not being able to hold elements of that size. This method checks the indicator buffer
111    /// element wise and reports one indicator which indicates a size large than the maximum element
112    /// size, if it exits.
113    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
114        self.indicators
115            .iter()
116            .copied()
117            .take(num_rows)
118            .find_map(|indicator| {
119                let indicator = Indicator::from_isize(indicator);
120                indicator.is_truncated(self.max_len).then_some(indicator)
121            })
122    }
123
124    /// Changes the maximum element length the buffer can hold. This operation is useful if you find
125    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
126    ///
127    /// # Parameters
128    ///
129    /// * `new_max_len`: New maximum string length without terminating zero.
130    pub fn set_max_len(&mut self, new_max_len: usize) {
131        let batch_size = self.indicators.len();
132        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
133        let new_values = vec![0u8; new_max_len * batch_size];
134        // Set all indicators to NULL
135        self.fill_null(0, batch_size);
136        self.values = new_values;
137        self.max_len = new_max_len;
138    }
139
140    /// Maximum length of elements in bytes.
141    pub fn max_len(&self) -> usize {
142        self.max_len
143    }
144
145    /// View of the first `num_rows` values of a binary column.
146    ///
147    /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
148    /// column buffer does not know how many elements were in the last row group, and therefore can
149    /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
150    /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
151    /// to the maximum number of elements in the buffer.
152    pub fn view(&self, num_rows: usize) -> BinColumnView<'_> {
153        BinColumnView {
154            num_rows,
155            col: self,
156        }
157    }
158
159    /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
160    /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
161    /// element length.
162    pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
163        if let Some(input) = input {
164            self.indicators[index] = input.len().try_into().unwrap();
165            if input.len() > self.max_len {
166                panic!(
167                    "Tried to insert a value into a binary buffer which is larger than the maximum \
168                    allowed element length for the buffer."
169                );
170            }
171            let start = self.max_len * index;
172            let end = start + input.len();
173            let buf = &mut self.values[start..end];
174            buf.copy_from_slice(input);
175        } else {
176            self.indicators[index] = NULL_DATA;
177        }
178    }
179
180    /// Fills the column with NULL, between From and To
181    pub fn fill_null(&mut self, from: usize, to: usize) {
182        for index in from..to {
183            self.indicators[index] = NULL_DATA;
184        }
185    }
186
187    /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
188    /// you find an unexpected large input during insertion.
189    ///
190    /// This is however costly, as not only does the new buffer have to be allocated, but all values
191    /// have to copied from the old to the new buffer.
192    ///
193    /// This method could also be used to reduce the maximum length, which would truncate values in
194    /// the process.
195    ///
196    /// This method does not adjust indicator buffers as these might hold values larger than the
197    /// maximum length.
198    ///
199    /// # Parameters
200    ///
201    /// * `new_max_len`: New maximum element length in bytes.
202    /// * `num_rows`: Number of valid rows currently stored in this buffer.
203    pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
204        debug!(
205            "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
206            num_rows, self.max_len, new_max_len
207        );
208
209        let batch_size = self.indicators.len();
210        // Allocate a new buffer large enough to hold a batch of elements with maximum length.
211        let mut new_values = vec![0; new_max_len * batch_size];
212        // Copy values from old to new buffer.
213        let max_copy_length = min(self.max_len, new_max_len);
214        for ((&indicator, old_value), new_value) in self
215            .indicators
216            .iter()
217            .zip(self.values.chunks_exact_mut(self.max_len))
218            .zip(new_values.chunks_exact_mut(new_max_len))
219            .take(num_rows)
220        {
221            match Indicator::from_isize(indicator) {
222                Indicator::Null => (),
223                Indicator::NoTotal => {
224                    // There is no good choice here in case we are expanding the buffer. Since
225                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
226                    // be padded with 0. I currently cannot think of any use case there it would
227                    // matter.
228                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
229                }
230                Indicator::Length(num_bytes_len) => {
231                    let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
232                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
233                }
234            }
235        }
236        self.values = new_values;
237        self.max_len = new_max_len;
238    }
239
240    /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
241    /// length should the input be too large.
242    ///
243    /// # Parameters
244    ///
245    /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
246    ///   currently in the buffer.
247    /// * `bytes`: Value to store.
248    pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
249        if let Some(bytes) = bytes {
250            if bytes.len() > self.max_len {
251                let new_max_len = (bytes.len() as f64 * 1.2) as usize;
252                self.resize_max_element_length(new_max_len, index)
253            }
254
255            let offset = index * self.max_len;
256            self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
257            // And of course set the indicator correctly.
258            self.indicators[index] = bytes.len().try_into().unwrap();
259        } else {
260            self.indicators[index] = NULL_DATA;
261        }
262    }
263
264    /// Maximum number of elements this buffer can hold.
265    pub fn capacity(&self) -> usize {
266        self.indicators.len()
267    }
268}
269
270unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
271    type SliceMut = BinColumnSliceMut<'a>;
272
273    unsafe fn as_view_mut(
274        &'a mut self,
275        parameter_index: u16,
276        stmt: StatementRef<'a>,
277    ) -> Self::SliceMut {
278        BinColumnSliceMut {
279            column: self,
280            stmt,
281            parameter_index,
282        }
283    }
284}
285
286/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
287/// values.
288pub struct BinColumnSliceMut<'a> {
289    column: &'a mut BinColumn,
290    // Needed to rebind the column in case of reallocation
291    stmt: StatementRef<'a>,
292    // Also needed to rebind the column in case of reallocation
293    parameter_index: u16,
294}
295
296impl BinColumnSliceMut<'_> {
297    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
298    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
299    /// allowed element length. `element` must be specified without the terminating zero.
300    pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
301        self.column.set_value(row_index, element)
302    }
303
304    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
305    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
306    /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
307    /// one. This makes this an extremly expensive operation.
308    pub fn ensure_max_element_length(
309        &mut self,
310        element_length: usize,
311        num_rows_to_copy: usize,
312    ) -> Result<(), Error> {
313        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
314        // in order to hold it. This invalidates the pointers previously bound to the statement. So
315        // we rebind them.
316        if element_length > self.column.max_len() {
317            self.column
318                .resize_max_element_length(element_length, num_rows_to_copy);
319            unsafe {
320                self.stmt
321                    .bind_input_parameter(self.parameter_index, self.column)
322                    .into_result(&self.stmt)?
323            }
324        }
325        Ok(())
326    }
327}
328
329#[derive(Debug, Clone, Copy)]
330pub struct BinColumnView<'c> {
331    num_rows: usize,
332    col: &'c BinColumn,
333}
334
335impl<'c> BinColumnView<'c> {
336    /// The number of valid elements in the text column.
337    pub fn len(&self) -> usize {
338        self.num_rows
339    }
340
341    /// True if, and only if there are no valid rows in the column buffer.
342    pub fn is_empty(&self) -> bool {
343        self.num_rows == 0
344    }
345
346    /// Slice of text at the specified row index without terminating zero.
347    pub fn get(&self, index: usize) -> Option<&'c [u8]> {
348        self.col.value_at(index)
349    }
350
351    /// Iterator over the valid elements of the text buffer
352    pub fn iter(&self) -> BinColumnIt<'c> {
353        BinColumnIt {
354            pos: 0,
355            num_rows: self.num_rows,
356            col: self.col,
357        }
358    }
359
360    /// Finds an indicator larger than max element in the range [0, num_rows).
361    ///
362    /// After fetching data we may want to know if any value has been truncated due to the buffer
363    /// not being able to hold elements of that size. This method checks the indicator buffer
364    /// element wise.
365    pub fn has_truncated_values(&self) -> Option<Indicator> {
366        self.col.has_truncated_values(self.num_rows)
367    }
368}
369
370/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
371#[derive(Debug)]
372pub struct BinColumnIt<'c> {
373    pos: usize,
374    num_rows: usize,
375    col: &'c BinColumn,
376}
377
378impl<'c> Iterator for BinColumnIt<'c> {
379    type Item = Option<&'c [u8]>;
380
381    fn next(&mut self) -> Option<Self::Item> {
382        if self.pos == self.num_rows {
383            None
384        } else {
385            let ret = Some(self.col.value_at(self.pos));
386            self.pos += 1;
387            ret
388        }
389    }
390
391    fn size_hint(&self) -> (usize, Option<usize>) {
392        let len = self.num_rows - self.pos;
393        (len, Some(len))
394    }
395}
396
397impl ExactSizeIterator for BinColumnIt<'_> {}
398
399unsafe impl CData for BinColumn {
400    fn cdata_type(&self) -> CDataType {
401        CDataType::Binary
402    }
403
404    fn indicator_ptr(&self) -> *const isize {
405        self.indicators.as_ptr()
406    }
407
408    fn value_ptr(&self) -> *const c_void {
409        self.values.as_ptr() as *const c_void
410    }
411
412    fn buffer_length(&self) -> isize {
413        self.max_len.try_into().unwrap()
414    }
415}
416
417impl HasDataType for BinColumn {
418    fn data_type(&self) -> DataType {
419        DataType::Varbinary {
420            length: NonZeroUsize::new(self.max_len),
421        }
422    }
423}
424
425unsafe impl CDataMut for BinColumn {
426    fn mut_indicator_ptr(&mut self) -> *mut isize {
427        self.indicators.as_mut_ptr()
428    }
429
430    fn mut_value_ptr(&mut self) -> *mut c_void {
431        self.values.as_mut_ptr() as *mut c_void
432    }
433}
434
435impl Resize for BinColumn {
436    fn resize(&mut self, new_capacity: usize) {
437        self.values.resize(new_capacity * self.max_len, 0);
438        self.indicators.resize(new_capacity, NULL_DATA);
439    }
440}
441
#[cfg(test)]
mod test {
    use crate::{buffers::columnar::Resize, error::TooLargeBufferSize};

    use super::BinColumn;

    /// Requesting 10,000 elements of 2 GiB each must be reported as an error carrying the
    /// requested dimensions.
    #[test]
    fn allocating_too_big_a_binary_column() {
        let two_gib = 2_147_483_648;

        let TooLargeBufferSize {
            num_elements,
            element_size,
        } = BinColumn::try_new(10_000, two_gib).unwrap_err();

        assert_eq!(num_elements, 10_000);
        assert_eq!(element_size, 2_147_483_648);
    }

    /// Growing the number of rows keeps the existing values and adds NULL rows.
    #[test]
    fn resize_binary_column_buffer() {
        // Given a binary column with 2 elements
        let mut column = BinColumn::new(2, 10);
        column.set_value(0, Some(b"Hello"));
        column.set_value(1, Some(b"World"));

        // When resizing the column to 3 elements
        column.resize(3);

        // Then the max element size is unchanged
        assert_eq!(column.max_len(), 10);
        // and the values are still there, followed by a third element which is None
        let expected: [Option<&[u8]>; 3] = [Some(&b"Hello"[..]), Some(&b"World"[..]), None];
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(column.value_at(row), *want);
        }
    }
}
481}