Skip to main content

odbc_api/buffers/
bin_column.rs

1use crate::{
2    DataType, Error,
3    buffers::{Indicator, columnar::Resize},
4    columnar_bulk_inserter::BoundInputSlice,
5    error::TooLargeBufferSize,
6    handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
7};
8
9use log::trace;
10use odbc_sys::{CDataType, NULL_DATA};
11use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
12
/// Column buffer for variable-sized binary data, intended to be bound to a column of a cursor.
///
/// Every element occupies a fixed slot of `max_len` bytes in one contiguous allocation and may
/// use any number of bytes up to that maximum. Because the used length varies per element, an
/// indicator buffer is always maintained alongside the values, whether the column is nullable or
/// not.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum element length in bytes. This is also the size of each slot in `values`.
    max_len: usize,
    /// Consecutive bytes of all the elements in the buffer. The first byte of the n-th element is
    /// located at `n * max_len`.
    values: Vec<u8>,
    /// One entry per element: either `NULL_DATA` or the length of the element stored in `values`
    /// at the same index. Please note that an entry may be larger than `max_len` if the value has
    /// been truncated.
    indicators: Vec<isize>,
}
30
31impl BinColumn {
32    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
33    /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
34    /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
35    /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
36    /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
37    /// new.
38    pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
39        let len = element_size * batch_size;
40        let mut values = Vec::new();
41        values
42            .try_reserve_exact(len)
43            .map_err(|_| TooLargeBufferSize {
44                num_elements: batch_size,
45                element_size,
46            })?;
47        values.resize(len, 0);
48        Ok(BinColumn {
49            max_len: element_size,
50            values,
51            indicators: vec![0; batch_size],
52        })
53    }
54
55    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
56    /// have a maximum length of `max_len`.
57    pub fn new(batch_size: usize, element_size: usize) -> Self {
58        let len = element_size * batch_size;
59        let mut values = Vec::new();
60        values.reserve_exact(len);
61        values.resize(len, 0);
62        BinColumn {
63            max_len: element_size,
64            values,
65            indicators: vec![0; batch_size],
66        }
67    }
68
69    /// Return the value for the given row index.
70    ///
71    /// The column buffer does not know how many elements were in the last row group, and therefore
72    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
73    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
74    /// equal to the maximum number of elements in the buffer.
75    pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
76        self.content_length_at(row_index).map(|length| {
77            let offset = row_index * self.max_len;
78            &self.values[offset..offset + length]
79        })
80    }
81
82    /// Indicator value at the specified position. Useful to detect truncation of data.
83    ///
84    /// The column buffer does not know how many elements were in the last row group, and therefore
85    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
86    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
87    /// equal to the maximum number of elements in the buffer.
88    pub fn indicator_at(&self, row_index: usize) -> Indicator {
89        Indicator::from_isize(self.indicators[row_index])
90    }
91
92    /// Length of value at the specified position. This is different from an indicator as it refers
93    /// to the length of the value in the buffer, not to the length of the value in the datasource.
94    /// The two things are different for truncated values.
95    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
96        match self.indicator_at(row_index) {
97            Indicator::Null => None,
98            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
99            Indicator::NoTotal => Some(self.max_len),
100            Indicator::Length(length) => {
101                let length = min(self.max_len, length);
102                Some(length)
103            }
104        }
105    }
106
107    /// `Some` if any value is truncated in the range [0, num_rows).
108    ///
109    /// After fetching data we may want to know if any value has been truncated due to the buffer
110    /// not being able to hold elements of that size. This method checks the indicator buffer
111    /// element wise and reports one indicator which indicates a size large than the maximum element
112    /// size, if it exits.
113    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
114        self.indicators
115            .iter()
116            .copied()
117            .take(num_rows)
118            .find_map(|indicator| {
119                let indicator = Indicator::from_isize(indicator);
120                indicator.is_truncated(self.max_len).then_some(indicator)
121            })
122    }
123
124    /// Changes the maximum element length the buffer can hold. This operation is useful if you find
125    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
126    ///
127    /// # Parameters
128    ///
129    /// * `new_max_len`: New maximum string length without terminating zero.
130    pub fn set_max_len(&mut self, new_max_len: usize) {
131        let batch_size = self.indicators.len();
132        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
133        let new_values = vec![0u8; new_max_len * batch_size];
134        // Set all indicators to NULL
135        self.fill_null(0, batch_size);
136        self.values = new_values;
137        self.max_len = new_max_len;
138    }
139
140    /// Maximum length of elements in bytes.
141    pub fn max_len(&self) -> usize {
142        self.max_len
143    }
144
145    /// View of the first `num_rows` values of a binary column.
146    ///
147    /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
148    /// column buffer does not know how many elements were in the last row group, and therefore can
149    /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
150    /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
151    /// to the maximum number of elements in the buffer.
152    pub fn view(&self, num_rows: usize) -> BinColumnView<'_> {
153        BinColumnView {
154            num_rows,
155            col: self,
156        }
157    }
158
159    /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
160    /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
161    /// element length.
162    pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
163        if let Some(input) = input {
164            self.indicators[index] = input.len().try_into().unwrap();
165            if input.len() > self.max_len {
166                panic!(
167                    "Tried to insert a value into a binary buffer which is larger than the maximum \
168                    allowed element length for the buffer."
169                );
170            }
171            let start = self.max_len * index;
172            let end = start + input.len();
173            let buf = &mut self.values[start..end];
174            buf.copy_from_slice(input);
175        } else {
176            self.indicators[index] = NULL_DATA;
177        }
178    }
179
180    /// Fills the column with NULL, between From and To
181    pub fn fill_null(&mut self, from: usize, to: usize) {
182        for index in from..to {
183            self.indicators[index] = NULL_DATA;
184        }
185    }
186
187    /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
188    /// you find an unexpected large input during insertion.
189    ///
190    /// This is however costly, as not only does the new buffer have to be allocated, but all values
191    /// have to copied from the old to the new buffer.
192    ///
193    /// This method could also be used to reduce the maximum length, which would truncate values in
194    /// the process.
195    ///
196    /// This method does not adjust indicator buffers as these might hold values larger than the
197    /// maximum length.
198    ///
199    /// # Parameters
200    ///
201    /// * `new_max_len`: New maximum element length in bytes.
202    /// * `num_rows`: Number of valid rows currently stored in this buffer.
203    pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
204        #[cfg(not(feature = "structured_logging"))]
205        trace!(
206            "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
207            num_rows, self.max_len, new_max_len
208        );
209        #[cfg(feature = "structured_logging")]
210        trace!(
211            target: "odbc_api",
212            num_rows = num_rows,
213            old_max_len = self.max_len,
214            new_max_len = new_max_len;
215            "Binary column buffer resized"
216        );
217
218        let batch_size = self.indicators.len();
219        // Allocate a new buffer large enough to hold a batch of elements with maximum length.
220        let mut new_values = vec![0; new_max_len * batch_size];
221        // Copy values from old to new buffer.
222        let max_copy_length = min(self.max_len, new_max_len);
223        for ((&indicator, old_value), new_value) in self
224            .indicators
225            .iter()
226            .zip(self.values.chunks_exact_mut(self.max_len))
227            .zip(new_values.chunks_exact_mut(new_max_len))
228            .take(num_rows)
229        {
230            match Indicator::from_isize(indicator) {
231                Indicator::Null => (),
232                Indicator::NoTotal => {
233                    // There is no good choice here in case we are expanding the buffer. Since
234                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
235                    // be padded with 0. I currently cannot think of any use case there it would
236                    // matter.
237                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
238                }
239                Indicator::Length(num_bytes_len) => {
240                    let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
241                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
242                }
243            }
244        }
245        self.values = new_values;
246        self.max_len = new_max_len;
247    }
248
249    /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
250    /// length should the input be too large.
251    ///
252    /// # Parameters
253    ///
254    /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
255    ///   currently in the buffer.
256    /// * `bytes`: Value to store.
257    pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
258        if let Some(bytes) = bytes {
259            if bytes.len() > self.max_len {
260                let new_max_len = (bytes.len() as f64 * 1.2) as usize;
261                self.resize_max_element_length(new_max_len, index)
262            }
263
264            let offset = index * self.max_len;
265            self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
266            // And of course set the indicator correctly.
267            self.indicators[index] = bytes.len().try_into().unwrap();
268        } else {
269            self.indicators[index] = NULL_DATA;
270        }
271    }
272
273    /// Maximum number of elements this buffer can hold.
274    pub fn capacity(&self) -> usize {
275        self.indicators.len()
276    }
277}
278
279unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
280    type SliceMut = BinColumnSliceMut<'a>;
281
282    unsafe fn as_view_mut(
283        &'a mut self,
284        parameter_index: u16,
285        stmt: StatementRef<'a>,
286    ) -> Self::SliceMut {
287        BinColumnSliceMut {
288            column: self,
289            stmt,
290            parameter_index,
291        }
292    }
293}
294
295/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
296/// values.
297pub struct BinColumnSliceMut<'a> {
298    column: &'a mut BinColumn,
299    // Needed to rebind the column in case of reallocation
300    stmt: StatementRef<'a>,
301    // Also needed to rebind the column in case of reallocation
302    parameter_index: u16,
303}
304
305impl BinColumnSliceMut<'_> {
306    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
307    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
308    /// allowed element length. `element` must be specified without the terminating zero.
309    pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
310        self.column.set_value(row_index, element)
311    }
312
313    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
314    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
315    /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
316    /// one. This makes this an extremly expensive operation.
317    pub fn ensure_max_element_length(
318        &mut self,
319        element_length: usize,
320        num_rows_to_copy: usize,
321    ) -> Result<(), Error> {
322        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
323        // in order to hold it. This invalidates the pointers previously bound to the statement. So
324        // we rebind them.
325        if element_length > self.column.max_len() {
326            self.column
327                .resize_max_element_length(element_length, num_rows_to_copy);
328            unsafe {
329                self.stmt
330                    .bind_input_parameter(self.parameter_index, self.column)
331                    .into_result(&self.stmt)?
332            }
333        }
334        Ok(())
335    }
336}
337
338#[derive(Debug, Clone, Copy)]
339pub struct BinColumnView<'c> {
340    num_rows: usize,
341    col: &'c BinColumn,
342}
343
344impl<'c> BinColumnView<'c> {
345    /// The number of valid elements in the text column.
346    pub fn len(&self) -> usize {
347        self.num_rows
348    }
349
350    /// True if, and only if there are no valid rows in the column buffer.
351    pub fn is_empty(&self) -> bool {
352        self.num_rows == 0
353    }
354
355    /// Slice of text at the specified row index without terminating zero.
356    pub fn get(&self, index: usize) -> Option<&'c [u8]> {
357        self.col.value_at(index)
358    }
359
360    /// Iterator over the valid elements of the text buffer
361    pub fn iter(&self) -> BinColumnIt<'c> {
362        BinColumnIt {
363            pos: 0,
364            num_rows: self.num_rows,
365            col: self.col,
366        }
367    }
368
369    /// Finds an indicator larger than max element in the range [0, num_rows).
370    ///
371    /// After fetching data we may want to know if any value has been truncated due to the buffer
372    /// not being able to hold elements of that size. This method checks the indicator buffer
373    /// element wise.
374    pub fn has_truncated_values(&self) -> Option<Indicator> {
375        self.col.has_truncated_values(self.num_rows)
376    }
377}
378
379/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
380#[derive(Debug)]
381pub struct BinColumnIt<'c> {
382    pos: usize,
383    num_rows: usize,
384    col: &'c BinColumn,
385}
386
387impl<'c> Iterator for BinColumnIt<'c> {
388    type Item = Option<&'c [u8]>;
389
390    fn next(&mut self) -> Option<Self::Item> {
391        if self.pos == self.num_rows {
392            None
393        } else {
394            let ret = Some(self.col.value_at(self.pos));
395            self.pos += 1;
396            ret
397        }
398    }
399
400    fn size_hint(&self) -> (usize, Option<usize>) {
401        let len = self.num_rows - self.pos;
402        (len, Some(len))
403    }
404}
405
406impl ExactSizeIterator for BinColumnIt<'_> {}
407
408unsafe impl CData for BinColumn {
409    fn cdata_type(&self) -> CDataType {
410        CDataType::Binary
411    }
412
413    fn indicator_ptr(&self) -> *const isize {
414        self.indicators.as_ptr()
415    }
416
417    fn value_ptr(&self) -> *const c_void {
418        self.values.as_ptr() as *const c_void
419    }
420
421    fn buffer_length(&self) -> isize {
422        self.max_len.try_into().unwrap()
423    }
424}
425
426impl HasDataType for BinColumn {
427    fn data_type(&self) -> DataType {
428        DataType::Varbinary {
429            length: NonZeroUsize::new(self.max_len),
430        }
431    }
432}
433
434unsafe impl CDataMut for BinColumn {
435    fn mut_indicator_ptr(&mut self) -> *mut isize {
436        self.indicators.as_mut_ptr()
437    }
438
439    fn mut_value_ptr(&mut self) -> *mut c_void {
440        self.values.as_mut_ptr() as *mut c_void
441    }
442}
443
444impl Resize for BinColumn {
445    fn resize(&mut self, new_capacity: usize) {
446        self.values.resize(new_capacity * self.max_len, 0);
447        self.indicators.resize(new_capacity, NULL_DATA);
448    }
449}
450
#[cfg(test)]
mod test {
    use super::BinColumn;
    use crate::{buffers::columnar::Resize, error::TooLargeBufferSize};

    /// A buffer of 10,000 elements with 2 GiB each must be reported as too large, rather than
    /// aborting the process.
    #[test]
    #[ignore = "On windows this tests does cause containerized linux and WSL to allocate all \
        memory instead of triggering a failed allocation."]
    fn allocating_too_big_a_binary_column() {
        const BATCH_SIZE: usize = 10_000;
        const TWO_GIB: usize = 2_147_483_648;

        let error = BinColumn::try_new(BATCH_SIZE, TWO_GIB).unwrap_err();

        assert!(matches!(
            error,
            TooLargeBufferSize {
                num_elements: BATCH_SIZE,
                element_size: TWO_GIB
            }
        ))
    }

    /// Growing the capacity keeps both the maximum element length and the existing values. The
    /// newly added element is NULL.
    #[test]
    fn resize_binary_column_buffer() {
        // Given a binary column with 2 elements
        let mut column = BinColumn::new(2, 10);
        let initial_values: [&[u8]; 2] = [b"Hello", b"World"];
        for (index, value) in initial_values.into_iter().enumerate() {
            column.set_value(index, Some(value));
        }

        // When resizing the column to 3 elements
        column.resize(3);

        // Then
        // the max element size is unchanged
        assert_eq!(column.max_len(), 10);
        // the values are still there
        for (index, value) in initial_values.into_iter().enumerate() {
            assert_eq!(column.value_at(index), Some(value));
        }
        // the third element is None
        assert_eq!(column.value_at(2), None);
    }
}