Skip to main content

odbc_api/buffers/
bin_column.rs

1use super::{ColumnBuffer, Indicator, Resize, Slice};
2
3use crate::{
4    DataType, Error,
5    columnar_bulk_inserter::BoundInputSlice,
6    error::TooLargeBufferSize,
7    handles::{CData, CDataMut, HasDataType, Statement, StatementRef},
8};
9
10use log::trace;
11use odbc_sys::{CDataType, NULL_DATA};
12use std::{cmp::min, ffi::c_void, num::NonZeroUsize};
13
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of bytes up to a maximum length. Since elements of this type have variable
/// length an additional indicator buffer is also maintained, whether the column is nullable or not.
/// Therefore this buffer type is used for variable-sized binary data, whether it is nullable or
/// not.
#[derive(Debug)]
pub struct BinColumn {
    /// Maximum element length in bytes. Each element owns a slot of exactly this many bytes in
    /// `values`, independent of the actual length of the value stored in it.
    max_len: usize,
    /// Consecutive bytes for all the elements in the buffer. We can find the first byte of the
    /// n-th element at `n * max_len`.
    values: Vec<u8>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length of the element in `values`
    /// with the same index. Please note that this value may be larger than `max_len` if the value
    /// has been truncated.
    indicators: Vec<isize>,
}
31
32impl BinColumn {
33    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
34    /// have a maximum length of `element_size`. Uses a fallibale allocation for creating the
35    /// buffer. In applications often the `element_size` of the buffer, might be directly inspired
36    /// by the maximum size of the type, as reported, by ODBC. Which might get exceedingly large for
37    /// types like VARBINARY(MAX), or IMAGE. On the downside, this method is potentially slower than
38    /// new.
39    pub fn try_new(batch_size: usize, element_size: usize) -> Result<Self, TooLargeBufferSize> {
40        let len = element_size * batch_size;
41        let mut values = Vec::new();
42        values
43            .try_reserve_exact(len)
44            .map_err(|_| TooLargeBufferSize {
45                num_elements: batch_size,
46                element_size,
47            })?;
48        values.resize(len, 0);
49        Ok(BinColumn {
50            max_len: element_size,
51            values,
52            indicators: vec![0; batch_size],
53        })
54    }
55
56    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
57    /// have a maximum length of `max_len`.
58    pub fn new(batch_size: usize, element_size: usize) -> Self {
59        let len = element_size * batch_size;
60        let mut values = Vec::new();
61        values.reserve_exact(len);
62        values.resize(len, 0);
63        BinColumn {
64            max_len: element_size,
65            values,
66            indicators: vec![0; batch_size],
67        }
68    }
69
70    /// Return the value for the given row index.
71    ///
72    /// The column buffer does not know how many elements were in the last row group, and therefore
73    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
74    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
75    /// equal to the maximum number of elements in the buffer.
76    pub fn value_at(&self, row_index: usize) -> Option<&[u8]> {
77        self.content_length_at(row_index).map(|length| {
78            let offset = row_index * self.max_len;
79            &self.values[offset..offset + length]
80        })
81    }
82
83    /// Indicator value at the specified position. Useful to detect truncation of data.
84    ///
85    /// The column buffer does not know how many elements were in the last row group, and therefore
86    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
87    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
88    /// equal to the maximum number of elements in the buffer.
89    pub fn indicator_at(&self, row_index: usize) -> Indicator {
90        Indicator::from_isize(self.indicators[row_index])
91    }
92
93    /// Length of value at the specified position. This is different from an indicator as it refers
94    /// to the length of the value in the buffer, not to the length of the value in the datasource.
95    /// The two things are different for truncated values.
96    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
97        match self.indicator_at(row_index) {
98            Indicator::Null => None,
99            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
100            Indicator::NoTotal => Some(self.max_len),
101            Indicator::Length(length) => {
102                let length = min(self.max_len, length);
103                Some(length)
104            }
105        }
106    }
107
108    /// Changes the maximum element length the buffer can hold. This operation is useful if you find
109    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
110    ///
111    /// # Parameters
112    ///
113    /// * `new_max_len`: New maximum string length without terminating zero.
114    pub fn set_max_len(&mut self, new_max_len: usize) {
115        let batch_size = self.indicators.len();
116        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
117        let new_values = vec![0u8; new_max_len * batch_size];
118        // Set all indicators to NULL
119        self.fill_null(0, batch_size);
120        self.values = new_values;
121        self.max_len = new_max_len;
122    }
123
124    /// Maximum length of elements in bytes.
125    pub fn max_len(&self) -> usize {
126        self.max_len
127    }
128
129    /// View of the first `num_rows` values of a binary column.
130    ///
131    /// Num rows may not exceed the actual amount of valid num_rows filled by the ODBC API. The
132    /// column buffer does not know how many elements were in the last row group, and therefore can
133    /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
134    /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
135    /// to the maximum number of elements in the buffer.
136    pub fn view(&self, num_rows: usize) -> BinColumnSlice<'_> {
137        BinColumnSlice {
138            num_rows,
139            col: self,
140        }
141    }
142
143    /// Sets the value of the buffer at index to NULL or the specified bytes. This method will panic
144    /// on out of bounds index, or if input holds a value which is longer than the maximum allowed
145    /// element length.
146    pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
147        if let Some(input) = input {
148            self.indicators[index] = input.len().try_into().unwrap();
149            if input.len() > self.max_len {
150                panic!(
151                    "Tried to insert a value into a binary buffer which is larger than the maximum \
152                    allowed element length for the buffer."
153                );
154            }
155            let start = self.max_len * index;
156            let end = start + input.len();
157            let buf = &mut self.values[start..end];
158            buf.copy_from_slice(input);
159        } else {
160            self.indicators[index] = NULL_DATA;
161        }
162    }
163
164    /// Fills the column with NULL, between From and To
165    pub fn fill_null(&mut self, from: usize, to: usize) {
166        for index in from..to {
167            self.indicators[index] = NULL_DATA;
168        }
169    }
170
171    /// Changes the maximum number of bytes per row the buffer can hold. This operation is useful if
172    /// you find an unexpected large input during insertion.
173    ///
174    /// This is however costly, as not only does the new buffer have to be allocated, but all values
175    /// have to copied from the old to the new buffer.
176    ///
177    /// This method could also be used to reduce the maximum length, which would truncate values in
178    /// the process.
179    ///
180    /// This method does not adjust indicator buffers as these might hold values larger than the
181    /// maximum length.
182    ///
183    /// # Parameters
184    ///
185    /// * `new_max_len`: New maximum element length in bytes.
186    /// * `num_rows`: Number of valid rows currently stored in this buffer.
187    pub fn resize_max_element_length(&mut self, new_max_len: usize, num_rows: usize) {
188        #[cfg(not(feature = "structured_logging"))]
189        trace!(
190            "Rebinding binary column buffer with {} elements. Maximum length {} => {}",
191            num_rows, self.max_len, new_max_len
192        );
193        #[cfg(feature = "structured_logging")]
194        trace!(
195            target: "odbc_api",
196            num_rows = num_rows,
197            old_max_len = self.max_len,
198            new_max_len = new_max_len;
199            "Binary column buffer resized"
200        );
201
202        let batch_size = self.indicators.len();
203        // Allocate a new buffer large enough to hold a batch of elements with maximum length.
204        let mut new_values = vec![0; new_max_len * batch_size];
205        // Copy values from old to new buffer.
206        let max_copy_length = min(self.max_len, new_max_len);
207        for ((&indicator, old_value), new_value) in self
208            .indicators
209            .iter()
210            .zip(self.values.chunks_exact_mut(self.max_len))
211            .zip(new_values.chunks_exact_mut(new_max_len))
212            .take(num_rows)
213        {
214            match Indicator::from_isize(indicator) {
215                Indicator::Null => (),
216                Indicator::NoTotal => {
217                    // There is no good choice here in case we are expanding the buffer. Since
218                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
219                    // be padded with 0. I currently cannot think of any use case there it would
220                    // matter.
221                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
222                }
223                Indicator::Length(num_bytes_len) => {
224                    let num_bytes_to_copy = min(num_bytes_len, max_copy_length);
225                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
226                }
227            }
228        }
229        self.values = new_values;
230        self.max_len = new_max_len;
231    }
232
233    /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum element
234    /// length should the input be too large.
235    ///
236    /// # Parameters
237    ///
238    /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
239    ///   currently in the buffer.
240    /// * `bytes`: Value to store.
241    pub fn append(&mut self, index: usize, bytes: Option<&[u8]>) {
242        if let Some(bytes) = bytes {
243            if bytes.len() > self.max_len {
244                let new_max_len = (bytes.len() as f64 * 1.2) as usize;
245                self.resize_max_element_length(new_max_len, index)
246            }
247
248            let offset = index * self.max_len;
249            self.values[offset..offset + bytes.len()].copy_from_slice(bytes);
250            // And of course set the indicator correctly.
251            self.indicators[index] = bytes.len().try_into().unwrap();
252        } else {
253            self.indicators[index] = NULL_DATA;
254        }
255    }
256}
257
258unsafe impl<'a> BoundInputSlice<'a> for BinColumn {
259    type SliceMut = BinColumnSliceMut<'a>;
260
261    unsafe fn as_view_mut(
262        &'a mut self,
263        parameter_index: u16,
264        stmt: StatementRef<'a>,
265    ) -> Self::SliceMut {
266        BinColumnSliceMut {
267            column: self,
268            stmt,
269            parameter_index,
270        }
271    }
272}
273
/// A view to a mutable array parameter binary buffer, which allows for filling the buffer with
/// values.
pub struct BinColumnSliceMut<'a> {
    // Column buffer the values are written to.
    column: &'a mut BinColumn,
    // Needed to rebind the column in case of reallocation
    stmt: StatementRef<'a>,
    // Also needed to rebind the column in case of reallocation
    parameter_index: u16,
}
283
284impl BinColumnSliceMut<'_> {
285    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
286    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
287    /// allowed element length. `element` must be specified without the terminating zero.
288    pub fn set_cell(&mut self, row_index: usize, element: Option<&[u8]>) {
289        self.column.set_value(row_index, element)
290    }
291
292    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
293    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
294    /// The first `num_rows_to_copy_elements` will be copied from the old value buffer to the new
295    /// one. This makes this an extremly expensive operation.
296    pub fn ensure_max_element_length(
297        &mut self,
298        element_length: usize,
299        num_rows_to_copy: usize,
300    ) -> Result<(), Error> {
301        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
302        // in order to hold it. This invalidates the pointers previously bound to the statement. So
303        // we rebind them.
304        if element_length > self.column.max_len() {
305            self.column
306                .resize_max_element_length(element_length, num_rows_to_copy);
307            unsafe {
308                self.stmt
309                    .bind_input_parameter(self.parameter_index, self.column)
310                    .into_result(&self.stmt)?
311            }
312        }
313        Ok(())
314    }
315}
316
/// Read-only view on a [`BinColumn`] which treats the first `num_rows` elements as valid.
#[derive(Debug, Clone, Copy)]
pub struct BinColumnSlice<'c> {
    /// Number of rows reported by `len` and visited by `iter`.
    num_rows: usize,
    /// Column buffer the values are read from.
    col: &'c BinColumn,
}
322
323impl<'c> BinColumnSlice<'c> {
324    /// The number of valid elements in the text column.
325    pub fn len(&self) -> usize {
326        self.num_rows
327    }
328
329    /// True if, and only if there are no valid rows in the column buffer.
330    pub fn is_empty(&self) -> bool {
331        self.num_rows == 0
332    }
333
334    /// Slice of text at the specified row index without terminating zero.
335    pub fn get(&self, index: usize) -> Option<&'c [u8]> {
336        self.col.value_at(index)
337    }
338
339    /// Iterator over the valid elements of the text buffer
340    pub fn iter(&self) -> BinColumnIt<'c> {
341        BinColumnIt {
342            pos: 0,
343            num_rows: self.num_rows,
344            col: self.col,
345        }
346    }
347
348    /// Finds an indicator larger than max element in the range [0, num_rows).
349    ///
350    /// After fetching data we may want to know if any value has been truncated due to the buffer
351    /// not being able to hold elements of that size. This method checks the indicator buffer
352    /// element wise.
353    pub fn has_truncated_values(&self) -> Option<Indicator> {
354        self.col.has_truncated_values(self.num_rows)
355    }
356}
357
358unsafe impl Slice for BinColumn {
359    type Slice<'a> = BinColumnSlice<'a>;
360
361    fn slice(&self, valid_rows: usize) -> Self::Slice<'_> {
362        BinColumnSlice {
363            num_rows: valid_rows,
364            col: self,
365        }
366    }
367}
368
/// Iterator over a binary column. See [`crate::buffers::BinColumn`]
#[derive(Debug)]
pub struct BinColumnIt<'c> {
    /// Index of the row yielded by the next call to `next`.
    pos: usize,
    /// Iteration stops once `pos` reaches this number of rows.
    num_rows: usize,
    /// Column buffer the values are read from.
    col: &'c BinColumn,
}
376
377impl<'c> Iterator for BinColumnIt<'c> {
378    type Item = Option<&'c [u8]>;
379
380    fn next(&mut self) -> Option<Self::Item> {
381        if self.pos == self.num_rows {
382            None
383        } else {
384            let ret = Some(self.col.value_at(self.pos));
385            self.pos += 1;
386            ret
387        }
388    }
389
390    fn size_hint(&self) -> (usize, Option<usize>) {
391        let len = self.num_rows - self.pos;
392        (len, Some(len))
393    }
394}
395
// The `size_hint` of `BinColumnIt` reports `num_rows - pos` as both lower and upper bound, so the
// default `len` provided by `ExactSizeIterator` yields the number of remaining rows.
impl ExactSizeIterator for BinColumnIt<'_> {}
397
398unsafe impl CData for BinColumn {
399    fn cdata_type(&self) -> CDataType {
400        CDataType::Binary
401    }
402
403    fn indicator_ptr(&self) -> *const isize {
404        self.indicators.as_ptr()
405    }
406
407    fn value_ptr(&self) -> *const c_void {
408        self.values.as_ptr() as *const c_void
409    }
410
411    fn buffer_length(&self) -> isize {
412        self.max_len.try_into().unwrap()
413    }
414}
415
416impl HasDataType for BinColumn {
417    fn data_type(&self) -> DataType {
418        DataType::Varbinary {
419            length: NonZeroUsize::new(self.max_len),
420        }
421    }
422}
423
424unsafe impl CDataMut for BinColumn {
425    fn mut_indicator_ptr(&mut self) -> *mut isize {
426        self.indicators.as_mut_ptr()
427    }
428
429    fn mut_value_ptr(&mut self) -> *mut c_void {
430        self.values.as_mut_ptr() as *mut c_void
431    }
432}
433
434impl Resize for BinColumn {
435    fn resize(&mut self, new_capacity: usize) {
436        self.values.resize(new_capacity * self.max_len, 0);
437        self.indicators.resize(new_capacity, NULL_DATA);
438    }
439}
440
441unsafe impl ColumnBuffer for BinColumn {
442    fn capacity(&self) -> usize {
443        self.indicators.len()
444    }
445
446    fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
447        self.indicators
448            .iter()
449            .copied()
450            .take(num_rows)
451            .find_map(|indicator| {
452                let indicator = Indicator::from_isize(indicator);
453                indicator.is_truncated(self.max_len).then_some(indicator)
454            })
455    }
456}
457
#[cfg(test)]
mod test {
    use crate::error::TooLargeBufferSize;

    use super::{BinColumn, Resize};

    #[test]
    #[ignore = "On windows this tests does cause containerized linux and WSL to allocate all \
        memory instead of triggering a failed allocation."]
    fn allocating_too_big_a_binary_column() {
        // Given an element size of two GiB
        let two_gib = 2_147_483_648;

        // When trying to allocate a buffer for 10,000 of these elements
        let TooLargeBufferSize {
            num_elements,
            element_size,
        } = BinColumn::try_new(10_000, two_gib).unwrap_err();

        // Then the error reports the requested dimensions
        assert_eq!(num_elements, 10_000);
        assert_eq!(element_size, 2_147_483_648);
    }

    #[test]
    fn resize_binary_column_buffer() {
        // Given a binary column with 2 elements
        let mut column = BinColumn::new(2, 10);
        column.set_value(0, Some(b"Hello"));
        column.set_value(1, Some(b"World"));

        // When resizing the column to 3 elements
        column.resize(3);

        // Then
        // the max element size is unchanged
        assert_eq!(10, column.max_len());
        // the values are still there
        assert_eq!(Some(&b"Hello"[..]), column.value_at(0));
        assert_eq!(Some(&b"World"[..]), column.value_at(1));
        // the third element is None
        assert!(column.value_at(2).is_none());
    }
}
499}