odbc_api/buffers/
text_column.rs

1use crate::{
2    DataType, Error,
3    columnar_bulk_inserter::BoundInputSlice,
4    error::TooLargeBufferSize,
5    handles::{
6        ASSUMED_MAX_LENGTH_OF_W_VARCHAR, CData, CDataMut, HasDataType, Statement, StatementRef,
7    },
8};
9
10use super::{ColumnBuffer, Indicator};
11
12use log::debug;
13use odbc_sys::{CDataType, NULL_DATA};
14use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
15use widestring::U16Str;
16
/// A column buffer for character data. This is a [`TextColumn`] whose elements are `u8`. The
/// actual encoding used may depend on your system locale.
pub type CharColumn = TextColumn<u8>;
19
/// A [`TextColumn`] whose elements are `u16`. This buffer uses wide characters which implies
/// UTF-16 encoding. UTF-8 encoding is preferable for most applications, but contrary to its
/// sibling [`crate::buffers::CharColumn`] the implied encoding of this buffer type does not
/// depend on the system locale.
pub type WCharColumn = TextColumn<u16>;
24
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of characters up to a maximum string length. Since most SQL types have a string
/// representation this buffer can be bound to a column of almost any type, ODBC driver and driver
/// manager should take care of the conversion. Since elements of this type have variable length,
/// an indicator buffer needs to be bound whether the column is nullable or not. Nullability
/// therefore does not matter for this buffer.
///
/// Character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum text length without terminating zero.
    max_str_len: usize,
    /// Value buffer. Each row occupies a slot of `max_str_len + 1` characters (the extra one is
    /// for the terminating zero), so the element of row `i` starts at `i * (max_str_len + 1)`.
    values: Vec<C>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length in bytes of the element
    /// in `values` with the same index. Please note that this value may be larger than
    /// `max_str_len` if the text has been truncated.
    indicators: Vec<isize>,
}
43
44impl<C> TextColumn<C> {
45    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
46    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
47    /// one in order to make space for the null terminating zero at the end of strings. Uses a
48    /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
49    /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
50    /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
51    pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
52    where
53        C: Default + Copy,
54    {
55        // Element size is +1 to account for terminating zero
56        let element_size = max_str_len + 1;
57        let len = element_size * batch_size;
58        let mut values = Vec::new();
59        values
60            .try_reserve_exact(len)
61            .map_err(|_| TooLargeBufferSize {
62                num_elements: batch_size,
63                // We want the element size in bytes
64                element_size: element_size * size_of::<C>(),
65            })?;
66        values.resize(len, C::default());
67        Ok(TextColumn {
68            max_str_len,
69            values,
70            indicators: vec![0; batch_size],
71        })
72    }
73
74    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
75    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
76    /// one in order to make space for the null terminating zero at the end of strings. All
77    /// indicators are set to [`crate::sys::NULL_DATA`] by default.
78    pub fn new(batch_size: usize, max_str_len: usize) -> Self
79    where
80        C: Default + Copy,
81    {
82        // Element size is +1 to account for terminating zero
83        let element_size = max_str_len + 1;
84        let len = element_size * batch_size;
85        let mut values = Vec::new();
86        values.reserve_exact(len);
87        values.resize(len, C::default());
88        TextColumn {
89            max_str_len,
90            values,
91            indicators: vec![NULL_DATA; batch_size],
92        }
93    }
94
95    /// Bytes of string at the specified position. Includes interior nuls, but excludes the
96    /// terminating nul.
97    ///
98    /// The column buffer does not know how many elements were in the last row group, and therefore
99    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
100    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
101    /// equal to the maximum number of elements in the buffer.
102    pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
103        self.content_length_at(row_index).map(|length| {
104            let offset = row_index * (self.max_str_len + 1);
105            &self.values[offset..offset + length]
106        })
107    }
108
    /// Maximum length of elements in characters, without the terminating zero.
    pub fn max_len(&self) -> usize {
        self.max_str_len
    }
113
114    /// Indicator value at the specified position. Useful to detect truncation of data.
115    ///
116    /// The column buffer does not know how many elements were in the last row group, and therefore
117    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
118    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
119    /// equal to the maximum number of elements in the buffer.
120    pub fn indicator_at(&self, row_index: usize) -> Indicator {
121        Indicator::from_isize(self.indicators[row_index])
122    }
123
124    /// Length of value at the specified position. This is different from an indicator as it refers
125    /// to the length of the value in the buffer, not to the length of the value in the datasource.
126    /// The two things are different for truncated values.
127    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
128        match self.indicator_at(row_index) {
129            Indicator::Null => None,
130            // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
131            Indicator::NoTotal => Some(self.max_str_len),
132            Indicator::Length(length_in_bytes) => {
133                let length_in_chars = length_in_bytes / size_of::<C>();
134                let length = min(self.max_str_len, length_in_chars);
135                Some(length)
136            }
137        }
138    }
139
140    /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
141    ///
142    /// After fetching data we may want to know if any value has been truncated due to the buffer
143    /// not being able to hold elements of that size. This method checks the indicator buffer
144    /// element wise.
145    pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
146        let max_bin_length = self.max_str_len * size_of::<C>();
147        self.indicators
148            .iter()
149            .copied()
150            .take(num_rows)
151            .find_map(|indicator| {
152                let indicator = Indicator::from_isize(indicator);
153                indicator.is_truncated(max_bin_length).then_some(indicator)
154            })
155    }
156
157    /// Changes the maximum string length the buffer can hold. This operation is useful if you find
158    /// an unexpected large input string during insertion.
159    ///
160    /// This is however costly, as not only does the new buffer have to be allocated, but all values
161    /// have to copied from the old to the new buffer.
162    ///
163    /// This method could also be used to reduce the maximum string length, which would truncate
164    /// strings in the process.
165    ///
166    /// This method does not adjust indicator buffers as these might hold values larger than the
167    /// maximum string length.
168    ///
169    /// # Parameters
170    ///
171    /// * `new_max_str_len`: New maximum string length without terminating zero.
172    /// * `num_rows`: Number of valid rows currently stored in this buffer.
173    pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
174    where
175        C: Default + Copy,
176    {
177        debug!(
178            "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
179            num_rows, self.max_str_len, new_max_str_len
180        );
181
182        let batch_size = self.indicators.len();
183        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
184        let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
185        // Copy values from old to new buffer.
186        let max_copy_length = min(self.max_str_len, new_max_str_len);
187        for ((&indicator, old_value), new_value) in self
188            .indicators
189            .iter()
190            .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
191            .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
192            .take(num_rows)
193        {
194            match Indicator::from_isize(indicator) {
195                Indicator::Null => (),
196                Indicator::NoTotal => {
197                    // There is no good choice here in case we are expanding the buffer. Since
198                    // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
199                    // be padded with 0. I currently cannot think of any use case there it would
200                    // matter.
201                    new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
202                }
203                Indicator::Length(num_bytes_len) => {
204                    let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
205                    new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
206                }
207            }
208        }
209        self.values = new_values;
210        self.max_str_len = new_max_str_len;
211    }
212
213    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
214    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
215    /// allowed element length. `input` must be specified without the terminating zero.
216    pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
217    where
218        C: Default + Copy,
219    {
220        if let Some(input) = input {
221            self.set_mut(index, input.len()).copy_from_slice(input);
222        } else {
223            self.indicators[index] = NULL_DATA;
224        }
225    }
226
227    /// Can be used to set a value at a specific row index without performing a memcopy on an input
228    /// slice and instead provides direct access to the underlying buffer.
229    ///
230    /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
231    /// be more convenient. This method is very useful if you want to `write!` a string value to the
232    /// buffer and the binary (**!**) length of the formatted string is known upfront.
233    ///
234    /// # Example: Write timestamp to text column.
235    ///
236    /// ```
237    /// use odbc_api::buffers::TextColumn;
238    /// use std::io::Write;
239    ///
240    /// /// Writes times formatted as hh::mm::ss.fff
241    /// fn write_time(
242    ///     col: &mut TextColumn<u8>,
243    ///     index: usize,
244    ///     hours: u8,
245    ///     minutes: u8,
246    ///     seconds: u8,
247    ///     milliseconds: u16)
248    /// {
249    ///     write!(
250    ///         col.set_mut(index, 12),
251    ///         "{:02}:{:02}:{:02}.{:03}",
252    ///         hours, minutes, seconds, milliseconds
253    ///     ).unwrap();
254    /// }
255    /// ```
256    pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
257    where
258        C: Default,
259    {
260        if length > self.max_str_len {
261            panic!(
262                "Tried to insert a value into a text buffer which is larger than the maximum \
263                allowed string length for the buffer."
264            );
265        }
266        self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
267        let start = (self.max_str_len + 1) * index;
268        let end = start + length;
269        // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
270        // driver would not care about the value in the index buffer and only look for the
271        // terminating zero.
272        self.values[end] = C::default();
273        &mut self.values[start..end]
274    }
275
276    /// Fills the column with NULL, between From and To
277    pub fn fill_null(&mut self, from: usize, to: usize) {
278        for index in from..to {
279            self.indicators[index] = NULL_DATA;
280        }
281    }
282
283    /// Provides access to the raw underlying value buffer. Normal applications should have little
284    /// reason to call this method. Yet it may be useful for writing bindings which copy directly
285    /// from the ODBC in memory representation into other kinds of buffers.
286    ///
287    /// The buffer contains the bytes for every non null valid element, padded to the maximum string
288    /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
289    /// terminating zero at the end of each string. For the actual value length call
290    /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
291    pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
292        &self.values[..(self.max_str_len + 1) * num_valid_rows]
293    }
294
295    /// The maximum number of rows the TextColumn can hold.
296    pub fn row_capacity(&self) -> usize {
297        self.values.len()
298    }
299}
300
301impl WCharColumn {
302    /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
303    /// the terminating nul.
304    ///
305    /// # Safety
306    ///
307    /// The column buffer does not know how many elements were in the last row group, and therefore
308    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
309    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
310    /// equal to the maximum number of elements in the buffer.
311    pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
312        self.value_at(row_index).map(U16Str::from_slice)
313    }
314}
315
316unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
317where
318    TextColumn<C>: CDataMut + HasDataType,
319{
320    type View<'a> = TextColumnView<'a, C>;
321
322    fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
323        TextColumnView {
324            num_rows: valid_rows,
325            col: self,
326        }
327    }
328
329    fn fill_default(&mut self, from: usize, to: usize) {
330        self.fill_null(from, to)
331    }
332
333    /// Maximum number of text strings this column may hold.
334    fn capacity(&self) -> usize {
335        self.indicators.len()
336    }
337
338    fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
339        let max_bin_length = self.max_str_len * size_of::<C>();
340        self.indicators
341            .iter()
342            .copied()
343            .take(num_rows)
344            .find_map(|indicator| {
345                let indicator = Indicator::from_isize(indicator);
346                indicator.is_truncated(max_bin_length).then_some(indicator)
347            })
348    }
349}
350
/// Allows read only access to the valid part of a text column.
///
/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
/// problem with `TextColumn` is, that it is a buffer, but it has no idea how many of its members
/// are actually valid, and have been returned with the last row group of the result set. That
/// number is maintained on the level of the entire column buffer. So a text column view knows the
/// number of valid rows, in addition to holding a reference to the buffer, in order to guarantee
/// that every element accessed through it is valid.
#[derive(Debug, Clone, Copy)]
pub struct TextColumnView<'c, C> {
    /// Number of valid rows in `col`.
    num_rows: usize,
    /// The buffer this view provides access to.
    col: &'c TextColumn<C>,
}
364
365impl<'c, C> TextColumnView<'c, C> {
366    /// The number of valid elements in the text column.
367    pub fn len(&self) -> usize {
368        self.num_rows
369    }
370
371    /// True if, and only if there are no valid rows in the column buffer.
372    pub fn is_empty(&self) -> bool {
373        self.num_rows == 0
374    }
375
376    /// Slice of text at the specified row index without terminating zero. `None` if the value is
377    /// `NULL`. This method will panic if the index is larger than the number of valid rows in the
378    /// view as returned by [`Self::len`].
379    pub fn get(&self, index: usize) -> Option<&'c [C]> {
380        self.col.value_at(index)
381    }
382
383    /// Iterator over the valid elements of the text buffer
384    pub fn iter(&self) -> TextColumnIt<'c, C> {
385        TextColumnIt {
386            pos: 0,
387            num_rows: self.num_rows,
388            col: self.col,
389        }
390    }
391
392    /// Length of value at the specified position. This is different from an indicator as it refers
393    /// to the length of the value in the buffer, not to the length of the value in the datasource.
394    /// The two things are different for truncated values.
395    pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
396        if row_index >= self.num_rows {
397            panic!("Row index points beyond the range of valid values.")
398        }
399        self.col.content_length_at(row_index)
400    }
401
402    /// Provides access to the raw underlying value buffer. Normal applications should have little
403    /// reason to call this method. Yet it may be useful for writing bindings which copy directly
404    /// from the ODBC in memory representation into other kinds of buffers.
405    ///
406    /// The buffer contains the bytes for every non null valid element, padded to the maximum string
407    /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
408    /// terminating zero at the end of each string. For the actual value length call
409    /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
410    pub fn raw_value_buffer(&self) -> &'c [C] {
411        self.col.raw_value_buffer(self.num_rows)
412    }
413
414    pub fn max_len(&self) -> usize {
415        self.col.max_len()
416    }
417
418    /// `Some` if any value is truncated.
419    ///
420    /// After fetching data we may want to know if any value has been truncated due to the buffer
421    /// not being able to hold elements of that size. This method checks the indicator buffer
422    /// element wise.
423    pub fn has_truncated_values(&self) -> Option<Indicator> {
424        self.col.has_truncated_values(self.num_rows)
425    }
426}
427
// Allows a text column to be used as an array parameter buffer whose values are filled through a
// `TextColumnSliceMut`.
unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
    type SliceMut = TextColumnSliceMut<'a, C>;

    /// Wraps this column together with the statement and parameter index into a
    /// [`TextColumnSliceMut`]. The latter two are stored so the column can be rebound should its
    /// buffer be reallocated.
    unsafe fn as_view_mut(
        &'a mut self,
        parameter_index: u16,
        stmt: StatementRef<'a>,
    ) -> Self::SliceMut {
        TextColumnSliceMut {
            column: self,
            stmt,
            parameter_index,
        }
    }
}
443
/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
/// values. It can also grow the maximum element length of the buffer, in which case the buffer is
/// rebound to the statement.
pub struct TextColumnSliceMut<'a, C> {
    /// The text column which is filled through this view.
    column: &'a mut TextColumn<C>,
    // Needed to rebind the column in case of resize
    stmt: StatementRef<'a>,
    // Also needed to rebind the column in case of resize
    parameter_index: u16,
}
453
454impl<C> TextColumnSliceMut<'_, C>
455where
456    C: Default + Copy,
457{
458    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
459    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
460    /// allowed element length. `element` must be specified without the terminating zero.
461    pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
462        self.column.set_value(row_index, element)
463    }
464
465    /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
466    /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
467    /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
468    /// one. This makes this an extremely expensive operation.
469    pub fn ensure_max_element_length(
470        &mut self,
471        element_length: usize,
472        num_rows_to_copy: usize,
473    ) -> Result<(), Error>
474    where
475        TextColumn<C>: HasDataType + CData,
476    {
477        // Column buffer is not large enough to hold the element. We must allocate a larger buffer
478        // in order to hold it. This invalidates the pointers previously bound to the statement. So
479        // we rebind them.
480        if element_length > self.column.max_len() {
481            let new_max_str_len = element_length;
482            self.column
483                .resize_max_str(new_max_str_len, num_rows_to_copy);
484            unsafe {
485                self.stmt
486                    .bind_input_parameter(self.parameter_index, self.column)
487                    .into_result(&self.stmt)?
488            }
489        }
490        Ok(())
491    }
492
493    /// Can be used to set a value at a specific row index without performing a memcopy on an input
494    /// slice and instead provides direct access to the underlying buffer.
495    ///
496    /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
497    /// be more convenient. This method is very useful if you want to `write!` a string value to the
498    /// buffer and the binary (**!**) length of the formatted string is known upfront.
499    ///
500    /// # Example: Write timestamp to text column.
501    ///
502    /// ```
503    /// use odbc_api::buffers::TextColumnSliceMut;
504    /// use std::io::Write;
505    ///
506    /// /// Writes times formatted as hh::mm::ss.fff
507    /// fn write_time(
508    ///     col: &mut TextColumnSliceMut<u8>,
509    ///     index: usize,
510    ///     hours: u8,
511    ///     minutes: u8,
512    ///     seconds: u8,
513    ///     milliseconds: u16)
514    /// {
515    ///     write!(
516    ///         col.set_mut(index, 12),
517    ///         "{:02}:{:02}:{:02}.{:03}",
518    ///         hours, minutes, seconds, milliseconds
519    ///     ).unwrap();
520    /// }
521    /// ```
522    pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
523        self.column.set_mut(index, length)
524    }
525}
526
/// Iterator over the valid elements of a text column. See [`TextColumnView::iter`]
#[derive(Debug)]
pub struct TextColumnIt<'c, C> {
    /// Index of the row yielded by the next call to `next`.
    pos: usize,
    /// Number of valid rows. Iteration stops once `pos` reaches this value.
    num_rows: usize,
    /// The buffer the elements are read from.
    col: &'c TextColumn<C>,
}
534
535impl<'c, C> TextColumnIt<'c, C> {
536    fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
537        if self.pos == self.num_rows {
538            None
539        } else {
540            let ret = Some(self.col.value_at(self.pos));
541            self.pos += 1;
542            ret
543        }
544    }
545}
546
547impl<'c> Iterator for TextColumnIt<'c, u8> {
548    type Item = Option<&'c [u8]>;
549
550    fn next(&mut self) -> Option<Self::Item> {
551        self.next_impl()
552    }
553
554    fn size_hint(&self) -> (usize, Option<usize>) {
555        let len = self.num_rows - self.pos;
556        (len, Some(len))
557    }
558}
559
560impl ExactSizeIterator for TextColumnIt<'_, u8> {}
561
562impl<'c> Iterator for TextColumnIt<'c, u16> {
563    type Item = Option<&'c U16Str>;
564
565    fn next(&mut self) -> Option<Self::Item> {
566        self.next_impl().map(|opt| opt.map(U16Str::from_slice))
567    }
568
569    fn size_hint(&self) -> (usize, Option<usize>) {
570        let len = self.num_rows - self.pos;
571        (len, Some(len))
572    }
573}
574
575impl ExactSizeIterator for TextColumnIt<'_, u16> {}
576
577unsafe impl CData for CharColumn {
578    fn cdata_type(&self) -> CDataType {
579        CDataType::Char
580    }
581
582    fn indicator_ptr(&self) -> *const isize {
583        self.indicators.as_ptr()
584    }
585
586    fn value_ptr(&self) -> *const c_void {
587        self.values.as_ptr() as *const c_void
588    }
589
590    fn buffer_length(&self) -> isize {
591        (self.max_str_len + 1).try_into().unwrap()
592    }
593}
594
595unsafe impl CDataMut for CharColumn {
596    fn mut_indicator_ptr(&mut self) -> *mut isize {
597        self.indicators.as_mut_ptr()
598    }
599
600    fn mut_value_ptr(&mut self) -> *mut c_void {
601        self.values.as_mut_ptr() as *mut c_void
602    }
603}
604
605impl HasDataType for CharColumn {
606    fn data_type(&self) -> DataType {
607        DataType::Varchar {
608            length: NonZeroUsize::new(self.max_str_len),
609        }
610    }
611}
612
613unsafe impl CData for WCharColumn {
614    fn cdata_type(&self) -> CDataType {
615        CDataType::WChar
616    }
617
618    fn indicator_ptr(&self) -> *const isize {
619        self.indicators.as_ptr()
620    }
621
622    fn value_ptr(&self) -> *const c_void {
623        self.values.as_ptr() as *const c_void
624    }
625
626    fn buffer_length(&self) -> isize {
627        ((self.max_str_len + 1) * 2).try_into().unwrap()
628    }
629}
630
631unsafe impl CDataMut for WCharColumn {
632    fn mut_indicator_ptr(&mut self) -> *mut isize {
633        self.indicators.as_mut_ptr()
634    }
635
636    fn mut_value_ptr(&mut self) -> *mut c_void {
637        self.values.as_mut_ptr() as *mut c_void
638    }
639}
640
641impl HasDataType for WCharColumn {
642    fn data_type(&self) -> DataType {
643        if self.max_str_len <= ASSUMED_MAX_LENGTH_OF_W_VARCHAR {
644            DataType::WVarchar {
645                length: NonZeroUsize::new(self.max_str_len),
646            }
647        } else {
648            DataType::WLongVarchar {
649                length: NonZeroUsize::new(self.max_str_len),
650            }
651        }
652    }
653}