1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
use crate::{
    handles::{CData, CDataMut, HasDataType},
    DataType,
};

use log::debug;
use odbc_sys::{CDataType, NULL_DATA};
use std::{cmp::min, convert::TryInto, ffi::c_void};

/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of characters up to a maximum string length. Since most SQL types have a string
/// representation this buffer can be bound to a column of almost any type, ODBC driver and driver
/// manager should take care of the conversion. Since elements of this type have variable length an
/// indicator buffer needs to be bound, whether the column is nullable or not, and therefore does
/// not matter for this buffer.
#[derive(Debug)]
pub struct TextColumn {
    /// Maximum text length without terminating zero.
    max_str_len: usize,
    values: Vec<u8>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length of the element in value
    /// with the same index. Please note that this value may be larger than `max_str_len` if the
    /// text has been truncated.
    indicators: Vec<isize>,
}

impl TextColumn {
    /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
    /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
    /// one in order to make space for the null terminating zero at the end of strings.
    pub fn new(batch_size: usize, max_str_len: usize) -> Self {
        TextColumn {
            max_str_len,
            values: vec![0; (max_str_len + 1) * batch_size],
            indicators: vec![0; batch_size],
        }
    }

    /// Return the bytes of string at the specified position. Including interior nuls, but excluding
    /// the terminating nul.
    ///
    /// # Safety
    ///
    /// The column buffer does not know how many elements were in the last row group, and therefore
    /// can not guarantee the accessed element to be valid and in a defined state. It also can not
    /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
    /// equal to the maximum number of elements in the buffer.
    pub unsafe fn value_at(&self, row_index: usize) -> Option<&[u8]> {
        let str_len = self.indicators[row_index];
        if str_len == NULL_DATA {
            None
        } else {
            let offset = row_index * (self.max_str_len + 1);
            let length = min(self.max_str_len, str_len.try_into().unwrap());
            Some(&self.values[offset..offset + length])
        }
    }

    /// Changes the maximum string length the buffer can hold. This operation is useful if you find
    /// an unexpected large input string during insertion.
    ///
    /// This is however costly, as not only does the new buffer have to be allocated, but all values
    /// have to copied from the old to the new buffer.
    ///
    /// This method could also be used to reduce the maximum string length, which would truncate
    /// strings in the process.
    ///
    /// This method does not adjust indicator buffers as these might hold values larger than the
    /// maximum string length.
    ///
    /// # Parameters
    ///
    /// * `new_max_str_len`: New maximum string length without terminating zero.
    /// * `num_rows`: Number of valid rows currently stored in this buffer.
    pub fn rebind(&mut self, new_max_str_len: usize, num_rows: usize) {
        debug!(
            "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
            num_rows, self.max_str_len, new_max_str_len
        );

        let batch_size = self.indicators.len();
        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
        let mut new_values = vec![0u8; (new_max_str_len + 1) * batch_size];
        // Copy values from old to new buffer.
        let max_copy_length = min(self.max_str_len, new_max_str_len);
        for ((&indicator, old_value), new_value) in self
            .indicators
            .iter()
            .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
            .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
            .take(num_rows)
        {
            if indicator != NULL_DATA {
                let num_bytes_to_copy = min(indicator as usize, max_copy_length);
                new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
            }
        }
        self.values = new_values;
        self.max_str_len = new_max_str_len;
    }

    /// Changes the maximum element length the buffer can hold. This operation is useful if you find
    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
    ///
    /// # Parameters
    ///
    /// * `new_max_len`: New maximum string length without terminating zero.
    pub fn set_max_len(&mut self, new_max_len: usize) {
        let batch_size = self.indicators.len();
        // Allocate a new buffer large enough to hold a batch of strings with maximum length.
        let new_values = vec![0u8; (new_max_len + 1) * batch_size];
        // Set all indicators to NULL
        self.fill_null(0, batch_size);
        self.values = new_values;
        self.max_str_len = new_max_len;
    }

    /// Appends a new element to the column buffer. Rebinds the buffer to increase maximum string
    /// length should text be to large.
    ///
    /// # Parameters
    ///
    /// * `index`: Zero based index of the new row position. Must be equal to the number of rows
    ///   currently in the buffer.
    /// * `text`: Text to store without terminating zero.
    pub fn append(&mut self, index: usize, text: Option<&[u8]>) {
        if let Some(text) = text {
            if text.len() > self.max_str_len {
                let new_max_str_len = (text.len() as f64 * 1.2) as usize;
                self.rebind(new_max_str_len, index)
            }

            let offset = index * (self.max_str_len + 1);
            self.values[offset..offset + text.len()].copy_from_slice(text);
            // Add terminating zero to string.
            self.values[offset + text.len()] = 0;
            // And of course set the indicator correctly.
            self.indicators[index] = text.len().try_into().unwrap();
        } else {
            self.indicators[index] = NULL_DATA;
        }
    }

    /// Iterator over the first `num_rows` values of a text column.
    ///
    /// # Safety
    ///
    /// Num rows may not exceed the actualy amount of valid num_rows filled be the ODBC API. The
    /// column buffer does not know how many elements were in the last row group, and therefore can
    /// not guarantee the accessed element to be valid and in a defined state. It also can not panic
    /// on accessing an undefined element. It will panic however if `row_index` is larger or equal
    /// to the maximum number of elements in the buffer.
    pub unsafe fn iter(&self, num_rows: usize) -> TextColumnIt {
        TextColumnIt {
            pos: 0,
            num_rows,
            col: &self,
        }
    }

    /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
    /// panic on out of bounds index, or if input holds a text which is larger than the maximum
    /// allowed element length. `input` must be specified without the terminating zero.
    pub fn set_value(&mut self, index: usize, input: Option<&[u8]>) {
        if let Some(input) = input {
            self.indicators[index] = input.len().try_into().unwrap();
            if input.len() > self.max_str_len {
                panic!(
                    "Tried to insert a value into a text buffer which is larger than the \
                    maximum allowed string length for the buffer."
                );
            }
            let start = (self.max_str_len + 1) * index;
            let end = start + input.len();
            let buf = &mut self.values[start..end];
            buf.copy_from_slice(input);
            // Let's insert a terminating zero at the end to be on the safe side, in case the
            // ODBC driver would not care about the value in the index buffer and only look for the
            // terminating zero.
            self.values[end + 1] = 0;
        } else {
            self.indicators[index] = NULL_DATA;
        }
    }

    /// Fills the column with NULL, between From and To
    pub fn fill_null(&mut self, from: usize, to: usize) {
        for index in from..to {
            self.indicators[index] = NULL_DATA;
        }
    }

    /// A writer able to fill the first `n` elements of the buffer, from an iterator.
    pub fn writer_n(&mut self, n: usize) -> TextColumnWriter<'_> {
        TextColumnWriter {
            column: self,
            to: n,
        }
    }
}

/// Iterator over a text column. See [`TextColumn::iter`]
#[derive(Debug)]
pub struct TextColumnIt<'c> {
    pos: usize,
    num_rows: usize,
    col: &'c TextColumn,
}

impl<'c> Iterator for TextColumnIt<'c> {
    type Item = Option<&'c [u8]>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.pos == self.num_rows {
            None
        } else {
            let ret = unsafe { Some(self.col.value_at(self.pos)) };
            self.pos += 1;
            ret
        }
    }
}

/// Fills a text column buffer with elements from an Iterator.
#[derive(Debug)]
pub struct TextColumnWriter<'a> {
    column: &'a mut TextColumn,
    /// Upper limit, the text column writer will not write beyond this index.
    to: usize,
}

impl<'a> TextColumnWriter<'a> {
    /// Fill the text column with values by consuming the iterator and copying its items into the
    /// buffer. It will not extract more items from the iterator than the buffer may hold. This
    /// method panics if strings returned by the iterator are larger than the maximum element length
    /// of the buffer.
    pub fn write<'b>(&mut self, it: impl Iterator<Item = Option<&'b [u8]>>) {
        for (index, item) in it.enumerate().take(self.to) {
            self.column.set_value(index, item)
        }
    }

    /// Changes the maximum string length the buffer can hold. This operation is useful if you find
    /// an unexpected large input during insertion. All values in the buffer will be set to NULL.
    ///
    /// # Parameters
    ///
    /// * `new_max_len`: New maximum string length without terminating zero.
    pub fn set_max_len(&mut self, new_max_len: usize) {
        self.column.set_max_len(new_max_len)
    }
}

unsafe impl CData for TextColumn {
    fn cdata_type(&self) -> CDataType {
        CDataType::Char
    }

    fn indicator_ptr(&self) -> *const isize {
        self.indicators.as_ptr()
    }

    fn value_ptr(&self) -> *const c_void {
        self.values.as_ptr() as *const c_void
    }

    fn buffer_length(&self) -> isize {
        (self.max_str_len + 1).try_into().unwrap()
    }
}

unsafe impl HasDataType for TextColumn {
    fn data_type(&self) -> DataType {
        DataType::Varchar {
            length: self.max_str_len,
        }
    }
}

unsafe impl CDataMut for TextColumn {
    fn mut_indicator_ptr(&mut self) -> *mut isize {
        self.indicators.as_mut_ptr()
    }

    fn mut_value_ptr(&mut self) -> *mut c_void {
        self.values.as_mut_ptr() as *mut c_void
    }
}