odbc_api/buffers/text_column.rs
1use crate::{
2 DataType, Error,
3 columnar_bulk_inserter::BoundInputSlice,
4 error::TooLargeBufferSize,
5 handles::{
6 ASSUMED_MAX_LENGTH_OF_W_VARCHAR, CData, CDataMut, HasDataType, Statement, StatementRef,
7 },
8};
9
10use super::{ColumnBuffer, Indicator};
11
12use log::debug;
13use odbc_sys::{CDataType, NULL_DATA};
14use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
15use widestring::U16Str;
16
17/// A column buffer for character data. The actual encoding used may depend on your system locale.
18pub type CharColumn = TextColumn<u8>;
19
20/// This buffer uses wide characters which implies UTF-16 encoding. UTF-8 encoding is preferable for
21/// most applications, but contrary to its sibling [`crate::buffers::CharColumn`] this buffer types
22/// implied encoding does not depend on the system locale.
23pub type WCharColumn = TextColumn<u16>;
24
/// Buffer which is meant to be bound to one column of a cursor. Each element holds a variable
/// number of characters, up to a maximum string length. Most SQL types have a string
/// representation, so this buffer can be bound to columns of almost any type; the ODBC driver and
/// driver manager are expected to take care of the conversion. Because elements are of variable
/// length, an indicator buffer is always bound, no matter whether the column is nullable or not.
///
/// The character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum length of a text, not counting the terminating zero.
    max_str_len: usize,
    /// Character data of all elements. Every element occupies `max_str_len + 1` consecutive
    /// entries.
    values: Vec<C>,
    /// One entry per element: either `NULL_DATA` or the length of the element with the same index
    /// in `values`. The length may exceed `max_str_len` if the text has been truncated.
    indicators: Vec<isize>,
}
43
44impl<C> TextColumn<C> {
45 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
46 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
47 /// one in order to make space for the null terminating zero at the end of strings. Uses a
48 /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
49 /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
50 /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
51 pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
52 where
53 C: Default + Copy,
54 {
55 // Element size is +1 to account for terminating zero
56 let element_size = max_str_len + 1;
57 let len = element_size * batch_size;
58 let mut values = Vec::new();
59 values
60 .try_reserve_exact(len)
61 .map_err(|_| TooLargeBufferSize {
62 num_elements: batch_size,
63 // We want the element size in bytes
64 element_size: element_size * size_of::<C>(),
65 })?;
66 values.resize(len, C::default());
67 Ok(TextColumn {
68 max_str_len,
69 values,
70 indicators: vec![0; batch_size],
71 })
72 }
73
74 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
75 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
76 /// one in order to make space for the null terminating zero at the end of strings. All
77 /// indicators are set to [`crate::sys::NULL_DATA`] by default.
78 pub fn new(batch_size: usize, max_str_len: usize) -> Self
79 where
80 C: Default + Copy,
81 {
82 // Element size is +1 to account for terminating zero
83 let element_size = max_str_len + 1;
84 let len = element_size * batch_size;
85 let mut values = Vec::new();
86 values.reserve_exact(len);
87 values.resize(len, C::default());
88 TextColumn {
89 max_str_len,
90 values,
91 indicators: vec![NULL_DATA; batch_size],
92 }
93 }
94
95 /// Bytes of string at the specified position. Includes interior nuls, but excludes the
96 /// terminating nul.
97 ///
98 /// The column buffer does not know how many elements were in the last row group, and therefore
99 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
100 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
101 /// equal to the maximum number of elements in the buffer.
102 pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
103 self.content_length_at(row_index).map(|length| {
104 let offset = row_index * (self.max_str_len + 1);
105 &self.values[offset..offset + length]
106 })
107 }
108
109 /// Maximum length of elements
110 pub fn max_len(&self) -> usize {
111 self.max_str_len
112 }
113
114 /// Indicator value at the specified position. Useful to detect truncation of data.
115 ///
116 /// The column buffer does not know how many elements were in the last row group, and therefore
117 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
118 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
119 /// equal to the maximum number of elements in the buffer.
120 pub fn indicator_at(&self, row_index: usize) -> Indicator {
121 Indicator::from_isize(self.indicators[row_index])
122 }
123
124 /// Length of value at the specified position. This is different from an indicator as it refers
125 /// to the length of the value in the buffer, not to the length of the value in the datasource.
126 /// The two things are different for truncated values.
127 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
128 match self.indicator_at(row_index) {
129 Indicator::Null => None,
130 // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
131 Indicator::NoTotal => Some(self.max_str_len),
132 Indicator::Length(length_in_bytes) => {
133 let length_in_chars = length_in_bytes / size_of::<C>();
134 let length = min(self.max_str_len, length_in_chars);
135 Some(length)
136 }
137 }
138 }
139
140 /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
141 ///
142 /// After fetching data we may want to know if any value has been truncated due to the buffer
143 /// not being able to hold elements of that size. This method checks the indicator buffer
144 /// element wise.
145 pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
146 let max_bin_length = self.max_str_len * size_of::<C>();
147 self.indicators
148 .iter()
149 .copied()
150 .take(num_rows)
151 .find_map(|indicator| {
152 let indicator = Indicator::from_isize(indicator);
153 indicator.is_truncated(max_bin_length).then_some(indicator)
154 })
155 }
156
157 /// Changes the maximum string length the buffer can hold. This operation is useful if you find
158 /// an unexpected large input string during insertion.
159 ///
160 /// This is however costly, as not only does the new buffer have to be allocated, but all values
161 /// have to copied from the old to the new buffer.
162 ///
163 /// This method could also be used to reduce the maximum string length, which would truncate
164 /// strings in the process.
165 ///
166 /// This method does not adjust indicator buffers as these might hold values larger than the
167 /// maximum string length.
168 ///
169 /// # Parameters
170 ///
171 /// * `new_max_str_len`: New maximum string length without terminating zero.
172 /// * `num_rows`: Number of valid rows currently stored in this buffer.
173 pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
174 where
175 C: Default + Copy,
176 {
177 debug!(
178 "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
179 num_rows, self.max_str_len, new_max_str_len
180 );
181
182 let batch_size = self.indicators.len();
183 // Allocate a new buffer large enough to hold a batch of strings with maximum length.
184 let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
185 // Copy values from old to new buffer.
186 let max_copy_length = min(self.max_str_len, new_max_str_len);
187 for ((&indicator, old_value), new_value) in self
188 .indicators
189 .iter()
190 .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
191 .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
192 .take(num_rows)
193 {
194 match Indicator::from_isize(indicator) {
195 Indicator::Null => (),
196 Indicator::NoTotal => {
197 // There is no good choice here in case we are expanding the buffer. Since
198 // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
199 // be padded with 0. I currently cannot think of any use case there it would
200 // matter.
201 new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
202 }
203 Indicator::Length(num_bytes_len) => {
204 let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
205 new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
206 }
207 }
208 }
209 self.values = new_values;
210 self.max_str_len = new_max_str_len;
211 }
212
213 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
214 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
215 /// allowed element length. `input` must be specified without the terminating zero.
216 pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
217 where
218 C: Default + Copy,
219 {
220 if let Some(input) = input {
221 self.set_mut(index, input.len()).copy_from_slice(input);
222 } else {
223 self.indicators[index] = NULL_DATA;
224 }
225 }
226
227 /// Can be used to set a value at a specific row index without performing a memcopy on an input
228 /// slice and instead provides direct access to the underlying buffer.
229 ///
230 /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
231 /// be more convenient. This method is very useful if you want to `write!` a string value to the
232 /// buffer and the binary (**!**) length of the formatted string is known upfront.
233 ///
234 /// # Example: Write timestamp to text column.
235 ///
236 /// ```
237 /// use odbc_api::buffers::TextColumn;
238 /// use std::io::Write;
239 ///
240 /// /// Writes times formatted as hh::mm::ss.fff
241 /// fn write_time(
242 /// col: &mut TextColumn<u8>,
243 /// index: usize,
244 /// hours: u8,
245 /// minutes: u8,
246 /// seconds: u8,
247 /// milliseconds: u16)
248 /// {
249 /// write!(
250 /// col.set_mut(index, 12),
251 /// "{:02}:{:02}:{:02}.{:03}",
252 /// hours, minutes, seconds, milliseconds
253 /// ).unwrap();
254 /// }
255 /// ```
256 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
257 where
258 C: Default,
259 {
260 if length > self.max_str_len {
261 panic!(
262 "Tried to insert a value into a text buffer which is larger than the maximum \
263 allowed string length for the buffer."
264 );
265 }
266 self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
267 let start = (self.max_str_len + 1) * index;
268 let end = start + length;
269 // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
270 // driver would not care about the value in the index buffer and only look for the
271 // terminating zero.
272 self.values[end] = C::default();
273 &mut self.values[start..end]
274 }
275
276 /// Fills the column with NULL, between From and To
277 pub fn fill_null(&mut self, from: usize, to: usize) {
278 for index in from..to {
279 self.indicators[index] = NULL_DATA;
280 }
281 }
282
283 /// Provides access to the raw underlying value buffer. Normal applications should have little
284 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
285 /// from the ODBC in memory representation into other kinds of buffers.
286 ///
287 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
288 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
289 /// terminating zero at the end of each string. For the actual value length call
290 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
291 pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
292 &self.values[..(self.max_str_len + 1) * num_valid_rows]
293 }
294
295 /// The maximum number of rows the TextColumn can hold.
296 pub fn row_capacity(&self) -> usize {
297 self.values.len()
298 }
299}
300
301impl WCharColumn {
302 /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
303 /// the terminating nul.
304 ///
305 /// # Safety
306 ///
307 /// The column buffer does not know how many elements were in the last row group, and therefore
308 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
309 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
310 /// equal to the maximum number of elements in the buffer.
311 pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
312 self.value_at(row_index).map(U16Str::from_slice)
313 }
314}
315
316unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
317where
318 TextColumn<C>: CDataMut + HasDataType,
319{
320 type View<'a> = TextColumnView<'a, C>;
321
322 fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
323 TextColumnView {
324 num_rows: valid_rows,
325 col: self,
326 }
327 }
328
329 fn fill_default(&mut self, from: usize, to: usize) {
330 self.fill_null(from, to)
331 }
332
333 /// Maximum number of text strings this column may hold.
334 fn capacity(&self) -> usize {
335 self.indicators.len()
336 }
337
338 fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
339 let max_bin_length = self.max_str_len * size_of::<C>();
340 self.indicators
341 .iter()
342 .copied()
343 .take(num_rows)
344 .find_map(|indicator| {
345 let indicator = Indicator::from_isize(indicator);
346 indicator.is_truncated(max_bin_length).then_some(indicator)
347 })
348 }
349}
350
351/// Allows read only access to the valid part of a text column.
352///
353/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
354/// problem with `TextColumn` is, that it is a buffer, but it has no idea how many of its members
355/// are actually valid, and have been returned with the last row group of the the result set. That
356/// number is maintained on the level of the entire column buffer. So a text column knows the number
357/// of valid rows, in addition to holding a reference to the buffer, in order to guarantee, that
358/// every element acccessed through it, is valid.
359#[derive(Debug, Clone, Copy)]
360pub struct TextColumnView<'c, C> {
361 num_rows: usize,
362 col: &'c TextColumn<C>,
363}
364
365impl<'c, C> TextColumnView<'c, C> {
366 /// The number of valid elements in the text column.
367 pub fn len(&self) -> usize {
368 self.num_rows
369 }
370
371 /// True if, and only if there are no valid rows in the column buffer.
372 pub fn is_empty(&self) -> bool {
373 self.num_rows == 0
374 }
375
376 /// Slice of text at the specified row index without terminating zero. `None` if the value is
377 /// `NULL`. This method will panic if the index is larger than the number of valid rows in the
378 /// view as returned by [`Self::len`].
379 pub fn get(&self, index: usize) -> Option<&'c [C]> {
380 self.col.value_at(index)
381 }
382
383 /// Iterator over the valid elements of the text buffer
384 pub fn iter(&self) -> TextColumnIt<'c, C> {
385 TextColumnIt {
386 pos: 0,
387 num_rows: self.num_rows,
388 col: self.col,
389 }
390 }
391
392 /// Length of value at the specified position. This is different from an indicator as it refers
393 /// to the length of the value in the buffer, not to the length of the value in the datasource.
394 /// The two things are different for truncated values.
395 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
396 if row_index >= self.num_rows {
397 panic!("Row index points beyond the range of valid values.")
398 }
399 self.col.content_length_at(row_index)
400 }
401
402 /// Provides access to the raw underlying value buffer. Normal applications should have little
403 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
404 /// from the ODBC in memory representation into other kinds of buffers.
405 ///
406 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
407 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
408 /// terminating zero at the end of each string. For the actual value length call
409 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
410 pub fn raw_value_buffer(&self) -> &'c [C] {
411 self.col.raw_value_buffer(self.num_rows)
412 }
413
414 pub fn max_len(&self) -> usize {
415 self.col.max_len()
416 }
417
418 /// `Some` if any value is truncated.
419 ///
420 /// After fetching data we may want to know if any value has been truncated due to the buffer
421 /// not being able to hold elements of that size. This method checks the indicator buffer
422 /// element wise.
423 pub fn has_truncated_values(&self) -> Option<Indicator> {
424 self.col.has_truncated_values(self.num_rows)
425 }
426}
427
428unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
429 type SliceMut = TextColumnSliceMut<'a, C>;
430
431 unsafe fn as_view_mut(
432 &'a mut self,
433 parameter_index: u16,
434 stmt: StatementRef<'a>,
435 ) -> Self::SliceMut {
436 TextColumnSliceMut {
437 column: self,
438 stmt,
439 parameter_index,
440 }
441 }
442}
443
444/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
445/// values.
446pub struct TextColumnSliceMut<'a, C> {
447 column: &'a mut TextColumn<C>,
448 // Needed to rebind the column in case of resize
449 stmt: StatementRef<'a>,
450 // Also needed to rebind the column in case of resize
451 parameter_index: u16,
452}
453
454impl<C> TextColumnSliceMut<'_, C>
455where
456 C: Default + Copy,
457{
458 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
459 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
460 /// allowed element length. `element` must be specified without the terminating zero.
461 pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
462 self.column.set_value(row_index, element)
463 }
464
465 /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
466 /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
467 /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
468 /// one. This makes this an extremely expensive operation.
469 pub fn ensure_max_element_length(
470 &mut self,
471 element_length: usize,
472 num_rows_to_copy: usize,
473 ) -> Result<(), Error>
474 where
475 TextColumn<C>: HasDataType + CData,
476 {
477 // Column buffer is not large enough to hold the element. We must allocate a larger buffer
478 // in order to hold it. This invalidates the pointers previously bound to the statement. So
479 // we rebind them.
480 if element_length > self.column.max_len() {
481 let new_max_str_len = element_length;
482 self.column
483 .resize_max_str(new_max_str_len, num_rows_to_copy);
484 unsafe {
485 self.stmt
486 .bind_input_parameter(self.parameter_index, self.column)
487 .into_result(&self.stmt)?
488 }
489 }
490 Ok(())
491 }
492
493 /// Can be used to set a value at a specific row index without performing a memcopy on an input
494 /// slice and instead provides direct access to the underlying buffer.
495 ///
496 /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
497 /// be more convenient. This method is very useful if you want to `write!` a string value to the
498 /// buffer and the binary (**!**) length of the formatted string is known upfront.
499 ///
500 /// # Example: Write timestamp to text column.
501 ///
502 /// ```
503 /// use odbc_api::buffers::TextColumnSliceMut;
504 /// use std::io::Write;
505 ///
506 /// /// Writes times formatted as hh::mm::ss.fff
507 /// fn write_time(
508 /// col: &mut TextColumnSliceMut<u8>,
509 /// index: usize,
510 /// hours: u8,
511 /// minutes: u8,
512 /// seconds: u8,
513 /// milliseconds: u16)
514 /// {
515 /// write!(
516 /// col.set_mut(index, 12),
517 /// "{:02}:{:02}:{:02}.{:03}",
518 /// hours, minutes, seconds, milliseconds
519 /// ).unwrap();
520 /// }
521 /// ```
522 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
523 self.column.set_mut(index, length)
524 }
525}
526
527/// Iterator over a text column. See [`TextColumnView::iter`]
528#[derive(Debug)]
529pub struct TextColumnIt<'c, C> {
530 pos: usize,
531 num_rows: usize,
532 col: &'c TextColumn<C>,
533}
534
535impl<'c, C> TextColumnIt<'c, C> {
536 fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
537 if self.pos == self.num_rows {
538 None
539 } else {
540 let ret = Some(self.col.value_at(self.pos));
541 self.pos += 1;
542 ret
543 }
544 }
545}
546
547impl<'c> Iterator for TextColumnIt<'c, u8> {
548 type Item = Option<&'c [u8]>;
549
550 fn next(&mut self) -> Option<Self::Item> {
551 self.next_impl()
552 }
553
554 fn size_hint(&self) -> (usize, Option<usize>) {
555 let len = self.num_rows - self.pos;
556 (len, Some(len))
557 }
558}
559
560impl ExactSizeIterator for TextColumnIt<'_, u8> {}
561
562impl<'c> Iterator for TextColumnIt<'c, u16> {
563 type Item = Option<&'c U16Str>;
564
565 fn next(&mut self) -> Option<Self::Item> {
566 self.next_impl().map(|opt| opt.map(U16Str::from_slice))
567 }
568
569 fn size_hint(&self) -> (usize, Option<usize>) {
570 let len = self.num_rows - self.pos;
571 (len, Some(len))
572 }
573}
574
575impl ExactSizeIterator for TextColumnIt<'_, u16> {}
576
577unsafe impl CData for CharColumn {
578 fn cdata_type(&self) -> CDataType {
579 CDataType::Char
580 }
581
582 fn indicator_ptr(&self) -> *const isize {
583 self.indicators.as_ptr()
584 }
585
586 fn value_ptr(&self) -> *const c_void {
587 self.values.as_ptr() as *const c_void
588 }
589
590 fn buffer_length(&self) -> isize {
591 (self.max_str_len + 1).try_into().unwrap()
592 }
593}
594
595unsafe impl CDataMut for CharColumn {
596 fn mut_indicator_ptr(&mut self) -> *mut isize {
597 self.indicators.as_mut_ptr()
598 }
599
600 fn mut_value_ptr(&mut self) -> *mut c_void {
601 self.values.as_mut_ptr() as *mut c_void
602 }
603}
604
605impl HasDataType for CharColumn {
606 fn data_type(&self) -> DataType {
607 DataType::Varchar {
608 length: NonZeroUsize::new(self.max_str_len),
609 }
610 }
611}
612
613unsafe impl CData for WCharColumn {
614 fn cdata_type(&self) -> CDataType {
615 CDataType::WChar
616 }
617
618 fn indicator_ptr(&self) -> *const isize {
619 self.indicators.as_ptr()
620 }
621
622 fn value_ptr(&self) -> *const c_void {
623 self.values.as_ptr() as *const c_void
624 }
625
626 fn buffer_length(&self) -> isize {
627 ((self.max_str_len + 1) * 2).try_into().unwrap()
628 }
629}
630
631unsafe impl CDataMut for WCharColumn {
632 fn mut_indicator_ptr(&mut self) -> *mut isize {
633 self.indicators.as_mut_ptr()
634 }
635
636 fn mut_value_ptr(&mut self) -> *mut c_void {
637 self.values.as_mut_ptr() as *mut c_void
638 }
639}
640
641impl HasDataType for WCharColumn {
642 fn data_type(&self) -> DataType {
643 if self.max_str_len <= ASSUMED_MAX_LENGTH_OF_W_VARCHAR {
644 DataType::WVarchar {
645 length: NonZeroUsize::new(self.max_str_len),
646 }
647 } else {
648 DataType::WLongVarchar {
649 length: NonZeroUsize::new(self.max_str_len),
650 }
651 }
652 }
653}