odbc_api/buffers/text_column.rs
1use crate::{
2 DataType, Error,
3 columnar_bulk_inserter::BoundInputSlice,
4 error::TooLargeBufferSize,
5 handles::{
6 ASSUMED_MAX_LENGTH_OF_W_VARCHAR, CData, CDataMut, HasDataType, Statement, StatementRef,
7 },
8};
9
10use super::{ColumnBuffer, Indicator};
11
12use log::debug;
13use odbc_sys::{CDataType, NULL_DATA};
14use std::{cmp::min, ffi::c_void, mem::size_of, num::NonZeroUsize, panic};
15use widestring::U16Str;
16
/// A column buffer for character data stored as `u8` code units. The actual encoding used may
/// depend on your system locale.
pub type CharColumn = TextColumn<u8>;
19
/// This buffer uses wide characters (`u16` code units), which implies UTF-16 encoding. UTF-8
/// encoding is preferable for most applications, but contrary to its sibling
/// [`crate::buffers::CharColumn`] this buffer type's implied encoding does not depend on the
/// system locale.
pub type WCharColumn = TextColumn<u16>;
24
/// A buffer intended to be bound to a column of a cursor. Elements of the buffer will contain a
/// variable amount of characters up to a maximum string length. Since most SQL types have a string
/// representation, this buffer can be bound to a column of almost any type; the ODBC driver and
/// driver manager should take care of the conversion. Since elements of this type have variable
/// length, an indicator buffer needs to be bound whether the column is nullable or not, so the
/// nullability of the column does not matter for this buffer.
///
/// Character type `C` is intended to be either `u8` or `u16`.
#[derive(Debug)]
pub struct TextColumn<C> {
    /// Maximum text length without terminating zero.
    max_str_len: usize,
    /// Values of all rows stored back to back. Each row occupies `max_str_len + 1` characters;
    /// the extra character makes room for the terminating zero.
    values: Vec<C>,
    /// Elements in this buffer are either `NULL_DATA` or hold the length (in bytes) of the element
    /// in `values` with the same index. Please note that this value may be larger than
    /// `max_str_len` if the text has been truncated.
    indicators: Vec<isize>,
}
43
44impl<C> TextColumn<C> {
45 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
46 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
47 /// one in order to make space for the null terminating zero at the end of strings. Uses a
48 /// fallible allocation for creating the buffer. In applications often the `max_str_len` size
49 /// of the buffer, might be directly inspired by the maximum size of the type, as reported, by
50 /// ODBC. Which might get exceedingly large for types like VARCHAR(MAX)
51 pub fn try_new(batch_size: usize, max_str_len: usize) -> Result<Self, TooLargeBufferSize>
52 where
53 C: Default + Copy,
54 {
55 // Element size is +1 to account for terminating zero
56 let element_size = max_str_len + 1;
57 let len = element_size * batch_size;
58 let mut values = Vec::new();
59 values
60 .try_reserve_exact(len)
61 .map_err(|_| TooLargeBufferSize {
62 num_elements: batch_size,
63 // We want the element size in bytes
64 element_size: element_size * size_of::<C>(),
65 })?;
66 values.resize(len, C::default());
67 Ok(TextColumn {
68 max_str_len,
69 values,
70 indicators: vec![0; batch_size],
71 })
72 }
73
74 /// This will allocate a value and indicator buffer for `batch_size` elements. Each value may
75 /// have a maximum length of `max_str_len`. This implies that `max_str_len` is increased by
76 /// one in order to make space for the null terminating zero at the end of strings. All
77 /// indicators are set to [`crate::sys::NULL_DATA`] by default.
78 pub fn new(batch_size: usize, max_str_len: usize) -> Self
79 where
80 C: Default + Copy,
81 {
82 // Element size is +1 to account for terminating zero
83 let element_size = max_str_len + 1;
84 let len = element_size * batch_size;
85 let mut values = Vec::new();
86 values.reserve_exact(len);
87 values.resize(len, C::default());
88 TextColumn {
89 max_str_len,
90 values,
91 indicators: vec![NULL_DATA; batch_size],
92 }
93 }
94
95 /// Bytes of string at the specified position. Includes interior nuls, but excludes the
96 /// terminating nul.
97 ///
98 /// The column buffer does not know how many elements were in the last row group, and therefore
99 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
100 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
101 /// equal to the maximum number of elements in the buffer.
102 pub fn value_at(&self, row_index: usize) -> Option<&[C]> {
103 self.content_length_at(row_index).map(|length| {
104 let offset = row_index * (self.max_str_len + 1);
105 &self.values[offset..offset + length]
106 })
107 }
108
/// Maximum length of an element in characters of type `C`, not counting the terminating zero.
pub fn max_len(&self) -> usize {
    self.max_str_len
}
113
114 /// Indicator value at the specified position. Useful to detect truncation of data.
115 ///
116 /// The column buffer does not know how many elements were in the last row group, and therefore
117 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
118 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
119 /// equal to the maximum number of elements in the buffer.
120 pub fn indicator_at(&self, row_index: usize) -> Indicator {
121 Indicator::from_isize(self.indicators[row_index])
122 }
123
124 /// Length of value at the specified position. This is different from an indicator as it refers
125 /// to the length of the value in the buffer, not to the length of the value in the datasource.
126 /// The two things are different for truncated values.
127 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
128 match self.indicator_at(row_index) {
129 Indicator::Null => None,
130 // Seen no total in the wild then binding shorter buffer to fixed sized CHAR in MSSQL.
131 Indicator::NoTotal => Some(self.max_str_len),
132 Indicator::Length(length_in_bytes) => {
133 let length_in_chars = length_in_bytes / size_of::<C>();
134 let length = min(self.max_str_len, length_in_chars);
135 Some(length)
136 }
137 }
138 }
139
140 /// Finds an indiactor larger than the maximum element size in the range [0, num_rows).
141 ///
142 /// After fetching data we may want to know if any value has been truncated due to the buffer
143 /// not being able to hold elements of that size. This method checks the indicator buffer
144 /// element wise.
145 pub fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
146 let max_bin_length = self.max_str_len * size_of::<C>();
147 self.indicators
148 .iter()
149 .copied()
150 .take(num_rows)
151 .find_map(|indicator| {
152 let indicator = Indicator::from_isize(indicator);
153 indicator.is_truncated(max_bin_length).then_some(indicator)
154 })
155 }
156
157 /// Changes the maximum string length the buffer can hold. This operation is useful if you find
158 /// an unexpected large input string during insertion.
159 ///
160 /// This is however costly, as not only does the new buffer have to be allocated, but all values
161 /// have to copied from the old to the new buffer.
162 ///
163 /// This method could also be used to reduce the maximum string length, which would truncate
164 /// strings in the process.
165 ///
166 /// This method does not adjust indicator buffers as these might hold values larger than the
167 /// maximum string length.
168 ///
169 /// # Parameters
170 ///
171 /// * `new_max_str_len`: New maximum string length without terminating zero.
172 /// * `num_rows`: Number of valid rows currently stored in this buffer.
173 pub fn resize_max_str(&mut self, new_max_str_len: usize, num_rows: usize)
174 where
175 C: Default + Copy,
176 {
177 debug!(
178 "Rebinding text column buffer with {} elements. Maximum string length {} => {}",
179 num_rows, self.max_str_len, new_max_str_len
180 );
181
182 let batch_size = self.indicators.len();
183 // Allocate a new buffer large enough to hold a batch of strings with maximum length.
184 let mut new_values = vec![C::default(); (new_max_str_len + 1) * batch_size];
185 // Copy values from old to new buffer.
186 let max_copy_length = min(self.max_str_len, new_max_str_len);
187 for ((&indicator, old_value), new_value) in self
188 .indicators
189 .iter()
190 .zip(self.values.chunks_exact_mut(self.max_str_len + 1))
191 .zip(new_values.chunks_exact_mut(new_max_str_len + 1))
192 .take(num_rows)
193 {
194 match Indicator::from_isize(indicator) {
195 Indicator::Null => (),
196 Indicator::NoTotal => {
197 // There is no good choice here in case we are expanding the buffer. Since
198 // NO_TOTAL indicates that we use the entire buffer, but in truth it would now
199 // be padded with 0. I currently cannot think of any use case there it would
200 // matter.
201 new_value[..max_copy_length].clone_from_slice(&old_value[..max_copy_length]);
202 }
203 Indicator::Length(num_bytes_len) => {
204 let num_bytes_to_copy = min(num_bytes_len / size_of::<C>(), max_copy_length);
205 new_value[..num_bytes_to_copy].copy_from_slice(&old_value[..num_bytes_to_copy]);
206 }
207 }
208 }
209 self.values = new_values;
210 self.max_str_len = new_max_str_len;
211 }
212
213 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
214 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
215 /// allowed element length. `input` must be specified without the terminating zero.
216 pub fn set_value(&mut self, index: usize, input: Option<&[C]>)
217 where
218 C: Default + Copy,
219 {
220 if let Some(input) = input {
221 self.set_mut(index, input.len()).copy_from_slice(input);
222 } else {
223 self.indicators[index] = NULL_DATA;
224 }
225 }
226
227 /// Can be used to set a value at a specific row index without performing a memcopy on an input
228 /// slice and instead provides direct access to the underlying buffer.
229 ///
230 /// In situations there the memcopy can not be avoided anyway [`Self::set_value`] is likely to
231 /// be more convenient. This method is very useful if you want to `write!` a string value to the
232 /// buffer and the binary (**!**) length of the formatted string is known upfront.
233 ///
234 /// # Example: Write timestamp to text column.
235 ///
236 /// ```
237 /// use odbc_api::buffers::TextColumn;
238 /// use std::io::Write;
239 ///
240 /// /// Writes times formatted as hh::mm::ss.fff
241 /// fn write_time(
242 /// col: &mut TextColumn<u8>,
243 /// index: usize,
244 /// hours: u8,
245 /// minutes: u8,
246 /// seconds: u8,
247 /// milliseconds: u16)
248 /// {
249 /// write!(
250 /// col.set_mut(index, 12),
251 /// "{:02}:{:02}:{:02}.{:03}",
252 /// hours, minutes, seconds, milliseconds
253 /// ).unwrap();
254 /// }
255 /// ```
256 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C]
257 where
258 C: Default,
259 {
260 if length > self.max_str_len {
261 panic!(
262 "Tried to insert a value into a text buffer which is larger than the maximum \
263 allowed string length for the buffer."
264 );
265 }
266 self.indicators[index] = (length * size_of::<C>()).try_into().unwrap();
267 let start = (self.max_str_len + 1) * index;
268 let end = start + length;
269 // Let's insert a terminating zero at the end to be on the safe side, in case the ODBC
270 // driver would not care about the value in the index buffer and only look for the
271 // terminating zero.
272 self.values[end] = C::default();
273 &mut self.values[start..end]
274 }
275
276 /// Fills the column with NULL, between From and To
277 pub fn fill_null(&mut self, from: usize, to: usize) {
278 for index in from..to {
279 self.indicators[index] = NULL_DATA;
280 }
281 }
282
283 /// Provides access to the raw underlying value buffer. Normal applications should have little
284 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
285 /// from the ODBC in memory representation into other kinds of buffers.
286 ///
287 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
288 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
289 /// terminating zero at the end of each string. For the actual value length call
290 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
291 pub fn raw_value_buffer(&self, num_valid_rows: usize) -> &[C] {
292 &self.values[..(self.max_str_len + 1) * num_valid_rows]
293 }
294
295 /// The maximum number of rows the TextColumn can hold.
296 pub fn row_capacity(&self) -> usize {
297 self.values.len()
298 }
299}
300
301impl WCharColumn {
302 /// The string slice at the specified position as `U16Str`. Includes interior nuls, but excludes
303 /// the terminating nul.
304 ///
305 /// # Safety
306 ///
307 /// The column buffer does not know how many elements were in the last row group, and therefore
308 /// can not guarantee the accessed element to be valid and in a defined state. It also can not
309 /// panic on accessing an undefined element. It will panic however if `row_index` is larger or
310 /// equal to the maximum number of elements in the buffer.
311 pub unsafe fn ustr_at(&self, row_index: usize) -> Option<&U16Str> {
312 self.value_at(row_index).map(U16Str::from_slice)
313 }
314}
315
316unsafe impl<C: 'static> ColumnBuffer for TextColumn<C>
317where
318 TextColumn<C>: CDataMut + HasDataType,
319{
320 type View<'a> = TextColumnView<'a, C>;
321
322 fn view(&self, valid_rows: usize) -> TextColumnView<'_, C> {
323 TextColumnView {
324 num_rows: valid_rows,
325 col: self,
326 }
327 }
328
329 fn fill_default(&mut self, from: usize, to: usize) {
330 self.fill_null(from, to)
331 }
332
333 /// Maximum number of text strings this column may hold.
334 fn capacity(&self) -> usize {
335 self.indicators.len()
336 }
337
338 fn has_truncated_values(&self, num_rows: usize) -> Option<Indicator> {
339 let max_bin_length = self.max_str_len * size_of::<C>();
340 self.indicators
341 .iter()
342 .copied()
343 .take(num_rows)
344 .find_map(|indicator| {
345 let indicator = Indicator::from_isize(indicator);
346 indicator.is_truncated(max_bin_length).then_some(indicator)
347 })
348 }
349}
350
/// Allows read only access to the valid part of a text column.
///
/// You may ask, why is this type required, should we not just be able to use `&TextColumn`? The
/// problem with `TextColumn` is that it is a buffer, but it has no idea how many of its members
/// are actually valid and have been returned with the last row group of the result set. That
/// number is maintained on the level of the entire column buffer. So a text column view knows the
/// number of valid rows, in addition to holding a reference to the buffer, in order to guarantee
/// that every element accessed through it is valid.
#[derive(Debug, Clone, Copy)]
pub struct TextColumnView<'c, C> {
    /// Number of valid rows in `col`.
    num_rows: usize,
    /// The buffer this view refers to.
    col: &'c TextColumn<C>,
}
364
365impl<'c, C> TextColumnView<'c, C> {
366 /// The number of valid elements in the text column.
367 pub fn len(&self) -> usize {
368 self.num_rows
369 }
370
371 /// True if, and only if there are no valid rows in the column buffer.
372 pub fn is_empty(&self) -> bool {
373 self.num_rows == 0
374 }
375
376 /// Slice of text at the specified row index without terminating zero.
377 pub fn get(&self, index: usize) -> Option<&'c [C]> {
378 self.col.value_at(index)
379 }
380
381 /// Iterator over the valid elements of the text buffer
382 pub fn iter(&self) -> TextColumnIt<'c, C> {
383 TextColumnIt {
384 pos: 0,
385 num_rows: self.num_rows,
386 col: self.col,
387 }
388 }
389
390 /// Length of value at the specified position. This is different from an indicator as it refers
391 /// to the length of the value in the buffer, not to the length of the value in the datasource.
392 /// The two things are different for truncated values.
393 pub fn content_length_at(&self, row_index: usize) -> Option<usize> {
394 if row_index >= self.num_rows {
395 panic!("Row index points beyond the range of valid values.")
396 }
397 self.col.content_length_at(row_index)
398 }
399
400 /// Provides access to the raw underlying value buffer. Normal applications should have little
401 /// reason to call this method. Yet it may be useful for writing bindings which copy directly
402 /// from the ODBC in memory representation into other kinds of buffers.
403 ///
404 /// The buffer contains the bytes for every non null valid element, padded to the maximum string
405 /// length. The content of the padding bytes is undefined. Usually ODBC drivers write a
406 /// terminating zero at the end of each string. For the actual value length call
407 /// [`Self::content_length_at`]. Any element starts at index * ([`Self::max_len`] + 1).
408 pub fn raw_value_buffer(&self) -> &'c [C] {
409 self.col.raw_value_buffer(self.num_rows)
410 }
411
412 pub fn max_len(&self) -> usize {
413 self.col.max_len()
414 }
415
416 /// `Some` if any value is truncated.
417 ///
418 /// After fetching data we may want to know if any value has been truncated due to the buffer
419 /// not being able to hold elements of that size. This method checks the indicator buffer
420 /// element wise.
421 pub fn has_truncated_values(&self) -> Option<Indicator> {
422 self.col.has_truncated_values(self.num_rows)
423 }
424}
425
426unsafe impl<'a, C: 'static> BoundInputSlice<'a> for TextColumn<C> {
427 type SliceMut = TextColumnSliceMut<'a, C>;
428
429 unsafe fn as_view_mut(
430 &'a mut self,
431 parameter_index: u16,
432 stmt: StatementRef<'a>,
433 ) -> Self::SliceMut {
434 TextColumnSliceMut {
435 column: self,
436 stmt,
437 parameter_index,
438 }
439 }
440}
441
/// A view to a mutable array parameter text buffer, which allows for filling the buffer with
/// values.
pub struct TextColumnSliceMut<'a, C> {
    /// The text column whose elements are written through this view.
    column: &'a mut TextColumn<C>,
    // Needed to rebind the column in case of resize
    stmt: StatementRef<'a>,
    // Also needed to rebind the column in case of resize
    parameter_index: u16,
}
451
452impl<C> TextColumnSliceMut<'_, C>
453where
454 C: Default + Copy,
455{
456 /// Sets the value of the buffer at index at Null or the specified binary Text. This method will
457 /// panic on out of bounds index, or if input holds a text which is larger than the maximum
458 /// allowed element length. `element` must be specified without the terminating zero.
459 pub fn set_cell(&mut self, row_index: usize, element: Option<&[C]>) {
460 self.column.set_value(row_index, element)
461 }
462
463 /// Ensures that the buffer is large enough to hold elements of `element_length`. Does nothing
464 /// if the buffer is already large enough. Otherwise it will reallocate and rebind the buffer.
465 /// The first `num_rows_to_copy` will be copied from the old value buffer to the new
466 /// one. This makes this an extremely expensive operation.
467 pub fn ensure_max_element_length(
468 &mut self,
469 element_length: usize,
470 num_rows_to_copy: usize,
471 ) -> Result<(), Error>
472 where
473 TextColumn<C>: HasDataType + CData,
474 {
475 // Column buffer is not large enough to hold the element. We must allocate a larger buffer
476 // in order to hold it. This invalidates the pointers previously bound to the statement. So
477 // we rebind them.
478 if element_length > self.column.max_len() {
479 let new_max_str_len = element_length;
480 self.column
481 .resize_max_str(new_max_str_len, num_rows_to_copy);
482 unsafe {
483 self.stmt
484 .bind_input_parameter(self.parameter_index, self.column)
485 .into_result(&self.stmt)?
486 }
487 }
488 Ok(())
489 }
490
491 /// Can be used to set a value at a specific row index without performing a memcopy on an input
492 /// slice and instead provides direct access to the underlying buffer.
493 ///
494 /// In situations there the memcopy can not be avoided anyway [`Self::set_cell`] is likely to
495 /// be more convenient. This method is very useful if you want to `write!` a string value to the
496 /// buffer and the binary (**!**) length of the formatted string is known upfront.
497 ///
498 /// # Example: Write timestamp to text column.
499 ///
500 /// ```
501 /// use odbc_api::buffers::TextColumnSliceMut;
502 /// use std::io::Write;
503 ///
504 /// /// Writes times formatted as hh::mm::ss.fff
505 /// fn write_time(
506 /// col: &mut TextColumnSliceMut<u8>,
507 /// index: usize,
508 /// hours: u8,
509 /// minutes: u8,
510 /// seconds: u8,
511 /// milliseconds: u16)
512 /// {
513 /// write!(
514 /// col.set_mut(index, 12),
515 /// "{:02}:{:02}:{:02}.{:03}",
516 /// hours, minutes, seconds, milliseconds
517 /// ).unwrap();
518 /// }
519 /// ```
520 pub fn set_mut(&mut self, index: usize, length: usize) -> &mut [C] {
521 self.column.set_mut(index, length)
522 }
523}
524
/// Iterator over a text column. See [`TextColumnView::iter`]
#[derive(Debug)]
pub struct TextColumnIt<'c, C> {
    /// Index of the row yielded by the next call to `next`.
    pos: usize,
    /// Number of valid rows; iteration stops once `pos` reaches this value.
    num_rows: usize,
    /// The column being iterated over.
    col: &'c TextColumn<C>,
}
532
533impl<'c, C> TextColumnIt<'c, C> {
534 fn next_impl(&mut self) -> Option<Option<&'c [C]>> {
535 if self.pos == self.num_rows {
536 None
537 } else {
538 let ret = Some(self.col.value_at(self.pos));
539 self.pos += 1;
540 ret
541 }
542 }
543}
544
545impl<'c> Iterator for TextColumnIt<'c, u8> {
546 type Item = Option<&'c [u8]>;
547
548 fn next(&mut self) -> Option<Self::Item> {
549 self.next_impl()
550 }
551
552 fn size_hint(&self) -> (usize, Option<usize>) {
553 let len = self.num_rows - self.pos;
554 (len, Some(len))
555 }
556}
557
// `size_hint` reports the same value for lower and upper bound (`num_rows - pos`).
impl ExactSizeIterator for TextColumnIt<'_, u8> {}
559
560impl<'c> Iterator for TextColumnIt<'c, u16> {
561 type Item = Option<&'c U16Str>;
562
563 fn next(&mut self) -> Option<Self::Item> {
564 self.next_impl().map(|opt| opt.map(U16Str::from_slice))
565 }
566
567 fn size_hint(&self) -> (usize, Option<usize>) {
568 let len = self.num_rows - self.pos;
569 (len, Some(len))
570 }
571}
572
// `size_hint` reports the same value for lower and upper bound (`num_rows - pos`).
impl ExactSizeIterator for TextColumnIt<'_, u16> {}
574
575unsafe impl CData for CharColumn {
576 fn cdata_type(&self) -> CDataType {
577 CDataType::Char
578 }
579
580 fn indicator_ptr(&self) -> *const isize {
581 self.indicators.as_ptr()
582 }
583
584 fn value_ptr(&self) -> *const c_void {
585 self.values.as_ptr() as *const c_void
586 }
587
588 fn buffer_length(&self) -> isize {
589 (self.max_str_len + 1).try_into().unwrap()
590 }
591}
592
593unsafe impl CDataMut for CharColumn {
594 fn mut_indicator_ptr(&mut self) -> *mut isize {
595 self.indicators.as_mut_ptr()
596 }
597
598 fn mut_value_ptr(&mut self) -> *mut c_void {
599 self.values.as_mut_ptr() as *mut c_void
600 }
601}
602
603impl HasDataType for CharColumn {
604 fn data_type(&self) -> DataType {
605 DataType::Varchar {
606 length: NonZeroUsize::new(self.max_str_len),
607 }
608 }
609}
610
611unsafe impl CData for WCharColumn {
612 fn cdata_type(&self) -> CDataType {
613 CDataType::WChar
614 }
615
616 fn indicator_ptr(&self) -> *const isize {
617 self.indicators.as_ptr()
618 }
619
620 fn value_ptr(&self) -> *const c_void {
621 self.values.as_ptr() as *const c_void
622 }
623
624 fn buffer_length(&self) -> isize {
625 ((self.max_str_len + 1) * 2).try_into().unwrap()
626 }
627}
628
629unsafe impl CDataMut for WCharColumn {
630 fn mut_indicator_ptr(&mut self) -> *mut isize {
631 self.indicators.as_mut_ptr()
632 }
633
634 fn mut_value_ptr(&mut self) -> *mut c_void {
635 self.values.as_mut_ptr() as *mut c_void
636 }
637}
638
639impl HasDataType for WCharColumn {
640 fn data_type(&self) -> DataType {
641 if self.max_str_len <= ASSUMED_MAX_LENGTH_OF_W_VARCHAR {
642 DataType::WVarchar {
643 length: NonZeroUsize::new(self.max_str_len),
644 }
645 } else {
646 DataType::WLongVarchar {
647 length: NonZeroUsize::new(self.max_str_len),
648 }
649 }
650 }
651}