// clickhouse_native_client/column/string.rs
use super::{
30 Column,
31 ColumnRef,
32};
33use crate::{
34 io::buffer_utils,
35 types::Type,
36 Error,
37 Result,
38};
39use bytes::{
40 Buf,
41 BufMut,
42 BytesMut,
43};
44use std::sync::Arc;
45
46pub struct ColumnFixedString {
52 type_: Type,
53 string_size: usize,
54 data: Vec<u8>,
55}
56
57impl ColumnFixedString {
58 pub fn new(type_: Type) -> Self {
61 let string_size = match &type_ {
62 Type::FixedString { size } => *size,
63 _ => panic!("Expected FixedString type"),
64 };
65
66 Self { type_, string_size, data: Vec::new() }
67 }
68
69 pub fn with_capacity(type_: Type, capacity: usize) -> Self {
72 let string_size = match &type_ {
73 Type::FixedString { size } => *size,
74 _ => panic!("Expected FixedString type"),
75 };
76
77 Self {
78 type_,
79 string_size,
80 data: Vec::with_capacity(string_size * capacity),
81 }
82 }
83
84 pub fn with_data(mut self, data: Vec<String>) -> Self {
86 for s in data {
87 self.append(s);
88 }
89 self
90 }
91
92 pub fn append(&mut self, s: String) {
94 let bytes = s.as_bytes();
95
96 if bytes.len() > self.string_size {
97 panic!(
98 "String too long for FixedString({}): got {} bytes",
99 self.string_size,
100 bytes.len()
101 );
102 }
103
104 self.data.extend_from_slice(bytes);
106
107 if bytes.len() < self.string_size {
109 self.data
110 .resize(self.data.len() + (self.string_size - bytes.len()), 0);
111 }
112 }
113
114 pub fn get(&self, index: usize) -> Option<String> {
116 if index >= self.size() {
117 return None;
118 }
119
120 let start = index * self.string_size;
121 let end = start + self.string_size;
122 let bytes = &self.data[start..end];
123
124 let trimmed =
126 bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
127 Some(String::from_utf8_lossy(&bytes[..trimmed]).to_string())
128 }
129
130 pub fn at(&self, index: usize) -> String {
132 self.get(index).unwrap()
133 }
134
135 pub fn len(&self) -> usize {
137 self.size()
138 }
139
140 pub fn is_empty(&self) -> bool {
142 self.data.is_empty()
143 }
144
145 pub fn fixed_size(&self) -> usize {
147 self.string_size
148 }
149}
150
151impl Column for ColumnFixedString {
152 fn column_type(&self) -> &Type {
153 &self.type_
154 }
155
156 fn size(&self) -> usize {
157 self.data.len() / self.string_size
158 }
159
160 fn clear(&mut self) {
161 self.data.clear();
162 }
163
164 fn reserve(&mut self, new_cap: usize) {
165 self.data.reserve(self.string_size * new_cap);
166 }
167
168 fn append_column(&mut self, other: ColumnRef) -> Result<()> {
169 let other = other
170 .as_any()
171 .downcast_ref::<ColumnFixedString>()
172 .ok_or_else(|| Error::TypeMismatch {
173 expected: self.type_.name(),
174 actual: other.column_type().name(),
175 })?;
176
177 if self.string_size != other.string_size {
178 return Err(Error::TypeMismatch {
179 expected: format!("FixedString({})", self.string_size),
180 actual: format!("FixedString({})", other.string_size),
181 });
182 }
183
184 self.data.extend_from_slice(&other.data);
185 Ok(())
186 }
187
188 fn load_from_buffer(
189 &mut self,
190 buffer: &mut &[u8],
191 rows: usize,
192 ) -> Result<()> {
193 let total_bytes = self.string_size * rows;
194
195 if buffer.len() < total_bytes {
196 return Err(Error::Protocol(format!(
197 "Not enough data for {} FixedString({}) values: need {}, have {}",
198 rows, self.string_size, total_bytes, buffer.len()
199 )));
200 }
201
202 self.data.extend_from_slice(&buffer[..total_bytes]);
203 buffer.advance(total_bytes);
204 Ok(())
205 }
206
207 fn save_to_buffer(&self, buffer: &mut BytesMut) -> Result<()> {
208 buffer.put_slice(&self.data);
209 Ok(())
210 }
211
212 fn clone_empty(&self) -> ColumnRef {
213 Arc::new(ColumnFixedString::new(self.type_.clone()))
214 }
215
216 fn slice(&self, begin: usize, len: usize) -> Result<ColumnRef> {
217 if begin + len > self.size() {
218 return Err(Error::InvalidArgument(format!(
219 "Slice out of bounds: begin={}, len={}, size={}",
220 begin,
221 len,
222 self.size()
223 )));
224 }
225
226 let start = begin * self.string_size;
227 let end = start + len * self.string_size;
228
229 let mut result = ColumnFixedString::new(self.type_.clone());
230 result.data = self.data[start..end].to_vec();
231
232 Ok(Arc::new(result))
233 }
234
235 fn as_any(&self) -> &dyn std::any::Any {
236 self
237 }
238
239 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
240 self
241 }
242}
243
244pub struct ColumnString {
246 type_: Type,
247 data: Vec<String>,
248}
249
250impl ColumnString {
251 pub fn new(type_: Type) -> Self {
253 Self { type_, data: Vec::new() }
254 }
255
256 pub fn with_capacity(type_: Type, capacity: usize) -> Self {
259 Self { type_, data: Vec::with_capacity(capacity) }
260 }
261
262 pub fn from_vec(type_: Type, data: Vec<String>) -> Self {
264 Self { type_, data }
265 }
266
267 pub fn with_data(mut self, data: Vec<String>) -> Self {
269 self.data = data;
270 self
271 }
272
273 pub fn append(&mut self, s: impl Into<String>) {
275 self.data.push(s.into());
276 }
277
278 pub fn get(&self, index: usize) -> Option<&str> {
281 self.data.get(index).map(|s| s.as_str())
282 }
283
284 pub fn at(&self, index: usize) -> String {
286 self.data[index].clone()
287 }
288
289 pub fn len(&self) -> usize {
291 self.data.len()
292 }
293
294 pub fn is_empty(&self) -> bool {
296 self.data.is_empty()
297 }
298
299 pub fn iter(&self) -> impl Iterator<Item = &str> {
301 self.data.iter().map(|s| s.as_str())
302 }
303}
304
305impl Default for ColumnString {
306 fn default() -> Self {
307 Self::new(Type::string())
308 }
309}
310
311impl Column for ColumnString {
312 fn column_type(&self) -> &Type {
313 &self.type_
314 }
315
316 fn size(&self) -> usize {
317 self.data.len()
318 }
319
320 fn clear(&mut self) {
321 self.data.clear();
322 }
323
324 fn reserve(&mut self, new_cap: usize) {
325 self.data.reserve(new_cap);
326 }
327
328 fn append_column(&mut self, other: ColumnRef) -> Result<()> {
329 let other = other.as_any().downcast_ref::<ColumnString>().ok_or_else(
330 || Error::TypeMismatch {
331 expected: self.type_.name(),
332 actual: other.column_type().name(),
333 },
334 )?;
335
336 self.data.extend(other.data.iter().cloned());
337 Ok(())
338 }
339
340 fn load_from_buffer(
341 &mut self,
342 buffer: &mut &[u8],
343 rows: usize,
344 ) -> Result<()> {
345 self.data.reserve(rows);
346
347 for _ in 0..rows {
348 let len = buffer_utils::read_varint(buffer)? as usize;
350
351 if buffer.len() < len {
352 return Err(Error::Protocol(format!(
353 "Not enough data for string: need {}, have {}",
354 len,
355 buffer.len()
356 )));
357 }
358
359 let string_data = &buffer[..len];
361 let s = String::from_utf8(string_data.to_vec()).map_err(|e| {
362 Error::Protocol(format!("Invalid UTF-8 in string: {}", e))
363 })?;
364
365 self.data.push(s);
366 buffer.advance(len);
367 }
368
369 Ok(())
370 }
371
372 fn save_to_buffer(&self, buffer: &mut BytesMut) -> Result<()> {
373 for s in &self.data {
374 buffer_utils::write_varint(buffer, s.len() as u64);
376 buffer.put_slice(s.as_bytes());
378 }
379 Ok(())
380 }
381
382 fn clone_empty(&self) -> ColumnRef {
383 Arc::new(ColumnString::new(self.type_.clone()))
384 }
385
386 fn slice(&self, begin: usize, len: usize) -> Result<ColumnRef> {
387 if begin + len > self.data.len() {
388 return Err(Error::InvalidArgument(format!(
389 "Slice out of bounds: begin={}, len={}, size={}",
390 begin,
391 len,
392 self.data.len()
393 )));
394 }
395
396 let sliced = self.data[begin..begin + len].to_vec();
397 Ok(Arc::new(ColumnString::from_vec(self.type_.clone(), sliced)))
398 }
399
400 fn as_any(&self) -> &dyn std::any::Any {
401 self
402 }
403
404 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
405 self
406 }
407}
408
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// A new FixedString column is empty and reports the width of its type.
    #[test]
    fn test_fixed_string_creation() {
        let column = ColumnFixedString::new(Type::fixed_string(10));

        assert_eq!(column.size(), 0);
        assert_eq!(column.fixed_size(), 10);
    }

    /// Appended values come back unchanged, in insertion order.
    #[test]
    fn test_fixed_string_append() {
        let mut column = ColumnFixedString::new(Type::fixed_string(10));
        for word in ["hello", "world"] {
            column.append(word.to_string());
        }

        assert_eq!(column.size(), 2);
        assert_eq!(column.get(0), Some(String::from("hello")));
        assert_eq!(column.get(1), Some(String::from("world")));
    }

    /// A short value still occupies the full row width in storage.
    #[test]
    fn test_fixed_string_padding() {
        let mut column = ColumnFixedString::new(Type::fixed_string(10));
        column.append(String::from("hi"));

        assert_eq!(column.data.len(), 10);
        assert_eq!(column.get(0), Some(String::from("hi")));
    }

    /// Values wider than the fixed size are rejected with a panic.
    #[test]
    #[should_panic(expected = "String too long")]
    fn test_fixed_string_too_long() {
        let mut column = ColumnFixedString::new(Type::fixed_string(5));
        column.append(String::from("too long string"));
    }

    /// Saving and loading a FixedString column preserves its rows.
    #[test]
    fn test_fixed_string_save_load() {
        let mut source = ColumnFixedString::new(Type::fixed_string(8));
        source.append(String::from("hello"));
        source.append(String::from("world"));

        let mut encoded = BytesMut::new();
        source.save_to_buffer(&mut encoded).unwrap();

        let mut restored = ColumnFixedString::new(Type::fixed_string(8));
        let mut input = &encoded[..];
        restored.load_from_buffer(&mut input, 2).unwrap();

        assert_eq!(restored.size(), 2);
        assert_eq!(restored.get(0), Some(String::from("hello")));
        assert_eq!(restored.get(1), Some(String::from("world")));
    }

    /// A new String column is empty.
    #[test]
    fn test_string_creation() {
        let column = ColumnString::new(Type::string());

        assert_eq!(column.size(), 0);
    }

    /// `append` accepts both string slices and owned strings.
    #[test]
    fn test_string_append() {
        let mut column = ColumnString::new(Type::string());
        column.append("hello");
        column.append("world");
        column.append(String::from("rust"));

        assert_eq!(column.size(), 3);
        assert_eq!(column.get(0), Some("hello"));
        assert_eq!(column.get(1), Some("world"));
        assert_eq!(column.get(2), Some("rust"));
    }

    /// Saving and loading preserves ASCII and multi-byte UTF-8 values.
    #[test]
    fn test_string_save_load() {
        let mut source = ColumnString::new(Type::string());
        source.append("hello");
        source.append("мир");
        source.append("🦀");

        let mut encoded = BytesMut::new();
        source.save_to_buffer(&mut encoded).unwrap();

        let mut restored = ColumnString::new(Type::string());
        let mut input = &encoded[..];
        restored.load_from_buffer(&mut input, 3).unwrap();

        assert_eq!(restored.size(), 3);
        assert_eq!(restored.get(0), Some("hello"));
        assert_eq!(restored.get(1), Some("мир"));
        assert_eq!(restored.get(2), Some("🦀"));
    }

    /// `slice` returns the requested window of rows as a new column.
    #[test]
    fn test_string_slice() {
        let mut column = ColumnString::new(Type::string());
        (0..10).for_each(|i| column.append(format!("str_{}", i)));

        let window = column.slice(2, 5).unwrap();
        let window = window.as_any().downcast_ref::<ColumnString>().unwrap();

        assert_eq!(window.size(), 5);
        assert_eq!(window.get(0), Some("str_2"));
        assert_eq!(window.get(4), Some("str_6"));
    }

    /// Varints round-trip across the byte-boundary values and `u64::MAX`.
    #[test]
    fn test_varint_encode_decode() {
        let samples = vec![0u64, 1, 127, 128, 255, 256, 65535, u64::MAX];

        for expected in samples {
            let mut encoded = BytesMut::new();
            buffer_utils::write_varint(&mut encoded, expected);

            let mut input = &encoded[..];
            let actual = buffer_utils::read_varint(&mut input).unwrap();

            assert_eq!(expected, actual);
        }
    }
}