reifydb_engine/function/blob/
utf8.rs1use reifydb_core::value::column::ColumnData;
5use reifydb_type::{OwnedFragment, value::Blob};
6
7use crate::function::{ScalarFunction, ScalarFunctionContext};
8
/// Scalar function that turns UTF-8 text values into blobs of their bytes.
///
/// The type carries no state; every instance behaves the same.
#[derive(Debug, Clone, Copy, Default)]
pub struct BlobUtf8;

impl BlobUtf8 {
    /// Creates the function object. Equivalent to `BlobUtf8::default()`.
    pub fn new() -> Self {
        Self
    }
}
16
17impl ScalarFunction for BlobUtf8 {
18 fn scalar(&self, ctx: ScalarFunctionContext) -> crate::Result<ColumnData> {
19 let columns = ctx.columns;
20 let row_count = ctx.row_count;
21
22 if columns.is_empty() {
23 return Ok(ColumnData::blob([]));
24 }
25
26 let column = columns.get(0).unwrap();
27
28 match &column.data() {
29 ColumnData::Utf8 {
30 container,
31 ..
32 } => {
33 let mut result_data = Vec::with_capacity(container.data().len());
34
35 for i in 0..row_count {
36 if container.is_defined(i) {
37 let utf8_str = &container[i];
38 let blob = Blob::from_utf8(OwnedFragment::internal(utf8_str));
39 result_data.push(blob);
40 } else {
41 result_data.push(Blob::empty())
42 }
43 }
44
45 Ok(ColumnData::blob_with_bitvec(result_data, container.bitvec().clone()))
46 }
47 _ => unimplemented!("BlobUtf8 only supports text input"),
48 }
49 }
50}
51
#[cfg(test)]
mod tests {
    use reifydb_core::value::{
        column::{Column, Columns},
        container::Utf8Container,
    };
    use reifydb_type::{Fragment, value::constraint::bytes::MaxBytes};

    use super::*;

    /// Byte content expected for an empty or undefined row.
    const NO_BYTES: &[u8] = &[];

    /// Evaluates `BlobUtf8` over a single UTF-8 column named "input".
    ///
    /// `values` supplies the row strings and `defined` the matching bitvec
    /// entries; the row count passed to the function is `values.len()`.
    fn run_blob_utf8(values: &[&str], defined: &[bool]) -> ColumnData {
        let strings: Vec<String> = values.iter().map(|&value| value.to_owned()).collect();
        let input = Column {
            name: Fragment::borrowed_internal("input"),
            data: ColumnData::Utf8 {
                container: Utf8Container::new(strings, defined.to_vec().into()),
                max_bytes: MaxBytes::MAX,
            },
        };
        let columns = Columns::new(vec![input]);
        let ctx = ScalarFunctionContext {
            columns: &columns,
            row_count: values.len(),
        };

        BlobUtf8::new().scalar(ctx).unwrap()
    }

    /// Asserts that `result` is a BLOB column whose rows match `expected`,
    /// given as `(is_defined, bytes)` pairs in row order.
    fn assert_blob_rows(result: ColumnData, expected: &[(bool, &[u8])]) {
        let ColumnData::Blob {
            container,
            ..
        } = result
        else {
            panic!("Expected BLOB column data");
        };

        assert_eq!(container.len(), expected.len());
        for (row, (defined, bytes)) in expected.iter().enumerate() {
            assert_eq!(container.is_defined(row), *defined);
            assert_eq!(container[row].as_bytes(), *bytes);
        }
    }

    #[test]
    fn test_blob_utf8_simple_ascii() {
        let result = run_blob_utf8(&["Hello!"], &[true]);

        assert_blob_rows(result, &[(true, "Hello!".as_bytes())]);
    }

    #[test]
    fn test_blob_utf8_empty_string() {
        let result = run_blob_utf8(&[""], &[true]);

        assert_blob_rows(result, &[(true, NO_BYTES)]);
    }

    #[test]
    fn test_blob_utf8_unicode_characters() {
        let text = "Hello 🌍! Café naïve";
        let result = run_blob_utf8(&[text], &[true]);

        assert_blob_rows(result, &[(true, text.as_bytes())]);
    }

    #[test]
    fn test_blob_utf8_multibyte_characters() {
        let text = "日本語 中文 한국어 العربية";
        let result = run_blob_utf8(&[text], &[true]);

        assert_blob_rows(result, &[(true, text.as_bytes())]);
    }

    #[test]
    fn test_blob_utf8_special_characters() {
        let text = "Line1\nLine2\tTabbed\r\nWindows";
        let result = run_blob_utf8(&[text], &[true]);

        assert_blob_rows(result, &[(true, text.as_bytes())]);
    }

    #[test]
    fn test_blob_utf8_multiple_rows() {
        let result = run_blob_utf8(&["First", "Second 🚀", "Third café"], &[true, true, true]);

        assert_blob_rows(
            result,
            &[
                (true, "First".as_bytes()),
                (true, "Second 🚀".as_bytes()),
                (true, "Third café".as_bytes()),
            ],
        );
    }

    #[test]
    fn test_blob_utf8_with_null_data() {
        // The middle row is marked undefined and must come back as an
        // undefined, empty blob.
        let result = run_blob_utf8(&["First", "", "Third"], &[true, false, true]);

        assert_blob_rows(
            result,
            &[
                (true, "First".as_bytes()),
                (false, NO_BYTES),
                (true, "Third".as_bytes()),
            ],
        );
    }

    #[test]
    fn test_blob_utf8_json_data() {
        let json = r#"{"name": "John", "age": 30, "city": "New York"}"#;
        let result = run_blob_utf8(&[json], &[true]);

        assert_blob_rows(result, &[(true, json.as_bytes())]);
    }

    #[test]
    fn test_blob_utf8_long_string() {
        let long_string = "A".repeat(1000);
        let result = run_blob_utf8(&[long_string.as_str()], &[true]);

        assert_blob_rows(result, &[(true, long_string.as_bytes())]);
        // The blob was just checked equal to these bytes, so this pins its length.
        assert_eq!(long_string.as_bytes().len(), 1000);
    }
}