reifydb_engine/function/text/
upper.rs

1// Copyright (c) reifydb.com 2025
2// This file is licensed under the AGPL-3.0-or-later, see license.md file
3
4use reifydb_core::value::{column::ColumnData, container::Utf8Container};
5
6use crate::function::{ScalarFunction, ScalarFunctionContext};
7
/// Scalar function that converts text values to upper case.
///
/// The type is a stateless unit struct, so it is free to copy and every
/// instance is interchangeable with every other one.
#[derive(Debug, Clone, Copy, Default)]
pub struct TextUpper;

impl TextUpper {
	/// Creates a new `TextUpper`.
	///
	/// Equivalent to `TextUpper::default()`.
	pub fn new() -> Self {
		Self
	}
}
15
16impl ScalarFunction for TextUpper {
17	fn scalar(&self, ctx: ScalarFunctionContext) -> crate::Result<ColumnData> {
18		let columns = ctx.columns;
19		let row_count = ctx.row_count;
20
21		if columns.is_empty() {
22			return Ok(ColumnData::utf8(Vec::<String>::new()));
23		}
24
25		let column = columns.get(0).unwrap();
26
27		match &column.data() {
28			ColumnData::Utf8 {
29				container,
30				max_bytes,
31			} => {
32				let mut result_data = Vec::with_capacity(container.data().len());
33				let mut result_bitvec = Vec::with_capacity(row_count);
34
35				for i in 0..row_count {
36					if container.is_defined(i) {
37						let original_str = &container[i];
38						let upper_str = original_str.to_uppercase();
39						result_data.push(upper_str);
40						result_bitvec.push(true);
41					} else {
42						result_data.push(String::new());
43						result_bitvec.push(false);
44					}
45				}
46
47				Ok(ColumnData::Utf8 {
48					container: Utf8Container::new(result_data, result_bitvec.into()),
49					max_bytes: *max_bytes,
50				})
51			}
52			_ => unimplemented!("TextUpper only supports text input"),
53		}
54	}
55}
56
#[cfg(test)]
mod tests {
	use reifydb_core::value::{
		column::{Column, Columns},
		container::Utf8Container,
	};
	use reifydb_type::{Fragment, value::constraint::bytes::MaxBytes};

	use super::*;

	/// Runs `TextUpper` over a single UTF-8 column built from `values` and
	/// the per-row `defined` flags, and returns the resulting container.
	///
	/// The row count passed to the function is `values.len()`. Panics if
	/// the function fails or returns anything other than UTF-8 column data.
	fn run_upper(values: &[&str], defined: &[bool]) -> Utf8Container {
		assert_eq!(values.len(), defined.len(), "one defined flag is required per value");

		let utf8_data: Vec<String> = values.iter().map(|value| value.to_string()).collect();
		let bitvec = defined.to_vec();
		let input_column = Column {
			name: Fragment::borrowed_internal("input"),
			data: ColumnData::Utf8 {
				container: Utf8Container::new(utf8_data, bitvec.into()),
				max_bytes: MaxBytes::MAX,
			},
		};
		let columns = Columns::new(vec![input_column]);
		let ctx = ScalarFunctionContext {
			columns: &columns,
			row_count: values.len(),
		};

		let result = TextUpper::new().scalar(ctx).unwrap();

		let ColumnData::Utf8 {
			container,
			..
		} = result
		else {
			panic!("Expected UTF8 column data");
		};
		container
	}

	/// Same as `run_upper`, with every row marked as defined.
	fn run_upper_all_defined(values: &[&str]) -> Utf8Container {
		run_upper(values, &vec![true; values.len()])
	}

	#[test]
	fn test_upper_simple() {
		let container = run_upper_all_defined(&["hello world"]);

		assert_eq!(container.len(), 1);
		assert!(container.is_defined(0));
		assert_eq!(container[0], "HELLO WORLD");
	}

	#[test]
	fn test_upper_mixed_case() {
		let container =
			run_upper_all_defined(&["Hello World", "MiXeD cAsE", "ALREADY UPPER", "lowercase"]);

		assert_eq!(container.len(), 4);
		assert_eq!(container[0], "HELLO WORLD");
		assert_eq!(container[1], "MIXED CASE");
		assert_eq!(container[2], "ALREADY UPPER");
		assert_eq!(container[3], "LOWERCASE");
	}

	#[test]
	fn test_upper_special_characters() {
		let container =
			run_upper_all_defined(&["hello@world.com", "test-123_abc", "with spaces & punctuation!"]);

		assert_eq!(container.len(), 3);
		assert_eq!(container[0], "HELLO@WORLD.COM");
		assert_eq!(container[1], "TEST-123_ABC");
		assert_eq!(container[2], "WITH SPACES & PUNCTUATION!");
	}

	#[test]
	fn test_upper_unicode() {
		let container = run_upper_all_defined(&[
			"café naïve",
			"straße", // German ß
			"ñoño",   // Spanish ñ
		]);

		assert_eq!(container.len(), 3);
		assert_eq!(container[0], "CAFÉ NAÏVE");
		assert_eq!(container[1], "STRASSE"); // ß becomes SS in uppercase
		assert_eq!(container[2], "ÑOÑO");
	}

	#[test]
	fn test_upper_empty_and_whitespace() {
		let container = run_upper_all_defined(&["", "   ", "\t\n\r"]);

		assert_eq!(container.len(), 3);
		assert_eq!(container[0], "");
		assert_eq!(container[1], "   ");
		assert_eq!(container[2], "\t\n\r");
	}

	#[test]
	fn test_upper_with_null_data() {
		// The middle row is undefined; its backing value is irrelevant.
		let container = run_upper(&["hello", "", "world"], &[true, false, true]);

		assert_eq!(container.len(), 3);
		assert!(container.is_defined(0));
		assert!(!container.is_defined(1));
		assert!(container.is_defined(2));

		assert_eq!(container[0], "HELLO");
		assert_eq!(container[2], "WORLD");
	}

	#[test]
	fn test_upper_multibyte_characters() {
		let container = run_upper_all_defined(&[
			"日本語",  // Japanese (no case change)
			"中文",    // Chinese (no case change)
			"한국어",  // Korean (no case change)
			"العربية", // Arabic (no case change)
		]);

		assert_eq!(container.len(), 4);
		// These languages don't have case distinctions, so they remain unchanged
		assert_eq!(container[0], "日本語");
		assert_eq!(container[1], "中文");
		assert_eq!(container[2], "한국어");
		assert_eq!(container[3], "العربية");
	}

	#[test]
	fn test_upper_emoji_and_symbols() {
		let container = run_upper_all_defined(&["hello 🌍 world", "test 💻 code", "data 📊 analysis"]);

		assert_eq!(container.len(), 3);
		assert_eq!(container[0], "HELLO 🌍 WORLD");
		assert_eq!(container[1], "TEST 💻 CODE");
		assert_eq!(container[2], "DATA 📊 ANALYSIS");
	}
}