// reifydb_core/encoded/utf8.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::str;
5
6use reifydb_type::value::r#type::Type;
7
8use crate::encoded::{row::EncodedRow, shape::RowShape};
9
10impl RowShape {
11	pub fn set_utf8(&self, row: &mut EncodedRow, index: usize, value: impl AsRef<str>) {
12		debug_assert_eq!(*self.fields()[index].constraint.get_type().inner_type(), Type::Utf8);
13		self.replace_dynamic_data(row, index, value.as_ref().as_bytes());
14	}
15
16	pub fn get_utf8<'a>(&'a self, row: &'a EncodedRow, index: usize) -> &'a str {
17		let field = &self.fields()[index];
18		debug_assert_eq!(*field.constraint.get_type().inner_type(), Type::Utf8);
19
20		let ref_slice = &row.as_slice()[field.offset as usize..field.offset as usize + 8];
21		let offset = u32::from_le_bytes([ref_slice[0], ref_slice[1], ref_slice[2], ref_slice[3]]) as usize;
22		let length = u32::from_le_bytes([ref_slice[4], ref_slice[5], ref_slice[6], ref_slice[7]]) as usize;
23
24		let dynamic_start = self.dynamic_section_start();
25		let string_start = dynamic_start + offset;
26		let string_slice = &row.as_slice()[string_start..string_start + length];
27
28		unsafe { str::from_utf8_unchecked(string_slice) }
29	}
30
31	pub fn try_get_utf8<'a>(&'a self, row: &'a EncodedRow, index: usize) -> Option<&'a str> {
32		if row.is_defined(index) && self.fields()[index].constraint.get_type() == Type::Utf8 {
33			Some(self.get_utf8(row, index))
34		} else {
35			None
36		}
37	}
38}
39
#[cfg(test)]
pub mod tests {
	//! Round-trip tests for the UTF-8 accessors on `RowShape`.

	use reifydb_type::value::r#type::Type;

	use crate::encoded::shape::RowShape;

	/// A value written with `set_utf8` is read back unchanged by `get_utf8`.
	#[test]
	fn test_set_get_utf8() {
		let shape = RowShape::testing(&[Type::Utf8]);
		let mut row = shape.allocate();
		shape.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(shape.get_utf8(&row, 0), "reifydb");
	}

	/// `try_get_utf8` yields `None` before the field is set and the value after.
	#[test]
	fn test_try_get_utf8() {
		let shape = RowShape::testing(&[Type::Utf8]);
		let mut row = shape.allocate();

		assert_eq!(shape.try_get_utf8(&row, 0), None);

		shape.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(shape.try_get_utf8(&row, 0), Some("reifydb"));
	}

	/// An empty string is a defined value, distinct from an unset field.
	#[test]
	fn test_empty_string() {
		let shape = RowShape::testing(&[Type::Utf8]);
		let mut row = shape.allocate();
		shape.set_utf8(&mut row, 0, "");
		assert_eq!(shape.get_utf8(&row, 0), "");
		assert_eq!(shape.try_get_utf8(&row, 0), Some(""));
	}

	/// Multi-byte text (4-byte emoji, 3-byte CJK) survives the round trip.
	#[test]
	fn test_unicode() {
		let shape = RowShape::testing(&[Type::Utf8]);
		let mut row = shape.allocate();

		let unicode_text = "🚀✨🌟 Hello 世界 🎉";
		shape.set_utf8(&mut row, 0, unicode_text);
		assert_eq!(shape.get_utf8(&row, 0), unicode_text);
		assert_eq!(shape.try_get_utf8(&row, 0), Some(unicode_text));
	}

	/// A 1000-byte payload round-trips.
	#[test]
	fn test_large_string() {
		let shape = RowShape::testing(&[Type::Utf8]);
		let mut row = shape.allocate();

		let large_string = "A".repeat(1000);
		shape.set_utf8(&mut row, 0, &large_string);
		assert_eq!(shape.get_utf8(&row, 0), large_string);
		assert_eq!(shape.try_get_utf8(&row, 0), Some(large_string.as_str()));
	}

	/// Several UTF-8 fields in one row keep their own values.
	#[test]
	fn test_multiple_fields() {
		let shape = RowShape::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = shape.allocate();

		shape.set_utf8(&mut row, 0, "first");
		shape.set_utf8(&mut row, 1, "second string");
		shape.set_utf8(&mut row, 2, "third");

		assert_eq!(shape.get_utf8(&row, 0), "first");
		assert_eq!(shape.get_utf8(&row, 1), "second string");
		assert_eq!(shape.get_utf8(&row, 2), "third");
	}

	/// UTF-8 fields interleaved with fixed-size fields do not disturb each other.
	#[test]
	fn test_mixed_with_static_fields() {
		let shape = RowShape::testing(&[Type::Boolean, Type::Utf8, Type::Int4, Type::Utf8]);
		let mut row = shape.allocate();

		shape.set_bool(&mut row, 0, true);
		shape.set_utf8(&mut row, 1, "hello world");
		shape.set_i32(&mut row, 2, 42);
		shape.set_utf8(&mut row, 3, "goodbye");

		assert!(shape.get_bool(&row, 0));
		assert_eq!(shape.get_utf8(&row, 1), "hello world");
		assert_eq!(shape.get_i32(&row, 2), 42);
		assert_eq!(shape.get_utf8(&row, 3), "goodbye");
	}

	/// Empty, long and single-character values coexist in one row.
	#[test]
	fn test_different_sizes() {
		let shape = RowShape::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = shape.allocate();

		shape.set_utf8(&mut row, 0, "");
		shape.set_utf8(&mut row, 1, "medium length string here");
		shape.set_utf8(&mut row, 2, "x");

		assert_eq!(shape.get_utf8(&row, 0), "");
		assert_eq!(shape.get_utf8(&row, 1), "medium length string here");
		assert_eq!(shape.get_utf8(&row, 2), "x");
	}

	/// Fields may be written in any order, not just by ascending index.
	#[test]
	fn test_arbitrary_setting_order() {
		let shape = RowShape::testing(&[Type::Utf8, Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = shape.allocate();

		// Set in reverse order
		shape.set_utf8(&mut row, 3, "fourth");
		shape.set_utf8(&mut row, 1, "second");
		shape.set_utf8(&mut row, 0, "first");
		shape.set_utf8(&mut row, 2, "third");

		assert_eq!(shape.get_utf8(&row, 0), "first");
		assert_eq!(shape.get_utf8(&row, 1), "second");
		assert_eq!(shape.get_utf8(&row, 2), "third");
		assert_eq!(shape.get_utf8(&row, 3), "fourth");
	}

	/// Whitespace, control characters and quotes are stored verbatim.
	#[test]
	fn test_special_characters() {
		let shape = RowShape::testing(&[Type::Utf8]);

		let special_strings = [
			"",
			" ",
			"\n",
			"\t",
			"\r\n",
			"\"quoted\"",
			"'single quotes'",
			"line1\nline2\nline3",
			"tabs\there\tand\there",
			"mixed\twhite\n \r\n\tspace",
		];

		// A fresh row per value keeps the cases independent of each other.
		for special_str in special_strings {
			let mut row = shape.allocate();
			shape.set_utf8(&mut row, 0, special_str);
			assert_eq!(shape.get_utf8(&row, 0), special_str);
		}
	}

	/// Unset fields, and fields cleared with `set_none`, read as `None`.
	#[test]
	fn test_undefined_handling() {
		let shape = RowShape::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = shape.allocate();

		// Set only some fields
		shape.set_utf8(&mut row, 0, "defined");
		shape.set_utf8(&mut row, 2, "also defined");

		assert_eq!(shape.try_get_utf8(&row, 0), Some("defined"));
		assert_eq!(shape.try_get_utf8(&row, 1), None);
		assert_eq!(shape.try_get_utf8(&row, 2), Some("also defined"));

		// Set field as undefined
		shape.set_none(&mut row, 0);
		assert_eq!(shape.try_get_utf8(&row, 0), None);
		assert_eq!(shape.try_get_utf8(&row, 2), Some("also defined"));
	}

	/// A defined field of another type is never exposed as a string.
	#[test]
	fn test_try_get_utf8_wrong_type() {
		let shape = RowShape::testing(&[Type::Boolean]);
		let mut row = shape.allocate();

		shape.set_bool(&mut row, 0, true);

		assert_eq!(shape.try_get_utf8(&row, 0), None);
	}

	/// Overwriting a value resizes the row to fit the new payload.
	#[test]
	fn test_update_utf8() {
		let shape = RowShape::testing(&[Type::Utf8]);
		let mut row = shape.allocate();

		shape.set_utf8(&mut row, 0, "hello");
		assert_eq!(shape.get_utf8(&row, 0), "hello");
		let size_after_first = row.len();

		// Overwrite with shorter string
		shape.set_utf8(&mut row, 0, "hi");
		assert_eq!(shape.get_utf8(&row, 0), "hi");
		assert_eq!(row.len(), size_after_first - 3); // "hello"(5) -> "hi"(2)

		// Overwrite with longer string
		shape.set_utf8(&mut row, 0, "hello world");
		assert_eq!(shape.get_utf8(&row, 0), "hello world");

		// Overwrite with empty string
		shape.set_utf8(&mut row, 0, "");
		assert_eq!(shape.get_utf8(&row, 0), "");
		assert_eq!(row.len(), shape.total_static_size());
	}

	/// Resizing one field leaves its neighbours readable and leaves no stale bytes.
	#[test]
	fn test_update_utf8_with_other_dynamic_fields() {
		let shape = RowShape::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = shape.allocate();

		shape.set_utf8(&mut row, 0, "first");
		shape.set_utf8(&mut row, 1, "second");
		shape.set_utf8(&mut row, 2, "third");

		// Update middle field with a longer string
		shape.set_utf8(&mut row, 1, "much longer second string");

		// All fields should read correctly
		assert_eq!(shape.get_utf8(&row, 0), "first");
		assert_eq!(shape.get_utf8(&row, 1), "much longer second string");
		assert_eq!(shape.get_utf8(&row, 2), "third");

		// Update first field with shorter string
		shape.set_utf8(&mut row, 0, "f");
		assert_eq!(shape.get_utf8(&row, 0), "f");
		assert_eq!(shape.get_utf8(&row, 1), "much longer second string");
		assert_eq!(shape.get_utf8(&row, 2), "third");

		// No orphan data: total size = static + sum of current strings
		let expected = shape.total_static_size() + 1 + 25 + 5;
		assert_eq!(row.len(), expected);
	}
}
261}