Skip to main content

reifydb_core/encoded/
utf8.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::str;
5
6use reifydb_type::value::r#type::Type;
7
8use crate::encoded::{row::EncodedRow, schema::RowSchema};
9
10impl RowSchema {
11	pub fn set_utf8(&self, row: &mut EncodedRow, index: usize, value: impl AsRef<str>) {
12		debug_assert_eq!(*self.fields()[index].constraint.get_type().inner_type(), Type::Utf8);
13		self.replace_dynamic_data(row, index, value.as_ref().as_bytes());
14	}
15
16	pub fn get_utf8<'a>(&'a self, row: &'a EncodedRow, index: usize) -> &'a str {
17		let field = &self.fields()[index];
18		debug_assert_eq!(*field.constraint.get_type().inner_type(), Type::Utf8);
19
20		// Read offset and length from static section
21		let ref_slice = &row.as_slice()[field.offset as usize..field.offset as usize + 8];
22		let offset = u32::from_le_bytes([ref_slice[0], ref_slice[1], ref_slice[2], ref_slice[3]]) as usize;
23		let length = u32::from_le_bytes([ref_slice[4], ref_slice[5], ref_slice[6], ref_slice[7]]) as usize;
24
25		// Get string from dynamic section
26		let dynamic_start = self.dynamic_section_start();
27		let string_start = dynamic_start + offset;
28		let string_slice = &row.as_slice()[string_start..string_start + length];
29
30		unsafe { str::from_utf8_unchecked(string_slice) }
31	}
32
33	pub fn try_get_utf8<'a>(&'a self, row: &'a EncodedRow, index: usize) -> Option<&'a str> {
34		if row.is_defined(index) && self.fields()[index].constraint.get_type() == Type::Utf8 {
35			Some(self.get_utf8(row, index))
36		} else {
37			None
38		}
39	}
40}
41
#[cfg(test)]
pub mod tests {
	use reifydb_type::value::r#type::Type;

	use crate::encoded::schema::RowSchema;

	/// A value written with `set_utf8` is read back unchanged by `get_utf8`.
	#[test]
	fn test_set_get_utf8() {
		let schema = RowSchema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();
		schema.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(schema.get_utf8(&row, 0), "reifydb");
	}

	/// `try_get_utf8` yields `None` on a freshly allocated row and `Some` once set.
	#[test]
	fn test_try_get_utf8() {
		let schema = RowSchema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		assert_eq!(schema.try_get_utf8(&row, 0), None);

		schema.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(schema.try_get_utf8(&row, 0), Some("reifydb"));
	}

	/// An empty string is a defined value, distinct from an unset field.
	#[test]
	fn test_empty_string() {
		let schema = RowSchema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();
		schema.set_utf8(&mut row, 0, "");
		assert_eq!(schema.get_utf8(&row, 0), "");
		assert_eq!(schema.try_get_utf8(&row, 0), Some(""));
	}

	/// Multi-byte text round-trips: emoji (4-byte UTF-8 sequences) and CJK
	/// characters (3-byte sequences) mixed with ASCII.
	#[test]
	fn test_unicode() {
		let schema = RowSchema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		let unicode_text = "🚀✨🌟 Hello 世界 🎉";
		schema.set_utf8(&mut row, 0, unicode_text);
		assert_eq!(schema.get_utf8(&row, 0), unicode_text);
		assert_eq!(schema.try_get_utf8(&row, 0), Some(unicode_text));
	}

	/// A 1000-byte string round-trips.
	#[test]
	fn test_large_string() {
		let schema = RowSchema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		let large_string = "A".repeat(1000);
		schema.set_utf8(&mut row, 0, &large_string);
		assert_eq!(schema.get_utf8(&row, 0), large_string);
		assert_eq!(schema.try_get_utf8(&row, 0), Some(large_string.as_str()));
	}

	/// Several UTF-8 fields in one row keep their own values.
	#[test]
	fn test_multiple_fields() {
		let schema = RowSchema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_utf8(&mut row, 0, "first");
		schema.set_utf8(&mut row, 1, "second string");
		schema.set_utf8(&mut row, 2, "third");

		assert_eq!(schema.get_utf8(&row, 0), "first");
		assert_eq!(schema.get_utf8(&row, 1), "second string");
		assert_eq!(schema.get_utf8(&row, 2), "third");
	}

	/// UTF-8 fields interleaved with boolean and integer fields do not disturb
	/// each other.
	#[test]
	fn test_mixed_with_static_fields() {
		let schema = RowSchema::testing(&[Type::Boolean, Type::Utf8, Type::Int4, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_bool(&mut row, 0, true);
		schema.set_utf8(&mut row, 1, "hello world");
		schema.set_i32(&mut row, 2, 42);
		schema.set_utf8(&mut row, 3, "goodbye");

		assert!(schema.get_bool(&row, 0));
		assert_eq!(schema.get_utf8(&row, 1), "hello world");
		assert_eq!(schema.get_i32(&row, 2), 42);
		assert_eq!(schema.get_utf8(&row, 3), "goodbye");
	}

	/// Empty, medium and single-character strings coexist in one row.
	#[test]
	fn test_different_sizes() {
		let schema = RowSchema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_utf8(&mut row, 0, "");
		schema.set_utf8(&mut row, 1, "medium length string here");
		schema.set_utf8(&mut row, 2, "x");

		assert_eq!(schema.get_utf8(&row, 0), "");
		assert_eq!(schema.get_utf8(&row, 1), "medium length string here");
		assert_eq!(schema.get_utf8(&row, 2), "x");
	}

	/// The order in which fields are written does not affect what is read back.
	#[test]
	fn test_arbitrary_setting_order() {
		let schema = RowSchema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		// Deliberately not in index order.
		schema.set_utf8(&mut row, 3, "fourth");
		schema.set_utf8(&mut row, 1, "second");
		schema.set_utf8(&mut row, 0, "first");
		schema.set_utf8(&mut row, 2, "third");

		assert_eq!(schema.get_utf8(&row, 0), "first");
		assert_eq!(schema.get_utf8(&row, 1), "second");
		assert_eq!(schema.get_utf8(&row, 2), "third");
		assert_eq!(schema.get_utf8(&row, 3), "fourth");
	}

	/// Whitespace, control characters and quotes round-trip byte-for-byte.
	#[test]
	fn test_special_characters() {
		let schema = RowSchema::testing(&[Type::Utf8]);

		let special_strings = [
			"",
			" ",
			"\n",
			"\t",
			"\r\n",
			"\"quoted\"",
			"'single quotes'",
			"line1\nline2\nline3",
			"tabs\there\tand\there",
			"mixed\twhite\n \r\n\tspace",
		];

		// A fresh row per case keeps the cases independent of each other.
		for special_str in special_strings {
			let mut row = schema.allocate();
			schema.set_utf8(&mut row, 0, special_str);
			assert_eq!(schema.get_utf8(&row, 0), special_str);
		}
	}

	/// Unset fields, and fields cleared with `set_none`, read as `None` without
	/// affecting their neighbours.
	#[test]
	fn test_undefined_handling() {
		let schema = RowSchema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		// Field 1 is intentionally left unset.
		schema.set_utf8(&mut row, 0, "defined");
		schema.set_utf8(&mut row, 2, "also defined");

		assert_eq!(schema.try_get_utf8(&row, 0), Some("defined"));
		assert_eq!(schema.try_get_utf8(&row, 1), None);
		assert_eq!(schema.try_get_utf8(&row, 2), Some("also defined"));

		// Clearing a previously set field makes it undefined again.
		schema.set_none(&mut row, 0);
		assert_eq!(schema.try_get_utf8(&row, 0), None);
		assert_eq!(schema.try_get_utf8(&row, 2), Some("also defined"));
	}

	/// `try_get_utf8` returns `None` for a defined field of another type.
	#[test]
	fn test_try_get_utf8_wrong_type() {
		let schema = RowSchema::testing(&[Type::Boolean]);
		let mut row = schema.allocate();

		schema.set_bool(&mut row, 0, true);

		assert_eq!(schema.try_get_utf8(&row, 0), None);
	}

	/// Overwriting a value resizes the row to fit the new payload exactly.
	#[test]
	fn test_update_utf8() {
		let schema = RowSchema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_utf8(&mut row, 0, "hello");
		assert_eq!(schema.get_utf8(&row, 0), "hello");
		let size_after_first = row.len();

		// Shorter replacement: "hello" (5 bytes) -> "hi" (2 bytes) shrinks the row by 3.
		schema.set_utf8(&mut row, 0, "hi");
		assert_eq!(schema.get_utf8(&row, 0), "hi");
		assert_eq!(row.len(), size_after_first - 3);

		// Longer replacement.
		schema.set_utf8(&mut row, 0, "hello world");
		assert_eq!(schema.get_utf8(&row, 0), "hello world");

		// Empty replacement leaves only the static section.
		schema.set_utf8(&mut row, 0, "");
		assert_eq!(schema.get_utf8(&row, 0), "");
		assert_eq!(row.len(), schema.total_static_size());
	}

	/// Updating one dynamic field keeps the other dynamic fields readable and
	/// leaves no stale bytes behind.
	#[test]
	fn test_update_utf8_with_other_dynamic_fields() {
		let schema = RowSchema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_utf8(&mut row, 0, "first");
		schema.set_utf8(&mut row, 1, "second");
		schema.set_utf8(&mut row, 2, "third");

		// Grow the middle field.
		schema.set_utf8(&mut row, 1, "much longer second string");

		assert_eq!(schema.get_utf8(&row, 0), "first");
		assert_eq!(schema.get_utf8(&row, 1), "much longer second string");
		assert_eq!(schema.get_utf8(&row, 2), "third");

		// Shrink the first field.
		schema.set_utf8(&mut row, 0, "f");
		assert_eq!(schema.get_utf8(&row, 0), "f");
		assert_eq!(schema.get_utf8(&row, 1), "much longer second string");
		assert_eq!(schema.get_utf8(&row, 2), "third");

		// No orphan data: total size = static section + current payloads
		// ("f" = 1, "much longer second string" = 25, "third" = 5).
		let expected = schema.total_static_size() + 1 + 25 + 5;
		assert_eq!(row.len(), expected);
	}
}
263}