reifydb_core/value/encoded/
utf8.rs

1// Copyright (c) reifydb.com 2025
2// This file is licensed under the AGPL-3.0-or-later, see license.md file
3
4use reifydb_type::Type;
5
6use crate::value::encoded::{EncodedValues, EncodedValuesLayout};
7
8impl EncodedValuesLayout {
9	pub fn set_utf8(&self, row: &mut EncodedValues, index: usize, value: impl AsRef<str>) {
10		let field = &self.fields[index];
11		debug_assert_eq!(field.r#type, Type::Utf8);
12		debug_assert!(!row.is_defined(index), "UTF8 field {} already set", index);
13
14		let bytes = value.as_ref().as_bytes();
15
16		// Calculate offset in dynamic section (relative to start of
17		// dynamic section)
18		let dynamic_offset = self.dynamic_section_size(row);
19
20		// Append string to dynamic section
21		row.0.extend_from_slice(bytes);
22
23		// Update reference in static section: [offset: u32][length:
24		// u32]
25		let ref_slice = &mut row.0.make_mut()[field.offset..field.offset + 8];
26		ref_slice[0..4].copy_from_slice(&(dynamic_offset as u32).to_le_bytes());
27		ref_slice[4..8].copy_from_slice(&(bytes.len() as u32).to_le_bytes());
28
29		row.set_valid(index, true);
30	}
31
32	pub fn get_utf8<'a>(&'a self, row: &'a EncodedValues, index: usize) -> &'a str {
33		let field = &self.fields[index];
34		debug_assert_eq!(field.r#type, Type::Utf8);
35
36		// Read offset and length from static section
37		let ref_slice = &row.as_slice()[field.offset..field.offset + 8];
38		let offset = u32::from_le_bytes([ref_slice[0], ref_slice[1], ref_slice[2], ref_slice[3]]) as usize;
39		let length = u32::from_le_bytes([ref_slice[4], ref_slice[5], ref_slice[6], ref_slice[7]]) as usize;
40
41		// Get string from dynamic section
42		let dynamic_start = self.dynamic_section_start();
43		let string_start = dynamic_start + offset;
44		let string_slice = &row.as_slice()[string_start..string_start + length];
45
46		unsafe { std::str::from_utf8_unchecked(string_slice) }
47	}
48
49	pub fn try_get_utf8<'a>(&'a self, row: &'a EncodedValues, index: usize) -> Option<&'a str> {
50		if row.is_defined(index) {
51			Some(self.get_utf8(row, index))
52		} else {
53			None
54		}
55	}
56}
57
#[cfg(test)]
mod tests {
	//! Round-trip tests for the UTF-8 accessors of `EncodedValuesLayout`.

	use reifydb_type::Type;

	use crate::value::encoded::EncodedValuesLayout;

	/// A value written with `set_utf8` is read back unchanged.
	#[test]
	fn test_set_get_utf8() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8]);
		let mut row = layout.allocate();
		layout.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(layout.get_utf8(&row, 0), "reifydb");
	}

	/// `try_get_utf8` yields `None` before the field is set and the value
	/// afterwards.
	#[test]
	fn test_try_get_utf8() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8]);
		let mut row = layout.allocate();

		assert_eq!(layout.try_get_utf8(&row, 0), None);

		layout.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(layout.try_get_utf8(&row, 0), Some("reifydb"));
	}

	/// An empty string is a defined value, distinct from an unset field.
	#[test]
	fn test_empty_string() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8]);
		let mut row = layout.allocate();
		layout.set_utf8(&mut row, 0, "");
		assert_eq!(layout.get_utf8(&row, 0), "");
		assert_eq!(layout.try_get_utf8(&row, 0), Some(""));
	}

	/// Multi-byte code points (4-byte emoji, 3-byte CJK) survive the round
	/// trip, since the length is stored in bytes rather than characters.
	#[test]
	fn test_unicode() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8]);
		let mut row = layout.allocate();

		let unicode_text = "🚀✨🌟 Hello 世界 🎉";
		layout.set_utf8(&mut row, 0, unicode_text);
		assert_eq!(layout.get_utf8(&row, 0), unicode_text);
		assert_eq!(layout.try_get_utf8(&row, 0), Some(unicode_text));
	}

	/// A 1000-byte value round-trips.
	#[test]
	fn test_large_string() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8]);
		let mut row = layout.allocate();

		let large_string = "A".repeat(1000);
		layout.set_utf8(&mut row, 0, &large_string);
		assert_eq!(layout.get_utf8(&row, 0), large_string);
		assert_eq!(layout.try_get_utf8(&row, 0), Some(large_string.as_str()));
	}

	/// Several UTF-8 fields in one row do not overwrite each other.
	#[test]
	fn test_multiple_fields() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = layout.allocate();

		layout.set_utf8(&mut row, 0, "first");
		layout.set_utf8(&mut row, 1, "second string");
		layout.set_utf8(&mut row, 2, "third");

		assert_eq!(layout.get_utf8(&row, 0), "first");
		assert_eq!(layout.get_utf8(&row, 1), "second string");
		assert_eq!(layout.get_utf8(&row, 2), "third");
	}

	/// UTF-8 fields interleaved with boolean and integer fields keep their
	/// values, and so do the other fields.
	#[test]
	fn test_mixed_with_static_fields() {
		let layout = EncodedValuesLayout::new(&[Type::Boolean, Type::Utf8, Type::Int4, Type::Utf8]);
		let mut row = layout.allocate();

		layout.set_bool(&mut row, 0, true);
		layout.set_utf8(&mut row, 1, "hello world");
		layout.set_i32(&mut row, 2, 42);
		layout.set_utf8(&mut row, 3, "goodbye");

		assert!(layout.get_bool(&row, 0));
		assert_eq!(layout.get_utf8(&row, 1), "hello world");
		assert_eq!(layout.get_i32(&row, 2), 42);
		assert_eq!(layout.get_utf8(&row, 3), "goodbye");
	}

	/// Values of different lengths, including an empty one, can share a
	/// row.
	#[test]
	fn test_different_sizes() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = layout.allocate();

		layout.set_utf8(&mut row, 0, "");
		layout.set_utf8(&mut row, 1, "medium length string here");
		layout.set_utf8(&mut row, 2, "x");

		assert_eq!(layout.get_utf8(&row, 0), "");
		assert_eq!(layout.get_utf8(&row, 1), "medium length string here");
		assert_eq!(layout.get_utf8(&row, 2), "x");
	}

	/// Fields may be set in any order; each one still reads back its own
	/// value.
	#[test]
	fn test_arbitrary_setting_order() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = layout.allocate();

		// Set in reverse order
		layout.set_utf8(&mut row, 3, "fourth");
		layout.set_utf8(&mut row, 1, "second");
		layout.set_utf8(&mut row, 0, "first");
		layout.set_utf8(&mut row, 2, "third");

		assert_eq!(layout.get_utf8(&row, 0), "first");
		assert_eq!(layout.get_utf8(&row, 1), "second");
		assert_eq!(layout.get_utf8(&row, 2), "third");
		assert_eq!(layout.get_utf8(&row, 3), "fourth");
	}

	/// Whitespace, control characters and quotes are stored verbatim.
	#[test]
	fn test_special_characters() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8]);

		let special_strings = [
			"",
			" ",
			"\n",
			"\t",
			"\r\n",
			"\"quoted\"",
			"'single quotes'",
			"line1\nline2\nline3",
			"tabs\there\tand\there",
			"mixed\twhite\n \r\n\tspace",
		];

		for special_str in special_strings {
			// Fresh row per value: a field may only be set once.
			let mut row = layout.allocate();
			layout.set_utf8(&mut row, 0, special_str);
			assert_eq!(layout.get_utf8(&row, 0), special_str);
		}
	}

	/// Unset fields, and fields reset with `set_undefined`, read as `None`
	/// without disturbing the other fields.
	#[test]
	fn test_undefined_handling() {
		let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = layout.allocate();

		// Set only some fields
		layout.set_utf8(&mut row, 0, "defined");
		layout.set_utf8(&mut row, 2, "also defined");

		assert_eq!(layout.try_get_utf8(&row, 0), Some("defined"));
		assert_eq!(layout.try_get_utf8(&row, 1), None);
		assert_eq!(layout.try_get_utf8(&row, 2), Some("also defined"));

		// Set field as undefined
		layout.set_undefined(&mut row, 0);
		assert_eq!(layout.try_get_utf8(&row, 0), None);
		assert_eq!(layout.try_get_utf8(&row, 2), Some("also defined"));
	}
}