// reifydb_core/value/encoded/utf8.rs
use reifydb_type::Type;

use crate::value::encoded::{EncodedValues, EncodedValuesLayout};

8impl EncodedValuesLayout {
9 pub fn set_utf8(&self, row: &mut EncodedValues, index: usize, value: impl AsRef<str>) {
10 let field = &self.fields[index];
11 debug_assert_eq!(field.r#type, Type::Utf8);
12 debug_assert!(!row.is_defined(index), "UTF8 field {} already set", index);
13
14 let bytes = value.as_ref().as_bytes();
15
16 let dynamic_offset = self.dynamic_section_size(row);
19
20 row.0.extend_from_slice(bytes);
22
23 let ref_slice = &mut row.0.make_mut()[field.offset..field.offset + 8];
26 ref_slice[0..4].copy_from_slice(&(dynamic_offset as u32).to_le_bytes());
27 ref_slice[4..8].copy_from_slice(&(bytes.len() as u32).to_le_bytes());
28
29 row.set_valid(index, true);
30 }
31
32 pub fn get_utf8<'a>(&'a self, row: &'a EncodedValues, index: usize) -> &'a str {
33 let field = &self.fields[index];
34 debug_assert_eq!(field.r#type, Type::Utf8);
35
36 let ref_slice = &row.as_slice()[field.offset..field.offset + 8];
38 let offset = u32::from_le_bytes([ref_slice[0], ref_slice[1], ref_slice[2], ref_slice[3]]) as usize;
39 let length = u32::from_le_bytes([ref_slice[4], ref_slice[5], ref_slice[6], ref_slice[7]]) as usize;
40
41 let dynamic_start = self.dynamic_section_start();
43 let string_start = dynamic_start + offset;
44 let string_slice = &row.as_slice()[string_start..string_start + length];
45
46 unsafe { std::str::from_utf8_unchecked(string_slice) }
47 }
48
49 pub fn try_get_utf8<'a>(&'a self, row: &'a EncodedValues, index: usize) -> Option<&'a str> {
50 if row.is_defined(index) && self.fields[index].r#type == Type::Utf8 {
51 Some(self.get_utf8(row, index))
52 } else {
53 None
54 }
55 }
56}
57
#[cfg(test)]
mod tests {
    //! Round-trip tests for the UTF-8 accessors of `EncodedValuesLayout`.

    use reifydb_type::Type;

    use crate::value::encoded::EncodedValuesLayout;

    /// A value written with `set_utf8` is read back unchanged.
    #[test]
    fn test_set_get_utf8() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8]);
        let mut row = layout.allocate();
        layout.set_utf8(&mut row, 0, "reifydb");
        assert_eq!(layout.get_utf8(&row, 0), "reifydb");
    }

    /// `try_get_utf8` yields `None` before the field is set and the value afterwards.
    #[test]
    fn test_try_get_utf8() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8]);
        let mut row = layout.allocate();

        assert_eq!(layout.try_get_utf8(&row, 0), None);

        layout.set_utf8(&mut row, 0, "reifydb");
        assert_eq!(layout.try_get_utf8(&row, 0), Some("reifydb"));
    }

    /// An empty string is a defined value, distinct from an unset field.
    #[test]
    fn test_empty_string() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8]);
        let mut row = layout.allocate();
        layout.set_utf8(&mut row, 0, "");
        assert_eq!(layout.get_utf8(&row, 0), "");
        assert_eq!(layout.try_get_utf8(&row, 0), Some(""));
    }

    /// Multi-byte text survives the round trip.
    #[test]
    fn test_unicode() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8]);
        let mut row = layout.allocate();

        // Mixes 1-byte (ASCII), 3-byte (CJK, U+2728) and 4-byte (emoji)
        // UTF-8 sequences so every encoded width is exercised.
        let unicode_text = "🚀✨🌟 Hello 世界 🎉";
        layout.set_utf8(&mut row, 0, unicode_text);
        assert_eq!(layout.get_utf8(&row, 0), unicode_text);
        assert_eq!(layout.try_get_utf8(&row, 0), Some(unicode_text));
    }

    /// A 1000-byte value round-trips.
    #[test]
    fn test_large_string() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8]);
        let mut row = layout.allocate();

        let large_string = "A".repeat(1000);
        layout.set_utf8(&mut row, 0, &large_string);
        assert_eq!(layout.get_utf8(&row, 0), large_string);
        assert_eq!(layout.try_get_utf8(&row, 0), Some(large_string.as_str()));
    }

    /// Several UTF-8 fields in one row do not overwrite each other.
    #[test]
    fn test_multiple_fields() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8]);
        let mut row = layout.allocate();

        layout.set_utf8(&mut row, 0, "first");
        layout.set_utf8(&mut row, 1, "second string");
        layout.set_utf8(&mut row, 2, "third");

        assert_eq!(layout.get_utf8(&row, 0), "first");
        assert_eq!(layout.get_utf8(&row, 1), "second string");
        assert_eq!(layout.get_utf8(&row, 2), "third");
    }

    /// UTF-8 fields interleaved with fixed-size fields keep all values intact.
    #[test]
    fn test_mixed_with_static_fields() {
        let layout = EncodedValuesLayout::new(&[Type::Boolean, Type::Utf8, Type::Int4, Type::Utf8]);
        let mut row = layout.allocate();

        layout.set_bool(&mut row, 0, true);
        layout.set_utf8(&mut row, 1, "hello world");
        layout.set_i32(&mut row, 2, 42);
        layout.set_utf8(&mut row, 3, "goodbye");

        assert!(layout.get_bool(&row, 0));
        assert_eq!(layout.get_utf8(&row, 1), "hello world");
        assert_eq!(layout.get_i32(&row, 2), 42);
        assert_eq!(layout.get_utf8(&row, 3), "goodbye");
    }

    /// Values of different lengths, including an empty one, coexist in a row.
    #[test]
    fn test_different_sizes() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8]);
        let mut row = layout.allocate();

        layout.set_utf8(&mut row, 0, "");
        layout.set_utf8(&mut row, 1, "medium length string here");
        layout.set_utf8(&mut row, 2, "x");

        assert_eq!(layout.get_utf8(&row, 0), "");
        assert_eq!(layout.get_utf8(&row, 1), "medium length string here");
        assert_eq!(layout.get_utf8(&row, 2), "x");
    }

    /// Fields may be set in any order, not only by ascending index.
    #[test]
    fn test_arbitrary_setting_order() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8, Type::Utf8]);
        let mut row = layout.allocate();

        layout.set_utf8(&mut row, 3, "fourth");
        layout.set_utf8(&mut row, 1, "second");
        layout.set_utf8(&mut row, 0, "first");
        layout.set_utf8(&mut row, 2, "third");

        assert_eq!(layout.get_utf8(&row, 0), "first");
        assert_eq!(layout.get_utf8(&row, 1), "second");
        assert_eq!(layout.get_utf8(&row, 2), "third");
        assert_eq!(layout.get_utf8(&row, 3), "fourth");
    }

    /// Whitespace, control characters and quotes are stored verbatim.
    #[test]
    fn test_special_characters() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8]);

        let special_strings = [
            "",
            " ",
            "\n",
            "\t",
            "\r\n",
            "\"quoted\"",
            "'single quotes'",
            "line1\nline2\nline3",
            "tabs\there\tand\there",
            "mixed\twhite\n \r\n\tspace",
        ];

        for special_str in special_strings {
            // A fresh row per value: a field may only be set once.
            let mut row = layout.allocate();
            layout.set_utf8(&mut row, 0, special_str);
            assert_eq!(layout.get_utf8(&row, 0), special_str);
        }
    }

    /// Unset and explicitly undefined fields read as `None` without affecting others.
    #[test]
    fn test_undefined_handling() {
        let layout = EncodedValuesLayout::new(&[Type::Utf8, Type::Utf8, Type::Utf8]);
        let mut row = layout.allocate();

        layout.set_utf8(&mut row, 0, "defined");
        layout.set_utf8(&mut row, 2, "also defined");

        assert_eq!(layout.try_get_utf8(&row, 0), Some("defined"));
        assert_eq!(layout.try_get_utf8(&row, 1), None);
        assert_eq!(layout.try_get_utf8(&row, 2), Some("also defined"));

        layout.set_undefined(&mut row, 0);
        assert_eq!(layout.try_get_utf8(&row, 0), None);
        assert_eq!(layout.try_get_utf8(&row, 2), Some("also defined"));
    }

    /// A defined field of another type is rejected by `try_get_utf8`.
    #[test]
    fn test_try_get_utf8_wrong_type() {
        let layout = EncodedValuesLayout::new(&[Type::Boolean]);
        let mut row = layout.allocate();

        layout.set_bool(&mut row, 0, true);

        assert_eq!(layout.try_get_utf8(&row, 0), None);
    }
}