Skip to main content

reifydb_core/encoded/
utf8.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (c) 2025 ReifyDB
3
4use std::str;
5
6use reifydb_type::value::r#type::Type;
7
8use crate::encoded::{encoded::EncodedValues, schema::Schema};
9
10impl Schema {
11	pub fn set_utf8(&self, row: &mut EncodedValues, index: usize, value: impl AsRef<str>) {
12		let field = &self.fields()[index];
13		debug_assert_eq!(*field.constraint.get_type().inner_type(), Type::Utf8);
14		debug_assert!(!row.is_defined(index), "UTF8 field {} already set", index);
15
16		let bytes = value.as_ref().as_bytes();
17
18		// Calculate offset in dynamic section (relative to start of
19		// dynamic section)
20		let dynamic_offset = self.dynamic_section_size(row);
21
22		// Append string to dynamic section
23		row.0.extend_from_slice(bytes);
24
25		// Update reference in static section: [offset: u32][length:
26		// u32]
27		let ref_slice = &mut row.0.make_mut()[field.offset as usize..field.offset as usize + 8];
28		ref_slice[0..4].copy_from_slice(&(dynamic_offset as u32).to_le_bytes());
29		ref_slice[4..8].copy_from_slice(&(bytes.len() as u32).to_le_bytes());
30
31		row.set_valid(index, true);
32	}
33
34	pub fn get_utf8<'a>(&'a self, row: &'a EncodedValues, index: usize) -> &'a str {
35		let field = &self.fields()[index];
36		debug_assert_eq!(*field.constraint.get_type().inner_type(), Type::Utf8);
37
38		// Read offset and length from static section
39		let ref_slice = &row.as_slice()[field.offset as usize..field.offset as usize + 8];
40		let offset = u32::from_le_bytes([ref_slice[0], ref_slice[1], ref_slice[2], ref_slice[3]]) as usize;
41		let length = u32::from_le_bytes([ref_slice[4], ref_slice[5], ref_slice[6], ref_slice[7]]) as usize;
42
43		// Get string from dynamic section
44		let dynamic_start = self.dynamic_section_start();
45		let string_start = dynamic_start + offset;
46		let string_slice = &row.as_slice()[string_start..string_start + length];
47
48		unsafe { str::from_utf8_unchecked(string_slice) }
49	}
50
51	pub fn try_get_utf8<'a>(&'a self, row: &'a EncodedValues, index: usize) -> Option<&'a str> {
52		if row.is_defined(index) && self.fields()[index].constraint.get_type() == Type::Utf8 {
53			Some(self.get_utf8(row, index))
54		} else {
55			None
56		}
57	}
58}
59
/// Round-trip tests for the UTF-8 accessors on `Schema`.
#[cfg(test)]
pub mod tests {
	use reifydb_type::value::r#type::Type;

	use crate::encoded::schema::Schema;

	/// A value written with `set_utf8` is read back unchanged.
	#[test]
	fn test_set_get_utf8() {
		let schema = Schema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();
		schema.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(schema.get_utf8(&row, 0), "reifydb");
	}

	/// `try_get_utf8` yields `None` before the field is set and the value
	/// afterwards.
	#[test]
	fn test_try_get_utf8() {
		let schema = Schema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		assert_eq!(schema.try_get_utf8(&row, 0), None);

		schema.set_utf8(&mut row, 0, "reifydb");
		assert_eq!(schema.try_get_utf8(&row, 0), Some("reifydb"));
	}

	/// An explicitly set empty string is a defined value, not `None`.
	#[test]
	fn test_empty_string() {
		let schema = Schema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();
		schema.set_utf8(&mut row, 0, "");
		assert_eq!(schema.get_utf8(&row, 0), "");
		assert_eq!(schema.try_get_utf8(&row, 0), Some(""));
	}

	/// Multi-byte code points (emoji, CJK) survive the round trip.
	#[test]
	fn test_unicode() {
		let schema = Schema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		let unicode_text = "🚀✨🌟 Hello 世界 🎉";
		schema.set_utf8(&mut row, 0, unicode_text);
		assert_eq!(schema.get_utf8(&row, 0), unicode_text);
		assert_eq!(schema.try_get_utf8(&row, 0), Some(unicode_text));
	}

	/// A 1000-byte string is stored and read back in full.
	#[test]
	fn test_large_string() {
		let schema = Schema::testing(&[Type::Utf8]);
		let mut row = schema.allocate();

		let large_string = "A".repeat(1000);
		schema.set_utf8(&mut row, 0, &large_string);
		assert_eq!(schema.get_utf8(&row, 0), large_string);
		assert_eq!(schema.try_get_utf8(&row, 0), Some(large_string.as_str()));
	}

	/// Several UTF-8 fields in one row do not overlap each other.
	#[test]
	fn test_multiple_fields() {
		let schema = Schema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_utf8(&mut row, 0, "first");
		schema.set_utf8(&mut row, 1, "second string");
		schema.set_utf8(&mut row, 2, "third");

		assert_eq!(schema.get_utf8(&row, 0), "first");
		assert_eq!(schema.get_utf8(&row, 1), "second string");
		assert_eq!(schema.get_utf8(&row, 2), "third");
	}

	/// UTF-8 fields interleaved with fixed-size fields keep all values intact.
	#[test]
	fn test_mixed_with_static_fields() {
		let schema = Schema::testing(&[Type::Boolean, Type::Utf8, Type::Int4, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_bool(&mut row, 0, true);
		schema.set_utf8(&mut row, 1, "hello world");
		schema.set_i32(&mut row, 2, 42);
		schema.set_utf8(&mut row, 3, "goodbye");

		assert!(schema.get_bool(&row, 0));
		assert_eq!(schema.get_utf8(&row, 1), "hello world");
		assert_eq!(schema.get_i32(&row, 2), 42);
		assert_eq!(schema.get_utf8(&row, 3), "goodbye");
	}

	/// Empty, medium and single-character values can share one row.
	#[test]
	fn test_different_sizes() {
		let schema = Schema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		schema.set_utf8(&mut row, 0, "");
		schema.set_utf8(&mut row, 1, "medium length string here");
		schema.set_utf8(&mut row, 2, "x");

		assert_eq!(schema.get_utf8(&row, 0), "");
		assert_eq!(schema.get_utf8(&row, 1), "medium length string here");
		assert_eq!(schema.get_utf8(&row, 2), "x");
	}

	/// The order in which fields are set does not affect what is read back.
	#[test]
	fn test_arbitrary_setting_order() {
		let schema = Schema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		// Set in a non-sequential order
		schema.set_utf8(&mut row, 3, "fourth");
		schema.set_utf8(&mut row, 1, "second");
		schema.set_utf8(&mut row, 0, "first");
		schema.set_utf8(&mut row, 2, "third");

		assert_eq!(schema.get_utf8(&row, 0), "first");
		assert_eq!(schema.get_utf8(&row, 1), "second");
		assert_eq!(schema.get_utf8(&row, 2), "third");
		assert_eq!(schema.get_utf8(&row, 3), "fourth");
	}

	/// Whitespace, control and quote characters are stored verbatim.
	#[test]
	fn test_special_characters() {
		let schema = Schema::testing(&[Type::Utf8]);

		let special_strings = [
			"",
			" ",
			"\n",
			"\t",
			"\r\n",
			"\"quoted\"",
			"'single quotes'",
			"line1\nline2\nline3",
			"tabs\there\tand\there",
			"mixed\twhite\n \r\n\tspace",
		];

		// A fresh row per value: a field may only be set once.
		for special_str in special_strings {
			let mut row = schema.allocate();
			schema.set_utf8(&mut row, 0, special_str);
			assert_eq!(schema.get_utf8(&row, 0), special_str);
		}
	}

	/// Unset fields and fields cleared with `set_none` read as `None`,
	/// without disturbing their neighbours.
	#[test]
	fn test_undefined_handling() {
		let schema = Schema::testing(&[Type::Utf8, Type::Utf8, Type::Utf8]);
		let mut row = schema.allocate();

		// Set only some fields
		schema.set_utf8(&mut row, 0, "defined");
		schema.set_utf8(&mut row, 2, "also defined");

		assert_eq!(schema.try_get_utf8(&row, 0), Some("defined"));
		assert_eq!(schema.try_get_utf8(&row, 1), None);
		assert_eq!(schema.try_get_utf8(&row, 2), Some("also defined"));

		// Set field as undefined
		schema.set_none(&mut row, 0);
		assert_eq!(schema.try_get_utf8(&row, 0), None);
		assert_eq!(schema.try_get_utf8(&row, 2), Some("also defined"));
	}

	/// A defined field of another type is never returned as a string.
	#[test]
	fn test_try_get_utf8_wrong_type() {
		let schema = Schema::testing(&[Type::Boolean]);
		let mut row = schema.allocate();

		schema.set_bool(&mut row, 0, true);

		assert_eq!(schema.try_get_utf8(&row, 0), None);
	}
}