reifydb-engine 0.4.12

Query execution and processing engine for ReifyDB
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2025 ReifyDB

use postcard::{from_bytes, to_stdvec};
use reifydb_core::{
	common::CommitVersion,
	encoded::row::EncodedRow,
	interface::{
		catalog::{dictionary::Dictionary, shape::ShapeId},
		change::{Change, ChangeOrigin, Diff},
	},
	internal_error,
	key::{
		EncodableKey,
		dictionary::{DictionaryEntryIndexKey, DictionaryEntryKey, DictionarySequenceKey},
	},
	value::column::columns::Columns,
};
use reifydb_runtime::hash::xxh3_128;
use reifydb_transaction::{
	interceptor::dictionary_row::DictionaryRowInterceptor,
	transaction::{Transaction, admin::AdminTransaction, command::CommandTransaction},
};
use reifydb_type::{
	util::cowvec::CowVec,
	value::{Value, datetime::DateTime, dictionary::DictionaryEntryId},
};

use crate::Result;

/// Dictionary encoding operations available on a transaction.
///
/// A dictionary maps values to compact numeric IDs. The implementations in
/// this file key the forward entry by a 128-bit hash of the serialized value
/// and keep a reverse index from ID back to the serialized value.
pub(crate) trait DictionaryOperations {
	/// Insert a value into the dictionary, returning its ID.
	///
	/// If the value already exists, returns the existing ID.
	/// If the value is new, assigns a new ID and stores it; the implementations
	/// in this file hand out sequential IDs starting at 1.
	/// The returned ID type matches the dictionary's `id_type`.
	///
	/// # Errors
	///
	/// Fails if the value cannot be serialized, if the ID cannot be represented
	/// in the dictionary's `id_type`, or if the underlying transaction reports
	/// an error. The `Transaction` implementation additionally rejects inserts
	/// for query and replica transactions.
	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId>;

	/// Get a value from the dictionary by its ID.
	///
	/// Returns `None` if the ID doesn't exist.
	///
	/// # Errors
	///
	/// Fails if the stored bytes cannot be deserialized into a `Value`, or if
	/// the underlying transaction reports an error.
	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>>;

	/// Find the ID of a value in the dictionary without inserting.
	///
	/// Returns the ID if the value exists, `None` otherwise.
	/// The returned ID type matches the dictionary's `id_type`.
	///
	/// # Errors
	///
	/// Fails if the value cannot be serialized, if the stored ID cannot be
	/// represented in the dictionary's `id_type`, or if the underlying
	/// transaction reports an error.
	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>>;
}

impl DictionaryOperations for CommandTransaction {
	/// Interns `value` in `dictionary` and returns the ID it is stored under.
	///
	/// The value returned by the pre-insert interceptor is what gets serialized,
	/// hashed and stored. If an entry already exists under that hash, its ID is
	/// returned and nothing is written (the post-insert interceptor is not run).
	/// Otherwise the next sequence number (starting at 1) is assigned, and the
	/// forward entry, the reverse index and the sequence are written.
	///
	/// # Errors
	///
	/// Returns an internal error if the value cannot be serialized, if a stored
	/// entry or sequence row is shorter than the 16-byte ID, if the sequence
	/// would overflow, or if the new ID does not fit the `u64` reverse-index
	/// key. Errors from `DictionaryEntryId::from_u128`, the interceptors and the
	/// transaction are propagated.
	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId> {
		let value = DictionaryRowInterceptor::pre_insert(self, dictionary, value.clone())?;

		// 1. Serialize value and compute hash
		let value_bytes = to_stdvec(&value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
		let hash = xxh3_128(&value_bytes).0.to_be_bytes();

		// 2. Check if value already exists (lookup by hash)
		// NOTE(review): the match is on the hash alone; the serialized value stored
		// after the ID is not compared against `value_bytes`.
		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
		if let Some(existing) = self.get(&entry_key)? {
			// Value exists, return existing ID. The ID is the 16-byte big-endian
			// prefix of the row; a shorter row is reported instead of panicking.
			let stored: &[u8] = &existing.row;
			let id = stored
				.get(..16)
				.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
				.map(u128::from_be_bytes)
				.ok_or_else(|| {
					internal_error!("Corrupted dictionary entry: missing 16-byte ID prefix")
				})?;
			return DictionaryEntryId::from_u128(id, dictionary.id_type.clone());
		}

		// 3. Value doesn't exist - get next ID from sequence
		let seq_key = DictionarySequenceKey::encoded(dictionary.id);
		let next_id = match self.get(&seq_key)? {
			Some(seq) => {
				let stored: &[u8] = &seq.row;
				let last_id = stored
					.get(..16)
					.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
					.map(u128::from_be_bytes)
					.ok_or_else(|| {
						internal_error!(
							"Corrupted dictionary sequence: missing 16-byte counter"
						)
					})?;
				// Checked so an exhausted counter is an error rather than a wrap to 0.
				last_id.checked_add(1).ok_or_else(|| internal_error!("Dictionary sequence overflow"))?
			}
			None => 1, // First entry
		};

		// 4. Validate the new ID before writing anything: it must fit the
		// dictionary's id_type and the u64 used by DictionaryEntryIndexKey.
		let entry_id = DictionaryEntryId::from_u128(next_id, dictionary.id_type.clone())?;
		let index_id = u64::try_from(next_id).map_err(|_| {
			internal_error!("Dictionary entry ID {} does not fit in the u64 reverse index key", next_id)
		})?;

		// 5. Store the entry (hash -> id + value_bytes)
		let mut entry_value = Vec::with_capacity(16 + value_bytes.len());
		entry_value.extend_from_slice(&next_id.to_be_bytes());
		entry_value.extend_from_slice(&value_bytes);
		self.set(&entry_key, EncodedRow(CowVec::new(entry_value)))?;

		// 6. Store reverse index (id -> value_bytes)
		// DictionaryEntryIndexKey uses u64, which limits practical dictionary
		// size to u64::MAX entries.
		let index_key = DictionaryEntryIndexKey::encoded(dictionary.id, index_id);
		self.set(&index_key, EncodedRow(CowVec::new(value_bytes)))?;

		// 7. Update sequence
		self.set(&seq_key, EncodedRow(CowVec::new(next_id.to_be_bytes().to_vec())))?;

		DictionaryRowInterceptor::post_insert(self, dictionary, entry_id, &value)?;

		Ok(entry_id)
	}

	/// Looks up the value stored under `id` via the reverse index.
	///
	/// Returns `Ok(None)` when no entry exists, including when `id` is larger
	/// than the `u64` the reverse-index key can hold.
	///
	/// # Errors
	///
	/// Returns an internal error if the stored bytes cannot be deserialized.
	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>> {
		// DictionaryEntryIndexKey uses u64. An ID beyond that range can never have
		// been indexed, so report "not found" instead of truncating onto another ID.
		let index_id = match u64::try_from(id.to_u128()) {
			Ok(index_id) => index_id,
			Err(_) => return Ok(None),
		};
		let index_key = DictionaryEntryIndexKey::new(dictionary.id, index_id).encode();
		match self.get(&index_key)? {
			Some(v) => {
				let value: Value = from_bytes(&v.row)
					.map_err(|e| internal_error!("Failed to deserialize value: {}", e))?;
				Ok(Some(value))
			}
			None => Ok(None),
		}
	}

	/// Returns the ID already assigned to `value`, without inserting it.
	///
	/// # Errors
	///
	/// Returns an internal error if the value cannot be serialized or if the
	/// stored entry is shorter than the 16-byte ID.
	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>> {
		let value_bytes = to_stdvec(value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
		let hash = xxh3_128(&value_bytes).0.to_be_bytes();

		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
		match self.get(&entry_key)? {
			Some(v) => {
				let stored: &[u8] = &v.row;
				let id = stored
					.get(..16)
					.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
					.map(u128::from_be_bytes)
					.ok_or_else(|| {
						internal_error!("Corrupted dictionary entry: missing 16-byte ID prefix")
					})?;
				let entry_id = DictionaryEntryId::from_u128(id, dictionary.id_type.clone())?;
				Ok(Some(entry_id))
			}
			None => Ok(None),
		}
	}
}

impl DictionaryOperations for AdminTransaction {
	/// Interns `value` in `dictionary` and returns the ID it is stored under.
	///
	/// The value returned by the pre-insert interceptor is what gets serialized,
	/// hashed and stored. If an entry already exists under that hash, its ID is
	/// returned and nothing is written or tracked. Otherwise the next sequence
	/// number (starting at 1) is assigned; the forward entry, the reverse index
	/// and the sequence are written; and an insert change is tracked on the
	/// transaction.
	///
	/// # Errors
	///
	/// Returns an internal error if the value cannot be serialized, if a stored
	/// entry or sequence row is shorter than the 16-byte ID, if the sequence
	/// would overflow, or if the new ID does not fit the `u64` reverse-index
	/// key. Errors from `DictionaryEntryId::from_u128`, the interceptors and the
	/// transaction are propagated.
	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId> {
		let value = DictionaryRowInterceptor::pre_insert(self, dictionary, value.clone())?;

		// 1. Serialize value and compute hash
		let value_bytes = to_stdvec(&value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
		let hash = xxh3_128(&value_bytes).0.to_be_bytes();

		// 2. Check if value already exists (lookup by hash)
		// NOTE(review): the match is on the hash alone; the serialized value stored
		// after the ID is not compared against `value_bytes`.
		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
		if let Some(existing) = self.get(&entry_key)? {
			// Value exists, return existing ID. The ID is the 16-byte big-endian
			// prefix of the row; a shorter row is reported instead of panicking.
			let stored: &[u8] = &existing.row;
			let id = stored
				.get(..16)
				.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
				.map(u128::from_be_bytes)
				.ok_or_else(|| {
					internal_error!("Corrupted dictionary entry: missing 16-byte ID prefix")
				})?;
			return DictionaryEntryId::from_u128(id, dictionary.id_type.clone());
		}

		// 3. Value doesn't exist - get next ID from sequence
		let seq_key = DictionarySequenceKey::encoded(dictionary.id);
		let next_id = match self.get(&seq_key)? {
			Some(seq) => {
				let stored: &[u8] = &seq.row;
				let last_id = stored
					.get(..16)
					.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
					.map(u128::from_be_bytes)
					.ok_or_else(|| {
						internal_error!(
							"Corrupted dictionary sequence: missing 16-byte counter"
						)
					})?;
				// Checked so an exhausted counter is an error rather than a wrap to 0.
				last_id.checked_add(1).ok_or_else(|| internal_error!("Dictionary sequence overflow"))?
			}
			None => 1, // First entry
		};

		// 4. Validate the new ID before writing anything: it must fit the
		// dictionary's id_type and the u64 used by DictionaryEntryIndexKey.
		let entry_id = DictionaryEntryId::from_u128(next_id, dictionary.id_type.clone())?;
		let index_id = u64::try_from(next_id).map_err(|_| {
			internal_error!("Dictionary entry ID {} does not fit in the u64 reverse index key", next_id)
		})?;

		// 5. Store the entry (hash -> id + value_bytes)
		let mut entry_value = Vec::with_capacity(16 + value_bytes.len());
		entry_value.extend_from_slice(&next_id.to_be_bytes());
		entry_value.extend_from_slice(&value_bytes);
		self.set(&entry_key, EncodedRow(CowVec::new(entry_value)))?;

		// 6. Store reverse index (id -> value_bytes)
		let index_key = DictionaryEntryIndexKey::encoded(dictionary.id, index_id);
		self.set(&index_key, EncodedRow(CowVec::new(value_bytes)))?;

		// 7. Update sequence
		self.set(&seq_key, EncodedRow(CowVec::new(next_id.to_be_bytes().to_vec())))?;

		DictionaryRowInterceptor::post_insert(self, dictionary, entry_id, &value)?;

		// Track for testing::dictionaries::changed()
		self.track_flow_change(Change {
			origin: ChangeOrigin::Shape(ShapeId::Dictionary(dictionary.id)),
			version: CommitVersion(0),
			diffs: vec![Diff::Insert {
				post: Columns::single_row([("value", value)]),
			}],
			changed_at: DateTime::default(),
		});

		Ok(entry_id)
	}

	/// Looks up the value stored under `id` via the reverse index.
	///
	/// Returns `Ok(None)` when no entry exists, including when `id` is larger
	/// than the `u64` the reverse-index key can hold.
	///
	/// # Errors
	///
	/// Returns an internal error if the stored bytes cannot be deserialized.
	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>> {
		// DictionaryEntryIndexKey uses u64. An ID beyond that range can never have
		// been indexed, so report "not found" instead of truncating onto another ID.
		let index_id = match u64::try_from(id.to_u128()) {
			Ok(index_id) => index_id,
			Err(_) => return Ok(None),
		};
		let index_key = DictionaryEntryIndexKey::new(dictionary.id, index_id).encode();
		match self.get(&index_key)? {
			Some(v) => {
				let value: Value = from_bytes(&v.row)
					.map_err(|e| internal_error!("Failed to deserialize value: {}", e))?;
				Ok(Some(value))
			}
			None => Ok(None),
		}
	}

	/// Returns the ID already assigned to `value`, without inserting it.
	///
	/// # Errors
	///
	/// Returns an internal error if the value cannot be serialized or if the
	/// stored entry is shorter than the 16-byte ID.
	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>> {
		let value_bytes = to_stdvec(value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
		let hash = xxh3_128(&value_bytes).0.to_be_bytes();

		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
		match self.get(&entry_key)? {
			Some(v) => {
				let stored: &[u8] = &v.row;
				let id = stored
					.get(..16)
					.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
					.map(u128::from_be_bytes)
					.ok_or_else(|| {
						internal_error!("Corrupted dictionary entry: missing 16-byte ID prefix")
					})?;
				let entry_id = DictionaryEntryId::from_u128(id, dictionary.id_type.clone())?;
				Ok(Some(entry_id))
			}
			None => Ok(None),
		}
	}
}

/// Implementation for `Transaction`, dispatching on the wrapped transaction kind.
///
/// Inserts are forwarded to the command, admin and test variants and rejected
/// for query and replica transactions. Lookups go through `self.get` and are
/// therefore available on every variant.
impl DictionaryOperations for Transaction<'_> {
	/// Forwards the insert to the wrapped command, admin or test transaction.
	///
	/// # Errors
	///
	/// Returns an internal error for query and replica transactions; otherwise
	/// propagates whatever the wrapped transaction's insert returns.
	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId> {
		// Only command, admin and test transactions can insert
		match self {
			Transaction::Command(cmd) => cmd.insert_into_dictionary(dictionary, value),
			Transaction::Admin(admin) => admin.insert_into_dictionary(dictionary, value),
			Transaction::Test(t) => t.inner.insert_into_dictionary(dictionary, value),
			Transaction::Query(_) => {
				Err(internal_error!("Cannot insert into dictionary during a query transaction"))
			}
			Transaction::Replica(_) => {
				Err(internal_error!("Cannot insert into dictionary during a replica transaction"))
			}
		}
	}

	/// Looks up the value stored under `id` via the reverse index.
	///
	/// Returns `Ok(None)` when no entry exists, including when `id` is larger
	/// than the `u64` the reverse-index key can hold.
	///
	/// # Errors
	///
	/// Returns an internal error if the stored bytes cannot be deserialized.
	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>> {
		// DictionaryEntryIndexKey uses u64. An ID beyond that range has no
		// representable key, so report "not found" instead of truncating onto
		// another ID.
		let index_id = match u64::try_from(id.to_u128()) {
			Ok(index_id) => index_id,
			Err(_) => return Ok(None),
		};
		let index_key = DictionaryEntryIndexKey::encoded(dictionary.id, index_id);
		match self.get(&index_key)? {
			Some(v) => {
				let value: Value = from_bytes(&v.row)
					.map_err(|e| internal_error!("Failed to deserialize value: {}", e))?;
				Ok(Some(value))
			}
			None => Ok(None),
		}
	}

	/// Returns the ID already assigned to `value`, without inserting it.
	///
	/// # Errors
	///
	/// Returns an internal error if the value cannot be serialized or if the
	/// stored entry is shorter than the 16-byte ID.
	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>> {
		let value_bytes = to_stdvec(value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
		let hash = xxh3_128(&value_bytes).0.to_be_bytes();

		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
		match self.get(&entry_key)? {
			Some(v) => {
				// The ID is the 16-byte big-endian prefix of the row; a shorter
				// row is reported instead of panicking.
				let stored: &[u8] = &v.row;
				let id = stored
					.get(..16)
					.and_then(|prefix| <[u8; 16]>::try_from(prefix).ok())
					.map(u128::from_be_bytes)
					.ok_or_else(|| {
						internal_error!("Corrupted dictionary entry: missing 16-byte ID prefix")
					})?;
				let entry_id = DictionaryEntryId::from_u128(id, dictionary.id_type.clone())?;
				Ok(Some(entry_id))
			}
			None => Ok(None),
		}
	}
}

#[cfg(test)]
pub mod tests {
	use reifydb_core::interface::catalog::{dictionary::Dictionary, id::NamespaceId};
	use reifydb_type::value::{
		Value,
		dictionary::{DictionaryEntryId, DictionaryId},
		r#type::Type,
	};

	use super::DictionaryOperations;
	use crate::test_harness::create_test_admin_transaction;

	/// Builds a UTF-8 valued dictionary in the system namespace.
	fn utf8_dictionary(id: DictionaryId, name: &str, id_type: Type) -> Dictionary {
		Dictionary {
			id,
			namespace: NamespaceId::SYSTEM,
			name: name.to_string(),
			value_type: Type::Utf8,
			id_type,
		}
	}

	/// Default fixture: dictionary 1 with `Uint8` entry IDs.
	fn test_dictionary() -> Dictionary {
		utf8_dictionary(DictionaryId(1), "test_dict", Type::Uint8)
	}

	/// Shorthand for an owned UTF-8 `Value`.
	fn text(s: &str) -> Value {
		Value::Utf8(s.to_string())
	}

	#[test]
	fn test_insert_into_dictionary() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();

		// The very first entry is assigned ID 1.
		let assigned = txn.insert_into_dictionary(&dict, &text("hello")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U8(1));
	}

	#[test]
	fn test_insert_duplicate_value() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();
		let hello = text("hello");

		let first = txn.insert_into_dictionary(&dict, &hello).unwrap();
		let second = txn.insert_into_dictionary(&dict, &hello).unwrap();

		// Re-inserting the same value yields the ID it already had.
		assert_eq!(first, second);
		assert_eq!(first, DictionaryEntryId::U8(1));
	}

	#[test]
	fn test_insert_multiple_values() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();

		let assigned: Vec<DictionaryEntryId> = ["hello", "world", "foo"]
			.iter()
			.map(|word| txn.insert_into_dictionary(&dict, &text(word)).unwrap())
			.collect();

		// Distinct values receive consecutive IDs in insertion order.
		assert_eq!(
			assigned,
			vec![DictionaryEntryId::U8(1), DictionaryEntryId::U8(2), DictionaryEntryId::U8(3)]
		);
	}

	#[test]
	fn test_get_from_dictionary() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();
		let hello = text("hello");

		let assigned = txn.insert_into_dictionary(&dict, &hello).unwrap();

		// Reading back by ID returns the inserted value.
		assert_eq!(txn.get_from_dictionary(&dict, assigned).unwrap(), Some(hello));
	}

	#[test]
	fn test_get_nonexistent_id() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();

		// Nothing was inserted, so an arbitrary ID resolves to nothing.
		let missing = txn.get_from_dictionary(&dict, DictionaryEntryId::U8(999)).unwrap();
		assert_eq!(missing, None);
	}

	#[test]
	fn test_find_in_dictionary() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();
		let hello = text("hello");

		// Insert first, then look the same value up without inserting.
		let assigned = txn.insert_into_dictionary(&dict, &hello).unwrap();
		assert_eq!(txn.find_in_dictionary(&dict, &hello).unwrap(), Some(assigned));
	}

	#[test]
	fn test_find_nonexistent_value() {
		let dict = test_dictionary();
		let mut txn = create_test_admin_transaction();

		// A value that was never inserted is not found.
		let lookup = txn.find_in_dictionary(&dict, &text("not_inserted")).unwrap();
		assert_eq!(lookup, None);
	}

	#[test]
	fn test_dictionary_with_uint1_id() {
		let dict = utf8_dictionary(DictionaryId(2), "dict_u1", Type::Uint1);
		let mut txn = create_test_admin_transaction();

		let assigned = txn.insert_into_dictionary(&dict, &text("test")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U1(1));
		assert_eq!(assigned.id_type(), Type::Uint1);
	}

	#[test]
	fn test_dictionary_with_uint2_id() {
		let dict = utf8_dictionary(DictionaryId(3), "dict_u2", Type::Uint2);
		let mut txn = create_test_admin_transaction();

		let assigned = txn.insert_into_dictionary(&dict, &text("test")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U2(1));
		assert_eq!(assigned.id_type(), Type::Uint2);
	}

	#[test]
	fn test_dictionary_with_uint4_id() {
		let dict = utf8_dictionary(DictionaryId(4), "dict_u4", Type::Uint4);
		let mut txn = create_test_admin_transaction();

		let assigned = txn.insert_into_dictionary(&dict, &text("test")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U4(1));
		assert_eq!(assigned.id_type(), Type::Uint4);
	}
}