Skip to main content

reifydb_engine/transaction/operation/
dictionary.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use postcard::{from_bytes, to_stdvec};
5use reifydb_core::{
6	common::CommitVersion,
7	encoded::row::EncodedRow,
8	interface::{
9		catalog::{dictionary::Dictionary, shape::ShapeId},
10		change::{Change, ChangeOrigin, Diff},
11	},
12	internal_error,
13	key::{
14		EncodableKey,
15		dictionary::{DictionaryEntryIndexKey, DictionaryEntryKey, DictionarySequenceKey},
16	},
17	value::column::columns::Columns,
18};
19use reifydb_runtime::hash::xxh3_128;
20use reifydb_transaction::{
21	interceptor::dictionary_row::DictionaryRowInterceptor,
22	transaction::{Transaction, admin::AdminTransaction, command::CommandTransaction},
23};
24use reifydb_type::{
25	util::cowvec::CowVec,
26	value::{Value, datetime::DateTime, dictionary::DictionaryEntryId},
27};
28
29use crate::Result;
30
31pub(crate) trait DictionaryOperations {
32	/// Insert a value into the dictionary, returning its ID.
33	/// If the value already exists, returns the existing ID.
34	/// If the value is new, assigns a new ID and stores it.
35	/// The returned ID type matches the dictionary's `id_type`.
36	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId>;
37
38	/// Get a value from the dictionary by its ID.
39	/// Returns None if the ID doesn't exist.
40	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>>;
41
42	/// Find the ID of a value in the dictionary without inserting.
43	/// Returns the ID if the value exists, None otherwise.
44	/// The returned ID type matches the dictionary's `id_type`.
45	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>>;
46}
47
48impl DictionaryOperations for CommandTransaction {
49	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId> {
50		let value = DictionaryRowInterceptor::pre_insert(self, dictionary, value.clone())?;
51
52		// 1. Serialize value and compute hash
53		let value_bytes = to_stdvec(&value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
54		let hash = xxh3_128(&value_bytes).0.to_be_bytes();
55
56		// 2. Check if value already exists (lookup by hash)
57		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
58		if let Some(existing) = self.get(&entry_key)? {
59			// Value exists, return existing ID
60			let id = u128::from_be_bytes(existing.row[..16].try_into().unwrap());
61			return DictionaryEntryId::from_u128(id, dictionary.id_type.clone());
62		}
63
64		// 3. Value doesn't exist - get next ID from sequence
65		let seq_key = DictionarySequenceKey::encoded(dictionary.id);
66		let next_id = match self.get(&seq_key)? {
67			Some(v) => u128::from_be_bytes(v.row[..16].try_into().unwrap()) + 1,
68			None => 1, // First entry
69		};
70
71		// 4. Validate the new ID fits in the dictionary's id_type (early check)
72		let entry_id = DictionaryEntryId::from_u128(next_id, dictionary.id_type.clone())?;
73
74		// 5. Store the entry (hash -> id + value_bytes)
75		let mut entry_value = Vec::with_capacity(16 + value_bytes.len());
76		entry_value.extend_from_slice(&next_id.to_be_bytes());
77		entry_value.extend_from_slice(&value_bytes);
78		self.set(&entry_key, EncodedRow(CowVec::new(entry_value)))?;
79
80		// 6. Store reverse index (id -> value_bytes)
81		// Note: DictionaryEntryIndexKey currently uses u64, so we truncate
82		// This limits practical dictionary size to u64::MAX entries
83		let index_key = DictionaryEntryIndexKey::encoded(dictionary.id, next_id as u64);
84		self.set(&index_key, EncodedRow(CowVec::new(value_bytes)))?;
85
86		// 7. Update sequence
87		self.set(&seq_key, EncodedRow(CowVec::new(next_id.to_be_bytes().to_vec())))?;
88
89		DictionaryRowInterceptor::post_insert(self, dictionary, entry_id, &value)?;
90
91		Ok(entry_id)
92	}
93
94	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>> {
95		// Note: DictionaryEntryIndexKey currently uses u64, so we truncate
96		let index_key = DictionaryEntryIndexKey::new(dictionary.id, id.to_u128() as u64).encode();
97		match self.get(&index_key)? {
98			Some(v) => {
99				let value: Value = from_bytes(&v.row)
100					.map_err(|e| internal_error!("Failed to deserialize value: {}", e))?;
101				Ok(Some(value))
102			}
103			None => Ok(None),
104		}
105	}
106
107	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>> {
108		let value_bytes = to_stdvec(value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
109		let hash = xxh3_128(&value_bytes).0.to_be_bytes();
110
111		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
112		match self.get(&entry_key)? {
113			Some(v) => {
114				let id = u128::from_be_bytes(v.row[..16].try_into().unwrap());
115				let entry_id = DictionaryEntryId::from_u128(id, dictionary.id_type.clone())?;
116				Ok(Some(entry_id))
117			}
118			None => Ok(None),
119		}
120	}
121}
122
123impl DictionaryOperations for AdminTransaction {
124	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId> {
125		let value = DictionaryRowInterceptor::pre_insert(self, dictionary, value.clone())?;
126
127		// 1. Serialize value and compute hash
128		let value_bytes = to_stdvec(&value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
129		let hash = xxh3_128(&value_bytes).0.to_be_bytes();
130
131		// 2. Check if value already exists (lookup by hash)
132		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
133		if let Some(existing) = self.get(&entry_key)? {
134			// Value exists, return existing ID
135			let id = u128::from_be_bytes(existing.row[..16].try_into().unwrap());
136			return DictionaryEntryId::from_u128(id, dictionary.id_type.clone());
137		}
138
139		// 3. Value doesn't exist - get next ID from sequence
140		let seq_key = DictionarySequenceKey::encoded(dictionary.id);
141		let next_id = match self.get(&seq_key)? {
142			Some(v) => u128::from_be_bytes(v.row[..16].try_into().unwrap()) + 1,
143			None => 1, // First entry
144		};
145
146		// 4. Validate the new ID fits in the dictionary's id_type (early check)
147		let entry_id = DictionaryEntryId::from_u128(next_id, dictionary.id_type.clone())?;
148
149		// 5. Store the entry (hash -> id + value_bytes)
150		let mut entry_value = Vec::with_capacity(16 + value_bytes.len());
151		entry_value.extend_from_slice(&next_id.to_be_bytes());
152		entry_value.extend_from_slice(&value_bytes);
153		self.set(&entry_key, EncodedRow(CowVec::new(entry_value)))?;
154
155		// 6. Store reverse index (id -> value_bytes)
156		let index_key = DictionaryEntryIndexKey::encoded(dictionary.id, next_id as u64);
157		self.set(&index_key, EncodedRow(CowVec::new(value_bytes)))?;
158
159		// 7. Update sequence
160		self.set(&seq_key, EncodedRow(CowVec::new(next_id.to_be_bytes().to_vec())))?;
161
162		DictionaryRowInterceptor::post_insert(self, dictionary, entry_id, &value)?;
163
164		// Track for testing::dictionaries::changed()
165		self.track_flow_change(Change {
166			origin: ChangeOrigin::Shape(ShapeId::Dictionary(dictionary.id)),
167			version: CommitVersion(0),
168			diffs: vec![Diff::Insert {
169				post: Columns::single_row([("value", value)]),
170			}],
171			changed_at: DateTime::default(),
172		});
173
174		Ok(entry_id)
175	}
176
177	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>> {
178		let index_key = DictionaryEntryIndexKey::new(dictionary.id, id.to_u128() as u64).encode();
179		match self.get(&index_key)? {
180			Some(v) => {
181				let value: Value = from_bytes(&v.row)
182					.map_err(|e| internal_error!("Failed to deserialize value: {}", e))?;
183				Ok(Some(value))
184			}
185			None => Ok(None),
186		}
187	}
188
189	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>> {
190		let value_bytes = to_stdvec(value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
191		let hash = xxh3_128(&value_bytes).0.to_be_bytes();
192
193		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
194		match self.get(&entry_key)? {
195			Some(v) => {
196				let id = u128::from_be_bytes(v.row[..16].try_into().unwrap());
197				let entry_id = DictionaryEntryId::from_u128(id, dictionary.id_type.clone())?;
198				Ok(Some(entry_id))
199			}
200			None => Ok(None),
201		}
202	}
203}
204
205/// Implementation for Transaction (both Command and Query)
206/// This provides read-only access to dictionaries for query operations.
207impl DictionaryOperations for Transaction<'_> {
208	fn insert_into_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<DictionaryEntryId> {
209		// Only command and admin transactions can insert
210		match self {
211			Transaction::Command(cmd) => cmd.insert_into_dictionary(dictionary, value),
212			Transaction::Admin(admin) => admin.insert_into_dictionary(dictionary, value),
213			Transaction::Test(t) => t.inner.insert_into_dictionary(dictionary, value),
214			Transaction::Query(_) => {
215				Err(internal_error!("Cannot insert into dictionary during a query transaction"))
216			}
217			Transaction::Replica(_) => {
218				Err(internal_error!("Cannot insert into dictionary during a replica transaction"))
219			}
220		}
221	}
222
223	fn get_from_dictionary(&mut self, dictionary: &Dictionary, id: DictionaryEntryId) -> Result<Option<Value>> {
224		// Both command and query transactions can read
225		let index_key = DictionaryEntryIndexKey::encoded(dictionary.id, id.to_u128() as u64);
226		match self.get(&index_key)? {
227			Some(v) => {
228				let value: Value = from_bytes(&v.row)
229					.map_err(|e| internal_error!("Failed to deserialize value: {}", e))?;
230				Ok(Some(value))
231			}
232			None => Ok(None),
233		}
234	}
235
236	fn find_in_dictionary(&mut self, dictionary: &Dictionary, value: &Value) -> Result<Option<DictionaryEntryId>> {
237		// Both command and query transactions can read
238		let value_bytes = to_stdvec(value).map_err(|e| internal_error!("Failed to serialize value: {}", e))?;
239		let hash = xxh3_128(&value_bytes).0.to_be_bytes();
240
241		let entry_key = DictionaryEntryKey::encoded(dictionary.id, hash);
242		match self.get(&entry_key)? {
243			Some(v) => {
244				let id = u128::from_be_bytes(v.row[..16].try_into().unwrap());
245				let entry_id = DictionaryEntryId::from_u128(id, dictionary.id_type.clone())?;
246				Ok(Some(entry_id))
247			}
248			None => Ok(None),
249		}
250	}
251}
252
#[cfg(test)]
pub mod tests {
	use reifydb_core::interface::catalog::{dictionary::Dictionary, id::NamespaceId};
	use reifydb_type::value::{
		Value,
		dictionary::{DictionaryEntryId, DictionaryId},
		r#type::Type,
	};

	use super::DictionaryOperations;
	use crate::test_harness::create_test_admin_transaction;

	/// Builds a UTF-8 valued dictionary in the system namespace.
	fn make_dictionary(id: DictionaryId, name: &str, id_type: Type) -> Dictionary {
		Dictionary {
			id,
			namespace: NamespaceId::SYSTEM,
			name: name.to_string(),
			value_type: Type::Utf8,
			id_type,
		}
	}

	/// Default fixture: dictionary 1 with `Uint8` entry IDs.
	fn test_dictionary() -> Dictionary {
		make_dictionary(DictionaryId(1), "test_dict", Type::Uint8)
	}

	/// Shorthand for an owned UTF-8 value.
	fn utf8(text: &str) -> Value {
		Value::Utf8(text.to_string())
	}

	#[test]
	fn test_insert_into_dictionary() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();

		// The very first entry is assigned ID 1
		let assigned = txn.insert_into_dictionary(&dict, &utf8("hello")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U8(1));
	}

	#[test]
	fn test_insert_duplicate_value() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();
		let hello = utf8("hello");

		let first = txn.insert_into_dictionary(&dict, &hello).unwrap();
		let second = txn.insert_into_dictionary(&dict, &hello).unwrap();

		// Re-inserting a known value must hand back the original ID
		assert_eq!(first, second);
		assert_eq!(first, DictionaryEntryId::U8(1));
	}

	#[test]
	fn test_insert_multiple_values() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();

		let hello_id = txn.insert_into_dictionary(&dict, &utf8("hello")).unwrap();
		let world_id = txn.insert_into_dictionary(&dict, &utf8("world")).unwrap();
		let foo_id = txn.insert_into_dictionary(&dict, &utf8("foo")).unwrap();

		// Distinct values are numbered sequentially in insertion order
		assert_eq!(hello_id, DictionaryEntryId::U8(1));
		assert_eq!(world_id, DictionaryEntryId::U8(2));
		assert_eq!(foo_id, DictionaryEntryId::U8(3));
	}

	#[test]
	fn test_get_from_dictionary() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();
		let hello = utf8("hello");

		let assigned = txn.insert_into_dictionary(&dict, &hello).unwrap();
		let looked_up = txn.get_from_dictionary(&dict, assigned).unwrap();

		assert_eq!(looked_up, Some(hello));
	}

	#[test]
	fn test_get_nonexistent_id() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();

		// Nothing was inserted, so no ID resolves
		let looked_up = txn.get_from_dictionary(&dict, DictionaryEntryId::U8(999)).unwrap();
		assert_eq!(looked_up, None);
	}

	#[test]
	fn test_find_in_dictionary() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();
		let hello = utf8("hello");

		// Insert first, then the lookup must report the same ID
		let assigned = txn.insert_into_dictionary(&dict, &hello).unwrap();
		let located = txn.find_in_dictionary(&dict, &hello).unwrap();

		assert_eq!(located, Some(assigned));
	}

	#[test]
	fn test_find_nonexistent_value() {
		let mut txn = create_test_admin_transaction();
		let dict = test_dictionary();

		// A value that was never inserted is not found
		let located = txn.find_in_dictionary(&dict, &utf8("not_inserted")).unwrap();
		assert_eq!(located, None);
	}

	#[test]
	fn test_dictionary_with_uint1_id() {
		let mut txn = create_test_admin_transaction();
		let dict = make_dictionary(DictionaryId(2), "dict_u1", Type::Uint1);

		let assigned = txn.insert_into_dictionary(&dict, &utf8("test")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U1(1));
		assert_eq!(assigned.id_type(), Type::Uint1);
	}

	#[test]
	fn test_dictionary_with_uint2_id() {
		let mut txn = create_test_admin_transaction();
		let dict = make_dictionary(DictionaryId(3), "dict_u2", Type::Uint2);

		let assigned = txn.insert_into_dictionary(&dict, &utf8("test")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U2(1));
		assert_eq!(assigned.id_type(), Type::Uint2);
	}

	#[test]
	fn test_dictionary_with_uint4_id() {
		let mut txn = create_test_admin_transaction();
		let dict = make_dictionary(DictionaryId(4), "dict_u4", Type::Uint4);

		let assigned = txn.insert_into_dictionary(&dict, &utf8("test")).unwrap();
		assert_eq!(assigned, DictionaryEntryId::U4(1));
		assert_eq!(assigned.id_type(), Type::Uint4);
	}
}