bonsaidb_core/document/
id.rs1use std::borrow::Cow;
2use std::fmt::{Display, Write};
3use std::hash::Hash;
4use std::mem::size_of;
5use std::ops::Deref;
6use std::str::FromStr;
7
8use actionable::Identifier;
9use serde::de::Visitor;
10use serde::{Deserialize, Serialize};
11use tinyvec::{Array, TinyVec};
12
13use crate::key::{ByteSource, Key, KeyEncoding, KeyKind, KeyVisitor};
14
15#[derive(Default, Ord, Hash, Eq, PartialEq, PartialOrd, Clone)]
17pub struct DocumentId(TinyVec<[u8; Self::INLINE_SIZE]>);
18
19impl Deref for DocumentId {
20 type Target = [u8];
21
22 fn deref(&self) -> &[u8] {
23 &self.0
24 }
25}
26
27impl std::fmt::Debug for DocumentId {
28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29 f.write_str("DocumentId(")?;
30 arc_bytes::print_bytes(self, f)?;
31 f.write_char(')')
32 }
33}
34
35impl Display for DocumentId {
36 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37 if let Ok(string) = std::str::from_utf8(self.as_ref()) {
38 if string.bytes().all(|b| (32..=127).contains(&b)) {
39 return f.write_str(string);
40 }
41 }
42
43 if let Some((first_nonzero_byte, _)) = self
44 .as_ref()
45 .iter()
46 .copied()
47 .enumerate()
48 .find(|(_index, b)| *b != 0)
49 {
50 if first_nonzero_byte > 0 {
51 write!(f, "{first_nonzero_byte:x}$")?;
52 } else {
53 f.write_char('$')?;
54 }
55
56 for (index, byte) in self[first_nonzero_byte..].iter().enumerate() {
57 if index > 0 {
58 write!(f, "{byte:02x}")?;
59 } else {
60 write!(f, "{byte:x}")?;
61 }
62 }
63 Ok(())
64 } else {
65 write!(f, "{:x}$", self.len())
67 }
68 }
69}
70
71impl<'a> From<DocumentId> for Identifier<'a> {
72 fn from(id: DocumentId) -> Self {
73 Identifier::from(id.to_vec())
74 }
75}
76
77impl<'a> From<&'a DocumentId> for Identifier<'a> {
78 fn from(id: &'a DocumentId) -> Self {
79 Identifier::from(&**id)
80 }
81}
82
/// Checks that converting a `DocumentId` into an `Identifier` matches
/// converting the original key value directly.
#[test]
fn document_id_identifier_tests() {
    let from_string_key = DocumentId::new("hello").unwrap();
    assert_eq!(Identifier::from(from_string_key), Identifier::from("hello"));

    let from_integer_key = DocumentId::from_u64(1);
    assert_eq!(Identifier::from(from_integer_key), Identifier::from(1));
}
94
95#[derive(thiserror::Error, Debug)]
97#[error("invalid hexadecimal bytes")]
98pub struct InvalidHexadecimal;
99
100const fn decode_hex_nibble(byte: u8) -> Result<u8, InvalidHexadecimal> {
101 match byte {
102 b'0'..=b'9' => Ok(byte - b'0'),
103 b'A'..=b'F' => Ok(byte - b'A' + 10),
104 b'a'..=b'f' => Ok(byte - b'a' + 10),
105 _ => Err(InvalidHexadecimal),
106 }
107}
108
109impl FromStr for DocumentId {
110 type Err = crate::Error;
111
112 fn from_str(s: &str) -> Result<Self, Self::Err> {
113 if s.is_empty() {
114 return Ok(Self::default());
115 }
116
117 let bytes = s.as_bytes();
118 if let Some((pound_offset, _)) = s.bytes().enumerate().find(|(_index, b)| *b == b'$') {
119 if pound_offset > 5 {
120 return Err(crate::Error::DocumentIdTooLong);
121 }
122
123 let preceding_zeroes = if pound_offset > 0 {
124 let mut length = TinyVec::<[u8; 1]>::new();
125 decode_big_endian_hex(&bytes[0..pound_offset], &mut length)?;
126 let mut zeroes = [0_u8; size_of::<usize>()];
127 let offset = zeroes.len() - length.len();
128 zeroes[offset..].copy_from_slice(&length);
129 usize::from_be_bytes(zeroes)
130 } else {
131 0
132 };
133
134 let mut id = TinyVec::new();
135 decode_big_endian_hex(&bytes[pound_offset + 1..], &mut id)?;
136 if preceding_zeroes > 0 {
137 let total_length = preceding_zeroes + id.len();
138 if total_length > Self::MAX_LENGTH {
139 return Err(crate::Error::DocumentIdTooLong);
140 }
141 id.splice(0..0, std::iter::repeat(0).take(preceding_zeroes));
143 }
144 Ok(Self(id))
145 } else if bytes.len() > Self::MAX_LENGTH {
146 Err(crate::Error::DocumentIdTooLong)
147 } else {
148 Self::try_from(bytes)
150 }
151 }
152}
153
154fn decode_big_endian_hex<A: Array<Item = u8>>(
155 bytes: &[u8],
156 output: &mut TinyVec<A>,
157) -> Result<(), crate::Error> {
158 let mut chunks = if bytes.len() & 1 == 0 {
159 bytes.chunks_exact(2)
160 } else {
161 output.push(decode_hex_nibble(bytes[0])?);
163 bytes[1..].chunks_exact(2)
164 };
165 for chunk in &mut chunks {
166 let upper = decode_hex_nibble(chunk[0])?;
167 let lower = decode_hex_nibble(chunk[1])?;
168 output.push(upper << 4 | lower);
169 }
170 if !chunks.remainder().is_empty() {
171 return Err(crate::Error::from(InvalidHexadecimal));
172 }
173 Ok(())
174}
175
/// Checks that ids display as expected and that parsing the displayed text
/// yields the original bytes.
#[test]
fn document_id_parsing() {
    fn assert_round_trip(bytes: &[u8], display: &str) {
        let original = DocumentId::try_from(bytes).unwrap();
        let rendered = original.to_string();
        assert_eq!(rendered, display);
        let reparsed = DocumentId::from_str(&rendered).unwrap();
        assert_eq!(&*reparsed, bytes);
    }

    assert_round_trip(b"hello", "hello");
    assert_round_trip(b"\x00\x0a\xaf\xfa", "1$aaffa");
    assert_round_trip(&1_u128.to_be_bytes(), "f$1");
    assert_round_trip(&17_u8.to_be_bytes(), "$11");
    assert_round_trip(&[0_u8; 63], "3f$");

    // An id of the maximum length made up entirely of zero bytes.
    let longest_zeroes = vec![0_u8; DocumentId::MAX_LENGTH];
    let longest_display = format!("{:x}$", DocumentId::MAX_LENGTH);
    assert_round_trip(&longest_zeroes, &longest_display);
}
199
200impl<'a> TryFrom<&'a [u8]> for DocumentId {
201 type Error = crate::Error;
202
203 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
204 if bytes.len() <= Self::MAX_LENGTH {
205 Ok(Self(TinyVec::from(bytes)))
206 } else {
207 Err(crate::Error::DocumentIdTooLong)
208 }
209 }
210}
211
212impl<'a> TryFrom<Cow<'a, [u8]>> for DocumentId {
213 type Error = crate::Error;
214
215 fn try_from(bytes: Cow<'a, [u8]>) -> Result<Self, Self::Error> {
216 Self::try_from(bytes.as_ref())
217 }
218}
219
220impl<const N: usize> TryFrom<[u8; N]> for DocumentId {
221 type Error = crate::Error;
222
223 fn try_from(bytes: [u8; N]) -> Result<Self, Self::Error> {
224 Self::try_from(&bytes[..])
225 }
226}
227
228impl DocumentId {
229 const INLINE_SIZE: usize = 16;
230 pub const MAX_LENGTH: usize = 65_535;
232
233 pub fn new<PrimaryKey: for<'k> Key<'k>, PrimaryKeyRef: KeyEncoding<PrimaryKey> + ?Sized>(
235 value: &PrimaryKeyRef,
236 ) -> Result<Self, crate::Error> {
237 let bytes = value
238 .as_ord_bytes()
239 .map_err(|err| crate::Error::other("key serialization", err))?;
240 Self::try_from(&bytes[..])
241 }
242
243 #[must_use]
247 #[allow(clippy::missing_panics_doc)] pub fn from_u64(id: u64) -> Self {
249 Self::try_from(&id.to_be_bytes()[..]).unwrap()
250 }
251
252 #[must_use]
256 #[allow(clippy::missing_panics_doc)] pub fn from_u32(id: u32) -> Self {
258 Self::try_from(&id.to_be_bytes()[..]).unwrap()
259 }
260
261 pub fn deserialize<'k, PrimaryKey: Key<'k>>(&'k self) -> Result<PrimaryKey, crate::Error> {
263 PrimaryKey::from_ord_bytes(ByteSource::Borrowed(self.as_ref()))
264 .map_err(|err| crate::Error::other("key serialization", err))
265 }
266}
267
268impl Serialize for DocumentId {
269 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
270 where
271 S: serde::Serializer,
272 {
273 serializer.serialize_bytes(self.as_ref())
274 }
275}
276
277impl<'de> Deserialize<'de> for DocumentId {
278 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
279 where
280 D: serde::Deserializer<'de>,
281 {
282 deserializer.deserialize_byte_buf(DocumentIdVisitor)
283 }
284}
285
286struct DocumentIdVisitor;
287
288impl<'de> Visitor<'de> for DocumentIdVisitor {
289 type Value = DocumentId;
290
291 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292 formatter.write_str("a document id (bytes)")
293 }
294
295 fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
296 where
297 E: serde::de::Error,
298 {
299 Ok(DocumentId(TinyVec::from(v)))
300 }
301}
302
303impl<'k> Key<'k> for DocumentId {
304 const CAN_OWN_BYTES: bool = false;
305
306 fn from_ord_bytes<'e>(bytes: ByteSource<'k, 'e>) -> Result<Self, Self::Error> {
307 Self::try_from(bytes.as_ref())
308 }
309}
310
311impl<PrimaryKey> KeyEncoding<PrimaryKey> for DocumentId
312where
313 PrimaryKey: for<'pk> Key<'pk>,
314{
315 type Error = crate::Error;
316
317 const LENGTH: Option<usize> = None;
318
319 fn describe<Visitor>(visitor: &mut Visitor)
320 where
321 Visitor: KeyVisitor,
322 {
323 visitor.visit_type(KeyKind::Bytes);
324 }
325
326 fn as_ord_bytes(&self) -> Result<Cow<'_, [u8]>, Self::Error> {
327 Ok(Cow::Borrowed(self))
328 }
329}