1use std::collections::BTreeMap;
13
/// Four-byte magic (`RDGA`) that `encode_graph_adjacency` writes first and
/// `decode_graph_adjacency` requires at offset 0.
pub const GRAPH_ADJACENCY_MAGIC: &[u8; 4] = b"RDGA";
/// Four-byte magic (`RDFT`) that `encode_fulltext_index` writes first and
/// `decode_fulltext_index` requires at offset 0.
pub const FULLTEXT_INDEX_MAGIC: &[u8; 4] = b"RDFT";
/// Four-byte magic (`RDDP`) that `encode_document_pathvalue` writes first and
/// `decode_document_pathvalue` requires at offset 0.
pub const DOC_PATHVALUE_MAGIC: &[u8; 4] = b"RDDP";
20
/// Failure modes reported by the native artifact decoders in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NativeArtifactError {
    /// The buffer is too short for a header or does not start with the
    /// expected magic; the payload is the message shown by `Display`.
    InvalidHeader(&'static str),
    /// The buffer ended before a field could be read in full; the payload is
    /// the message shown by `Display`.
    Truncated(&'static str),
    /// A length-prefixed string did not contain valid UTF-8.
    InvalidUtf8,
    /// The entry total stored in a document path/value header differs from
    /// the number of entries actually decoded.
    EntryCountMismatch,
}

impl std::fmt::Display for NativeArtifactError {
    /// Writes the human-readable message for the error; the two variants that
    /// carry a message emit it verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match *self {
            NativeArtifactError::InvalidHeader(detail)
            | NativeArtifactError::Truncated(detail) => detail,
            NativeArtifactError::InvalidUtf8 => "invalid utf-8 in native artifact",
            NativeArtifactError::EntryCountMismatch => {
                "document path/value artifact entry count mismatch"
            }
        };
        f.write_str(text)
    }
}

impl std::error::Error for NativeArtifactError {}
48
/// Appends `value` to `buf` as a length-prefixed string: a 4-byte
/// little-endian byte length followed by the raw UTF-8 bytes.
///
/// The length is narrowed with `as u32`, so a string longer than `u32::MAX`
/// bytes would have its prefix wrap while all bytes are still appended.
fn push_string(buf: &mut Vec<u8>, value: &str) {
    let len_prefix = (value.len() as u32).to_le_bytes();
    buf.extend_from_slice(&len_prefix);
    buf.extend_from_slice(value.as_bytes());
}
54
55fn read_string(bytes: &[u8], pos: &mut usize) -> Result<String, NativeArtifactError> {
56 if *pos + 4 > bytes.len() {
57 return Err(NativeArtifactError::Truncated(
58 "truncated native artifact string length",
59 ));
60 }
61 let len = u32::from_le_bytes([
62 bytes[*pos],
63 bytes[*pos + 1],
64 bytes[*pos + 2],
65 bytes[*pos + 3],
66 ]) as usize;
67 *pos += 4;
68 if *pos + len > bytes.len() {
69 return Err(NativeArtifactError::Truncated(
70 "truncated native artifact string content",
71 ));
72 }
73 let value = std::str::from_utf8(&bytes[*pos..*pos + len])
74 .map_err(|_| NativeArtifactError::InvalidUtf8)?
75 .to_string();
76 *pos += len;
77 Ok(value)
78}
79
80fn read_u32(bytes: &[u8], pos: &mut usize, ctx: &'static str) -> Result<u32, NativeArtifactError> {
81 if *pos + 4 > bytes.len() {
82 return Err(NativeArtifactError::Truncated(ctx));
83 }
84 let value = u32::from_le_bytes([
85 bytes[*pos],
86 bytes[*pos + 1],
87 bytes[*pos + 2],
88 bytes[*pos + 3],
89 ]);
90 *pos += 4;
91 Ok(value)
92}
93
94fn read_u64(bytes: &[u8], pos: &mut usize, ctx: &'static str) -> Result<u64, NativeArtifactError> {
95 if *pos + 8 > bytes.len() {
96 return Err(NativeArtifactError::Truncated(ctx));
97 }
98 let value = u64::from_le_bytes([
99 bytes[*pos],
100 bytes[*pos + 1],
101 bytes[*pos + 2],
102 bytes[*pos + 3],
103 bytes[*pos + 4],
104 bytes[*pos + 5],
105 bytes[*pos + 6],
106 bytes[*pos + 7],
107 ]);
108 *pos += 8;
109 Ok(value)
110}
111
/// One edge record of a graph adjacency artifact.
///
/// `encode_graph_adjacency` serializes the fields in declaration order and
/// `decode_graph_adjacency` reads them back in the same order.
#[derive(Debug, Clone, PartialEq)]
pub struct GraphAdjacencyEdge {
    /// Edge identifier; stored as 8 little-endian bytes.
    pub edge_id: u64,
    /// Node the edge starts from (per the field name); stored as a
    /// length-prefixed UTF-8 string.
    pub from_node: String,
    /// Node the edge points to (per the field name); stored as a
    /// length-prefixed UTF-8 string.
    pub to_node: String,
    /// Edge label; stored as a length-prefixed UTF-8 string.
    pub label: String,
    /// Edge weight; stored as the 4 little-endian bytes of the `f32`.
    pub weight: f32,
}
130
131pub fn encode_graph_adjacency(edges: &[GraphAdjacencyEdge]) -> Vec<u8> {
133 let mut data = Vec::with_capacity(32 + edges.len() * 48);
134 data.extend_from_slice(GRAPH_ADJACENCY_MAGIC);
135 data.extend_from_slice(&(edges.len() as u32).to_le_bytes());
136 for edge in edges {
137 data.extend_from_slice(&edge.edge_id.to_le_bytes());
138 push_string(&mut data, &edge.from_node);
139 push_string(&mut data, &edge.to_node);
140 push_string(&mut data, &edge.label);
141 data.extend_from_slice(&edge.weight.to_le_bytes());
142 }
143 data
144}
145
146pub fn decode_graph_adjacency(
148 bytes: &[u8],
149) -> Result<Vec<GraphAdjacencyEdge>, NativeArtifactError> {
150 if bytes.len() < 8 || &bytes[0..4] != GRAPH_ADJACENCY_MAGIC {
151 return Err(NativeArtifactError::InvalidHeader(
152 "invalid graph adjacency artifact",
153 ));
154 }
155 let mut pos = 4usize;
156 let edge_count = read_u32(bytes, &mut pos, "truncated graph adjacency artifact")? as usize;
157 let mut edges = Vec::with_capacity(edge_count);
158 for _ in 0..edge_count {
159 let edge_id = read_u64(bytes, &mut pos, "truncated graph adjacency artifact")?;
160 let from_node = read_string(bytes, &mut pos)?;
161 let to_node = read_string(bytes, &mut pos)?;
162 let label = read_string(bytes, &mut pos)?;
163 if pos + 4 > bytes.len() {
164 return Err(NativeArtifactError::Truncated(
165 "truncated graph adjacency artifact weight",
166 ));
167 }
168 let weight =
169 f32::from_le_bytes([bytes[pos], bytes[pos + 1], bytes[pos + 2], bytes[pos + 3]]);
170 pos += 4;
171 edges.push(GraphAdjacencyEdge {
172 edge_id,
173 from_node,
174 to_node,
175 label,
176 weight,
177 });
178 }
179 Ok(edges)
180}
181
/// Decoded contents of a fulltext index artifact, as returned by
/// `decode_fulltext_index`.
#[derive(Debug, Clone, PartialEq)]
pub struct FulltextIndex {
    /// Collection name stored at the start of the artifact.
    pub collection: String,
    /// Document count stored in the artifact header (a `u32` on the wire).
    pub doc_count: u32,
    /// Posting lists keyed by term; each posting is an
    /// `(entity id, term count)` pair, matching the names used by the encoder.
    pub postings: BTreeMap<String, Vec<(u64, u32)>>,
}
196
197pub fn encode_fulltext_index(
203 collection: &str,
204 doc_count: usize,
205 postings: &BTreeMap<String, Vec<(u64, u32)>>,
206) -> Vec<u8> {
207 let mut data = Vec::with_capacity(64 + postings.len() * 32);
208 data.extend_from_slice(FULLTEXT_INDEX_MAGIC);
209 push_string(&mut data, collection);
210 data.extend_from_slice(&(doc_count as u32).to_le_bytes());
211 data.extend_from_slice(&(postings.len() as u32).to_le_bytes());
212 for (term, entries) in postings {
213 push_string(&mut data, term);
214 data.extend_from_slice(&(entries.len() as u32).to_le_bytes());
215 for (entity_id, term_count) in entries {
216 data.extend_from_slice(&entity_id.to_le_bytes());
217 data.extend_from_slice(&term_count.to_le_bytes());
218 }
219 }
220 data
221}
222
223pub fn decode_fulltext_index(bytes: &[u8]) -> Result<FulltextIndex, NativeArtifactError> {
225 if bytes.len() < 12 || &bytes[0..4] != FULLTEXT_INDEX_MAGIC {
226 return Err(NativeArtifactError::InvalidHeader(
227 "invalid fulltext artifact",
228 ));
229 }
230 let mut pos = 4usize;
231 let collection = read_string(bytes, &mut pos)?;
232 let doc_count = read_u32(bytes, &mut pos, "truncated fulltext artifact")?;
233 let term_count = read_u32(bytes, &mut pos, "truncated fulltext artifact")? as usize;
234 let mut postings = BTreeMap::new();
235 for _ in 0..term_count {
236 let term = read_string(bytes, &mut pos)?;
237 let entry_count = read_u32(bytes, &mut pos, "truncated fulltext posting count")? as usize;
238 let mut entries = Vec::with_capacity(entry_count);
239 for _ in 0..entry_count {
240 let entity_id = read_u64(bytes, &mut pos, "truncated fulltext postings")?;
241 let term_freq = read_u32(bytes, &mut pos, "truncated fulltext postings")?;
242 entries.push((entity_id, term_freq));
243 }
244 postings.insert(term, entries);
245 }
246 Ok(FulltextIndex {
247 collection,
248 doc_count,
249 postings,
250 })
251}
252
/// One document record of a document path/value artifact.
#[derive(Debug, Clone, PartialEq)]
pub struct DocPathValueRecord {
    /// Entity identifier; stored as 8 little-endian bytes.
    pub entity_id: u64,
    /// `(path, value)` pairs in stored order; each half is written as a
    /// length-prefixed UTF-8 string.
    pub entries: Vec<(String, String)>,
}
265
/// Decoded contents of a document path/value artifact, as returned by
/// `decode_document_pathvalue`.
#[derive(Debug, Clone, PartialEq)]
pub struct DocPathValueIndex {
    /// Collection name stored at the start of the artifact.
    pub collection: String,
    /// Document records in the order they appear in the artifact.
    pub documents: Vec<DocPathValueRecord>,
}
274
275pub fn encode_document_pathvalue(collection: &str, documents: &[DocPathValueRecord]) -> Vec<u8> {
280 let total_entries: usize = documents.iter().map(|doc| doc.entries.len()).sum();
281 let mut data = Vec::with_capacity(64 + total_entries * 48);
282 data.extend_from_slice(DOC_PATHVALUE_MAGIC);
283 push_string(&mut data, collection);
284 data.extend_from_slice(&(documents.len() as u32).to_le_bytes());
285 data.extend_from_slice(&(total_entries as u32).to_le_bytes());
286 for doc in documents {
287 data.extend_from_slice(&doc.entity_id.to_le_bytes());
288 data.extend_from_slice(&(doc.entries.len() as u32).to_le_bytes());
289 for (path, value) in &doc.entries {
290 push_string(&mut data, path);
291 push_string(&mut data, value);
292 }
293 }
294 data
295}
296
297pub fn decode_document_pathvalue(bytes: &[u8]) -> Result<DocPathValueIndex, NativeArtifactError> {
300 if bytes.len() < 12 || &bytes[0..4] != DOC_PATHVALUE_MAGIC {
301 return Err(NativeArtifactError::InvalidHeader(
302 "invalid document path/value artifact",
303 ));
304 }
305 let mut pos = 4usize;
306 let collection = read_string(bytes, &mut pos)?;
307 let doc_count = read_u32(bytes, &mut pos, "truncated document path/value artifact")? as usize;
308 let total_entries = read_u32(bytes, &mut pos, "truncated document path/value artifact")? as u64;
309 let mut documents = Vec::with_capacity(doc_count);
310 let mut seen_entries = 0u64;
311 for _ in 0..doc_count {
312 let entity_id = read_u64(bytes, &mut pos, "truncated document path/value record")?;
313 let entry_count =
314 read_u32(bytes, &mut pos, "truncated document path/value record")? as usize;
315 let mut entries = Vec::with_capacity(entry_count);
316 for _ in 0..entry_count {
317 let path = read_string(bytes, &mut pos)?;
318 let value = read_string(bytes, &mut pos)?;
319 entries.push((path, value));
320 seen_entries += 1;
321 }
322 documents.push(DocPathValueRecord { entity_id, entries });
323 }
324 if seen_entries != total_entries {
325 return Err(NativeArtifactError::EntryCountMismatch);
326 }
327 Ok(DocPathValueIndex {
328 collection,
329 documents,
330 })
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an edge fixture from borrowed strings.
    fn edge(edge_id: u64, from: &str, to: &str, label: &str, weight: f32) -> GraphAdjacencyEdge {
        GraphAdjacencyEdge {
            edge_id,
            from_node: from.to_owned(),
            to_node: to.to_owned(),
            label: label.to_owned(),
            weight,
        }
    }

    /// Builds an owned `(path, value)` entry.
    fn entry(path: &str, value: &str) -> (String, String) {
        (path.to_owned(), value.to_owned())
    }

    #[test]
    fn graph_adjacency_round_trip() {
        let expected = vec![
            edge(42, "a", "b", "knows", 0.5),
            edge(7, "b", "c", "likes", -1.25),
        ];
        let encoded = encode_graph_adjacency(&expected);
        assert_eq!(&encoded[0..4], b"RDGA");
        // The first edge id follows the 4-byte magic and the 4-byte edge count.
        assert_eq!(&encoded[8..16], &42u64.to_le_bytes());
        assert_eq!(decode_graph_adjacency(&encoded).unwrap(), expected);
    }

    #[test]
    fn fulltext_round_trip() {
        let expected: BTreeMap<String, Vec<(u64, u32)>> = vec![
            ("alpha".to_owned(), vec![(1, 2), (3, 1)]),
            ("beta".to_owned(), vec![(3, 5)]),
        ]
        .into_iter()
        .collect();
        let encoded = encode_fulltext_index("docs", 2, &expected);
        assert_eq!(&encoded[0..4], b"RDFT");
        let index = decode_fulltext_index(&encoded).unwrap();
        assert_eq!(index.collection, "docs");
        assert_eq!(index.doc_count, 2);
        assert_eq!(index.postings, expected);
    }

    #[test]
    fn doc_pathvalue_round_trip_pins_u64_entity_id() {
        let expected = vec![
            DocPathValueRecord {
                entity_id: 0xDEAD_BEEF_0000_0001,
                entries: vec![entry("name", "alice"), entry("age", "30")],
            },
            DocPathValueRecord {
                entity_id: 2,
                entries: vec![entry("name", "bob")],
            },
        ];
        let encoded = encode_document_pathvalue("people", &expected);
        assert_eq!(&encoded[0..4], b"RDDP");

        // Walk the header by hand to pin where the first entity id is stored.
        let mut len_raw = [0u8; 4];
        len_raw.copy_from_slice(&encoded[4..8]);
        let collection_len = u32::from_le_bytes(len_raw) as usize;
        // magic + length prefix + collection bytes + doc count + total entry count
        let id_offset = 4 + 4 + collection_len + 4 + 4;
        let mut id_raw = [0u8; 8];
        id_raw.copy_from_slice(&encoded[id_offset..id_offset + 8]);
        assert_eq!(u64::from_le_bytes(id_raw), 0xDEAD_BEEF_0000_0001);

        let index = decode_document_pathvalue(&encoded).unwrap();
        assert_eq!(index.collection, "people");
        assert_eq!(index.documents, expected);
    }

    #[test]
    fn doc_pathvalue_detects_entry_count_mismatch() {
        let record = DocPathValueRecord {
            entity_id: 1,
            entries: vec![entry("p", "v")],
        };
        let mut encoded = encode_document_pathvalue("c", &[record]);
        // magic (4) + length prefix (4) + "c" (1) + doc count (4) = offset of
        // the header's total entry count.
        let total_offset = 4 + 4 + 1 + 4;
        encoded[total_offset] = 9;
        let outcome = decode_document_pathvalue(&encoded);
        assert_eq!(outcome, Err(NativeArtifactError::EntryCountMismatch));
    }

    #[test]
    fn rejects_bad_magic() {
        let graph = decode_graph_adjacency(b"XXXX\0\0\0\0");
        assert!(matches!(graph, Err(NativeArtifactError::InvalidHeader(_))));

        let zeros = [0u8; 12];
        let fulltext = decode_fulltext_index(&zeros);
        assert!(matches!(
            fulltext,
            Err(NativeArtifactError::InvalidHeader(_))
        ));

        let pathvalue = decode_document_pathvalue(&zeros);
        assert!(matches!(
            pathvalue,
            Err(NativeArtifactError::InvalidHeader(_))
        ));
    }
}