1use std::collections::BTreeMap;
14
15use ipld_core::ipld::Ipld;
16use serde::{Deserialize, Deserializer, Serialize, Serializer};
17
18use crate::codec::{from_canonical_bytes, hash_to_cid};
19use crate::error::Error;
20use crate::id::Cid;
21use crate::prolly::chunker::Chunker;
22use crate::prolly::constants::ProllyKey;
23use crate::store::Blockstore;
24
25#[derive(Clone, Debug, PartialEq, Eq)]
29pub struct Leaf {
30 pub entries: Vec<(ProllyKey, Cid)>,
32 pub extra: BTreeMap<String, Ipld>,
34}
35
36impl Leaf {
37 #[must_use]
39 pub const fn new(entries: Vec<(ProllyKey, Cid)>) -> Self {
40 Self {
41 entries,
42 extra: BTreeMap::new(),
43 }
44 }
45}
46
47#[derive(Clone, Debug, PartialEq, Eq)]
49pub struct Internal {
50 pub boundaries: Vec<ProllyKey>,
53 pub children: Vec<Cid>,
55 pub extra: BTreeMap<String, Ipld>,
57}
58
59impl Internal {
60 #[must_use]
66 pub fn new(boundaries: Vec<ProllyKey>, children: Vec<Cid>) -> Self {
67 debug_assert_eq!(boundaries.len() + 1, children.len());
68 Self {
69 boundaries,
70 children,
71 extra: BTreeMap::new(),
72 }
73 }
74}
75
76#[derive(Clone, Debug, PartialEq, Eq)]
78pub enum TreeChunk {
79 Leaf(Leaf),
81 Internal(Internal),
83}
84
85impl TreeChunk {
86 pub const KIND: &'static str = "tree";
88
89 #[must_use]
91 pub const fn is_leaf(&self) -> bool {
92 matches!(self, Self::Leaf(_))
93 }
94
95 #[must_use]
97 pub const fn len(&self) -> usize {
98 match self {
99 Self::Leaf(l) => l.entries.len(),
100 Self::Internal(i) => i.children.len(),
101 }
102 }
103
104 #[must_use]
106 pub const fn is_empty(&self) -> bool {
107 self.len() == 0
108 }
109}
110
111#[derive(Serialize, Deserialize)]
114struct TreeWire {
115 #[serde(rename = "_kind")]
116 kind: String,
117 leaf: bool,
118 #[serde(default, skip_serializing_if = "Option::is_none")]
119 entries: Option<Vec<(ProllyKey, Cid)>>,
120 #[serde(default, skip_serializing_if = "Option::is_none")]
121 boundaries: Option<Vec<ProllyKey>>,
122 #[serde(default, skip_serializing_if = "Option::is_none")]
123 children: Option<Vec<Cid>>,
124 #[serde(flatten, default, skip_serializing_if = "BTreeMap::is_empty")]
125 extra: BTreeMap<String, Ipld>,
126}
127
128impl Serialize for TreeChunk {
129 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
130 let wire = match self {
131 Self::Leaf(l) => TreeWire {
132 kind: Self::KIND.into(),
133 leaf: true,
134 entries: Some(l.entries.clone()),
135 boundaries: None,
136 children: None,
137 extra: l.extra.clone(),
138 },
139 Self::Internal(i) => TreeWire {
140 kind: Self::KIND.into(),
141 leaf: false,
142 entries: None,
143 boundaries: Some(i.boundaries.clone()),
144 children: Some(i.children.clone()),
145 extra: i.extra.clone(),
146 },
147 };
148 wire.serialize(serializer)
149 }
150}
151
152impl<'de> Deserialize<'de> for TreeChunk {
153 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
154 use serde::de::Error as _;
155 let w = TreeWire::deserialize(deserializer)?;
156 if w.kind != Self::KIND {
157 return Err(D::Error::custom(format!(
158 "expected _kind='tree', got '{}'",
159 w.kind
160 )));
161 }
162 if w.leaf {
163 let entries = w
164 .entries
165 .ok_or_else(|| D::Error::custom("leaf chunk missing 'entries'"))?;
166 if w.boundaries.is_some() || w.children.is_some() {
167 return Err(D::Error::custom(
168 "leaf chunk must not carry 'boundaries' or 'children'",
169 ));
170 }
171 Ok(Self::Leaf(Leaf {
172 entries,
173 extra: w.extra,
174 }))
175 } else {
176 let boundaries = w
177 .boundaries
178 .ok_or_else(|| D::Error::custom("internal chunk missing 'boundaries'"))?;
179 let children = w
180 .children
181 .ok_or_else(|| D::Error::custom("internal chunk missing 'children'"))?;
182 if w.entries.is_some() {
183 return Err(D::Error::custom("internal chunk must not carry 'entries'"));
184 }
185 if boundaries.len() + 1 != children.len() {
186 return Err(D::Error::custom(format!(
187 "internal chunk: boundaries.len()={} must equal children.len()-1 ({}-1)",
188 boundaries.len(),
189 children.len()
190 )));
191 }
192 Ok(Self::Internal(Internal {
193 boundaries,
194 children,
195 extra: w.extra,
196 }))
197 }
198 }
199}
200
201pub fn load_chunk(bytes: &[u8]) -> Result<TreeChunk, Error> {
211 Ok(from_canonical_bytes(bytes)?)
212}
213
214pub fn load_tree_chunk<B: Blockstore + ?Sized>(store: &B, cid: &Cid) -> Result<TreeChunk, Error> {
224 let bytes = store
225 .get(cid)?
226 .ok_or_else(|| crate::error::StoreError::NotFound { cid: cid.clone() })?;
227 load_chunk(&bytes)
228}
229
230pub fn build_tree<B, I>(blockstore: &B, entries: I) -> Result<Cid, Error>
244where
245 B: Blockstore + ?Sized,
246 I: IntoIterator<Item = (ProllyKey, Cid)>,
247{
248 let level_zero = build_leaf_level(blockstore, entries)?;
249 let mut current = level_zero;
250 while current.len() > 1 {
251 current = build_internal_level(blockstore, current)?;
252 }
253 Ok(current
256 .into_iter()
257 .next()
258 .expect("build_leaf_level emits >= 1 chunk")
259 .1)
260}
261
262fn build_leaf_level<B, I>(blockstore: &B, entries: I) -> Result<Vec<(ProllyKey, Cid)>, Error>
263where
264 B: Blockstore + ?Sized,
265 I: IntoIterator<Item = (ProllyKey, Cid)>,
266{
267 let mut chunker = Chunker::new();
268 let mut cur: Vec<(ProllyKey, Cid)> = Vec::with_capacity(128);
269 let mut out: Vec<(ProllyKey, Cid)> = Vec::new();
270
271 for (k, v) in entries {
272 chunker.push_key(&k);
273 cur.push((k, v));
274 if chunker.should_split_at(cur.len()) {
275 emit_leaf(blockstore, &mut cur, &mut out)?;
276 chunker.reset();
277 }
278 }
279
280 if !cur.is_empty() {
281 emit_leaf(blockstore, &mut cur, &mut out)?;
282 }
283
284 if out.is_empty() {
285 let chunk = TreeChunk::Leaf(Leaf::new(Vec::new()));
287 let (bytes, cid) = hash_to_cid(&chunk)?;
288 blockstore.put_trusted(cid.clone(), bytes)?;
290 out.push((ProllyKey::default(), cid));
291 }
292
293 Ok(out)
294}
295
296fn emit_leaf<B: Blockstore + ?Sized>(
297 blockstore: &B,
298 cur: &mut Vec<(ProllyKey, Cid)>,
299 out: &mut Vec<(ProllyKey, Cid)>,
300) -> Result<(), Error> {
301 let first = cur[0].0;
302 let chunk = TreeChunk::Leaf(Leaf::new(std::mem::take(cur)));
303 let (bytes, cid) = hash_to_cid(&chunk)?;
304 blockstore.put_trusted(cid.clone(), bytes)?;
306 out.push((first, cid));
307 Ok(())
308}
309
310fn build_internal_level<B: Blockstore + ?Sized>(
311 blockstore: &B,
312 children: Vec<(ProllyKey, Cid)>,
313) -> Result<Vec<(ProllyKey, Cid)>, Error> {
314 let mut chunker = Chunker::new();
315 let mut out: Vec<(ProllyKey, Cid)> = Vec::new();
316 let mut cur_children: Vec<Cid> = Vec::new();
317 let mut cur_boundaries: Vec<ProllyKey> = Vec::new();
318 let mut cur_first: Option<ProllyKey> = None;
319
320 for (first_key, child_cid) in children {
321 chunker.push_key(&first_key);
322 if cur_first.is_none() {
323 cur_first = Some(first_key);
324 } else {
325 cur_boundaries.push(first_key);
327 }
328 cur_children.push(child_cid);
329
330 if chunker.should_split_at(cur_children.len()) {
331 emit_internal(
332 blockstore,
333 &mut cur_boundaries,
334 &mut cur_children,
335 &mut cur_first,
336 &mut out,
337 )?;
338 chunker.reset();
339 }
340 }
341
342 if !cur_children.is_empty() {
343 emit_internal(
344 blockstore,
345 &mut cur_boundaries,
346 &mut cur_children,
347 &mut cur_first,
348 &mut out,
349 )?;
350 }
351
352 Ok(out)
353}
354
355fn emit_internal<B: Blockstore + ?Sized>(
356 blockstore: &B,
357 cur_boundaries: &mut Vec<ProllyKey>,
358 cur_children: &mut Vec<Cid>,
359 cur_first: &mut Option<ProllyKey>,
360 out: &mut Vec<(ProllyKey, Cid)>,
361) -> Result<(), Error> {
362 debug_assert_eq!(cur_boundaries.len() + 1, cur_children.len());
363 let chunk = TreeChunk::Internal(Internal::new(
364 std::mem::take(cur_boundaries),
365 std::mem::take(cur_children),
366 ));
367 let (bytes, cid) = hash_to_cid(&chunk)?;
368 blockstore.put_trusted(cid.clone(), bytes)?;
370 out.push((cur_first.take().expect("first key set above"), cid));
371 Ok(())
372}
373
374#[cfg(test)]
375mod tests {
376 use super::*;
377 use crate::codec::{from_canonical_bytes, to_canonical_bytes};
378 use crate::id::{CODEC_RAW, Multihash};
379 use crate::store::MemoryBlockstore;
380
381 fn dummy_cid(n: u32) -> Cid {
382 Cid::new(CODEC_RAW, Multihash::sha2_256(&n.to_be_bytes()))
383 }
384
385 fn keyed(i: u32) -> ProllyKey {
386 let mut k = [0u8; 16];
387 k[12..16].copy_from_slice(&i.to_be_bytes());
388 ProllyKey(k)
389 }
390
391 #[test]
392 fn leaf_round_trip() {
393 let leaf = TreeChunk::Leaf(Leaf::new(vec![
394 (keyed(1), dummy_cid(1)),
395 (keyed(2), dummy_cid(2)),
396 ]));
397 let bytes = to_canonical_bytes(&leaf).unwrap();
398 let decoded: TreeChunk = from_canonical_bytes(&bytes).unwrap();
399 assert_eq!(leaf, decoded);
400 let bytes2 = to_canonical_bytes(&decoded).unwrap();
401 assert_eq!(bytes, bytes2);
402 }
403
404 #[test]
405 fn internal_round_trip() {
406 let internal = TreeChunk::Internal(Internal::new(
407 vec![keyed(10), keyed(20)],
408 vec![dummy_cid(100), dummy_cid(200), dummy_cid(300)],
409 ));
410 let bytes = to_canonical_bytes(&internal).unwrap();
411 let decoded: TreeChunk = from_canonical_bytes(&bytes).unwrap();
412 assert_eq!(internal, decoded);
413 }
414
415 #[test]
416 fn wrong_kind_rejected() {
417 let w = TreeWire {
419 kind: "node".into(),
420 leaf: true,
421 entries: Some(vec![]),
422 boundaries: None,
423 children: None,
424 extra: BTreeMap::new(),
425 };
426 let bytes = serde_ipld_dagcbor::to_vec(&w).unwrap();
427 let err = serde_ipld_dagcbor::from_slice::<TreeChunk>(&bytes).unwrap_err();
428 assert!(err.to_string().contains("_kind"));
429 }
430
431 #[test]
432 fn internal_rejects_inconsistent_lengths() {
433 let w = TreeWire {
434 kind: "tree".into(),
435 leaf: false,
436 entries: None,
437 boundaries: Some(vec![keyed(10), keyed(20)]),
438 children: Some(vec![dummy_cid(1)]),
439 extra: BTreeMap::new(),
440 };
441 let bytes = serde_ipld_dagcbor::to_vec(&w).unwrap();
442 let err = serde_ipld_dagcbor::from_slice::<TreeChunk>(&bytes).unwrap_err();
443 assert!(err.to_string().contains("boundaries.len()"));
444 }
445
446 #[test]
447 fn build_empty_produces_single_empty_leaf() {
448 let store = MemoryBlockstore::new();
449 let root = build_tree(&store, std::iter::empty()).unwrap();
450 assert_eq!(store.len(), 1);
451 let bytes = store.get(&root).unwrap().unwrap();
452 let chunk: TreeChunk = from_canonical_bytes(&bytes).unwrap();
453 match chunk {
454 TreeChunk::Leaf(l) => assert!(l.entries.is_empty()),
455 TreeChunk::Internal(_) => panic!("empty tree should be a leaf"),
456 }
457 }
458
459 #[test]
460 fn build_single_entry_is_leaf() {
461 let store = MemoryBlockstore::new();
462 let root = build_tree(&store, [(keyed(1), dummy_cid(1))]).unwrap();
463 let bytes = store.get(&root).unwrap().unwrap();
464 let chunk: TreeChunk = from_canonical_bytes(&bytes).unwrap();
465 assert!(chunk.is_leaf());
466 assert_eq!(chunk.len(), 1);
467 }
468
469 #[test]
470 fn build_many_entries_produces_internal_root() {
471 let store = MemoryBlockstore::new();
472 let entries: Vec<_> = (0..1000u32).map(|i| (keyed(i), dummy_cid(i))).collect();
473 let root = build_tree(&store, entries).unwrap();
474 let bytes = store.get(&root).unwrap().unwrap();
475 let chunk: TreeChunk = from_canonical_bytes(&bytes).unwrap();
476 assert!(
477 !chunk.is_leaf(),
478 "1000 entries should require an internal root"
479 );
480 }
481
482 #[test]
483 fn build_is_deterministic_across_builds() {
484 let entries: Vec<_> = (0..1_000u32).map(|i| (keyed(i), dummy_cid(i))).collect();
485 let s1 = MemoryBlockstore::new();
486 let r1 = build_tree(&s1, entries.clone()).unwrap();
487 let s2 = MemoryBlockstore::new();
488 let r2 = build_tree(&s2, entries).unwrap();
489 assert_eq!(r1, r2);
490 assert_eq!(s1.len(), s2.len(), "chunk count must match");
491 }
492
493 #[test]
494 fn internal_children_are_reachable_from_root() {
495 let store = MemoryBlockstore::new();
496 let entries: Vec<_> = (0..500u32).map(|i| (keyed(i), dummy_cid(i))).collect();
497 let root = build_tree(&store, entries).unwrap();
498
499 fn walk(cid: &Cid, store: &MemoryBlockstore, leaves: &mut usize, internals: &mut usize) {
501 let bytes = store.get(cid).unwrap().unwrap();
502 let chunk: TreeChunk = from_canonical_bytes(&bytes).unwrap();
503 match chunk {
504 TreeChunk::Leaf(_) => *leaves += 1,
505 TreeChunk::Internal(i) => {
506 *internals += 1;
507 for c in &i.children {
508 walk(c, store, leaves, internals);
509 }
510 }
511 }
512 }
513
514 let mut leaves = 0;
515 let mut internals = 0;
516 walk(&root, &store, &mut leaves, &mut internals);
517 assert!(leaves >= 1);
518 assert!(
519 internals >= 1,
520 "500 entries should yield at least one internal chunk"
521 );
522 assert_eq!(leaves + internals, store.len(), "no unreachable chunks");
523 }
524}