nmp_threading/grouper.rs
1//! `Grouper<R>` — kind-agnostic timeline grouping. Given a stream of
2//! `KernelEvent`s and a `ParentResolver`, emits a `Vec<TimelineBlock>` where
3//! reply chains collapse into Twitter-style modules.
4//!
5//! ## Algorithm sketch
6//!
7//! On each event insert:
8//! 1. Ignore if already known.
9//! 2. Resolve parent via the per-NIP `ParentResolver`.
10//! 3. If parent is an `Event` already in store AND occupies the leaf of an
11//! existing block, splice the new event onto that block (promoting
12//! Standalone → Module if needed) up to `policy.max_module_size`.
13//! 4. Otherwise walk ancestors up to `policy.max_ancestor_hops`, picking
14//! up `Event` ids that are in the store and not yet `seen`. `Address`
15//! / `External` parents terminate the walk and become the module's
16//! `root` pointer.
17//! 5. Wrap the chain in a `TimelineBlock`; `blocks` is kept sorted by
18//! newest event timestamp, regardless of relay arrival order.
19//! 6. If the parent is unknown locally, buffer the child in `orphans`
20//! keyed by the missing parent id. Parent arrival replays children.
21//!
22//! Adjacent-block collapse runs after every mutation: two `Module` blocks
23//! sharing the same `root` pointer merge if `policy.collapse_adjacent_same_
24//! root` is set and the merged length would fit `max_module_size`.
25//!
26//! ## Why no dynamic dependency injection
27//!
28//! A view's `dependencies` is a pure function of its spec. There is no API
29//! to re-publish dependencies with `pending_ancestor_ids` learned at
30//! runtime. `ThreadView` lives with the same constraint and relies on the
31//! surrounding planner subscription (broad `("e", target)` tag-ref) to
32//! surface ancestors. Wrappers around this grouper inherit that contract;
33//! `pending_ancestor_ids` is kept as internal diagnostic state.
34//!
35//! ## Module layout
36//!
37//! The algorithm is split by phase across submodules; this file is the
38//! spine (state + construction + read accessors) that the phases share:
39//! - [`lifecycle`] — insert/remove/replace entry points, orphan replay,
40//! supersession bookkeeping.
41//! - [`placement`] — splicing an event onto an existing block or walking
42//! ancestors to build a fresh chain.
43//! - [`ordering`] — block recency ordering and delta-index resolution.
44//! - [`collapse`] — adjacent same-root module merging.
45
46use std::collections::{BTreeMap, BTreeSet, HashSet};
47
48use nmp_core::substrate::{EventId, KernelEvent};
49use serde::{Deserialize, Serialize};
50
51use crate::block::TimelineBlock;
52use crate::policy::ModulePolicy;
53use crate::resolver::ParentResolver;
54
55mod collapse;
56mod lifecycle;
57mod ordering;
58mod placement;
59
60/// Delta surface for the grouper. Wrappers map this into their own
61/// view-module `Delta` type (typically a 1:1 forward).
62#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
63pub enum GroupDelta {
64 /// A new block was inserted at the given display index.
65 BlockInserted(usize),
66 /// A block at the given index was replaced (length / membership /
67 /// `has_gap` changed). Wrappers re-emit the full block from
68 /// [`Grouper::blocks`].
69 BlockReplaced(usize),
70 /// A block at the given index was removed.
71 BlockRemoved(usize),
72}
73
74/// Owning state for the algorithm. One instance per open view.
75pub struct Grouper<R: ParentResolver> {
76 resolver: R,
77 policy: ModulePolicy,
78 /// Display-order blocks: index 0 is newest.
79 blocks: Vec<TimelineBlock>,
80 /// Every event id the grouper has accepted into some block.
81 seen: HashSet<EventId>,
82 /// Full event payloads we have observed (parent lookups + replay).
83 by_id: BTreeMap<EventId, KernelEvent>,
84 /// Children waiting on a parent id. Replayed on parent arrival.
85 orphans: BTreeMap<EventId, BTreeSet<EventId>>,
86 /// Events currently buffered as orphans (their own parent is still
87 /// unknown). They must NOT be absorbed by another event's ancestor walk
88 /// — when their parent later arrives we want a clean stitch, not a
89 /// half-attached chain that needs re-stitching.
90 orphaned: HashSet<EventId>,
91 /// Ancestor event ids the grouper would like the planner to surface —
92 /// declared but the substrate has no dynamic-deps API yet. Kept for
93 /// diagnostics / a future trait extension.
94 pending_ancestor_ids: BTreeSet<EventId>,
95 /// Per-target set of superseding event ids. While a target's set is
96 /// non-empty, its standalone block is suppressed from the layout — a
97 /// late-arriving target won't get its own block either. The target stays
98 /// in `by_id` so reply chains can still locate it as a parent, and so the
99 /// block can be restored if all its superseders are later removed.
100 ///
101 /// Populated by `ParentResolver::supersedes` (e.g., a NIP-18 repost names
102 /// the note it should bump in the feed).
103 superseded_by: BTreeMap<EventId, BTreeSet<EventId>>,
104}
105
106impl<R: ParentResolver> Grouper<R> {
107 #[must_use]
108 pub fn new(resolver: R, policy: ModulePolicy) -> Self {
109 Self {
110 resolver,
111 policy,
112 blocks: Vec::new(),
113 seen: HashSet::new(),
114 by_id: BTreeMap::new(),
115 orphans: BTreeMap::new(),
116 orphaned: HashSet::new(),
117 pending_ancestor_ids: BTreeSet::new(),
118 superseded_by: BTreeMap::new(),
119 }
120 }
121
122 #[must_use]
123 pub fn blocks(&self) -> &[TimelineBlock] {
124 &self.blocks
125 }
126
127 #[must_use]
128 pub fn pending_ancestor_ids(&self) -> &BTreeSet<EventId> {
129 &self.pending_ancestor_ids
130 }
131
132 #[must_use]
133 pub fn event(&self, id: &EventId) -> Option<&KernelEvent> {
134 self.by_id.get(id)
135 }
136}