Skip to main content

nmp_threading/
grouper.rs

1//! `Grouper<R>` — kind-agnostic timeline grouping. Given a stream of
2//! `KernelEvent`s and a `ParentResolver`, emits a `Vec<TimelineBlock>` where
3//! reply chains collapse into Twitter-style modules.
4//!
5//! ## Algorithm sketch
6//!
7//! On each event insert:
8//!   1. Ignore if already known.
9//!   2. Resolve parent via the per-NIP `ParentResolver`.
10//!   3. If parent is an `Event` already in store AND occupies the leaf of an
11//!      existing block, splice the new event onto that block (promoting
12//!      Standalone → Module if needed) up to `policy.max_module_size`.
13//!   4. Otherwise walk ancestors up to `policy.max_ancestor_hops`, picking
14//!      up `Event` ids that are in the store and not yet `seen`. `Address`
15//!      / `External` parents terminate the walk and become the module's
16//!      `root` pointer.
17//!   5. Wrap the chain in a `TimelineBlock`; `blocks` is kept sorted by
18//!      newest event timestamp, regardless of relay arrival order.
19//!   6. If the parent is unknown locally, buffer the child in `orphans`
20//!      keyed by the missing parent id. Parent arrival replays children.
21//!
22//! Adjacent-block collapse runs after every mutation: two `Module` blocks
23//! sharing the same `root` pointer merge if `policy.collapse_adjacent_same_
24//! root` is set and the merged length would fit `max_module_size`.
25//!
26//! ## Why no dynamic dependency injection
27//!
28//! A view's `dependencies` is a pure function of its spec. There is no API
29//! to re-publish dependencies with `pending_ancestor_ids` learned at
30//! runtime. `ThreadView` lives with the same constraint and relies on the
31//! surrounding planner subscription (broad `("e", target)` tag-ref) to
32//! surface ancestors. Wrappers around this grouper inherit that contract;
33//! `pending_ancestor_ids` is kept as internal diagnostic state.
34//!
35//! ## Module layout
36//!
37//! The algorithm is split by phase across submodules; this file is the
38//! spine (state + construction + read accessors) that the phases share:
39//!   - [`lifecycle`] — insert/remove/replace entry points, orphan replay,
40//!     supersession bookkeeping.
41//!   - [`placement`] — splicing an event onto an existing block or walking
42//!     ancestors to build a fresh chain.
43//!   - [`ordering`] — block recency ordering and delta-index resolution.
44//!   - [`collapse`] — adjacent same-root module merging.
45
46use std::collections::{BTreeMap, BTreeSet, HashSet};
47
48use nmp_core::substrate::{EventId, KernelEvent};
49use serde::{Deserialize, Serialize};
50
51use crate::block::TimelineBlock;
52use crate::policy::ModulePolicy;
53use crate::resolver::ParentResolver;
54
55mod collapse;
56mod lifecycle;
57mod ordering;
58mod placement;
59
60/// Delta surface for the grouper. Wrappers map this into their own
61/// view-module `Delta` type (typically a 1:1 forward).
62#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
63pub enum GroupDelta {
64    /// A new block was inserted at the given display index.
65    BlockInserted(usize),
66    /// A block at the given index was replaced (length / membership /
67    /// `has_gap` changed). Wrappers re-emit the full block from
68    /// [`Grouper::blocks`].
69    BlockReplaced(usize),
70    /// A block at the given index was removed.
71    BlockRemoved(usize),
72}
73
74/// Owning state for the algorithm. One instance per open view.
75pub struct Grouper<R: ParentResolver> {
76    resolver: R,
77    policy: ModulePolicy,
78    /// Display-order blocks: index 0 is newest.
79    blocks: Vec<TimelineBlock>,
80    /// Every event id the grouper has accepted into some block.
81    seen: HashSet<EventId>,
82    /// Full event payloads we have observed (parent lookups + replay).
83    by_id: BTreeMap<EventId, KernelEvent>,
84    /// Children waiting on a parent id. Replayed on parent arrival.
85    orphans: BTreeMap<EventId, BTreeSet<EventId>>,
86    /// Events currently buffered as orphans (their own parent is still
87    /// unknown). They must NOT be absorbed by another event's ancestor walk
88    /// — when their parent later arrives we want a clean stitch, not a
89    /// half-attached chain that needs re-stitching.
90    orphaned: HashSet<EventId>,
91    /// Ancestor event ids the grouper would like the planner to surface —
92    /// declared but the substrate has no dynamic-deps API yet. Kept for
93    /// diagnostics / a future trait extension.
94    pending_ancestor_ids: BTreeSet<EventId>,
95    /// Per-target set of superseding event ids. While a target's set is
96    /// non-empty, its standalone block is suppressed from the layout — a
97    /// late-arriving target won't get its own block either. The target stays
98    /// in `by_id` so reply chains can still locate it as a parent, and so the
99    /// block can be restored if all its superseders are later removed.
100    ///
101    /// Populated by `ParentResolver::supersedes` (e.g., a NIP-18 repost names
102    /// the note it should bump in the feed).
103    superseded_by: BTreeMap<EventId, BTreeSet<EventId>>,
104}
105
106impl<R: ParentResolver> Grouper<R> {
107    #[must_use]
108    pub fn new(resolver: R, policy: ModulePolicy) -> Self {
109        Self {
110            resolver,
111            policy,
112            blocks: Vec::new(),
113            seen: HashSet::new(),
114            by_id: BTreeMap::new(),
115            orphans: BTreeMap::new(),
116            orphaned: HashSet::new(),
117            pending_ancestor_ids: BTreeSet::new(),
118            superseded_by: BTreeMap::new(),
119        }
120    }
121
122    #[must_use]
123    pub fn blocks(&self) -> &[TimelineBlock] {
124        &self.blocks
125    }
126
127    #[must_use]
128    pub fn pending_ancestor_ids(&self) -> &BTreeSet<EventId> {
129        &self.pending_ancestor_ids
130    }
131
132    #[must_use]
133    pub fn event(&self, id: &EventId) -> Option<&KernelEvent> {
134        self.by_id.get(id)
135    }
136}