ankurah_core/
node.rs

1use crate::selection::filter::Filterable;
2use ankurah_proto::{self as proto, Attested, CollectionId, EntityState};
3use anyhow::anyhow;
4
5use rand::prelude::*;
6use std::{
7    fmt,
8    hash::Hash,
9    ops::Deref,
10    sync::{Arc, Weak},
11};
12use tokio::sync::oneshot;
13
14use crate::{
15    action_error, action_info,
16    changes::EntityChange,
17    collectionset::CollectionSet,
18    connector::{PeerSender, SendError},
19    context::Context,
20    entity::{Entity, WeakEntitySet},
21    error::{MutationError, RequestError, RetrievalError},
22    notice_info,
23    peer_subscription::{SubscriptionHandler, SubscriptionRelay},
24    policy::{AccessDenied, PolicyAgent},
25    reactor::{AbstractEntity, Reactor},
26    retrieval::LocalRetriever,
27    storage::StorageEngine,
28    system::SystemManager,
29    util::{safemap::SafeMap, safeset::SafeSet, Iterable},
30};
31use itertools::Itertools;
32#[cfg(feature = "instrument")]
33use tracing::instrument;
34
35use tracing::{debug, error, warn};
36
/// Per-peer connection state held by the local node for each registered peer.
pub struct PeerState {
    /// Transport used to deliver messages to this peer.
    sender: Box<dyn PeerSender>,
    // Whether the peer advertised itself as durable at registration.
    // Currently unused after registration (durability is tracked in `durable_peers`).
    _durable: bool,
    /// Handles query subscriptions this peer has registered with us.
    subscription_handler: SubscriptionHandler,
    /// In-flight requests awaiting a `NodeResponse`, keyed by request id.
    pending_requests: SafeMap<proto::RequestId, oneshot::Sender<Result<proto::NodeResponseBody, RequestError>>>,
    /// In-flight updates keyed by update id. NOTE(review): ack resolution is
    /// currently commented out in `handle_message`, so entries are inserted but
    /// never consumed — confirm whether this is intentional.
    pending_updates: SafeMap<proto::UpdateId, oneshot::Sender<Result<proto::NodeResponseBody, RequestError>>>,
}
44
45impl PeerState {
46    pub fn send_message(&self, message: proto::NodeMessage) -> Result<(), SendError> { self.sender.send_message(message) }
47}
48
/// Arguments for a match/fetch style query: the parsed selection plus whether
/// cached results may be used.
pub struct MatchArgs {
    /// The parsed ANKQL selection (predicate, ordering, limit).
    pub selection: ankql::ast::Selection,
    /// When false, results must bypass any cache (see [`MatchArgs::nocache`]).
    pub cached: bool,
}
53
54impl TryInto<MatchArgs> for &str {
55    type Error = ankql::error::ParseError;
56    fn try_into(self) -> Result<MatchArgs, Self::Error> { Ok(MatchArgs { selection: ankql::parser::parse_selection(self)?, cached: true }) }
57}
58impl TryInto<MatchArgs> for String {
59    type Error = ankql::error::ParseError;
60    fn try_into(self) -> Result<MatchArgs, Self::Error> {
61        Ok(MatchArgs { selection: ankql::parser::parse_selection(&self)?, cached: true })
62    }
63}
64
65impl From<ankql::ast::Predicate> for MatchArgs {
66    fn from(val: ankql::ast::Predicate) -> Self {
67        MatchArgs { selection: ankql::ast::Selection { predicate: val, order_by: None, limit: None }, cached: true }
68    }
69}
70
71impl From<ankql::ast::Selection> for MatchArgs {
72    fn from(val: ankql::ast::Selection) -> Self { MatchArgs { selection: val, cached: true } }
73}
74
75impl From<ankql::error::ParseError> for RetrievalError {
76    fn from(e: ankql::error::ParseError) -> Self { RetrievalError::ParseError(e) }
77}
78
79pub fn nocache<T: TryInto<ankql::ast::Selection, Error = ankql::error::ParseError>>(s: T) -> Result<MatchArgs, ankql::error::ParseError> {
80    MatchArgs::nocache(s)
81}
82impl MatchArgs {
83    pub fn nocache<T>(s: T) -> Result<Self, ankql::error::ParseError>
84    where T: TryInto<ankql::ast::Selection, Error = ankql::error::ParseError> {
85        Ok(Self { selection: s.try_into()?, cached: false })
86    }
87}
88
/// A participant in the Ankurah network, and primary place where queries are initiated.
///
/// A cheap-to-clone handle wrapping an `Arc<NodeInner>`; downgrade to a
/// non-owning handle via [`Node::weak`].
pub struct Node<SE, PA>(pub(crate) Arc<NodeInner<SE, PA>>)
where PA: PolicyAgent;
93impl<SE, PA> Clone for Node<SE, PA>
94where PA: PolicyAgent
95{
96    fn clone(&self) -> Self { Self(self.0.clone()) }
97}
98
/// Non-owning handle to a [`Node`]; recover a strong handle with [`WeakNode::upgrade`].
pub struct WeakNode<SE, PA>(Weak<NodeInner<SE, PA>>)
where PA: PolicyAgent;
101impl<SE, PA> Clone for WeakNode<SE, PA>
102where PA: PolicyAgent
103{
104    fn clone(&self) -> Self { Self(self.0.clone()) }
105}
106
107impl<SE, PA> WeakNode<SE, PA>
108where PA: PolicyAgent
109{
110    pub fn upgrade(&self) -> Option<Node<SE, PA>> { self.0.upgrade().map(Node) }
111}
112
113impl<SE, PA> Deref for Node<SE, PA>
114where PA: PolicyAgent
115{
116    type Target = Arc<NodeInner<SE, PA>>;
117    fn deref(&self) -> &Self::Target { &self.0 }
118}
119
/// Represents the user session - or whatever other context the PolicyAgent
/// needs to perform its evaluation.
pub trait ContextData: Send + Sync + Clone + Hash + Eq + 'static {}
123
/// Shared state behind a [`Node`] handle; reached through `Node`'s `Deref`.
pub struct NodeInner<SE, PA>
where PA: PolicyAgent
{
    /// This node's own identity on the network.
    pub id: proto::EntityId,
    /// Whether this node is durable (set by `new_durable`) or ephemeral (`new`).
    pub durable: bool,
    /// Per-collection storage access.
    pub collections: CollectionSet<SE>,

    // Weakly-held set of live entities shared across the node.
    pub(crate) entities: WeakEntitySet,
    /// Connected peers keyed by their node id.
    peer_connections: SafeMap<proto::EntityId, Arc<PeerState>>,
    /// Subset of connected peers that registered as durable.
    durable_peers: SafeSet<proto::EntityId>,

    // Context data associated with active queries, keyed by query id.
    pub(crate) predicate_context: SafeMap<proto::QueryId, PA::ContextData>,

    /// The reactor for handling subscriptions
    pub(crate) reactor: Reactor,
    /// Policy agent consulted for access control, signing, and attestation.
    pub(crate) policy_agent: PA,
    /// System catalog management; also the readiness gate used by `context()`.
    pub system: SystemManager<SE, PA>,

    /// Relay that forwards local subscriptions to durable peers.
    /// `Some` on ephemeral nodes only; `None` on durable nodes (see `new` / `new_durable`).
    pub(crate) subscription_relay: Option<SubscriptionRelay<PA::ContextData, crate::livequery::WeakEntityLiveQuery>>,

    /// Type resolver for AST preparation (temporary heuristic until Phase 3 schema)
    pub(crate) type_resolver: crate::TypeResolver,
}
147
148impl<SE, PA> Node<SE, PA>
149where
150    SE: StorageEngine + Send + Sync + 'static,
151    PA: PolicyAgent + Send + Sync + 'static,
152{
153    pub fn new(engine: Arc<SE>, policy_agent: PA) -> Self {
154        let collections = CollectionSet::new(engine.clone());
155        let entityset: WeakEntitySet = Default::default();
156        let id = proto::EntityId::new();
157        let reactor = Reactor::new();
158        notice_info!("Node {id:#} created as ephemeral");
159
160        let system_manager = SystemManager::new(collections.clone(), entityset.clone(), reactor.clone(), false);
161
162        // Create subscription relay for ephemeral nodes
163        let subscription_relay = Some(SubscriptionRelay::new());
164
165        let node = Node(Arc::new(NodeInner {
166            id,
167            collections,
168            entities: entityset,
169            peer_connections: SafeMap::new(),
170            durable_peers: SafeSet::new(),
171            reactor,
172            durable: false,
173            policy_agent,
174            system: system_manager,
175            predicate_context: SafeMap::new(),
176            subscription_relay,
177            type_resolver: crate::TypeResolver::new(),
178        }));
179
180        // Set up the message sender for the subscription relay
181        if let Some(ref relay) = node.subscription_relay {
182            let weak_node = node.weak();
183            if relay.set_node(Arc::new(weak_node)).is_err() {
184                warn!("Failed to set message sender for subscription relay");
185            }
186        }
187
188        node
189    }
190    pub fn new_durable(engine: Arc<SE>, policy_agent: PA) -> Self {
191        let collections = CollectionSet::new(engine);
192        let entityset: WeakEntitySet = Default::default();
193        let id = proto::EntityId::new();
194        let reactor = Reactor::new();
195        notice_info!("Node {id:#} created as durable");
196
197        let system_manager = SystemManager::new(collections.clone(), entityset.clone(), reactor.clone(), true);
198
199        Node(Arc::new(NodeInner {
200            id,
201            collections,
202            entities: entityset,
203            peer_connections: SafeMap::new(),
204            durable_peers: SafeSet::new(),
205            reactor,
206            durable: true,
207            policy_agent,
208            system: system_manager,
209            predicate_context: SafeMap::new(),
210            subscription_relay: None,
211            type_resolver: crate::TypeResolver::new(),
212        }))
213    }
214    pub fn weak(&self) -> WeakNode<SE, PA> { WeakNode(Arc::downgrade(&self.0)) }
215
    #[cfg_attr(feature = "instrument", instrument(level = "debug", skip_all, fields(node_id = %presence.node_id.to_base64_short(), durable = %presence.durable)))]
    /// Register a newly connected peer: record its sender and per-peer state,
    /// and if the peer is durable, track it, notify the subscription relay,
    /// and (on ephemeral nodes) join the peer's system in the background.
    pub fn register_peer(&self, presence: proto::Presence, sender: Box<dyn PeerSender>) {
        action_info!(self, "register_peer", "{}", &presence);

        let subscription_handler = SubscriptionHandler::new(presence.node_id, self);
        self.peer_connections.insert(
            presence.node_id,
            Arc::new(PeerState {
                sender,
                _durable: presence.durable,
                subscription_handler,
                pending_requests: SafeMap::new(),
                pending_updates: SafeMap::new(),
            }),
        );
        if presence.durable {
            self.durable_peers.insert(presence.node_id);

            // Notify subscription relay of new durable peer connection
            if let Some(ref relay) = self.subscription_relay {
                relay.notify_peer_connected(presence.node_id);
            }

            // Ephemeral nodes adopt the durable peer's system root. The join is
            // spawned so registration itself stays synchronous.
            if !self.durable {
                if let Some(system_root) = presence.system_root {
                    action_info!(self, "received system root", "{}", &system_root.payload);
                    let me = self.clone();
                    crate::task::spawn(async move {
                        if let Err(e) = me.system.join_system(system_root).await {
                            action_error!(me, "failed to join system", "{}", &e);
                        } else {
                            action_info!(me, "successfully joined system");
                        }
                    });
                } else {
                    error!("Node({}) durable peer {} has no system root", self.id, presence.node_id);
                }
            }
        }
        // TODO send hello message to the peer, including present head state for all relevant collections
    }
257    #[cfg_attr(feature = "instrument", instrument(level = "debug", skip_all, fields(node_id = %node_id.to_base64_short())))]
258    pub fn deregister_peer(&self, node_id: proto::EntityId) {
259        notice_info!("Node({:#}) deregister_peer {:#}", self.id, node_id);
260
261        self.durable_peers.remove(&node_id);
262        // Get and cleanup subscriptions before removing the peer
263        if let Some(peer_state) = self.peer_connections.remove(&node_id) {
264            action_info!(self, "unsubscribing", "subscription {} for peer {}", peer_state.subscription_handler.subscription_id(), node_id);
265            // ReactorSubscription is automatically unsubscribed on drop
266        }
267
268        // Notify subscription relay of peer disconnection (unconditional - relay handles filtering)
269        if let Some(ref relay) = self.subscription_relay {
270            relay.notify_peer_disconnected(node_id);
271        }
272    }
273    #[cfg_attr(feature = "instrument", instrument(skip_all, fields(node_id = %node_id, request_body = %request_body)))]
274    pub async fn request<'a, C>(
275        &self,
276        node_id: proto::EntityId,
277        cdata: &C,
278        request_body: proto::NodeRequestBody,
279    ) -> Result<proto::NodeResponseBody, RequestError>
280    where
281        C: Iterable<PA::ContextData>,
282    {
283        let (response_tx, response_rx) = oneshot::channel::<Result<proto::NodeResponseBody, RequestError>>();
284        let request_id = proto::RequestId::new();
285
286        let request = proto::NodeRequest { id: request_id.clone(), to: node_id, from: self.id, body: request_body };
287        let auth = self.policy_agent.sign_request(self, cdata, &request)?;
288
289        // Get the peer connection
290        let connection = self.peer_connections.get(&node_id).ok_or(RequestError::PeerNotConnected)?;
291
292        connection.pending_requests.insert(request_id, response_tx);
293        connection.send_message(proto::NodeMessage::Request { auth, request })?;
294
295        // Wait for response
296        response_rx.await.map_err(|_| RequestError::InternalChannelClosed)?
297    }
298
    // TODO LATER: rework this to be retried in the background some number of times
    /// Fire-and-forget an update to a peer — like `request`, but unsigned and
    /// not awaited; delivery failure is only logged.
    ///
    /// NOTE(review): the ack receiver (`_response_rx`) is dropped immediately and
    /// the await below is commented out, so `pending_updates` entries are never
    /// consumed on UpdateAck — confirm whether ack handling is intentionally disabled.
    pub fn send_update(&self, node_id: proto::EntityId, notification: proto::NodeUpdateBody) {
        // same as request, minus cdata and the sign_request step
        debug!("{self}.send_update({node_id:#}, {notification})");
        let (response_tx, _response_rx) = oneshot::channel::<Result<proto::NodeResponseBody, RequestError>>();
        let id = proto::UpdateId::new();

        // Get the peer connection
        let Some(connection) = self.peer_connections.get(&node_id) else {
            warn!("Failed to send update to peer {}: {}", node_id, RequestError::PeerNotConnected);
            return;
        };

        // Store the response channel
        connection.pending_updates.insert(id.clone(), response_tx);

        let notification = proto::NodeMessage::Update(proto::NodeUpdate { id, from: self.id, to: node_id, body: notification });

        // Best-effort send: failures are logged, not propagated to the caller.
        match connection.send_message(notification) {
            Ok(_) => {}
            Err(e) => {
                warn!("Failed to send update to peer {}: {}", node_id, e);
            }
        };

        // response_rx.await.map_err(|_| RequestError::InternalChannelClosed)??;
    }
326
    // TODO add a node id argument to this function rather than getting it from the message
    // (does this actually make it more secure? or just move the place they could lie to us to the handshake?)
    // Not if its signed by a node key.
    #[cfg_attr(feature = "instrument", instrument(level = "debug", skip_all, fields(message = %message)))]
    /// Entry point for all inbound peer traffic; dispatches on message type:
    /// - `Update`: applied via `handle_update`, then acked back to the sender.
    /// - `UpdateAck`: currently only logged (resolution code is commented out).
    /// - `Request`: auth-checked, handled, and a `Response` is sent back.
    /// - `Response`: resolves the matching entry in `pending_requests`.
    /// - `UnsubscribeQuery`: removes the query from the peer's subscription handler.
    pub async fn handle_message(&self, message: proto::NodeMessage) -> anyhow::Result<()> {
        match message {
            proto::NodeMessage::Update(update) => {
                debug!("Node({}) received update {}", self.id, update);

                // Only process updates from peers we have a connection entry for;
                // updates from unknown peers are silently dropped.
                if let Some(sender) = { self.peer_connections.get(&update.from).map(|c| c.sender.cloned()) } {
                    let _from = update.from;
                    let _id = update.id.clone();
                    if update.to != self.id {
                        warn!("{} received message from {} but is not the intended recipient", self.id, update.from);
                        return Ok(());
                    }

                    // take down the return address
                    let id = update.id.clone();
                    let to = update.from;
                    let from = self.id;

                    // TODO - validate the from node id is the one we're connected to
                    let body = match self.handle_update(update).await {
                        Ok(_) => proto::NodeUpdateAckBody::Success,
                        Err(e) => proto::NodeUpdateAckBody::Error(e.to_string()),
                    };

                    sender.send_message(proto::NodeMessage::UpdateAck(proto::NodeUpdateAck { id, from, to, body }))?;
                }
            }
            proto::NodeMessage::UpdateAck(ack) => {
                debug!("Node({}) received ack notification {} {}", self.id, ack.id, ack.body);
                // let connection = self.peer_connections.get(&ack.from).ok_or(RequestError::PeerNotConnected)?;
                // if let Some(tx) = connection.pending_updates.remove(&ack.id) {
                //     tx.send(Ok(proto::NodeResponseBody::Success)).unwrap();
                // }
            }
            proto::NodeMessage::Request { auth, request } => {
                debug!("Node({}) received request {}", self.id, request);
                // TODO: Should we spawn a task here and make handle_message synchronous?
                // I think this depends on how we want to handle timeouts.
                // I think we want timeouts to be handled by the node, not the connector,
                // which would lend itself to spawning a task here and making this function synchronous.

                // double check to make sure we have a connection to the peer based on the node id
                if let Some(sender) = { self.peer_connections.get(&request.from).map(|c| c.sender.cloned()) } {
                    let from = request.from;
                    let request_id = request.id.clone();
                    if request.to != self.id {
                        warn!("{} received message from {} but is not the intended recipient", self.id, request.from);
                        return Ok(());
                    }

                    // Validate the request auth first, converting errors to error responses
                    let body = match self.policy_agent.check_request(self, &auth, &request).await {
                        Ok(cdata) => match self.handle_request(&cdata, request).await {
                            Ok(result) => result,
                            Err(e) => proto::NodeResponseBody::Error(e.to_string()),
                        },
                        Err(e) => proto::NodeResponseBody::Error(e.to_string()),
                    };
                    // Send failure is deliberately ignored here — the peer may
                    // already have disconnected.
                    let _result = sender.send_message(proto::NodeMessage::Response(proto::NodeResponse {
                        request_id,
                        from: self.id,
                        to: from,
                        body,
                    }));
                }
            }
            proto::NodeMessage::Response(response) => {
                debug!("Node {} received response {}", self.id, response);
                let connection = self.peer_connections.get(&response.from).ok_or(RequestError::PeerNotConnected)?;
                if let Some(tx) = connection.pending_requests.remove(&response.request_id) {
                    tx.send(Ok(response.body)).map_err(|e| anyhow!("Failed to send response: {:?}", e))?;
                }
            }
            proto::NodeMessage::UnsubscribeQuery { from, query_id } => {
                // Remove predicate from the peer's subscription
                if let Some(peer_state) = self.peer_connections.get(&from) {
                    peer_state.subscription_handler.remove_predicate(query_id)?;
                }
            }
        }
        Ok(())
    }
413
    #[cfg_attr(feature = "instrument", instrument(level = "debug", skip_all, fields(request = %request)))]
    /// Handle an already-authenticated peer request and produce a response body.
    ///
    /// `cdata` is the context data produced by `check_request`; the
    /// `CommitTransaction` and `SubscribeQuery` variants require exactly one
    /// context datum.
    async fn handle_request<C>(&self, cdata: &C, request: proto::NodeRequest) -> anyhow::Result<proto::NodeResponseBody>
    where C: Iterable<PA::ContextData> {
        match request.body {
            proto::NodeRequestBody::CommitTransaction { id, events } => {
                // TODO - relay to peers in a gossipy/resource-available manner, so as to improve propagation
                // With moderate potential for duplication, while not creating message loops
                // Doing so would be a secondary/tertiary/etc hop for this message
                let cdata = cdata.iterable().exactly_one().map_err(|_| anyhow!("Only one cdata is permitted for CommitTransaction"))?;
                // Commit failures become an Error response body, not a transport error.
                match self.commit_remote_transaction(cdata, id.clone(), events).await {
                    Ok(_) => Ok(proto::NodeResponseBody::CommitComplete { id }),
                    Err(e) => Ok(proto::NodeResponseBody::Error(e.to_string())),
                }
            }
            proto::NodeRequestBody::Fetch { collection, mut selection, known_matches } => {
                self.policy_agent.can_access_collection(cdata, &collection)?;
                let storage_collection = self.collections.get(&collection).await?;
                // Narrow the predicate to what this requester is allowed to see.
                selection.predicate = self.policy_agent.filter_predicate(cdata, &collection, selection.predicate)?;

                // Expand initial_states to include entities from known_matches that weren't in the predicate results
                let expanded_states = crate::util::expand_states::expand_states(
                    storage_collection.fetch_states(&selection).await?,
                    known_matches.iter().map(|k| k.entity_id).collect::<Vec<_>>(),
                    &storage_collection,
                )
                .await?;

                let known_map: std::collections::HashMap<_, _> = known_matches.into_iter().map(|k| (k.entity_id, k.head)).collect();

                let mut deltas = Vec::new();
                for state in expanded_states {
                    // Entities the requester may not read are silently omitted.
                    if self.policy_agent.check_read(cdata, &state.payload.entity_id, &collection, &state.payload.state).is_err() {
                        continue;
                    }

                    // Generate delta based on known_matches (returns None if heads are equal)
                    // No need to reconstruct Entity - work directly with EntityState
                    if let Some(delta) = self.generate_entity_delta(&known_map, state, &storage_collection).await? {
                        deltas.push(delta);
                    }
                }
                Ok(proto::NodeResponseBody::Fetch(deltas))
            }
            proto::NodeRequestBody::Get { collection, ids } => {
                self.policy_agent.can_access_collection(cdata, &collection)?;
                let storage_collection = self.collections.get(&collection).await?;

                // filter out any that the policy agent says we don't have access to
                let mut states = Vec::new();
                for state in storage_collection.get_states(ids).await? {
                    match self.policy_agent.check_read(cdata, &state.payload.entity_id, &collection, &state.payload.state) {
                        Ok(_) => states.push(state),
                        Err(AccessDenied::ByPolicy(_)) => {}
                        // TODO: we need to have a cleaner delineation between actual access denied versus processing errors
                        Err(e) => return Err(anyhow!("Error from peer get: {}", e)),
                    }
                }

                Ok(proto::NodeResponseBody::Get(states))
            }
            proto::NodeRequestBody::GetEvents { collection, event_ids } => {
                self.policy_agent.can_access_collection(cdata, &collection)?;
                let storage_collection = self.collections.get(&collection).await?;

                // filter out any that the policy agent says we don't have access to
                let mut events = Vec::new();
                for event in storage_collection.get_events(event_ids).await? {
                    match self.policy_agent.check_read_event(cdata, &event) {
                        Ok(_) => events.push(event),
                        Err(AccessDenied::ByPolicy(_)) => {}
                        // TODO: we need to have a cleaner delineation between actual access denied versus processing errors
                        Err(e) => return Err(anyhow!("Error from peer subscription: {}", e)),
                    }
                }

                Ok(proto::NodeResponseBody::GetEvents(events))
            }
            proto::NodeRequestBody::SubscribeQuery { query_id, collection, selection, version, known_matches } => {
                let peer_state = self.peer_connections.get(&request.from).ok_or_else(|| anyhow!("Peer {} not connected", request.from))?;
                // only one cdata is permitted for SubscribePredicate
                use itertools::Itertools;
                let cdata = cdata.iterable().exactly_one().map_err(|_| anyhow!("Only one cdata is permitted for SubscribePredicate"))?;
                peer_state.subscription_handler.subscribe_query(self, query_id, collection, selection, cdata, version, known_matches).await
            }
        }
    }
500
501    async fn handle_update(&self, notification: proto::NodeUpdate) -> anyhow::Result<()> {
502        let Some(_connection) = self.peer_connections.get(&notification.from) else {
503            return Err(anyhow!("Rejected notification from unknown node {}", notification.from));
504        };
505
506        match notification.body {
507            proto::NodeUpdateBody::SubscriptionUpdate { items } => {
508                tracing::debug!("Node({}) received subscription update from peer {}", self.id, notification.from);
509                crate::node_applier::NodeApplier::apply_updates(self, &notification.from, items).await?;
510                Ok(())
511            }
512        }
513    }
514
    /// Relay a committed transaction's events to every durable peer, awaiting
    /// each in turn; fails if any peer rejects or returns an unexpected response.
    pub(crate) async fn relay_to_required_peers(
        &self,
        cdata: &PA::ContextData,
        id: proto::TransactionId,
        events: &[Attested<proto::Event>],
    ) -> Result<(), MutationError> {
        // TODO determine how many durable peers need to respond before we can proceed. The others should continue in the background.
        // as of this writing, we only have one durable peer, so we can just await the response from "all" of them
        for peer_id in self.get_durable_peers() {
            match self.request(peer_id, cdata, proto::NodeRequestBody::CommitTransaction { id: id.clone(), events: events.to_vec() }).await
            {
                Ok(proto::NodeResponseBody::CommitComplete { .. }) => (),
                // An explicit rejection from the peer fails the whole relay.
                Ok(proto::NodeResponseBody::Error(e)) => {
                    return Err(MutationError::General(Box::new(std::io::Error::other(format!("Peer {} rejected: {}", peer_id, e)))));
                }
                // Any other response body (or a transport error) is unexpected here.
                _ => {
                    return Err(MutationError::General(Box::new(std::io::Error::other(format!(
                        "Peer {} returned unexpected response",
                        peer_id
                    )))));
                }
            }
        }
        Ok(())
    }
540
    /// Does all the things necessary to commit a remote transaction
    ///
    /// For each event: resolves the target entity from local storage, validates
    /// the event against policy (creates are applied directly; updates are first
    /// applied to a forked snapshot so policy sees before/after), then persists
    /// the event and resulting state and records the change. Finally notifies
    /// the reactor with all accumulated changes.
    pub async fn commit_remote_transaction(
        &self,
        cdata: &PA::ContextData,
        id: proto::TransactionId,
        mut events: Vec<Attested<proto::Event>>,
    ) -> Result<(), MutationError> {
        debug!("{self} commiting transaction {id} with {} events", events.len());
        let mut changes = Vec::new();

        for event in events.iter_mut() {
            let collection = self.collections.get(&event.payload.collection).await?;

            // When applying an event, we should only look at the local storage for the lineage
            let retriever = LocalRetriever::new(collection.clone());
            let entity = self.entities.get_retrieve_or_create(&retriever, &event.payload.collection, &event.payload.entity_id).await?;

            // Handle creates vs updates differently for policy validation
            let (entity_before, entity_after, already_applied) = if event.payload.is_entity_create() && entity.head().is_empty() {
                // Create: apply to entity directly, use as both before/after
                entity.apply_event(&retriever, &event.payload).await?;
                (entity.clone(), entity.clone(), true)
            } else {
                // Update: snapshot, apply to fork for validation
                use std::sync::atomic::AtomicBool;
                let trx_alive = Arc::new(AtomicBool::new(true));
                let forked = entity.snapshot(trx_alive);
                forked.apply_event(&retriever, &event.payload).await?;
                (entity.clone(), forked, false)
            };

            // Check policy with before/after states
            if let Some(attestation) = self.policy_agent.check_event(self, cdata, &entity_before, &entity_after, &event.payload)? {
                event.attestations.push(attestation);
            }

            // For updates only: apply event to real entity (creates already applied above)
            let applied = if already_applied { true } else { entity.apply_event(&retriever, &event.payload).await? };

            if applied {
                // Persist the event and the entity's new (attested) state, then
                // record the change for reactor notification below.
                let state = entity.to_state()?;
                let entity_state = EntityState { entity_id: entity.id(), collection: entity.collection().clone(), state };
                let attestation = self.policy_agent.attest_state(self, &entity_state);
                let attested = Attested::opt(entity_state, attestation);
                collection.add_event(event).await?;
                collection.set_state(attested).await?;
                changes.push(EntityChange::new(entity.clone(), vec![event.clone()])?);
            }
        }

        self.reactor.notify_change(changes).await;

        Ok(())
    }
595
    /// Generate EntityDelta for an entity state, using known_matches to decide between StateSnapshot and EventBridge
    /// Returns None if the entity is in known_matches with equal heads (client already has current state)
    pub(crate) async fn generate_entity_delta(
        &self,
        known_map: &std::collections::HashMap<proto::EntityId, proto::Clock>,
        entity_state: proto::Attested<proto::EntityState>,
        storage_collection: &crate::storage::StorageCollectionWrapper,
    ) -> anyhow::Result<Option<proto::EntityDelta>>
    where
        SE: StorageEngine + Send + Sync + 'static,
        PA: PolicyAgent + Send + Sync + 'static,
    {
        // Destructure to take ownership and avoid clones
        let proto::Attested { payload: proto::EntityState { entity_id, collection, state }, attestations } = entity_state;
        let current_head = &state.head;

        // Entity is in known_matches - try to optimize the response
        if let Some(known_head) = known_map.get(&entity_id) {
            // Case 1: Heads equal → return None (omit entity, client already has current state) ✓
            if known_head == current_head {
                return Ok(None);
            }

            // Case 2: Heads differ → try to build EventBridge (cheaper than full state) ✓
            match self.collect_event_bridge(storage_collection, known_head, current_head).await {
                Ok(attested_events) if !attested_events.is_empty() => {
                    // Convert Attested<Event> to EventFragments (strips entity_id and collection)
                    let event_fragments: Vec<proto::EventFragment> = attested_events.into_iter().map(|e| e.into()).collect();

                    return Ok(Some(proto::EntityDelta {
                        entity_id,
                        collection,
                        content: proto::DeltaContent::EventBridge { events: event_fragments },
                    }));
                }
                _ => {
                    // Fall through to StateSnapshot if bridge building failed or returned empty
                }
            }
        }

        // Case 3: Entity not in known_matches OR bridge building failed → send full StateSnapshot ✓
        let state_fragment = proto::StateFragment { state, attestations };
        Ok(Some(proto::EntityDelta { entity_id, collection, content: proto::DeltaContent::StateSnapshot { state: state_fragment } }))
    }
641
642    /// Collect events between known_head and current_head using lineage comparison
643    pub(crate) async fn collect_event_bridge(
644        &self,
645        storage_collection: &crate::storage::StorageCollectionWrapper,
646        known_head: &proto::Clock,
647        current_head: &proto::Clock,
648    ) -> anyhow::Result<Vec<proto::Attested<proto::Event>>>
649    where
650        SE: StorageEngine + Send + Sync + 'static,
651        PA: PolicyAgent + Send + Sync + 'static,
652    {
653        use crate::lineage::{EventAccumulator, Ordering};
654        use crate::retrieval::LocalRetriever;
655
656        let retriever = LocalRetriever::new(storage_collection.clone());
657        let accumulator = EventAccumulator::new(None); // No limit for Phase 1
658        let mut comparison = crate::lineage::Comparison::new_with_accumulator(
659            &retriever,
660            current_head,
661            known_head,
662            100000, // TODO: make budget configurable
663            Some(accumulator),
664        );
665
666        // Run comparison
667        loop {
668            match comparison.step().await? {
669                Some(Ordering::Descends) => {
670                    // Current descends from known - perfect for event bridge
671                    break;
672                }
673                Some(Ordering::Equal) => {
674                    // Heads are equal - no events needed
675                    break;
676                }
677                Some(_) => {
678                    // Other relationships (NotDescends, Incomparable, etc.) - can't build bridge
679                    return Ok(vec![]);
680                }
681                None => {
682                    // Continue stepping
683                }
684            }
685        }
686
687        // Extract accumulated events
688        Ok(comparison.take_accumulated_events().unwrap_or_default())
689    }
690
691    pub fn next_entity_id(&self) -> proto::EntityId { proto::EntityId::new() }
692
693    pub fn context(&self, data: PA::ContextData) -> Result<Context, anyhow::Error> {
694        if !self.system.is_system_ready() {
695            return Err(anyhow!("System is not ready"));
696        }
697        Ok(Context::new(Node::clone(self), data))
698    }
699
700    pub async fn context_async(&self, data: PA::ContextData) -> Context {
701        self.system.wait_system_ready().await;
702        Context::new(Node::clone(self), data)
703    }
704
705    pub(crate) async fn get_from_peer(
706        &self,
707        collection_id: &CollectionId,
708        ids: Vec<proto::EntityId>,
709        cdata: &PA::ContextData,
710    ) -> Result<(), RetrievalError> {
711        let peer_id = self.get_durable_peer_random().ok_or(RetrievalError::NoDurablePeers)?;
712
713        match self
714            .request(peer_id, cdata, proto::NodeRequestBody::Get { collection: collection_id.clone(), ids })
715            .await
716            .map_err(|e| RetrievalError::Other(format!("{:?}", e)))?
717        {
718            proto::NodeResponseBody::Get(states) => {
719                let collection = self.collections.get(collection_id).await?;
720
721                // do we have the ability to merge states?
722                // because that's what we have to do I think
723                for state in states {
724                    self.policy_agent.validate_received_state(self, &peer_id, &state)?;
725                    collection.set_state(state).await.map_err(|e| RetrievalError::Other(format!("{:?}", e)))?;
726                }
727                Ok(())
728            }
729            proto::NodeResponseBody::Error(e) => {
730                debug!("Error from peer fetch: {}", e);
731                Err(RetrievalError::Other(format!("{:?}", e)))
732            }
733            _ => {
734                debug!("Unexpected response type from peer get");
735                Err(RetrievalError::Other("Unexpected response type".to_string()))
736            }
737        }
738    }
739
740    /// Get a random durable peer node ID
741    pub fn get_durable_peer_random(&self) -> Option<proto::EntityId> {
742        let mut rng = rand::thread_rng();
743        // Convert to Vec since DashSet iterator doesn't support random selection
744        let peers: Vec<_> = self.durable_peers.to_vec();
745        peers.choose(&mut rng).copied()
746    }
747
748    /// Get all durable peer node IDs
749    pub fn get_durable_peers(&self) -> Vec<proto::EntityId> { self.durable_peers.to_vec() }
750}
751
752impl<SE, PA> NodeInner<SE, PA>
753where
754    SE: StorageEngine + Send + Sync + 'static,
755    PA: PolicyAgent + Send + Sync + 'static,
756{
757    pub async fn request_remote_unsubscribe(&self, query_id: proto::QueryId, peers: Vec<proto::EntityId>) -> anyhow::Result<()> {
758        for (peer_id, item) in self.peer_connections.get_list(peers) {
759            if let Some(connection) = item {
760                connection.send_message(proto::NodeMessage::UnsubscribeQuery { from: peer_id, query_id })?;
761            } else {
762                warn!("Peer {} not connected", peer_id);
763            }
764        }
765
766        Ok(())
767    }
768}
769
// Log node teardown so the node lifecycle can be traced in the logs.
impl<SE, PA> Drop for NodeInner<SE, PA>
where PA: PolicyAgent
{
    fn drop(&mut self) {
        notice_info!("Node({}) dropped", self.id);
    }
}
777
778impl<SE, PA> Node<SE, PA>
779where
780    SE: StorageEngine + Send + Sync + 'static,
781    PA: PolicyAgent + Send + Sync + 'static,
782{
783    pub(crate) fn subscribe_remote_query(
784        &self,
785        query_id: proto::QueryId,
786        collection_id: CollectionId,
787        selection: ankql::ast::Selection,
788        cdata: PA::ContextData,
789        version: u32,
790        livequery: crate::livequery::WeakEntityLiveQuery,
791    ) {
792        if let Some(ref relay) = self.subscription_relay {
793            // Resolve types in the AST (converts literals for JSON path comparisons)
794            let selection = self.type_resolver.resolve_selection_types(selection);
795            self.predicate_context.insert(query_id, cdata.clone());
796            relay.subscribe_query(query_id, collection_id, selection, cdata, version, livequery);
797        }
798    }
799
800    pub async fn fetch_entities_from_local(
801        &self,
802        collection_id: &CollectionId,
803        selection: &ankql::ast::Selection,
804    ) -> Result<Vec<Entity>, RetrievalError> {
805        let storage_collection = self.collections.get(collection_id).await?;
806        let initial_states = storage_collection.fetch_states(selection).await?;
807        let retriever = crate::retrieval::LocalRetriever::new(storage_collection);
808        let mut entities = Vec::with_capacity(initial_states.len());
809        for state in initial_states {
810            let (_, entity) =
811                self.entities.with_state(&retriever, state.payload.entity_id, collection_id.clone(), state.payload.state).await?;
812            entities.push(entity);
813        }
814        Ok(entities)
815    }
816}
#[async_trait::async_trait]
/// Type-erased node interface over the entity type `E`, allowing callers to
/// hold a node without naming its `SE`/`PA` generic parameters.
pub trait TNodeErased<E: AbstractEntity + Filterable + Send + 'static = Entity>: Send + Sync + 'static {
    /// Remove local bookkeeping for a remote predicate subscription.
    fn unsubscribe_remote_predicate(&self, query_id: proto::QueryId);
    /// Replace the selection for an existing remote query with a new version.
    fn update_remote_query(&self, query_id: proto::QueryId, selection: ankql::ast::Selection, version: u32) -> Result<(), anyhow::Error>;
    /// Fetch entities matching `selection` from local storage only.
    async fn fetch_entities_from_local(
        &self,
        collection_id: &CollectionId,
        selection: &ankql::ast::Selection,
    ) -> Result<Vec<E>, RetrievalError>;
    /// Access the node's reactor for change notification wiring.
    fn reactor(&self) -> &Reactor<E>;
    /// Whether this node forwards subscriptions to remote peers.
    fn has_subscription_relay(&self) -> bool;
}
829
830#[async_trait::async_trait]
831impl<SE, PA> TNodeErased<Entity> for Node<SE, PA>
832where
833    SE: StorageEngine + Send + Sync + 'static,
834    PA: PolicyAgent + Send + Sync + 'static,
835{
836    fn unsubscribe_remote_predicate(&self, query_id: proto::QueryId) {
837        // Clean up subscription context
838        self.predicate_context.remove(&query_id);
839
840        // Notify subscription relay for remote cleanup
841        if let Some(ref relay) = self.subscription_relay {
842            relay.unsubscribe_predicate(query_id);
843        }
844    }
845
846    fn update_remote_query(&self, query_id: proto::QueryId, selection: ankql::ast::Selection, version: u32) -> Result<(), anyhow::Error> {
847        if let Some(ref relay) = self.subscription_relay {
848            // Resolve types in the AST (converts literals for JSON path comparisons)
849            let selection = self.type_resolver.resolve_selection_types(selection);
850            relay.update_query(query_id, selection, version)?;
851        }
852        Ok(())
853    }
854
855    async fn fetch_entities_from_local(
856        &self,
857        collection_id: &CollectionId,
858        selection: &ankql::ast::Selection,
859    ) -> Result<Vec<Entity>, RetrievalError> {
860        Node::fetch_entities_from_local(self, collection_id, selection).await
861    }
862
863    fn reactor(&self) -> &Reactor<Entity> { &self.0.reactor }
864
865    fn has_subscription_relay(&self) -> bool { self.subscription_relay.is_some() }
866}
867
868impl<SE, PA> fmt::Display for Node<SE, PA>
869where PA: PolicyAgent
870{
871    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
872        // bold blue, dimmed brackets
873        write!(f, "\x1b[1;34mnode\x1b[2m[\x1b[1;34m{}\x1b[2m]\x1b[0m", self.id.to_base64_short())
874    }
875}