use crate::client::real_messages_control::acknowledgement_control::PendingAcknowledgement;
use crate::client::real_messages_control::message_handler::{
FragmentWithMaxRetransmissions, MessageHandler, PreparationError,
};
use crate::client::replies::reply_controller::Config;
use crate::client::replies::reply_controller::key_rotation_helpers::SurbRefreshState;
use crate::client::topology_control::TopologyAccessor;
use crate::client::transmission_buffer::TransmissionBuffer;
use futures::channel::oneshot;
use nym_client_core_surb_storage::{ReceivedReplySurb, ReceivedReplySurbsMap};
use nym_crypto::aes::cipher::crypto_common::rand_core::CryptoRng;
use nym_sphinx::anonymous_replies::ReplySurbWithKeyRotation;
use nym_sphinx::anonymous_replies::requests::AnonymousSenderTag;
use nym_sphinx::chunking::fragment::FragmentIdentifier;
use nym_task::connections::{ConnectionId, TransmissionLane};
use nym_topology::NymTopologyMetadata;
use rand::Rng;
use std::cmp::{max, min};
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::mem;
use std::sync::{Arc, Weak};
use time::OffsetDateTime;
use tracing::{debug, error, info, trace, warn};
/// Per-sender bookkeeping for reply data that could not be sent out yet.
struct SenderData {
/// Number of explicit surb re-requests issued while inspecting stale pending data;
/// reset back to zero whenever new surbs from this sender are handled.
current_clear_rerequest_counter: usize,
/// Reply fragments buffered until enough reply surbs are available to send them.
pending_replies: TransmissionBuffer<FragmentWithMaxRetransmissions>,
/// Reply fragments awaiting retransmission, keyed by their fragment identifier.
/// Only weak references are stored; entries that can no longer be upgraded are discarded
/// when the queue is being cleared.
pending_retransmissions: BTreeMap<FragmentIdentifier, Weak<PendingAcknowledgement>>,
/// Time of the most recent failed request for additional surbs
/// (used to decide how loudly the next failure gets logged).
last_request_failure: OffsetDateTime,
}
impl Default for SenderData {
fn default() -> Self {
SenderData {
current_clear_rerequest_counter: 0,
pending_replies: Default::default(),
pending_retransmissions: Default::default(),
last_request_failure: OffsetDateTime::UNIX_EPOCH,
}
}
}
impl SenderData {
fn total_pending(&self) -> usize {
let pending_replies = self.pending_replies.total_size();
let pending_retransmissions = self.pending_retransmissions.len();
let total_pending = pending_retransmissions + pending_replies;
debug!(
"total queue size: {total_pending} = pending data {pending_replies} + pending retransmission {pending_retransmissions}"
);
total_pending
}
pub(crate) fn increment_current_clear_rerequest_counter(&mut self) {
self.current_clear_rerequest_counter += 1;
}
pub(crate) fn reset_current_clear_rerequest_counter(&mut self) {
self.current_clear_rerequest_counter = 0;
}
pub(crate) fn reset_last_request_failure(&mut self, now: OffsetDateTime) -> OffsetDateTime {
mem::replace(&mut self.last_request_failure, now)
}
}
/// Controller responsible for sending replies with received reply surbs: it buffers data
/// that cannot be sent yet, requests additional surbs and clears stale state.
pub struct ReceiverReplyController<R> {
/// Controller configuration; the reply surb limits and key rotation settings are read from it.
config: Config,
/// Tracks whether a surb refresh is waiting for the next key rotation or is scheduled
/// to run on the next refresh check.
surb_refresh_state: SurbRefreshState,
/// Handle used for querying the current topology metadata and key rotation id.
topology_access: TopologyAccessor,
/// Buffered replies and retransmissions, per sender tag.
surb_senders: HashMap<AnonymousSenderTag, SenderData>,
/// Sender tags for which a reply was requested while no surbs were stored, together with
/// the time this was last reported; entries older than 30 seconds are pruned.
unavailable: HashMap<AnonymousSenderTag, OffsetDateTime>,
/// Storage of the reply surbs received so far.
surbs_storage: ReceivedReplySurbsMap,
/// Handler used for splitting, preparing and sending out the reply messages.
message_handler: MessageHandler<R>,
}
impl<R> ReceiverReplyController<R>
where
R: CryptoRng + Rng,
{
pub(crate) fn new(
config: Config,
storage: ReceivedReplySurbsMap,
message_handler: MessageHandler<R>,
) -> Self {
let topology_access = message_handler.topology_access_handle().clone();
ReceiverReplyController {
config,
surb_refresh_state: SurbRefreshState::WaitingForNextRotation {
last_known: config
.key_rotation
.expected_current_key_rotation_id(OffsetDateTime::now_utc()),
},
topology_access,
surb_senders: Default::default(),
unavailable: Default::default(),
surbs_storage: storage,
message_handler,
}
}
fn get_or_create_surb_sender(&mut self, tag: &AnonymousSenderTag) -> &mut SenderData {
self.surb_senders.entry(*tag).or_default()
}
/// Fetches the current topology metadata through the topology accessor, if there is any.
async fn current_topology_metadata(&self) -> Option<NymTopologyMetadata> {
    let topology = &self.topology_access;
    topology.current_metadata().await
}
/// Buffers `fragments` for `recipient` on the given `lane`, creating the sender entry
/// if this is the first data buffered for that recipient.
fn insert_pending_replies<I: IntoIterator<Item = FragmentWithMaxRetransmissions>>(
    &mut self,
    recipient: &AnonymousSenderTag,
    fragments: I,
    lane: TransmissionLane,
) {
    trace!("buffering pending replies for {recipient}");
    let sender = self.get_or_create_surb_sender(recipient);
    sender.pending_replies.store(&lane, fragments)
}
/// Puts previously popped `(lane, fragment)` pairs back into the pending buffer of
/// `recipient`, creating the sender entry if needed.
fn re_insert_pending_replies(
    &mut self,
    recipient: &AnonymousSenderTag,
    fragments: Vec<(TransmissionLane, FragmentWithMaxRetransmissions)>,
) {
    trace!("re-inserting pending replies for {recipient}");
    let sender = self.get_or_create_surb_sender(recipient);
    sender.pending_replies.store_multiple(fragments)
}
/// Puts acknowledgements that were taken out of the retransmission queue back into it.
///
/// Acknowledgements for which we hold the only strong reference are skipped.
///
/// # Panics
///
/// Panics if there is no sender entry for `recipient`.
fn re_insert_pending_retransmission(
    &mut self,
    recipient: &AnonymousSenderTag,
    data: Vec<Arc<PendingAcknowledgement>>,
) {
    trace!("re-inserting pending retransmissions for {recipient}");

    #[allow(clippy::expect_used)]
    let sender = self
        .surb_senders
        .get_mut(recipient)
        .expect("our pending retransmission entry is somehow gone!");
    let retransmissions = &mut sender.pending_retransmissions;

    for ack in data {
        // if we're the sole owner, a weak reference would be dead the moment `ack` is dropped
        if Arc::strong_count(&ack) <= 1 {
            continue;
        }
        retransmissions.insert(ack.inner_fragment_identifier(), Arc::downgrade(&ack));
    }
}
/// Decides whether additional reply surbs should be requested from `target`.
///
/// Returns `true` only if the surbs we have (fresh ones plus those pending reception)
/// are both below the storage's maximum threshold and below what is needed to clear
/// the pending queue while keeping the minimum threshold plus the configured buffer.
fn should_request_more_surbs(&self, target: &AnonymousSenderTag) -> bool {
    trace!("checking if we should request more surbs from {target}");

    let total_queue = match self.surb_senders.get(target) {
        Some(sender) => sender.total_pending(),
        None => 0,
    };

    let storage = &self.surbs_storage;
    let available_surbs = storage.available_fresh_surbs(target);
    let pending_surbs = storage.pending_reception(target) as usize;
    let min_surbs_threshold = storage.min_surb_threshold();
    let max_surbs_threshold = storage.max_surb_threshold();
    let min_surbs_threshold_buffer =
        self.config.reply_surbs.minimum_reply_surb_threshold_buffer;

    // what we want to be left with after the whole queue has been sent out
    let total_required_surbs = total_queue + (min_surbs_threshold + min_surbs_threshold_buffer);
    let total_available_surbs = pending_surbs + available_surbs;

    debug!(
        "available surbs: {available_surbs} pending surbs: {pending_surbs} threshold range: {min_surbs_threshold}..+{min_surbs_threshold_buffer}..{max_surbs_threshold}"
    );

    total_available_surbs < max_surbs_threshold && total_available_surbs < total_required_surbs
}
/// Attempts to send `data` as a reply to `recipient_tag`.
///
/// If no surbs are stored for the recipient, the request is only logged (loudly the first
/// time the tag is recorded as unavailable, quietly afterwards) and dropped. Otherwise the
/// message is split into fragments and as many of them as the surbs above the minimum
/// threshold allow are sent immediately; fragments that could not be sent (or whose
/// sending failed) are buffered. Finally, more surbs are requested if needed.
pub(crate) async fn handle_send_reply(
    &mut self,
    recipient_tag: AnonymousSenderTag,
    data: Vec<u8>,
    lane: TransmissionLane,
    max_retransmissions: Option<u32>,
) {
    if !self.surbs_storage.contains_surbs_for(&recipient_tag) {
        let reported_at = OffsetDateTime::now_utc();
        match self.unavailable.insert(recipient_tag, reported_at) {
            None => warn!(
                "received reply request for {recipient_tag} but we don't have any surbs stored for that recipient!"
            ),
            Some(_) => trace!(
                "received reply request for {recipient_tag} but we don't have any surbs stored for that recipient!"
            ),
        }
        return;
    }

    trace!("handling reply to {recipient_tag}");
    let mut fragments = self.message_handler.split_reply_message(data);
    let total_size = fragments.len();
    trace!("This reply requires {total_size} SURBs");

    // only the surbs above the minimum threshold may be spent on the reply itself
    let available_surbs = self.surbs_storage.available_surbs(&recipient_tag);
    let min_surbs_threshold = self.surbs_storage.min_surb_threshold();
    let spendable_surbs = available_surbs.saturating_sub(min_surbs_threshold);
    let max_to_send = min(total_size, spendable_surbs);

    if max_to_send > 0 {
        let (surbs, surbs_left) = self
            .surbs_storage
            .get_reply_surbs(&recipient_tag, max_to_send);
        let retrieved = surbs.as_ref().map_or(0, |s| s.len());
        debug!(
            "retrieved {} reply surbs. {surbs_left} surbs remaining in storage",
            retrieved
        );

        if let Some(reply_surbs) = surbs {
            let to_send: Vec<_> = fragments
                .drain(..reply_surbs.len())
                .map(|fragment| FragmentWithMaxRetransmissions {
                    fragment,
                    max_retransmissions,
                })
                .collect();

            // keep a copy around so the fragments can be buffered should the sending fail
            let send_result = self
                .message_handler
                .try_send_reply_chunks_on_lane(recipient_tag, to_send.clone(), reply_surbs, lane)
                .await;

            if let Err(err) = send_result {
                let err = err.return_unused_surbs(&self.surbs_storage, &recipient_tag);
                warn!("failed to send reply to {recipient_tag}: {err}");
                info!(
                    "buffering {no_fragments} fragments for {recipient_tag}",
                    no_fragments = to_send.len()
                );
                self.insert_pending_replies(&recipient_tag, to_send, lane);
            }
        }
    }

    // whatever has not been drained above has to wait for more surbs
    if !fragments.is_empty() {
        debug!(
            "buffering {no_fragments} fragments for {recipient_tag}",
            no_fragments = fragments.len()
        );
        let remaining: Vec<_> = fragments
            .into_iter()
            .map(|fragment| FragmentWithMaxRetransmissions {
                fragment,
                max_retransmissions,
            })
            .collect();
        self.insert_pending_replies(&recipient_tag, remaining, lane);
    }

    if self.should_request_more_surbs(&recipient_tag) {
        self.request_reply_surbs_for_queue_clearing(recipient_tag)
            .await;
    }
}
/// Requests `amount` additional reply surbs from `target`, using one of the stored surbs
/// (taken regardless of the threshold) to deliver the request.
///
/// On success the number of surbs pending reception for `target` is increased by `amount`.
///
/// # Errors
///
/// Returns `PreparationError::NotEnoughSurbs` if no surb could be retrieved for the request,
/// or the error of the failed request (after handing any unused surbs back to the storage).
async fn request_additional_reply_surbs(
    &mut self,
    target: AnonymousSenderTag,
    amount: u32,
) -> Result<(), PreparationError> {
    debug!("requesting {amount} additional reply surbs for {target}");

    let (maybe_surb, _) = self
        .surbs_storage
        .get_reply_surb_ignoring_threshold(&target);
    let Some(reply_surb) = maybe_surb else {
        return Err(PreparationError::NotEnoughSurbs {
            available: 0,
            required: 1,
        });
    };

    let request_result = self
        .message_handler
        .try_request_additional_reply_surbs(target, reply_surb, amount)
        .await;

    match request_result {
        Ok(_) => {
            self.surbs_storage
                .increment_pending_reception(&target, amount);
            Ok(())
        }
        Err(err) => {
            let err = err.return_unused_surbs(&self.surbs_storage, &target);
            warn!("failed to request additional surbs from {target}: {err}",);
            Err(err)
        }
    }
}
/// Attempts to send out queued retransmissions for `target` using the surbs available
/// above the minimum threshold.
///
/// Queue entries whose acknowledgement can no longer be upgraded are discarded. If the
/// surbs cannot be retrieved or the packets cannot be prepared, the taken entries are put
/// back into the queue.
async fn try_clear_pending_retransmission(&mut self, target: AnonymousSenderTag) {
    trace!("trying to clear pending retransmission queue");

    let available_surbs = self.surbs_storage.available_surbs(&target);
    let min_surbs_threshold = self.surbs_storage.min_surb_threshold();
    if available_surbs <= min_surbs_threshold {
        trace!("we don't have enough surbs for retransmission queue clearing...");
        return;
    }
    let max_to_clear = available_surbs - min_surbs_threshold;
    trace!("we can clear up to {max_to_clear} entries");

    let Some(pending) = self.surb_senders.get_mut(&target) else {
        trace!("no pending entry for {target}!");
        return;
    };

    // take entries in key order until we have enough of them or the queue runs dry
    let mut to_take = Vec::new();
    while to_take.len() < max_to_clear {
        let Some((_, weak_ack)) = pending.pending_retransmissions.pop_first() else {
            break;
        };
        if let Some(ack) = weak_ack.upgrade() {
            to_take.push(ack)
        }
    }

    if to_take.is_empty() {
        return;
    }

    let (surbs_for_reply, _) = self.surbs_storage.get_reply_surbs(&target, to_take.len());
    let Some(surbs_for_reply) = surbs_for_reply else {
        error!(
            "somehow different task has stolen our reply surbs! - this should have been impossible"
        );
        self.re_insert_pending_retransmission(&target, to_take);
        return;
    };

    let to_send_vec = to_take.iter().map(|ack| ack.fragment_data()).collect();
    let preparation_result = self
        .message_handler
        .prepare_reply_chunks_for_sending(to_send_vec, surbs_for_reply)
        .await;

    let prepared_fragments = match preparation_result {
        Err(err) => {
            let err = err.return_unused_surbs(&self.surbs_storage, &target);
            self.re_insert_pending_retransmission(&target, to_take);
            warn!("failed to clear pending retransmission queue for {target}: {err}",);
            return;
        }
        Ok(prepared) => prepared,
    };

    // release our strong references before the packets get sent out
    drop(to_take);

    self.message_handler
        .send_retransmission_reply_chunks(prepared_fragments, TransmissionLane::Retransmission)
        .await;
}
/// Pops up to `amount` buffered reply fragments of the sender `from`.
///
/// Returns `None` if there is no entry for the sender or its buffer is empty; otherwise
/// returns whatever the buffer's random pop yields.
fn pop_at_most_pending_replies(
    &mut self,
    from: &AnonymousSenderTag,
    amount: usize,
) -> Option<Vec<(TransmissionLane, FragmentWithMaxRetransmissions)>> {
    let sender = self.surb_senders.get_mut(from)?;
    let buffer = &mut sender.pending_replies;

    let total = buffer.total_size();
    trace!("pending queue has {total} elements");

    if total > 0 {
        buffer.pop_at_most_n_next_messages_at_random(amount)
    } else {
        None
    }
}
/// Attempts to send out buffered reply fragments for `target` using the surbs available
/// above the minimum threshold.
///
/// If the surbs cannot be retrieved or the sending fails, the popped fragments are put
/// back into the buffer.
///
/// # Panics
///
/// Panics if the pending buffer hands back an empty batch.
#[allow(clippy::panic)]
async fn try_clear_pending_queue(&mut self, target: AnonymousSenderTag) {
    trace!("trying to clear pending queue");

    let available_surbs = self.surbs_storage.available_surbs(&target);
    let min_surbs_threshold = self.surbs_storage.min_surb_threshold();
    if available_surbs <= min_surbs_threshold {
        trace!("we don't have enough surbs for queue clearing...");
        return;
    }
    let max_to_clear = available_surbs - min_surbs_threshold;
    trace!("we can clear up to {max_to_clear} entries");

    let Some(to_send) = self.pop_at_most_pending_replies(&target, max_to_clear) else {
        trace!("the pending queue is empty");
        return;
    };

    if to_send.is_empty() {
        panic!("please let the devs know if you ever see this message (reply_controller.rs)");
    }

    let (surbs_for_reply, _) = self.surbs_storage.get_reply_surbs(&target, to_send.len());
    let Some(surbs_for_reply) = surbs_for_reply else {
        error!(
            "somehow different task has stolen our reply surbs! - this should have been impossible"
        );
        self.re_insert_pending_replies(&target, to_send);
        return;
    };

    // send a copy so that the originals can be re-buffered if the sending fails
    let send_result = self
        .message_handler
        .try_send_reply_chunks(target, to_send.clone(), surbs_for_reply)
        .await;

    if let Err(err) = send_result {
        let err = err.return_unused_surbs(&self.surbs_storage, &target);
        self.re_insert_pending_replies(&target, to_send);
        warn!("failed to clear pending queue for {target}: {err}");
    }
}
/// Zeroes the surb re-request counter of the sender `from`; does nothing if there is
/// no entry for that sender.
fn reset_rerequest_counter(&mut self, from: &AnonymousSenderTag) {
    let Some(sender) = self.surb_senders.get_mut(from) else {
        return;
    };
    sender.reset_current_clear_rerequest_counter()
}
/// Stores reply surbs received from `from` and uses them to clear pending data.
///
/// When the surbs arrived as a response to our request (`from_surb_request`), the number
/// of surbs pending reception is decreased accordingly. Afterwards the sender's re-request
/// counter is reset, the retransmission queue and then the pending queue are cleared as far
/// as possible, and more surbs are requested if still needed.
pub(crate) async fn handle_received_surbs(
    &mut self,
    from: AnonymousSenderTag,
    reply_surbs: Vec<ReplySurbWithKeyRotation>,
    from_surb_request: bool,
) {
    trace!("handling received surbs");

    if from_surb_request {
        let received = reply_surbs.len() as u32;
        self.surbs_storage
            .decrement_pending_reception(&from, received);
    }
    self.surbs_storage.insert_fresh_surbs(&from, reply_surbs);

    // we have heard back from the sender, so start counting re-requests from scratch
    if let Some(sender) = self.surb_senders.get_mut(&from) {
        sender.reset_current_clear_rerequest_counter()
    }

    // retransmissions go first, fresh data second
    self.try_clear_pending_retransmission(from).await;
    self.try_clear_pending_queue(from).await;

    let needs_more_surbs = self.should_request_more_surbs(&from);
    if needs_more_surbs {
        self.request_reply_surbs_for_queue_clearing(from).await;
    }
}
/// Queues a timed-out acknowledgement of `recipient` for retransmission.
///
/// The weak reference is stored under the fragment identifier taken from `ack_ref`;
/// if that fragment is already queued, only a warning is emitted.
fn buffer_pending_ack(
    &mut self,
    recipient: AnonymousSenderTag,
    ack_ref: Arc<PendingAcknowledgement>,
    weak_ack_ref: Weak<PendingAcknowledgement>,
) {
    let frag_id = ack_ref.inner_fragment_identifier();
    let retransmissions = &mut self
        .surb_senders
        .entry(recipient)
        .or_default()
        .pending_retransmissions;

    match retransmissions.entry(frag_id) {
        Entry::Vacant(slot) => {
            slot.insert(weak_ack_ref);
        }
        Entry::Occupied(_) => warn!(
            "we're already trying to retransmit {frag_id}. We must be really behind in surbs!"
        ),
    }
}
/// Handles a reply packet whose acknowledgement has timed out.
///
/// If the acknowledgement is already gone, nothing happens. Otherwise a single reply surb
/// is taken (ignoring the threshold when `extra_surbs_request` is set) and the fragment is
/// prepared and forwarded on the retransmission lane. When no surb is available or the
/// preparation fails, the acknowledgement is queued for a later retransmission and more
/// surbs are requested if needed.
pub(crate) async fn handle_reply_retransmission(
    &mut self,
    recipient_tag: AnonymousSenderTag,
    timed_out_ack: Weak<PendingAcknowledgement>,
    extra_surbs_request: bool,
) {
    let Some(ack_ref) = timed_out_ack.upgrade() else {
        debug!(
            "we received the ack for one of the reply packets as we were putting it in the retransmission queue"
        );
        return;
    };

    let (maybe_reply_surb, _) = if extra_surbs_request {
        self.surbs_storage
            .get_reply_surb_ignoring_threshold(&recipient_tag)
    } else {
        self.surbs_storage.get_reply_surb(&recipient_tag)
    };

    let Some(reply_surb) = maybe_reply_surb else {
        // nothing to send the packet with - park it until more surbs arrive
        self.buffer_pending_ack(recipient_tag, ack_ref, timed_out_ack);
        if self.should_request_more_surbs(&recipient_tag) {
            self.request_reply_surbs_for_queue_clearing(recipient_tag)
                .await;
        }
        return;
    };

    let preparation_result = self
        .message_handler
        .try_prepare_single_reply_chunk_for_sending(reply_surb, ack_ref.fragment_data())
        .await;

    match preparation_result {
        Err(err) => {
            let err = err.return_unused_surbs(&self.surbs_storage, &recipient_tag);
            warn!("failed to prepare message for retransmission - {err}");
            self.buffer_pending_ack(recipient_tag, ack_ref, timed_out_ack);
            if self.should_request_more_surbs(&recipient_tag) {
                self.request_reply_surbs_for_queue_clearing(recipient_tag)
                    .await;
            }
        }
        Ok(prepared) => {
            // release our strong reference before the packet gets forwarded
            drop(ack_ref);
            self.message_handler
                .update_ack_delay(prepared.fragment_identifier, prepared.total_delay);
            self.message_handler
                .forward_messages(vec![prepared.into()], TransmissionLane::Retransmission)
                .await;
        }
    }
}
/// Reports the queue length of the lane associated with `connection_id` on
/// `response_channel`.
///
/// The length comes from the first sender buffer that knows the lane; `0` is sent if
/// none does. A dropped receiver is only logged.
pub(crate) fn handle_lane_queue_length(
    &self,
    connection_id: ConnectionId,
    response_channel: oneshot::Sender<usize>,
) {
    let lane = TransmissionLane::ConnectionId(connection_id);

    let length = self
        .surb_senders
        .values()
        .find_map(|sender| sender.pending_replies.lane_length(&lane))
        .unwrap_or(0);

    if response_channel.send(length).is_err() {
        error!("the requester for lane queue length has dropped the response channel!")
    }
}
async fn request_reply_surbs_for_queue_clearing(&mut self, target: AnonymousSenderTag) {
trace!("requesting surbs for queue clearing");
let total_queue = self
.surb_senders
.get(&target)
.map(|pending| pending.total_pending() as u32)
.unwrap_or_default();
let min_surbs_buffer = self.config.reply_surbs.minimum_reply_surb_threshold_buffer as u32;
let total_queue_with_buffer = total_queue + min_surbs_buffer;
let request_size = min(
self.config.reply_surbs.maximum_reply_surb_request_size,
max(
total_queue_with_buffer,
self.config.reply_surbs.minimum_reply_surb_request_size,
),
);
if let Err(err) = self
.request_additional_reply_surbs(target, request_size)
.await
{
let now = OffsetDateTime::now_utc();
let sender_info = self.get_or_create_surb_sender(&target);
let last_failure = sender_info.reset_last_request_failure(now);
if now - last_failure > time::Duration::seconds(30) {
warn!(
"failed to request more surbs to clear pending queue of size {total_queue} (attempted to request: {request_size}): {err}"
)
} else {
debug!(
"failed to request more surbs to clear pending queue of size {total_queue} (attempted to request: {request_size}): {err}"
)
}
}
}
/// Goes through all senders with buffered replies and decides whether to re-request surbs
/// from them or to give up on them.
///
/// A sender is dropped when no surbs were ever received from it, when its re-request
/// counter exceeds the configured maximum, or when the last surbs arrived longer ago than
/// the configured drop waiting period. If the last surbs arrived longer ago than the
/// re-request waiting period (but not long enough to drop), the re-request counter is
/// incremented and more surbs are requested.
pub(crate) async fn inspect_stale_pending_data(&mut self) {
    let now = OffsetDateTime::now_utc();

    // the limits do not change while iterating, so read them once up front
    let max_rerequest_wait = self
        .config
        .reply_surbs
        .maximum_reply_surb_rerequest_waiting_period;
    let max_drop_wait = self
        .config
        .reply_surbs
        .maximum_reply_surb_drop_waiting_period;
    let max_rerequests = self.config.reply_surbs.maximum_reply_surbs_rerequests;

    let mut to_remove = Vec::new();
    let mut to_request = Vec::new();

    for (pending_reply_target, sender_data) in self.surb_senders.iter_mut() {
        // NOTE(review): only the buffered replies are checked here - a sender with nothing
        // but queued retransmissions is skipped; confirm that this is intended.
        let buffered_replies = &sender_data.pending_replies;
        if buffered_replies.is_empty() {
            continue;
        }

        let Some(last_received_time) = self
            .surbs_storage
            .surbs_last_received_at(pending_reply_target)
        else {
            error!(
                "we have {} pending replies for {pending_reply_target}, but we somehow never received any reply surbs from them!",
                buffered_replies.total_size()
            );
            to_remove.push(*pending_reply_target);
            continue;
        };
        let diff = now - last_received_time;

        if sender_data.current_clear_rerequest_counter > max_rerequests {
            to_remove.push(*pending_reply_target);
            debug!(
                "we have reached the maximum threshold of attempting to request surbs from {pending_reply_target}. dropping the sender"
            );
            continue;
        }

        if diff > max_rerequest_wait {
            if diff > max_drop_wait {
                to_remove.push(*pending_reply_target)
            } else {
                debug!(
                    "We haven't received any surbs in {} from {pending_reply_target}. Going to explicitly ask for more",
                    humantime::format_duration(diff.unsigned_abs())
                );
                sender_data.increment_current_clear_rerequest_counter();
                to_request.push(*pending_reply_target);
            }
        }
    }

    for target in to_request {
        self.request_reply_surbs_for_queue_clearing(target).await;
        // NOTE(review): the reset happens after the request, which on success has just
        // increased the pending reception counter - confirm the order is intended.
        self.surbs_storage.reset_pending_reception(&target)
    }

    for stale_sender in to_remove {
        self.surb_senders.remove(&stale_sender);
    }
}
/// Drives the surb refresh state machine.
///
/// While waiting for the next rotation, a change of the key rotation id only schedules the
/// refresh for the next invocation. On that next invocation the freshness of all stored
/// surbs is downgraded, replacements are requested from every sender that had surbs
/// downgraded, and the state goes back to waiting with the current rotation id.
/// Nothing happens if the current key rotation id cannot be retrieved.
pub(crate) async fn check_surb_refresh(&mut self) {
    let Some(current_rotation_id) = self.topology_access.current_key_rotation_id().await else {
        warn!("failed to retrieve current key rotation id from the network topology");
        return;
    };

    if let SurbRefreshState::WaitingForNextRotation { last_known } = self.surb_refresh_state {
        if last_known != current_rotation_id {
            self.surb_refresh_state = SurbRefreshState::ScheduledForNextInvocation;
        } else {
            trace!("no changes in key rotation id");
        }
        return;
    }

    // collect first and request afterwards, so that the storage is not iterated
    // while the requests are being sent
    let mut marked_as_stale = HashMap::new();
    for mut map_entry in self.surbs_storage.as_raw_iter_mut() {
        let (sender, received) = map_entry.pair_mut();
        let num_downgraded = received.downgrade_freshness();
        trace!("{sender}: {num_downgraded} downgraded");
        if num_downgraded == 0 {
            continue;
        }
        marked_as_stale.insert(*sender, num_downgraded);
    }

    for (sender, num_to_request) in marked_as_stale {
        let request_result = self
            .request_additional_reply_surbs(sender, num_to_request as u32)
            .await;
        if request_result.is_err() {
            warn!("surb refresh request failed")
        }
    }

    self.surb_refresh_state = SurbRefreshState::WaitingForNextRotation {
        last_known: current_rotation_id,
    };
}
/// Removes stale reply surbs, abandoned sender entries from the surb storage and outdated
/// entries from the `unavailable` map, using `now` as the reference time.
pub(crate) async fn inspect_and_clear_stale_data(&mut self, now: OffsetDateTime) {
// without any topology metadata the epoch is treated as not being stuck
let is_epoch_stuck = self
.current_topology_metadata()
.await
.map(|m| self.config.key_rotation.epoch_stuck(m))
.unwrap_or(false);
let expected_current_key_rotation_start = self
.config
.key_rotation
.expected_current_key_rotation_start(now);
let expected_current_key_rotation = self
.config
.key_rotation
.expected_current_key_rotation_id(now);
// one epoch before / after the expected start of the current key rotation
let prior_epoch_start =
expected_current_key_rotation_start - self.config.key_rotation.epoch_duration;
let following_epoch_start =
expected_current_key_rotation_start + self.config.key_rotation.epoch_duration;
// decides whether a single received surb is kept (`true`) or dropped (`false`)
let basic_surb_retention_logic = |received_surb: &ReceivedReplySurb| {
// with a stuck epoch only the age of the surb is taken into account
if is_epoch_stuck {
let diff = now - received_surb.received_at();
return diff < self.config.key_rotation.rotation_lifetime();
}
if received_surb.received_at() < prior_epoch_start {
return false;
}
let surb_rotation = received_surb.key_rotation();
// surbs with an unknown rotation are kept
if surb_rotation.is_unknown() {
return true;
}
// drop surbs whose rotation parity does not match the expected current rotation id
if surb_rotation.is_even() && expected_current_key_rotation % 2 == 1 {
return false;
}
if surb_rotation.is_odd() && expected_current_key_rotation % 2 == 0 {
return false;
}
true
};
// per-sender pass: returning `false` removes the whole sender entry from the storage
self.surbs_storage.retain(|_, received| {
if is_epoch_stuck {
let diff = now - received.surbs_last_received_at();
return diff < self.config.key_rotation.rotation_lifetime();
}
if received.surbs_last_received_at() < prior_epoch_start {
return false;
}
received.retain_fresh_surbs(&basic_surb_retention_logic);
// past the start of the following epoch all possibly stale surbs are dropped outright
if now > following_epoch_start {
received.drop_possibly_stale_surbs();
}
received.retain_possibly_stale_surbs(&basic_surb_retention_logic);
let max_drop_wait = self
.config
.reply_surbs
.maximum_reply_surb_drop_waiting_period;
let last_received = received.surbs_last_received_at();
let possibly_abandoned = last_received + max_drop_wait < now;
// an entry with no surbs left, nothing pending reception and no recent activity is removed
if received.is_empty() && received.pending_reception() == 0 && possibly_abandoned {
return false;
}
true
});
// forget about "no surbs available" reports that are at least 30 seconds old
self.unavailable
.retain(|_, last_reported| now - *last_reported < time::Duration::seconds(30));
}
}