solana_quic_client/nonblocking/
quic_client.rs

1//! Simple nonblocking client that connects to a given UDP port with the QUIC protocol
2//! and provides an interface for sending data which is restricted by the
3//! server's flow control.
4use {
5    async_lock::Mutex,
6    async_trait::async_trait,
7    futures::future::TryFutureExt,
8    log::*,
9    quinn::{
10        crypto::rustls::QuicClientConfig, ClientConfig, ClosedStream, ConnectError, Connection,
11        ConnectionError, Endpoint, EndpointConfig, IdleTimeout, TokioRuntime, TransportConfig,
12        WriteError,
13    },
14    solana_connection_cache::{
15        client_connection::ClientStats, connection_cache_stats::ConnectionCacheStats,
16        nonblocking::client_connection::ClientConnection,
17    },
18    solana_keypair::Keypair,
19    solana_measure::measure::Measure,
20    solana_net_utils::sockets,
21    solana_quic_definitions::{
22        QUIC_CONNECTION_HANDSHAKE_TIMEOUT, QUIC_KEEP_ALIVE, QUIC_MAX_TIMEOUT, QUIC_SEND_FAIRNESS,
23    },
24    solana_rpc_client_api::client_error::ErrorKind as ClientErrorKind,
25    solana_streamer::nonblocking::quic::ALPN_TPU_PROTOCOL_ID,
26    solana_tls_utils::{
27        new_dummy_x509_certificate, socket_addr_to_quic_server_name, tls_client_config_builder,
28        QuicClientCertificate,
29    },
30    solana_transaction_error::TransportResult,
31    std::{
32        net::{SocketAddr, UdpSocket},
33        sync::{atomic::Ordering, Arc},
34        thread,
35    },
36    thiserror::Error,
37    tokio::{sync::OnceCell, time::timeout},
38};
39
/// A lazy-initialized Quic Endpoint
///
/// The underlying [`Endpoint`] is built the first time it is requested and the
/// same shared instance is handed out on every later request.
pub struct QuicLazyInitializedEndpoint {
    /// Holds the endpoint once it has been created.
    endpoint: OnceCell<Arc<Endpoint>>,
    /// Certificate and private key installed as the TLS client-auth credentials.
    client_certificate: Arc<QuicClientCertificate>,
    /// Optional caller-supplied endpoint to reuse; when `None`, a new local
    /// socket is bound and a fresh endpoint is created around it.
    client_endpoint: Option<Endpoint>,
}
46
/// Errors surfaced by the QUIC client; each variant wraps one `quinn` error type.
///
/// All variants are `transparent`, so formatting and error sources are
/// forwarded to the wrapped error, and `#[from]` lets `?` convert into this type.
#[derive(Error, Debug)]
pub enum QuicError {
    /// Writing to a send stream failed.
    #[error(transparent)]
    WriteError(#[from] WriteError),
    /// The connection failed or was lost; also used in this file to report
    /// handshake timeouts (`ConnectionError::TimedOut`).
    #[error(transparent)]
    ConnectionError(#[from] ConnectionError),
    /// Starting a connection attempt on the endpoint failed.
    #[error(transparent)]
    ConnectError(#[from] ConnectError),
    /// Wraps quinn's `ClosedStream` error.
    #[error(transparent)]
    ClosedStream(#[from] ClosedStream),
}
58
59impl From<QuicError> for ClientErrorKind {
60    fn from(quic_error: QuicError) -> Self {
61        Self::Custom(format!("{quic_error:?}"))
62    }
63}
64
65impl QuicLazyInitializedEndpoint {
66    pub fn new(
67        client_certificate: Arc<QuicClientCertificate>,
68        client_endpoint: Option<Endpoint>,
69    ) -> Self {
70        Self {
71            endpoint: OnceCell::<Arc<Endpoint>>::new(),
72            client_certificate,
73            client_endpoint,
74        }
75    }
76
77    fn create_endpoint(&self) -> Endpoint {
78        let mut endpoint = if let Some(endpoint) = &self.client_endpoint {
79            endpoint.clone()
80        } else {
81            // This will bind to random ports, but VALIDATOR_PORT_RANGE is outside
82            // of the range for CI tests when this is running in CI
83            let client_socket = sockets::bind_in_range_with_config(
84                std::net::IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED),
85                solana_net_utils::VALIDATOR_PORT_RANGE,
86                sockets::SocketConfiguration::default(),
87            )
88            .expect("QuicLazyInitializedEndpoint::create_endpoint bind_in_range")
89            .1;
90            info!("Local endpoint is : {client_socket:?}");
91
92            QuicNewConnection::create_endpoint(EndpointConfig::default(), client_socket)
93        };
94
95        let mut crypto = tls_client_config_builder()
96            .with_client_auth_cert(
97                vec![self.client_certificate.certificate.clone()],
98                self.client_certificate.key.clone_key(),
99            )
100            .expect("Failed to set QUIC client certificates");
101        crypto.enable_early_data = true;
102        crypto.alpn_protocols = vec![ALPN_TPU_PROTOCOL_ID.to_vec()];
103
104        let mut config = ClientConfig::new(Arc::new(QuicClientConfig::try_from(crypto).unwrap()));
105        let mut transport_config = TransportConfig::default();
106
107        let timeout = IdleTimeout::try_from(QUIC_MAX_TIMEOUT).unwrap();
108        transport_config.max_idle_timeout(Some(timeout));
109        transport_config.keep_alive_interval(Some(QUIC_KEEP_ALIVE));
110        transport_config.send_fairness(QUIC_SEND_FAIRNESS);
111        config.transport_config(Arc::new(transport_config));
112
113        endpoint.set_default_client_config(config);
114
115        endpoint
116    }
117
118    async fn get_endpoint(&self) -> Arc<Endpoint> {
119        self.endpoint
120            .get_or_init(|| async { Arc::new(self.create_endpoint()) })
121            .await
122            .clone()
123    }
124}
125
126impl Default for QuicLazyInitializedEndpoint {
127    fn default() -> Self {
128        let (cert, priv_key) = new_dummy_x509_certificate(&Keypair::new());
129        Self::new(
130            Arc::new(QuicClientCertificate {
131                certificate: cert,
132                key: priv_key,
133            }),
134            None,
135        )
136    }
137}
138
/// A wrapper over NewConnection with additional capability to create the endpoint as part
/// of creating a new connection.
///
/// Both fields are `Arc`s, so a clone shares the same endpoint and connection.
#[derive(Clone)]
struct QuicNewConnection {
    /// Endpoint the connection was opened on; reused when reconnecting.
    endpoint: Arc<Endpoint>,
    /// The current connection; replaced when a reconnect succeeds.
    connection: Arc<Connection>,
}
146
147impl QuicNewConnection {
148    /// Create a QuicNewConnection given the remote address 'addr'.
149    async fn make_connection(
150        endpoint: Arc<QuicLazyInitializedEndpoint>,
151        addr: SocketAddr,
152        stats: &ClientStats,
153    ) -> Result<Self, QuicError> {
154        let mut make_connection_measure = Measure::start("make_connection_measure");
155        let endpoint = endpoint.get_endpoint().await;
156        let server_name = socket_addr_to_quic_server_name(addr);
157        let connecting = endpoint.connect(addr, &server_name)?;
158        stats.total_connections.fetch_add(1, Ordering::Relaxed);
159        if let Ok(connecting_result) = timeout(QUIC_CONNECTION_HANDSHAKE_TIMEOUT, connecting).await
160        {
161            if connecting_result.is_err() {
162                stats.connection_errors.fetch_add(1, Ordering::Relaxed);
163            }
164            make_connection_measure.stop();
165            stats
166                .make_connection_ms
167                .fetch_add(make_connection_measure.as_ms(), Ordering::Relaxed);
168
169            let connection = connecting_result?;
170
171            Ok(Self {
172                endpoint,
173                connection: Arc::new(connection),
174            })
175        } else {
176            Err(ConnectionError::TimedOut.into())
177        }
178    }
179
180    fn create_endpoint(config: EndpointConfig, client_socket: UdpSocket) -> Endpoint {
181        quinn::Endpoint::new(config, None, client_socket, Arc::new(TokioRuntime))
182            .expect("QuicNewConnection::create_endpoint quinn::Endpoint::new")
183    }
184
185    // Attempts to make a faster connection by taking advantage of pre-existing key material.
186    // Only works if connection to this endpoint was previously established.
187    async fn make_connection_0rtt(
188        &mut self,
189        addr: SocketAddr,
190        stats: &ClientStats,
191    ) -> Result<Arc<Connection>, QuicError> {
192        let server_name = socket_addr_to_quic_server_name(addr);
193        let connecting = self.endpoint.connect(addr, &server_name)?;
194        stats.total_connections.fetch_add(1, Ordering::Relaxed);
195        let connection = match connecting.into_0rtt() {
196            Ok((connection, zero_rtt)) => {
197                if let Ok(zero_rtt) = timeout(QUIC_CONNECTION_HANDSHAKE_TIMEOUT, zero_rtt).await {
198                    if zero_rtt {
199                        stats.zero_rtt_accepts.fetch_add(1, Ordering::Relaxed);
200                    } else {
201                        stats.zero_rtt_rejects.fetch_add(1, Ordering::Relaxed);
202                    }
203                    connection
204                } else {
205                    return Err(ConnectionError::TimedOut.into());
206                }
207            }
208            Err(connecting) => {
209                stats.connection_errors.fetch_add(1, Ordering::Relaxed);
210
211                if let Ok(connecting_result) =
212                    timeout(QUIC_CONNECTION_HANDSHAKE_TIMEOUT, connecting).await
213                {
214                    connecting_result?
215                } else {
216                    return Err(ConnectionError::TimedOut.into());
217                }
218            }
219        };
220        self.connection = Arc::new(connection);
221        Ok(self.connection.clone())
222    }
223}
224
/// QUIC client for a single server address that connects and reconnects on
/// demand when data is sent.
pub struct QuicClient {
    /// Lazily created endpoint used to open connections.
    endpoint: Arc<QuicLazyInitializedEndpoint>,
    /// Current connection behind an async mutex; `None` until the first
    /// connect and again after `close`.
    connection: Arc<Mutex<Option<QuicNewConnection>>>,
    /// Address of the server this client sends to.
    addr: SocketAddr,
    /// Per-client stats handed to `update_stat` together with each new
    /// reading taken from the connection.
    stats: Arc<ClientStats>,
}
231
/// Error code passed to `Connection::close` when the client closes a connection itself.
const CONNECTION_CLOSE_CODE_APPLICATION_CLOSE: u32 = 0u32;
/// Reason bytes passed to `Connection::close` alongside the close code.
const CONNECTION_CLOSE_REASON_APPLICATION_CLOSE: &[u8] = b"dropped";
234
235impl QuicClient {
236    /// Explicitly close the connection. Must be called manually if cleanup is needed.
237    pub async fn close(&self) {
238        let mut conn_guard = self.connection.lock().await;
239        if let Some(conn) = conn_guard.take() {
240            debug!(
241                "Closing connection to {} connection_id: {:?}",
242                self.addr, conn.connection
243            );
244            conn.connection.close(
245                CONNECTION_CLOSE_CODE_APPLICATION_CLOSE.into(),
246                CONNECTION_CLOSE_REASON_APPLICATION_CLOSE,
247            );
248        }
249    }
250}
251
252impl QuicClient {
253    pub fn new(endpoint: Arc<QuicLazyInitializedEndpoint>, addr: SocketAddr) -> Self {
254        Self {
255            endpoint,
256            connection: Arc::new(Mutex::new(None)),
257            addr,
258            stats: Arc::new(ClientStats::default()),
259        }
260    }
261
262    async fn _send_buffer_using_conn(
263        data: &[u8],
264        connection: &Connection,
265    ) -> Result<(), QuicError> {
266        let mut send_stream = connection.open_uni().await?;
267        send_stream.write_all(data).await?;
268        Ok(())
269    }
270
271    // Attempts to send data, connecting/reconnecting as necessary
272    // On success, returns the connection used to successfully send the data
273    async fn _send_buffer(
274        &self,
275        data: &[u8],
276        stats: &ClientStats,
277        connection_stats: Arc<ConnectionCacheStats>,
278    ) -> Result<Arc<Connection>, QuicError> {
279        let mut measure_send_packet = Measure::start("send_packet_us");
280        let mut measure_prepare_connection = Measure::start("prepare_connection");
281        let mut connection_try_count = 0;
282        let mut last_connection_id = 0;
283        let mut last_error = None;
284        while connection_try_count < 2 {
285            let connection = {
286                let mut conn_guard = self.connection.lock().await;
287
288                let maybe_conn = conn_guard.as_mut();
289                match maybe_conn {
290                    Some(conn) => {
291                        if conn.connection.stable_id() == last_connection_id {
292                            // this is the problematic connection we had used before, create a new one
293                            let conn = conn.make_connection_0rtt(self.addr, stats).await;
294                            match conn {
295                                Ok(conn) => {
296                                    info!(
297                                        "Made 0rtt connection to {} with id {} try_count {}, last_connection_id: {}, last_error: {:?}",
298                                        self.addr,
299                                        conn.stable_id(),
300                                        connection_try_count,
301                                        last_connection_id,
302                                        last_error,
303                                    );
304                                    connection_try_count += 1;
305                                    conn
306                                }
307                                Err(err) => {
308                                    info!(
309                                        "Cannot make 0rtt connection to {}, error {:}",
310                                        self.addr, err
311                                    );
312                                    return Err(err);
313                                }
314                            }
315                        } else {
316                            stats.connection_reuse.fetch_add(1, Ordering::Relaxed);
317                            conn.connection.clone()
318                        }
319                    }
320                    None => {
321                        let conn = QuicNewConnection::make_connection(
322                            self.endpoint.clone(),
323                            self.addr,
324                            stats,
325                        )
326                        .await;
327                        match conn {
328                            Ok(conn) => {
329                                *conn_guard = Some(conn.clone());
330                                info!(
331                                    "Made connection to {} id {} try_count {}, from connection cache warming?: {}",
332                                    self.addr,
333                                    conn.connection.stable_id(),
334                                    connection_try_count,
335                                    data.is_empty(),
336                                );
337                                connection_try_count += 1;
338                                conn.connection.clone()
339                            }
340                            Err(err) => {
341                                info!("Cannot make connection to {}, error {:}, from connection cache warming?: {}",
342                                    self.addr, err, data.is_empty());
343                                return Err(err);
344                            }
345                        }
346                    }
347                }
348            };
349
350            let new_stats = connection.stats();
351
352            connection_stats
353                .total_client_stats
354                .congestion_events
355                .update_stat(
356                    &self.stats.congestion_events,
357                    new_stats.path.congestion_events,
358                );
359
360            connection_stats
361                .total_client_stats
362                .streams_blocked_uni
363                .update_stat(
364                    &self.stats.streams_blocked_uni,
365                    new_stats.frame_tx.streams_blocked_uni,
366                );
367
368            connection_stats
369                .total_client_stats
370                .data_blocked
371                .update_stat(&self.stats.data_blocked, new_stats.frame_tx.data_blocked);
372
373            connection_stats
374                .total_client_stats
375                .acks
376                .update_stat(&self.stats.acks, new_stats.frame_tx.acks);
377
378            if data.is_empty() {
379                // no need to send packet as it is only for warming connections
380                return Ok(connection);
381            }
382
383            last_connection_id = connection.stable_id();
384            measure_prepare_connection.stop();
385
386            match Self::_send_buffer_using_conn(data, &connection).await {
387                Ok(()) => {
388                    measure_send_packet.stop();
389                    stats.successful_packets.fetch_add(1, Ordering::Relaxed);
390                    stats
391                        .send_packets_us
392                        .fetch_add(measure_send_packet.as_us(), Ordering::Relaxed);
393                    stats
394                        .prepare_connection_us
395                        .fetch_add(measure_prepare_connection.as_us(), Ordering::Relaxed);
396                    trace!(
397                        "Succcessfully sent to {} with id {}, thread: {:?}, data len: {}, send_packet_us: {} prepare_connection_us: {}",
398                        self.addr,
399                        connection.stable_id(),
400                        thread::current().id(),
401                        data.len(),
402                        measure_send_packet.as_us(),
403                        measure_prepare_connection.as_us(),
404                    );
405
406                    return Ok(connection);
407                }
408                Err(err) => match err {
409                    QuicError::ConnectionError(_) => {
410                        last_error = Some(err);
411                    }
412                    _ => {
413                        info!(
414                            "Error sending to {} with id {}, error {:?} thread: {:?}",
415                            self.addr,
416                            connection.stable_id(),
417                            err,
418                            thread::current().id(),
419                        );
420                        return Err(err);
421                    }
422                },
423            }
424        }
425
426        // if we come here, that means we have exhausted maximum retries, return the error
427        info!(
428            "Ran into an error sending data {:?}, exhausted retries to {}",
429            last_error, self.addr
430        );
431        // If we get here but last_error is None, then we have a logic error
432        // in this function, so panic here with an expect to help debugging
433        Err(last_error.expect("QuicClient::_send_buffer last_error.expect"))
434    }
435
436    pub async fn send_buffer<T>(
437        &self,
438        data: T,
439        stats: &ClientStats,
440        connection_stats: Arc<ConnectionCacheStats>,
441    ) -> Result<(), ClientErrorKind>
442    where
443        T: AsRef<[u8]>,
444    {
445        self._send_buffer(data.as_ref(), stats, connection_stats)
446            .await
447            .map_err(Into::<ClientErrorKind>::into)?;
448        Ok(())
449    }
450
451    pub async fn send_batch<T>(
452        &self,
453        buffers: &[T],
454        stats: &ClientStats,
455        connection_stats: Arc<ConnectionCacheStats>,
456    ) -> Result<(), ClientErrorKind>
457    where
458        T: AsRef<[u8]>,
459    {
460        // Start off by "testing" the connection by sending the first buffer
461        // This will also connect to the server if not already connected
462        // and reconnect and retry if the first send attempt failed
463        // (for example due to a timed out connection), returning an error
464        // or the connection that was used to successfully send the buffer.
465        // We will use the returned connection to send the rest of the buffers in the batch
466        // to avoid touching the mutex in self, and not bother reconnecting if we fail along the way
467        // since testing even in the ideal GCE environment has found no cases
468        // where reconnecting and retrying in the middle of a batch send
469        // (i.e. we encounter a connection error in the middle of a batch send, which presumably cannot
470        // be due to a timed out connection) has succeeded
471        if buffers.is_empty() {
472            return Ok(());
473        }
474        let connection = self
475            ._send_buffer(buffers[0].as_ref(), stats, connection_stats)
476            .await
477            .map_err(Into::<ClientErrorKind>::into)?;
478
479        for data in buffers[1..buffers.len()].iter() {
480            Self::_send_buffer_using_conn(data.as_ref(), &connection).await?;
481        }
482        Ok(())
483    }
484
485    pub fn server_addr(&self) -> &SocketAddr {
486        &self.addr
487    }
488
489    pub fn stats(&self) -> Arc<ClientStats> {
490        self.stats.clone()
491    }
492}
493
/// Pairs a [`QuicClient`] with the connection-cache stats it reports into.
pub struct QuicClientConnection {
    /// The underlying client used for all sends.
    pub client: Arc<QuicClient>,
    /// Shared connection-cache stats updated after each send.
    pub connection_stats: Arc<ConnectionCacheStats>,
}
498
499impl QuicClientConnection {
500    pub fn base_stats(&self) -> Arc<ClientStats> {
501        self.client.stats()
502    }
503
504    pub fn connection_stats(&self) -> Arc<ConnectionCacheStats> {
505        self.connection_stats.clone()
506    }
507
508    pub fn new(
509        endpoint: Arc<QuicLazyInitializedEndpoint>,
510        addr: SocketAddr,
511        connection_stats: Arc<ConnectionCacheStats>,
512    ) -> Self {
513        let client = Arc::new(QuicClient::new(endpoint, addr));
514        Self::new_with_client(client, connection_stats)
515    }
516
517    pub fn new_with_client(
518        client: Arc<QuicClient>,
519        connection_stats: Arc<ConnectionCacheStats>,
520    ) -> Self {
521        Self {
522            client,
523            connection_stats,
524        }
525    }
526}
527
528#[async_trait]
529impl ClientConnection for QuicClientConnection {
530    fn server_addr(&self) -> &SocketAddr {
531        self.client.server_addr()
532    }
533
534    async fn send_data_batch(&self, buffers: &[Vec<u8>]) -> TransportResult<()> {
535        let stats = ClientStats::default();
536        let len = buffers.len();
537        let res = self
538            .client
539            .send_batch(buffers, &stats, self.connection_stats.clone())
540            .await;
541        self.connection_stats
542            .add_client_stats(&stats, len, res.is_ok());
543        res?;
544        Ok(())
545    }
546
547    async fn send_data(&self, data: &[u8]) -> TransportResult<()> {
548        let stats = Arc::new(ClientStats::default());
549        // When data is empty which is from cache warmer, we are not sending packets actually, do not count it in
550        let num_packets = if data.is_empty() { 0 } else { 1 };
551        self.client
552            .send_buffer(data, &stats, self.connection_stats.clone())
553            .map_ok(|v| {
554                self.connection_stats
555                    .add_client_stats(&stats, num_packets, true);
556                v
557            })
558            .map_err(|e| {
559                warn!(
560                    "Failed to send data async to {}, error: {:?} ",
561                    self.server_addr(),
562                    e
563                );
564                datapoint_warn!("send-wire-async", ("failure", 1, i64),);
565                self.connection_stats
566                    .add_client_stats(&stats, num_packets, false);
567                e.into()
568            })
569            .await
570    }
571}