Skip to main content

sc_network/service/
metrics.rs

1// This file is part of Substrate.
2
3// Copyright (C) Parity Technologies (UK) Ltd.
4// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this program. If not, see <https://www.gnu.org/licenses/>.
18
19use crate::{service::traits::BandwidthSink, ProtocolName};
20
21use prometheus_endpoint::{
22	self as prometheus, Counter, CounterVec, Gauge, GaugeVec, HistogramOpts, MetricSource, Opts,
23	PrometheusError, Registry, SourcedCounter, SourcedGauge, U64,
24};
25
26use std::{
27	str,
28	sync::{
29		atomic::{AtomicUsize, Ordering},
30		Arc,
31	},
32};
33
34pub use prometheus_endpoint::{Histogram, HistogramVec};
35
36/// Registers all networking metrics with the given registry.
37pub fn register(registry: &Registry, sources: MetricSources) -> Result<Metrics, PrometheusError> {
38	BandwidthCounters::register(registry, sources.bandwidth)?;
39	NumConnectedGauge::register(registry, sources.connected_peers)?;
40	Metrics::register(registry)
41}
42
43// Register `sc-network` metrics without bandwidth/connected peer sources.
44pub fn register_without_sources(registry: &Registry) -> Result<Metrics, PrometheusError> {
45	Metrics::register(registry)
46}
47
48/// Predefined metric sources that are fed directly into prometheus.
49pub struct MetricSources {
50	pub bandwidth: Arc<dyn BandwidthSink>,
51	pub connected_peers: Arc<AtomicUsize>,
52}
53
54impl MetricSources {
55	pub fn register(
56		registry: &Registry,
57		bandwidth: Arc<dyn BandwidthSink>,
58		connected_peers: Arc<AtomicUsize>,
59	) -> Result<(), PrometheusError> {
60		BandwidthCounters::register(registry, bandwidth)?;
61		NumConnectedGauge::register(registry, connected_peers)
62	}
63}
64
65/// Dedicated metrics.
66#[derive(Clone)]
67pub struct Metrics {
68	// This list is ordered alphabetically
69	pub connections_closed_total: CounterVec<U64>,
70	pub connections_opened_total: CounterVec<U64>,
71	pub distinct_peers_connections_closed_total: Counter<U64>,
72	pub distinct_peers_connections_opened_total: Counter<U64>,
73	pub incoming_connections_errors_total: CounterVec<U64>,
74	pub incoming_connections_total: Counter<U64>,
75	pub kademlia_query_duration: HistogramVec,
76	pub kademlia_random_queries_total: Counter<U64>,
77	pub kademlia_records_count: Gauge<U64>,
78	pub kademlia_records_sizes_total: Gauge<U64>,
79	pub kbuckets_num_nodes: GaugeVec<U64>,
80	pub listeners_local_addresses: Gauge<U64>,
81	pub listeners_errors_total: Counter<U64>,
82	pub pending_connections: Gauge<U64>,
83	pub pending_connections_errors_total: CounterVec<U64>,
84	pub requests_in_failure_total: CounterVec<U64>,
85	pub requests_in_success_total: HistogramVec,
86	pub requests_out_failure_total: CounterVec<U64>,
87	pub requests_out_success_total: HistogramVec,
88	pub requests_response_bytes_total: CounterVec<U64>,
89}
90
91impl Metrics {
92	fn register(registry: &Registry) -> Result<Self, PrometheusError> {
93		Ok(Self {
94			// This list is ordered alphabetically
95			connections_closed_total: prometheus::register(CounterVec::new(
96				Opts::new(
97					"substrate_sub_libp2p_connections_closed_total",
98					"Total number of connections closed, by direction and reason"
99				),
100				&["direction", "reason"]
101			)?, registry)?,
102			connections_opened_total: prometheus::register(CounterVec::new(
103				Opts::new(
104					"substrate_sub_libp2p_connections_opened_total",
105					"Total number of connections opened by direction"
106				),
107				&["direction"]
108			)?, registry)?,
109			distinct_peers_connections_closed_total: prometheus::register(Counter::new(
110					"substrate_sub_libp2p_distinct_peers_connections_closed_total",
111					"Total number of connections closed with distinct peers"
112			)?, registry)?,
113			distinct_peers_connections_opened_total: prometheus::register(Counter::new(
114					"substrate_sub_libp2p_distinct_peers_connections_opened_total",
115					"Total number of connections opened with distinct peers"
116			)?, registry)?,
117			incoming_connections_errors_total: prometheus::register(CounterVec::new(
118				Opts::new(
119					"substrate_sub_libp2p_incoming_connections_handshake_errors_total",
120					"Total number of incoming connections that have failed during the \
121					initial handshake"
122				),
123				&["reason"]
124			)?, registry)?,
125			incoming_connections_total: prometheus::register(Counter::new(
126				"substrate_sub_libp2p_incoming_connections_total",
127				"Total number of incoming connections on the listening sockets"
128			)?, registry)?,
129			kademlia_query_duration: prometheus::register(HistogramVec::new(
130				HistogramOpts {
131					common_opts: Opts::new(
132						"substrate_sub_libp2p_kademlia_query_duration",
133						"Duration of Kademlia queries per query type"
134					),
135					buckets: prometheus::exponential_buckets(0.5, 2.0, 10)
136						.expect("parameters are always valid values; qed"),
137				},
138				&["type"]
139			)?, registry)?,
140			kademlia_random_queries_total: prometheus::register(Counter::new(
141				"substrate_sub_libp2p_kademlia_random_queries_total",
142				"Number of random Kademlia queries started",
143			)?, registry)?,
144			kademlia_records_count: prometheus::register(Gauge::new(
145				"substrate_sub_libp2p_kademlia_records_count",
146				"Number of records in the Kademlia records store",
147			)?, registry)?,
148			kademlia_records_sizes_total: prometheus::register(Gauge::new(
149				"substrate_sub_libp2p_kademlia_records_sizes_total",
150				"Total size of all the records in the Kademlia records store",
151			)?, registry)?,
152			kbuckets_num_nodes: prometheus::register(GaugeVec::new(
153				Opts::new(
154					"substrate_sub_libp2p_kbuckets_num_nodes",
155					"Number of nodes per kbucket per Kademlia instance"
156				),
157				&["lower_ilog2_bucket_bound"]
158			)?, registry)?,
159			listeners_local_addresses: prometheus::register(Gauge::new(
160				"substrate_sub_libp2p_listeners_local_addresses",
161				"Number of local addresses we're listening on"
162			)?, registry)?,
163			listeners_errors_total: prometheus::register(Counter::new(
164				"substrate_sub_libp2p_listeners_errors_total",
165				"Total number of non-fatal errors reported by a listener"
166			)?, registry)?,
167			pending_connections: prometheus::register(Gauge::new(
168				"substrate_sub_libp2p_pending_connections",
169				"Number of connections in the process of being established",
170			)?, registry)?,
171			pending_connections_errors_total: prometheus::register(CounterVec::new(
172				Opts::new(
173					"substrate_sub_libp2p_pending_connections_errors_total",
174					"Total number of pending connection errors"
175				),
176				&["reason"]
177			)?, registry)?,
178			requests_in_failure_total: prometheus::register(CounterVec::new(
179				Opts::new(
180					"substrate_sub_libp2p_requests_in_failure_total",
181					"Total number of incoming requests that the node has failed to answer"
182				),
183				&["protocol", "reason"]
184			)?, registry)?,
185			requests_in_success_total: prometheus::register(HistogramVec::new(
186				HistogramOpts {
187					common_opts: Opts::new(
188						"substrate_sub_libp2p_requests_in_success_total",
189						"For successful incoming requests, time between receiving the request and \
190						 starting to send the response"
191					),
192					buckets: prometheus::exponential_buckets(0.001, 2.0, 16)
193						.expect("parameters are always valid values; qed"),
194				},
195				&["protocol"]
196			)?, registry)?,
197			requests_out_failure_total: prometheus::register(CounterVec::new(
198				Opts::new(
199					"substrate_sub_libp2p_requests_out_failure_total",
200					"Total number of requests that have failed"
201				),
202				&["protocol", "reason"]
203			)?, registry)?,
204			requests_out_success_total: prometheus::register(HistogramVec::new(
205				HistogramOpts {
206					common_opts: Opts::new(
207						"substrate_sub_libp2p_requests_out_success_total",
208						"For successful outgoing requests, time between a request's start and finish"
209					),
210					buckets: prometheus::exponential_buckets(0.001, 2.0, 16)
211						.expect("parameters are always valid values; qed"),
212				},
213				&["protocol"]
214			)?, registry)?,
215			requests_response_bytes_total: prometheus::register(CounterVec::new(
216				Opts::new(
217					"substrate_sub_libp2p_requests_response_bytes_total",
218					"Total bytes sent and received by request-response protocols"
219				),
220				&["direction", "protocol"]
221			)?, registry)?,
222		})
223	}
224}
225
226/// Peer store metrics.
227#[derive(Clone, Debug)]
228pub struct PeerStoreMetrics {
229	pub num_banned_peers: Gauge<U64>,
230	pub num_discovered: Gauge<U64>,
231}
232
233impl PeerStoreMetrics {
234	pub fn register(registry: &Registry) -> Result<Self, PrometheusError> {
235		Ok(Self {
236			num_banned_peers: prometheus::register(
237				Gauge::new(
238					"substrate_sub_libp2p_peerset_num_banned_peers",
239					"Number of banned peers stored in the peerset manager",
240				)?,
241				registry,
242			)?,
243			num_discovered: prometheus::register(
244				Gauge::new(
245					"substrate_sub_libp2p_peerset_num_discovered",
246					"Number of nodes stored in the peerset manager",
247				)?,
248				registry,
249			)?,
250		})
251	}
252}
253
254/// The bandwidth counter metric.
255#[derive(Clone)]
256pub struct BandwidthCounters(Arc<dyn BandwidthSink>);
257
258impl BandwidthCounters {
259	/// Registers the `BandwidthCounters` metric whose values are
260	/// obtained from the given sinks.
261	fn register(registry: &Registry, sinks: Arc<dyn BandwidthSink>) -> Result<(), PrometheusError> {
262		prometheus::register(
263			SourcedCounter::new(
264				&Opts::new("substrate_sub_libp2p_network_bytes_total", "Total bandwidth usage")
265					.variable_label("direction"),
266				BandwidthCounters(sinks),
267			)?,
268			registry,
269		)?;
270
271		Ok(())
272	}
273}
274
275impl MetricSource for BandwidthCounters {
276	type N = u64;
277
278	fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
279		set(&["in"], self.0.total_inbound());
280		set(&["out"], self.0.total_outbound());
281	}
282}
283
284/// The connected peers metric.
285#[derive(Clone)]
286pub struct NumConnectedGauge(Arc<AtomicUsize>);
287
288impl NumConnectedGauge {
289	/// Registers the `MajorSyncingGauge` metric whose value is
290	/// obtained from the given `AtomicUsize`.
291	fn register(registry: &Registry, value: Arc<AtomicUsize>) -> Result<(), PrometheusError> {
292		prometheus::register(
293			SourcedGauge::new(
294				&Opts::new("substrate_sub_libp2p_peers_count", "Number of connected peers"),
295				NumConnectedGauge(value),
296			)?,
297			registry,
298		)?;
299
300		Ok(())
301	}
302}
303
304impl MetricSource for NumConnectedGauge {
305	type N = u64;
306
307	fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
308		set(&[], self.0.load(Ordering::Relaxed) as u64);
309	}
310}
311
312/// Notification metrics.
313///
314/// Wrapper over `Option<InnerNotificationMetrics>` to make metrics reporting code cleaner.
315#[derive(Debug, Clone)]
316pub struct NotificationMetrics {
317	/// Metrics, if enabled.
318	metrics: Option<InnerNotificationMetrics>,
319}
320
321impl NotificationMetrics {
322	/// Create new [`NotificationMetrics`].
323	pub fn new(registry: Option<&Registry>) -> NotificationMetrics {
324		let metrics = match registry {
325			Some(registry) => InnerNotificationMetrics::register(registry).ok(),
326			None => None,
327		};
328
329		Self { metrics }
330	}
331
332	/// Register opened substream to Prometheus.
333	pub fn register_substream_opened(&self, protocol: &ProtocolName) {
334		if let Some(metrics) = &self.metrics {
335			metrics.notifications_streams_opened_total.with_label_values(&[&protocol]).inc();
336		}
337	}
338
339	/// Register closed substream to Prometheus.
340	pub fn register_substream_closed(&self, protocol: &ProtocolName) {
341		if let Some(metrics) = &self.metrics {
342			metrics
343				.notifications_streams_closed_total
344				.with_label_values(&[&protocol[..]])
345				.inc();
346		}
347	}
348
349	/// Register sent notification to Prometheus.
350	pub fn register_notification_sent(&self, protocol: &ProtocolName, size: usize) {
351		if let Some(metrics) = &self.metrics {
352			metrics
353				.notifications_sizes
354				.with_label_values(&["out", protocol])
355				.observe(size as f64);
356		}
357	}
358
359	/// Register received notification to Prometheus.
360	pub fn register_notification_received(&self, protocol: &ProtocolName, size: usize) {
361		if let Some(metrics) = &self.metrics {
362			metrics
363				.notifications_sizes
364				.with_label_values(&["in", protocol])
365				.observe(size as f64);
366		}
367	}
368}
369
370/// Notification metrics.
371#[derive(Debug, Clone)]
372struct InnerNotificationMetrics {
373	// Total number of opened substreams.
374	pub notifications_streams_opened_total: CounterVec<U64>,
375
376	/// Total number of closed substreams.
377	pub notifications_streams_closed_total: CounterVec<U64>,
378
379	/// In/outbound notification sizes.
380	pub notifications_sizes: HistogramVec,
381}
382
383impl InnerNotificationMetrics {
384	fn register(registry: &Registry) -> Result<Self, PrometheusError> {
385		Ok(Self {
386			notifications_sizes: prometheus::register(
387				HistogramVec::new(
388					HistogramOpts {
389						common_opts: Opts::new(
390							"substrate_sub_libp2p_notifications_sizes",
391							"Sizes of the notifications send to and received from all nodes",
392						),
393						buckets: prometheus::exponential_buckets(64.0, 4.0, 8)
394							.expect("parameters are always valid values; qed"),
395					},
396					&["direction", "protocol"],
397				)?,
398				registry,
399			)?,
400			notifications_streams_closed_total: prometheus::register(
401				CounterVec::new(
402					Opts::new(
403						"substrate_sub_libp2p_notifications_streams_closed_total",
404						"Total number of notification substreams that have been closed",
405					),
406					&["protocol"],
407				)?,
408				registry,
409			)?,
410			notifications_streams_opened_total: prometheus::register(
411				CounterVec::new(
412					Opts::new(
413						"substrate_sub_libp2p_notifications_streams_opened_total",
414						"Total number of notification substreams that have been opened",
415					),
416					&["protocol"],
417				)?,
418				registry,
419			)?,
420		})
421	}
422}