Skip to main content

soil_network/service/
metrics.rs

1// This file is part of Soil.
2
3// Copyright (C) Soil contributors.
4// Copyright (C) Parity Technologies (UK) Ltd.
5// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
6
7use crate::{service::traits::BandwidthSink, ProtocolName};
8
9use soil_prometheus::{
10	self as prometheus, Counter, CounterVec, Gauge, GaugeVec, HistogramOpts, MetricSource, Opts,
11	PrometheusError, Registry, SourcedCounter, SourcedGauge, U64,
12};
13
14use std::{
15	str,
16	sync::{
17		atomic::{AtomicUsize, Ordering},
18		Arc,
19	},
20};
21
22pub use soil_prometheus::{Histogram, HistogramVec};
23
24/// Registers all networking metrics with the given registry.
25pub fn register(registry: &Registry, sources: MetricSources) -> Result<Metrics, PrometheusError> {
26	BandwidthCounters::register(registry, sources.bandwidth)?;
27	NumConnectedGauge::register(registry, sources.connected_peers)?;
28	Metrics::register(registry)
29}
30
31// Register `soil-network` metrics without bandwidth/connected peer sources.
32pub fn register_without_sources(registry: &Registry) -> Result<Metrics, PrometheusError> {
33	Metrics::register(registry)
34}
35
36/// Predefined metric sources that are fed directly into prometheus.
37pub struct MetricSources {
38	pub bandwidth: Arc<dyn BandwidthSink>,
39	pub connected_peers: Arc<AtomicUsize>,
40}
41
42impl MetricSources {
43	pub fn register(
44		registry: &Registry,
45		bandwidth: Arc<dyn BandwidthSink>,
46		connected_peers: Arc<AtomicUsize>,
47	) -> Result<(), PrometheusError> {
48		BandwidthCounters::register(registry, bandwidth)?;
49		NumConnectedGauge::register(registry, connected_peers)
50	}
51}
52
53/// Dedicated metrics.
54#[derive(Clone)]
55pub struct Metrics {
56	// This list is ordered alphabetically
57	pub connections_closed_total: CounterVec<U64>,
58	pub connections_opened_total: CounterVec<U64>,
59	pub distinct_peers_connections_closed_total: Counter<U64>,
60	pub distinct_peers_connections_opened_total: Counter<U64>,
61	pub incoming_connections_errors_total: CounterVec<U64>,
62	pub incoming_connections_total: Counter<U64>,
63	pub kademlia_query_duration: HistogramVec,
64	pub kademlia_random_queries_total: Counter<U64>,
65	pub kademlia_records_count: Gauge<U64>,
66	pub kademlia_records_sizes_total: Gauge<U64>,
67	pub kbuckets_num_nodes: GaugeVec<U64>,
68	pub listeners_local_addresses: Gauge<U64>,
69	pub listeners_errors_total: Counter<U64>,
70	pub pending_connections: Gauge<U64>,
71	pub pending_connections_errors_total: CounterVec<U64>,
72	pub requests_in_failure_total: CounterVec<U64>,
73	pub requests_in_success_total: HistogramVec,
74	pub requests_out_failure_total: CounterVec<U64>,
75	pub requests_out_success_total: HistogramVec,
76	pub requests_response_bytes_total: CounterVec<U64>,
77}
78
79impl Metrics {
80	fn register(registry: &Registry) -> Result<Self, PrometheusError> {
81		Ok(Self {
82			// This list is ordered alphabetically
83			connections_closed_total: prometheus::register(CounterVec::new(
84				Opts::new(
85					"substrate_sub_libp2p_connections_closed_total",
86					"Total number of connections closed, by direction and reason"
87				),
88				&["direction", "reason"]
89			)?, registry)?,
90			connections_opened_total: prometheus::register(CounterVec::new(
91				Opts::new(
92					"substrate_sub_libp2p_connections_opened_total",
93					"Total number of connections opened by direction"
94				),
95				&["direction"]
96			)?, registry)?,
97			distinct_peers_connections_closed_total: prometheus::register(Counter::new(
98					"substrate_sub_libp2p_distinct_peers_connections_closed_total",
99					"Total number of connections closed with distinct peers"
100			)?, registry)?,
101			distinct_peers_connections_opened_total: prometheus::register(Counter::new(
102					"substrate_sub_libp2p_distinct_peers_connections_opened_total",
103					"Total number of connections opened with distinct peers"
104			)?, registry)?,
105			incoming_connections_errors_total: prometheus::register(CounterVec::new(
106				Opts::new(
107					"substrate_sub_libp2p_incoming_connections_handshake_errors_total",
108					"Total number of incoming connections that have failed during the \
109					initial handshake"
110				),
111				&["reason"]
112			)?, registry)?,
113			incoming_connections_total: prometheus::register(Counter::new(
114				"substrate_sub_libp2p_incoming_connections_total",
115				"Total number of incoming connections on the listening sockets"
116			)?, registry)?,
117			kademlia_query_duration: prometheus::register(HistogramVec::new(
118				HistogramOpts {
119					common_opts: Opts::new(
120						"substrate_sub_libp2p_kademlia_query_duration",
121						"Duration of Kademlia queries per query type"
122					),
123					buckets: prometheus::exponential_buckets(0.5, 2.0, 10)
124						.expect("parameters are always valid values; qed"),
125				},
126				&["type"]
127			)?, registry)?,
128			kademlia_random_queries_total: prometheus::register(Counter::new(
129				"substrate_sub_libp2p_kademlia_random_queries_total",
130				"Number of random Kademlia queries started",
131			)?, registry)?,
132			kademlia_records_count: prometheus::register(Gauge::new(
133				"substrate_sub_libp2p_kademlia_records_count",
134				"Number of records in the Kademlia records store",
135			)?, registry)?,
136			kademlia_records_sizes_total: prometheus::register(Gauge::new(
137				"substrate_sub_libp2p_kademlia_records_sizes_total",
138				"Total size of all the records in the Kademlia records store",
139			)?, registry)?,
140			kbuckets_num_nodes: prometheus::register(GaugeVec::new(
141				Opts::new(
142					"substrate_sub_libp2p_kbuckets_num_nodes",
143					"Number of nodes per kbucket per Kademlia instance"
144				),
145				&["lower_ilog2_bucket_bound"]
146			)?, registry)?,
147			listeners_local_addresses: prometheus::register(Gauge::new(
148				"substrate_sub_libp2p_listeners_local_addresses",
149				"Number of local addresses we're listening on"
150			)?, registry)?,
151			listeners_errors_total: prometheus::register(Counter::new(
152				"substrate_sub_libp2p_listeners_errors_total",
153				"Total number of non-fatal errors reported by a listener"
154			)?, registry)?,
155			pending_connections: prometheus::register(Gauge::new(
156				"substrate_sub_libp2p_pending_connections",
157				"Number of connections in the process of being established",
158			)?, registry)?,
159			pending_connections_errors_total: prometheus::register(CounterVec::new(
160				Opts::new(
161					"substrate_sub_libp2p_pending_connections_errors_total",
162					"Total number of pending connection errors"
163				),
164				&["reason"]
165			)?, registry)?,
166			requests_in_failure_total: prometheus::register(CounterVec::new(
167				Opts::new(
168					"substrate_sub_libp2p_requests_in_failure_total",
169					"Total number of incoming requests that the node has failed to answer"
170				),
171				&["protocol", "reason"]
172			)?, registry)?,
173			requests_in_success_total: prometheus::register(HistogramVec::new(
174				HistogramOpts {
175					common_opts: Opts::new(
176						"substrate_sub_libp2p_requests_in_success_total",
177						"For successful incoming requests, time between receiving the request and \
178						 starting to send the response"
179					),
180					buckets: prometheus::exponential_buckets(0.001, 2.0, 16)
181						.expect("parameters are always valid values; qed"),
182				},
183				&["protocol"]
184			)?, registry)?,
185			requests_out_failure_total: prometheus::register(CounterVec::new(
186				Opts::new(
187					"substrate_sub_libp2p_requests_out_failure_total",
188					"Total number of requests that have failed"
189				),
190				&["protocol", "reason"]
191			)?, registry)?,
192			requests_out_success_total: prometheus::register(HistogramVec::new(
193				HistogramOpts {
194					common_opts: Opts::new(
195						"substrate_sub_libp2p_requests_out_success_total",
196						"For successful outgoing requests, time between a request's start and finish"
197					),
198					buckets: prometheus::exponential_buckets(0.001, 2.0, 16)
199						.expect("parameters are always valid values; qed"),
200				},
201				&["protocol"]
202			)?, registry)?,
203			requests_response_bytes_total: prometheus::register(CounterVec::new(
204				Opts::new(
205					"substrate_sub_libp2p_requests_response_bytes_total",
206					"Total bytes sent and received by request-response protocols"
207				),
208				&["direction", "protocol"]
209			)?, registry)?,
210		})
211	}
212}
213
214/// Peer store metrics.
215#[derive(Clone, Debug)]
216pub struct PeerStoreMetrics {
217	pub num_banned_peers: Gauge<U64>,
218	pub num_discovered: Gauge<U64>,
219}
220
221impl PeerStoreMetrics {
222	pub fn register(registry: &Registry) -> Result<Self, PrometheusError> {
223		Ok(Self {
224			num_banned_peers: prometheus::register(
225				Gauge::new(
226					"substrate_sub_libp2p_peerset_num_banned_peers",
227					"Number of banned peers stored in the peerset manager",
228				)?,
229				registry,
230			)?,
231			num_discovered: prometheus::register(
232				Gauge::new(
233					"substrate_sub_libp2p_peerset_num_discovered",
234					"Number of nodes stored in the peerset manager",
235				)?,
236				registry,
237			)?,
238		})
239	}
240}
241
242/// The bandwidth counter metric.
243#[derive(Clone)]
244pub struct BandwidthCounters(Arc<dyn BandwidthSink>);
245
246impl BandwidthCounters {
247	/// Registers the `BandwidthCounters` metric whose values are
248	/// obtained from the given sinks.
249	fn register(registry: &Registry, sinks: Arc<dyn BandwidthSink>) -> Result<(), PrometheusError> {
250		prometheus::register(
251			SourcedCounter::new(
252				&Opts::new("substrate_sub_libp2p_network_bytes_total", "Total bandwidth usage")
253					.variable_label("direction"),
254				BandwidthCounters(sinks),
255			)?,
256			registry,
257		)?;
258
259		Ok(())
260	}
261}
262
263impl MetricSource for BandwidthCounters {
264	type N = u64;
265
266	fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
267		set(&["in"], self.0.total_inbound());
268		set(&["out"], self.0.total_outbound());
269	}
270}
271
272/// The connected peers metric.
273#[derive(Clone)]
274pub struct NumConnectedGauge(Arc<AtomicUsize>);
275
276impl NumConnectedGauge {
277	/// Registers the `MajorSyncingGauge` metric whose value is
278	/// obtained from the given `AtomicUsize`.
279	fn register(registry: &Registry, value: Arc<AtomicUsize>) -> Result<(), PrometheusError> {
280		prometheus::register(
281			SourcedGauge::new(
282				&Opts::new("substrate_sub_libp2p_peers_count", "Number of connected peers"),
283				NumConnectedGauge(value),
284			)?,
285			registry,
286		)?;
287
288		Ok(())
289	}
290}
291
292impl MetricSource for NumConnectedGauge {
293	type N = u64;
294
295	fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
296		set(&[], self.0.load(Ordering::Relaxed) as u64);
297	}
298}
299
300/// Notification metrics.
301///
302/// Wrapper over `Option<InnerNotificationMetrics>` to make metrics reporting code cleaner.
303#[derive(Debug, Clone)]
304pub struct NotificationMetrics {
305	/// Metrics, if enabled.
306	metrics: Option<InnerNotificationMetrics>,
307}
308
309impl NotificationMetrics {
310	/// Create new [`NotificationMetrics`].
311	pub fn new(registry: Option<&Registry>) -> NotificationMetrics {
312		let metrics = match registry {
313			Some(registry) => InnerNotificationMetrics::register(registry).ok(),
314			None => None,
315		};
316
317		Self { metrics }
318	}
319
320	/// Register opened substream to Prometheus.
321	pub fn register_substream_opened(&self, protocol: &ProtocolName) {
322		if let Some(metrics) = &self.metrics {
323			metrics.notifications_streams_opened_total.with_label_values(&[&protocol]).inc();
324		}
325	}
326
327	/// Register closed substream to Prometheus.
328	pub fn register_substream_closed(&self, protocol: &ProtocolName) {
329		if let Some(metrics) = &self.metrics {
330			metrics
331				.notifications_streams_closed_total
332				.with_label_values(&[&protocol[..]])
333				.inc();
334		}
335	}
336
337	/// Register sent notification to Prometheus.
338	pub fn register_notification_sent(&self, protocol: &ProtocolName, size: usize) {
339		if let Some(metrics) = &self.metrics {
340			metrics
341				.notifications_sizes
342				.with_label_values(&["out", protocol])
343				.observe(size as f64);
344		}
345	}
346
347	/// Register received notification to Prometheus.
348	pub fn register_notification_received(&self, protocol: &ProtocolName, size: usize) {
349		if let Some(metrics) = &self.metrics {
350			metrics
351				.notifications_sizes
352				.with_label_values(&["in", protocol])
353				.observe(size as f64);
354		}
355	}
356}
357
358/// Notification metrics.
359#[derive(Debug, Clone)]
360struct InnerNotificationMetrics {
361	// Total number of opened substreams.
362	pub notifications_streams_opened_total: CounterVec<U64>,
363
364	/// Total number of closed substreams.
365	pub notifications_streams_closed_total: CounterVec<U64>,
366
367	/// In/outbound notification sizes.
368	pub notifications_sizes: HistogramVec,
369}
370
371impl InnerNotificationMetrics {
372	fn register(registry: &Registry) -> Result<Self, PrometheusError> {
373		Ok(Self {
374			notifications_sizes: prometheus::register(
375				HistogramVec::new(
376					HistogramOpts {
377						common_opts: Opts::new(
378							"substrate_sub_libp2p_notifications_sizes",
379							"Sizes of the notifications send to and received from all nodes",
380						),
381						buckets: prometheus::exponential_buckets(64.0, 4.0, 8)
382							.expect("parameters are always valid values; qed"),
383					},
384					&["direction", "protocol"],
385				)?,
386				registry,
387			)?,
388			notifications_streams_closed_total: prometheus::register(
389				CounterVec::new(
390					Opts::new(
391						"substrate_sub_libp2p_notifications_streams_closed_total",
392						"Total number of notification substreams that have been closed",
393					),
394					&["protocol"],
395				)?,
396				registry,
397			)?,
398			notifications_streams_opened_total: prometheus::register(
399				CounterVec::new(
400					Opts::new(
401						"substrate_sub_libp2p_notifications_streams_opened_total",
402						"Total number of notification substreams that have been opened",
403					),
404					&["protocol"],
405				)?,
406				registry,
407			)?,
408		})
409	}
410}