ckb_metrics/
lib.rs

1#![allow(missing_docs)]
2//! A lightweight metrics facade used in CKB.
3//!
4//! The `ckb-metrics` crate is a set of tools for metrics.
5//! The crate [`ckb-metrics-service`] is the runtime which handles the metrics data in CKB.
6//!
7//! [`ckb-metrics-service`]: ../ckb_metrics_service/index.html
8
9use prometheus_static_metric::make_static_metric;
10use std::cell::Cell;
11
12pub use prometheus::*;
13
14pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
15    prometheus::gather()
16}
17
18make_static_metric! {
19    // Struct for the CKB sys mem process statistics type label
20    struct CkbSysMemProcessStatistics: IntGauge{
21        "type" => {
22            rss,
23            vms,
24        },
25    }
26
27    // Struct for the CKB sys mem jemalloc statistics type label
28    struct CkbSysMemJemallocStatistics: IntGauge{
29        "type" => {
30            allocated,
31            resident,
32            active,
33            mapped,
34            retained,
35            metadata,
36        },
37    }
38
39    // Struct for CKB tx-pool entry status statistics type label
40    struct CkbTxPoolEntryStatistics: IntGauge{
41        "type" => {
42            pending,
43            gap,
44            proposed,
45        },
46    }
47
48    struct CkbHeaderMapMemoryHitMissStatistics: IntCounter{
49        "type" => {
50            hit,
51            miss,
52        },
53    }
54}
55
56pub struct Metrics {
57    /// Gauge metric for CKB chain tip header number
58    pub ckb_chain_tip: IntGauge,
59    /// CKB chain unverified tip header number
60    pub ckb_chain_unverified_tip: IntGauge,
61    /// ckb_chain asynchronous_process duration (seconds)
62    pub ckb_chain_async_process_block_duration: Histogram,
63    /// ckb_chain consume_orphan thread's process_lonely_block duration (seconds)
64    pub ckb_chain_process_lonely_block_duration: Histogram,
65    /// ckb_chain consume_unverified thread's consume_unverified_block duration (seconds)
66    pub ckb_chain_consume_unverified_block_duration: Histogram,
67    /// ckb_chain consume_unverified thread's consume_unverified_block waiting for block duration (seconds)
68    pub ckb_chain_consume_unverified_block_waiting_block_duration: Histogram,
69    /// ckb_chain execute_callback duration (seconds)
70    pub ckb_chain_execute_callback_duration: Histogram,
71    /// ckb_chain orphan blocks count
72    pub ckb_chain_orphan_count: IntGauge,
73    pub ckb_chain_lonely_block_ch_len: IntGauge,
74    pub ckb_chain_unverified_block_ch_len: IntGauge,
75    pub ckb_chain_preload_unverified_block_ch_len: IntGauge,
76    pub ckb_chain_load_full_unverified_block: Histogram,
77    /// ckb_sync_msg_process duration (seconds)
78    pub ckb_sync_msg_process_duration: HistogramVec,
79    /// ckb_sync_block_fetch duraiton (seconds)
80    pub ckb_sync_block_fetch_duration: Histogram,
81    // ckb_header_map_limit_memory duration (seconds)
82    pub ckb_header_map_limit_memory_duration: Histogram,
83    // ckb_header_map_limit_memory operation duration (seconds)
84    pub ckb_header_map_ops_duration: HistogramVec,
85    // how many headers in the HeaderMap's memory map?
86    pub ckb_header_map_memory_count: IntGauge,
87    // how many times the HeaderMap's memory map is hit?
88    pub ckb_header_map_memory_hit_miss_count: CkbHeaderMapMemoryHitMissStatistics,
89    /// Gauge for tracking the size of all frozen data
90    pub ckb_freezer_size: IntGauge,
91    /// Counter for measuring the effective amount of data read
92    pub ckb_freezer_read: IntCounter,
93    /// Gauge for tracking the number of ckb_freezer
94    pub ckb_freezer_number: IntGauge,
95    /// Counter for relay transaction short id collide
96    pub ckb_relay_transaction_short_id_collide: IntCounter,
97    /// Histogram for relay compact block verify duration
98    pub ckb_relay_cb_verify_duration: Histogram,
99    /// Histogram for block process duration
100    pub ckb_block_process_duration: Histogram,
101    /// Histogram for sync process tx in txpool
102    pub ckb_tx_pool_sync_process: Histogram,
103    /// Histogram for async process tx in txpool
104    pub ckb_tx_pool_async_process: Histogram,
105    /// Counter for relay compact block transaction count
106    pub ckb_relay_cb_transaction_count: IntCounter,
107    /// Counter for relay compact block reconstruct ok
108    pub ckb_relay_cb_reconstruct_ok: IntCounter,
109    /// Counter for relay compact block fresh transaction count
110    pub ckb_relay_cb_fresh_tx_cnt: IntCounter,
111    /// Counter for relay compact block reconstruct fail
112    pub ckb_relay_cb_reconstruct_fail: IntCounter,
113    // Gauge for CKB shared best number
114    pub ckb_shared_best_number: IntGauge,
115    // GaugeVec for CKB system memory process statistics
116    pub ckb_sys_mem_process: CkbSysMemProcessStatistics,
117    // GaugeVec for CKB system memory jemalloc statistics
118    pub ckb_sys_mem_jemalloc: CkbSysMemJemallocStatistics,
119    // GaugeVec for CKB tx-pool tx entry status statistics
120    pub ckb_tx_pool_entry: CkbTxPoolEntryStatistics,
121    /// Histogram for CKB network connections
122    pub ckb_message_bytes: HistogramVec,
123    /// Gauge for CKB rocksdb statistics
124    pub ckb_sys_mem_rocksdb: IntGaugeVec,
125    /// Counter for CKB network ban peers
126    pub ckb_network_ban_peer: IntCounter,
127    pub ckb_inflight_blocks_count: IntGauge,
128    pub ckb_inflight_timeout_count: IntCounter,
129    pub ckb_hole_punching_active_count: IntCounter,
130    pub ckb_hole_punching_active_success_count: IntCounter,
131    pub ckb_hole_punching_passive_count: IntCounter,
132    pub ckb_hole_punching_passive_success_count: IntCounter,
133    /// Gauge metric for CKB indexer tip block number
134    pub ckb_indexer_tip: IntGauge,
135}
136
137static METRICS: std::sync::LazyLock<Metrics> = std::sync::LazyLock::new(|| {
138    Metrics {
139    ckb_chain_tip: register_int_gauge!("ckb_chain_tip", "The CKB chain tip header number").unwrap(),
140    ckb_chain_unverified_tip: register_int_gauge!(
141        "ckb_chain_unverified_tip",
142        "The CKB chain unverified tip header number"
143    )
144            .unwrap(),
145    ckb_chain_async_process_block_duration: register_histogram!(
146        "ckb_chain_async_process_block_duration",
147        "The CKB chain asynchronous_process_block duration (seconds)"
148    )
149            .unwrap(),
150    ckb_chain_process_lonely_block_duration: register_histogram!(
151        "ckb_chain_process_lonely_block_duration",
152        "The CKB chain consume_orphan thread's process_lonely_block duration (seconds)"
153    )
154            .unwrap(),
155    ckb_chain_consume_unverified_block_duration: register_histogram!(
156        "ckb_chain_consume_unverified_block_duration",
157        "The CKB chain consume_unverified thread's consume_unverified_block duration (seconds)"
158    )
159            .unwrap(),
160    ckb_chain_consume_unverified_block_waiting_block_duration: register_histogram!(
161        "ckb_chain_consume_unverified_block_waiting_block_duration",
162        "The CKB chain consume_unverified thread's consume_unverified_block waiting for block duration (seconds)"
163    ).unwrap(),
164    ckb_chain_execute_callback_duration: register_histogram!(
165            "ckb_chain_execute_callback_duration",
166            "The CKB chain execute_callback duration (seconds)"
167        ).unwrap(),
168    ckb_chain_orphan_count: register_int_gauge!(
169            "ckb_chain_orphan_count",
170            "The CKB chain orphan blocks count",
171        ).unwrap(),
172    ckb_chain_lonely_block_ch_len: register_int_gauge!(
173            "ckb_chain_lonely_block_ch_len",
174            "The CKB chain lonely block channel length",
175        ).unwrap(),
176    ckb_chain_unverified_block_ch_len: register_int_gauge!(
177            "ckb_chain_unverified_block_ch_len",
178            "The CKB chain unverified block channel length",
179        ).unwrap(),
180    ckb_chain_preload_unverified_block_ch_len: register_int_gauge!(
181            "ckb_chain_preload_unverified_block_ch_len",
182            "The CKB chain fill unverified block channel length",
183        ).unwrap(),
184    ckb_chain_load_full_unverified_block: register_histogram!(
185            "ckb_chain_load_full_unverified_block",
186            "The CKB chain load_full_unverified_block duration (seconds)"
187        ).unwrap(),
188    ckb_sync_msg_process_duration: register_histogram_vec!(
189            "ckb_sync_msg_process_duration",
190            "The CKB sync message process duration (seconds)",
191            &["msg_type"],
192        ).unwrap(),
193    ckb_sync_block_fetch_duration: register_histogram!(
194            "ckb_sync_block_fetch_duration",
195            "The CKB sync block fetch duration (seconds)"
196        ).unwrap(),
197    ckb_header_map_limit_memory_duration: register_histogram!(
198            "ckb_header_map_limit_memory_duration",
199            "The CKB header map limit_memory job duration (seconds)"
200        ).unwrap(),
201    ckb_header_map_ops_duration: register_histogram_vec!(
202            "ckb_header_map_ops_duration",
203            "The CKB header map operation duration (seconds)",
204            &["operation"],
205        ).unwrap(),
206    ckb_header_map_memory_count: register_int_gauge!(
207            "ckb_header_map_memory_count",
208            "The CKB HeaderMap memory count",
209        ).unwrap(),
210    ckb_header_map_memory_hit_miss_count: CkbHeaderMapMemoryHitMissStatistics::from(
211            &register_int_counter_vec!(
212            "ckb_header_map_memory_hit_miss_count",
213            "The CKB HeaderMap memory hit count",
214            &["type"]
215        )
216                .unwrap()
217        ),
218    ckb_freezer_size: register_int_gauge!("ckb_freezer_size", "The CKB freezer size").unwrap(),
219    ckb_freezer_read: register_int_counter!("ckb_freezer_read", "The CKB freezer read").unwrap(),
220    ckb_freezer_number: register_int_gauge!("ckb_freezer_number", "The CKB freezer number").unwrap(),
221    ckb_relay_transaction_short_id_collide: register_int_counter!(
222        "ckb_relay_transaction_short_id_collide",
223        "The CKB relay transaction short id collide"
224    )
225            .unwrap(),
226    ckb_relay_cb_verify_duration: register_histogram!(
227        "ckb_relay_cb_verify_duration",
228        "The CKB relay compact block verify duration"
229    )
230            .unwrap(),
231    ckb_block_process_duration: register_histogram!(
232        "ckb_block_process_duration",
233        "The CKB block process duration"
234    )
235    .unwrap(),
236    ckb_tx_pool_sync_process: register_histogram!(
237        "ckb_tx_pool_sync_process",
238        "The CKB tx_pool sync process tx duration"
239    )
240    .unwrap(),
241    ckb_tx_pool_async_process: register_histogram!(
242        "ckb_tx_pool_async_process",
243        "The CKB tx_pool async process tx duration"
244    )
245    .unwrap(),
246    ckb_relay_cb_transaction_count: register_int_counter!(
247        "ckb_relay_cb_transaction_count",
248        "The CKB relay compact block transaction count"
249    ).unwrap(),
250    ckb_relay_cb_reconstruct_ok: register_int_counter!(
251        "ckb_relay_cb_reconstruct_ok",
252        "The CKB relay compact block reconstruct ok count"
253    ).unwrap(),
254    ckb_relay_cb_fresh_tx_cnt: register_int_counter!(
255        "ckb_relay_cb_fresh_tx_cnt",
256        "The CKB relay compact block fresh tx count"
257    ).unwrap(),
258    ckb_relay_cb_reconstruct_fail: register_int_counter!(
259        "ckb_relay_cb_reconstruct_fail",
260        "The CKB relay compact block reconstruct fail count"
261    )
262            .unwrap(),
263    ckb_shared_best_number: register_int_gauge!(
264        "ckb_shared_best_number",
265        "The CKB shared best header number"
266    )
267            .unwrap(),
268    ckb_sys_mem_process: CkbSysMemProcessStatistics::from(
269            &register_int_gauge_vec!(
270            "ckb_sys_mem_process",
271            "CKB system memory for process statistics",
272            &["type"]
273        )
274                .unwrap(),
275        ),
276    ckb_sys_mem_jemalloc: CkbSysMemJemallocStatistics::from(
277            &register_int_gauge_vec!(
278            "ckb_sys_mem_jemalloc",
279            "CKB system memory for jemalloc statistics",
280            &["type"]
281        )
282                .unwrap(),
283        ),
284    ckb_tx_pool_entry: CkbTxPoolEntryStatistics::from(
285            &register_int_gauge_vec!(
286            "ckb_tx_pool_entry",
287            "CKB tx-pool entry status statistics",
288            &["type"]
289        )
290                .unwrap(),
291        ),
292    ckb_message_bytes: register_histogram_vec!(
293        "ckb_message_bytes",
294        "The CKB message bytes",
295        &["direction", "protocol_name", "msg_item_name", "status_code"],
296        vec![
297            500.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0, 100000.0, 200000.0, 500000.0
298        ]
299    )
300            .unwrap(),
301    ckb_sys_mem_rocksdb: register_int_gauge_vec!(
302        "ckb_sys_mem_rocksdb",
303        "CKB system memory for rocksdb statistics",
304        &["type", "cf"]
305    )
306            .unwrap(),
307    ckb_network_ban_peer: register_int_counter!(
308        "ckb_network_ban_peer",
309        "CKB network baned peer count"
310    )
311            .unwrap(),
312    ckb_inflight_blocks_count: register_int_gauge!(
313            "ckb_inflight_blocks_count",
314            "The CKB inflight blocks count"
315    )
316            .unwrap(),
317    ckb_inflight_timeout_count: register_int_counter!(
318            "ckb_inflight_timeout_count",
319            "The CKB inflight timeout count"
320    ).unwrap(),
321    ckb_hole_punching_active_count: register_int_counter!(
322        "ckb_hole_punching_active_count",
323        "The CKB hole punching active count"
324    )
325            .unwrap(),
326    ckb_hole_punching_active_success_count: register_int_counter!(
327        "ckb_hole_punching_active_success_count",
328        "The CKB hole punching active success count"
329    )
330            .unwrap(),
331    ckb_hole_punching_passive_count: register_int_counter!(
332        "ckb_hole_punching_passive_count",
333        "The CKB hole punching passive reception count"
334    )
335            .unwrap(),
336    ckb_hole_punching_passive_success_count: register_int_counter!(
337        "ckb_hole_punching_passive_success_count",
338        "The CKB hole punching passive success count"
339    )
340            .unwrap(),
341    ckb_indexer_tip: register_int_gauge!(
342        "ckb_indexer_tip",
343        "The CKB indexer tip block number"
344    )
345            .unwrap(),
346    }
347});
348
/// Whether the metrics service is enabled.
///
/// Starts out unset; the value is expected to be set by `ckb-metrics-service`.
pub static METRICS_SERVICE_ENABLED: std::sync::OnceLock<bool> =
    std::sync::OnceLock::<bool>::new();
352
thread_local! {
    /// Per-thread cached "collect metrics" flag; `None` until a value has been stored.
    ///
    /// Initialized with a `const` block so that accesses skip the lazy-initialization
    /// check; the starting value is the same `None` that `Cell::default()` produced.
    static ENABLE_COLLECT_METRICS: Cell<Option<bool>> = const { Cell::new(None) };
}
356
357/// if metrics service is enabled, `handle()` will return `Some(&'static METRICS)`
358/// else will return `None`
359pub fn handle() -> Option<&'static Metrics> {
360    let enabled_collect_metrics: bool =
361        ENABLE_COLLECT_METRICS.with(
362            |enable_collect_metrics| match enable_collect_metrics.get() {
363                Some(enabled) => enabled,
364                None => match METRICS_SERVICE_ENABLED.get().copied() {
365                    Some(enabled) => {
366                        enable_collect_metrics.set(Some(enabled));
367                        enabled
368                    }
369                    None => false,
370                },
371            },
372        );
373
374    if enabled_collect_metrics {
375        Some(&METRICS)
376    } else {
377        None
378    }
379}
380
#[cfg(test)]
mod tests {
    use crate::METRICS;
    use std::ops::Deref;
    use std::panic;

    /// Returns `true` when registering an `IntGauge` under `name` is refused.
    ///
    /// A refusal may surface either as an `Err` from the registration macro or as a
    /// panic raised inside it, so both outcomes are treated as "rejected".
    fn registration_is_rejected(name: &'static str, help: &'static str) -> bool {
        panic::catch_unwind(|| prometheus::register_int_gauge!(name, help).is_err())
            .unwrap_or(true)
    }

    // https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
    // Metric names may contain ASCII letters and digits, as well as underscores and colons.
    // They must match the regex [a-zA-Z_:][a-zA-Z0-9_:]*.
    // Label names may contain ASCII letters, numbers, as well as underscores. They must match
    // the regex [a-zA-Z_][a-zA-Z0-9_]*. Label names beginning with __ are reserved for
    // internal use.
    //
    // Dereferencing METRICS forces every metric to be created and registered; each
    // registration result is unwrapped, so an invalid name or label makes this test panic.
    #[test]
    fn test_metrics_name() {
        let _ = METRICS.deref();
    }

    // Each bad name is checked on its own. Previously the first registration panicked
    // under `#[should_panic]`, so the second name was never exercised and a registration
    // that wrongly succeeded would still have passed (via the panicking `assert!`).
    #[test]
    fn test_bad_metrics_name() {
        let cases = [
            ("ckb.chain.tip", "a bad metric which contains '.' in its name"),
            ("ckb-chain-tip", "a bad metric which contains '-' in its name"),
        ];
        for (name, help) in cases {
            assert!(
                registration_is_rejected(name, help),
                "metric name {name:?} should have been rejected"
            );
        }
    }
}
411}