use super::{
super::{
common::{get_metric, GetMetricResult},
types::{MetricsEvaluatorError, MetricsEvaluatorInput},
},
CATEGORY,
};
use crate::{
configuration::EvaluatorArgs,
evaluator::{EvaluationResult, Evaluator},
evaluators::EvaluatorType,
};
use anyhow::Result;
use clap::Parser;
use log::debug;
use poem_openapi::Object as PoemObject;
use prometheus_parse::Scrape as PrometheusScrape;
use serde::{Deserialize, Serialize};
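
/// The Prometheus metric tracking how many times consensus has timed out on
/// the node. The evaluator compares its value across the two metrics
/// collection rounds.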
const METRIC: &str = "aptos_consensus_timeout_count";
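
/// CLI / config arguments for the consensus timeouts evaluator.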
#[derive(Clone, Debug, Deserialize, Parser, PoemObject, Serialize)]
pub struct ConsensusTimeoutsEvaluatorArgs {
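    /// How many additional consensus timeouts to tolerate between the two
    /// metrics collection rounds before the evaluation fails.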
#[clap(long, default_value_t = 0)]
pub allowed_consensus_timeouts: u64,
}
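
/// Evaluates whether the target node's consensus timeout count has increased
/// by more than the configured allowance between the two metrics rounds.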
#[allow(dead_code)]
#[derive(Debug)]
pub struct ConsensusTimeoutsEvaluator {
args: ConsensusTimeoutsEvaluatorArgs,
}

impl ConsensusTimeoutsEvaluator {
pub fn new(args: ConsensusTimeoutsEvaluatorArgs) -> Self {
Self { args }
}
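
    /// Fetches the timeout counter from a metrics scrape. If the metric is
    /// missing, the returned `GetMetricResult` carries a zero-score
    /// evaluation explaining which round of metrics lacked it.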
fn get_consensus_timeouts(
&self,
metrics: &PrometheusScrape,
metrics_round: &str,
) -> GetMetricResult {
let evaluation_on_missing_fn = || {
self.build_evaluation_result(
"Consensus timeouts metric missing".to_string(),
0,
format!(
"The {} set of metrics from the target node is missing the metric: {}",
metrics_round, METRIC
),
)
};
get_metric(metrics, METRIC, None, evaluation_on_missing_fn)
}
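
    /// Compares the two counter readings: the evaluation passes (score 100)
    /// if the increase is within `allowed_consensus_timeouts`, and fails
    /// (score 50) otherwise.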
#[allow(clippy::comparison_chain)]
fn build_evaluation(
&self,
previous_timeouts_count: u64,
latest_timeouts_count: u64,
) -> EvaluationResult {
if latest_timeouts_count > previous_timeouts_count + self.args.allowed_consensus_timeouts {
self.build_evaluation_result(
"Consensus timeouts metric increased".to_string(),
50,
format!(
"The consensus timeouts count increased from {} to {} between metrics rounds more than the allowed amount ({})",
previous_timeouts_count, latest_timeouts_count, self.args.allowed_consensus_timeouts
),
)
} else {
self.build_evaluation_result(
"Consensus timeouts metric okay".to_string(),
100,
format!(
"The consensus timeouts count was {} in the first round and {} in the second round of metrics collection, which is within tolerance of the allowed increase ({})",
previous_timeouts_count, latest_timeouts_count, self.args.allowed_consensus_timeouts
),
)
}
}
}

#[async_trait::async_trait]
impl Evaluator for ConsensusTimeoutsEvaluator {
type Input = MetricsEvaluatorInput;
type Error = MetricsEvaluatorError;
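
    /// Reads the timeout counter from both metrics rounds and, when both
    /// readings are present, evaluates the increase between them.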
async fn evaluate(&self, input: &Self::Input) -> Result<Vec<EvaluationResult>, Self::Error> {
let mut evaluation_results = vec![];
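        // Note: this `unwrap` is the crate's `GetMetricResult::unwrap`, not
        // `Option::unwrap`. When the metric is present it yields `Some(value)`;
        // otherwise it pushes the "missing metric" evaluation into
        // `evaluation_results` and yields `None`.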
let previous_timeouts_count = self
.get_consensus_timeouts(&input.previous_target_metrics, "first")
.unwrap(&mut evaluation_results);
let latest_timeouts_count = self
.get_consensus_timeouts(&input.latest_target_metrics, "second")
.unwrap(&mut evaluation_results);
match (previous_timeouts_count, latest_timeouts_count) {
(Some(previous), Some(latest)) => {
evaluation_results.push(self.build_evaluation(previous, latest));
}
_ => {
debug!(
"Not evaluating timeouts count because we're missing metrics from the target"
);
}
        }
Ok(evaluation_results)
}

    fn get_category_name() -> String {
CATEGORY.to_string()
}

    fn get_evaluator_name() -> String {
"timeouts".to_string()
}

    fn from_evaluator_args(evaluator_args: &EvaluatorArgs) -> Result<Self> {
Ok(Self::new(evaluator_args.consensus_timeouts_args.clone()))
}

    fn evaluator_type_from_evaluator_args(evaluator_args: &EvaluatorArgs) -> Result<EvaluatorType> {
Ok(EvaluatorType::Metrics(Box::new(Self::from_evaluator_args(
evaluator_args,
)?)))
}
}
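
// A minimal test sketch, not part of the original module: it exercises the
// threshold logic in `build_evaluation`. It assumes `EvaluationResult`
// exposes a public numeric `score` field, as used elsewhere in this crate;
// adjust the assertions if the actual type differs.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn build_evaluation_respects_allowed_timeouts() {
        let evaluator = ConsensusTimeoutsEvaluator::new(ConsensusTimeoutsEvaluatorArgs {
            allowed_consensus_timeouts: 2,
        });
        // An increase of exactly the allowed amount should pass (score 100)...
        assert_eq!(evaluator.build_evaluation(10, 12).score, 100);
        // ...while exceeding it by one should fail (score 50).
        assert_eq!(evaluator.build_evaluation(10, 13).score, 50);
    }
}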