mod parse;
mod validate;
pub use validate::validate;
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::error::{Result, SlokitError};
use crate::sli::Sli;
use crate::slo::{Objective, Slo};
use crate::window::Window;
/// Default SLO period: a 30-day window built with `Window::days(30)`.
///
/// The tests in this file pass it as the `default_period` argument of
/// [`SloSpec::to_slo`].
pub const DEFAULT_PERIOD: Window = Window::days(30);
/// Supplies the value serde uses for `Spec::version` when the key is absent
/// from the input (wired up via `#[serde(default = "default_version")]`).
fn default_version() -> String {
    String::from("prometheus/v1")
}
/// Top-level spec document: one service together with its list of SLOs.
///
/// Deserialized/serialized through serde; use [`Spec::from_yaml`] or
/// [`Spec::from_path`] to load one.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct Spec {
/// Spec format version. Filled in by `default_version` (`"prometheus/v1"`)
/// when the key is absent from the input.
#[serde(default = "default_version")]
pub version: String,
/// Service name. No serde default is declared, so the key is required.
pub service: String,
/// String key/value labels at the spec level. Defaults to an empty map when
/// absent and is left out of serialized output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub labels: BTreeMap<String, String>,
/// The SLO definitions of this spec. No serde default is declared, so the
/// key is required.
pub slos: Vec<SloSpec>,
}
/// A single SLO entry of a [`Spec`].
///
/// The methods on this type convert it into the crate's core `Slo` and `Sli`
/// values.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct SloSpec {
/// SLO name. Used by `sloth_id` and, when no alert name override is set, by
/// `alert_name`.
pub name: String,
/// Objective value; `to_slo` hands it to `Objective::percent`
/// (the test fixtures use values such as `99.9`).
pub objective: f64,
/// Free-form description. Defaults to empty when absent and is left out of
/// serialized output when empty.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub description: String,
/// String key/value labels for this SLO. Defaults to an empty map and is
/// left out of serialized output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub labels: BTreeMap<String, String>,
/// SLI definition; `to_sli` requires exactly one of its variants to be set.
pub sli: SliSpec,
/// Alerting settings. Defaults to `Alerting::default()` when absent.
#[serde(default)]
pub alerting: Alerting,
/// Optional per-SLO period as a string (e.g. `7d` in the tests), parsed by
/// `resolve_period` via `Window::parse`. Left out of serialized output when
/// `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub period: Option<String>,
}
/// Holder for the SLI definition of an SLO.
///
/// Every field is optional at the serde level, but [`SloSpec::to_sli`]
/// returns an error unless exactly one of them is set.
#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)]
pub struct SliSpec {
/// Events-based SLI (error query plus total query).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub events: Option<EventsSli>,
/// Raw SLI (a single error-ratio query).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub raw: Option<RawSli>,
/// Latency SLI (histogram metric plus threshold).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub latency: Option<LatencySli>,
}
/// Events-based SLI: a pair of query strings that `SloSpec::to_sli` copies
/// verbatim into `Sli::Events`.
///
/// The test fixtures use Prometheus-style expressions containing a
/// `{{.window}}` placeholder.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct EventsSli {
/// Query for the error events.
pub error_query: String,
/// Query for the total events.
pub total_query: String,
}
/// Raw SLI: a single query string that `SloSpec::to_sli` copies verbatim into
/// `Sli::Raw`.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct RawSli {
/// Query yielding the error ratio directly (the test fixtures include a
/// `{{.window}}` placeholder in it).
pub error_ratio_query: String,
}
/// Latency SLI: histogram metric, threshold and optional selector, all copied
/// verbatim into `Sli::Latency` by `SloSpec::to_sli`.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct LatencySli {
/// Name of the histogram metric (e.g. `http_request_duration_seconds` in
/// the tests).
pub histogram_metric: String,
/// Threshold kept as a string. In the tests a threshold of `"0.3"` shows up
/// as `le="0.3"` in the generated error-ratio expression.
pub threshold: String,
/// Optional selector string (e.g. `job="api"` in the tests). Left out of
/// serialized output when `None`.
// NOTE(review): how the selector is spliced into the query is decided by
// `Sli`, which is not visible in this file.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selector: Option<String>,
}
/// Alerting settings attached to an SLO.
///
/// Every field has a serde default, so the whole section may be omitted.
#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)]
pub struct Alerting {
/// Alert name override. When empty, `SloSpec::alert_name` falls back to the
/// SLO name. Left out of serialized output when empty.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub name: String,
/// String key/value labels at the alerting level; left out of serialized
/// output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub labels: BTreeMap<String, String>,
/// String key/value annotations at the alerting level; left out of
/// serialized output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub annotations: BTreeMap<String, String>,
/// Settings for the `page_alert` entry (the test fixture labels it
/// `severity: page`).
#[serde(default)]
pub page_alert: AlertMeta,
/// Settings for the `ticket_alert` entry (the test fixture labels it
/// `severity: ticket`).
#[serde(default)]
pub ticket_alert: AlertMeta,
}
/// Per-alert settings used for both `Alerting::page_alert` and
/// `Alerting::ticket_alert`.
#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)]
pub struct AlertMeta {
/// Disable flag; `false` when the key is absent.
// NOTE(review): presumably suppresses generation of this alert — the
// consumer of this flag is not visible in this file; confirm.
#[serde(default)]
pub disable: bool,
/// String key/value labels for this alert; left out of serialized output
/// when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub labels: BTreeMap<String, String>,
/// String key/value annotations for this alert; left out of serialized
/// output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub annotations: BTreeMap<String, String>,
}
impl Spec {
/// Parses a spec from a YAML string by delegating to `parse::from_yaml`.
///
/// # Errors
/// Returns whatever error `parse::from_yaml` reports.
// NOTE(review): whether parsing also validates is decided inside the
// `parse` module, which is not visible here.
pub fn from_yaml(yaml: &str) -> Result<Self> {
parse::from_yaml(yaml)
}
/// Loads a spec from `path` by delegating to `parse::from_path`.
///
/// Accepts anything that can be viewed as a `std::path::Path`.
///
/// # Errors
/// Returns whatever error `parse::from_path` reports.
pub fn from_path(path: impl AsRef<std::path::Path>) -> Result<Self> {
parse::from_path(path.as_ref())
}
/// Validates this spec by calling the module-level `validate` function
/// (re-exported from the `validate` submodule) on `self`.
///
/// # Errors
/// Returns the error produced by `validate` when the spec is rejected.
pub fn validate(&self) -> Result<()> {
validate(self)
}
}
impl SloSpec {
    /// Builds the identifier for this SLO: `service` and the SLO name joined
    /// by a single `-`.
    pub fn sloth_id(&self, service: &str) -> String {
        [service, self.name.as_str()].join("-")
    }

    /// Picks the period for this SLO.
    ///
    /// When the spec carries its own `period` string it is parsed with
    /// `Window::parse`; otherwise `default` is returned unchanged.
    ///
    /// # Errors
    /// Returns the `Window::parse` error when `period` is set but does not
    /// parse.
    pub fn resolve_period(&self, default: Window) -> Result<Window> {
        if let Some(raw) = &self.period {
            return Window::parse(raw);
        }
        Ok(default)
    }

    /// Converts this spec entry into a core `Slo`.
    ///
    /// # Errors
    /// Fails when `Objective::percent` rejects `objective`, or when the
    /// period cannot be resolved (see [`SloSpec::resolve_period`]). The
    /// objective is checked first.
    pub fn to_slo(&self, default_period: Window) -> Result<Slo> {
        let target = Objective::percent(self.objective)?;
        let window = self.resolve_period(default_period)?;
        Ok(Slo::new(target, window))
    }

    /// Converts the SLI section into a core `Sli`.
    ///
    /// Exactly one of `events`, `raw` or `latency` must be set; its fields
    /// are cloned into the matching `Sli` variant.
    ///
    /// # Errors
    /// Returns `SlokitError::Spec` when none of the three is set, or when
    /// more than one is set.
    pub fn to_sli(&self) -> Result<Sli> {
        let sli = &self.sli;
        match (&sli.events, &sli.raw, &sli.latency) {
            (Some(events), None, None) => Ok(Sli::Events {
                error_query: events.error_query.clone(),
                total_query: events.total_query.clone(),
            }),
            (None, Some(raw), None) => Ok(Sli::Raw {
                error_ratio_query: raw.error_ratio_query.clone(),
            }),
            (None, None, Some(latency)) => Ok(Sli::Latency {
                histogram_metric: latency.histogram_metric.clone(),
                threshold: latency.threshold.clone(),
                selector: latency.selector.clone(),
            }),
            (None, None, None) => Err(SlokitError::Spec(format!(
                "SLO '{}' has no `events`, `raw`, or `latency` SLI",
                self.name
            ))),
            // Every remaining combination has at least two variants set.
            _ => Err(SlokitError::Spec(format!(
                "SLO '{}' sets multiple SLIs; pick one of `events`, `raw`, or `latency`",
                self.name
            ))),
        }
    }

    /// Returns the alert name: the `alerting.name` override when it is
    /// non-empty, otherwise the SLO name.
    pub fn alert_name(&self) -> &str {
        match self.alerting.name.as_str() {
            "" => &self.name,
            custom => custom,
        }
    }
}
// Unit tests for spec parsing and for the conversions on `SloSpec`.
#[cfg(test)]
mod tests {
use super::*;
// Shared fixture: one service with a single events-based SLO and an
// alerting section that overrides the alert name.
const SAMPLE: &str = r#"
version: "prometheus/v1"
service: myservice
labels:
owner: team-platform
slos:
- name: requests-availability
objective: 99.9
description: "99.9% of requests succeed"
sli:
events:
error_query: sum(rate(http_requests_total{code=~"5.."}[{{.window}}]))
total_query: sum(rate(http_requests_total[{{.window}}]))
alerting:
name: HighErrorRate
page_alert:
labels:
severity: page
ticket_alert:
labels:
severity: ticket
"#;
// The fixture parses and exposes service, objective, id and alert name.
#[test]
fn parses_sample_spec() {
let spec = Spec::from_yaml(SAMPLE).unwrap();
assert_eq!(spec.service, "myservice");
assert_eq!(spec.slos.len(), 1);
let slo = &spec.slos[0];
assert_eq!(slo.objective, 99.9);
assert_eq!(slo.sloth_id("myservice"), "myservice-requests-availability");
assert_eq!(slo.alert_name(), "HighErrorRate");
}
// `to_slo` / `to_sli` produce the core types; with no `period` key the
// default period is used.
#[test]
fn converts_to_core_types() {
let spec = Spec::from_yaml(SAMPLE).unwrap();
let slo = spec.slos[0].to_slo(DEFAULT_PERIOD).unwrap();
// Compare with a tolerance because the objective is a float.
assert!((slo.objective.as_percent() - 99.9).abs() < 1e-9);
assert_eq!(slo.period, DEFAULT_PERIOD);
let sli = spec.slos[0].to_sli().unwrap();
assert!(matches!(sli, Sli::Events { .. }));
}
// A `period` key on the SLO wins over the default passed to `to_slo`.
#[test]
fn per_slo_period_override_is_respected() {
let yaml = r#"
service: s
slos:
- name: a
objective: 99.0
period: 7d
sli:
raw:
error_ratio_query: my_ratio[{{.window}}]
"#;
let spec = Spec::from_yaml(yaml).unwrap();
let slo = spec.slos[0].to_slo(DEFAULT_PERIOD).unwrap();
assert_eq!(slo.period, Window::days(7));
}
// Unknown top-level keys must not make parsing fail.
#[test]
fn ignores_unknown_sloth_fields() {
let yaml = r#"
service: s
some_future_sloth_key: true
slos:
- name: a
objective: 99.0
sli:
raw:
error_ratio_query: my_ratio[{{.window}}]
"#;
assert!(Spec::from_yaml(yaml).is_ok());
}
// A latency SLI converts to `Sli::Latency`, and its threshold appears as an
// `le` matcher in the generated error-ratio expression.
#[test]
fn latency_sli_converts_to_core() {
let yaml = r#"
service: s
slos:
- name: latency
objective: 99.0
sli:
latency:
histogram_metric: http_request_duration_seconds
threshold: "0.3"
selector: job="api"
"#;
let spec = Spec::from_yaml(yaml).unwrap();
let sli = spec.slos[0].to_sli().unwrap();
assert!(matches!(sli, Sli::Latency { .. }));
assert!(sli
.error_ratio_expr(Window::minutes(5))
.contains("le=\"0.3\""));
}
// Setting more than one SLI variant parses, but `to_sli` rejects it.
#[test]
fn multiple_slis_is_an_error() {
let yaml = r#"
service: s
slos:
- name: a
objective: 99.0
sli:
raw:
error_ratio_query: r[{{.window}}]
latency:
histogram_metric: m
threshold: "1"
"#;
let spec = Spec::from_yaml(yaml).unwrap();
let err = spec.slos[0].to_sli().unwrap_err();
assert!(err.to_string().contains("multiple SLIs"));
}
}