use std::io::Write;
use std::path::PathBuf;
use std::time::Duration;
use anyhow::{Context, Result};
use clap::{Args, Parser, Subcommand, ValueEnum};
use slokit::check::{check_spec, PrometheusClient, SloStatus, StatusLevel};
use slokit::dashboard::dashboard_json;
use slokit::generate::{generate_rules_with, GenerateOptions};
use slokit::spec::Spec;
use slokit::{BurnRate, MwmbrConfig, Objective, Slo, Window};
#[derive(Parser)]
#[command(
name = "slokit",
version,
about = "SLO and error-budget engine: generate Prometheus burn-rate alerts and do error-budget math"
)]
struct Cli {
#[command(subcommand)]
command: Command,
}
#[derive(Subcommand)]
enum Command {
Generate(GenerateArgs),
Validate(ValidateArgs),
Calc(CalcArgs),
Check(CheckArgs),
Dashboard(DashboardArgs),
}
#[derive(Clone, Copy, ValueEnum)]
enum Format {
Prometheus,
Operator,
}
#[derive(Args)]
struct GenerateArgs {
#[arg(short, long)]
input: PathBuf,
#[arg(short, long)]
output: Option<PathBuf>,
#[arg(long, value_enum, default_value_t = Format::Prometheus)]
format: Format,
#[arg(long, default_value = "30d")]
period: String,
#[arg(long)]
name: Option<String>,
}
#[derive(Args)]
struct ValidateArgs {
#[arg(short, long)]
input: PathBuf,
}
#[derive(Args)]
struct DashboardArgs {
#[arg(short, long)]
input: PathBuf,
#[arg(short, long)]
output: Option<PathBuf>,
}
#[derive(Args)]
struct CheckArgs {
#[arg(short, long)]
input: PathBuf,
#[arg(short, long)]
url: String,
#[arg(long, default_value = "1h")]
window: String,
#[arg(long, default_value = "30d")]
period: String,
#[arg(long)]
bearer_token: Option<String>,
#[arg(long, default_value_t = 30)]
timeout: u64,
}
#[derive(Args)]
struct CalcArgs {
#[arg(long)]
objective: f64,
#[arg(long, default_value = "30d")]
period: String,
#[arg(long)]
total: Option<f64>,
#[arg(long)]
bad: Option<f64>,
}
fn main() -> Result<()> {
let cli = Cli::parse();
match cli.command {
Command::Generate(args) => run_generate(args),
Command::Validate(args) => run_validate(args),
Command::Calc(args) => run_calc(args),
Command::Check(args) => run_check(args),
Command::Dashboard(args) => run_dashboard(args),
}
}
fn run_dashboard(args: DashboardArgs) -> Result<()> {
let spec = Spec::from_path(&args.input)
.with_context(|| format!("loading spec from {}", args.input.display()))?;
spec.validate()?;
let rendered = dashboard_json(&spec)?;
match args.output {
Some(path) => {
std::fs::write(&path, rendered)
.with_context(|| format!("writing dashboard to {}", path.display()))?;
eprintln!("wrote dashboard to {}", path.display());
}
None => {
std::io::stdout().write_all(rendered.as_bytes())?;
}
}
Ok(())
}
fn run_generate(args: GenerateArgs) -> Result<()> {
let spec = Spec::from_path(&args.input)
.with_context(|| format!("loading spec from {}", args.input.display()))?;
let opts = GenerateOptions {
default_period: Window::parse(&args.period)?,
mwmbr: MwmbrConfig::sre_default(),
};
let ruleset = generate_rules_with(&spec, &opts)?;
let rendered = match args.format {
Format::Prometheus => ruleset.to_prometheus_yaml()?,
Format::Operator => {
let name = args.name.as_deref().unwrap_or(&spec.service);
ruleset.to_operator_yaml(name, &spec.labels)?
}
};
match args.output {
Some(path) => {
std::fs::write(&path, rendered)
.with_context(|| format!("writing rules to {}", path.display()))?;
eprintln!("wrote rules to {}", path.display());
}
None => {
std::io::stdout().write_all(rendered.as_bytes())?;
}
}
Ok(())
}
fn run_validate(args: ValidateArgs) -> Result<()> {
let spec = Spec::from_path(&args.input)
.with_context(|| format!("loading spec from {}", args.input.display()))?;
spec.validate()?;
let slo_count = spec.slos.len();
println!(
"ok: '{}' is valid ({slo_count} SLO{})",
args.input.display(),
if slo_count == 1 { "" } else { "s" }
);
Ok(())
}
fn run_calc(args: CalcArgs) -> Result<()> {
let objective = Objective::percent(args.objective)?;
let period = Window::parse(&args.period)?;
let slo = Slo::new(objective, period);
println!("Objective: {}% over {}", objective.as_percent(), period);
println!(
"Error budget: {} of events",
ratio_str(slo.error_budget_ratio())
);
if let Some(total) = args.total {
let budget = slo.error_budget(total);
println!("Total events: {total}");
println!("Allowed bad: {:.2}", budget.allowed_bad_events());
if let Some(bad) = args.bad {
let observed_ratio = if total > 0.0 { bad / total } else { 0.0 };
let burn = BurnRate::from_error_ratio(observed_ratio, &slo);
println!("Observed bad: {bad}");
println!("Burn rate: {:.2}x", burn.value());
println!("Consumed: {}", ratio_str(budget.consumed_ratio(bad)));
println!("Remaining: {}", ratio_str(budget.remaining_ratio(bad)));
match budget.time_to_exhaustion(bad, burn, period) {
Some(d) => println!("Exhausted in: {}", humanize(d)),
None => println!("Exhausted in: never (no budget burn)"),
}
}
}
println!("\nBurn-rate alert thresholds (error ratio that fires each window):");
let budget_ratio = slo.error_budget_ratio();
for w in MwmbrConfig::sre_default().windows {
println!(
" {:<6} long={:<4} short={:<4} factor={:<5} threshold={}",
w.severity.label(),
w.long.to_string(),
w.short.to_string(),
w.factor,
ratio_str(w.factor * budget_ratio),
);
}
Ok(())
}
fn run_check(args: CheckArgs) -> Result<()> {
let spec = Spec::from_path(&args.input)
.with_context(|| format!("loading spec from {}", args.input.display()))?;
let default_period = Window::parse(&args.period)?;
let current_window = Window::parse(&args.window)?;
let mut client = PrometheusClient::with_timeout(&args.url, Duration::from_secs(args.timeout))?;
if let Some(token) = args.bearer_token {
client = client.with_bearer_token(token);
}
let statuses = check_spec(&client, &spec, default_period, current_window)?;
println!(
"service '{}' against {} (current window {})\n",
spec.service, args.url, current_window
);
println!(
"{:<7} {:<32} {:>9} {:>10} {:>9}",
"STATUS", "SLO", "CONSUMED", "REMAINING", "BURN"
);
let mut breaching = false;
for s in &statuses {
breaching |= s.level == StatusLevel::Breaching;
print_status_row(s);
}
if breaching {
std::process::exit(1);
}
Ok(())
}
fn print_status_row(s: &SloStatus) {
println!(
"{:<7} {:<32} {:>9} {:>10} {:>9}",
s.level.label(),
s.name,
opt_pct(s.budget_consumed_ratio),
opt_pct(s.budget_remaining_ratio),
opt_burn(s.current_burn_rate),
);
}
fn opt_pct(v: Option<f64>) -> String {
match v {
Some(x) => format!("{:.2}%", x * 100.0),
None => "n/a".to_string(),
}
}
fn opt_burn(v: Option<f64>) -> String {
match v {
Some(x) => format!("{x:.2}x"),
None => "n/a".to_string(),
}
}
fn ratio_str(r: f64) -> String {
if r.is_infinite() {
return "inf".to_string();
}
format!("{:.4}%", r * 100.0)
}
fn humanize(d: Duration) -> String {
let secs = d.as_secs();
let days = secs / 86_400;
let hours = (secs % 86_400) / 3_600;
let minutes = (secs % 3_600) / 60;
if days > 0 {
format!("{days}d {hours}h")
} else if hours > 0 {
format!("{hours}h {minutes}m")
} else {
format!("{minutes}m")
}
}