blueprint_qos/servers/
grafana.rs1use blueprint_core::{debug, info, warn};
2use std::collections::HashMap;
3use std::sync::{Arc, Mutex};
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::logging::GrafanaConfig;
8use crate::logging::loki::LokiConfig;
9use crate::servers::ServerManager;
10use crate::servers::common::DockerManager;
11
/// Docker image reference (repository and tag) used to launch the Grafana container.
const GRAFANA_IMAGE_NAME_FULL: &str = "grafana/grafana:10.4.3";

/// Timeout, in seconds, that `start` passes to `wait_until_ready`.
const HEALTH_CHECK_TIMEOUT_SECS: u64 = 90;
14
15#[derive(Clone, Debug)]
17pub struct GrafanaServerConfig {
18 pub port: u16,
20
21 pub admin_user: String,
23
24 pub admin_password: String,
26
27 pub allow_anonymous: bool,
29
30 pub anonymous_role: String,
32
33 pub data_dir: String,
35
36 pub container_name: String,
38
39 pub loki_config: Option<LokiConfig>,
41}
42
43impl Default for GrafanaServerConfig {
44 fn default() -> Self {
45 Self {
46 port: 3000,
47 admin_user: "admin".to_string(),
48 admin_password: "admin".to_string(),
49 allow_anonymous: true,
50 anonymous_role: "Admin".to_string(),
51 data_dir: "/var/lib/grafana".to_string(),
52 container_name: "blueprint-grafana".to_string(),
53 loki_config: None,
54 }
55 }
56}
57
58pub struct GrafanaServer {
60 docker: DockerManager,
62
63 config: GrafanaServerConfig,
65
66 container_id: Arc<Mutex<Option<String>>>,
68}
69
70impl GrafanaServer {
71 pub fn new(config: GrafanaServerConfig) -> Result<Self> {
76 Ok(Self {
77 docker: DockerManager::new().map_err(|e| Error::DockerConnection(e.to_string()))?,
78 config,
79 container_id: Arc::new(Mutex::new(None)),
80 })
81 }
82
83 #[must_use]
85 pub fn client_config(&self) -> GrafanaConfig {
86 GrafanaConfig {
87 url: self.url(),
88 api_key: None,
89 org_id: None,
90 folder: None,
91 admin_user: Some(self.config.admin_user.clone()),
92 admin_password: Some(self.config.admin_password.clone()),
93 loki_config: self.config.loki_config.clone(),
94 prometheus_datasource_url: None,
95 }
96 }
97}
98
99impl ServerManager for GrafanaServer {
100 async fn start(&self, network: Option<&str>, bind_ip: Option<String>) -> Result<()> {
101 info!("Starting Grafana server on port {}", self.config.port);
102
103 let mut env_vars = HashMap::new();
104 env_vars.insert(
105 "GF_SECURITY_ADMIN_USER".to_string(),
106 self.config.admin_user.clone(),
107 );
108 env_vars.insert(
109 "GF_SECURITY_ADMIN_PASSWORD".to_string(),
110 self.config.admin_password.clone(),
111 );
112 env_vars.insert("GF_LOG_LEVEL".to_string(), "debug".to_string());
113
114 if self.config.allow_anonymous {
115 env_vars.insert("GF_AUTH_ANONYMOUS_ENABLED".to_string(), "true".to_string());
116 env_vars.insert(
117 "GF_AUTH_ANONYMOUS_ORG_ROLE".to_string(),
118 self.config.anonymous_role.clone(),
119 );
120 env_vars.insert("GF_AUTH_DISABLE_LOGIN_FORM".to_string(), "true".to_string());
121 }
122
123 env_vars.insert(
124 "GF_FEATURE_TOGGLES_ENABLE".to_string(),
125 "publicDashboards".to_string(),
126 );
127
128 let mut ports = HashMap::new();
129 ports.insert("3000/tcp".to_string(), self.config.port.to_string());
130
131 let mut volumes = HashMap::new();
132 if !self.config.data_dir.is_empty() && self.config.data_dir != "/var/lib/grafana" {
133 volumes.insert(self.config.data_dir.clone(), "/var/lib/grafana".to_string());
134 }
135
136 let container_id = self
137 .docker
138 .run_container(
139 GRAFANA_IMAGE_NAME_FULL,
140 &self.config.container_name,
141 env_vars,
142 ports,
143 volumes,
144 None,
145 Some(vec!["host.docker.internal:host-gateway".to_string()]),
146 None,
147 bind_ip,
148 )
149 .await?;
150
151 if let Some(net) = network {
152 info!(
153 "Connecting Grafana container {} to network {}",
154 &self.config.container_name, net
155 );
156 self.docker.connect_to_network(&container_id, net).await?;
157 }
158
159 {
160 let mut id = self.container_id.lock().unwrap();
161 *id = Some(container_id.clone());
162 }
163
164 self.wait_until_ready(HEALTH_CHECK_TIMEOUT_SECS).await?;
165
166 info!("Grafana server started successfully");
167 Ok(())
168 }
169
170 async fn stop(&self) -> Result<()> {
171 let container_id = {
172 let id = self.container_id.lock().unwrap();
173 match id.as_ref() {
174 Some(id) => id.clone(),
175 None => {
176 info!("Grafana server is not running, nothing to stop.");
177 return Ok(());
178 }
179 }
180 };
181
182 info!("Stopping Grafana server: {}", &self.config.container_name);
183 self.docker
184 .stop_and_remove_container(&container_id, &self.config.container_name)
185 .await?;
186
187 let mut id = self.container_id.lock().unwrap();
188 *id = None;
189
190 info!("Grafana server stopped successfully.");
191 Ok(())
192 }
193
194 fn url(&self) -> String {
195 format!("http://localhost:{}", self.config.port)
196 }
197
198 async fn is_running(&self) -> Result<bool> {
199 let container_id = {
200 let id = self.container_id.lock().unwrap();
201 match id.as_ref() {
202 Some(id) => id.clone(),
203 None => return Ok(false),
204 }
205 };
206
207 self.docker.is_container_running(&container_id).await
208 }
209
210 async fn wait_until_ready(&self, timeout_secs: u64) -> Result<()> {
211 let container_id = {
212 let id = self.container_id.lock().unwrap();
213 id.as_ref()
214 .map(String::clone)
215 .ok_or_else(|| Error::Generic("Grafana server is not running".to_string()))?
216 };
217
218 info!("Waiting for Grafana container to be healthy...");
219 if let Err(e) = self
220 .docker
221 .wait_for_container_health(&container_id, timeout_secs)
222 .await
223 {
224 warn!(
225 "Grafana container health check failed: {}. Proceeding with API check.",
226 e
227 );
228 } else {
229 info!("Grafana container health check passed.");
230 }
231
232 info!("Waiting for Grafana API to be responsive...");
233 let client = reqwest::Client::new();
234 let url = format!("{}/api/health", self.url());
235 let start_time = tokio::time::Instant::now();
236 let timeout = Duration::from_secs(timeout_secs);
237
238 loop {
239 if start_time.elapsed() > timeout {
240 return Err(Error::Generic(format!(
241 "Grafana API did not become responsive within {} seconds.",
242 timeout_secs
243 )));
244 }
245
246 match client.get(&url).send().await {
247 Ok(response) if response.status().is_success() => {
248 info!("Grafana API is responsive.");
249 return Ok(());
250 }
251 Ok(response) => {
252 debug!(
253 "Grafana API check failed with status: {}. Retrying...",
254 response.status()
255 );
256 }
257 Err(e) => {
258 debug!("Grafana API check failed with error: {}. Retrying...", e);
259 }
260 }
261
262 tokio::time::sleep(Duration::from_secs(1)).await;
263 }
264 }
265}