blueprint_qos/servers/
grafana.rs

1use blueprint_core::{debug, info, warn};
2use std::collections::HashMap;
3use std::sync::{Arc, Mutex};
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::logging::GrafanaConfig;
8use crate::logging::loki::LokiConfig;
9use crate::servers::ServerManager;
10use crate::servers::common::DockerManager;
11
/// Number of seconds `start` passes to `wait_until_ready` when waiting for Grafana to come up.
const HEALTH_CHECK_TIMEOUT_SECS: u64 = 90;
/// Image reference (repository and tag) handed to the Docker manager when launching Grafana.
const GRAFANA_IMAGE_NAME_FULL: &str = "grafana/grafana:10.4.3";
14
15/// Grafana server configuration
16#[derive(Clone, Debug)]
17pub struct GrafanaServerConfig {
18    /// Port to expose Grafana on
19    pub port: u16,
20
21    /// Admin username
22    pub admin_user: String,
23
24    /// Admin password
25    pub admin_password: String,
26
27    /// Whether to allow anonymous access
28    pub allow_anonymous: bool,
29
30    /// Anonymous user role
31    pub anonymous_role: String,
32
33    /// Data directory
34    pub data_dir: String,
35
36    /// Container name
37    pub container_name: String,
38
39    /// Optional Loki configuration to be used by the Grafana client.
40    pub loki_config: Option<LokiConfig>,
41}
42
43impl Default for GrafanaServerConfig {
44    fn default() -> Self {
45        Self {
46            port: 3000,
47            admin_user: "admin".to_string(),
48            admin_password: "admin".to_string(),
49            allow_anonymous: true,
50            anonymous_role: "Admin".to_string(),
51            data_dir: "/var/lib/grafana".to_string(),
52            container_name: "blueprint-grafana".to_string(),
53            loki_config: None,
54        }
55    }
56}
57
58/// Grafana server manager
59pub struct GrafanaServer {
60    /// Docker manager
61    docker: DockerManager,
62
63    /// Server configuration
64    config: GrafanaServerConfig,
65
66    /// Container ID
67    container_id: Arc<Mutex<Option<String>>>,
68}
69
70impl GrafanaServer {
71    /// Create a new Grafana server manager
72    ///
73    /// # Errors
74    /// Returns an error if the Docker manager fails to create a new container
75    pub fn new(config: GrafanaServerConfig) -> Result<Self> {
76        Ok(Self {
77            docker: DockerManager::new().map_err(|e| Error::DockerConnection(e.to_string()))?,
78            config,
79            container_id: Arc::new(Mutex::new(None)),
80        })
81    }
82
83    /// Get the Grafana client configuration
84    #[must_use]
85    pub fn client_config(&self) -> GrafanaConfig {
86        GrafanaConfig {
87            url: self.url(),
88            api_key: None,
89            org_id: None,
90            folder: None,
91            admin_user: Some(self.config.admin_user.clone()),
92            admin_password: Some(self.config.admin_password.clone()),
93            loki_config: self.config.loki_config.clone(),
94            prometheus_datasource_url: None,
95        }
96    }
97}
98
99impl ServerManager for GrafanaServer {
100    async fn start(&self, network: Option<&str>, bind_ip: Option<String>) -> Result<()> {
101        info!("Starting Grafana server on port {}", self.config.port);
102
103        let mut env_vars = HashMap::new();
104        env_vars.insert(
105            "GF_SECURITY_ADMIN_USER".to_string(),
106            self.config.admin_user.clone(),
107        );
108        env_vars.insert(
109            "GF_SECURITY_ADMIN_PASSWORD".to_string(),
110            self.config.admin_password.clone(),
111        );
112        env_vars.insert("GF_LOG_LEVEL".to_string(), "debug".to_string());
113
114        if self.config.allow_anonymous {
115            env_vars.insert("GF_AUTH_ANONYMOUS_ENABLED".to_string(), "true".to_string());
116            env_vars.insert(
117                "GF_AUTH_ANONYMOUS_ORG_ROLE".to_string(),
118                self.config.anonymous_role.clone(),
119            );
120            env_vars.insert("GF_AUTH_DISABLE_LOGIN_FORM".to_string(), "true".to_string());
121        }
122
123        env_vars.insert(
124            "GF_FEATURE_TOGGLES_ENABLE".to_string(),
125            "publicDashboards".to_string(),
126        );
127
128        let mut ports = HashMap::new();
129        ports.insert("3000/tcp".to_string(), self.config.port.to_string());
130
131        let mut volumes = HashMap::new();
132        if !self.config.data_dir.is_empty() && self.config.data_dir != "/var/lib/grafana" {
133            volumes.insert(self.config.data_dir.clone(), "/var/lib/grafana".to_string());
134        }
135
136        let container_id = self
137            .docker
138            .run_container(
139                GRAFANA_IMAGE_NAME_FULL,
140                &self.config.container_name,
141                env_vars,
142                ports,
143                volumes,
144                None,
145                Some(vec!["host.docker.internal:host-gateway".to_string()]),
146                None,
147                bind_ip,
148            )
149            .await?;
150
151        if let Some(net) = network {
152            info!(
153                "Connecting Grafana container {} to network {}",
154                &self.config.container_name, net
155            );
156            self.docker.connect_to_network(&container_id, net).await?;
157        }
158
159        {
160            let mut id = self.container_id.lock().unwrap();
161            *id = Some(container_id.clone());
162        }
163
164        self.wait_until_ready(HEALTH_CHECK_TIMEOUT_SECS).await?;
165
166        info!("Grafana server started successfully");
167        Ok(())
168    }
169
170    async fn stop(&self) -> Result<()> {
171        let container_id = {
172            let id = self.container_id.lock().unwrap();
173            match id.as_ref() {
174                Some(id) => id.clone(),
175                None => {
176                    info!("Grafana server is not running, nothing to stop.");
177                    return Ok(());
178                }
179            }
180        };
181
182        info!("Stopping Grafana server: {}", &self.config.container_name);
183        self.docker
184            .stop_and_remove_container(&container_id, &self.config.container_name)
185            .await?;
186
187        let mut id = self.container_id.lock().unwrap();
188        *id = None;
189
190        info!("Grafana server stopped successfully.");
191        Ok(())
192    }
193
194    fn url(&self) -> String {
195        format!("http://localhost:{}", self.config.port)
196    }
197
198    async fn is_running(&self) -> Result<bool> {
199        let container_id = {
200            let id = self.container_id.lock().unwrap();
201            match id.as_ref() {
202                Some(id) => id.clone(),
203                None => return Ok(false),
204            }
205        };
206
207        self.docker.is_container_running(&container_id).await
208    }
209
210    async fn wait_until_ready(&self, timeout_secs: u64) -> Result<()> {
211        let container_id = {
212            let id = self.container_id.lock().unwrap();
213            id.as_ref()
214                .map(String::clone)
215                .ok_or_else(|| Error::Generic("Grafana server is not running".to_string()))?
216        };
217
218        info!("Waiting for Grafana container to be healthy...");
219        if let Err(e) = self
220            .docker
221            .wait_for_container_health(&container_id, timeout_secs)
222            .await
223        {
224            warn!(
225                "Grafana container health check failed: {}. Proceeding with API check.",
226                e
227            );
228        } else {
229            info!("Grafana container health check passed.");
230        }
231
232        info!("Waiting for Grafana API to be responsive...");
233        let client = reqwest::Client::new();
234        let url = format!("{}/api/health", self.url());
235        let start_time = tokio::time::Instant::now();
236        let timeout = Duration::from_secs(timeout_secs);
237
238        loop {
239            if start_time.elapsed() > timeout {
240                return Err(Error::Generic(format!(
241                    "Grafana API did not become responsive within {} seconds.",
242                    timeout_secs
243                )));
244            }
245
246            match client.get(&url).send().await {
247                Ok(response) if response.status().is_success() => {
248                    info!("Grafana API is responsive.");
249                    return Ok(());
250                }
251                Ok(response) => {
252                    debug!(
253                        "Grafana API check failed with status: {}. Retrying...",
254                        response.status()
255                    );
256                }
257                Err(e) => {
258                    debug!("Grafana API check failed with error: {}. Retrying...", e);
259                }
260            }
261
262            tokio::time::sleep(Duration::from_secs(1)).await;
263        }
264    }
265}