1use anyhow::{Context, Result};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
/// Serde default helper that always yields `true`.
///
/// Referenced via `#[serde(default = "default_true")]` so that a boolean
/// field absent from the input deserializes as `true` instead of `false`.
fn default_true() -> bool {
    true
}
14
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
17#[serde(rename_all = "snake_case")]
18pub enum RedactionLevel {
19 Strict,
21 Balanced,
23 Permissive,
25}
26
27impl Default for RedactionLevel {
28 fn default() -> Self {
29 RedactionLevel::Permissive
30 }
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
35#[serde(rename_all = "snake_case")]
36pub enum SamplingStrategy {
37 AlwaysOn,
39 AlwaysOff,
41 ParentBased { fallback_ratio: f64 },
43 Ratio(f64),
45}
46
47impl Default for SamplingStrategy {
48 fn default() -> Self {
49 SamplingStrategy::ParentBased {
50 fallback_ratio: 0.1,
51 }
52 }
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct CollectorConfig {
58 pub otlp_endpoint: String,
60
61 pub otlp_http_endpoint: String,
63
64 pub clickhouse: ClickHouseConfig,
66
67 pub phoenix: PhoenixConfig,
69
70 pub hyperdx: HyperDxConfig,
72
73 pub memory_limit_mib: u64,
75
76 pub batch_timeout_ms: u64,
78 pub batch_send_size: u32,
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct ClickHouseConfig {
84 pub url: String,
86
87 pub database: String,
89
90 pub username: String,
92
93 pub password: String,
95
96 pub compression: bool,
98
99 pub ttl_days: u32,
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct PhoenixConfig {
106 pub otlp_endpoint: String,
108
109 pub web_endpoint: String,
111
112 pub enabled: bool,
114}
115
116#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct HyperDxConfig {
119 pub web_endpoint: String,
121
122 pub api_endpoint: String,
124
125 pub enabled: bool,
127}
128
129#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct ObservabilityConfig {
132 pub enabled: bool,
134
135 pub redaction_level: RedactionLevel,
137
138 pub service_name: String,
140 pub service_version: String,
141 pub deployment_environment: String,
142
143 pub sampling: SamplingStrategy,
145
146 pub resource_attributes: HashMap<String, String>,
148
149 pub collector: CollectorConfig,
151
152 pub traces_enabled: bool,
154 pub metrics_enabled: bool,
155 pub logs_enabled: bool,
156
157 pub nats_propagation_enabled: bool,
159
160 pub session_timeout_minutes: u64,
162
163 pub cost_tracking_enabled: bool,
165
166 pub performance_thresholds: PerformanceThresholds,
168
169 #[serde(default)]
171 pub chat_bridge_tasks: Option<TasksBridgeConfig>,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
176pub struct TasksBridgeConfig {
177 pub enabled: bool,
179
180 pub mattermost: MattermostBridgeSettings,
182
183 pub channel: MattermostChannelSettings,
185}
186
187impl Default for TasksBridgeConfig {
188 fn default() -> Self {
189 Self {
190 enabled: false,
191 mattermost: MattermostBridgeSettings::default(),
192 channel: MattermostChannelSettings::default(),
193 }
194 }
195}
196
197#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct MattermostBridgeSettings {
200 pub base_url: String,
202
203 pub access_token: String,
205
206 #[serde(default)]
208 pub use_agent_bridge: bool,
209
210 #[serde(default)]
212 pub plugin_id: Option<String>,
213
214 #[serde(default)]
216 pub bridge_url: Option<String>,
217
218 #[serde(default)]
220 pub webhook_secret: Option<String>,
221
222 #[serde(default)]
224 pub agent_id: Option<String>,
225
226 #[serde(default)]
228 pub label: Option<String>,
229
230 #[serde(default = "default_true")]
232 pub verify_tls: bool,
233}
234
235impl Default for MattermostBridgeSettings {
236 fn default() -> Self {
237 Self {
238 base_url: "http://localhost:8065".to_string(),
239 access_token: String::new(),
240 use_agent_bridge: false,
241 plugin_id: None,
242 bridge_url: None,
243 webhook_secret: None,
244 agent_id: None,
245 label: Some("mattermost-tasks".to_string()),
246 verify_tls: true,
247 }
248 }
249}
250
251#[derive(Debug, Clone, Serialize, Deserialize)]
253pub struct MattermostChannelSettings {
254 pub team_id: String,
256
257 pub channel_id: String,
259
260 #[serde(default)]
262 pub channel_name: Option<String>,
263
264 #[serde(default)]
266 pub thread_prefix: Option<String>,
267}
268
269impl Default for MattermostChannelSettings {
270 fn default() -> Self {
271 Self {
272 team_id: String::new(),
273 channel_id: String::new(),
274 channel_name: None,
275 thread_prefix: Some("#tasks".to_string()),
276 }
277 }
278}
279
280#[derive(Debug, Clone, Serialize, Deserialize)]
282pub struct PerformanceThresholds {
283 pub max_latency_ms: u64,
285
286 pub max_cost_usd: f64,
288
289 pub max_memory_mb: u64,
291
292 pub cpu_threshold_percent: f32,
294}
295
296impl Default for CollectorConfig {
297 fn default() -> Self {
298 Self {
299 otlp_endpoint: "http://localhost:4317".to_string(),
300 otlp_http_endpoint: "http://localhost:4318".to_string(),
301 clickhouse: ClickHouseConfig::default(),
302 phoenix: PhoenixConfig::default(),
303 hyperdx: HyperDxConfig::default(),
304 memory_limit_mib: 512,
305 batch_timeout_ms: 5000,
306 batch_send_size: 512,
307 }
308 }
309}
310
311impl Default for ClickHouseConfig {
312 fn default() -> Self {
313 Self {
314 url: "http://localhost:8123".to_string(),
315 database: "otel".to_string(),
316 username: "default".to_string(),
317 password: "".to_string(),
318 compression: true,
319 ttl_days: 30,
320 }
321 }
322}
323
324impl Default for PhoenixConfig {
325 fn default() -> Self {
326 Self {
327 otlp_endpoint: "http://localhost:6006".to_string(),
328 web_endpoint: "http://localhost:6006".to_string(),
329 enabled: true,
330 }
331 }
332}
333
334impl Default for HyperDxConfig {
335 fn default() -> Self {
336 Self {
337 web_endpoint: "http://localhost:8080".to_string(),
338 api_endpoint: "http://localhost:8080/api".to_string(),
339 enabled: true,
340 }
341 }
342}
343
344impl Default for PerformanceThresholds {
345 fn default() -> Self {
346 Self {
347 max_latency_ms: 30000, max_cost_usd: 1.0, max_memory_mb: 1024, cpu_threshold_percent: 80.0,
351 }
352 }
353}
354
355impl Default for ObservabilityConfig {
356 fn default() -> Self {
357 Self {
358 enabled: false, redaction_level: RedactionLevel::Permissive,
360 service_name: "smith".to_string(),
361 service_version: "0.1.1".to_string(),
362 deployment_environment: "development".to_string(),
363 sampling: SamplingStrategy::default(),
364 resource_attributes: HashMap::new(),
365 collector: CollectorConfig::default(),
366 traces_enabled: true,
367 metrics_enabled: true,
368 logs_enabled: true,
369 nats_propagation_enabled: true,
370 session_timeout_minutes: 60,
371 cost_tracking_enabled: true,
372 performance_thresholds: PerformanceThresholds::default(),
373 chat_bridge_tasks: None,
374 }
375 }
376}
377
378impl ObservabilityConfig {
379 pub fn validate(&self) -> Result<()> {
381 if self.service_name.is_empty() {
382 return Err(anyhow::anyhow!("Service name cannot be empty"));
383 }
384
385 if self.service_version.is_empty() {
386 return Err(anyhow::anyhow!("Service version cannot be empty"));
387 }
388
389 if self.deployment_environment.is_empty() {
390 return Err(anyhow::anyhow!("Deployment environment cannot be empty"));
391 }
392
393 if self.session_timeout_minutes == 0 {
394 return Err(anyhow::anyhow!("Session timeout cannot be zero"));
395 }
396
397 if self.session_timeout_minutes > 1440 {
398 return Err(anyhow::anyhow!("Session timeout cannot exceed 24 hours"));
399 }
400
401 match &self.sampling {
403 SamplingStrategy::Ratio(ratio) => {
404 if *ratio < 0.0 || *ratio > 1.0 {
405 return Err(anyhow::anyhow!(
406 "Sampling ratio must be between 0.0 and 1.0"
407 ));
408 }
409 }
410 SamplingStrategy::ParentBased { fallback_ratio } => {
411 if *fallback_ratio < 0.0 || *fallback_ratio > 1.0 {
412 return Err(anyhow::anyhow!(
413 "Fallback sampling ratio must be between 0.0 and 1.0"
414 ));
415 }
416 }
417 _ => {}
418 }
419
420 self.collector
422 .validate()
423 .context("Collector configuration validation failed")?;
424
425 self.performance_thresholds
427 .validate()
428 .context("Performance thresholds validation failed")?;
429
430 if let Some(tasks) = &self.chat_bridge_tasks {
431 if tasks.enabled {
432 if tasks.mattermost.base_url.trim().is_empty() {
433 return Err(anyhow::anyhow!(
434 "Mattermost base_url must be set when chat bridge tasks are enabled"
435 ));
436 }
437 if tasks.mattermost.use_agent_bridge {
438 let secret_empty = tasks
439 .mattermost
440 .webhook_secret
441 .as_ref()
442 .map(|secret| secret.trim().is_empty())
443 .unwrap_or(true);
444 if secret_empty {
445 return Err(anyhow::anyhow!(
446 "Mattermost webhook_secret must be set when use_agent_bridge is enabled"
447 ));
448 }
449 } else if tasks.mattermost.access_token.trim().is_empty() {
450 return Err(anyhow::anyhow!(
451 "Mattermost access_token must be set when chat bridge tasks are enabled"
452 ));
453 }
454 if tasks.channel.team_id.trim().is_empty() {
455 return Err(anyhow::anyhow!(
456 "Mattermost team_id must be set for chat bridge tasks"
457 ));
458 }
459 if tasks.channel.channel_id.trim().is_empty() {
460 return Err(anyhow::anyhow!(
461 "Mattermost channel_id must be set for chat bridge tasks"
462 ));
463 }
464 }
465 }
466
467 Ok(())
468 }
469
470 pub fn development() -> Self {
472 Self {
473 enabled: false, deployment_environment: "development".to_string(),
475 sampling: SamplingStrategy::AlwaysOn, collector: CollectorConfig {
477 memory_limit_mib: 256, batch_timeout_ms: 1000, ..CollectorConfig::default()
480 },
481 ..Self::default()
482 }
483 }
484
485 pub fn production() -> Self {
487 Self {
488 enabled: false, deployment_environment: "production".to_string(),
490 redaction_level: RedactionLevel::Strict, sampling: SamplingStrategy::ParentBased {
492 fallback_ratio: 0.1,
493 },
494 collector: CollectorConfig {
495 memory_limit_mib: 1024, ..CollectorConfig::default()
497 },
498 performance_thresholds: PerformanceThresholds {
499 max_latency_ms: 10000, ..PerformanceThresholds::default()
501 },
502 ..Self::default()
503 }
504 }
505
506 pub fn testing() -> Self {
508 Self {
509 enabled: false, deployment_environment: "testing".to_string(),
511 sampling: SamplingStrategy::AlwaysOff, traces_enabled: false,
513 metrics_enabled: false,
514 logs_enabled: false,
515 ..Self::default()
516 }
517 }
518}
519
520impl CollectorConfig {
521 pub fn validate(&self) -> Result<()> {
522 if self.otlp_endpoint.is_empty() {
523 return Err(anyhow::anyhow!("OTLP endpoint cannot be empty"));
524 }
525
526 if self.otlp_http_endpoint.is_empty() {
527 return Err(anyhow::anyhow!("OTLP HTTP endpoint cannot be empty"));
528 }
529
530 if self.memory_limit_mib == 0 {
531 return Err(anyhow::anyhow!("Memory limit cannot be zero"));
532 }
533
534 if self.memory_limit_mib < 64 {
535 return Err(anyhow::anyhow!(
536 "Memory limit too low, minimum 64 MiB required"
537 ));
538 }
539
540 if self.batch_timeout_ms == 0 {
541 return Err(anyhow::anyhow!("Batch timeout cannot be zero"));
542 }
543
544 if self.batch_send_size == 0 {
545 return Err(anyhow::anyhow!("Batch send size cannot be zero"));
546 }
547
548 self.clickhouse
549 .validate()
550 .context("ClickHouse configuration validation failed")?;
551
552 Ok(())
553 }
554}
555
556impl ClickHouseConfig {
557 pub fn validate(&self) -> Result<()> {
558 if self.url.is_empty() {
559 return Err(anyhow::anyhow!("ClickHouse URL cannot be empty"));
560 }
561
562 if self.database.is_empty() {
563 return Err(anyhow::anyhow!("ClickHouse database cannot be empty"));
564 }
565
566 if self.username.is_empty() {
567 return Err(anyhow::anyhow!("ClickHouse username cannot be empty"));
568 }
569
570 if self.ttl_days == 0 {
571 return Err(anyhow::anyhow!("TTL cannot be zero"));
572 }
573
574 if self.ttl_days > 365 {
575 tracing::warn!("TTL > 1 year may consume significant storage space");
576 }
577
578 Ok(())
579 }
580}
581
582impl PerformanceThresholds {
583 pub fn validate(&self) -> Result<()> {
584 if self.max_latency_ms == 0 {
585 return Err(anyhow::anyhow!("Maximum latency cannot be zero"));
586 }
587
588 if self.max_cost_usd < 0.0 {
589 return Err(anyhow::anyhow!("Maximum cost cannot be negative"));
590 }
591
592 if self.max_memory_mb == 0 {
593 return Err(anyhow::anyhow!("Maximum memory cannot be zero"));
594 }
595
596 if self.cpu_threshold_percent <= 0.0 || self.cpu_threshold_percent > 100.0 {
597 return Err(anyhow::anyhow!("CPU threshold must be between 0 and 100"));
598 }
599
600 Ok(())
601 }
602}
603
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration is disabled, permissive, and valid.
    #[test]
    fn test_default_observability_config() {
        let defaults = ObservabilityConfig::default();

        assert!(!defaults.enabled);
        assert_eq!(defaults.redaction_level, RedactionLevel::Permissive);
        assert!(defaults.validate().is_ok());
    }

    /// Every environment preset validates and carries its distinguishing values.
    #[test]
    fn test_environment_configs() {
        let development = ObservabilityConfig::development();
        let production = ObservabilityConfig::production();
        let testing = ObservabilityConfig::testing();

        for preset in [&development, &production, &testing] {
            assert!(preset.validate().is_ok());
        }

        assert_eq!(development.sampling, SamplingStrategy::AlwaysOn);

        assert_eq!(production.redaction_level, RedactionLevel::Strict);

        assert!(!testing.traces_enabled);
        assert!(!testing.metrics_enabled);
        assert!(!testing.logs_enabled);
    }

    /// Redaction levels are distinct and the default is permissive.
    #[test]
    fn test_redaction_levels() {
        assert_ne!(RedactionLevel::Strict, RedactionLevel::Balanced);
        assert_ne!(RedactionLevel::Balanced, RedactionLevel::Permissive);
        assert_eq!(RedactionLevel::default(), RedactionLevel::Permissive);
    }

    /// A ratio inside `0.0..=1.0` is accepted; values outside are rejected.
    #[test]
    fn test_sampling_validation() {
        let cases = [(0.5, true), (-0.1, false), (1.1, false)];

        for (ratio, expect_valid) in cases {
            let config = ObservabilityConfig {
                sampling: SamplingStrategy::Ratio(ratio),
                ..ObservabilityConfig::default()
            };
            assert_eq!(config.validate().is_ok(), expect_valid);
        }
    }

    /// Out-of-range CPU thresholds and negative costs are rejected.
    #[test]
    fn test_performance_thresholds_validation() {
        assert!(PerformanceThresholds::default().validate().is_ok());

        let cpu_too_high = PerformanceThresholds {
            cpu_threshold_percent: 150.0,
            ..PerformanceThresholds::default()
        };
        assert!(cpu_too_high.validate().is_err());

        let cpu_negative = PerformanceThresholds {
            cpu_threshold_percent: -10.0,
            ..PerformanceThresholds::default()
        };
        assert!(cpu_negative.validate().is_err());

        let cost_negative = PerformanceThresholds {
            cpu_threshold_percent: 80.0,
            max_cost_usd: -1.0,
            ..PerformanceThresholds::default()
        };
        assert!(cost_negative.validate().is_err());
    }
}