Skip to main content

heartbit_core/config/
sensor.rs

1//! Sensor pipeline configuration types.
2#![allow(missing_docs)]
3use serde::Deserialize;
4
5use super::SensorModality;
6use super::agent::McpServerEntry;
7
8/// Sensor layer configuration for continuous perception.
9#[derive(Debug, Clone, Deserialize)]
10pub struct SensorConfig {
11    /// Master switch for the sensor layer. Defaults to `true`.
12    #[serde(default = "super::default_true")]
13    pub enabled: bool,
14    /// Model routing configuration for triage decisions.
15    #[serde(default)]
16    pub routing: Option<SensorRoutingConfig>,
17    /// Salience scoring weights for triage promotion.
18    #[serde(default)]
19    pub salience: Option<SalienceConfig>,
20    /// Token budget limits for the sensor pipeline.
21    #[serde(default)]
22    pub token_budget: Option<TokenBudgetConfig>,
23    /// Story correlation settings.
24    #[serde(default)]
25    pub stories: Option<StoryCorrelationConfig>,
26    /// Sensor source definitions.
27    #[serde(default)]
28    pub sources: Vec<SensorSourceConfig>,
29}
30
31/// Model routing configuration for sensor triage.
32#[derive(Debug, Clone, Deserialize)]
33pub struct SensorRoutingConfig {
34    /// Which model tier to use for triage: "local", "cloud_light", "cloud_frontier".
35    #[serde(default = "default_triage_model")]
36    pub triage_model: String,
37    /// Path to local GGUF model file (for local SLM inference).
38    pub local_model_path: Option<String>,
39    /// Confidence threshold below which to escalate to a higher model tier.
40    #[serde(default = "default_confidence_threshold")]
41    pub confidence_threshold: f64,
42}
43
/// Serde default for `SensorRoutingConfig::triage_model`: the
/// `"cloud_light"` tier.
fn default_triage_model() -> String {
    String::from("cloud_light")
}
47
/// Serde default for `SensorRoutingConfig::confidence_threshold`.
fn default_confidence_threshold() -> f64 {
    const CONFIDENCE_THRESHOLD: f64 = 0.85;
    CONFIDENCE_THRESHOLD
}
51
52/// Salience scoring weights for triage promotion decisions.
53#[derive(Debug, Clone, Deserialize)]
54pub struct SalienceConfig {
55    /// Weight for urgency signals (0.0-1.0).
56    #[serde(default = "default_urgency_weight")]
57    pub urgency_weight: f64,
58    /// Weight for novelty signals (0.0-1.0).
59    #[serde(default = "default_novelty_weight")]
60    pub novelty_weight: f64,
61    /// Weight for relevance signals (0.0-1.0).
62    #[serde(default = "default_relevance_weight")]
63    pub relevance_weight: f64,
64    /// Minimum salience score for promotion (0.0-1.0).
65    #[serde(default = "default_salience_threshold")]
66    pub threshold: f64,
67}
68
/// Serde default for `SalienceConfig::urgency_weight`.
fn default_urgency_weight() -> f64 {
    const URGENCY_WEIGHT: f64 = 0.3;
    URGENCY_WEIGHT
}
72
/// Serde default for `SalienceConfig::novelty_weight`.
fn default_novelty_weight() -> f64 {
    const NOVELTY_WEIGHT: f64 = 0.3;
    NOVELTY_WEIGHT
}
76
/// Serde default for `SalienceConfig::relevance_weight`.
fn default_relevance_weight() -> f64 {
    const RELEVANCE_WEIGHT: f64 = 0.4;
    RELEVANCE_WEIGHT
}
80
/// Serde default for `SalienceConfig::threshold`.
fn default_salience_threshold() -> f64 {
    const SALIENCE_THRESHOLD: f64 = 0.3;
    SALIENCE_THRESHOLD
}
84
85/// Token budget limits for the sensor pipeline.
86#[derive(Debug, Clone, Deserialize)]
87pub struct TokenBudgetConfig {
88    /// Maximum tokens per hour across all sensor processing.
89    #[serde(default = "default_hourly_limit")]
90    pub hourly_limit: usize,
91    /// Maximum queued events before back-pressure.
92    #[serde(default = "default_queue_size")]
93    pub queue_size: usize,
94}
95
/// Serde default for `TokenBudgetConfig::hourly_limit` (tokens per hour).
fn default_hourly_limit() -> usize {
    const HOURLY_TOKEN_LIMIT: usize = 100_000;
    HOURLY_TOKEN_LIMIT
}
99
/// Serde default for `TokenBudgetConfig::queue_size` (queued events).
fn default_queue_size() -> usize {
    const QUEUE_SIZE: usize = 200;
    QUEUE_SIZE
}
103
104/// Story correlation configuration.
105#[derive(Debug, Clone, Deserialize)]
106pub struct StoryCorrelationConfig {
107    /// Time window in hours for correlating events into stories.
108    #[serde(default = "default_correlation_window_hours")]
109    pub correlation_window_hours: u64,
110    /// Maximum events tracked per story before archival.
111    #[serde(default = "default_max_events_per_story")]
112    pub max_events_per_story: usize,
113    /// Hours of inactivity after which a story is marked stale.
114    #[serde(default = "default_stale_after_hours")]
115    pub stale_after_hours: u64,
116}
117
/// Serde default for `StoryCorrelationConfig::correlation_window_hours`.
fn default_correlation_window_hours() -> u64 {
    const WINDOW_HOURS: u64 = 4;
    WINDOW_HOURS
}
121
/// Serde default for `StoryCorrelationConfig::max_events_per_story`.
fn default_max_events_per_story() -> usize {
    const MAX_EVENTS: usize = 50;
    MAX_EVENTS
}
125
/// Serde default for `StoryCorrelationConfig::stale_after_hours`.
fn default_stale_after_hours() -> u64 {
    const STALE_HOURS: u64 = 24;
    STALE_HOURS
}
129
130/// A sensor source definition.
131#[derive(Debug, Clone, Deserialize)]
132#[serde(tag = "type", rename_all = "snake_case")]
133pub enum SensorSourceConfig {
134    /// JMAP email sensor (push/poll).
135    JmapEmail {
136        name: String,
137        server: String,
138        username: String,
139        /// Environment variable containing the password.
140        password_env: String,
141        /// Senders that get automatic `Priority::High`.
142        #[serde(default)]
143        priority_senders: Vec<String>,
144        /// Senders whose emails are silently dropped during triage.
145        #[serde(default)]
146        blocked_senders: Vec<String>,
147        #[serde(default = "default_email_poll_interval")]
148        poll_interval_seconds: u64,
149    },
150    /// RSS/Atom feed sensor.
151    Rss {
152        name: String,
153        feeds: Vec<String>,
154        #[serde(default)]
155        interest_keywords: Vec<String>,
156        #[serde(default = "default_rss_poll_interval")]
157        poll_interval_seconds: u64,
158    },
159    /// Directory watcher for images.
160    Image {
161        name: String,
162        watch_directory: String,
163        #[serde(default = "default_file_poll_interval")]
164        poll_interval_seconds: u64,
165    },
166    /// Directory watcher for audio files.
167    Audio {
168        name: String,
169        watch_directory: String,
170        /// Whisper model size: "tiny", "base", "small", "medium", "large".
171        #[serde(default = "default_whisper_model")]
172        whisper_model: String,
173        /// Known contacts whose voice recordings get priority triage.
174        #[serde(default)]
175        known_contacts: Vec<String>,
176        #[serde(default = "default_file_poll_interval")]
177        poll_interval_seconds: u64,
178    },
179    /// Weather API sensor.
180    Weather {
181        name: String,
182        /// Environment variable containing the API key.
183        api_key_env: String,
184        locations: Vec<String>,
185        #[serde(default = "default_weather_poll_interval")]
186        poll_interval_seconds: u64,
187        /// When true, only promote weather alerts (not regular readings).
188        #[serde(default)]
189        alert_only: bool,
190    },
191    /// Generic webhook receiver.
192    Webhook {
193        name: String,
194        /// URL path for the webhook endpoint (e.g., "/webhooks/github").
195        path: String,
196        /// Environment variable containing the webhook secret.
197        secret_env: Option<String>,
198    },
199    /// Generic MCP sensor — polls a tool on any MCP server.
200    Mcp {
201        name: String,
202        /// MCP server endpoint (string URL, `{url, auth_header}`, or `{command, args, env}`).
203        server: Box<McpServerEntry>,
204        /// MCP tool to call each poll cycle.
205        tool_name: String,
206        /// Arguments passed to the tool (default: `{}`).
207        #[serde(default = "default_empty_object")]
208        tool_args: serde_json::Value,
209        /// Kafka topic to produce events to.
210        kafka_topic: String,
211        /// Sensory modality of produced events (default: `"text"`).
212        #[serde(default = "default_mcp_modality")]
213        modality: SensorModality,
214        /// Poll interval in seconds (default: 60).
215        #[serde(default = "default_mcp_poll_interval")]
216        poll_interval_seconds: u64,
217        /// JSON field path for item ID (default: `"id"`).
218        #[serde(default = "default_id_field")]
219        id_field: String,
220        /// JSON field for event content (default: entire item as JSON).
221        #[serde(default)]
222        content_field: Option<String>,
223        /// JSON field containing items array in tool result (default: root is array).
224        #[serde(default)]
225        items_field: Option<String>,
226        /// Priority senders for email triage (only when `kafka_topic = "hb.sensor.email"`).
227        #[serde(default)]
228        priority_senders: Vec<String>,
229        /// Blocked senders for email triage.
230        #[serde(default)]
231        blocked_senders: Vec<String>,
232        /// Optional enrichment tool to call for each new item (e.g., `gmail_get_message`).
233        /// When set, the sensor calls this tool with the item's ID to fetch detailed
234        /// metadata (headers, body, labels) before producing to Kafka.
235        #[serde(default)]
236        enrich_tool: Option<String>,
237        /// Parameter name for the item ID when calling the enrichment tool (default: `"id"`).
238        #[serde(default)]
239        enrich_id_param: Option<String>,
240        /// Dedup TTL in seconds. Seen IDs older than this are evicted. Default: 7 days.
241        #[serde(default = "default_dedup_ttl_seconds")]
242        dedup_ttl_seconds: u64,
243    },
244}
245
/// Serde default for the `Mcp` source's `dedup_ttl_seconds`: seven days,
/// expressed in seconds.
fn default_dedup_ttl_seconds() -> u64 {
    const SECONDS_PER_HOUR: u64 = 3600;
    const HOURS_PER_DAY: u64 = 24;
    const TTL_DAYS: u64 = 7;
    TTL_DAYS * HOURS_PER_DAY * SECONDS_PER_HOUR
}
249
250impl SensorSourceConfig {
251    /// Get the name of this sensor source.
252    pub fn name(&self) -> &str {
253        match self {
254            SensorSourceConfig::JmapEmail { name, .. }
255            | SensorSourceConfig::Rss { name, .. }
256            | SensorSourceConfig::Image { name, .. }
257            | SensorSourceConfig::Audio { name, .. }
258            | SensorSourceConfig::Weather { name, .. }
259            | SensorSourceConfig::Webhook { name, .. }
260            | SensorSourceConfig::Mcp { name, .. } => name,
261        }
262    }
263
264    /// Get priority and blocked sender lists for trust resolution.
265    ///
266    /// Returns `(priority_senders, blocked_senders)`. Only email-type sources
267    /// have these lists; other source types return empty slices.
268    pub fn sender_lists(&self) -> (&[String], &[String]) {
269        match self {
270            SensorSourceConfig::JmapEmail {
271                priority_senders,
272                blocked_senders,
273                ..
274            }
275            | SensorSourceConfig::Mcp {
276                priority_senders,
277                blocked_senders,
278                ..
279            } => (priority_senders, blocked_senders),
280            _ => (&[], &[]),
281        }
282    }
283}
284
/// Serde default poll interval, in seconds, for `JmapEmail` sources.
fn default_email_poll_interval() -> u64 {
    const EMAIL_POLL_SECONDS: u64 = 60;
    EMAIL_POLL_SECONDS
}
288
/// Serde default poll interval, in seconds, for `Rss` sources (15 minutes).
fn default_rss_poll_interval() -> u64 {
    const RSS_POLL_SECONDS: u64 = 900;
    RSS_POLL_SECONDS
}
292
/// Serde default poll interval, in seconds, shared by the directory-watching
/// sources (`Image` and `Audio`).
fn default_file_poll_interval() -> u64 {
    const FILE_POLL_SECONDS: u64 = 30;
    FILE_POLL_SECONDS
}
296
/// Serde default for the `Audio` source's `whisper_model`: the `"base"` size.
fn default_whisper_model() -> String {
    String::from("base")
}
300
/// Serde default poll interval, in seconds, for `Weather` sources
/// (30 minutes).
fn default_weather_poll_interval() -> u64 {
    const WEATHER_POLL_SECONDS: u64 = 1800;
    WEATHER_POLL_SECONDS
}
304
/// Serde default poll interval, in seconds, for `Mcp` sources.
fn default_mcp_poll_interval() -> u64 {
    const MCP_POLL_SECONDS: u64 = 60;
    MCP_POLL_SECONDS
}
308
309fn default_mcp_modality() -> SensorModality {
310    SensorModality::Text
311}
312
/// Serde default for the `Mcp` source's `id_field`: the `"id"` key.
fn default_id_field() -> String {
    String::from("id")
}
316
317fn default_empty_object() -> serde_json::Value {
318    serde_json::json!({})
319}