Skip to main content

hematite/agent/
mcp_server.rs

1// MCP stdio server mode — run with `hematite --mcp-server`
2//
3// Protocol: JSON-RPC 2.0, newline-delimited over stdin/stdout.
4// stderr is the only safe log channel — stdout is the protocol wire.
5//
6// Exposes:
7//   inspect_host — 116+ read-only diagnostic topics (SysAdmin, Network Admin,
8//                  hardware, security, developer tooling)
9//
10// Privacy modes:
11//   --edge-redact        Tier 1 regex: strips usernames, MACs, serials, hostnames, credentials
12//   --semantic-redact    Tier 2: local model summarizes output before it leaves; Tier 1 applied after
13//   --edge-redact + policy file: per-topic allow/block lists and per-topic redaction level overrides
14//
// Claude Desktop config
//   macOS:   ~/Library/Application Support/Claude/claude_desktop_config.json
//   Windows: %APPDATA%\Claude\claude_desktop_config.json
16//   {
17//     "mcpServers": {
18//       "hematite": { "command": "hematite", "args": ["--mcp-server"] }
19//     }
20//   }
21
22use crate::agent::redact_audit::{AuditEntry, RedactMode};
23use crate::agent::redact_policy::{load_policy, RedactPolicy, RedactionLevel};
24use serde_json::{json, Value};
25use std::collections::BTreeMap;
26type Tier1Hits = BTreeMap<&'static str, usize>;
27use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
28
29const PROTOCOL_VERSION: &str = "2024-11-05";
30const SERVER_NAME: &str = "hematite";
31const SERVER_VERSION: &str = env!("CARGO_PKG_VERSION");
32
33pub async fn run_mcp_server(
34    edge_redact: bool,
35    semantic_redact: bool,
36    api_url: &str,
37    semantic_url: &str,
38    semantic_model: &str,
39) -> anyhow::Result<()> {
40    let mode_label = if semantic_redact {
41        "semantic+regex"
42    } else if edge_redact {
43        "regex"
44    } else {
45        "none"
46    };
47    eprintln!(
48        "[hematite mcp] server v{SERVER_VERSION} started (protocol {PROTOCOL_VERSION}, redact: {mode_label})"
49    );
50
51    let policy = load_policy();
52
53    let stdin = tokio::io::stdin();
54    let stdout = tokio::io::stdout();
55    let mut reader = BufReader::new(stdin);
56    let mut writer = tokio::io::BufWriter::new(stdout);
57    let mut line = String::new();
58
59    loop {
60        line.clear();
61        let n = reader.read_line(&mut line).await?;
62        if n == 0 {
63            break; // EOF — client disconnected
64        }
65        let trimmed = line.trim();
66        if trimmed.is_empty() {
67            continue;
68        }
69
70        let msg: Value = match serde_json::from_str(trimmed) {
71            Ok(v) => v,
72            Err(e) => {
73                eprintln!("[hematite mcp] parse error: {e}");
74                send_parse_error(&mut writer).await?;
75                continue;
76            }
77        };
78
79        let method = match msg.get("method").and_then(|m| m.as_str()) {
80            Some(m) => m.to_string(),
81            None => continue,
82        };
83
84        let id = msg.get("id").cloned();
85
86        match method.as_str() {
87            "initialize" => {
88                let resp = json!({
89                    "jsonrpc": "2.0",
90                    "id": id,
91                    "result": {
92                        "protocolVersion": PROTOCOL_VERSION,
93                        "capabilities": { "tools": {} },
94                        "serverInfo": {
95                            "name": SERVER_NAME,
96                            "version": SERVER_VERSION,
97                            "redactMode": mode_label,
98                            "privacyNote": match mode_label {
99                                "semantic+regex" => "Tier 2: local model summarizes output before sending; Tier 1 regex applied after. Raw data never forwarded if model is unreachable.",
100                                "regex"          => "Tier 1: usernames, MACs, serials, hostnames, and credentials stripped before forwarding.",
101                                _                => "No redaction active. Raw diagnostic output is forwarded as-is.",
102                            }
103                        }
104                    }
105                });
106                send_response(&resp, &mut writer).await?;
107            }
108
109            "initialized" => {
110                eprintln!("[hematite mcp] client initialized");
111            }
112
113            "ping" => {
114                if let Some(id) = id {
115                    let resp = json!({ "jsonrpc": "2.0", "id": id, "result": {} });
116                    send_response(&resp, &mut writer).await?;
117                }
118            }
119
120            "tools/list" => {
121                if let Some(id) = id {
122                    let resp = json!({
123                        "jsonrpc": "2.0",
124                        "id": id,
125                        "result": { "tools": tool_list() }
126                    });
127                    send_response(&resp, &mut writer).await?;
128                }
129            }
130
131            "tools/call" => {
132                if let Some(id) = id {
133                    let params = msg.get("params").cloned().unwrap_or(Value::Null);
134                    let result = dispatch_tool_call(
135                        &params,
136                        edge_redact,
137                        semantic_redact,
138                        api_url,
139                        semantic_url,
140                        semantic_model,
141                        &policy,
142                    )
143                    .await;
144                    let resp = match result {
145                        Ok(text) => json!({
146                            "jsonrpc": "2.0",
147                            "id": id,
148                            "result": {
149                                "content": [{ "type": "text", "text": text }],
150                                "isError": false
151                            }
152                        }),
153                        Err(e) => json!({
154                            "jsonrpc": "2.0",
155                            "id": id,
156                            "result": {
157                                "content": [{ "type": "text", "text": format!("Error: {e}") }],
158                                "isError": true
159                            }
160                        }),
161                    };
162                    send_response(&resp, &mut writer).await?;
163                }
164            }
165
166            other => {
167                eprintln!("[hematite mcp] unknown method: {other}");
168                if let Some(id) = id {
169                    let resp = json!({
170                        "jsonrpc": "2.0",
171                        "id": id,
172                        "error": { "code": -32601, "message": "Method not found" }
173                    });
174                    send_response(&resp, &mut writer).await?;
175                }
176            }
177        }
178    }
179
180    eprintln!("[hematite mcp] server exiting (client disconnected)");
181    Ok(())
182}
183
184async fn dispatch_tool_call(
185    params: &Value,
186    edge_redact: bool,
187    semantic_redact: bool,
188    _api_url: &str,
189    semantic_url: &str,
190    semantic_model: &str,
191    policy: &RedactPolicy,
192) -> Result<String, String> {
193    let name = params
194        .get("name")
195        .and_then(|v| v.as_str())
196        .ok_or_else(|| "Missing tool name in tools/call params".to_string())?;
197
198    // Strip args to declared schema fields only (jailbreak resistance: Phase 5)
199    let args = sanitize_args(
200        params
201            .get("arguments")
202            .cloned()
203            .unwrap_or_else(|| Value::Object(Default::default())),
204    );
205
206    match name {
207        "inspect_host" => {
208            let topic = args
209                .get("topic")
210                .and_then(|v| v.as_str())
211                .unwrap_or("summary");
212
213            // Policy: blocked topics return a hard error — never run the inspection
214            if policy.is_blocked(topic) {
215                return Err(format!(
216                    "Topic '{topic}' is blocked by the local redaction policy. \
217                     Check .hematite/redact_policy.json."
218                ));
219            }
220
221            // Run the inspection
222            let raw = crate::tools::host_inspect::inspect_host(&args).await?;
223            let raw_len = raw.len();
224
225            // Determine effective redaction level.
226            // When --semantic-redact is active, the default is Semantic unless the
227            // policy file explicitly overrides the topic to "none" or "regex".
228            let level = if semantic_redact {
229                let per_topic = policy.redaction_level(topic, false); // false = no edge-redact default
230                match per_topic {
231                    RedactionLevel::None => RedactionLevel::Semantic,
232                    other => other,
233                }
234            } else {
235                policy.redaction_level(topic, edge_redact)
236            };
237
238            let (output, audit_mode, semantic_applied, tier1_hits) = match level {
239                RedactionLevel::None => {
240                    let labeled =
241                        format!("[hematite: no redaction active — raw diagnostic output]\n\n{raw}");
242                    (labeled, RedactMode::None, false, Tier1Hits::new())
243                }
244
245                RedactionLevel::Regex => {
246                    let r = crate::agent::edge_redact::redact(&raw);
247                    (
248                        format!("{}\n\n{}", r.summary_header, r.text),
249                        RedactMode::Regex,
250                        false,
251                        r.tier1_hits,
252                    )
253                }
254
255                RedactionLevel::Semantic => {
256                    match crate::agent::semantic_redact::summarize(
257                        &raw,
258                        topic,
259                        semantic_url,
260                        Some(semantic_model),
261                    )
262                    .await
263                    {
264                        Ok(summary) => {
265                            // Tier 1 as safety net after semantic pass
266                            let r = crate::agent::edge_redact::redact(&summary);
267                            let header = format!(
268                                "[edge-redact: semantic+regex — local model summary applied{}]\n\n",
269                                if r.redaction_count > 0 {
270                                    format!("; {} tier1 residual hit(s)", r.redaction_count)
271                                } else {
272                                    String::new()
273                                }
274                            );
275                            (
276                                format!("{header}{}", r.text),
277                                RedactMode::Semantic,
278                                true,
279                                r.tier1_hits,
280                            )
281                        }
282                        Err(e) => {
283                            // Fail-safe: return the error, never the raw data
284                            return Err(e);
285                        }
286                    }
287                }
288            };
289
290            // Phase 4: write audit entry (non-blocking — errors go to stderr only)
291            let tier1_hits_owned: BTreeMap<String, usize> = tier1_hits
292                .into_iter()
293                .map(|(k, v)| (k.to_string(), v))
294                .collect();
295            crate::agent::redact_audit::record(&AuditEntry {
296                topic: topic.to_string(),
297                mode: audit_mode,
298                tier1_hits: tier1_hits_owned,
299                semantic_applied,
300                input_chars: raw_len,
301                output_chars: output.len(),
302                caller_pid: std::process::id(),
303            });
304
305            Ok(output)
306        }
307
308        other => Err(format!("Unknown tool: '{other}'")),
309    }
310}
311
312/// Strip MCP call arguments to the declared schema fields.
313/// Unknown keys are silently dropped — they cannot influence tool behavior.
314fn sanitize_args(args: Value) -> Value {
315    const ALLOWED: &[&str] = &[
316        "topic",
317        "host",
318        "port",
319        "name",
320        "type",
321        "path",
322        "process",
323        "event_id",
324        "log",
325        "source",
326        "hours",
327        "level",
328        "issue",
329        "max_entries",
330    ];
331    match args {
332        Value::Object(map) => {
333            let cleaned: serde_json::Map<String, Value> = map
334                .into_iter()
335                .filter(|(k, _)| ALLOWED.contains(&k.as_str()))
336                .collect();
337            Value::Object(cleaned)
338        }
339        other => other,
340    }
341}
342
343fn tool_list() -> Value {
344    json!([
345        {
346            "name": "inspect_host",
347            "description": "Run a read-only diagnostic inspection of the local machine. Returns grounded data from 116+ topics covering SysAdmin, Network Admin, hardware, security, and developer tooling. No mutations — all reads. Works on Windows, Linux, and macOS.",
348            "inputSchema": {
349                "type": "object",
350                "properties": {
351                    "topic": {
352                        "type": "string",
353                        "description": "The inspection topic. Core topics: summary, processes, services, ports, connections, network, storage, hardware, health_report, security, updates, pending_reboot, disk_health, battery, recent_crashes, app_crashes, scheduled_tasks, dev_conflicts, connectivity, wifi, vpn, proxy, firewall_rules, traceroute, dns_cache, arp, route_table, os_config, resource_load, env, hosts_file, docker, wsl, ssh, installed_software, git_config, databases, user_accounts, audit_policy, shares, dns_servers, bitlocker, rdp, shadow_copies, pagefile, windows_features, printers, winrm, network_stats, udp_ports, gpo, certificates, integrity, domain, device_health, drivers, peripherals, sessions, thermal, activation, patch_history, hyperv, ip_config, overclocker, event_query, display_config, ntp, cpu_power, credentials, tpm, latency, network_adapter, dhcp, mtu, ipv6, tcp_params, wlan_profiles, ipsec, netbios, nic_teaming, snmp, port_test, network_profile, audio, bluetooth, camera, sign_in, installer_health, onedrive, browser_health, identity_auth, outlook, teams, windows_backup, search_index, lan_discovery, toolchains, path, env_doctor, fix_plan, repo_doctor, directory, disk_benchmark, desktop, downloads, disk, permissions, login_history, share_access, registry_audit, ad_user, dns_lookup"
354                    },
355                    "host": {
356                        "type": "string",
357                        "description": "Target host (for traceroute, port_test, dns_lookup)"
358                    },
359                    "port": {
360                        "type": "integer",
361                        "description": "Port number (for port_test)"
362                    },
363                    "name": {
364                        "type": "string",
365                        "description": "Hostname to resolve (for dns_lookup)"
366                    },
367                    "type": {
368                        "type": "string",
369                        "description": "DNS record type (for dns_lookup): A, AAAA, MX, TXT, SRV"
370                    },
371                    "path": {
372                        "type": "string",
373                        "description": "File path (for directory, disk, permissions, share_access)"
374                    },
375                    "process": {
376                        "type": "string",
377                        "description": "Process name filter (for app_crashes)"
378                    },
379                    "event_id": {
380                        "type": "integer",
381                        "description": "Windows Event ID to filter on (for event_query)"
382                    },
383                    "log": {
384                        "type": "string",
385                        "description": "Event log name (for event_query): System, Application, Security"
386                    },
387                    "source": {
388                        "type": "string",
389                        "description": "Event source/provider name (for event_query)"
390                    },
391                    "hours": {
392                        "type": "integer",
393                        "description": "Time window in hours (for event_query, default 24)"
394                    },
395                    "level": {
396                        "type": "string",
397                        "description": "Event severity level (for event_query): Error, Warning, Information"
398                    },
399                    "issue": {
400                        "type": "string",
401                        "description": "Problem description (for fix_plan)"
402                    },
403                    "max_entries": {
404                        "type": "integer",
405                        "description": "Maximum results to return (default 20)"
406                    }
407                },
408                "required": ["topic"]
409            }
410        }
411    ])
412}
413
414async fn send_response(
415    resp: &Value,
416    writer: &mut tokio::io::BufWriter<tokio::io::Stdout>,
417) -> anyhow::Result<()> {
418    let mut bytes = serde_json::to_vec(resp)?;
419    bytes.push(b'\n');
420    writer.write_all(&bytes).await?;
421    writer.flush().await?;
422    Ok(())
423}
424
425async fn send_parse_error(
426    writer: &mut tokio::io::BufWriter<tokio::io::Stdout>,
427) -> anyhow::Result<()> {
428    let resp = json!({
429        "jsonrpc": "2.0",
430        "id": null,
431        "error": { "code": -32700, "message": "Parse error" }
432    });
433    send_response(&resp, writer).await
434}