forge_server/
lib.rs

1#![warn(missing_docs)]
2
3//! # forge-server
4//!
5//! MCP server for the Forgemax Code Mode Gateway.
6//!
7//! Exposes exactly two tools to agents:
8//! - `search` — query the capability manifest to discover tools
9//! - `execute` — run code against the tool API
10//!
11//! This collapses N servers x M tools into a fixed ~1,000 token footprint.
12
13use std::sync::Arc;
14use std::time::Duration;
15
16use forge_manifest::Manifest;
17use forge_sandbox::groups::{
18    GroupEnforcingDispatcher, GroupEnforcingResourceDispatcher, GroupPolicy,
19};
20use forge_sandbox::stash::{SessionStash, StashConfig};
21use forge_sandbox::{
22    ResourceDispatcher, SandboxConfig, SandboxExecutor, StashDispatcher, ToolDispatcher,
23};
24use rmcp::handler::server::router::tool::ToolRouter;
25use rmcp::handler::server::wrapper::Parameters;
26use rmcp::model::{Implementation, ServerCapabilities, ServerInfo};
27use rmcp::schemars::JsonSchema;
28use rmcp::{tool, tool_handler, tool_router, ServerHandler};
29use serde::Deserialize;
30
31/// Maximum result size in characters before truncation.
32///
33/// Results exceeding this limit are wrapped in a JSON envelope with metadata
34/// about the truncation. This prevents oversized results from consuming the
35/// LLM's entire context window.
36const MAX_RESULT_CHARS: usize = 100_000;
37
38/// Truncate an oversized JSON result string, wrapping it with metadata.
39///
40/// Short results pass through unchanged. Results exceeding [`MAX_RESULT_CHARS`]
41/// are cut at the last valid character boundary before the limit and wrapped in
42/// a JSON object with `_truncated`, `_original_chars`, `_shown_chars`, and `data`.
43fn truncate_result_if_needed(json: String) -> String {
44    if json.len() <= MAX_RESULT_CHARS {
45        return json;
46    }
47    let shown = MAX_RESULT_CHARS.saturating_sub(200);
48    let end = json[..shown]
49        .char_indices()
50        .last()
51        .map(|(i, c)| i + c.len_utf8())
52        .unwrap_or(0);
53    serde_json::json!({
54        "_truncated": true,
55        "_original_chars": json.len(),
56        "_shown_chars": end,
57        "data": &json[..end]
58    })
59    .to_string()
60}
61
/// The Forge MCP server handler.
///
/// Implements `ServerHandler` from rmcp to serve the `search` and `execute`
/// Code Mode tools over MCP stdio or SSE transport.
///
/// Every field except `tool_router` is an `Arc` (or an `Option` of one), so
/// clones made through the derived `Clone` share the same executor, manifest,
/// dispatchers and session stash.
#[derive(Clone)]
pub struct ForgeServer {
    /// Sandbox executor that runs the code passed to `search` and `execute`.
    executor: Arc<SandboxExecutor>,
    /// Capability manifest: serialized for `search`, and the source of the
    /// known server names for `execute` and of the counts shown by `get_info`.
    manifest: Arc<Manifest>,
    /// Tool dispatcher handed to executed code; wrapped in a
    /// `GroupEnforcingDispatcher` per call when a group policy is set.
    dispatcher: Arc<dyn ToolDispatcher>,
    /// Optional resource dispatcher; wrapped in a
    /// `GroupEnforcingResourceDispatcher` per call when a group policy is set.
    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
    /// Group policy; stays `None` unless a non-empty policy is supplied through
    /// `with_group_policy`.
    group_policy: Option<Arc<GroupPolicy>>,
    /// Session stash shared by every `execute` call on this server (and its
    /// clones); stays `None` unless enabled through `with_stash`.
    session_stash: Option<Arc<tokio::sync::Mutex<SessionStash>>>,
    /// Router built by `Self::tool_router()` and referenced by the
    /// `#[tool_handler]` impl of `ServerHandler`.
    tool_router: ToolRouter<Self>,
}
76
/// Stash dispatcher that wraps a shared [`SessionStash`] behind a Mutex.
///
/// Created per-execution by `ForgeServer::execute()` to provide the stash API
/// to sandbox code. Every trait method uses `current_group` from this struct
/// and ignores the group argument it is called with.
///
/// NOTE(review): `ForgeServer::execute()` currently always builds this with
/// `current_group: None`, so all stash operations run without a group —
/// confirm that this is the intended isolation behaviour.
struct ServerStashDispatcher {
    /// The session stash shared with the owning `ForgeServer`.
    stash: Arc<tokio::sync::Mutex<SessionStash>>,
    /// Group passed to every stash operation made through this dispatcher.
    current_group: Option<String>,
}
85
86#[async_trait::async_trait]
87impl StashDispatcher for ServerStashDispatcher {
88    async fn put(
89        &self,
90        key: &str,
91        value: serde_json::Value,
92        ttl_secs: Option<u32>,
93        _current_group: Option<String>,
94    ) -> Result<serde_json::Value, anyhow::Error> {
95        let ttl = ttl_secs
96            .filter(|&s| s > 0)
97            .map(|s| Duration::from_secs(s as u64));
98        let mut stash = self.stash.lock().await;
99        stash.put(key, value, ttl, self.current_group.as_deref())?;
100        Ok(serde_json::json!({"ok": true}))
101    }
102
103    async fn get(
104        &self,
105        key: &str,
106        _current_group: Option<String>,
107    ) -> Result<serde_json::Value, anyhow::Error> {
108        let stash = self.stash.lock().await;
109        match stash.get(key, self.current_group.as_deref())? {
110            Some(v) => Ok(v.clone()),
111            None => Ok(serde_json::Value::Null),
112        }
113    }
114
115    async fn delete(
116        &self,
117        key: &str,
118        _current_group: Option<String>,
119    ) -> Result<serde_json::Value, anyhow::Error> {
120        let mut stash = self.stash.lock().await;
121        let deleted = stash.delete(key, self.current_group.as_deref())?;
122        Ok(serde_json::json!({"deleted": deleted}))
123    }
124
125    async fn keys(
126        &self,
127        _current_group: Option<String>,
128    ) -> Result<serde_json::Value, anyhow::Error> {
129        let stash = self.stash.lock().await;
130        let keys: Vec<&str> = stash.keys(self.current_group.as_deref());
131        Ok(serde_json::json!(keys))
132    }
133}
134
135impl ForgeServer {
136    /// Create a new Forge server with the given config, manifest, dispatcher,
137    /// and optional resource dispatcher.
138    pub fn new(
139        config: SandboxConfig,
140        manifest: Manifest,
141        dispatcher: Arc<dyn ToolDispatcher>,
142        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
143    ) -> Self {
144        Self {
145            executor: Arc::new(SandboxExecutor::new(config)),
146            manifest: Arc::new(manifest),
147            dispatcher,
148            resource_dispatcher,
149            group_policy: None,
150            session_stash: None,
151            tool_router: Self::tool_router(),
152        }
153    }
154
155    /// Set a group policy for cross-server data flow enforcement.
156    ///
157    /// When set, each `execute()` call wraps the dispatcher with a fresh
158    /// [`GroupEnforcingDispatcher`] that tracks group access for that execution.
159    /// If a resource dispatcher is also configured, it is wrapped with a
160    /// [`GroupEnforcingResourceDispatcher`] sharing the same lock.
161    pub fn with_group_policy(mut self, policy: GroupPolicy) -> Self {
162        if !policy.is_empty() {
163            self.group_policy = Some(Arc::new(policy));
164        }
165        self
166    }
167
168    /// Enable the session stash with the given configuration.
169    ///
170    /// When enabled, `forge.stash.put/get/delete/keys()` are available in
171    /// sandbox execute mode.
172    pub fn with_stash(mut self, config: StashConfig) -> Self {
173        self.session_stash = Some(Arc::new(tokio::sync::Mutex::new(SessionStash::new(config))));
174        self
175    }
176}
177
/// Input for the `search` tool.
// NOTE(review): `JsonSchema` is derived here, so the `///` text on this struct
// and on `code` is presumably emitted as schema descriptions that agents read.
// Treat edits to that text as changes to the tool's public contract.
#[derive(Debug, Deserialize, JsonSchema)]
pub struct SearchInput {
    /// JavaScript async arrow function to search the capability manifest.
    /// The manifest is available as `globalThis.manifest` with servers,
    /// categories, and tool schemas.
    ///
    /// IMPORTANT: `server.categories` is an Object keyed by name (NOT an array).
    /// Use `Object.entries(s.categories)` or `Object.values(s.categories)` to iterate.
    /// Each category has a `.tools` Array with `.name`, `.description`, `.input_schema`.
    /// Check `input_schema.required` before calling a tool to get the right parameters.
    // Passed unmodified to the sandbox executor's search entry point by
    // `ForgeServer::search`.
    pub code: String,
}
191
/// Input for the `execute` tool.
// NOTE(review): `JsonSchema` is derived here, so the `///` text on this struct
// and on `code` is presumably emitted as schema descriptions that agents read.
// Treat edits to that text as changes to the tool's public contract.
#[derive(Debug, Deserialize, JsonSchema)]
pub struct ExecuteInput {
    /// JavaScript async arrow function to execute against the tool API.
    /// Use `forge.callTool(server, tool, args)` or
    /// `forge.server("name").category.tool(args)` to call tools.
    ///
    /// Runs in a sandboxed V8 isolate — no filesystem, network, or module access.
    /// `import()`, `require()`, `eval()`, and `Deno.*` are all blocked.
    // Passed unmodified to the sandbox executor by `ForgeServer::execute`.
    pub code: String,
}
203
204#[tool_router(router = tool_router)]
205impl ForgeServer {
206    /// Search the capability manifest to discover available tools across all
207    /// connected servers. The manifest is available as `globalThis.manifest`.
208    #[tool(
209        name = "search",
210        description = "Search the capability manifest to discover available tools across all connected servers. The manifest is available as `globalThis.manifest` with servers, categories, and tool schemas. Write a JavaScript async arrow function to query it.\n\nManifest structure: manifest.servers is an Array of {name, description, categories}. IMPORTANT: categories is an Object keyed by name (NOT an array) — use Object.entries() or Object.values() to iterate. Each category has a .tools Array with {name, description, input_schema}. Check input_schema for required parameters before calling a tool.\n\nExample: `async () => { const s = manifest.servers[0]; return Object.entries(s.categories).map(([name, cat]) => ({ name, tools: cat.tools.map(t => t.name) })); }`"
211    )]
212    pub async fn search(
213        &self,
214        Parameters(input): Parameters<SearchInput>,
215    ) -> Result<String, String> {
216        tracing::info!(code_len = input.code.len(), "search: starting");
217
218        let manifest_json = self
219            .manifest
220            .to_json()
221            .map_err(|e| format!("manifest serialization failed: {e}"))?;
222
223        match self
224            .executor
225            .execute_search(&input.code, &manifest_json)
226            .await
227        {
228            Ok(result) => {
229                let json = serde_json::to_string_pretty(&result)
230                    .map_err(|e| format!("result serialization failed: {e}"))?;
231                tracing::info!(result_len = json.len(), "search: complete");
232                Ok(truncate_result_if_needed(json))
233            }
234            Err(e) => {
235                tracing::warn!(error = %e, "search: failed");
236                let msg = format!("{e}");
237                // JsError messages are already redacted at the op level; strip
238                // the internal "javascript error:" prefix for cleaner LLM output.
239                let clean = msg.strip_prefix("javascript error: ").unwrap_or(&msg);
240                Ok(serde_json::json!({"error": clean}).to_string())
241            }
242        }
243    }
244
245    /// Execute code against the tool API in a sandboxed V8 isolate.
246    #[tool(
247        name = "execute",
248        description = "Execute JavaScript against the tool API. Use `forge.server('name').category.tool(args)` or `forge.callTool(server, tool, args)` to call tools on connected servers. Chain multiple operations in a single call.\n\nIMPORTANT: Code runs in a sandboxed V8 isolate with NO filesystem, network, or module access. import(), require(), eval(), and Deno.* are all blocked. Use forge.callTool() for all external operations.\n\nExample: `async () => { const result = await forge.callTool('narsil', 'scan_security', { repo: 'MyProject' }); return result; }`\n\nAdditional APIs:\n- `forge.readResource(server, uri)` — read MCP resources\n- `forge.stash.put(key, value, {ttl?})` / `.get(key)` / `.delete(key)` / `.keys()` — session key-value store\n- `forge.parallel(calls, opts)` — bounded concurrent execution\n\nAlways check tool input_schema via search() before calling unfamiliar tools."
249    )]
250    pub async fn execute(
251        &self,
252        Parameters(input): Parameters<ExecuteInput>,
253    ) -> Result<String, String> {
254        tracing::info!(code_len = input.code.len(), "execute: starting");
255
256        // Wrap dispatcher(s) with group enforcement if a policy is configured.
257        // A fresh pair of GroupEnforcingDispatcher/GroupEnforcingResourceDispatcher
258        // is created per-execution so that group locking state doesn't leak
259        // between executions. Both share the same lock for consistent enforcement.
260        let (dispatcher, resource_dispatcher): (
261            Arc<dyn ToolDispatcher>,
262            Option<Arc<dyn ResourceDispatcher>>,
263        ) = match &self.group_policy {
264            Some(policy) => {
265                let tool_enforcer =
266                    GroupEnforcingDispatcher::new(self.dispatcher.clone(), policy.clone());
267                let shared_lock = tool_enforcer.shared_lock();
268
269                let resource = self.resource_dispatcher.as_ref().map(|rd| {
270                    Arc::new(GroupEnforcingResourceDispatcher::new(
271                        rd.clone(),
272                        policy.clone(),
273                        shared_lock,
274                    )) as Arc<dyn ResourceDispatcher>
275                });
276
277                (Arc::new(tool_enforcer), resource)
278            }
279            None => (self.dispatcher.clone(), self.resource_dispatcher.clone()),
280        };
281
282        // Create stash dispatcher if session stash is configured
283        let stash_dispatcher: Option<Arc<dyn StashDispatcher>> =
284            self.session_stash.as_ref().map(|stash| {
285                Arc::new(ServerStashDispatcher {
286                    stash: stash.clone(),
287                    current_group: None, // Group tracking done at ForgeServer level
288                }) as Arc<dyn StashDispatcher>
289            });
290
291        // SR-R6: Collect known server names from manifest for op-level validation
292        let known_servers: std::collections::HashSet<String> = self
293            .manifest
294            .servers
295            .iter()
296            .map(|s| s.name.clone())
297            .collect();
298
299        match self
300            .executor
301            .execute_code_with_options(
302                &input.code,
303                dispatcher,
304                resource_dispatcher,
305                stash_dispatcher,
306                Some(known_servers),
307            )
308            .await
309        {
310            Ok(result) => {
311                let json = serde_json::to_string_pretty(&result)
312                    .map_err(|e| format!("result serialization failed: {e}"))?;
313                tracing::info!(result_len = json.len(), "execute: complete");
314                Ok(truncate_result_if_needed(json))
315            }
316            Err(e) => {
317                tracing::warn!(error = %e, "execute: failed");
318                let msg = format!("{e}");
319                // JsError messages are already redacted at the op level; strip
320                // the internal "javascript error:" prefix for cleaner LLM output.
321                let clean = msg.strip_prefix("javascript error: ").unwrap_or(&msg);
322                Ok(serde_json::json!({"error": clean}).to_string())
323            }
324        }
325    }
326}
327
328#[tool_handler(router = self.tool_router)]
329impl ServerHandler for ForgeServer {
330    fn get_info(&self) -> ServerInfo {
331        let stats = format!(
332            "{} servers, {} tools",
333            self.manifest.total_servers(),
334            self.manifest.total_tools(),
335        );
336
337        ServerInfo {
338            capabilities: ServerCapabilities::builder().enable_tools().build(),
339            instructions: Some(format!(
340                "Forgemax Code Mode Gateway ({stats}). \
341                 Use search() to discover available tools, then execute() to call them.\n\
342                 \n\
343                 Both tools take a `code` parameter containing a JavaScript async arrow function.\n\
344                 Example: `async () => {{ return manifest.servers.map(s => s.name); }}`\n\
345                 \n\
346                 Manifest shape:\n\
347                 - manifest.servers: Array of {{ name, description, categories }}\n\
348                 - server.categories: Object (NOT array) keyed by category name, e.g. categories[\"ast\"]\n\
349                 - Use Object.entries(s.categories) or Object.values(s.categories) to iterate categories\n\
350                 - Each category has .tools (Array) with .name, .description, .input_schema\n\
351                 - Always check a tool's input_schema.required before calling it\n\
352                 \n\
353                 Sandboxed environment — no filesystem, network, or module imports (import/require/eval are blocked). \
354                 Use forge.callTool(server, tool, args) for all external operations.\n\
355                 \n\
356                 When calling tools, use the tool name only (e.g. \"find_symbols\"), \
357                 not the category-prefixed form (e.g. NOT \"general.find_symbols\").\n\
358                 \n\
359                 Additional APIs (execute mode only):\n\
360                 - forge.readResource(server, uri) — read MCP resources from downstream servers\n\
361                 - forge.stash.put(key, value, {{ttl?}}) / .get(key) / .delete(key) / .keys() — \
362                 session-scoped key-value store for sharing data across executions\n\
363                 - forge.parallel(calls, opts) — bounded concurrent execution of tool/resource calls"
364            )),
365            server_info: Implementation {
366                name: "forge".into(),
367                version: env!("CARGO_PKG_VERSION").into(),
368                title: None,
369                description: None,
370                icons: None,
371                website_url: None,
372            },
373            ..Default::default()
374        }
375    }
376}
377
378#[cfg(test)]
379mod tests {
380    use super::*;
381    use forge_manifest::{Category, ManifestBuilder, ServerBuilder, ToolEntry};
382
383    struct TestDispatcher;
384
385    #[async_trait::async_trait]
386    impl ToolDispatcher for TestDispatcher {
387        async fn call_tool(
388            &self,
389            server: &str,
390            tool: &str,
391            args: serde_json::Value,
392        ) -> Result<serde_json::Value, anyhow::Error> {
393            Ok(serde_json::json!({
394                "server": server,
395                "tool": tool,
396                "args": args,
397                "status": "ok"
398            }))
399        }
400    }
401
402    fn test_server() -> ForgeServer {
403        let manifest = ManifestBuilder::new()
404            .add_server(
405                ServerBuilder::new("test-server", "A test server")
406                    .add_category(Category {
407                        name: "tools".into(),
408                        description: "Test tools".into(),
409                        tools: vec![ToolEntry {
410                            name: "echo".into(),
411                            description: "Echoes input".into(),
412                            params: vec![],
413                            returns: Some("The input".into()),
414                            input_schema: None,
415                        }],
416                    })
417                    .build(),
418            )
419            .build();
420        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
421        ForgeServer::new(SandboxConfig::default(), manifest, dispatcher, None)
422    }
423
424    #[test]
425    fn get_info_returns_correct_metadata() {
426        let server = test_server();
427        let info = server.get_info();
428        assert_eq!(info.server_info.name, "forge");
429        assert_eq!(info.server_info.version, env!("CARGO_PKG_VERSION"));
430        let instructions = info.instructions.unwrap();
431        assert!(instructions.contains("search()"));
432        assert!(instructions.contains("execute()"));
433        assert!(instructions.contains("1 servers, 1 tools"));
434        // Verify improved documentation is present
435        assert!(
436            instructions.contains("async arrow function"),
437            "instructions should mention async arrow function format"
438        );
439        assert!(
440            instructions.contains("Object (NOT array)"),
441            "instructions should warn about categories being an Object"
442        );
443        assert!(
444            instructions.contains("input_schema"),
445            "instructions should mention input_schema for parameter discovery"
446        );
447        assert!(
448            instructions.contains("no filesystem"),
449            "instructions should mention sandbox constraints"
450        );
451        assert!(
452            instructions.contains("use the tool name only"),
453            "instructions should clarify tool name vs category-prefixed form"
454        );
455    }
456
457    #[tokio::test]
458    async fn search_returns_json() {
459        let server = test_server();
460        let result = server
461            .search(Parameters(SearchInput {
462                code: r#"async () => { return manifest.servers.map(s => s.name); }"#.into(),
463            }))
464            .await;
465        match result {
466            Ok(json) => {
467                let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
468                let names = parsed.as_array().unwrap();
469                assert_eq!(names[0], "test-server");
470            }
471            Err(e) => panic!("search should succeed: {e}"),
472        }
473    }
474
475    #[tokio::test]
476    async fn search_with_invalid_code_returns_error() {
477        let server = test_server();
478        let result = server
479            .search(Parameters(SearchInput {
480                // eval( is a banned pattern
481                code: r#"async () => { return eval("bad"); }"#.into(),
482            }))
483            .await;
484        // WI-1: Errors return Ok with JSON error field (not Err) to prevent
485        // sibling tool call cascade failures.
486        assert!(result.is_ok(), "should return Ok with error JSON");
487        let json = result.unwrap();
488        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
489        assert!(
490            parsed["error"].as_str().unwrap().contains("banned pattern"),
491            "error should mention banned pattern: {parsed}"
492        );
493    }
494
495    #[tokio::test]
496    async fn execute_calls_tool() {
497        let server = test_server();
498        let result = server
499            .execute(Parameters(ExecuteInput {
500                code: r#"async () => {
501                    return await forge.callTool("test-server", "tools.echo", { msg: "hi" });
502                }"#
503                .into(),
504            }))
505            .await;
506        match result {
507            Ok(json) => {
508                let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
509                assert_eq!(parsed["server"], "test-server");
510                assert_eq!(parsed["tool"], "tools.echo");
511                assert_eq!(parsed["status"], "ok");
512            }
513            Err(e) => panic!("execute should succeed: {e}"),
514        }
515    }
516
517    #[tokio::test]
518    async fn execute_with_banned_code_returns_error() {
519        let server = test_server();
520        let result = server
521            .execute(Parameters(ExecuteInput {
522                code: r#"async () => { return eval("bad"); }"#.into(),
523            }))
524            .await;
525        // WI-1: Errors return Ok with JSON error field (not Err)
526        assert!(result.is_ok(), "should return Ok with error JSON");
527        let json = result.unwrap();
528        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
529        assert!(
530            parsed["error"].as_str().unwrap().contains("banned pattern"),
531            "error should mention banned pattern: {parsed}"
532        );
533    }
534
535    #[tokio::test]
536    async fn empty_code_returns_error() {
537        let server = test_server();
538        let result = server
539            .search(Parameters(SearchInput { code: "   ".into() }))
540            .await;
541        // WI-1: Errors return Ok with JSON error field (not Err)
542        assert!(result.is_ok(), "should return Ok with error JSON");
543        let json = result.unwrap();
544        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
545        assert!(
546            parsed["error"].as_str().unwrap().contains("empty"),
547            "error should mention empty: {parsed}"
548        );
549    }
550
551    // --- WI-2: Output truncation tests ---
552
553    #[test]
554    fn truncate_result_short_passthrough() {
555        let short = r#"{"data": "hello"}"#.to_string();
556        let result = truncate_result_if_needed(short.clone());
557        assert_eq!(result, short, "short strings should pass through unchanged");
558    }
559
560    #[test]
561    fn truncate_result_long_truncates() {
562        // Create a string longer than MAX_RESULT_CHARS
563        let long = "x".repeat(MAX_RESULT_CHARS + 1000);
564        let result = truncate_result_if_needed(long.clone());
565
566        // Should be valid JSON with truncation metadata
567        let parsed: serde_json::Value =
568            serde_json::from_str(&result).expect("truncated result should be valid JSON");
569        assert_eq!(parsed["_truncated"], true);
570        assert_eq!(parsed["_original_chars"], long.len());
571        let shown = parsed["_shown_chars"].as_u64().unwrap() as usize;
572        assert!(
573            shown < long.len(),
574            "shown chars should be less than original"
575        );
576        assert!(shown > 0, "should show some content");
577        let data = parsed["data"].as_str().unwrap();
578        assert_eq!(data.len(), shown, "data length should match _shown_chars");
579    }
580}
forge_server/lib.rs

forge_server/
lib.rs