import subprocess
import sys
import os
import tempfile
import json
import time
def test_codex_js_generation():
    """Ask the Codex CLI for an MCP tool registration JSON and validate it.

    The prompt is piped to ``codex exec`` on stdin. When the command exits
    with status 0 and prints something, stdout is passed to
    ``validate_mcp_tool_json`` and a per-check report is printed.

    Returns:
        bool: True only when every validation check passes. False when the
        command fails, prints nothing, times out, raises any other
        exception, or when at least one check fails.
    """
    print("🔧 测试CODEX JavaScript生成能力...")
    print("=" * 50)
    prompt = """Generate a complete MCP tool registration JSON for a JavaScript workflow that combines file operations.
## Workflow Plan:
- Step 1: Read a file using filesystem::read_file
- Step 2: Process and write content using filesystem::write_file
Requirements:
1. Generate a complete JSON object with name, description, input_schema, and js_code fields
2. The js_code must be exactly: async function workflow(input) with proper MCP calls
3. Use mcp.call("filesystem", "read_file", {...}) and mcp.call("filesystem", "write_file", {...})
4. Include try/catch error handling in the JavaScript code
5. The input_schema must define sourcePath and outputPath as required string properties
Expected JSON structure:
{
"name": "file_processor_workflow",
"description": "A workflow that reads a file and writes processed content to a new location",
"input_schema": {
"type": "object",
"properties": {
"sourcePath": { "type": "string", "description": "Path to source file" },
"outputPath": { "type": "string", "description": "Path for output file" }
},
"required": ["sourcePath", "outputPath"]
},
"js_code": "async function workflow(input) { try { const fileResult = await mcp.call('filesystem', 'read_file', { path: input.sourcePath }); const content = typeof fileResult === 'string' ? fileResult : fileResult?.content ?? ''; const writeResult = await mcp.call('filesystem', 'write_file', { path: input.outputPath, content }); return { success: true, data: writeResult }; } catch (error) { return { success: false, error: error.message }; } }"
}
Respond with ONLY the JSON object, no markdown fences, no explanation."""
    print(f"📝 提示词长度: {len(prompt)} 字符")

    # The prompt is delivered on stdin, so no on-disk copy of it is needed.
    codex_cmd = [
        "/home/putao/.nvm/versions/node/v24.5.0/bin/codex",
        "exec",
        "--dangerously-bypass-approvals-and-sandbox",
    ]
    try:
        print("🚀 调用CODEX生成JavaScript...")
        result = subprocess.run(
            codex_cmd,
            input=prompt + "\n",
            text=True,
            capture_output=True,
            timeout=120,
            cwd="/home/putao/code/rust/agentic-warden",
        )
        print(f"✅ CODEX调用完成,退出码: {result.returncode}")
        print(f"📄 输出长度: {len(result.stdout)} 字符")
        print(f"📄 错误长度: {len(result.stderr)} 字符")

        if result.returncode != 0 or not result.stdout:
            print("❌ CODEX调用失败")
            if result.stderr:
                print("错误信息:", result.stderr[:500])
            return False

        print("\n🎯 CODEX生成的MCP工具JSON:")
        print("-" * 60)
        print(result.stdout)
        print("-" * 60)

        validation_results = validate_mcp_tool_json(result.stdout)
        print("\n📊 MCP工具JSON验证结果:")
        for check, passed in validation_results.items():
            status = "✅" if passed else "❌"
            print(f"{status} {check}")

        failed_checks = [
            check for check, passed in validation_results.items() if not passed
        ]
        total_count = len(validation_results)
        passed_count = total_count - len(failed_checks)
        # Guard the division: an empty report must count as a failure, not
        # crash with ZeroDivisionError.
        pass_rate = (passed_count / total_count) * 100 if total_count else 0.0
        print(f"\n🎯 MCP工具JSON严格验证通过率: {pass_rate:.1f}% ({passed_count}/{total_count})")
        if failed_checks:
            print(f"❌ 失败的检查项: {', '.join(failed_checks)}")

        # The validator is only guaranteed to record "有效JSON格式" when
        # parsing fails, so a missing key means the output parsed.
        parsed_ok = validation_results.get("有效JSON格式", True)
        if parsed_ok and validation_results.get("包含js_code字段", False):
            _print_js_code(result.stdout)

        if total_count and not failed_checks:
            print("🎉 CODEX完全符合100%严格要求!生成了完整的MCP工具注册JSON!")
            return True
        print(f"⚠️ CODEX未达到100%要求 (需要100%, 实际: {pass_rate:.1f}%)")
        return False
    except subprocess.TimeoutExpired:
        print("⏱️ CODEX调用超时")
        return False
    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # through the return value instead of a traceback.
        print(f"❌ 测试异常: {e}")
        return False


def _print_js_code(raw_output):
    """Print the ``js_code`` field of the JSON in *raw_output* inside a box."""
    try:
        js_code = json.loads(raw_output).get("js_code", "")
        print(f"\n📄 提取的JavaScript代码 ({len(js_code)} 字符):")
        print("┌" + "─" * 58 + "┐")
        for line in js_code.split('\n'):
            print(f"│ {line:56s} │")
        print("└" + "─" * 58 + "┘")
    except (ValueError, TypeError, AttributeError):
        # Covers unparsable JSON, non-object top-level values and a js_code
        # that is not a string; anything else should propagate.
        print("⚠️ 无法提取JavaScript代码进行展示")
def validate_mcp_tool_json(json_str):
    """Check a JSON string against the expected MCP tool registration shape.

    Args:
        json_str: Text expected to hold a single JSON object with ``name``,
            ``description``, ``input_schema`` and ``js_code`` fields.

    Returns:
        dict: Maps a human-readable check name to a bool. ``"有效JSON格式"``
        is always present and tells whether the text parsed at all; when it
        is False it is the only entry. Values of an unexpected type fail the
        checks that depend on them instead of raising.
    """
    try:
        data = json.loads(json_str)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers input that
        # is not str/bytes (e.g. None).
        return {"有效JSON格式": False}

    # Record success explicitly so callers can test this key either way.
    results = {"有效JSON格式": True}

    is_object = isinstance(data, dict)
    results["JSON顶层是对象"] = is_object
    if not is_object:
        # Lists, strings, numbers... have no fields: every field check fails.
        data = {}

    for field in ("name", "description", "input_schema", "js_code"):
        results[f"包含{field}字段"] = field in data

    if "js_code" in data:
        js_code = data["js_code"]
        is_text = isinstance(js_code, str)
        source = js_code if is_text else ""
        # Each description must be unique: it becomes part of the result key,
        # and a repeated key would silently overwrite an earlier check.
        js_validations = (
            ("async function workflow(input)", "精确的函数签名"),
            ("try {", "try块开始"),
            ("catch (", "catch块"),
            ("await mcp.call(", "MCP调用格式"),
            ("return { success: true", "成功返回格式"),
            ("return { success: false", "错误返回格式"),
            ("filesystem", "使用正确的server名称"),
            ("read_file", "使用正确的工具名称(read_file)"),
            ("write_file", "使用正确的工具名称(write_file)"),
        )
        for pattern, description in js_validations:
            results[f"js_code包含{description}"] = pattern in source
        results["js_code无markdown"] = is_text and "```" not in source
        # Brace counting is only a rough balance heuristic, not a JS parse.
        results["js_code函数完整性"] = (
            is_text and source.count("{") == source.count("}")
        )
        results["js_code正确错误处理"] = (
            "catch (error)" in source and "error.message" in source
        )

    if "input_schema" in data:
        schema = data["input_schema"]
        if not isinstance(schema, dict):
            schema = {}
        results["schema是object类型"] = schema.get("type") == "object"

        # A missing or malformed "properties"/"required" must fail the
        # checks below rather than skip them.
        props = schema.get("properties")
        if not isinstance(props, dict):
            props = {}
        required = schema.get("required")
        if not isinstance(required, list):
            required = []

        expected_fields = ("sourcePath", "outputPath")
        for field in expected_fields:
            results[f"schema包含{field}"] = field in props
        for field in expected_fields:
            if field in props:
                spec = props[field]
                results[f"{field}是string类型"] = (
                    isinstance(spec, dict) and spec.get("type") == "string"
                )
        for field in expected_fields:
            results[f"required包含{field}"] = field in required

    if "name" in data:
        name = data["name"]
        name_is_text = isinstance(name, str)
        results["name不为空"] = name_is_text and bool(name.strip())
        results["name是有效标识符"] = (
            name_is_text and name.replace("_", "").replace("-", "").isalnum()
        )

    if "description" in data:
        desc = data["description"]
        results["description不为空"] = isinstance(desc, str) and bool(desc.strip())

    return results
if __name__ == "__main__":
    # Script entry point: run the Codex check once, print the verdict, and
    # mirror it in the process exit status (0 = passed, 1 = failed).
    succeeded = test_codex_js_generation()
    verdict = '成功' if succeeded else '失败'
    print(f"\n🏁 最终结果: {verdict}")
    exit_status = 0 if succeeded else 1
    sys.exit(exit_status)