use crate::error::Result;
use serde_json::{json, Value};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::debug;
use super::client::GLM46Client;
use super::types::{ChatMessage, ChatRequest, ResponseFormat, TokenUsage, Tool};
use crate::thinktool::{ThinkToolContext, ThinkToolModule, ThinkToolModuleConfig, ThinkToolOutput};
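/// Tuning knobs for the GLM-4.6 ThinkTool profile. `context_budget` is the
/// working token budget that the executors split between prompt and
/// completion; the boolean fields toggle bilingual prompt shaping, cost
/// accounting, and structured (JSON) output.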
#[derive(Debug, Clone)]
pub struct GLM46ThinkToolConfig {
pub context_budget: usize,
pub temperature: f32,
pub bilingual_optimization: bool,
pub cost_tracking: bool,
pub structured_output: bool,
}
impl Default for GLM46ThinkToolConfig {
fn default() -> Self {
Self {
            context_budget: 198_000,
            temperature: 0.15,
            bilingual_optimization: true,
cost_tracking: true,
structured_output: true,
}
}
}
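/// GLM-4.6-backed ThinkTool profile: wraps the chat client with an in-memory,
/// TTL-based response cache (keyed by module and prompt) and mutex-guarded
/// usage statistics.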
#[derive(Debug)]
pub struct GLM46ThinkToolProfile {
client: GLM46Client,
config: GLM46ThinkToolConfig,
cached_responses: Arc<std::sync::Mutex<HashMap<String, CachedResponse>>>,
usage_stats: Arc<std::sync::Mutex<UsageStats>>,
}
#[derive(Debug, Clone)]
struct CachedResponse {
response: String,
created_at: std::time::SystemTime,
ttl: std::time::Duration,
confidence_score: f64,
}
#[derive(Debug, Clone, Default)]
struct UsageStats {
total_requests: u64,
total_tokens: u64,
total_cost: f64,
cache_hits: u64,
structured_outputs: u64,
bilingual_reasoning: u64,
}
impl GLM46ThinkToolProfile {
pub fn new(client: GLM46Client, config: GLM46ThinkToolConfig) -> Self {
Self {
client,
config,
cached_responses: Arc::new(std::sync::Mutex::new(HashMap::new())),
usage_stats: Arc::new(std::sync::Mutex::new(UsageStats::default())),
}
}
pub async fn from_env() -> Result<Self> {
let client =
GLM46Client::from_env().map_err(|e| crate::error::Error::Config(e.to_string()))?;
let config = GLM46ThinkToolConfig::default();
Ok(Self::new(client, config))
}
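    /// Runs a single reasoning pass for the named ThinkTool module
    /// ("gigathink", "laserlogic", "bedrock", "proofguard", "brutalhonesty",
    /// or any other string for the generic prompt), consulting the cache
    /// first and falling back to a plain-text wrapper when the response is
    /// not valid JSON.
    ///
    /// A minimal usage sketch, assuming GLM-4.6 credentials are present in
    /// the environment (marked `ignore` because it needs a live endpoint):
    ///
    /// ```ignore
    /// let profile = GLM46ThinkToolProfile::from_env().await?;
    /// let output = profile
    ///     .execute_reasoning_chain("Evaluate this design", "laserlogic")
    ///     .await?;
    /// assert_eq!(output.module, "laserlogic");
    /// ```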
pub async fn execute_reasoning_chain(
&self,
prompt: &str,
module_type: &str,
) -> Result<ThinkToolOutput> {
debug!("Executing GLM-4.6 reasoning for {} module", module_type);
let cache_key = format!("{}:{}", module_type, prompt);
if let Some(cached) = self.get_cached_response(&cache_key).await {
            self.update_usage_stats(true, false, false, None).await;
return self.parse_cached_output(&cached, module_type);
}
let optimized_prompt = self.build_specialized_prompt(prompt, module_type)?;
let response = self
.client
.chat_completion(ChatRequest {
messages: vec![
ChatMessage::system(self.get_module_system_prompt(module_type)),
ChatMessage::user(optimized_prompt),
],
temperature: self.config.temperature,
                max_tokens: self.config.context_budget / 2,
                response_format: Some(ResponseFormat::Structured),
tools: self.get_module_tools(module_type),
tool_choice: None,
stop: None,
top_p: None,
frequency_penalty: None,
presence_penalty: None,
stream: None,
})
.await
.map_err(|e| crate::error::Error::Mcp(e.to_string()))?;
let content = response
.choices
.first()
.map(|c| c.message.content.clone())
.unwrap_or_default();
        self.cache_response(&cache_key, &content, 0.9).await;
        // Record the request exactly once, flagging it as structured only
        // when the body actually parses as JSON.
        let parsed = serde_json::from_str::<Value>(&content).ok();
        self.update_usage_stats(false, parsed.is_some(), false, Some(&response.usage))
            .await;
        if let Some(structured) = parsed {
            self.convert_to_thinktool_output(structured, module_type, &response.usage)
        } else {
Ok(ThinkToolOutput {
module: module_type.to_string(),
                confidence: 0.85,
                output: json!({
"result": content.clone(),
"model": "glm-4.6",
"tokens_used": response.usage.total_tokens,
"cost_estimate": self.estimate_cost(&response.usage),
"context_window_used": content.len(),
"bilingual_optimization": self.config.bilingual_optimization
}),
})
}
}
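    /// Runs a bilingual (Chinese/English) reasoning pass. Unlike
    /// [`execute_reasoning_chain`](Self::execute_reasoning_chain), a JSON
    /// parse failure here propagates as an error instead of falling back to
    /// plain text.
    ///
    /// Sketch with a hypothetical prompt (needs a live endpoint):
    ///
    /// ```ignore
    /// let output = profile
    ///     .execute_bilingual_reasoning("解释这个架构", BilingualMode::Both)
    ///     .await?;
    /// ```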
pub async fn execute_bilingual_reasoning(
&self,
prompt: &str,
language: BilingualMode,
) -> Result<ThinkToolOutput> {
debug!(
"Executing bilingual reasoning with GLM-4.6 in {:?}",
language
);
let bilingual_prompt = self.build_bilingual_prompt(prompt, language.clone())?;
let response = self
.client
.chat_completion(ChatRequest {
messages: vec![
ChatMessage::system(self.get_bilingual_system_prompt(language.clone())),
ChatMessage::user(bilingual_prompt),
],
                // Slightly lower temperature for bilingual precision; clamp
                // at zero so a small configured value cannot go negative.
                temperature: (self.config.temperature - 0.05).max(0.0),
                max_tokens: self.config.context_budget / 2,
response_format: Some(ResponseFormat::Structured),
tools: None,
tool_choice: None,
stop: None,
top_p: None,
frequency_penalty: None,
presence_penalty: None,
stream: None,
})
.await
.map_err(|e| crate::error::Error::Mcp(e.to_string()))?;
        self.update_usage_stats(false, true, true, Some(&response.usage))
            .await;
let content = response
.choices
.first()
.map(|c| c.message.content.clone())
.unwrap_or_default();
self.convert_to_thinktool_output(
serde_json::from_str(&content)?,
"bilingual_reasoning",
&response.usage,
)
}
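    /// Runs a schema-constrained analysis pass; the response is requested
    /// against the JSON schema from `get_comprehensive_schema` (architecture
    /// analyses get a richer schema than the generic case).
    ///
    /// Sketch, where `crate_summary` stands in for whatever source context
    /// you have assembled (needs a live endpoint):
    ///
    /// ```ignore
    /// let output = profile
    ///     .execute_comprehensive_analysis(&crate_summary, AnalysisType::SystemArchitecture)
    ///     .await?;
    /// ```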
pub async fn execute_comprehensive_analysis(
&self,
input: &str,
analysis_type: AnalysisType,
) -> Result<ThinkToolOutput> {
debug!(
"Executing comprehensive analysis with GLM-4.6 ({})",
analysis_type
);
let analysis_prompt = self.build_comprehensive_prompt(input, analysis_type)?;
let response = self
.client
.chat_completion(ChatRequest {
messages: vec![
ChatMessage::system(self.get_comprehensive_system_prompt(analysis_type)),
ChatMessage::user(analysis_prompt),
],
                temperature: 0.1,
                max_tokens: self.config.context_budget / 3,
                response_format: Some(ResponseFormat::JsonSchema {
name: "comprehensive_analysis".to_string(),
schema: self.get_comprehensive_schema(analysis_type),
}),
tools: None,
tool_choice: None,
stop: None,
top_p: None,
frequency_penalty: None,
presence_penalty: None,
stream: None,
})
.await
.map_err(|e| crate::error::Error::Mcp(e.to_string()))?;
        self.update_usage_stats(false, true, true, Some(&response.usage))
            .await;
let content = response
.choices
.first()
.map(|c| c.message.content.clone())
.unwrap_or_default();
self.convert_to_thinktool_output(
serde_json::from_str(&content)?,
&format!("comprehensive_{:?}", analysis_type),
&response.usage,
)
}
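    /// Returns a snapshot of accumulated usage. The rates are fractions of
    /// `total_requests`, so they read as 0.0 before the first request.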
pub async fn get_performance_metrics(&self) -> PerformanceMetrics {
let usage = self
.usage_stats
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
PerformanceMetrics {
total_requests: usage.total_requests,
total_tokens: usage.total_tokens,
total_cost: usage.total_cost,
cache_hit_rate: if usage.total_requests > 0 {
usage.cache_hits as f64 / usage.total_requests as f64
} else {
0.0
},
structured_output_rate: if usage.total_requests > 0 {
usage.structured_outputs as f64 / usage.total_requests as f64
} else {
0.0
},
bilingual_usage_rate: if usage.total_requests > 0 {
usage.bilingual_reasoning as f64 / usage.total_requests as f64
} else {
0.0
},
            // Response-time tracking is not instrumented yet, so this is a
            // placeholder rather than a measured value.
            average_response_time_ms: 0.0,
            // Assumes Claude pricing at roughly 22x GLM-4.6 cost, i.e.
            // savings of 21x what was actually spent; revisit if either
            // provider's rates change.
            cost_savings_vs_claude: usage.total_cost * 21.0,
        }
}
fn build_specialized_prompt(&self, prompt: &str, module_type: &str) -> Result<String> {
let specialized_prompt = match module_type {
"gigathink" => format!(
"As GigaThink specialist using GLM-4.6's expansive thinking, generate 10+ diverse perspectives for:\n\n{}",
prompt
),
"laserlogic" => format!(
"As LaserLogic specialist using GLM-4.6's precise deductive reasoning, analyze the logical structure of:\n\n{}",
prompt
),
"bedrock" => format!(
"As BedRock specialist using GLM-4.6's first principles decomposition, break down to fundamental axioms:\n\n{}",
prompt
),
"proofguard" => format!(
"As ProofGuard specialist using GLM-4.6's multi-source verification, triangulate and validate claims in:\n\n{}",
prompt
),
"brutalhonesty" => format!(
"As BrutalHonesty specialist using GLM-4.6's adversarial critique, challenge and stress-test:\n\n{}",
prompt
),
_ => format!(
"As GLM-4.6 reasoning specialist with 198K context and 70.1% TAU-Bench performance, analyze:\n\n{}",
prompt
),
};
Ok(specialized_prompt)
}
fn get_module_system_prompt(&self, module_type: &str) -> &'static str {
match module_type {
"gigathink" => {
"You are GLM-4.6 GigaThink specialist with elite creative thinking capabilities (70.1% TAU-Bench). Generate 10+ diverse perspectives using your 198K context window and structured output mastery."
}
"laserlogic" => {
"You are GLM-4.6 LaserLogic specialist with precise deductive reasoning. Leverage your structured output mastery and 15% token efficiency for logical analysis."
}
"bedrock" => {
"You are GLM-4.6 BedRock specialist with first principles decomposition. Use your expanded context window to break down to fundamental axioms."
}
"proofguard" => {
"You are GLM-4.6 ProofGuard specialist with multi-source triangulation. Leverage your bilingual support and 198K context for comprehensive verification."
}
"brutalhonesty" => {
"You are GLM-4.6 BrutalHonesty specialist with adversarial critique. Use your elite agentic reasoning to challenge assumptions and identify weaknesses."
}
_ => {
"You are GLM-4.6 ThinkTool specialist with elite agentic coordination (70.1% TAU-Bench), 198K context, and structured output mastery."
}
}
}
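    /// Module-specific tool definitions are not wired up yet; every module
    /// currently runs without tools.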
fn get_module_tools(&self, _module_type: &str) -> Option<Vec<Tool>> {
None
}
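    /// Looks up a cached response, honoring each entry's TTL. Expired
    /// entries are skipped but not evicted; they are only replaced when the
    /// same key is written again.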
async fn get_cached_response(&self, cache_key: &str) -> Option<CachedResponse> {
let cache = self
.cached_responses
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
if let Some(cached) = cache.get(cache_key) {
if cached.created_at.elapsed().unwrap_or_default() < cached.ttl {
return Some(cached.clone());
}
}
None
}
async fn cache_response(&self, cache_key: &str, response: &str, confidence: f64) {
let mut cache = self
.cached_responses
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
cache.insert(
cache_key.to_string(),
CachedResponse {
response: response.to_string(),
created_at: std::time::SystemTime::now(),
                // Cached entries expire after one hour.
                ttl: std::time::Duration::from_secs(3600),
                confidence_score: confidence,
},
);
}
    async fn update_usage_stats(
        &self,
        cache_hit: bool,
        structured: bool,
        bilingual: bool,
        usage: Option<&TokenUsage>,
    ) {
        let mut stats = self
            .usage_stats
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        // Every call is one logical request; cache hits are counted in
        // addition, so the derived hit rate stays within [0, 1].
        stats.total_requests += 1;
        if cache_hit {
            stats.cache_hits += 1;
        }
        if structured {
            stats.structured_outputs += 1;
        }
        if bilingual {
            stats.bilingual_reasoning += 1;
        }
        // Cache hits pass `None` since no tokens were spent.
        if let Some(usage) = usage {
            stats.total_tokens += usage.total_tokens as u64;
            stats.total_cost += self.estimate_cost(usage);
        }
    }
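    /// Rough per-request cost. The per-1K-token rates below are assumed
    /// placeholders, not a quoted price list; adjust them to the actual
    /// GLM-4.6 pricing before relying on the accumulated totals.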
fn estimate_cost(&self, usage: &TokenUsage) -> f64 {
let input_cost = (usage.prompt_tokens as f64 / 1000.0) * 0.0001;
let output_cost = (usage.completion_tokens as f64 / 1000.0) * 0.0002;
input_cost + output_cost
}
fn parse_cached_output(
&self,
cached: &CachedResponse,
module_type: &str,
) -> Result<ThinkToolOutput> {
Ok(ThinkToolOutput {
module: module_type.to_string(),
confidence: cached.confidence_score,
output: serde_json::json!({
"cached": true,
"response": cached.response,
"cached_at": cached.created_at,
"model": "glm-4.6"
}),
})
}
fn convert_to_thinktool_output(
&self,
structured: serde_json::Value,
module_type: &str,
usage: &TokenUsage,
) -> Result<ThinkToolOutput> {
Ok(ThinkToolOutput {
module: module_type.to_string(),
            confidence: 0.95,
            output: json!({
"reasoning": serde_json::to_string_pretty(&structured)?,
"model": "glm-4.6",
"tokens_used": usage.total_tokens,
"cost_estimate": self.estimate_cost(usage),
"structured_output": true,
"bilingual_optimization": self.config.bilingual_optimization
}),
})
}
fn build_bilingual_prompt(&self, prompt: &str, language: BilingualMode) -> Result<String> {
let bilingual_instruction = match language {
BilingualMode::Chinese => "请用中文回答,并保留英文术语以确保技术准确性:",
BilingualMode::English => {
"Please respond in English, preserving Chinese terms for cultural context:"
}
BilingualMode::Both => {
"请用中英文双语回答,确保文化准确性的同时保持技术精确 (Please respond in both Chinese and English, ensuring cultural accuracy while maintaining technical precision):"
}
};
Ok(format!("{}\n\n{}", bilingual_instruction, prompt))
}
fn get_bilingual_system_prompt(&self, language: BilingualMode) -> String {
match language {
            BilingualMode::Chinese => {
                // "You are a GLM-4.6 Chinese reasoning specialist with 70.1%
                // TAU-Bench performance and excellent bilingual ability. Use
                // your 198K context window and structured-output strengths to
                // provide precise, culturally accurate Chinese analysis."
                "您是GLM-4.6中文推理专家,具有70.1% TAU-Bench性能和出色的双语能力。使用您的198K上下文窗口和结构化输出优势,提供精确、文化准确的中文分析。".to_string()
            },
BilingualMode::English => {
"You are GLM-4.6 reasoning specialist for English contexts with elite agentic capabilities (70.1% TAU-Bench) and 198K context window. Provide precise, culturally-aware English analysis.".to_string()
},
BilingualMode::Both => {
"您是GLM-4.6双语推理专家(70.1% TAU-Bench),精通中英文。利用198K上下文窗口和结构化输出优势,提供文化准确、技术精确的双语分析。/ You are GLM-4.6 bilingual reasoning specialist (70.1% TAU-Bench) fluent in Chinese and English. Leverage 198K context window and structured output mastery for culturally-accurate, technically-precise bilingual analysis.".to_string()
},
}
}
fn build_comprehensive_prompt(
&self,
input: &str,
analysis_type: AnalysisType,
) -> Result<String> {
let prompt = match analysis_type {
AnalysisType::SystemArchitecture => {
format!(
"作为GLM-4.6系统架构专家(70.1% TAU-Bench),使用198K上下文窗口进行全面系统分析:\n\n{}\n\n提供:架构评估、依赖分析、风险识别、优化建议。/ As GLM-4.6 system architecture expert (70.1% TAU-Bench), perform comprehensive system analysis using 198K context: [input] Provide: architecture assessment, dependency analysis, risk identification, optimization recommendations.",
input
)
}
            AnalysisType::CrossCrateRelations => {
                // "As a GLM-4.6 cross-crate relations expert, use the full
                // context to analyze dependencies and optimization
                // opportunities:"
                format!(
                    "作为GLM-4.6跨crate关系专家,使用完整上下文分析依赖关系和优化机会:\n\n{}",
                    input
                )
            }
            AnalysisType::PerformanceOptimization => {
                // "As a GLM-4.6 performance optimization expert, analyze and
                // provide optimization strategies:"
                format!("作为GLM-4.6性能优化专家,分析并提供优化策略:\n\n{}", input)
            }
            AnalysisType::SecurityAudit => {
                // "As a GLM-4.6 security audit expert, conduct a
                // comprehensive security analysis:"
                format!("作为GLM-4.6安全审计专家,进行全面安全分析:\n\n{}", input)
            }
        };
Ok(prompt)
}
fn get_comprehensive_system_prompt(&self, analysis_type: AnalysisType) -> String {
match analysis_type {
            AnalysisType::SystemArchitecture => {
                // "You are a GLM-4.6 system architecture expert with 70.1%
                // TAU-Bench performance and 198K-context capability. Conduct
                // comprehensive architecture analysis and provide
                // optimization recommendations."
                "您是GLM-4.6系统架构专家,具有70.1% TAU-Bench性能和198K上下文窗口能力。进行全面的系统架构分析和优化建议。".to_string()
            },
            _ => {
                // "You are a GLM-4.6 comprehensive-analysis expert with
                // industry-leading agentic performance (70.1% TAU-Bench) and
                // a 198K context window. Provide thorough, accurate analysis
                // and recommendations."
                "您是GLM-4.6综合分析专家,拥有行业领先的agentic性能(70.1% TAU-Bench)和198K上下文窗口。提供全面、准确的分析和建议。".to_string()
            },
}
}
fn get_comprehensive_schema(&self, analysis_type: AnalysisType) -> Value {
match analysis_type {
AnalysisType::SystemArchitecture => json!({
"type": "object",
"properties": {
"architecture_assessment": {"type": "object"},
"dependency_analysis": {"type": "object"},
"risk_identification": {"type": "array"},
"optimization_recommendations": {"type": "array"}
},
"required": ["architecture_assessment", "dependency_analysis", "risk_identification", "optimization_recommendations"]
}),
_ => json!({
"type": "object",
"properties": {
"analysis_results": {"type": "object"},
"recommendations": {"type": "array"}
},
"required": ["analysis_results", "recommendations"]
}),
}
}
}
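/// Target language(s) for bilingual reasoning prompts.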
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub enum BilingualMode {
Chinese,
English,
Both,
}
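/// Analysis flavors understood by
/// [`GLM46ThinkToolProfile::execute_comprehensive_analysis`].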
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum AnalysisType {
SystemArchitecture,
CrossCrateRelations,
PerformanceOptimization,
SecurityAudit,
}
impl std::fmt::Display for AnalysisType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AnalysisType::SystemArchitecture => write!(f, "SystemArchitecture"),
AnalysisType::CrossCrateRelations => write!(f, "CrossCrateRelations"),
AnalysisType::PerformanceOptimization => write!(f, "PerformanceOptimization"),
AnalysisType::SecurityAudit => write!(f, "SecurityAudit"),
}
}
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PerformanceMetrics {
pub total_requests: u64,
pub total_tokens: u64,
pub total_cost: f64,
pub cache_hit_rate: f64,
pub structured_output_rate: f64,
pub bilingual_usage_rate: f64,
pub average_response_time_ms: f64,
pub cost_savings_vs_claude: f64,
}
/// GigaThink module wrapper. The profile argument is currently discarded:
/// the synchronous [`ThinkToolModule`] trait cannot drive the async client,
/// so `execute` returns an error directing callers to the async path.
#[derive(Debug)]
pub struct GLM46GigaThink;
impl GLM46GigaThink {
pub fn new(_profile: GLM46ThinkToolProfile) -> Self {
Self {}
}
}
impl ThinkToolModule for GLM46GigaThink {
fn config(&self) -> &ThinkToolModuleConfig {
static CONFIG: std::sync::OnceLock<ThinkToolModuleConfig> = std::sync::OnceLock::new();
CONFIG.get_or_init(|| ThinkToolModuleConfig {
name: "glm46_gigathink".to_string(),
description:
"GLM-4.6 enhanced GigaThink with 198K context and elite agentic capabilities"
.to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
confidence_weight: 0.25,
})
}
fn execute(&self, _context: &ThinkToolContext) -> Result<ThinkToolOutput> {
Err(crate::error::Error::Mcp(
"GLM-4.6 GigaThink requires async execution. Use AsyncThinkToolModule trait instead."
.to_string(),
))
}
}
/// LaserLogic module wrapper. Like [`GLM46GigaThink`], it holds no state
/// until an async execution path is wired up.
#[derive(Debug)]
pub struct GLM46LaserLogic;
impl GLM46LaserLogic {
pub fn new(_profile: GLM46ThinkToolProfile) -> Self {
Self {}
}
}
impl ThinkToolModule for GLM46LaserLogic {
fn config(&self) -> &ThinkToolModuleConfig {
static CONFIG: std::sync::OnceLock<ThinkToolModuleConfig> = std::sync::OnceLock::new();
CONFIG.get_or_init(|| ThinkToolModuleConfig {
name: "glm46_laserlogic".to_string(),
description: "GLM-4.6 enhanced LaserLogic with precise deductive reasoning and structured output mastery".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
confidence_weight: 0.30,
})
}
fn execute(&self, _context: &ThinkToolContext) -> Result<ThinkToolOutput> {
Err(crate::error::Error::Mcp(
"GLM-4.6 LaserLogic requires async execution. Use AsyncThinkToolModule trait instead."
.to_string(),
))
}
}