yomo 0.7.1

A QUIC-based runtime for AI-LLM tool routing and serverless execution
Documentation
use async_stream::try_stream;
use async_trait::async_trait;
use futures_core::Stream;
use futures_util::StreamExt;
use std::pin::Pin;

use crate::llm_provider::openai_compatible::client::ClientError;
use crate::llm_provider::openai_compatible::{client, mapper};
use crate::llm_provider::{Provider, ProviderError, UnifiedEvent, UnifiedResponse};
use crate::openai_http_mapping::validate_openai_request;
use crate::openai_types::{ChatCompletionRequest, Message, Role, ThinkingConfig, ThinkingType};
use crate::serve_config::ConfigError;

#[derive(Clone)]
pub struct TokenHubProvider {
    client: client::Client,
    model_id: Option<String>,
}

impl TokenHubProvider {
    pub fn new(client: client::Client, model_id: Option<String>) -> Self {
        Self { client, model_id }
    }
}

#[async_trait]
impl Provider for TokenHubProvider {
    fn model_id(&self) -> &str {
        "tokenhub"
    }

    async fn complete(
        &self,
        mut request: ChatCompletionRequest,
    ) -> Result<UnifiedResponse, ProviderError> {
        if let Some(model_id) = &self.model_id {
            request.model = model_id.clone();
        }

        normalize_tokenhub_request(&mut request);
        validate_request(&request)?;

        let response = self
            .client
            .chat_completions(request)
            .await
            .map_err(map_openai_error)?;

        mapper::map_response(response)
    }

    async fn stream<'a>(
        &'a self,
        mut request: ChatCompletionRequest,
    ) -> Result<
        Pin<Box<dyn Stream<Item = Result<UnifiedEvent, ProviderError>> + Send + 'a>>,
        ProviderError,
    > {
        if let Some(model_id) = &self.model_id {
            request.model = model_id.clone();
        }

        normalize_tokenhub_request(&mut request);
        validate_request(&request)?;

        let stream = self
            .client
            .chat_completions_stream(request)
            .await
            .map_err(map_openai_error)?;

        let output = try_stream! {
            futures_util::pin_mut!(stream);
            let mut state = mapper::StreamMapState::default();

            while let Some(item) = stream.next().await {
                let chunk = item.map_err(map_openai_error)?;
                for event in mapper::map_stream_chunk(chunk, &mut state) {
                    yield event;
                }
            }
        };

        Ok(Box::pin(output))
    }
}

pub fn build_tokenhub_provider(
    params: &std::collections::HashMap<String, String>,
) -> Result<TokenHubProvider, ConfigError> {
    let api_key = params
        .get("api_key")
        .ok_or_else(|| ConfigError::InvalidProvider("api_key is required".to_string()))?;

    let mut config = client::Config::new(api_key.to_string());
    let model_id = params.get("model").cloned();
    if let Some(base_url) = params.get("base_url") {
        config = config.base_url(base_url.to_string());
    } else {
        config = config.base_url("https://tokenhub.tencentmaas.com/v1".to_string());
    }

    let client =
        client::Client::new(config).map_err(|err| ConfigError::InvalidProvider(err.to_string()))?;
    Ok(TokenHubProvider::new(client, model_id))
}

fn validate_request(request: &ChatCompletionRequest) -> Result<(), ProviderError> {
    validate_openai_request(request).map_err(ProviderError::Internal)
}

pub(crate) fn normalize_tokenhub_request(request: &mut ChatCompletionRequest) {
    normalize_default_thinking(request);
    normalize_reasoning_effort(request);
    normalize_reasoning_content_for_tool_calls(request);
}

fn normalize_default_thinking(request: &mut ChatCompletionRequest) {
    if request.thinking.is_none() {
        request.thinking = Some(ThinkingConfig {
            kind: ThinkingType::Enabled,
        });
    }
}

fn normalize_reasoning_effort(request: &mut ChatCompletionRequest) {
    let effort = request.reasoning_effort.take();
    let Some(effort) = effort else {
        return;
    };

    request.thinking = Some(ThinkingConfig {
        kind: ThinkingType::Enabled,
    });

    request.reasoning_effort = Some(map_reasoning_effort_to_tokenhub(&effort).to_string());
}

fn map_reasoning_effort_to_tokenhub(effort: &str) -> &str {
    match effort {
        "minimal" => "low",
        "low" => "low",
        "medium" => "medium",
        "high" => "high",
        "max" => "high",
        other => other,
    }
}

fn normalize_reasoning_content_for_tool_calls(request: &mut ChatCompletionRequest) {
    let thinking_enabled = matches!(
        request.thinking.as_ref().map(|value| &value.kind),
        Some(ThinkingType::Enabled)
    );
    if !thinking_enabled {
        return;
    }

    for message in &mut request.messages {
        ensure_reasoning_content_for_assistant_tool_call(message);
    }
}

fn ensure_reasoning_content_for_assistant_tool_call(message: &mut Message) {
    if message.role != Role::Assistant {
        return;
    }

    let has_tool_calls = message
        .tool_calls
        .as_ref()
        .map(|calls| !calls.is_empty())
        .unwrap_or(false);
    if !has_tool_calls {
        return;
    }

    let missing_reasoning_content = message
        .reasoning_content
        .as_deref()
        .map(str::trim)
        .map(|value| value.is_empty())
        .unwrap_or(true);
    if missing_reasoning_content {
        message.reasoning_content = Some(" ".to_string());
    }
}

fn map_openai_error(err: ClientError) -> ProviderError {
    ProviderError::Internal(err.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::openai_types::{Content, ToolCall, ToolCallFunction};

    fn request_with_messages(messages: Vec<Message>) -> ChatCompletionRequest {
        ChatCompletionRequest {
            model: "kimi-k2.6".to_string(),
            messages,
            n: None,
            temperature: None,
            top_p: None,
            presence_penalty: None,
            frequency_penalty: None,
            logprobs: None,
            top_logprobs: None,
            modalities: None,
            audio: None,
            max_completion_tokens: None,
            stop: None,
            response_format: None,
            thinking: None,
            reasoning_effort: None,
            chat_template_kwargs: None,
            prediction: None,
            verbosity: None,
            tools: None,
            tool_choice: None,
            allowed_tools: None,
            parallel_tool_calls: None,
            service_tier: None,
            seed: None,
            stream: None,
            stream_options: None,
            metadata: None,
            agent_context: None,
        }
    }

    fn assistant_tool_call_message(reasoning_content: Option<&str>) -> Message {
        Message {
            role: Role::Assistant,
            content: Content::Text(String::new()),
            reasoning_content: reasoning_content.map(|value| value.to_string()),
            tool_call_id: None,
            tool_calls: Some(vec![ToolCall {
                id: Some("call_1".to_string()),
                r#type: Some("function".to_string()),
                function: ToolCallFunction {
                    name: "search".to_string(),
                    arguments: "{}".to_string(),
                    description: None,
                },
            }]),
        }
    }

    #[test]
    fn normalize_reasoning_effort_maps_and_enables_thinking() {
        let mut request = request_with_messages(vec![Message {
            role: Role::User,
            content: Content::Text("hello".to_string()),
            reasoning_content: None,
            tool_call_id: None,
            tool_calls: None,
        }]);
        request.reasoning_effort = Some("max".to_string());

        normalize_tokenhub_request(&mut request);

        assert!(matches!(
            request.thinking.as_ref().map(|value| &value.kind),
            Some(ThinkingType::Enabled)
        ));
        assert_eq!(request.reasoning_effort.as_deref(), Some("high"));
    }

    #[test]
    fn normalize_enables_thinking_by_default() {
        let mut request = request_with_messages(vec![Message {
            role: Role::User,
            content: Content::Text("hello".to_string()),
            reasoning_content: None,
            tool_call_id: None,
            tool_calls: None,
        }]);

        normalize_tokenhub_request(&mut request);

        assert!(matches!(
            request.thinking.as_ref().map(|value| &value.kind),
            Some(ThinkingType::Enabled)
        ));
    }

    #[test]
    fn normalize_fills_reasoning_content_for_assistant_tool_call_when_thinking_enabled() {
        let mut request = request_with_messages(vec![assistant_tool_call_message(None)]);
        request.thinking = Some(ThinkingConfig {
            kind: ThinkingType::Enabled,
        });

        normalize_tokenhub_request(&mut request);

        assert_eq!(request.messages[0].reasoning_content.as_deref(), Some(" "));
    }

    #[test]
    fn normalize_does_not_fill_reasoning_content_when_thinking_disabled() {
        let mut request = request_with_messages(vec![assistant_tool_call_message(None)]);
        request.thinking = Some(ThinkingConfig {
            kind: ThinkingType::Disabled,
        });

        normalize_tokenhub_request(&mut request);

        assert_eq!(request.messages[0].reasoning_content, None);
    }

    #[test]
    fn normalize_keeps_existing_reasoning_content() {
        let mut request = request_with_messages(vec![assistant_tool_call_message(Some("trace"))]);
        request.thinking = Some(ThinkingConfig {
            kind: ThinkingType::Enabled,
        });

        normalize_tokenhub_request(&mut request);

        assert_eq!(
            request.messages[0].reasoning_content.as_deref(),
            Some("trace")
        );
    }

    #[test]
    fn normalize_fills_reasoning_content_for_empty_string() {
        let mut request = request_with_messages(vec![assistant_tool_call_message(Some(""))]);
        request.thinking = Some(ThinkingConfig {
            kind: ThinkingType::Enabled,
        });

        normalize_tokenhub_request(&mut request);

        assert_eq!(request.messages[0].reasoning_content.as_deref(), Some(" "));
    }

    #[test]
    fn normalize_fills_reasoning_content_with_default_thinking() {
        let mut request = request_with_messages(vec![assistant_tool_call_message(None)]);

        normalize_tokenhub_request(&mut request);

        assert_eq!(request.messages[0].reasoning_content.as_deref(), Some(" "));
    }

    #[test]
    fn map_reasoning_effort_to_tokenhub_values() {
        assert_eq!(map_reasoning_effort_to_tokenhub("minimal"), "low");
        assert_eq!(map_reasoning_effort_to_tokenhub("low"), "low");
        assert_eq!(map_reasoning_effort_to_tokenhub("medium"), "medium");
        assert_eq!(map_reasoning_effort_to_tokenhub("high"), "high");
        assert_eq!(map_reasoning_effort_to_tokenhub("max"), "high");
    }
}