capability-example 0.1.0

A framework for managing skill-tree growth and configuration using automated and manual strategies, ideal for AI-driven environments.
Documentation
// ---------------- [ File: capability-example/src/grower_language_model_client.rs ]
crate::ix!();

/// Our language-model client using `async-openai` 0.27.x 
/// with a synchronous facade around the actual async calls.
///
/// Construct with [`GrowerLanguageModelClient::new`]; issue queries with
/// `run_oneshot_query` or `run_oneshot_query_with_repair`.
#[derive(Debug)]
pub struct GrowerLanguageModelClient {

    /// The underlying OpenAI client. In async-openai 0.27.x, it's `Client<Config>`.
    /// `OpenAIClientHandle` is a project-local wrapper parameterized on this
    /// client's error type; wrapped in `Arc` so the handle can be shared cheaply.
    /// NOTE(review): presumably the handle reads API credentials from the
    /// environment — confirm against `OpenAIClientHandle::new`.
    openai_client: Arc<OpenAIClientHandle<GrowerLanguageModelClientError>>,

    /// The "model" name or variant to use (e.g., gpt-3.5-turbo, gpt-4, etc.).
    /// Converted via `to_string()` when building each request.
    model:         LanguageModelType,

    /// Temperature for controlling "creativity" or randomness of output.
    /// Passed through unchanged to the chat-completion request.
    temperature:   f32,

    /// Maximum tokens to generate in completion.
    max_tokens:    u16,
}

impl GrowerLanguageModelClient {

    pub fn new() -> Self {
        Self {
            openai_client: OpenAIClientHandle::new(),
            model:         LanguageModelType::O1Pro,
            temperature:   0.7,
            max_tokens:    8192,
        }
    }

    /// Because the rest of our code wants a sync signature, we create a small
    /// single-threaded tokio runtime for each call. 
    /// This is not ideal for performance, but keeps things simple for now.
    #[instrument(level = "trace", skip(self, query_string))]
    pub fn run_oneshot_query(&self, query_string: &str) -> Result<String, GrowerLanguageModelClientError> {
        trace!("Preparing to run a one-shot query with async-openai. Query length: {} chars", query_string.len());

        // Build a tiny tokio runtime
        let rt = match tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        {
            Ok(r) => r,
            Err(e) => {
                error!("Failed to build a tokio runtime for one-shot query: {:?}", e);
                return Err(GrowerLanguageModelClientError::FailedToBuildTokioRuntimeForOneShotQuery);
            }
        };

        // Run our async method in a blocking manner
        rt.block_on(async {
            self.run_chat_completion(query_string).await
        })
    }

    /// The asynchronous portion that calls `async_openai` and returns raw text.
    #[instrument(level = "trace", skip(self, user_text))]
    async fn run_chat_completion(&self, user_text: &str) -> Result<String, GrowerLanguageModelClientError> {

        trace!("Constructing chat request for model={}", self.model);

        let system_prompt =
            "You are a skill-tree generator. Please produce valid JSON only, no extraneous text."
            .to_string();

        // Build your request using the real enum variants:
        let request = CreateChatCompletionRequestArgs::default()
            .model(self.model.to_string())
            .max_tokens(self.max_tokens)
            .temperature(self.temperature)
            .messages(vec![
                ChatCompletionRequestMessage::System(
                    ChatCompletionRequestSystemMessage {
                        content: ChatCompletionRequestSystemMessageContent::Text(system_prompt),
                        name: None,
                    }
                ),
                ChatCompletionRequestMessage::User(
                    ChatCompletionRequestUserMessage {
                        content: ChatCompletionRequestUserMessageContent::Text(
                                     user_text.to_string()
                                 ),
                                 name: None,
                    }
                ),
            ])
            .build()
            .map_err(|_e| {
                error!("Could not build chat completion request.");
                GrowerLanguageModelClientError::CouldNotBuildChatCompletionRequest
            })?;

        trace!("Sending request to the OpenAI /v1/chat/completions endpoint...");

        // The next line fails because `openai_client.chat()` doesn't exist yet.
        let response = self.openai_client.chat().create(request).await?;

        // Usually we expect at least one choice
        let content = match response.choices.first() {
            Some(choice) => {
                debug!("Successfully got a completion choice from OpenAI");
                choice.message.content.clone().expect("we expect this to be set")
            }
            None => {
                error!("No choices returned by OpenAI completion");
                return Err(GrowerLanguageModelClientError::NoChoicesReturnedByOpenAICompletion);
            }
        };

        trace!("Returning raw content from OpenAI chat response. length={}", content.len());
        Ok(content)
    }

    /// Attempts to parse the model's response into `TargetType` by:
    ///  1) calling `run_oneshot_query()`,
    ///  2) "repairing" the JSON with `repair_json_string()`,
    ///  3) fuzzy-deserializing into the expected type.
    #[instrument(level = "trace", skip(self, query_string))]
    pub fn run_oneshot_query_with_repair<TargetType, TargetErrorType>(
        &self,
        query_string: &str,
    ) -> Result<TargetType, TargetErrorType>
    where
        TargetType: FuzzyFromJsonValue,
        TargetErrorType: From<JsonRepairError>
            + From<GrowerLanguageModelClientError>
            + From<FuzzyFromJsonValueError>,
    {
        trace!("Running one-shot query with JSON repair + fuzzy parse");
        // 1) synchronous query to get raw string from the model
        let language_model_response = self.run_oneshot_query(query_string)?;

        // 2) attempt to repair any partial or malformed JSON
        let language_model_response_json: serde_json::Value =
            repair_json_string(&language_model_response)?;

        // 3) fuzzy parse into the desired type
        let target = TargetType::fuzzy_from_json_value(&language_model_response_json)?;

        Ok(target)
    }
}