llama-rs 0.17.0

A high-performance Rust implementation of llama.cpp - LLM inference engine with full GGUF support
Documentation
//! Shared factory for turning a [`CouncilConfig`] into concrete agents.
//!
//! Consumed by both the CLI (`llama-rs council`) and the HTTP handler
//! (`POST /v1/council/*`). Builds and connects all agents concurrently via
//! `try_join_all`.

use std::sync::Arc;

use futures::future::try_join_all;

use crate::council::agent::{Agent, AgentError};
use crate::council::config::{AgentConfig, AgentRole, CouncilConfig};
use crate::council::grpc_agent::GrpcAgent;
use crate::council::http_agent::OpenAiHttpAgent;

/// Errors returned by [`build_agents`].
#[derive(Debug, thiserror::Error)]
pub enum BuildAgentsError {
    /// Constructing or connecting the agent for `endpoint` failed.
    ///
    /// Produced for both gRPC agents (connection attempt) and HTTP agents
    /// (client construction); the underlying [`AgentError`] is kept as the
    /// error source and is also included in the display message.
    #[error("{endpoint}: {source}")]
    Connect {
        /// Endpoint string of the agent that could not be built, as written
        /// in the config.
        endpoint: String,
        /// The agent-level failure that caused this error.
        #[source]
        source: AgentError,
    },
    /// The config contains more than one agent with the synthesizer role.
    #[error("more than one synthesizer configured")]
    DuplicateSynthesizer,
    /// The config contains no agent with the synthesizer role.
    #[error("no synthesizer configured")]
    NoSynthesizer,
}

/// Label given to the synthesizer agent.
const SYNTHESIZER_LABEL: &str = "S";

/// Letters used to build expert labels.
const LABEL_ALPHABET: &[u8; 26] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/// Zero-based alphabet slot occupied by the synthesizer's letter (`S`).
// Lossless widening of a small `u8` difference; `usize::from` is not usable in a const.
const SYNTHESIZER_SLOT: usize = (b'S' - b'A') as usize;

/// Returns the label for the `ordinal`-th expert (zero-based, config order).
///
/// Labels run `A`, `B`, … `Z`, then continue spreadsheet-style with `AA`,
/// `AB`, … so any number of experts gets a purely alphabetic label. The
/// single letter `S` is skipped because it is reserved for the synthesizer.
fn expert_label(ordinal: usize) -> String {
    // Step over the reserved slot so no expert shares the synthesizer's label.
    let slot = if ordinal >= SYNTHESIZER_SLOT {
        ordinal.saturating_add(1)
    } else {
        ordinal
    };

    // Bijective base-26: emit the least-significant letter first, then reverse.
    let mut remaining = slot;
    let mut reversed: Vec<char> = Vec::new();
    loop {
        // `remaining % 26` is always a valid index into the 26-letter alphabet.
        reversed.push(char::from(LABEL_ALPHABET[remaining % 26]));
        remaining /= 26;
        if remaining == 0 {
            break;
        }
        remaining -= 1;
    }
    reversed.into_iter().rev().collect()
}

/// Build and connect every agent in the config concurrently.
///
/// Returns `(experts, synthesizer)`. Experts are labeled `A`, `B`, `C`, … in
/// config order (continuing with `AA`, `AB`, … after `Z`); the synthesizer is
/// labeled `S`, and that letter is never handed to an expert.
///
/// Endpoints starting with `grpc://` are connected as [`GrpcAgent`]s; every
/// other endpoint is built as an [`OpenAiHttpAgent`]. When an agent names an
/// `api_key_env` variable that is unset or not valid Unicode, the agent is
/// built without an API key.
///
/// # Errors
///
/// * [`BuildAgentsError::NoSynthesizer`] / [`BuildAgentsError::DuplicateSynthesizer`]
///   if the config does not contain exactly one synthesizer. This is checked
///   before any agent is built, so a bad config fails without network I/O.
/// * [`BuildAgentsError::Connect`] for the first agent that fails to build or
///   connect.
pub async fn build_agents(
    cfg: &CouncilConfig,
) -> Result<(Vec<Arc<dyn Agent>>, Arc<dyn Agent>), BuildAgentsError> {
    // Validate the role layout up front: connecting can be slow, and a
    // misconfigured council should not open (and then drop) connections.
    let synthesizer_count = cfg
        .agents
        .iter()
        .filter(|a| matches!(a.role, AgentRole::Synthesizer))
        .count();
    match synthesizer_count {
        0 => return Err(BuildAgentsError::NoSynthesizer),
        1 => {}
        _ => return Err(BuildAgentsError::DuplicateSynthesizer),
    }

    // Assign ids in config order before any future runs, so labels do not
    // depend on which connection completes first.
    let mut experts_seen: usize = 0;
    let items: Vec<(AgentRole, String, &AgentConfig)> = cfg
        .agents
        .iter()
        .map(|a| {
            let id = if matches!(a.role, AgentRole::Synthesizer) {
                SYNTHESIZER_LABEL.to_string()
            } else {
                let label = expert_label(experts_seen);
                experts_seen += 1;
                label
            };
            (a.role, id, a)
        })
        .collect();

    let built = try_join_all(items.iter().map(|(role, id, a)| {
        let endpoint = a.endpoint.clone();
        let role = *role;
        let id = id.clone();
        let model = a.model.clone();
        let timeout_ms = a.timeout_ms;
        // A missing or non-Unicode variable yields `None` rather than an error.
        let api_key = a
            .api_key_env
            .as_ref()
            .and_then(|k| std::env::var(k).ok());
        // Separate copy for error reporting: the HTTP branch consumes `endpoint`.
        let endpoint_err = a.endpoint.clone();
        async move {
            let agent: Arc<dyn Agent> = if endpoint.starts_with("grpc://") {
                Arc::new(
                    GrpcAgent::connect(id, model, &endpoint, timeout_ms)
                        .await
                        .map_err(|source| BuildAgentsError::Connect {
                            endpoint: endpoint_err,
                            source,
                        })?,
                )
            } else {
                Arc::new(OpenAiHttpAgent::new(id, model, endpoint, timeout_ms, api_key).map_err(
                    |source| BuildAgentsError::Connect {
                        endpoint: endpoint_err,
                        source,
                    },
                )?)
            };
            Ok::<_, BuildAgentsError>((role, agent))
        }
    }))
    .await?;

    // `try_join_all` preserves input order, so experts stay in config order.
    // Exactly one synthesizer is present (validated above).
    let mut experts: Vec<Arc<dyn Agent>> = Vec::with_capacity(built.len().saturating_sub(1));
    let mut synthesizer: Option<Arc<dyn Agent>> = None;
    for (role, agent) in built {
        match role {
            AgentRole::Expert => experts.push(agent),
            AgentRole::Synthesizer => synthesizer = Some(agent),
        }
    }
    let synthesizer = synthesizer.ok_or(BuildAgentsError::NoSynthesizer)?;
    Ok((experts, synthesizer))
}