// talon_core/config/endpoints.rs

//! HTTP capability endpoint configuration.

use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

use super::auth::EndpointAuthConfig;
use crate::llm::ReasoningEffort;

10/// Wire protocol for embedding HTTP calls.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
12#[serde(rename_all = "kebab-case")]
13pub enum EmbeddingAdapter {
14    /// TEI-compatible `/embed` and `/embed-chunked` routes.
15    Tei,
16    /// OpenAI-compatible `POST /embeddings`.
17    OpenAi,
18}
19
20/// Embedding endpoint configuration (`[embedding]`).
21#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
22#[serde(deny_unknown_fields)]
23pub struct EmbeddingConfig {
24    pub base_url: String,
25    #[serde(flatten)]
26    pub auth: EndpointAuthConfig,
27    pub adapter: EmbeddingAdapter,
28    /// Model slug for query vectors and single-chunk notes.
29    pub model: String,
30    /// Model slug for multi-chunk notes; defaults to [`Self::model`].
31    #[serde(default)]
32    pub document_model: Option<String>,
33    /// Prompt budget hint for query embedding and recall distillation.
34    #[serde(default = "default_embedding_context_tokens")]
35    pub context_tokens: u32,
36}
37
38impl EmbeddingConfig {
39    /// Model slug persisted for multi-chunk document embeddings.
40    #[must_use]
41    pub fn document_model(&self) -> &str {
42        self.document_model.as_deref().unwrap_or(&self.model)
43    }
44}
45
46/// Wire protocol for rerank HTTP calls.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "kebab-case")]
49pub enum RerankAdapter {
50    /// TEI-compatible `/rerank` with `raw_scores` and `truncate`.
51    Tei,
52    /// Common minimal `/rerank` with `{ query, texts, return_text }`.
53    Minimal,
54    /// Cohere-style `/rerank` with `{ query, documents, top_n }`.
55    Cohere,
56    /// Jina-style `/rerank` (same response mapping as [`Self::Cohere`]).
57    Jina,
58}
59
60/// Score scale emitted by the reranker endpoint.
61#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
62#[serde(rename_all = "kebab-case")]
63pub enum RerankScoreScale {
64    /// Endpoint returns normalized scores in `[0, 1]`.
65    #[default]
66    Normalized,
67    /// Endpoint returns raw logits; Talon applies sigmoid before blending.
68    Logits,
69}
70
71/// Rerank endpoint configuration (`[rerank]`).
72#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
73#[serde(deny_unknown_fields)]
74pub struct RerankConfig {
75    pub base_url: String,
76    #[serde(flatten)]
77    pub auth: EndpointAuthConfig,
78    pub adapter: RerankAdapter,
79    pub model: String,
80    #[serde(default)]
81    pub score_scale: RerankScoreScale,
82    /// Whether to ask TEI-style servers to truncate overlong inputs.
83    #[serde(default = "default_rerank_truncate")]
84    pub truncate: bool,
85}
86
87/// Wire protocol for chat HTTP calls.
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
89#[serde(rename_all = "kebab-case")]
90pub enum ChatAdapter {
91    /// OpenAI-compatible `POST /chat/completions`.
92    #[default]
93    OpenAi,
94}
95
96/// Query expansion chat endpoint (`[chat.expansion]`).
97#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
98#[serde(deny_unknown_fields)]
99pub struct ChatExpansionConfig {
100    pub base_url: String,
101    #[serde(flatten)]
102    pub auth: EndpointAuthConfig,
103    #[serde(default)]
104    pub adapter: ChatAdapter,
105    pub model: String,
106    #[serde(default = "default_chat_context_tokens")]
107    pub context_tokens: u32,
108    #[serde(default)]
109    pub max_output_tokens: Option<u32>,
110}
111
112/// Ask chat endpoint overrides (`[chat.ask]`).
113///
114/// Unset transport fields inherit from [`ChatExpansionConfig`].
115#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
116#[serde(deny_unknown_fields)]
117pub struct ChatAskConfig {
118    #[serde(default)]
119    pub base_url: Option<String>,
120    #[serde(flatten)]
121    pub auth: EndpointAuthConfig,
122    #[serde(default)]
123    pub adapter: Option<ChatAdapter>,
124    #[serde(default)]
125    pub model: Option<String>,
126    #[serde(default = "default_ask_context_tokens")]
127    pub context_tokens: u32,
128    #[serde(default = "default_ask_max_output_tokens")]
129    pub max_output_tokens: u32,
130    #[serde(default)]
131    pub planning_enable_thinking: Option<bool>,
132    #[serde(default)]
133    pub synthesis_enable_thinking: Option<bool>,
134    #[serde(default)]
135    pub planning_reasoning_effort: Option<ReasoningEffort>,
136    #[serde(default)]
137    pub synthesis_reasoning_effort: Option<ReasoningEffort>,
138    #[serde(default)]
139    pub planning_chat_template_kwargs: Option<BTreeMap<String, serde_json::Value>>,
140    #[serde(default)]
141    pub synthesis_chat_template_kwargs: Option<BTreeMap<String, serde_json::Value>>,
142}
143
144impl ChatAskConfig {
145    /// Effective chat-completions base URL.
146    #[must_use]
147    pub fn resolved_base_url<'a>(&'a self, expansion: &'a ChatExpansionConfig) -> &'a str {
148        self.base_url
149            .as_deref()
150            .filter(|url| !url.is_empty())
151            .unwrap_or(expansion.base_url.as_str())
152    }
153
154    /// Effective ask model name.
155    #[must_use]
156    pub fn resolved_model<'a>(&'a self, expansion: &'a ChatExpansionConfig) -> &'a str {
157        self.model
158            .as_deref()
159            .filter(|model| !model.is_empty())
160            .unwrap_or(expansion.model.as_str())
161    }
162
163    /// Effective chat adapter.
164    #[must_use]
165    pub fn resolved_adapter(&self, expansion: &ChatExpansionConfig) -> ChatAdapter {
166        self.adapter.unwrap_or(expansion.adapter)
167    }
168
169    /// Merged auth: ask overrides win when set, otherwise expansion auth applies.
170    #[must_use]
171    pub fn resolved_auth(&self, expansion: &ChatExpansionConfig) -> EndpointAuthConfig {
172        EndpointAuthConfig {
173            credential: self
174                .auth
175                .credential
176                .clone()
177                .or_else(|| expansion.auth.credential.clone()),
178            api_key: self
179                .auth
180                .api_key
181                .clone()
182                .or_else(|| expansion.auth.api_key.clone()),
183            api_key_env: self
184                .auth
185                .api_key_env
186                .clone()
187                .or_else(|| expansion.auth.api_key_env.clone()),
188            extra_headers: merge_headers(&expansion.auth.extra_headers, &self.auth.extra_headers),
189        }
190    }
191}
192
193/// Chat capability group (`[chat]`).
194#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
195#[serde(deny_unknown_fields)]
196pub struct ChatSection {
197    pub expansion: ChatExpansionConfig,
198    #[serde(default)]
199    pub ask: ChatAskConfig,
200}
201
202/// MCP runtime configuration.
203#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
204#[serde(deny_unknown_fields)]
205pub struct McpConfig {
206    /// Hook-specific runtime budgets.
207    #[serde(default)]
208    pub hooks: McpHooksConfig,
209}
210
211/// Synchronous MCP hook budgets.
212#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
213#[serde(deny_unknown_fields)]
214pub struct McpHooksConfig {
215    /// Wall-clock deadline for the recall hook, in milliseconds.
216    #[serde(default = "default_recall_deadline_ms")]
217    pub recall_deadline_ms: u64,
218}
219
220impl Default for McpHooksConfig {
221    fn default() -> Self {
222        Self {
223            recall_deadline_ms: default_recall_deadline_ms(),
224        }
225    }
226}
227
/// Layers `override_headers` on top of `base` and returns the combined map.
///
/// HTTP field names are case-insensitive, so a base header is dropped when
/// any override header has the same name ignoring ASCII case; otherwise
/// `Authorization` and `authorization` would both survive the merge and the
/// request could carry two conflicting values. Override entries are inserted
/// with the spelling the override map uses.
fn merge_headers(
    base: &BTreeMap<String, String>,
    override_headers: &BTreeMap<String, String>,
) -> BTreeMap<String, String> {
    // Keep only the base headers that no override header replaces.
    let mut merged: BTreeMap<String, String> = base
        .iter()
        .filter(|(name, _)| {
            !override_headers
                .keys()
                .any(|overriding| overriding.eq_ignore_ascii_case(name))
        })
        .map(|(name, value)| (name.clone(), value.clone()))
        .collect();
    merged.extend(
        override_headers
            .iter()
            .map(|(name, value)| (name.clone(), value.clone())),
    );
    merged
}

/// Serde default for `EmbeddingConfig::context_tokens`.
const fn default_embedding_context_tokens() -> u32 {
    512
}

/// Serde default for `RerankConfig::truncate`.
const fn default_rerank_truncate() -> bool {
    true
}

/// Serde default for `ChatExpansionConfig::context_tokens`.
const fn default_chat_context_tokens() -> u32 {
    32_768
}

/// Serde default for `ChatAskConfig::context_tokens`.
const fn default_ask_context_tokens() -> u32 {
    65_536
}

/// Serde default for `ChatAskConfig::max_output_tokens`.
const fn default_ask_max_output_tokens() -> u32 {
    2_048
}

/// Serde default for `McpHooksConfig::recall_deadline_ms` (milliseconds).
const fn default_recall_deadline_ms() -> u64 {
    20_000
}